From 67e2b62461b5d02a1e63103e8a02c0bca75e26c7 Mon Sep 17 00:00:00 2001 From: Willy Tarreau Date: Sun, 8 May 2022 11:37:07 +0200 Subject: [PATCH 01/69] floppy: use a statically allocated error counter commit f71f01394f742fc4558b3f9f4c7ef4c4cf3b07c8 upstream. Interrupt handler bad_flp_intr() may cause a UAF on the recently freed request just to increment the error count. There's no point keeping that one in the request anyway, and since the interrupt handler uses a static pointer to the error which cannot be kept in sync with the pending request, better make it use a static error counter that's reset for each new request. This reset now happens when entering redo_fd_request() for a new request via set_next_request(). One initial concern about a single error counter was that errors on one floppy drive could be reported on another one, but this problem is not real given that the driver uses a single drive at a time, as that PC-compatible controllers also have this limitation by using shared signals. As such the error count is always for the "current" drive. Reported-by: Minh Yuan Suggested-by: Linus Torvalds Tested-by: Denis Efremov Signed-off-by: Willy Tarreau Signed-off-by: Linus Torvalds Signed-off-by: Denis Efremov Signed-off-by: Greg Kroah-Hartman --- drivers/block/floppy.c | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index f24e3791e840..e133ff5fa596 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c @@ -521,8 +521,8 @@ static unsigned long fdc_busy; static DECLARE_WAIT_QUEUE_HEAD(fdc_wait); static DECLARE_WAIT_QUEUE_HEAD(command_done); -/* Errors during formatting are counted here. */ -static int format_errors; +/* errors encountered on the current (or last) request */ +static int floppy_errors; /* Format request descriptor. */ static struct format_descr format_req; @@ -542,7 +542,6 @@ static struct format_descr format_req; static char *floppy_track_buffer; static int max_buffer_sectors; -static int *errors; typedef void (*done_f)(int); static const struct cont_t { void (*interrupt)(void); @@ -1435,7 +1434,7 @@ static int interpret_errors(void) if (DP->flags & FTD_MSG) DPRINT("Over/Underrun - retrying\n"); bad = 0; - } else if (*errors >= DP->max_errors.reporting) { + } else if (floppy_errors >= DP->max_errors.reporting) { print_errors(); } if (ST2 & ST2_WC || ST2 & ST2_BC) @@ -2055,7 +2054,7 @@ static void bad_flp_intr(void) if (!next_valid_format()) return; } - err_count = ++(*errors); + err_count = ++floppy_errors; INFBOUND(DRWE->badness, err_count); if (err_count > DP->max_errors.abort) cont->done(0); @@ -2200,9 +2199,8 @@ static int do_format(int drive, struct format_descr *tmp_format_req) return -EINVAL; } format_req = *tmp_format_req; - format_errors = 0; cont = &format_cont; - errors = &format_errors; + floppy_errors = 0; ret = wait_til_done(redo_format, true); if (ret == -EINTR) return -EINTR; @@ -2677,7 +2675,7 @@ static int make_raw_rw_request(void) */ if (!direct || (indirect * 2 > direct * 3 && - *errors < DP->max_errors.read_track && + floppy_errors < DP->max_errors.read_track && ((!probing || (DP->read_track & (1 << DRS->probed_format)))))) { max_size = blk_rq_sectors(current_req); @@ -2801,10 +2799,11 @@ static int set_next_request(void) current_req = list_first_entry_or_null(&floppy_reqs, struct request, queuelist); if (current_req) { - current_req->error_count = 0; + floppy_errors = 0; list_del_init(¤t_req->queuelist); + return 1; } - return current_req != NULL; + return 0; } static void redo_fd_request(void) @@ -2860,7 +2859,6 @@ do_request: _floppy = floppy_type + DP->autodetect[DRS->probed_format]; } else probing = 0; - errors = &(current_req->error_count); tmp = make_raw_rw_request(); if (tmp < 2) { request_done(tmp); From 0d3817cb4ebe57754fa4f4bda87c13e6a2e000d9 Mon Sep 17 00:00:00 2001 From: Miroslav Benes Date: Thu, 26 Mar 2020 10:26:02 +0100 Subject: [PATCH 02/69] x86/xen: Make the boot CPU idle task reliable commit 2f62f36e62daec43aa7b9633ef7f18e042a80bed upstream. The unwinder reports the boot CPU idle task's stack on XEN PV as unreliable, which affects at least live patching. There are two reasons for this. First, the task does not follow the x86 convention that its stack starts at the offset right below saved pt_regs. It allows the unwinder to easily detect the end of the stack and verify it. Second, startup_xen() function does not store the return address before jumping to xen_start_kernel() which confuses the unwinder. Amend both issues by moving the starting point of initial stack in startup_xen() and storing the return address before the jump, which is exactly what call instruction does. Signed-off-by: Miroslav Benes Reviewed-by: Juergen Gross Signed-off-by: Juergen Gross Signed-off-by: Markus Boehme Signed-off-by: Greg Kroah-Hartman --- arch/x86/xen/xen-head.S | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S index c1d8b90aa4e2..deb6abe5d346 100644 --- a/arch/x86/xen/xen-head.S +++ b/arch/x86/xen/xen-head.S @@ -35,7 +35,11 @@ ENTRY(startup_xen) rep __ASM_SIZE(stos) mov %_ASM_SI, xen_start_info - mov $init_thread_union+THREAD_SIZE, %_ASM_SP +#ifdef CONFIG_X86_64 + mov initial_stack(%rip), %rsp +#else + mov pa(initial_stack), %esp +#endif #ifdef CONFIG_X86_64 /* Set up %gs. @@ -51,7 +55,7 @@ ENTRY(startup_xen) wrmsr #endif - jmp xen_start_kernel + call xen_start_kernel END(startup_xen) __FINIT #endif From 05df3bdbc259fb673716f3df6e307d761637d1a2 Mon Sep 17 00:00:00 2001 From: Miroslav Benes Date: Thu, 26 Mar 2020 10:26:03 +0100 Subject: [PATCH 03/69] x86/xen: Make the secondary CPU idle tasks reliable commit c3881eb58d56116c79ac4ee4f40fd15ead124c4b upstream. The unwinder reports the secondary CPU idle tasks' stack on XEN PV as unreliable, which affects at least live patching. cpu_initialize_context() sets up the context of the CPU through VCPUOP_initialise hypercall. After it is woken up, the idle task starts in cpu_bringup_and_idle() function and its stack starts at the offset right below pt_regs. The unwinder correctly detects the end of stack there but it is confused by NULL return address in the last frame. Introduce a wrapper in assembly, which just calls cpu_bringup_and_idle(). The return address is thus pushed on the stack and the wrapper contains the annotation hint for the unwinder regarding the stack state. Signed-off-by: Miroslav Benes Reviewed-by: Juergen Gross Signed-off-by: Juergen Gross Signed-off-by: Markus Boehme Signed-off-by: Greg Kroah-Hartman --- arch/x86/xen/smp_pv.c | 3 ++- arch/x86/xen/xen-head.S | 10 ++++++++++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/arch/x86/xen/smp_pv.c b/arch/x86/xen/smp_pv.c index 64e6ec2c32a7..9d9777ded5f7 100644 --- a/arch/x86/xen/smp_pv.c +++ b/arch/x86/xen/smp_pv.c @@ -53,6 +53,7 @@ static DEFINE_PER_CPU(struct xen_common_irq, xen_irq_work) = { .irq = -1 }; static DEFINE_PER_CPU(struct xen_common_irq, xen_pmu_irq) = { .irq = -1 }; static irqreturn_t xen_irq_work_interrupt(int irq, void *dev_id); +void asm_cpu_bringup_and_idle(void); static void cpu_bringup(void) { @@ -310,7 +311,7 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle) * pointing just below where pt_regs would be if it were a normal * kernel entry. */ - ctxt->user_regs.eip = (unsigned long)cpu_bringup_and_idle; + ctxt->user_regs.eip = (unsigned long)asm_cpu_bringup_and_idle; ctxt->flags = VGCF_IN_KERNEL; ctxt->user_regs.eflags = 0x1000; /* IOPL_RING1 */ ctxt->user_regs.ds = __USER_DS; diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S index deb6abe5d346..591544b4b85a 100644 --- a/arch/x86/xen/xen-head.S +++ b/arch/x86/xen/xen-head.S @@ -58,6 +58,16 @@ ENTRY(startup_xen) call xen_start_kernel END(startup_xen) __FINIT + +#ifdef CONFIG_XEN_PV_SMP +.pushsection .text +SYM_CODE_START(asm_cpu_bringup_and_idle) + UNWIND_HINT_EMPTY + + call cpu_bringup_and_idle +SYM_CODE_END(asm_cpu_bringup_and_idle) +.popsection +#endif #endif .pushsection .text From 0c319b9988357135adaecf17bdfbf385dd31c992 Mon Sep 17 00:00:00 2001 From: Vincent Whitchurch Date: Fri, 10 Dec 2021 17:09:51 +0100 Subject: [PATCH 04/69] rtc: fix use-after-free on device removal [ Upstream commit c8fa17d9f08a448184f03d352145099b5beb618e ] If the irqwork is still scheduled or running while the RTC device is removed, a use-after-free occurs in rtc_timer_do_work(). Cleanup the timerqueue and ensure the work is stopped to fix this. BUG: KASAN: use-after-free in mutex_lock+0x94/0x110 Write of size 8 at addr ffffff801d846338 by task kworker/3:1/41 Workqueue: events rtc_timer_do_work Call trace: mutex_lock+0x94/0x110 rtc_timer_do_work+0xec/0x630 process_one_work+0x5fc/0x1344 ... Allocated by task 551: kmem_cache_alloc_trace+0x384/0x6e0 devm_rtc_allocate_device+0xf0/0x574 devm_rtc_device_register+0x2c/0x12c ... Freed by task 572: kfree+0x114/0x4d0 rtc_device_release+0x64/0x80 device_release+0x8c/0x1f4 kobject_put+0x1c4/0x4b0 put_device+0x20/0x30 devm_rtc_release_device+0x1c/0x30 devm_action_release+0x54/0x90 release_nodes+0x124/0x310 devres_release_group+0x170/0x240 i2c_device_remove+0xd8/0x314 ... Last potentially related work creation: insert_work+0x5c/0x330 queue_work_on+0xcc/0x154 rtc_set_time+0x188/0x5bc rtc_dev_ioctl+0x2ac/0xbd0 ... Signed-off-by: Vincent Whitchurch Signed-off-by: Alexandre Belloni Link: https://lore.kernel.org/r/20211210160951.7718-1-vincent.whitchurch@axis.com Signed-off-by: Sasha Levin --- drivers/rtc/class.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/rtc/class.c b/drivers/rtc/class.c index 9458e6d6686a..8b434213bc7a 100644 --- a/drivers/rtc/class.c +++ b/drivers/rtc/class.c @@ -26,6 +26,15 @@ struct class *rtc_class; static void rtc_device_release(struct device *dev) { struct rtc_device *rtc = to_rtc_device(dev); + struct timerqueue_head *head = &rtc->timerqueue; + struct timerqueue_node *node; + + mutex_lock(&rtc->ops_lock); + while ((node = timerqueue_getnext(head))) + timerqueue_del(head, node); + mutex_unlock(&rtc->ops_lock); + + cancel_work_sync(&rtc->irqwork); ida_simple_remove(&rtc_ida, rtc->id); kfree(rtc); From 4fd3966956465c05dc6ab2594197bc84653a36b2 Mon Sep 17 00:00:00 2001 From: David Gow Date: Thu, 10 Feb 2022 11:43:53 +0800 Subject: [PATCH 05/69] um: Cleanup syscall_handler_t definition/cast, fix warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit f4f03f299a56ce4d73c5431e0327b3b6cb55ebb9 ] The syscall_handler_t type for x86_64 was defined as 'long (*)(void)', but always cast to 'long (*)(long, long, long, long, long, long)' before use. This now triggers a warning (see below). Define syscall_handler_t as the latter instead, and remove the cast. This simplifies the code, and fixes the warning. Warning: In file included from ../arch/um/include/asm/processor-generic.h:13 from ../arch/x86/um/asm/processor.h:41 from ../include/linux/rcupdate.h:30 from ../include/linux/rculist.h:11 from ../include/linux/pid.h:5 from ../include/linux/sched.h:14 from ../include/linux/ptrace.h:6 from ../arch/um/kernel/skas/syscall.c:7: ../arch/um/kernel/skas/syscall.c: In function ‘handle_syscall’: ../arch/x86/um/shared/sysdep/syscalls_64.h:18:11: warning: cast between incompatible function types from ‘long int (*)(void)’ to ‘long int (*)(long int, long int, long int, long int, long int, long int)’ [ -Wcast-function-type] 18 | (((long (*)(long, long, long, long, long, long)) \ | ^ ../arch/x86/um/asm/ptrace.h:36:62: note: in definition of macro ‘PT_REGS_SET_SYSCALL_RETURN’ 36 | #define PT_REGS_SET_SYSCALL_RETURN(r, res) (PT_REGS_AX(r) = (res)) | ^~~ ../arch/um/kernel/skas/syscall.c:46:33: note: in expansion of macro ‘EXECUTE_SYSCALL’ 46 | EXECUTE_SYSCALL(syscall, regs)); | ^~~~~~~~~~~~~~~ Signed-off-by: David Gow Signed-off-by: Richard Weinberger Signed-off-by: Sasha Levin --- arch/x86/um/shared/sysdep/syscalls_64.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/arch/x86/um/shared/sysdep/syscalls_64.h b/arch/x86/um/shared/sysdep/syscalls_64.h index 8a7d5e1da98e..1e6875b4ffd8 100644 --- a/arch/x86/um/shared/sysdep/syscalls_64.h +++ b/arch/x86/um/shared/sysdep/syscalls_64.h @@ -10,13 +10,12 @@ #include #include -typedef long syscall_handler_t(void); +typedef long syscall_handler_t(long, long, long, long, long, long); extern syscall_handler_t *sys_call_table[]; #define EXECUTE_SYSCALL(syscall, regs) \ - (((long (*)(long, long, long, long, long, long)) \ - (*sys_call_table[syscall]))(UPT_SYSCALL_ARG1(®s->regs), \ + (((*sys_call_table[syscall]))(UPT_SYSCALL_ARG1(®s->regs), \ UPT_SYSCALL_ARG2(®s->regs), \ UPT_SYSCALL_ARG3(®s->regs), \ UPT_SYSCALL_ARG4(®s->regs), \ From bb83a744bc671804016e23861a892e9db2aee73f Mon Sep 17 00:00:00 2001 From: Jeff LaBundy Date: Sun, 20 Mar 2022 21:55:27 -0700 Subject: [PATCH 06/69] Input: add bounds checking to input_set_capability() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 409353cbe9fe48f6bc196114c442b1cff05a39bc ] Update input_set_capability() to prevent kernel panic in case the event code exceeds the bitmap for the given event type. Suggested-by: Tomasz Moń Signed-off-by: Jeff LaBundy Reviewed-by: Tomasz Moń Link: https://lore.kernel.org/r/20220320032537.545250-1-jeff@labundy.com Signed-off-by: Dmitry Torokhov Signed-off-by: Sasha Levin --- drivers/input/input.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/drivers/input/input.c b/drivers/input/input.c index e2eb9b9b8363..0e16a9980c6a 100644 --- a/drivers/input/input.c +++ b/drivers/input/input.c @@ -47,6 +47,17 @@ static DEFINE_MUTEX(input_mutex); static const struct input_value input_value_sync = { EV_SYN, SYN_REPORT, 1 }; +static const unsigned int input_max_code[EV_CNT] = { + [EV_KEY] = KEY_MAX, + [EV_REL] = REL_MAX, + [EV_ABS] = ABS_MAX, + [EV_MSC] = MSC_MAX, + [EV_SW] = SW_MAX, + [EV_LED] = LED_MAX, + [EV_SND] = SND_MAX, + [EV_FF] = FF_MAX, +}; + static inline int is_event_supported(unsigned int code, unsigned long *bm, unsigned int max) { @@ -1978,6 +1989,14 @@ EXPORT_SYMBOL(input_get_timestamp); */ void input_set_capability(struct input_dev *dev, unsigned int type, unsigned int code) { + if (type < EV_CNT && input_max_code[type] && + code > input_max_code[type]) { + pr_err("%s: invalid code %u for type %u\n", __func__, code, + type); + dump_stack(); + return; + } + switch (type) { case EV_KEY: __set_bit(code, dev->keybit); From 8c015cd524424e79b31270bd3dbdac9612c6f7db Mon Sep 17 00:00:00 2001 From: Zheng Yongjun Date: Sun, 20 Mar 2022 21:56:38 -0700 Subject: [PATCH 07/69] Input: stmfts - fix reference leak in stmfts_input_open [ Upstream commit 26623eea0da3476446909af96c980768df07bbd9 ] pm_runtime_get_sync() will increment pm usage counter even it failed. Forgetting to call pm_runtime_put_noidle will result in reference leak in stmfts_input_open, so we should fix it. Signed-off-by: Zheng Yongjun Link: https://lore.kernel.org/r/20220317131604.53538-1-zhengyongjun3@huawei.com Signed-off-by: Dmitry Torokhov Signed-off-by: Sasha Levin --- drivers/input/touchscreen/stmfts.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/input/touchscreen/stmfts.c b/drivers/input/touchscreen/stmfts.c index cd8805d71d97..be1dd504d5b1 100644 --- a/drivers/input/touchscreen/stmfts.c +++ b/drivers/input/touchscreen/stmfts.c @@ -339,11 +339,11 @@ static int stmfts_input_open(struct input_dev *dev) err = pm_runtime_get_sync(&sdata->client->dev); if (err < 0) - return err; + goto out; err = i2c_smbus_write_byte(sdata->client, STMFTS_MS_MT_SENSE_ON); if (err) - return err; + goto out; mutex_lock(&sdata->mutex); sdata->running = true; @@ -366,7 +366,9 @@ static int stmfts_input_open(struct input_dev *dev) "failed to enable touchkey\n"); } - return 0; +out: + pm_runtime_put_noidle(&sdata->client->dev); + return err; } static void stmfts_input_close(struct input_dev *dev) From f0213894337afa0b5367ea11c6879c27ece556c7 Mon Sep 17 00:00:00 2001 From: Zheng Yongjun Date: Thu, 17 Mar 2022 13:16:13 +0000 Subject: [PATCH 08/69] crypto: stm32 - fix reference leak in stm32_crc_remove [ Upstream commit e9a36feecee0ee5845f2e0656f50f9942dd0bed3 ] pm_runtime_get_sync() will increment pm usage counter even it failed. Forgetting to call pm_runtime_put_noidle will result in reference leak in stm32_crc_remove, so we should fix it. Signed-off-by: Zheng Yongjun Signed-off-by: Herbert Xu Signed-off-by: Sasha Levin --- drivers/crypto/stm32/stm32-crc32.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/crypto/stm32/stm32-crc32.c b/drivers/crypto/stm32/stm32-crc32.c index fb640e0ea614..2ecc970f5cae 100644 --- a/drivers/crypto/stm32/stm32-crc32.c +++ b/drivers/crypto/stm32/stm32-crc32.c @@ -332,8 +332,10 @@ static int stm32_crc_remove(struct platform_device *pdev) struct stm32_crc *crc = platform_get_drvdata(pdev); int ret = pm_runtime_get_sync(crc->dev); - if (ret < 0) + if (ret < 0) { + pm_runtime_put_noidle(crc->dev); return ret; + } spin_lock(&crc_list.lock); list_del(&crc->list); From c0d86f2a3c031222f625d6264307a35bf09ea874 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 22 Mar 2022 12:48:10 +0100 Subject: [PATCH 09/69] crypto: x86/chacha20 - Avoid spurious jumps to other functions [ Upstream commit 4327d168515fd8b5b92fa1efdf1d219fb6514460 ] The chacha_Nblock_xor_avx512vl() functions all have their own, identical, .LdoneN label, however in one particular spot {2,4} jump to the 8 version instead of their own. Resulting in: arch/x86/crypto/chacha-x86_64.o: warning: objtool: chacha_2block_xor_avx512vl() falls through to next function chacha_8block_xor_avx512vl() arch/x86/crypto/chacha-x86_64.o: warning: objtool: chacha_4block_xor_avx512vl() falls through to next function chacha_8block_xor_avx512vl() Make each function consistently use its own done label. Reported-by: Stephen Rothwell Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Martin Willi Signed-off-by: Herbert Xu Signed-off-by: Sasha Levin --- arch/x86/crypto/chacha-avx512vl-x86_64.S | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/crypto/chacha-avx512vl-x86_64.S b/arch/x86/crypto/chacha-avx512vl-x86_64.S index 848f9c75fd4f..596f91e3a774 100644 --- a/arch/x86/crypto/chacha-avx512vl-x86_64.S +++ b/arch/x86/crypto/chacha-avx512vl-x86_64.S @@ -172,7 +172,7 @@ ENTRY(chacha_2block_xor_avx512vl) # xor remaining bytes from partial register into output mov %rcx,%rax and $0xf,%rcx - jz .Ldone8 + jz .Ldone2 mov %rax,%r9 and $~0xf,%r9 @@ -438,7 +438,7 @@ ENTRY(chacha_4block_xor_avx512vl) # xor remaining bytes from partial register into output mov %rcx,%rax and $0xf,%rcx - jz .Ldone8 + jz .Ldone4 mov %rax,%r9 and $~0xf,%r9 From ee0323cc8bbb7fde782f1aec722de8994832953b Mon Sep 17 00:00:00 2001 From: Kai-Heng Feng Date: Sat, 26 Mar 2022 00:05:00 +0800 Subject: [PATCH 10/69] ALSA: hda/realtek: Enable headset mic on Lenovo P360 [ Upstream commit 5a8738571747c1e275a40b69a608657603867b7e ] Lenovo P360 is another platform equipped with ALC897, and it needs ALC897_FIXUP_HEADSET_MIC_PIN quirk to make its headset mic work. Signed-off-by: Kai-Heng Feng Link: https://lore.kernel.org/r/20220325160501.705221-1-kai.heng.feng@canonical.com Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 851ea79da31c..78b5a0f22a41 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -10233,6 +10233,7 @@ static const struct snd_pci_quirk alc662_fixup_tbl[] = { SND_PCI_QUIRK(0x144d, 0xc051, "Samsung R720", ALC662_FIXUP_IDEAPAD), SND_PCI_QUIRK(0x14cd, 0x5003, "USI", ALC662_FIXUP_USI_HEADSET_MODE), SND_PCI_QUIRK(0x17aa, 0x1036, "Lenovo P520", ALC662_FIXUP_LENOVO_MULTI_CODECS), + SND_PCI_QUIRK(0x17aa, 0x1057, "Lenovo P360", ALC897_FIXUP_HEADSET_MIC_PIN), SND_PCI_QUIRK(0x17aa, 0x32ca, "Lenovo ThinkCentre M80", ALC897_FIXUP_HEADSET_MIC_PIN), SND_PCI_QUIRK(0x17aa, 0x32cb, "Lenovo ThinkCentre M70", ALC897_FIXUP_HEADSET_MIC_PIN), SND_PCI_QUIRK(0x17aa, 0x32cf, "Lenovo ThinkCentre M950", ALC897_FIXUP_HEADSET_MIC_PIN), From 7be785032c0504b92bbb0b1836caa5d11bce13df Mon Sep 17 00:00:00 2001 From: Anton Eidelman Date: Thu, 24 Mar 2022 13:05:11 -0600 Subject: [PATCH 11/69] nvme-multipath: fix hang when disk goes live over reconnect [ Upstream commit a4a6f3c8f61c3cfbda4998ad94596059ad7e4332 ] nvme_mpath_init_identify() invoked from nvme_init_identify() fetches a fresh ANA log from the ctrl. This is essential to have an up to date path states for both existing namespaces and for those scan_work may discover once the ctrl is up. This happens in the following cases: 1) A new ctrl is being connected. 2) An existing ctrl is successfully reconnected. 3) An existing ctrl is being reset. While in (1) ctrl->namespaces is empty, (2 & 3) may have namespaces, and nvme_read_ana_log() may call nvme_update_ns_ana_state(). This result in a hang when the ANA state of an existing namespace changes and makes the disk live: nvme_mpath_set_live() issues IO to the namespace through the ctrl, which does NOT have IO queues yet. See sample hang below. Solution: - nvme_update_ns_ana_state() to call set_live only if ctrl is live - nvme_read_ana_log() call from nvme_mpath_init_identify() therefore only fetches and parses the ANA log; any erros in this process will fail the ctrl setup as appropriate; - a separate function nvme_mpath_update() is called in nvme_start_ctrl(); this parses the ANA log without fetching it. At this point the ctrl is live, therefore, disks can be set live normally. Sample failure: nvme nvme0: starting error recovery nvme nvme0: Reconnecting in 10 seconds... block nvme0n6: no usable path - requeuing I/O INFO: task kworker/u8:3:312 blocked for more than 122 seconds. Tainted: G E 5.14.5-1.el7.elrepo.x86_64 #1 Workqueue: nvme-wq nvme_tcp_reconnect_ctrl_work [nvme_tcp] Call Trace: __schedule+0x2a2/0x7e0 schedule+0x4e/0xb0 io_schedule+0x16/0x40 wait_on_page_bit_common+0x15c/0x3e0 do_read_cache_page+0x1e0/0x410 read_cache_page+0x12/0x20 read_part_sector+0x46/0x100 read_lba+0x121/0x240 efi_partition+0x1d2/0x6a0 bdev_disk_changed.part.0+0x1df/0x430 bdev_disk_changed+0x18/0x20 blkdev_get_whole+0x77/0xe0 blkdev_get_by_dev+0xd2/0x3a0 __device_add_disk+0x1ed/0x310 device_add_disk+0x13/0x20 nvme_mpath_set_live+0x138/0x1b0 [nvme_core] nvme_update_ns_ana_state+0x2b/0x30 [nvme_core] nvme_update_ana_state+0xca/0xe0 [nvme_core] nvme_parse_ana_log+0xac/0x170 [nvme_core] nvme_read_ana_log+0x7d/0xe0 [nvme_core] nvme_mpath_init_identify+0x105/0x150 [nvme_core] nvme_init_identify+0x2df/0x4d0 [nvme_core] nvme_init_ctrl_finish+0x8d/0x3b0 [nvme_core] nvme_tcp_setup_ctrl+0x337/0x390 [nvme_tcp] nvme_tcp_reconnect_ctrl_work+0x24/0x40 [nvme_tcp] process_one_work+0x1bd/0x360 worker_thread+0x50/0x3d0 Signed-off-by: Anton Eidelman Reviewed-by: Sagi Grimberg Signed-off-by: Christoph Hellwig Signed-off-by: Sasha Levin --- drivers/nvme/host/core.c | 1 + drivers/nvme/host/multipath.c | 25 +++++++++++++++++++++++-- drivers/nvme/host/nvme.h | 4 ++++ 3 files changed, 28 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 6a9a42809f97..79e22618817d 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -4047,6 +4047,7 @@ void nvme_start_ctrl(struct nvme_ctrl *ctrl) if (ctrl->queue_count > 1) { nvme_queue_scan(ctrl); nvme_start_queues(ctrl); + nvme_mpath_update(ctrl); } ctrl->created = true; } diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index 4d615337e6e2..811f7b96b551 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -501,8 +501,17 @@ static void nvme_update_ns_ana_state(struct nvme_ana_group_desc *desc, ns->ana_grpid = le32_to_cpu(desc->grpid); ns->ana_state = desc->state; clear_bit(NVME_NS_ANA_PENDING, &ns->flags); - - if (nvme_state_is_live(ns->ana_state)) + /* + * nvme_mpath_set_live() will trigger I/O to the multipath path device + * and in turn to this path device. However we cannot accept this I/O + * if the controller is not live. This may deadlock if called from + * nvme_mpath_init_identify() and the ctrl will never complete + * initialization, preventing I/O from completing. For this case we + * will reprocess the ANA log page in nvme_mpath_update() once the + * controller is ready. + */ + if (nvme_state_is_live(ns->ana_state) && + ns->ctrl->state == NVME_CTRL_LIVE) nvme_mpath_set_live(ns); } @@ -586,6 +595,18 @@ static void nvme_ana_work(struct work_struct *work) nvme_read_ana_log(ctrl); } +void nvme_mpath_update(struct nvme_ctrl *ctrl) +{ + u32 nr_change_groups = 0; + + if (!ctrl->ana_log_buf) + return; + + mutex_lock(&ctrl->ana_lock); + nvme_parse_ana_log(ctrl, &nr_change_groups, nvme_update_ana_state); + mutex_unlock(&ctrl->ana_lock); +} + static void nvme_anatt_timeout(struct timer_list *t) { struct nvme_ctrl *ctrl = from_timer(ctrl, t, anatt_timer); diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 2df90d4355b9..1d1431dd4f9e 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -551,6 +551,7 @@ void nvme_mpath_add_disk(struct nvme_ns *ns, struct nvme_id_ns *id); void nvme_mpath_remove_disk(struct nvme_ns_head *head); int nvme_mpath_init_identify(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id); void nvme_mpath_init_ctrl(struct nvme_ctrl *ctrl); +void nvme_mpath_update(struct nvme_ctrl *ctrl); void nvme_mpath_uninit(struct nvme_ctrl *ctrl); void nvme_mpath_stop(struct nvme_ctrl *ctrl); bool nvme_mpath_clear_current_path(struct nvme_ns *ns); @@ -648,6 +649,9 @@ static inline int nvme_mpath_init_identify(struct nvme_ctrl *ctrl, "Please enable CONFIG_NVME_MULTIPATH for full support of multi-port devices.\n"); return 0; } +static inline void nvme_mpath_update(struct nvme_ctrl *ctrl) +{ +} static inline void nvme_mpath_uninit(struct nvme_ctrl *ctrl) { } From e7947c031ffe9459d7025d8065939e30215c3219 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Tue, 11 Jan 2022 16:57:50 -0600 Subject: [PATCH 12/69] rtc: mc146818-lib: Fix the AltCentury for AMD platforms [ Upstream commit 3ae8fd41573af4fb3a490c9ed947fc936ba87190 ] Setting the century forward has been failing on AMD platforms. There was a previous attempt at fixing this for family 0x17 as part of commit 7ad295d5196a ("rtc: Fix the AltCentury value on AMD/Hygon platform") but this was later reverted due to some problems reported that appeared to stem from an FW bug on a family 0x17 desktop system. The same comments mentioned in the previous commit continue to apply to the newer platforms as well. ``` MC146818 driver use function mc146818_set_time() to set register RTC_FREQ_SELECT(RTC_REG_A)'s bit4-bit6 field which means divider stage reset value on Intel platform to 0x7. While AMD/Hygon RTC_REG_A(0Ah)'s bit4 is defined as DV0 [Reference]: DV0 = 0 selects Bank 0, DV0 = 1 selects Bank 1. Bit5-bit6 is defined as reserved. DV0 is set to 1, it will select Bank 1, which will disable AltCentury register(0x32) access. As UEFI pass acpi_gbl_FADT.century 0x32 (AltCentury), the CMOS write will be failed on code: CMOS_WRITE(century, acpi_gbl_FADT.century). Correct RTC_REG_A bank select bit(DV0) to 0 on AMD/Hygon CPUs, it will enable AltCentury(0x32) register writing and finally setup century as expected. ``` However in closer examination the change previously submitted was also modifying bits 5 & 6 which are declared reserved in the AMD documentation. So instead modify just the DV0 bank selection bit. Being cognizant that there was a failure reported before, split the code change out to a static function that can also be used for exclusions if any regressions such as Mikhail's pop up again. Cc: Jinke Fan Cc: Mikhail Gavrilov Link: https://lore.kernel.org/all/CABXGCsMLob0DC25JS8wwAYydnDoHBSoMh2_YLPfqm3TTvDE-Zw@mail.gmail.com/ Link: https://www.amd.com/system/files/TechDocs/51192_Bolton_FCH_RRG.pdf Signed-off-by: Raul E Rangel Signed-off-by: Mario Limonciello Signed-off-by: Alexandre Belloni Link: https://lore.kernel.org/r/20220111225750.1699-1-mario.limonciello@amd.com Signed-off-by: Sasha Levin --- drivers/rtc/rtc-mc146818-lib.c | 16 +++++++++++++++- include/linux/mc146818rtc.h | 2 ++ 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/drivers/rtc/rtc-mc146818-lib.c b/drivers/rtc/rtc-mc146818-lib.c index 5add637c9ad2..b036ff33fbe6 100644 --- a/drivers/rtc/rtc-mc146818-lib.c +++ b/drivers/rtc/rtc-mc146818-lib.c @@ -99,6 +99,17 @@ unsigned int mc146818_get_time(struct rtc_time *time) } EXPORT_SYMBOL_GPL(mc146818_get_time); +/* AMD systems don't allow access to AltCentury with DV1 */ +static bool apply_amd_register_a_behavior(void) +{ +#ifdef CONFIG_X86 + if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD || + boot_cpu_data.x86_vendor == X86_VENDOR_HYGON) + return true; +#endif + return false; +} + /* Set the current date and time in the real time clock. */ int mc146818_set_time(struct rtc_time *time) { @@ -172,7 +183,10 @@ int mc146818_set_time(struct rtc_time *time) save_control = CMOS_READ(RTC_CONTROL); CMOS_WRITE((save_control|RTC_SET), RTC_CONTROL); save_freq_select = CMOS_READ(RTC_FREQ_SELECT); - CMOS_WRITE((save_freq_select|RTC_DIV_RESET2), RTC_FREQ_SELECT); + if (apply_amd_register_a_behavior()) + CMOS_WRITE((save_freq_select & ~RTC_AMD_BANK_SELECT), RTC_FREQ_SELECT); + else + CMOS_WRITE((save_freq_select|RTC_DIV_RESET2), RTC_FREQ_SELECT); #ifdef CONFIG_MACH_DECSTATION CMOS_WRITE(real_yrs, RTC_DEC_YEAR); diff --git a/include/linux/mc146818rtc.h b/include/linux/mc146818rtc.h index 0661af17a758..1e0205811394 100644 --- a/include/linux/mc146818rtc.h +++ b/include/linux/mc146818rtc.h @@ -86,6 +86,8 @@ struct cmos_rtc_board_info { /* 2 values for divider stage reset, others for "testing purposes only" */ # define RTC_DIV_RESET1 0x60 # define RTC_DIV_RESET2 0x70 + /* In AMD BKDG bit 5 and 6 are reserved, bit 4 is for select dv0 bank */ +# define RTC_AMD_BANK_SELECT 0x10 /* Periodic intr. / Square wave rate select. 0=none, 1=32.8kHz,... 15=2Hz */ # define RTC_RATE_SELECT 0x0F From 83abb076f473b6266ededbf420e92b8b81d2f110 Mon Sep 17 00:00:00 2001 From: Xiaoke Wang Date: Fri, 25 Mar 2022 19:49:41 +0800 Subject: [PATCH 13/69] MIPS: lantiq: check the return value of kzalloc() [ Upstream commit 34123208bbcc8c884a0489f543a23fe9eebb5514 ] kzalloc() is a memory allocation function which can return NULL when some internal memory errors happen. So it is better to check the return value of it to prevent potential wrong memory access or memory leak. Signed-off-by: Xiaoke Wang Signed-off-by: Thomas Bogendoerfer Signed-off-by: Sasha Levin --- arch/mips/lantiq/falcon/sysctrl.c | 2 ++ arch/mips/lantiq/xway/gptu.c | 2 ++ arch/mips/lantiq/xway/sysctrl.c | 46 ++++++++++++++++++++----------- 3 files changed, 34 insertions(+), 16 deletions(-) diff --git a/arch/mips/lantiq/falcon/sysctrl.c b/arch/mips/lantiq/falcon/sysctrl.c index 037b08f3257e..a2837a54d972 100644 --- a/arch/mips/lantiq/falcon/sysctrl.c +++ b/arch/mips/lantiq/falcon/sysctrl.c @@ -167,6 +167,8 @@ static inline void clkdev_add_sys(const char *dev, unsigned int module, { struct clk *clk = kzalloc(sizeof(struct clk), GFP_KERNEL); + if (!clk) + return; clk->cl.dev_id = dev; clk->cl.con_id = NULL; clk->cl.clk = clk; diff --git a/arch/mips/lantiq/xway/gptu.c b/arch/mips/lantiq/xway/gptu.c index 3d5683e75cf1..200fe9ff641d 100644 --- a/arch/mips/lantiq/xway/gptu.c +++ b/arch/mips/lantiq/xway/gptu.c @@ -122,6 +122,8 @@ static inline void clkdev_add_gptu(struct device *dev, const char *con, { struct clk *clk = kzalloc(sizeof(struct clk), GFP_KERNEL); + if (!clk) + return; clk->cl.dev_id = dev_name(dev); clk->cl.con_id = con; clk->cl.clk = clk; diff --git a/arch/mips/lantiq/xway/sysctrl.c b/arch/mips/lantiq/xway/sysctrl.c index 2ee68d6e8bb9..6c2d9779ac72 100644 --- a/arch/mips/lantiq/xway/sysctrl.c +++ b/arch/mips/lantiq/xway/sysctrl.c @@ -311,6 +311,8 @@ static void clkdev_add_pmu(const char *dev, const char *con, bool deactivate, { struct clk *clk = kzalloc(sizeof(struct clk), GFP_KERNEL); + if (!clk) + return; clk->cl.dev_id = dev; clk->cl.con_id = con; clk->cl.clk = clk; @@ -334,6 +336,8 @@ static void clkdev_add_cgu(const char *dev, const char *con, { struct clk *clk = kzalloc(sizeof(struct clk), GFP_KERNEL); + if (!clk) + return; clk->cl.dev_id = dev; clk->cl.con_id = con; clk->cl.clk = clk; @@ -352,24 +356,28 @@ static void clkdev_add_pci(void) struct clk *clk_ext = kzalloc(sizeof(struct clk), GFP_KERNEL); /* main pci clock */ - clk->cl.dev_id = "17000000.pci"; - clk->cl.con_id = NULL; - clk->cl.clk = clk; - clk->rate = CLOCK_33M; - clk->rates = valid_pci_rates; - clk->enable = pci_enable; - clk->disable = pmu_disable; - clk->module = 0; - clk->bits = PMU_PCI; - clkdev_add(&clk->cl); + if (clk) { + clk->cl.dev_id = "17000000.pci"; + clk->cl.con_id = NULL; + clk->cl.clk = clk; + clk->rate = CLOCK_33M; + clk->rates = valid_pci_rates; + clk->enable = pci_enable; + clk->disable = pmu_disable; + clk->module = 0; + clk->bits = PMU_PCI; + clkdev_add(&clk->cl); + } /* use internal/external bus clock */ - clk_ext->cl.dev_id = "17000000.pci"; - clk_ext->cl.con_id = "external"; - clk_ext->cl.clk = clk_ext; - clk_ext->enable = pci_ext_enable; - clk_ext->disable = pci_ext_disable; - clkdev_add(&clk_ext->cl); + if (clk_ext) { + clk_ext->cl.dev_id = "17000000.pci"; + clk_ext->cl.con_id = "external"; + clk_ext->cl.clk = clk_ext; + clk_ext->enable = pci_ext_enable; + clk_ext->disable = pci_ext_disable; + clkdev_add(&clk_ext->cl); + } } /* xway socs can generate clocks on gpio pins */ @@ -389,9 +397,15 @@ static void clkdev_add_clkout(void) char *name; name = kzalloc(sizeof("clkout0"), GFP_KERNEL); + if (!name) + continue; sprintf(name, "clkout%d", i); clk = kzalloc(sizeof(struct clk), GFP_KERNEL); + if (!clk) { + kfree(name); + continue; + } clk->cl.dev_id = "1f103000.cgu"; clk->cl.con_id = name; clk->cl.clk = clk; From 232128f6e60f37a98cd6566d6d9685f0d43d0b77 Mon Sep 17 00:00:00 2001 From: Jakob Koschel Date: Fri, 1 Apr 2022 00:03:48 +0200 Subject: [PATCH 14/69] drbd: remove usage of list iterator variable after loop [ Upstream commit 901aeda62efa21f2eae937bccb71b49ae531be06 ] In preparation to limit the scope of a list iterator to the list traversal loop, use a dedicated pointer to iterate through the list [1]. Since that variable should not be used past the loop iteration, a separate variable is used to 'remember the current location within the loop'. To either continue iterating from that position or skip the iteration (if the previous iteration was complete) list_prepare_entry() is used. Link: https://lore.kernel.org/all/CAHk-=wgRr_D8CB-D9Kg-c=EHreAsk5SqXPwr9Y7k9sA6cWXJ6w@mail.gmail.com/ [1] Signed-off-by: Jakob Koschel Link: https://lore.kernel.org/r/20220331220349.885126-1-jakobkoschel@gmail.com Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- drivers/block/drbd/drbd_main.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index a18155cdce41..ba10fa24fa1f 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -183,7 +183,7 @@ void tl_release(struct drbd_connection *connection, unsigned int barrier_nr, unsigned int set_size) { struct drbd_request *r; - struct drbd_request *req = NULL; + struct drbd_request *req = NULL, *tmp = NULL; int expect_epoch = 0; int expect_size = 0; @@ -237,8 +237,11 @@ void tl_release(struct drbd_connection *connection, unsigned int barrier_nr, * to catch requests being barrier-acked "unexpectedly". * It usually should find the same req again, or some READ preceding it. */ list_for_each_entry(req, &connection->transfer_log, tl_requests) - if (req->epoch == expect_epoch) + if (req->epoch == expect_epoch) { + tmp = req; break; + } + req = list_prepare_entry(tmp, &connection->transfer_log, tl_requests); list_for_each_entry_safe_from(req, r, &connection->transfer_log, tl_requests) { if (req->epoch != expect_epoch) break; From 54f7358be14dcfdb3f4210e88585ea7c3f95cdac Mon Sep 17 00:00:00 2001 From: Tzung-Bi Shih Date: Wed, 9 Feb 2022 13:11:30 +0800 Subject: [PATCH 15/69] platform/chrome: cros_ec_debugfs: detach log reader wq from devm [ Upstream commit 0e8eb5e8acbad19ac2e1856b2fb2320184299b33 ] Debugfs console_log uses devm memory (e.g. debug_info in cros_ec_console_log_poll()). However, lifecycles of device and debugfs are independent. An use-after-free issue is observed if userland program operates the debugfs after the memory has been freed. The call trace: do_raw_spin_lock _raw_spin_lock_irqsave remove_wait_queue ep_unregister_pollwait ep_remove do_epoll_ctl A Python example to reproduce the issue: ... import select ... p = select.epoll() ... f = open('/sys/kernel/debug/cros_scp/console_log') ... p.register(f, select.POLLIN) ... p.poll(1) [(4, 1)] # 4=fd, 1=select.POLLIN [ shutdown cros_scp at the point ] ... p.poll(1) [(4, 16)] # 4=fd, 16=select.POLLHUP ... p.unregister(f) An use-after-free issue raises here. It called epoll_ctl with EPOLL_CTL_DEL which in turn to use the workqueue in the devm (i.e. log_wq). Detaches log reader's workqueue from devm to make sure it is persistent even if the device has been removed. Signed-off-by: Tzung-Bi Shih Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20220209051130.386175-1-tzungbi@google.com Signed-off-by: Benson Leung Signed-off-by: Sasha Levin --- drivers/platform/chrome/cros_ec_debugfs.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/platform/chrome/cros_ec_debugfs.c b/drivers/platform/chrome/cros_ec_debugfs.c index 6ae484989d1f..c4b57e1df192 100644 --- a/drivers/platform/chrome/cros_ec_debugfs.c +++ b/drivers/platform/chrome/cros_ec_debugfs.c @@ -26,6 +26,9 @@ #define CIRC_ADD(idx, size, value) (((idx) + (value)) & ((size) - 1)) +/* waitqueue for log readers */ +static DECLARE_WAIT_QUEUE_HEAD(cros_ec_debugfs_log_wq); + /** * struct cros_ec_debugfs - EC debugging information. * @@ -34,7 +37,6 @@ * @log_buffer: circular buffer for console log information * @read_msg: preallocated EC command and buffer to read console log * @log_mutex: mutex to protect circular buffer - * @log_wq: waitqueue for log readers * @log_poll_work: recurring task to poll EC for new console log data * @panicinfo_blob: panicinfo debugfs blob */ @@ -45,7 +47,6 @@ struct cros_ec_debugfs { struct circ_buf log_buffer; struct cros_ec_command *read_msg; struct mutex log_mutex; - wait_queue_head_t log_wq; struct delayed_work log_poll_work; /* EC panicinfo */ struct debugfs_blob_wrapper panicinfo_blob; @@ -108,7 +109,7 @@ static void cros_ec_console_log_work(struct work_struct *__work) buf_space--; } - wake_up(&debug_info->log_wq); + wake_up(&cros_ec_debugfs_log_wq); } mutex_unlock(&debug_info->log_mutex); @@ -142,7 +143,7 @@ static ssize_t cros_ec_console_log_read(struct file *file, char __user *buf, mutex_unlock(&debug_info->log_mutex); - ret = wait_event_interruptible(debug_info->log_wq, + ret = wait_event_interruptible(cros_ec_debugfs_log_wq, CIRC_CNT(cb->head, cb->tail, LOG_SIZE)); if (ret < 0) return ret; @@ -174,7 +175,7 @@ static __poll_t cros_ec_console_log_poll(struct file *file, struct cros_ec_debugfs *debug_info = file->private_data; __poll_t mask = 0; - poll_wait(file, &debug_info->log_wq, wait); + poll_wait(file, &cros_ec_debugfs_log_wq, wait); mutex_lock(&debug_info->log_mutex); if (CIRC_CNT(debug_info->log_buffer.head, @@ -359,7 +360,6 @@ static int cros_ec_create_console_log(struct cros_ec_debugfs *debug_info) debug_info->log_buffer.tail = 0; mutex_init(&debug_info->log_mutex); - init_waitqueue_head(&debug_info->log_wq); debugfs_create_file("console_log", S_IFREG | 0444, debug_info->dir, debug_info, &cros_ec_console_log_fops); From 77b71a4c876701ecc0ed0bd1285785009badf610 Mon Sep 17 00:00:00 2001 From: linyujun Date: Fri, 1 Apr 2022 10:52:47 +0100 Subject: [PATCH 16/69] ARM: 9191/1: arm/stacktrace, kasan: Silence KASAN warnings in unwind_frame() [ Upstream commit 9be4c88bb7924f68f88cfd47d925c2d046f51a73 ] The following KASAN warning is detected by QEMU. ================================================================== BUG: KASAN: stack-out-of-bounds in unwind_frame+0x508/0x870 Read of size 4 at addr c36bba90 by task cat/163 CPU: 1 PID: 163 Comm: cat Not tainted 5.10.0-rc1 #40 Hardware name: ARM-Versatile Express [] (unwind_backtrace) from [] (show_stack+0x10/0x14) [] (show_stack) from [] (dump_stack+0x98/0xb0) [] (dump_stack) from [] (print_address_description.constprop.0+0x58/0x4bc) [] (print_address_description.constprop.0) from [] (kasan_report+0x154/0x170) [] (kasan_report) from [] (unwind_frame+0x508/0x870) [] (unwind_frame) from [] (__save_stack_trace+0x110/0x134) [] (__save_stack_trace) from [] (stack_trace_save+0x8c/0xb4) [] (stack_trace_save) from [] (kasan_set_track+0x38/0x60) [] (kasan_set_track) from [] (kasan_set_free_info+0x20/0x2c) [] (kasan_set_free_info) from [] (__kasan_slab_free+0xec/0x120) [] (__kasan_slab_free) from [] (kmem_cache_free+0x7c/0x334) [] (kmem_cache_free) from [] (rcu_core+0x390/0xccc) [] (rcu_core) from [] (__do_softirq+0x180/0x518) [] (__do_softirq) from [] (irq_exit+0x9c/0xe0) [] (irq_exit) from [] (__handle_domain_irq+0xb0/0x110) [] (__handle_domain_irq) from [] (gic_handle_irq+0xa0/0xb8) [] (gic_handle_irq) from [] (__irq_svc+0x6c/0x94) Exception stack(0xc36bb928 to 0xc36bb970) b920: c36bb9c0 00000000 c0126919 c0101228 c36bb9c0 b76d7730 b940: c36b8000 c36bb9a0 c3335b00 c01ce0d8 00000003 c36bba3c c36bb940 c36bb978 b960: c010e298 c011373c 60000013 ffffffff [] (__irq_svc) from [] (unwind_frame+0x0/0x870) [] (unwind_frame) from [<00000000>] (0x0) The buggy address belongs to the page: page:(ptrval) refcount:0 mapcount:0 mapping:00000000 index:0x0 pfn:0x636bb flags: 0x0() raw: 00000000 00000000 ef867764 00000000 00000000 00000000 ffffffff 00000000 page dumped because: kasan: bad access detected addr c36bba90 is located in stack of task cat/163 at offset 48 in frame: stack_trace_save+0x0/0xb4 this frame has 1 object: [32, 48) 'trace' Memory state around the buggy address: c36bb980: f1 f1 f1 f1 00 04 f2 f2 00 00 f3 f3 00 00 00 00 c36bba00: 00 00 00 00 00 00 00 00 00 00 00 00 f1 f1 f1 f1 >c36bba80: 00 00 f3 f3 00 00 00 00 00 00 00 00 00 00 00 00 ^ c36bbb00: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 c36bbb80: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ================================================================== There is a same issue on x86 and has been resolved by the commit f7d27c35ddff ("x86/mm, kasan: Silence KASAN warnings in get_wchan()"). The solution could be applied to arm architecture too. Signed-off-by: Lin Yujun Reported-by: He Ying Signed-off-by: Russell King (Oracle) Signed-off-by: Sasha Levin --- arch/arm/kernel/stacktrace.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/arm/kernel/stacktrace.c b/arch/arm/kernel/stacktrace.c index db798eac7431..824774999825 100644 --- a/arch/arm/kernel/stacktrace.c +++ b/arch/arm/kernel/stacktrace.c @@ -53,17 +53,17 @@ int notrace unwind_frame(struct stackframe *frame) return -EINVAL; frame->sp = frame->fp; - frame->fp = *(unsigned long *)(fp); - frame->pc = *(unsigned long *)(fp + 4); + frame->fp = READ_ONCE_NOCHECK(*(unsigned long *)(fp)); + frame->pc = READ_ONCE_NOCHECK(*(unsigned long *)(fp + 4)); #else /* check current frame pointer is within bounds */ if (fp < low + 12 || fp > high - 4) return -EINVAL; /* restore the registers from the stack frame */ - frame->fp = *(unsigned long *)(fp - 12); - frame->sp = *(unsigned long *)(fp - 8); - frame->pc = *(unsigned long *)(fp - 4); + frame->fp = READ_ONCE_NOCHECK(*(unsigned long *)(fp - 12)); + frame->sp = READ_ONCE_NOCHECK(*(unsigned long *)(fp - 8)); + frame->pc = READ_ONCE_NOCHECK(*(unsigned long *)(fp - 4)); #endif return 0; From 307d021b1a7f33048b624f7aaeaa75e3eae571f1 Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Fri, 1 Apr 2022 11:28:18 -0700 Subject: [PATCH 17/69] nilfs2: fix lockdep warnings in page operations for btree nodes [ Upstream commit e897be17a441fa637cd166fc3de1445131e57692 ] Patch series "nilfs2 lockdep warning fixes". The first two are to resolve the lockdep warning issue, and the last one is the accompanying cleanup and low priority. Based on your comment, this series solves the issue by separating inode object as needed. Since I was worried about the impact of the object composition changes, I tested the series carefully not to cause regressions especially for delicate functions such like disk space reclamation and snapshots. This patch (of 3): If CONFIG_LOCKDEP is enabled, nilfs2 hits lockdep warnings at inode_to_wb() during page/folio operations for btree nodes: WARNING: CPU: 0 PID: 6575 at include/linux/backing-dev.h:269 inode_to_wb include/linux/backing-dev.h:269 [inline] WARNING: CPU: 0 PID: 6575 at include/linux/backing-dev.h:269 folio_account_dirtied mm/page-writeback.c:2460 [inline] WARNING: CPU: 0 PID: 6575 at include/linux/backing-dev.h:269 __folio_mark_dirty+0xa7c/0xe30 mm/page-writeback.c:2509 Modules linked in: ... RIP: 0010:inode_to_wb include/linux/backing-dev.h:269 [inline] RIP: 0010:folio_account_dirtied mm/page-writeback.c:2460 [inline] RIP: 0010:__folio_mark_dirty+0xa7c/0xe30 mm/page-writeback.c:2509 ... Call Trace: __set_page_dirty include/linux/pagemap.h:834 [inline] mark_buffer_dirty+0x4e6/0x650 fs/buffer.c:1145 nilfs_btree_propagate_p fs/nilfs2/btree.c:1889 [inline] nilfs_btree_propagate+0x4ae/0xea0 fs/nilfs2/btree.c:2085 nilfs_bmap_propagate+0x73/0x170 fs/nilfs2/bmap.c:337 nilfs_collect_dat_data+0x45/0xd0 fs/nilfs2/segment.c:625 nilfs_segctor_apply_buffers+0x14a/0x470 fs/nilfs2/segment.c:1009 nilfs_segctor_scan_file+0x47a/0x700 fs/nilfs2/segment.c:1048 nilfs_segctor_collect_blocks fs/nilfs2/segment.c:1224 [inline] nilfs_segctor_collect fs/nilfs2/segment.c:1494 [inline] nilfs_segctor_do_construct+0x14f3/0x6c60 fs/nilfs2/segment.c:2036 nilfs_segctor_construct+0x7a7/0xb30 fs/nilfs2/segment.c:2372 nilfs_segctor_thread_construct fs/nilfs2/segment.c:2480 [inline] nilfs_segctor_thread+0x3c3/0xf90 fs/nilfs2/segment.c:2563 kthread+0x405/0x4f0 kernel/kthread.c:327 ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:295 This is because nilfs2 uses two page caches for each inode and inode->i_mapping never points to one of them, the btree node cache. This causes inode_to_wb(inode) to refer to a different page cache than the caller page/folio operations such like __folio_start_writeback(), __folio_end_writeback(), or __folio_mark_dirty() acquired the lock. This patch resolves the issue by allocating and using an additional inode to hold the page cache of btree nodes. The inode is attached one-to-one to the traditional nilfs2 inode if it requires a block mapping with b-tree. This setup change is in memory only and does not affect the disk format. Link: https://lkml.kernel.org/r/1647867427-30498-1-git-send-email-konishi.ryusuke@gmail.com Link: https://lkml.kernel.org/r/1647867427-30498-2-git-send-email-konishi.ryusuke@gmail.com Link: https://lore.kernel.org/r/YXrYvIo8YRnAOJCj@casper.infradead.org Link: https://lore.kernel.org/r/9a20b33d-b38f-b4a2-4742-c1eb5b8e4d6c@redhat.com Signed-off-by: Ryusuke Konishi Reported-by: syzbot+0d5b462a6f07447991b3@syzkaller.appspotmail.com Reported-by: syzbot+34ef28bb2aeb28724aa0@syzkaller.appspotmail.com Reported-by: Hao Sun Reported-by: David Hildenbrand Tested-by: Ryusuke Konishi Cc: Matthew Wilcox Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- fs/nilfs2/btnode.c | 23 ++++++++-- fs/nilfs2/btnode.h | 1 + fs/nilfs2/btree.c | 27 ++++++++---- fs/nilfs2/gcinode.c | 7 +-- fs/nilfs2/inode.c | 104 ++++++++++++++++++++++++++++++++++++++------ fs/nilfs2/mdt.c | 7 +-- fs/nilfs2/nilfs.h | 14 +++--- fs/nilfs2/page.c | 7 ++- fs/nilfs2/segment.c | 9 ++-- fs/nilfs2/super.c | 5 +-- 10 files changed, 154 insertions(+), 50 deletions(-) diff --git a/fs/nilfs2/btnode.c b/fs/nilfs2/btnode.c index 4391fd3abd8f..e00e184b1261 100644 --- a/fs/nilfs2/btnode.c +++ b/fs/nilfs2/btnode.c @@ -20,6 +20,23 @@ #include "page.h" #include "btnode.h" + +/** + * nilfs_init_btnc_inode - initialize B-tree node cache inode + * @btnc_inode: inode to be initialized + * + * nilfs_init_btnc_inode() sets up an inode for B-tree node cache. + */ +void nilfs_init_btnc_inode(struct inode *btnc_inode) +{ + struct nilfs_inode_info *ii = NILFS_I(btnc_inode); + + btnc_inode->i_mode = S_IFREG; + ii->i_flags = 0; + memset(&ii->i_bmap_data, 0, sizeof(struct nilfs_bmap)); + mapping_set_gfp_mask(btnc_inode->i_mapping, GFP_NOFS); +} + void nilfs_btnode_cache_clear(struct address_space *btnc) { invalidate_mapping_pages(btnc, 0, -1); @@ -29,7 +46,7 @@ void nilfs_btnode_cache_clear(struct address_space *btnc) struct buffer_head * nilfs_btnode_create_block(struct address_space *btnc, __u64 blocknr) { - struct inode *inode = NILFS_BTNC_I(btnc); + struct inode *inode = btnc->host; struct buffer_head *bh; bh = nilfs_grab_buffer(inode, btnc, blocknr, BIT(BH_NILFS_Node)); @@ -57,7 +74,7 @@ int nilfs_btnode_submit_block(struct address_space *btnc, __u64 blocknr, struct buffer_head **pbh, sector_t *submit_ptr) { struct buffer_head *bh; - struct inode *inode = NILFS_BTNC_I(btnc); + struct inode *inode = btnc->host; struct page *page; int err; @@ -157,7 +174,7 @@ int nilfs_btnode_prepare_change_key(struct address_space *btnc, struct nilfs_btnode_chkey_ctxt *ctxt) { struct buffer_head *obh, *nbh; - struct inode *inode = NILFS_BTNC_I(btnc); + struct inode *inode = btnc->host; __u64 oldkey = ctxt->oldkey, newkey = ctxt->newkey; int err; diff --git a/fs/nilfs2/btnode.h b/fs/nilfs2/btnode.h index 0f88dbc9bcb3..05ab64d354dc 100644 --- a/fs/nilfs2/btnode.h +++ b/fs/nilfs2/btnode.h @@ -30,6 +30,7 @@ struct nilfs_btnode_chkey_ctxt { struct buffer_head *newbh; }; +void nilfs_init_btnc_inode(struct inode *btnc_inode); void nilfs_btnode_cache_clear(struct address_space *); struct buffer_head *nilfs_btnode_create_block(struct address_space *btnc, __u64 blocknr); diff --git a/fs/nilfs2/btree.c b/fs/nilfs2/btree.c index 23e043eca237..919d1238ce45 100644 --- a/fs/nilfs2/btree.c +++ b/fs/nilfs2/btree.c @@ -58,7 +58,8 @@ static void nilfs_btree_free_path(struct nilfs_btree_path *path) static int nilfs_btree_get_new_block(const struct nilfs_bmap *btree, __u64 ptr, struct buffer_head **bhp) { - struct address_space *btnc = &NILFS_BMAP_I(btree)->i_btnode_cache; + struct inode *btnc_inode = NILFS_BMAP_I(btree)->i_assoc_inode; + struct address_space *btnc = btnc_inode->i_mapping; struct buffer_head *bh; bh = nilfs_btnode_create_block(btnc, ptr); @@ -470,7 +471,8 @@ static int __nilfs_btree_get_block(const struct nilfs_bmap *btree, __u64 ptr, struct buffer_head **bhp, const struct nilfs_btree_readahead_info *ra) { - struct address_space *btnc = &NILFS_BMAP_I(btree)->i_btnode_cache; + struct inode *btnc_inode = NILFS_BMAP_I(btree)->i_assoc_inode; + struct address_space *btnc = btnc_inode->i_mapping; struct buffer_head *bh, *ra_bh; sector_t submit_ptr = 0; int ret; @@ -1742,6 +1744,10 @@ nilfs_btree_prepare_convert_and_insert(struct nilfs_bmap *btree, __u64 key, dat = nilfs_bmap_get_dat(btree); } + ret = nilfs_attach_btree_node_cache(&NILFS_BMAP_I(btree)->vfs_inode); + if (ret < 0) + return ret; + ret = nilfs_bmap_prepare_alloc_ptr(btree, dreq, dat); if (ret < 0) return ret; @@ -1914,7 +1920,7 @@ static int nilfs_btree_prepare_update_v(struct nilfs_bmap *btree, path[level].bp_ctxt.newkey = path[level].bp_newreq.bpr_ptr; path[level].bp_ctxt.bh = path[level].bp_bh; ret = nilfs_btnode_prepare_change_key( - &NILFS_BMAP_I(btree)->i_btnode_cache, + NILFS_BMAP_I(btree)->i_assoc_inode->i_mapping, &path[level].bp_ctxt); if (ret < 0) { nilfs_dat_abort_update(dat, @@ -1940,7 +1946,7 @@ static void nilfs_btree_commit_update_v(struct nilfs_bmap *btree, if (buffer_nilfs_node(path[level].bp_bh)) { nilfs_btnode_commit_change_key( - &NILFS_BMAP_I(btree)->i_btnode_cache, + NILFS_BMAP_I(btree)->i_assoc_inode->i_mapping, &path[level].bp_ctxt); path[level].bp_bh = path[level].bp_ctxt.bh; } @@ -1959,7 +1965,7 @@ static void nilfs_btree_abort_update_v(struct nilfs_bmap *btree, &path[level].bp_newreq.bpr_req); if (buffer_nilfs_node(path[level].bp_bh)) nilfs_btnode_abort_change_key( - &NILFS_BMAP_I(btree)->i_btnode_cache, + NILFS_BMAP_I(btree)->i_assoc_inode->i_mapping, &path[level].bp_ctxt); } @@ -2135,7 +2141,8 @@ static void nilfs_btree_add_dirty_buffer(struct nilfs_bmap *btree, static void nilfs_btree_lookup_dirty_buffers(struct nilfs_bmap *btree, struct list_head *listp) { - struct address_space *btcache = &NILFS_BMAP_I(btree)->i_btnode_cache; + struct inode *btnc_inode = NILFS_BMAP_I(btree)->i_assoc_inode; + struct address_space *btcache = btnc_inode->i_mapping; struct list_head lists[NILFS_BTREE_LEVEL_MAX]; struct pagevec pvec; struct buffer_head *bh, *head; @@ -2189,12 +2196,12 @@ static int nilfs_btree_assign_p(struct nilfs_bmap *btree, path[level].bp_ctxt.newkey = blocknr; path[level].bp_ctxt.bh = *bh; ret = nilfs_btnode_prepare_change_key( - &NILFS_BMAP_I(btree)->i_btnode_cache, + NILFS_BMAP_I(btree)->i_assoc_inode->i_mapping, &path[level].bp_ctxt); if (ret < 0) return ret; nilfs_btnode_commit_change_key( - &NILFS_BMAP_I(btree)->i_btnode_cache, + NILFS_BMAP_I(btree)->i_assoc_inode->i_mapping, &path[level].bp_ctxt); *bh = path[level].bp_ctxt.bh; } @@ -2399,6 +2406,10 @@ int nilfs_btree_init(struct nilfs_bmap *bmap) if (nilfs_btree_root_broken(nilfs_btree_get_root(bmap), bmap->b_inode)) ret = -EIO; + else + ret = nilfs_attach_btree_node_cache( + &NILFS_BMAP_I(bmap)->vfs_inode); + return ret; } diff --git a/fs/nilfs2/gcinode.c b/fs/nilfs2/gcinode.c index aa3c328ee189..114774ac2185 100644 --- a/fs/nilfs2/gcinode.c +++ b/fs/nilfs2/gcinode.c @@ -126,9 +126,10 @@ int nilfs_gccache_submit_read_data(struct inode *inode, sector_t blkoff, int nilfs_gccache_submit_read_node(struct inode *inode, sector_t pbn, __u64 vbn, struct buffer_head **out_bh) { + struct inode *btnc_inode = NILFS_I(inode)->i_assoc_inode; int ret; - ret = nilfs_btnode_submit_block(&NILFS_I(inode)->i_btnode_cache, + ret = nilfs_btnode_submit_block(btnc_inode->i_mapping, vbn ? : pbn, pbn, REQ_OP_READ, 0, out_bh, &pbn); if (ret == -EEXIST) /* internal code (cache hit) */ @@ -170,7 +171,7 @@ int nilfs_init_gcinode(struct inode *inode) ii->i_flags = 0; nilfs_bmap_init_gc(ii->i_bmap); - return 0; + return nilfs_attach_btree_node_cache(inode); } /** @@ -185,7 +186,7 @@ void nilfs_remove_all_gcinodes(struct the_nilfs *nilfs) ii = list_first_entry(head, struct nilfs_inode_info, i_dirty); list_del_init(&ii->i_dirty); truncate_inode_pages(&ii->vfs_inode.i_data, 0); - nilfs_btnode_cache_clear(&ii->i_btnode_cache); + nilfs_btnode_cache_clear(ii->i_assoc_inode->i_mapping); iput(&ii->vfs_inode); } } diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c index 671085512e0f..b0a0822e371c 100644 --- a/fs/nilfs2/inode.c +++ b/fs/nilfs2/inode.c @@ -28,12 +28,14 @@ * @cno: checkpoint number * @root: pointer on NILFS root object (mounted checkpoint) * @for_gc: inode for GC flag + * @for_btnc: inode for B-tree node cache flag */ struct nilfs_iget_args { u64 ino; __u64 cno; struct nilfs_root *root; - int for_gc; + bool for_gc; + bool for_btnc; }; static int nilfs_iget_test(struct inode *inode, void *opaque); @@ -322,7 +324,8 @@ static int nilfs_insert_inode_locked(struct inode *inode, unsigned long ino) { struct nilfs_iget_args args = { - .ino = ino, .root = root, .cno = 0, .for_gc = 0 + .ino = ino, .root = root, .cno = 0, .for_gc = false, + .for_btnc = false }; return insert_inode_locked4(inode, ino, nilfs_iget_test, &args); @@ -534,6 +537,13 @@ static int nilfs_iget_test(struct inode *inode, void *opaque) return 0; ii = NILFS_I(inode); + if (test_bit(NILFS_I_BTNC, &ii->i_state)) { + if (!args->for_btnc) + return 0; + } else if (args->for_btnc) { + return 0; + } + if (!test_bit(NILFS_I_GCINODE, &ii->i_state)) return !args->for_gc; @@ -545,15 +555,15 @@ static int nilfs_iget_set(struct inode *inode, void *opaque) struct nilfs_iget_args *args = opaque; inode->i_ino = args->ino; - if (args->for_gc) { + NILFS_I(inode)->i_cno = args->cno; + NILFS_I(inode)->i_root = args->root; + if (args->root && args->ino == NILFS_ROOT_INO) + nilfs_get_root(args->root); + + if (args->for_gc) NILFS_I(inode)->i_state = BIT(NILFS_I_GCINODE); - NILFS_I(inode)->i_cno = args->cno; - NILFS_I(inode)->i_root = NULL; - } else { - if (args->root && args->ino == NILFS_ROOT_INO) - nilfs_get_root(args->root); - NILFS_I(inode)->i_root = args->root; - } + if (args->for_btnc) + NILFS_I(inode)->i_state |= BIT(NILFS_I_BTNC); return 0; } @@ -561,7 +571,8 @@ struct inode *nilfs_ilookup(struct super_block *sb, struct nilfs_root *root, unsigned long ino) { struct nilfs_iget_args args = { - .ino = ino, .root = root, .cno = 0, .for_gc = 0 + .ino = ino, .root = root, .cno = 0, .for_gc = false, + .for_btnc = false }; return ilookup5(sb, ino, nilfs_iget_test, &args); @@ -571,7 +582,8 @@ struct inode *nilfs_iget_locked(struct super_block *sb, struct nilfs_root *root, unsigned long ino) { struct nilfs_iget_args args = { - .ino = ino, .root = root, .cno = 0, .for_gc = 0 + .ino = ino, .root = root, .cno = 0, .for_gc = false, + .for_btnc = false }; return iget5_locked(sb, ino, nilfs_iget_test, nilfs_iget_set, &args); @@ -602,7 +614,8 @@ struct inode *nilfs_iget_for_gc(struct super_block *sb, unsigned long ino, __u64 cno) { struct nilfs_iget_args args = { - .ino = ino, .root = NULL, .cno = cno, .for_gc = 1 + .ino = ino, .root = NULL, .cno = cno, .for_gc = true, + .for_btnc = false }; struct inode *inode; int err; @@ -622,6 +635,68 @@ struct inode *nilfs_iget_for_gc(struct super_block *sb, unsigned long ino, return inode; } +/** + * nilfs_attach_btree_node_cache - attach a B-tree node cache to the inode + * @inode: inode object + * + * nilfs_attach_btree_node_cache() attaches a B-tree node cache to @inode, + * or does nothing if the inode already has it. This function allocates + * an additional inode to maintain page cache of B-tree nodes one-on-one. + * + * Return Value: On success, 0 is returned. On errors, one of the following + * negative error code is returned. + * + * %-ENOMEM - Insufficient memory available. + */ +int nilfs_attach_btree_node_cache(struct inode *inode) +{ + struct nilfs_inode_info *ii = NILFS_I(inode); + struct inode *btnc_inode; + struct nilfs_iget_args args; + + if (ii->i_assoc_inode) + return 0; + + args.ino = inode->i_ino; + args.root = ii->i_root; + args.cno = ii->i_cno; + args.for_gc = test_bit(NILFS_I_GCINODE, &ii->i_state) != 0; + args.for_btnc = true; + + btnc_inode = iget5_locked(inode->i_sb, inode->i_ino, nilfs_iget_test, + nilfs_iget_set, &args); + if (unlikely(!btnc_inode)) + return -ENOMEM; + if (btnc_inode->i_state & I_NEW) { + nilfs_init_btnc_inode(btnc_inode); + unlock_new_inode(btnc_inode); + } + NILFS_I(btnc_inode)->i_assoc_inode = inode; + NILFS_I(btnc_inode)->i_bmap = ii->i_bmap; + ii->i_assoc_inode = btnc_inode; + + return 0; +} + +/** + * nilfs_detach_btree_node_cache - detach the B-tree node cache from the inode + * @inode: inode object + * + * nilfs_detach_btree_node_cache() detaches the B-tree node cache and its + * holder inode bound to @inode, or does nothing if @inode doesn't have it. + */ +void nilfs_detach_btree_node_cache(struct inode *inode) +{ + struct nilfs_inode_info *ii = NILFS_I(inode); + struct inode *btnc_inode = ii->i_assoc_inode; + + if (btnc_inode) { + NILFS_I(btnc_inode)->i_assoc_inode = NULL; + ii->i_assoc_inode = NULL; + iput(btnc_inode); + } +} + void nilfs_write_inode_common(struct inode *inode, struct nilfs_inode *raw_inode, int has_bmap) { @@ -770,7 +845,8 @@ static void nilfs_clear_inode(struct inode *inode) if (test_bit(NILFS_I_BMAP, &ii->i_state)) nilfs_bmap_clear(ii->i_bmap); - nilfs_btnode_cache_clear(&ii->i_btnode_cache); + if (!test_bit(NILFS_I_BTNC, &ii->i_state)) + nilfs_detach_btree_node_cache(inode); if (ii->i_root && inode->i_ino == NILFS_ROOT_INO) nilfs_put_root(ii->i_root); diff --git a/fs/nilfs2/mdt.c b/fs/nilfs2/mdt.c index 700870a92bc4..3a1200220b97 100644 --- a/fs/nilfs2/mdt.c +++ b/fs/nilfs2/mdt.c @@ -531,7 +531,7 @@ int nilfs_mdt_save_to_shadow_map(struct inode *inode) goto out; ret = nilfs_copy_dirty_pages(&shadow->frozen_btnodes, - &ii->i_btnode_cache); + ii->i_assoc_inode->i_mapping); if (ret) goto out; @@ -622,8 +622,9 @@ void nilfs_mdt_restore_from_shadow_map(struct inode *inode) nilfs_clear_dirty_pages(inode->i_mapping, true); nilfs_copy_back_pages(inode->i_mapping, &shadow->frozen_data); - nilfs_clear_dirty_pages(&ii->i_btnode_cache, true); - nilfs_copy_back_pages(&ii->i_btnode_cache, &shadow->frozen_btnodes); + nilfs_clear_dirty_pages(ii->i_assoc_inode->i_mapping, true); + nilfs_copy_back_pages(ii->i_assoc_inode->i_mapping, + &shadow->frozen_btnodes); nilfs_bmap_restore(ii->i_bmap, &shadow->bmap_store); diff --git a/fs/nilfs2/nilfs.h b/fs/nilfs2/nilfs.h index 42395ba52da6..3b646d377237 100644 --- a/fs/nilfs2/nilfs.h +++ b/fs/nilfs2/nilfs.h @@ -28,7 +28,7 @@ * @i_xattr: * @i_dir_start_lookup: page index of last successful search * @i_cno: checkpoint number for GC inode - * @i_btnode_cache: cached pages of b-tree nodes + * @i_assoc_inode: associated inode (B-tree node cache holder or back pointer) * @i_dirty: list for connecting dirty files * @xattr_sem: semaphore for extended attributes processing * @i_bh: buffer contains disk inode @@ -43,7 +43,7 @@ struct nilfs_inode_info { __u64 i_xattr; /* sector_t ??? */ __u32 i_dir_start_lookup; __u64 i_cno; /* check point number for GC inode */ - struct address_space i_btnode_cache; + struct inode *i_assoc_inode; struct list_head i_dirty; /* List for connecting dirty files */ #ifdef CONFIG_NILFS_XATTR @@ -75,13 +75,6 @@ NILFS_BMAP_I(const struct nilfs_bmap *bmap) return container_of(bmap, struct nilfs_inode_info, i_bmap_data); } -static inline struct inode *NILFS_BTNC_I(struct address_space *btnc) -{ - struct nilfs_inode_info *ii = - container_of(btnc, struct nilfs_inode_info, i_btnode_cache); - return &ii->vfs_inode; -} - /* * Dynamic state flags of NILFS on-memory inode (i_state) */ @@ -98,6 +91,7 @@ enum { NILFS_I_INODE_SYNC, /* dsync is not allowed for inode */ NILFS_I_BMAP, /* has bmap and btnode_cache */ NILFS_I_GCINODE, /* inode for GC, on memory only */ + NILFS_I_BTNC, /* inode for btree node cache */ }; /* @@ -264,6 +258,8 @@ struct inode *nilfs_iget(struct super_block *sb, struct nilfs_root *root, unsigned long ino); extern struct inode *nilfs_iget_for_gc(struct super_block *sb, unsigned long ino, __u64 cno); +int nilfs_attach_btree_node_cache(struct inode *inode); +void nilfs_detach_btree_node_cache(struct inode *inode); extern void nilfs_update_inode(struct inode *, struct buffer_head *, int); extern void nilfs_truncate(struct inode *); extern void nilfs_evict_inode(struct inode *); diff --git a/fs/nilfs2/page.c b/fs/nilfs2/page.c index d7fc8d369d89..98d21ad9a073 100644 --- a/fs/nilfs2/page.c +++ b/fs/nilfs2/page.c @@ -450,10 +450,9 @@ void nilfs_mapping_init(struct address_space *mapping, struct inode *inode) /* * NILFS2 needs clear_page_dirty() in the following two cases: * - * 1) For B-tree node pages and data pages of the dat/gcdat, NILFS2 clears - * page dirty flags when it copies back pages from the shadow cache - * (gcdat->{i_mapping,i_btnode_cache}) to its original cache - * (dat->{i_mapping,i_btnode_cache}). + * 1) For B-tree node pages and data pages of DAT file, NILFS2 clears dirty + * flag of pages when it copies back pages from shadow cache to the + * original cache. * * 2) Some B-tree operations like insertion or deletion may dispose buffers * in dirty state, and this needs to cancel the dirty state of their pages. diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c index 91b58c897f92..eb3ac7619088 100644 --- a/fs/nilfs2/segment.c +++ b/fs/nilfs2/segment.c @@ -738,15 +738,18 @@ static void nilfs_lookup_dirty_node_buffers(struct inode *inode, struct list_head *listp) { struct nilfs_inode_info *ii = NILFS_I(inode); - struct address_space *mapping = &ii->i_btnode_cache; + struct inode *btnc_inode = ii->i_assoc_inode; struct pagevec pvec; struct buffer_head *bh, *head; unsigned int i; pgoff_t index = 0; + if (!btnc_inode) + return; + pagevec_init(&pvec); - while (pagevec_lookup_tag(&pvec, mapping, &index, + while (pagevec_lookup_tag(&pvec, btnc_inode->i_mapping, &index, PAGECACHE_TAG_DIRTY)) { for (i = 0; i < pagevec_count(&pvec); i++) { bh = head = page_buffers(pvec.pages[i]); @@ -2410,7 +2413,7 @@ nilfs_remove_written_gcinodes(struct the_nilfs *nilfs, struct list_head *head) continue; list_del_init(&ii->i_dirty); truncate_inode_pages(&ii->vfs_inode.i_data, 0); - nilfs_btnode_cache_clear(&ii->i_btnode_cache); + nilfs_btnode_cache_clear(ii->i_assoc_inode->i_mapping); iput(&ii->vfs_inode); } } diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c index 5729ee86da9a..b5bc9f0c6a40 100644 --- a/fs/nilfs2/super.c +++ b/fs/nilfs2/super.c @@ -151,7 +151,8 @@ struct inode *nilfs_alloc_inode(struct super_block *sb) ii->i_bh = NULL; ii->i_state = 0; ii->i_cno = 0; - nilfs_mapping_init(&ii->i_btnode_cache, &ii->vfs_inode); + ii->i_assoc_inode = NULL; + ii->i_bmap = &ii->i_bmap_data; return &ii->vfs_inode; } @@ -1375,8 +1376,6 @@ static void nilfs_inode_init_once(void *obj) #ifdef CONFIG_NILFS_XATTR init_rwsem(&ii->xattr_sem); #endif - address_space_init_once(&ii->i_btnode_cache); - ii->i_bmap = &ii->i_bmap_data; inode_init_once(&ii->vfs_inode); } From 1e93f939927def64a6d091f9cd24681322b53b37 Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Fri, 1 Apr 2022 11:28:21 -0700 Subject: [PATCH 18/69] nilfs2: fix lockdep warnings during disk space reclamation [ Upstream commit 6e211930f79aa45d422009a5f2e5467d2369ffe5 ] During disk space reclamation, nilfs2 still emits the following lockdep warning due to page/folio operations on shadowed page caches that nilfs2 uses to get a snapshot of DAT file in memory: WARNING: CPU: 0 PID: 2643 at include/linux/backing-dev.h:272 __folio_mark_dirty+0x645/0x670 ... RIP: 0010:__folio_mark_dirty+0x645/0x670 ... Call Trace: filemap_dirty_folio+0x74/0xd0 __set_page_dirty_nobuffers+0x85/0xb0 nilfs_copy_dirty_pages+0x288/0x510 [nilfs2] nilfs_mdt_save_to_shadow_map+0x50/0xe0 [nilfs2] nilfs_clean_segments+0xee/0x5d0 [nilfs2] nilfs_ioctl_clean_segments.isra.19+0xb08/0xf40 [nilfs2] nilfs_ioctl+0xc52/0xfb0 [nilfs2] __x64_sys_ioctl+0x11d/0x170 This fixes the remaining warning by using inode objects to hold those page caches. Link: https://lkml.kernel.org/r/1647867427-30498-3-git-send-email-konishi.ryusuke@gmail.com Signed-off-by: Ryusuke Konishi Tested-by: Ryusuke Konishi Cc: Matthew Wilcox Cc: David Hildenbrand Cc: Hao Sun Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- fs/nilfs2/dat.c | 4 ++- fs/nilfs2/inode.c | 63 ++++++++++++++++++++++++++++++++++++++++++++--- fs/nilfs2/mdt.c | 38 +++++++++++++++++++--------- fs/nilfs2/mdt.h | 6 ++--- fs/nilfs2/nilfs.h | 2 ++ 5 files changed, 92 insertions(+), 21 deletions(-) diff --git a/fs/nilfs2/dat.c b/fs/nilfs2/dat.c index 6f4066636be9..a3523a243e11 100644 --- a/fs/nilfs2/dat.c +++ b/fs/nilfs2/dat.c @@ -497,7 +497,9 @@ int nilfs_dat_read(struct super_block *sb, size_t entry_size, di = NILFS_DAT_I(dat); lockdep_set_class(&di->mi.mi_sem, &dat_lock_key); nilfs_palloc_setup_cache(dat, &di->palloc_cache); - nilfs_mdt_setup_shadow_map(dat, &di->shadow); + err = nilfs_mdt_setup_shadow_map(dat, &di->shadow); + if (err) + goto failed; err = nilfs_read_inode_common(dat, raw_inode); if (err) diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c index b0a0822e371c..35b0bfe9324f 100644 --- a/fs/nilfs2/inode.c +++ b/fs/nilfs2/inode.c @@ -29,6 +29,7 @@ * @root: pointer on NILFS root object (mounted checkpoint) * @for_gc: inode for GC flag * @for_btnc: inode for B-tree node cache flag + * @for_shadow: inode for shadowed page cache flag */ struct nilfs_iget_args { u64 ino; @@ -36,6 +37,7 @@ struct nilfs_iget_args { struct nilfs_root *root; bool for_gc; bool for_btnc; + bool for_shadow; }; static int nilfs_iget_test(struct inode *inode, void *opaque); @@ -325,7 +327,7 @@ static int nilfs_insert_inode_locked(struct inode *inode, { struct nilfs_iget_args args = { .ino = ino, .root = root, .cno = 0, .for_gc = false, - .for_btnc = false + .for_btnc = false, .for_shadow = false }; return insert_inode_locked4(inode, ino, nilfs_iget_test, &args); @@ -543,6 +545,12 @@ static int nilfs_iget_test(struct inode *inode, void *opaque) } else if (args->for_btnc) { return 0; } + if (test_bit(NILFS_I_SHADOW, &ii->i_state)) { + if (!args->for_shadow) + return 0; + } else if (args->for_shadow) { + return 0; + } if (!test_bit(NILFS_I_GCINODE, &ii->i_state)) return !args->for_gc; @@ -564,6 +572,8 @@ static int nilfs_iget_set(struct inode *inode, void *opaque) NILFS_I(inode)->i_state = BIT(NILFS_I_GCINODE); if (args->for_btnc) NILFS_I(inode)->i_state |= BIT(NILFS_I_BTNC); + if (args->for_shadow) + NILFS_I(inode)->i_state |= BIT(NILFS_I_SHADOW); return 0; } @@ -572,7 +582,7 @@ struct inode *nilfs_ilookup(struct super_block *sb, struct nilfs_root *root, { struct nilfs_iget_args args = { .ino = ino, .root = root, .cno = 0, .for_gc = false, - .for_btnc = false + .for_btnc = false, .for_shadow = false }; return ilookup5(sb, ino, nilfs_iget_test, &args); @@ -583,7 +593,7 @@ struct inode *nilfs_iget_locked(struct super_block *sb, struct nilfs_root *root, { struct nilfs_iget_args args = { .ino = ino, .root = root, .cno = 0, .for_gc = false, - .for_btnc = false + .for_btnc = false, .for_shadow = false }; return iget5_locked(sb, ino, nilfs_iget_test, nilfs_iget_set, &args); @@ -615,7 +625,7 @@ struct inode *nilfs_iget_for_gc(struct super_block *sb, unsigned long ino, { struct nilfs_iget_args args = { .ino = ino, .root = NULL, .cno = cno, .for_gc = true, - .for_btnc = false + .for_btnc = false, .for_shadow = false }; struct inode *inode; int err; @@ -662,6 +672,7 @@ int nilfs_attach_btree_node_cache(struct inode *inode) args.cno = ii->i_cno; args.for_gc = test_bit(NILFS_I_GCINODE, &ii->i_state) != 0; args.for_btnc = true; + args.for_shadow = test_bit(NILFS_I_SHADOW, &ii->i_state) != 0; btnc_inode = iget5_locked(inode->i_sb, inode->i_ino, nilfs_iget_test, nilfs_iget_set, &args); @@ -697,6 +708,50 @@ void nilfs_detach_btree_node_cache(struct inode *inode) } } +/** + * nilfs_iget_for_shadow - obtain inode for shadow mapping + * @inode: inode object that uses shadow mapping + * + * nilfs_iget_for_shadow() allocates a pair of inodes that holds page + * caches for shadow mapping. The page cache for data pages is set up + * in one inode and the one for b-tree node pages is set up in the + * other inode, which is attached to the former inode. + * + * Return Value: On success, a pointer to the inode for data pages is + * returned. On errors, one of the following negative error code is returned + * in a pointer type. + * + * %-ENOMEM - Insufficient memory available. + */ +struct inode *nilfs_iget_for_shadow(struct inode *inode) +{ + struct nilfs_iget_args args = { + .ino = inode->i_ino, .root = NULL, .cno = 0, .for_gc = false, + .for_btnc = false, .for_shadow = true + }; + struct inode *s_inode; + int err; + + s_inode = iget5_locked(inode->i_sb, inode->i_ino, nilfs_iget_test, + nilfs_iget_set, &args); + if (unlikely(!s_inode)) + return ERR_PTR(-ENOMEM); + if (!(s_inode->i_state & I_NEW)) + return inode; + + NILFS_I(s_inode)->i_flags = 0; + memset(NILFS_I(s_inode)->i_bmap, 0, sizeof(struct nilfs_bmap)); + mapping_set_gfp_mask(s_inode->i_mapping, GFP_NOFS); + + err = nilfs_attach_btree_node_cache(s_inode); + if (unlikely(err)) { + iget_failed(s_inode); + return ERR_PTR(err); + } + unlock_new_inode(s_inode); + return s_inode; +} + void nilfs_write_inode_common(struct inode *inode, struct nilfs_inode *raw_inode, int has_bmap) { diff --git a/fs/nilfs2/mdt.c b/fs/nilfs2/mdt.c index 3a1200220b97..7c9055d767d1 100644 --- a/fs/nilfs2/mdt.c +++ b/fs/nilfs2/mdt.c @@ -469,9 +469,18 @@ int nilfs_mdt_init(struct inode *inode, gfp_t gfp_mask, size_t objsz) void nilfs_mdt_clear(struct inode *inode) { struct nilfs_mdt_info *mdi = NILFS_MDT(inode); + struct nilfs_shadow_map *shadow = mdi->mi_shadow; if (mdi->mi_palloc_cache) nilfs_palloc_destroy_cache(inode); + + if (shadow) { + struct inode *s_inode = shadow->inode; + + shadow->inode = NULL; + iput(s_inode); + mdi->mi_shadow = NULL; + } } /** @@ -505,12 +514,15 @@ int nilfs_mdt_setup_shadow_map(struct inode *inode, struct nilfs_shadow_map *shadow) { struct nilfs_mdt_info *mi = NILFS_MDT(inode); + struct inode *s_inode; INIT_LIST_HEAD(&shadow->frozen_buffers); - address_space_init_once(&shadow->frozen_data); - nilfs_mapping_init(&shadow->frozen_data, inode); - address_space_init_once(&shadow->frozen_btnodes); - nilfs_mapping_init(&shadow->frozen_btnodes, inode); + + s_inode = nilfs_iget_for_shadow(inode); + if (IS_ERR(s_inode)) + return PTR_ERR(s_inode); + + shadow->inode = s_inode; mi->mi_shadow = shadow; return 0; } @@ -524,13 +536,14 @@ int nilfs_mdt_save_to_shadow_map(struct inode *inode) struct nilfs_mdt_info *mi = NILFS_MDT(inode); struct nilfs_inode_info *ii = NILFS_I(inode); struct nilfs_shadow_map *shadow = mi->mi_shadow; + struct inode *s_inode = shadow->inode; int ret; - ret = nilfs_copy_dirty_pages(&shadow->frozen_data, inode->i_mapping); + ret = nilfs_copy_dirty_pages(s_inode->i_mapping, inode->i_mapping); if (ret) goto out; - ret = nilfs_copy_dirty_pages(&shadow->frozen_btnodes, + ret = nilfs_copy_dirty_pages(NILFS_I(s_inode)->i_assoc_inode->i_mapping, ii->i_assoc_inode->i_mapping); if (ret) goto out; @@ -547,7 +560,7 @@ int nilfs_mdt_freeze_buffer(struct inode *inode, struct buffer_head *bh) struct page *page; int blkbits = inode->i_blkbits; - page = grab_cache_page(&shadow->frozen_data, bh->b_page->index); + page = grab_cache_page(shadow->inode->i_mapping, bh->b_page->index); if (!page) return -ENOMEM; @@ -579,7 +592,7 @@ nilfs_mdt_get_frozen_buffer(struct inode *inode, struct buffer_head *bh) struct page *page; int n; - page = find_lock_page(&shadow->frozen_data, bh->b_page->index); + page = find_lock_page(shadow->inode->i_mapping, bh->b_page->index); if (page) { if (page_has_buffers(page)) { n = bh_offset(bh) >> inode->i_blkbits; @@ -620,11 +633,11 @@ void nilfs_mdt_restore_from_shadow_map(struct inode *inode) nilfs_palloc_clear_cache(inode); nilfs_clear_dirty_pages(inode->i_mapping, true); - nilfs_copy_back_pages(inode->i_mapping, &shadow->frozen_data); + nilfs_copy_back_pages(inode->i_mapping, shadow->inode->i_mapping); nilfs_clear_dirty_pages(ii->i_assoc_inode->i_mapping, true); nilfs_copy_back_pages(ii->i_assoc_inode->i_mapping, - &shadow->frozen_btnodes); + NILFS_I(shadow->inode)->i_assoc_inode->i_mapping); nilfs_bmap_restore(ii->i_bmap, &shadow->bmap_store); @@ -639,10 +652,11 @@ void nilfs_mdt_clear_shadow_map(struct inode *inode) { struct nilfs_mdt_info *mi = NILFS_MDT(inode); struct nilfs_shadow_map *shadow = mi->mi_shadow; + struct inode *shadow_btnc_inode = NILFS_I(shadow->inode)->i_assoc_inode; down_write(&mi->mi_sem); nilfs_release_frozen_buffers(shadow); - truncate_inode_pages(&shadow->frozen_data, 0); - truncate_inode_pages(&shadow->frozen_btnodes, 0); + truncate_inode_pages(shadow->inode->i_mapping, 0); + truncate_inode_pages(shadow_btnc_inode->i_mapping, 0); up_write(&mi->mi_sem); } diff --git a/fs/nilfs2/mdt.h b/fs/nilfs2/mdt.h index e77aea4bb921..9d8ac0d27c16 100644 --- a/fs/nilfs2/mdt.h +++ b/fs/nilfs2/mdt.h @@ -18,14 +18,12 @@ /** * struct nilfs_shadow_map - shadow mapping of meta data file * @bmap_store: shadow copy of bmap state - * @frozen_data: shadowed dirty data pages - * @frozen_btnodes: shadowed dirty b-tree nodes' pages + * @inode: holder of page caches used in shadow mapping * @frozen_buffers: list of frozen buffers */ struct nilfs_shadow_map { struct nilfs_bmap_store bmap_store; - struct address_space frozen_data; - struct address_space frozen_btnodes; + struct inode *inode; struct list_head frozen_buffers; }; diff --git a/fs/nilfs2/nilfs.h b/fs/nilfs2/nilfs.h index 3b646d377237..4895d978369a 100644 --- a/fs/nilfs2/nilfs.h +++ b/fs/nilfs2/nilfs.h @@ -92,6 +92,7 @@ enum { NILFS_I_BMAP, /* has bmap and btnode_cache */ NILFS_I_GCINODE, /* inode for GC, on memory only */ NILFS_I_BTNC, /* inode for btree node cache */ + NILFS_I_SHADOW, /* inode for shadowed page cache */ }; /* @@ -260,6 +261,7 @@ extern struct inode *nilfs_iget_for_gc(struct super_block *sb, unsigned long ino, __u64 cno); int nilfs_attach_btree_node_cache(struct inode *inode); void nilfs_detach_btree_node_cache(struct inode *inode); +struct inode *nilfs_iget_for_shadow(struct inode *inode); extern void nilfs_update_inode(struct inode *, struct buffer_head *, int); extern void nilfs_truncate(struct inode *); extern void nilfs_evict_inode(struct inode *); From f10260f359925f010745c6b7d2c38f9d694b0803 Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Tue, 17 May 2022 11:09:09 -0700 Subject: [PATCH 19/69] mmc: core: Specify timeouts for BKOPS and CACHE_FLUSH for eMMC commit 24ed3bd01d6a844fd5e8a75f48d0a3d10ed71bf9 upstream The timeout values used while waiting for a CMD6 for BKOPS or a CACHE_FLUSH to complete, are not defined by the eMMC spec. However, a timeout of 10 minutes as is currently being used, is just silly for both of these cases. Instead, let's specify more reasonable timeouts, 120s for BKOPS and 30s for CACHE_FLUSH. Signed-off-by: Ulf Hansson Link: https://lore.kernel.org/r/20200122142747.5690-2-ulf.hansson@linaro.org Signed-off-by: Florian Fainelli Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/core/mmc_ops.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/mmc/core/mmc_ops.c b/drivers/mmc/core/mmc_ops.c index 09311c2bd858..d8f419183144 100644 --- a/drivers/mmc/core/mmc_ops.c +++ b/drivers/mmc/core/mmc_ops.c @@ -19,7 +19,9 @@ #include "host.h" #include "mmc_ops.h" -#define MMC_OPS_TIMEOUT_MS (10 * 60 * 1000) /* 10 minute timeout */ +#define MMC_OPS_TIMEOUT_MS (10 * 60 * 1000) /* 10min*/ +#define MMC_BKOPS_TIMEOUT_MS (120 * 1000) /* 120s */ +#define MMC_CACHE_FLUSH_TIMEOUT_MS (30 * 1000) /* 30s */ static const u8 tuning_blk_pattern_4bit[] = { 0xff, 0x0f, 0xff, 0x00, 0xff, 0xcc, 0xc3, 0xcc, @@ -943,7 +945,7 @@ void mmc_run_bkops(struct mmc_card *card) * urgent levels by using an asynchronous background task, when idle. */ err = mmc_switch(card, EXT_CSD_CMD_SET_NORMAL, - EXT_CSD_BKOPS_START, 1, MMC_OPS_TIMEOUT_MS); + EXT_CSD_BKOPS_START, 1, MMC_BKOPS_TIMEOUT_MS); if (err) pr_warn("%s: Error %d starting bkops\n", mmc_hostname(card->host), err); @@ -961,7 +963,8 @@ int mmc_flush_cache(struct mmc_card *card) if (mmc_cache_enabled(card->host)) { err = mmc_switch(card, EXT_CSD_CMD_SET_NORMAL, - EXT_CSD_FLUSH_CACHE, 1, 0); + EXT_CSD_FLUSH_CACHE, 1, + MMC_CACHE_FLUSH_TIMEOUT_MS); if (err) pr_err("%s: cache flush error %d\n", mmc_hostname(card->host), err); From def047ae1266c02cac2ca7b01a6c5bf31dc2aa13 Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Tue, 17 May 2022 11:09:10 -0700 Subject: [PATCH 20/69] mmc: block: Use generic_cmd6_time when modifying INAND_CMD38_ARG_EXT_CSD commit ad91619aa9d78ab1c6d4a969c3db68bc331ae76c upstream The INAND_CMD38_ARG_EXT_CSD is a vendor specific EXT_CSD register, which is used to prepare an erase/trim operation. However, it doesn't make sense to use a timeout of 10 minutes while updating the register, which becomes the case when the timeout_ms argument for mmc_switch() is set to zero. Instead, let's use the generic_cmd6_time, as that seems like a reasonable timeout to use for these cases. Signed-off-by: Ulf Hansson Link: https://lore.kernel.org/r/20200122142747.5690-3-ulf.hansson@linaro.org Signed-off-by: Florian Fainelli Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/core/block.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/mmc/core/block.c b/drivers/mmc/core/block.c index 362ad361d586..709f117fd577 100644 --- a/drivers/mmc/core/block.c +++ b/drivers/mmc/core/block.c @@ -1126,7 +1126,7 @@ static void mmc_blk_issue_discard_rq(struct mmc_queue *mq, struct request *req) card->erase_arg == MMC_TRIM_ARG ? INAND_CMD38_ARG_TRIM : INAND_CMD38_ARG_ERASE, - 0); + card->ext_csd.generic_cmd6_time); } if (!err) err = mmc_erase(card, from, nr, card->erase_arg); @@ -1168,7 +1168,7 @@ retry: arg == MMC_SECURE_TRIM1_ARG ? INAND_CMD38_ARG_SECTRIM1 : INAND_CMD38_ARG_SECERASE, - 0); + card->ext_csd.generic_cmd6_time); if (err) goto out_retry; } @@ -1186,7 +1186,7 @@ retry: err = mmc_switch(card, EXT_CSD_CMD_SET_NORMAL, INAND_CMD38_ARG_EXT_CSD, INAND_CMD38_ARG_SECTRIM2, - 0); + card->ext_csd.generic_cmd6_time); if (err) goto out_retry; } From f3fe8d13ac899ce21df911017a7b13379de08a13 Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Tue, 17 May 2022 11:09:11 -0700 Subject: [PATCH 21/69] mmc: core: Default to generic_cmd6_time as timeout in __mmc_switch() commit 533a6cfe08f96a7b5c65e06d20916d552c11b256 upstream All callers of __mmc_switch() should now be specifying a valid timeout for the CMD6 command. However, just to be sure, let's print a warning and default to use the generic_cmd6_time in case the provided timeout_ms argument is zero. In this context, let's also simplify some of the corresponding code and clarify some related comments. Signed-off-by: Ulf Hansson Link: https://lore.kernel.org/r/20200122142747.5690-4-ulf.hansson@linaro.org Signed-off-by: Florian Fainelli Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/core/mmc_ops.c | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/drivers/mmc/core/mmc_ops.c b/drivers/mmc/core/mmc_ops.c index d8f419183144..d495ba2f368c 100644 --- a/drivers/mmc/core/mmc_ops.c +++ b/drivers/mmc/core/mmc_ops.c @@ -460,10 +460,6 @@ static int mmc_poll_for_busy(struct mmc_card *card, unsigned int timeout_ms, bool expired = false; bool busy = false; - /* We have an unspecified cmd timeout, use the fallback value. */ - if (!timeout_ms) - timeout_ms = MMC_OPS_TIMEOUT_MS; - /* * In cases when not allowed to poll by using CMD13 or because we aren't * capable of polling by using ->card_busy(), then rely on waiting the @@ -536,6 +532,12 @@ int __mmc_switch(struct mmc_card *card, u8 set, u8 index, u8 value, mmc_retune_hold(host); + if (!timeout_ms) { + pr_warn("%s: unspecified timeout for CMD6 - use generic\n", + mmc_hostname(host)); + timeout_ms = card->ext_csd.generic_cmd6_time; + } + /* * If the cmd timeout and the max_busy_timeout of the host are both * specified, let's validate them. A failure means we need to prevent @@ -544,7 +546,7 @@ int __mmc_switch(struct mmc_card *card, u8 set, u8 index, u8 value, * which also means they are on their own when it comes to deal with the * busy timeout. */ - if (!(host->caps & MMC_CAP_NEED_RSP_BUSY) && timeout_ms && + if (!(host->caps & MMC_CAP_NEED_RSP_BUSY) && host->max_busy_timeout && (timeout_ms > host->max_busy_timeout)) use_r1b_resp = false; @@ -556,10 +558,6 @@ int __mmc_switch(struct mmc_card *card, u8 set, u8 index, u8 value, cmd.flags = MMC_CMD_AC; if (use_r1b_resp) { cmd.flags |= MMC_RSP_SPI_R1B | MMC_RSP_R1B; - /* - * A busy_timeout of zero means the host can decide to use - * whatever value it finds suitable. - */ cmd.busy_timeout = timeout_ms; } else { cmd.flags |= MMC_RSP_SPI_R1 | MMC_RSP_R1; From 2d6f096476e66a60aad3004f5d8aefff0b5b59df Mon Sep 17 00:00:00 2001 From: Meena Shanmugam Date: Wed, 18 May 2022 18:40:08 +0000 Subject: [PATCH 22/69] SUNRPC: Clean up scheduling of autoclose From: Trond Myklebust commit e26d9972720e2484f44cdd94ca4e31cc372ed2ed upstream. Consolidate duplicated code in xprt_force_disconnect() and xprt_conditional_disconnect(). Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker Signed-off-by: Meena Shanmugam Signed-off-by: Greg Kroah-Hartman --- net/sunrpc/xprt.c | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 8ac579778e48..a7dedc12c982 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -716,6 +716,20 @@ void xprt_disconnect_done(struct rpc_xprt *xprt) } EXPORT_SYMBOL_GPL(xprt_disconnect_done); +/** + * xprt_schedule_autoclose_locked - Try to schedule an autoclose RPC call + * @xprt: transport to disconnect + */ +static void xprt_schedule_autoclose_locked(struct rpc_xprt *xprt) +{ + set_bit(XPRT_CLOSE_WAIT, &xprt->state); + if (test_and_set_bit(XPRT_LOCKED, &xprt->state) == 0) + queue_work(xprtiod_workqueue, &xprt->task_cleanup); + else if (xprt->snd_task && !test_bit(XPRT_SND_IS_COOKIE, &xprt->state)) + rpc_wake_up_queued_task_set_status(&xprt->pending, + xprt->snd_task, -ENOTCONN); +} + /** * xprt_force_disconnect - force a transport to disconnect * @xprt: transport to disconnect @@ -725,13 +739,7 @@ void xprt_force_disconnect(struct rpc_xprt *xprt) { /* Don't race with the test_bit() in xprt_clear_locked() */ spin_lock(&xprt->transport_lock); - set_bit(XPRT_CLOSE_WAIT, &xprt->state); - /* Try to schedule an autoclose RPC call */ - if (test_and_set_bit(XPRT_LOCKED, &xprt->state) == 0) - queue_work(xprtiod_workqueue, &xprt->task_cleanup); - else if (xprt->snd_task && !test_bit(XPRT_SND_IS_COOKIE, &xprt->state)) - rpc_wake_up_queued_task_set_status(&xprt->pending, - xprt->snd_task, -ENOTCONN); + xprt_schedule_autoclose_locked(xprt); spin_unlock(&xprt->transport_lock); } EXPORT_SYMBOL_GPL(xprt_force_disconnect); @@ -771,11 +779,7 @@ void xprt_conditional_disconnect(struct rpc_xprt *xprt, unsigned int cookie) goto out; if (test_bit(XPRT_CLOSING, &xprt->state)) goto out; - set_bit(XPRT_CLOSE_WAIT, &xprt->state); - /* Try to schedule an autoclose RPC call */ - if (test_and_set_bit(XPRT_LOCKED, &xprt->state) == 0) - queue_work(xprtiod_workqueue, &xprt->task_cleanup); - xprt_wake_pending_tasks(xprt, -EAGAIN); + xprt_schedule_autoclose_locked(xprt); out: spin_unlock(&xprt->transport_lock); } From aa4d71edd60941bc05d2a0efe1e98a67c5dd59b7 Mon Sep 17 00:00:00 2001 From: Meena Shanmugam Date: Wed, 18 May 2022 18:40:09 +0000 Subject: [PATCH 23/69] SUNRPC: Prevent immediate close+reconnect From: Trond Myklebust commit 3be232f11a3cc9b0ef0795e39fa11bdb8e422a06 upstream. If we have already set up the socket and are waiting for it to connect, then don't immediately close and retry. Signed-off-by: Trond Myklebust Signed-off-by: Meena Shanmugam Signed-off-by: Greg Kroah-Hartman --- net/sunrpc/xprt.c | 3 ++- net/sunrpc/xprtsock.c | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index a7dedc12c982..68d08dcba018 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -722,7 +722,8 @@ EXPORT_SYMBOL_GPL(xprt_disconnect_done); */ static void xprt_schedule_autoclose_locked(struct rpc_xprt *xprt) { - set_bit(XPRT_CLOSE_WAIT, &xprt->state); + if (test_and_set_bit(XPRT_CLOSE_WAIT, &xprt->state)) + return; if (test_and_set_bit(XPRT_LOCKED, &xprt->state) == 0) queue_work(xprtiod_workqueue, &xprt->task_cleanup); else if (xprt->snd_task && !test_bit(XPRT_SND_IS_COOKIE, &xprt->state)) diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 43bc02dea80c..49ba817f4fb6 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -2469,7 +2469,7 @@ static void xs_connect(struct rpc_xprt *xprt, struct rpc_task *task) WARN_ON_ONCE(!xprt_lock_connect(xprt, task, transport)); - if (transport->sock != NULL) { + if (transport->sock != NULL && !xprt_connecting(xprt)) { dprintk("RPC: xs_connect delayed xprt %p for %lu " "seconds\n", xprt, xprt->reestablish_timeout / HZ); From 975a0f14d5cd29fe4b33190b5afcfa9f244b769d Mon Sep 17 00:00:00 2001 From: Meena Shanmugam Date: Wed, 18 May 2022 18:40:10 +0000 Subject: [PATCH 24/69] SUNRPC: Don't call connect() more than once on a TCP socket From: Trond Myklebust commit 89f42494f92f448747bd8a7ab1ae8b5d5520577d upstream. Avoid socket state races due to repeated calls to ->connect() using the same socket. If connect() returns 0 due to the connection having completed, but we are in fact in a closing state, then we may leave the XPRT_CONNECTING flag set on the transport. Reported-by: Enrico Scholz Fixes: 3be232f11a3c ("SUNRPC: Prevent immediate close+reconnect") Signed-off-by: Trond Myklebust [meenashanmugam: Fix merge conflict in xs_tcp_setup_socket] Signed-off-by: Meena Shanmugam Signed-off-by: Greg Kroah-Hartman --- include/linux/sunrpc/xprtsock.h | 1 + net/sunrpc/xprtsock.c | 22 ++++++++++++---------- 2 files changed, 13 insertions(+), 10 deletions(-) diff --git a/include/linux/sunrpc/xprtsock.h b/include/linux/sunrpc/xprtsock.h index a940de03808d..46deca97e806 100644 --- a/include/linux/sunrpc/xprtsock.h +++ b/include/linux/sunrpc/xprtsock.h @@ -90,6 +90,7 @@ struct sock_xprt { #define XPRT_SOCK_WAKE_WRITE (5) #define XPRT_SOCK_WAKE_PENDING (6) #define XPRT_SOCK_WAKE_DISCONNECT (7) +#define XPRT_SOCK_CONNECT_SENT (8) #endif /* __KERNEL__ */ diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 49ba817f4fb6..29e9c54a89d3 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -2384,10 +2384,14 @@ static void xs_tcp_setup_socket(struct work_struct *work) struct rpc_xprt *xprt = &transport->xprt; int status = -EIO; - if (!sock) { - sock = xs_create_sock(xprt, transport, - xs_addr(xprt)->sa_family, SOCK_STREAM, - IPPROTO_TCP, true); + if (xprt_connected(xprt)) + goto out; + if (test_and_clear_bit(XPRT_SOCK_CONNECT_SENT, + &transport->sock_state) || + !sock) { + xs_reset_transport(transport); + sock = xs_create_sock(xprt, transport, xs_addr(xprt)->sa_family, + SOCK_STREAM, IPPROTO_TCP, true); if (IS_ERR(sock)) { status = PTR_ERR(sock); goto out; @@ -2418,6 +2422,8 @@ static void xs_tcp_setup_socket(struct work_struct *work) break; case 0: case -EINPROGRESS: + set_bit(XPRT_SOCK_CONNECT_SENT, &transport->sock_state); + fallthrough; case -EALREADY: xprt_unlock_connect(xprt, transport); return; @@ -2469,13 +2475,9 @@ static void xs_connect(struct rpc_xprt *xprt, struct rpc_task *task) WARN_ON_ONCE(!xprt_lock_connect(xprt, task, transport)); - if (transport->sock != NULL && !xprt_connecting(xprt)) { + if (transport->sock != NULL) { dprintk("RPC: xs_connect delayed xprt %p for %lu " - "seconds\n", - xprt, xprt->reestablish_timeout / HZ); - - /* Start by resetting any existing state */ - xs_reset_transport(transport); + "seconds\n", xprt, xprt->reestablish_timeout / HZ); delay = xprt_reconnect_delay(xprt); xprt_reconnect_backoff(xprt, XS_TCP_INIT_REEST_TO); From 2f8f6c393b11b5da059b1fc10a69fc2f2b6c446a Mon Sep 17 00:00:00 2001 From: Meena Shanmugam Date: Wed, 18 May 2022 18:40:11 +0000 Subject: [PATCH 25/69] SUNRPC: Ensure we flush any closed sockets before xs_xprt_free() From: Trond Myklebust commit f00432063db1a0db484e85193eccc6845435b80e upstream. We must ensure that all sockets are closed before we call xprt_free() and release the reference to the net namespace. The problem is that calling fput() will defer closing the socket until delayed_fput() gets called. Let's fix the situation by allowing rpciod and the transport teardown code (which runs on the system wq) to call __fput_sync(), and directly close the socket. Reported-by: Felix Fu Acked-by: Al Viro Fixes: a73881c96d73 ("SUNRPC: Fix an Oops in udp_poll()") Cc: stable@vger.kernel.org # 5.1.x: 3be232f11a3c: SUNRPC: Prevent immediate close+reconnect Cc: stable@vger.kernel.org # 5.1.x: 89f42494f92f: SUNRPC: Don't call connect() more than once on a TCP socket Cc: stable@vger.kernel.org # 5.1.x Signed-off-by: Trond Myklebust [meenashanmugam: Fix merge conflict in xprt_connect] Signed-off-by: Meena Shanmugam Signed-off-by: Greg Kroah-Hartman --- fs/file_table.c | 1 + net/sunrpc/xprt.c | 5 +---- net/sunrpc/xprtsock.c | 16 +++++++++++++--- 3 files changed, 15 insertions(+), 7 deletions(-) diff --git a/fs/file_table.c b/fs/file_table.c index 30d55c9a1744..70e8fb68a171 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -375,6 +375,7 @@ void __fput_sync(struct file *file) } EXPORT_SYMBOL(fput); +EXPORT_SYMBOL(__fput_sync); void __init files_init(void) { diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 68d08dcba018..94ae95c57f78 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -868,10 +868,7 @@ void xprt_connect(struct rpc_task *task) if (!xprt_lock_write(xprt, task)) return; - if (test_and_clear_bit(XPRT_CLOSE_WAIT, &xprt->state)) - xprt->ops->close(xprt); - - if (!xprt_connected(xprt)) { + if (!xprt_connected(xprt) && !test_bit(XPRT_CLOSE_WAIT, &xprt->state)) { task->tk_rqstp->rq_connect_cookie = xprt->connect_cookie; rpc_sleep_on_timeout(&xprt->pending, task, NULL, xprt_request_timeout(task->tk_rqstp)); diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 29e9c54a89d3..81f0e03b71b6 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -989,7 +989,7 @@ static int xs_local_send_request(struct rpc_rqst *req) /* Close the stream if the previous transmission was incomplete */ if (xs_send_request_was_aborted(transport, req)) { - xs_close(xprt); + xprt_force_disconnect(xprt); return -ENOTCONN; } @@ -1027,7 +1027,7 @@ static int xs_local_send_request(struct rpc_rqst *req) -status); /* fall through */ case -EPIPE: - xs_close(xprt); + xprt_force_disconnect(xprt); status = -ENOTCONN; } @@ -1303,6 +1303,16 @@ static void xs_reset_transport(struct sock_xprt *transport) if (sk == NULL) return; + /* + * Make sure we're calling this in a context from which it is safe + * to call __fput_sync(). In practice that means rpciod and the + * system workqueue. + */ + if (!(current->flags & PF_WQ_WORKER)) { + WARN_ON_ONCE(1); + set_bit(XPRT_CLOSE_WAIT, &xprt->state); + return; + } if (atomic_read(&transport->xprt.swapper)) sk_clear_memalloc(sk); @@ -1326,7 +1336,7 @@ static void xs_reset_transport(struct sock_xprt *transport) mutex_unlock(&transport->recv_mutex); trace_rpc_socket_close(xprt, sock); - fput(filp); + __fput_sync(filp); xprt_disconnect_done(xprt); } From c8a5e14cb407749bfea9cfe3087361b4d1c41fa5 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 10 May 2022 12:36:26 +0200 Subject: [PATCH 26/69] ALSA: wavefront: Proper check of get_user() error commit a34ae6c0660d3b96b0055f68ef74dc9478852245 upstream. The antient ISA wavefront driver reads its sample patch data (uploaded over an ioctl) via __get_user() with no good reason; likely just for some performance optimizations in the past. Let's change this to the standard get_user() and the error check for handling the fault case properly. Reported-by: Linus Torvalds Cc: Link: https://lore.kernel.org/r/20220510103626.16635-1-tiwai@suse.de Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/isa/wavefront/wavefront_synth.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sound/isa/wavefront/wavefront_synth.c b/sound/isa/wavefront/wavefront_synth.c index d6420d224d09..09b368761cc0 100644 --- a/sound/isa/wavefront/wavefront_synth.c +++ b/sound/isa/wavefront/wavefront_synth.c @@ -1088,7 +1088,8 @@ wavefront_send_sample (snd_wavefront_t *dev, if (dataptr < data_end) { - __get_user (sample_short, dataptr); + if (get_user(sample_short, dataptr)) + return -EFAULT; dataptr += skip; if (data_is_unsigned) { /* GUS ? */ From dd0ea88b0a0f913f82500e988ef38158a9ad9885 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 20 May 2022 20:38:06 +0200 Subject: [PATCH 27/69] perf: Fix sys_perf_event_open() race against self commit 3ac6487e584a1eb54071dbe1212e05b884136704 upstream. Norbert reported that it's possible to race sys_perf_event_open() such that the looser ends up in another context from the group leader, triggering many WARNs. The move_group case checks for races against itself, but the !move_group case doesn't, seemingly relying on the previous group_leader->ctx == ctx check. However, that check is racy due to not holding any locks at that time. Therefore, re-check the result after acquiring locks and bailing if they no longer match. Additionally, clarify the not_move_group case from the move_group-vs-move_group race. Fixes: f63a8daa5812 ("perf: Fix event->ctx locking") Reported-by: Norbert Slusarek Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- kernel/events/core.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/kernel/events/core.c b/kernel/events/core.c index 52f4a9e46704..8336dcb2bd43 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -11114,6 +11114,9 @@ SYSCALL_DEFINE5(perf_event_open, * Do not allow to attach to a group in a different task * or CPU context. If we're moving SW events, we'll fix * this up later, so allow that. + * + * Racy, not holding group_leader->ctx->mutex, see comment with + * perf_event_ctx_lock(). */ if (!move_group && group_leader->ctx != ctx) goto err_context; @@ -11181,6 +11184,7 @@ SYSCALL_DEFINE5(perf_event_open, } else { perf_event_ctx_unlock(group_leader, gctx); move_group = 0; + goto not_move_group; } } @@ -11197,7 +11201,17 @@ SYSCALL_DEFINE5(perf_event_open, } } else { mutex_lock(&ctx->mutex); + + /* + * Now that we hold ctx->lock, (re)validate group_leader->ctx == ctx, + * see the group_leader && !move_group test earlier. + */ + if (group_leader && group_leader->ctx != ctx) { + err = -EINVAL; + goto err_locked; + } } +not_move_group: if (ctx->task == TASK_TOMBSTONE) { err = -ESRCH; From 3a12b2c413b20c17832ec51cb836a0b713b916ac Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 16 May 2022 16:42:13 +0800 Subject: [PATCH 28/69] Fix double fget() in vhost_net_set_backend() commit fb4554c2232e44d595920f4d5c66cf8f7d13f9bc upstream. Descriptor table is a shared resource; two fget() on the same descriptor may return different struct file references. get_tap_ptr_ring() is called after we'd found (and pinned) the socket we'll be using and it tries to find the private tun/tap data structures associated with it. Redoing the lookup by the same file descriptor we'd used to get the socket is racy - we need to same struct file. Thanks to Jason for spotting a braino in the original variant of patch - I'd missed the use of fd == -1 for disabling backend, and in that case we can end up with sock == NULL and sock != oldsock. Cc: stable@kernel.org Acked-by: Michael S. Tsirkin Signed-off-by: Jason Wang Signed-off-by: Al Viro Signed-off-by: Greg Kroah-Hartman --- drivers/vhost/net.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index 1058aba8d573..3e0267ead718 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -1446,13 +1446,9 @@ err: return ERR_PTR(r); } -static struct ptr_ring *get_tap_ptr_ring(int fd) +static struct ptr_ring *get_tap_ptr_ring(struct file *file) { struct ptr_ring *ring; - struct file *file = fget(fd); - - if (!file) - return NULL; ring = tun_get_tx_ring(file); if (!IS_ERR(ring)) goto out; @@ -1461,7 +1457,6 @@ static struct ptr_ring *get_tap_ptr_ring(int fd) goto out; ring = NULL; out: - fput(file); return ring; } @@ -1548,8 +1543,12 @@ static long vhost_net_set_backend(struct vhost_net *n, unsigned index, int fd) r = vhost_net_enable_vq(n, vq); if (r) goto err_used; - if (index == VHOST_NET_VQ_RX) - nvq->rx_ring = get_tap_ptr_ring(fd); + if (index == VHOST_NET_VQ_RX) { + if (sock) + nvq->rx_ring = get_tap_ptr_ring(sock->file); + else + nvq->rx_ring = NULL; + } oldubufs = nvq->ubufs; nvq->ubufs = ubufs; From de874518274901737a943c094d9b4adf232e3c2a Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 31 Mar 2022 19:38:51 +0200 Subject: [PATCH 29/69] PCI/PM: Avoid putting Elo i2 PCIe Ports in D3cold commit 92597f97a40bf661bebceb92e26ff87c76d562d4 upstream. If a Root Port on Elo i2 is put into D3cold and then back into D0, the downstream device becomes permanently inaccessible, so add a bridge D3 DMI quirk for that system. This was exposed by 14858dcc3b35 ("PCI: Use pci_update_current_state() in pci_enable_device_flags()"), but before that commit the Root Port in question had never been put into D3cold for real due to a mismatch between its power state retrieved from the PCI_PM_CTRL register (which was accessible even though the platform firmware indicated that the port was in D3cold) and the state of an ACPI power resource involved in its power management. BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=215715 Link: https://lore.kernel.org/r/11980172.O9o76ZdvQC@kreacher Reported-by: Stefan Gottwald Signed-off-by: Rafael J. Wysocki Signed-off-by: Bjorn Helgaas Cc: stable@vger.kernel.org # v5.15+ Signed-off-by: Greg Kroah-Hartman --- drivers/pci/pci.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index b9550cd4280c..d539eb379743 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -2613,6 +2613,16 @@ static const struct dmi_system_id bridge_d3_blacklist[] = { DMI_MATCH(DMI_BOARD_VENDOR, "Gigabyte Technology Co., Ltd."), DMI_MATCH(DMI_BOARD_NAME, "X299 DESIGNARE EX-CF"), }, + /* + * Downstream device is not accessible after putting a root port + * into D3cold and back into D0 on Elo i2. + */ + .ident = "Elo i2", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Elo Touch Solutions"), + DMI_MATCH(DMI_PRODUCT_NAME, "Elo i2"), + DMI_MATCH(DMI_PRODUCT_VERSION, "RevB"), + }, }, #endif { } From f4a093215b8e28878319d35c463000105e8cf06b Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 11 May 2022 14:51:22 +0000 Subject: [PATCH 30/69] KVM: x86/mmu: Update number of zapped pages even if page list is stable commit b28cb0cd2c5e80a8c0feb408a0e4b0dbb6d132c5 upstream. When zapping obsolete pages, update the running count of zapped pages regardless of whether or not the list has become unstable due to zapping a shadow page with its own child shadow pages. If the VM is backed by mostly 4kb pages, KVM can zap an absurd number of SPTEs without bumping the batch count and thus without yielding. In the worst case scenario, this can cause a soft lokcup. watchdog: BUG: soft lockup - CPU#12 stuck for 22s! [dirty_log_perf_:13020] RIP: 0010:workingset_activation+0x19/0x130 mark_page_accessed+0x266/0x2e0 kvm_set_pfn_accessed+0x31/0x40 mmu_spte_clear_track_bits+0x136/0x1c0 drop_spte+0x1a/0xc0 mmu_page_zap_pte+0xef/0x120 __kvm_mmu_prepare_zap_page+0x205/0x5e0 kvm_mmu_zap_all_fast+0xd7/0x190 kvm_mmu_invalidate_zap_pages_in_memslot+0xe/0x10 kvm_page_track_flush_slot+0x5c/0x80 kvm_arch_flush_shadow_memslot+0xe/0x10 kvm_set_memslot+0x1a8/0x5d0 __kvm_set_memory_region+0x337/0x590 kvm_vm_ioctl+0xb08/0x1040 Fixes: fbb158cb88b6 ("KVM: x86/mmu: Revert "Revert "KVM: MMU: zap pages in batch""") Reported-by: David Matlack Reviewed-by: Ben Gardon Cc: stable@vger.kernel.org Signed-off-by: Sean Christopherson Message-Id: <20220511145122.3133334-1-seanjc@google.com> Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/mmu.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index d3877dd713ae..015da62e4ad7 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -5821,6 +5821,7 @@ static void kvm_zap_obsolete_pages(struct kvm *kvm) { struct kvm_mmu_page *sp, *node; int nr_zapped, batch = 0; + bool unstable; restart: list_for_each_entry_safe_reverse(sp, node, @@ -5853,11 +5854,12 @@ restart: goto restart; } - if (__kvm_mmu_prepare_zap_page(kvm, sp, - &kvm->arch.zapped_obsolete_pages, &nr_zapped)) { - batch += nr_zapped; + unstable = __kvm_mmu_prepare_zap_page(kvm, sp, + &kvm->arch.zapped_obsolete_pages, &nr_zapped); + batch += nr_zapped; + + if (unstable) goto restart; - } } /* From 8be06f62b426801dba43ddf8893952a0e62ab6ae Mon Sep 17 00:00:00 2001 From: Ondrej Mosnacek Date: Tue, 3 May 2022 13:50:10 +0200 Subject: [PATCH 31/69] crypto: qcom-rng - fix infinite loop on requests not multiple of WORD_SZ commit 16287397ec5c08aa58db6acf7dbc55470d78087d upstream. The commit referenced in the Fixes tag removed the 'break' from the else branch in qcom_rng_read(), causing an infinite loop whenever 'max' is not a multiple of WORD_SZ. This can be reproduced e.g. by running: kcapi-rng -b 67 >/dev/null There are many ways to fix this without adding back the 'break', but they all seem more awkward than simply adding it back, so do just that. Tested on a machine with Qualcomm Amberwing processor. Fixes: a680b1832ced ("crypto: qcom-rng - ensure buffer for generate is completely filled") Cc: stable@vger.kernel.org Signed-off-by: Ondrej Mosnacek Reviewed-by: Brian Masney Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- drivers/crypto/qcom-rng.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/crypto/qcom-rng.c b/drivers/crypto/qcom-rng.c index 3a633a0c40fd..6cc4fd005fe0 100644 --- a/drivers/crypto/qcom-rng.c +++ b/drivers/crypto/qcom-rng.c @@ -64,6 +64,7 @@ static int qcom_rng_read(struct qcom_rng *rng, u8 *data, unsigned int max) } else { /* copy only remaining bytes */ memcpy(data, &val, max - currsize); + break; } } while (currsize < max); From 8cf6c24ed4882269920b60121c650260e445c808 Mon Sep 17 00:00:00 2001 From: Hangyu Hua Date: Mon, 16 May 2022 11:20:42 +0800 Subject: [PATCH 32/69] drm/dp/mst: fix a possible memory leak in fetch_monitor_name() commit 6e03b13cc7d9427c2c77feed1549191015615202 upstream. drm_dp_mst_get_edid call kmemdup to create mst_edid. So mst_edid need to be freed after use. Signed-off-by: Hangyu Hua Reviewed-by: Lyude Paul Signed-off-by: Lyude Paul Cc: stable@vger.kernel.org Link: https://patchwork.freedesktop.org/patch/msgid/20220516032042.13166-1-hbh25y@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/drm_dp_mst_topology.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/drm_dp_mst_topology.c b/drivers/gpu/drm/drm_dp_mst_topology.c index 2de1eebe591f..1ff13af13324 100644 --- a/drivers/gpu/drm/drm_dp_mst_topology.c +++ b/drivers/gpu/drm/drm_dp_mst_topology.c @@ -3657,6 +3657,7 @@ static void fetch_monitor_name(struct drm_dp_mst_topology_mgr *mgr, mst_edid = drm_dp_mst_get_edid(port->connector, mgr, port); drm_edid_get_monitor_name(mst_edid, name, namelen); + kfree(mst_edid); } /** From fe2a9469eca05b8f5f74669b72357eca99881173 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9r=C3=B4me=20Pouiller?= Date: Tue, 17 May 2022 09:27:08 +0200 Subject: [PATCH 33/69] dma-buf: fix use of DMA_BUF_SET_NAME_{A,B} in userspace MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 7c3e9fcad9c7d8bb5d69a576044fb16b1d2e8a01 upstream. The typedefs u32 and u64 are not available in userspace. Thus user get an error he try to use DMA_BUF_SET_NAME_A or DMA_BUF_SET_NAME_B: $ gcc -Wall -c -MMD -c -o ioctls_list.o ioctls_list.c In file included from /usr/include/x86_64-linux-gnu/asm/ioctl.h:1, from /usr/include/linux/ioctl.h:5, from /usr/include/asm-generic/ioctls.h:5, from ioctls_list.c:11: ioctls_list.c:463:29: error: ‘u32’ undeclared here (not in a function) 463 | { "DMA_BUF_SET_NAME_A", DMA_BUF_SET_NAME_A, -1, -1 }, // linux/dma-buf.h | ^~~~~~~~~~~~~~~~~~ ioctls_list.c:464:29: error: ‘u64’ undeclared here (not in a function) 464 | { "DMA_BUF_SET_NAME_B", DMA_BUF_SET_NAME_B, -1, -1 }, // linux/dma-buf.h | ^~~~~~~~~~~~~~~~~~ The issue was initially reported here[1]. [1]: https://github.com/jerome-pouiller/ioctl/pull/14 Signed-off-by: Jérôme Pouiller Reviewed-by: Christian König Fixes: a5bff92eaac4 ("dma-buf: Fix SET_NAME ioctl uapi") CC: stable@vger.kernel.org Link: https://patchwork.freedesktop.org/patch/msgid/20220517072708.245265-1-Jerome.Pouiller@silabs.com Signed-off-by: Christian König Signed-off-by: Greg Kroah-Hartman --- include/uapi/linux/dma-buf.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/uapi/linux/dma-buf.h b/include/uapi/linux/dma-buf.h index 7f30393b92c3..f76d11725c6c 100644 --- a/include/uapi/linux/dma-buf.h +++ b/include/uapi/linux/dma-buf.h @@ -44,7 +44,7 @@ struct dma_buf_sync { * between them in actual uapi, they're just different numbers. */ #define DMA_BUF_SET_NAME _IOW(DMA_BUF_BASE, 1, const char *) -#define DMA_BUF_SET_NAME_A _IOW(DMA_BUF_BASE, 1, u32) -#define DMA_BUF_SET_NAME_B _IOW(DMA_BUF_BASE, 1, u64) +#define DMA_BUF_SET_NAME_A _IOW(DMA_BUF_BASE, 1, __u32) +#define DMA_BUF_SET_NAME_B _IOW(DMA_BUF_BASE, 1, __u64) #endif From 6493ff94c0229b06d799a80bf9d59c447f321c00 Mon Sep 17 00:00:00 2001 From: Jae Hyun Yoo Date: Tue, 29 Mar 2022 10:39:26 -0700 Subject: [PATCH 34/69] ARM: dts: aspeed-g6: remove FWQSPID group in pinctrl dtsi [ Upstream commit efddaa397cceefb61476e383c26fafd1f8ab6356 ] FWSPIDQ2 and FWSPIDQ3 are not part of FWSPI18 interface so remove FWQSPID group in pinctrl dtsi. These pins must be used with the FWSPI pins that are dedicated for boot SPI interface which provides same 3.3v logic level. Fixes: 2f6edb6bcb2f ("ARM: dts: aspeed: Fix AST2600 quad spi group") Signed-off-by: Jae Hyun Yoo Reviewed-by: Andrew Jeffery Link: https://lore.kernel.org/r/20220329173932.2588289-2-quic_jaehyoo@quicinc.com Signed-off-by: Joel Stanley Signed-off-by: Sasha Levin --- arch/arm/boot/dts/aspeed-g6-pinctrl.dtsi | 5 ----- 1 file changed, 5 deletions(-) diff --git a/arch/arm/boot/dts/aspeed-g6-pinctrl.dtsi b/arch/arm/boot/dts/aspeed-g6-pinctrl.dtsi index f310f4d3bcc7..4792b3d9459d 100644 --- a/arch/arm/boot/dts/aspeed-g6-pinctrl.dtsi +++ b/arch/arm/boot/dts/aspeed-g6-pinctrl.dtsi @@ -117,11 +117,6 @@ groups = "FWSPID"; }; - pinctrl_fwqspid_default: fwqspid_default { - function = "FWSPID"; - groups = "FWQSPID"; - }; - pinctrl_fwspiwp_default: fwspiwp_default { function = "FWSPIWP"; groups = "FWSPIWP"; From a42ffe88332c52e8bf1d12b495070b480999e8d9 Mon Sep 17 00:00:00 2001 From: Jae Hyun Yoo Date: Tue, 29 Mar 2022 10:39:32 -0700 Subject: [PATCH 35/69] ARM: dts: aspeed-g6: fix SPI1/SPI2 quad pin group [ Upstream commit 890362d41b244536ab63591f813393f5fdf59ed7 ] Fix incorrect function mappings in pinctrl_qspi1_default and pinctrl_qspi2_default since their function should be SPI1 and SPI2 respectively. Fixes: f510f04c8c83 ("ARM: dts: aspeed: Add AST2600 pinmux nodes") Signed-off-by: Jae Hyun Yoo Reviewed-by: Andrew Jeffery Link: https://lore.kernel.org/r/20220329173932.2588289-8-quic_jaehyoo@quicinc.com Signed-off-by: Joel Stanley Signed-off-by: Sasha Levin --- arch/arm/boot/dts/aspeed-g6-pinctrl.dtsi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/boot/dts/aspeed-g6-pinctrl.dtsi b/arch/arm/boot/dts/aspeed-g6-pinctrl.dtsi index 4792b3d9459d..ac723fe898c7 100644 --- a/arch/arm/boot/dts/aspeed-g6-pinctrl.dtsi +++ b/arch/arm/boot/dts/aspeed-g6-pinctrl.dtsi @@ -648,12 +648,12 @@ }; pinctrl_qspi1_default: qspi1_default { - function = "QSPI1"; + function = "SPI1"; groups = "QSPI1"; }; pinctrl_qspi2_default: qspi2_default { - function = "QSPI2"; + function = "SPI2"; groups = "QSPI2"; }; From 50f70ee30236d1b290378f3a15c58e537b66df7f Mon Sep 17 00:00:00 2001 From: Harini Katakam Date: Thu, 12 May 2022 22:49:00 +0530 Subject: [PATCH 36/69] net: macb: Increment rx bd head after allocating skb and buffer [ Upstream commit 9500acc631dbb8b73166e25700e656b11f6007b6 ] In gem_rx_refill rx_prepared_head is incremented at the beginning of the while loop preparing the skb and data buffers. If the skb or data buffer allocation fails, this BD will be unusable BDs until the head loops back to the same BD (and obviously buffer allocation succeeds). In the unlikely event that there's a string of allocation failures, there will be an equal number of unusable BDs and an inconsistent RX BD chain. Hence increment the head at the end of the while loop to be clean. Fixes: 4df95131ea80 ("net/macb: change RX path for GEM") Signed-off-by: Harini Katakam Signed-off-by: Michal Simek Signed-off-by: Radhey Shyam Pandey Reviewed-by: Claudiu Beznea Link: https://lore.kernel.org/r/20220512171900.32593-1-harini.katakam@xilinx.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/cadence/macb_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index 002a374f197b..78219a9943a7 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -927,7 +927,6 @@ static void gem_rx_refill(struct macb_queue *queue) /* Make hw descriptor updates visible to CPU */ rmb(); - queue->rx_prepared_head++; desc = macb_rx_desc(queue, entry); if (!queue->rx_skbuff[entry]) { @@ -966,6 +965,7 @@ static void gem_rx_refill(struct macb_queue *queue) dma_wmb(); desc->addr &= ~MACB_BIT(RX_USED); } + queue->rx_prepared_head++; } /* Make descriptor updates visible to hardware */ From 1eb2d785815537e50f51784840ef5de76f0dea72 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Fri, 13 May 2022 11:27:06 +0200 Subject: [PATCH 37/69] net/sched: act_pedit: sanitize shift argument before usage [ Upstream commit 4d42d54a7d6aa6d29221d3fd4f2ae9503e94f011 ] syzbot was able to trigger an Out-of-Bound on the pedit action: UBSAN: shift-out-of-bounds in net/sched/act_pedit.c:238:43 shift exponent 1400735974 is too large for 32-bit type 'unsigned int' CPU: 0 PID: 3606 Comm: syz-executor151 Not tainted 5.18.0-rc5-syzkaller-00165-g810c2f0a3f86 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:88 [inline] dump_stack_lvl+0xcd/0x134 lib/dump_stack.c:106 ubsan_epilogue+0xb/0x50 lib/ubsan.c:151 __ubsan_handle_shift_out_of_bounds.cold+0xb1/0x187 lib/ubsan.c:322 tcf_pedit_init.cold+0x1a/0x1f net/sched/act_pedit.c:238 tcf_action_init_1+0x414/0x690 net/sched/act_api.c:1367 tcf_action_init+0x530/0x8d0 net/sched/act_api.c:1432 tcf_action_add+0xf9/0x480 net/sched/act_api.c:1956 tc_ctl_action+0x346/0x470 net/sched/act_api.c:2015 rtnetlink_rcv_msg+0x413/0xb80 net/core/rtnetlink.c:5993 netlink_rcv_skb+0x153/0x420 net/netlink/af_netlink.c:2502 netlink_unicast_kernel net/netlink/af_netlink.c:1319 [inline] netlink_unicast+0x543/0x7f0 net/netlink/af_netlink.c:1345 netlink_sendmsg+0x904/0xe00 net/netlink/af_netlink.c:1921 sock_sendmsg_nosec net/socket.c:705 [inline] sock_sendmsg+0xcf/0x120 net/socket.c:725 ____sys_sendmsg+0x6e2/0x800 net/socket.c:2413 ___sys_sendmsg+0xf3/0x170 net/socket.c:2467 __sys_sendmsg+0xe5/0x1b0 net/socket.c:2496 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x44/0xae RIP: 0033:0x7fe36e9e1b59 Code: 28 c3 e8 2a 14 00 00 66 2e 0f 1f 84 00 00 00 00 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 c0 ff ff ff f7 d8 64 89 01 48 RSP: 002b:00007ffef796fe88 EFLAGS: 00000246 ORIG_RAX: 000000000000002e RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007fe36e9e1b59 RDX: 0000000000000000 RSI: 0000000020000300 RDI: 0000000000000003 RBP: 00007fe36e9a5d00 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 00007fe36e9a5d90 R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000 The 'shift' field is not validated, and any value above 31 will trigger out-of-bounds. The issue predates the git history, but syzbot was able to trigger it only after the commit mentioned in the fixes tag, and this change only applies on top of such commit. Address the issue bounding the 'shift' value to the maximum allowed by the relevant operator. Reported-and-tested-by: syzbot+8ed8fc4c57e9dcf23ca6@syzkaller.appspotmail.com Fixes: 8b796475fd78 ("net/sched: act_pedit: really ensure the skb is writable") Signed-off-by: Paolo Abeni Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/sched/act_pedit.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c index 305cb190e997..f095a0fb75c6 100644 --- a/net/sched/act_pedit.c +++ b/net/sched/act_pedit.c @@ -231,6 +231,10 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla, for (i = 0; i < p->tcfp_nkeys; ++i) { u32 cur = p->tcfp_keys[i].off; + /* sanitize the shift value for any later use */ + p->tcfp_keys[i].shift = min_t(size_t, BITS_PER_TYPE(int) - 1, + p->tcfp_keys[i].shift); + /* The AT option can read a single byte, we can bound the actual * value with uchar max. */ From 32f779e6fbbe0c0860a00777b7e3dee6b5ec0c1c Mon Sep 17 00:00:00 2001 From: Zixuan Fu Date: Sat, 14 May 2022 13:06:56 +0800 Subject: [PATCH 38/69] net: vmxnet3: fix possible use-after-free bugs in vmxnet3_rq_alloc_rx_buf() [ Upstream commit 9e7fef9521e73ca8afd7da9e58c14654b02dfad8 ] In vmxnet3_rq_alloc_rx_buf(), when dma_map_single() fails, rbi->skb is freed immediately. Similarly, in another branch, when dma_map_page() fails, rbi->page is also freed. In the two cases, vmxnet3_rq_alloc_rx_buf() returns an error to its callers vmxnet3_rq_init() -> vmxnet3_rq_init_all() -> vmxnet3_activate_dev(). Then vmxnet3_activate_dev() calls vmxnet3_rq_cleanup_all() in error handling code, and rbi->skb or rbi->page are freed again in vmxnet3_rq_cleanup_all(), causing use-after-free bugs. To fix these possible bugs, rbi->skb and rbi->page should be cleared after they are freed. The error log in our fault-injection testing is shown as follows: [ 14.319016] BUG: KASAN: use-after-free in consume_skb+0x2f/0x150 ... [ 14.321586] Call Trace: ... [ 14.325357] consume_skb+0x2f/0x150 [ 14.325671] vmxnet3_rq_cleanup_all+0x33a/0x4e0 [vmxnet3] [ 14.326150] vmxnet3_activate_dev+0xb9d/0x2ca0 [vmxnet3] [ 14.326616] vmxnet3_open+0x387/0x470 [vmxnet3] ... [ 14.361675] Allocated by task 351: ... [ 14.362688] __netdev_alloc_skb+0x1b3/0x6f0 [ 14.362960] vmxnet3_rq_alloc_rx_buf+0x1b0/0x8d0 [vmxnet3] [ 14.363317] vmxnet3_activate_dev+0x3e3/0x2ca0 [vmxnet3] [ 14.363661] vmxnet3_open+0x387/0x470 [vmxnet3] ... [ 14.367309] [ 14.367412] Freed by task 351: ... [ 14.368932] __dev_kfree_skb_any+0xd2/0xe0 [ 14.369193] vmxnet3_rq_alloc_rx_buf+0x71e/0x8d0 [vmxnet3] [ 14.369544] vmxnet3_activate_dev+0x3e3/0x2ca0 [vmxnet3] [ 14.369883] vmxnet3_open+0x387/0x470 [vmxnet3] [ 14.370174] __dev_open+0x28a/0x420 [ 14.370399] __dev_change_flags+0x192/0x590 [ 14.370667] dev_change_flags+0x7a/0x180 [ 14.370919] do_setlink+0xb28/0x3570 [ 14.371150] rtnl_newlink+0x1160/0x1740 [ 14.371399] rtnetlink_rcv_msg+0x5bf/0xa50 [ 14.371661] netlink_rcv_skb+0x1cd/0x3e0 [ 14.371913] netlink_unicast+0x5dc/0x840 [ 14.372169] netlink_sendmsg+0x856/0xc40 [ 14.372420] ____sys_sendmsg+0x8a7/0x8d0 [ 14.372673] __sys_sendmsg+0x1c2/0x270 [ 14.372914] do_syscall_64+0x41/0x90 [ 14.373145] entry_SYSCALL_64_after_hwframe+0x44/0xae ... Fixes: 5738a09d58d5a ("vmxnet3: fix checks for dma mapping errors") Reported-by: TOTE Robot Signed-off-by: Zixuan Fu Link: https://lore.kernel.org/r/20220514050656.2636588-1-r33s3n6@gmail.com Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- drivers/net/vmxnet3/vmxnet3_drv.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c index cf090f88dac0..b71a019e9867 100644 --- a/drivers/net/vmxnet3/vmxnet3_drv.c +++ b/drivers/net/vmxnet3/vmxnet3_drv.c @@ -595,6 +595,7 @@ vmxnet3_rq_alloc_rx_buf(struct vmxnet3_rx_queue *rq, u32 ring_idx, if (dma_mapping_error(&adapter->pdev->dev, rbi->dma_addr)) { dev_kfree_skb_any(rbi->skb); + rbi->skb = NULL; rq->stats.rx_buf_alloc_failure++; break; } @@ -619,6 +620,7 @@ vmxnet3_rq_alloc_rx_buf(struct vmxnet3_rx_queue *rq, u32 ring_idx, if (dma_mapping_error(&adapter->pdev->dev, rbi->dma_addr)) { put_page(rbi->page); + rbi->page = NULL; rq->stats.rx_buf_alloc_failure++; break; } From dc64e8874e87dc1c1c723a1c6da7efc3305c18da Mon Sep 17 00:00:00 2001 From: Zixuan Fu Date: Sat, 14 May 2022 13:07:11 +0800 Subject: [PATCH 39/69] net: vmxnet3: fix possible NULL pointer dereference in vmxnet3_rq_cleanup() [ Upstream commit edf410cb74dc612fd47ef5be319c5a0bcd6e6ccd ] In vmxnet3_rq_create(), when dma_alloc_coherent() fails, vmxnet3_rq_destroy() is called. It sets rq->rx_ring[i].base to NULL. Then vmxnet3_rq_create() returns an error to its callers mxnet3_rq_create_all() -> vmxnet3_change_mtu(). Then vmxnet3_change_mtu() calls vmxnet3_force_close() -> dev_close() in error handling code. And the driver calls vmxnet3_close() -> vmxnet3_quiesce_dev() -> vmxnet3_rq_cleanup_all() -> vmxnet3_rq_cleanup(). In vmxnet3_rq_cleanup(), rq->rx_ring[ring_idx].base is accessed, but this variable is NULL, causing a NULL pointer dereference. To fix this possible bug, an if statement is added to check whether rq->rx_ring[0].base is NULL in vmxnet3_rq_cleanup() and exit early if so. The error log in our fault-injection testing is shown as follows: [ 65.220135] BUG: kernel NULL pointer dereference, address: 0000000000000008 ... [ 65.222633] RIP: 0010:vmxnet3_rq_cleanup_all+0x396/0x4e0 [vmxnet3] ... [ 65.227977] Call Trace: ... [ 65.228262] vmxnet3_quiesce_dev+0x80f/0x8a0 [vmxnet3] [ 65.228580] vmxnet3_close+0x2c4/0x3f0 [vmxnet3] [ 65.228866] __dev_close_many+0x288/0x350 [ 65.229607] dev_close_many+0xa4/0x480 [ 65.231124] dev_close+0x138/0x230 [ 65.231933] vmxnet3_force_close+0x1f0/0x240 [vmxnet3] [ 65.232248] vmxnet3_change_mtu+0x75d/0x920 [vmxnet3] ... Fixes: d1a890fa37f27 ("net: VMware virtual Ethernet NIC driver: vmxnet3") Reported-by: TOTE Robot Signed-off-by: Zixuan Fu Link: https://lore.kernel.org/r/20220514050711.2636709-1-r33s3n6@gmail.com Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- drivers/net/vmxnet3/vmxnet3_drv.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c index b71a019e9867..609f65530b9b 100644 --- a/drivers/net/vmxnet3/vmxnet3_drv.c +++ b/drivers/net/vmxnet3/vmxnet3_drv.c @@ -1586,6 +1586,10 @@ vmxnet3_rq_cleanup(struct vmxnet3_rx_queue *rq, u32 i, ring_idx; struct Vmxnet3_RxDesc *rxd; + /* ring has already been cleaned up */ + if (!rq->rx_ring[0].base) + return; + for (ring_idx = 0; ring_idx < 2; ring_idx++) { for (i = 0; i < rq->rx_ring[ring_idx].size; i++) { #ifdef __BIG_ENDIAN_BITFIELD From 8cb1a05fe38b90d02ff64e8de7a76f553ac6f21d Mon Sep 17 00:00:00 2001 From: Paul Greenwalt Date: Thu, 28 Apr 2022 14:11:42 -0700 Subject: [PATCH 40/69] ice: fix possible under reporting of ethtool Tx and Rx statistics [ Upstream commit 31b6298fd8e29effe9ed6b77351ac5969be56ce0 ] The hardware statistics counters are not cleared during resets so the drivers first access is to initialize the baseline and then subsequent reads are for reporting the counters. The statistics counters are read during the watchdog subtask when the interface is up. If the baseline is not initialized before the interface is up, then there can be a brief window in which some traffic can be transmitted/received before the initial baseline reading takes place. Directly initialize ethtool statistics in driver open so the baseline will be initialized when the interface is up, and any dropped packets incremented before the interface is up won't be reported. Fixes: 28dc1b86f8ea9 ("ice: ignore dropped packets during init") Signed-off-by: Paul Greenwalt Tested-by: Gurucharan (A Contingent worker at Intel) Signed-off-by: Tony Nguyen Signed-off-by: Sasha Levin --- drivers/net/ethernet/intel/ice/ice_main.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 3eea68f3a526..88750a96cb3f 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -3561,9 +3561,10 @@ static int ice_up_complete(struct ice_vsi *vsi) netif_carrier_on(vsi->netdev); } - /* clear this now, and the first stats read will be used as baseline */ - vsi->stat_offsets_loaded = false; - + /* Perform an initial read of the statistics registers now to + * set the baseline so counters are ready when interface is up + */ + ice_update_eth_stats(vsi); ice_service_task_schedule(pf); return 0; From d672eee9e404805e1c2aeb8b16ba43c9cdffff46 Mon Sep 17 00:00:00 2001 From: Codrin Ciubotariu Date: Wed, 13 Apr 2022 10:13:18 +0300 Subject: [PATCH 41/69] clk: at91: generated: consider range when calculating best rate [ Upstream commit d0031e6fbed955ff8d5f5bbc8fe7382482559cec ] clk_generated_best_diff() helps in finding the parent and the divisor to compute a rate closest to the required one. However, it doesn't take into account the request's range for the new rate. Make sure the new rate is within the required range. Fixes: 8a8f4bf0c480 ("clk: at91: clk-generated: create function to find best_diff") Signed-off-by: Codrin Ciubotariu Link: https://lore.kernel.org/r/20220413071318.244912-1-codrin.ciubotariu@microchip.com Reviewed-by: Claudiu Beznea Signed-off-by: Stephen Boyd Signed-off-by: Sasha Levin --- drivers/clk/at91/clk-generated.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/clk/at91/clk-generated.c b/drivers/clk/at91/clk-generated.c index d7fe1303f79d..0e7ec5075689 100644 --- a/drivers/clk/at91/clk-generated.c +++ b/drivers/clk/at91/clk-generated.c @@ -105,6 +105,10 @@ static void clk_generated_best_diff(struct clk_rate_request *req, tmp_rate = parent_rate; else tmp_rate = parent_rate / div; + + if (tmp_rate < req->min_rate || tmp_rate > req->max_rate) + return; + tmp_diff = abs(req->rate - tmp_rate); if (*best_diff < 0 || *best_diff >= tmp_diff) { From b368e07fb44dd4a6dab2b89f7cdbf61931536bf4 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sun, 15 May 2022 20:07:02 +0200 Subject: [PATCH 42/69] net/qla3xxx: Fix a test in ql_reset_work() [ Upstream commit 5361448e45fac6fb96738df748229432a62d78b6 ] test_bit() tests if one bit is set or not. Here the logic seems to check of bit QL_RESET_PER_SCSI (i.e. 4) OR bit QL_RESET_START (i.e. 3) is set. In fact, it checks if bit 7 (4 | 3 = 7) is set, that is to say QL_ADAPTER_UP. This looks harmless, because this bit is likely be set, and when the ql_reset_work() delayed work is scheduled in ql3xxx_isr() (the only place that schedule this work), QL_RESET_START or QL_RESET_PER_SCSI is set. This has been spotted by smatch. Fixes: 5a4faa873782 ("[PATCH] qla3xxx NIC driver") Signed-off-by: Christophe JAILLET Link: https://lore.kernel.org/r/80e73e33f390001d9c0140ffa9baddf6466a41a2.1652637337.git.christophe.jaillet@wanadoo.fr Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/qlogic/qla3xxx.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/qlogic/qla3xxx.c b/drivers/net/ethernet/qlogic/qla3xxx.c index da2862d59681..5e81cd317a32 100644 --- a/drivers/net/ethernet/qlogic/qla3xxx.c +++ b/drivers/net/ethernet/qlogic/qla3xxx.c @@ -3629,7 +3629,8 @@ static void ql_reset_work(struct work_struct *work) qdev->mem_map_registers; unsigned long hw_flags; - if (test_bit((QL_RESET_PER_SCSI | QL_RESET_START), &qdev->flags)) { + if (test_bit(QL_RESET_PER_SCSI, &qdev->flags) || + test_bit(QL_RESET_START, &qdev->flags)) { clear_bit(QL_LINK_MASTER, &qdev->flags); /* From 3277789f332ed34d46d35ee84992fd643f48aa4a Mon Sep 17 00:00:00 2001 From: Duoming Zhou Date: Tue, 17 May 2022 09:25:30 +0800 Subject: [PATCH 43/69] NFC: nci: fix sleep in atomic context bugs caused by nci_skb_alloc [ Upstream commit 23dd4581350d4ffa23d58976ec46408f8f4c1e16 ] There are sleep in atomic context bugs when the request to secure element of st-nci is timeout. The root cause is that nci_skb_alloc with GFP_KERNEL parameter is called in st_nci_se_wt_timeout which is a timer handler. The call paths that could trigger bugs are shown below: (interrupt context 1) st_nci_se_wt_timeout nci_hci_send_event nci_hci_send_data nci_skb_alloc(..., GFP_KERNEL) //may sleep (interrupt context 2) st_nci_se_wt_timeout nci_hci_send_event nci_hci_send_data nci_send_data nci_queue_tx_data_frags nci_skb_alloc(..., GFP_KERNEL) //may sleep This patch changes allocation mode of nci_skb_alloc from GFP_KERNEL to GFP_ATOMIC in order to prevent atomic context sleeping. The GFP_ATOMIC flag makes memory allocation operation could be used in atomic context. Fixes: ed06aeefdac3 ("nfc: st-nci: Rename st21nfcb to st-nci") Signed-off-by: Duoming Zhou Reviewed-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20220517012530.75714-1-duoming@zju.edu.cn Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/nfc/nci/data.c | 2 +- net/nfc/nci/hci.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/net/nfc/nci/data.c b/net/nfc/nci/data.c index ce3382be937f..b002e18f38c8 100644 --- a/net/nfc/nci/data.c +++ b/net/nfc/nci/data.c @@ -118,7 +118,7 @@ static int nci_queue_tx_data_frags(struct nci_dev *ndev, skb_frag = nci_skb_alloc(ndev, (NCI_DATA_HDR_SIZE + frag_len), - GFP_KERNEL); + GFP_ATOMIC); if (skb_frag == NULL) { rc = -ENOMEM; goto free_exit; diff --git a/net/nfc/nci/hci.c b/net/nfc/nci/hci.c index 04e55ccb3383..4fe336ff2bfa 100644 --- a/net/nfc/nci/hci.c +++ b/net/nfc/nci/hci.c @@ -153,7 +153,7 @@ static int nci_hci_send_data(struct nci_dev *ndev, u8 pipe, i = 0; skb = nci_skb_alloc(ndev, conn_info->max_pkt_payload_len + - NCI_DATA_HDR_SIZE, GFP_KERNEL); + NCI_DATA_HDR_SIZE, GFP_ATOMIC); if (!skb) return -ENOMEM; @@ -186,7 +186,7 @@ static int nci_hci_send_data(struct nci_dev *ndev, u8 pipe, if (i < data_len) { skb = nci_skb_alloc(ndev, conn_info->max_pkt_payload_len + - NCI_DATA_HDR_SIZE, GFP_KERNEL); + NCI_DATA_HDR_SIZE, GFP_ATOMIC); if (!skb) return -ENOMEM; From c0be5fec786b5aab3f5db222b28a64d0946d6658 Mon Sep 17 00:00:00 2001 From: Maxim Mikityanskiy Date: Tue, 12 Apr 2022 18:37:03 +0300 Subject: [PATCH 44/69] net/mlx5e: Properly block LRO when XDP is enabled [ Upstream commit cf6e34c8c22fba66bd21244b95ea47e235f68974 ] LRO is incompatible and mutually exclusive with XDP. However, the needed checks are only made when enabling XDP. If LRO is enabled when XDP is already active, the command will succeed, and XDP will be skipped in the data path, although still enabled. This commit fixes the bug by checking the XDP status in mlx5e_fix_features and disabling LRO if XDP is enabled. Fixes: 86994156c736 ("net/mlx5e: XDP fast RX drop bpf programs support") Signed-off-by: Maxim Mikityanskiy Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed Signed-off-by: Sasha Levin --- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 2465165cbea7..73291051808f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -3980,6 +3980,13 @@ static netdev_features_t mlx5e_fix_features(struct net_device *netdev, } } + if (params->xdp_prog) { + if (features & NETIF_F_LRO) { + netdev_warn(netdev, "LRO is incompatible with XDP\n"); + features &= ~NETIF_F_LRO; + } + } + if (MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS)) { features &= ~NETIF_F_RXHASH; if (netdev->features & NETIF_F_RXHASH) From 1b93631c77c9b18cf411eafa86e0f42934da84b4 Mon Sep 17 00:00:00 2001 From: Jiasheng Jiang Date: Tue, 17 May 2022 17:42:31 +0800 Subject: [PATCH 45/69] net: af_key: add check for pfkey_broadcast in function pfkey_process [ Upstream commit 4dc2a5a8f6754492180741facf2a8787f2c415d7 ] If skb_clone() returns null pointer, pfkey_broadcast() will return error. Therefore, it should be better to check the return value of pfkey_broadcast() and return error if fails. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Jiasheng Jiang Signed-off-by: Steffen Klassert Signed-off-by: Sasha Levin --- net/key/af_key.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/key/af_key.c b/net/key/af_key.c index 2ac9560020f9..f67d3ba72c49 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -2830,8 +2830,10 @@ static int pfkey_process(struct sock *sk, struct sk_buff *skb, const struct sadb void *ext_hdrs[SADB_EXT_MAX]; int err; - pfkey_broadcast(skb_clone(skb, GFP_KERNEL), GFP_KERNEL, - BROADCAST_PROMISC_ONLY, NULL, sock_net(sk)); + err = pfkey_broadcast(skb_clone(skb, GFP_KERNEL), GFP_KERNEL, + BROADCAST_PROMISC_ONLY, NULL, sock_net(sk)); + if (err) + return err; memset(ext_hdrs, 0, sizeof(ext_hdrs)); err = parse_exthdrs(skb, hdr, ext_hdrs); From 463a7b957db0d2c96871f446b0c94d39a9bee244 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Wed, 20 Apr 2022 09:44:51 +0100 Subject: [PATCH 46/69] ARM: 9196/1: spectre-bhb: enable for Cortex-A15 [ Upstream commit 0dc14aa94ccd8ba35eb17a0f9b123d1566efd39e ] The Spectre-BHB mitigations were inadvertently left disabled for Cortex-A15, due to the fact that cpu_v7_bugs_init() is not called in that case. So fix that. Fixes: b9baf5c8c5c3 ("ARM: Spectre-BHB workaround") Signed-off-by: Ard Biesheuvel Signed-off-by: Russell King (Oracle) Signed-off-by: Sasha Levin --- arch/arm/mm/proc-v7-bugs.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/mm/proc-v7-bugs.c b/arch/arm/mm/proc-v7-bugs.c index 097ef85bb7f2..bcb9181601d9 100644 --- a/arch/arm/mm/proc-v7-bugs.c +++ b/arch/arm/mm/proc-v7-bugs.c @@ -301,6 +301,7 @@ void cpu_v7_ca15_ibe(void) { if (check_spectre_auxcr(this_cpu_ptr(&spectre_warned), BIT(0))) cpu_v7_spectre_v2_init(); + cpu_v7_spectre_bhb_init(); } void cpu_v7_bugs_init(void) From eb92a8ecce23b2a8b7d0387322abd3196fa794b5 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Wed, 20 Apr 2022 09:46:17 +0100 Subject: [PATCH 47/69] ARM: 9197/1: spectre-bhb: fix loop8 sequence for Thumb2 [ Upstream commit 3cfb3019979666bdf33a1010147363cf05e0f17b ] In Thumb2, 'b . + 4' produces a branch instruction that uses a narrow encoding, and so it does not jump to the following instruction as expected. So use W(b) instead. Fixes: 6c7cb60bff7a ("ARM: fix Thumb2 regression with Spectre BHB") Signed-off-by: Ard Biesheuvel Signed-off-by: Russell King (Oracle) Signed-off-by: Sasha Levin --- arch/arm/kernel/entry-armv.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S index 3d65fa56a0e5..8e8efe28d799 100644 --- a/arch/arm/kernel/entry-armv.S +++ b/arch/arm/kernel/entry-armv.S @@ -1043,7 +1043,7 @@ vector_bhb_loop8_\name: @ bhb workaround mov r0, #8 -3: b . + 4 +3: W(b) . + 4 subs r0, r0, #1 bne 3b dsb From 622be11fa3851891edfb4b63c40d5b137e88837f Mon Sep 17 00:00:00 2001 From: Kevin Mitchell Date: Tue, 17 May 2022 11:01:05 -0700 Subject: [PATCH 48/69] igb: skip phy status check where unavailable [ Upstream commit 942d2ad5d2e0df758a645ddfadffde2795322728 ] igb_read_phy_reg() will silently return, leaving phy_data untouched, if hw->ops.read_reg isn't set. Depending on the uninitialized value of phy_data, this led to the phy status check either succeeding immediately or looping continuously for 2 seconds before emitting a noisy err-level timeout. This message went out to the console even though there was no actual problem. Instead, first check if there is read_reg function pointer. If not, proceed without trying to check the phy status register. Fixes: b72f3f72005d ("igb: When GbE link up, wait for Remote receiver status condition") Signed-off-by: Kevin Mitchell Tested-by: Gurucharan (A Contingent worker at Intel) Signed-off-by: Tony Nguyen Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/intel/igb/igb_main.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index 3df25b231ab5..26c8d09ad4dd 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -5318,7 +5318,8 @@ static void igb_watchdog_task(struct work_struct *work) break; } - if (adapter->link_speed != SPEED_1000) + if (adapter->link_speed != SPEED_1000 || + !hw->phy.ops.read_reg) goto no_wait; /* wait for Remote receiver status OK */ From 1fe6dc5f5d19f0929431a775347dbcea5bf854f2 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Wed, 18 May 2022 02:58:40 +0200 Subject: [PATCH 49/69] net: bridge: Clear offload_fwd_mark when passing frame up bridge interface. [ Upstream commit fbb3abdf2223cd0dfc07de85fe5a43ba7f435bdf ] It is possible to stack bridges on top of each other. Consider the following which makes use of an Ethernet switch: br1 / \ / \ / \ br0.11 wlan0 | br0 / | \ p1 p2 p3 br0 is offloaded to the switch. Above br0 is a vlan interface, for vlan 11. This vlan interface is then a slave of br1. br1 also has a wireless interface as a slave. This setup trunks wireless lan traffic over the copper network inside a VLAN. A frame received on p1 which is passed up to the bridge has the skb->offload_fwd_mark flag set to true, indicating that the switch has dealt with forwarding the frame out ports p2 and p3 as needed. This flag instructs the software bridge it does not need to pass the frame back down again. However, the flag is not getting reset when the frame is passed upwards. As a result br1 sees the flag, wrongly interprets it, and fails to forward the frame to wlan0. When passing a frame upwards, clear the flag. This is the Rx equivalent of br_switchdev_frame_unmark() in br_dev_xmit(). Fixes: f1c2eddf4cb6 ("bridge: switchdev: Use an helper to clear forward mark") Signed-off-by: Andrew Lunn Reviewed-by: Ido Schimmel Tested-by: Ido Schimmel Acked-by: Nikolay Aleksandrov Link: https://lore.kernel.org/r/20220518005840.771575-1-andrew@lunn.ch Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- net/bridge/br_input.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c index 09b1dd8cd853..464f6a619444 100644 --- a/net/bridge/br_input.c +++ b/net/bridge/br_input.c @@ -42,6 +42,13 @@ static int br_pass_frame_up(struct sk_buff *skb) u64_stats_update_end(&brstats->syncp); vg = br_vlan_group_rcu(br); + + /* Reset the offload_fwd_mark because there could be a stacked + * bridge above, and it should not think this bridge it doing + * that bridge's work forwarding out its ports. + */ + br_switchdev_frame_unmark(skb); + /* Bridge is just like any other port. Make sure the * packet is allowed except in promisc modue when someone * may be running packet capture. From 3fdd67e83c4221408dea5c3de330d1644f699732 Mon Sep 17 00:00:00 2001 From: Haibo Chen Date: Wed, 11 May 2022 10:15:04 +0800 Subject: [PATCH 50/69] gpio: gpio-vf610: do not touch other bits when set the target bit [ Upstream commit 9bf3ac466faa83d51a8fe9212131701e58fdef74 ] For gpio controller contain register PDDR, when set one target bit, current logic will clear all other bits, this is wrong. Use operator '|=' to fix it. Fixes: 659d8a62311f ("gpio: vf610: add imx7ulp support") Reviewed-by: Peng Fan Signed-off-by: Haibo Chen Signed-off-by: Bartosz Golaszewski Signed-off-by: Sasha Levin --- drivers/gpio/gpio-vf610.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/gpio/gpio-vf610.c b/drivers/gpio/gpio-vf610.c index 58776f2d69ff..1ae612c796ee 100644 --- a/drivers/gpio/gpio-vf610.c +++ b/drivers/gpio/gpio-vf610.c @@ -125,9 +125,13 @@ static int vf610_gpio_direction_output(struct gpio_chip *chip, unsigned gpio, { struct vf610_gpio_port *port = gpiochip_get_data(chip); unsigned long mask = BIT(gpio); + u32 val; - if (port->sdata && port->sdata->have_paddr) - vf610_gpio_writel(mask, port->gpio_base + GPIO_PDDR); + if (port->sdata && port->sdata->have_paddr) { + val = vf610_gpio_readl(port->gpio_base + GPIO_PDDR); + val |= mask; + vf610_gpio_writel(val, port->gpio_base + GPIO_PDDR); + } vf610_gpio_set(chip, gpio, value); From d1915d9c9fa3b2690c1092fce1b78d5e63adc983 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Wed, 11 May 2022 09:58:56 +0200 Subject: [PATCH 51/69] gpio: mvebu/pwm: Refuse requests with inverted polarity MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 3ecb10175b1f776f076553c24e2689e42953fef5 ] The driver doesn't take struct pwm_state::polarity into account when configuring the hardware, so refuse requests for inverted polarity. Fixes: 757642f9a584 ("gpio: mvebu: Add limited PWM support") Signed-off-by: Uwe Kleine-König Signed-off-by: Bartosz Golaszewski Signed-off-by: Sasha Levin --- drivers/gpio/gpio-mvebu.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpio/gpio-mvebu.c b/drivers/gpio/gpio-mvebu.c index 89a053b1d279..b5ae28fce9a8 100644 --- a/drivers/gpio/gpio-mvebu.c +++ b/drivers/gpio/gpio-mvebu.c @@ -697,6 +697,9 @@ static int mvebu_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm, unsigned long flags; unsigned int on, off; + if (state->polarity != PWM_POLARITY_NORMAL) + return -EINVAL; + val = (unsigned long long) mvpwm->clk_rate * state->duty_cycle; do_div(val, NSEC_PER_SEC); if (val > UINT_MAX) From 74168c2207a5531a2b8a526c82b5c76bec9f5e95 Mon Sep 17 00:00:00 2001 From: Thomas Richter Date: Fri, 20 May 2022 10:11:58 +0200 Subject: [PATCH 52/69] perf bench numa: Address compiler error on s390 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit f8ac1c478424a9a14669b8cef7389b1e14e5229d ] The compilation on s390 results in this error: # make DEBUG=y bench/numa.o ... bench/numa.c: In function ‘__bench_numa’: bench/numa.c:1749:81: error: ‘%d’ directive output may be truncated writing between 1 and 11 bytes into a region of size between 10 and 20 [-Werror=format-truncation=] 1749 | snprintf(tname, sizeof(tname), "process%d:thread%d", p, t); ^~ ... bench/numa.c:1749:64: note: directive argument in the range [-2147483647, 2147483646] ... # The maximum length of the %d replacement is 11 characters because of the negative sign. Therefore extend the array by two more characters. Output after: # make DEBUG=y bench/numa.o > /dev/null 2>&1; ll bench/numa.o -rw-r--r-- 1 root root 418320 May 19 09:11 bench/numa.o # Fixes: 3aff8ba0a4c9c919 ("perf bench numa: Avoid possible truncation when using snprintf()") Suggested-by: Namhyung Kim Signed-off-by: Thomas Richter Cc: Heiko Carstens Cc: Sumanth Korikkar Cc: Sven Schnelle Cc: Vasily Gorbik Link: https://lore.kernel.org/r/20220520081158.2990006-1-tmricht@linux.ibm.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/perf/bench/numa.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/bench/numa.c b/tools/perf/bench/numa.c index 5797253b9700..69d62e57a0c3 100644 --- a/tools/perf/bench/numa.c +++ b/tools/perf/bench/numa.c @@ -1630,7 +1630,7 @@ static int __bench_numa(const char *name) "GB/sec,", "total-speed", "GB/sec total speed"); if (g->p.show_details >= 2) { - char tname[14 + 2 * 10 + 1]; + char tname[14 + 2 * 11 + 1]; struct thread_data *td; for (p = 0; p < g->p.nr_proc; p++) { for (t = 0; t < g->p.nr_threads; t++) { From 19e2cd737c166b9a57f9f0f687d16f36dde4046f Mon Sep 17 00:00:00 2001 From: Gleb Chesnokov Date: Fri, 15 Apr 2022 12:42:29 +0000 Subject: [PATCH 53/69] scsi: qla2xxx: Fix missed DMA unmap for aborted commands [ Upstream commit 26f9ce53817a8fd84b69a73473a7de852a24c897 ] Aborting commands that have already been sent to the firmware can cause BUG in qlt_free_cmd(): BUG_ON(cmd->sg_mapped) For instance: - Command passes rdx_to_xfer state, maps sgl, sends to the firmware - Reset occurs, qla2xxx performs ISP error recovery, aborts the command - Target stack calls qlt_abort_cmd() and then qlt_free_cmd() - BUG_ON(cmd->sg_mapped) in qlt_free_cmd() occurs because sgl was not unmapped Thus, unmap sgl in qlt_abort_cmd() for commands with the aborted flag set. Link: https://lore.kernel.org/r/AS8PR10MB4952D545F84B6B1DFD39EC1E9DEE9@AS8PR10MB4952.EURPRD10.PROD.OUTLOOK.COM Reviewed-by: Himanshu Madhani Signed-off-by: Gleb Chesnokov Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/qla2xxx/qla_target.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c index df598c377161..cb97565b6a33 100644 --- a/drivers/scsi/qla2xxx/qla_target.c +++ b/drivers/scsi/qla2xxx/qla_target.c @@ -3768,6 +3768,9 @@ int qlt_abort_cmd(struct qla_tgt_cmd *cmd) spin_lock_irqsave(&cmd->cmd_lock, flags); if (cmd->aborted) { + if (cmd->sg_mapped) + qlt_unmap_sg(vha, cmd); + spin_unlock_irqrestore(&cmd->cmd_lock, flags); /* * It's normal to see 2 calls in this path: From 9919585e5f418ed25531d5c0aba377496a434cf5 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Wed, 20 Apr 2022 12:50:38 +0200 Subject: [PATCH 54/69] mac80211: fix rx reordering with non explicit / psmp ack policy [ Upstream commit 5e469ed9764d4722c59562da13120bd2dc6834c5 ] When the QoS ack policy was set to non explicit / psmp ack, frames are treated as not being part of a BA session, which causes extra latency on reordering. Fix this by only bypassing reordering for packets with no-ack policy Signed-off-by: Felix Fietkau Link: https://lore.kernel.org/r/20220420105038.36443-1-nbd@nbd.name Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin --- net/mac80211/rx.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index ab91683d9459..99d5f8b58e92 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -1400,8 +1400,7 @@ static void ieee80211_rx_reorder_ampdu(struct ieee80211_rx_data *rx, goto dont_reorder; /* not part of a BA session */ - if (ack_policy != IEEE80211_QOS_CTL_ACK_POLICY_BLOCKACK && - ack_policy != IEEE80211_QOS_CTL_ACK_POLICY_NORMAL) + if (ack_policy == IEEE80211_QOS_CTL_ACK_POLICY_NOACK) goto dont_reorder; /* new, potentially un-ordered, ampdu frame - process it */ From dd5de66f5c8a958ee53cd66ac7cad7421502e48d Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Wed, 4 May 2022 11:07:39 +0200 Subject: [PATCH 55/69] selftests: add ping test with ping_group_range tuned [ Upstream commit e71b7f1f44d3d88c677769c85ef0171caf9fc89f ] The 'ping' utility is able to manage two kind of sockets (raw or icmp), depending on the sysctl ping_group_range. By default, ping_group_range is set to '1 0', which forces ping to use an ip raw socket. Let's replay the ping tests by allowing 'ping' to use the ip icmp socket. After the previous patch, ipv4 tests results are the same with both kinds of socket. For ipv6, there are a lot a new failures (the previous patch fixes only two cases). Signed-off-by: Nicolas Dichtel Reviewed-by: David Ahern Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- tools/testing/selftests/net/fcnal-test.sh | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/tools/testing/selftests/net/fcnal-test.sh b/tools/testing/selftests/net/fcnal-test.sh index 157822331954..d2ac09b35dcf 100755 --- a/tools/testing/selftests/net/fcnal-test.sh +++ b/tools/testing/selftests/net/fcnal-test.sh @@ -757,10 +757,16 @@ ipv4_ping() setup set_sysctl net.ipv4.raw_l3mdev_accept=1 2>/dev/null ipv4_ping_novrf + setup + set_sysctl net.ipv4.ping_group_range='0 2147483647' 2>/dev/null + ipv4_ping_novrf log_subsection "With VRF" setup "yes" ipv4_ping_vrf + setup "yes" + set_sysctl net.ipv4.ping_group_range='0 2147483647' 2>/dev/null + ipv4_ping_vrf } ################################################################################ @@ -2005,10 +2011,16 @@ ipv6_ping() log_subsection "No VRF" setup ipv6_ping_novrf + setup + set_sysctl net.ipv4.ping_group_range='0 2147483647' 2>/dev/null + ipv6_ping_novrf log_subsection "With VRF" setup "yes" ipv6_ping_vrf + setup "yes" + set_sysctl net.ipv4.ping_group_range='0 2147483647' 2>/dev/null + ipv6_ping_vrf } ################################################################################ From 91d8d7edf19227f301628b20f895286baa994777 Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Fri, 6 May 2022 17:42:50 +0800 Subject: [PATCH 56/69] ethernet: tulip: fix missing pci_disable_device() on error in tulip_init_one() [ Upstream commit 51ca86b4c9c7c75f5630fa0dbe5f8f0bd98e3c3e ] Fix the missing pci_disable_device() before return from tulip_init_one() in the error handling case. Reported-by: Hulk Robot Signed-off-by: Yang Yingliang Link: https://lore.kernel.org/r/20220506094250.3630615-1-yangyingliang@huawei.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/dec/tulip/tulip_core.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/dec/tulip/tulip_core.c b/drivers/net/ethernet/dec/tulip/tulip_core.c index 3e3e08698876..fea4223ad6f1 100644 --- a/drivers/net/ethernet/dec/tulip/tulip_core.c +++ b/drivers/net/ethernet/dec/tulip/tulip_core.c @@ -1410,8 +1410,10 @@ static int tulip_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) /* alloc_etherdev ensures aligned and zeroed private structures */ dev = alloc_etherdev (sizeof (*tp)); - if (!dev) + if (!dev) { + pci_disable_device(pdev); return -ENOMEM; + } SET_NETDEV_DEV(dev, &pdev->dev); if (pci_resource_len (pdev, 0) < tulip_tbl[chip_idx].io_size) { @@ -1788,6 +1790,7 @@ err_out_free_res: err_out_free_netdev: free_netdev (dev); + pci_disable_device(pdev); return -ENODEV; } From e5307704c4adb525185ef32d9fc626c7ac5c4631 Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Tue, 10 May 2022 11:13:16 +0800 Subject: [PATCH 57/69] net: stmmac: fix missing pci_disable_device() on error in stmmac_pci_probe() [ Upstream commit 0807ce0b010418a191e0e4009803b2d74c3245d5 ] Switch to using pcim_enable_device() to avoid missing pci_disable_device(). Reported-by: Hulk Robot Signed-off-by: Yang Yingliang Link: https://lore.kernel.org/r/20220510031316.1780409-1-yangyingliang@huawei.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c index 292045f4581f..fceb0f9e797f 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c @@ -481,7 +481,7 @@ static int stmmac_pci_probe(struct pci_dev *pdev, return -ENOMEM; /* Enable pci device */ - ret = pci_enable_device(pdev); + ret = pcim_enable_device(pdev); if (ret) { dev_err(&pdev->dev, "%s: ERROR: failed to enable device\n", __func__); @@ -538,8 +538,6 @@ static void stmmac_pci_remove(struct pci_dev *pdev) pcim_iounmap_regions(pdev, BIT(i)); break; } - - pci_disable_device(pdev); } static int __maybe_unused stmmac_pci_suspend(struct device *dev) From 1c450bdf2e8cdc71094e10de36bedf34535428fb Mon Sep 17 00:00:00 2001 From: Grant Grundler Date: Mon, 9 May 2022 19:28:26 -0700 Subject: [PATCH 58/69] net: atlantic: verify hw_head_ lies within TX buffer ring [ Upstream commit 2120b7f4d128433ad8c5f503a9584deba0684901 ] Bounds check hw_head index provided by NIC to verify it lies within the TX buffer ring. Reported-by: Aashay Shringarpure Reported-by: Yi Chou Reported-by: Shervin Oloumi Signed-off-by: Grant Grundler Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c index 2ad3fa6316ce..cb5954eeb409 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c @@ -674,6 +674,13 @@ static int hw_atl_b0_hw_ring_tx_head_update(struct aq_hw_s *self, err = -ENXIO; goto err_exit; } + + /* Validate that the new hw_head_ is reasonable. */ + if (hw_head_ >= ring->size) { + err = -ENXIO; + goto err_exit; + } + ring->hw_head = hw_head_; err = aq_hw_err_from_flags(self); From 36ac6caf742d9597a1ce3f2eeeb5de895fa75fd2 Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Wed, 18 May 2022 14:28:32 -0700 Subject: [PATCH 59/69] Input: ili210x - fix reset timing commit e4920d42ce0e9c8aafb7f64b6d9d4ae02161e51e upstream. According to Ilitek "231x & ILI251x Programming Guide" Version: 2.30 "2.1. Power Sequence", "T4 Chip Reset and discharge time" is minimum 10ms and "T2 Chip initial time" is maximum 150ms. Adjust the reset timings such that T4 is 12ms and T2 is 160ms to fit those figures. This prevents sporadic touch controller start up failures when some systems with at least ILI251x controller boot, without this patch the systems sometimes fail to communicate with the touch controller. Fixes: 201f3c803544c ("Input: ili210x - add reset GPIO support") Signed-off-by: Marek Vasut Link: https://lore.kernel.org/r/20220518204901.93534-1-marex@denx.de Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- drivers/input/touchscreen/ili210x.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/input/touchscreen/ili210x.c b/drivers/input/touchscreen/ili210x.c index 22839dde1d09..bc6f6a2ac4b9 100644 --- a/drivers/input/touchscreen/ili210x.c +++ b/drivers/input/touchscreen/ili210x.c @@ -290,9 +290,9 @@ static int ili210x_i2c_probe(struct i2c_client *client, if (error) return error; - usleep_range(50, 100); + usleep_range(12000, 15000); gpiod_set_value_cansleep(reset_gpio, 0); - msleep(100); + msleep(160); } priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL); From 9ea8e6a8323e063c42536f3f8c83d3768cff5f4d Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Thu, 29 Jul 2021 11:42:26 +0800 Subject: [PATCH 60/69] block: return ELEVATOR_DISCARD_MERGE if possible commit 866663b7b52d2da267b28e12eed89ee781b8fed1 upstream. When merging one bio to request, if they are discard IO and the queue supports multi-range discard, we need to return ELEVATOR_DISCARD_MERGE because both block core and related drivers(nvme, virtio-blk) doesn't handle mixed discard io merge(traditional IO merge together with discard merge) well. Fix the issue by returning ELEVATOR_DISCARD_MERGE in this situation, so both blk-mq and drivers just need to handle multi-range discard. Reported-by: Oleksandr Natalenko Signed-off-by: Ming Lei Tested-by: Oleksandr Natalenko Fixes: 2705dfb20947 ("block: fix discard request merge") Link: https://lore.kernel.org/r/20210729034226.1591070-1-ming.lei@redhat.com Signed-off-by: Jens Axboe Signed-off-by: Gwendal Grignou Signed-off-by: Greg Kroah-Hartman --- block/bfq-iosched.c | 3 +++ block/blk-merge.c | 15 --------------- block/elevator.c | 3 +++ block/mq-deadline.c | 2 ++ include/linux/blkdev.h | 16 ++++++++++++++++ 5 files changed, 24 insertions(+), 15 deletions(-) diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c index 1d443d17cf7c..d46806182b05 100644 --- a/block/bfq-iosched.c +++ b/block/bfq-iosched.c @@ -2251,6 +2251,9 @@ static int bfq_request_merge(struct request_queue *q, struct request **req, __rq = bfq_find_rq_fmerge(bfqd, bio, q); if (__rq && elv_bio_merge_ok(__rq, bio)) { *req = __rq; + + if (blk_discard_mergable(__rq)) + return ELEVATOR_DISCARD_MERGE; return ELEVATOR_FRONT_MERGE; } diff --git a/block/blk-merge.c b/block/blk-merge.c index a62692d13566..5219064cd72b 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -721,21 +721,6 @@ static void blk_account_io_merge(struct request *req) part_stat_unlock(); } } -/* - * Two cases of handling DISCARD merge: - * If max_discard_segments > 1, the driver takes every bio - * as a range and send them to controller together. The ranges - * needn't to be contiguous. - * Otherwise, the bios/requests will be handled as same as - * others which should be contiguous. - */ -static inline bool blk_discard_mergable(struct request *req) -{ - if (req_op(req) == REQ_OP_DISCARD && - queue_max_discard_segments(req->q) > 1) - return true; - return false; -} static enum elv_merge blk_try_req_merge(struct request *req, struct request *next) diff --git a/block/elevator.c b/block/elevator.c index 78805c74ea8a..3ba826230c57 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -337,6 +337,9 @@ enum elv_merge elv_merge(struct request_queue *q, struct request **req, __rq = elv_rqhash_find(q, bio->bi_iter.bi_sector); if (__rq && elv_bio_merge_ok(__rq, bio)) { *req = __rq; + + if (blk_discard_mergable(__rq)) + return ELEVATOR_DISCARD_MERGE; return ELEVATOR_BACK_MERGE; } diff --git a/block/mq-deadline.c b/block/mq-deadline.c index 19c6922e85f1..6d6dda5cfffa 100644 --- a/block/mq-deadline.c +++ b/block/mq-deadline.c @@ -452,6 +452,8 @@ static int dd_request_merge(struct request_queue *q, struct request **rq, if (elv_bio_merge_ok(__rq, bio)) { *rq = __rq; + if (blk_discard_mergable(__rq)) + return ELEVATOR_DISCARD_MERGE; return ELEVATOR_FRONT_MERGE; } } diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 8cc766743270..308c2d8cdca1 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1409,6 +1409,22 @@ static inline int queue_limit_discard_alignment(struct queue_limits *lim, sector return offset << SECTOR_SHIFT; } +/* + * Two cases of handling DISCARD merge: + * If max_discard_segments > 1, the driver takes every bio + * as a range and send them to controller together. The ranges + * needn't to be contiguous. + * Otherwise, the bios/requests will be handled as same as + * others which should be contiguous. + */ +static inline bool blk_discard_mergable(struct request *req) +{ + if (req_op(req) == REQ_OP_DISCARD && + queue_max_discard_segments(req->q) > 1) + return true; + return false; +} + static inline int bdev_discard_alignment(struct block_device *bdev) { struct request_queue *q = bdev_get_queue(bdev); From e14e3856e94dd389f105de594974e1ade80a613a Mon Sep 17 00:00:00 2001 From: Tan Tee Min Date: Fri, 29 Apr 2022 19:58:07 +0800 Subject: [PATCH 61/69] net: stmmac: disable Split Header (SPH) for Intel platforms commit 47f753c1108e287edb3e27fad8a7511a9d55578e upstream. Based on DesignWare Ethernet QoS datasheet, we are seeing the limitation of Split Header (SPH) feature is not supported for Ipv4 fragmented packet. This SPH limitation will cause ping failure when the packets size exceed the MTU size. For example, the issue happens once the basic ping packet size is larger than the configured MTU size and the data is lost inside the fragmented packet, replaced by zeros/corrupted values, and leads to ping fail. So, disable the Split Header for Intel platforms. v2: Add fixes tag in commit message. Fixes: 67afd6d1cfdf("net: stmmac: Add Split Header support and enable it in XGMAC cores") Cc: # 5.10.x Suggested-by: Ong, Boon Leong Signed-off-by: Mohammad Athari Bin Ismail Signed-off-by: Wong Vee Khee Signed-off-by: Tan Tee Min Signed-off-by: David S. Miller Signed-off-by: Tan Tee Min Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 2 +- drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c | 1 + include/linux/stmmac.h | 1 + 3 files changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 9cbc0179d24e..9931724c4727 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -4531,7 +4531,7 @@ int stmmac_dvr_probe(struct device *device, dev_info(priv->device, "TSO feature enabled\n"); } - if (priv->dma_cap.sphen) { + if (priv->dma_cap.sphen && !priv->plat->sph_disable) { ndev->hw_features |= NETIF_F_GRO; priv->sph = true; dev_info(priv->device, "SPH feature enabled\n"); diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c index fceb0f9e797f..0edcf3f704b7 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c @@ -119,6 +119,7 @@ static int intel_mgbe_common_data(struct pci_dev *pdev, plat->has_gmac4 = 1; plat->force_sf_dma_mode = 0; plat->tso_en = 1; + plat->sph_disable = 1; plat->rx_sched_algorithm = MTL_RX_ALGORITHM_SP; diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index dc60d03c4b60..0b35747c9837 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -179,5 +179,6 @@ struct plat_stmmacenet_data { int mac_port_sel_speed; bool en_tx_lpi_clockgating; int has_xgmac; + bool sph_disable; }; #endif From b38cf3cb17dfde98ffe5913372d0d8eafede9e57 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thi=C3=A9baud=20Weksteen?= Date: Mon, 2 May 2022 10:49:52 +1000 Subject: [PATCH 62/69] firmware_loader: use kernel credentials when reading firmware MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 581dd69830341d299b0c097fc366097ab497d679 upstream. Device drivers may decide to not load firmware when probed to avoid slowing down the boot process should the firmware filesystem not be available yet. In this case, the firmware loading request may be done when a device file associated with the driver is first accessed. The credentials of the userspace process accessing the device file may be used to validate access to the firmware files requested by the driver. Ensure that the kernel assumes the responsibility of reading the firmware. This was observed on Android for a graphic driver loading their firmware when the device file (e.g. /dev/mali0) was first opened by userspace (i.e. surfaceflinger). The security context of surfaceflinger was used to validate the access to the firmware file (e.g. /vendor/firmware/mali.bin). Previously, Android configurations were not setting up the firmware_class.path command line argument and were relying on the userspace fallback mechanism. In this case, the security context of the userspace daemon (i.e. ueventd) was consistently used to read firmware files. More Android devices are now found to set firmware_class.path which gives the kernel the opportunity to read the firmware directly (via kernel_read_file_from_path_initns). In this scenario, the current process credentials were used, even if unrelated to the loading of the firmware file. Signed-off-by: Thiébaud Weksteen Cc: # 5.10 Reviewed-by: Paul Moore Acked-by: Luis Chamberlain Link: https://lore.kernel.org/r/20220502004952.3970800-1-tweek@google.com Signed-off-by: Greg Kroah-Hartman --- drivers/base/firmware_loader/main.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/drivers/base/firmware_loader/main.c b/drivers/base/firmware_loader/main.c index 4f6b76bd957e..12ab50d29548 100644 --- a/drivers/base/firmware_loader/main.c +++ b/drivers/base/firmware_loader/main.c @@ -761,6 +761,8 @@ _request_firmware(const struct firmware **firmware_p, const char *name, enum fw_opt opt_flags) { struct firmware *fw = NULL; + struct cred *kern_cred = NULL; + const struct cred *old_cred; int ret; if (!firmware_p) @@ -776,6 +778,18 @@ _request_firmware(const struct firmware **firmware_p, const char *name, if (ret <= 0) /* error or already assigned */ goto out; + /* + * We are about to try to access the firmware file. Because we may have been + * called by a driver when serving an unrelated request from userland, we use + * the kernel credentials to read the file. + */ + kern_cred = prepare_kernel_cred(NULL); + if (!kern_cred) { + ret = -ENOMEM; + goto out; + } + old_cred = override_creds(kern_cred); + ret = fw_get_filesystem_firmware(device, fw->priv, "", NULL); #ifdef CONFIG_FW_LOADER_COMPRESS if (ret == -ENOENT) @@ -792,6 +806,9 @@ _request_firmware(const struct firmware **firmware_p, const char *name, } else ret = assign_fw(fw, device, opt_flags); + revert_creds(old_cred); + put_cred(kern_cred); + out: if (ret < 0) { fw_abort_batch_reqs(fw); From 060f38b1dfb499d0c02d14ea793a16f4e63c51b6 Mon Sep 17 00:00:00 2001 From: Abel Vesa Date: Thu, 27 Jan 2022 16:10:51 +0200 Subject: [PATCH 63/69] ARM: dts: imx7: Use audio_mclk_post_div instead audio_mclk_root_clk commit 4cb7df64c732b2b9918424095c11660c2a8c4a33 upstream. The audio_mclk_root_clk was added as a gate with the CCGR121 (0x4790), but according to the reference manual, there is no such gate. Moreover, the consumer driver of the mentioned clock might gate it and leave the ECSPI2 (the true owner of that gate) hanging. So lets use the audio_mclk_post_div, which is the parent. Signed-off-by: Abel Vesa Signed-off-by: Shawn Guo [ps: backport to 5.4] Signed-off-by: Philippe Schenker Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/imx7-colibri.dtsi | 4 ++-- arch/arm/boot/dts/imx7-mba7.dtsi | 2 +- arch/arm/boot/dts/imx7d-nitrogen7.dts | 2 +- arch/arm/boot/dts/imx7d-pico-hobbit.dts | 4 ++-- arch/arm/boot/dts/imx7d-pico-pi.dts | 4 ++-- arch/arm/boot/dts/imx7d-sdb.dts | 2 +- arch/arm/boot/dts/imx7s-warp.dts | 4 ++-- 7 files changed, 11 insertions(+), 11 deletions(-) diff --git a/arch/arm/boot/dts/imx7-colibri.dtsi b/arch/arm/boot/dts/imx7-colibri.dtsi index 8bba03de51ad..1e61e3862106 100644 --- a/arch/arm/boot/dts/imx7-colibri.dtsi +++ b/arch/arm/boot/dts/imx7-colibri.dtsi @@ -77,7 +77,7 @@ dailink_master: simple-audio-card,codec { sound-dai = <&codec>; - clocks = <&clks IMX7D_AUDIO_MCLK_ROOT_CLK>; + clocks = <&clks IMX7D_AUDIO_MCLK_ROOT_DIV>; }; }; }; @@ -152,7 +152,7 @@ compatible = "fsl,sgtl5000"; #sound-dai-cells = <0>; reg = <0x0a>; - clocks = <&clks IMX7D_AUDIO_MCLK_ROOT_CLK>; + clocks = <&clks IMX7D_AUDIO_MCLK_ROOT_DIV>; pinctrl-names = "default"; pinctrl-0 = <&pinctrl_sai1_mclk>; VDDA-supply = <®_module_3v3_avdd>; diff --git a/arch/arm/boot/dts/imx7-mba7.dtsi b/arch/arm/boot/dts/imx7-mba7.dtsi index 50abf18ad30b..887497e3bb4b 100644 --- a/arch/arm/boot/dts/imx7-mba7.dtsi +++ b/arch/arm/boot/dts/imx7-mba7.dtsi @@ -250,7 +250,7 @@ tlv320aic32x4: audio-codec@18 { compatible = "ti,tlv320aic32x4"; reg = <0x18>; - clocks = <&clks IMX7D_AUDIO_MCLK_ROOT_CLK>; + clocks = <&clks IMX7D_AUDIO_MCLK_ROOT_DIV>; clock-names = "mclk"; ldoin-supply = <®_audio_3v3>; iov-supply = <®_audio_3v3>; diff --git a/arch/arm/boot/dts/imx7d-nitrogen7.dts b/arch/arm/boot/dts/imx7d-nitrogen7.dts index 6b4acea1ef79..ecfa179ccab1 100644 --- a/arch/arm/boot/dts/imx7d-nitrogen7.dts +++ b/arch/arm/boot/dts/imx7d-nitrogen7.dts @@ -284,7 +284,7 @@ codec: wm8960@1a { compatible = "wlf,wm8960"; reg = <0x1a>; - clocks = <&clks IMX7D_AUDIO_MCLK_ROOT_CLK>; + clocks = <&clks IMX7D_AUDIO_MCLK_ROOT_DIV>; clock-names = "mclk"; wlf,shared-lrclk; }; diff --git a/arch/arm/boot/dts/imx7d-pico-hobbit.dts b/arch/arm/boot/dts/imx7d-pico-hobbit.dts index 7b2198a9372c..d917dc4f2f22 100644 --- a/arch/arm/boot/dts/imx7d-pico-hobbit.dts +++ b/arch/arm/boot/dts/imx7d-pico-hobbit.dts @@ -31,7 +31,7 @@ dailink_master: simple-audio-card,codec { sound-dai = <&sgtl5000>; - clocks = <&clks IMX7D_AUDIO_MCLK_ROOT_CLK>; + clocks = <&clks IMX7D_AUDIO_MCLK_ROOT_DIV>; }; }; }; @@ -41,7 +41,7 @@ #sound-dai-cells = <0>; reg = <0x0a>; compatible = "fsl,sgtl5000"; - clocks = <&clks IMX7D_AUDIO_MCLK_ROOT_CLK>; + clocks = <&clks IMX7D_AUDIO_MCLK_ROOT_DIV>; VDDA-supply = <®_2p5v>; VDDIO-supply = <®_vref_1v8>; }; diff --git a/arch/arm/boot/dts/imx7d-pico-pi.dts b/arch/arm/boot/dts/imx7d-pico-pi.dts index 70bea95c06d8..f263e391e24c 100644 --- a/arch/arm/boot/dts/imx7d-pico-pi.dts +++ b/arch/arm/boot/dts/imx7d-pico-pi.dts @@ -31,7 +31,7 @@ dailink_master: simple-audio-card,codec { sound-dai = <&sgtl5000>; - clocks = <&clks IMX7D_AUDIO_MCLK_ROOT_CLK>; + clocks = <&clks IMX7D_AUDIO_MCLK_ROOT_DIV>; }; }; }; @@ -41,7 +41,7 @@ #sound-dai-cells = <0>; reg = <0x0a>; compatible = "fsl,sgtl5000"; - clocks = <&clks IMX7D_AUDIO_MCLK_ROOT_CLK>; + clocks = <&clks IMX7D_AUDIO_MCLK_ROOT_DIV>; VDDA-supply = <®_2p5v>; VDDIO-supply = <®_vref_1v8>; }; diff --git a/arch/arm/boot/dts/imx7d-sdb.dts b/arch/arm/boot/dts/imx7d-sdb.dts index 869efbc4af42..a97cda17e484 100644 --- a/arch/arm/boot/dts/imx7d-sdb.dts +++ b/arch/arm/boot/dts/imx7d-sdb.dts @@ -356,7 +356,7 @@ codec: wm8960@1a { compatible = "wlf,wm8960"; reg = <0x1a>; - clocks = <&clks IMX7D_AUDIO_MCLK_ROOT_CLK>; + clocks = <&clks IMX7D_AUDIO_MCLK_ROOT_DIV>; clock-names = "mclk"; wlf,shared-lrclk; }; diff --git a/arch/arm/boot/dts/imx7s-warp.dts b/arch/arm/boot/dts/imx7s-warp.dts index d6b4888fa686..e035dd5bf4f6 100644 --- a/arch/arm/boot/dts/imx7s-warp.dts +++ b/arch/arm/boot/dts/imx7s-warp.dts @@ -75,7 +75,7 @@ dailink_master: simple-audio-card,codec { sound-dai = <&codec>; - clocks = <&clks IMX7D_AUDIO_MCLK_ROOT_CLK>; + clocks = <&clks IMX7D_AUDIO_MCLK_ROOT_DIV>; }; }; }; @@ -232,7 +232,7 @@ #sound-dai-cells = <0>; reg = <0x0a>; compatible = "fsl,sgtl5000"; - clocks = <&clks IMX7D_AUDIO_MCLK_ROOT_CLK>; + clocks = <&clks IMX7D_AUDIO_MCLK_ROOT_DIV>; pinctrl-names = "default"; pinctrl-0 = <&pinctrl_sai1_mclk>; VDDA-supply = <&vgen4_reg>; From b2f140a9f980806f572d672e1780acea66b9a25c Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 28 Mar 2022 11:37:05 -0700 Subject: [PATCH 64/69] Reinstate some of "swiotlb: rework "fix info leak with DMA_FROM_DEVICE"" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 901c7280ca0d5e2b4a8929fbe0bfb007ac2a6544 upstream. Halil Pasic points out [1] that the full revert of that commit (revert in bddac7c1e02b), and that a partial revert that only reverts the problematic case, but still keeps some of the cleanups is probably better.  And that partial revert [2] had already been verified by Oleksandr Natalenko to also fix the issue, I had just missed that in the long discussion. So let's reinstate the cleanups from commit aa6f8dcbab47 ("swiotlb: rework "fix info leak with DMA_FROM_DEVICE""), and effectively only revert the part that caused problems. Link: https://lore.kernel.org/all/20220328013731.017ae3e3.pasic@linux.ibm.com/ [1] Link: https://lore.kernel.org/all/20220324055732.GB12078@lst.de/ [2] Link: https://lore.kernel.org/all/4386660.LvFx2qVVIh@natalenko.name/ [3] Suggested-by: Halil Pasic Tested-by: Oleksandr Natalenko Cc: Christoph Hellwig" Signed-off-by: Linus Torvalds [OP: backport to 5.4: adjusted context] Signed-off-by: Ovidiu Panait Signed-off-by: Greg Kroah-Hartman --- Documentation/DMA-attributes.txt | 10 ---------- include/linux/dma-mapping.h | 8 -------- kernel/dma/swiotlb.c | 13 ++++++++----- 3 files changed, 8 insertions(+), 23 deletions(-) diff --git a/Documentation/DMA-attributes.txt b/Documentation/DMA-attributes.txt index 7193505a98ca..8f8d97f65d73 100644 --- a/Documentation/DMA-attributes.txt +++ b/Documentation/DMA-attributes.txt @@ -156,13 +156,3 @@ accesses to DMA buffers in both privileged "supervisor" and unprivileged subsystem that the buffer is fully accessible at the elevated privilege level (and ideally inaccessible or at least read-only at the lesser-privileged levels). - -DMA_ATTR_PRIVILEGED -------------------- - -Some advanced peripherals such as remote processors and GPUs perform -accesses to DMA buffers in both privileged "supervisor" and unprivileged -"user" modes. This attribute is used to indicate to the DMA-mapping -subsystem that the buffer is fully accessible at the elevated privilege -level (and ideally inaccessible or at least read-only at the -lesser-privileged levels). diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index da90f20e11c1..4d450672b7d6 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -70,14 +70,6 @@ */ #define DMA_ATTR_PRIVILEGED (1UL << 9) -/* - * This is a hint to the DMA-mapping subsystem that the device is expected - * to overwrite the entire mapped size, thus the caller does not require any - * of the previous buffer contents to be preserved. This allows - * bounce-buffering implementations to optimise DMA_FROM_DEVICE transfers. - */ -#define DMA_ATTR_OVERWRITE (1UL << 10) - /* * A dma_addr_t can hold any valid DMA or bus address for the platform. * It can be given to a device to use as a DMA source or target. A CPU cannot diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c index f17b771856d1..913cb71198af 100644 --- a/kernel/dma/swiotlb.c +++ b/kernel/dma/swiotlb.c @@ -571,11 +571,14 @@ found: */ for (i = 0; i < nslots; i++) io_tlb_orig_addr[index+i] = orig_addr + (i << IO_TLB_SHIFT); - if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) && - (!(attrs & DMA_ATTR_OVERWRITE) || dir == DMA_TO_DEVICE || - dir == DMA_BIDIRECTIONAL)) - swiotlb_bounce(orig_addr, tlb_addr, mapping_size, DMA_TO_DEVICE); - + /* + * When dir == DMA_FROM_DEVICE we could omit the copy from the orig + * to the tlb buffer, if we knew for sure the device will + * overwirte the entire current content. But we don't. Thus + * unconditional bounce may prevent leaking swiotlb content (i.e. + * kernel memory) to user-space. + */ + swiotlb_bounce(orig_addr, tlb_addr, mapping_size, DMA_TO_DEVICE); return tlb_addr; } From a12884ff43400e94b109bf4aa9fa5eed47ef5241 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Thu, 9 Apr 2020 09:00:01 +0200 Subject: [PATCH 65/69] x86/xen: fix booting 32-bit pv guest commit d6f34f4c6b4a962eb7a86c923fea206f866a40be upstream. Commit 2f62f36e62daec ("x86/xen: Make the boot CPU idle task reliable") introduced a regression for booting 32 bit Xen PV guests: the address of the initial stack needs to be a virtual one. Fixes: 2f62f36e62daec ("x86/xen: Make the boot CPU idle task reliable") Signed-off-by: Juergen Gross Reviewed-by: Boris Ostrovsky Link: https://lore.kernel.org/r/20200409070001.16675-1-jgross@suse.com Signed-off-by: Juergen Gross Signed-off-by: Greg Kroah-Hartman --- arch/x86/xen/xen-head.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S index 591544b4b85a..142da8547480 100644 --- a/arch/x86/xen/xen-head.S +++ b/arch/x86/xen/xen-head.S @@ -38,7 +38,7 @@ ENTRY(startup_xen) #ifdef CONFIG_X86_64 mov initial_stack(%rip), %rsp #else - mov pa(initial_stack), %esp + mov initial_stack, %esp #endif #ifdef CONFIG_X86_64 From 10a221e2d3d8cf53be4e55c92257197688cfe54f Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 24 Jun 2021 11:41:00 +0200 Subject: [PATCH 66/69] x86/xen: Mark cpu_bringup_and_idle() as dead_end_function commit 9af9dcf11bda3e2c0e24c1acaacb8685ad974e93 upstream. The asm_cpu_bringup_and_idle() function is required to push the return value on the stack in order to make ORC happy, but the only reason objtool doesn't complain is because of a happy accident. The thing is that asm_cpu_bringup_and_idle() doesn't return, so validate_branch() never terminates and falls through to the next function, which in the normal case is the hypercall_page. And that, as it happens, is 4095 NOPs and a RET. Make asm_cpu_bringup_and_idle() terminate on it's own, by making the function it calls as a dead-end. This way we no longer rely on what code happens to come after. Fixes: c3881eb58d56 ("x86/xen: Make the secondary CPU idle tasks reliable") Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Juergen Gross Reviewed-by: Miroslav Benes Link: https://lore.kernel.org/r/20210624095147.693801717@infradead.org Signed-off-by: Greg Kroah-Hartman --- tools/objtool/check.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 06aaf04e629c..bae6b261481d 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -144,6 +144,7 @@ static bool __dead_end_function(struct objtool_file *file, struct symbol *func, "usercopy_abort", "machine_real_restart", "rewind_stack_do_exit", + "cpu_bringup_and_idle", }; if (!func) From ef5374d532caa1dd7da22093110fc2aeae956a08 Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Sat, 14 May 2022 10:31:47 +0800 Subject: [PATCH 67/69] i2c: mt7621: fix missing clk_disable_unprepare() on error in mtk_i2c_probe() [ Upstream commit a2537c98a8a3b57002e54a262d180b9490bc7190 ] Fix the missing clk_disable_unprepare() before return from mtk_i2c_probe() in the error handling case. Fixes: d04913ec5f89 ("i2c: mt7621: Add MediaTek MT7621/7628/7688 I2C driver") Signed-off-by: Yang Yingliang Reviewed-by: Stefan Roese Signed-off-by: Wolfram Sang Signed-off-by: Sasha Levin --- drivers/i2c/busses/i2c-mt7621.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/i2c/busses/i2c-mt7621.c b/drivers/i2c/busses/i2c-mt7621.c index 62df8379bc89..65e72101b393 100644 --- a/drivers/i2c/busses/i2c-mt7621.c +++ b/drivers/i2c/busses/i2c-mt7621.c @@ -304,7 +304,8 @@ static int mtk_i2c_probe(struct platform_device *pdev) if (i2c->bus_freq == 0) { dev_warn(i2c->dev, "clock-frequency 0 not supported\n"); - return -EINVAL; + ret = -EINVAL; + goto err_disable_clk; } adap = &i2c->adap; @@ -322,10 +323,15 @@ static int mtk_i2c_probe(struct platform_device *pdev) ret = i2c_add_adapter(adap); if (ret < 0) - return ret; + goto err_disable_clk; dev_info(&pdev->dev, "clock %u kHz\n", i2c->bus_freq / 1000); + return 0; + +err_disable_clk: + clk_disable_unprepare(i2c->clk); + return ret; } From dba1941f5bc3de6b460685155b89ae1182824fc8 Mon Sep 17 00:00:00 2001 From: David Howells Date: Sat, 21 May 2022 08:18:28 +0100 Subject: [PATCH 68/69] afs: Fix afs_getattr() to refetch file status if callback break occurred [ Upstream commit 2aeb8c86d49967552394d5e723f87454cb53f501 ] If a callback break occurs (change notification), afs_getattr() needs to issue an FS.FetchStatus RPC operation to update the status of the file being examined by the stat-family of system calls. Fix afs_getattr() to do this if AFS_VNODE_CB_PROMISED has been cleared on a vnode by a callback break. Skip this if AT_STATX_DONT_SYNC is set. This can be tested by appending to a file on one AFS client and then using "stat -L" to examine its length on a machine running kafs. This can also be watched through tracing on the kafs machine. The callback break is seen: kworker/1:1-46 [001] ..... 978.910812: afs_cb_call: c=0000005f YFSCB.CallBack kworker/1:1-46 [001] ...1. 978.910829: afs_cb_break: 100058:23b4c:242d2c2 b=2 s=1 break-cb kworker/1:1-46 [001] ..... 978.911062: afs_call_done: c=0000005f ret=0 ab=0 [0000000082994ead] And then the stat command generated no traffic if unpatched, but with this change a call to fetch the status can be observed: stat-4471 [000] ..... 986.744122: afs_make_fs_call: c=000000ab 100058:023b4c:242d2c2 YFS.FetchStatus stat-4471 [000] ..... 986.745578: afs_call_done: c=000000ab ret=0 ab=0 [0000000087fc8c84] Fixes: 08e0e7c82eea ("[AF_RXRPC]: Make the in-kernel AFS filesystem use AF_RXRPC.") Reported-by: Markus Suvanto Signed-off-by: David Howells cc: Marc Dionne cc: linux-afs@lists.infradead.org Tested-by: Markus Suvanto Tested-by: kafs-testing+fedora34_64checkkafs-build-496@auristor.com Link: https://bugzilla.kernel.org/show_bug.cgi?id=216010 Link: https://lore.kernel.org/r/165308359800.162686.14122417881564420962.stgit@warthog.procyon.org.uk/ # v1 Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- fs/afs/inode.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/fs/afs/inode.c b/fs/afs/inode.c index 4f58b28a1edd..90eac3ec01cb 100644 --- a/fs/afs/inode.c +++ b/fs/afs/inode.c @@ -734,10 +734,22 @@ int afs_getattr(const struct path *path, struct kstat *stat, { struct inode *inode = d_inode(path->dentry); struct afs_vnode *vnode = AFS_FS_I(inode); - int seq = 0; + struct key *key; + int ret, seq = 0; _enter("{ ino=%lu v=%u }", inode->i_ino, inode->i_generation); + if (!(query_flags & AT_STATX_DONT_SYNC) && + !test_bit(AFS_VNODE_CB_PROMISED, &vnode->flags)) { + key = afs_request_key(vnode->volume->cell); + if (IS_ERR(key)) + return PTR_ERR(key); + ret = afs_validate(vnode, key); + key_put(key); + if (ret < 0) + return ret; + } + do { read_seqbegin_or_lock(&vnode->cb_lock, &seq); generic_fillattr(inode, stat); From 04b092e4a01a3488e762897e2d29f85eda2c6a60 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 25 May 2022 09:14:39 +0200 Subject: [PATCH 69/69] Linux 5.4.196 Link: https://lore.kernel.org/r/20220523165802.500642349@linuxfoundation.org Tested-by: Florian Fainelli Tested-by: Shuah Khan Tested-by: Linux Kernel Functional Testing Tested-by: Sudip Mukherjee Tested-by: Guenter Roeck Tested-by: Khalid Masum Tested-by: Hulk Robot Signed-off-by: Greg Kroah-Hartman --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 4297d0107bd6..c064ed925552 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 5 PATCHLEVEL = 4 -SUBLEVEL = 195 +SUBLEVEL = 196 EXTRAVERSION = NAME = Kleptomaniac Octopus