diff --git a/Documentation/devicetree/bindings/clock/samsung,exynosautov9-clock.yaml b/Documentation/devicetree/bindings/clock/samsung,exynosautov9-clock.yaml index 2ab4642679c0..55c4f94a14d1 100644 --- a/Documentation/devicetree/bindings/clock/samsung,exynosautov9-clock.yaml +++ b/Documentation/devicetree/bindings/clock/samsung,exynosautov9-clock.yaml @@ -148,7 +148,7 @@ allOf: items: - const: oscclk - const: dout_clkcmu_fsys1_bus - - const: dout_clkcmu_fsys1_mmc_card + - const: gout_clkcmu_fsys1_mmc_card - const: dout_clkcmu_fsys1_usbdrd - if: diff --git a/arch/arm/boot/dts/at91rm9200.dtsi b/arch/arm/boot/dts/at91rm9200.dtsi index 7a113325abb9..6f9004ebf424 100644 --- a/arch/arm/boot/dts/at91rm9200.dtsi +++ b/arch/arm/boot/dts/at91rm9200.dtsi @@ -666,7 +666,7 @@ compatible = "atmel,at91rm9200-udc"; reg = <0xfffb0000 0x4000>; interrupts = <11 IRQ_TYPE_LEVEL_HIGH 2>; - clocks = <&pmc PMC_TYPE_PERIPHERAL 11>, <&pmc PMC_TYPE_SYSTEM 2>; + clocks = <&pmc PMC_TYPE_PERIPHERAL 11>, <&pmc PMC_TYPE_SYSTEM 1>; clock-names = "pclk", "hclk"; status = "disabled"; }; diff --git a/arch/arm64/include/asm/efi.h b/arch/arm64/include/asm/efi.h index d6cf535d8352..439e2bc5d5d8 100644 --- a/arch/arm64/include/asm/efi.h +++ b/arch/arm64/include/asm/efi.h @@ -14,16 +14,8 @@ #ifdef CONFIG_EFI extern void efi_init(void); - -bool efi_runtime_fixup_exception(struct pt_regs *regs, const char *msg); #else #define efi_init() - -static inline -bool efi_runtime_fixup_exception(struct pt_regs *regs, const char *msg) -{ - return false; -} #endif int efi_create_mapping(struct mm_struct *mm, efi_memory_desc_t *md); diff --git a/arch/arm64/kernel/efi-rt-wrapper.S b/arch/arm64/kernel/efi-rt-wrapper.S index 67babd5f04c2..75691a2641c1 100644 --- a/arch/arm64/kernel/efi-rt-wrapper.S +++ b/arch/arm64/kernel/efi-rt-wrapper.S @@ -6,7 +6,7 @@ #include SYM_FUNC_START(__efi_rt_asm_wrapper) - stp x29, x30, [sp, #-112]! + stp x29, x30, [sp, #-32]! mov x29, sp /* @@ -16,20 +16,6 @@ SYM_FUNC_START(__efi_rt_asm_wrapper) */ stp x1, x18, [sp, #16] - /* - * Preserve all callee saved registers and record the stack pointer - * value in a per-CPU variable so we can recover from synchronous - * exceptions occurring while running the firmware routines. - */ - stp x19, x20, [sp, #32] - stp x21, x22, [sp, #48] - stp x23, x24, [sp, #64] - stp x25, x26, [sp, #80] - stp x27, x28, [sp, #96] - - adr_this_cpu x8, __efi_rt_asm_recover_sp, x9 - str x29, [x8] - /* * We are lucky enough that no EFI runtime services take more than * 5 arguments, so all are passed in registers rather than via the @@ -45,7 +31,7 @@ SYM_FUNC_START(__efi_rt_asm_wrapper) ldp x1, x2, [sp, #16] cmp x2, x18 - ldp x29, x30, [sp], #112 + ldp x29, x30, [sp], #32 b.ne 0f ret 0: @@ -59,18 +45,3 @@ SYM_FUNC_START(__efi_rt_asm_wrapper) mov x18, x2 b efi_handle_corrupted_x18 // tail call SYM_FUNC_END(__efi_rt_asm_wrapper) - -SYM_FUNC_START(__efi_rt_asm_recover) - ldr_this_cpu x8, __efi_rt_asm_recover_sp, x9 - mov sp, x8 - - ldp x0, x18, [sp, #16] - ldp x19, x20, [sp, #32] - ldp x21, x22, [sp, #48] - ldp x23, x24, [sp, #64] - ldp x25, x26, [sp, #80] - ldp x27, x28, [sp, #96] - ldp x29, x30, [sp], #112 - - b efi_handle_runtime_exception -SYM_FUNC_END(__efi_rt_asm_recover) diff --git a/arch/arm64/kernel/efi.c b/arch/arm64/kernel/efi.c index ee53f2a0aa03..a908a37f0367 100644 --- a/arch/arm64/kernel/efi.c +++ b/arch/arm64/kernel/efi.c @@ -9,7 +9,6 @@ #include #include -#include #include @@ -145,28 +144,3 @@ asmlinkage efi_status_t efi_handle_corrupted_x18(efi_status_t s, const char *f) pr_err_ratelimited(FW_BUG "register x18 corrupted by EFI %s\n", f); return s; } - -asmlinkage DEFINE_PER_CPU(u64, __efi_rt_asm_recover_sp); - -asmlinkage efi_status_t __efi_rt_asm_recover(void); - -asmlinkage efi_status_t efi_handle_runtime_exception(const char *f) -{ - pr_err(FW_BUG "Synchronous exception occurred in EFI runtime service %s()\n", f); - clear_bit(EFI_RUNTIME_SERVICES, &efi.flags); - return EFI_ABORTED; -} - -bool efi_runtime_fixup_exception(struct pt_regs *regs, const char *msg) -{ - /* Check whether the exception occurred while running the firmware */ - if (current_work() != &efi_rts_work.work || regs->pc >= TASK_SIZE_64) - return false; - - pr_err(FW_BUG "Unable to handle %s in EFI runtime service\n", msg); - add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK); - dump_stack(); - - regs->pc = (u64)__efi_rt_asm_recover; - return true; -} diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 3e9cf9826417..5b391490e045 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -30,7 +30,6 @@ #include #include #include -#include #include #include #include @@ -392,9 +391,6 @@ static void __do_kernel_fault(unsigned long addr, unsigned long esr, msg = "paging request"; } - if (efi_runtime_fixup_exception(regs, msg)) - return; - die_kernel_fault(msg, addr, esr, regs); } diff --git a/arch/loongarch/include/asm/pgtable.h b/arch/loongarch/include/asm/pgtable.h index aa0e0e0d4ee5..79d5bfd913e0 100644 --- a/arch/loongarch/include/asm/pgtable.h +++ b/arch/loongarch/include/asm/pgtable.h @@ -490,6 +490,7 @@ static inline pmd_t pmd_mkdirty(pmd_t pmd) return pmd; } +#define pmd_young pmd_young static inline int pmd_young(pmd_t pmd) { return !!(pmd_val(pmd) & _PAGE_ACCESSED); diff --git a/arch/mips/include/asm/pgtable.h b/arch/mips/include/asm/pgtable.h index 6caec386ad2f..4678627673df 100644 --- a/arch/mips/include/asm/pgtable.h +++ b/arch/mips/include/asm/pgtable.h @@ -622,6 +622,7 @@ static inline pmd_t pmd_mkdirty(pmd_t pmd) return pmd; } +#define pmd_young pmd_young static inline int pmd_young(pmd_t pmd) { return !!(pmd_val(pmd) & _PAGE_ACCESSED); diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index fa78595a6089..593cf09264d8 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -317,9 +317,9 @@ config SMP config NR_CPUS int "Maximum number of CPUs (2-512)" depends on SMP - range 2 512 if !SBI_V01 - range 2 32 if SBI_V01 && 32BIT - range 2 64 if SBI_V01 && 64BIT + range 2 512 if !RISCV_SBI_V01 + range 2 32 if RISCV_SBI_V01 && 32BIT + range 2 64 if RISCV_SBI_V01 && 64BIT default "32" if 32BIT default "64" if 64BIT diff --git a/arch/riscv/include/asm/asm.h b/arch/riscv/include/asm/asm.h index 1b471ff73178..816e753de636 100644 --- a/arch/riscv/include/asm/asm.h +++ b/arch/riscv/include/asm/asm.h @@ -23,6 +23,7 @@ #define REG_L __REG_SEL(ld, lw) #define REG_S __REG_SEL(sd, sw) #define REG_SC __REG_SEL(sc.d, sc.w) +#define REG_AMOSWAP_AQ __REG_SEL(amoswap.d.aq, amoswap.w.aq) #define REG_ASM __REG_SEL(.dword, .word) #define SZREG __REG_SEL(8, 4) #define LGREG __REG_SEL(3, 2) diff --git a/arch/riscv/include/asm/efi.h b/arch/riscv/include/asm/efi.h index f74879a8f1ea..e229d7be4b66 100644 --- a/arch/riscv/include/asm/efi.h +++ b/arch/riscv/include/asm/efi.h @@ -10,6 +10,7 @@ #include #include #include +#include #ifdef CONFIG_EFI extern void efi_init(void); @@ -20,7 +21,10 @@ extern void efi_init(void); int efi_create_mapping(struct mm_struct *mm, efi_memory_desc_t *md); int efi_set_mapping_permissions(struct mm_struct *mm, efi_memory_desc_t *md); -#define arch_efi_call_virt_setup() efi_virtmap_load() +#define arch_efi_call_virt_setup() ({ \ + sync_kernel_mappings(efi_mm.pgd); \ + efi_virtmap_load(); \ + }) #define arch_efi_call_virt_teardown() efi_virtmap_unload() #define ARCH_EFI_IRQ_FLAGS_MASK (SR_IE | SR_SPIE) diff --git a/arch/riscv/include/asm/pgalloc.h b/arch/riscv/include/asm/pgalloc.h index 947f23d7b6af..59dc12b5b7e8 100644 --- a/arch/riscv/include/asm/pgalloc.h +++ b/arch/riscv/include/asm/pgalloc.h @@ -127,6 +127,13 @@ static inline void p4d_free(struct mm_struct *mm, p4d_t *p4d) #define __p4d_free_tlb(tlb, p4d, addr) p4d_free((tlb)->mm, p4d) #endif /* __PAGETABLE_PMD_FOLDED */ +static inline void sync_kernel_mappings(pgd_t *pgd) +{ + memcpy(pgd + USER_PTRS_PER_PGD, + init_mm.pgd + USER_PTRS_PER_PGD, + (PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof(pgd_t)); +} + static inline pgd_t *pgd_alloc(struct mm_struct *mm) { pgd_t *pgd; @@ -135,9 +142,7 @@ static inline pgd_t *pgd_alloc(struct mm_struct *mm) if (likely(pgd != NULL)) { memset(pgd, 0, USER_PTRS_PER_PGD * sizeof(pgd_t)); /* Copy kernel mappings */ - memcpy(pgd + USER_PTRS_PER_PGD, - init_mm.pgd + USER_PTRS_PER_PGD, - (PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof(pgd_t)); + sync_kernel_mappings(pgd); } return pgd; } diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h index 7ec936910a96..92ec2d9d7273 100644 --- a/arch/riscv/include/asm/pgtable.h +++ b/arch/riscv/include/asm/pgtable.h @@ -600,6 +600,7 @@ static inline int pmd_dirty(pmd_t pmd) return pte_dirty(pmd_pte(pmd)); } +#define pmd_young pmd_young static inline int pmd_young(pmd_t pmd) { return pte_young(pmd_pte(pmd)); diff --git a/arch/riscv/include/asm/smp.h b/arch/riscv/include/asm/smp.h index d3443be7eedc..3831b638ecab 100644 --- a/arch/riscv/include/asm/smp.h +++ b/arch/riscv/include/asm/smp.h @@ -50,6 +50,9 @@ void riscv_set_ipi_ops(const struct riscv_ipi_ops *ops); /* Clear IPI for current CPU */ void riscv_clear_ipi(void); +/* Check other CPUs stop or not */ +bool smp_crash_stop_failed(void); + /* Secondary hart entry */ asmlinkage void smp_callin(void); diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S index b9eda3fcbd6d..186abd146eaf 100644 --- a/arch/riscv/kernel/entry.S +++ b/arch/riscv/kernel/entry.S @@ -404,6 +404,19 @@ handle_syscall_trace_exit: #ifdef CONFIG_VMAP_STACK handle_kernel_stack_overflow: + /* + * Takes the psuedo-spinlock for the shadow stack, in case multiple + * harts are concurrently overflowing their kernel stacks. We could + * store any value here, but since we're overflowing the kernel stack + * already we only have SP to use as a scratch register. So we just + * swap in the address of the spinlock, as that's definately non-zero. + * + * Pairs with a store_release in handle_bad_stack(). + */ +1: la sp, spin_shadow_stack + REG_AMOSWAP_AQ sp, sp, (sp) + bnez sp, 1b + la sp, shadow_stack addi sp, sp, SHADOW_OVERFLOW_STACK_SIZE diff --git a/arch/riscv/kernel/machine_kexec.c b/arch/riscv/kernel/machine_kexec.c index ee79e6839b86..2d139b724bc8 100644 --- a/arch/riscv/kernel/machine_kexec.c +++ b/arch/riscv/kernel/machine_kexec.c @@ -15,6 +15,8 @@ #include /* For unreachable() */ #include /* For cpu_down() */ #include +#include +#include /* * kexec_image_info - Print received image details @@ -138,20 +140,35 @@ void machine_shutdown(void) #endif } -/* Override the weak function in kernel/panic.c */ -void crash_smp_send_stop(void) +static void machine_kexec_mask_interrupts(void) { - static int cpus_stopped; + unsigned int i; + struct irq_desc *desc; - /* - * This function can be called twice in panic path, but obviously - * we execute this only once. - */ - if (cpus_stopped) - return; + for_each_irq_desc(i, desc) { + struct irq_chip *chip; + int ret; - smp_send_stop(); - cpus_stopped = 1; + chip = irq_desc_get_chip(desc); + if (!chip) + continue; + + /* + * First try to remove the active state. If this + * fails, try to EOI the interrupt. + */ + ret = irq_set_irqchip_state(i, IRQCHIP_STATE_ACTIVE, false); + + if (ret && irqd_irq_inprogress(&desc->irq_data) && + chip->irq_eoi) + chip->irq_eoi(&desc->irq_data); + + if (chip->irq_mask) + chip->irq_mask(&desc->irq_data); + + if (chip->irq_disable && !irqd_irq_disabled(&desc->irq_data)) + chip->irq_disable(&desc->irq_data); + } } /* @@ -169,6 +186,8 @@ machine_crash_shutdown(struct pt_regs *regs) crash_smp_send_stop(); crash_save_cpu(regs, smp_processor_id()); + machine_kexec_mask_interrupts(); + pr_info("Starting crashdump kernel...\n"); } @@ -195,6 +214,11 @@ machine_kexec(struct kimage *image) void *control_code_buffer = page_address(image->control_code_page); riscv_kexec_method kexec_method = NULL; +#ifdef CONFIG_SMP + WARN(smp_crash_stop_failed(), + "Some CPUs may be stale, kdump will be unreliable.\n"); +#endif + if (image->type != KEXEC_TYPE_CRASH) kexec_method = control_code_buffer; else diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c index 67ec1fadcfe2..86acd690d529 100644 --- a/arch/riscv/kernel/setup.c +++ b/arch/riscv/kernel/setup.c @@ -322,10 +322,11 @@ subsys_initcall(topology_init); void free_initmem(void) { - if (IS_ENABLED(CONFIG_STRICT_KERNEL_RWX)) - set_kernel_memory(lm_alias(__init_begin), lm_alias(__init_end), - IS_ENABLED(CONFIG_64BIT) ? - set_memory_rw : set_memory_rw_nx); + if (IS_ENABLED(CONFIG_STRICT_KERNEL_RWX)) { + set_kernel_memory(lm_alias(__init_begin), lm_alias(__init_end), set_memory_rw_nx); + if (IS_ENABLED(CONFIG_64BIT)) + set_kernel_memory(__init_begin, __init_end, set_memory_nx); + } free_initmem_default(POISON_FREE_INITMEM); } diff --git a/arch/riscv/kernel/smp.c b/arch/riscv/kernel/smp.c index 760a64518c58..8c3b59f1f9b8 100644 --- a/arch/riscv/kernel/smp.c +++ b/arch/riscv/kernel/smp.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -22,11 +23,13 @@ #include #include #include +#include enum ipi_message_type { IPI_RESCHEDULE, IPI_CALL_FUNC, IPI_CPU_STOP, + IPI_CPU_CRASH_STOP, IPI_IRQ_WORK, IPI_TIMER, IPI_MAX @@ -71,6 +74,32 @@ static void ipi_stop(void) wait_for_interrupt(); } +#ifdef CONFIG_KEXEC_CORE +static atomic_t waiting_for_crash_ipi = ATOMIC_INIT(0); + +static inline void ipi_cpu_crash_stop(unsigned int cpu, struct pt_regs *regs) +{ + crash_save_cpu(regs, cpu); + + atomic_dec(&waiting_for_crash_ipi); + + local_irq_disable(); + +#ifdef CONFIG_HOTPLUG_CPU + if (cpu_has_hotplug(cpu)) + cpu_ops[cpu]->cpu_stop(); +#endif + + for(;;) + wait_for_interrupt(); +} +#else +static inline void ipi_cpu_crash_stop(unsigned int cpu, struct pt_regs *regs) +{ + unreachable(); +} +#endif + static const struct riscv_ipi_ops *ipi_ops __ro_after_init; void riscv_set_ipi_ops(const struct riscv_ipi_ops *ops) @@ -124,8 +153,9 @@ void arch_irq_work_raise(void) void handle_IPI(struct pt_regs *regs) { - unsigned long *pending_ipis = &ipi_data[smp_processor_id()].bits; - unsigned long *stats = ipi_data[smp_processor_id()].stats; + unsigned int cpu = smp_processor_id(); + unsigned long *pending_ipis = &ipi_data[cpu].bits; + unsigned long *stats = ipi_data[cpu].stats; riscv_clear_ipi(); @@ -154,6 +184,10 @@ void handle_IPI(struct pt_regs *regs) ipi_stop(); } + if (ops & (1 << IPI_CPU_CRASH_STOP)) { + ipi_cpu_crash_stop(cpu, get_irq_regs()); + } + if (ops & (1 << IPI_IRQ_WORK)) { stats[IPI_IRQ_WORK]++; irq_work_run(); @@ -176,6 +210,7 @@ static const char * const ipi_names[] = { [IPI_RESCHEDULE] = "Rescheduling interrupts", [IPI_CALL_FUNC] = "Function call interrupts", [IPI_CPU_STOP] = "CPU stop interrupts", + [IPI_CPU_CRASH_STOP] = "CPU stop (for crash dump) interrupts", [IPI_IRQ_WORK] = "IRQ work interrupts", [IPI_TIMER] = "Timer broadcast interrupts", }; @@ -235,6 +270,64 @@ void smp_send_stop(void) cpumask_pr_args(cpu_online_mask)); } +#ifdef CONFIG_KEXEC_CORE +/* + * The number of CPUs online, not counting this CPU (which may not be + * fully online and so not counted in num_online_cpus()). + */ +static inline unsigned int num_other_online_cpus(void) +{ + unsigned int this_cpu_online = cpu_online(smp_processor_id()); + + return num_online_cpus() - this_cpu_online; +} + +void crash_smp_send_stop(void) +{ + static int cpus_stopped; + cpumask_t mask; + unsigned long timeout; + + /* + * This function can be called twice in panic path, but obviously + * we execute this only once. + */ + if (cpus_stopped) + return; + + cpus_stopped = 1; + + /* + * If this cpu is the only one alive at this point in time, online or + * not, there are no stop messages to be sent around, so just back out. + */ + if (num_other_online_cpus() == 0) + return; + + cpumask_copy(&mask, cpu_online_mask); + cpumask_clear_cpu(smp_processor_id(), &mask); + + atomic_set(&waiting_for_crash_ipi, num_other_online_cpus()); + + pr_crit("SMP: stopping secondary CPUs\n"); + send_ipi_mask(&mask, IPI_CPU_CRASH_STOP); + + /* Wait up to one second for other CPUs to stop */ + timeout = USEC_PER_SEC; + while ((atomic_read(&waiting_for_crash_ipi) > 0) && timeout--) + udelay(1); + + if (atomic_read(&waiting_for_crash_ipi) > 0) + pr_warn("SMP: failed to stop secondary CPUs %*pbl\n", + cpumask_pr_args(&mask)); +} + +bool smp_crash_stop_failed(void) +{ + return (atomic_read(&waiting_for_crash_ipi) > 0); +} +#endif + void smp_send_reschedule(int cpu) { send_ipi_single(cpu, IPI_RESCHEDULE); diff --git a/arch/riscv/kernel/traps.c b/arch/riscv/kernel/traps.c index f3e96d60a2ff..7abd8e4c4df6 100644 --- a/arch/riscv/kernel/traps.c +++ b/arch/riscv/kernel/traps.c @@ -221,11 +221,29 @@ asmlinkage unsigned long get_overflow_stack(void) OVERFLOW_STACK_SIZE; } +/* + * A pseudo spinlock to protect the shadow stack from being used by multiple + * harts concurrently. This isn't a real spinlock because the lock side must + * be taken without a valid stack and only a single register, it's only taken + * while in the process of panicing anyway so the performance and error + * checking a proper spinlock gives us doesn't matter. + */ +unsigned long spin_shadow_stack; + asmlinkage void handle_bad_stack(struct pt_regs *regs) { unsigned long tsk_stk = (unsigned long)current->stack; unsigned long ovf_stk = (unsigned long)this_cpu_ptr(overflow_stack); + /* + * We're done with the shadow stack by this point, as we're on the + * overflow stack. Tell any other concurrent overflowing harts that + * they can proceed with panicing by releasing the pseudo-spinlock. + * + * This pairs with an amoswap.aq in handle_kernel_stack_overflow. + */ + smp_store_release(&spin_shadow_stack, 0); + console_verbose(); pr_emerg("Insufficient stack space to handle exception!\n"); diff --git a/arch/riscv/kernel/vdso/Makefile b/arch/riscv/kernel/vdso/Makefile index db6548509bb3..06e6b27f3bcc 100644 --- a/arch/riscv/kernel/vdso/Makefile +++ b/arch/riscv/kernel/vdso/Makefile @@ -17,6 +17,7 @@ vdso-syms += flush_icache obj-vdso = $(patsubst %, %.o, $(vdso-syms)) note.o ccflags-y := -fno-stack-protector +ccflags-y += -DDISABLE_BRANCH_PROFILING ifneq ($(c-gettimeofday-y),) CFLAGS_vgettimeofday.o += -fPIC -include $(c-gettimeofday-y) diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index f1cb9391190d..11e901286414 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -763,6 +763,7 @@ static inline int pmd_dirty(pmd_t pmd) return (pmd_val(pmd) & _SEGMENT_ENTRY_DIRTY) != 0; } +#define pmd_young pmd_young static inline int pmd_young(pmd_t pmd) { return (pmd_val(pmd) & _SEGMENT_ENTRY_YOUNG) != 0; diff --git a/arch/sparc/include/asm/pgtable_64.h b/arch/sparc/include/asm/pgtable_64.h index a779418ceba9..3bc9736bddb1 100644 --- a/arch/sparc/include/asm/pgtable_64.h +++ b/arch/sparc/include/asm/pgtable_64.h @@ -693,6 +693,7 @@ static inline unsigned long pmd_dirty(pmd_t pmd) return pte_dirty(pte); } +#define pmd_young pmd_young static inline unsigned long pmd_young(pmd_t pmd) { pte_t pte = __pte(pmd_val(pmd)); diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index c936ce9f0c47..dfdb103ae4f6 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -321,7 +321,7 @@ static inline void indirect_branch_prediction_barrier(void) /* The Intel SPEC CTRL MSR base value cache */ extern u64 x86_spec_ctrl_base; DECLARE_PER_CPU(u64, x86_spec_ctrl_current); -extern void write_spec_ctrl_current(u64 val, bool force); +extern void update_spec_ctrl_cond(u64 val); extern u64 spec_ctrl_current(void); /* diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 5059799bebe3..286a71810f9e 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -139,6 +139,7 @@ static inline int pmd_dirty(pmd_t pmd) return pmd_flags(pmd) & _PAGE_DIRTY; } +#define pmd_young pmd_young static inline int pmd_young(pmd_t pmd) { return pmd_flags(pmd) & _PAGE_ACCESSED; @@ -1438,6 +1439,14 @@ static inline bool arch_has_hw_pte_young(void) return true; } +#ifdef CONFIG_XEN_PV +#define arch_has_hw_nonleaf_pmd_young arch_has_hw_nonleaf_pmd_young +static inline bool arch_has_hw_nonleaf_pmd_young(void) +{ + return !cpu_feature_enabled(X86_FEATURE_XENPV); +} +#endif + #ifdef CONFIG_PAGE_TABLE_CHECK static inline bool pte_user_accessible_page(pte_t pte) { diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 3e3230cccaa7..6daf84229548 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -60,11 +60,18 @@ EXPORT_SYMBOL_GPL(x86_spec_ctrl_current); static DEFINE_MUTEX(spec_ctrl_mutex); +/* Update SPEC_CTRL MSR and its cached copy unconditionally */ +static void update_spec_ctrl(u64 val) +{ + this_cpu_write(x86_spec_ctrl_current, val); + wrmsrl(MSR_IA32_SPEC_CTRL, val); +} + /* * Keep track of the SPEC_CTRL MSR value for the current task, which may differ * from x86_spec_ctrl_base due to STIBP/SSB in __speculation_ctrl_update(). */ -void write_spec_ctrl_current(u64 val, bool force) +void update_spec_ctrl_cond(u64 val) { if (this_cpu_read(x86_spec_ctrl_current) == val) return; @@ -75,7 +82,7 @@ void write_spec_ctrl_current(u64 val, bool force) * When KERNEL_IBRS this MSR is written on return-to-user, unless * forced the update can be delayed until that time. */ - if (force || !cpu_feature_enabled(X86_FEATURE_KERNEL_IBRS)) + if (!cpu_feature_enabled(X86_FEATURE_KERNEL_IBRS)) wrmsrl(MSR_IA32_SPEC_CTRL, val); } @@ -1328,7 +1335,7 @@ static void __init spec_ctrl_disable_kernel_rrsba(void) if (ia32_cap & ARCH_CAP_RRSBA) { x86_spec_ctrl_base |= SPEC_CTRL_RRSBA_DIS_S; - write_spec_ctrl_current(x86_spec_ctrl_base, true); + update_spec_ctrl(x86_spec_ctrl_base); } } @@ -1450,7 +1457,7 @@ static void __init spectre_v2_select_mitigation(void) if (spectre_v2_in_ibrs_mode(mode)) { x86_spec_ctrl_base |= SPEC_CTRL_IBRS; - write_spec_ctrl_current(x86_spec_ctrl_base, true); + update_spec_ctrl(x86_spec_ctrl_base); } switch (mode) { @@ -1564,7 +1571,7 @@ static void __init spectre_v2_select_mitigation(void) static void update_stibp_msr(void * __unused) { u64 val = spec_ctrl_current() | (x86_spec_ctrl_base & SPEC_CTRL_STIBP); - write_spec_ctrl_current(val, true); + update_spec_ctrl(val); } /* Update x86_spec_ctrl_base in case SMT state changed. */ @@ -1797,7 +1804,7 @@ static enum ssb_mitigation __init __ssb_select_mitigation(void) x86_amd_ssb_disable(); } else { x86_spec_ctrl_base |= SPEC_CTRL_SSBD; - write_spec_ctrl_current(x86_spec_ctrl_base, true); + update_spec_ctrl(x86_spec_ctrl_base); } } @@ -2048,7 +2055,7 @@ int arch_prctl_spec_ctrl_get(struct task_struct *task, unsigned long which) void x86_spec_ctrl_setup_ap(void) { if (boot_cpu_has(X86_FEATURE_MSR_SPEC_CTRL)) - write_spec_ctrl_current(x86_spec_ctrl_base, true); + update_spec_ctrl(x86_spec_ctrl_base); if (ssb_mode == SPEC_STORE_BYPASS_DISABLE) x86_amd_ssb_disable(); diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index c21b7347a26d..e436c9c1ef3b 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -600,7 +600,7 @@ static __always_inline void __speculation_ctrl_update(unsigned long tifp, } if (updmsr) - write_spec_ctrl_current(msr, false); + update_spec_ctrl_cond(msr); } static unsigned long speculation_ctrl_update_tif(struct task_struct *tsk) diff --git a/drivers/clk/at91/at91rm9200.c b/drivers/clk/at91/at91rm9200.c index b174f727a8ef..16870943a13e 100644 --- a/drivers/clk/at91/at91rm9200.c +++ b/drivers/clk/at91/at91rm9200.c @@ -40,7 +40,7 @@ static const struct clk_pll_characteristics rm9200_pll_characteristics = { }; static const struct sck at91rm9200_systemck[] = { - { .n = "udpck", .p = "usbck", .id = 2 }, + { .n = "udpck", .p = "usbck", .id = 1 }, { .n = "uhpck", .p = "usbck", .id = 4 }, { .n = "pck0", .p = "prog0", .id = 8 }, { .n = "pck1", .p = "prog1", .id = 9 }, diff --git a/drivers/clk/qcom/gcc-sc8280xp.c b/drivers/clk/qcom/gcc-sc8280xp.c index a18ed88f3b82..b3198784e1c3 100644 --- a/drivers/clk/qcom/gcc-sc8280xp.c +++ b/drivers/clk/qcom/gcc-sc8280xp.c @@ -5364,6 +5364,8 @@ static struct clk_branch gcc_ufs_1_card_clkref_clk = { .enable_mask = BIT(0), .hw.init = &(const struct clk_init_data) { .name = "gcc_ufs_1_card_clkref_clk", + .parent_data = &gcc_parent_data_tcxo, + .num_parents = 1, .ops = &clk_branch2_ops, }, }, @@ -5432,6 +5434,8 @@ static struct clk_branch gcc_ufs_card_clkref_clk = { .enable_mask = BIT(0), .hw.init = &(const struct clk_init_data) { .name = "gcc_ufs_card_clkref_clk", + .parent_data = &gcc_parent_data_tcxo, + .num_parents = 1, .ops = &clk_branch2_ops, }, }, @@ -5848,6 +5852,8 @@ static struct clk_branch gcc_ufs_ref_clkref_clk = { .enable_mask = BIT(0), .hw.init = &(const struct clk_init_data) { .name = "gcc_ufs_ref_clkref_clk", + .parent_data = &gcc_parent_data_tcxo, + .num_parents = 1, .ops = &clk_branch2_ops, }, }, diff --git a/drivers/clk/qcom/gdsc.c b/drivers/clk/qcom/gdsc.c index 7cf5e130e92f..0f21a8a767ac 100644 --- a/drivers/clk/qcom/gdsc.c +++ b/drivers/clk/qcom/gdsc.c @@ -11,7 +11,6 @@ #include #include #include -#include #include #include #include @@ -56,22 +55,6 @@ enum gdsc_status { GDSC_ON }; -static int gdsc_pm_runtime_get(struct gdsc *sc) -{ - if (!sc->dev) - return 0; - - return pm_runtime_resume_and_get(sc->dev); -} - -static int gdsc_pm_runtime_put(struct gdsc *sc) -{ - if (!sc->dev) - return 0; - - return pm_runtime_put_sync(sc->dev); -} - /* Returns 1 if GDSC status is status, 0 if not, and < 0 on error */ static int gdsc_check_status(struct gdsc *sc, enum gdsc_status status) { @@ -271,8 +254,9 @@ static void gdsc_retain_ff_on(struct gdsc *sc) regmap_update_bits(sc->regmap, sc->gdscr, mask, mask); } -static int _gdsc_enable(struct gdsc *sc) +static int gdsc_enable(struct generic_pm_domain *domain) { + struct gdsc *sc = domain_to_gdsc(domain); int ret; if (sc->pwrsts == PWRSTS_ON) @@ -328,22 +312,11 @@ static int _gdsc_enable(struct gdsc *sc) return 0; } -static int gdsc_enable(struct generic_pm_domain *domain) +static int gdsc_disable(struct generic_pm_domain *domain) { struct gdsc *sc = domain_to_gdsc(domain); int ret; - ret = gdsc_pm_runtime_get(sc); - if (ret) - return ret; - - return _gdsc_enable(sc); -} - -static int _gdsc_disable(struct gdsc *sc) -{ - int ret; - if (sc->pwrsts == PWRSTS_ON) return gdsc_assert_reset(sc); @@ -388,18 +361,6 @@ static int _gdsc_disable(struct gdsc *sc) return 0; } -static int gdsc_disable(struct generic_pm_domain *domain) -{ - struct gdsc *sc = domain_to_gdsc(domain); - int ret; - - ret = _gdsc_disable(sc); - - gdsc_pm_runtime_put(sc); - - return ret; -} - static int gdsc_init(struct gdsc *sc) { u32 mask, val; @@ -447,11 +408,6 @@ static int gdsc_init(struct gdsc *sc) return ret; } - /* ...and the power-domain */ - ret = gdsc_pm_runtime_get(sc); - if (ret) - goto err_disable_supply; - /* * Votable GDSCs can be ON due to Vote from other masters. * If a Votable GDSC is ON, make sure we have a Vote. @@ -459,14 +415,14 @@ static int gdsc_init(struct gdsc *sc) if (sc->flags & VOTABLE) { ret = gdsc_update_collapse_bit(sc, false); if (ret) - goto err_put_rpm; + goto err_disable_supply; } /* Turn on HW trigger mode if supported */ if (sc->flags & HW_CTRL) { ret = gdsc_hwctrl(sc, true); if (ret < 0) - goto err_put_rpm; + goto err_disable_supply; } /* @@ -496,13 +452,10 @@ static int gdsc_init(struct gdsc *sc) ret = pm_genpd_init(&sc->pd, NULL, !on); if (ret) - goto err_put_rpm; + goto err_disable_supply; return 0; -err_put_rpm: - if (on) - gdsc_pm_runtime_put(sc); err_disable_supply: if (on && sc->rsupply) regulator_disable(sc->rsupply); @@ -541,8 +494,6 @@ int gdsc_register(struct gdsc_desc *desc, for (i = 0; i < num; i++) { if (!scs[i]) continue; - if (pm_runtime_enabled(dev)) - scs[i]->dev = dev; scs[i]->regmap = regmap; scs[i]->rcdev = rcdev; ret = gdsc_init(scs[i]); diff --git a/drivers/clk/qcom/gdsc.h b/drivers/clk/qcom/gdsc.h index 981a12c8502d..803512688336 100644 --- a/drivers/clk/qcom/gdsc.h +++ b/drivers/clk/qcom/gdsc.h @@ -30,7 +30,6 @@ struct reset_controller_dev; * @resets: ids of resets associated with this gdsc * @reset_count: number of @resets * @rcdev: reset controller - * @dev: the device holding the GDSC, used for pm_runtime calls */ struct gdsc { struct generic_pm_domain pd; @@ -74,7 +73,6 @@ struct gdsc { const char *supply; struct regulator *rsupply; - struct device *dev; }; struct gdsc_desc { diff --git a/drivers/clk/samsung/clk-exynos-clkout.c b/drivers/clk/samsung/clk-exynos-clkout.c index 273f77d54dab..e6d6cbf8c4e6 100644 --- a/drivers/clk/samsung/clk-exynos-clkout.c +++ b/drivers/clk/samsung/clk-exynos-clkout.c @@ -81,17 +81,19 @@ MODULE_DEVICE_TABLE(of, exynos_clkout_ids); static int exynos_clkout_match_parent_dev(struct device *dev, u32 *mux_mask) { const struct exynos_clkout_variant *variant; + const struct of_device_id *match; if (!dev->parent) { dev_err(dev, "not instantiated from MFD\n"); return -EINVAL; } - variant = of_device_get_match_data(dev->parent); - if (!variant) { + match = of_match_device(exynos_clkout_ids, dev->parent); + if (!match) { dev_err(dev, "cannot match parent device\n"); return -EINVAL; } + variant = match->data; *mux_mask = variant->mux_mask; diff --git a/drivers/clk/samsung/clk-exynos7885.c b/drivers/clk/samsung/clk-exynos7885.c index 62ce6814f141..0d2a950ed184 100644 --- a/drivers/clk/samsung/clk-exynos7885.c +++ b/drivers/clk/samsung/clk-exynos7885.c @@ -231,7 +231,7 @@ static const struct samsung_div_clock top_div_clks[] __initconst = { CLK_CON_DIV_PLL_SHARED0_DIV2, 0, 1), DIV(CLK_DOUT_SHARED0_DIV3, "dout_shared0_div3", "fout_shared0_pll", CLK_CON_DIV_PLL_SHARED0_DIV3, 0, 2), - DIV(CLK_DOUT_SHARED0_DIV4, "dout_shared0_div4", "fout_shared0_pll", + DIV(CLK_DOUT_SHARED0_DIV4, "dout_shared0_div4", "dout_shared0_div2", CLK_CON_DIV_PLL_SHARED0_DIV4, 0, 1), DIV(CLK_DOUT_SHARED0_DIV5, "dout_shared0_div5", "fout_shared0_pll", CLK_CON_DIV_PLL_SHARED0_DIV5, 0, 3), @@ -239,7 +239,7 @@ static const struct samsung_div_clock top_div_clks[] __initconst = { CLK_CON_DIV_PLL_SHARED1_DIV2, 0, 1), DIV(CLK_DOUT_SHARED1_DIV3, "dout_shared1_div3", "fout_shared1_pll", CLK_CON_DIV_PLL_SHARED1_DIV3, 0, 2), - DIV(CLK_DOUT_SHARED1_DIV4, "dout_shared1_div4", "fout_shared1_pll", + DIV(CLK_DOUT_SHARED1_DIV4, "dout_shared1_div4", "dout_shared1_div2", CLK_CON_DIV_PLL_SHARED1_DIV4, 0, 1), /* CORE */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c index 0b52af415b28..ce64ca1c6e66 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c @@ -156,6 +156,9 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev) break; case IP_VERSION(3, 0, 2): fw_name = FIRMWARE_VANGOGH; + if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) && + (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)) + adev->vcn.indirect_sram = true; break; case IP_VERSION(3, 0, 16): fw_name = FIRMWARE_DIMGREY_CAVEFISH; diff --git a/drivers/gpu/drm/amd/display/Kconfig b/drivers/gpu/drm/amd/display/Kconfig index 6925e0280dbe..f4f3d2665a6b 100644 --- a/drivers/gpu/drm/amd/display/Kconfig +++ b/drivers/gpu/drm/amd/display/Kconfig @@ -5,6 +5,7 @@ menu "Display Engine Configuration" config DRM_AMD_DC bool "AMD DC - Enable new display engine" default y + depends on BROKEN || !CC_IS_CLANG || X86_64 || SPARC64 || ARM64 select SND_HDA_COMPONENT if SND_HDA_CORE select DRM_AMD_DC_DCN if (X86 || PPC_LONG_DOUBLE_128) help @@ -12,6 +13,12 @@ config DRM_AMD_DC support for AMDGPU. This adds required support for Vega and Raven ASICs. + calculate_bandwidth() is presently broken on all !(X86_64 || SPARC64 || ARM64) + architectures built with Clang (all released versions), whereby the stack + frame gets blown up to well over 5k. This would cause an immediate kernel + panic on most architectures. We'll revert this when the following bug report + has been resolved: https://github.com/llvm/llvm-project/issues/41896. + config DRM_AMD_DC_DCN def_bool n help diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 461c62c88413..de77054195c6 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -3723,12 +3723,16 @@ out: static u8 bigjoiner_pipes(struct drm_i915_private *i915) { + u8 pipes; + if (DISPLAY_VER(i915) >= 12) - return BIT(PIPE_A) | BIT(PIPE_B) | BIT(PIPE_C) | BIT(PIPE_D); + pipes = BIT(PIPE_A) | BIT(PIPE_B) | BIT(PIPE_C) | BIT(PIPE_D); else if (DISPLAY_VER(i915) >= 11) - return BIT(PIPE_B) | BIT(PIPE_C); + pipes = BIT(PIPE_B) | BIT(PIPE_C); else - return 0; + pipes = 0; + + return pipes & RUNTIME_INFO(i915)->pipe_mask; } static bool transcoder_ddi_func_is_enabled(struct drm_i915_private *dev_priv, diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c index 5c931b6696c3..7caa3412a244 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.c +++ b/drivers/gpu/drm/i915/gt/intel_gt.c @@ -625,8 +625,13 @@ int intel_gt_wait_for_idle(struct intel_gt *gt, long timeout) return -EINTR; } - return timeout ? timeout : intel_uc_wait_for_idle(>->uc, - remaining_timeout); + if (timeout) + return timeout; + + if (remaining_timeout < 0) + remaining_timeout = 0; + + return intel_uc_wait_for_idle(>->uc, remaining_timeout); } int intel_gt_init(struct intel_gt *gt) diff --git a/drivers/gpu/drm/i915/gt/intel_gt_requests.c b/drivers/gpu/drm/i915/gt/intel_gt_requests.c index edb881d75630..1dfd01668c79 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_requests.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_requests.c @@ -199,7 +199,7 @@ out_active: spin_lock(&timelines->lock); if (remaining_timeout) *remaining_timeout = timeout; - return active_count ? timeout : 0; + return active_count ? timeout ?: -ETIME : 0; } static void retire_work_handler(struct work_struct *work) diff --git a/drivers/gpu/drm/i915/intel_dram.c b/drivers/gpu/drm/i915/intel_dram.c index 2403ccd52c74..bba8cb6e8ae4 100644 --- a/drivers/gpu/drm/i915/intel_dram.c +++ b/drivers/gpu/drm/i915/intel_dram.c @@ -471,8 +471,7 @@ static int xelpdp_get_dram_info(struct drm_i915_private *i915) u32 val = intel_uncore_read(&i915->uncore, MTL_MEM_SS_INFO_GLOBAL); struct dram_info *dram_info = &i915->dram_info; - val = REG_FIELD_GET(MTL_DDR_TYPE_MASK, val); - switch (val) { + switch (REG_FIELD_GET(MTL_DDR_TYPE_MASK, val)) { case 0: dram_info->type = INTEL_DRAM_DDR4; break; diff --git a/drivers/hwmon/asus-ec-sensors.c b/drivers/hwmon/asus-ec-sensors.c index 81e688975c6a..a901e4e33d81 100644 --- a/drivers/hwmon/asus-ec-sensors.c +++ b/drivers/hwmon/asus-ec-sensors.c @@ -938,6 +938,8 @@ static int asus_ec_probe(struct platform_device *pdev) ec_data->nr_sensors = hweight_long(ec_data->board_info->sensors); ec_data->sensors = devm_kcalloc(dev, ec_data->nr_sensors, sizeof(struct ec_sensor), GFP_KERNEL); + if (!ec_data->sensors) + return -ENOMEM; status = setup_lock_data(dev); if (status) { diff --git a/drivers/hwmon/coretemp.c b/drivers/hwmon/coretemp.c index 8bf32c6c85d9..9bee4d33fbdf 100644 --- a/drivers/hwmon/coretemp.c +++ b/drivers/hwmon/coretemp.c @@ -242,10 +242,13 @@ static int adjust_tjmax(struct cpuinfo_x86 *c, u32 id, struct device *dev) */ if (host_bridge && host_bridge->vendor == PCI_VENDOR_ID_INTEL) { for (i = 0; i < ARRAY_SIZE(tjmax_pci_table); i++) { - if (host_bridge->device == tjmax_pci_table[i].device) + if (host_bridge->device == tjmax_pci_table[i].device) { + pci_dev_put(host_bridge); return tjmax_pci_table[i].tjmax; + } } } + pci_dev_put(host_bridge); for (i = 0; i < ARRAY_SIZE(tjmax_table); i++) { if (strstr(c->x86_model_id, tjmax_table[i].id)) @@ -533,6 +536,10 @@ static void coretemp_remove_core(struct platform_data *pdata, int indx) { struct temp_data *tdata = pdata->core_data[indx]; + /* if we errored on add then this is already gone */ + if (!tdata) + return; + /* Remove the sysfs attributes */ sysfs_remove_group(&pdata->hwmon_dev->kobj, &tdata->attr_group); diff --git a/drivers/hwmon/i5500_temp.c b/drivers/hwmon/i5500_temp.c index 05f68e9c9477..23b9f94fe0a9 100644 --- a/drivers/hwmon/i5500_temp.c +++ b/drivers/hwmon/i5500_temp.c @@ -117,7 +117,7 @@ static int i5500_temp_probe(struct pci_dev *pdev, u32 tstimer; s8 tsfsc; - err = pci_enable_device(pdev); + err = pcim_enable_device(pdev); if (err) { dev_err(&pdev->dev, "Failed to enable device\n"); return err; diff --git a/drivers/hwmon/ibmpex.c b/drivers/hwmon/ibmpex.c index f6ec165c0fa8..1837cccd993c 100644 --- a/drivers/hwmon/ibmpex.c +++ b/drivers/hwmon/ibmpex.c @@ -502,6 +502,7 @@ static void ibmpex_register_bmc(int iface, struct device *dev) return; out_register: + list_del(&data->list); hwmon_device_unregister(data->hwmon_dev); out_user: ipmi_destroy_user(data->user); diff --git a/drivers/hwmon/ina3221.c b/drivers/hwmon/ina3221.c index 2a57f4b60c29..e06186986444 100644 --- a/drivers/hwmon/ina3221.c +++ b/drivers/hwmon/ina3221.c @@ -228,7 +228,7 @@ static int ina3221_read_value(struct ina3221_data *ina, unsigned int reg, * Shunt Voltage Sum register has 14-bit value with 1-bit shift * Other Shunt Voltage registers have 12 bits with 3-bit shift */ - if (reg == INA3221_SHUNT_SUM) + if (reg == INA3221_SHUNT_SUM || reg == INA3221_CRIT_SUM) *val = sign_extend32(regval >> 1, 14); else *val = sign_extend32(regval >> 3, 12); @@ -465,7 +465,7 @@ static int ina3221_write_curr(struct device *dev, u32 attr, * SHUNT_SUM: (1 / 40uV) << 1 = 1 / 20uV * SHUNT[1-3]: (1 / 40uV) << 3 = 1 / 5uV */ - if (reg == INA3221_SHUNT_SUM) + if (reg == INA3221_SHUNT_SUM || reg == INA3221_CRIT_SUM) regval = DIV_ROUND_CLOSEST(voltage_uv, 20) & 0xfffe; else regval = DIV_ROUND_CLOSEST(voltage_uv, 5) & 0xfff8; diff --git a/drivers/hwmon/ltc2947-core.c b/drivers/hwmon/ltc2947-core.c index 7404e974762f..2dbbbac9de09 100644 --- a/drivers/hwmon/ltc2947-core.c +++ b/drivers/hwmon/ltc2947-core.c @@ -396,7 +396,7 @@ static int ltc2947_read_temp(struct device *dev, const u32 attr, long *val, return ret; /* in milidegrees celcius, temp is given by: */ - *val = (__val * 204) + 550; + *val = (__val * 204) + 5500; return 0; } diff --git a/drivers/iommu/intel/dmar.c b/drivers/iommu/intel/dmar.c index 5a8f780e7ffd..bc94059a5b87 100644 --- a/drivers/iommu/intel/dmar.c +++ b/drivers/iommu/intel/dmar.c @@ -820,6 +820,7 @@ int __init dmar_dev_scope_init(void) info = dmar_alloc_pci_notify_info(dev, BUS_NOTIFY_ADD_DEVICE); if (!info) { + pci_dev_put(dev); return dmar_dev_scope_status; } else { dmar_pci_bus_add_dev(info); diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 996a8b5ee5ee..5287efe247b1 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -1396,6 +1396,24 @@ static void domain_update_iotlb(struct dmar_domain *domain) spin_unlock_irqrestore(&domain->lock, flags); } +/* + * The extra devTLB flush quirk impacts those QAT devices with PCI device + * IDs ranging from 0x4940 to 0x4943. It is exempted from risky_device() + * check because it applies only to the built-in QAT devices and it doesn't + * grant additional privileges. + */ +#define BUGGY_QAT_DEVID_MASK 0x494c +static bool dev_needs_extra_dtlb_flush(struct pci_dev *pdev) +{ + if (pdev->vendor != PCI_VENDOR_ID_INTEL) + return false; + + if ((pdev->device & 0xfffc) != BUGGY_QAT_DEVID_MASK) + return false; + + return true; +} + static void iommu_enable_pci_caps(struct device_domain_info *info) { struct pci_dev *pdev; @@ -1478,6 +1496,7 @@ static void __iommu_flush_dev_iotlb(struct device_domain_info *info, qdep = info->ats_qdep; qi_flush_dev_iotlb(info->iommu, sid, info->pfsid, qdep, addr, mask); + quirk_extra_dev_tlb_flush(info, addr, mask, PASID_RID2PASID, qdep); } static void iommu_flush_dev_iotlb(struct dmar_domain *domain, @@ -3854,8 +3873,10 @@ static inline bool has_external_pci(void) struct pci_dev *pdev = NULL; for_each_pci_dev(pdev) - if (pdev->external_facing) + if (pdev->external_facing) { + pci_dev_put(pdev); return true; + } return false; } @@ -4490,9 +4511,10 @@ static struct iommu_device *intel_iommu_probe_device(struct device *dev) if (dev_is_pci(dev)) { if (ecap_dev_iotlb_support(iommu->ecap) && pci_ats_supported(pdev) && - dmar_ats_supported(pdev, iommu)) + dmar_ats_supported(pdev, iommu)) { info->ats_supported = 1; - + info->dtlb_extra_inval = dev_needs_extra_dtlb_flush(pdev); + } if (sm_supported(iommu)) { if (pasid_supported(iommu)) { int features = pci_pasid_features(pdev); @@ -4931,3 +4953,48 @@ static void __init check_tylersburg_isoch(void) pr_warn("Recommended TLB entries for ISOCH unit is 16; your BIOS set %d\n", vtisochctrl); } + +/* + * Here we deal with a device TLB defect where device may inadvertently issue ATS + * invalidation completion before posted writes initiated with translated address + * that utilized translations matching the invalidation address range, violating + * the invalidation completion ordering. + * Therefore, any use cases that cannot guarantee DMA is stopped before unmap is + * vulnerable to this defect. In other words, any dTLB invalidation initiated not + * under the control of the trusted/privileged host device driver must use this + * quirk. + * Device TLBs are invalidated under the following six conditions: + * 1. Device driver does DMA API unmap IOVA + * 2. Device driver unbind a PASID from a process, sva_unbind_device() + * 3. PASID is torn down, after PASID cache is flushed. e.g. process + * exit_mmap() due to crash + * 4. Under SVA usage, called by mmu_notifier.invalidate_range() where + * VM has to free pages that were unmapped + * 5. Userspace driver unmaps a DMA buffer + * 6. Cache invalidation in vSVA usage (upcoming) + * + * For #1 and #2, device drivers are responsible for stopping DMA traffic + * before unmap/unbind. For #3, iommu driver gets mmu_notifier to + * invalidate TLB the same way as normal user unmap which will use this quirk. + * The dTLB invalidation after PASID cache flush does not need this quirk. + * + * As a reminder, #6 will *NEED* this quirk as we enable nested translation. + */ +void quirk_extra_dev_tlb_flush(struct device_domain_info *info, + unsigned long address, unsigned long mask, + u32 pasid, u16 qdep) +{ + u16 sid; + + if (likely(!info->dtlb_extra_inval)) + return; + + sid = PCI_DEVID(info->bus, info->devfn); + if (pasid == PASID_RID2PASID) { + qi_flush_dev_iotlb(info->iommu, sid, info->pfsid, + qdep, address, mask); + } else { + qi_flush_dev_iotlb_pasid(info->iommu, sid, info->pfsid, + pasid, qdep, address, mask); + } +} diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h index 92023dff9513..db9df7c3790c 100644 --- a/drivers/iommu/intel/iommu.h +++ b/drivers/iommu/intel/iommu.h @@ -623,6 +623,7 @@ struct device_domain_info { u8 pri_enabled:1; u8 ats_supported:1; u8 ats_enabled:1; + u8 dtlb_extra_inval:1; /* Quirk for devices need extra flush */ u8 ats_qdep; struct device *dev; /* it's NULL for PCIe-to-PCI bridge */ struct intel_iommu *iommu; /* IOMMU used by this device */ @@ -728,6 +729,9 @@ void qi_flush_piotlb(struct intel_iommu *iommu, u16 did, u32 pasid, u64 addr, void qi_flush_dev_iotlb_pasid(struct intel_iommu *iommu, u16 sid, u16 pfsid, u32 pasid, u16 qdep, u64 addr, unsigned int size_order); +void quirk_extra_dev_tlb_flush(struct device_domain_info *info, + unsigned long address, unsigned long pages, + u32 pasid, u16 qdep); void qi_flush_pasid_cache(struct intel_iommu *iommu, u16 did, u64 granu, u32 pasid); diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c index 7d08eb034f2d..03b25358946c 100644 --- a/drivers/iommu/intel/svm.c +++ b/drivers/iommu/intel/svm.c @@ -184,10 +184,13 @@ static void __flush_svm_range_dev(struct intel_svm *svm, return; qi_flush_piotlb(sdev->iommu, sdev->did, svm->pasid, address, pages, ih); - if (info->ats_enabled) + if (info->ats_enabled) { qi_flush_dev_iotlb_pasid(sdev->iommu, sdev->sid, info->pfsid, svm->pasid, sdev->qdep, address, order_base_2(pages)); + quirk_extra_dev_tlb_flush(info, address, order_base_2(pages), + svm->pasid, sdev->qdep); + } } static void intel_flush_svm_range_dev(struct intel_svm *svm, @@ -745,12 +748,16 @@ bad_req: * If prq is to be handled outside iommu driver via receiver of * the fault notifiers, we skip the page response here. */ - if (!pdev || intel_svm_prq_report(iommu, &pdev->dev, req)) - handle_bad_prq_event(iommu, req, QI_RESP_INVALID); + if (!pdev) + goto bad_req; - trace_prq_report(iommu, &pdev->dev, req->qw_0, req->qw_1, - req->priv_data[0], req->priv_data[1], - iommu->prq_seq_number++); + if (intel_svm_prq_report(iommu, &pdev->dev, req)) + handle_bad_prq_event(iommu, req, QI_RESP_INVALID); + else + trace_prq_report(iommu, &pdev->dev, req->qw_0, req->qw_1, + req->priv_data[0], req->priv_data[1], + iommu->prq_seq_number++); + pci_dev_put(pdev); prq_advance: head = (head + sizeof(*req)) & PRQ_RING_MASK; } diff --git a/drivers/media/common/videobuf2/frame_vector.c b/drivers/media/common/videobuf2/frame_vector.c index 542dde9d2609..144027035892 100644 --- a/drivers/media/common/videobuf2/frame_vector.c +++ b/drivers/media/common/videobuf2/frame_vector.c @@ -35,11 +35,7 @@ int get_vaddr_frames(unsigned long start, unsigned int nr_frames, struct frame_vector *vec) { - struct mm_struct *mm = current->mm; - struct vm_area_struct *vma; - int ret_pin_user_pages_fast = 0; - int ret = 0; - int err; + int ret; if (nr_frames == 0) return 0; @@ -52,57 +48,17 @@ int get_vaddr_frames(unsigned long start, unsigned int nr_frames, ret = pin_user_pages_fast(start, nr_frames, FOLL_FORCE | FOLL_WRITE | FOLL_LONGTERM, (struct page **)(vec->ptrs)); - if (ret > 0) { - vec->got_ref = true; - vec->is_pfns = false; - goto out_unlocked; - } - ret_pin_user_pages_fast = ret; + vec->got_ref = true; + vec->is_pfns = false; + vec->nr_frames = ret; - mmap_read_lock(mm); - vec->got_ref = false; - vec->is_pfns = true; - ret = 0; - do { - unsigned long *nums = frame_vector_pfns(vec); + if (likely(ret > 0)) + return ret; - vma = vma_lookup(mm, start); - if (!vma) - break; - - while (ret < nr_frames && start + PAGE_SIZE <= vma->vm_end) { - err = follow_pfn(vma, start, &nums[ret]); - if (err) { - if (ret) - goto out; - // If follow_pfn() returns -EINVAL, then this - // is not an IO mapping or a raw PFN mapping. - // In that case, return the original error from - // pin_user_pages_fast(). Otherwise this - // function would return -EINVAL when - // pin_user_pages_fast() returned -ENOMEM, - // which makes debugging hard. - if (err == -EINVAL && ret_pin_user_pages_fast) - ret = ret_pin_user_pages_fast; - else - ret = err; - goto out; - } - start += PAGE_SIZE; - ret++; - } - /* Bail out if VMA doesn't completely cover the tail page. */ - if (start < vma->vm_end) - break; - } while (ret < nr_frames); -out: - mmap_read_unlock(mm); -out_unlocked: - if (!ret) - ret = -EFAULT; - if (ret > 0) - vec->nr_frames = ret; - return ret; + /* This used to (racily) return non-refcounted pfns. Let people know */ + WARN_ONCE(1, "get_vaddr_frames() cannot follow VM_IO mapping"); + vec->nr_frames = 0; + return ret ? ret : -EFAULT; } EXPORT_SYMBOL(get_vaddr_frames); diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c index c5de202f530a..de1cc9e1ae57 100644 --- a/drivers/mmc/core/core.c +++ b/drivers/mmc/core/core.c @@ -1484,6 +1484,11 @@ void mmc_init_erase(struct mmc_card *card) card->pref_erase = 0; } +static bool is_trim_arg(unsigned int arg) +{ + return (arg & MMC_TRIM_OR_DISCARD_ARGS) && arg != MMC_DISCARD_ARG; +} + static unsigned int mmc_mmc_erase_timeout(struct mmc_card *card, unsigned int arg, unsigned int qty) { @@ -1766,7 +1771,7 @@ int mmc_erase(struct mmc_card *card, unsigned int from, unsigned int nr, !(card->ext_csd.sec_feature_support & EXT_CSD_SEC_ER_EN)) return -EOPNOTSUPP; - if (mmc_card_mmc(card) && (arg & MMC_TRIM_ARGS) && + if (mmc_card_mmc(card) && is_trim_arg(arg) && !(card->ext_csd.sec_feature_support & EXT_CSD_SEC_GB_CL_EN)) return -EOPNOTSUPP; @@ -1796,7 +1801,7 @@ int mmc_erase(struct mmc_card *card, unsigned int from, unsigned int nr, * identified by the card->eg_boundary flag. */ rem = card->erase_size - (from % card->erase_size); - if ((arg & MMC_TRIM_ARGS) && (card->eg_boundary) && (nr > rem)) { + if ((arg & MMC_TRIM_OR_DISCARD_ARGS) && card->eg_boundary && nr > rem) { err = mmc_do_erase(card, from, from + rem - 1, arg); from += rem; if ((err) || (to <= from)) diff --git a/drivers/mmc/core/mmc_test.c b/drivers/mmc/core/mmc_test.c index 8d9bceeff986..155ce2bdfe62 100644 --- a/drivers/mmc/core/mmc_test.c +++ b/drivers/mmc/core/mmc_test.c @@ -3179,7 +3179,8 @@ static int __mmc_test_register_dbgfs_file(struct mmc_card *card, struct mmc_test_dbgfs_file *df; if (card->debugfs_root) - debugfs_create_file(name, mode, card->debugfs_root, card, fops); + file = debugfs_create_file(name, mode, card->debugfs_root, + card, fops); df = kmalloc(sizeof(*df), GFP_KERNEL); if (!df) { diff --git a/drivers/mmc/host/mtk-sd.c b/drivers/mmc/host/mtk-sd.c index df941438aef5..26bc59b5a7cc 100644 --- a/drivers/mmc/host/mtk-sd.c +++ b/drivers/mmc/host/mtk-sd.c @@ -2588,13 +2588,11 @@ static int msdc_of_clock_parse(struct platform_device *pdev, return PTR_ERR(host->src_clk_cg); } - host->sys_clk_cg = devm_clk_get_optional(&pdev->dev, "sys_cg"); + /* If present, always enable for this clock gate */ + host->sys_clk_cg = devm_clk_get_optional_enabled(&pdev->dev, "sys_cg"); if (IS_ERR(host->sys_clk_cg)) host->sys_clk_cg = NULL; - /* If present, always enable for this clock gate */ - clk_prepare_enable(host->sys_clk_cg); - host->bulk_clks[0].id = "pclk_cg"; host->bulk_clks[1].id = "axi_cg"; host->bulk_clks[2].id = "ahb_cg"; diff --git a/drivers/mmc/host/sdhci-esdhc-imx.c b/drivers/mmc/host/sdhci-esdhc-imx.c index 31ea0a2fce35..ffeb5759830f 100644 --- a/drivers/mmc/host/sdhci-esdhc-imx.c +++ b/drivers/mmc/host/sdhci-esdhc-imx.c @@ -1512,7 +1512,7 @@ static void esdhc_cqe_enable(struct mmc_host *mmc) * system resume back. */ cqhci_writel(cq_host, 0, CQHCI_CTL); - if (cqhci_readl(cq_host, CQHCI_CTL) && CQHCI_HALT) + if (cqhci_readl(cq_host, CQHCI_CTL) & CQHCI_HALT) dev_err(mmc_dev(host->mmc), "failed to exit halt state when enable CQE\n"); diff --git a/drivers/mmc/host/sdhci-sprd.c b/drivers/mmc/host/sdhci-sprd.c index b92a408f138d..bec3f9e3cd3f 100644 --- a/drivers/mmc/host/sdhci-sprd.c +++ b/drivers/mmc/host/sdhci-sprd.c @@ -470,7 +470,7 @@ static int sdhci_sprd_voltage_switch(struct mmc_host *mmc, struct mmc_ios *ios) } if (IS_ERR(sprd_host->pinctrl)) - return 0; + goto reset; switch (ios->signal_voltage) { case MMC_SIGNAL_VOLTAGE_180: @@ -498,6 +498,8 @@ static int sdhci_sprd_voltage_switch(struct mmc_host *mmc, struct mmc_ios *ios) /* Wait for 300 ~ 500 us for pin state stable */ usleep_range(300, 500); + +reset: sdhci_reset(host, SDHCI_RESET_CMD | SDHCI_RESET_DATA); return 0; diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c index fef03de85b99..c7ad32a75b57 100644 --- a/drivers/mmc/host/sdhci.c +++ b/drivers/mmc/host/sdhci.c @@ -373,6 +373,7 @@ static void sdhci_init(struct sdhci_host *host, int soft) if (soft) { /* force clock reconfiguration */ host->clock = 0; + host->reinit_uhs = true; mmc->ops->set_ios(mmc, &mmc->ios); } } @@ -2293,11 +2294,46 @@ void sdhci_set_uhs_signaling(struct sdhci_host *host, unsigned timing) } EXPORT_SYMBOL_GPL(sdhci_set_uhs_signaling); +static bool sdhci_timing_has_preset(unsigned char timing) +{ + switch (timing) { + case MMC_TIMING_UHS_SDR12: + case MMC_TIMING_UHS_SDR25: + case MMC_TIMING_UHS_SDR50: + case MMC_TIMING_UHS_SDR104: + case MMC_TIMING_UHS_DDR50: + case MMC_TIMING_MMC_DDR52: + return true; + }; + return false; +} + +static bool sdhci_preset_needed(struct sdhci_host *host, unsigned char timing) +{ + return !(host->quirks2 & SDHCI_QUIRK2_PRESET_VALUE_BROKEN) && + sdhci_timing_has_preset(timing); +} + +static bool sdhci_presetable_values_change(struct sdhci_host *host, struct mmc_ios *ios) +{ + /* + * Preset Values are: Driver Strength, Clock Generator and SDCLK/RCLK + * Frequency. Check if preset values need to be enabled, or the Driver + * Strength needs updating. Note, clock changes are handled separately. + */ + return !host->preset_enabled && + (sdhci_preset_needed(host, ios->timing) || host->drv_type != ios->drv_type); +} + void sdhci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios) { struct sdhci_host *host = mmc_priv(mmc); + bool reinit_uhs = host->reinit_uhs; + bool turning_on_clk = false; u8 ctrl; + host->reinit_uhs = false; + if (ios->power_mode == MMC_POWER_UNDEFINED) return; @@ -2323,6 +2359,8 @@ void sdhci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios) sdhci_enable_preset_value(host, false); if (!ios->clock || ios->clock != host->clock) { + turning_on_clk = ios->clock && !host->clock; + host->ops->set_clock(host, ios->clock); host->clock = ios->clock; @@ -2349,6 +2387,17 @@ void sdhci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios) host->ops->set_bus_width(host, ios->bus_width); + /* + * Special case to avoid multiple clock changes during voltage + * switching. + */ + if (!reinit_uhs && + turning_on_clk && + host->timing == ios->timing && + host->version >= SDHCI_SPEC_300 && + !sdhci_presetable_values_change(host, ios)) + return; + ctrl = sdhci_readb(host, SDHCI_HOST_CONTROL); if (!(host->quirks & SDHCI_QUIRK_NO_HISPD_BIT)) { @@ -2392,6 +2441,7 @@ void sdhci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios) } sdhci_writew(host, ctrl_2, SDHCI_HOST_CONTROL2); + host->drv_type = ios->drv_type; } else { /* * According to SDHC Spec v3.00, if the Preset Value @@ -2419,19 +2469,14 @@ void sdhci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios) host->ops->set_uhs_signaling(host, ios->timing); host->timing = ios->timing; - if (!(host->quirks2 & SDHCI_QUIRK2_PRESET_VALUE_BROKEN) && - ((ios->timing == MMC_TIMING_UHS_SDR12) || - (ios->timing == MMC_TIMING_UHS_SDR25) || - (ios->timing == MMC_TIMING_UHS_SDR50) || - (ios->timing == MMC_TIMING_UHS_SDR104) || - (ios->timing == MMC_TIMING_UHS_DDR50) || - (ios->timing == MMC_TIMING_MMC_DDR52))) { + if (sdhci_preset_needed(host, ios->timing)) { u16 preset; sdhci_enable_preset_value(host, true); preset = sdhci_get_preset_value(host); ios->drv_type = FIELD_GET(SDHCI_PRESET_DRV_MASK, preset); + host->drv_type = ios->drv_type; } /* Re-enable SD Clock */ @@ -3768,6 +3813,7 @@ int sdhci_resume_host(struct sdhci_host *host) sdhci_init(host, 0); host->pwr = 0; host->clock = 0; + host->reinit_uhs = true; mmc->ops->set_ios(mmc, &mmc->ios); } else { sdhci_init(host, (mmc->pm_flags & MMC_PM_KEEP_POWER)); @@ -3830,6 +3876,7 @@ int sdhci_runtime_resume_host(struct sdhci_host *host, int soft_reset) /* Force clock and power re-program */ host->pwr = 0; host->clock = 0; + host->reinit_uhs = true; mmc->ops->start_signal_voltage_switch(mmc, &mmc->ios); mmc->ops->set_ios(mmc, &mmc->ios); diff --git a/drivers/mmc/host/sdhci.h b/drivers/mmc/host/sdhci.h index d750c464bd1e..87a3aaa07438 100644 --- a/drivers/mmc/host/sdhci.h +++ b/drivers/mmc/host/sdhci.h @@ -524,6 +524,8 @@ struct sdhci_host { unsigned int clock; /* Current clock (MHz) */ u8 pwr; /* Current voltage */ + u8 drv_type; /* Current UHS-I driver type */ + bool reinit_uhs; /* Force UHS-related re-initialization */ bool runtime_suspended; /* Host is runtime suspended */ bool bus_on; /* Bus power prevents runtime suspend */ diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index da55ce45ac70..69e333922bea 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -4304,7 +4304,7 @@ static void nvme_ns_remove(struct nvme_ns *ns) mutex_unlock(&ns->ctrl->subsys->lock); /* guarantee not available in head->list */ - synchronize_rcu(); + synchronize_srcu(&ns->head->srcu); if (!nvme_ns_head_multipath(ns->head)) nvme_cdev_del(&ns->cdev, &ns->cdev_device); diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index 93e2138a8b42..7e025b8948cb 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -174,11 +174,14 @@ void nvme_mpath_revalidate_paths(struct nvme_ns *ns) struct nvme_ns_head *head = ns->head; sector_t capacity = get_capacity(head->disk); int node; + int srcu_idx; + srcu_idx = srcu_read_lock(&head->srcu); list_for_each_entry_rcu(ns, &head->list, siblings) { if (capacity != get_capacity(ns->disk)) clear_bit(NVME_NS_READY, &ns->flags); } + srcu_read_unlock(&head->srcu, srcu_idx); for_each_node(node) rcu_assign_pointer(head->current_path[node], NULL); diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index f4335519399d..488ad7dabeb8 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -797,6 +797,8 @@ static blk_status_t nvme_setup_prp_simple(struct nvme_dev *dev, cmnd->dptr.prp1 = cpu_to_le64(iod->first_dma); if (bv->bv_len > first_prp_len) cmnd->dptr.prp2 = cpu_to_le64(iod->first_dma + first_prp_len); + else + cmnd->dptr.prp2 = 0; return BLK_STS_OK; } diff --git a/drivers/pinctrl/intel/pinctrl-intel.c b/drivers/pinctrl/intel/pinctrl-intel.c index 52ecd66ce357..047a8374b4fd 100644 --- a/drivers/pinctrl/intel/pinctrl-intel.c +++ b/drivers/pinctrl/intel/pinctrl-intel.c @@ -436,9 +436,14 @@ static void __intel_gpio_set_direction(void __iomem *padcfg0, bool input) writel(value, padcfg0); } +static int __intel_gpio_get_gpio_mode(u32 value) +{ + return (value & PADCFG0_PMODE_MASK) >> PADCFG0_PMODE_SHIFT; +} + static int intel_gpio_get_gpio_mode(void __iomem *padcfg0) { - return (readl(padcfg0) & PADCFG0_PMODE_MASK) >> PADCFG0_PMODE_SHIFT; + return __intel_gpio_get_gpio_mode(readl(padcfg0)); } static void intel_gpio_set_gpio_mode(void __iomem *padcfg0) @@ -1674,6 +1679,7 @@ EXPORT_SYMBOL_GPL(intel_pinctrl_get_soc_data); static bool intel_pinctrl_should_save(struct intel_pinctrl *pctrl, unsigned int pin) { const struct pin_desc *pd = pin_desc_get(pctrl->pctldev, pin); + u32 value; if (!pd || !intel_pad_usable(pctrl, pin)) return false; @@ -1688,6 +1694,25 @@ static bool intel_pinctrl_should_save(struct intel_pinctrl *pctrl, unsigned int gpiochip_line_is_irq(&pctrl->chip, intel_pin_to_gpio(pctrl, pin))) return true; + /* + * The firmware on some systems may configure GPIO pins to be + * an interrupt source in so called "direct IRQ" mode. In such + * cases the GPIO controller driver has no idea if those pins + * are being used or not. At the same time, there is a known bug + * in the firmwares that don't restore the pin settings correctly + * after suspend, i.e. by an unknown reason the Rx value becomes + * inverted. + * + * Hence, let's save and restore the pins that are configured + * as GPIOs in the input mode with GPIROUTIOXAPIC bit set. + * + * See https://bugzilla.kernel.org/show_bug.cgi?id=214749. + */ + value = readl(intel_get_padcfg(pctrl, pin, PADCFG0)); + if ((value & PADCFG0_GPIROUTIOXAPIC) && (value & PADCFG0_GPIOTXDIS) && + (__intel_gpio_get_gpio_mode(value) == PADCFG0_PMODE_GPIO)) + return true; + return false; } diff --git a/drivers/pinctrl/mediatek/mtk-eint.c b/drivers/pinctrl/mediatek/mtk-eint.c index 65d312967619..27f0a54e12bf 100644 --- a/drivers/pinctrl/mediatek/mtk-eint.c +++ b/drivers/pinctrl/mediatek/mtk-eint.c @@ -303,12 +303,15 @@ static struct irq_chip mtk_eint_irq_chip = { static unsigned int mtk_eint_hw_init(struct mtk_eint *eint) { - void __iomem *reg = eint->base + eint->regs->dom_en; + void __iomem *dom_en = eint->base + eint->regs->dom_en; + void __iomem *mask_set = eint->base + eint->regs->mask_set; unsigned int i; for (i = 0; i < eint->hw->ap_num; i += 32) { - writel(0xffffffff, reg); - reg += 4; + writel(0xffffffff, dom_en); + writel(0xffffffff, mask_set); + dom_en += 4; + mask_set += 4; } return 0; diff --git a/drivers/pinctrl/pinctrl-single.c b/drivers/pinctrl/pinctrl-single.c index 67bec7ea0f8b..414ee6bb8ac9 100644 --- a/drivers/pinctrl/pinctrl-single.c +++ b/drivers/pinctrl/pinctrl-single.c @@ -727,7 +727,7 @@ static int pcs_allocate_pin_table(struct pcs_device *pcs) mux_bytes = pcs->width / BITS_PER_BYTE; - if (pcs->bits_per_mux) { + if (pcs->bits_per_mux && pcs->fmask) { pcs->bits_per_pin = fls(pcs->fmask); nr_pins = (pcs->size * BITS_PER_BYTE) / pcs->bits_per_pin; } else { diff --git a/fs/nilfs2/dat.c b/fs/nilfs2/dat.c index 3b55e239705f..9930fa901039 100644 --- a/fs/nilfs2/dat.c +++ b/fs/nilfs2/dat.c @@ -111,6 +111,13 @@ static void nilfs_dat_commit_free(struct inode *dat, kunmap_atomic(kaddr); nilfs_dat_commit_entry(dat, req); + + if (unlikely(req->pr_desc_bh == NULL || req->pr_bitmap_bh == NULL)) { + nilfs_error(dat->i_sb, + "state inconsistency probably due to duplicate use of vblocknr = %llu", + (unsigned long long)req->pr_entry_nr); + return; + } nilfs_palloc_commit_free_entry(dat, req); } diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h index 492dce43236e..cab7cfebf40b 100644 --- a/include/asm-generic/tlb.h +++ b/include/asm-generic/tlb.h @@ -222,12 +222,16 @@ extern void tlb_remove_table(struct mmu_gather *tlb, void *table); #define tlb_needs_table_invalidate() (true) #endif +void tlb_remove_table_sync_one(void); + #else #ifdef tlb_needs_table_invalidate #error tlb_needs_table_invalidate() requires MMU_GATHER_RCU_TABLE_FREE #endif +static inline void tlb_remove_table_sync_one(void) { } + #endif /* CONFIG_MMU_GATHER_RCU_TABLE_FREE */ diff --git a/include/linux/gfp.h b/include/linux/gfp.h index ef4aea3b356e..65a78773dcca 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -210,6 +210,20 @@ alloc_pages_bulk_array_node(gfp_t gfp, int nid, unsigned long nr_pages, struct p return __alloc_pages_bulk(gfp, nid, NULL, nr_pages, NULL, page_array); } +static inline void warn_if_node_offline(int this_node, gfp_t gfp_mask) +{ + gfp_t warn_gfp = gfp_mask & (__GFP_THISNODE|__GFP_NOWARN); + + if (warn_gfp != (__GFP_THISNODE|__GFP_NOWARN)) + return; + + if (node_online(this_node)) + return; + + pr_warn("%pGg allocation from offline node %d\n", &gfp_mask, this_node); + dump_stack(); +} + /* * Allocate pages, preferring the node given as nid. The node must be valid and * online. For more general interface, see alloc_pages_node(). @@ -218,7 +232,7 @@ static inline struct page * __alloc_pages_node(int nid, gfp_t gfp_mask, unsigned int order) { VM_BUG_ON(nid < 0 || nid >= MAX_NUMNODES); - VM_WARN_ON((gfp_mask & __GFP_THISNODE) && !node_online(nid)); + warn_if_node_offline(nid, gfp_mask); return __alloc_pages(gfp_mask, order, nid, NULL); } @@ -227,7 +241,7 @@ static inline struct folio *__folio_alloc_node(gfp_t gfp, unsigned int order, int nid) { VM_BUG_ON(nid < 0 || nid >= MAX_NUMNODES); - VM_WARN_ON((gfp & __GFP_THISNODE) && !node_online(nid)); + warn_if_node_offline(nid, gfp); return __folio_alloc(gfp, order, nid, NULL); } diff --git a/include/linux/license.h b/include/linux/license.h index ad937f57f2cb..7cce390f120b 100644 --- a/include/linux/license.h +++ b/include/linux/license.h @@ -2,8 +2,6 @@ #ifndef __LICENSE_H #define __LICENSE_H -#include - static inline int license_is_gpl_compatible(const char *license) { return (strcmp(license, "GPL") == 0 diff --git a/include/linux/mm.h b/include/linux/mm.h index 8bbcccbc5565..974ccca609d2 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1852,6 +1852,25 @@ static void __maybe_unused show_free_areas(unsigned int flags, nodemask_t *nodem __show_free_areas(flags, nodemask, MAX_NR_ZONES - 1); } +/* + * Parameter block passed down to zap_pte_range in exceptional cases. + */ +struct zap_details { + struct folio *single_folio; /* Locked folio to be unmapped */ + bool even_cows; /* Zap COWed private pages too? */ + zap_flags_t zap_flags; /* Extra flags for zapping */ +}; + +/* + * Whether to drop the pte markers, for example, the uffd-wp information for + * file-backed memory. This should only be specified when we will completely + * drop the page in the mm, either by truncation or unmapping of the vma. By + * default, the flag is not set. + */ +#define ZAP_FLAG_DROP_MARKER ((__force zap_flags_t) BIT(0)) +/* Set in unmap_vmas() to indicate a final unmap call. Only used by hugetlb */ +#define ZAP_FLAG_UNMAP ((__force zap_flags_t) BIT(1)) + #ifdef CONFIG_MMU extern bool can_do_mlock(void); #else @@ -1869,6 +1888,8 @@ void zap_vma_ptes(struct vm_area_struct *vma, unsigned long address, unsigned long size); void zap_page_range(struct vm_area_struct *vma, unsigned long address, unsigned long size); +void zap_page_range_single(struct vm_area_struct *vma, unsigned long address, + unsigned long size, struct zap_details *details); void unmap_vmas(struct mmu_gather *tlb, struct maple_tree *mt, struct vm_area_struct *start_vma, unsigned long start, unsigned long end); @@ -3467,12 +3488,4 @@ madvise_set_anon_name(struct mm_struct *mm, unsigned long start, } #endif -/* - * Whether to drop the pte markers, for example, the uffd-wp information for - * file-backed memory. This should only be specified when we will completely - * drop the page in the mm, either by truncation or unmapping of the vma. By - * default, the flag is not set. - */ -#define ZAP_FLAG_DROP_MARKER ((__force zap_flags_t) BIT(0)) - #endif /* _LINUX_MM_H */ diff --git a/include/linux/mmc/mmc.h b/include/linux/mmc/mmc.h index 9c50bc40f8ff..6f7993803ee7 100644 --- a/include/linux/mmc/mmc.h +++ b/include/linux/mmc/mmc.h @@ -451,7 +451,7 @@ static inline bool mmc_ready_for_data(u32 status) #define MMC_SECURE_TRIM1_ARG 0x80000001 #define MMC_SECURE_TRIM2_ARG 0x80008000 #define MMC_SECURE_ARGS 0x80000000 -#define MMC_TRIM_ARGS 0x00008001 +#define MMC_TRIM_OR_DISCARD_ARGS 0x00008003 #define mmc_driver_type_mask(n) (1 << (n)) diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h index a108b60a6962..5f0d7d0b9471 100644 --- a/include/linux/pgtable.h +++ b/include/linux/pgtable.h @@ -165,6 +165,13 @@ static inline pte_t *virt_to_kpte(unsigned long vaddr) return pmd_none(*pmd) ? NULL : pte_offset_kernel(pmd, vaddr); } +#ifndef pmd_young +static inline int pmd_young(pmd_t pmd) +{ + return 0; +} +#endif + #ifndef __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS extern int ptep_set_access_flags(struct vm_area_struct *vma, unsigned long address, pte_t *ptep, @@ -260,6 +267,17 @@ static inline int pmdp_clear_flush_young(struct vm_area_struct *vma, #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ #endif +#ifndef arch_has_hw_nonleaf_pmd_young +/* + * Return whether the accessed bit in non-leaf PMD entries is supported on the + * local CPU. + */ +static inline bool arch_has_hw_nonleaf_pmd_young(void) +{ + return IS_ENABLED(CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG); +} +#endif + #ifndef arch_has_hw_pte_young /* * Return whether the accessed bit is supported on the local CPU. diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index a1005415f0f4..3638b3424be5 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -399,6 +399,7 @@ config FRAME_WARN default 2048 if GCC_PLUGIN_LATENT_ENTROPY default 2048 if PARISC default 1536 if (!64BIT && XTENSA) + default 1280 if KASAN && !64BIT default 1024 if !64BIT default 2048 if 64BIT help @@ -1874,8 +1875,14 @@ config NETDEV_NOTIFIER_ERROR_INJECT If unsure, say N. config FUNCTION_ERROR_INJECTION - def_bool y + bool "Fault-injections of functions" depends on HAVE_FUNCTION_ERROR_INJECTION && KPROBES + help + Add fault injections into various functions that are annotated with + ALLOW_ERROR_INJECTION() in the kernel. BPF may also modify the return + value of theses functions. This is useful to test error paths of code. + + If unsure, say N config FAULT_INJECTION bool "Fault-injection framework" diff --git a/mm/compaction.c b/mm/compaction.c index c51f7f545afe..1f6da31dd9a5 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -984,22 +984,6 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn, goto isolate_fail; } - /* - * Migration will fail if an anonymous page is pinned in memory, - * so avoid taking lru_lock and isolating it unnecessarily in an - * admittedly racy check. - */ - mapping = page_mapping(page); - if (!mapping && page_count(page) > page_mapcount(page)) - goto isolate_fail; - - /* - * Only allow to migrate anonymous pages in GFP_NOFS context - * because those do not depend on fs locks. - */ - if (!(cc->gfp_mask & __GFP_FS) && mapping) - goto isolate_fail; - /* * Be careful not to clear PageLRU until after we're * sure the page is not being freed elsewhere -- the @@ -1008,6 +992,22 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn, if (unlikely(!get_page_unless_zero(page))) goto isolate_fail; + /* + * Migration will fail if an anonymous page is pinned in memory, + * so avoid taking lru_lock and isolating it unnecessarily in an + * admittedly racy check. + */ + mapping = page_mapping(page); + if (!mapping && (page_count(page) - 1) > total_mapcount(page)) + goto isolate_fail_put; + + /* + * Only allow to migrate anonymous pages in GFP_NOFS context + * because those do not depend on fs locks. + */ + if (!(cc->gfp_mask & __GFP_FS) && mapping) + goto isolate_fail_put; + /* Only take pages on LRU: a check now makes later tests safe */ if (!PageLRU(page)) goto isolate_fail_put; diff --git a/mm/damon/sysfs.c b/mm/damon/sysfs.c index 5ce403378c20..07e5f1bdf025 100644 --- a/mm/damon/sysfs.c +++ b/mm/damon/sysfs.c @@ -2283,12 +2283,54 @@ static struct damos *damon_sysfs_mk_scheme( &wmarks); } +static void damon_sysfs_update_scheme(struct damos *scheme, + struct damon_sysfs_scheme *sysfs_scheme) +{ + struct damon_sysfs_access_pattern *access_pattern = + sysfs_scheme->access_pattern; + struct damon_sysfs_quotas *sysfs_quotas = sysfs_scheme->quotas; + struct damon_sysfs_weights *sysfs_weights = sysfs_quotas->weights; + struct damon_sysfs_watermarks *sysfs_wmarks = sysfs_scheme->watermarks; + + scheme->pattern.min_sz_region = access_pattern->sz->min; + scheme->pattern.max_sz_region = access_pattern->sz->max; + scheme->pattern.min_nr_accesses = access_pattern->nr_accesses->min; + scheme->pattern.max_nr_accesses = access_pattern->nr_accesses->max; + scheme->pattern.min_age_region = access_pattern->age->min; + scheme->pattern.max_age_region = access_pattern->age->max; + + scheme->action = sysfs_scheme->action; + + scheme->quota.ms = sysfs_quotas->ms; + scheme->quota.sz = sysfs_quotas->sz; + scheme->quota.reset_interval = sysfs_quotas->reset_interval_ms; + scheme->quota.weight_sz = sysfs_weights->sz; + scheme->quota.weight_nr_accesses = sysfs_weights->nr_accesses; + scheme->quota.weight_age = sysfs_weights->age; + + scheme->wmarks.metric = sysfs_wmarks->metric; + scheme->wmarks.interval = sysfs_wmarks->interval_us; + scheme->wmarks.high = sysfs_wmarks->high; + scheme->wmarks.mid = sysfs_wmarks->mid; + scheme->wmarks.low = sysfs_wmarks->low; +} + static int damon_sysfs_set_schemes(struct damon_ctx *ctx, struct damon_sysfs_schemes *sysfs_schemes) { - int i; + struct damos *scheme, *next; + int i = 0; - for (i = 0; i < sysfs_schemes->nr; i++) { + damon_for_each_scheme_safe(scheme, next, ctx) { + if (i < sysfs_schemes->nr) + damon_sysfs_update_scheme(scheme, + sysfs_schemes->schemes_arr[i]); + else + damon_destroy_scheme(scheme); + i++; + } + + for (; i < sysfs_schemes->nr; i++) { struct damos *scheme, *next; scheme = damon_sysfs_mk_scheme(sysfs_schemes->schemes_arr[i]); diff --git a/mm/hugetlb.c b/mm/hugetlb.c index f1385c3b6c96..e36ca75311a5 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -5206,17 +5206,22 @@ void __unmap_hugepage_range_final(struct mmu_gather *tlb, __unmap_hugepage_range(tlb, vma, start, end, ref_page, zap_flags); - /* - * Unlock and free the vma lock before releasing i_mmap_rwsem. When - * the vma_lock is freed, this makes the vma ineligible for pmd - * sharing. And, i_mmap_rwsem is required to set up pmd sharing. - * This is important as page tables for this unmapped range will - * be asynchrously deleted. If the page tables are shared, there - * will be issues when accessed by someone else. - */ - __hugetlb_vma_unlock_write_free(vma); - - i_mmap_unlock_write(vma->vm_file->f_mapping); + if (zap_flags & ZAP_FLAG_UNMAP) { /* final unmap */ + /* + * Unlock and free the vma lock before releasing i_mmap_rwsem. + * When the vma_lock is freed, this makes the vma ineligible + * for pmd sharing. And, i_mmap_rwsem is required to set up + * pmd sharing. This is important as page tables for this + * unmapped range will be asynchrously deleted. If the page + * tables are shared, there will be issues when accessed by + * someone else. + */ + __hugetlb_vma_unlock_write_free(vma); + i_mmap_unlock_write(vma->vm_file->f_mapping); + } else { + i_mmap_unlock_write(vma->vm_file->f_mapping); + hugetlb_vma_unlock_write(vma); + } } void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start, diff --git a/mm/khugepaged.c b/mm/khugepaged.c index a8d5ef2a77d2..3703a56571c1 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -1051,6 +1051,7 @@ static int collapse_huge_page(struct mm_struct *mm, unsigned long address, _pmd = pmdp_collapse_flush(vma, address, pmd); spin_unlock(pmd_ptl); mmu_notifier_invalidate_range_end(&range); + tlb_remove_table_sync_one(); spin_lock(pte_ptl); result = __collapse_huge_page_isolate(vma, address, pte, cc, @@ -1379,16 +1380,43 @@ static int set_huge_pmd(struct vm_area_struct *vma, unsigned long addr, return SCAN_SUCCEED; } +/* + * A note about locking: + * Trying to take the page table spinlocks would be useless here because those + * are only used to synchronize: + * + * - modifying terminal entries (ones that point to a data page, not to another + * page table) + * - installing *new* non-terminal entries + * + * Instead, we need roughly the same kind of protection as free_pgtables() or + * mm_take_all_locks() (but only for a single VMA): + * The mmap lock together with this VMA's rmap locks covers all paths towards + * the page table entries we're messing with here, except for hardware page + * table walks and lockless_pages_from_mm(). + */ static void collapse_and_free_pmd(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long addr, pmd_t *pmdp) { - spinlock_t *ptl; pmd_t pmd; + struct mmu_notifier_range range; mmap_assert_write_locked(mm); - ptl = pmd_lock(vma->vm_mm, pmdp); + if (vma->vm_file) + lockdep_assert_held_write(&vma->vm_file->f_mapping->i_mmap_rwsem); + /* + * All anon_vmas attached to the VMA have the same root and are + * therefore locked by the same lock. + */ + if (vma->anon_vma) + lockdep_assert_held_write(&vma->anon_vma->root->rwsem); + + mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, NULL, mm, addr, + addr + HPAGE_PMD_SIZE); + mmu_notifier_invalidate_range_start(&range); pmd = pmdp_collapse_flush(vma, addr, pmdp); - spin_unlock(ptl); + tlb_remove_table_sync_one(); + mmu_notifier_invalidate_range_end(&range); mm_dec_nr_ptes(mm); page_table_check_pte_clear_range(mm, addr, pmd); pte_free(mm, pmd_pgtable(pmd)); @@ -1439,6 +1467,14 @@ int collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr, if (!hugepage_vma_check(vma, vma->vm_flags, false, false, false)) return SCAN_VMA_CHECK; + /* + * Symmetry with retract_page_tables(): Exclude MAP_PRIVATE mappings + * that got written to. Without this, we'd have to also lock the + * anon_vma if one exists. + */ + if (vma->anon_vma) + return SCAN_VMA_CHECK; + /* Keep pmd pgtable for uffd-wp; see comment in retract_page_tables() */ if (userfaultfd_wp(vma)) return SCAN_PTE_UFFD_WP; @@ -1472,6 +1508,20 @@ int collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr, goto drop_hpage; } + /* + * We need to lock the mapping so that from here on, only GUP-fast and + * hardware page walks can access the parts of the page tables that + * we're operating on. + * See collapse_and_free_pmd(). + */ + i_mmap_lock_write(vma->vm_file->f_mapping); + + /* + * This spinlock should be unnecessary: Nobody else should be accessing + * the page tables under spinlock protection here, only + * lockless_pages_from_mm() and the hardware page walker can access page + * tables while all the high-level locks are held in write mode. + */ start_pte = pte_offset_map_lock(mm, pmd, haddr, &ptl); result = SCAN_FAIL; @@ -1526,6 +1576,8 @@ int collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr, /* step 4: remove pte entries */ collapse_and_free_pmd(mm, vma, haddr, pmd); + i_mmap_unlock_write(vma->vm_file->f_mapping); + maybe_install_pmd: /* step 5: install pmd entry */ result = install_pmd @@ -1539,6 +1591,7 @@ drop_hpage: abort: pte_unmap_unlock(start_pte, ptl); + i_mmap_unlock_write(vma->vm_file->f_mapping); goto drop_hpage; } @@ -1595,7 +1648,8 @@ static int retract_page_tables(struct address_space *mapping, pgoff_t pgoff, * An alternative would be drop the check, but check that page * table is clear before calling pmdp_collapse_flush() under * ptl. It has higher chance to recover THP for the VMA, but - * has higher cost too. + * has higher cost too. It would also probably require locking + * the anon_vma. */ if (vma->anon_vma) { result = SCAN_PAGE_ANON; diff --git a/mm/madvise.c b/mm/madvise.c index c7105ec6d08c..b913ba6efc10 100644 --- a/mm/madvise.c +++ b/mm/madvise.c @@ -772,8 +772,8 @@ static int madvise_free_single_vma(struct vm_area_struct *vma, * Application no longer needs these pages. If the pages are dirty, * it's OK to just throw them away. The app will be more careful about * data it wants to keep. Be sure to free swap resources too. The - * zap_page_range call sets things up for shrink_active_list to actually free - * these pages later if no one else has touched them in the meantime, + * zap_page_range_single call sets things up for shrink_active_list to actually + * free these pages later if no one else has touched them in the meantime, * although we could add these pages to a global reuse list for * shrink_active_list to pick up before reclaiming other pages. * @@ -790,7 +790,7 @@ static int madvise_free_single_vma(struct vm_area_struct *vma, static long madvise_dontneed_single_vma(struct vm_area_struct *vma, unsigned long start, unsigned long end) { - zap_page_range(vma, start, end - start); + zap_page_range_single(vma, start, end - start, NULL); return 0; } diff --git a/mm/memory.c b/mm/memory.c index 2d43a75f1d6a..dcb42980fddf 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -1342,15 +1342,6 @@ copy_page_range(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma) return ret; } -/* - * Parameter block passed down to zap_pte_range in exceptional cases. - */ -struct zap_details { - struct folio *single_folio; /* Locked folio to be unmapped */ - bool even_cows; /* Zap COWed private pages too? */ - zap_flags_t zap_flags; /* Extra flags for zapping */ -}; - /* Whether we should zap all COWed (private) pages too */ static inline bool should_zap_cows(struct zap_details *details) { @@ -1721,7 +1712,7 @@ void unmap_vmas(struct mmu_gather *tlb, struct maple_tree *mt, { struct mmu_notifier_range range; struct zap_details details = { - .zap_flags = ZAP_FLAG_DROP_MARKER, + .zap_flags = ZAP_FLAG_DROP_MARKER | ZAP_FLAG_UNMAP, /* Careful - we need to zap private pages too! */ .even_cows = true, }; @@ -1775,19 +1766,27 @@ void zap_page_range(struct vm_area_struct *vma, unsigned long start, * * The range must fit into one VMA. */ -static void zap_page_range_single(struct vm_area_struct *vma, unsigned long address, +void zap_page_range_single(struct vm_area_struct *vma, unsigned long address, unsigned long size, struct zap_details *details) { + const unsigned long end = address + size; struct mmu_notifier_range range; struct mmu_gather tlb; lru_add_drain(); mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, vma->vm_mm, - address, address + size); + address, end); + if (is_vm_hugetlb_page(vma)) + adjust_range_if_pmd_sharing_possible(vma, &range.start, + &range.end); tlb_gather_mmu(&tlb, vma->vm_mm); update_hiwater_rss(vma->vm_mm); mmu_notifier_invalidate_range_start(&range); - unmap_single_vma(&tlb, vma, address, range.end, details); + /* + * unmap 'address-end' not 'range.start-range.end' as range + * could have been expanded for hugetlb pmd sharing. + */ + unmap_single_vma(&tlb, vma, address, end, details); mmu_notifier_invalidate_range_end(&range); tlb_finish_mmu(&tlb); } diff --git a/mm/mmu_gather.c b/mm/mmu_gather.c index add4244e5790..3a2c3f8cad2f 100644 --- a/mm/mmu_gather.c +++ b/mm/mmu_gather.c @@ -153,7 +153,7 @@ static void tlb_remove_table_smp_sync(void *arg) /* Simply deliver the interrupt */ } -static void tlb_remove_table_sync_one(void) +void tlb_remove_table_sync_one(void) { /* * This isn't an RCU grace period and hence the page-tables cannot be @@ -177,8 +177,6 @@ static void tlb_remove_table_free(struct mmu_table_batch *batch) #else /* !CONFIG_MMU_GATHER_RCU_TABLE_FREE */ -static void tlb_remove_table_sync_one(void) { } - static void tlb_remove_table_free(struct mmu_table_batch *batch) { __tlb_remove_table_free(batch); diff --git a/mm/vmscan.c b/mm/vmscan.c index a65ebc0c6847..3fdce84898af 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -3995,7 +3995,7 @@ static void walk_pmd_range_locked(pud_t *pud, unsigned long next, struct vm_area goto next; if (!pmd_trans_huge(pmd[i])) { - if (IS_ENABLED(CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG) && + if (arch_has_hw_nonleaf_pmd_young() && get_cap(LRU_GEN_NONLEAF_YOUNG)) pmdp_test_and_clear_young(vma, addr, pmd + i); goto next; @@ -4093,14 +4093,14 @@ restart: #endif walk->mm_stats[MM_NONLEAF_TOTAL]++; -#ifdef CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG - if (get_cap(LRU_GEN_NONLEAF_YOUNG)) { + if (arch_has_hw_nonleaf_pmd_young() && + get_cap(LRU_GEN_NONLEAF_YOUNG)) { if (!pmd_young(val)) continue; walk_pmd_range_locked(pud, addr, vma, args, bitmap, &pos); } -#endif + if (!walk->force_scan && !test_bloom_filter(walk->lruvec, walk->max_seq, pmd + i)) continue; @@ -5400,7 +5400,7 @@ static ssize_t show_enabled(struct kobject *kobj, struct kobj_attribute *attr, c if (arch_has_hw_pte_young() && get_cap(LRU_GEN_MM_WALK)) caps |= BIT(LRU_GEN_MM_WALK); - if (IS_ENABLED(CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG) && get_cap(LRU_GEN_NONLEAF_YOUNG)) + if (arch_has_hw_nonleaf_pmd_young() && get_cap(LRU_GEN_NONLEAF_YOUNG)) caps |= BIT(LRU_GEN_NONLEAF_YOUNG); return snprintf(buf, PAGE_SIZE, "0x%04x\n", caps); diff --git a/sound/firewire/dice/dice-stream.c b/sound/firewire/dice/dice-stream.c index f99e00083141..4c677c8546c7 100644 --- a/sound/firewire/dice/dice-stream.c +++ b/sound/firewire/dice/dice-stream.c @@ -59,7 +59,7 @@ int snd_dice_stream_get_rate_mode(struct snd_dice *dice, unsigned int rate, static int select_clock(struct snd_dice *dice, unsigned int rate) { - __be32 reg; + __be32 reg, new; u32 data; int i; int err; @@ -83,15 +83,17 @@ static int select_clock(struct snd_dice *dice, unsigned int rate) if (completion_done(&dice->clock_accepted)) reinit_completion(&dice->clock_accepted); - reg = cpu_to_be32(data); + new = cpu_to_be32(data); err = snd_dice_transaction_write_global(dice, GLOBAL_CLOCK_SELECT, - ®, sizeof(reg)); + &new, sizeof(new)); if (err < 0) return err; if (wait_for_completion_timeout(&dice->clock_accepted, - msecs_to_jiffies(NOTIFICATION_TIMEOUT_MS)) == 0) - return -ETIMEDOUT; + msecs_to_jiffies(NOTIFICATION_TIMEOUT_MS)) == 0) { + if (reg != new) + return -ETIMEDOUT; + } return 0; } diff --git a/sound/soc/codecs/cs42l51.c b/sound/soc/codecs/cs42l51.c index 51721edd8f53..e88d9ff95cdf 100644 --- a/sound/soc/codecs/cs42l51.c +++ b/sound/soc/codecs/cs42l51.c @@ -143,7 +143,7 @@ static const struct snd_kcontrol_new cs42l51_snd_controls[] = { 0, 0xA0, 96, adc_att_tlv), SOC_DOUBLE_R_SX_TLV("PGA Volume", CS42L51_ALC_PGA_CTL, CS42L51_ALC_PGB_CTL, - 0, 0x19, 30, pga_tlv), + 0, 0x1A, 30, pga_tlv), SOC_SINGLE("Playback Deemphasis Switch", CS42L51_DAC_CTL, 3, 1, 0), SOC_SINGLE("Auto-Mute Switch", CS42L51_DAC_CTL, 2, 1, 0), SOC_SINGLE("Soft Ramp Switch", CS42L51_DAC_CTL, 1, 1, 0), diff --git a/sound/soc/codecs/tlv320adc3xxx.c b/sound/soc/codecs/tlv320adc3xxx.c index a969547708d4..52bb55724724 100644 --- a/sound/soc/codecs/tlv320adc3xxx.c +++ b/sound/soc/codecs/tlv320adc3xxx.c @@ -14,6 +14,7 @@ #include #include +#include #include #include #include @@ -1025,7 +1026,9 @@ static const struct gpio_chip adc3xxx_gpio_chip = { static void adc3xxx_free_gpio(struct adc3xxx *adc3xxx) { +#ifdef CONFIG_GPIOLIB gpiochip_remove(&adc3xxx->gpio_chip); +#endif } static void adc3xxx_init_gpio(struct adc3xxx *adc3xxx) diff --git a/sound/soc/fsl/fsl_micfil.c b/sound/soc/fsl/fsl_micfil.c index 79ef4e269bc9..4b86ef82fd93 100644 --- a/sound/soc/fsl/fsl_micfil.c +++ b/sound/soc/fsl/fsl_micfil.c @@ -194,6 +194,25 @@ static int fsl_micfil_reset(struct device *dev) if (ret) return ret; + /* + * SRES is self-cleared bit, but REG_MICFIL_CTRL1 is defined + * as non-volatile register, so SRES still remain in regmap + * cache after set, that every update of REG_MICFIL_CTRL1, + * software reset happens. so clear it explicitly. + */ + ret = regmap_clear_bits(micfil->regmap, REG_MICFIL_CTRL1, + MICFIL_CTRL1_SRES); + if (ret) + return ret; + + /* + * Set SRES should clear CHnF flags, But even add delay here + * the CHnF may not be cleared sometimes, so clear CHnF explicitly. + */ + ret = regmap_write_bits(micfil->regmap, REG_MICFIL_STAT, 0xFF, 0xFF); + if (ret) + return ret; + return 0; } diff --git a/sound/soc/soc-ops.c b/sound/soc/soc-ops.c index bd88de056358..55b009d3c681 100644 --- a/sound/soc/soc-ops.c +++ b/sound/soc/soc-ops.c @@ -452,7 +452,7 @@ int snd_soc_put_volsw_sx(struct snd_kcontrol *kcontrol, val = ucontrol->value.integer.value[0]; if (mc->platform_max && val > mc->platform_max) return -EINVAL; - if (val > max - min) + if (val > max) return -EINVAL; val_mask = mask << shift; val = (val + min) & mask; @@ -464,10 +464,15 @@ int snd_soc_put_volsw_sx(struct snd_kcontrol *kcontrol, ret = err; if (snd_soc_volsw_is_stereo(mc)) { - unsigned int val2; + unsigned int val2 = ucontrol->value.integer.value[1]; + + if (mc->platform_max && val2 > mc->platform_max) + return -EINVAL; + if (val2 > max) + return -EINVAL; val_mask = mask << rshift; - val2 = (ucontrol->value.integer.value[1] + min) & mask; + val2 = (val2 + min) & mask; val2 = val2 << rshift; err = snd_soc_component_update_bits(component, reg2, val_mask, diff --git a/tools/vm/slabinfo-gnuplot.sh b/tools/vm/slabinfo-gnuplot.sh index 26e193ffd2a2..873a892147e5 100644 --- a/tools/vm/slabinfo-gnuplot.sh +++ b/tools/vm/slabinfo-gnuplot.sh @@ -150,7 +150,7 @@ do_preprocess() let lines=3 out=`basename "$in"`"-slabs-by-loss" `cat "$in" | grep -A "$lines" 'Slabs sorted by loss' |\ - egrep -iv '\-\-|Name|Slabs'\ + grep -E -iv '\-\-|Name|Slabs'\ | awk '{print $1" "$4+$2*$3" "$4}' > "$out"` if [ $? -eq 0 ]; then do_slabs_plotting "$out" @@ -159,7 +159,7 @@ do_preprocess() let lines=3 out=`basename "$in"`"-slabs-by-size" `cat "$in" | grep -A "$lines" 'Slabs sorted by size' |\ - egrep -iv '\-\-|Name|Slabs'\ + grep -E -iv '\-\-|Name|Slabs'\ | awk '{print $1" "$4" "$4-$2*$3}' > "$out"` if [ $? -eq 0 ]; then do_slabs_plotting "$out"