From 944fad4583bc8a6d7dd80fbe39db50141da95793 Mon Sep 17 00:00:00 2001 From: Lukas Bulwahn Date: Tue, 1 Feb 2022 15:40:55 +0100 Subject: [PATCH 01/15] x86/fault: Cast an argument to the proper address space in prefetch() Commit in Fixes uses accessors based on the access mode, i.e., it distinguishes its access if instr carries a user address or a kernel address. Since that commit, sparse complains about passing an argument without __user annotation to get_user(), which expects a pointer of the __user address space: arch/x86/mm/fault.c:152:29: warning: incorrect type in argument 1 (different address spaces) arch/x86/mm/fault.c:152:29: expected void const volatile [noderef] __user *ptr arch/x86/mm/fault.c:152:29: got unsigned char *[assigned] instr Cast instr to __user when accessing user memory. No functional change. No change in the generated object code. [ bp: Simplify commit message. ] Fixes: 35f1c89b0cce ("x86/fault: Fix AMD erratum #91 errata fixup for user code") Signed-off-by: Lukas Bulwahn Signed-off-by: Borislav Petkov Link: https://lore.kernel.org/r/20220201144055.5670-1-lukas.bulwahn@gmail.com --- arch/x86/mm/fault.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index d0074c6ed31a..fad8faa29d04 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -149,7 +149,7 @@ is_prefetch(struct pt_regs *regs, unsigned long error_code, unsigned long addr) unsigned char opcode; if (user_mode(regs)) { - if (get_user(opcode, instr)) + if (get_user(opcode, (unsigned char __user *) instr)) break; } else { if (get_kernel_nofault(opcode, instr)) From d812f7c475c6a4dcfff02a85fbfd7a9c87e6a094 Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Wed, 6 Apr 2022 14:51:47 -0500 Subject: [PATCH 02/15] x86/platform/uv: Update NMI Handler for UV5 Update NMI handler for UV5 hardware. A platform register changed, and UV5 only uses one of the two NMI methods used on previous hardware. Signed-off-by: Mike Travis Signed-off-by: Steve Wahl Signed-off-by: Borislav Petkov Reviewed-by: Dimitri Sivanich Acked-by: Thomas Gleixner Link: https://lore.kernel.org/r/20220406195149.228164-2-steve.wahl@hpe.com --- arch/x86/platform/uv/uv_nmi.c | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/arch/x86/platform/uv/uv_nmi.c b/arch/x86/platform/uv/uv_nmi.c index 1e9ff28bc2e0..61ec3be77328 100644 --- a/arch/x86/platform/uv/uv_nmi.c +++ b/arch/x86/platform/uv/uv_nmi.c @@ -244,8 +244,10 @@ static inline bool uv_nmi_action_is(const char *action) /* Setup which NMI support is present in system */ static void uv_nmi_setup_mmrs(void) { + bool new_nmi_method_only = false; + /* First determine arch specific MMRs to handshake with BIOS */ - if (UVH_EVENT_OCCURRED0_EXTIO_INT0_MASK) { + if (UVH_EVENT_OCCURRED0_EXTIO_INT0_MASK) { /* UV2,3,4 setup */ uvh_nmi_mmrx = UVH_EVENT_OCCURRED0; uvh_nmi_mmrx_clear = UVH_EVENT_OCCURRED0_ALIAS; uvh_nmi_mmrx_shift = UVH_EVENT_OCCURRED0_EXTIO_INT0_SHFT; @@ -255,26 +257,25 @@ static void uv_nmi_setup_mmrs(void) uvh_nmi_mmrx_req = UVH_BIOS_KERNEL_MMR_ALIAS_2; uvh_nmi_mmrx_req_shift = 62; - } else if (UVH_EVENT_OCCURRED1_EXTIO_INT0_MASK) { + } else if (UVH_EVENT_OCCURRED1_EXTIO_INT0_MASK) { /* UV5+ setup */ uvh_nmi_mmrx = UVH_EVENT_OCCURRED1; uvh_nmi_mmrx_clear = UVH_EVENT_OCCURRED1_ALIAS; uvh_nmi_mmrx_shift = UVH_EVENT_OCCURRED1_EXTIO_INT0_SHFT; uvh_nmi_mmrx_type = "OCRD1-EXTIO_INT0"; - uvh_nmi_mmrx_supported = UVH_EXTIO_INT0_BROADCAST; - uvh_nmi_mmrx_req = UVH_BIOS_KERNEL_MMR_ALIAS_2; - uvh_nmi_mmrx_req_shift = 62; + new_nmi_method_only = true; /* Newer nmi always valid on UV5+ */ + uvh_nmi_mmrx_req = 0; /* no request bit to clear */ } else { - pr_err("UV:%s:cannot find EVENT_OCCURRED*_EXTIO_INT0\n", - __func__); + pr_err("UV:%s:NMI support not available on this system\n", __func__); return; } /* Then find out if new NMI is supported */ - if (likely(uv_read_local_mmr(uvh_nmi_mmrx_supported))) { - uv_write_local_mmr(uvh_nmi_mmrx_req, - 1UL << uvh_nmi_mmrx_req_shift); + if (new_nmi_method_only || uv_read_local_mmr(uvh_nmi_mmrx_supported)) { + if (uvh_nmi_mmrx_req) + uv_write_local_mmr(uvh_nmi_mmrx_req, + 1UL << uvh_nmi_mmrx_req_shift); nmi_mmr = uvh_nmi_mmrx; nmi_mmr_clear = uvh_nmi_mmrx_clear; nmi_mmr_pending = 1UL << uvh_nmi_mmrx_shift; From bb3ab81bdbd53f88f26ffabc9fb15bd8466486ec Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Wed, 6 Apr 2022 14:51:48 -0500 Subject: [PATCH 03/15] x86/platform/uv: Update TSC sync state for UV5 The UV5 platform synchronizes the TSCs among all chassis, and will not proceed to OS boot without achieving synchronization. Previous UV platforms provided a register indicating successful synchronization. This is no longer available on UV5. On this platform TSC_ADJUST should not be reset by the kernel. Signed-off-by: Mike Travis Signed-off-by: Steve Wahl Signed-off-by: Borislav Petkov Reviewed-by: Dimitri Sivanich Acked-by: Thomas Gleixner Link: https://lore.kernel.org/r/20220406195149.228164-3-steve.wahl@hpe.com --- arch/x86/kernel/apic/x2apic_uv_x.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index f5a48e66e4f5..a6e9c2794ef5 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -199,7 +199,13 @@ static void __init uv_tsc_check_sync(void) int mmr_shift; char *state; - /* Different returns from different UV BIOS versions */ + /* UV5 guarantees synced TSCs; do not zero TSC_ADJUST */ + if (!is_uv(UV2|UV3|UV4)) { + mark_tsc_async_resets("UV5+"); + return; + } + + /* UV2,3,4, UV BIOS TSC sync state available */ mmr = uv_early_read_mmr(UVH_TSC_SYNC_MMR); mmr_shift = is_uv2_hub() ? UVH_TSC_SYNC_SHIFT_UV2K : UVH_TSC_SYNC_SHIFT; From 327c348988c6f0bacd7abd29c151f37bdf1e2a02 Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Wed, 6 Apr 2022 14:51:49 -0500 Subject: [PATCH 04/15] x86/platform/uv: Log gap hole end size Show value of gap end in the kernel log which equates to number of physical address bits used by system. Signed-off-by: Mike Travis Signed-off-by: Steve Wahl Signed-off-by: Borislav Petkov Acked-by: Thomas Gleixner Link: https://lore.kernel.org/r/20220406195149.228164-4-steve.wahl@hpe.com --- arch/x86/kernel/apic/x2apic_uv_x.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index a6e9c2794ef5..482855227964 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -1346,7 +1346,7 @@ static void __init decode_gam_params(unsigned long ptr) static void __init decode_gam_rng_tbl(unsigned long ptr) { struct uv_gam_range_entry *gre = (struct uv_gam_range_entry *)ptr; - unsigned long lgre = 0; + unsigned long lgre = 0, gend = 0; int index = 0; int sock_min = 999999, pnode_min = 99999; int sock_max = -1, pnode_max = -1; @@ -1380,6 +1380,9 @@ static void __init decode_gam_rng_tbl(unsigned long ptr) flag, size, suffix[order], gre->type, gre->nasid, gre->sockid, gre->pnode); + if (gre->type == UV_GAM_RANGE_TYPE_HOLE) + gend = (unsigned long)gre->limit << UV_GAM_RANGE_SHFT; + /* update to next range start */ lgre = gre->limit; if (sock_min > gre->sockid) @@ -1397,7 +1400,8 @@ static void __init decode_gam_rng_tbl(unsigned long ptr) _max_pnode = pnode_max; _gr_table_len = index; - pr_info("UV: GRT: %d entries, sockets(min:%x,max:%x) pnodes(min:%x,max:%x)\n", index, _min_socket, _max_socket, _min_pnode, _max_pnode); + pr_info("UV: GRT: %d entries, sockets(min:%x,max:%x), pnodes(min:%x,max:%x), gap_end(%d)\n", + index, _min_socket, _max_socket, _min_pnode, _max_pnode, fls64(gend)); } /* Walk through UVsystab decoding the fields */ From f5d9283ecb33329073033029fe427155aa0abfb1 Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Fri, 25 Mar 2022 11:39:50 -0400 Subject: [PATCH 05/15] x86/32: Simplify ELF_CORE_COPY_REGS GS is now always a user segment, so there is no difference between user and kernel registers. Signed-off-by: Brian Gerst Signed-off-by: Borislav Petkov Reviewed-by: Thomas Gleixner Acked-by: Andy Lutomirski Link: https://lore.kernel.org/r/20220325153953.162643-2-brgerst@gmail.com --- arch/x86/include/asm/elf.h | 15 ++------------- 1 file changed, 2 insertions(+), 13 deletions(-) diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h index 29fea180a665..cb0ff1055ab1 100644 --- a/arch/x86/include/asm/elf.h +++ b/arch/x86/include/asm/elf.h @@ -116,7 +116,7 @@ extern unsigned int vdso32_enabled; * now struct_user_regs, they are different) */ -#define ELF_CORE_COPY_REGS_COMMON(pr_reg, regs) \ +#define ELF_CORE_COPY_REGS(pr_reg, regs) \ do { \ pr_reg[0] = regs->bx; \ pr_reg[1] = regs->cx; \ @@ -128,6 +128,7 @@ do { \ pr_reg[7] = regs->ds; \ pr_reg[8] = regs->es; \ pr_reg[9] = regs->fs; \ + savesegment(gs, pr_reg[10]); \ pr_reg[11] = regs->orig_ax; \ pr_reg[12] = regs->ip; \ pr_reg[13] = regs->cs; \ @@ -136,18 +137,6 @@ do { \ pr_reg[16] = regs->ss; \ } while (0); -#define ELF_CORE_COPY_REGS(pr_reg, regs) \ -do { \ - ELF_CORE_COPY_REGS_COMMON(pr_reg, regs);\ - pr_reg[10] = get_user_gs(regs); \ -} while (0); - -#define ELF_CORE_COPY_KERNEL_REGS(pr_reg, regs) \ -do { \ - ELF_CORE_COPY_REGS_COMMON(pr_reg, regs);\ - savesegment(gs, pr_reg[10]); \ -} while (0); - #define ELF_PLATFORM (utsname()->machine) #define set_personality_64bit() do { } while (0) From daf3af4705ba8f49d33ea9b7bafdc9fd9efd49e0 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 12 Apr 2022 22:34:21 +0200 Subject: [PATCH 06/15] x86/apic: Clarify i82489DX bit overlap in APIC_LVT0 Daniel stumbled over the bit overlap of the i82498DX external APIC and the TSC deadline timer configuration bit in modern APICs, which is neither documented in the code nor in the current SDM. Maciej provided links to the original i82489DX/486 documentation. See Link. Remove the i82489DX macro maze, use a i82489DX specific define in the apic code and document the overlap in a comment. Reported-by: Daniel Vacek Signed-off-by: Thomas Gleixner Cc: Maciej W. Rozycki Link: https://lore.kernel.org/r/87ee22f3ci.ffs@tglx --- arch/x86/include/asm/apicdef.h | 6 ------ arch/x86/kernel/apic/apic.c | 11 ++++++++++- 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/arch/x86/include/asm/apicdef.h b/arch/x86/include/asm/apicdef.h index 5716f22f81ac..92035eb3afee 100644 --- a/arch/x86/include/asm/apicdef.h +++ b/arch/x86/include/asm/apicdef.h @@ -95,12 +95,6 @@ #define APIC_LVTTHMR 0x330 #define APIC_LVTPC 0x340 #define APIC_LVT0 0x350 -#define APIC_LVT_TIMER_BASE_MASK (0x3 << 18) -#define GET_APIC_TIMER_BASE(x) (((x) >> 18) & 0x3) -#define SET_APIC_TIMER_BASE(x) (((x) << 18)) -#define APIC_TIMER_BASE_CLKIN 0x0 -#define APIC_TIMER_BASE_TMBASE 0x1 -#define APIC_TIMER_BASE_DIV 0x2 #define APIC_LVT_TIMER_ONESHOT (0 << 17) #define APIC_LVT_TIMER_PERIODIC (1 << 17) #define APIC_LVT_TIMER_TSCDEADLINE (2 << 17) diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index b70344bf6600..13819bfa8dde 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -320,6 +320,9 @@ int lapic_get_maxlvt(void) #define APIC_DIVISOR 16 #define TSC_DIVISOR 8 +/* i82489DX specific */ +#define I82489DX_BASE_DIVIDER (((0x2) << 18)) + /* * This function sets up the local APIC timer, with a timeout of * 'clocks' APIC bus clock. During calibration we actually call @@ -340,8 +343,14 @@ static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen) else if (boot_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER)) lvtt_value |= APIC_LVT_TIMER_TSCDEADLINE; + /* + * The i82489DX APIC uses bit 18 and 19 for the base divider. This + * overlaps with bit 18 on integrated APICs, but is not documented + * in the SDM. No problem though. i82489DX equipped systems do not + * have TSC deadline timer. + */ if (!lapic_is_integrated()) - lvtt_value |= SET_APIC_TIMER_BASE(APIC_TIMER_BASE_DIV); + lvtt_value |= I82489DX_BASE_DIVIDER; if (!irqen) lvtt_value |= APIC_LVT_MASKED; From b57a7c9dd732ca29c4400a9a710c56c55877086d Mon Sep 17 00:00:00 2001 From: Eric DeVolder Date: Wed, 13 Apr 2022 12:42:30 -0400 Subject: [PATCH 07/15] x86/crash: Fix minor typo/bug in debug message The pr_debug() intends to display the memsz member, but the parameter is actually the bufsz member (which is already displayed). Correct this to display memsz value. Signed-off-by: Eric DeVolder Signed-off-by: Borislav Petkov Acked-by: Baoquan He Link: https://lore.kernel.org/r/20220413164237.20845-2-eric.devolder@oracle.com --- arch/x86/kernel/crash.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c index e8326a8d1c5d..9730c88530fc 100644 --- a/arch/x86/kernel/crash.c +++ b/arch/x86/kernel/crash.c @@ -407,7 +407,7 @@ int crash_load_segments(struct kimage *image) } image->elf_load_addr = kbuf.mem; pr_debug("Loaded ELF headers at 0x%lx bufsz=0x%lx memsz=0x%lx\n", - image->elf_load_addr, kbuf.bufsz, kbuf.bufsz); + image->elf_load_addr, kbuf.bufsz, kbuf.memsz); return ret; } From 9554e908fb5d02e48a681d1eca180225bf109e83 Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Fri, 25 Mar 2022 11:39:51 -0400 Subject: [PATCH 08/15] ELF: Remove elf_core_copy_kernel_regs() x86-32 was the last architecture that implemented separate user and kernel registers. Signed-off-by: Brian Gerst Signed-off-by: Borislav Petkov Reviewed-by: Thomas Gleixner Acked-by: Andy Lutomirski Acked-by: Michael Ellerman Link: https://lore.kernel.org/r/20220325153953.162643-3-brgerst@gmail.com --- arch/powerpc/kernel/fadump.c | 2 +- arch/powerpc/platforms/powernv/opal-core.c | 2 +- include/linux/elfcore.h | 9 --------- kernel/kexec_core.c | 2 +- 4 files changed, 3 insertions(+), 12 deletions(-) diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c index 65562c4a0a69..4c09c6688ac6 100644 --- a/arch/powerpc/kernel/fadump.c +++ b/arch/powerpc/kernel/fadump.c @@ -752,7 +752,7 @@ u32 *__init fadump_regs_to_elf_notes(u32 *buf, struct pt_regs *regs) * FIXME: How do i get PID? Do I really need it? * prstatus.pr_pid = ???? */ - elf_core_copy_kernel_regs(&prstatus.pr_reg, regs); + elf_core_copy_regs(&prstatus.pr_reg, regs); buf = append_elf_note(buf, CRASH_CORE_NOTE_NAME, NT_PRSTATUS, &prstatus, sizeof(prstatus)); return buf; diff --git a/arch/powerpc/platforms/powernv/opal-core.c b/arch/powerpc/platforms/powernv/opal-core.c index b97bc179f65a..adcb1a1a2bfe 100644 --- a/arch/powerpc/platforms/powernv/opal-core.c +++ b/arch/powerpc/platforms/powernv/opal-core.c @@ -112,7 +112,7 @@ static void __init fill_prstatus(struct elf_prstatus *prstatus, int pir, struct pt_regs *regs) { memset(prstatus, 0, sizeof(struct elf_prstatus)); - elf_core_copy_kernel_regs(&(prstatus->pr_reg), regs); + elf_core_copy_regs(&(prstatus->pr_reg), regs); /* * Overload PID with PIR value. diff --git a/include/linux/elfcore.h b/include/linux/elfcore.h index f8e206e82476..346a8b56cdc8 100644 --- a/include/linux/elfcore.h +++ b/include/linux/elfcore.h @@ -84,15 +84,6 @@ static inline void elf_core_copy_regs(elf_gregset_t *elfregs, struct pt_regs *re #endif } -static inline void elf_core_copy_kernel_regs(elf_gregset_t *elfregs, struct pt_regs *regs) -{ -#ifdef ELF_CORE_COPY_KERNEL_REGS - ELF_CORE_COPY_KERNEL_REGS((*elfregs), regs); -#else - elf_core_copy_regs(elfregs, regs); -#endif -} - static inline int elf_core_copy_task_regs(struct task_struct *t, elf_gregset_t* elfregs) { #if defined (ELF_CORE_COPY_TASK_REGS) diff --git a/kernel/kexec_core.c b/kernel/kexec_core.c index 68480f731192..be4b54c2c615 100644 --- a/kernel/kexec_core.c +++ b/kernel/kexec_core.c @@ -1078,7 +1078,7 @@ void crash_save_cpu(struct pt_regs *regs, int cpu) return; memset(&prstatus, 0, sizeof(prstatus)); prstatus.common.pr_pid = current->pid; - elf_core_copy_kernel_regs(&prstatus.pr_reg, regs); + elf_core_copy_regs(&prstatus.pr_reg, regs); buf = append_elf_note(buf, KEXEC_CORE_NOTE_NAME, NT_PRSTATUS, &prstatus, sizeof(prstatus)); final_note(buf); From 3a24a60854d2ef19e0edcd11cdbbb4fabc655dde Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Fri, 25 Mar 2022 11:39:52 -0400 Subject: [PATCH 09/15] x86/32: Remove lazy GS macros GS is always a user segment now. Signed-off-by: Brian Gerst Signed-off-by: Borislav Petkov Reviewed-by: Thomas Gleixner Acked-by: Andy Lutomirski Link: https://lore.kernel.org/r/20220325153953.162643-4-brgerst@gmail.com --- arch/x86/include/asm/mmu_context.h | 2 +- arch/x86/include/asm/segment.h | 5 ----- arch/x86/kernel/process.c | 5 +---- arch/x86/kernel/process_32.c | 11 ++++------- arch/x86/kernel/ptrace.c | 6 +++--- arch/x86/kernel/signal.c | 8 +++++--- arch/x86/kernel/vm86_32.c | 4 ++-- arch/x86/lib/insn-eval.c | 5 +++-- arch/x86/math-emu/get_address.c | 2 +- 9 files changed, 20 insertions(+), 28 deletions(-) diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h index 27516046117a..b8d40ddeab00 100644 --- a/arch/x86/include/asm/mmu_context.h +++ b/arch/x86/include/asm/mmu_context.h @@ -141,7 +141,7 @@ do { \ #ifdef CONFIG_X86_32 #define deactivate_mm(tsk, mm) \ do { \ - lazy_load_gs(0); \ + loadsegment(gs, 0); \ } while (0) #else #define deactivate_mm(tsk, mm) \ diff --git a/arch/x86/include/asm/segment.h b/arch/x86/include/asm/segment.h index 656ed6531d03..617b3663e4dd 100644 --- a/arch/x86/include/asm/segment.h +++ b/arch/x86/include/asm/segment.h @@ -354,11 +354,6 @@ static inline void __loadsegment_fs(unsigned short value) * x86-32 user GS accessors. This is ugly and could do with some cleaning up. */ #ifdef CONFIG_X86_32 -# define get_user_gs(regs) (u16)({ unsigned long v; savesegment(gs, v); v; }) -# define set_user_gs(regs, v) loadsegment(gs, (unsigned long)(v)) -# define task_user_gs(tsk) ((tsk)->thread.gs) -# define lazy_save_gs(v) savesegment(gs, (v)) -# define lazy_load_gs(v) loadsegment(gs, (v)) # define load_gs_index(v) loadsegment(gs, (v)) #endif /* X86_32 */ diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index b370767f5b19..96a9885b9c3a 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -160,6 +160,7 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, unsigned long arg, savesegment(ds, p->thread.ds); #else p->thread.sp0 = (unsigned long) (childregs + 1); + savesegment(gs, p->thread.gs); /* * Clear all status flags including IF and set fixed bit. 64bit * does not have this initialization as the frame does not contain @@ -191,10 +192,6 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, unsigned long arg, if (sp) childregs->sp = sp; -#ifdef CONFIG_X86_32 - task_user_gs(p) = get_user_gs(current_pt_regs()); -#endif - if (unlikely(p->flags & PF_IO_WORKER)) { /* * An IO thread is a user space thread, but it doesn't diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 26edb1cd07a4..877358f3dba7 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -63,10 +63,7 @@ void __show_regs(struct pt_regs *regs, enum show_regs_mode mode, unsigned long d0, d1, d2, d3, d6, d7; unsigned short gs; - if (user_mode(regs)) - gs = get_user_gs(regs); - else - savesegment(gs, gs); + savesegment(gs, gs); show_ip(regs, log_lvl); @@ -114,7 +111,7 @@ void release_thread(struct task_struct *dead_task) void start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp) { - set_user_gs(regs, 0); + loadsegment(gs, 0); regs->fs = 0; regs->ds = __USER_DS; regs->es = __USER_DS; @@ -177,7 +174,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) * used %fs or %gs (it does not today), or if the kernel is * running inside of a hypervisor layer. */ - lazy_save_gs(prev->gs); + savesegment(gs, prev->gs); /* * Load the per-thread Thread-Local Storage descriptor. @@ -208,7 +205,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) * Restore %gs if needed (which is common) */ if (prev->gs | next->gs) - lazy_load_gs(next->gs); + loadsegment(gs, next->gs); this_cpu_write(current_task, next_p); diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 98d10ef60571..37c12fb92906 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -170,9 +170,9 @@ static u16 get_segment_reg(struct task_struct *task, unsigned long offset) retval = *pt_regs_access(task_pt_regs(task), offset); else { if (task == current) - retval = get_user_gs(task_pt_regs(task)); + savesegment(gs, retval); else - retval = task_user_gs(task); + retval = task->thread.gs; } return retval; } @@ -210,7 +210,7 @@ static int set_segment_reg(struct task_struct *task, break; case offsetof(struct user_regs_struct, gs): - task_user_gs(task) = value; + task->thread.gs = value; } return 0; diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index e439eb14325f..9c7265b524c7 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c @@ -93,7 +93,7 @@ static bool restore_sigcontext(struct pt_regs *regs, return false; #ifdef CONFIG_X86_32 - set_user_gs(regs, sc.gs); + loadsegment(gs, sc.gs); regs->fs = sc.fs; regs->es = sc.es; regs->ds = sc.ds; @@ -146,8 +146,10 @@ __unsafe_setup_sigcontext(struct sigcontext __user *sc, void __user *fpstate, struct pt_regs *regs, unsigned long mask) { #ifdef CONFIG_X86_32 - unsafe_put_user(get_user_gs(regs), - (unsigned int __user *)&sc->gs, Efault); + unsigned int gs; + savesegment(gs, gs); + + unsafe_put_user(gs, (unsigned int __user *)&sc->gs, Efault); unsafe_put_user(regs->fs, (unsigned int __user *)&sc->fs, Efault); unsafe_put_user(regs->es, (unsigned int __user *)&sc->es, Efault); unsafe_put_user(regs->ds, (unsigned int __user *)&sc->ds, Efault); diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c index c21bcd668284..e9e803a4d44c 100644 --- a/arch/x86/kernel/vm86_32.c +++ b/arch/x86/kernel/vm86_32.c @@ -151,7 +151,7 @@ exit_vm86: memcpy(®s->pt, &vm86->regs32, sizeof(struct pt_regs)); - lazy_load_gs(vm86->regs32.gs); + loadsegment(gs, vm86->regs32.gs); regs->pt.ax = retval; return; @@ -325,7 +325,7 @@ static long do_sys_vm86(struct vm86plus_struct __user *user_vm86, bool plus) * Save old state */ vm86->saved_sp0 = tsk->thread.sp0; - lazy_save_gs(vm86->regs32.gs); + savesegment(gs, vm86->regs32.gs); /* make room for real-mode segments */ preempt_disable(); diff --git a/arch/x86/lib/insn-eval.c b/arch/x86/lib/insn-eval.c index b781d324211b..21104c41cba0 100644 --- a/arch/x86/lib/insn-eval.c +++ b/arch/x86/lib/insn-eval.c @@ -342,9 +342,9 @@ static int resolve_seg_reg(struct insn *insn, struct pt_regs *regs, int regoff) */ static short get_segment_selector(struct pt_regs *regs, int seg_reg_idx) { -#ifdef CONFIG_X86_64 unsigned short sel; +#ifdef CONFIG_X86_64 switch (seg_reg_idx) { case INAT_SEG_REG_IGNORE: return 0; @@ -402,7 +402,8 @@ static short get_segment_selector(struct pt_regs *regs, int seg_reg_idx) case INAT_SEG_REG_FS: return (unsigned short)(regs->fs & 0xffff); case INAT_SEG_REG_GS: - return get_user_gs(regs); + savesegment(gs, sel); + return sel; case INAT_SEG_REG_IGNORE: default: return -EINVAL; diff --git a/arch/x86/math-emu/get_address.c b/arch/x86/math-emu/get_address.c index b82ca14ba718..4a9fd9029a53 100644 --- a/arch/x86/math-emu/get_address.c +++ b/arch/x86/math-emu/get_address.c @@ -153,7 +153,7 @@ static long pm_address(u_char FPU_modrm, u_char segment, switch (segment) { case PREFIX_GS_ - 1: /* user gs handling can be lazy, use special accessors */ - addr->selector = get_user_gs(FPU_info->regs); + savesegment(gs, addr->selector); break; default: addr->selector = PM_REG_(segment); From 203d8919a9eda5d1bc68ac3cd7637588334c9dc1 Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Fri, 25 Mar 2022 11:39:53 -0400 Subject: [PATCH 10/15] x86/asm: Merge load_gs_index() Merge the 32- and 64-bit implementations of load_gs_index(). Signed-off-by: Brian Gerst Signed-off-by: Borislav Petkov Reviewed-by: Thomas Gleixner Acked-by: Andy Lutomirski Link: https://lore.kernel.org/r/20220325153953.162643-5-brgerst@gmail.com --- arch/x86/include/asm/segment.h | 7 ------- arch/x86/include/asm/special_insns.h | 7 ++++--- 2 files changed, 4 insertions(+), 10 deletions(-) diff --git a/arch/x86/include/asm/segment.h b/arch/x86/include/asm/segment.h index 617b3663e4dd..2e7890dd58a4 100644 --- a/arch/x86/include/asm/segment.h +++ b/arch/x86/include/asm/segment.h @@ -350,13 +350,6 @@ static inline void __loadsegment_fs(unsigned short value) #define savesegment(seg, value) \ asm("mov %%" #seg ",%0":"=r" (value) : : "memory") -/* - * x86-32 user GS accessors. This is ugly and could do with some cleaning up. - */ -#ifdef CONFIG_X86_32 -# define load_gs_index(v) loadsegment(gs, (v)) -#endif /* X86_32 */ - #endif /* !__ASSEMBLY__ */ #endif /* __KERNEL__ */ diff --git a/arch/x86/include/asm/special_insns.h b/arch/x86/include/asm/special_insns.h index 68c257a3de0d..45b18eb94fa1 100644 --- a/arch/x86/include/asm/special_insns.h +++ b/arch/x86/include/asm/special_insns.h @@ -184,14 +184,15 @@ static inline void wbinvd(void) native_wbinvd(); } -#ifdef CONFIG_X86_64 static inline void load_gs_index(unsigned int selector) { +#ifdef CONFIG_X86_64 native_load_gs_index(selector); -} - +#else + loadsegment(gs, selector); #endif +} #endif /* CONFIG_PARAVIRT_XXL */ From 8ad7e8f696951f192c6629a0cbda9ac94c773159 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 4 Apr 2022 14:11:25 +0200 Subject: [PATCH 11/15] x86/fpu/xsave: Support XSAVEC in the kernel XSAVEC is the user space counterpart of XSAVES which cannot save supervisor state. In virtualization scenarios the hypervisor does not expose XSAVES but XSAVEC to the guest, though the kernel does not make use of it. That's unfortunate because XSAVEC uses the compacted format of saving the XSTATE. This is more efficient in terms of storage space vs. XSAVE[OPT] as it does not create holes for XSTATE components which are not supported or enabled by the kernel but are available in hardware. There is room for further optimizations when XSAVEC/S and XGETBV1 are supported. In order to support XSAVEC: - Define the XSAVEC ASM macro as it's not yet supported by the required minimal toolchain. - Create a software defined X86_FEATURE_XCOMPACTED to select the compacted XSTATE buffer format for both XSAVEC and XSAVES. - Make XSAVEC an option in the 'XSAVE' ASM alternatives Requested-by: Andrew Cooper Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20220404104820.598704095@linutronix.de --- arch/x86/include/asm/cpufeatures.h | 2 +- arch/x86/kernel/fpu/xstate.c | 58 +++++++++++++++++++----------- arch/x86/kernel/fpu/xstate.h | 14 +++++--- 3 files changed, 48 insertions(+), 26 deletions(-) diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 73e643ae94b6..ff08da857847 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -201,7 +201,7 @@ #define X86_FEATURE_INVPCID_SINGLE ( 7*32+ 7) /* Effectively INVPCID && CR4.PCIDE=1 */ #define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */ #define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */ -/* FREE! ( 7*32+10) */ +#define X86_FEATURE_XCOMPACTED ( 7*32+10) /* "" Use compacted XSTATE (XSAVES or XSAVEC) */ #define X86_FEATURE_PTI ( 7*32+11) /* Kernel Page Table Isolation enabled */ #define X86_FEATURE_RETPOLINE ( 7*32+12) /* "" Generic Retpoline mitigation for Spectre variant 2 */ #define X86_FEATURE_RETPOLINE_LFENCE ( 7*32+13) /* "" Use LFENCE for Spectre variant 2 */ diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index 39e1c8626ab9..31c12f4d0770 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -142,7 +142,8 @@ static unsigned int xfeature_get_offset(u64 xcomp_bv, int xfeature) * Non-compacted format and legacy features use the cached fixed * offsets. */ - if (!cpu_feature_enabled(X86_FEATURE_XSAVES) || xfeature <= XFEATURE_SSE) + if (!cpu_feature_enabled(X86_FEATURE_XCOMPACTED) || + xfeature <= XFEATURE_SSE) return xstate_offsets[xfeature]; /* @@ -369,12 +370,12 @@ static void __init setup_init_fpu_buf(void) /* * All components are now in init state. Read the state back so * that init_fpstate contains all non-zero init state. This only - * works with XSAVE, but not with XSAVEOPT and XSAVES because + * works with XSAVE, but not with XSAVEOPT and XSAVEC/S because * those use the init optimization which skips writing data for * components in init state. * * XSAVE could be used, but that would require to reshuffle the - * data when XSAVES is available because XSAVES uses xstate + * data when XSAVEC/S is available because XSAVEC/S uses xstate * compaction. But doing so is a pointless exercise because most * components have an all zeros init state except for the legacy * ones (FP and SSE). Those can be saved with FXSAVE into the @@ -584,7 +585,8 @@ static unsigned int xstate_calculate_size(u64 xfeatures, bool compacted) */ static bool __init paranoid_xstate_size_valid(unsigned int kernel_size) { - bool compacted = cpu_feature_enabled(X86_FEATURE_XSAVES); + bool compacted = cpu_feature_enabled(X86_FEATURE_XCOMPACTED); + bool xsaves = cpu_feature_enabled(X86_FEATURE_XSAVES); unsigned int size = FXSAVE_SIZE + XSAVE_HDR_SIZE; int i; @@ -595,7 +597,7 @@ static bool __init paranoid_xstate_size_valid(unsigned int kernel_size) * Supervisor state components can be managed only by * XSAVES. */ - if (!compacted && xfeature_is_supervisor(i)) { + if (!xsaves && xfeature_is_supervisor(i)) { XSTATE_WARN_ON(1); return false; } @@ -612,8 +614,11 @@ static bool __init paranoid_xstate_size_valid(unsigned int kernel_size) * the size of the *user* states. If we use it to size a buffer * that we use 'XSAVES' on, we could potentially overflow the * buffer because 'XSAVES' saves system states too. + * + * This also takes compaction into account. So this works for + * XSAVEC as well. */ -static unsigned int __init get_xsaves_size(void) +static unsigned int __init get_compacted_size(void) { unsigned int eax, ebx, ecx, edx; /* @@ -623,6 +628,10 @@ static unsigned int __init get_xsaves_size(void) * containing all the state components * corresponding to bits currently set in * XCR0 | IA32_XSS. + * + * When XSAVES is not available but XSAVEC is (virt), then there + * are no supervisor states, but XSAVEC still uses compacted + * format. */ cpuid_count(XSTATE_CPUID, 1, &eax, &ebx, &ecx, &edx); return ebx; @@ -632,13 +641,13 @@ static unsigned int __init get_xsaves_size(void) * Get the total size of the enabled xstates without the independent supervisor * features. */ -static unsigned int __init get_xsaves_size_no_independent(void) +static unsigned int __init get_xsave_compacted_size(void) { u64 mask = xfeatures_mask_independent(); unsigned int size; if (!mask) - return get_xsaves_size(); + return get_compacted_size(); /* Disable independent features. */ wrmsrl(MSR_IA32_XSS, xfeatures_mask_supervisor()); @@ -647,7 +656,7 @@ static unsigned int __init get_xsaves_size_no_independent(void) * Ask the hardware what size is required of the buffer. * This is the size required for the task->fpu buffer. */ - size = get_xsaves_size(); + size = get_compacted_size(); /* Re-enable independent features so XSAVES will work on them again. */ wrmsrl(MSR_IA32_XSS, xfeatures_mask_supervisor() | mask); @@ -687,20 +696,21 @@ static int __init init_xstate_size(void) { /* Recompute the context size for enabled features: */ unsigned int user_size, kernel_size, kernel_default_size; - bool compacted = cpu_feature_enabled(X86_FEATURE_XSAVES); + bool compacted = cpu_feature_enabled(X86_FEATURE_XCOMPACTED); /* Uncompacted user space size */ user_size = get_xsave_size_user(); /* - * XSAVES kernel size includes supervisor states and - * uses compacted format when available. + * XSAVES kernel size includes supervisor states and uses compacted + * format. XSAVEC uses compacted format, but does not save + * supervisor states. * - * XSAVE does not support supervisor states so - * kernel and user size is identical. + * XSAVE[OPT] do not support supervisor states so kernel and user + * size is identical. */ if (compacted) - kernel_size = get_xsaves_size_no_independent(); + kernel_size = get_xsave_compacted_size(); else kernel_size = user_size; @@ -813,8 +823,11 @@ void __init fpu__init_system_xstate(unsigned int legacy_size) if (!cpu_feature_enabled(X86_FEATURE_XFD)) fpu_kernel_cfg.max_features &= ~XFEATURE_MASK_USER_DYNAMIC; - fpu_kernel_cfg.max_features &= XFEATURE_MASK_USER_SUPPORTED | - XFEATURE_MASK_SUPERVISOR_SUPPORTED; + if (!cpu_feature_enabled(X86_FEATURE_XSAVES)) + fpu_kernel_cfg.max_features &= XFEATURE_MASK_USER_SUPPORTED; + else + fpu_kernel_cfg.max_features &= XFEATURE_MASK_USER_SUPPORTED | + XFEATURE_MASK_SUPERVISOR_SUPPORTED; fpu_user_cfg.max_features = fpu_kernel_cfg.max_features; fpu_user_cfg.max_features &= XFEATURE_MASK_USER_SUPPORTED; @@ -837,6 +850,11 @@ void __init fpu__init_system_xstate(unsigned int legacy_size) */ init_fpstate.xfd = fpu_user_cfg.max_features & XFEATURE_MASK_USER_DYNAMIC; + /* Set up compaction feature bit */ + if (cpu_feature_enabled(X86_FEATURE_XSAVEC) || + cpu_feature_enabled(X86_FEATURE_XSAVES)) + setup_force_cpu_cap(X86_FEATURE_XCOMPACTED); + /* Enable xstate instructions to be able to continue with initialization: */ fpu__init_cpu_xstate(); @@ -873,7 +891,7 @@ void __init fpu__init_system_xstate(unsigned int legacy_size) pr_info("x86/fpu: Enabled xstate features 0x%llx, context size is %d bytes, using '%s' format.\n", fpu_kernel_cfg.max_features, fpu_kernel_cfg.max_size, - boot_cpu_has(X86_FEATURE_XSAVES) ? "compacted" : "standard"); + boot_cpu_has(X86_FEATURE_XCOMPACTED) ? "compacted" : "standard"); return; out_disable: @@ -917,7 +935,7 @@ static void *__raw_xsave_addr(struct xregs_state *xsave, int xfeature_nr) if (WARN_ON_ONCE(!xfeature_enabled(xfeature_nr))) return NULL; - if (cpu_feature_enabled(X86_FEATURE_XSAVES)) { + if (cpu_feature_enabled(X86_FEATURE_XCOMPACTED)) { if (WARN_ON_ONCE(!(xcomp_bv & BIT_ULL(xfeature_nr)))) return NULL; } @@ -1525,7 +1543,7 @@ static int __xstate_request_perm(u64 permitted, u64 requested, bool guest) * vendors into extending XFD for the pre AMX states, especially * AVX512. */ - bool compacted = cpu_feature_enabled(X86_FEATURE_XSAVES); + bool compacted = cpu_feature_enabled(X86_FEATURE_XCOMPACTED); struct fpu *fpu = ¤t->group_leader->thread.fpu; struct fpu_state_perm *perm; unsigned int ksize, usize; diff --git a/arch/x86/kernel/fpu/xstate.h b/arch/x86/kernel/fpu/xstate.h index d22ace092ca2..5ad47031383b 100644 --- a/arch/x86/kernel/fpu/xstate.h +++ b/arch/x86/kernel/fpu/xstate.h @@ -16,7 +16,7 @@ static inline void xstate_init_xcomp_bv(struct xregs_state *xsave, u64 mask) * XRSTORS requires these bits set in xcomp_bv, or it will * trigger #GP: */ - if (cpu_feature_enabled(X86_FEATURE_XSAVES)) + if (cpu_feature_enabled(X86_FEATURE_XCOMPACTED)) xsave->header.xcomp_bv = mask | XCOMP_BV_COMPACTED_FORMAT; } @@ -79,6 +79,7 @@ static inline u64 xfeatures_mask_independent(void) /* These macros all use (%edi)/(%rdi) as the single memory argument. */ #define XSAVE ".byte " REX_PREFIX "0x0f,0xae,0x27" #define XSAVEOPT ".byte " REX_PREFIX "0x0f,0xae,0x37" +#define XSAVEC ".byte " REX_PREFIX "0x0f,0xc7,0x27" #define XSAVES ".byte " REX_PREFIX "0x0f,0xc7,0x2f" #define XRSTOR ".byte " REX_PREFIX "0x0f,0xae,0x2f" #define XRSTORS ".byte " REX_PREFIX "0x0f,0xc7,0x1f" @@ -97,9 +98,11 @@ static inline u64 xfeatures_mask_independent(void) : "memory") /* - * If XSAVES is enabled, it replaces XSAVEOPT because it supports a compact - * format and supervisor states in addition to modified optimization in - * XSAVEOPT. + * If XSAVES is enabled, it replaces XSAVEC because it supports supervisor + * states in addition to XSAVEC. + * + * Otherwise if XSAVEC is enabled, it replaces XSAVEOPT because it supports + * compacted storage format in addition to XSAVEOPT. * * Otherwise, if XSAVEOPT is enabled, XSAVEOPT replaces XSAVE because XSAVEOPT * supports modified optimization which is not supported by XSAVE. @@ -111,8 +114,9 @@ static inline u64 xfeatures_mask_independent(void) * address of the instruction where we might get an exception at. */ #define XSTATE_XSAVE(st, lmask, hmask, err) \ - asm volatile(ALTERNATIVE_2(XSAVE, \ + asm volatile(ALTERNATIVE_3(XSAVE, \ XSAVEOPT, X86_FEATURE_XSAVEOPT, \ + XSAVEC, X86_FEATURE_XSAVEC, \ XSAVES, X86_FEATURE_XSAVES) \ "\n" \ "xor %[err], %[err]\n" \ From 7a116a2dd32d96869f0f93bac00b900859ba0434 Mon Sep 17 00:00:00 2001 From: Kuppuswamy Sathyanarayanan Date: Sat, 16 Apr 2022 02:45:32 +0000 Subject: [PATCH 12/15] x86/apic: Do apic driver probe for "nosmp" use case For the "nosmp" use case, the APIC initialization code selects "APIC_SYMMETRIC_IO_NO_ROUTING" as the default interrupt mode and avoids probing APIC drivers. This works well for the default APIC modes, but for the x2APIC case the probe function is required to allocate the cluster_hotplug mask. So in the APIC_SYMMETRIC_IO_NO_ROUTING case when the x2APIC is initialized it dereferences a NULL pointer and the kernel crashes. This was observed on a TDX platform where x2APIC is enabled and "nosmp" command line option is allowed. To fix this issue, probe APIC drivers via default_setup_apic_routing() for the APIC_SYMMETRIC_IO_NO_ROUTING interrupt mode too. Suggested-by: Kirill A. Shutemov Suggested-by: Rafael J. Wysocki Signed-off-by: Kuppuswamy Sathyanarayanan Signed-off-by: Thomas Gleixner Reviewed-by: Tony Luck Link: https://lore.kernel.org/r/a64f864e1114bcd63593286aaf61142cfce384ea.1650076869.git.sathyanarayanan.kuppuswamy@intel.com --- arch/x86/kernel/apic/apic.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 13819bfa8dde..25e92d7a7ae5 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -1428,22 +1428,21 @@ void __init apic_intr_mode_init(void) return; case APIC_VIRTUAL_WIRE: pr_info("APIC: Switch to virtual wire mode setup\n"); - default_setup_apic_routing(); break; case APIC_VIRTUAL_WIRE_NO_CONFIG: pr_info("APIC: Switch to virtual wire mode setup with no configuration\n"); upmode = true; - default_setup_apic_routing(); break; case APIC_SYMMETRIC_IO: pr_info("APIC: Switch to symmetric I/O mode setup\n"); - default_setup_apic_routing(); break; case APIC_SYMMETRIC_IO_NO_ROUTING: pr_info("APIC: Switch to symmetric I/O mode setup in no SMP routine\n"); break; } + default_setup_apic_routing(); + if (x86_platform.apic_post_init) x86_platform.apic_post_init(); From b91c0922bf1ed15b67a6faa404bc64e3ed532ec2 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 2 May 2022 09:20:42 +0200 Subject: [PATCH 13/15] x86/fpu: Cleanup variable shadowing Addresses: warning: Local variable 'mask' shadows outer variable Remove extra variable declaration and switch the bit mask assignment to use BIT_ULL() while at it. Fixes: 522e92743b35 ("x86/fpu: Deduplicate copy_uabi_from_user/kernel_to_xstate()") Reported-by: kernel test robot Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/lkml/202204262032.jFYKit5j-lkp@intel.com --- arch/x86/kernel/fpu/xstate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index 31c12f4d0770..81fcd04247de 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -1233,7 +1233,7 @@ static int copy_uabi_to_xstate(struct fpstate *fpstate, const void *kbuf, } for (i = 0; i < XFEATURE_MAX; i++) { - u64 mask = ((u64)1 << i); + mask = BIT_ULL(i); if (hdr.xfeatures & mask) { void *dst = __raw_xsave_addr(xsave, i); From a7fed5c0431dbfa707037848830f980e0f93cfb3 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 15 May 2022 13:39:34 +0200 Subject: [PATCH 14/15] x86/nmi: Make register_nmi_handler() more robust register_nmi_handler() has no sanity check whether a handler has been registered already. Such an unintended double-add leads to list corruption and hard to diagnose problems during the next NMI handling. Init the list head in the static NMI action struct and check it for being empty in register_nmi_handler(). [ bp: Fixups. ] Reported-by: Sean Christopherson Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Link: https://lore.kernel.org/lkml/20220511234332.3654455-1-seanjc@google.com --- arch/x86/include/asm/nmi.h | 1 + arch/x86/kernel/nmi.c | 12 ++++++++---- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/arch/x86/include/asm/nmi.h b/arch/x86/include/asm/nmi.h index 1cb9c17a4cb4..5c5f1e56c404 100644 --- a/arch/x86/include/asm/nmi.h +++ b/arch/x86/include/asm/nmi.h @@ -47,6 +47,7 @@ struct nmiaction { #define register_nmi_handler(t, fn, fg, n, init...) \ ({ \ static struct nmiaction init fn##_na = { \ + .list = LIST_HEAD_INIT(fn##_na.list), \ .handler = (fn), \ .name = (n), \ .flags = (fg), \ diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c index e73f7df362f5..cec0bfa3bc04 100644 --- a/arch/x86/kernel/nmi.c +++ b/arch/x86/kernel/nmi.c @@ -157,7 +157,7 @@ int __register_nmi_handler(unsigned int type, struct nmiaction *action) struct nmi_desc *desc = nmi_to_desc(type); unsigned long flags; - if (!action->handler) + if (WARN_ON_ONCE(!action->handler || !list_empty(&action->list))) return -EINVAL; raw_spin_lock_irqsave(&desc->lock, flags); @@ -177,7 +177,7 @@ int __register_nmi_handler(unsigned int type, struct nmiaction *action) list_add_rcu(&action->list, &desc->head); else list_add_tail_rcu(&action->list, &desc->head); - + raw_spin_unlock_irqrestore(&desc->lock, flags); return 0; } @@ -186,7 +186,7 @@ EXPORT_SYMBOL(__register_nmi_handler); void unregister_nmi_handler(unsigned int type, const char *name) { struct nmi_desc *desc = nmi_to_desc(type); - struct nmiaction *n; + struct nmiaction *n, *found = NULL; unsigned long flags; raw_spin_lock_irqsave(&desc->lock, flags); @@ -200,12 +200,16 @@ void unregister_nmi_handler(unsigned int type, const char *name) WARN(in_nmi(), "Trying to free NMI (%s) from NMI context!\n", n->name); list_del_rcu(&n->list); + found = n; break; } } raw_spin_unlock_irqrestore(&desc->lock, flags); - synchronize_rcu(); + if (found) { + synchronize_rcu(); + INIT_LIST_HEAD(&found->list); + } } EXPORT_SYMBOL_GPL(unregister_nmi_handler); From 69505e3d9a39a988aaed9b58aa6b3482238f6516 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Thu, 12 May 2022 06:56:23 -0700 Subject: [PATCH 15/15] bug: Use normal relative pointers in 'struct bug_entry' With CONFIG_GENERIC_BUG_RELATIVE_POINTERS, the addr/file relative pointers are calculated weirdly: based on the beginning of the bug_entry struct address, rather than their respective pointer addresses. Make the relative pointers less surprising to both humans and tools by calculating them the normal way. Signed-off-by: Josh Poimboeuf Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Mark Rutland Acked-by: Peter Zijlstra (Intel) Acked-by: Sven Schnelle # s390 Acked-by: Michael Ellerman (powerpc) Acked-by: Catalin Marinas Tested-by: Mark Rutland [arm64] Link: https://lkml.kernel.org/r/f0e05be797a16f4fc2401eeb88c8450dcbe61df6.1652362951.git.jpoimboe@kernel.org --- arch/arm64/include/asm/asm-bug.h | 4 ++-- arch/powerpc/include/asm/bug.h | 14 ++++++++------ arch/riscv/include/asm/bug.h | 4 ++-- arch/s390/include/asm/bug.h | 5 +++-- arch/x86/include/asm/bug.h | 2 +- lib/bug.c | 15 +++++++-------- 6 files changed, 23 insertions(+), 21 deletions(-) diff --git a/arch/arm64/include/asm/asm-bug.h b/arch/arm64/include/asm/asm-bug.h index 03f52f84a4f3..c762038ba400 100644 --- a/arch/arm64/include/asm/asm-bug.h +++ b/arch/arm64/include/asm/asm-bug.h @@ -14,7 +14,7 @@ 14472: .string file; \ .popsection; \ \ - .long 14472b - 14470b; \ + .long 14472b - .; \ .short line; #else #define _BUGVERBOSE_LOCATION(file, line) @@ -25,7 +25,7 @@ #define __BUG_ENTRY(flags) \ .pushsection __bug_table,"aw"; \ .align 2; \ - 14470: .long 14471f - 14470b; \ + 14470: .long 14471f - .; \ _BUGVERBOSE_LOCATION(__FILE__, __LINE__) \ .short flags; \ .popsection; \ diff --git a/arch/powerpc/include/asm/bug.h b/arch/powerpc/include/asm/bug.h index ecbae1832de3..61a4736355c2 100644 --- a/arch/powerpc/include/asm/bug.h +++ b/arch/powerpc/include/asm/bug.h @@ -13,7 +13,8 @@ #ifdef CONFIG_DEBUG_BUGVERBOSE .macro __EMIT_BUG_ENTRY addr,file,line,flags .section __bug_table,"aw" -5001: .4byte \addr - 5001b, 5002f - 5001b +5001: .4byte \addr - . + .4byte 5002f - . .short \line, \flags .org 5001b+BUG_ENTRY_SIZE .previous @@ -24,7 +25,7 @@ #else .macro __EMIT_BUG_ENTRY addr,file,line,flags .section __bug_table,"aw" -5001: .4byte \addr - 5001b +5001: .4byte \addr - . .short \flags .org 5001b+BUG_ENTRY_SIZE .previous @@ -49,15 +50,16 @@ #ifdef CONFIG_DEBUG_BUGVERBOSE #define _EMIT_BUG_ENTRY \ ".section __bug_table,\"aw\"\n" \ - "2:\t.4byte 1b - 2b, %0 - 2b\n" \ - "\t.short %1, %2\n" \ + "2: .4byte 1b - .\n" \ + " .4byte %0 - .\n" \ + " .short %1, %2\n" \ ".org 2b+%3\n" \ ".previous\n" #else #define _EMIT_BUG_ENTRY \ ".section __bug_table,\"aw\"\n" \ - "2:\t.4byte 1b - 2b\n" \ - "\t.short %2\n" \ + "2: .4byte 1b - .\n" \ + " .short %2\n" \ ".org 2b+%3\n" \ ".previous\n" #endif diff --git a/arch/riscv/include/asm/bug.h b/arch/riscv/include/asm/bug.h index d3804a2f9aad..1aaea81fb141 100644 --- a/arch/riscv/include/asm/bug.h +++ b/arch/riscv/include/asm/bug.h @@ -30,8 +30,8 @@ typedef u32 bug_insn_t; #ifdef CONFIG_GENERIC_BUG_RELATIVE_POINTERS -#define __BUG_ENTRY_ADDR RISCV_INT " 1b - 2b" -#define __BUG_ENTRY_FILE RISCV_INT " %0 - 2b" +#define __BUG_ENTRY_ADDR RISCV_INT " 1b - ." +#define __BUG_ENTRY_FILE RISCV_INT " %0 - ." #else #define __BUG_ENTRY_ADDR RISCV_PTR " 1b" #define __BUG_ENTRY_FILE RISCV_PTR " %0" diff --git a/arch/s390/include/asm/bug.h b/arch/s390/include/asm/bug.h index 0b25f28351ed..aebe1e22c7be 100644 --- a/arch/s390/include/asm/bug.h +++ b/arch/s390/include/asm/bug.h @@ -15,7 +15,8 @@ "1: .asciz \""__FILE__"\"\n" \ ".previous\n" \ ".section __bug_table,\"awM\",@progbits,%2\n" \ - "2: .long 0b-2b,1b-2b\n" \ + "2: .long 0b-.\n" \ + " .long 1b-.\n" \ " .short %0,%1\n" \ " .org 2b+%2\n" \ ".previous\n" \ @@ -30,7 +31,7 @@ asm_inline volatile( \ "0: mc 0,0\n" \ ".section __bug_table,\"awM\",@progbits,%1\n" \ - "1: .long 0b-1b\n" \ + "1: .long 0b-.\n" \ " .short %0\n" \ " .org 1b+%1\n" \ ".previous\n" \ diff --git a/arch/x86/include/asm/bug.h b/arch/x86/include/asm/bug.h index 4d20a293c6fd..76fbe24cccf9 100644 --- a/arch/x86/include/asm/bug.h +++ b/arch/x86/include/asm/bug.h @@ -18,7 +18,7 @@ #ifdef CONFIG_X86_32 # define __BUG_REL(val) ".long " __stringify(val) #else -# define __BUG_REL(val) ".long " __stringify(val) " - 2b" +# define __BUG_REL(val) ".long " __stringify(val) " - ." #endif #ifdef CONFIG_DEBUG_BUGVERBOSE diff --git a/lib/bug.c b/lib/bug.c index 45a0584f6541..c223a2575b72 100644 --- a/lib/bug.c +++ b/lib/bug.c @@ -6,8 +6,7 @@ CONFIG_BUG - emit BUG traps. Nothing happens without this. CONFIG_GENERIC_BUG - enable this code. - CONFIG_GENERIC_BUG_RELATIVE_POINTERS - use 32-bit pointers relative to - the containing struct bug_entry for bug_addr and file. + CONFIG_GENERIC_BUG_RELATIVE_POINTERS - use 32-bit relative pointers for bug_addr and file CONFIG_DEBUG_BUGVERBOSE - emit full file+line information for each BUG CONFIG_BUG and CONFIG_DEBUG_BUGVERBOSE are potentially user-settable @@ -53,10 +52,10 @@ extern struct bug_entry __start___bug_table[], __stop___bug_table[]; static inline unsigned long bug_addr(const struct bug_entry *bug) { -#ifndef CONFIG_GENERIC_BUG_RELATIVE_POINTERS - return bug->bug_addr; +#ifdef CONFIG_GENERIC_BUG_RELATIVE_POINTERS + return (unsigned long)&bug->bug_addr_disp + bug->bug_addr_disp; #else - return (unsigned long)bug + bug->bug_addr_disp; + return bug->bug_addr; #endif } @@ -131,10 +130,10 @@ void bug_get_file_line(struct bug_entry *bug, const char **file, unsigned int *line) { #ifdef CONFIG_DEBUG_BUGVERBOSE -#ifndef CONFIG_GENERIC_BUG_RELATIVE_POINTERS - *file = bug->file; +#ifdef CONFIG_GENERIC_BUG_RELATIVE_POINTERS + *file = (const char *)&bug->file_disp + bug->file_disp; #else - *file = (const char *)bug + bug->file_disp; + *file = bug->file; #endif *line = bug->line; #else