diff --git a/arch/arm64/include/asm/asm-bug.h b/arch/arm64/include/asm/asm-bug.h index 03f52f84a4f3..c762038ba400 100644 --- a/arch/arm64/include/asm/asm-bug.h +++ b/arch/arm64/include/asm/asm-bug.h @@ -14,7 +14,7 @@ 14472: .string file; \ .popsection; \ \ - .long 14472b - 14470b; \ + .long 14472b - .; \ .short line; #else #define _BUGVERBOSE_LOCATION(file, line) @@ -25,7 +25,7 @@ #define __BUG_ENTRY(flags) \ .pushsection __bug_table,"aw"; \ .align 2; \ - 14470: .long 14471f - 14470b; \ + 14470: .long 14471f - .; \ _BUGVERBOSE_LOCATION(__FILE__, __LINE__) \ .short flags; \ .popsection; \ diff --git a/arch/powerpc/include/asm/bug.h b/arch/powerpc/include/asm/bug.h index ecbae1832de3..61a4736355c2 100644 --- a/arch/powerpc/include/asm/bug.h +++ b/arch/powerpc/include/asm/bug.h @@ -13,7 +13,8 @@ #ifdef CONFIG_DEBUG_BUGVERBOSE .macro __EMIT_BUG_ENTRY addr,file,line,flags .section __bug_table,"aw" -5001: .4byte \addr - 5001b, 5002f - 5001b +5001: .4byte \addr - . + .4byte 5002f - . .short \line, \flags .org 5001b+BUG_ENTRY_SIZE .previous @@ -24,7 +25,7 @@ #else .macro __EMIT_BUG_ENTRY addr,file,line,flags .section __bug_table,"aw" -5001: .4byte \addr - 5001b +5001: .4byte \addr - . .short \flags .org 5001b+BUG_ENTRY_SIZE .previous @@ -49,15 +50,16 @@ #ifdef CONFIG_DEBUG_BUGVERBOSE #define _EMIT_BUG_ENTRY \ ".section __bug_table,\"aw\"\n" \ - "2:\t.4byte 1b - 2b, %0 - 2b\n" \ - "\t.short %1, %2\n" \ + "2: .4byte 1b - .\n" \ + " .4byte %0 - .\n" \ + " .short %1, %2\n" \ ".org 2b+%3\n" \ ".previous\n" #else #define _EMIT_BUG_ENTRY \ ".section __bug_table,\"aw\"\n" \ - "2:\t.4byte 1b - 2b\n" \ - "\t.short %2\n" \ + "2: .4byte 1b - .\n" \ + " .short %2\n" \ ".org 2b+%3\n" \ ".previous\n" #endif diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c index 65562c4a0a69..4c09c6688ac6 100644 --- a/arch/powerpc/kernel/fadump.c +++ b/arch/powerpc/kernel/fadump.c @@ -752,7 +752,7 @@ u32 *__init fadump_regs_to_elf_notes(u32 *buf, struct pt_regs *regs) * FIXME: How do i get PID? Do I really need it? * prstatus.pr_pid = ???? */ - elf_core_copy_kernel_regs(&prstatus.pr_reg, regs); + elf_core_copy_regs(&prstatus.pr_reg, regs); buf = append_elf_note(buf, CRASH_CORE_NOTE_NAME, NT_PRSTATUS, &prstatus, sizeof(prstatus)); return buf; diff --git a/arch/powerpc/platforms/powernv/opal-core.c b/arch/powerpc/platforms/powernv/opal-core.c index b97bc179f65a..adcb1a1a2bfe 100644 --- a/arch/powerpc/platforms/powernv/opal-core.c +++ b/arch/powerpc/platforms/powernv/opal-core.c @@ -112,7 +112,7 @@ static void __init fill_prstatus(struct elf_prstatus *prstatus, int pir, struct pt_regs *regs) { memset(prstatus, 0, sizeof(struct elf_prstatus)); - elf_core_copy_kernel_regs(&(prstatus->pr_reg), regs); + elf_core_copy_regs(&(prstatus->pr_reg), regs); /* * Overload PID with PIR value. diff --git a/arch/riscv/include/asm/bug.h b/arch/riscv/include/asm/bug.h index d3804a2f9aad..1aaea81fb141 100644 --- a/arch/riscv/include/asm/bug.h +++ b/arch/riscv/include/asm/bug.h @@ -30,8 +30,8 @@ typedef u32 bug_insn_t; #ifdef CONFIG_GENERIC_BUG_RELATIVE_POINTERS -#define __BUG_ENTRY_ADDR RISCV_INT " 1b - 2b" -#define __BUG_ENTRY_FILE RISCV_INT " %0 - 2b" +#define __BUG_ENTRY_ADDR RISCV_INT " 1b - ." +#define __BUG_ENTRY_FILE RISCV_INT " %0 - ." #else #define __BUG_ENTRY_ADDR RISCV_PTR " 1b" #define __BUG_ENTRY_FILE RISCV_PTR " %0" diff --git a/arch/s390/include/asm/bug.h b/arch/s390/include/asm/bug.h index 0b25f28351ed..aebe1e22c7be 100644 --- a/arch/s390/include/asm/bug.h +++ b/arch/s390/include/asm/bug.h @@ -15,7 +15,8 @@ "1: .asciz \""__FILE__"\"\n" \ ".previous\n" \ ".section __bug_table,\"awM\",@progbits,%2\n" \ - "2: .long 0b-2b,1b-2b\n" \ + "2: .long 0b-.\n" \ + " .long 1b-.\n" \ " .short %0,%1\n" \ " .org 2b+%2\n" \ ".previous\n" \ @@ -30,7 +31,7 @@ asm_inline volatile( \ "0: mc 0,0\n" \ ".section __bug_table,\"awM\",@progbits,%1\n" \ - "1: .long 0b-1b\n" \ + "1: .long 0b-.\n" \ " .short %0\n" \ " .org 1b+%1\n" \ ".previous\n" \ diff --git a/arch/x86/include/asm/apicdef.h b/arch/x86/include/asm/apicdef.h index 5716f22f81ac..92035eb3afee 100644 --- a/arch/x86/include/asm/apicdef.h +++ b/arch/x86/include/asm/apicdef.h @@ -95,12 +95,6 @@ #define APIC_LVTTHMR 0x330 #define APIC_LVTPC 0x340 #define APIC_LVT0 0x350 -#define APIC_LVT_TIMER_BASE_MASK (0x3 << 18) -#define GET_APIC_TIMER_BASE(x) (((x) >> 18) & 0x3) -#define SET_APIC_TIMER_BASE(x) (((x) << 18)) -#define APIC_TIMER_BASE_CLKIN 0x0 -#define APIC_TIMER_BASE_TMBASE 0x1 -#define APIC_TIMER_BASE_DIV 0x2 #define APIC_LVT_TIMER_ONESHOT (0 << 17) #define APIC_LVT_TIMER_PERIODIC (1 << 17) #define APIC_LVT_TIMER_TSCDEADLINE (2 << 17) diff --git a/arch/x86/include/asm/bug.h b/arch/x86/include/asm/bug.h index aaf0cb0db4ae..a3ec87d198ac 100644 --- a/arch/x86/include/asm/bug.h +++ b/arch/x86/include/asm/bug.h @@ -18,7 +18,7 @@ #ifdef CONFIG_X86_32 # define __BUG_REL(val) ".long " __stringify(val) #else -# define __BUG_REL(val) ".long " __stringify(val) " - 2b" +# define __BUG_REL(val) ".long " __stringify(val) " - ." #endif #ifdef CONFIG_DEBUG_BUGVERBOSE diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 20df73b51025..6e0dbbf847f3 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -201,7 +201,7 @@ #define X86_FEATURE_INVPCID_SINGLE ( 7*32+ 7) /* Effectively INVPCID && CR4.PCIDE=1 */ #define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */ #define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */ -/* FREE! ( 7*32+10) */ +#define X86_FEATURE_XCOMPACTED ( 7*32+10) /* "" Use compacted XSTATE (XSAVES or XSAVEC) */ #define X86_FEATURE_PTI ( 7*32+11) /* Kernel Page Table Isolation enabled */ #define X86_FEATURE_RETPOLINE ( 7*32+12) /* "" Generic Retpoline mitigation for Spectre variant 2 */ #define X86_FEATURE_RETPOLINE_LFENCE ( 7*32+13) /* "" Use LFENCE for Spectre variant 2 */ diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h index 29fea180a665..cb0ff1055ab1 100644 --- a/arch/x86/include/asm/elf.h +++ b/arch/x86/include/asm/elf.h @@ -116,7 +116,7 @@ extern unsigned int vdso32_enabled; * now struct_user_regs, they are different) */ -#define ELF_CORE_COPY_REGS_COMMON(pr_reg, regs) \ +#define ELF_CORE_COPY_REGS(pr_reg, regs) \ do { \ pr_reg[0] = regs->bx; \ pr_reg[1] = regs->cx; \ @@ -128,6 +128,7 @@ do { \ pr_reg[7] = regs->ds; \ pr_reg[8] = regs->es; \ pr_reg[9] = regs->fs; \ + savesegment(gs, pr_reg[10]); \ pr_reg[11] = regs->orig_ax; \ pr_reg[12] = regs->ip; \ pr_reg[13] = regs->cs; \ @@ -136,18 +137,6 @@ do { \ pr_reg[16] = regs->ss; \ } while (0); -#define ELF_CORE_COPY_REGS(pr_reg, regs) \ -do { \ - ELF_CORE_COPY_REGS_COMMON(pr_reg, regs);\ - pr_reg[10] = get_user_gs(regs); \ -} while (0); - -#define ELF_CORE_COPY_KERNEL_REGS(pr_reg, regs) \ -do { \ - ELF_CORE_COPY_REGS_COMMON(pr_reg, regs);\ - savesegment(gs, pr_reg[10]); \ -} while (0); - #define ELF_PLATFORM (utsname()->machine) #define set_personality_64bit() do { } while (0) diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h index 27516046117a..b8d40ddeab00 100644 --- a/arch/x86/include/asm/mmu_context.h +++ b/arch/x86/include/asm/mmu_context.h @@ -141,7 +141,7 @@ do { \ #ifdef CONFIG_X86_32 #define deactivate_mm(tsk, mm) \ do { \ - lazy_load_gs(0); \ + loadsegment(gs, 0); \ } while (0) #else #define deactivate_mm(tsk, mm) \ diff --git a/arch/x86/include/asm/nmi.h b/arch/x86/include/asm/nmi.h index 1cb9c17a4cb4..5c5f1e56c404 100644 --- a/arch/x86/include/asm/nmi.h +++ b/arch/x86/include/asm/nmi.h @@ -47,6 +47,7 @@ struct nmiaction { #define register_nmi_handler(t, fn, fg, n, init...) \ ({ \ static struct nmiaction init fn##_na = { \ + .list = LIST_HEAD_INIT(fn##_na.list), \ .handler = (fn), \ .name = (n), \ .flags = (fg), \ diff --git a/arch/x86/include/asm/segment.h b/arch/x86/include/asm/segment.h index 656ed6531d03..2e7890dd58a4 100644 --- a/arch/x86/include/asm/segment.h +++ b/arch/x86/include/asm/segment.h @@ -350,18 +350,6 @@ static inline void __loadsegment_fs(unsigned short value) #define savesegment(seg, value) \ asm("mov %%" #seg ",%0":"=r" (value) : : "memory") -/* - * x86-32 user GS accessors. This is ugly and could do with some cleaning up. - */ -#ifdef CONFIG_X86_32 -# define get_user_gs(regs) (u16)({ unsigned long v; savesegment(gs, v); v; }) -# define set_user_gs(regs, v) loadsegment(gs, (unsigned long)(v)) -# define task_user_gs(tsk) ((tsk)->thread.gs) -# define lazy_save_gs(v) savesegment(gs, (v)) -# define lazy_load_gs(v) loadsegment(gs, (v)) -# define load_gs_index(v) loadsegment(gs, (v)) -#endif /* X86_32 */ - #endif /* !__ASSEMBLY__ */ #endif /* __KERNEL__ */ diff --git a/arch/x86/include/asm/special_insns.h b/arch/x86/include/asm/special_insns.h index 68c257a3de0d..45b18eb94fa1 100644 --- a/arch/x86/include/asm/special_insns.h +++ b/arch/x86/include/asm/special_insns.h @@ -184,14 +184,15 @@ static inline void wbinvd(void) native_wbinvd(); } -#ifdef CONFIG_X86_64 static inline void load_gs_index(unsigned int selector) { +#ifdef CONFIG_X86_64 native_load_gs_index(selector); -} - +#else + loadsegment(gs, selector); #endif +} #endif /* CONFIG_PARAVIRT_XXL */ diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 3c8f2c797a98..9b87ec6acdec 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -320,6 +320,9 @@ int lapic_get_maxlvt(void) #define APIC_DIVISOR 16 #define TSC_DIVISOR 8 +/* i82489DX specific */ +#define I82489DX_BASE_DIVIDER (((0x2) << 18)) + /* * This function sets up the local APIC timer, with a timeout of * 'clocks' APIC bus clock. During calibration we actually call @@ -340,8 +343,14 @@ static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen) else if (boot_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER)) lvtt_value |= APIC_LVT_TIMER_TSCDEADLINE; + /* + * The i82489DX APIC uses bit 18 and 19 for the base divider. This + * overlaps with bit 18 on integrated APICs, but is not documented + * in the SDM. No problem though. i82489DX equipped systems do not + * have TSC deadline timer. + */ if (!lapic_is_integrated()) - lvtt_value |= SET_APIC_TIMER_BASE(APIC_TIMER_BASE_DIV); + lvtt_value |= I82489DX_BASE_DIVIDER; if (!irqen) lvtt_value |= APIC_LVT_MASKED; @@ -1419,22 +1428,21 @@ void __init apic_intr_mode_init(void) return; case APIC_VIRTUAL_WIRE: pr_info("APIC: Switch to virtual wire mode setup\n"); - default_setup_apic_routing(); break; case APIC_VIRTUAL_WIRE_NO_CONFIG: pr_info("APIC: Switch to virtual wire mode setup with no configuration\n"); upmode = true; - default_setup_apic_routing(); break; case APIC_SYMMETRIC_IO: pr_info("APIC: Switch to symmetric I/O mode setup\n"); - default_setup_apic_routing(); break; case APIC_SYMMETRIC_IO_NO_ROUTING: pr_info("APIC: Switch to symmetric I/O mode setup in no SMP routine\n"); break; } + default_setup_apic_routing(); + if (x86_platform.apic_post_init) x86_platform.apic_post_init(); diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index f5a48e66e4f5..482855227964 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -199,7 +199,13 @@ static void __init uv_tsc_check_sync(void) int mmr_shift; char *state; - /* Different returns from different UV BIOS versions */ + /* UV5 guarantees synced TSCs; do not zero TSC_ADJUST */ + if (!is_uv(UV2|UV3|UV4)) { + mark_tsc_async_resets("UV5+"); + return; + } + + /* UV2,3,4, UV BIOS TSC sync state available */ mmr = uv_early_read_mmr(UVH_TSC_SYNC_MMR); mmr_shift = is_uv2_hub() ? UVH_TSC_SYNC_SHIFT_UV2K : UVH_TSC_SYNC_SHIFT; @@ -1340,7 +1346,7 @@ static void __init decode_gam_params(unsigned long ptr) static void __init decode_gam_rng_tbl(unsigned long ptr) { struct uv_gam_range_entry *gre = (struct uv_gam_range_entry *)ptr; - unsigned long lgre = 0; + unsigned long lgre = 0, gend = 0; int index = 0; int sock_min = 999999, pnode_min = 99999; int sock_max = -1, pnode_max = -1; @@ -1374,6 +1380,9 @@ static void __init decode_gam_rng_tbl(unsigned long ptr) flag, size, suffix[order], gre->type, gre->nasid, gre->sockid, gre->pnode); + if (gre->type == UV_GAM_RANGE_TYPE_HOLE) + gend = (unsigned long)gre->limit << UV_GAM_RANGE_SHFT; + /* update to next range start */ lgre = gre->limit; if (sock_min > gre->sockid) @@ -1391,7 +1400,8 @@ static void __init decode_gam_rng_tbl(unsigned long ptr) _max_pnode = pnode_max; _gr_table_len = index; - pr_info("UV: GRT: %d entries, sockets(min:%x,max:%x) pnodes(min:%x,max:%x)\n", index, _min_socket, _max_socket, _min_pnode, _max_pnode); + pr_info("UV: GRT: %d entries, sockets(min:%x,max:%x), pnodes(min:%x,max:%x), gap_end(%d)\n", + index, _min_socket, _max_socket, _min_pnode, _max_pnode, fls64(gend)); } /* Walk through UVsystab decoding the fields */ diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c index e8326a8d1c5d..9730c88530fc 100644 --- a/arch/x86/kernel/crash.c +++ b/arch/x86/kernel/crash.c @@ -407,7 +407,7 @@ int crash_load_segments(struct kimage *image) } image->elf_load_addr = kbuf.mem; pr_debug("Loaded ELF headers at 0x%lx bufsz=0x%lx memsz=0x%lx\n", - image->elf_load_addr, kbuf.bufsz, kbuf.bufsz); + image->elf_load_addr, kbuf.bufsz, kbuf.memsz); return ret; } diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index 1b016a186c11..c8340156bfd2 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -142,7 +142,8 @@ static unsigned int xfeature_get_offset(u64 xcomp_bv, int xfeature) * Non-compacted format and legacy features use the cached fixed * offsets. */ - if (!cpu_feature_enabled(X86_FEATURE_XSAVES) || xfeature <= XFEATURE_SSE) + if (!cpu_feature_enabled(X86_FEATURE_XCOMPACTED) || + xfeature <= XFEATURE_SSE) return xstate_offsets[xfeature]; /* @@ -369,12 +370,12 @@ static void __init setup_init_fpu_buf(void) /* * All components are now in init state. Read the state back so * that init_fpstate contains all non-zero init state. This only - * works with XSAVE, but not with XSAVEOPT and XSAVES because + * works with XSAVE, but not with XSAVEOPT and XSAVEC/S because * those use the init optimization which skips writing data for * components in init state. * * XSAVE could be used, but that would require to reshuffle the - * data when XSAVES is available because XSAVES uses xstate + * data when XSAVEC/S is available because XSAVEC/S uses xstate * compaction. But doing so is a pointless exercise because most * components have an all zeros init state except for the legacy * ones (FP and SSE). Those can be saved with FXSAVE into the @@ -584,7 +585,8 @@ static unsigned int xstate_calculate_size(u64 xfeatures, bool compacted) */ static bool __init paranoid_xstate_size_valid(unsigned int kernel_size) { - bool compacted = cpu_feature_enabled(X86_FEATURE_XSAVES); + bool compacted = cpu_feature_enabled(X86_FEATURE_XCOMPACTED); + bool xsaves = cpu_feature_enabled(X86_FEATURE_XSAVES); unsigned int size = FXSAVE_SIZE + XSAVE_HDR_SIZE; int i; @@ -595,7 +597,7 @@ static bool __init paranoid_xstate_size_valid(unsigned int kernel_size) * Supervisor state components can be managed only by * XSAVES. */ - if (!compacted && xfeature_is_supervisor(i)) { + if (!xsaves && xfeature_is_supervisor(i)) { XSTATE_WARN_ON(1); return false; } @@ -612,8 +614,11 @@ static bool __init paranoid_xstate_size_valid(unsigned int kernel_size) * the size of the *user* states. If we use it to size a buffer * that we use 'XSAVES' on, we could potentially overflow the * buffer because 'XSAVES' saves system states too. + * + * This also takes compaction into account. So this works for + * XSAVEC as well. */ -static unsigned int __init get_xsaves_size(void) +static unsigned int __init get_compacted_size(void) { unsigned int eax, ebx, ecx, edx; /* @@ -623,6 +628,10 @@ static unsigned int __init get_xsaves_size(void) * containing all the state components * corresponding to bits currently set in * XCR0 | IA32_XSS. + * + * When XSAVES is not available but XSAVEC is (virt), then there + * are no supervisor states, but XSAVEC still uses compacted + * format. */ cpuid_count(XSTATE_CPUID, 1, &eax, &ebx, &ecx, &edx); return ebx; @@ -632,13 +641,13 @@ static unsigned int __init get_xsaves_size(void) * Get the total size of the enabled xstates without the independent supervisor * features. */ -static unsigned int __init get_xsaves_size_no_independent(void) +static unsigned int __init get_xsave_compacted_size(void) { u64 mask = xfeatures_mask_independent(); unsigned int size; if (!mask) - return get_xsaves_size(); + return get_compacted_size(); /* Disable independent features. */ wrmsrl(MSR_IA32_XSS, xfeatures_mask_supervisor()); @@ -647,7 +656,7 @@ static unsigned int __init get_xsaves_size_no_independent(void) * Ask the hardware what size is required of the buffer. * This is the size required for the task->fpu buffer. */ - size = get_xsaves_size(); + size = get_compacted_size(); /* Re-enable independent features so XSAVES will work on them again. */ wrmsrl(MSR_IA32_XSS, xfeatures_mask_supervisor() | mask); @@ -687,20 +696,21 @@ static int __init init_xstate_size(void) { /* Recompute the context size for enabled features: */ unsigned int user_size, kernel_size, kernel_default_size; - bool compacted = cpu_feature_enabled(X86_FEATURE_XSAVES); + bool compacted = cpu_feature_enabled(X86_FEATURE_XCOMPACTED); /* Uncompacted user space size */ user_size = get_xsave_size_user(); /* - * XSAVES kernel size includes supervisor states and - * uses compacted format when available. + * XSAVES kernel size includes supervisor states and uses compacted + * format. XSAVEC uses compacted format, but does not save + * supervisor states. * - * XSAVE does not support supervisor states so - * kernel and user size is identical. + * XSAVE[OPT] do not support supervisor states so kernel and user + * size is identical. */ if (compacted) - kernel_size = get_xsaves_size_no_independent(); + kernel_size = get_xsave_compacted_size(); else kernel_size = user_size; @@ -813,8 +823,11 @@ void __init fpu__init_system_xstate(unsigned int legacy_size) if (!cpu_feature_enabled(X86_FEATURE_XFD)) fpu_kernel_cfg.max_features &= ~XFEATURE_MASK_USER_DYNAMIC; - fpu_kernel_cfg.max_features &= XFEATURE_MASK_USER_SUPPORTED | - XFEATURE_MASK_SUPERVISOR_SUPPORTED; + if (!cpu_feature_enabled(X86_FEATURE_XSAVES)) + fpu_kernel_cfg.max_features &= XFEATURE_MASK_USER_SUPPORTED; + else + fpu_kernel_cfg.max_features &= XFEATURE_MASK_USER_SUPPORTED | + XFEATURE_MASK_SUPERVISOR_SUPPORTED; fpu_user_cfg.max_features = fpu_kernel_cfg.max_features; fpu_user_cfg.max_features &= XFEATURE_MASK_USER_SUPPORTED; @@ -837,6 +850,11 @@ void __init fpu__init_system_xstate(unsigned int legacy_size) */ init_fpstate.xfd = fpu_user_cfg.max_features & XFEATURE_MASK_USER_DYNAMIC; + /* Set up compaction feature bit */ + if (cpu_feature_enabled(X86_FEATURE_XSAVEC) || + cpu_feature_enabled(X86_FEATURE_XSAVES)) + setup_force_cpu_cap(X86_FEATURE_XCOMPACTED); + /* Enable xstate instructions to be able to continue with initialization: */ fpu__init_cpu_xstate(); @@ -873,7 +891,7 @@ void __init fpu__init_system_xstate(unsigned int legacy_size) pr_info("x86/fpu: Enabled xstate features 0x%llx, context size is %d bytes, using '%s' format.\n", fpu_kernel_cfg.max_features, fpu_kernel_cfg.max_size, - boot_cpu_has(X86_FEATURE_XSAVES) ? "compacted" : "standard"); + boot_cpu_has(X86_FEATURE_XCOMPACTED) ? "compacted" : "standard"); return; out_disable: @@ -917,7 +935,7 @@ static void *__raw_xsave_addr(struct xregs_state *xsave, int xfeature_nr) if (WARN_ON_ONCE(!xfeature_enabled(xfeature_nr))) return NULL; - if (cpu_feature_enabled(X86_FEATURE_XSAVES)) { + if (cpu_feature_enabled(X86_FEATURE_XCOMPACTED)) { if (WARN_ON_ONCE(!(xcomp_bv & BIT_ULL(xfeature_nr)))) return NULL; } @@ -1215,7 +1233,7 @@ static int copy_uabi_to_xstate(struct fpstate *fpstate, const void *kbuf, } for (i = 0; i < XFEATURE_MAX; i++) { - u64 mask = ((u64)1 << i); + mask = BIT_ULL(i); if (hdr.xfeatures & mask) { void *dst = __raw_xsave_addr(xsave, i); @@ -1525,7 +1543,7 @@ static int __xstate_request_perm(u64 permitted, u64 requested, bool guest) * vendors into extending XFD for the pre AMX states, especially * AVX512. */ - bool compacted = cpu_feature_enabled(X86_FEATURE_XSAVES); + bool compacted = cpu_feature_enabled(X86_FEATURE_XCOMPACTED); struct fpu *fpu = ¤t->group_leader->thread.fpu; struct fpu_state_perm *perm; unsigned int ksize, usize; diff --git a/arch/x86/kernel/fpu/xstate.h b/arch/x86/kernel/fpu/xstate.h index d22ace092ca2..5ad47031383b 100644 --- a/arch/x86/kernel/fpu/xstate.h +++ b/arch/x86/kernel/fpu/xstate.h @@ -16,7 +16,7 @@ static inline void xstate_init_xcomp_bv(struct xregs_state *xsave, u64 mask) * XRSTORS requires these bits set in xcomp_bv, or it will * trigger #GP: */ - if (cpu_feature_enabled(X86_FEATURE_XSAVES)) + if (cpu_feature_enabled(X86_FEATURE_XCOMPACTED)) xsave->header.xcomp_bv = mask | XCOMP_BV_COMPACTED_FORMAT; } @@ -79,6 +79,7 @@ static inline u64 xfeatures_mask_independent(void) /* These macros all use (%edi)/(%rdi) as the single memory argument. */ #define XSAVE ".byte " REX_PREFIX "0x0f,0xae,0x27" #define XSAVEOPT ".byte " REX_PREFIX "0x0f,0xae,0x37" +#define XSAVEC ".byte " REX_PREFIX "0x0f,0xc7,0x27" #define XSAVES ".byte " REX_PREFIX "0x0f,0xc7,0x2f" #define XRSTOR ".byte " REX_PREFIX "0x0f,0xae,0x2f" #define XRSTORS ".byte " REX_PREFIX "0x0f,0xc7,0x1f" @@ -97,9 +98,11 @@ static inline u64 xfeatures_mask_independent(void) : "memory") /* - * If XSAVES is enabled, it replaces XSAVEOPT because it supports a compact - * format and supervisor states in addition to modified optimization in - * XSAVEOPT. + * If XSAVES is enabled, it replaces XSAVEC because it supports supervisor + * states in addition to XSAVEC. + * + * Otherwise if XSAVEC is enabled, it replaces XSAVEOPT because it supports + * compacted storage format in addition to XSAVEOPT. * * Otherwise, if XSAVEOPT is enabled, XSAVEOPT replaces XSAVE because XSAVEOPT * supports modified optimization which is not supported by XSAVE. @@ -111,8 +114,9 @@ static inline u64 xfeatures_mask_independent(void) * address of the instruction where we might get an exception at. */ #define XSTATE_XSAVE(st, lmask, hmask, err) \ - asm volatile(ALTERNATIVE_2(XSAVE, \ + asm volatile(ALTERNATIVE_3(XSAVE, \ XSAVEOPT, X86_FEATURE_XSAVEOPT, \ + XSAVEC, X86_FEATURE_XSAVEC, \ XSAVES, X86_FEATURE_XSAVES) \ "\n" \ "xor %[err], %[err]\n" \ diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c index e73f7df362f5..cec0bfa3bc04 100644 --- a/arch/x86/kernel/nmi.c +++ b/arch/x86/kernel/nmi.c @@ -157,7 +157,7 @@ int __register_nmi_handler(unsigned int type, struct nmiaction *action) struct nmi_desc *desc = nmi_to_desc(type); unsigned long flags; - if (!action->handler) + if (WARN_ON_ONCE(!action->handler || !list_empty(&action->list))) return -EINVAL; raw_spin_lock_irqsave(&desc->lock, flags); @@ -177,7 +177,7 @@ int __register_nmi_handler(unsigned int type, struct nmiaction *action) list_add_rcu(&action->list, &desc->head); else list_add_tail_rcu(&action->list, &desc->head); - + raw_spin_unlock_irqrestore(&desc->lock, flags); return 0; } @@ -186,7 +186,7 @@ EXPORT_SYMBOL(__register_nmi_handler); void unregister_nmi_handler(unsigned int type, const char *name) { struct nmi_desc *desc = nmi_to_desc(type); - struct nmiaction *n; + struct nmiaction *n, *found = NULL; unsigned long flags; raw_spin_lock_irqsave(&desc->lock, flags); @@ -200,12 +200,16 @@ void unregister_nmi_handler(unsigned int type, const char *name) WARN(in_nmi(), "Trying to free NMI (%s) from NMI context!\n", n->name); list_del_rcu(&n->list); + found = n; break; } } raw_spin_unlock_irqrestore(&desc->lock, flags); - synchronize_rcu(); + if (found) { + synchronize_rcu(); + INIT_LIST_HEAD(&found->list); + } } EXPORT_SYMBOL_GPL(unregister_nmi_handler); diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index d5275ecb1e92..cbe6aa3f649d 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -161,6 +161,7 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, unsigned long arg, savesegment(ds, p->thread.ds); #else p->thread.sp0 = (unsigned long) (childregs + 1); + savesegment(gs, p->thread.gs); /* * Clear all status flags including IF and set fixed bit. 64bit * does not have this initialization as the frame does not contain @@ -192,10 +193,6 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, unsigned long arg, if (sp) childregs->sp = sp; -#ifdef CONFIG_X86_32 - task_user_gs(p) = get_user_gs(current_pt_regs()); -#endif - if (unlikely(p->flags & PF_IO_WORKER)) { /* * An IO thread is a user space thread, but it doesn't diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 0faa5e28dd64..2f314b170c9f 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -63,10 +63,7 @@ void __show_regs(struct pt_regs *regs, enum show_regs_mode mode, unsigned long d0, d1, d2, d3, d6, d7; unsigned short gs; - if (user_mode(regs)) - gs = get_user_gs(regs); - else - savesegment(gs, gs); + savesegment(gs, gs); show_ip(regs, log_lvl); @@ -114,7 +111,7 @@ void release_thread(struct task_struct *dead_task) void start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp) { - set_user_gs(regs, 0); + loadsegment(gs, 0); regs->fs = 0; regs->ds = __USER_DS; regs->es = __USER_DS; @@ -177,7 +174,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) * used %fs or %gs (it does not today), or if the kernel is * running inside of a hypervisor layer. */ - lazy_save_gs(prev->gs); + savesegment(gs, prev->gs); /* * Load the per-thread Thread-Local Storage descriptor. @@ -208,7 +205,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) * Restore %gs if needed (which is common) */ if (prev->gs | next->gs) - lazy_load_gs(next->gs); + loadsegment(gs, next->gs); this_cpu_write(current_task, next_p); diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 98d10ef60571..37c12fb92906 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -170,9 +170,9 @@ static u16 get_segment_reg(struct task_struct *task, unsigned long offset) retval = *pt_regs_access(task_pt_regs(task), offset); else { if (task == current) - retval = get_user_gs(task_pt_regs(task)); + savesegment(gs, retval); else - retval = task_user_gs(task); + retval = task->thread.gs; } return retval; } @@ -210,7 +210,7 @@ static int set_segment_reg(struct task_struct *task, break; case offsetof(struct user_regs_struct, gs): - task_user_gs(task) = value; + task->thread.gs = value; } return 0; diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index e439eb14325f..9c7265b524c7 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c @@ -93,7 +93,7 @@ static bool restore_sigcontext(struct pt_regs *regs, return false; #ifdef CONFIG_X86_32 - set_user_gs(regs, sc.gs); + loadsegment(gs, sc.gs); regs->fs = sc.fs; regs->es = sc.es; regs->ds = sc.ds; @@ -146,8 +146,10 @@ __unsafe_setup_sigcontext(struct sigcontext __user *sc, void __user *fpstate, struct pt_regs *regs, unsigned long mask) { #ifdef CONFIG_X86_32 - unsafe_put_user(get_user_gs(regs), - (unsigned int __user *)&sc->gs, Efault); + unsigned int gs; + savesegment(gs, gs); + + unsafe_put_user(gs, (unsigned int __user *)&sc->gs, Efault); unsafe_put_user(regs->fs, (unsigned int __user *)&sc->fs, Efault); unsafe_put_user(regs->es, (unsigned int __user *)&sc->es, Efault); unsafe_put_user(regs->ds, (unsigned int __user *)&sc->ds, Efault); diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c index c21bcd668284..e9e803a4d44c 100644 --- a/arch/x86/kernel/vm86_32.c +++ b/arch/x86/kernel/vm86_32.c @@ -151,7 +151,7 @@ exit_vm86: memcpy(®s->pt, &vm86->regs32, sizeof(struct pt_regs)); - lazy_load_gs(vm86->regs32.gs); + loadsegment(gs, vm86->regs32.gs); regs->pt.ax = retval; return; @@ -325,7 +325,7 @@ static long do_sys_vm86(struct vm86plus_struct __user *user_vm86, bool plus) * Save old state */ vm86->saved_sp0 = tsk->thread.sp0; - lazy_save_gs(vm86->regs32.gs); + savesegment(gs, vm86->regs32.gs); /* make room for real-mode segments */ preempt_disable(); diff --git a/arch/x86/lib/insn-eval.c b/arch/x86/lib/insn-eval.c index b781d324211b..21104c41cba0 100644 --- a/arch/x86/lib/insn-eval.c +++ b/arch/x86/lib/insn-eval.c @@ -342,9 +342,9 @@ static int resolve_seg_reg(struct insn *insn, struct pt_regs *regs, int regoff) */ static short get_segment_selector(struct pt_regs *regs, int seg_reg_idx) { -#ifdef CONFIG_X86_64 unsigned short sel; +#ifdef CONFIG_X86_64 switch (seg_reg_idx) { case INAT_SEG_REG_IGNORE: return 0; @@ -402,7 +402,8 @@ static short get_segment_selector(struct pt_regs *regs, int seg_reg_idx) case INAT_SEG_REG_FS: return (unsigned short)(regs->fs & 0xffff); case INAT_SEG_REG_GS: - return get_user_gs(regs); + savesegment(gs, sel); + return sel; case INAT_SEG_REG_IGNORE: default: return -EINVAL; diff --git a/arch/x86/math-emu/get_address.c b/arch/x86/math-emu/get_address.c index b82ca14ba718..4a9fd9029a53 100644 --- a/arch/x86/math-emu/get_address.c +++ b/arch/x86/math-emu/get_address.c @@ -153,7 +153,7 @@ static long pm_address(u_char FPU_modrm, u_char segment, switch (segment) { case PREFIX_GS_ - 1: /* user gs handling can be lazy, use special accessors */ - addr->selector = get_user_gs(FPU_info->regs); + savesegment(gs, addr->selector); break; default: addr->selector = PM_REG_(segment); diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index d0074c6ed31a..fad8faa29d04 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -149,7 +149,7 @@ is_prefetch(struct pt_regs *regs, unsigned long error_code, unsigned long addr) unsigned char opcode; if (user_mode(regs)) { - if (get_user(opcode, instr)) + if (get_user(opcode, (unsigned char __user *) instr)) break; } else { if (get_kernel_nofault(opcode, instr)) diff --git a/arch/x86/platform/uv/uv_nmi.c b/arch/x86/platform/uv/uv_nmi.c index ea277fc08357..a60af0230e27 100644 --- a/arch/x86/platform/uv/uv_nmi.c +++ b/arch/x86/platform/uv/uv_nmi.c @@ -244,8 +244,10 @@ static inline bool uv_nmi_action_is(const char *action) /* Setup which NMI support is present in system */ static void uv_nmi_setup_mmrs(void) { + bool new_nmi_method_only = false; + /* First determine arch specific MMRs to handshake with BIOS */ - if (UVH_EVENT_OCCURRED0_EXTIO_INT0_MASK) { + if (UVH_EVENT_OCCURRED0_EXTIO_INT0_MASK) { /* UV2,3,4 setup */ uvh_nmi_mmrx = UVH_EVENT_OCCURRED0; uvh_nmi_mmrx_clear = UVH_EVENT_OCCURRED0_ALIAS; uvh_nmi_mmrx_shift = UVH_EVENT_OCCURRED0_EXTIO_INT0_SHFT; @@ -255,26 +257,25 @@ static void uv_nmi_setup_mmrs(void) uvh_nmi_mmrx_req = UVH_BIOS_KERNEL_MMR_ALIAS_2; uvh_nmi_mmrx_req_shift = 62; - } else if (UVH_EVENT_OCCURRED1_EXTIO_INT0_MASK) { + } else if (UVH_EVENT_OCCURRED1_EXTIO_INT0_MASK) { /* UV5+ setup */ uvh_nmi_mmrx = UVH_EVENT_OCCURRED1; uvh_nmi_mmrx_clear = UVH_EVENT_OCCURRED1_ALIAS; uvh_nmi_mmrx_shift = UVH_EVENT_OCCURRED1_EXTIO_INT0_SHFT; uvh_nmi_mmrx_type = "OCRD1-EXTIO_INT0"; - uvh_nmi_mmrx_supported = UVH_EXTIO_INT0_BROADCAST; - uvh_nmi_mmrx_req = UVH_BIOS_KERNEL_MMR_ALIAS_2; - uvh_nmi_mmrx_req_shift = 62; + new_nmi_method_only = true; /* Newer nmi always valid on UV5+ */ + uvh_nmi_mmrx_req = 0; /* no request bit to clear */ } else { - pr_err("UV:%s:cannot find EVENT_OCCURRED*_EXTIO_INT0\n", - __func__); + pr_err("UV:%s:NMI support not available on this system\n", __func__); return; } /* Then find out if new NMI is supported */ - if (likely(uv_read_local_mmr(uvh_nmi_mmrx_supported))) { - uv_write_local_mmr(uvh_nmi_mmrx_req, - 1UL << uvh_nmi_mmrx_req_shift); + if (new_nmi_method_only || uv_read_local_mmr(uvh_nmi_mmrx_supported)) { + if (uvh_nmi_mmrx_req) + uv_write_local_mmr(uvh_nmi_mmrx_req, + 1UL << uvh_nmi_mmrx_req_shift); nmi_mmr = uvh_nmi_mmrx; nmi_mmr_clear = uvh_nmi_mmrx_clear; nmi_mmr_pending = 1UL << uvh_nmi_mmrx_shift; diff --git a/include/linux/elfcore.h b/include/linux/elfcore.h index f8e206e82476..346a8b56cdc8 100644 --- a/include/linux/elfcore.h +++ b/include/linux/elfcore.h @@ -84,15 +84,6 @@ static inline void elf_core_copy_regs(elf_gregset_t *elfregs, struct pt_regs *re #endif } -static inline void elf_core_copy_kernel_regs(elf_gregset_t *elfregs, struct pt_regs *regs) -{ -#ifdef ELF_CORE_COPY_KERNEL_REGS - ELF_CORE_COPY_KERNEL_REGS((*elfregs), regs); -#else - elf_core_copy_regs(elfregs, regs); -#endif -} - static inline int elf_core_copy_task_regs(struct task_struct *t, elf_gregset_t* elfregs) { #if defined (ELF_CORE_COPY_TASK_REGS) diff --git a/kernel/kexec_core.c b/kernel/kexec_core.c index 68480f731192..be4b54c2c615 100644 --- a/kernel/kexec_core.c +++ b/kernel/kexec_core.c @@ -1078,7 +1078,7 @@ void crash_save_cpu(struct pt_regs *regs, int cpu) return; memset(&prstatus, 0, sizeof(prstatus)); prstatus.common.pr_pid = current->pid; - elf_core_copy_kernel_regs(&prstatus.pr_reg, regs); + elf_core_copy_regs(&prstatus.pr_reg, regs); buf = append_elf_note(buf, KEXEC_CORE_NOTE_NAME, NT_PRSTATUS, &prstatus, sizeof(prstatus)); final_note(buf); diff --git a/lib/bug.c b/lib/bug.c index 45a0584f6541..c223a2575b72 100644 --- a/lib/bug.c +++ b/lib/bug.c @@ -6,8 +6,7 @@ CONFIG_BUG - emit BUG traps. Nothing happens without this. CONFIG_GENERIC_BUG - enable this code. - CONFIG_GENERIC_BUG_RELATIVE_POINTERS - use 32-bit pointers relative to - the containing struct bug_entry for bug_addr and file. + CONFIG_GENERIC_BUG_RELATIVE_POINTERS - use 32-bit relative pointers for bug_addr and file CONFIG_DEBUG_BUGVERBOSE - emit full file+line information for each BUG CONFIG_BUG and CONFIG_DEBUG_BUGVERBOSE are potentially user-settable @@ -53,10 +52,10 @@ extern struct bug_entry __start___bug_table[], __stop___bug_table[]; static inline unsigned long bug_addr(const struct bug_entry *bug) { -#ifndef CONFIG_GENERIC_BUG_RELATIVE_POINTERS - return bug->bug_addr; +#ifdef CONFIG_GENERIC_BUG_RELATIVE_POINTERS + return (unsigned long)&bug->bug_addr_disp + bug->bug_addr_disp; #else - return (unsigned long)bug + bug->bug_addr_disp; + return bug->bug_addr; #endif } @@ -131,10 +130,10 @@ void bug_get_file_line(struct bug_entry *bug, const char **file, unsigned int *line) { #ifdef CONFIG_DEBUG_BUGVERBOSE -#ifndef CONFIG_GENERIC_BUG_RELATIVE_POINTERS - *file = bug->file; +#ifdef CONFIG_GENERIC_BUG_RELATIVE_POINTERS + *file = (const char *)&bug->file_disp + bug->file_disp; #else - *file = (const char *)bug + bug->file_disp; + *file = bug->file; #endif *line = bug->line; #else