Merge ca1b66922a ("Merge tag 'ras_updates_for_v5.10' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip") into android-mainline

Tiny steps on the way to 5.10-rc1

Change-Id: I7e303e545e85a0c989f21f98784a7fe6810478c7
Signed-off-by: Greg Kroah-Hartman <gregkh@google.com>
This commit is contained in:
Greg Kroah-Hartman
2020-10-21 07:45:06 +02:00
52 changed files with 871 additions and 702 deletions

View File

@@ -135,7 +135,7 @@ config PPC
select ARCH_HAS_STRICT_KERNEL_RWX if (PPC32 && !HIBERNATION)
select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST
select ARCH_HAS_UACCESS_FLUSHCACHE
select ARCH_HAS_UACCESS_MCSAFE if PPC64
select ARCH_HAS_COPY_MC if PPC64
select ARCH_HAS_UBSAN_SANITIZE_ALL
select ARCH_HAVE_NMI_SAFE_CMPXCHG
select ARCH_KEEP_MEMBLOCK

View File

@@ -53,9 +53,7 @@ void *__memmove(void *to, const void *from, __kernel_size_t n);
#ifndef CONFIG_KASAN
#define __HAVE_ARCH_MEMSET32
#define __HAVE_ARCH_MEMSET64
#define __HAVE_ARCH_MEMCPY_MCSAFE
extern int memcpy_mcsafe(void *dst, const void *src, __kernel_size_t sz);
extern void *__memset16(uint16_t *, uint16_t v, __kernel_size_t);
extern void *__memset32(uint32_t *, uint32_t v, __kernel_size_t);
extern void *__memset64(uint64_t *, uint64_t v, __kernel_size_t);

View File

@@ -435,6 +435,32 @@ do { \
extern unsigned long __copy_tofrom_user(void __user *to,
const void __user *from, unsigned long size);
#ifdef CONFIG_ARCH_HAS_COPY_MC
unsigned long __must_check
copy_mc_generic(void *to, const void *from, unsigned long size);
/*
 * copy_mc_to_kernel - copy @size bytes from @from to @to via copy_mc_generic().
 *
 * Returns exactly what copy_mc_generic() returns for the same arguments
 * (presumably the number of bytes left uncopied, 0 on success - confirm
 * against the copy_mc_generic() implementation).
 */
static inline unsigned long __must_check
copy_mc_to_kernel(void *to, const void *from, unsigned long size)
{
	unsigned long ret = copy_mc_generic(to, from, size);

	return ret;
}
#define copy_mc_to_kernel copy_mc_to_kernel
/*
 * copy_mc_to_user - copy @n bytes from kernel memory @from to user memory @to
 * via copy_mc_generic().
 *
 * The copy is attempted only when check_copy_size() accepts (@from, @n) and
 * access_ok() accepts (@to, @n); otherwise @n is returned unchanged.
 * When the copy runs, the return value is whatever copy_mc_generic()
 * returned (presumably the count of bytes not copied - confirm).
 */
static inline unsigned long __must_check
copy_mc_to_user(void __user *to, const void *from, unsigned long n)
{
	if (!likely(check_copy_size(from, n, true)))
		return n;

	if (!access_ok(to, n))
		return n;

	allow_write_to_user(to, n);
	n = copy_mc_generic((void *)to, from, n);
	/* n now holds copy_mc_generic()'s result, and that is what is passed on. */
	prevent_write_to_user(to, n);

	return n;
}
#endif
#ifdef __powerpc64__
static inline unsigned long
raw_copy_in_user(void __user *to, const void __user *from, unsigned long n)
@@ -523,20 +549,6 @@ raw_copy_to_user(void __user *to, const void *from, unsigned long n)
return ret;
}
static __always_inline unsigned long __must_check
copy_to_user_mcsafe(void __user *to, const void *from, unsigned long n)
{
if (likely(check_copy_size(from, n, true))) {
if (access_ok(to, n)) {
allow_write_to_user(to, n);
n = memcpy_mcsafe((void *)to, from, n);
prevent_write_to_user(to, n);
}
}
return n;
}
unsigned long __arch_clear_user(void __user *addr, unsigned long size);
static inline unsigned long clear_user(void __user *addr, unsigned long size)

View File

@@ -39,7 +39,7 @@ obj-$(CONFIG_PPC_BOOK3S_64) += copyuser_power7.o copypage_power7.o \
memcpy_power7.o
obj64-y += copypage_64.o copyuser_64.o mem_64.o hweight_64.o \
memcpy_64.o memcpy_mcsafe_64.o
memcpy_64.o copy_mc_64.o
ifndef CONFIG_PPC_QUEUED_SPINLOCKS
obj64-$(CONFIG_SMP) += locks.o

View File

@@ -50,7 +50,7 @@ err3; stb r0,0(r3)
blr
_GLOBAL(memcpy_mcsafe)
_GLOBAL(copy_mc_generic)
mr r7,r5
cmpldi r5,16
blt .Lshort_copy
@@ -239,4 +239,4 @@ err1; stb r0,0(r3)
15: li r3,0
blr
EXPORT_SYMBOL_GPL(memcpy_mcsafe);
EXPORT_SYMBOL_GPL(copy_mc_generic);

View File

@@ -75,7 +75,7 @@ config X86
select ARCH_HAS_PTE_DEVMAP if X86_64
select ARCH_HAS_PTE_SPECIAL
select ARCH_HAS_UACCESS_FLUSHCACHE if X86_64
select ARCH_HAS_UACCESS_MCSAFE if X86_64 && X86_MCE
select ARCH_HAS_COPY_MC if X86_64
select ARCH_HAS_SET_MEMORY
select ARCH_HAS_SET_DIRECT_MAP
select ARCH_HAS_STRICT_KERNEL_RWX

View File

@@ -62,7 +62,7 @@ config EARLY_PRINTK_USB_XDBC
You should normally say N here, unless you want to debug early
crashes or need a very simple printk logging facility.
config MCSAFE_TEST
config COPY_MC_TEST
def_bool n
config EFI_PGT_DUMP

View File

@@ -5,6 +5,7 @@
#include <asm/string.h>
#include <asm/page.h>
#include <asm/checksum.h>
#include <asm/mce.h>
#include <asm-generic/asm-prototypes.h>

View File

@@ -135,6 +135,9 @@
# define _ASM_EXTABLE_UA(from, to) \
_ASM_EXTABLE_HANDLE(from, to, ex_handler_uaccess)
# define _ASM_EXTABLE_CPY(from, to) \
_ASM_EXTABLE_HANDLE(from, to, ex_handler_copy)
# define _ASM_EXTABLE_FAULT(from, to) \
_ASM_EXTABLE_HANDLE(from, to, ex_handler_fault)
@@ -160,6 +163,9 @@
# define _ASM_EXTABLE_UA(from, to) \
_ASM_EXTABLE_HANDLE(from, to, ex_handler_uaccess)
# define _ASM_EXTABLE_CPY(from, to) \
_ASM_EXTABLE_HANDLE(from, to, ex_handler_copy)
# define _ASM_EXTABLE_FAULT(from, to) \
_ASM_EXTABLE_HANDLE(from, to, ex_handler_fault)

View File

@@ -0,0 +1,75 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _COPY_MC_TEST_H_
#define _COPY_MC_TEST_H_
#ifndef __ASSEMBLY__
#ifdef CONFIG_COPY_MC_TEST
extern unsigned long copy_mc_test_src;
extern unsigned long copy_mc_test_dst;
/*
 * Record @addr in copy_mc_test_src, the value the COPY_MC_TEST_SRC
 * assembler macro compares against. A NULL @addr stores ~0UL instead.
 */
static inline void copy_mc_inject_src(void *addr)
{
	copy_mc_test_src = addr ? (unsigned long) addr : ~0UL;
}
/*
 * Record @addr in copy_mc_test_dst, the value the COPY_MC_TEST_DST
 * assembler macro compares against. A NULL @addr stores ~0UL instead.
 */
static inline void copy_mc_inject_dst(void *addr)
{
	copy_mc_test_dst = addr ? (unsigned long) addr : ~0UL;
}
#else /* CONFIG_COPY_MC_TEST */
/* CONFIG_COPY_MC_TEST is off: source injection requests are accepted and ignored. */
static inline void copy_mc_inject_src(void *addr)
{
}
/* CONFIG_COPY_MC_TEST is off: destination injection requests are accepted and ignored. */
static inline void copy_mc_inject_dst(void *addr)
{
}
#endif /* CONFIG_COPY_MC_TEST */
#else /* __ASSEMBLY__ */
#include <asm/export.h>
#ifdef CONFIG_COPY_MC_TEST
/*
 * COPY_MC_TEST_CTL - emit the storage for the injection control variables.
 *
 * Expands, inside a .data section pushed and popped around the definitions,
 * to two global quadwords initialised to 0: copy_mc_test_src and
 * copy_mc_test_dst. Both are exported with EXPORT_SYMBOL_GPL(). These are
 * the variables written by copy_mc_inject_src()/copy_mc_inject_dst() and
 * read by COPY_MC_TEST_SRC/COPY_MC_TEST_DST.
 */
.macro COPY_MC_TEST_CTL
.pushsection .data
.align 8
.globl copy_mc_test_src
copy_mc_test_src:
.quad 0
EXPORT_SYMBOL_GPL(copy_mc_test_src)
.globl copy_mc_test_dst
copy_mc_test_dst:
.quad 0
EXPORT_SYMBOL_GPL(copy_mc_test_dst)
.popsection
.endm
/*
 * COPY_MC_TEST_SRC reg count target
 *
 * Computes \reg + \count into %r9 and compares it, unsigned, with the value
 * stored in copy_mc_test_src; jumps to \target when %r9 is above that value.
 * Overwrites %r9 and the condition flags.
 */
.macro COPY_MC_TEST_SRC reg count target
leaq \count(\reg), %r9
cmp copy_mc_test_src, %r9
ja \target
.endm
/*
 * COPY_MC_TEST_DST reg count target
 *
 * Computes \reg + \count into %r9 and compares it, unsigned, with the value
 * stored in copy_mc_test_dst; jumps to \target when %r9 is above that value.
 * Overwrites %r9 and the condition flags.
 */
.macro COPY_MC_TEST_DST reg count target
leaq \count(\reg), %r9
cmp copy_mc_test_dst, %r9
ja \target
.endm
#else
.macro COPY_MC_TEST_CTL
.endm
.macro COPY_MC_TEST_SRC reg count target
.endm
.macro COPY_MC_TEST_DST reg count target
.endm
#endif /* CONFIG_COPY_MC_TEST */
#endif /* __ASSEMBLY__ */
#endif /* _COPY_MC_TEST_H_ */

View File

@@ -29,10 +29,17 @@ struct pt_regs;
(b)->handler = (tmp).handler - (delta); \
} while (0)
/*
 * Classification of the exception-table fixup handler for an instruction
 * pointer, as returned by ex_get_fault_handler_type().
 *
 * NOTE(review): presumably NONE means no exception-table entry exists for
 * the ip, FAULT and UACCESS mean the entry uses ex_handler_fault and
 * ex_handler_uaccess respectively, and OTHER covers every remaining
 * handler - confirm against the ex_get_fault_handler_type() definition.
 */
enum handler_type {
EX_HANDLER_NONE,
EX_HANDLER_FAULT,
EX_HANDLER_UACCESS,
EX_HANDLER_OTHER
};
extern int fixup_exception(struct pt_regs *regs, int trapnr,
unsigned long error_code, unsigned long fault_addr);
extern int fixup_bug(struct pt_regs *regs, int trapnr);
extern bool ex_has_fault_handler(unsigned long ip);
extern enum handler_type ex_get_fault_handler_type(unsigned long ip);
extern void early_fixup_exception(struct pt_regs *regs, int trapnr);
#endif

View File

@@ -136,8 +136,23 @@
#define MCE_HANDLED_NFIT BIT_ULL(3)
#define MCE_HANDLED_EDAC BIT_ULL(4)
#define MCE_HANDLED_MCELOG BIT_ULL(5)
/*
* Indicates an MCE which has happened in kernel space but from
* which the kernel can recover simply by executing fixup_exception()
* so that an error is returned to the caller of the function that
* hit the machine check.
*/
#define MCE_IN_KERNEL_RECOV BIT_ULL(6)
/*
* Indicates an MCE that happened in kernel space while copying data
* from user. In this case fixup_exception() gets the kernel to the
* error exit for the copy function. Machine check handler can then
* treat it like a fault taken in user mode.
*/
#define MCE_IN_KERNEL_COPYIN BIT_ULL(7)
/*
* This structure contains all data related to the MCE log. Also
* carries a signature to make it easier to find from external
@@ -174,6 +189,15 @@ extern void mce_unregister_decode_chain(struct notifier_block *nb);
extern int mce_p5_enabled;
#ifdef CONFIG_ARCH_HAS_COPY_MC
extern void enable_copy_mc_fragile(void);
unsigned long __must_check copy_mc_fragile(void *dst, const void *src, unsigned cnt);
#else
static inline void enable_copy_mc_fragile(void)
{
}
#endif
#ifdef CONFIG_X86_MCE
int mcheck_init(void);
void mcheck_cpu_init(struct cpuinfo_x86 *c);
@@ -200,12 +224,8 @@ void mce_setup(struct mce *m);
void mce_log(struct mce *m);
DECLARE_PER_CPU(struct device *, mce_device);
/*
* Maximum banks number.
* This is the limit of the current register layout on
* Intel CPUs.
*/
#define MAX_NR_BANKS 32
/* Maximum number of MCA banks per CPU. */
#define MAX_NR_BANKS 64
#ifdef CONFIG_X86_MCE_INTEL
void mce_intel_feature_init(struct cpuinfo_x86 *c);
@@ -328,7 +348,6 @@ enum smca_bank_types {
struct smca_hwid {
unsigned int bank_type; /* Use with smca_bank_types for easy indexing. */
u32 hwid_mcatype; /* (hwid,mcatype) tuple */
u32 xec_bitmap; /* Bitmap of valid ExtErrorCodes; current max is 21. */
u8 count; /* Number of instances. */
};

View File

@@ -1,75 +0,0 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _MCSAFE_TEST_H_
#define _MCSAFE_TEST_H_
#ifndef __ASSEMBLY__
#ifdef CONFIG_MCSAFE_TEST
extern unsigned long mcsafe_test_src;
extern unsigned long mcsafe_test_dst;
static inline void mcsafe_inject_src(void *addr)
{
if (addr)
mcsafe_test_src = (unsigned long) addr;
else
mcsafe_test_src = ~0UL;
}
static inline void mcsafe_inject_dst(void *addr)
{
if (addr)
mcsafe_test_dst = (unsigned long) addr;
else
mcsafe_test_dst = ~0UL;
}
#else /* CONFIG_MCSAFE_TEST */
static inline void mcsafe_inject_src(void *addr)
{
}
static inline void mcsafe_inject_dst(void *addr)
{
}
#endif /* CONFIG_MCSAFE_TEST */
#else /* __ASSEMBLY__ */
#include <asm/export.h>
#ifdef CONFIG_MCSAFE_TEST
.macro MCSAFE_TEST_CTL
.pushsection .data
.align 8
.globl mcsafe_test_src
mcsafe_test_src:
.quad 0
EXPORT_SYMBOL_GPL(mcsafe_test_src)
.globl mcsafe_test_dst
mcsafe_test_dst:
.quad 0
EXPORT_SYMBOL_GPL(mcsafe_test_dst)
.popsection
.endm
.macro MCSAFE_TEST_SRC reg count target
leaq \count(\reg), %r9
cmp mcsafe_test_src, %r9
ja \target
.endm
.macro MCSAFE_TEST_DST reg count target
leaq \count(\reg), %r9
cmp mcsafe_test_dst, %r9
ja \target
.endm
#else
.macro MCSAFE_TEST_CTL
.endm
.macro MCSAFE_TEST_SRC reg count target
.endm
.macro MCSAFE_TEST_DST reg count target
.endm
#endif /* CONFIG_MCSAFE_TEST */
#endif /* __ASSEMBLY__ */
#endif /* _MCSAFE_TEST_H_ */

View File

@@ -82,38 +82,6 @@ int strcmp(const char *cs, const char *ct);
#endif
#define __HAVE_ARCH_MEMCPY_MCSAFE 1
__must_check unsigned long __memcpy_mcsafe(void *dst, const void *src,
size_t cnt);
DECLARE_STATIC_KEY_FALSE(mcsafe_key);
/**
* memcpy_mcsafe - copy memory with indication if a machine check happened
*
* @dst: destination address
* @src: source address
* @cnt: number of bytes to copy
*
* Low level memory copy function that catches machine checks
* We only call into the "safe" function on systems that can
* actually do machine check recovery. Everyone else can just
* use memcpy().
*
* Return 0 for success, or number of bytes not copied if there was an
* exception.
*/
static __always_inline __must_check unsigned long
memcpy_mcsafe(void *dst, const void *src, size_t cnt)
{
#ifdef CONFIG_X86_MCE
if (static_branch_unlikely(&mcsafe_key))
return __memcpy_mcsafe(dst, src, cnt);
else
#endif
memcpy(dst, src, cnt);
return 0;
}
#ifdef CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE
#define __HAVE_ARCH_MEMCPY_FLUSHCACHE 1
void __memcpy_flushcache(void *dst, const void *src, size_t cnt);

View File

@@ -35,6 +35,8 @@ extern int panic_on_unrecovered_nmi;
void math_emulate(struct math_emu_info *);
bool fault_in_kernel_space(unsigned long address);
#ifdef CONFIG_VMAP_STACK
void __noreturn handle_stack_overflow(const char *message,
struct pt_regs *regs,

View File

@@ -455,6 +455,15 @@ extern __must_check long strnlen_user(const char __user *str, long n);
unsigned long __must_check clear_user(void __user *mem, unsigned long len);
unsigned long __must_check __clear_user(void __user *mem, unsigned long len);
#ifdef CONFIG_ARCH_HAS_COPY_MC
unsigned long __must_check
copy_mc_to_kernel(void *to, const void *from, unsigned len);
#define copy_mc_to_kernel copy_mc_to_kernel
unsigned long __must_check
copy_mc_to_user(void *to, const void *from, unsigned len);
#endif
/*
* movsl can be slow when source and dest are not both 8-byte aligned
*/

View File

@@ -46,22 +46,6 @@ copy_user_generic(void *to, const void *from, unsigned len)
return ret;
}
static __always_inline __must_check unsigned long
copy_to_user_mcsafe(void *to, const void *from, unsigned len)
{
unsigned long ret;
__uaccess_begin();
/*
* Note, __memcpy_mcsafe() is explicitly used since it can
* handle exceptions / faults. memcpy_mcsafe() may fall back to
* memcpy() which lacks this handling.
*/
ret = __memcpy_mcsafe(to, from, len);
__uaccess_end();
return ret;
}
static __always_inline __must_check unsigned long
raw_copy_from_user(void *dst, const void __user *src, unsigned long size)
{
@@ -102,8 +86,4 @@ __copy_from_user_flushcache(void *dst, const void __user *src, unsigned size)
kasan_check_write(dst, size);
return __copy_user_flushcache(dst, src, size);
}
unsigned long
mcsafe_handle_tail(char *to, char *from, unsigned len);
#endif /* _ASM_X86_UACCESS_64_H */

View File

@@ -132,49 +132,49 @@ static enum smca_bank_types smca_get_bank_type(unsigned int bank)
}
static struct smca_hwid smca_hwid_mcatypes[] = {
/* { bank_type, hwid_mcatype, xec_bitmap } */
/* { bank_type, hwid_mcatype } */
/* Reserved type */
{ SMCA_RESERVED, HWID_MCATYPE(0x00, 0x0), 0x0 },
{ SMCA_RESERVED, HWID_MCATYPE(0x00, 0x0) },
/* ZN Core (HWID=0xB0) MCA types */
{ SMCA_LS, HWID_MCATYPE(0xB0, 0x0), 0x1FFFFF },
{ SMCA_LS_V2, HWID_MCATYPE(0xB0, 0x10), 0xFFFFFF },
{ SMCA_IF, HWID_MCATYPE(0xB0, 0x1), 0x3FFF },
{ SMCA_L2_CACHE, HWID_MCATYPE(0xB0, 0x2), 0xF },
{ SMCA_DE, HWID_MCATYPE(0xB0, 0x3), 0x1FF },
{ SMCA_LS, HWID_MCATYPE(0xB0, 0x0) },
{ SMCA_LS_V2, HWID_MCATYPE(0xB0, 0x10) },
{ SMCA_IF, HWID_MCATYPE(0xB0, 0x1) },
{ SMCA_L2_CACHE, HWID_MCATYPE(0xB0, 0x2) },
{ SMCA_DE, HWID_MCATYPE(0xB0, 0x3) },
/* HWID 0xB0 MCATYPE 0x4 is Reserved */
{ SMCA_EX, HWID_MCATYPE(0xB0, 0x5), 0xFFF },
{ SMCA_FP, HWID_MCATYPE(0xB0, 0x6), 0x7F },
{ SMCA_L3_CACHE, HWID_MCATYPE(0xB0, 0x7), 0xFF },
{ SMCA_EX, HWID_MCATYPE(0xB0, 0x5) },
{ SMCA_FP, HWID_MCATYPE(0xB0, 0x6) },
{ SMCA_L3_CACHE, HWID_MCATYPE(0xB0, 0x7) },
/* Data Fabric MCA types */
{ SMCA_CS, HWID_MCATYPE(0x2E, 0x0), 0x1FF },
{ SMCA_PIE, HWID_MCATYPE(0x2E, 0x1), 0x1F },
{ SMCA_CS_V2, HWID_MCATYPE(0x2E, 0x2), 0x3FFF },
{ SMCA_CS, HWID_MCATYPE(0x2E, 0x0) },
{ SMCA_PIE, HWID_MCATYPE(0x2E, 0x1) },
{ SMCA_CS_V2, HWID_MCATYPE(0x2E, 0x2) },
/* Unified Memory Controller MCA type */
{ SMCA_UMC, HWID_MCATYPE(0x96, 0x0), 0xFF },
{ SMCA_UMC, HWID_MCATYPE(0x96, 0x0) },
/* Parameter Block MCA type */
{ SMCA_PB, HWID_MCATYPE(0x05, 0x0), 0x1 },
{ SMCA_PB, HWID_MCATYPE(0x05, 0x0) },
/* Platform Security Processor MCA type */
{ SMCA_PSP, HWID_MCATYPE(0xFF, 0x0), 0x1 },
{ SMCA_PSP_V2, HWID_MCATYPE(0xFF, 0x1), 0x3FFFF },
{ SMCA_PSP, HWID_MCATYPE(0xFF, 0x0) },
{ SMCA_PSP_V2, HWID_MCATYPE(0xFF, 0x1) },
/* System Management Unit MCA type */
{ SMCA_SMU, HWID_MCATYPE(0x01, 0x0), 0x1 },
{ SMCA_SMU_V2, HWID_MCATYPE(0x01, 0x1), 0x7FF },
{ SMCA_SMU, HWID_MCATYPE(0x01, 0x0) },
{ SMCA_SMU_V2, HWID_MCATYPE(0x01, 0x1) },
/* Microprocessor 5 Unit MCA type */
{ SMCA_MP5, HWID_MCATYPE(0x01, 0x2), 0x3FF },
{ SMCA_MP5, HWID_MCATYPE(0x01, 0x2) },
/* Northbridge IO Unit MCA type */
{ SMCA_NBIO, HWID_MCATYPE(0x18, 0x0), 0x1F },
{ SMCA_NBIO, HWID_MCATYPE(0x18, 0x0) },
/* PCI Express Unit MCA type */
{ SMCA_PCIE, HWID_MCATYPE(0x46, 0x0), 0x1F },
{ SMCA_PCIE, HWID_MCATYPE(0x46, 0x0) },
};
struct smca_bank smca_banks[MAX_NR_BANKS];

View File

@@ -40,7 +40,6 @@
#include <linux/debugfs.h>
#include <linux/irq_work.h>
#include <linux/export.h>
#include <linux/jump_label.h>
#include <linux/set_memory.h>
#include <linux/sync_core.h>
#include <linux/task_work.h>
@@ -373,42 +372,105 @@ static int msr_to_offset(u32 msr)
return -1;
}
/* MSR access wrappers used for error injection */
static u64 mce_rdmsrl(u32 msr)
__visible bool ex_handler_rdmsr_fault(const struct exception_table_entry *fixup,
struct pt_regs *regs, int trapnr,
unsigned long error_code,
unsigned long fault_addr)
{
u64 v;
pr_emerg("MSR access error: RDMSR from 0x%x at rIP: 0x%lx (%pS)\n",
(unsigned int)regs->cx, regs->ip, (void *)regs->ip);
if (__this_cpu_read(injectm.finished)) {
int offset = msr_to_offset(msr);
show_stack_regs(regs);
if (offset < 0)
return 0;
return *(u64 *)((char *)this_cpu_ptr(&injectm) + offset);
}
panic("MCA architectural violation!\n");
if (rdmsrl_safe(msr, &v)) {
WARN_ONCE(1, "mce: Unable to read MSR 0x%x!\n", msr);
/*
* Return zero in case the access faulted. This should
* not happen normally but can happen if the CPU does
* something weird, or if the code is buggy.
*/
v = 0;
}
while (true)
cpu_relax();
return v;
return true;
}
static void mce_wrmsrl(u32 msr, u64 v)
/* MSR access wrappers used for error injection */
static noinstr u64 mce_rdmsrl(u32 msr)
{
if (__this_cpu_read(injectm.finished)) {
int offset = msr_to_offset(msr);
DECLARE_ARGS(val, low, high);
if (__this_cpu_read(injectm.finished)) {
int offset;
u64 ret;
instrumentation_begin();
offset = msr_to_offset(msr);
if (offset < 0)
ret = 0;
else
ret = *(u64 *)((char *)this_cpu_ptr(&injectm) + offset);
instrumentation_end();
return ret;
}
/*
* RDMSR on MCA MSRs should not fault. If they do, this is very much an
* architectural violation and needs to be reported to hw vendor. Panic
* the box to not allow any further progress.
*/
asm volatile("1: rdmsr\n"
"2:\n"
_ASM_EXTABLE_HANDLE(1b, 2b, ex_handler_rdmsr_fault)
: EAX_EDX_RET(val, low, high) : "c" (msr));
return EAX_EDX_VAL(val, low, high);
}
/*
 * Exception-table handler for a WRMSR that faults.
 *
 * Logs the MSR index (regs->cx), the value that was being written
 * (regs->dx as the high half, regs->ax as the low half) and the faulting
 * rIP, dumps the stack and registers, and then panics.
 *
 * @fixup, @trapnr, @error_code and @fault_addr are not used by this handler.
 */
__visible bool ex_handler_wrmsr_fault(const struct exception_table_entry *fixup,
struct pt_regs *regs, int trapnr,
unsigned long error_code,
unsigned long fault_addr)
{
pr_emerg("MSR access error: WRMSR to 0x%x (tried to write 0x%08x%08x) at rIP: 0x%lx (%pS)\n",
(unsigned int)regs->cx, (unsigned int)regs->dx, (unsigned int)regs->ax,
regs->ip, (void *)regs->ip);
show_stack_regs(regs);
panic("MCA architectural violation!\n");
/*
 * Spin forever instead of resuming the faulting context
 * (presumably a safety net in case panic() ever returns - confirm).
 */
while (true)
cpu_relax();
/* Unreachable after the endless loop above; present for the bool return type. */
return true;
}
static noinstr void mce_wrmsrl(u32 msr, u64 v)
{
u32 low, high;
if (__this_cpu_read(injectm.finished)) {
int offset;
instrumentation_begin();
offset = msr_to_offset(msr);
if (offset >= 0)
*(u64 *)((char *)this_cpu_ptr(&injectm) + offset) = v;
instrumentation_end();
return;
}
wrmsrl(msr, v);
low = (u32)v;
high = (u32)(v >> 32);
/* See comment in mce_rdmsrl() */
asm volatile("1: wrmsr\n"
"2:\n"
_ASM_EXTABLE_HANDLE(1b, 2b, ex_handler_wrmsr_fault)
: : "c" (msr), "a"(low), "d" (high) : "memory");
}
/*
@@ -745,7 +807,7 @@ log_it:
goto clear_it;
mce_read_aux(&m, i);
m.severity = mce_severity(&m, mca_cfg.tolerant, NULL, false);
m.severity = mce_severity(&m, NULL, mca_cfg.tolerant, NULL, false);
/*
* Don't get the IP here because it's unlikely to
* have anything to do with the actual error location.
@@ -794,7 +856,7 @@ static int mce_no_way_out(struct mce *m, char **msg, unsigned long *validp,
quirk_no_way_out(i, m, regs);
m->bank = i;
if (mce_severity(m, mca_cfg.tolerant, &tmp, true) >= MCE_PANIC_SEVERITY) {
if (mce_severity(m, regs, mca_cfg.tolerant, &tmp, true) >= MCE_PANIC_SEVERITY) {
mce_read_aux(m, i);
*msg = tmp;
return 1;
@@ -872,7 +934,6 @@ static void mce_reign(void)
struct mce *m = NULL;
int global_worst = 0;
char *msg = NULL;
char *nmsg = NULL;
/*
* This CPU is the Monarch and the other CPUs have run
@@ -880,12 +941,10 @@ static void mce_reign(void)
* Grade the severity of the errors of all the CPUs.
*/
for_each_possible_cpu(cpu) {
int severity = mce_severity(&per_cpu(mces_seen, cpu),
mca_cfg.tolerant,
&nmsg, true);
if (severity > global_worst) {
msg = nmsg;
global_worst = severity;
struct mce *mtmp = &per_cpu(mces_seen, cpu);
if (mtmp->severity > global_worst) {
global_worst = mtmp->severity;
m = &per_cpu(mces_seen, cpu);
}
}
@@ -895,8 +954,11 @@ static void mce_reign(void)
* This dumps all the mces in the log buffer and stops the
* other CPUs.
*/
if (m && global_worst >= MCE_PANIC_SEVERITY && mca_cfg.tolerant < 3)
if (m && global_worst >= MCE_PANIC_SEVERITY && mca_cfg.tolerant < 3) {
/* call mce_severity() to get "msg" for panic */
mce_severity(m, NULL, mca_cfg.tolerant, &msg, true);
mce_panic("Fatal machine check", m, msg);
}
/*
* For UC somewhere we let the CPU who detects it handle it.
@@ -1105,7 +1167,7 @@ static noinstr bool mce_check_crashing_cpu(void)
return false;
}
static void __mc_scan_banks(struct mce *m, struct mce *final,
static void __mc_scan_banks(struct mce *m, struct pt_regs *regs, struct mce *final,
unsigned long *toclear, unsigned long *valid_banks,
int no_way_out, int *worst)
{
@@ -1140,7 +1202,7 @@ static void __mc_scan_banks(struct mce *m, struct mce *final,
/* Set taint even when machine check was not enabled. */
add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE);
severity = mce_severity(m, cfg->tolerant, NULL, true);
severity = mce_severity(m, regs, cfg->tolerant, NULL, true);
/*
* When machine check was for corrected/deferred handler don't
@@ -1188,13 +1250,34 @@ static void kill_me_maybe(struct callback_head *cb)
if (!p->mce_ripv)
flags |= MF_MUST_KILL;
if (!memory_failure(p->mce_addr >> PAGE_SHIFT, flags)) {
if (!memory_failure(p->mce_addr >> PAGE_SHIFT, flags) &&
!(p->mce_kflags & MCE_IN_KERNEL_COPYIN)) {
set_mce_nospec(p->mce_addr >> PAGE_SHIFT, p->mce_whole_page);
sync_core();
return;
}
pr_err("Memory error not recovered");
kill_me_now(cb);
if (p->mce_vaddr != (void __user *)-1l) {
force_sig_mceerr(BUS_MCEERR_AR, p->mce_vaddr, PAGE_SHIFT);
} else {
pr_err("Memory error not recovered");
kill_me_now(cb);
}
}
/*
 * Stash the details of machine check @m in the current task and queue the
 * task's mce_kill_me callback with task_work_add().
 *
 * @m:       error record; its addr, kflags and mcgstatus fields are read,
 *           and it is passed to whole_page().
 * @kill_it: non-zero selects kill_me_now() as the callback, zero selects
 *           kill_me_maybe().
 */
static void queue_task_work(struct mce *m, int kill_it)
{
	current->mce_addr = m->addr;
	current->mce_kflags = m->kflags;
	current->mce_ripv = !!(m->mcgstatus & MCG_STATUS_RIPV);
	current->mce_whole_page = whole_page(m);

	current->mce_kill_me.func = kill_it ? kill_me_now : kill_me_maybe;

	task_work_add(current, &current->mce_kill_me, true);
}
/*
@@ -1291,7 +1374,7 @@ noinstr void do_machine_check(struct pt_regs *regs)
order = mce_start(&no_way_out);
}
__mc_scan_banks(&m, final, toclear, valid_banks, no_way_out, &worst);
__mc_scan_banks(&m, regs, final, toclear, valid_banks, no_way_out, &worst);
if (!no_way_out)
mce_clear_state(toclear);
@@ -1313,7 +1396,7 @@ noinstr void do_machine_check(struct pt_regs *regs)
* make sure we have the right "msg".
*/
if (worst >= MCE_PANIC_SEVERITY && mca_cfg.tolerant < 3) {
mce_severity(&m, cfg->tolerant, &msg, true);
mce_severity(&m, regs, cfg->tolerant, &msg, true);
mce_panic("Local fatal machine check!", &m, msg);
}
}
@@ -1330,25 +1413,16 @@ noinstr void do_machine_check(struct pt_regs *regs)
if (worst > 0)
irq_work_queue(&mce_irq_work);
mce_wrmsrl(MSR_IA32_MCG_STATUS, 0);
sync_core();
if (worst != MCE_AR_SEVERITY && !kill_it)
return;
goto out;
/* Fault was in user mode and we need to take some action */
if ((m.cs & 3) == 3) {
/* If this triggers there is no way to recover. Die hard. */
BUG_ON(!on_thread_stack() || !user_mode(regs));
current->mce_addr = m.addr;
current->mce_ripv = !!(m.mcgstatus & MCG_STATUS_RIPV);
current->mce_whole_page = whole_page(&m);
current->mce_kill_me.func = kill_me_maybe;
if (kill_it)
current->mce_kill_me.func = kill_me_now;
task_work_add(current, &current->mce_kill_me, true);
queue_task_work(&m, kill_it);
} else {
/*
* Handle an MCE which has happened in kernel space but from
@@ -1363,7 +1437,12 @@ noinstr void do_machine_check(struct pt_regs *regs)
if (!fixup_exception(regs, X86_TRAP_MC, 0, 0))
mce_panic("Failed kernel mode recovery", &m, msg);
}
if (m.kflags & MCE_IN_KERNEL_COPYIN)
queue_task_work(&m, kill_it);
}
out:
mce_wrmsrl(MSR_IA32_MCG_STATUS, 0);
}
EXPORT_SYMBOL_GPL(do_machine_check);
@@ -2064,7 +2143,7 @@ void mce_disable_bank(int bank)
and older.
* mce=nobootlog Don't log MCEs from before booting.
* mce=bios_cmci_threshold Don't program the CMCI threshold
* mce=recovery force enable memcpy_mcsafe()
* mce=recovery force enable copy_mc_fragile()
*/
static int __init mcheck_enable(char *str)
{
@@ -2672,13 +2751,10 @@ static void __init mcheck_debugfs_init(void)
static void __init mcheck_debugfs_init(void) { }
#endif
DEFINE_STATIC_KEY_FALSE(mcsafe_key);
EXPORT_SYMBOL_GPL(mcsafe_key);
static int __init mcheck_late_init(void)
{
if (mca_cfg.recovery)
static_branch_inc(&mcsafe_key);
enable_copy_mc_fragile();
mcheck_debugfs_init();

View File

@@ -67,7 +67,9 @@ static int dev_mce_log(struct notifier_block *nb, unsigned long val,
unlock:
mutex_unlock(&mce_chrdev_read_mutex);
mce->kflags |= MCE_HANDLED_MCELOG;
if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD)
mce->kflags |= MCE_HANDLED_MCELOG;
return NOTIFY_OK;
}

View File

@@ -38,7 +38,8 @@ int mce_gen_pool_add(struct mce *mce);
int mce_gen_pool_init(void);
struct llist_node *mce_gen_pool_prepare_records(void);
extern int (*mce_severity)(struct mce *a, int tolerant, char **msg, bool is_excp);
extern int (*mce_severity)(struct mce *a, struct pt_regs *regs,
int tolerant, char **msg, bool is_excp);
struct dentry *mce_get_debugfs_dir(void);
extern mce_banks_t mce_banks_ce_disabled;
@@ -185,4 +186,14 @@ extern bool amd_filter_mce(struct mce *m);
static inline bool amd_filter_mce(struct mce *m) { return false; };
#endif
__visible bool ex_handler_rdmsr_fault(const struct exception_table_entry *fixup,
struct pt_regs *regs, int trapnr,
unsigned long error_code,
unsigned long fault_addr);
__visible bool ex_handler_wrmsr_fault(const struct exception_table_entry *fixup,
struct pt_regs *regs, int trapnr,
unsigned long error_code,
unsigned long fault_addr);
#endif /* __X86_MCE_INTERNAL_H__ */

View File

@@ -9,9 +9,14 @@
#include <linux/seq_file.h>
#include <linux/init.h>
#include <linux/debugfs.h>
#include <asm/mce.h>
#include <linux/uaccess.h>
#include <asm/mce.h>
#include <asm/intel-family.h>
#include <asm/traps.h>
#include <asm/insn.h>
#include <asm/insn-eval.h>
#include "internal.h"
/*
@@ -40,9 +45,14 @@ static struct severity {
unsigned char context;
unsigned char excp;
unsigned char covered;
unsigned char cpu_model;
unsigned char cpu_minstepping;
unsigned char bank_lo, bank_hi;
char *msg;
} severities[] = {
#define MCESEV(s, m, c...) { .sev = MCE_ ## s ## _SEVERITY, .msg = m, ## c }
#define BANK_RANGE(l, h) .bank_lo = l, .bank_hi = h
#define MODEL_STEPPING(m, s) .cpu_model = m, .cpu_minstepping = s
#define KERNEL .context = IN_KERNEL
#define USER .context = IN_USER
#define KERNEL_RECOV .context = IN_KERNEL_RECOV
@@ -89,15 +99,10 @@ static struct severity {
PANIC, "In kernel and no restart IP",
EXCP, KERNEL_RECOV, MCGMASK(MCG_STATUS_RIPV, 0)
),
MCESEV(
DEFERRED, "Deferred error",
NOSER, MASK(MCI_STATUS_UC|MCI_STATUS_DEFERRED|MCI_STATUS_POISON, MCI_STATUS_DEFERRED)
),
MCESEV(
KEEP, "Corrected error",
NOSER, BITCLR(MCI_STATUS_UC)
),
/*
* known AO MCACODs reported via MCE or CMC:
*
@@ -113,6 +118,18 @@ static struct severity {
AO, "Action optional: last level cache writeback error",
SER, MASK(MCI_UC_AR|MCACOD, MCI_STATUS_UC|MCACOD_L3WB)
),
/*
* Quirk for Skylake/Cascade Lake. Patrol scrubber may be configured
* to report uncorrected errors using CMCI with a special signature.
* UC=0, MSCOD=0x0010, MCACOD=binary(000X 0000 1100 XXXX) reported
* in one of the memory controller banks.
* Set severity to "AO" for same action as normal patrol scrub error.
*/
MCESEV(
AO, "Uncorrected Patrol Scrub Error",
SER, MASK(MCI_STATUS_UC|MCI_ADDR|0xffffeff0, MCI_ADDR|0x001000c0),
MODEL_STEPPING(INTEL_FAM6_SKYLAKE_X, 4), BANK_RANGE(13, 18)
),
/* ignore OVER for UCNA */
MCESEV(
@@ -198,6 +215,47 @@ static struct severity {
#define mc_recoverable(mcg) (((mcg) & (MCG_STATUS_RIPV|MCG_STATUS_EIPV)) == \
(MCG_STATUS_RIPV|MCG_STATUS_EIPV))
/*
 * Decide whether the instruction at regs->ip is one of a small set of
 * memory-reading instructions whose source address is not a kernel address.
 *
 * The instruction bytes are fetched with copy_from_kernel_nofault() and
 * decoded; only the opcodes listed in the switch below are recognised.
 * On success the computed source address is stored in current->mce_vaddr.
 *
 * Returns true when a recognised instruction reads from an address for
 * which fault_in_kernel_space() is false; returns false if the bytes cannot
 * be read, decoding fails, the opcode is not recognised, or the address is
 * in kernel space.
 */
static bool is_copy_from_user(struct pt_regs *regs)
{
u8 insn_buf[MAX_INSN_SIZE];
struct insn insn;
unsigned long addr;
/* Work on a private copy of the instruction bytes; give up if they cannot be read. */
if (copy_from_kernel_nofault(insn_buf, (void *)regs->ip, MAX_INSN_SIZE))
return false;
kernel_insn_init(&insn, insn_buf, MAX_INSN_SIZE);
insn_get_opcode(&insn);
if (!insn.opcode.got)
return false;
switch (insn.opcode.value) {
/* MOV mem,reg */
case 0x8A: case 0x8B:
/* MOVZ mem,reg */
case 0xB60F: case 0xB70F:
/* ModRM and SIB must both decode before the operand address is resolved. */
insn_get_modrm(&insn);
insn_get_sib(&insn);
if (!insn.modrm.got || !insn.sib.got)
return false;
addr = (unsigned long)insn_get_addr_ref(&insn, regs);
break;
/* REP MOVS */
case 0xA4: case 0xA5:
/* The source address is taken directly from the saved SI register. */
addr = regs->si;
break;
default:
return false;
}
/* A kernel-space source address means this is not a read from user memory. */
if (fault_in_kernel_space(addr))
return false;
/* Remember the address in the task; kill_me_maybe() reads mce_vaddr when signalling. */
current->mce_vaddr = (void __user *)addr;
return true;
}
/*
* If mcgstatus indicated that ip/cs on the stack were
* no good, then "m->cs" will be zero and we will have
@@ -209,15 +267,25 @@ static struct severity {
* distinguish an exception taken in user mode from one
* taken in the kernel.
*/
static int error_context(struct mce *m)
static int error_context(struct mce *m, struct pt_regs *regs)
{
enum handler_type t;
if ((m->cs & 3) == 3)
return IN_USER;
if (!mc_recoverable(m->mcgstatus))
return IN_KERNEL;
if (mc_recoverable(m->mcgstatus) && ex_has_fault_handler(m->ip)) {
t = ex_get_fault_handler_type(m->ip);
if (t == EX_HANDLER_FAULT) {
m->kflags |= MCE_IN_KERNEL_RECOV;
return IN_KERNEL_RECOV;
}
if (t == EX_HANDLER_UACCESS && regs && is_copy_from_user(regs)) {
m->kflags |= MCE_IN_KERNEL_RECOV;
m->kflags |= MCE_IN_KERNEL_COPYIN;
return IN_KERNEL_RECOV;
}
return IN_KERNEL;
}
@@ -253,9 +321,10 @@ static int mce_severity_amd_smca(struct mce *m, enum context err_ctx)
* See AMD Error Scope Hierarchy table in a newer BKDG. For example
* 49125_15h_Models_30h-3Fh_BKDG.pdf, section "RAS Features"
*/
static int mce_severity_amd(struct mce *m, int tolerant, char **msg, bool is_excp)
static int mce_severity_amd(struct mce *m, struct pt_regs *regs, int tolerant,
char **msg, bool is_excp)
{
enum context ctx = error_context(m);
enum context ctx = error_context(m, regs);
/* Processor Context Corrupt, no need to fumble too much, die! */
if (m->status & MCI_STATUS_PCC)
@@ -305,10 +374,11 @@ static int mce_severity_amd(struct mce *m, int tolerant, char **msg, bool is_exc
return MCE_KEEP_SEVERITY;
}
static int mce_severity_intel(struct mce *m, int tolerant, char **msg, bool is_excp)
static int mce_severity_intel(struct mce *m, struct pt_regs *regs,
int tolerant, char **msg, bool is_excp)
{
enum exception excp = (is_excp ? EXCP_CONTEXT : NO_EXCP);
enum context ctx = error_context(m);
enum context ctx = error_context(m, regs);
struct severity *s;
for (s = severities;; s++) {
@@ -324,6 +394,12 @@ static int mce_severity_intel(struct mce *m, int tolerant, char **msg, bool is_e
continue;
if (s->excp && excp != s->excp)
continue;
if (s->cpu_model && boot_cpu_data.x86_model != s->cpu_model)
continue;
if (s->cpu_minstepping && boot_cpu_data.x86_stepping < s->cpu_minstepping)
continue;
if (s->bank_lo && (m->bank < s->bank_lo || m->bank > s->bank_hi))
continue;
if (msg)
*msg = s->msg;
s->covered = 1;
@@ -336,7 +412,7 @@ static int mce_severity_intel(struct mce *m, int tolerant, char **msg, bool is_e
}
/* Default to mce_severity_intel */
int (*mce_severity)(struct mce *m, int tolerant, char **msg, bool is_excp) =
int (*mce_severity)(struct mce *m, struct pt_regs *regs, int tolerant, char **msg, bool is_excp) =
mce_severity_intel;
void __init mcheck_vendor_init_severity(void)

View File

@@ -8,6 +8,7 @@
#include <asm/hpet.h>
#include <asm/setup.h>
#include <asm/mce.h>
#if defined(CONFIG_X86_IO_APIC) && defined(CONFIG_SMP) && defined(CONFIG_PCI)
@@ -624,10 +625,6 @@ static void amd_disable_seq_and_redirect_scrub(struct pci_dev *dev)
DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_16H_NB_F3,
amd_disable_seq_and_redirect_scrub);
#if defined(CONFIG_X86_64) && defined(CONFIG_X86_MCE)
#include <linux/jump_label.h>
#include <asm/string_64.h>
/* Ivy Bridge, Haswell, Broadwell */
static void quirk_intel_brickland_xeon_ras_cap(struct pci_dev *pdev)
{
@@ -636,7 +633,7 @@ static void quirk_intel_brickland_xeon_ras_cap(struct pci_dev *pdev)
pci_read_config_dword(pdev, 0x84, &capid0);
if (capid0 & 0x10)
static_branch_inc(&mcsafe_key);
enable_copy_mc_fragile();
}
/* Skylake */
@@ -653,7 +650,7 @@ static void quirk_intel_purley_xeon_ras_cap(struct pci_dev *pdev)
* enabled, so memory machine check recovery is also enabled.
*/
if ((capid0 & 0xc0) == 0xc0 || (capid5 & 0x1e0))
static_branch_inc(&mcsafe_key);
enable_copy_mc_fragile();
}
DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x0ec3, quirk_intel_brickland_xeon_ras_cap);
@@ -661,7 +658,6 @@ DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x2fc0, quirk_intel_brickland_xeon_
DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x6fc0, quirk_intel_brickland_xeon_ras_cap);
DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x2083, quirk_intel_purley_xeon_ras_cap);
#endif
#endif
bool x86_apple_machine;
EXPORT_SYMBOL(x86_apple_machine);

View File

@@ -44,6 +44,7 @@ obj-$(CONFIG_SMP) += msr-smp.o cache-smp.o
lib-y := delay.o misc.o cmdline.o cpu.o
lib-y += usercopy_$(BITS).o usercopy.o getuser.o putuser.o
lib-y += memcpy_$(BITS).o
lib-$(CONFIG_ARCH_HAS_COPY_MC) += copy_mc.o copy_mc_64.o
lib-$(CONFIG_INSTRUCTION_DECODER) += insn.o inat.o insn-eval.o
lib-$(CONFIG_RANDOMIZE_BASE) += kaslr.o
lib-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o

96
arch/x86/lib/copy_mc.c Normal file
View File

@@ -0,0 +1,96 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright(c) 2016-2020 Intel Corporation. All rights reserved. */
#include <linux/jump_label.h>
#include <linux/uaccess.h>
#include <linux/export.h>
#include <linux/string.h>
#include <linux/types.h>
#include <asm/mce.h>
#ifdef CONFIG_X86_MCE
/*
* See COPY_MC_TEST for self-test of the copy_mc_fragile()
* implementation.
*/
static DEFINE_STATIC_KEY_FALSE(copy_mc_fragile_key);
/*
 * Opt in to the careful copy_mc_fragile() path by incrementing
 * copy_mc_fragile_key, the static key that copy_mc_fragile_enabled tests.
 */
void enable_copy_mc_fragile(void)
{
static_branch_inc(&copy_mc_fragile_key);
}
#define copy_mc_fragile_enabled (static_branch_unlikely(&copy_mc_fragile_key))
/*
 * Byte-granular retry in the spirit of copy_user_handle_tail: push one
 * byte at a time through copy_mc_fragile() until it reports a failure
 * (write fault point or source exception point) or nothing is left.
 *
 * Returns the number of bytes that were not copied.
 */
__visible notrace unsigned long
copy_mc_fragile_handle_tail(char *to, char *from, unsigned len)
{
	while (len) {
		/* A non-zero result means this byte could not be copied. */
		if (copy_mc_fragile(to, from, 1))
			break;
		to++;
		from++;
		len--;
	}

	return len;
}
#else
/*
 * No point in doing careful copying, or consulting a static key when
 * there is no #MC handler in the CONFIG_X86_MCE=n case, so the enable
 * hook is an empty stub in this configuration.
 */
void enable_copy_mc_fragile(void)
{
}
#define copy_mc_fragile_enabled (0)
#endif
unsigned long copy_mc_enhanced_fast_string(void *dst, const void *src, unsigned len);
/**
 * copy_mc_to_kernel - memory copy that handles source exceptions
 *
 * @dst:	destination address
 * @src:	source address
 * @len:	number of bytes to copy
 *
 * Use the 'fragile' version on systems that benefit from avoiding
 * corner case poison consumption scenarios, for example, accessing
 * poison across 2 cachelines with a single instruction. Almost all
 * other use cases can use copy_mc_enhanced_fast_string() for a fast
 * recoverable copy, or fall back to plain memcpy().
 *
 * Return 0 for success, or number of bytes not copied if there was an
 * exception.
 */
unsigned long __must_check copy_mc_to_kernel(void *dst, const void *src, unsigned len)
{
	/* The plain memcpy() fallback cannot report a failure. */
	unsigned long remaining = 0;

	if (copy_mc_fragile_enabled)
		remaining = copy_mc_fragile(dst, src, len);
	else if (static_cpu_has(X86_FEATURE_ERMS))
		remaining = copy_mc_enhanced_fast_string(dst, src, len);
	else
		memcpy(dst, src, len);

	return remaining;
}
EXPORT_SYMBOL_GPL(copy_mc_to_kernel);
/*
 * copy_mc_to_user - copy to @dst with source exception handling
 *
 * Picks the copy routine the same way copy_mc_to_kernel() does, but
 * brackets the fragile and fast-string copies with __uaccess_begin() /
 * __uaccess_end() and falls back to copy_user_generic() instead of
 * memcpy().
 *
 * Returns whatever the selected routine returns; presumably non-zero
 * means bytes left uncopied in every case (confirm for
 * copy_user_generic()).
 */
unsigned long __must_check copy_mc_to_user(void *dst, const void *src, unsigned len)
{
	unsigned long remaining;

	if (copy_mc_fragile_enabled) {
		__uaccess_begin();
		remaining = copy_mc_fragile(dst, src, len);
		__uaccess_end();
	} else if (static_cpu_has(X86_FEATURE_ERMS)) {
		__uaccess_begin();
		remaining = copy_mc_enhanced_fast_string(dst, src, len);
		__uaccess_end();
	} else {
		remaining = copy_user_generic(dst, src, len);
	}

	return remaining;
}

163
arch/x86/lib/copy_mc_64.S Normal file
View File

@@ -0,0 +1,163 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/* Copyright(c) 2016-2020 Intel Corporation. All rights reserved. */
#include <linux/linkage.h>
#include <asm/copy_mc_test.h>
#include <asm/export.h>
#include <asm/asm.h>
#ifndef CONFIG_UML
#ifdef CONFIG_X86_MCE
COPY_MC_TEST_CTL
/*
 * copy_mc_fragile - copy memory with indication if an exception / fault happened
 *
 * The 'fragile' version is opted into by platform quirks and takes
 * pains to avoid unrecoverable corner cases like 'fast-string'
 * instruction sequences, and consuming poison across a cacheline
 * boundary. The non-fragile version is equivalent to memcpy()
 * regardless of CPU machine-check-recovery capability.
 *
 * Register use, as read from the code below:
 *   %rdi  destination pointer (advanced as bytes are written)
 *   %rsi  source pointer (advanced as bytes are read)
 *   %edx  byte count on entry; later the count of trailing bytes
 *   %ecx  loop counter for the current phase
 *   %eax  return value: 0 on success, bytes not copied on failure
 *
 * The COPY_MC_TEST_* lines are self-test hooks from
 * <asm/copy_mc_test.h>; their expansion is not visible here.
 */
SYM_FUNC_START(copy_mc_fragile)
cmpl $8, %edx
/* Less than 8 bytes? Go to byte copy loop */
jb .L_no_whole_words
/* Check for bad alignment of source */
testl $7, %esi
/* Already aligned */
jz .L_8byte_aligned
/* Copy one byte at a time until source is 8-byte aligned */
/* %ecx = 8 - (src & 7): number of leading bytes before alignment */
movl %esi, %ecx
andl $7, %ecx
subl $8, %ecx
negl %ecx
/* %edx now counts only the bytes that follow the leading ones */
subl %ecx, %edx
.L_read_leading_bytes:
movb (%rsi), %al
COPY_MC_TEST_SRC %rsi 1 .E_leading_bytes
COPY_MC_TEST_DST %rdi 1 .E_leading_bytes
.L_write_leading_bytes:
movb %al, (%rdi)
incq %rsi
incq %rdi
decl %ecx
jnz .L_read_leading_bytes
.L_8byte_aligned:
/* Split the remaining count: %ecx = whole 8-byte words, %edx = trailing bytes */
movl %edx, %ecx
andl $7, %edx
shrl $3, %ecx
jz .L_no_whole_words
.L_read_words:
movq (%rsi), %r8
COPY_MC_TEST_SRC %rsi 8 .E_read_words
COPY_MC_TEST_DST %rdi 8 .E_write_words
.L_write_words:
movq %r8, (%rdi)
addq $8, %rsi
addq $8, %rdi
decl %ecx
jnz .L_read_words
/* Any trailing bytes? */
.L_no_whole_words:
andl %edx, %edx
jz .L_done_memcpy_trap
/* Copy trailing bytes */
movl %edx, %ecx
.L_read_trailing_bytes:
movb (%rsi), %al
COPY_MC_TEST_SRC %rsi 1 .E_trailing_bytes
COPY_MC_TEST_DST %rdi 1 .E_trailing_bytes
.L_write_trailing_bytes:
movb %al, (%rdi)
incq %rsi
incq %rdi
decl %ecx
jnz .L_read_trailing_bytes
/* Copy successful. Return zero */
.L_done_memcpy_trap:
xorl %eax, %eax
.L_done:
ret
SYM_FUNC_END(copy_mc_fragile)
EXPORT_SYMBOL_GPL(copy_mc_fragile)
.section .fixup, "ax"
/*
 * Return number of bytes not copied for any failure. Note that
 * there is no "tail" handling since the source buffer is 8-byte
 * aligned and poison is cacheline aligned.
 *
 * The three labels fall through into each other:
 *   .E_read_words      converts the remaining word count in %ecx to bytes
 *   .E_leading_bytes   adds the not-yet-started byte count held in %edx
 *   .E_trailing_bytes  returns %ecx as the result
 */
.E_read_words:
shll $3, %ecx
.E_leading_bytes:
addl %edx, %ecx
.E_trailing_bytes:
mov %ecx, %eax
jmp .L_done
/*
 * For write fault handling, given the destination is unaligned,
 * we handle faults on multi-byte writes with a byte-by-byte
 * copy up to the write-protected page.
 *
 * The remaining byte count (words * 8 + trailing bytes) is placed in
 * %edx before jumping to copy_mc_fragile_handle_tail, with %rdi and
 * %rsi still pointing at the faulting position.
 */
.E_write_words:
shll $3, %ecx
addl %edx, %ecx
movl %ecx, %edx
jmp copy_mc_fragile_handle_tail
.previous
/* Exception table: each read/write label is paired with its fixup entry point */
_ASM_EXTABLE_FAULT(.L_read_leading_bytes, .E_leading_bytes)
_ASM_EXTABLE_FAULT(.L_read_words, .E_read_words)
_ASM_EXTABLE_FAULT(.L_read_trailing_bytes, .E_trailing_bytes)
_ASM_EXTABLE(.L_write_leading_bytes, .E_leading_bytes)
_ASM_EXTABLE(.L_write_words, .E_write_words)
_ASM_EXTABLE(.L_write_trailing_bytes, .E_trailing_bytes)
#endif /* CONFIG_X86_MCE */
/*
 * copy_mc_enhanced_fast_string - memory copy with exception handling
 *
 * Fast string copy + fault / exception handling. If the CPU does
 * support machine check exception recovery, but does not support
 * recovering from fast-string exceptions then this CPU needs to be
 * added to the copy_mc_fragile_key set of quirks. Otherwise, absent any
 * machine check recovery support this version should be no slower than
 * standard memcpy.
 *
 * Register use, as read from the code below: %rdi destination,
 * %rsi source, %rdx byte count; %rax holds the return value
 * (0 on success, bytes remaining on a fault).
 */
SYM_FUNC_START(copy_mc_enhanced_fast_string)
/*
 * NOTE(review): %rax is overwritten on both the success path and the
 * fixup path before returning, so this load looks unused - confirm.
 */
movq %rdi, %rax
/*
 * NOTE(review): all 64 bits of %rdx are used as the count, while the
 * C prototype declares the length as 'unsigned'; presumably callers
 * always arrive with the upper half clear - confirm.
 */
movq %rdx, %rcx
.L_copy:
rep movsb
/* Copy successful. Return zero */
xorl %eax, %eax
ret
SYM_FUNC_END(copy_mc_enhanced_fast_string)
.section .fixup, "ax"
.E_copy:
/*
 * On fault %rcx is updated such that the copy instruction could
 * optionally be restarted at the fault position, i.e. it
 * contains 'bytes remaining'. A non-zero return indicates error
 * to callers such as copy_mc_to_kernel(), or indicates short
 * transfers to user-copy routines.
 */
movq %rcx, %rax
ret
.previous
/* A fault on the rep movsb at .L_copy resumes at .E_copy */
_ASM_EXTABLE_FAULT(.L_copy, .E_copy)
#endif /* !CONFIG_UML */

View File

@@ -15,6 +15,7 @@
#include <asm/asm.h>
#include <asm/smap.h>
#include <asm/export.h>
#include <asm/trapnr.h>
.macro ALIGN_DESTINATION
/* check for bad alignment of destination */
@@ -36,8 +37,8 @@
jmp .Lcopy_user_handle_tail
.previous
_ASM_EXTABLE_UA(100b, 103b)
_ASM_EXTABLE_UA(101b, 103b)
_ASM_EXTABLE_CPY(100b, 103b)
_ASM_EXTABLE_CPY(101b, 103b)
.endm
/*
@@ -116,26 +117,26 @@ SYM_FUNC_START(copy_user_generic_unrolled)
60: jmp .Lcopy_user_handle_tail /* ecx is zerorest also */
.previous
_ASM_EXTABLE_UA(1b, 30b)
_ASM_EXTABLE_UA(2b, 30b)
_ASM_EXTABLE_UA(3b, 30b)
_ASM_EXTABLE_UA(4b, 30b)
_ASM_EXTABLE_UA(5b, 30b)
_ASM_EXTABLE_UA(6b, 30b)
_ASM_EXTABLE_UA(7b, 30b)
_ASM_EXTABLE_UA(8b, 30b)
_ASM_EXTABLE_UA(9b, 30b)
_ASM_EXTABLE_UA(10b, 30b)
_ASM_EXTABLE_UA(11b, 30b)
_ASM_EXTABLE_UA(12b, 30b)
_ASM_EXTABLE_UA(13b, 30b)
_ASM_EXTABLE_UA(14b, 30b)
_ASM_EXTABLE_UA(15b, 30b)
_ASM_EXTABLE_UA(16b, 30b)
_ASM_EXTABLE_UA(18b, 40b)
_ASM_EXTABLE_UA(19b, 40b)
_ASM_EXTABLE_UA(21b, 50b)
_ASM_EXTABLE_UA(22b, 50b)
_ASM_EXTABLE_CPY(1b, 30b)
_ASM_EXTABLE_CPY(2b, 30b)
_ASM_EXTABLE_CPY(3b, 30b)
_ASM_EXTABLE_CPY(4b, 30b)
_ASM_EXTABLE_CPY(5b, 30b)
_ASM_EXTABLE_CPY(6b, 30b)
_ASM_EXTABLE_CPY(7b, 30b)
_ASM_EXTABLE_CPY(8b, 30b)
_ASM_EXTABLE_CPY(9b, 30b)
_ASM_EXTABLE_CPY(10b, 30b)
_ASM_EXTABLE_CPY(11b, 30b)
_ASM_EXTABLE_CPY(12b, 30b)
_ASM_EXTABLE_CPY(13b, 30b)
_ASM_EXTABLE_CPY(14b, 30b)
_ASM_EXTABLE_CPY(15b, 30b)
_ASM_EXTABLE_CPY(16b, 30b)
_ASM_EXTABLE_CPY(18b, 40b)
_ASM_EXTABLE_CPY(19b, 40b)
_ASM_EXTABLE_CPY(21b, 50b)
_ASM_EXTABLE_CPY(22b, 50b)
SYM_FUNC_END(copy_user_generic_unrolled)
EXPORT_SYMBOL(copy_user_generic_unrolled)
@@ -180,8 +181,8 @@ SYM_FUNC_START(copy_user_generic_string)
jmp .Lcopy_user_handle_tail
.previous
_ASM_EXTABLE_UA(1b, 11b)
_ASM_EXTABLE_UA(3b, 12b)
_ASM_EXTABLE_CPY(1b, 11b)
_ASM_EXTABLE_CPY(3b, 12b)
SYM_FUNC_END(copy_user_generic_string)
EXPORT_SYMBOL(copy_user_generic_string)
@@ -213,7 +214,7 @@ SYM_FUNC_START(copy_user_enhanced_fast_string)
jmp .Lcopy_user_handle_tail
.previous
_ASM_EXTABLE_UA(1b, 12b)
_ASM_EXTABLE_CPY(1b, 12b)
SYM_FUNC_END(copy_user_enhanced_fast_string)
EXPORT_SYMBOL(copy_user_enhanced_fast_string)
@@ -221,6 +222,7 @@ EXPORT_SYMBOL(copy_user_enhanced_fast_string)
* Try to copy last bytes and clear the rest if needed.
* Since protection fault in copy_from/to_user is not a normal situation,
* it is not necessary to optimize tail handling.
* Don't try to copy the tail if machine check happened
*
* Input:
* rdi destination
@@ -232,12 +234,25 @@ EXPORT_SYMBOL(copy_user_enhanced_fast_string)
*/
SYM_CODE_START_LOCAL(.Lcopy_user_handle_tail)
movl %edx,%ecx
cmp $X86_TRAP_MC,%eax /* check if X86_TRAP_MC */
je 3f
1: rep movsb
2: mov %ecx,%eax
ASM_CLAC
ret
_ASM_EXTABLE_UA(1b, 2b)
/*
* Return zero to pretend that this copy succeeded. This
* is counter-intuitive, but needed to prevent the code
* in lib/iov_iter.c from retrying and running back into
* the poison cache line again. The machine check handler
* will ensure that a SIGBUS is sent to the task.
*/
3: xorl %eax,%eax
ASM_CLAC
ret
_ASM_EXTABLE_CPY(1b, 2b)
SYM_CODE_END(.Lcopy_user_handle_tail)
/*
@@ -366,27 +381,27 @@ SYM_FUNC_START(__copy_user_nocache)
jmp .Lcopy_user_handle_tail
.previous
_ASM_EXTABLE_UA(1b, .L_fixup_4x8b_copy)
_ASM_EXTABLE_UA(2b, .L_fixup_4x8b_copy)
_ASM_EXTABLE_UA(3b, .L_fixup_4x8b_copy)
_ASM_EXTABLE_UA(4b, .L_fixup_4x8b_copy)
_ASM_EXTABLE_UA(5b, .L_fixup_4x8b_copy)
_ASM_EXTABLE_UA(6b, .L_fixup_4x8b_copy)
_ASM_EXTABLE_UA(7b, .L_fixup_4x8b_copy)
_ASM_EXTABLE_UA(8b, .L_fixup_4x8b_copy)
_ASM_EXTABLE_UA(9b, .L_fixup_4x8b_copy)
_ASM_EXTABLE_UA(10b, .L_fixup_4x8b_copy)
_ASM_EXTABLE_UA(11b, .L_fixup_4x8b_copy)
_ASM_EXTABLE_UA(12b, .L_fixup_4x8b_copy)
_ASM_EXTABLE_UA(13b, .L_fixup_4x8b_copy)
_ASM_EXTABLE_UA(14b, .L_fixup_4x8b_copy)
_ASM_EXTABLE_UA(15b, .L_fixup_4x8b_copy)
_ASM_EXTABLE_UA(16b, .L_fixup_4x8b_copy)
_ASM_EXTABLE_UA(20b, .L_fixup_8b_copy)
_ASM_EXTABLE_UA(21b, .L_fixup_8b_copy)
_ASM_EXTABLE_UA(30b, .L_fixup_4b_copy)
_ASM_EXTABLE_UA(31b, .L_fixup_4b_copy)
_ASM_EXTABLE_UA(40b, .L_fixup_1b_copy)
_ASM_EXTABLE_UA(41b, .L_fixup_1b_copy)
_ASM_EXTABLE_CPY(1b, .L_fixup_4x8b_copy)
_ASM_EXTABLE_CPY(2b, .L_fixup_4x8b_copy)
_ASM_EXTABLE_CPY(3b, .L_fixup_4x8b_copy)
_ASM_EXTABLE_CPY(4b, .L_fixup_4x8b_copy)
_ASM_EXTABLE_CPY(5b, .L_fixup_4x8b_copy)
_ASM_EXTABLE_CPY(6b, .L_fixup_4x8b_copy)
_ASM_EXTABLE_CPY(7b, .L_fixup_4x8b_copy)
_ASM_EXTABLE_CPY(8b, .L_fixup_4x8b_copy)
_ASM_EXTABLE_CPY(9b, .L_fixup_4x8b_copy)
_ASM_EXTABLE_CPY(10b, .L_fixup_4x8b_copy)
_ASM_EXTABLE_CPY(11b, .L_fixup_4x8b_copy)
_ASM_EXTABLE_CPY(12b, .L_fixup_4x8b_copy)
_ASM_EXTABLE_CPY(13b, .L_fixup_4x8b_copy)
_ASM_EXTABLE_CPY(14b, .L_fixup_4x8b_copy)
_ASM_EXTABLE_CPY(15b, .L_fixup_4x8b_copy)
_ASM_EXTABLE_CPY(16b, .L_fixup_4x8b_copy)
_ASM_EXTABLE_CPY(20b, .L_fixup_8b_copy)
_ASM_EXTABLE_CPY(21b, .L_fixup_8b_copy)
_ASM_EXTABLE_CPY(30b, .L_fixup_4b_copy)
_ASM_EXTABLE_CPY(31b, .L_fixup_4b_copy)
_ASM_EXTABLE_CPY(40b, .L_fixup_1b_copy)
_ASM_EXTABLE_CPY(41b, .L_fixup_1b_copy)
SYM_FUNC_END(__copy_user_nocache)
EXPORT_SYMBOL(__copy_user_nocache)

View File

@@ -4,7 +4,6 @@
#include <linux/linkage.h>
#include <asm/errno.h>
#include <asm/cpufeatures.h>
#include <asm/mcsafe_test.h>
#include <asm/alternative-asm.h>
#include <asm/export.h>
@@ -187,117 +186,3 @@ SYM_FUNC_START_LOCAL(memcpy_orig)
SYM_FUNC_END(memcpy_orig)
.popsection
#ifndef CONFIG_UML
MCSAFE_TEST_CTL
/*
* __memcpy_mcsafe - memory copy with machine check exception handling
* Note that we only catch machine checks when reading the source addresses.
* Writes to target are posted and don't generate machine checks.
*/
SYM_FUNC_START(__memcpy_mcsafe)
cmpl $8, %edx
/* Less than 8 bytes? Go to byte copy loop */
jb .L_no_whole_words
/* Check for bad alignment of source */
testl $7, %esi
/* Already aligned */
jz .L_8byte_aligned
/* Copy one byte at a time until source is 8-byte aligned */
movl %esi, %ecx
andl $7, %ecx
subl $8, %ecx
negl %ecx
subl %ecx, %edx
.L_read_leading_bytes:
movb (%rsi), %al
MCSAFE_TEST_SRC %rsi 1 .E_leading_bytes
MCSAFE_TEST_DST %rdi 1 .E_leading_bytes
.L_write_leading_bytes:
movb %al, (%rdi)
incq %rsi
incq %rdi
decl %ecx
jnz .L_read_leading_bytes
.L_8byte_aligned:
movl %edx, %ecx
andl $7, %edx
shrl $3, %ecx
jz .L_no_whole_words
.L_read_words:
movq (%rsi), %r8
MCSAFE_TEST_SRC %rsi 8 .E_read_words
MCSAFE_TEST_DST %rdi 8 .E_write_words
.L_write_words:
movq %r8, (%rdi)
addq $8, %rsi
addq $8, %rdi
decl %ecx
jnz .L_read_words
/* Any trailing bytes? */
.L_no_whole_words:
andl %edx, %edx
jz .L_done_memcpy_trap
/* Copy trailing bytes */
movl %edx, %ecx
.L_read_trailing_bytes:
movb (%rsi), %al
MCSAFE_TEST_SRC %rsi 1 .E_trailing_bytes
MCSAFE_TEST_DST %rdi 1 .E_trailing_bytes
.L_write_trailing_bytes:
movb %al, (%rdi)
incq %rsi
incq %rdi
decl %ecx
jnz .L_read_trailing_bytes
/* Copy successful. Return zero */
.L_done_memcpy_trap:
xorl %eax, %eax
.L_done:
ret
SYM_FUNC_END(__memcpy_mcsafe)
EXPORT_SYMBOL_GPL(__memcpy_mcsafe)
.section .fixup, "ax"
/*
* Return number of bytes not copied for any failure. Note that
* there is no "tail" handling since the source buffer is 8-byte
* aligned and poison is cacheline aligned.
*/
.E_read_words:
shll $3, %ecx
.E_leading_bytes:
addl %edx, %ecx
.E_trailing_bytes:
mov %ecx, %eax
jmp .L_done
/*
* For write fault handling, given the destination is unaligned,
* we handle faults on multi-byte writes with a byte-by-byte
* copy up to the write-protected page.
*/
.E_write_words:
shll $3, %ecx
addl %edx, %ecx
movl %ecx, %edx
jmp mcsafe_handle_tail
.previous
_ASM_EXTABLE_FAULT(.L_read_leading_bytes, .E_leading_bytes)
_ASM_EXTABLE_FAULT(.L_read_words, .E_read_words)
_ASM_EXTABLE_FAULT(.L_read_trailing_bytes, .E_trailing_bytes)
_ASM_EXTABLE(.L_write_leading_bytes, .E_leading_bytes)
_ASM_EXTABLE(.L_write_words, .E_write_words)
_ASM_EXTABLE(.L_write_trailing_bytes, .E_trailing_bytes)
#endif

View File

@@ -56,27 +56,6 @@ unsigned long clear_user(void __user *to, unsigned long n)
}
EXPORT_SYMBOL(clear_user);
/*
* Similar to copy_user_handle_tail, probe for the write fault point,
* but reuse __memcpy_mcsafe in case a new read error is encountered.
* clac() is handled in _copy_to_iter_mcsafe().
*/
__visible notrace unsigned long
mcsafe_handle_tail(char *to, char *from, unsigned len)
{
for (; len; --len, to++, from++) {
/*
* Call the assembly routine back directly since
* memcpy_mcsafe() may silently fallback to memcpy.
*/
unsigned long rem = __memcpy_mcsafe(to, from, 1);
if (rem)
break;
}
return len;
}
#ifdef CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE
/**
* clean_cache_range - write back a cache range with CLWB

View File

@@ -80,6 +80,18 @@ __visible bool ex_handler_uaccess(const struct exception_table_entry *fixup,
}
EXPORT_SYMBOL(ex_handler_uaccess);
/*
 * Exception-table handler for the copy routines: warns once when the
 * trap is a general protection fault, hands the trap number to the
 * fixup code in regs->ax and resumes execution at the fixup address.
 *
 * Always returns true.
 */
__visible bool ex_handler_copy(const struct exception_table_entry *fixup,
			       struct pt_regs *regs, int trapnr,
			       unsigned long error_code,
			       unsigned long fault_addr)
{
	const bool gp_fault = (trapnr == X86_TRAP_GP);

	WARN_ONCE(gp_fault, "General protection fault in user access. Non-canonical address?");

	/* Let the fixup code see which trap brought it here. */
	regs->ax = trapnr;
	regs->ip = ex_fixup_addr(fixup);

	return true;
}
EXPORT_SYMBOL(ex_handler_copy);
__visible bool ex_handler_rdmsr_unsafe(const struct exception_table_entry *fixup,
struct pt_regs *regs, int trapnr,
unsigned long error_code,
@@ -125,17 +137,21 @@ __visible bool ex_handler_clear_fs(const struct exception_table_entry *fixup,
}
EXPORT_SYMBOL(ex_handler_clear_fs);
__visible bool ex_has_fault_handler(unsigned long ip)
enum handler_type ex_get_fault_handler_type(unsigned long ip)
{
const struct exception_table_entry *e;
ex_handler_t handler;
e = search_exception_tables(ip);
if (!e)
return false;
return EX_HANDLER_NONE;
handler = ex_fixup_handler(e);
return handler == ex_handler_fault;
if (handler == ex_handler_fault)
return EX_HANDLER_FAULT;
else if (handler == ex_handler_uaccess || handler == ex_handler_copy)
return EX_HANDLER_UACCESS;
else
return EX_HANDLER_OTHER;
}
int fixup_exception(struct pt_regs *regs, int trapnr, unsigned long error_code,

View File

@@ -1128,7 +1128,7 @@ access_error(unsigned long error_code, struct vm_area_struct *vma)
return 0;
}
static int fault_in_kernel_space(unsigned long address)
bool fault_in_kernel_space(unsigned long address)
{
/*
* On 64-bit systems, the vsyscall page is at an address above

View File

@@ -999,10 +999,8 @@ static void decode_smca_error(struct mce *m)
pr_emerg(HW_ERR "%s Ext. Error Code: %d", ip_name, xec);
/* Only print the decode of valid error codes */
if (xec < smca_mce_descs[bank_type].num_descs &&
(hwid->xec_bitmap & BIT_ULL(xec))) {
if (xec < smca_mce_descs[bank_type].num_descs)
pr_cont(", %s.\n", smca_mce_descs[bank_type].descs[xec]);
}
if (bank_type == SMCA_UMC && xec == 0 && decode_dram_ecc)
decode_dram_ecc(cpu_to_node(m->extcpu), m);

View File

@@ -49,7 +49,7 @@ do { \
#define pmem_assign(dest, src) ((dest) = (src))
#endif
#if defined(__HAVE_ARCH_MEMCPY_MCSAFE) && defined(DM_WRITECACHE_HAS_PMEM)
#if IS_ENABLED(CONFIG_ARCH_HAS_COPY_MC) && defined(DM_WRITECACHE_HAS_PMEM)
#define DM_WRITECACHE_HANDLE_HARDWARE_ERRORS
#endif
@@ -992,7 +992,8 @@ static void writecache_resume(struct dm_target *ti)
}
wc->freelist_size = 0;
r = memcpy_mcsafe(&sb_seq_count, &sb(wc)->seq_count, sizeof(uint64_t));
r = copy_mc_to_kernel(&sb_seq_count, &sb(wc)->seq_count,
sizeof(uint64_t));
if (r) {
writecache_error(wc, r, "hardware memory error when reading superblock: %d", r);
sb_seq_count = cpu_to_le64(0);
@@ -1008,7 +1009,8 @@ static void writecache_resume(struct dm_target *ti)
e->seq_count = -1;
continue;
}
r = memcpy_mcsafe(&wme, memory_entry(wc, e), sizeof(struct wc_memory_entry));
r = copy_mc_to_kernel(&wme, memory_entry(wc, e),
sizeof(struct wc_memory_entry));
if (r) {
writecache_error(wc, r, "hardware memory error when reading metadata entry %lu: %d",
(unsigned long)b, r);
@@ -1206,7 +1208,7 @@ static void bio_copy_block(struct dm_writecache *wc, struct bio *bio, void *data
if (rw == READ) {
int r;
r = memcpy_mcsafe(buf, data, size);
r = copy_mc_to_kernel(buf, data, size);
flush_dcache_page(bio_page(bio));
if (unlikely(r)) {
writecache_error(wc, r, "hardware memory error when reading data: %d", r);
@@ -2349,7 +2351,7 @@ invalid_optional:
}
}
r = memcpy_mcsafe(&s, sb(wc), sizeof(struct wc_memory_superblock));
r = copy_mc_to_kernel(&s, sb(wc), sizeof(struct wc_memory_superblock));
if (r) {
ti->error = "Hardware memory error when reading superblock";
goto bad;
@@ -2360,7 +2362,8 @@ invalid_optional:
ti->error = "Unable to initialize device";
goto bad;
}
r = memcpy_mcsafe(&s, sb(wc), sizeof(struct wc_memory_superblock));
r = copy_mc_to_kernel(&s, sb(wc),
sizeof(struct wc_memory_superblock));
if (r) {
ti->error = "Hardware memory error when reading superblock";
goto bad;

View File

@@ -268,7 +268,7 @@ static int nsio_rw_bytes(struct nd_namespace_common *ndns,
if (rw == READ) {
if (unlikely(is_bad_pmem(&nsio->bb, sector, sz_align)))
return -EIO;
if (memcpy_mcsafe(buf, nsio->addr + offset, size) != 0)
if (copy_mc_to_kernel(buf, nsio->addr + offset, size) != 0)
return -EIO;
return 0;
}

View File

@@ -125,7 +125,7 @@ static blk_status_t read_pmem(struct page *page, unsigned int off,
while (len) {
mem = kmap_atomic(page);
chunk = min_t(unsigned int, len, PAGE_SIZE - off);
rem = memcpy_mcsafe(mem + off, pmem_addr, chunk);
rem = copy_mc_to_kernel(mem + off, pmem_addr, chunk);
kunmap_atomic(mem);
if (rem)
return BLK_STS_IOERR;
@@ -304,7 +304,7 @@ static long pmem_dax_direct_access(struct dax_device *dax_dev,
/*
* Use the 'no check' versions of copy_from_iter_flushcache() and
* copy_to_iter_mcsafe() to bypass HARDENED_USERCOPY overhead. Bounds
* copy_mc_to_iter() to bypass HARDENED_USERCOPY overhead. Bounds
* checking, both file offset and device offset, is handled by
* dax_iomap_actor()
*/
@@ -317,7 +317,7 @@ static size_t pmem_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff,
static size_t pmem_copy_to_iter(struct dax_device *dax_dev, pgoff_t pgoff,
void *addr, size_t bytes, struct iov_iter *i)
{
return _copy_to_iter_mcsafe(addr, bytes, i);
return _copy_mc_to_iter(addr, bytes, i);
}
static const struct dax_operations pmem_dax_ops = {

View File

@@ -435,7 +435,7 @@ DEFINE_DEBUGFS_ATTRIBUTE(action_threshold_ops, u64_get, action_threshold_set, "%
static const char * const bins[] = { "00", "01", "10", "11" };
static int array_dump(struct seq_file *m, void *v)
static int array_show(struct seq_file *m, void *v)
{
struct ce_array *ca = &ce_arr;
int i;
@@ -467,18 +467,7 @@ static int array_dump(struct seq_file *m, void *v)
return 0;
}
static int array_open(struct inode *inode, struct file *filp)
{
return single_open(filp, array_dump, NULL);
}
static const struct file_operations array_ops = {
.owner = THIS_MODULE,
.open = array_open,
.read = seq_read,
.llseek = seq_lseek,
.release = single_release,
};
DEFINE_SHOW_ATTRIBUTE(array);
static int __init create_debugfs_nodes(void)
{
@@ -513,7 +502,7 @@ static int __init create_debugfs_nodes(void)
goto err;
}
array = debugfs_create_file("array", S_IRUSR, d, NULL, &array_ops);
array = debugfs_create_file("array", S_IRUSR, d, NULL, &array_fops);
if (!array) {
pr_warn("Error creating array debugfs node!\n");
goto err;
@@ -553,20 +542,20 @@ static struct notifier_block cec_nb = {
.priority = MCE_PRIO_CEC,
};
static void __init cec_init(void)
static int __init cec_init(void)
{
if (ce_arr.disabled)
return;
return -ENODEV;
ce_arr.array = (void *)get_zeroed_page(GFP_KERNEL);
if (!ce_arr.array) {
pr_err("Error allocating CE array page!\n");
return;
return -ENOMEM;
}
if (create_debugfs_nodes()) {
free_page((unsigned long)ce_arr.array);
return;
return -ENOMEM;
}
INIT_DELAYED_WORK(&cec_work, cec_work_fn);
@@ -575,6 +564,7 @@ static void __init cec_init(void)
mce_register_decode_chain(&cec_nb);
pr_info("Correctable Errors collector initialized.\n");
return 0;
}
late_initcall(cec_init);

View File

@@ -1313,6 +1313,8 @@ struct task_struct {
#endif
#ifdef CONFIG_X86_MCE
void __user *mce_vaddr;
__u64 mce_kflags;
u64 mce_addr;
__u64 mce_ripv : 1,
mce_whole_page : 1,

View File

@@ -161,20 +161,13 @@ extern int bcmp(const void *,const void *,__kernel_size_t);
#ifndef __HAVE_ARCH_MEMCHR
extern void * memchr(const void *,int,__kernel_size_t);
#endif
#ifndef __HAVE_ARCH_MEMCPY_MCSAFE
static inline __must_check unsigned long memcpy_mcsafe(void *dst,
const void *src, size_t cnt)
{
memcpy(dst, src, cnt);
return 0;
}
#endif
#ifndef __HAVE_ARCH_MEMCPY_FLUSHCACHE
static inline void memcpy_flushcache(void *dst, const void *src, size_t cnt)
{
memcpy(dst, src, cnt);
}
#endif
void *memchr_inv(const void *s, int c, size_t n);
char *strreplace(char *s, char old, char new);

View File

@@ -179,6 +179,19 @@ copy_in_user(void __user *to, const void __user *from, unsigned long n)
}
#endif
#ifndef copy_mc_to_kernel
/*
 * Without arch opt-in this generic copy_mc_to_kernel() will not handle
 * #MC (or arch equivalent) during source read: it is a plain memcpy()
 * that always reports success by returning 0.
 */
static inline unsigned long __must_check
copy_mc_to_kernel(void *dst, const void *src, size_t cnt)
{
memcpy(dst, src, cnt);
return 0;
}
#endif
static __always_inline void pagefault_disabled_inc(void)
{
current->pagefault_disabled++;

View File

@@ -185,10 +185,10 @@ size_t _copy_from_iter_flushcache(void *addr, size_t bytes, struct iov_iter *i);
#define _copy_from_iter_flushcache _copy_from_iter_nocache
#endif
#ifdef CONFIG_ARCH_HAS_UACCESS_MCSAFE
size_t _copy_to_iter_mcsafe(const void *addr, size_t bytes, struct iov_iter *i);
#ifdef CONFIG_ARCH_HAS_COPY_MC
size_t _copy_mc_to_iter(const void *addr, size_t bytes, struct iov_iter *i);
#else
#define _copy_to_iter_mcsafe _copy_to_iter
#define _copy_mc_to_iter _copy_to_iter
#endif
static __always_inline __must_check
@@ -201,12 +201,12 @@ size_t copy_from_iter_flushcache(void *addr, size_t bytes, struct iov_iter *i)
}
static __always_inline __must_check
size_t copy_to_iter_mcsafe(void *addr, size_t bytes, struct iov_iter *i)
size_t copy_mc_to_iter(void *addr, size_t bytes, struct iov_iter *i)
{
if (unlikely(!check_copy_size(addr, bytes, true)))
return 0;
else
return _copy_to_iter_mcsafe(addr, bytes, i);
return _copy_mc_to_iter(addr, bytes, i);
}
size_t iov_iter_zero(size_t bytes, struct iov_iter *);

View File

@@ -635,7 +635,12 @@ config UACCESS_MEMCPY
config ARCH_HAS_UACCESS_FLUSHCACHE
bool
config ARCH_HAS_UACCESS_MCSAFE
# arch has a concept of a recoverable synchronous exception due to a
# memory-read error like x86 machine-check or ARM data-abort, and
# implements copy_mc_to_{user,kernel} to abort and report
# 'bytes-transferred' if that exception fires when accessing the source
# buffer.
config ARCH_HAS_COPY_MC
bool
# Temporary. Goes away when all archs are cleaned up

View File

@@ -637,30 +637,30 @@ size_t _copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i)
}
EXPORT_SYMBOL(_copy_to_iter);
#ifdef CONFIG_ARCH_HAS_UACCESS_MCSAFE
static int copyout_mcsafe(void __user *to, const void *from, size_t n)
#ifdef CONFIG_ARCH_HAS_COPY_MC
static int copyout_mc(void __user *to, const void *from, size_t n)
{
if (access_ok(to, n)) {
instrument_copy_to_user(to, from, n);
n = copy_to_user_mcsafe((__force void *) to, from, n);
n = copy_mc_to_user((__force void *) to, from, n);
}
return n;
}
static unsigned long memcpy_mcsafe_to_page(struct page *page, size_t offset,
static unsigned long copy_mc_to_page(struct page *page, size_t offset,
const char *from, size_t len)
{
unsigned long ret;
char *to;
to = kmap_atomic(page);
ret = memcpy_mcsafe(to + offset, from, len);
ret = copy_mc_to_kernel(to + offset, from, len);
kunmap_atomic(to);
return ret;
}
static size_t copy_pipe_to_iter_mcsafe(const void *addr, size_t bytes,
static size_t copy_mc_pipe_to_iter(const void *addr, size_t bytes,
struct iov_iter *i)
{
struct pipe_inode_info *pipe = i->pipe;
@@ -678,7 +678,7 @@ static size_t copy_pipe_to_iter_mcsafe(const void *addr, size_t bytes,
size_t chunk = min_t(size_t, n, PAGE_SIZE - off);
unsigned long rem;
rem = memcpy_mcsafe_to_page(pipe->bufs[i_head & p_mask].page,
rem = copy_mc_to_page(pipe->bufs[i_head & p_mask].page,
off, addr, chunk);
i->head = i_head;
i->iov_offset = off + chunk - rem;
@@ -695,18 +695,17 @@ static size_t copy_pipe_to_iter_mcsafe(const void *addr, size_t bytes,
}
/**
* _copy_to_iter_mcsafe - copy to user with source-read error exception handling
* _copy_mc_to_iter - copy to iter with source memory error exception handling
* @addr: source kernel address
* @bytes: total transfer length
* @iter: destination iterator
*
* The pmem driver arranges for filesystem-dax to use this facility via
* dax_copy_to_iter() for protecting read/write to persistent memory.
* Unless / until an architecture can guarantee identical performance
* between _copy_to_iter_mcsafe() and _copy_to_iter() it would be a
* performance regression to switch more users to the mcsafe version.
* The pmem driver deploys this for the dax operation
* (dax_copy_to_iter()) for dax reads (bypass page-cache and the
* block-layer). Upon #MC read(2) aborts and returns EIO or the bytes
* successfully copied.
*
* Otherwise, the main differences between this and typical _copy_to_iter().
* The main differences between this and typical _copy_to_iter().
*
* * Typical tail/residue handling after a fault retries the copy
* byte-by-byte until the fault happens again. Re-triggering machine
@@ -717,23 +716,22 @@ static size_t copy_pipe_to_iter_mcsafe(const void *addr, size_t bytes,
* * ITER_KVEC, ITER_PIPE, and ITER_BVEC can return short copies.
* Compare to copy_to_iter() where only ITER_IOVEC attempts might return
* a short copy.
*
* See MCSAFE_TEST for self-test.
*/
size_t _copy_to_iter_mcsafe(const void *addr, size_t bytes, struct iov_iter *i)
size_t _copy_mc_to_iter(const void *addr, size_t bytes, struct iov_iter *i)
{
const char *from = addr;
unsigned long rem, curr_addr, s_addr = (unsigned long) addr;
if (unlikely(iov_iter_is_pipe(i)))
return copy_pipe_to_iter_mcsafe(addr, bytes, i);
return copy_mc_pipe_to_iter(addr, bytes, i);
if (iter_is_iovec(i))
might_fault();
iterate_and_advance(i, bytes, v,
copyout_mcsafe(v.iov_base, (from += v.iov_len) - v.iov_len, v.iov_len),
copyout_mc(v.iov_base, (from += v.iov_len) - v.iov_len,
v.iov_len),
({
rem = memcpy_mcsafe_to_page(v.bv_page, v.bv_offset,
(from += v.bv_len) - v.bv_len, v.bv_len);
rem = copy_mc_to_page(v.bv_page, v.bv_offset,
(from += v.bv_len) - v.bv_len, v.bv_len);
if (rem) {
curr_addr = (unsigned long) from;
bytes = curr_addr - s_addr - rem;
@@ -741,8 +739,8 @@ size_t _copy_to_iter_mcsafe(const void *addr, size_t bytes, struct iov_iter *i)
}
}),
({
rem = memcpy_mcsafe(v.iov_base, (from += v.iov_len) - v.iov_len,
v.iov_len);
rem = copy_mc_to_kernel(v.iov_base, (from += v.iov_len)
- v.iov_len, v.iov_len);
if (rem) {
curr_addr = (unsigned long) from;
bytes = curr_addr - s_addr - rem;
@@ -753,8 +751,8 @@ size_t _copy_to_iter_mcsafe(const void *addr, size_t bytes, struct iov_iter *i)
return bytes;
}
EXPORT_SYMBOL_GPL(_copy_to_iter_mcsafe);
#endif /* CONFIG_ARCH_HAS_UACCESS_MCSAFE */
EXPORT_SYMBOL_GPL(_copy_mc_to_iter);
#endif /* CONFIG_ARCH_HAS_COPY_MC */
size_t _copy_from_iter(void *addr, size_t bytes, struct iov_iter *i)
{

View File

@@ -1,13 +0,0 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _MCSAFE_TEST_H_
#define _MCSAFE_TEST_H_
.macro MCSAFE_TEST_CTL
.endm
.macro MCSAFE_TEST_SRC reg count target
.endm
.macro MCSAFE_TEST_DST reg count target
.endm
#endif /* _MCSAFE_TEST_H_ */

View File

@@ -4,7 +4,6 @@
#include <linux/linkage.h>
#include <asm/errno.h>
#include <asm/cpufeatures.h>
#include <asm/mcsafe_test.h>
#include <asm/alternative-asm.h>
#include <asm/export.h>
@@ -187,117 +186,3 @@ SYM_FUNC_START(memcpy_orig)
SYM_FUNC_END(memcpy_orig)
.popsection
#ifndef CONFIG_UML
MCSAFE_TEST_CTL
/*
* __memcpy_mcsafe - memory copy with machine check exception handling
* Note that we only catch machine checks when reading the source addresses.
* Writes to target are posted and don't generate machine checks.
*
* Register usage as seen in the body below:
*   %rdi - destination pointer (written through, advanced as bytes land)
*   %rsi - source pointer (read through, advanced in step with %rdi)
*   %edx - number of bytes to copy
* Returns in %eax: zero when the whole copy completed. On a fault the
* fixup code that follows this routine sets %eax to the number of bytes
* not copied and re-enters at .L_done.
*
* The copy runs in three phases: single bytes until the source is
* 8-byte aligned, then whole 8-byte words, then the remaining bytes.
*/
SYM_FUNC_START(__memcpy_mcsafe)
cmpl $8, %edx
/* Less than 8 bytes? Go to byte copy loop */
jb .L_no_whole_words
/* Check for bad alignment of source */
testl $7, %esi
/* Already aligned */
jz .L_8byte_aligned
/* Copy one byte at a time until source is 8-byte aligned */
/* %ecx = 8 - (src & 7): leading bytes needed to reach alignment */
movl %esi, %ecx
andl $7, %ecx
subl $8, %ecx
negl %ecx
/* %edx now counts only the bytes left after the leading ones */
subl %ecx, %edx
.L_read_leading_bytes:
movb (%rsi), %al
MCSAFE_TEST_SRC %rsi 1 .E_leading_bytes
MCSAFE_TEST_DST %rdi 1 .E_leading_bytes
.L_write_leading_bytes:
movb %al, (%rdi)
incq %rsi
incq %rdi
decl %ecx
jnz .L_read_leading_bytes
.L_8byte_aligned:
/* Split the remainder: %ecx = whole 8-byte words, %edx = trailing bytes */
movl %edx, %ecx
andl $7, %edx
shrl $3, %ecx
jz .L_no_whole_words
.L_read_words:
movq (%rsi), %r8
MCSAFE_TEST_SRC %rsi 8 .E_read_words
MCSAFE_TEST_DST %rdi 8 .E_write_words
.L_write_words:
movq %r8, (%rdi)
addq $8, %rsi
addq $8, %rdi
decl %ecx
jnz .L_read_words
/* Any trailing bytes? */
.L_no_whole_words:
/* andl of a register with itself only sets the flags: ZF if %edx == 0 */
andl %edx, %edx
jz .L_done_memcpy_trap
/* Copy trailing bytes */
movl %edx, %ecx
.L_read_trailing_bytes:
movb (%rsi), %al
MCSAFE_TEST_SRC %rsi 1 .E_trailing_bytes
MCSAFE_TEST_DST %rdi 1 .E_trailing_bytes
.L_write_trailing_bytes:
movb %al, (%rdi)
incq %rsi
incq %rdi
decl %ecx
jnz .L_read_trailing_bytes
/* Copy successful. Return zero */
.L_done_memcpy_trap:
xorl %eax, %eax
/* Shared exit: the fixup handlers jump here with %eax already set */
.L_done:
ret
SYM_FUNC_END(__memcpy_mcsafe)
EXPORT_SYMBOL_GPL(__memcpy_mcsafe)
.section .fixup, "ax"
/*
* Return number of bytes not copied for any failure. Note that
* there is no "tail" handling since the source buffer is 8-byte
* aligned and poison is cacheline aligned.
*
* The three labels below deliberately fall through into one another:
* each entry point adds its own share to %ecx before the common exit.
*/
.E_read_words:
/* %ecx holds the 8-byte words still to copy; convert it to bytes */
shll $3, %ecx
.E_leading_bytes:
/* Add the bytes of the later phases, still counted in %edx */
addl %edx, %ecx
.E_trailing_bytes:
/* %ecx is now the total number of bytes not copied; return it */
mov %ecx, %eax
jmp .L_done
/*
* For write fault handling, given the destination is unaligned,
* we handle faults on multi-byte writes with a byte-by-byte
* copy up to the write-protected page.
*/
.E_write_words:
/* Remaining bytes = words left * 8 + trailing bytes, placed in %edx */
shll $3, %ecx
addl %edx, %ecx
movl %ecx, %edx
/*
* Jump (not call) to mcsafe_handle_tail: %rdi/%rsi still address the
* word whose write faulted, so it resumes from there and its return
* value becomes this routine's return value.
* NOTE(review): %edx is presumably its length argument - confirm
* against the C prototype.
*/
jmp mcsafe_handle_tail
.previous
/*
* Exception table: pair each instruction that may fault (the reads and
* writes labelled in the copy loops) with the fixup label to resume at.
* Reads use _ASM_EXTABLE_FAULT and writes use _ASM_EXTABLE.
*/
_ASM_EXTABLE_FAULT(.L_read_leading_bytes, .E_leading_bytes)
_ASM_EXTABLE_FAULT(.L_read_words, .E_read_words)
_ASM_EXTABLE_FAULT(.L_read_trailing_bytes, .E_trailing_bytes)
_ASM_EXTABLE(.L_write_leading_bytes, .E_leading_bytes)
_ASM_EXTABLE(.L_write_words, .E_write_words)
_ASM_EXTABLE(.L_write_trailing_bytes, .E_trailing_bytes)
#endif

View File

@@ -548,8 +548,9 @@ static const char *uaccess_safe_builtin[] = {
"__ubsan_handle_shift_out_of_bounds",
/* misc */
"csum_partial_copy_generic",
"__memcpy_mcsafe",
"mcsafe_handle_tail",
"copy_mc_fragile",
"copy_mc_fragile_handle_tail",
"copy_mc_enhanced_fast_string",
"ftrace_likely_update", /* CONFIG_TRACE_BRANCH_PROFILING */
NULL
};

View File

@@ -13,7 +13,6 @@ perf-y += synthesize.o
perf-y += kallsyms-parse.o
perf-y += find-bit-bench.o
perf-$(CONFIG_X86_64) += mem-memcpy-x86-64-lib.o
perf-$(CONFIG_X86_64) += mem-memcpy-x86-64-asm.o
perf-$(CONFIG_X86_64) += mem-memset-x86-64-asm.o

View File

@@ -1,24 +0,0 @@
/*
* From code in arch/x86/lib/usercopy_64.c, copied to keep tools/ copy
* of the kernel's arch/x86/lib/memcpy_64.s used in 'perf bench mem memcpy'
* happy.
*/
#include <linux/types.h>
unsigned long __memcpy_mcsafe(void *dst, const void *src, size_t cnt);
unsigned long mcsafe_handle_tail(char *to, char *from, unsigned len);
unsigned long mcsafe_handle_tail(char *to, char *from, unsigned len)
{
	/*
	 * Copy the tail one byte at a time and stop at the first byte
	 * that cannot be copied. The assembly routine is called directly
	 * because memcpy_mcsafe() may silently fall back to memcpy.
	 */
	while (len) {
		if (__memcpy_mcsafe(to, from, 1))
			break;
		to++;
		from++;
		len--;
	}

	/* Number of bytes left uncopied; zero when the whole tail went through. */
	return len;
}

View File

@@ -23,7 +23,8 @@
#include "nfit_test.h"
#include "../watermark.h"
#include <asm/mcsafe_test.h>
#include <asm/copy_mc_test.h>
#include <asm/mce.h>
/*
* Generate an NFIT table to describe the following topology:
@@ -3283,7 +3284,7 @@ static struct platform_driver nfit_test_driver = {
.id_table = nfit_test_id,
};
static char mcsafe_buf[PAGE_SIZE] __attribute__((__aligned__(PAGE_SIZE)));
static char copy_mc_buf[PAGE_SIZE] __attribute__((__aligned__(PAGE_SIZE)));
enum INJECT {
INJECT_NONE,
@@ -3291,7 +3292,7 @@ enum INJECT {
INJECT_DST,
};
static void mcsafe_test_init(char *dst, char *src, size_t size)
static void copy_mc_test_init(char *dst, char *src, size_t size)
{
size_t i;
@@ -3300,7 +3301,7 @@ static void mcsafe_test_init(char *dst, char *src, size_t size)
src[i] = (char) i;
}
static bool mcsafe_test_validate(unsigned char *dst, unsigned char *src,
static bool copy_mc_test_validate(unsigned char *dst, unsigned char *src,
size_t size, unsigned long rem)
{
size_t i;
@@ -3321,12 +3322,12 @@ static bool mcsafe_test_validate(unsigned char *dst, unsigned char *src,
return true;
}
void mcsafe_test(void)
void copy_mc_test(void)
{
char *inject_desc[] = { "none", "source", "destination" };
enum INJECT inj;
if (IS_ENABLED(CONFIG_MCSAFE_TEST)) {
if (IS_ENABLED(CONFIG_COPY_MC_TEST)) {
pr_info("%s: run...\n", __func__);
} else {
pr_info("%s: disabled, skip.\n", __func__);
@@ -3344,31 +3345,31 @@ void mcsafe_test(void)
switch (inj) {
case INJECT_NONE:
mcsafe_inject_src(NULL);
mcsafe_inject_dst(NULL);
dst = &mcsafe_buf[2048];
src = &mcsafe_buf[1024 - i];
copy_mc_inject_src(NULL);
copy_mc_inject_dst(NULL);
dst = &copy_mc_buf[2048];
src = &copy_mc_buf[1024 - i];
expect = 0;
break;
case INJECT_SRC:
mcsafe_inject_src(&mcsafe_buf[1024]);
mcsafe_inject_dst(NULL);
dst = &mcsafe_buf[2048];
src = &mcsafe_buf[1024 - i];
copy_mc_inject_src(&copy_mc_buf[1024]);
copy_mc_inject_dst(NULL);
dst = &copy_mc_buf[2048];
src = &copy_mc_buf[1024 - i];
expect = 512 - i;
break;
case INJECT_DST:
mcsafe_inject_src(NULL);
mcsafe_inject_dst(&mcsafe_buf[2048]);
dst = &mcsafe_buf[2048 - i];
src = &mcsafe_buf[1024];
copy_mc_inject_src(NULL);
copy_mc_inject_dst(&copy_mc_buf[2048]);
dst = &copy_mc_buf[2048 - i];
src = &copy_mc_buf[1024];
expect = 512 - i;
break;
}
mcsafe_test_init(dst, src, 512);
rem = __memcpy_mcsafe(dst, src, 512);
valid = mcsafe_test_validate(dst, src, 512, expect);
copy_mc_test_init(dst, src, 512);
rem = copy_mc_fragile(dst, src, 512);
valid = copy_mc_test_validate(dst, src, 512, expect);
if (rem == expect && valid)
continue;
pr_info("%s: copy(%#lx, %#lx, %d) off: %d rem: %ld %s expect: %ld\n",
@@ -3380,8 +3381,8 @@ void mcsafe_test(void)
}
}
mcsafe_inject_src(NULL);
mcsafe_inject_dst(NULL);
copy_mc_inject_src(NULL);
copy_mc_inject_dst(NULL);
}
static __init int nfit_test_init(void)
@@ -3392,7 +3393,7 @@ static __init int nfit_test_init(void)
libnvdimm_test();
acpi_nfit_test();
device_dax_test();
mcsafe_test();
copy_mc_test();
dax_pmem_test();
dax_pmem_core_test();
#ifdef CONFIG_DEV_DAX_PMEM_COMPAT

View File

@@ -12,4 +12,4 @@ memcpy_p7_t1
copyuser_64_exc_t0
copyuser_64_exc_t1
copyuser_64_exc_t2
memcpy_mcsafe_64
copy_mc_64

View File

@@ -12,7 +12,7 @@ ASFLAGS = $(CFLAGS) -Wa,-mpower4
TEST_GEN_PROGS := copyuser_64_t0 copyuser_64_t1 copyuser_64_t2 \
copyuser_p7_t0 copyuser_p7_t1 \
memcpy_64_t0 memcpy_64_t1 memcpy_64_t2 \
memcpy_p7_t0 memcpy_p7_t1 memcpy_mcsafe_64 \
memcpy_p7_t0 memcpy_p7_t1 copy_mc_64 \
copyuser_64_exc_t0 copyuser_64_exc_t1 copyuser_64_exc_t2
EXTRA_SOURCES := validate.c ../harness.c stubs.S
@@ -45,9 +45,9 @@ $(OUTPUT)/memcpy_p7_t%: memcpy_power7.S $(EXTRA_SOURCES)
-D SELFTEST_CASE=$(subst memcpy_p7_t,,$(notdir $@)) \
-o $@ $^
$(OUTPUT)/memcpy_mcsafe_64: memcpy_mcsafe_64.S $(EXTRA_SOURCES)
$(OUTPUT)/copy_mc_64: copy_mc_64.S $(EXTRA_SOURCES)
$(CC) $(CPPFLAGS) $(CFLAGS) \
-D COPY_LOOP=test_memcpy_mcsafe \
-D COPY_LOOP=test_copy_mc_generic \
-o $@ $^
$(OUTPUT)/copyuser_64_exc_t%: copyuser_64.S exc_validate.c ../harness.c \

View File

@@ -0,0 +1 @@
../../../../../arch/powerpc/lib/copy_mc_64.S

View File

@@ -1 +0,0 @@
../../../../../arch/powerpc/lib/memcpy_mcsafe_64.S