diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 7fa8bdd0ed7e..2e8f6fd28e59 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1328,7 +1328,7 @@ config MICROCODE config MICROCODE_INTEL bool "Intel microcode loading support" - depends on MICROCODE + depends on CPU_SUP_INTEL && MICROCODE default MICROCODE help This options enables microcode patch loading support for Intel @@ -1340,7 +1340,7 @@ config MICROCODE_INTEL config MICROCODE_AMD bool "AMD microcode loading support" - depends on MICROCODE + depends on CPU_SUP_AMD && MICROCODE help If you select this option, microcode patch loading support for AMD processors will be enabled. @@ -2330,7 +2330,9 @@ choice it can be used to assist security vulnerability exploitation. This setting can be changed at boot time via the kernel command - line parameter vsyscall=[emulate|xonly|none]. + line parameter vsyscall=[emulate|xonly|none]. Emulate mode + is deprecated and can only be enabled using the kernel command + line. On a system with recent enough glibc (2.14 or newer) and no static binaries, you can say None without a performance penalty @@ -2338,20 +2340,6 @@ choice If unsure, select "Emulate execution only". - config LEGACY_VSYSCALL_EMULATE - bool "Full emulation" - help - The kernel traps and emulates calls into the fixed vsyscall - address mapping. This makes the mapping non-executable, but - it still contains readable known contents, which could be - used in certain rare security vulnerability exploits. This - configuration is recommended when using legacy userspace - that still uses vsyscalls along with legacy binary - instrumentation tools that require code to be readable. - - An example of this type of legacy userspace is running - Pin on an old binary that still uses vsyscalls. - config LEGACY_VSYSCALL_XONLY bool "Emulate execution only" help diff --git a/arch/x86/entry/vdso/vma.c b/arch/x86/entry/vdso/vma.c index 235a5794296a..1000d457c332 100644 --- a/arch/x86/entry/vdso/vma.c +++ b/arch/x86/entry/vdso/vma.c @@ -438,7 +438,7 @@ bool arch_syscall_is_vdso_sigreturn(struct pt_regs *regs) static __init int vdso_setup(char *s) { vdso64_enabled = simple_strtoul(s, NULL, 0); - return 0; + return 1; } __setup("vdso=", vdso_setup); diff --git a/arch/x86/entry/vsyscall/vsyscall_64.c b/arch/x86/entry/vsyscall/vsyscall_64.c index fd2ee9408e91..4af81df133ee 100644 --- a/arch/x86/entry/vsyscall/vsyscall_64.c +++ b/arch/x86/entry/vsyscall/vsyscall_64.c @@ -48,7 +48,7 @@ static enum { EMULATE, XONLY, NONE } vsyscall_mode __ro_after_init = #elif defined(CONFIG_LEGACY_VSYSCALL_XONLY) XONLY; #else - EMULATE; + #error VSYSCALL config is broken #endif static int __init vsyscall_setup(char *str) diff --git a/arch/x86/include/asm/amd_nb.h b/arch/x86/include/asm/amd_nb.h index 00d1a400b7a1..ed0eaf65c437 100644 --- a/arch/x86/include/asm/amd_nb.h +++ b/arch/x86/include/asm/amd_nb.h @@ -16,7 +16,6 @@ extern const struct amd_nb_bus_dev_range amd_nb_bus_dev_ranges[]; extern bool early_is_amd_nb(u32 value); extern struct resource *amd_get_mmconfig_range(struct resource *res); -extern int amd_cache_northbridges(void); extern void amd_flush_garts(void); extern int amd_numa_init(void); extern int amd_get_subcaches(int); diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c index 020c906f7934..190e0f763375 100644 --- a/arch/x86/kernel/amd_nb.c +++ b/arch/x86/kernel/amd_nb.c @@ -188,7 +188,7 @@ int amd_smn_write(u16 node, u32 address, u32 value) EXPORT_SYMBOL_GPL(amd_smn_write); -int amd_cache_northbridges(void) +static int amd_cache_northbridges(void) { const struct pci_device_id *misc_ids = amd_nb_misc_ids; const struct pci_device_id *link_ids = amd_nb_link_ids; @@ -210,14 +210,14 @@ int amd_cache_northbridges(void) } misc = NULL; - while ((misc = next_northbridge(misc, misc_ids)) != NULL) + while ((misc = next_northbridge(misc, misc_ids))) misc_count++; if (!misc_count) return -ENODEV; root = NULL; - while ((root = next_northbridge(root, root_ids)) != NULL) + while ((root = next_northbridge(root, root_ids))) root_count++; if (root_count) { @@ -290,7 +290,6 @@ int amd_cache_northbridges(void) return 0; } -EXPORT_SYMBOL_GPL(amd_cache_northbridges); /* * Ignores subdevice/subvendor but as far as I can figure out diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 9b87ec6acdec..189d3a5e471a 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -170,7 +170,7 @@ static __init int setup_apicpmtimer(char *s) { apic_calibrate_pmtmr = 1; notsc_setup(NULL); - return 0; + return 1; } __setup("apicpmtimer", setup_apicpmtimer); #endif diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index e6c37f38c5ea..7860241a67db 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -94,7 +94,7 @@ static bool ring3mwait_disabled __read_mostly; static int __init ring3mwait_disable(char *__unused) { ring3mwait_disabled = true; - return 0; + return 1; } __setup("ring3mwait=disable", ring3mwait_disable); diff --git a/arch/x86/kernel/cpu/sgx/encl.c b/arch/x86/kernel/cpu/sgx/encl.c index 7c63a1911fae..3c24e6124d95 100644 --- a/arch/x86/kernel/cpu/sgx/encl.c +++ b/arch/x86/kernel/cpu/sgx/encl.c @@ -12,6 +12,92 @@ #include "encls.h" #include "sgx.h" +#define PCMDS_PER_PAGE (PAGE_SIZE / sizeof(struct sgx_pcmd)) +/* + * 32 PCMD entries share a PCMD page. PCMD_FIRST_MASK is used to + * determine the page index associated with the first PCMD entry + * within a PCMD page. + */ +#define PCMD_FIRST_MASK GENMASK(4, 0) + +/** + * reclaimer_writing_to_pcmd() - Query if any enclave page associated with + * a PCMD page is in process of being reclaimed. + * @encl: Enclave to which PCMD page belongs + * @start_addr: Address of enclave page using first entry within the PCMD page + * + * When an enclave page is reclaimed some Paging Crypto MetaData (PCMD) is + * stored. The PCMD data of a reclaimed enclave page contains enough + * information for the processor to verify the page at the time + * it is loaded back into the Enclave Page Cache (EPC). + * + * The backing storage to which enclave pages are reclaimed is laid out as + * follows: + * Encrypted enclave pages:SECS page:PCMD pages + * + * Each PCMD page contains the PCMD metadata of + * PAGE_SIZE/sizeof(struct sgx_pcmd) enclave pages. + * + * A PCMD page can only be truncated if it is (a) empty, and (b) not in the + * process of getting data (and thus soon being non-empty). (b) is tested with + * a check if an enclave page sharing the PCMD page is in the process of being + * reclaimed. + * + * The reclaimer sets the SGX_ENCL_PAGE_BEING_RECLAIMED flag when it + * intends to reclaim that enclave page - it means that the PCMD page + * associated with that enclave page is about to get some data and thus + * even if the PCMD page is empty, it should not be truncated. + * + * Context: Enclave mutex (&sgx_encl->lock) must be held. + * Return: 1 if the reclaimer is about to write to the PCMD page + * 0 if the reclaimer has no intention to write to the PCMD page + */ +static int reclaimer_writing_to_pcmd(struct sgx_encl *encl, + unsigned long start_addr) +{ + int reclaimed = 0; + int i; + + /* + * PCMD_FIRST_MASK is based on number of PCMD entries within + * PCMD page being 32. + */ + BUILD_BUG_ON(PCMDS_PER_PAGE != 32); + + for (i = 0; i < PCMDS_PER_PAGE; i++) { + struct sgx_encl_page *entry; + unsigned long addr; + + addr = start_addr + i * PAGE_SIZE; + + /* + * Stop when reaching the SECS page - it does not + * have a page_array entry and its reclaim is + * started and completed with enclave mutex held so + * it does not use the SGX_ENCL_PAGE_BEING_RECLAIMED + * flag. + */ + if (addr == encl->base + encl->size) + break; + + entry = xa_load(&encl->page_array, PFN_DOWN(addr)); + if (!entry) + continue; + + /* + * VA page slot ID uses same bit as the flag so it is important + * to ensure that the page is not already in backing store. + */ + if (entry->epc_page && + (entry->desc & SGX_ENCL_PAGE_BEING_RECLAIMED)) { + reclaimed = 1; + break; + } + } + + return reclaimed; +} + /* * Calculate byte offset of a PCMD struct associated with an enclave page. PCMD's * follow right after the EPC data in the backing storage. In addition to the @@ -47,6 +133,7 @@ static int __sgx_encl_eldu(struct sgx_encl_page *encl_page, unsigned long va_offset = encl_page->desc & SGX_ENCL_PAGE_VA_OFFSET_MASK; struct sgx_encl *encl = encl_page->encl; pgoff_t page_index, page_pcmd_off; + unsigned long pcmd_first_page; struct sgx_pageinfo pginfo; struct sgx_backing b; bool pcmd_page_empty; @@ -58,6 +145,11 @@ static int __sgx_encl_eldu(struct sgx_encl_page *encl_page, else page_index = PFN_DOWN(encl->size); + /* + * Address of enclave page using the first entry within the PCMD page. + */ + pcmd_first_page = PFN_PHYS(page_index & ~PCMD_FIRST_MASK) + encl->base; + page_pcmd_off = sgx_encl_get_backing_page_pcmd_offset(encl, page_index); ret = sgx_encl_get_backing(encl, page_index, &b); @@ -84,6 +176,7 @@ static int __sgx_encl_eldu(struct sgx_encl_page *encl_page, } memset(pcmd_page + b.pcmd_offset, 0, sizeof(struct sgx_pcmd)); + set_page_dirty(b.pcmd); /* * The area for the PCMD in the page was zeroed above. Check if the @@ -94,12 +187,20 @@ static int __sgx_encl_eldu(struct sgx_encl_page *encl_page, kunmap_atomic(pcmd_page); kunmap_atomic((void *)(unsigned long)pginfo.contents); - sgx_encl_put_backing(&b, false); + get_page(b.pcmd); + sgx_encl_put_backing(&b); sgx_encl_truncate_backing_page(encl, page_index); - if (pcmd_page_empty) + if (pcmd_page_empty && !reclaimer_writing_to_pcmd(encl, pcmd_first_page)) { sgx_encl_truncate_backing_page(encl, PFN_DOWN(page_pcmd_off)); + pcmd_page = kmap_atomic(b.pcmd); + if (memchr_inv(pcmd_page, 0, PAGE_SIZE)) + pr_warn("PCMD page not empty after truncate.\n"); + kunmap_atomic(pcmd_page); + } + + put_page(b.pcmd); return ret; } @@ -645,15 +746,9 @@ int sgx_encl_get_backing(struct sgx_encl *encl, unsigned long page_index, /** * sgx_encl_put_backing() - Unpin the backing storage * @backing: data for accessing backing storage for the page - * @do_write: mark pages dirty */ -void sgx_encl_put_backing(struct sgx_backing *backing, bool do_write) +void sgx_encl_put_backing(struct sgx_backing *backing) { - if (do_write) { - set_page_dirty(backing->pcmd); - set_page_dirty(backing->contents); - } - put_page(backing->pcmd); put_page(backing->contents); } diff --git a/arch/x86/kernel/cpu/sgx/encl.h b/arch/x86/kernel/cpu/sgx/encl.h index fec43ca65065..d44e7372151f 100644 --- a/arch/x86/kernel/cpu/sgx/encl.h +++ b/arch/x86/kernel/cpu/sgx/encl.h @@ -107,7 +107,7 @@ void sgx_encl_release(struct kref *ref); int sgx_encl_mm_add(struct sgx_encl *encl, struct mm_struct *mm); int sgx_encl_get_backing(struct sgx_encl *encl, unsigned long page_index, struct sgx_backing *backing); -void sgx_encl_put_backing(struct sgx_backing *backing, bool do_write); +void sgx_encl_put_backing(struct sgx_backing *backing); int sgx_encl_test_and_clear_young(struct mm_struct *mm, struct sgx_encl_page *page); diff --git a/arch/x86/kernel/cpu/sgx/main.c b/arch/x86/kernel/cpu/sgx/main.c index 8e4bc6453d26..ab4ec54bbdd9 100644 --- a/arch/x86/kernel/cpu/sgx/main.c +++ b/arch/x86/kernel/cpu/sgx/main.c @@ -191,6 +191,8 @@ static int __sgx_encl_ewb(struct sgx_epc_page *epc_page, void *va_slot, backing->pcmd_offset; ret = __ewb(&pginfo, sgx_get_epc_virt_addr(epc_page), va_slot); + set_page_dirty(backing->pcmd); + set_page_dirty(backing->contents); kunmap_atomic((void *)(unsigned long)(pginfo.metadata - backing->pcmd_offset)); @@ -308,6 +310,7 @@ static void sgx_reclaimer_write(struct sgx_epc_page *epc_page, sgx_encl_ewb(epc_page, backing); encl_page->epc_page = NULL; encl->secs_child_cnt--; + sgx_encl_put_backing(backing); if (!encl->secs_child_cnt && test_bit(SGX_ENCL_INITIALIZED, &encl->flags)) { ret = sgx_encl_get_backing(encl, PFN_DOWN(encl->size), @@ -320,7 +323,7 @@ static void sgx_reclaimer_write(struct sgx_epc_page *epc_page, sgx_encl_free_epc_page(encl->secs.epc_page); encl->secs.epc_page = NULL; - sgx_encl_put_backing(&secs_backing, true); + sgx_encl_put_backing(&secs_backing); } out: @@ -379,11 +382,14 @@ static void sgx_reclaim_pages(void) goto skip; page_index = PFN_DOWN(encl_page->desc - encl_page->encl->base); - ret = sgx_encl_get_backing(encl_page->encl, page_index, &backing[i]); - if (ret) - goto skip; mutex_lock(&encl_page->encl->lock); + ret = sgx_encl_get_backing(encl_page->encl, page_index, &backing[i]); + if (ret) { + mutex_unlock(&encl_page->encl->lock); + goto skip; + } + encl_page->desc |= SGX_ENCL_PAGE_BEING_RECLAIMED; mutex_unlock(&encl_page->encl->lock); continue; @@ -411,7 +417,6 @@ skip: encl_page = epc_page->owner; sgx_reclaimer_write(epc_page, &backing[i]); - sgx_encl_put_backing(&backing[i], true); kref_put(&encl_page->encl->refcount, sgx_encl_release); epc_page->flags &= ~SGX_EPC_PAGE_RECLAIMER_TRACKED; diff --git a/arch/x86/kernel/sys_x86_64.c b/arch/x86/kernel/sys_x86_64.c index 660b78827638..8cc653ffdccd 100644 --- a/arch/x86/kernel/sys_x86_64.c +++ b/arch/x86/kernel/sys_x86_64.c @@ -68,9 +68,6 @@ static int __init control_va_addr_alignment(char *str) if (*str == 0) return 1; - if (*str == '=') - str++; - if (!strcmp(str, "32")) va_align.flags = ALIGN_VA_32; else if (!strcmp(str, "64")) @@ -80,11 +77,11 @@ static int __init control_va_addr_alignment(char *str) else if (!strcmp(str, "on")) va_align.flags = ALIGN_VA_32 | ALIGN_VA_64; else - return 0; + pr_warn("invalid option value: 'align_va_addr=%s'\n", str); return 1; } -__setup("align_va_addr", control_va_addr_alignment); +__setup("align_va_addr=", control_va_addr_alignment); SYSCALL_DEFINE6(mmap, unsigned long, addr, unsigned long, len, unsigned long, prot, unsigned long, flags, diff --git a/arch/x86/lib/delay.c b/arch/x86/lib/delay.c index 65d15df6212d..0e65d00e2339 100644 --- a/arch/x86/lib/delay.c +++ b/arch/x86/lib/delay.c @@ -54,8 +54,8 @@ static void delay_loop(u64 __loops) " jnz 2b \n" "3: dec %0 \n" - : /* we don't need output */ - :"a" (loops) + : "+a" (loops) + : ); } diff --git a/arch/x86/mm/pat/memtype.c b/arch/x86/mm/pat/memtype.c index 4ba2a3ee4bce..d5ef64ddd35e 100644 --- a/arch/x86/mm/pat/memtype.c +++ b/arch/x86/mm/pat/memtype.c @@ -101,7 +101,7 @@ int pat_debug_enable; static int __init pat_debug_setup(char *str) { pat_debug_enable = 1; - return 0; + return 1; } __setup("debugpat", pat_debug_setup); diff --git a/drivers/char/agp/amd64-agp.c b/drivers/char/agp/amd64-agp.c index dc78a4fb879e..84a4aa9312cf 100644 --- a/drivers/char/agp/amd64-agp.c +++ b/drivers/char/agp/amd64-agp.c @@ -327,7 +327,7 @@ static int cache_nbs(struct pci_dev *pdev, u32 cap_ptr) { int i; - if (amd_cache_northbridges() < 0) + if (!amd_nb_num()) return -ENODEV; if (!amd_nb_has_feature(AMD_NB_GART)) diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index 812baa48b290..2f854feeeb23 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -4336,7 +4336,7 @@ static int __init amd64_edac_init(void) if (!x86_match_cpu(amd64_cpuids)) return -ENODEV; - if (amd_cache_northbridges() < 0) + if (!amd_nb_num()) return -ENODEV; opstate_init();