diff --git a/Documentation/driver-api/io-mapping.rst b/Documentation/driver-api/io-mapping.rst index a966239f04e4..a0cfb15988df 100644 --- a/Documentation/driver-api/io-mapping.rst +++ b/Documentation/driver-api/io-mapping.rst @@ -20,78 +20,72 @@ A mapping object is created during driver initialization using:: mappable, while 'size' indicates how large a mapping region to enable. Both are in bytes. -This _wc variant provides a mapping which may only be used -with the io_mapping_map_atomic_wc or io_mapping_map_wc. +This _wc variant provides a mapping which may only be used with +io_mapping_map_atomic_wc(), io_mapping_map_local_wc() or +io_mapping_map_wc(). -With this mapping object, individual pages can be mapped either atomically -or not, depending on the necessary scheduling environment. Of course, atomic -maps are more efficient:: +With this mapping object, individual pages can be mapped either temporarily +or long term, depending on the requirements. Of course, temporary maps are +more efficient. They come in two flavours:: + + void *io_mapping_map_local_wc(struct io_mapping *mapping, + unsigned long offset) void *io_mapping_map_atomic_wc(struct io_mapping *mapping, unsigned long offset) -'offset' is the offset within the defined mapping region. -Accessing addresses beyond the region specified in the -creation function yields undefined results. Using an offset -which is not page aligned yields an undefined result. The -return value points to a single page in CPU address space. +'offset' is the offset within the defined mapping region. Accessing +addresses beyond the region specified in the creation function yields +undefined results. Using an offset which is not page aligned yields an +undefined result. The return value points to a single page in CPU address +space. -This _wc variant returns a write-combining map to the -page and may only be used with mappings created by -io_mapping_create_wc +This _wc variant returns a write-combining map to the page and may only be +used with mappings created by io_mapping_create_wc() -Note that the task may not sleep while holding this page -mapped. +Temporary mappings are only valid in the context of the caller. The mapping +is not guaranteed to be globaly visible. -:: +io_mapping_map_local_wc() has a side effect on X86 32bit as it disables +migration to make the mapping code work. No caller can rely on this side +effect. +io_mapping_map_atomic_wc() has the side effect of disabling preemption and +pagefaults. Don't use in new code. Use io_mapping_map_local_wc() instead. + +Nested mappings need to be undone in reverse order because the mapping +code uses a stack for keeping track of them:: + + addr1 = io_mapping_map_local_wc(map1, offset1); + addr2 = io_mapping_map_local_wc(map2, offset2); + ... + io_mapping_unmap_local(addr2); + io_mapping_unmap_local(addr1); + +The mappings are released with:: + + void io_mapping_unmap_local(void *vaddr) void io_mapping_unmap_atomic(void *vaddr) -'vaddr' must be the value returned by the last -io_mapping_map_atomic_wc call. This unmaps the specified -page and allows the task to sleep once again. +'vaddr' must be the value returned by the last io_mapping_map_local_wc() or +io_mapping_map_atomic_wc() call. This unmaps the specified mapping and +undoes the side effects of the mapping functions. -If you need to sleep while holding the lock, you can use the non-atomic -variant, although they may be significantly slower. - -:: +If you need to sleep while holding a mapping, you can use the regular +variant, although this may be significantly slower:: void *io_mapping_map_wc(struct io_mapping *mapping, unsigned long offset) -This works like io_mapping_map_atomic_wc except it allows -the task to sleep while holding the page mapped. +This works like io_mapping_map_atomic/local_wc() except it has no side +effects and the pointer is globaly visible. - -:: +The mappings are released with:: void io_mapping_unmap(void *vaddr) -This works like io_mapping_unmap_atomic, except it is used -for pages mapped with io_mapping_map_wc. +Use for pages mapped with io_mapping_map_wc(). At driver close time, the io_mapping object must be freed:: void io_mapping_free(struct io_mapping *mapping) - -Current Implementation -====================== - -The initial implementation of these functions uses existing mapping -mechanisms and so provides only an abstraction layer and no new -functionality. - -On 64-bit processors, io_mapping_create_wc calls ioremap_wc for the whole -range, creating a permanent kernel-visible mapping to the resource. The -map_atomic and map functions add the requested offset to the base of the -virtual address returned by ioremap_wc. - -On 32-bit processors with HIGHMEM defined, io_mapping_map_atomic_wc uses -kmap_atomic_pfn to map the specified page in an atomic fashion; -kmap_atomic_pfn isn't really supposed to be used with device pages, but it -provides an efficient mapping for this usage. - -On 32-bit processors without HIGHMEM defined, io_mapping_map_atomic_wc and -io_mapping_map_wc both use ioremap_wc, a terribly inefficient function which -performs an IPI to inform all processors about the new mapping. This results -in a significant performance penalty. diff --git a/arch/alpha/include/asm/kmap_types.h b/arch/alpha/include/asm/kmap_types.h deleted file mode 100644 index 651714b45729..000000000000 --- a/arch/alpha/include/asm/kmap_types.h +++ /dev/null @@ -1,15 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_KMAP_TYPES_H -#define _ASM_KMAP_TYPES_H - -/* Dummy header just to define km_type. */ - -#ifdef CONFIG_DEBUG_HIGHMEM -#define __WITH_KM_FENCE -#endif - -#include - -#undef __WITH_KM_FENCE - -#endif diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig index 0a89cc9def65..d8804001d550 100644 --- a/arch/arc/Kconfig +++ b/arch/arc/Kconfig @@ -507,6 +507,7 @@ config LINUX_RAM_BASE config HIGHMEM bool "High Memory Support" select ARCH_DISCONTIGMEM_ENABLE + select KMAP_LOCAL help With ARC 2G:2G address split, only upper 2G is directly addressable by kernel. Enable this to potentially allow access to rest of 2G and PAE diff --git a/arch/arc/include/asm/highmem.h b/arch/arc/include/asm/highmem.h index 6e5eafb3afdd..a6b8e2c352c4 100644 --- a/arch/arc/include/asm/highmem.h +++ b/arch/arc/include/asm/highmem.h @@ -9,17 +9,29 @@ #ifdef CONFIG_HIGHMEM #include -#include +#include + +#define FIXMAP_SIZE PGDIR_SIZE +#define PKMAP_SIZE PGDIR_SIZE /* start after vmalloc area */ #define FIXMAP_BASE (PAGE_OFFSET - FIXMAP_SIZE - PKMAP_SIZE) -#define FIXMAP_SIZE PGDIR_SIZE /* only 1 PGD worth */ -#define KM_TYPE_NR ((FIXMAP_SIZE >> PAGE_SHIFT)/NR_CPUS) -#define FIXMAP_ADDR(nr) (FIXMAP_BASE + ((nr) << PAGE_SHIFT)) + +#define FIX_KMAP_SLOTS (KM_MAX_IDX * NR_CPUS) +#define FIX_KMAP_BEGIN (0UL) +#define FIX_KMAP_END ((FIX_KMAP_BEGIN + FIX_KMAP_SLOTS) - 1) + +#define FIXADDR_TOP (FIXMAP_BASE + (FIX_KMAP_END << PAGE_SHIFT)) + +/* + * This should be converted to the asm-generic version, but of course this + * is needlessly different from all other architectures. Sigh - tglx + */ +#define __fix_to_virt(x) (FIXADDR_TOP - ((x) << PAGE_SHIFT)) +#define __virt_to_fix(x) (((FIXADDR_TOP - ((x) & PAGE_MASK))) >> PAGE_SHIFT) /* start after fixmap area */ #define PKMAP_BASE (FIXMAP_BASE + FIXMAP_SIZE) -#define PKMAP_SIZE PGDIR_SIZE #define LAST_PKMAP (PKMAP_SIZE >> PAGE_SHIFT) #define LAST_PKMAP_MASK (LAST_PKMAP - 1) #define PKMAP_ADDR(nr) (PKMAP_BASE + ((nr) << PAGE_SHIFT)) @@ -29,11 +41,13 @@ extern void kmap_init(void); +#define arch_kmap_local_post_unmap(vaddr) \ + local_flush_tlb_kernel_range(vaddr, vaddr + PAGE_SIZE) + static inline void flush_cache_kmaps(void) { flush_cache_all(); } - #endif #endif diff --git a/arch/arc/include/asm/kmap_types.h b/arch/arc/include/asm/kmap_types.h deleted file mode 100644 index fecf7851ec32..000000000000 --- a/arch/arc/include/asm/kmap_types.h +++ /dev/null @@ -1,14 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) 2015 Synopsys, Inc. (www.synopsys.com) - */ - -#ifndef _ASM_KMAP_TYPES_H -#define _ASM_KMAP_TYPES_H - -/* - * We primarily need to define KM_TYPE_NR here but that in turn - * is a function of PGDIR_SIZE etc. - * To avoid circular deps issue, put everything in asm/highmem.h - */ -#endif diff --git a/arch/arc/mm/highmem.c b/arch/arc/mm/highmem.c index 1b9f473c6369..c79912a6b196 100644 --- a/arch/arc/mm/highmem.c +++ b/arch/arc/mm/highmem.c @@ -36,9 +36,8 @@ * This means each only has 1 PGDIR_SIZE worth of kvaddr mappings, which means * 2M of kvaddr space for typical config (8K page and 11:8:13 traversal split) * - * - fixmap anyhow needs a limited number of mappings. So 2M kvaddr == 256 PTE - * slots across NR_CPUS would be more than sufficient (generic code defines - * KM_TYPE_NR as 20). + * - The fixed KMAP slots for kmap_local/atomic() require KM_MAX_IDX slots per + * CPU. So the number of CPUs sharing a single PTE page is limited. * * - pkmap being preemptible, in theory could do with more than 256 concurrent * mappings. However, generic pkmap code: map_new_virtual(), doesn't traverse @@ -47,48 +46,6 @@ */ extern pte_t * pkmap_page_table; -static pte_t * fixmap_page_table; - -void *kmap_atomic_high_prot(struct page *page, pgprot_t prot) -{ - int idx, cpu_idx; - unsigned long vaddr; - - cpu_idx = kmap_atomic_idx_push(); - idx = cpu_idx + KM_TYPE_NR * smp_processor_id(); - vaddr = FIXMAP_ADDR(idx); - - set_pte_at(&init_mm, vaddr, fixmap_page_table + idx, - mk_pte(page, prot)); - - return (void *)vaddr; -} -EXPORT_SYMBOL(kmap_atomic_high_prot); - -void kunmap_atomic_high(void *kv) -{ - unsigned long kvaddr = (unsigned long)kv; - - if (kvaddr >= FIXMAP_BASE && kvaddr < (FIXMAP_BASE + FIXMAP_SIZE)) { - - /* - * Because preemption is disabled, this vaddr can be associated - * with the current allocated index. - * But in case of multiple live kmap_atomic(), it still relies on - * callers to unmap in right order. - */ - int cpu_idx = kmap_atomic_idx(); - int idx = cpu_idx + KM_TYPE_NR * smp_processor_id(); - - WARN_ON(kvaddr != FIXMAP_ADDR(idx)); - - pte_clear(&init_mm, kvaddr, fixmap_page_table + idx); - local_flush_tlb_kernel_range(kvaddr, kvaddr + PAGE_SIZE); - - kmap_atomic_idx_pop(); - } -} -EXPORT_SYMBOL(kunmap_atomic_high); static noinline pte_t * __init alloc_kmap_pgtable(unsigned long kvaddr) { @@ -108,10 +65,9 @@ void __init kmap_init(void) { /* Due to recursive include hell, we can't do this in processor.h */ BUILD_BUG_ON(PAGE_OFFSET < (VMALLOC_END + FIXMAP_SIZE + PKMAP_SIZE)); - - BUILD_BUG_ON(KM_TYPE_NR > PTRS_PER_PTE); - pkmap_page_table = alloc_kmap_pgtable(PKMAP_BASE); - BUILD_BUG_ON(LAST_PKMAP > PTRS_PER_PTE); - fixmap_page_table = alloc_kmap_pgtable(FIXMAP_BASE); + BUILD_BUG_ON(FIX_KMAP_SLOTS > PTRS_PER_PTE); + + pkmap_page_table = alloc_kmap_pgtable(PKMAP_BASE); + alloc_kmap_pgtable(FIXMAP_BASE); } diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 002e0cf025f5..4708ede3b826 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -1499,6 +1499,7 @@ config HAVE_ARCH_PFN_VALID config HIGHMEM bool "High Memory Support" depends on MMU + select KMAP_LOCAL help The address space of ARM processors is only 4 Gigabytes large and it has to accommodate user address space, kernel address diff --git a/arch/arm/include/asm/fixmap.h b/arch/arm/include/asm/fixmap.h index fc56fc3e1931..c279a8a463a2 100644 --- a/arch/arm/include/asm/fixmap.h +++ b/arch/arm/include/asm/fixmap.h @@ -7,14 +7,14 @@ #define FIXADDR_TOP (FIXADDR_END - PAGE_SIZE) #include -#include +#include enum fixed_addresses { FIX_EARLYCON_MEM_BASE, __end_of_permanent_fixed_addresses, FIX_KMAP_BEGIN = __end_of_permanent_fixed_addresses, - FIX_KMAP_END = FIX_KMAP_BEGIN + (KM_TYPE_NR * NR_CPUS) - 1, + FIX_KMAP_END = FIX_KMAP_BEGIN + (KM_MAX_IDX * NR_CPUS) - 1, /* Support writing RO kernel text via kprobes, jump labels, etc. */ FIX_TEXT_POKE0, diff --git a/arch/arm/include/asm/highmem.h b/arch/arm/include/asm/highmem.h index 31811be38d78..b4b66220952d 100644 --- a/arch/arm/include/asm/highmem.h +++ b/arch/arm/include/asm/highmem.h @@ -2,7 +2,8 @@ #ifndef _ASM_HIGHMEM_H #define _ASM_HIGHMEM_H -#include +#include +#include #define PKMAP_BASE (PAGE_OFFSET - PMD_SIZE) #define LAST_PKMAP PTRS_PER_PTE @@ -46,19 +47,32 @@ extern pte_t *pkmap_page_table; #ifdef ARCH_NEEDS_KMAP_HIGH_GET extern void *kmap_high_get(struct page *page); -#else + +static inline void *arch_kmap_local_high_get(struct page *page) +{ + if (IS_ENABLED(CONFIG_DEBUG_HIGHMEM) && !cache_is_vivt()) + return NULL; + return kmap_high_get(page); +} +#define arch_kmap_local_high_get arch_kmap_local_high_get + +#else /* ARCH_NEEDS_KMAP_HIGH_GET */ static inline void *kmap_high_get(struct page *page) { return NULL; } -#endif +#endif /* !ARCH_NEEDS_KMAP_HIGH_GET */ -/* - * The following functions are already defined by - * when CONFIG_HIGHMEM is not set. - */ -#ifdef CONFIG_HIGHMEM -extern void *kmap_atomic_pfn(unsigned long pfn); -#endif +#define arch_kmap_local_post_map(vaddr, pteval) \ + local_flush_tlb_kernel_page(vaddr) + +#define arch_kmap_local_pre_unmap(vaddr) \ +do { \ + if (cache_is_vivt()) \ + __cpuc_flush_dcache_area((void *)vaddr, PAGE_SIZE); \ +} while (0) + +#define arch_kmap_local_post_unmap(vaddr) \ + local_flush_tlb_kernel_page(vaddr) #endif diff --git a/arch/arm/include/asm/kmap_types.h b/arch/arm/include/asm/kmap_types.h deleted file mode 100644 index 5590940ee43d..000000000000 --- a/arch/arm/include/asm/kmap_types.h +++ /dev/null @@ -1,10 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __ARM_KMAP_TYPES_H -#define __ARM_KMAP_TYPES_H - -/* - * This is the "bare minimum". AIO seems to require this. - */ -#define KM_TYPE_NR 16 - -#endif diff --git a/arch/arm/mm/Makefile b/arch/arm/mm/Makefile index 7cb1699fbfc4..c4ce477c5261 100644 --- a/arch/arm/mm/Makefile +++ b/arch/arm/mm/Makefile @@ -19,7 +19,6 @@ obj-$(CONFIG_MODULES) += proc-syms.o obj-$(CONFIG_DEBUG_VIRTUAL) += physaddr.o obj-$(CONFIG_ALIGNMENT_TRAP) += alignment.o -obj-$(CONFIG_HIGHMEM) += highmem.o obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o obj-$(CONFIG_ARM_PV_FIXUP) += pv-fixup-asm.o diff --git a/arch/arm/mm/highmem.c b/arch/arm/mm/highmem.c deleted file mode 100644 index 187fab227b50..000000000000 --- a/arch/arm/mm/highmem.c +++ /dev/null @@ -1,121 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * arch/arm/mm/highmem.c -- ARM highmem support - * - * Author: Nicolas Pitre - * Created: september 8, 2008 - * Copyright: Marvell Semiconductors Inc. - */ - -#include -#include -#include -#include -#include -#include -#include "mm.h" - -static inline void set_fixmap_pte(int idx, pte_t pte) -{ - unsigned long vaddr = __fix_to_virt(idx); - pte_t *ptep = virt_to_kpte(vaddr); - - set_pte_ext(ptep, pte, 0); - local_flush_tlb_kernel_page(vaddr); -} - -static inline pte_t get_fixmap_pte(unsigned long vaddr) -{ - pte_t *ptep = virt_to_kpte(vaddr); - - return *ptep; -} - -void *kmap_atomic_high_prot(struct page *page, pgprot_t prot) -{ - unsigned int idx; - unsigned long vaddr; - void *kmap; - int type; - -#ifdef CONFIG_DEBUG_HIGHMEM - /* - * There is no cache coherency issue when non VIVT, so force the - * dedicated kmap usage for better debugging purposes in that case. - */ - if (!cache_is_vivt()) - kmap = NULL; - else -#endif - kmap = kmap_high_get(page); - if (kmap) - return kmap; - - type = kmap_atomic_idx_push(); - - idx = FIX_KMAP_BEGIN + type + KM_TYPE_NR * smp_processor_id(); - vaddr = __fix_to_virt(idx); -#ifdef CONFIG_DEBUG_HIGHMEM - /* - * With debugging enabled, kunmap_atomic forces that entry to 0. - * Make sure it was indeed properly unmapped. - */ - BUG_ON(!pte_none(get_fixmap_pte(vaddr))); -#endif - /* - * When debugging is off, kunmap_atomic leaves the previous mapping - * in place, so the contained TLB flush ensures the TLB is updated - * with the new mapping. - */ - set_fixmap_pte(idx, mk_pte(page, prot)); - - return (void *)vaddr; -} -EXPORT_SYMBOL(kmap_atomic_high_prot); - -void kunmap_atomic_high(void *kvaddr) -{ - unsigned long vaddr = (unsigned long) kvaddr & PAGE_MASK; - int idx, type; - - if (kvaddr >= (void *)FIXADDR_START) { - type = kmap_atomic_idx(); - idx = FIX_KMAP_BEGIN + type + KM_TYPE_NR * smp_processor_id(); - - if (cache_is_vivt()) - __cpuc_flush_dcache_area((void *)vaddr, PAGE_SIZE); -#ifdef CONFIG_DEBUG_HIGHMEM - BUG_ON(vaddr != __fix_to_virt(idx)); - set_fixmap_pte(idx, __pte(0)); -#else - (void) idx; /* to kill a warning */ -#endif - kmap_atomic_idx_pop(); - } else if (vaddr >= PKMAP_ADDR(0) && vaddr < PKMAP_ADDR(LAST_PKMAP)) { - /* this address was obtained through kmap_high_get() */ - kunmap_high(pte_page(pkmap_page_table[PKMAP_NR(vaddr)])); - } -} -EXPORT_SYMBOL(kunmap_atomic_high); - -void *kmap_atomic_pfn(unsigned long pfn) -{ - unsigned long vaddr; - int idx, type; - struct page *page = pfn_to_page(pfn); - - preempt_disable(); - pagefault_disable(); - if (!PageHighMem(page)) - return page_address(page); - - type = kmap_atomic_idx_push(); - idx = FIX_KMAP_BEGIN + type + KM_TYPE_NR * smp_processor_id(); - vaddr = __fix_to_virt(idx); -#ifdef CONFIG_DEBUG_HIGHMEM - BUG_ON(!pte_none(get_fixmap_pte(vaddr))); -#endif - set_fixmap_pte(idx, pfn_pte(pfn, kmap_prot)); - - return (void *)vaddr; -} diff --git a/arch/csky/Kconfig b/arch/csky/Kconfig index 268fad5f51cf..7a86481a22ff 100644 --- a/arch/csky/Kconfig +++ b/arch/csky/Kconfig @@ -286,6 +286,7 @@ config NR_CPUS config HIGHMEM bool "High Memory Support" depends on !CPU_CK610 + select KMAP_LOCAL default y config FORCE_MAX_ZONEORDER diff --git a/arch/csky/include/asm/fixmap.h b/arch/csky/include/asm/fixmap.h index 81f9477d5330..4b589cc20900 100644 --- a/arch/csky/include/asm/fixmap.h +++ b/arch/csky/include/asm/fixmap.h @@ -8,7 +8,7 @@ #include #ifdef CONFIG_HIGHMEM #include -#include +#include #endif enum fixed_addresses { @@ -17,7 +17,7 @@ enum fixed_addresses { #endif #ifdef CONFIG_HIGHMEM FIX_KMAP_BEGIN, - FIX_KMAP_END = FIX_KMAP_BEGIN + (KM_TYPE_NR * NR_CPUS) - 1, + FIX_KMAP_END = FIX_KMAP_BEGIN + (KM_MAX_IDX * NR_CPUS) - 1, #endif __end_of_fixed_addresses }; diff --git a/arch/csky/include/asm/highmem.h b/arch/csky/include/asm/highmem.h index 14645e3d5cd5..1f4ed3f4c0d9 100644 --- a/arch/csky/include/asm/highmem.h +++ b/arch/csky/include/asm/highmem.h @@ -9,7 +9,7 @@ #include #include #include -#include +#include #include /* undef for production */ @@ -32,10 +32,12 @@ extern pte_t *pkmap_page_table; #define ARCH_HAS_KMAP_FLUSH_TLB extern void kmap_flush_tlb(unsigned long addr); -extern void *kmap_atomic_pfn(unsigned long pfn); #define flush_cache_kmaps() do {} while (0) +#define arch_kmap_local_post_map(vaddr, pteval) kmap_flush_tlb(vaddr) +#define arch_kmap_local_post_unmap(vaddr) kmap_flush_tlb(vaddr) + extern void kmap_init(void); #endif /* __KERNEL__ */ diff --git a/arch/csky/mm/highmem.c b/arch/csky/mm/highmem.c index 89c10800a002..4161df3c6c15 100644 --- a/arch/csky/mm/highmem.c +++ b/arch/csky/mm/highmem.c @@ -9,8 +9,6 @@ #include #include -static pte_t *kmap_pte; - unsigned long highstart_pfn, highend_pfn; void kmap_flush_tlb(unsigned long addr) @@ -19,67 +17,7 @@ void kmap_flush_tlb(unsigned long addr) } EXPORT_SYMBOL(kmap_flush_tlb); -void *kmap_atomic_high_prot(struct page *page, pgprot_t prot) -{ - unsigned long vaddr; - int idx, type; - - type = kmap_atomic_idx_push(); - idx = type + KM_TYPE_NR*smp_processor_id(); - vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); -#ifdef CONFIG_DEBUG_HIGHMEM - BUG_ON(!pte_none(*(kmap_pte - idx))); -#endif - set_pte(kmap_pte-idx, mk_pte(page, prot)); - flush_tlb_one((unsigned long)vaddr); - - return (void *)vaddr; -} -EXPORT_SYMBOL(kmap_atomic_high_prot); - -void kunmap_atomic_high(void *kvaddr) -{ - unsigned long vaddr = (unsigned long) kvaddr & PAGE_MASK; - int idx; - - if (vaddr < FIXADDR_START) - return; - -#ifdef CONFIG_DEBUG_HIGHMEM - idx = KM_TYPE_NR*smp_processor_id() + kmap_atomic_idx(); - - BUG_ON(vaddr != __fix_to_virt(FIX_KMAP_BEGIN + idx)); - - pte_clear(&init_mm, vaddr, kmap_pte - idx); - flush_tlb_one(vaddr); -#else - (void) idx; /* to kill a warning */ -#endif - kmap_atomic_idx_pop(); -} -EXPORT_SYMBOL(kunmap_atomic_high); - -/* - * This is the same as kmap_atomic() but can map memory that doesn't - * have a struct page associated with it. - */ -void *kmap_atomic_pfn(unsigned long pfn) -{ - unsigned long vaddr; - int idx, type; - - pagefault_disable(); - - type = kmap_atomic_idx_push(); - idx = type + KM_TYPE_NR*smp_processor_id(); - vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); - set_pte(kmap_pte-idx, pfn_pte(pfn, PAGE_KERNEL)); - flush_tlb_one(vaddr); - - return (void *) vaddr; -} - -static void __init kmap_pages_init(void) +void __init kmap_init(void) { unsigned long vaddr; pgd_t *pgd; @@ -96,14 +34,3 @@ static void __init kmap_pages_init(void) pte = pte_offset_kernel(pmd, vaddr); pkmap_page_table = pte; } - -void __init kmap_init(void) -{ - unsigned long vaddr; - - kmap_pages_init(); - - vaddr = __fix_to_virt(FIX_KMAP_BEGIN); - - kmap_pte = pte_offset_kernel((pmd_t *)pgd_offset_k(vaddr), vaddr); -} diff --git a/arch/ia64/include/asm/kmap_types.h b/arch/ia64/include/asm/kmap_types.h deleted file mode 100644 index 5c268cf7c2bd..000000000000 --- a/arch/ia64/include/asm/kmap_types.h +++ /dev/null @@ -1,13 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_IA64_KMAP_TYPES_H -#define _ASM_IA64_KMAP_TYPES_H - -#ifdef CONFIG_DEBUG_HIGHMEM -#define __WITH_KM_FENCE -#endif - -#include - -#undef __WITH_KM_FENCE - -#endif /* _ASM_IA64_KMAP_TYPES_H */ diff --git a/arch/microblaze/Kconfig b/arch/microblaze/Kconfig index 33925ffed68f..7f6ca0ab4f81 100644 --- a/arch/microblaze/Kconfig +++ b/arch/microblaze/Kconfig @@ -155,6 +155,7 @@ config XILINX_UNCACHED_SHADOW config HIGHMEM bool "High memory support" depends on MMU + select KMAP_LOCAL help The address space of Microblaze processors is only 4 Gigabytes large and it has to accommodate user address space, kernel address diff --git a/arch/microblaze/include/asm/fixmap.h b/arch/microblaze/include/asm/fixmap.h index 0379ce5229e3..e6e9288bff76 100644 --- a/arch/microblaze/include/asm/fixmap.h +++ b/arch/microblaze/include/asm/fixmap.h @@ -20,7 +20,7 @@ #include #ifdef CONFIG_HIGHMEM #include -#include +#include #endif #define FIXADDR_TOP ((unsigned long)(-PAGE_SIZE)) @@ -47,7 +47,7 @@ enum fixed_addresses { FIX_HOLE, #ifdef CONFIG_HIGHMEM FIX_KMAP_BEGIN, /* reserved pte's for temporary kernel mappings */ - FIX_KMAP_END = FIX_KMAP_BEGIN + (KM_TYPE_NR * num_possible_cpus()) - 1, + FIX_KMAP_END = FIX_KMAP_BEGIN + (KM_MAX_IDX * num_possible_cpus()) - 1, #endif __end_of_fixed_addresses }; diff --git a/arch/microblaze/include/asm/highmem.h b/arch/microblaze/include/asm/highmem.h index 284ca8fb54c1..4418633fb163 100644 --- a/arch/microblaze/include/asm/highmem.h +++ b/arch/microblaze/include/asm/highmem.h @@ -25,7 +25,6 @@ #include #include -extern pte_t *kmap_pte; extern pte_t *pkmap_page_table; /* @@ -52,6 +51,11 @@ extern pte_t *pkmap_page_table; #define flush_cache_kmaps() { flush_icache(); flush_dcache(); } +#define arch_kmap_local_post_map(vaddr, pteval) \ + local_flush_tlb_page(NULL, vaddr); +#define arch_kmap_local_post_unmap(vaddr) \ + local_flush_tlb_page(NULL, vaddr); + #endif /* __KERNEL__ */ #endif /* _ASM_HIGHMEM_H */ diff --git a/arch/microblaze/mm/Makefile b/arch/microblaze/mm/Makefile index 1b16875cea70..8ced71100047 100644 --- a/arch/microblaze/mm/Makefile +++ b/arch/microblaze/mm/Makefile @@ -6,4 +6,3 @@ obj-y := consistent.o init.o obj-$(CONFIG_MMU) += pgtable.o mmu_context.o fault.o -obj-$(CONFIG_HIGHMEM) += highmem.o diff --git a/arch/microblaze/mm/highmem.c b/arch/microblaze/mm/highmem.c deleted file mode 100644 index 92e0890416c9..000000000000 --- a/arch/microblaze/mm/highmem.c +++ /dev/null @@ -1,78 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * highmem.c: virtual kernel memory mappings for high memory - * - * PowerPC version, stolen from the i386 version. - * - * Used in CONFIG_HIGHMEM systems for memory pages which - * are not addressable by direct kernel virtual addresses. - * - * Copyright (C) 1999 Gerhard Wichert, Siemens AG - * Gerhard.Wichert@pdb.siemens.de - * - * - * Redesigned the x86 32-bit VM architecture to deal with - * up to 16 Terrabyte physical memory. With current x86 CPUs - * we now support up to 64 Gigabytes physical RAM. - * - * Copyright (C) 1999 Ingo Molnar - * - * Reworked for PowerPC by various contributors. Moved from - * highmem.h by Benjamin Herrenschmidt (c) 2009 IBM Corp. - */ - -#include -#include - -/* - * The use of kmap_atomic/kunmap_atomic is discouraged - kmap/kunmap - * gives a more generic (and caching) interface. But kmap_atomic can - * be used in IRQ contexts, so in some (very limited) cases we need - * it. - */ -#include - -void *kmap_atomic_high_prot(struct page *page, pgprot_t prot) -{ - - unsigned long vaddr; - int idx, type; - - type = kmap_atomic_idx_push(); - idx = type + KM_TYPE_NR*smp_processor_id(); - vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); -#ifdef CONFIG_DEBUG_HIGHMEM - BUG_ON(!pte_none(*(kmap_pte-idx))); -#endif - set_pte_at(&init_mm, vaddr, kmap_pte-idx, mk_pte(page, prot)); - local_flush_tlb_page(NULL, vaddr); - - return (void *) vaddr; -} -EXPORT_SYMBOL(kmap_atomic_high_prot); - -void kunmap_atomic_high(void *kvaddr) -{ - unsigned long vaddr = (unsigned long) kvaddr & PAGE_MASK; - int type; - unsigned int idx; - - if (vaddr < __fix_to_virt(FIX_KMAP_END)) - return; - - type = kmap_atomic_idx(); - - idx = type + KM_TYPE_NR * smp_processor_id(); -#ifdef CONFIG_DEBUG_HIGHMEM - BUG_ON(vaddr != __fix_to_virt(FIX_KMAP_BEGIN + idx)); -#endif - /* - * force other mappings to Oops if they'll try to access - * this pte without first remap it - */ - pte_clear(&init_mm, vaddr, kmap_pte-idx); - local_flush_tlb_page(NULL, vaddr); - - kmap_atomic_idx_pop(); -} -EXPORT_SYMBOL(kunmap_atomic_high); diff --git a/arch/microblaze/mm/init.c b/arch/microblaze/mm/init.c index 45da639bd22c..a444778e59de 100644 --- a/arch/microblaze/mm/init.c +++ b/arch/microblaze/mm/init.c @@ -50,16 +50,11 @@ EXPORT_SYMBOL(min_low_pfn); EXPORT_SYMBOL(max_low_pfn); #ifdef CONFIG_HIGHMEM -pte_t *kmap_pte; -EXPORT_SYMBOL(kmap_pte); - static void __init highmem_init(void) { pr_debug("%x\n", (u32)PKMAP_BASE); map_page(PKMAP_BASE, 0, 0); /* XXX gross */ pkmap_page_table = virt_to_kpte(PKMAP_BASE); - - kmap_pte = virt_to_kpte(__fix_to_virt(FIX_KMAP_BEGIN)); } static void highmem_setup(void) diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index 2000bb2b0220..6b762bebff33 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -2719,6 +2719,7 @@ config WAR_MIPS34K_MISSED_ITLB config HIGHMEM bool "High Memory Support" depends on 32BIT && CPU_SUPPORTS_HIGHMEM && SYS_SUPPORTS_HIGHMEM && !CPU_MIPS32_3_5_EVA + select KMAP_LOCAL config CPU_SUPPORTS_HIGHMEM bool diff --git a/arch/mips/include/asm/fixmap.h b/arch/mips/include/asm/fixmap.h index 743535be7528..beea14761cef 100644 --- a/arch/mips/include/asm/fixmap.h +++ b/arch/mips/include/asm/fixmap.h @@ -17,7 +17,7 @@ #include #ifdef CONFIG_HIGHMEM #include -#include +#include #endif /* @@ -52,7 +52,7 @@ enum fixed_addresses { #ifdef CONFIG_HIGHMEM /* reserved pte's for temporary kernel mappings */ FIX_KMAP_BEGIN = FIX_CMAP_END + 1, - FIX_KMAP_END = FIX_KMAP_BEGIN+(KM_TYPE_NR*NR_CPUS)-1, + FIX_KMAP_END = FIX_KMAP_BEGIN + (KM_MAX_IDX * NR_CPUS) - 1, #endif __end_of_fixed_addresses }; diff --git a/arch/mips/include/asm/highmem.h b/arch/mips/include/asm/highmem.h index f1f788b57166..19edf8e69971 100644 --- a/arch/mips/include/asm/highmem.h +++ b/arch/mips/include/asm/highmem.h @@ -24,7 +24,7 @@ #include #include #include -#include +#include /* declarations for highmem.c */ extern unsigned long highstart_pfn, highend_pfn; @@ -48,11 +48,11 @@ extern pte_t *pkmap_page_table; #define ARCH_HAS_KMAP_FLUSH_TLB extern void kmap_flush_tlb(unsigned long addr); -extern void *kmap_atomic_pfn(unsigned long pfn); #define flush_cache_kmaps() BUG_ON(cpu_has_dc_aliases) -extern void kmap_init(void); +#define arch_kmap_local_post_map(vaddr, pteval) local_flush_tlb_one(vaddr) +#define arch_kmap_local_post_unmap(vaddr) local_flush_tlb_one(vaddr) #endif /* __KERNEL__ */ diff --git a/arch/mips/include/asm/kmap_types.h b/arch/mips/include/asm/kmap_types.h deleted file mode 100644 index 16665dc2431b..000000000000 --- a/arch/mips/include/asm/kmap_types.h +++ /dev/null @@ -1,13 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_KMAP_TYPES_H -#define _ASM_KMAP_TYPES_H - -#ifdef CONFIG_DEBUG_HIGHMEM -#define __WITH_KM_FENCE -#endif - -#include - -#undef __WITH_KM_FENCE - -#endif diff --git a/arch/mips/mm/highmem.c b/arch/mips/mm/highmem.c index 5fec7f45d79a..57e2f08f00d0 100644 --- a/arch/mips/mm/highmem.c +++ b/arch/mips/mm/highmem.c @@ -8,8 +8,6 @@ #include #include -static pte_t *kmap_pte; - unsigned long highstart_pfn, highend_pfn; void kmap_flush_tlb(unsigned long addr) @@ -17,78 +15,3 @@ void kmap_flush_tlb(unsigned long addr) flush_tlb_one(addr); } EXPORT_SYMBOL(kmap_flush_tlb); - -void *kmap_atomic_high_prot(struct page *page, pgprot_t prot) -{ - unsigned long vaddr; - int idx, type; - - type = kmap_atomic_idx_push(); - idx = type + KM_TYPE_NR*smp_processor_id(); - vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); -#ifdef CONFIG_DEBUG_HIGHMEM - BUG_ON(!pte_none(*(kmap_pte - idx))); -#endif - set_pte(kmap_pte-idx, mk_pte(page, prot)); - local_flush_tlb_one((unsigned long)vaddr); - - return (void*) vaddr; -} -EXPORT_SYMBOL(kmap_atomic_high_prot); - -void kunmap_atomic_high(void *kvaddr) -{ - unsigned long vaddr = (unsigned long) kvaddr & PAGE_MASK; - int type __maybe_unused; - - if (vaddr < FIXADDR_START) - return; - - type = kmap_atomic_idx(); -#ifdef CONFIG_DEBUG_HIGHMEM - { - int idx = type + KM_TYPE_NR * smp_processor_id(); - - BUG_ON(vaddr != __fix_to_virt(FIX_KMAP_BEGIN + idx)); - - /* - * force other mappings to Oops if they'll try to access - * this pte without first remap it - */ - pte_clear(&init_mm, vaddr, kmap_pte-idx); - local_flush_tlb_one(vaddr); - } -#endif - kmap_atomic_idx_pop(); -} -EXPORT_SYMBOL(kunmap_atomic_high); - -/* - * This is the same as kmap_atomic() but can map memory that doesn't - * have a struct page associated with it. - */ -void *kmap_atomic_pfn(unsigned long pfn) -{ - unsigned long vaddr; - int idx, type; - - preempt_disable(); - pagefault_disable(); - - type = kmap_atomic_idx_push(); - idx = type + KM_TYPE_NR*smp_processor_id(); - vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); - set_pte(kmap_pte-idx, pfn_pte(pfn, PAGE_KERNEL)); - flush_tlb_one(vaddr); - - return (void*) vaddr; -} - -void __init kmap_init(void) -{ - unsigned long kmap_vstart; - - /* cache the first kmap pte */ - kmap_vstart = __fix_to_virt(FIX_KMAP_BEGIN); - kmap_pte = virt_to_kpte(kmap_vstart); -} diff --git a/arch/mips/mm/init.c b/arch/mips/mm/init.c index 07e84a774938..bc80893e5c0f 100644 --- a/arch/mips/mm/init.c +++ b/arch/mips/mm/init.c @@ -36,7 +36,6 @@ #include #include #include -#include #include #include #include @@ -402,9 +401,6 @@ void __init paging_init(void) pagetable_init(); -#ifdef CONFIG_HIGHMEM - kmap_init(); -#endif #ifdef CONFIG_ZONE_DMA max_zone_pfns[ZONE_DMA] = MAX_DMA_PFN; #endif diff --git a/arch/nds32/Kconfig.cpu b/arch/nds32/Kconfig.cpu index f88a12fdf0f3..c10759952485 100644 --- a/arch/nds32/Kconfig.cpu +++ b/arch/nds32/Kconfig.cpu @@ -157,6 +157,7 @@ config HW_SUPPORT_UNALIGNMENT_ACCESS config HIGHMEM bool "High Memory Support" depends on MMU && !CPU_CACHE_ALIASING + select KMAP_LOCAL help The address space of Andes processors is only 4 Gigabytes large and it has to accommodate user address space, kernel address diff --git a/arch/nds32/include/asm/fixmap.h b/arch/nds32/include/asm/fixmap.h index 5a4bf11e5800..2fa09a2de428 100644 --- a/arch/nds32/include/asm/fixmap.h +++ b/arch/nds32/include/asm/fixmap.h @@ -6,7 +6,7 @@ #ifdef CONFIG_HIGHMEM #include -#include +#include #endif enum fixed_addresses { @@ -14,7 +14,7 @@ enum fixed_addresses { FIX_KMAP_RESERVED, FIX_KMAP_BEGIN, #ifdef CONFIG_HIGHMEM - FIX_KMAP_END = FIX_KMAP_BEGIN + (KM_TYPE_NR * NR_CPUS), + FIX_KMAP_END = FIX_KMAP_BEGIN + (KM_MAX_IDX * NR_CPUS) - 1, #endif FIX_EARLYCON_MEM_BASE, __end_of_fixed_addresses diff --git a/arch/nds32/include/asm/highmem.h b/arch/nds32/include/asm/highmem.h index fe986d0e6e3f..16159a8716f2 100644 --- a/arch/nds32/include/asm/highmem.h +++ b/arch/nds32/include/asm/highmem.h @@ -5,7 +5,6 @@ #define _ASM_HIGHMEM_H #include -#include #include /* @@ -45,11 +44,22 @@ extern pte_t *pkmap_page_table; extern void kmap_init(void); /* - * The following functions are already defined by - * when CONFIG_HIGHMEM is not set. + * FIXME: The below looks broken vs. a kmap_atomic() in task context which + * is interupted and another kmap_atomic() happens in interrupt context. + * But what do I know about nds32. -- tglx */ -#ifdef CONFIG_HIGHMEM -extern void *kmap_atomic_pfn(unsigned long pfn); -#endif +#define arch_kmap_local_post_map(vaddr, pteval) \ + do { \ + __nds32__tlbop_inv(vaddr); \ + __nds32__mtsr_dsb(vaddr, NDS32_SR_TLB_VPN); \ + __nds32__tlbop_rwr(pteval); \ + __nds32__isb(); \ + } while (0) + +#define arch_kmap_local_pre_unmap(vaddr) \ + do { \ + __nds32__tlbop_inv(vaddr); \ + __nds32__isb(); \ + } while (0) #endif diff --git a/arch/nds32/mm/Makefile b/arch/nds32/mm/Makefile index 897ecaf5cf54..14fb2e8eb036 100644 --- a/arch/nds32/mm/Makefile +++ b/arch/nds32/mm/Makefile @@ -3,7 +3,6 @@ obj-y := extable.o tlb.o fault.o init.o mmap.o \ mm-nds32.o cacheflush.o proc.o obj-$(CONFIG_ALIGNMENT_TRAP) += alignment.o -obj-$(CONFIG_HIGHMEM) += highmem.o ifdef CONFIG_FUNCTION_TRACER CFLAGS_REMOVE_proc.o = $(CC_FLAGS_FTRACE) diff --git a/arch/nds32/mm/highmem.c b/arch/nds32/mm/highmem.c deleted file mode 100644 index 4284cd59e21a..000000000000 --- a/arch/nds32/mm/highmem.c +++ /dev/null @@ -1,48 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -// Copyright (C) 2005-2017 Andes Technology Corporation - -#include -#include -#include -#include -#include -#include -#include -#include - -void *kmap_atomic_high_prot(struct page *page, pgprot_t prot) -{ - unsigned int idx; - unsigned long vaddr, pte; - int type; - pte_t *ptep; - - type = kmap_atomic_idx_push(); - - idx = type + KM_TYPE_NR * smp_processor_id(); - vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); - pte = (page_to_pfn(page) << PAGE_SHIFT) | prot; - ptep = pte_offset_kernel(pmd_off_k(vaddr), vaddr); - set_pte(ptep, pte); - - __nds32__tlbop_inv(vaddr); - __nds32__mtsr_dsb(vaddr, NDS32_SR_TLB_VPN); - __nds32__tlbop_rwr(pte); - __nds32__isb(); - return (void *)vaddr; -} -EXPORT_SYMBOL(kmap_atomic_high_prot); - -void kunmap_atomic_high(void *kvaddr) -{ - if (kvaddr >= (void *)FIXADDR_START) { - unsigned long vaddr = (unsigned long)kvaddr; - pte_t *ptep; - kmap_atomic_idx_pop(); - __nds32__tlbop_inv(vaddr); - __nds32__isb(); - ptep = pte_offset_kernel(pmd_off_k(vaddr), vaddr); - set_pte(ptep, 0); - } -} -EXPORT_SYMBOL(kunmap_atomic_high); diff --git a/arch/openrisc/mm/init.c b/arch/openrisc/mm/init.c index 8348feaaf46e..bf9b2310fc93 100644 --- a/arch/openrisc/mm/init.c +++ b/arch/openrisc/mm/init.c @@ -33,7 +33,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/openrisc/mm/ioremap.c b/arch/openrisc/mm/ioremap.c index a978590d802d..5aed97a18bac 100644 --- a/arch/openrisc/mm/ioremap.c +++ b/arch/openrisc/mm/ioremap.c @@ -15,7 +15,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/parisc/include/asm/kmap_types.h b/arch/parisc/include/asm/kmap_types.h deleted file mode 100644 index 3e70b5cd1123..000000000000 --- a/arch/parisc/include/asm/kmap_types.h +++ /dev/null @@ -1,13 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_KMAP_TYPES_H -#define _ASM_KMAP_TYPES_H - -#ifdef CONFIG_DEBUG_HIGHMEM -#define __WITH_KM_FENCE -#endif - -#include - -#undef __WITH_KM_FENCE - -#endif diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 5181872f9452..d4b2dfb2746f 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -410,6 +410,7 @@ menu "Kernel options" config HIGHMEM bool "High memory support" depends on PPC32 + select KMAP_LOCAL source "kernel/Kconfig.hz" diff --git a/arch/powerpc/include/asm/fixmap.h b/arch/powerpc/include/asm/fixmap.h index 6bfc87915d5d..8d03c16a3663 100644 --- a/arch/powerpc/include/asm/fixmap.h +++ b/arch/powerpc/include/asm/fixmap.h @@ -20,7 +20,7 @@ #include #ifdef CONFIG_HIGHMEM #include -#include +#include #endif #ifdef CONFIG_KASAN @@ -55,7 +55,7 @@ enum fixed_addresses { FIX_EARLY_DEBUG_BASE = FIX_EARLY_DEBUG_TOP+(ALIGN(SZ_128K, PAGE_SIZE)/PAGE_SIZE)-1, #ifdef CONFIG_HIGHMEM FIX_KMAP_BEGIN, /* reserved pte's for temporary kernel mappings */ - FIX_KMAP_END = FIX_KMAP_BEGIN+(KM_TYPE_NR*NR_CPUS)-1, + FIX_KMAP_END = FIX_KMAP_BEGIN + (KM_MAX_IDX * NR_CPUS) - 1, #endif #ifdef CONFIG_PPC_8xx /* For IMMR we need an aligned 512K area */ diff --git a/arch/powerpc/include/asm/highmem.h b/arch/powerpc/include/asm/highmem.h index 104026f7d6bc..80a5ae771c65 100644 --- a/arch/powerpc/include/asm/highmem.h +++ b/arch/powerpc/include/asm/highmem.h @@ -24,12 +24,10 @@ #ifdef __KERNEL__ #include -#include #include #include #include -extern pte_t *kmap_pte; extern pte_t *pkmap_page_table; /* @@ -60,6 +58,11 @@ extern pte_t *pkmap_page_table; #define flush_cache_kmaps() flush_cache_all() +#define arch_kmap_local_post_map(vaddr, pteval) \ + local_flush_tlb_page(NULL, vaddr) +#define arch_kmap_local_post_unmap(vaddr) \ + local_flush_tlb_page(NULL, vaddr) + #endif /* __KERNEL__ */ #endif /* _ASM_HIGHMEM_H */ diff --git a/arch/powerpc/include/asm/kmap_types.h b/arch/powerpc/include/asm/kmap_types.h deleted file mode 100644 index c8fa182d48c8..000000000000 --- a/arch/powerpc/include/asm/kmap_types.h +++ /dev/null @@ -1,13 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -#ifndef _ASM_POWERPC_KMAP_TYPES_H -#define _ASM_POWERPC_KMAP_TYPES_H - -#ifdef __KERNEL__ - -/* - */ - -#define KM_TYPE_NR 16 - -#endif /* __KERNEL__ */ -#endif /* _ASM_POWERPC_KMAP_TYPES_H */ diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile index 55b4a8bd408a..3b4e9e4e25ea 100644 --- a/arch/powerpc/mm/Makefile +++ b/arch/powerpc/mm/Makefile @@ -16,7 +16,6 @@ obj-$(CONFIG_NEED_MULTIPLE_NODES) += numa.o obj-$(CONFIG_PPC_MM_SLICES) += slice.o obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o obj-$(CONFIG_NOT_COHERENT_CACHE) += dma-noncoherent.o -obj-$(CONFIG_HIGHMEM) += highmem.o obj-$(CONFIG_PPC_COPRO_BASE) += copro_fault.o obj-$(CONFIG_PPC_PTDUMP) += ptdump/ obj-$(CONFIG_KASAN) += kasan/ diff --git a/arch/powerpc/mm/highmem.c b/arch/powerpc/mm/highmem.c deleted file mode 100644 index 624b4438aff9..000000000000 --- a/arch/powerpc/mm/highmem.c +++ /dev/null @@ -1,67 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * highmem.c: virtual kernel memory mappings for high memory - * - * PowerPC version, stolen from the i386 version. - * - * Used in CONFIG_HIGHMEM systems for memory pages which - * are not addressable by direct kernel virtual addresses. - * - * Copyright (C) 1999 Gerhard Wichert, Siemens AG - * Gerhard.Wichert@pdb.siemens.de - * - * - * Redesigned the x86 32-bit VM architecture to deal with - * up to 16 Terrabyte physical memory. With current x86 CPUs - * we now support up to 64 Gigabytes physical RAM. - * - * Copyright (C) 1999 Ingo Molnar - * - * Reworked for PowerPC by various contributors. Moved from - * highmem.h by Benjamin Herrenschmidt (c) 2009 IBM Corp. - */ - -#include -#include - -void *kmap_atomic_high_prot(struct page *page, pgprot_t prot) -{ - unsigned long vaddr; - int idx, type; - - type = kmap_atomic_idx_push(); - idx = type + KM_TYPE_NR*smp_processor_id(); - vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); - WARN_ON(IS_ENABLED(CONFIG_DEBUG_HIGHMEM) && !pte_none(*(kmap_pte - idx))); - __set_pte_at(&init_mm, vaddr, kmap_pte-idx, mk_pte(page, prot), 1); - local_flush_tlb_page(NULL, vaddr); - - return (void*) vaddr; -} -EXPORT_SYMBOL(kmap_atomic_high_prot); - -void kunmap_atomic_high(void *kvaddr) -{ - unsigned long vaddr = (unsigned long) kvaddr & PAGE_MASK; - - if (vaddr < __fix_to_virt(FIX_KMAP_END)) - return; - - if (IS_ENABLED(CONFIG_DEBUG_HIGHMEM)) { - int type = kmap_atomic_idx(); - unsigned int idx; - - idx = type + KM_TYPE_NR * smp_processor_id(); - WARN_ON(vaddr != __fix_to_virt(FIX_KMAP_BEGIN + idx)); - - /* - * force other mappings to Oops if they'll try to access - * this pte without first remap it - */ - pte_clear(&init_mm, vaddr, kmap_pte-idx); - local_flush_tlb_page(NULL, vaddr); - } - - kmap_atomic_idx_pop(); -} -EXPORT_SYMBOL(kunmap_atomic_high); diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 3fc325bebe4d..25284fdb300c 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -62,11 +62,6 @@ unsigned long long memory_limit; bool init_mem_is_free; -#ifdef CONFIG_HIGHMEM -pte_t *kmap_pte; -EXPORT_SYMBOL(kmap_pte); -#endif - pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, unsigned long size, pgprot_t vma_prot) { @@ -236,8 +231,6 @@ void __init paging_init(void) map_kernel_page(PKMAP_BASE, 0, __pgprot(0)); /* XXX gross */ pkmap_page_table = virt_to_kpte(PKMAP_BASE); - - kmap_pte = virt_to_kpte(__fix_to_virt(FIX_KMAP_BEGIN)); #endif /* CONFIG_HIGHMEM */ printk(KERN_DEBUG "Top of RAM: 0x%llx, Total RAM: 0x%llx\n", diff --git a/arch/sh/include/asm/fixmap.h b/arch/sh/include/asm/fixmap.h index f38adc189b83..b07fbc7f7bc6 100644 --- a/arch/sh/include/asm/fixmap.h +++ b/arch/sh/include/asm/fixmap.h @@ -13,9 +13,6 @@ #include #include #include -#ifdef CONFIG_HIGHMEM -#include -#endif /* * Here we define all the compile-time 'special' virtual @@ -53,11 +50,6 @@ enum fixed_addresses { FIX_CMAP_BEGIN, FIX_CMAP_END = FIX_CMAP_BEGIN + (FIX_N_COLOURS * NR_CPUS) - 1, -#ifdef CONFIG_HIGHMEM - FIX_KMAP_BEGIN, /* reserved pte's for temporary kernel mappings */ - FIX_KMAP_END = FIX_KMAP_BEGIN + (KM_TYPE_NR * NR_CPUS) - 1, -#endif - #ifdef CONFIG_IOREMAP_FIXED /* * FIX_IOREMAP entries are useful for mapping physical address diff --git a/arch/sh/include/asm/kmap_types.h b/arch/sh/include/asm/kmap_types.h deleted file mode 100644 index b78107f923dd..000000000000 --- a/arch/sh/include/asm/kmap_types.h +++ /dev/null @@ -1,15 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __SH_KMAP_TYPES_H -#define __SH_KMAP_TYPES_H - -/* Dummy header just to define km_type. */ - -#ifdef CONFIG_DEBUG_HIGHMEM -#define __WITH_KM_FENCE -#endif - -#include - -#undef __WITH_KM_FENCE - -#endif diff --git a/arch/sh/mm/init.c b/arch/sh/mm/init.c index 3348e0c4d769..0db6919af8d3 100644 --- a/arch/sh/mm/init.c +++ b/arch/sh/mm/init.c @@ -362,9 +362,6 @@ void __init mem_init(void) mem_init_print_info(NULL); pr_info("virtual kernel memory layout:\n" " fixmap : 0x%08lx - 0x%08lx (%4ld kB)\n" -#ifdef CONFIG_HIGHMEM - " pkmap : 0x%08lx - 0x%08lx (%4ld kB)\n" -#endif " vmalloc : 0x%08lx - 0x%08lx (%4ld MB)\n" " lowmem : 0x%08lx - 0x%08lx (%4ld MB) (cached)\n" #ifdef CONFIG_UNCACHED_MAPPING @@ -376,11 +373,6 @@ void __init mem_init(void) FIXADDR_START, FIXADDR_TOP, (FIXADDR_TOP - FIXADDR_START) >> 10, -#ifdef CONFIG_HIGHMEM - PKMAP_BASE, PKMAP_BASE+LAST_PKMAP*PAGE_SIZE, - (LAST_PKMAP*PAGE_SIZE) >> 10, -#endif - (unsigned long)VMALLOC_START, VMALLOC_END, (VMALLOC_END - VMALLOC_START) >> 20, diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index a6ca135442f9..e841708cb830 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig @@ -139,6 +139,7 @@ config MMU config HIGHMEM bool default y if SPARC32 + select KMAP_LOCAL config ZONE_DMA bool diff --git a/arch/sparc/include/asm/highmem.h b/arch/sparc/include/asm/highmem.h index 6c35f0d27ee1..875116209ec1 100644 --- a/arch/sparc/include/asm/highmem.h +++ b/arch/sparc/include/asm/highmem.h @@ -24,7 +24,6 @@ #include #include #include -#include #include /* declarations for highmem.c */ @@ -33,8 +32,6 @@ extern unsigned long highstart_pfn, highend_pfn; #define kmap_prot __pgprot(SRMMU_ET_PTE | SRMMU_PRIV | SRMMU_CACHE) extern pte_t *pkmap_page_table; -void kmap_init(void) __init; - /* * Right now we initialize only a single pte table. It can be extended * easily, subsequent pte tables have to be allocated in one physical @@ -53,6 +50,11 @@ void kmap_init(void) __init; #define flush_cache_kmaps() flush_cache_all() +/* FIXME: Use __flush_tlb_one(vaddr) instead of flush_cache_all() -- Anton */ +#define arch_kmap_local_post_map(vaddr, pteval) flush_cache_all() +#define arch_kmap_local_post_unmap(vaddr) flush_cache_all() + + #endif /* __KERNEL__ */ #endif /* _ASM_HIGHMEM_H */ diff --git a/arch/sparc/include/asm/kmap_types.h b/arch/sparc/include/asm/kmap_types.h deleted file mode 100644 index 55a99b6bd91e..000000000000 --- a/arch/sparc/include/asm/kmap_types.h +++ /dev/null @@ -1,11 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_KMAP_TYPES_H -#define _ASM_KMAP_TYPES_H - -/* Dummy header just to define km_type. None of this - * is actually used on sparc. -DaveM - */ - -#include - -#endif diff --git a/arch/sparc/include/asm/vaddrs.h b/arch/sparc/include/asm/vaddrs.h index 84d054b07a6f..4fec0341e2a8 100644 --- a/arch/sparc/include/asm/vaddrs.h +++ b/arch/sparc/include/asm/vaddrs.h @@ -32,13 +32,13 @@ #define SRMMU_NOCACHE_ALCRATIO 64 /* 256 pages per 64MB of system RAM */ #ifndef __ASSEMBLY__ -#include +#include enum fixed_addresses { FIX_HOLE, #ifdef CONFIG_HIGHMEM FIX_KMAP_BEGIN, - FIX_KMAP_END = (KM_TYPE_NR * NR_CPUS), + FIX_KMAP_END = (KM_MAX_IDX * NR_CPUS), #endif __end_of_fixed_addresses }; diff --git a/arch/sparc/mm/Makefile b/arch/sparc/mm/Makefile index b078205b70e0..68db1f859b02 100644 --- a/arch/sparc/mm/Makefile +++ b/arch/sparc/mm/Makefile @@ -15,6 +15,3 @@ obj-$(CONFIG_SPARC32) += leon_mm.o # Only used by sparc64 obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o - -# Only used by sparc32 -obj-$(CONFIG_HIGHMEM) += highmem.o diff --git a/arch/sparc/mm/highmem.c b/arch/sparc/mm/highmem.c deleted file mode 100644 index 8f2a2afb048a..000000000000 --- a/arch/sparc/mm/highmem.c +++ /dev/null @@ -1,115 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * highmem.c: virtual kernel memory mappings for high memory - * - * Provides kernel-static versions of atomic kmap functions originally - * found as inlines in include/asm-sparc/highmem.h. These became - * needed as kmap_atomic() and kunmap_atomic() started getting - * called from within modules. - * -- Tomas Szepe , September 2002 - * - * But kmap_atomic() and kunmap_atomic() cannot be inlined in - * modules because they are loaded with btfixup-ped functions. - */ - -/* - * The use of kmap_atomic/kunmap_atomic is discouraged - kmap/kunmap - * gives a more generic (and caching) interface. But kmap_atomic can - * be used in IRQ contexts, so in some (very limited) cases we need it. - * - * XXX This is an old text. Actually, it's good to use atomic kmaps, - * provided you remember that they are atomic and not try to sleep - * with a kmap taken, much like a spinlock. Non-atomic kmaps are - * shared by CPUs, and so precious, and establishing them requires IPI. - * Atomic kmaps are lightweight and we may have NCPUS more of them. - */ -#include -#include -#include - -#include -#include -#include - -static pte_t *kmap_pte; - -void __init kmap_init(void) -{ - unsigned long address = __fix_to_virt(FIX_KMAP_BEGIN); - - /* cache the first kmap pte */ - kmap_pte = virt_to_kpte(address); -} - -void *kmap_atomic_high_prot(struct page *page, pgprot_t prot) -{ - unsigned long vaddr; - long idx, type; - - type = kmap_atomic_idx_push(); - idx = type + KM_TYPE_NR*smp_processor_id(); - vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); - -/* XXX Fix - Anton */ -#if 0 - __flush_cache_one(vaddr); -#else - flush_cache_all(); -#endif - -#ifdef CONFIG_DEBUG_HIGHMEM - BUG_ON(!pte_none(*(kmap_pte-idx))); -#endif - set_pte(kmap_pte-idx, mk_pte(page, prot)); -/* XXX Fix - Anton */ -#if 0 - __flush_tlb_one(vaddr); -#else - flush_tlb_all(); -#endif - - return (void*) vaddr; -} -EXPORT_SYMBOL(kmap_atomic_high_prot); - -void kunmap_atomic_high(void *kvaddr) -{ - unsigned long vaddr = (unsigned long) kvaddr & PAGE_MASK; - int type; - - if (vaddr < FIXADDR_START) - return; - - type = kmap_atomic_idx(); - -#ifdef CONFIG_DEBUG_HIGHMEM - { - unsigned long idx; - - idx = type + KM_TYPE_NR * smp_processor_id(); - BUG_ON(vaddr != __fix_to_virt(FIX_KMAP_BEGIN+idx)); - - /* XXX Fix - Anton */ -#if 0 - __flush_cache_one(vaddr); -#else - flush_cache_all(); -#endif - - /* - * force other mappings to Oops if they'll try to access - * this pte without first remap it - */ - pte_clear(&init_mm, vaddr, kmap_pte-idx); - /* XXX Fix - Anton */ -#if 0 - __flush_tlb_one(vaddr); -#else - flush_tlb_all(); -#endif - } -#endif - - kmap_atomic_idx_pop(); -} -EXPORT_SYMBOL(kunmap_atomic_high); diff --git a/arch/sparc/mm/srmmu.c b/arch/sparc/mm/srmmu.c index 0070f8b9a753..a03caa5f6628 100644 --- a/arch/sparc/mm/srmmu.c +++ b/arch/sparc/mm/srmmu.c @@ -971,8 +971,6 @@ void __init srmmu_paging_init(void) sparc_context_init(num_contexts); - kmap_init(); - { unsigned long max_zone_pfn[MAX_NR_ZONES] = { 0 }; diff --git a/arch/um/include/asm/fixmap.h b/arch/um/include/asm/fixmap.h index 2c697a145ac1..2efac5827188 100644 --- a/arch/um/include/asm/fixmap.h +++ b/arch/um/include/asm/fixmap.h @@ -3,7 +3,6 @@ #define __UM_FIXMAP_H #include -#include #include #include #include diff --git a/arch/um/include/asm/kmap_types.h b/arch/um/include/asm/kmap_types.h deleted file mode 100644 index b0bd12de1d23..000000000000 --- a/arch/um/include/asm/kmap_types.h +++ /dev/null @@ -1,13 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) - */ - -#ifndef __UM_KMAP_TYPES_H -#define __UM_KMAP_TYPES_H - -/* No more #include "asm/arch/kmap_types.h" ! */ - -#define KM_TYPE_NR 14 - -#endif diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 88a4fa909766..eeb87fce9c6f 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -14,10 +14,11 @@ config X86_32 select ARCH_WANT_IPC_PARSE_VERSION select CLKSRC_I8253 select CLONE_BACKWARDS + select GENERIC_VDSO_32 select HAVE_DEBUG_STACKOVERFLOW + select KMAP_LOCAL select MODULES_USE_ELF_REL select OLD_SIGACTION - select GENERIC_VDSO_32 config X86_64 def_bool y @@ -92,6 +93,7 @@ config X86 select ARCH_SUPPORTS_ACPI select ARCH_SUPPORTS_ATOMIC_RMW select ARCH_SUPPORTS_NUMA_BALANCING if X86_64 + select ARCH_SUPPORTS_KMAP_LOCAL_FORCE_MAP if NR_CPUS <= 4096 select ARCH_USE_BUILTIN_BSWAP select ARCH_USE_QUEUED_RWLOCKS select ARCH_USE_QUEUED_SPINLOCKS diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index 4e3099d9ae62..34cb3c159481 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -259,6 +259,7 @@ static inline u64 native_x2apic_icr_read(void) extern int x2apic_mode; extern int x2apic_phys; +extern void __init x2apic_set_max_apicid(u32 apicid); extern void __init check_x2apic(void); extern void x2apic_setup(void); static inline int x2apic_enabled(void) @@ -305,11 +306,10 @@ struct apic { void (*send_IPI_all)(int vector); void (*send_IPI_self)(int vector); - /* dest_logical is used by the IPI functions */ - u32 dest_logical; u32 disable_esr; - u32 irq_delivery_mode; - u32 irq_dest_mode; + + enum apic_delivery_modes delivery_mode; + bool dest_mode_logical; u32 (*calc_dest_apicid)(unsigned int cpu); @@ -520,12 +520,10 @@ static inline void apic_smt_update(void) { } #endif struct msi_msg; +struct irq_cfg; -#ifdef CONFIG_PCI_MSI -void x86_vector_msi_compose_msg(struct irq_data *data, struct msi_msg *msg); -#else -# define x86_vector_msi_compose_msg NULL -#endif +extern void __irq_msi_compose_msg(struct irq_cfg *cfg, struct msi_msg *msg, + bool dmar); extern void ioapic_zap_locks(void); diff --git a/arch/x86/include/asm/apicdef.h b/arch/x86/include/asm/apicdef.h index 05e694ed8386..5716f22f81ac 100644 --- a/arch/x86/include/asm/apicdef.h +++ b/arch/x86/include/asm/apicdef.h @@ -432,15 +432,13 @@ struct local_apic { #define BAD_APICID 0xFFFFu #endif -enum ioapic_irq_destination_types { - dest_Fixed = 0, - dest_LowestPrio = 1, - dest_SMI = 2, - dest__reserved_1 = 3, - dest_NMI = 4, - dest_INIT = 5, - dest__reserved_2 = 6, - dest_ExtINT = 7 +enum apic_delivery_modes { + APIC_DELIVERY_MODE_FIXED = 0, + APIC_DELIVERY_MODE_LOWESTPRIO = 1, + APIC_DELIVERY_MODE_SMI = 2, + APIC_DELIVERY_MODE_NMI = 4, + APIC_DELIVERY_MODE_INIT = 5, + APIC_DELIVERY_MODE_EXTINT = 7, }; #endif /* _ASM_X86_APICDEF_H */ diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h index 77217bd292bd..9f1a0a987e5e 100644 --- a/arch/x86/include/asm/fixmap.h +++ b/arch/x86/include/asm/fixmap.h @@ -14,13 +14,20 @@ #ifndef _ASM_X86_FIXMAP_H #define _ASM_X86_FIXMAP_H +#include + /* * Exposed to assembly code for setting up initial page tables. Cannot be * calculated in assembly code (fixmap entries are an enum), but is sanity * checked in the actual fixmap C code to make sure that the fixmap is * covered fully. */ -#define FIXMAP_PMD_NUM 2 +#ifndef CONFIG_DEBUG_KMAP_LOCAL_FORCE_MAP +# define FIXMAP_PMD_NUM 2 +#else +# define KM_PMDS (KM_MAX_IDX * ((CONFIG_NR_CPUS + 511) / 512)) +# define FIXMAP_PMD_NUM (KM_PMDS + 2) +#endif /* fixmap starts downwards from the 507th entry in level2_fixmap_pgt */ #define FIXMAP_PMD_TOP 507 @@ -31,7 +38,6 @@ #include #ifdef CONFIG_X86_32 #include -#include #else #include #endif @@ -92,9 +98,9 @@ enum fixed_addresses { FIX_IO_APIC_BASE_0, FIX_IO_APIC_BASE_END = FIX_IO_APIC_BASE_0 + MAX_IO_APICS - 1, #endif -#ifdef CONFIG_X86_32 +#ifdef CONFIG_KMAP_LOCAL FIX_KMAP_BEGIN, /* reserved pte's for temporary kernel mappings */ - FIX_KMAP_END = FIX_KMAP_BEGIN+(KM_TYPE_NR*NR_CPUS)-1, + FIX_KMAP_END = FIX_KMAP_BEGIN + (KM_MAX_IDX * NR_CPUS) - 1, #ifdef CONFIG_PCI_MMCONFIG FIX_PCIE_MCFG, #endif @@ -151,7 +157,6 @@ extern void reserve_top_address(unsigned long reserve); extern int fixmaps_set; -extern pte_t *kmap_pte; extern pte_t *pkmap_page_table; void __native_set_fixmap(enum fixed_addresses idx, pte_t pte); diff --git a/arch/x86/include/asm/fpu/api.h b/arch/x86/include/asm/fpu/api.h index dcd9503b1098..a5aba4ab0224 100644 --- a/arch/x86/include/asm/fpu/api.h +++ b/arch/x86/include/asm/fpu/api.h @@ -29,17 +29,32 @@ extern void fpregs_mark_activate(void); * A context switch will (and softirq might) save CPU's FPU registers to * fpu->state and set TIF_NEED_FPU_LOAD leaving CPU's FPU registers in * a random state. + * + * local_bh_disable() protects against both preemption and soft interrupts + * on !RT kernels. + * + * On RT kernels local_bh_disable() is not sufficient because it only + * serializes soft interrupt related sections via a local lock, but stays + * preemptible. Disabling preemption is the right choice here as bottom + * half processing is always in thread context on RT kernels so it + * implicitly prevents bottom half processing as well. + * + * Disabling preemption also serializes against kernel_fpu_begin(). */ static inline void fpregs_lock(void) { - preempt_disable(); - local_bh_disable(); + if (!IS_ENABLED(CONFIG_PREEMPT_RT)) + local_bh_disable(); + else + preempt_disable(); } static inline void fpregs_unlock(void) { - local_bh_enable(); - preempt_enable(); + if (!IS_ENABLED(CONFIG_PREEMPT_RT)) + local_bh_enable(); + else + preempt_enable(); } #ifdef CONFIG_X86_DEBUG_FPU diff --git a/arch/x86/include/asm/highmem.h b/arch/x86/include/asm/highmem.h index 0f420b24e0fc..032e020853aa 100644 --- a/arch/x86/include/asm/highmem.h +++ b/arch/x86/include/asm/highmem.h @@ -23,7 +23,6 @@ #include #include -#include #include #include #include @@ -58,11 +57,17 @@ extern unsigned long highstart_pfn, highend_pfn; #define PKMAP_NR(virt) ((virt-PKMAP_BASE) >> PAGE_SHIFT) #define PKMAP_ADDR(nr) (PKMAP_BASE + ((nr) << PAGE_SHIFT)) -void *kmap_atomic_pfn(unsigned long pfn); -void *kmap_atomic_prot_pfn(unsigned long pfn, pgprot_t prot); - #define flush_cache_kmaps() do { } while (0) +#define arch_kmap_local_post_map(vaddr, pteval) \ + arch_flush_lazy_mmu_mode() + +#define arch_kmap_local_post_unmap(vaddr) \ + do { \ + flush_tlb_one_kernel((vaddr)); \ + arch_flush_lazy_mmu_mode(); \ + } while (0) + extern void add_highpages_with_active_regions(int nid, unsigned long start_pfn, unsigned long end_pfn); diff --git a/arch/x86/include/asm/hpet.h b/arch/x86/include/asm/hpet.h index 6352dee37cda..ab9f3dd87c80 100644 --- a/arch/x86/include/asm/hpet.h +++ b/arch/x86/include/asm/hpet.h @@ -74,17 +74,6 @@ extern void hpet_disable(void); extern unsigned int hpet_readl(unsigned int a); extern void force_hpet_resume(void); -struct irq_data; -struct hpet_channel; -struct irq_domain; - -extern void hpet_msi_unmask(struct irq_data *data); -extern void hpet_msi_mask(struct irq_data *data); -extern void hpet_msi_write(struct hpet_channel *hc, struct msi_msg *msg); -extern struct irq_domain *hpet_create_irq_domain(int hpet_id); -extern int hpet_assign_irq(struct irq_domain *domain, - struct hpet_channel *hc, int dev_num); - #ifdef CONFIG_HPET_EMULATE_RTC #include diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h index a4aeeaace040..d465ece58151 100644 --- a/arch/x86/include/asm/hw_irq.h +++ b/arch/x86/include/asm/hw_irq.h @@ -39,18 +39,16 @@ enum irq_alloc_type { X86_IRQ_ALLOC_TYPE_PCI_MSI, X86_IRQ_ALLOC_TYPE_PCI_MSIX, X86_IRQ_ALLOC_TYPE_DMAR, + X86_IRQ_ALLOC_TYPE_AMDVI, X86_IRQ_ALLOC_TYPE_UV, - X86_IRQ_ALLOC_TYPE_IOAPIC_GET_PARENT, - X86_IRQ_ALLOC_TYPE_HPET_GET_PARENT, }; struct ioapic_alloc_info { - int pin; - int node; - u32 trigger : 1; - u32 polarity : 1; - u32 valid : 1; - struct IO_APIC_route_entry *entry; + int pin; + int node; + u32 is_level : 1; + u32 active_low : 1; + u32 valid : 1; }; struct uv_alloc_info { diff --git a/arch/x86/include/asm/hyperv-tlfs.h b/arch/x86/include/asm/hyperv-tlfs.h index 0ed20e8bba9e..6bf42aed387e 100644 --- a/arch/x86/include/asm/hyperv-tlfs.h +++ b/arch/x86/include/asm/hyperv-tlfs.h @@ -23,6 +23,13 @@ #define HYPERV_CPUID_IMPLEMENT_LIMITS 0x40000005 #define HYPERV_CPUID_NESTED_FEATURES 0x4000000A +#define HYPERV_CPUID_VIRT_STACK_INTERFACE 0x40000081 +#define HYPERV_VS_INTERFACE_EAX_SIGNATURE 0x31235356 /* "VS#1" */ + +#define HYPERV_CPUID_VIRT_STACK_PROPERTIES 0x40000082 +/* Support for the extended IOAPIC RTE format */ +#define HYPERV_VS_PROPERTIES_EAX_EXTENDED_IOAPIC_RTE BIT(2) + #define HYPERV_HYPERVISOR_PRESENT_BIT 0x80000000 #define HYPERV_CPUID_MIN 0x40000005 #define HYPERV_CPUID_MAX 0x4000ffff diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h index a1a26f6d3aa4..437aa8d00e53 100644 --- a/arch/x86/include/asm/io_apic.h +++ b/arch/x86/include/asm/io_apic.h @@ -13,15 +13,6 @@ * Copyright (C) 1997, 1998, 1999, 2000 Ingo Molnar */ -/* I/O Unit Redirection Table */ -#define IO_APIC_REDIR_VECTOR_MASK 0x000FF -#define IO_APIC_REDIR_DEST_LOGICAL 0x00800 -#define IO_APIC_REDIR_DEST_PHYSICAL 0x00000 -#define IO_APIC_REDIR_SEND_PENDING (1 << 12) -#define IO_APIC_REDIR_REMOTE_IRR (1 << 14) -#define IO_APIC_REDIR_LEVEL_TRIGGER (1 << 15) -#define IO_APIC_REDIR_MASKED (1 << 16) - /* * The structure of the IO-APIC: */ @@ -65,52 +56,40 @@ union IO_APIC_reg_03 { }; struct IO_APIC_route_entry { - __u32 vector : 8, - delivery_mode : 3, /* 000: FIXED - * 001: lowest prio - * 111: ExtINT - */ - dest_mode : 1, /* 0: physical, 1: logical */ - delivery_status : 1, - polarity : 1, - irr : 1, - trigger : 1, /* 0: edge, 1: level */ - mask : 1, /* 0: enabled, 1: disabled */ - __reserved_2 : 15; - - __u32 __reserved_3 : 24, - dest : 8; -} __attribute__ ((packed)); - -struct IR_IO_APIC_route_entry { - __u64 vector : 8, - zero : 3, - index2 : 1, - delivery_status : 1, - polarity : 1, - irr : 1, - trigger : 1, - mask : 1, - reserved : 31, - format : 1, - index : 15; + union { + struct { + u64 vector : 8, + delivery_mode : 3, + dest_mode_logical : 1, + delivery_status : 1, + active_low : 1, + irr : 1, + is_level : 1, + masked : 1, + reserved_0 : 15, + reserved_1 : 17, + virt_destid_8_14 : 7, + destid_0_7 : 8; + }; + struct { + u64 ir_shared_0 : 8, + ir_zero : 3, + ir_index_15 : 1, + ir_shared_1 : 5, + ir_reserved_0 : 31, + ir_format : 1, + ir_index_0_14 : 15; + }; + struct { + u64 w1 : 32, + w2 : 32; + }; + }; } __attribute__ ((packed)); struct irq_alloc_info; struct ioapic_domain_cfg; -#define IOAPIC_EDGE 0 -#define IOAPIC_LEVEL 1 - -#define IOAPIC_MASKED 1 -#define IOAPIC_UNMASKED 0 - -#define IOAPIC_POL_HIGH 0 -#define IOAPIC_POL_LOW 1 - -#define IOAPIC_DEST_MODE_PHYSICAL 0 -#define IOAPIC_DEST_MODE_LOGICAL 1 - #define IOAPIC_MAP_ALLOC 0x1 #define IOAPIC_MAP_CHECK 0x2 diff --git a/arch/x86/include/asm/iomap.h b/arch/x86/include/asm/iomap.h index bacf68c4d70e..e2de092fc38c 100644 --- a/arch/x86/include/asm/iomap.h +++ b/arch/x86/include/asm/iomap.h @@ -9,19 +9,14 @@ #include #include #include +#include #include #include -void __iomem * -iomap_atomic_prot_pfn(unsigned long pfn, pgprot_t prot); +void __iomem *__iomap_local_pfn_prot(unsigned long pfn, pgprot_t prot); -void -iounmap_atomic(void __iomem *kvaddr); +int iomap_create_wc(resource_size_t base, unsigned long size, pgprot_t *prot); -int -iomap_create_wc(resource_size_t base, unsigned long size, pgprot_t *prot); - -void -iomap_free(resource_size_t base, unsigned long size); +void iomap_free(resource_size_t base, unsigned long size); #endif /* _ASM_X86_IOMAP_H */ diff --git a/arch/x86/include/asm/irq_remapping.h b/arch/x86/include/asm/irq_remapping.h index af4a151d70b3..7cc49432187f 100644 --- a/arch/x86/include/asm/irq_remapping.h +++ b/arch/x86/include/asm/irq_remapping.h @@ -44,9 +44,6 @@ extern int irq_remapping_reenable(int); extern int irq_remap_enable_fault_handling(void); extern void panic_if_irq_remap(const char *msg); -extern struct irq_domain * -irq_remapping_get_irq_domain(struct irq_alloc_info *info); - /* Create PCI MSI/MSIx irqdomain, use @parent as the parent irqdomain. */ extern struct irq_domain * arch_create_remap_msi_irq_domain(struct irq_domain *par, const char *n, int id); @@ -71,11 +68,5 @@ static inline void panic_if_irq_remap(const char *msg) { } -static inline struct irq_domain * -irq_remapping_get_irq_domain(struct irq_alloc_info *info) -{ - return NULL; -} - #endif /* CONFIG_IRQ_REMAP */ #endif /* __X86_IRQ_REMAPPING_H */ diff --git a/arch/x86/include/asm/irqdomain.h b/arch/x86/include/asm/irqdomain.h index cd684d45cb5f..125c23b7bad3 100644 --- a/arch/x86/include/asm/irqdomain.h +++ b/arch/x86/include/asm/irqdomain.h @@ -12,6 +12,9 @@ enum { X86_IRQ_ALLOC_LEGACY = 0x2, }; +extern int x86_fwspec_is_ioapic(struct irq_fwspec *fwspec); +extern int x86_fwspec_is_hpet(struct irq_fwspec *fwspec); + extern struct irq_domain *x86_vector_domain; extern void init_irq_alloc_info(struct irq_alloc_info *info, diff --git a/arch/x86/include/asm/kmap_types.h b/arch/x86/include/asm/kmap_types.h deleted file mode 100644 index 04ab8266e347..000000000000 --- a/arch/x86/include/asm/kmap_types.h +++ /dev/null @@ -1,13 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_X86_KMAP_TYPES_H -#define _ASM_X86_KMAP_TYPES_H - -#if defined(CONFIG_X86_32) && defined(CONFIG_DEBUG_HIGHMEM) -#define __WITH_KM_FENCE -#endif - -#include - -#undef __WITH_KM_FENCE - -#endif /* _ASM_X86_KMAP_TYPES_H */ diff --git a/arch/x86/include/asm/msi.h b/arch/x86/include/asm/msi.h index cd30013d15d3..b85147d75626 100644 --- a/arch/x86/include/asm/msi.h +++ b/arch/x86/include/asm/msi.h @@ -9,4 +9,54 @@ typedef struct irq_alloc_info msi_alloc_info_t; int pci_msi_prepare(struct irq_domain *domain, struct device *dev, int nvec, msi_alloc_info_t *arg); +/* Structs and defines for the X86 specific MSI message format */ + +typedef struct x86_msi_data { + u32 vector : 8, + delivery_mode : 3, + dest_mode_logical : 1, + reserved : 2, + active_low : 1, + is_level : 1; + + u32 dmar_subhandle; +} __attribute__ ((packed)) arch_msi_msg_data_t; +#define arch_msi_msg_data x86_msi_data + +typedef struct x86_msi_addr_lo { + union { + struct { + u32 reserved_0 : 2, + dest_mode_logical : 1, + redirect_hint : 1, + reserved_1 : 1, + virt_destid_8_14 : 7, + destid_0_7 : 8, + base_address : 12; + }; + struct { + u32 dmar_reserved_0 : 2, + dmar_index_15 : 1, + dmar_subhandle_valid : 1, + dmar_format : 1, + dmar_index_0_14 : 15, + dmar_base_address : 12; + }; + }; +} __attribute__ ((packed)) arch_msi_msg_addr_lo_t; +#define arch_msi_msg_addr_lo x86_msi_addr_lo + +#define X86_MSI_BASE_ADDRESS_LOW (0xfee00000 >> 20) + +typedef struct x86_msi_addr_hi { + u32 reserved : 8, + destid_8_31 : 24; +} __attribute__ ((packed)) arch_msi_msg_addr_hi_t; +#define arch_msi_msg_addr_hi x86_msi_addr_hi + +#define X86_MSI_BASE_ADDRESS_HIGH (0) + +struct msi_msg; +u32 x86_msi_msg_get_destid(struct msi_msg *msg, bool extid); + #endif /* _ASM_X86_MSI_H */ diff --git a/arch/x86/include/asm/msidef.h b/arch/x86/include/asm/msidef.h deleted file mode 100644 index ee2f8ccc32d0..000000000000 --- a/arch/x86/include/asm/msidef.h +++ /dev/null @@ -1,57 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_X86_MSIDEF_H -#define _ASM_X86_MSIDEF_H - -/* - * Constants for Intel APIC based MSI messages. - */ - -/* - * Shifts for MSI data - */ - -#define MSI_DATA_VECTOR_SHIFT 0 -#define MSI_DATA_VECTOR_MASK 0x000000ff -#define MSI_DATA_VECTOR(v) (((v) << MSI_DATA_VECTOR_SHIFT) & \ - MSI_DATA_VECTOR_MASK) - -#define MSI_DATA_DELIVERY_MODE_SHIFT 8 -#define MSI_DATA_DELIVERY_FIXED (0 << MSI_DATA_DELIVERY_MODE_SHIFT) -#define MSI_DATA_DELIVERY_LOWPRI (1 << MSI_DATA_DELIVERY_MODE_SHIFT) - -#define MSI_DATA_LEVEL_SHIFT 14 -#define MSI_DATA_LEVEL_DEASSERT (0 << MSI_DATA_LEVEL_SHIFT) -#define MSI_DATA_LEVEL_ASSERT (1 << MSI_DATA_LEVEL_SHIFT) - -#define MSI_DATA_TRIGGER_SHIFT 15 -#define MSI_DATA_TRIGGER_EDGE (0 << MSI_DATA_TRIGGER_SHIFT) -#define MSI_DATA_TRIGGER_LEVEL (1 << MSI_DATA_TRIGGER_SHIFT) - -/* - * Shift/mask fields for msi address - */ - -#define MSI_ADDR_BASE_HI 0 -#define MSI_ADDR_BASE_LO 0xfee00000 - -#define MSI_ADDR_DEST_MODE_SHIFT 2 -#define MSI_ADDR_DEST_MODE_PHYSICAL (0 << MSI_ADDR_DEST_MODE_SHIFT) -#define MSI_ADDR_DEST_MODE_LOGICAL (1 << MSI_ADDR_DEST_MODE_SHIFT) - -#define MSI_ADDR_REDIRECTION_SHIFT 3 -#define MSI_ADDR_REDIRECTION_CPU (0 << MSI_ADDR_REDIRECTION_SHIFT) - /* dedicated cpu */ -#define MSI_ADDR_REDIRECTION_LOWPRI (1 << MSI_ADDR_REDIRECTION_SHIFT) - /* lowest priority */ - -#define MSI_ADDR_DEST_ID_SHIFT 12 -#define MSI_ADDR_DEST_ID_MASK 0x00ffff0 -#define MSI_ADDR_DEST_ID(dest) (((dest) << MSI_ADDR_DEST_ID_SHIFT) & \ - MSI_ADDR_DEST_ID_MASK) -#define MSI_ADDR_EXT_DEST_ID(dest) ((dest) & 0xffffff00) - -#define MSI_ADDR_IR_EXT_INT (1 << 4) -#define MSI_ADDR_IR_SHV (1 << 3) -#define MSI_ADDR_IR_INDEX1(index) ((index & 0x8000) >> 13) -#define MSI_ADDR_IR_INDEX2(index) ((index & 0x7fff) << 5) -#endif /* _ASM_X86_MSIDEF_H */ diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h index 0fad9f61c76a..b6b02b7c19cc 100644 --- a/arch/x86/include/asm/paravirt_types.h +++ b/arch/x86/include/asm/paravirt_types.h @@ -41,7 +41,6 @@ #ifndef __ASSEMBLY__ #include -#include #include #include diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h index 52e5f5f2240d..91ac10654570 100644 --- a/arch/x86/include/asm/pgtable_64_types.h +++ b/arch/x86/include/asm/pgtable_64_types.h @@ -143,7 +143,11 @@ extern unsigned int ptrs_per_p4d; #define MODULES_VADDR (__START_KERNEL_map + KERNEL_IMAGE_SIZE) /* The module sections ends with the start of the fixmap */ -#define MODULES_END _AC(0xffffffffff000000, UL) +#ifndef CONFIG_DEBUG_KMAP_LOCAL_FORCE_MAP +# define MODULES_END _AC(0xffffffffff000000, UL) +#else +# define MODULES_END _AC(0xfffffffffe000000, UL) +#endif #define MODULES_LEN (MODULES_END - MODULES_VADDR) #define ESPFIX_PGD_ENTRY _AC(-2, UL) diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h index dde5b3f1e7cd..5c69f7eb5d47 100644 --- a/arch/x86/include/asm/x86_init.h +++ b/arch/x86/include/asm/x86_init.h @@ -116,6 +116,7 @@ struct x86_init_pci { * @init_platform: platform setup * @guest_late_init: guest late init * @x2apic_available: X2APIC detection + * @msi_ext_dest_id: MSI supports 15-bit APIC IDs * @init_mem_mapping: setup early mappings during init_mem_mapping() * @init_after_bootmem: guest init after boot allocator is finished */ @@ -123,6 +124,7 @@ struct x86_hyper_init { void (*init_platform)(void); void (*guest_late_init)(void); bool (*x2apic_available)(void); + bool (*msi_ext_dest_id)(void); void (*init_mem_mapping)(void); void (*init_after_bootmem)(void); }; diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index b3eef1d5c903..6bd20c0de8bc 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -93,6 +93,11 @@ static unsigned int disabled_cpu_apicid __ro_after_init = BAD_APICID; */ static int apic_extnmi __ro_after_init = APIC_EXTNMI_BSP; +/* + * Hypervisor supports 15 bits of APIC ID in MSI Extended Destination ID + */ +static bool virt_ext_dest_id __ro_after_init; + /* * Map cpu index to physical APIC ID */ @@ -1591,7 +1596,7 @@ static void setup_local_APIC(void) apic->init_apic_ldr(); #ifdef CONFIG_X86_32 - if (apic->dest_logical) { + if (apic->dest_mode_logical) { int logical_apicid, ldr_apicid; /* @@ -1841,20 +1846,34 @@ static __init void try_to_enable_x2apic(int remap_mode) return; if (remap_mode != IRQ_REMAP_X2APIC_MODE) { - /* IR is required if there is APIC ID > 255 even when running - * under KVM + u32 apic_limit = 255; + + /* + * Using X2APIC without IR is not architecturally supported + * on bare metal but may be supported in guests. */ - if (max_physical_apicid > 255 || - !x86_init.hyper.x2apic_available()) { + if (!x86_init.hyper.x2apic_available()) { pr_info("x2apic: IRQ remapping doesn't support X2APIC mode\n"); x2apic_disable(); return; } /* - * without IR all CPUs can be addressed by IOAPIC/MSI - * only in physical mode + * If the hypervisor supports extended destination ID in + * MSI, that increases the maximum APIC ID that can be + * used for non-remapped IRQ domains. */ + if (x86_init.hyper.msi_ext_dest_id()) { + virt_ext_dest_id = 1; + apic_limit = 32767; + } + + /* + * Without IR, all CPUs can be addressed by IOAPIC/MSI only + * in physical mode, and CPUs with an APIC ID that cannnot + * be addressed must not be brought online. + */ + x2apic_set_max_apicid(apic_limit); x2apic_phys = 1; } x2apic_enable(); @@ -2478,6 +2497,46 @@ int hard_smp_processor_id(void) return read_apic_id(); } +void __irq_msi_compose_msg(struct irq_cfg *cfg, struct msi_msg *msg, + bool dmar) +{ + memset(msg, 0, sizeof(*msg)); + + msg->arch_addr_lo.base_address = X86_MSI_BASE_ADDRESS_LOW; + msg->arch_addr_lo.dest_mode_logical = apic->dest_mode_logical; + msg->arch_addr_lo.destid_0_7 = cfg->dest_apicid & 0xFF; + + msg->arch_data.delivery_mode = APIC_DELIVERY_MODE_FIXED; + msg->arch_data.vector = cfg->vector; + + msg->address_hi = X86_MSI_BASE_ADDRESS_HIGH; + /* + * Only the IOMMU itself can use the trick of putting destination + * APIC ID into the high bits of the address. Anything else would + * just be writing to memory if it tried that, and needs IR to + * address APICs which can't be addressed in the normal 32-bit + * address range at 0xFFExxxxx. That is typically just 8 bits, but + * some hypervisors allow the extended destination ID field in bits + * 5-11 to be used, giving support for 15 bits of APIC IDs in total. + */ + if (dmar) + msg->arch_addr_hi.destid_8_31 = cfg->dest_apicid >> 8; + else if (virt_ext_dest_id && cfg->dest_apicid < 0x8000) + msg->arch_addr_lo.virt_destid_8_14 = cfg->dest_apicid >> 8; + else + WARN_ON_ONCE(cfg->dest_apicid > 0xFF); +} + +u32 x86_msi_msg_get_destid(struct msi_msg *msg, bool extid) +{ + u32 dest = msg->arch_addr_lo.destid_0_7; + + if (extid) + dest |= msg->arch_addr_hi.destid_8_31 << 8; + return dest; +} +EXPORT_SYMBOL_GPL(x86_msi_msg_get_destid); + /* * Override the generic EOI implementation with an optimized version. * Only called during early boot when only one CPU is active and with diff --git a/arch/x86/kernel/apic/apic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c index 7862b152a052..8f72b4351c9f 100644 --- a/arch/x86/kernel/apic/apic_flat_64.c +++ b/arch/x86/kernel/apic/apic_flat_64.c @@ -53,7 +53,7 @@ static void _flat_send_IPI_mask(unsigned long mask, int vector) unsigned long flags; local_irq_save(flags); - __default_send_IPI_dest_field(mask, vector, apic->dest_logical); + __default_send_IPI_dest_field(mask, vector, APIC_DEST_LOGICAL); local_irq_restore(flags); } @@ -113,15 +113,13 @@ static struct apic apic_flat __ro_after_init = { .apic_id_valid = default_apic_id_valid, .apic_id_registered = flat_apic_id_registered, - .irq_delivery_mode = dest_Fixed, - .irq_dest_mode = 1, /* logical */ + .delivery_mode = APIC_DELIVERY_MODE_FIXED, + .dest_mode_logical = true, .disable_esr = 0, - .dest_logical = APIC_DEST_LOGICAL, + .check_apicid_used = NULL, - .init_apic_ldr = flat_init_apic_ldr, - .ioapic_phys_id_map = NULL, .setup_apic_routing = NULL, .cpu_present_to_apicid = default_cpu_present_to_apicid, @@ -206,15 +204,13 @@ static struct apic apic_physflat __ro_after_init = { .apic_id_valid = default_apic_id_valid, .apic_id_registered = flat_apic_id_registered, - .irq_delivery_mode = dest_Fixed, - .irq_dest_mode = 0, /* physical */ + .delivery_mode = APIC_DELIVERY_MODE_FIXED, + .dest_mode_logical = false, .disable_esr = 0, - .dest_logical = 0, + .check_apicid_used = NULL, - .init_apic_ldr = physflat_init_apic_ldr, - .ioapic_phys_id_map = NULL, .setup_apic_routing = NULL, .cpu_present_to_apicid = default_cpu_present_to_apicid, diff --git a/arch/x86/kernel/apic/apic_noop.c b/arch/x86/kernel/apic/apic_noop.c index 780c702969b7..fe78319e0f7a 100644 --- a/arch/x86/kernel/apic/apic_noop.c +++ b/arch/x86/kernel/apic/apic_noop.c @@ -95,19 +95,15 @@ struct apic apic_noop __ro_after_init = { .apic_id_valid = default_apic_id_valid, .apic_id_registered = noop_apic_id_registered, - .irq_delivery_mode = dest_Fixed, - /* logical delivery broadcast to all CPUs: */ - .irq_dest_mode = 1, + .delivery_mode = APIC_DELIVERY_MODE_FIXED, + .dest_mode_logical = true, .disable_esr = 0, - .dest_logical = APIC_DEST_LOGICAL, + .check_apicid_used = default_check_apicid_used, - .init_apic_ldr = noop_init_apic_ldr, - .ioapic_phys_id_map = default_ioapic_phys_id_map, .setup_apic_routing = NULL, - .cpu_present_to_apicid = default_cpu_present_to_apicid, .apicid_to_cpu_present = physid_set_mask_of_physid, diff --git a/arch/x86/kernel/apic/apic_numachip.c b/arch/x86/kernel/apic/apic_numachip.c index 35edd57f064a..a54d817eb4b6 100644 --- a/arch/x86/kernel/apic/apic_numachip.c +++ b/arch/x86/kernel/apic/apic_numachip.c @@ -246,15 +246,13 @@ static const struct apic apic_numachip1 __refconst = { .apic_id_valid = numachip_apic_id_valid, .apic_id_registered = numachip_apic_id_registered, - .irq_delivery_mode = dest_Fixed, - .irq_dest_mode = 0, /* physical */ + .delivery_mode = APIC_DELIVERY_MODE_FIXED, + .dest_mode_logical = false, .disable_esr = 0, - .dest_logical = 0, + .check_apicid_used = NULL, - .init_apic_ldr = flat_init_apic_ldr, - .ioapic_phys_id_map = NULL, .setup_apic_routing = NULL, .cpu_present_to_apicid = default_cpu_present_to_apicid, @@ -295,15 +293,13 @@ static const struct apic apic_numachip2 __refconst = { .apic_id_valid = numachip_apic_id_valid, .apic_id_registered = numachip_apic_id_registered, - .irq_delivery_mode = dest_Fixed, - .irq_dest_mode = 0, /* physical */ + .delivery_mode = APIC_DELIVERY_MODE_FIXED, + .dest_mode_logical = false, .disable_esr = 0, - .dest_logical = 0, + .check_apicid_used = NULL, - .init_apic_ldr = flat_init_apic_ldr, - .ioapic_phys_id_map = NULL, .setup_apic_routing = NULL, .cpu_present_to_apicid = default_cpu_present_to_apicid, diff --git a/arch/x86/kernel/apic/bigsmp_32.c b/arch/x86/kernel/apic/bigsmp_32.c index 98d015a4405a..77555f66c14d 100644 --- a/arch/x86/kernel/apic/bigsmp_32.c +++ b/arch/x86/kernel/apic/bigsmp_32.c @@ -127,16 +127,13 @@ static struct apic apic_bigsmp __ro_after_init = { .apic_id_valid = default_apic_id_valid, .apic_id_registered = bigsmp_apic_id_registered, - .irq_delivery_mode = dest_Fixed, - /* phys delivery to target CPU: */ - .irq_dest_mode = 0, + .delivery_mode = APIC_DELIVERY_MODE_FIXED, + .dest_mode_logical = false, .disable_esr = 1, - .dest_logical = 0, + .check_apicid_used = bigsmp_check_apicid_used, - .init_apic_ldr = bigsmp_init_apic_ldr, - .ioapic_phys_id_map = bigsmp_ioapic_phys_id_map, .setup_apic_routing = bigsmp_setup_apic_routing, .cpu_present_to_apicid = bigsmp_cpu_present_to_apicid, diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 7b3c7e0d4a09..e4ab4804b20d 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -48,6 +48,7 @@ #include /* time_after() */ #include #include +#include #include #include @@ -63,7 +64,6 @@ #include #include #include - #include #define for_each_ioapic(idx) \ @@ -89,12 +89,12 @@ struct irq_pin_list { }; struct mp_chip_data { - struct list_head irq_2_pin; - struct IO_APIC_route_entry entry; - int trigger; - int polarity; + struct list_head irq_2_pin; + struct IO_APIC_route_entry entry; + bool is_level; + bool active_low; + bool isa_irq; u32 count; - bool isa_irq; }; struct mp_ioapic_gsi { @@ -286,31 +286,26 @@ static void io_apic_write(unsigned int apic, unsigned int reg, writel(value, &io_apic->data); } -union entry_union { - struct { u32 w1, w2; }; - struct IO_APIC_route_entry entry; -}; - static struct IO_APIC_route_entry __ioapic_read_entry(int apic, int pin) { - union entry_union eu; + struct IO_APIC_route_entry entry; - eu.w1 = io_apic_read(apic, 0x10 + 2 * pin); - eu.w2 = io_apic_read(apic, 0x11 + 2 * pin); + entry.w1 = io_apic_read(apic, 0x10 + 2 * pin); + entry.w2 = io_apic_read(apic, 0x11 + 2 * pin); - return eu.entry; + return entry; } static struct IO_APIC_route_entry ioapic_read_entry(int apic, int pin) { - union entry_union eu; + struct IO_APIC_route_entry entry; unsigned long flags; raw_spin_lock_irqsave(&ioapic_lock, flags); - eu.entry = __ioapic_read_entry(apic, pin); + entry = __ioapic_read_entry(apic, pin); raw_spin_unlock_irqrestore(&ioapic_lock, flags); - return eu.entry; + return entry; } /* @@ -321,11 +316,8 @@ static struct IO_APIC_route_entry ioapic_read_entry(int apic, int pin) */ static void __ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e) { - union entry_union eu = {{0, 0}}; - - eu.entry = e; - io_apic_write(apic, 0x11 + 2*pin, eu.w2); - io_apic_write(apic, 0x10 + 2*pin, eu.w1); + io_apic_write(apic, 0x11 + 2*pin, e.w2); + io_apic_write(apic, 0x10 + 2*pin, e.w1); } static void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e) @@ -344,12 +336,12 @@ static void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e) */ static void ioapic_mask_entry(int apic, int pin) { + struct IO_APIC_route_entry e = { .masked = true }; unsigned long flags; - union entry_union eu = { .entry.mask = IOAPIC_MASKED }; raw_spin_lock_irqsave(&ioapic_lock, flags); - io_apic_write(apic, 0x10 + 2*pin, eu.w1); - io_apic_write(apic, 0x11 + 2*pin, eu.w2); + io_apic_write(apic, 0x10 + 2*pin, e.w1); + io_apic_write(apic, 0x11 + 2*pin, e.w2); raw_spin_unlock_irqrestore(&ioapic_lock, flags); } @@ -422,20 +414,15 @@ static void __init replace_pin_at_irq_node(struct mp_chip_data *data, int node, add_pin_to_irq_node(data, node, newapic, newpin); } -static void io_apic_modify_irq(struct mp_chip_data *data, - int mask_and, int mask_or, +static void io_apic_modify_irq(struct mp_chip_data *data, bool masked, void (*final)(struct irq_pin_list *entry)) { - union entry_union eu; struct irq_pin_list *entry; - eu.entry = data->entry; - eu.w1 &= mask_and; - eu.w1 |= mask_or; - data->entry = eu.entry; + data->entry.masked = masked; for_each_irq_pin(entry, data->irq_2_pin) { - io_apic_write(entry->apic, 0x10 + 2 * entry->pin, eu.w1); + io_apic_write(entry->apic, 0x10 + 2 * entry->pin, data->entry.w1); if (final) final(entry); } @@ -459,13 +446,13 @@ static void mask_ioapic_irq(struct irq_data *irq_data) unsigned long flags; raw_spin_lock_irqsave(&ioapic_lock, flags); - io_apic_modify_irq(data, ~0, IO_APIC_REDIR_MASKED, &io_apic_sync); + io_apic_modify_irq(data, true, &io_apic_sync); raw_spin_unlock_irqrestore(&ioapic_lock, flags); } static void __unmask_ioapic(struct mp_chip_data *data) { - io_apic_modify_irq(data, ~IO_APIC_REDIR_MASKED, 0, NULL); + io_apic_modify_irq(data, false, NULL); } static void unmask_ioapic_irq(struct irq_data *irq_data) @@ -506,8 +493,8 @@ static void __eoi_ioapic_pin(int apic, int pin, int vector) /* * Mask the entry and change the trigger mode to edge. */ - entry1.mask = IOAPIC_MASKED; - entry1.trigger = IOAPIC_EDGE; + entry1.masked = true; + entry1.is_level = false; __ioapic_write_entry(apic, pin, entry1); @@ -535,15 +522,15 @@ static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin) /* Check delivery_mode to be sure we're not clearing an SMI pin */ entry = ioapic_read_entry(apic, pin); - if (entry.delivery_mode == dest_SMI) + if (entry.delivery_mode == APIC_DELIVERY_MODE_SMI) return; /* * Make sure the entry is masked and re-read the contents to check * if it is a level triggered pin and if the remote-IRR is set. */ - if (entry.mask == IOAPIC_UNMASKED) { - entry.mask = IOAPIC_MASKED; + if (!entry.masked) { + entry.masked = true; ioapic_write_entry(apic, pin, entry); entry = ioapic_read_entry(apic, pin); } @@ -556,8 +543,8 @@ static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin) * doesn't clear the remote-IRR if the trigger mode is not * set to level. */ - if (entry.trigger == IOAPIC_EDGE) { - entry.trigger = IOAPIC_LEVEL; + if (!entry.is_level) { + entry.is_level = true; ioapic_write_entry(apic, pin, entry); } raw_spin_lock_irqsave(&ioapic_lock, flags); @@ -659,8 +646,8 @@ void mask_ioapic_entries(void) struct IO_APIC_route_entry entry; entry = ioapics[apic].saved_registers[pin]; - if (entry.mask == IOAPIC_UNMASKED) { - entry.mask = IOAPIC_MASKED; + if (!entry.masked) { + entry.masked = true; ioapic_write_entry(apic, pin, entry); } } @@ -745,44 +732,7 @@ static int __init find_isa_irq_apic(int irq, int type) return -1; } -#ifdef CONFIG_EISA -/* - * EISA Edge/Level control register, ELCR - */ -static int EISA_ELCR(unsigned int irq) -{ - if (irq < nr_legacy_irqs()) { - unsigned int port = 0x4d0 + (irq >> 3); - return (inb(port) >> (irq & 7)) & 1; - } - apic_printk(APIC_VERBOSE, KERN_INFO - "Broken MPtable reports ISA irq %d\n", irq); - return 0; -} - -#endif - -/* ISA interrupts are always active high edge triggered, - * when listed as conforming in the MP table. */ - -#define default_ISA_trigger(idx) (IOAPIC_EDGE) -#define default_ISA_polarity(idx) (IOAPIC_POL_HIGH) - -/* EISA interrupts are always polarity zero and can be edge or level - * trigger depending on the ELCR value. If an interrupt is listed as - * EISA conforming in the MP table, that means its trigger type must - * be read in from the ELCR */ - -#define default_EISA_trigger(idx) (EISA_ELCR(mp_irqs[idx].srcbusirq)) -#define default_EISA_polarity(idx) default_ISA_polarity(idx) - -/* PCI interrupts are always active low level triggered, - * when listed as conforming in the MP table. */ - -#define default_PCI_trigger(idx) (IOAPIC_LEVEL) -#define default_PCI_polarity(idx) (IOAPIC_POL_LOW) - -static int irq_polarity(int idx) +static bool irq_active_low(int idx) { int bus = mp_irqs[idx].srcbus; @@ -791,90 +741,139 @@ static int irq_polarity(int idx) */ switch (mp_irqs[idx].irqflag & MP_IRQPOL_MASK) { case MP_IRQPOL_DEFAULT: - /* conforms to spec, ie. bus-type dependent polarity */ - if (test_bit(bus, mp_bus_not_pci)) - return default_ISA_polarity(idx); - else - return default_PCI_polarity(idx); + /* + * Conforms to spec, ie. bus-type dependent polarity. PCI + * defaults to low active. [E]ISA defaults to high active. + */ + return !test_bit(bus, mp_bus_not_pci); case MP_IRQPOL_ACTIVE_HIGH: - return IOAPIC_POL_HIGH; + return false; case MP_IRQPOL_RESERVED: pr_warn("IOAPIC: Invalid polarity: 2, defaulting to low\n"); fallthrough; case MP_IRQPOL_ACTIVE_LOW: default: /* Pointless default required due to do gcc stupidity */ - return IOAPIC_POL_LOW; + return true; } } #ifdef CONFIG_EISA -static int eisa_irq_trigger(int idx, int bus, int trigger) +/* + * EISA Edge/Level control register, ELCR + */ +static bool EISA_ELCR(unsigned int irq) +{ + if (irq < nr_legacy_irqs()) { + unsigned int port = 0x4d0 + (irq >> 3); + return (inb(port) >> (irq & 7)) & 1; + } + apic_printk(APIC_VERBOSE, KERN_INFO + "Broken MPtable reports ISA irq %d\n", irq); + return false; +} + +/* + * EISA interrupts are always active high and can be edge or level + * triggered depending on the ELCR value. If an interrupt is listed as + * EISA conforming in the MP table, that means its trigger type must be + * read in from the ELCR. + */ +static bool eisa_irq_is_level(int idx, int bus, bool level) { switch (mp_bus_id_to_type[bus]) { case MP_BUS_PCI: case MP_BUS_ISA: - return trigger; + return level; case MP_BUS_EISA: - return default_EISA_trigger(idx); + return EISA_ELCR(mp_irqs[idx].srcbusirq); } pr_warn("IOAPIC: Invalid srcbus: %d defaulting to level\n", bus); - return IOAPIC_LEVEL; + return true; } #else -static inline int eisa_irq_trigger(int idx, int bus, int trigger) +static inline int eisa_irq_is_level(int idx, int bus, bool level) { - return trigger; + return level; } #endif -static int irq_trigger(int idx) +static bool irq_is_level(int idx) { int bus = mp_irqs[idx].srcbus; - int trigger; + bool level; /* * Determine IRQ trigger mode (edge or level sensitive): */ switch (mp_irqs[idx].irqflag & MP_IRQTRIG_MASK) { case MP_IRQTRIG_DEFAULT: - /* conforms to spec, ie. bus-type dependent trigger mode */ - if (test_bit(bus, mp_bus_not_pci)) - trigger = default_ISA_trigger(idx); - else - trigger = default_PCI_trigger(idx); + /* + * Conforms to spec, ie. bus-type dependent trigger + * mode. PCI defaults to level, ISA to edge. + */ + level = !test_bit(bus, mp_bus_not_pci); /* Take EISA into account */ - return eisa_irq_trigger(idx, bus, trigger); + return eisa_irq_is_level(idx, bus, level); case MP_IRQTRIG_EDGE: - return IOAPIC_EDGE; + return false; case MP_IRQTRIG_RESERVED: pr_warn("IOAPIC: Invalid trigger mode 2 defaulting to level\n"); fallthrough; case MP_IRQTRIG_LEVEL: default: /* Pointless default required due to do gcc stupidity */ - return IOAPIC_LEVEL; + return true; } } +static int __acpi_get_override_irq(u32 gsi, bool *trigger, bool *polarity) +{ + int ioapic, pin, idx; + + if (skip_ioapic_setup) + return -1; + + ioapic = mp_find_ioapic(gsi); + if (ioapic < 0) + return -1; + + pin = mp_find_ioapic_pin(ioapic, gsi); + if (pin < 0) + return -1; + + idx = find_irq_entry(ioapic, pin, mp_INT); + if (idx < 0) + return -1; + + *trigger = irq_is_level(idx); + *polarity = irq_active_low(idx); + return 0; +} + +#ifdef CONFIG_ACPI +int acpi_get_override_irq(u32 gsi, int *is_level, int *active_low) +{ + *is_level = *active_low = 0; + return __acpi_get_override_irq(gsi, (bool *)is_level, + (bool *)active_low); +} +#endif + void ioapic_set_alloc_attr(struct irq_alloc_info *info, int node, int trigger, int polarity) { init_irq_alloc_info(info, NULL); info->type = X86_IRQ_ALLOC_TYPE_IOAPIC; info->ioapic.node = node; - info->ioapic.trigger = trigger; - info->ioapic.polarity = polarity; + info->ioapic.is_level = trigger; + info->ioapic.active_low = polarity; info->ioapic.valid = 1; } -#ifndef CONFIG_ACPI -int acpi_get_override_irq(u32 gsi, int *trigger, int *polarity); -#endif - static void ioapic_copy_alloc_attr(struct irq_alloc_info *dst, struct irq_alloc_info *src, u32 gsi, int ioapic_idx, int pin) { - int trigger, polarity; + bool level, pol_low; copy_irq_alloc_info(dst, src); dst->type = X86_IRQ_ALLOC_TYPE_IOAPIC; @@ -883,20 +882,20 @@ static void ioapic_copy_alloc_attr(struct irq_alloc_info *dst, dst->ioapic.valid = 1; if (src && src->ioapic.valid) { dst->ioapic.node = src->ioapic.node; - dst->ioapic.trigger = src->ioapic.trigger; - dst->ioapic.polarity = src->ioapic.polarity; + dst->ioapic.is_level = src->ioapic.is_level; + dst->ioapic.active_low = src->ioapic.active_low; } else { dst->ioapic.node = NUMA_NO_NODE; - if (acpi_get_override_irq(gsi, &trigger, &polarity) >= 0) { - dst->ioapic.trigger = trigger; - dst->ioapic.polarity = polarity; + if (__acpi_get_override_irq(gsi, &level, &pol_low) >= 0) { + dst->ioapic.is_level = level; + dst->ioapic.active_low = pol_low; } else { /* * PCI interrupts are always active low level * triggered. */ - dst->ioapic.trigger = IOAPIC_LEVEL; - dst->ioapic.polarity = IOAPIC_POL_LOW; + dst->ioapic.is_level = true; + dst->ioapic.active_low = true; } } } @@ -906,12 +905,12 @@ static int ioapic_alloc_attr_node(struct irq_alloc_info *info) return (info && info->ioapic.valid) ? info->ioapic.node : NUMA_NO_NODE; } -static void mp_register_handler(unsigned int irq, unsigned long trigger) +static void mp_register_handler(unsigned int irq, bool level) { irq_flow_handler_t hdl; bool fasteoi; - if (trigger) { + if (level) { irq_set_status_flags(irq, IRQ_LEVEL); fasteoi = true; } else { @@ -933,14 +932,14 @@ static bool mp_check_pin_attr(int irq, struct irq_alloc_info *info) * pin with real trigger and polarity attributes. */ if (irq < nr_legacy_irqs() && data->count == 1) { - if (info->ioapic.trigger != data->trigger) - mp_register_handler(irq, info->ioapic.trigger); - data->entry.trigger = data->trigger = info->ioapic.trigger; - data->entry.polarity = data->polarity = info->ioapic.polarity; + if (info->ioapic.is_level != data->is_level) + mp_register_handler(irq, info->ioapic.is_level); + data->entry.is_level = data->is_level = info->ioapic.is_level; + data->entry.active_low = data->active_low = info->ioapic.active_low; } - return data->trigger == info->ioapic.trigger && - data->polarity == info->ioapic.polarity; + return data->is_level == info->ioapic.is_level && + data->active_low == info->ioapic.active_low; } static int alloc_irq_from_domain(struct irq_domain *domain, int ioapic, u32 gsi, @@ -1219,10 +1218,9 @@ void ioapic_zap_locks(void) static void io_apic_print_entries(unsigned int apic, unsigned int nr_entries) { - int i; - char buf[256]; struct IO_APIC_route_entry entry; - struct IR_IO_APIC_route_entry *ir_entry = (void *)&entry; + char buf[256]; + int i; printk(KERN_DEBUG "IOAPIC %d:\n", apic); for (i = 0; i <= nr_entries; i++) { @@ -1230,20 +1228,21 @@ static void io_apic_print_entries(unsigned int apic, unsigned int nr_entries) snprintf(buf, sizeof(buf), " pin%02x, %s, %s, %s, V(%02X), IRR(%1d), S(%1d)", i, - entry.mask == IOAPIC_MASKED ? "disabled" : "enabled ", - entry.trigger == IOAPIC_LEVEL ? "level" : "edge ", - entry.polarity == IOAPIC_POL_LOW ? "low " : "high", + entry.masked ? "disabled" : "enabled ", + entry.is_level ? "level" : "edge ", + entry.active_low ? "low " : "high", entry.vector, entry.irr, entry.delivery_status); - if (ir_entry->format) + if (entry.ir_format) { printk(KERN_DEBUG "%s, remapped, I(%04X), Z(%X)\n", - buf, (ir_entry->index2 << 15) | ir_entry->index, - ir_entry->zero); - else - printk(KERN_DEBUG "%s, %s, D(%02X), M(%1d)\n", buf, - entry.dest_mode == IOAPIC_DEST_MODE_LOGICAL ? - "logical " : "physical", - entry.dest, entry.delivery_mode); + (entry.ir_index_15 << 15) | entry.ir_index_0_14, + entry.ir_zero); + } else { + printk(KERN_DEBUG "%s, %s, D(%02X%02X), M(%1d)\n", buf, + entry.dest_mode_logical ? "logical " : "physical", + entry.virt_destid_8_14, entry.destid_0_7, + entry.delivery_mode); + } } } @@ -1368,7 +1367,8 @@ void __init enable_IO_APIC(void) /* If the interrupt line is enabled and in ExtInt mode * I have found the pin where the i8259 is connected. */ - if ((entry.mask == 0) && (entry.delivery_mode == dest_ExtINT)) { + if (!entry.masked && + entry.delivery_mode == APIC_DELIVERY_MODE_EXTINT) { ioapic_i8259.apic = apic; ioapic_i8259.pin = pin; goto found_i8259; @@ -1410,14 +1410,16 @@ void native_restore_boot_irq_mode(void) */ if (ioapic_i8259.pin != -1) { struct IO_APIC_route_entry entry; + u32 apic_id = read_apic_id(); memset(&entry, 0, sizeof(entry)); - entry.mask = IOAPIC_UNMASKED; - entry.trigger = IOAPIC_EDGE; - entry.polarity = IOAPIC_POL_HIGH; - entry.dest_mode = IOAPIC_DEST_MODE_PHYSICAL; - entry.delivery_mode = dest_ExtINT; - entry.dest = read_apic_id(); + entry.masked = false; + entry.is_level = false; + entry.active_low = false; + entry.dest_mode_logical = false; + entry.delivery_mode = APIC_DELIVERY_MODE_EXTINT; + entry.destid_0_7 = apic_id & 0xFF; + entry.virt_destid_8_14 = apic_id >> 8; /* * Add it to the IO-APIC irq-routing table: @@ -1618,21 +1620,16 @@ static void __init delay_without_tsc(void) static int __init timer_irq_works(void) { unsigned long t1 = jiffies; - unsigned long flags; if (no_timer_check) return 1; - local_save_flags(flags); local_irq_enable(); - if (boot_cpu_has(X86_FEATURE_TSC)) delay_with_tsc(); else delay_without_tsc(); - local_irq_restore(flags); - /* * Expect a few ticks at least, to be sure some possible * glue logic does not lock up after one or two first @@ -1641,10 +1638,10 @@ static int __init timer_irq_works(void) * least one tick may be lost due to delays. */ - /* jiffies wrap? */ - if (time_after(jiffies, t1 + 4)) - return 1; - return 0; + local_irq_disable(); + + /* Did jiffies advance? */ + return time_after(jiffies, t1 + 4); } /* @@ -1696,13 +1693,13 @@ static bool io_apic_level_ack_pending(struct mp_chip_data *data) raw_spin_lock_irqsave(&ioapic_lock, flags); for_each_irq_pin(entry, data->irq_2_pin) { - unsigned int reg; + struct IO_APIC_route_entry e; int pin; pin = entry->pin; - reg = io_apic_read(entry->apic, 0x10 + pin*2); + e.w1 = io_apic_read(entry->apic, 0x10 + pin*2); /* Is the remote IRR bit set? */ - if (reg & IO_APIC_REDIR_REMOTE_IRR) { + if (e.irr) { raw_spin_unlock_irqrestore(&ioapic_lock, flags); return true; } @@ -1849,21 +1846,62 @@ static void ioapic_ir_ack_level(struct irq_data *irq_data) eoi_ioapic_pin(data->entry.vector, data); } +/* + * The I/OAPIC is just a device for generating MSI messages from legacy + * interrupt pins. Various fields of the RTE translate into bits of the + * resulting MSI which had a historical meaning. + * + * With interrupt remapping, many of those bits have different meanings + * in the underlying MSI, but the way that the I/OAPIC transforms them + * from its RTE to the MSI message is the same. This function allows + * the parent IRQ domain to compose the MSI message, then takes the + * relevant bits to put them in the appropriate places in the RTE in + * order to generate that message when the IRQ happens. + * + * The setup here relies on a preconfigured route entry (is_level, + * active_low, masked) because the parent domain is merely composing the + * generic message routing information which is used for the MSI. + */ +static void ioapic_setup_msg_from_msi(struct irq_data *irq_data, + struct IO_APIC_route_entry *entry) +{ + struct msi_msg msg; + + /* Let the parent domain compose the MSI message */ + irq_chip_compose_msi_msg(irq_data, &msg); + + /* + * - Real vector + * - DMAR/IR: 8bit subhandle (ioapic.pin) + * - AMD/IR: 8bit IRTE index + */ + entry->vector = msg.arch_data.vector; + /* Delivery mode (for DMAR/IR all 0) */ + entry->delivery_mode = msg.arch_data.delivery_mode; + /* Destination mode or DMAR/IR index bit 15 */ + entry->dest_mode_logical = msg.arch_addr_lo.dest_mode_logical; + /* DMAR/IR: 1, 0 for all other modes */ + entry->ir_format = msg.arch_addr_lo.dmar_format; + /* + * - DMAR/IR: index bit 0-14. + * + * - Virt: If the host supports x2apic without a virtualized IR + * unit then bit 0-6 of dmar_index_0_14 are providing bit + * 8-14 of the destination id. + * + * All other modes have bit 0-6 of dmar_index_0_14 cleared and the + * topmost 8 bits are destination id bit 0-7 (entry::destid_0_7). + */ + entry->ir_index_0_14 = msg.arch_addr_lo.dmar_index_0_14; +} + static void ioapic_configure_entry(struct irq_data *irqd) { struct mp_chip_data *mpd = irqd->chip_data; - struct irq_cfg *cfg = irqd_cfg(irqd); struct irq_pin_list *entry; - /* - * Only update when the parent is the vector domain, don't touch it - * if the parent is the remapping domain. Check the installed - * ioapic chip to verify that. - */ - if (irqd->chip == &ioapic_chip) { - mpd->entry.dest = cfg->dest_apicid; - mpd->entry.vector = cfg->vector; - } + ioapic_setup_msg_from_msi(irqd, &mpd->entry); + for_each_irq_pin(entry, mpd->irq_2_pin) __ioapic_write_entry(entry->apic, entry->pin, mpd->entry); } @@ -1919,7 +1957,7 @@ static int ioapic_irq_get_chip_state(struct irq_data *irqd, * irrelevant because the IO-APIC treats them as fire and * forget. */ - if (rentry.irr && rentry.trigger) { + if (rentry.irr && rentry.is_level) { *state = true; break; } @@ -2027,6 +2065,7 @@ static inline void __init unlock_ExtINT_logic(void) int apic, pin, i; struct IO_APIC_route_entry entry0, entry1; unsigned char save_control, save_freq_select; + u32 apic_id; pin = find_isa_irq_pin(8, mp_INT); if (pin == -1) { @@ -2042,14 +2081,16 @@ static inline void __init unlock_ExtINT_logic(void) entry0 = ioapic_read_entry(apic, pin); clear_IO_APIC_pin(apic, pin); + apic_id = hard_smp_processor_id(); memset(&entry1, 0, sizeof(entry1)); - entry1.dest_mode = IOAPIC_DEST_MODE_PHYSICAL; - entry1.mask = IOAPIC_UNMASKED; - entry1.dest = hard_smp_processor_id(); - entry1.delivery_mode = dest_ExtINT; - entry1.polarity = entry0.polarity; - entry1.trigger = IOAPIC_EDGE; + entry1.dest_mode_logical = true; + entry1.masked = false; + entry1.destid_0_7 = apic_id & 0xFF; + entry1.virt_destid_8_14 = apic_id >> 8; + entry1.delivery_mode = APIC_DELIVERY_MODE_EXTINT; + entry1.active_low = entry0.active_low; + entry1.is_level = false; entry1.vector = 0; ioapic_write_entry(apic, pin, entry1); @@ -2117,13 +2158,12 @@ static inline void __init check_timer(void) struct irq_cfg *cfg = irqd_cfg(irq_data); int node = cpu_to_node(0); int apic1, pin1, apic2, pin2; - unsigned long flags; int no_pin1 = 0; if (!global_clock_event) return; - local_irq_save(flags); + local_irq_disable(); /* * get/set the timer IRQ vector: @@ -2178,9 +2218,9 @@ static inline void __init check_timer(void) * so only need to unmask if it is level-trigger * do we really have level trigger timer? */ - int idx; - idx = find_irq_entry(apic1, pin1, mp_INT); - if (idx != -1 && irq_trigger(idx)) + int idx = find_irq_entry(apic1, pin1, mp_INT); + + if (idx != -1 && irq_is_level(idx)) unmask_ioapic_irq(irq_get_irq_data(0)); } irq_domain_deactivate_irq(irq_data); @@ -2191,7 +2231,6 @@ static inline void __init check_timer(void) goto out; } panic_if_irq_remap("timer doesn't work through Interrupt-remapped IO-APIC"); - local_irq_disable(); clear_IO_APIC_pin(apic1, pin1); if (!no_pin1) apic_printk(APIC_QUIET, KERN_ERR "..MP-BIOS bug: " @@ -2215,7 +2254,6 @@ static inline void __init check_timer(void) /* * Cleanup, just in case ... */ - local_irq_disable(); legacy_pic->mask(0); clear_IO_APIC_pin(apic2, pin2); apic_printk(APIC_QUIET, KERN_INFO "....... failed.\n"); @@ -2232,7 +2270,6 @@ static inline void __init check_timer(void) apic_printk(APIC_QUIET, KERN_INFO "..... works.\n"); goto out; } - local_irq_disable(); legacy_pic->mask(0); apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | cfg->vector); apic_printk(APIC_QUIET, KERN_INFO "..... failed.\n"); @@ -2251,7 +2288,6 @@ static inline void __init check_timer(void) apic_printk(APIC_QUIET, KERN_INFO "..... works.\n"); goto out; } - local_irq_disable(); apic_printk(APIC_QUIET, KERN_INFO "..... failed :(.\n"); if (apic_is_x2apic_enabled()) apic_printk(APIC_QUIET, KERN_INFO @@ -2260,7 +2296,7 @@ static inline void __init check_timer(void) panic("IO-APIC + timer doesn't work! Boot with apic=debug and send a " "report. Then try booting with the 'noapic' option.\n"); out: - local_irq_restore(flags); + local_irq_enable(); } /* @@ -2284,36 +2320,37 @@ out: static int mp_irqdomain_create(int ioapic) { - struct irq_alloc_info info; struct irq_domain *parent; int hwirqs = mp_ioapic_pin_count(ioapic); struct ioapic *ip = &ioapics[ioapic]; struct ioapic_domain_cfg *cfg = &ip->irqdomain_cfg; struct mp_ioapic_gsi *gsi_cfg = mp_ioapic_gsi_routing(ioapic); struct fwnode_handle *fn; - char *name = "IO-APIC"; + struct irq_fwspec fwspec; if (cfg->type == IOAPIC_DOMAIN_INVALID) return 0; - init_irq_alloc_info(&info, NULL); - info.type = X86_IRQ_ALLOC_TYPE_IOAPIC_GET_PARENT; - info.devid = mpc_ioapic_id(ioapic); - parent = irq_remapping_get_irq_domain(&info); - if (!parent) - parent = x86_vector_domain; - else - name = "IO-APIC-IR"; - /* Handle device tree enumerated APICs proper */ if (cfg->dev) { fn = of_node_to_fwnode(cfg->dev); } else { - fn = irq_domain_alloc_named_id_fwnode(name, ioapic); + fn = irq_domain_alloc_named_id_fwnode("IO-APIC", mpc_ioapic_id(ioapic)); if (!fn) return -ENOMEM; } + fwspec.fwnode = fn; + fwspec.param_count = 1; + fwspec.param[0] = mpc_ioapic_id(ioapic); + + parent = irq_find_matching_fwspec(&fwspec, DOMAIN_BUS_ANY); + if (!parent) { + if (!cfg->dev) + irq_domain_free_fwnode(fn); + return -ENODEV; + } + ip->irqdomain = irq_domain_create_linear(fn, hwirqs, cfg->ops, (void *)(long)ioapic); @@ -2587,30 +2624,6 @@ static int io_apic_get_version(int ioapic) return reg_01.bits.version; } -int acpi_get_override_irq(u32 gsi, int *trigger, int *polarity) -{ - int ioapic, pin, idx; - - if (skip_ioapic_setup) - return -1; - - ioapic = mp_find_ioapic(gsi); - if (ioapic < 0) - return -1; - - pin = mp_find_ioapic_pin(ioapic, gsi); - if (pin < 0) - return -1; - - idx = find_irq_entry(ioapic, pin, mp_INT); - if (idx < 0) - return -1; - - *trigger = irq_trigger(idx); - *polarity = irq_polarity(idx); - return 0; -} - /* * This function updates target affinity of IOAPIC interrupts to include * the CPUs which came online during SMP bringup. @@ -2934,44 +2947,49 @@ static void mp_irqdomain_get_attr(u32 gsi, struct mp_chip_data *data, struct irq_alloc_info *info) { if (info && info->ioapic.valid) { - data->trigger = info->ioapic.trigger; - data->polarity = info->ioapic.polarity; - } else if (acpi_get_override_irq(gsi, &data->trigger, - &data->polarity) < 0) { + data->is_level = info->ioapic.is_level; + data->active_low = info->ioapic.active_low; + } else if (__acpi_get_override_irq(gsi, &data->is_level, + &data->active_low) < 0) { /* PCI interrupts are always active low level triggered. */ - data->trigger = IOAPIC_LEVEL; - data->polarity = IOAPIC_POL_LOW; + data->is_level = true; + data->active_low = true; } } -static void mp_setup_entry(struct irq_cfg *cfg, struct mp_chip_data *data, - struct IO_APIC_route_entry *entry) +/* + * Configure the I/O-APIC specific fields in the routing entry. + * + * This is important to setup the I/O-APIC specific bits (is_level, + * active_low, masked) because the underlying parent domain will only + * provide the routing information and is oblivious of the I/O-APIC + * specific bits. + * + * The entry is just preconfigured at this point and not written into the + * RTE. This happens later during activation which will fill in the actual + * routing information. + */ +static void mp_preconfigure_entry(struct mp_chip_data *data) { + struct IO_APIC_route_entry *entry = &data->entry; + memset(entry, 0, sizeof(*entry)); - entry->delivery_mode = apic->irq_delivery_mode; - entry->dest_mode = apic->irq_dest_mode; - entry->dest = cfg->dest_apicid; - entry->vector = cfg->vector; - entry->trigger = data->trigger; - entry->polarity = data->polarity; + entry->is_level = data->is_level; + entry->active_low = data->active_low; /* * Mask level triggered irqs. Edge triggered irqs are masked * by the irq core code in case they fire. */ - if (data->trigger == IOAPIC_LEVEL) - entry->mask = IOAPIC_MASKED; - else - entry->mask = IOAPIC_UNMASKED; + entry->masked = data->is_level; } int mp_irqdomain_alloc(struct irq_domain *domain, unsigned int virq, unsigned int nr_irqs, void *arg) { - int ret, ioapic, pin; - struct irq_cfg *cfg; - struct irq_data *irq_data; - struct mp_chip_data *data; struct irq_alloc_info *info = arg; + struct mp_chip_data *data; + struct irq_data *irq_data; + int ret, ioapic, pin; unsigned long flags; if (!info || nr_irqs > 1) @@ -2989,7 +3007,6 @@ int mp_irqdomain_alloc(struct irq_domain *domain, unsigned int virq, if (!data) return -ENOMEM; - info->ioapic.entry = &data->entry; ret = irq_domain_alloc_irqs_parent(domain, virq, nr_irqs, info); if (ret < 0) { kfree(data); @@ -3003,22 +3020,20 @@ int mp_irqdomain_alloc(struct irq_domain *domain, unsigned int virq, irq_data->chip_data = data; mp_irqdomain_get_attr(mp_pin_to_gsi(ioapic, pin), data, info); - cfg = irqd_cfg(irq_data); add_pin_to_irq_node(data, ioapic_alloc_attr_node(info), ioapic, pin); + mp_preconfigure_entry(data); + mp_register_handler(virq, data->is_level); + local_irq_save(flags); - if (info->ioapic.entry) - mp_setup_entry(cfg, data, info->ioapic.entry); - mp_register_handler(virq, data->trigger); if (virq < nr_legacy_irqs()) legacy_pic->mask(virq); local_irq_restore(flags); apic_printk(APIC_VERBOSE, KERN_DEBUG - "IOAPIC[%d]: Set routing entry (%d-%d -> 0x%x -> IRQ %d Mode:%i Active:%i Dest:%d)\n", - ioapic, mpc_ioapic_id(ioapic), pin, cfg->vector, - virq, data->trigger, data->polarity, cfg->dest_apicid); - + "IOAPIC[%d]: Preconfigured routing entry (%d-%d -> IRQ %d Level:%i ActiveLow:%i)\n", + ioapic, mpc_ioapic_id(ioapic), pin, virq, + data->is_level, data->active_low); return 0; } diff --git a/arch/x86/kernel/apic/ipi.c b/arch/x86/kernel/apic/ipi.c index 387154e39e08..d1fb874fbe64 100644 --- a/arch/x86/kernel/apic/ipi.c +++ b/arch/x86/kernel/apic/ipi.c @@ -260,7 +260,7 @@ void default_send_IPI_mask_sequence_logical(const struct cpumask *mask, for_each_cpu(query_cpu, mask) __default_send_IPI_dest_field( early_per_cpu(x86_cpu_to_logical_apicid, query_cpu), - vector, apic->dest_logical); + vector, APIC_DEST_LOGICAL); local_irq_restore(flags); } @@ -279,7 +279,7 @@ void default_send_IPI_mask_allbutself_logical(const struct cpumask *mask, continue; __default_send_IPI_dest_field( early_per_cpu(x86_cpu_to_logical_apicid, query_cpu), - vector, apic->dest_logical); + vector, APIC_DEST_LOGICAL); } local_irq_restore(flags); } @@ -297,7 +297,7 @@ void default_send_IPI_mask_logical(const struct cpumask *cpumask, int vector) local_irq_save(flags); WARN_ON(mask & ~cpumask_bits(cpu_online_mask)[0]); - __default_send_IPI_dest_field(mask, vector, apic->dest_logical); + __default_send_IPI_dest_field(mask, vector, APIC_DEST_LOGICAL); local_irq_restore(flags); } diff --git a/arch/x86/kernel/apic/msi.c b/arch/x86/kernel/apic/msi.c index 6313f0a05db7..44ebe25e7703 100644 --- a/arch/x86/kernel/apic/msi.c +++ b/arch/x86/kernel/apic/msi.c @@ -15,7 +15,6 @@ #include #include #include -#include #include #include #include @@ -23,38 +22,11 @@ struct irq_domain *x86_pci_msi_default_domain __ro_after_init; -static void __irq_msi_compose_msg(struct irq_cfg *cfg, struct msi_msg *msg) -{ - msg->address_hi = MSI_ADDR_BASE_HI; - - if (x2apic_enabled()) - msg->address_hi |= MSI_ADDR_EXT_DEST_ID(cfg->dest_apicid); - - msg->address_lo = - MSI_ADDR_BASE_LO | - ((apic->irq_dest_mode == 0) ? - MSI_ADDR_DEST_MODE_PHYSICAL : - MSI_ADDR_DEST_MODE_LOGICAL) | - MSI_ADDR_REDIRECTION_CPU | - MSI_ADDR_DEST_ID(cfg->dest_apicid); - - msg->data = - MSI_DATA_TRIGGER_EDGE | - MSI_DATA_LEVEL_ASSERT | - MSI_DATA_DELIVERY_FIXED | - MSI_DATA_VECTOR(cfg->vector); -} - -void x86_vector_msi_compose_msg(struct irq_data *data, struct msi_msg *msg) -{ - __irq_msi_compose_msg(irqd_cfg(data), msg); -} - static void irq_msi_update_msg(struct irq_data *irqd, struct irq_cfg *cfg) { struct msi_msg msg[2] = { [1] = { }, }; - __irq_msi_compose_msg(cfg, msg); + __irq_msi_compose_msg(cfg, msg, false); irq_data_get_irq_chip(irqd)->irq_write_msi_msg(irqd, msg); } @@ -276,6 +248,17 @@ struct irq_domain *arch_create_remap_msi_irq_domain(struct irq_domain *parent, #endif #ifdef CONFIG_DMAR_TABLE +/* + * The Intel IOMMU (ab)uses the high bits of the MSI address to contain the + * high bits of the destination APIC ID. This can't be done in the general + * case for MSIs as it would be targeting real memory above 4GiB not the + * APIC. + */ +static void dmar_msi_compose_msg(struct irq_data *data, struct msi_msg *msg) +{ + __irq_msi_compose_msg(irqd_cfg(data), msg, true); +} + static void dmar_msi_write_msg(struct irq_data *data, struct msi_msg *msg) { dmar_msi_write(data->irq, msg); @@ -288,6 +271,7 @@ static struct irq_chip dmar_msi_controller = { .irq_ack = irq_chip_ack_parent, .irq_set_affinity = msi_domain_set_affinity, .irq_retrigger = irq_chip_retrigger_hierarchy, + .irq_compose_msi_msg = dmar_msi_compose_msg, .irq_write_msi_msg = dmar_msi_write_msg, .flags = IRQCHIP_SKIP_SET_WAKE, }; @@ -356,114 +340,3 @@ void dmar_free_hwirq(int irq) irq_domain_free_irqs(irq, 1); } #endif - -/* - * MSI message composition - */ -#ifdef CONFIG_HPET_TIMER -static inline int hpet_dev_id(struct irq_domain *domain) -{ - struct msi_domain_info *info = msi_get_domain_info(domain); - - return (int)(long)info->data; -} - -static void hpet_msi_write_msg(struct irq_data *data, struct msi_msg *msg) -{ - hpet_msi_write(irq_data_get_irq_handler_data(data), msg); -} - -static struct irq_chip hpet_msi_controller __ro_after_init = { - .name = "HPET-MSI", - .irq_unmask = hpet_msi_unmask, - .irq_mask = hpet_msi_mask, - .irq_ack = irq_chip_ack_parent, - .irq_set_affinity = msi_domain_set_affinity, - .irq_retrigger = irq_chip_retrigger_hierarchy, - .irq_write_msi_msg = hpet_msi_write_msg, - .flags = IRQCHIP_SKIP_SET_WAKE, -}; - -static int hpet_msi_init(struct irq_domain *domain, - struct msi_domain_info *info, unsigned int virq, - irq_hw_number_t hwirq, msi_alloc_info_t *arg) -{ - irq_set_status_flags(virq, IRQ_MOVE_PCNTXT); - irq_domain_set_info(domain, virq, arg->hwirq, info->chip, NULL, - handle_edge_irq, arg->data, "edge"); - - return 0; -} - -static void hpet_msi_free(struct irq_domain *domain, - struct msi_domain_info *info, unsigned int virq) -{ - irq_clear_status_flags(virq, IRQ_MOVE_PCNTXT); -} - -static struct msi_domain_ops hpet_msi_domain_ops = { - .msi_init = hpet_msi_init, - .msi_free = hpet_msi_free, -}; - -static struct msi_domain_info hpet_msi_domain_info = { - .ops = &hpet_msi_domain_ops, - .chip = &hpet_msi_controller, - .flags = MSI_FLAG_USE_DEF_DOM_OPS, -}; - -struct irq_domain *hpet_create_irq_domain(int hpet_id) -{ - struct msi_domain_info *domain_info; - struct irq_domain *parent, *d; - struct irq_alloc_info info; - struct fwnode_handle *fn; - - if (x86_vector_domain == NULL) - return NULL; - - domain_info = kzalloc(sizeof(*domain_info), GFP_KERNEL); - if (!domain_info) - return NULL; - - *domain_info = hpet_msi_domain_info; - domain_info->data = (void *)(long)hpet_id; - - init_irq_alloc_info(&info, NULL); - info.type = X86_IRQ_ALLOC_TYPE_HPET_GET_PARENT; - info.devid = hpet_id; - parent = irq_remapping_get_irq_domain(&info); - if (parent == NULL) - parent = x86_vector_domain; - else - hpet_msi_controller.name = "IR-HPET-MSI"; - - fn = irq_domain_alloc_named_id_fwnode(hpet_msi_controller.name, - hpet_id); - if (!fn) { - kfree(domain_info); - return NULL; - } - - d = msi_create_irq_domain(fn, domain_info, parent); - if (!d) { - irq_domain_free_fwnode(fn); - kfree(domain_info); - } - return d; -} - -int hpet_assign_irq(struct irq_domain *domain, struct hpet_channel *hc, - int dev_num) -{ - struct irq_alloc_info info; - - init_irq_alloc_info(&info, NULL); - info.type = X86_IRQ_ALLOC_TYPE_HPET; - info.data = hc; - info.devid = hpet_dev_id(domain); - info.hwirq = dev_num; - - return irq_domain_alloc_irqs(domain, 1, NUMA_NO_NODE, &info); -} -#endif diff --git a/arch/x86/kernel/apic/probe_32.c b/arch/x86/kernel/apic/probe_32.c index 67b6f7c049ec..a61f642b1b90 100644 --- a/arch/x86/kernel/apic/probe_32.c +++ b/arch/x86/kernel/apic/probe_32.c @@ -69,16 +69,13 @@ static struct apic apic_default __ro_after_init = { .apic_id_valid = default_apic_id_valid, .apic_id_registered = default_apic_id_registered, - .irq_delivery_mode = dest_Fixed, - /* logical delivery broadcast to all CPUs: */ - .irq_dest_mode = 1, + .delivery_mode = APIC_DELIVERY_MODE_FIXED, + .dest_mode_logical = true, .disable_esr = 0, - .dest_logical = APIC_DEST_LOGICAL, + .check_apicid_used = default_check_apicid_used, - .init_apic_ldr = default_init_apic_ldr, - .ioapic_phys_id_map = default_ioapic_phys_id_map, .setup_apic_routing = setup_apic_flat_routing, .cpu_present_to_apicid = default_cpu_present_to_apicid, diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c index 758bbf25ef74..3c9c7492252f 100644 --- a/arch/x86/kernel/apic/vector.c +++ b/arch/x86/kernel/apic/vector.c @@ -640,7 +640,50 @@ static void x86_vector_debug_show(struct seq_file *m, struct irq_domain *d, } #endif +int x86_fwspec_is_ioapic(struct irq_fwspec *fwspec) +{ + if (fwspec->param_count != 1) + return 0; + + if (is_fwnode_irqchip(fwspec->fwnode)) { + const char *fwname = fwnode_get_name(fwspec->fwnode); + return fwname && !strncmp(fwname, "IO-APIC-", 8) && + simple_strtol(fwname+8, NULL, 10) == fwspec->param[0]; + } + return to_of_node(fwspec->fwnode) && + of_device_is_compatible(to_of_node(fwspec->fwnode), + "intel,ce4100-ioapic"); +} + +int x86_fwspec_is_hpet(struct irq_fwspec *fwspec) +{ + if (fwspec->param_count != 1) + return 0; + + if (is_fwnode_irqchip(fwspec->fwnode)) { + const char *fwname = fwnode_get_name(fwspec->fwnode); + return fwname && !strncmp(fwname, "HPET-MSI-", 9) && + simple_strtol(fwname+9, NULL, 10) == fwspec->param[0]; + } + return 0; +} + +static int x86_vector_select(struct irq_domain *d, struct irq_fwspec *fwspec, + enum irq_domain_bus_token bus_token) +{ + /* + * HPET and I/OAPIC cannot be parented in the vector domain + * if IRQ remapping is enabled. APIC IDs above 15 bits are + * only permitted if IRQ remapping is enabled, so check that. + */ + if (apic->apic_id_valid(32768)) + return 0; + + return x86_fwspec_is_ioapic(fwspec) || x86_fwspec_is_hpet(fwspec); +} + static const struct irq_domain_ops x86_vector_domain_ops = { + .select = x86_vector_select, .alloc = x86_vector_alloc_irqs, .free = x86_vector_free_irqs, .activate = x86_vector_activate, @@ -822,6 +865,12 @@ void apic_ack_edge(struct irq_data *irqd) apic_ack_irq(irqd); } +static void x86_vector_msi_compose_msg(struct irq_data *data, + struct msi_msg *msg) +{ + __irq_msi_compose_msg(irqd_cfg(data), msg, false); +} + static struct irq_chip lapic_controller = { .name = "APIC", .irq_ack = apic_ack_edge, diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c index b0889c48a2ac..df6adc5674c9 100644 --- a/arch/x86/kernel/apic/x2apic_cluster.c +++ b/arch/x86/kernel/apic/x2apic_cluster.c @@ -61,7 +61,7 @@ __x2apic_send_IPI_mask(const struct cpumask *mask, int vector, int apic_dest) if (!dest) continue; - __x2apic_send_IPI_dest(dest, vector, apic->dest_logical); + __x2apic_send_IPI_dest(dest, vector, APIC_DEST_LOGICAL); /* Remove cluster CPUs from tmpmask */ cpumask_andnot(tmpmsk, tmpmsk, &cmsk->mask); } @@ -184,15 +184,13 @@ static struct apic apic_x2apic_cluster __ro_after_init = { .apic_id_valid = x2apic_apic_id_valid, .apic_id_registered = x2apic_apic_id_registered, - .irq_delivery_mode = dest_Fixed, - .irq_dest_mode = 1, /* logical */ + .delivery_mode = APIC_DELIVERY_MODE_FIXED, + .dest_mode_logical = true, .disable_esr = 0, - .dest_logical = APIC_DEST_LOGICAL, + .check_apicid_used = NULL, - .init_apic_ldr = init_x2apic_ldr, - .ioapic_phys_id_map = NULL, .setup_apic_routing = NULL, .cpu_present_to_apicid = default_cpu_present_to_apicid, diff --git a/arch/x86/kernel/apic/x2apic_phys.c b/arch/x86/kernel/apic/x2apic_phys.c index bc9693841353..0e4e81971567 100644 --- a/arch/x86/kernel/apic/x2apic_phys.c +++ b/arch/x86/kernel/apic/x2apic_phys.c @@ -8,6 +8,12 @@ int x2apic_phys; static struct apic apic_x2apic_phys; +static u32 x2apic_max_apicid __ro_after_init; + +void __init x2apic_set_max_apicid(u32 apicid) +{ + x2apic_max_apicid = apicid; +} static int __init set_x2apic_phys_mode(char *arg) { @@ -98,6 +104,9 @@ static int x2apic_phys_probe(void) /* Common x2apic functions, also used by x2apic_cluster */ int x2apic_apic_id_valid(u32 apicid) { + if (x2apic_max_apicid && apicid > x2apic_max_apicid) + return 0; + return 1; } @@ -148,15 +157,13 @@ static struct apic apic_x2apic_phys __ro_after_init = { .apic_id_valid = x2apic_apic_id_valid, .apic_id_registered = x2apic_apic_id_registered, - .irq_delivery_mode = dest_Fixed, - .irq_dest_mode = 0, /* physical */ + .delivery_mode = APIC_DELIVERY_MODE_FIXED, + .dest_mode_logical = false, .disable_esr = 0, - .dest_logical = 0, + .check_apicid_used = NULL, - .init_apic_ldr = init_x2apic_ldr, - .ioapic_phys_id_map = NULL, .setup_apic_routing = NULL, .cpu_present_to_apicid = default_cpu_present_to_apicid, diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index 4bde125a5bf1..52bc217ca8c3 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -728,9 +728,9 @@ static void uv_send_IPI_one(int cpu, int vector) unsigned long dmode, val; if (vector == NMI_VECTOR) - dmode = dest_NMI; + dmode = APIC_DELIVERY_MODE_NMI; else - dmode = dest_Fixed; + dmode = APIC_DELIVERY_MODE_FIXED; val = (1UL << UVH_IPI_INT_SEND_SHFT) | (apicid << UVH_IPI_INT_APIC_ID_SHFT) | @@ -832,15 +832,13 @@ static struct apic apic_x2apic_uv_x __ro_after_init = { .apic_id_valid = uv_apic_id_valid, .apic_id_registered = uv_apic_id_registered, - .irq_delivery_mode = dest_Fixed, - .irq_dest_mode = 0, /* Physical */ + .delivery_mode = APIC_DELIVERY_MODE_FIXED, + .dest_mode_logical = false, .disable_esr = 0, - .dest_logical = APIC_DEST_LOGICAL, + .check_apicid_used = NULL, - .init_apic_ldr = uv_init_apic_ldr, - .ioapic_phys_id_map = NULL, .setup_apic_routing = NULL, .cpu_present_to_apicid = default_cpu_present_to_apicid, diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c index 05ef1f4550cb..f628e3dc150f 100644 --- a/arch/x86/kernel/cpu/mshyperv.c +++ b/arch/x86/kernel/cpu/mshyperv.c @@ -366,9 +366,38 @@ static void __init ms_hyperv_init_platform(void) #endif } +static bool __init ms_hyperv_x2apic_available(void) +{ + return x2apic_supported(); +} + +/* + * If ms_hyperv_msi_ext_dest_id() returns true, hyperv_prepare_irq_remapping() + * returns -ENODEV and the Hyper-V IOMMU driver is not used; instead, the + * generic support of the 15-bit APIC ID is used: see __irq_msi_compose_msg(). + * + * Note: for a VM on Hyper-V, the I/O-APIC is the only device which + * (logically) generates MSIs directly to the system APIC irq domain. + * There is no HPET, and PCI MSI/MSI-X interrupts are remapped by the + * pci-hyperv host bridge. + */ +static bool __init ms_hyperv_msi_ext_dest_id(void) +{ + u32 eax; + + eax = cpuid_eax(HYPERV_CPUID_VIRT_STACK_INTERFACE); + if (eax != HYPERV_VS_INTERFACE_EAX_SIGNATURE) + return false; + + eax = cpuid_eax(HYPERV_CPUID_VIRT_STACK_PROPERTIES); + return eax & HYPERV_VS_PROPERTIES_EAX_EXTENDED_IOAPIC_RTE; +} + const __initconst struct hypervisor_x86 x86_hyper_ms_hyperv = { .name = "Microsoft Hyper-V", .detect = ms_hyperv_platform, .type = X86_HYPER_MS_HYPERV, + .init.x2apic_available = ms_hyperv_x2apic_available, + .init.msi_ext_dest_id = ms_hyperv_msi_ext_dest_id, .init.init_platform = ms_hyperv_init_platform, }; diff --git a/arch/x86/kernel/crash_dump_32.c b/arch/x86/kernel/crash_dump_32.c index 33ee47670b99..5fcac46aaf6b 100644 --- a/arch/x86/kernel/crash_dump_32.c +++ b/arch/x86/kernel/crash_dump_32.c @@ -13,8 +13,6 @@ #include -static void *kdump_buf_page; - static inline bool is_crashed_pfn_valid(unsigned long pfn) { #ifndef CONFIG_X86_PAE @@ -41,15 +39,11 @@ static inline bool is_crashed_pfn_valid(unsigned long pfn) * @userbuf: if set, @buf is in user address space, use copy_to_user(), * otherwise @buf is in kernel address space, use memcpy(). * - * Copy a page from "oldmem". For this page, there is no pte mapped - * in the current kernel. We stitch up a pte, similar to kmap_atomic. - * - * Calling copy_to_user() in atomic context is not desirable. Hence first - * copying the data to a pre-allocated kernel page and then copying to user - * space in non-atomic context. + * Copy a page from "oldmem". For this page, there might be no pte mapped + * in the current kernel. */ -ssize_t copy_oldmem_page(unsigned long pfn, char *buf, - size_t csize, unsigned long offset, int userbuf) +ssize_t copy_oldmem_page(unsigned long pfn, char *buf, size_t csize, + unsigned long offset, int userbuf) { void *vaddr; @@ -59,38 +53,16 @@ ssize_t copy_oldmem_page(unsigned long pfn, char *buf, if (!is_crashed_pfn_valid(pfn)) return -EFAULT; - vaddr = kmap_atomic_pfn(pfn); + vaddr = kmap_local_pfn(pfn); if (!userbuf) { - memcpy(buf, (vaddr + offset), csize); - kunmap_atomic(vaddr); + memcpy(buf, vaddr + offset, csize); } else { - if (!kdump_buf_page) { - printk(KERN_WARNING "Kdump: Kdump buffer page not" - " allocated\n"); - kunmap_atomic(vaddr); - return -EFAULT; - } - copy_page(kdump_buf_page, vaddr); - kunmap_atomic(vaddr); - if (copy_to_user(buf, (kdump_buf_page + offset), csize)) - return -EFAULT; + if (copy_to_user(buf, vaddr + offset, csize)) + csize = -EFAULT; } + kunmap_local(vaddr); + return csize; } - -static int __init kdump_buf_page_init(void) -{ - int ret = 0; - - kdump_buf_page = kmalloc(PAGE_SIZE, GFP_KERNEL); - if (!kdump_buf_page) { - printk(KERN_WARNING "Kdump: Failed to allocate kdump buffer" - " page\n"); - ret = -ENOMEM; - } - - return ret; -} -arch_initcall(kdump_buf_page_init); diff --git a/arch/x86/kernel/devicetree.c b/arch/x86/kernel/devicetree.c index ddffd80f5c52..6a4cb71c2498 100644 --- a/arch/x86/kernel/devicetree.c +++ b/arch/x86/kernel/devicetree.c @@ -184,31 +184,31 @@ static unsigned int ioapic_id; struct of_ioapic_type { u32 out_type; - u32 trigger; - u32 polarity; + u32 is_level; + u32 active_low; }; static struct of_ioapic_type of_ioapic_type[] = { { - .out_type = IRQ_TYPE_EDGE_RISING, - .trigger = IOAPIC_EDGE, - .polarity = 1, - }, - { - .out_type = IRQ_TYPE_LEVEL_LOW, - .trigger = IOAPIC_LEVEL, - .polarity = 0, + .out_type = IRQ_TYPE_EDGE_FALLING, + .is_level = 0, + .active_low = 1, }, { .out_type = IRQ_TYPE_LEVEL_HIGH, - .trigger = IOAPIC_LEVEL, - .polarity = 1, + .is_level = 1, + .active_low = 0, }, { - .out_type = IRQ_TYPE_EDGE_FALLING, - .trigger = IOAPIC_EDGE, - .polarity = 0, + .out_type = IRQ_TYPE_LEVEL_LOW, + .is_level = 1, + .active_low = 1, + }, + { + .out_type = IRQ_TYPE_EDGE_RISING, + .is_level = 0, + .active_low = 0, }, }; @@ -228,7 +228,7 @@ static int dt_irqdomain_alloc(struct irq_domain *domain, unsigned int virq, return -EINVAL; it = &of_ioapic_type[type_index]; - ioapic_set_alloc_attr(&tmp, NUMA_NO_NODE, it->trigger, it->polarity); + ioapic_set_alloc_attr(&tmp, NUMA_NO_NODE, it->is_level, it->active_low); tmp.devid = mpc_ioapic_id(mp_irqdomain_ioapic_idx(domain)); tmp.ioapic.pin = fwspec->param[0]; diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index 7a50f0b62a70..08651a4e6aa0 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c @@ -7,6 +7,7 @@ #include #include +#include #include #include @@ -50,7 +51,7 @@ unsigned long hpet_address; u8 hpet_blockid; /* OS timer block num */ bool hpet_msi_disable; -#ifdef CONFIG_PCI_MSI +#ifdef CONFIG_GENERIC_MSI_IRQ static DEFINE_PER_CPU(struct hpet_channel *, cpu_hpet_channel); static struct irq_domain *hpet_domain; #endif @@ -467,9 +468,8 @@ static void __init hpet_legacy_clockevent_register(struct hpet_channel *hc) /* * HPET MSI Support */ -#ifdef CONFIG_PCI_MSI - -void hpet_msi_unmask(struct irq_data *data) +#ifdef CONFIG_GENERIC_MSI_IRQ +static void hpet_msi_unmask(struct irq_data *data) { struct hpet_channel *hc = irq_data_get_irq_handler_data(data); unsigned int cfg; @@ -479,7 +479,7 @@ void hpet_msi_unmask(struct irq_data *data) hpet_writel(cfg, HPET_Tn_CFG(hc->num)); } -void hpet_msi_mask(struct irq_data *data) +static void hpet_msi_mask(struct irq_data *data) { struct hpet_channel *hc = irq_data_get_irq_handler_data(data); unsigned int cfg; @@ -489,12 +489,122 @@ void hpet_msi_mask(struct irq_data *data) hpet_writel(cfg, HPET_Tn_CFG(hc->num)); } -void hpet_msi_write(struct hpet_channel *hc, struct msi_msg *msg) +static void hpet_msi_write(struct hpet_channel *hc, struct msi_msg *msg) { hpet_writel(msg->data, HPET_Tn_ROUTE(hc->num)); hpet_writel(msg->address_lo, HPET_Tn_ROUTE(hc->num) + 4); } +static void hpet_msi_write_msg(struct irq_data *data, struct msi_msg *msg) +{ + hpet_msi_write(irq_data_get_irq_handler_data(data), msg); +} + +static struct irq_chip hpet_msi_controller __ro_after_init = { + .name = "HPET-MSI", + .irq_unmask = hpet_msi_unmask, + .irq_mask = hpet_msi_mask, + .irq_ack = irq_chip_ack_parent, + .irq_set_affinity = msi_domain_set_affinity, + .irq_retrigger = irq_chip_retrigger_hierarchy, + .irq_write_msi_msg = hpet_msi_write_msg, + .flags = IRQCHIP_SKIP_SET_WAKE, +}; + +static int hpet_msi_init(struct irq_domain *domain, + struct msi_domain_info *info, unsigned int virq, + irq_hw_number_t hwirq, msi_alloc_info_t *arg) +{ + irq_set_status_flags(virq, IRQ_MOVE_PCNTXT); + irq_domain_set_info(domain, virq, arg->hwirq, info->chip, NULL, + handle_edge_irq, arg->data, "edge"); + + return 0; +} + +static void hpet_msi_free(struct irq_domain *domain, + struct msi_domain_info *info, unsigned int virq) +{ + irq_clear_status_flags(virq, IRQ_MOVE_PCNTXT); +} + +static struct msi_domain_ops hpet_msi_domain_ops = { + .msi_init = hpet_msi_init, + .msi_free = hpet_msi_free, +}; + +static struct msi_domain_info hpet_msi_domain_info = { + .ops = &hpet_msi_domain_ops, + .chip = &hpet_msi_controller, + .flags = MSI_FLAG_USE_DEF_DOM_OPS, +}; + +static struct irq_domain *hpet_create_irq_domain(int hpet_id) +{ + struct msi_domain_info *domain_info; + struct irq_domain *parent, *d; + struct fwnode_handle *fn; + struct irq_fwspec fwspec; + + if (x86_vector_domain == NULL) + return NULL; + + domain_info = kzalloc(sizeof(*domain_info), GFP_KERNEL); + if (!domain_info) + return NULL; + + *domain_info = hpet_msi_domain_info; + domain_info->data = (void *)(long)hpet_id; + + fn = irq_domain_alloc_named_id_fwnode(hpet_msi_controller.name, + hpet_id); + if (!fn) { + kfree(domain_info); + return NULL; + } + + fwspec.fwnode = fn; + fwspec.param_count = 1; + fwspec.param[0] = hpet_id; + + parent = irq_find_matching_fwspec(&fwspec, DOMAIN_BUS_ANY); + if (!parent) { + irq_domain_free_fwnode(fn); + kfree(domain_info); + return NULL; + } + if (parent != x86_vector_domain) + hpet_msi_controller.name = "IR-HPET-MSI"; + + d = msi_create_irq_domain(fn, domain_info, parent); + if (!d) { + irq_domain_free_fwnode(fn); + kfree(domain_info); + } + return d; +} + +static inline int hpet_dev_id(struct irq_domain *domain) +{ + struct msi_domain_info *info = msi_get_domain_info(domain); + + return (int)(long)info->data; +} + +static int hpet_assign_irq(struct irq_domain *domain, struct hpet_channel *hc, + int dev_num) +{ + struct irq_alloc_info info; + + init_irq_alloc_info(&info, NULL); + info.type = X86_IRQ_ALLOC_TYPE_HPET; + info.data = hc; + info.devid = hpet_dev_id(domain); + info.hwirq = dev_num; + + return irq_domain_alloc_irqs(domain, 1, NUMA_NO_NODE, &info); +} + static int hpet_clkevt_msi_resume(struct clock_event_device *evt) { struct hpet_channel *hc = clockevent_to_channel(evt); diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index 7f57ede3cb8e..5e78e01ca3b4 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -740,6 +740,11 @@ static void __init kvm_apic_init(void) #endif } +static bool __init kvm_msi_ext_dest_id(void) +{ + return kvm_para_has_feature(KVM_FEATURE_MSI_EXT_DEST_ID); +} + static void __init kvm_init_platform(void) { kvmclock_init(); @@ -769,6 +774,7 @@ const __initconst struct hypervisor_x86 x86_hyper_kvm = { .type = X86_HYPER_KVM, .init.guest_late_init = kvm_guest_init, .init.x2apic_available = kvm_para_available, + .init.msi_ext_dest_id = kvm_msi_ext_dest_id, .init.init_platform = kvm_init_platform, #if defined(CONFIG_AMD_MEM_ENCRYPT) .runtime.sev_es_hcall_prepare = kvm_sev_es_hcall_prepare, diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 9278ed7b564e..8ca66af96a54 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -752,13 +752,14 @@ static void __init smp_quirk_init_udelay(void) int wakeup_secondary_cpu_via_nmi(int apicid, unsigned long start_eip) { + u32 dm = apic->dest_mode_logical ? APIC_DEST_LOGICAL : APIC_DEST_PHYSICAL; unsigned long send_status, accept_status = 0; int maxlvt; /* Target chip */ /* Boot on the stack */ /* Kick the second */ - apic_icr_write(APIC_DM_NMI | apic->dest_logical, apicid); + apic_icr_write(APIC_DM_NMI | dm, apicid); pr_debug("Waiting for send to finish...\n"); send_status = safe_apic_wait_icr_idle(); @@ -985,10 +986,7 @@ wakeup_cpu_via_init_nmi(int cpu, unsigned long start_ip, int apicid, if (!boot_error) { enable_start_cpu0 = 1; *cpu0_nmi_registered = 1; - if (apic->dest_logical == APIC_DEST_LOGICAL) - id = cpu0_logical_apicid; - else - id = apicid; + id = apic->dest_mode_logical ? cpu0_logical_apicid : apicid; boot_error = wakeup_secondary_cpu_via_nmi(id, start_ip); } diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c index a3038d8deb6a..8b395821cb8d 100644 --- a/arch/x86/kernel/x86_init.c +++ b/arch/x86/kernel/x86_init.c @@ -110,6 +110,7 @@ struct x86_init_ops x86_init __initdata = { .init_platform = x86_init_noop, .guest_late_init = x86_init_noop, .x2apic_available = bool_x86_init_noop, + .msi_ext_dest_id = bool_x86_init_noop, .init_mem_mapping = x86_init_noop, .init_after_bootmem = x86_init_noop, }, diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c index 4aa1c2e00e2a..8a4de3f12820 100644 --- a/arch/x86/kvm/irq_comm.c +++ b/arch/x86/kvm/irq_comm.c @@ -16,8 +16,6 @@ #include -#include - #include "irq.h" #include "ioapic.h" @@ -104,22 +102,19 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src, void kvm_set_msi_irq(struct kvm *kvm, struct kvm_kernel_irq_routing_entry *e, struct kvm_lapic_irq *irq) { - trace_kvm_msi_set_irq(e->msi.address_lo | (kvm->arch.x2apic_format ? - (u64)e->msi.address_hi << 32 : 0), - e->msi.data); + struct msi_msg msg = { .address_lo = e->msi.address_lo, + .address_hi = e->msi.address_hi, + .data = e->msi.data }; - irq->dest_id = (e->msi.address_lo & - MSI_ADDR_DEST_ID_MASK) >> MSI_ADDR_DEST_ID_SHIFT; - if (kvm->arch.x2apic_format) - irq->dest_id |= MSI_ADDR_EXT_DEST_ID(e->msi.address_hi); - irq->vector = (e->msi.data & - MSI_DATA_VECTOR_MASK) >> MSI_DATA_VECTOR_SHIFT; - irq->dest_mode = kvm_lapic_irq_dest_mode( - !!((1 << MSI_ADDR_DEST_MODE_SHIFT) & e->msi.address_lo)); - irq->trig_mode = (1 << MSI_DATA_TRIGGER_SHIFT) & e->msi.data; - irq->delivery_mode = e->msi.data & 0x700; - irq->msi_redir_hint = ((e->msi.address_lo - & MSI_ADDR_REDIRECTION_LOWPRI) > 0); + trace_kvm_msi_set_irq(msg.address_lo | (kvm->arch.x2apic_format ? + (u64)msg.address_hi << 32 : 0), msg.data); + + irq->dest_id = x86_msi_msg_get_destid(&msg, kvm->arch.x2apic_format); + irq->vector = msg.arch_data.vector; + irq->dest_mode = kvm_lapic_irq_dest_mode(msg.arch_addr_lo.dest_mode_logical); + irq->trig_mode = msg.arch_data.is_level; + irq->delivery_mode = msg.arch_data.delivery_mode << 8; + irq->msi_redir_hint = msg.arch_addr_lo.redirect_hint; irq->level = 1; irq->shorthand = APIC_DEST_NOSHORT; } diff --git a/arch/x86/mm/highmem_32.c b/arch/x86/mm/highmem_32.c index 075fe51317b0..2c54b76d8f84 100644 --- a/arch/x86/mm/highmem_32.c +++ b/arch/x86/mm/highmem_32.c @@ -4,65 +4,6 @@ #include /* for totalram_pages */ #include -void *kmap_atomic_high_prot(struct page *page, pgprot_t prot) -{ - unsigned long vaddr; - int idx, type; - - type = kmap_atomic_idx_push(); - idx = type + KM_TYPE_NR*smp_processor_id(); - vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); - BUG_ON(!pte_none(*(kmap_pte-idx))); - set_pte(kmap_pte-idx, mk_pte(page, prot)); - arch_flush_lazy_mmu_mode(); - - return (void *)vaddr; -} -EXPORT_SYMBOL(kmap_atomic_high_prot); - -/* - * This is the same as kmap_atomic() but can map memory that doesn't - * have a struct page associated with it. - */ -void *kmap_atomic_pfn(unsigned long pfn) -{ - return kmap_atomic_prot_pfn(pfn, kmap_prot); -} -EXPORT_SYMBOL_GPL(kmap_atomic_pfn); - -void kunmap_atomic_high(void *kvaddr) -{ - unsigned long vaddr = (unsigned long) kvaddr & PAGE_MASK; - - if (vaddr >= __fix_to_virt(FIX_KMAP_END) && - vaddr <= __fix_to_virt(FIX_KMAP_BEGIN)) { - int idx, type; - - type = kmap_atomic_idx(); - idx = type + KM_TYPE_NR * smp_processor_id(); - -#ifdef CONFIG_DEBUG_HIGHMEM - WARN_ON_ONCE(vaddr != __fix_to_virt(FIX_KMAP_BEGIN + idx)); -#endif - /* - * Force other mappings to Oops if they'll try to access this - * pte without first remap it. Keeping stale mappings around - * is a bad idea also, in case the page changes cacheability - * attributes or becomes a protected page in a hypervisor. - */ - kpte_clear_flush(kmap_pte-idx, vaddr); - kmap_atomic_idx_pop(); - arch_flush_lazy_mmu_mode(); - } -#ifdef CONFIG_DEBUG_HIGHMEM - else { - BUG_ON(vaddr < PAGE_OFFSET); - BUG_ON(vaddr >= (unsigned long)high_memory); - } -#endif -} -EXPORT_SYMBOL(kunmap_atomic_high); - void __init set_highmem_pages_init(void) { struct zone *zone; diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 7c055259de3a..da31c2635ee4 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -394,19 +394,6 @@ repeat: return last_map_addr; } -pte_t *kmap_pte; - -static void __init kmap_init(void) -{ - unsigned long kmap_vstart; - - /* - * Cache the first kmap pte: - */ - kmap_vstart = __fix_to_virt(FIX_KMAP_BEGIN); - kmap_pte = virt_to_kpte(kmap_vstart); -} - #ifdef CONFIG_HIGHMEM static void __init permanent_kmaps_init(pgd_t *pgd_base) { @@ -712,8 +699,6 @@ void __init paging_init(void) __flush_tlb_all(); - kmap_init(); - /* * NOTE: at this point the bootmem allocator is fully available. */ diff --git a/arch/x86/mm/iomap_32.c b/arch/x86/mm/iomap_32.c index f60398aeb644..9aaa756ddf21 100644 --- a/arch/x86/mm/iomap_32.c +++ b/arch/x86/mm/iomap_32.c @@ -44,28 +44,7 @@ void iomap_free(resource_size_t base, unsigned long size) } EXPORT_SYMBOL_GPL(iomap_free); -void *kmap_atomic_prot_pfn(unsigned long pfn, pgprot_t prot) -{ - unsigned long vaddr; - int idx, type; - - preempt_disable(); - pagefault_disable(); - - type = kmap_atomic_idx_push(); - idx = type + KM_TYPE_NR * smp_processor_id(); - vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); - set_pte(kmap_pte - idx, pfn_pte(pfn, prot)); - arch_flush_lazy_mmu_mode(); - - return (void *)vaddr; -} - -/* - * Map 'pfn' using protections 'prot' - */ -void __iomem * -iomap_atomic_prot_pfn(unsigned long pfn, pgprot_t prot) +void __iomem *__iomap_local_pfn_prot(unsigned long pfn, pgprot_t prot) { /* * For non-PAT systems, translate non-WB request to UC- just in @@ -81,36 +60,6 @@ iomap_atomic_prot_pfn(unsigned long pfn, pgprot_t prot) /* Filter out unsupported __PAGE_KERNEL* bits: */ pgprot_val(prot) &= __default_kernel_pte_mask; - return (void __force __iomem *) kmap_atomic_prot_pfn(pfn, prot); + return (void __force __iomem *)__kmap_local_pfn_prot(pfn, prot); } -EXPORT_SYMBOL_GPL(iomap_atomic_prot_pfn); - -void -iounmap_atomic(void __iomem *kvaddr) -{ - unsigned long vaddr = (unsigned long) kvaddr & PAGE_MASK; - - if (vaddr >= __fix_to_virt(FIX_KMAP_END) && - vaddr <= __fix_to_virt(FIX_KMAP_BEGIN)) { - int idx, type; - - type = kmap_atomic_idx(); - idx = type + KM_TYPE_NR * smp_processor_id(); - -#ifdef CONFIG_DEBUG_HIGHMEM - WARN_ON_ONCE(vaddr != __fix_to_virt(FIX_KMAP_BEGIN + idx)); -#endif - /* - * Force other mappings to Oops if they'll try to access this - * pte without first remap it. Keeping stale mappings around - * is a bad idea also, in case the page changes cacheability - * attributes or becomes a protected page in a hypervisor. - */ - kpte_clear_flush(kmap_pte-idx, vaddr); - kmap_atomic_idx_pop(); - } - - pagefault_enable(); - preempt_enable(); -} -EXPORT_SYMBOL_GPL(iounmap_atomic); +EXPORT_SYMBOL_GPL(__iomap_local_pfn_prot); diff --git a/arch/x86/pci/intel_mid_pci.c b/arch/x86/pci/intel_mid_pci.c index 24ca4ee2802f..95e2e6bd8d8c 100644 --- a/arch/x86/pci/intel_mid_pci.c +++ b/arch/x86/pci/intel_mid_pci.c @@ -215,7 +215,7 @@ static int pci_write(struct pci_bus *bus, unsigned int devfn, int where, static int intel_mid_pci_irq_enable(struct pci_dev *dev) { struct irq_alloc_info info; - int polarity; + bool polarity_low; int ret; u8 gsi; @@ -230,7 +230,7 @@ static int intel_mid_pci_irq_enable(struct pci_dev *dev) switch (intel_mid_identify_cpu()) { case INTEL_MID_CPU_CHIP_TANGIER: - polarity = IOAPIC_POL_HIGH; + polarity_low = false; /* Special treatment for IRQ0 */ if (gsi == 0) { @@ -252,11 +252,11 @@ static int intel_mid_pci_irq_enable(struct pci_dev *dev) } break; default: - polarity = IOAPIC_POL_LOW; + polarity_low = true; break; } - ioapic_set_alloc_attr(&info, dev_to_node(&dev->dev), 1, polarity); + ioapic_set_alloc_attr(&info, dev_to_node(&dev->dev), 1, polarity_low); /* * MRST only have IOAPIC, the PCI irq lines are 1:1 mapped to diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c index c552cd2d0632..3d41a09c2c14 100644 --- a/arch/x86/pci/xen.c +++ b/arch/x86/pci/xen.c @@ -152,7 +152,6 @@ static int acpi_register_gsi_xen(struct device *dev, u32 gsi, #if defined(CONFIG_PCI_MSI) #include -#include struct xen_pci_frontend_ops *xen_pci_frontend; EXPORT_SYMBOL_GPL(xen_pci_frontend); @@ -210,23 +209,20 @@ free: return ret; } -#define XEN_PIRQ_MSI_DATA (MSI_DATA_TRIGGER_EDGE | \ - MSI_DATA_LEVEL_ASSERT | (3 << 8) | MSI_DATA_VECTOR(0)) - static void xen_msi_compose_msg(struct pci_dev *pdev, unsigned int pirq, struct msi_msg *msg) { - /* We set vector == 0 to tell the hypervisor we don't care about it, - * but we want a pirq setup instead. - * We use the dest_id field to pass the pirq that we want. */ - msg->address_hi = MSI_ADDR_BASE_HI | MSI_ADDR_EXT_DEST_ID(pirq); - msg->address_lo = - MSI_ADDR_BASE_LO | - MSI_ADDR_DEST_MODE_PHYSICAL | - MSI_ADDR_REDIRECTION_CPU | - MSI_ADDR_DEST_ID(pirq); - - msg->data = XEN_PIRQ_MSI_DATA; + /* + * We set vector == 0 to tell the hypervisor we don't care about + * it, but we want a pirq setup instead. We use the dest_id fields + * to pass the pirq that we want. + */ + memset(msg, 0, sizeof(*msg)); + msg->address_hi = X86_MSI_BASE_ADDRESS_HIGH; + msg->arch_addr_hi.destid_8_31 = pirq >> 8; + msg->arch_addr_lo.destid_0_7 = pirq & 0xFF; + msg->arch_addr_lo.base_address = X86_MSI_BASE_ADDRESS_LOW; + msg->arch_data.delivery_mode = APIC_DELIVERY_MODE_EXTINT; } static int xen_hvm_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) diff --git a/arch/x86/platform/uv/uv_irq.c b/arch/x86/platform/uv/uv_irq.c index 18ca2261cc9a..1a536a187d74 100644 --- a/arch/x86/platform/uv/uv_irq.c +++ b/arch/x86/platform/uv/uv_irq.c @@ -35,8 +35,8 @@ static void uv_program_mmr(struct irq_cfg *cfg, struct uv_irq_2_mmr_pnode *info) mmr_value = 0; entry = (struct uv_IO_APIC_route_entry *)&mmr_value; entry->vector = cfg->vector; - entry->delivery_mode = apic->irq_delivery_mode; - entry->dest_mode = apic->irq_dest_mode; + entry->delivery_mode = apic->delivery_mode; + entry->dest_mode = apic->dest_mode_logical; entry->polarity = 0; entry->trigger = 0; entry->mask = 0; diff --git a/arch/x86/xen/apic.c b/arch/x86/xen/apic.c index e82fd1910dae..0d46cc283cf5 100644 --- a/arch/x86/xen/apic.c +++ b/arch/x86/xen/apic.c @@ -148,15 +148,12 @@ static struct apic xen_pv_apic = { .apic_id_valid = xen_id_always_valid, .apic_id_registered = xen_id_always_registered, - /* .irq_delivery_mode - used in native_compose_msi_msg only */ - /* .irq_dest_mode - used in native_compose_msi_msg only */ + /* .delivery_mode and .dest_mode_logical not used by XENPV */ .disable_esr = 0, - /* .dest_logical - default_send_IPI_ use it but we use our own. */ + .check_apicid_used = default_check_apicid_used, /* Used on 32-bit */ - .init_apic_ldr = xen_noop, /* setup_local_APIC calls it */ - .ioapic_phys_id_map = default_ioapic_phys_id_map, /* Used on 32-bit */ .setup_apic_routing = NULL, .cpu_present_to_apicid = xen_cpu_present_to_apicid, diff --git a/arch/xtensa/Kconfig b/arch/xtensa/Kconfig index d0dfa50bd0bb..dc22ef3cf4be 100644 --- a/arch/xtensa/Kconfig +++ b/arch/xtensa/Kconfig @@ -666,6 +666,7 @@ endchoice config HIGHMEM bool "High Memory Support" depends on MMU + select KMAP_LOCAL help Linux can use the full amount of RAM in the system by default. However, the default MMUv2 setup only maps the diff --git a/arch/xtensa/include/asm/fixmap.h b/arch/xtensa/include/asm/fixmap.h index a06ffb0c61c7..1c65dc1d3397 100644 --- a/arch/xtensa/include/asm/fixmap.h +++ b/arch/xtensa/include/asm/fixmap.h @@ -16,64 +16,23 @@ #ifdef CONFIG_HIGHMEM #include #include -#include -#endif +#include -/* - * Here we define all the compile-time 'special' virtual - * addresses. The point is to have a constant address at - * compile time, but to set the physical address only - * in the boot process. We allocate these special addresses - * from the start of the consistent memory region upwards. - * Also this lets us do fail-safe vmalloc(), we - * can guarantee that these special addresses and - * vmalloc()-ed addresses never overlap. - * - * these 'compile-time allocated' memory buffers are - * fixed-size 4k pages. (or larger if used with an increment - * higher than 1) use fixmap_set(idx,phys) to associate - * physical memory with fixmap indices. - */ +/* The map slots for temporary mappings via kmap_atomic/local(). */ enum fixed_addresses { -#ifdef CONFIG_HIGHMEM - /* reserved pte's for temporary kernel mappings */ FIX_KMAP_BEGIN, FIX_KMAP_END = FIX_KMAP_BEGIN + - (KM_TYPE_NR * NR_CPUS * DCACHE_N_COLORS) - 1, -#endif + (KM_MAX_IDX * NR_CPUS * DCACHE_N_COLORS) - 1, __end_of_fixed_addresses }; -#define FIXADDR_TOP (XCHAL_KSEG_CACHED_VADDR - PAGE_SIZE) +#define FIXADDR_END (XCHAL_KSEG_CACHED_VADDR - PAGE_SIZE) #define FIXADDR_SIZE (__end_of_fixed_addresses << PAGE_SHIFT) -#define FIXADDR_START ((FIXADDR_TOP - FIXADDR_SIZE) & PMD_MASK) +/* Enforce that FIXADDR_START is PMD aligned to handle cache aliasing */ +#define FIXADDR_START ((FIXADDR_END - FIXADDR_SIZE) & PMD_MASK) +#define FIXADDR_TOP (FIXADDR_START + FIXADDR_SIZE - PAGE_SIZE) -#define __fix_to_virt(x) (FIXADDR_START + ((x) << PAGE_SHIFT)) -#define __virt_to_fix(x) (((x) - FIXADDR_START) >> PAGE_SHIFT) - -#ifndef __ASSEMBLY__ -/* - * 'index to address' translation. If anyone tries to use the idx - * directly without translation, we catch the bug with a NULL-deference - * kernel oops. Illegal ranges of incoming indices are caught too. - */ -static __always_inline unsigned long fix_to_virt(const unsigned int idx) -{ - /* Check if this memory layout is broken because fixmap overlaps page - * table. - */ - BUILD_BUG_ON(FIXADDR_START < - TLBTEMP_BASE_1 + TLBTEMP_SIZE); - BUILD_BUG_ON(idx >= __end_of_fixed_addresses); - return __fix_to_virt(idx); -} - -static inline unsigned long virt_to_fix(const unsigned long vaddr) -{ - BUG_ON(vaddr >= FIXADDR_TOP || vaddr < FIXADDR_START); - return __virt_to_fix(vaddr); -} - -#endif +#include +#endif /* CONFIG_HIGHMEM */ #endif diff --git a/arch/xtensa/include/asm/highmem.h b/arch/xtensa/include/asm/highmem.h index eac503215f17..34b8b620e7f1 100644 --- a/arch/xtensa/include/asm/highmem.h +++ b/arch/xtensa/include/asm/highmem.h @@ -12,13 +12,13 @@ #ifndef _XTENSA_HIGHMEM_H #define _XTENSA_HIGHMEM_H +#ifdef CONFIG_HIGHMEM #include #include #include #include -#include -#define PKMAP_BASE ((FIXADDR_START - \ +#define PKMAP_BASE ((FIXADDR_START - \ (LAST_PKMAP + 1) * PAGE_SIZE) & PMD_MASK) #define LAST_PKMAP (PTRS_PER_PTE * DCACHE_N_COLORS) #define LAST_PKMAP_MASK (LAST_PKMAP - 1) @@ -59,6 +59,13 @@ static inline wait_queue_head_t *get_pkmap_wait_queue_head(unsigned int color) { return pkmap_map_wait_arr + color; } + +enum fixed_addresses kmap_local_map_idx(int type, unsigned long pfn); +#define arch_kmap_local_map_idx kmap_local_map_idx + +enum fixed_addresses kmap_local_unmap_idx(int type, unsigned long addr); +#define arch_kmap_local_unmap_idx kmap_local_unmap_idx + #endif extern pte_t *pkmap_page_table; @@ -68,6 +75,10 @@ static inline void flush_cache_kmaps(void) flush_cache_all(); } +#define arch_kmap_local_post_unmap(vaddr) \ + local_flush_tlb_kernel_range(vaddr, vaddr + PAGE_SIZE) + void kmap_init(void); +#endif /* CONFIG_HIGHMEM */ #endif diff --git a/arch/xtensa/mm/highmem.c b/arch/xtensa/mm/highmem.c index 673196fe862e..35c4f7d4a333 100644 --- a/arch/xtensa/mm/highmem.c +++ b/arch/xtensa/mm/highmem.c @@ -12,8 +12,6 @@ #include #include -static pte_t *kmap_pte; - #if DCACHE_WAY_SIZE > PAGE_SIZE unsigned int last_pkmap_nr_arr[DCACHE_N_COLORS]; wait_queue_head_t pkmap_map_wait_arr[DCACHE_N_COLORS]; @@ -25,67 +23,37 @@ static void __init kmap_waitqueues_init(void) for (i = 0; i < ARRAY_SIZE(pkmap_map_wait_arr); ++i) init_waitqueue_head(pkmap_map_wait_arr + i); } -#else -static inline void kmap_waitqueues_init(void) -{ -} -#endif static inline enum fixed_addresses kmap_idx(int type, unsigned long color) { - return (type + KM_TYPE_NR * smp_processor_id()) * DCACHE_N_COLORS + - color; + int idx = (type + KM_MAX_IDX * smp_processor_id()) * DCACHE_N_COLORS; + + /* + * The fixmap operates top down, so the color offset needs to be + * reverse as well. + */ + return idx + DCACHE_N_COLORS - 1 - color; } -void *kmap_atomic_high_prot(struct page *page, pgprot_t prot) +enum fixed_addresses kmap_local_map_idx(int type, unsigned long pfn) { - enum fixed_addresses idx; - unsigned long vaddr; + return kmap_idx(type, DCACHE_ALIAS(pfn << PAGE_SHIFT)); +} - idx = kmap_idx(kmap_atomic_idx_push(), - DCACHE_ALIAS(page_to_phys(page))); - vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); -#ifdef CONFIG_DEBUG_HIGHMEM - BUG_ON(!pte_none(*(kmap_pte + idx))); +enum fixed_addresses kmap_local_unmap_idx(int type, unsigned long addr) +{ + return kmap_idx(type, DCACHE_ALIAS(addr)); +} + +#else +static inline void kmap_waitqueues_init(void) { } #endif - set_pte(kmap_pte + idx, mk_pte(page, prot)); - - return (void *)vaddr; -} -EXPORT_SYMBOL(kmap_atomic_high_prot); - -void kunmap_atomic_high(void *kvaddr) -{ - if (kvaddr >= (void *)FIXADDR_START && - kvaddr < (void *)FIXADDR_TOP) { - int idx = kmap_idx(kmap_atomic_idx(), - DCACHE_ALIAS((unsigned long)kvaddr)); - - /* - * Force other mappings to Oops if they'll try to access this - * pte without first remap it. Keeping stale mappings around - * is a bad idea also, in case the page changes cacheability - * attributes or becomes a protected page in a hypervisor. - */ - pte_clear(&init_mm, kvaddr, kmap_pte + idx); - local_flush_tlb_kernel_range((unsigned long)kvaddr, - (unsigned long)kvaddr + PAGE_SIZE); - - kmap_atomic_idx_pop(); - } -} -EXPORT_SYMBOL(kunmap_atomic_high); void __init kmap_init(void) { - unsigned long kmap_vstart; - /* Check if this memory layout is broken because PKMAP overlaps * page table. */ BUILD_BUG_ON(PKMAP_BASE < TLBTEMP_BASE_1 + TLBTEMP_SIZE); - /* cache the first kmap pte */ - kmap_vstart = __fix_to_virt(FIX_KMAP_BEGIN); - kmap_pte = virt_to_kpte(kmap_vstart); kmap_waitqueues_init(); } diff --git a/arch/xtensa/mm/init.c b/arch/xtensa/mm/init.c index 8731b7ad9308..2daeba9e454e 100644 --- a/arch/xtensa/mm/init.c +++ b/arch/xtensa/mm/init.c @@ -147,8 +147,8 @@ void __init mem_init(void) #ifdef CONFIG_HIGHMEM PKMAP_BASE, PKMAP_BASE + LAST_PKMAP * PAGE_SIZE, (LAST_PKMAP*PAGE_SIZE) >> 10, - FIXADDR_START, FIXADDR_TOP, - (FIXADDR_TOP - FIXADDR_START) >> 10, + FIXADDR_START, FIXADDR_END, + (FIXADDR_END - FIXADDR_START) >> 10, #endif PAGE_OFFSET, PAGE_OFFSET + (max_low_pfn - min_low_pfn) * PAGE_SIZE, diff --git a/arch/xtensa/mm/mmu.c b/arch/xtensa/mm/mmu.c index fd2193df8a14..7e4d97dc8bd8 100644 --- a/arch/xtensa/mm/mmu.c +++ b/arch/xtensa/mm/mmu.c @@ -52,7 +52,8 @@ static void * __init init_pmd(unsigned long vaddr, unsigned long n_pages) static void __init fixedrange_init(void) { - init_pmd(__fix_to_virt(0), __end_of_fixed_addresses); + BUILD_BUG_ON(FIXADDR_START < TLBTEMP_BASE_1 + TLBTEMP_SIZE); + init_pmd(FIXADDR_START, __end_of_fixed_addresses); } #endif diff --git a/drivers/iommu/amd/amd_iommu_types.h b/drivers/iommu/amd/amd_iommu_types.h index 494b42a31b7a..52d70ca36169 100644 --- a/drivers/iommu/amd/amd_iommu_types.h +++ b/drivers/iommu/amd/amd_iommu_types.h @@ -897,7 +897,7 @@ struct amd_ir_data { }; struct amd_irte_ops { - void (*prepare)(void *, u32, u32, u8, u32, int); + void (*prepare)(void *, u32, bool, u8, u32, int); void (*activate)(void *, u16, u16); void (*deactivate)(void *, u16, u16); void (*set_affinity)(void *, u16, u16, u8, u32); diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c index 23a790f8f550..5ff5687a87c7 100644 --- a/drivers/iommu/amd/init.c +++ b/drivers/iommu/amd/init.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include @@ -23,7 +24,6 @@ #include #include #include -#include #include #include #include @@ -1575,14 +1575,7 @@ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h) break; } - /* - * Note: Since iommu_update_intcapxt() leverages - * the IOMMU MMIO access to MSI capability block registers - * for MSI address lo/hi/data, we need to check both - * EFR[XtSup] and EFR[MsiCapMmioSup] for x2APIC support. - */ - if ((h->efr_reg & BIT(IOMMU_EFR_XTSUP_SHIFT)) && - (h->efr_reg & BIT(IOMMU_EFR_MSICAPMMIOSUP_SHIFT))) + if (h->efr_reg & BIT(IOMMU_EFR_XTSUP_SHIFT)) amd_iommu_xt_mode = IRQ_REMAP_X2APIC_MODE; break; default: @@ -1619,9 +1612,11 @@ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h) if (ret) return ret; - ret = amd_iommu_create_irq_domain(iommu); - if (ret) - return ret; + if (amd_iommu_irq_remap) { + ret = amd_iommu_create_irq_domain(iommu); + if (ret) + return ret; + } /* * Make sure IOMMU is not considered to translate itself. The IVRS @@ -1983,98 +1978,190 @@ static int iommu_setup_msi(struct amd_iommu *iommu) return 0; } -#define XT_INT_DEST_MODE(x) (((x) & 0x1ULL) << 2) -#define XT_INT_DEST_LO(x) (((x) & 0xFFFFFFULL) << 8) -#define XT_INT_VEC(x) (((x) & 0xFFULL) << 32) -#define XT_INT_DEST_HI(x) ((((x) >> 24) & 0xFFULL) << 56) +union intcapxt { + u64 capxt; + struct { + u64 reserved_0 : 2, + dest_mode_logical : 1, + reserved_1 : 5, + destid_0_23 : 24, + vector : 8, + reserved_2 : 16, + destid_24_31 : 8; + }; +} __attribute__ ((packed)); /* - * Setup the IntCapXT registers with interrupt routing information - * based on the PCI MSI capability block registers, accessed via - * MMIO MSI address low/hi and MSI data registers. + * There isn't really any need to mask/unmask at the irqchip level because + * the 64-bit INTCAPXT registers can be updated atomically without tearing + * when the affinity is being updated. */ -static void iommu_update_intcapxt(struct amd_iommu *iommu) +static void intcapxt_unmask_irq(struct irq_data *data) { - u64 val; - u32 addr_lo = readl(iommu->mmio_base + MMIO_MSI_ADDR_LO_OFFSET); - u32 addr_hi = readl(iommu->mmio_base + MMIO_MSI_ADDR_HI_OFFSET); - u32 data = readl(iommu->mmio_base + MMIO_MSI_DATA_OFFSET); - bool dm = (addr_lo >> MSI_ADDR_DEST_MODE_SHIFT) & 0x1; - u32 dest = ((addr_lo >> MSI_ADDR_DEST_ID_SHIFT) & 0xFF); +} - if (x2apic_enabled()) - dest |= MSI_ADDR_EXT_DEST_ID(addr_hi); +static void intcapxt_mask_irq(struct irq_data *data) +{ +} - val = XT_INT_VEC(data & 0xFF) | - XT_INT_DEST_MODE(dm) | - XT_INT_DEST_LO(dest) | - XT_INT_DEST_HI(dest); +static struct irq_chip intcapxt_controller; + +static int intcapxt_irqdomain_activate(struct irq_domain *domain, + struct irq_data *irqd, bool reserve) +{ + struct amd_iommu *iommu = irqd->chip_data; + struct irq_cfg *cfg = irqd_cfg(irqd); + union intcapxt xt; + + xt.capxt = 0ULL; + xt.dest_mode_logical = apic->dest_mode_logical; + xt.vector = cfg->vector; + xt.destid_0_23 = cfg->dest_apicid & GENMASK(23, 0); + xt.destid_24_31 = cfg->dest_apicid >> 24; /** * Current IOMMU implemtation uses the same IRQ for all * 3 IOMMU interrupts. */ - writeq(val, iommu->mmio_base + MMIO_INTCAPXT_EVT_OFFSET); - writeq(val, iommu->mmio_base + MMIO_INTCAPXT_PPR_OFFSET); - writeq(val, iommu->mmio_base + MMIO_INTCAPXT_GALOG_OFFSET); + writeq(xt.capxt, iommu->mmio_base + MMIO_INTCAPXT_EVT_OFFSET); + writeq(xt.capxt, iommu->mmio_base + MMIO_INTCAPXT_PPR_OFFSET); + writeq(xt.capxt, iommu->mmio_base + MMIO_INTCAPXT_GALOG_OFFSET); + return 0; } -static void _irq_notifier_notify(struct irq_affinity_notify *notify, - const cpumask_t *mask) +static void intcapxt_irqdomain_deactivate(struct irq_domain *domain, + struct irq_data *irqd) { - struct amd_iommu *iommu; - - for_each_iommu(iommu) { - if (iommu->dev->irq == notify->irq) { - iommu_update_intcapxt(iommu); - break; - } - } + intcapxt_mask_irq(irqd); } -static void _irq_notifier_release(struct kref *ref) + +static int intcapxt_irqdomain_alloc(struct irq_domain *domain, unsigned int virq, + unsigned int nr_irqs, void *arg) { -} + struct irq_alloc_info *info = arg; + int i, ret; -static int iommu_init_intcapxt(struct amd_iommu *iommu) -{ - int ret; - struct irq_affinity_notify *notify = &iommu->intcapxt_notify; + if (!info || info->type != X86_IRQ_ALLOC_TYPE_AMDVI) + return -EINVAL; - /** - * IntCapXT requires XTSup=1 and MsiCapMmioSup=1, - * which can be inferred from amd_iommu_xt_mode. - */ - if (amd_iommu_xt_mode != IRQ_REMAP_X2APIC_MODE) - return 0; - - /** - * Also, we need to setup notifier to update the IntCapXT registers - * whenever the irq affinity is changed from user-space. - */ - notify->irq = iommu->dev->irq; - notify->notify = _irq_notifier_notify, - notify->release = _irq_notifier_release, - ret = irq_set_affinity_notifier(iommu->dev->irq, notify); - if (ret) { - pr_err("Failed to register irq affinity notifier (devid=%#x, irq %d)\n", - iommu->devid, iommu->dev->irq); + ret = irq_domain_alloc_irqs_parent(domain, virq, nr_irqs, arg); + if (ret < 0) return ret; + + for (i = virq; i < virq + nr_irqs; i++) { + struct irq_data *irqd = irq_domain_get_irq_data(domain, i); + + irqd->chip = &intcapxt_controller; + irqd->chip_data = info->data; + __irq_set_handler(i, handle_edge_irq, 0, "edge"); } - iommu_update_intcapxt(iommu); - iommu_feature_enable(iommu, CONTROL_INTCAPXT_EN); return ret; } -static int iommu_init_msi(struct amd_iommu *iommu) +static void intcapxt_irqdomain_free(struct irq_domain *domain, unsigned int virq, + unsigned int nr_irqs) +{ + irq_domain_free_irqs_top(domain, virq, nr_irqs); +} + +static int intcapxt_set_affinity(struct irq_data *irqd, + const struct cpumask *mask, bool force) +{ + struct irq_data *parent = irqd->parent_data; + int ret; + + ret = parent->chip->irq_set_affinity(parent, mask, force); + if (ret < 0 || ret == IRQ_SET_MASK_OK_DONE) + return ret; + + return intcapxt_irqdomain_activate(irqd->domain, irqd, false); +} + +static struct irq_chip intcapxt_controller = { + .name = "IOMMU-MSI", + .irq_unmask = intcapxt_unmask_irq, + .irq_mask = intcapxt_mask_irq, + .irq_ack = irq_chip_ack_parent, + .irq_retrigger = irq_chip_retrigger_hierarchy, + .irq_set_affinity = intcapxt_set_affinity, + .flags = IRQCHIP_SKIP_SET_WAKE, +}; + +static const struct irq_domain_ops intcapxt_domain_ops = { + .alloc = intcapxt_irqdomain_alloc, + .free = intcapxt_irqdomain_free, + .activate = intcapxt_irqdomain_activate, + .deactivate = intcapxt_irqdomain_deactivate, +}; + + +static struct irq_domain *iommu_irqdomain; + +static struct irq_domain *iommu_get_irqdomain(void) +{ + struct fwnode_handle *fn; + + /* No need for locking here (yet) as the init is single-threaded */ + if (iommu_irqdomain) + return iommu_irqdomain; + + fn = irq_domain_alloc_named_fwnode("AMD-Vi-MSI"); + if (!fn) + return NULL; + + iommu_irqdomain = irq_domain_create_hierarchy(x86_vector_domain, 0, 0, + fn, &intcapxt_domain_ops, + NULL); + if (!iommu_irqdomain) + irq_domain_free_fwnode(fn); + + return iommu_irqdomain; +} + +static int iommu_setup_intcapxt(struct amd_iommu *iommu) +{ + struct irq_domain *domain; + struct irq_alloc_info info; + int irq, ret; + + domain = iommu_get_irqdomain(); + if (!domain) + return -ENXIO; + + init_irq_alloc_info(&info, NULL); + info.type = X86_IRQ_ALLOC_TYPE_AMDVI; + info.data = iommu; + + irq = irq_domain_alloc_irqs(domain, 1, NUMA_NO_NODE, &info); + if (irq < 0) { + irq_domain_remove(domain); + return irq; + } + + ret = request_threaded_irq(irq, amd_iommu_int_handler, + amd_iommu_int_thread, 0, "AMD-Vi", iommu); + if (ret) { + irq_domain_free_irqs(irq, 1); + irq_domain_remove(domain); + return ret; + } + + iommu_feature_enable(iommu, CONTROL_INTCAPXT_EN); + return 0; +} + +static int iommu_init_irq(struct amd_iommu *iommu) { int ret; if (iommu->int_enabled) goto enable_faults; - if (iommu->dev->msi_cap) + if (amd_iommu_xt_mode == IRQ_REMAP_X2APIC_MODE) + ret = iommu_setup_intcapxt(iommu); + else if (iommu->dev->msi_cap) ret = iommu_setup_msi(iommu); else ret = -ENODEV; @@ -2083,10 +2170,6 @@ static int iommu_init_msi(struct amd_iommu *iommu) return ret; enable_faults: - ret = iommu_init_intcapxt(iommu); - if (ret) - return ret; - iommu_feature_enable(iommu, CONTROL_EVT_INT_EN); if (iommu->ppr_log != NULL) @@ -2709,7 +2792,7 @@ static int amd_iommu_enable_interrupts(void) int ret = 0; for_each_iommu(iommu) { - ret = iommu_init_msi(iommu); + ret = iommu_init_irq(iommu); if (ret) goto out; } diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index b9cf59443843..463d322a4f3b 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -35,7 +35,6 @@ #include #include #include -#include #include #include #include @@ -3466,7 +3465,7 @@ static void free_irte(u16 devid, int index) } static void irte_prepare(void *entry, - u32 delivery_mode, u32 dest_mode, + u32 delivery_mode, bool dest_mode, u8 vector, u32 dest_apicid, int devid) { union irte *irte = (union irte *) entry; @@ -3480,7 +3479,7 @@ static void irte_prepare(void *entry, } static void irte_ga_prepare(void *entry, - u32 delivery_mode, u32 dest_mode, + u32 delivery_mode, bool dest_mode, u8 vector, u32 dest_apicid, int devid) { struct irte_ga *irte = (struct irte_ga *) entry; @@ -3602,10 +3601,8 @@ static int get_devid(struct irq_alloc_info *info) { switch (info->type) { case X86_IRQ_ALLOC_TYPE_IOAPIC: - case X86_IRQ_ALLOC_TYPE_IOAPIC_GET_PARENT: return get_ioapic_devid(info->devid); case X86_IRQ_ALLOC_TYPE_HPET: - case X86_IRQ_ALLOC_TYPE_HPET_GET_PARENT: return get_hpet_devid(info->devid); case X86_IRQ_ALLOC_TYPE_PCI_MSI: case X86_IRQ_ALLOC_TYPE_PCI_MSIX: @@ -3616,54 +3613,28 @@ static int get_devid(struct irq_alloc_info *info) } } -static struct irq_domain *get_irq_domain_for_devid(struct irq_alloc_info *info, - int devid) -{ - struct amd_iommu *iommu = amd_iommu_rlookup_table[devid]; - - if (!iommu) - return NULL; - - switch (info->type) { - case X86_IRQ_ALLOC_TYPE_IOAPIC_GET_PARENT: - case X86_IRQ_ALLOC_TYPE_HPET_GET_PARENT: - return iommu->ir_domain; - default: - WARN_ON_ONCE(1); - return NULL; - } -} - -static struct irq_domain *get_irq_domain(struct irq_alloc_info *info) -{ - int devid; - - if (!info) - return NULL; - - devid = get_devid(info); - if (devid < 0) - return NULL; - return get_irq_domain_for_devid(info, devid); -} - struct irq_remap_ops amd_iommu_irq_ops = { .prepare = amd_iommu_prepare, .enable = amd_iommu_enable, .disable = amd_iommu_disable, .reenable = amd_iommu_reenable, .enable_faulting = amd_iommu_enable_faulting, - .get_irq_domain = get_irq_domain, }; +static void fill_msi_msg(struct msi_msg *msg, u32 index) +{ + msg->data = index; + msg->address_lo = 0; + msg->arch_addr_lo.base_address = X86_MSI_BASE_ADDRESS_LOW; + msg->address_hi = X86_MSI_BASE_ADDRESS_HIGH; +} + static void irq_remapping_prepare_irte(struct amd_ir_data *data, struct irq_cfg *irq_cfg, struct irq_alloc_info *info, int devid, int index, int sub_handle) { struct irq_2_irte *irte_info = &data->irq_2_irte; - struct msi_msg *msg = &data->msi_entry; - struct IO_APIC_route_entry *entry; struct amd_iommu *iommu = amd_iommu_rlookup_table[devid]; if (!iommu) @@ -3671,31 +3642,16 @@ static void irq_remapping_prepare_irte(struct amd_ir_data *data, data->irq_2_irte.devid = devid; data->irq_2_irte.index = index + sub_handle; - iommu->irte_ops->prepare(data->entry, apic->irq_delivery_mode, - apic->irq_dest_mode, irq_cfg->vector, + iommu->irte_ops->prepare(data->entry, apic->delivery_mode, + apic->dest_mode_logical, irq_cfg->vector, irq_cfg->dest_apicid, devid); switch (info->type) { case X86_IRQ_ALLOC_TYPE_IOAPIC: - /* Setup IOAPIC entry */ - entry = info->ioapic.entry; - info->ioapic.entry = NULL; - memset(entry, 0, sizeof(*entry)); - entry->vector = index; - entry->mask = 0; - entry->trigger = info->ioapic.trigger; - entry->polarity = info->ioapic.polarity; - /* Mask level triggered irqs. */ - if (info->ioapic.trigger) - entry->mask = 1; - break; - case X86_IRQ_ALLOC_TYPE_HPET: case X86_IRQ_ALLOC_TYPE_PCI_MSI: case X86_IRQ_ALLOC_TYPE_PCI_MSIX: - msg->address_hi = MSI_ADDR_BASE_HI; - msg->address_lo = MSI_ADDR_BASE_LO; - msg->data = irte_info->index; + fill_msi_msg(&data->msi_entry, irte_info->index); break; default: @@ -3892,7 +3848,26 @@ static void irq_remapping_deactivate(struct irq_domain *domain, irte_info->index); } +static int irq_remapping_select(struct irq_domain *d, struct irq_fwspec *fwspec, + enum irq_domain_bus_token bus_token) +{ + struct amd_iommu *iommu; + int devid = -1; + + if (x86_fwspec_is_ioapic(fwspec)) + devid = get_ioapic_devid(fwspec->param[0]); + else if (x86_fwspec_is_hpet(fwspec)) + devid = get_hpet_devid(fwspec->param[0]); + + if (devid < 0) + return 0; + + iommu = amd_iommu_rlookup_table[devid]; + return iommu && iommu->ir_domain == d; +} + static const struct irq_domain_ops amd_ir_domain_ops = { + .select = irq_remapping_select, .alloc = irq_remapping_alloc, .free = irq_remapping_free, .activate = irq_remapping_activate, @@ -3943,8 +3918,8 @@ int amd_iommu_deactivate_guest_mode(void *data) entry->hi.val = 0; entry->lo.fields_remap.valid = valid; - entry->lo.fields_remap.dm = apic->irq_dest_mode; - entry->lo.fields_remap.int_type = apic->irq_delivery_mode; + entry->lo.fields_remap.dm = apic->dest_mode_logical; + entry->lo.fields_remap.int_type = apic->delivery_mode; entry->hi.fields.vector = cfg->vector; entry->lo.fields_remap.destination = APICID_TO_IRTE_DEST_LO(cfg->dest_apicid); diff --git a/drivers/iommu/hyperv-iommu.c b/drivers/iommu/hyperv-iommu.c index e09e2d734c57..1d21a0b5f724 100644 --- a/drivers/iommu/hyperv-iommu.c +++ b/drivers/iommu/hyperv-iommu.c @@ -40,7 +40,6 @@ static int hyperv_ir_set_affinity(struct irq_data *data, { struct irq_data *parent = data->parent_data; struct irq_cfg *cfg = irqd_cfg(data); - struct IO_APIC_route_entry *entry; int ret; /* Return error If new irq affinity is out of ioapic_max_cpumask. */ @@ -51,9 +50,6 @@ static int hyperv_ir_set_affinity(struct irq_data *data, if (ret < 0 || ret == IRQ_SET_MASK_OK_DONE) return ret; - entry = data->chip_data; - entry->dest = cfg->dest_apicid; - entry->vector = cfg->vector; send_cleanup_vector(cfg); return 0; @@ -89,20 +85,6 @@ static int hyperv_irq_remapping_alloc(struct irq_domain *domain, irq_data->chip = &hyperv_ir_chip; - /* - * If there is interrupt remapping function of IOMMU, setting irq - * affinity only needs to change IRTE of IOMMU. But Hyper-V doesn't - * support interrupt remapping function, setting irq affinity of IO-APIC - * interrupts still needs to change IO-APIC registers. But ioapic_ - * configure_entry() will ignore value of cfg->vector and cfg-> - * dest_apicid when IO-APIC's parent irq domain is not the vector - * domain.(See ioapic_configure_entry()) In order to setting vector - * and dest_apicid to IO-APIC register, IO-APIC entry pointer is saved - * in the chip_data and hyperv_irq_remapping_activate()/hyperv_ir_set_ - * affinity() set vector and dest_apicid directly into IO-APIC entry. - */ - irq_data->chip_data = info->ioapic.entry; - /* * Hypver-V IO APIC irq affinity should be in the scope of * ioapic_max_cpumask because no irq remapping support. @@ -119,22 +101,18 @@ static void hyperv_irq_remapping_free(struct irq_domain *domain, irq_domain_free_irqs_common(domain, virq, nr_irqs); } -static int hyperv_irq_remapping_activate(struct irq_domain *domain, - struct irq_data *irq_data, bool reserve) +static int hyperv_irq_remapping_select(struct irq_domain *d, + struct irq_fwspec *fwspec, + enum irq_domain_bus_token bus_token) { - struct irq_cfg *cfg = irqd_cfg(irq_data); - struct IO_APIC_route_entry *entry = irq_data->chip_data; - - entry->dest = cfg->dest_apicid; - entry->vector = cfg->vector; - - return 0; + /* Claim the only I/O APIC emulated by Hyper-V */ + return x86_fwspec_is_ioapic(fwspec); } static const struct irq_domain_ops hyperv_ir_domain_ops = { + .select = hyperv_irq_remapping_select, .alloc = hyperv_irq_remapping_alloc, .free = hyperv_irq_remapping_free, - .activate = hyperv_irq_remapping_activate, }; static int __init hyperv_prepare_irq_remapping(void) @@ -143,6 +121,7 @@ static int __init hyperv_prepare_irq_remapping(void) int i; if (!hypervisor_is_type(X86_HYPER_MS_HYPERV) || + x86_init.hyper.msi_ext_dest_id() || !x2apic_supported()) return -ENODEV; @@ -182,18 +161,9 @@ static int __init hyperv_enable_irq_remapping(void) return IRQ_REMAP_X2APIC_MODE; } -static struct irq_domain *hyperv_get_irq_domain(struct irq_alloc_info *info) -{ - if (info->type == X86_IRQ_ALLOC_TYPE_IOAPIC_GET_PARENT) - return ioapic_ir_domain; - else - return NULL; -} - struct irq_remap_ops hyperv_irq_remap_ops = { .prepare = hyperv_prepare_irq_remapping, .enable = hyperv_enable_irq_remapping, - .get_irq_domain = hyperv_get_irq_domain, }; #endif diff --git a/drivers/iommu/intel/irq_remapping.c b/drivers/iommu/intel/irq_remapping.c index 0cfce1d3b7bb..aeffda92b10b 100644 --- a/drivers/iommu/intel/irq_remapping.c +++ b/drivers/iommu/intel/irq_remapping.c @@ -20,7 +20,6 @@ #include #include #include -#include #include "../irq_remapping.h" @@ -204,13 +203,13 @@ static int modify_irte(struct irq_2_iommu *irq_iommu, return rc; } -static struct irq_domain *map_hpet_to_ir(u8 hpet_id) +static struct intel_iommu *map_hpet_to_iommu(u8 hpet_id) { int i; for (i = 0; i < MAX_HPET_TBS; i++) { if (ir_hpet[i].id == hpet_id && ir_hpet[i].iommu) - return ir_hpet[i].iommu->ir_domain; + return ir_hpet[i].iommu; } return NULL; } @@ -226,13 +225,6 @@ static struct intel_iommu *map_ioapic_to_iommu(int apic) return NULL; } -static struct irq_domain *map_ioapic_to_ir(int apic) -{ - struct intel_iommu *iommu = map_ioapic_to_iommu(apic); - - return iommu ? iommu->ir_domain : NULL; -} - static struct irq_domain *map_dev_to_ir(struct pci_dev *dev) { struct dmar_drhd_unit *drhd = dmar_find_matched_drhd_unit(dev); @@ -1113,7 +1105,7 @@ static void prepare_irte(struct irte *irte, int vector, unsigned int dest) memset(irte, 0, sizeof(*irte)); irte->present = 1; - irte->dst_mode = apic->irq_dest_mode; + irte->dst_mode = apic->dest_mode_logical; /* * Trigger mode in the IRTE will always be edge, and for IO-APIC, the * actual level or edge trigger will be setup in the IO-APIC @@ -1122,35 +1114,18 @@ static void prepare_irte(struct irte *irte, int vector, unsigned int dest) * irq migration in the presence of interrupt-remapping. */ irte->trigger_mode = 0; - irte->dlvry_mode = apic->irq_delivery_mode; + irte->dlvry_mode = apic->delivery_mode; irte->vector = vector; irte->dest_id = IRTE_DEST(dest); irte->redir_hint = 1; } -static struct irq_domain *intel_get_irq_domain(struct irq_alloc_info *info) -{ - if (!info) - return NULL; - - switch (info->type) { - case X86_IRQ_ALLOC_TYPE_IOAPIC_GET_PARENT: - return map_ioapic_to_ir(info->devid); - case X86_IRQ_ALLOC_TYPE_HPET_GET_PARENT: - return map_hpet_to_ir(info->devid); - default: - WARN_ON_ONCE(1); - return NULL; - } -} - struct irq_remap_ops intel_irq_remap_ops = { .prepare = intel_prepare_irq_remapping, .enable = intel_enable_irq_remapping, .disable = disable_irq_remapping, .reenable = reenable_irq_remapping, .enable_faulting = enable_drhd_fault_handling, - .get_irq_domain = intel_get_irq_domain, }; static void intel_ir_reconfigure_irte(struct irq_data *irqd, bool force) @@ -1260,16 +1235,30 @@ static struct irq_chip intel_ir_chip = { .irq_set_vcpu_affinity = intel_ir_set_vcpu_affinity, }; +static void fill_msi_msg(struct msi_msg *msg, u32 index, u32 subhandle) +{ + memset(msg, 0, sizeof(*msg)); + + msg->arch_addr_lo.dmar_base_address = X86_MSI_BASE_ADDRESS_LOW; + msg->arch_addr_lo.dmar_subhandle_valid = true; + msg->arch_addr_lo.dmar_format = true; + msg->arch_addr_lo.dmar_index_0_14 = index & 0x7FFF; + msg->arch_addr_lo.dmar_index_15 = !!(index & 0x8000); + + msg->address_hi = X86_MSI_BASE_ADDRESS_HIGH; + + msg->arch_data.dmar_subhandle = subhandle; +} + static void intel_irq_remapping_prepare_irte(struct intel_ir_data *data, struct irq_cfg *irq_cfg, struct irq_alloc_info *info, int index, int sub_handle) { - struct IR_IO_APIC_route_entry *entry; struct irte *irte = &data->irte_entry; - struct msi_msg *msg = &data->msi_entry; prepare_irte(irte, irq_cfg->vector, irq_cfg->dest_apicid); + switch (info->type) { case X86_IRQ_ALLOC_TYPE_IOAPIC: /* Set source-id of interrupt request */ @@ -1280,46 +1269,20 @@ static void intel_irq_remapping_prepare_irte(struct intel_ir_data *data, irte->trigger_mode, irte->dlvry_mode, irte->avail, irte->vector, irte->dest_id, irte->sid, irte->sq, irte->svt); - - entry = (struct IR_IO_APIC_route_entry *)info->ioapic.entry; - info->ioapic.entry = NULL; - memset(entry, 0, sizeof(*entry)); - entry->index2 = (index >> 15) & 0x1; - entry->zero = 0; - entry->format = 1; - entry->index = (index & 0x7fff); - /* - * IO-APIC RTE will be configured with virtual vector. - * irq handler will do the explicit EOI to the io-apic. - */ - entry->vector = info->ioapic.pin; - entry->mask = 0; /* enable IRQ */ - entry->trigger = info->ioapic.trigger; - entry->polarity = info->ioapic.polarity; - if (info->ioapic.trigger) - entry->mask = 1; /* Mask level triggered irqs. */ + sub_handle = info->ioapic.pin; break; - case X86_IRQ_ALLOC_TYPE_HPET: + set_hpet_sid(irte, info->devid); + break; case X86_IRQ_ALLOC_TYPE_PCI_MSI: case X86_IRQ_ALLOC_TYPE_PCI_MSIX: - if (info->type == X86_IRQ_ALLOC_TYPE_HPET) - set_hpet_sid(irte, info->devid); - else - set_msi_sid(irte, msi_desc_to_pci_dev(info->desc)); - - msg->address_hi = MSI_ADDR_BASE_HI; - msg->data = sub_handle; - msg->address_lo = MSI_ADDR_BASE_LO | MSI_ADDR_IR_EXT_INT | - MSI_ADDR_IR_SHV | - MSI_ADDR_IR_INDEX1(index) | - MSI_ADDR_IR_INDEX2(index); + set_msi_sid(irte, msi_desc_to_pci_dev(info->desc)); break; - default: BUG_ON(1); break; } + fill_msi_msg(&data->msi_entry, index, sub_handle); } static void intel_free_irq_resources(struct irq_domain *domain, @@ -1444,7 +1407,22 @@ static void intel_irq_remapping_deactivate(struct irq_domain *domain, modify_irte(&data->irq_2_iommu, &entry); } +static int intel_irq_remapping_select(struct irq_domain *d, + struct irq_fwspec *fwspec, + enum irq_domain_bus_token bus_token) +{ + struct intel_iommu *iommu = NULL; + + if (x86_fwspec_is_ioapic(fwspec)) + iommu = map_ioapic_to_iommu(fwspec->param[0]); + else if (x86_fwspec_is_hpet(fwspec)) + iommu = map_hpet_to_iommu(fwspec->param[0]); + + return iommu && d == iommu->ir_domain; +} + static const struct irq_domain_ops intel_ir_domain_ops = { + .select = intel_irq_remapping_select, .alloc = intel_irq_remapping_alloc, .free = intel_irq_remapping_free, .activate = intel_irq_remapping_activate, diff --git a/drivers/iommu/irq_remapping.c b/drivers/iommu/irq_remapping.c index 2d84b1ed205e..83314b9d8f38 100644 --- a/drivers/iommu/irq_remapping.c +++ b/drivers/iommu/irq_remapping.c @@ -158,17 +158,3 @@ void panic_if_irq_remap(const char *msg) if (irq_remapping_enabled) panic(msg); } - -/** - * irq_remapping_get_irq_domain - Get the irqdomain serving the request @info - * @info: interrupt allocation information, used to identify the IOMMU device - * - * Returns pointer to IRQ domain, or NULL on failure. - */ -struct irq_domain *irq_remapping_get_irq_domain(struct irq_alloc_info *info) -{ - if (!remap_ops || !remap_ops->get_irq_domain) - return NULL; - - return remap_ops->get_irq_domain(info); -} diff --git a/drivers/iommu/irq_remapping.h b/drivers/iommu/irq_remapping.h index 1661b3d75920..8c89cb947cdb 100644 --- a/drivers/iommu/irq_remapping.h +++ b/drivers/iommu/irq_remapping.h @@ -42,9 +42,6 @@ struct irq_remap_ops { /* Enable fault handling */ int (*enable_faulting)(void); - - /* Get the irqdomain associated to IOMMU device */ - struct irq_domain *(*get_irq_domain)(struct irq_alloc_info *); }; extern struct irq_remap_ops intel_irq_remap_ops; diff --git a/drivers/pci/controller/pci-hyperv.c b/drivers/pci/controller/pci-hyperv.c index 03ed5cb1c4b2..6db8d96a78eb 100644 --- a/drivers/pci/controller/pci-hyperv.c +++ b/drivers/pci/controller/pci-hyperv.c @@ -1226,7 +1226,7 @@ static void hv_irq_unmask(struct irq_data *data) params->int_target.vector = cfg->vector; /* - * Honoring apic->irq_delivery_mode set to dest_Fixed by + * Honoring apic->delivery_mode set to APIC_DELIVERY_MODE_FIXED by * setting the HV_DEVICE_INTERRUPT_TARGET_MULTICAST flag results in a * spurious interrupt storm. Not doing so does not seem to have a * negative effect (yet?). @@ -1324,7 +1324,7 @@ static u32 hv_compose_msi_req_v1( int_pkt->wslot.slot = slot; int_pkt->int_desc.vector = vector; int_pkt->int_desc.vector_count = 1; - int_pkt->int_desc.delivery_mode = dest_Fixed; + int_pkt->int_desc.delivery_mode = APIC_DELIVERY_MODE_FIXED; /* * Create MSI w/ dummy vCPU set, overwritten by subsequent retarget in @@ -1345,7 +1345,7 @@ static u32 hv_compose_msi_req_v2( int_pkt->wslot.slot = slot; int_pkt->int_desc.vector = vector; int_pkt->int_desc.vector_count = 1; - int_pkt->int_desc.delivery_mode = dest_Fixed; + int_pkt->int_desc.delivery_mode = APIC_DELIVERY_MODE_FIXED; /* * Create MSI w/ dummy vCPU set targeting just one vCPU, overwritten diff --git a/drivers/pci/controller/vmd.c b/drivers/pci/controller/vmd.c index f375c21ceeb1..6f8795454e5a 100644 --- a/drivers/pci/controller/vmd.c +++ b/drivers/pci/controller/vmd.c @@ -18,7 +18,6 @@ #include #include #include -#include #define VMD_CFGBAR 0 #define VMD_MEMBAR1 2 @@ -131,10 +130,10 @@ static void vmd_compose_msi_msg(struct irq_data *data, struct msi_msg *msg) struct vmd_irq_list *irq = vmdirq->irq; struct vmd_dev *vmd = irq_data_get_irq_handler_data(data); - msg->address_hi = MSI_ADDR_BASE_HI; - msg->address_lo = MSI_ADDR_BASE_LO | - MSI_ADDR_DEST_ID(index_from_irqs(vmd, irq)); - msg->data = 0; + memset(msg, 0, sizeof(*msg)); + msg->address_hi = X86_MSI_BASE_ADDRESS_HIGH; + msg->arch_addr_lo.base_address = X86_MSI_BASE_ADDRESS_LOW; + msg->arch_addr_lo.destid_0_7 = index_from_irqs(vmd, irq); } /* diff --git a/fs/aio.c b/fs/aio.c index 6a21d8919409..76ce0cc3ee4e 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -43,7 +43,6 @@ #include #include -#include #include #include diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 0b29bdb25105..d13006588647 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -17,7 +17,6 @@ #include #include #include -#include #include #include #include diff --git a/include/asm-generic/Kbuild b/include/asm-generic/Kbuild index e78bbb9a07e9..4365b9aa3e3f 100644 --- a/include/asm-generic/Kbuild +++ b/include/asm-generic/Kbuild @@ -30,7 +30,7 @@ mandatory-y += irq.h mandatory-y += irq_regs.h mandatory-y += irq_work.h mandatory-y += kdebug.h -mandatory-y += kmap_types.h +mandatory-y += kmap_size.h mandatory-y += kprobes.h mandatory-y += linkage.h mandatory-y += local.h diff --git a/include/asm-generic/kmap_size.h b/include/asm-generic/kmap_size.h new file mode 100644 index 000000000000..6e36b2443ece --- /dev/null +++ b/include/asm-generic/kmap_size.h @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_GENERIC_KMAP_SIZE_H +#define _ASM_GENERIC_KMAP_SIZE_H + +/* For debug this provides guard pages between the maps */ +#ifdef CONFIG_DEBUG_KMAP_LOCAL +# define KM_MAX_IDX 33 +#else +# define KM_MAX_IDX 16 +#endif + +#endif diff --git a/include/asm-generic/kmap_types.h b/include/asm-generic/kmap_types.h deleted file mode 100644 index 9f95b7b63d19..000000000000 --- a/include/asm-generic/kmap_types.h +++ /dev/null @@ -1,11 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_GENERIC_KMAP_TYPES_H -#define _ASM_GENERIC_KMAP_TYPES_H - -#ifdef __WITH_KM_FENCE -# define KM_TYPE_NR 41 -#else -# define KM_TYPE_NR 20 -#endif - -#endif diff --git a/include/asm-generic/msi.h b/include/asm-generic/msi.h index e6795f088bdd..25344de0e8f9 100644 --- a/include/asm-generic/msi.h +++ b/include/asm-generic/msi.h @@ -4,6 +4,8 @@ #include +#ifdef CONFIG_GENERIC_MSI_IRQ_DOMAIN + #ifndef NUM_MSI_ALLOC_SCRATCHPAD_REGS # define NUM_MSI_ALLOC_SCRATCHPAD_REGS 2 #endif @@ -30,4 +32,6 @@ typedef struct msi_alloc_info { #define GENERIC_MSI_DOMAIN_OPS 1 +#endif /* CONFIG_GENERIC_MSI_IRQ_DOMAIN */ + #endif diff --git a/include/linux/highmem-internal.h b/include/linux/highmem-internal.h new file mode 100644 index 000000000000..1bbe96dc8be6 --- /dev/null +++ b/include/linux/highmem-internal.h @@ -0,0 +1,232 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_HIGHMEM_INTERNAL_H +#define _LINUX_HIGHMEM_INTERNAL_H + +/* + * Outside of CONFIG_HIGHMEM to support X86 32bit iomap_atomic() cruft. + */ +#ifdef CONFIG_KMAP_LOCAL +void *__kmap_local_pfn_prot(unsigned long pfn, pgprot_t prot); +void *__kmap_local_page_prot(struct page *page, pgprot_t prot); +void kunmap_local_indexed(void *vaddr); +void kmap_local_fork(struct task_struct *tsk); +void __kmap_local_sched_out(void); +void __kmap_local_sched_in(void); +static inline void kmap_assert_nomap(void) +{ + DEBUG_LOCKS_WARN_ON(current->kmap_ctrl.idx); +} +#else +static inline void kmap_local_fork(struct task_struct *tsk) { } +static inline void kmap_assert_nomap(void) { } +#endif + +#ifdef CONFIG_HIGHMEM +#include + +#ifndef ARCH_HAS_KMAP_FLUSH_TLB +static inline void kmap_flush_tlb(unsigned long addr) { } +#endif + +#ifndef kmap_prot +#define kmap_prot PAGE_KERNEL +#endif + +void *kmap_high(struct page *page); +void kunmap_high(struct page *page); +void __kmap_flush_unused(void); +struct page *__kmap_to_page(void *addr); + +static inline void *kmap(struct page *page) +{ + void *addr; + + might_sleep(); + if (!PageHighMem(page)) + addr = page_address(page); + else + addr = kmap_high(page); + kmap_flush_tlb((unsigned long)addr); + return addr; +} + +static inline void kunmap(struct page *page) +{ + might_sleep(); + if (!PageHighMem(page)) + return; + kunmap_high(page); +} + +static inline struct page *kmap_to_page(void *addr) +{ + return __kmap_to_page(addr); +} + +static inline void kmap_flush_unused(void) +{ + __kmap_flush_unused(); +} + +static inline void *kmap_local_page(struct page *page) +{ + return __kmap_local_page_prot(page, kmap_prot); +} + +static inline void *kmap_local_page_prot(struct page *page, pgprot_t prot) +{ + return __kmap_local_page_prot(page, prot); +} + +static inline void *kmap_local_pfn(unsigned long pfn) +{ + return __kmap_local_pfn_prot(pfn, kmap_prot); +} + +static inline void __kunmap_local(void *vaddr) +{ + kunmap_local_indexed(vaddr); +} + +static inline void *kmap_atomic_prot(struct page *page, pgprot_t prot) +{ + preempt_disable(); + pagefault_disable(); + return __kmap_local_page_prot(page, prot); +} + +static inline void *kmap_atomic(struct page *page) +{ + return kmap_atomic_prot(page, kmap_prot); +} + +static inline void *kmap_atomic_pfn(unsigned long pfn) +{ + preempt_disable(); + pagefault_disable(); + return __kmap_local_pfn_prot(pfn, kmap_prot); +} + +static inline void __kunmap_atomic(void *addr) +{ + kunmap_local_indexed(addr); + pagefault_enable(); + preempt_enable(); +} + +unsigned int __nr_free_highpages(void); +extern atomic_long_t _totalhigh_pages; + +static inline unsigned int nr_free_highpages(void) +{ + return __nr_free_highpages(); +} + +static inline unsigned long totalhigh_pages(void) +{ + return (unsigned long)atomic_long_read(&_totalhigh_pages); +} + +static inline void totalhigh_pages_inc(void) +{ + atomic_long_inc(&_totalhigh_pages); +} + +static inline void totalhigh_pages_add(long count) +{ + atomic_long_add(count, &_totalhigh_pages); +} + +#else /* CONFIG_HIGHMEM */ + +static inline struct page *kmap_to_page(void *addr) +{ + return virt_to_page(addr); +} + +static inline void *kmap(struct page *page) +{ + might_sleep(); + return page_address(page); +} + +static inline void kunmap_high(struct page *page) { } +static inline void kmap_flush_unused(void) { } + +static inline void kunmap(struct page *page) +{ +#ifdef ARCH_HAS_FLUSH_ON_KUNMAP + kunmap_flush_on_unmap(page_address(page)); +#endif +} + +static inline void *kmap_local_page(struct page *page) +{ + return page_address(page); +} + +static inline void *kmap_local_page_prot(struct page *page, pgprot_t prot) +{ + return kmap_local_page(page); +} + +static inline void *kmap_local_pfn(unsigned long pfn) +{ + return kmap_local_page(pfn_to_page(pfn)); +} + +static inline void __kunmap_local(void *addr) +{ +#ifdef ARCH_HAS_FLUSH_ON_KUNMAP + kunmap_flush_on_unmap(addr); +#endif +} + +static inline void *kmap_atomic(struct page *page) +{ + preempt_disable(); + pagefault_disable(); + return page_address(page); +} + +static inline void *kmap_atomic_prot(struct page *page, pgprot_t prot) +{ + return kmap_atomic(page); +} + +static inline void *kmap_atomic_pfn(unsigned long pfn) +{ + return kmap_atomic(pfn_to_page(pfn)); +} + +static inline void __kunmap_atomic(void *addr) +{ +#ifdef ARCH_HAS_FLUSH_ON_KUNMAP + kunmap_flush_on_unmap(addr); +#endif + pagefault_enable(); + preempt_enable(); +} + +static inline unsigned int nr_free_highpages(void) { return 0; } +static inline unsigned long totalhigh_pages(void) { return 0UL; } + +#endif /* CONFIG_HIGHMEM */ + +/* + * Prevent people trying to call kunmap_atomic() as if it were kunmap() + * kunmap_atomic() should get the return value of kmap_atomic, not the page. + */ +#define kunmap_atomic(__addr) \ +do { \ + BUILD_BUG_ON(__same_type((__addr), struct page *)); \ + __kunmap_atomic(__addr); \ +} while (0) + +#define kunmap_local(__addr) \ +do { \ + BUILD_BUG_ON(__same_type((__addr), struct page *)); \ + __kunmap_local(__addr); \ +} while (0) + +#endif diff --git a/include/linux/highmem.h b/include/linux/highmem.h index 35f052babdba..3728b13c6ca6 100644 --- a/include/linux/highmem.h +++ b/include/linux/highmem.h @@ -11,6 +11,119 @@ #include +#include "highmem-internal.h" + +/** + * kmap - Map a page for long term usage + * @page: Pointer to the page to be mapped + * + * Returns: The virtual address of the mapping + * + * Can only be invoked from preemptible task context because on 32bit + * systems with CONFIG_HIGHMEM enabled this function might sleep. + * + * For systems with CONFIG_HIGHMEM=n and for pages in the low memory area + * this returns the virtual address of the direct kernel mapping. + * + * The returned virtual address is globally visible and valid up to the + * point where it is unmapped via kunmap(). The pointer can be handed to + * other contexts. + * + * For highmem pages on 32bit systems this can be slow as the mapping space + * is limited and protected by a global lock. In case that there is no + * mapping slot available the function blocks until a slot is released via + * kunmap(). + */ +static inline void *kmap(struct page *page); + +/** + * kunmap - Unmap the virtual address mapped by kmap() + * @addr: Virtual address to be unmapped + * + * Counterpart to kmap(). A NOOP for CONFIG_HIGHMEM=n and for mappings of + * pages in the low memory area. + */ +static inline void kunmap(struct page *page); + +/** + * kmap_to_page - Get the page for a kmap'ed address + * @addr: The address to look up + * + * Returns: The page which is mapped to @addr. + */ +static inline struct page *kmap_to_page(void *addr); + +/** + * kmap_flush_unused - Flush all unused kmap mappings in order to + * remove stray mappings + */ +static inline void kmap_flush_unused(void); + +/** + * kmap_local_page - Map a page for temporary usage + * @page: Pointer to the page to be mapped + * + * Returns: The virtual address of the mapping + * + * Can be invoked from any context. + * + * Requires careful handling when nesting multiple mappings because the map + * management is stack based. The unmap has to be in the reverse order of + * the map operation: + * + * addr1 = kmap_local_page(page1); + * addr2 = kmap_local_page(page2); + * ... + * kunmap_local(addr2); + * kunmap_local(addr1); + * + * Unmapping addr1 before addr2 is invalid and causes malfunction. + * + * Contrary to kmap() mappings the mapping is only valid in the context of + * the caller and cannot be handed to other contexts. + * + * On CONFIG_HIGHMEM=n kernels and for low memory pages this returns the + * virtual address of the direct mapping. Only real highmem pages are + * temporarily mapped. + * + * While it is significantly faster than kmap() for the higmem case it + * comes with restrictions about the pointer validity. Only use when really + * necessary. + * + * On HIGHMEM enabled systems mapping a highmem page has the side effect of + * disabling migration in order to keep the virtual address stable across + * preemption. No caller of kmap_local_page() can rely on this side effect. + */ +static inline void *kmap_local_page(struct page *page); + +/** + * kmap_atomic - Atomically map a page for temporary usage - Deprecated! + * @page: Pointer to the page to be mapped + * + * Returns: The virtual address of the mapping + * + * Effectively a wrapper around kmap_local_page() which disables pagefaults + * and preemption. + * + * Do not use in new code. Use kmap_local_page() instead. + */ +static inline void *kmap_atomic(struct page *page); + +/** + * kunmap_atomic - Unmap the virtual address mapped by kmap_atomic() + * @addr: Virtual address to be unmapped + * + * Counterpart to kmap_atomic(). + * + * Effectively a wrapper around kunmap_local() which additionally undoes + * the side effects of kmap_atomic(), i.e. reenabling pagefaults and + * preemption. + */ + +/* Highmem related interfaces for management code */ +static inline unsigned int nr_free_highpages(void); +static inline unsigned long totalhigh_pages(void); + #ifndef ARCH_HAS_FLUSH_ANON_PAGE static inline void flush_anon_page(struct vm_area_struct *vma, struct page *page, unsigned long vmaddr) { @@ -29,199 +142,6 @@ static inline void invalidate_kernel_vmap_range(void *vaddr, int size) } #endif -#include - -#ifdef CONFIG_HIGHMEM -extern void *kmap_atomic_high_prot(struct page *page, pgprot_t prot); -extern void kunmap_atomic_high(void *kvaddr); -#include - -#ifndef ARCH_HAS_KMAP_FLUSH_TLB -static inline void kmap_flush_tlb(unsigned long addr) { } -#endif - -#ifndef kmap_prot -#define kmap_prot PAGE_KERNEL -#endif - -void *kmap_high(struct page *page); -static inline void *kmap(struct page *page) -{ - void *addr; - - might_sleep(); - if (!PageHighMem(page)) - addr = page_address(page); - else - addr = kmap_high(page); - kmap_flush_tlb((unsigned long)addr); - return addr; -} - -void kunmap_high(struct page *page); - -static inline void kunmap(struct page *page) -{ - might_sleep(); - if (!PageHighMem(page)) - return; - kunmap_high(page); -} - -/* - * kmap_atomic/kunmap_atomic is significantly faster than kmap/kunmap because - * no global lock is needed and because the kmap code must perform a global TLB - * invalidation when the kmap pool wraps. - * - * However when holding an atomic kmap it is not legal to sleep, so atomic - * kmaps are appropriate for short, tight code paths only. - * - * The use of kmap_atomic/kunmap_atomic is discouraged - kmap/kunmap - * gives a more generic (and caching) interface. But kmap_atomic can - * be used in IRQ contexts, so in some (very limited) cases we need - * it. - */ -static inline void *kmap_atomic_prot(struct page *page, pgprot_t prot) -{ - preempt_disable(); - pagefault_disable(); - if (!PageHighMem(page)) - return page_address(page); - return kmap_atomic_high_prot(page, prot); -} -#define kmap_atomic(page) kmap_atomic_prot(page, kmap_prot) - -/* declarations for linux/mm/highmem.c */ -unsigned int nr_free_highpages(void); -extern atomic_long_t _totalhigh_pages; -static inline unsigned long totalhigh_pages(void) -{ - return (unsigned long)atomic_long_read(&_totalhigh_pages); -} - -static inline void totalhigh_pages_inc(void) -{ - atomic_long_inc(&_totalhigh_pages); -} - -static inline void totalhigh_pages_dec(void) -{ - atomic_long_dec(&_totalhigh_pages); -} - -static inline void totalhigh_pages_add(long count) -{ - atomic_long_add(count, &_totalhigh_pages); -} - -static inline void totalhigh_pages_set(long val) -{ - atomic_long_set(&_totalhigh_pages, val); -} - -void kmap_flush_unused(void); - -struct page *kmap_to_page(void *addr); - -#else /* CONFIG_HIGHMEM */ - -static inline unsigned int nr_free_highpages(void) { return 0; } - -static inline struct page *kmap_to_page(void *addr) -{ - return virt_to_page(addr); -} - -static inline unsigned long totalhigh_pages(void) { return 0UL; } - -static inline void *kmap(struct page *page) -{ - might_sleep(); - return page_address(page); -} - -static inline void kunmap_high(struct page *page) -{ -} - -static inline void kunmap(struct page *page) -{ -#ifdef ARCH_HAS_FLUSH_ON_KUNMAP - kunmap_flush_on_unmap(page_address(page)); -#endif -} - -static inline void *kmap_atomic(struct page *page) -{ - preempt_disable(); - pagefault_disable(); - return page_address(page); -} -#define kmap_atomic_prot(page, prot) kmap_atomic(page) - -static inline void kunmap_atomic_high(void *addr) -{ - /* - * Mostly nothing to do in the CONFIG_HIGHMEM=n case as kunmap_atomic() - * handles re-enabling faults + preemption - */ -#ifdef ARCH_HAS_FLUSH_ON_KUNMAP - kunmap_flush_on_unmap(addr); -#endif -} - -#define kmap_atomic_pfn(pfn) kmap_atomic(pfn_to_page(pfn)) - -#define kmap_flush_unused() do {} while(0) - -#endif /* CONFIG_HIGHMEM */ - -#if defined(CONFIG_HIGHMEM) || defined(CONFIG_X86_32) - -DECLARE_PER_CPU(int, __kmap_atomic_idx); - -static inline int kmap_atomic_idx_push(void) -{ - int idx = __this_cpu_inc_return(__kmap_atomic_idx) - 1; - -#ifdef CONFIG_DEBUG_HIGHMEM - WARN_ON_ONCE(in_irq() && !irqs_disabled()); - BUG_ON(idx >= KM_TYPE_NR); -#endif - return idx; -} - -static inline int kmap_atomic_idx(void) -{ - return __this_cpu_read(__kmap_atomic_idx) - 1; -} - -static inline void kmap_atomic_idx_pop(void) -{ -#ifdef CONFIG_DEBUG_HIGHMEM - int idx = __this_cpu_dec_return(__kmap_atomic_idx); - - BUG_ON(idx < 0); -#else - __this_cpu_dec(__kmap_atomic_idx); -#endif -} - -#endif - -/* - * Prevent people trying to call kunmap_atomic() as if it were kunmap() - * kunmap_atomic() should get the return value of kmap_atomic, not the page. - */ -#define kunmap_atomic(addr) \ -do { \ - BUILD_BUG_ON(__same_type((addr), struct page *)); \ - kunmap_atomic_high(addr); \ - pagefault_enable(); \ - preempt_enable(); \ -} while (0) - - /* when CONFIG_HIGHMEM is not set these will be plain clear/copy_page */ #ifndef clear_user_highpage static inline void clear_user_highpage(struct page *page, unsigned long vaddr) diff --git a/include/linux/io-mapping.h b/include/linux/io-mapping.h index c75e4d3d8833..c093e81310a9 100644 --- a/include/linux/io-mapping.h +++ b/include/linux/io-mapping.h @@ -69,13 +69,32 @@ io_mapping_map_atomic_wc(struct io_mapping *mapping, BUG_ON(offset >= mapping->size); phys_addr = mapping->base + offset; - return iomap_atomic_prot_pfn(PHYS_PFN(phys_addr), mapping->prot); + preempt_disable(); + pagefault_disable(); + return __iomap_local_pfn_prot(PHYS_PFN(phys_addr), mapping->prot); } static inline void io_mapping_unmap_atomic(void __iomem *vaddr) { - iounmap_atomic(vaddr); + kunmap_local_indexed((void __force *)vaddr); + pagefault_enable(); + preempt_enable(); +} + +static inline void __iomem * +io_mapping_map_local_wc(struct io_mapping *mapping, unsigned long offset) +{ + resource_size_t phys_addr; + + BUG_ON(offset >= mapping->size); + phys_addr = mapping->base + offset; + return __iomap_local_pfn_prot(PHYS_PFN(phys_addr), mapping->prot); +} + +static inline void io_mapping_unmap_local(void __iomem *vaddr) +{ + kunmap_local_indexed((void __force *)vaddr); } static inline void __iomem * @@ -97,7 +116,7 @@ io_mapping_unmap(void __iomem *vaddr) iounmap(vaddr); } -#else +#else /* HAVE_ATOMIC_IOMAP */ #include @@ -162,7 +181,18 @@ io_mapping_unmap_atomic(void __iomem *vaddr) preempt_enable(); } -#endif /* HAVE_ATOMIC_IOMAP */ +static inline void __iomem * +io_mapping_map_local_wc(struct io_mapping *mapping, unsigned long offset) +{ + return io_mapping_map_wc(mapping, offset, PAGE_SIZE); +} + +static inline void io_mapping_unmap_local(void __iomem *vaddr) +{ + io_mapping_unmap(vaddr); +} + +#endif /* !HAVE_ATOMIC_IOMAP */ static inline struct io_mapping * io_mapping_create_wc(resource_size_t base, diff --git a/include/linux/msi.h b/include/linux/msi.h index 6b584cc4757c..360a0a7e7341 100644 --- a/include/linux/msi.h +++ b/include/linux/msi.h @@ -4,11 +4,50 @@ #include #include +#include +/* Dummy shadow structures if an architecture does not define them */ +#ifndef arch_msi_msg_addr_lo +typedef struct arch_msi_msg_addr_lo { + u32 address_lo; +} __attribute__ ((packed)) arch_msi_msg_addr_lo_t; +#endif + +#ifndef arch_msi_msg_addr_hi +typedef struct arch_msi_msg_addr_hi { + u32 address_hi; +} __attribute__ ((packed)) arch_msi_msg_addr_hi_t; +#endif + +#ifndef arch_msi_msg_data +typedef struct arch_msi_msg_data { + u32 data; +} __attribute__ ((packed)) arch_msi_msg_data_t; +#endif + +/** + * msi_msg - Representation of a MSI message + * @address_lo: Low 32 bits of msi message address + * @arch_addrlo: Architecture specific shadow of @address_lo + * @address_hi: High 32 bits of msi message address + * (only used when device supports it) + * @arch_addrhi: Architecture specific shadow of @address_hi + * @data: MSI message data (usually 16 bits) + * @arch_data: Architecture specific shadow of @data + */ struct msi_msg { - u32 address_lo; /* low 32 bits of msi message address */ - u32 address_hi; /* high 32 bits of msi message address */ - u32 data; /* 16 bits of msi message data */ + union { + u32 address_lo; + arch_msi_msg_addr_lo_t arch_addr_lo; + }; + union { + u32 address_hi; + arch_msi_msg_addr_hi_t arch_addr_hi; + }; + union { + u32 data; + arch_msi_msg_data_t arch_data; + }; }; extern int pci_msi_ignore_mask; @@ -243,7 +282,6 @@ struct msi_controller { #ifdef CONFIG_GENERIC_MSI_IRQ_DOMAIN #include -#include struct irq_domain; struct irq_domain_ops; diff --git a/include/linux/sched.h b/include/linux/sched.h index 1911aad24014..b6c3da92107d 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -36,6 +36,7 @@ #include #include #include +#include /* task_struct member predeclarations (sorted alphabetically): */ struct audit_context; @@ -639,6 +640,13 @@ struct wake_q_node { struct wake_q_node *next; }; +struct kmap_ctrl { +#ifdef CONFIG_KMAP_LOCAL + int idx; + pte_t pteval[KM_MAX_IDX]; +#endif +}; + struct task_struct { #ifdef CONFIG_THREAD_INFO_IN_TASK /* @@ -1324,6 +1332,7 @@ struct task_struct { unsigned int sequential_io; unsigned int sequential_io_avg; #endif + struct kmap_ctrl kmap_ctrl; #ifdef CONFIG_DEBUG_ATOMIC_SLEEP unsigned long task_state_change; #endif diff --git a/kernel/entry/common.c b/kernel/entry/common.c index d6b73937dab3..378341642f94 100644 --- a/kernel/entry/common.c +++ b/kernel/entry/common.c @@ -2,6 +2,7 @@ #include #include +#include #include #include @@ -203,6 +204,7 @@ static void exit_to_user_mode_prepare(struct pt_regs *regs) /* Ensure that the address limit is intact and no locks are held */ addr_limit_user_check(); + kmap_assert_nomap(); lockdep_assert_irqs_disabled(); lockdep_sys_exit(); } diff --git a/kernel/fork.c b/kernel/fork.c index 4ab709884d11..eca1bd81db41 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -934,6 +934,7 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node) account_kernel_stack(tsk, 1); kcov_task_init(tsk); + kmap_local_fork(tsk); #ifdef CONFIG_FAULT_INJECTION tsk->fail_nth = 0; diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c index e4ca69608f3b..e1c0ee725ab7 100644 --- a/kernel/irq/irqdomain.c +++ b/kernel/irq/irqdomain.c @@ -42,7 +42,16 @@ static inline void debugfs_add_domain_dir(struct irq_domain *d) { } static inline void debugfs_remove_domain_dir(struct irq_domain *d) { } #endif -const struct fwnode_operations irqchip_fwnode_ops; +static const char *irqchip_fwnode_get_name(const struct fwnode_handle *fwnode) +{ + struct irqchip_fwid *fwid = container_of(fwnode, struct irqchip_fwid, fwnode); + + return fwid->name; +} + +const struct fwnode_operations irqchip_fwnode_ops = { + .get_name = irqchip_fwnode_get_name, +}; EXPORT_SYMBOL_GPL(irqchip_fwnode_ops); /** diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 0fa30ec4baf1..0fac0c8ad8a6 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -4138,6 +4138,22 @@ static inline void finish_lock_switch(struct rq *rq) # define finish_arch_post_lock_switch() do { } while (0) #endif +static inline void kmap_local_sched_out(void) +{ +#ifdef CONFIG_KMAP_LOCAL + if (unlikely(current->kmap_ctrl.idx)) + __kmap_local_sched_out(); +#endif +} + +static inline void kmap_local_sched_in(void) +{ +#ifdef CONFIG_KMAP_LOCAL + if (unlikely(current->kmap_ctrl.idx)) + __kmap_local_sched_in(); +#endif +} + /** * prepare_task_switch - prepare to switch tasks * @rq: the runqueue preparing to switch @@ -4160,6 +4176,7 @@ prepare_task_switch(struct rq *rq, struct task_struct *prev, perf_event_task_sched_out(prev, next); rseq_preempt(prev); fire_sched_out_preempt_notifiers(prev, next); + kmap_local_sched_out(); prepare_task(next); prepare_arch_switch(next); } @@ -4226,6 +4243,14 @@ static struct rq *finish_task_switch(struct task_struct *prev) finish_lock_switch(rq); finish_arch_post_lock_switch(); kcov_finish_switch(current); + /* + * kmap_local_sched_out() is invoked with rq::lock held and + * interrupts disabled. There is no requirement for that, but the + * sched out code does not have an interrupt enabled section. + * Restoring the maps on sched in does not require interrupts being + * disabled either. + */ + kmap_local_sched_in(); fire_sched_in_preempt_notifiers(current); /* diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index c789b39ed527..e952f892f047 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -849,9 +849,31 @@ config DEBUG_PER_CPU_MAPS Say N if unsure. +config DEBUG_KMAP_LOCAL + bool "Debug kmap_local temporary mappings" + depends on DEBUG_KERNEL && KMAP_LOCAL + help + This option enables additional error checking for the kmap_local + infrastructure. Disable for production use. + +config ARCH_SUPPORTS_KMAP_LOCAL_FORCE_MAP + bool + +config DEBUG_KMAP_LOCAL_FORCE_MAP + bool "Enforce kmap_local temporary mappings" + depends on DEBUG_KERNEL && ARCH_SUPPORTS_KMAP_LOCAL_FORCE_MAP + select KMAP_LOCAL + select DEBUG_KMAP_LOCAL + help + This option enforces temporary mappings through the kmap_local + mechanism for non-highmem pages and on non-highmem systems. + Disable this for production systems! + config DEBUG_HIGHMEM bool "Highmem debugging" depends on DEBUG_KERNEL && HIGHMEM + select DEBUG_KMAP_LOCAL_FORCE_MAP if ARCH_SUPPORTS_KMAP_LOCAL_FORCE_MAP + select DEBUG_KMAP_LOCAL help This option enables additional error checking for high memory systems. Disable for production systems. diff --git a/mm/Kconfig b/mm/Kconfig index 390165ffbb0f..8c49d09da214 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -859,4 +859,7 @@ config ARCH_HAS_HUGEPD config MAPPING_DIRTY_HELPERS bool +config KMAP_LOCAL + bool + endmenu diff --git a/mm/highmem.c b/mm/highmem.c index 1352a27951e3..83f9660f168f 100644 --- a/mm/highmem.c +++ b/mm/highmem.c @@ -31,10 +31,6 @@ #include #include -#if defined(CONFIG_HIGHMEM) || defined(CONFIG_X86_32) -DEFINE_PER_CPU(int, __kmap_atomic_idx); -#endif - /* * Virtual_count is not a pure "count". * 0 means that it is not mapped, and has not been mapped @@ -108,9 +104,7 @@ static inline wait_queue_head_t *get_pkmap_wait_queue_head(unsigned int color) atomic_long_t _totalhigh_pages __read_mostly; EXPORT_SYMBOL(_totalhigh_pages); -EXPORT_PER_CPU_SYMBOL(__kmap_atomic_idx); - -unsigned int nr_free_highpages (void) +unsigned int __nr_free_highpages (void) { struct zone *zone; unsigned int pages = 0; @@ -147,7 +141,7 @@ pte_t * pkmap_page_table; do { spin_unlock(&kmap_lock); (void)(flags); } while (0) #endif -struct page *kmap_to_page(void *vaddr) +struct page *__kmap_to_page(void *vaddr) { unsigned long addr = (unsigned long)vaddr; @@ -158,7 +152,7 @@ struct page *kmap_to_page(void *vaddr) return virt_to_page(addr); } -EXPORT_SYMBOL(kmap_to_page); +EXPORT_SYMBOL(__kmap_to_page); static void flush_all_zero_pkmaps(void) { @@ -200,10 +194,7 @@ static void flush_all_zero_pkmaps(void) flush_tlb_kernel_range(PKMAP_ADDR(0), PKMAP_ADDR(LAST_PKMAP)); } -/** - * kmap_flush_unused - flush all unused kmap mappings in order to remove stray mappings - */ -void kmap_flush_unused(void) +void __kmap_flush_unused(void) { lock_kmap(); flush_all_zero_pkmaps(); @@ -367,9 +358,260 @@ void kunmap_high(struct page *page) if (need_wakeup) wake_up(pkmap_map_wait); } - EXPORT_SYMBOL(kunmap_high); -#endif /* CONFIG_HIGHMEM */ +#endif /* CONFIG_HIGHMEM */ + +#ifdef CONFIG_KMAP_LOCAL + +#include + +/* + * With DEBUG_KMAP_LOCAL the stack depth is doubled and every second + * slot is unused which acts as a guard page + */ +#ifdef CONFIG_DEBUG_KMAP_LOCAL +# define KM_INCR 2 +#else +# define KM_INCR 1 +#endif + +static inline int kmap_local_idx_push(void) +{ + WARN_ON_ONCE(in_irq() && !irqs_disabled()); + current->kmap_ctrl.idx += KM_INCR; + BUG_ON(current->kmap_ctrl.idx >= KM_MAX_IDX); + return current->kmap_ctrl.idx - 1; +} + +static inline int kmap_local_idx(void) +{ + return current->kmap_ctrl.idx - 1; +} + +static inline void kmap_local_idx_pop(void) +{ + current->kmap_ctrl.idx -= KM_INCR; + BUG_ON(current->kmap_ctrl.idx < 0); +} + +#ifndef arch_kmap_local_post_map +# define arch_kmap_local_post_map(vaddr, pteval) do { } while (0) +#endif + +#ifndef arch_kmap_local_pre_unmap +# define arch_kmap_local_pre_unmap(vaddr) do { } while (0) +#endif + +#ifndef arch_kmap_local_post_unmap +# define arch_kmap_local_post_unmap(vaddr) do { } while (0) +#endif + +#ifndef arch_kmap_local_map_idx +#define arch_kmap_local_map_idx(idx, pfn) kmap_local_calc_idx(idx) +#endif + +#ifndef arch_kmap_local_unmap_idx +#define arch_kmap_local_unmap_idx(idx, vaddr) kmap_local_calc_idx(idx) +#endif + +#ifndef arch_kmap_local_high_get +static inline void *arch_kmap_local_high_get(struct page *page) +{ + return NULL; +} +#endif + +/* Unmap a local mapping which was obtained by kmap_high_get() */ +static inline bool kmap_high_unmap_local(unsigned long vaddr) +{ +#ifdef ARCH_NEEDS_KMAP_HIGH_GET + if (vaddr >= PKMAP_ADDR(0) && vaddr < PKMAP_ADDR(LAST_PKMAP)) { + kunmap_high(pte_page(pkmap_page_table[PKMAP_NR(vaddr)])); + return true; + } +#endif + return false; +} + +static inline int kmap_local_calc_idx(int idx) +{ + return idx + KM_MAX_IDX * smp_processor_id(); +} + +static pte_t *__kmap_pte; + +static pte_t *kmap_get_pte(void) +{ + if (!__kmap_pte) + __kmap_pte = virt_to_kpte(__fix_to_virt(FIX_KMAP_BEGIN)); + return __kmap_pte; +} + +void *__kmap_local_pfn_prot(unsigned long pfn, pgprot_t prot) +{ + pte_t pteval, *kmap_pte = kmap_get_pte(); + unsigned long vaddr; + int idx; + + /* + * Disable migration so resulting virtual address is stable + * accross preemption. + */ + migrate_disable(); + preempt_disable(); + idx = arch_kmap_local_map_idx(kmap_local_idx_push(), pfn); + vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); + BUG_ON(!pte_none(*(kmap_pte - idx))); + pteval = pfn_pte(pfn, prot); + set_pte_at(&init_mm, vaddr, kmap_pte - idx, pteval); + arch_kmap_local_post_map(vaddr, pteval); + current->kmap_ctrl.pteval[kmap_local_idx()] = pteval; + preempt_enable(); + + return (void *)vaddr; +} +EXPORT_SYMBOL_GPL(__kmap_local_pfn_prot); + +void *__kmap_local_page_prot(struct page *page, pgprot_t prot) +{ + void *kmap; + + /* + * To broaden the usage of the actual kmap_local() machinery always map + * pages when debugging is enabled and the architecture has no problems + * with alias mappings. + */ + if (!IS_ENABLED(CONFIG_DEBUG_KMAP_LOCAL_FORCE_MAP) && !PageHighMem(page)) + return page_address(page); + + /* Try kmap_high_get() if architecture has it enabled */ + kmap = arch_kmap_local_high_get(page); + if (kmap) + return kmap; + + return __kmap_local_pfn_prot(page_to_pfn(page), prot); +} +EXPORT_SYMBOL(__kmap_local_page_prot); + +void kunmap_local_indexed(void *vaddr) +{ + unsigned long addr = (unsigned long) vaddr & PAGE_MASK; + pte_t *kmap_pte = kmap_get_pte(); + int idx; + + if (addr < __fix_to_virt(FIX_KMAP_END) || + addr > __fix_to_virt(FIX_KMAP_BEGIN)) { + if (IS_ENABLED(CONFIG_DEBUG_KMAP_LOCAL_FORCE_MAP)) { + /* This _should_ never happen! See above. */ + WARN_ON_ONCE(1); + return; + } + /* + * Handle mappings which were obtained by kmap_high_get() + * first as the virtual address of such mappings is below + * PAGE_OFFSET. Warn for all other addresses which are in + * the user space part of the virtual address space. + */ + if (!kmap_high_unmap_local(addr)) + WARN_ON_ONCE(addr < PAGE_OFFSET); + return; + } + + preempt_disable(); + idx = arch_kmap_local_unmap_idx(kmap_local_idx(), addr); + WARN_ON_ONCE(addr != __fix_to_virt(FIX_KMAP_BEGIN + idx)); + + arch_kmap_local_pre_unmap(addr); + pte_clear(&init_mm, addr, kmap_pte - idx); + arch_kmap_local_post_unmap(addr); + current->kmap_ctrl.pteval[kmap_local_idx()] = __pte(0); + kmap_local_idx_pop(); + preempt_enable(); + migrate_enable(); +} +EXPORT_SYMBOL(kunmap_local_indexed); + +/* + * Invoked before switch_to(). This is safe even when during or after + * clearing the maps an interrupt which needs a kmap_local happens because + * the task::kmap_ctrl.idx is not modified by the unmapping code so a + * nested kmap_local will use the next unused index and restore the index + * on unmap. The already cleared kmaps of the outgoing task are irrelevant + * because the interrupt context does not know about them. The same applies + * when scheduling back in for an interrupt which happens before the + * restore is complete. + */ +void __kmap_local_sched_out(void) +{ + struct task_struct *tsk = current; + pte_t *kmap_pte = kmap_get_pte(); + int i; + + /* Clear kmaps */ + for (i = 0; i < tsk->kmap_ctrl.idx; i++) { + pte_t pteval = tsk->kmap_ctrl.pteval[i]; + unsigned long addr; + int idx; + + /* With debug all even slots are unmapped and act as guard */ + if (IS_ENABLED(CONFIG_DEBUG_HIGHMEM) && !(i & 0x01)) { + WARN_ON_ONCE(!pte_none(pteval)); + continue; + } + if (WARN_ON_ONCE(pte_none(pteval))) + continue; + + /* + * This is a horrible hack for XTENSA to calculate the + * coloured PTE index. Uses the PFN encoded into the pteval + * and the map index calculation because the actual mapped + * virtual address is not stored in task::kmap_ctrl. + * For any sane architecture this is optimized out. + */ + idx = arch_kmap_local_map_idx(i, pte_pfn(pteval)); + + addr = __fix_to_virt(FIX_KMAP_BEGIN + idx); + arch_kmap_local_pre_unmap(addr); + pte_clear(&init_mm, addr, kmap_pte - idx); + arch_kmap_local_post_unmap(addr); + } +} + +void __kmap_local_sched_in(void) +{ + struct task_struct *tsk = current; + pte_t *kmap_pte = kmap_get_pte(); + int i; + + /* Restore kmaps */ + for (i = 0; i < tsk->kmap_ctrl.idx; i++) { + pte_t pteval = tsk->kmap_ctrl.pteval[i]; + unsigned long addr; + int idx; + + /* With debug all even slots are unmapped and act as guard */ + if (IS_ENABLED(CONFIG_DEBUG_HIGHMEM) && !(i & 0x01)) { + WARN_ON_ONCE(!pte_none(pteval)); + continue; + } + if (WARN_ON_ONCE(pte_none(pteval))) + continue; + + /* See comment in __kmap_local_sched_out() */ + idx = arch_kmap_local_map_idx(i, pte_pfn(pteval)); + addr = __fix_to_virt(FIX_KMAP_BEGIN + idx); + set_pte_at(&init_mm, addr, kmap_pte - idx, pteval); + arch_kmap_local_post_map(addr, pteval); + } +} + +void kmap_local_fork(struct task_struct *tsk) +{ + if (WARN_ON_ONCE(tsk->kmap_ctrl.idx)) + memset(&tsk->kmap_ctrl, 0, sizeof(tsk->kmap_ctrl)); +} + +#endif #if defined(HASHED_PAGE_VIRTUAL)