From 18c78ecd82b4d8522fcd2a2459af5f27f06fe9b2 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 23 Mar 2023 14:08:58 +0000 Subject: [PATCH] ANDROID: arm64: Implement hypervisor workaround for SoCs with DMA beyond the PoC SoCs featuring peripherals that can issue non-coherent DMA traffic beyond the point of coherency (PoC) present multiple challenges for the DMA-API implementation in Linux. Many of these challenges can be overcome by suitable configuration of the interconnect, however the presence of a cacheable alias for non-cacheable buffers can still lead to coherence issues arising when stale clean lines are back-snooped from the cache hierarchy to satisfy a non-cacheable transaction at the PoC. Removing all cacheable aliases on a case-by-case basis is both error-prone and expensive. Instead, leverage the stage-2 identity mapping installed by pKVM to enforce consistent cacheability for all stage-1 aliases. Bug: 240786634 Change-Id: I78b0aa51fe3e23811bbd25481173086aa957c4bf Signed-off-by: Will Deacon --- .../admin-guide/kernel-parameters.txt | 13 ++ arch/arm64/Kconfig | 17 ++ .../include/asm/android_erratum_pgtable.h | 64 ++++++++ arch/arm64/include/asm/kvm_asm.h | 3 + arch/arm64/include/asm/pgtable.h | 9 ++ arch/arm64/include/asm/vmalloc.h | 6 + arch/arm64/kernel/image-vars.h | 4 + arch/arm64/kvm/Makefile | 1 + arch/arm64/kvm/android_erratum_pgtable.c | 108 +++++++++++++ arch/arm64/kvm/hyp/include/nvhe/mm.h | 1 + arch/arm64/kvm/hyp/nvhe/hyp-main.c | 14 ++ arch/arm64/kvm/hyp/nvhe/mem_protect.c | 150 +++++++++++++++--- arch/arm64/kvm/hyp/nvhe/mm.c | 19 ++- arch/arm64/kvm/pkvm.c | 5 + 14 files changed, 395 insertions(+), 19 deletions(-) create mode 100644 arch/arm64/include/asm/android_erratum_pgtable.h create mode 100644 arch/arm64/kvm/android_erratum_pgtable.c diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index fce2ec7e4bfe..9700967c4139 100644 --- 
a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -2420,6 +2420,19 @@ for all guests. Default is 1 (enabled) if in 64-bit or 32-bit PAE mode. + kvm-arm.force_nc + [KVM,ARM,ANDROID_ARM64_WORKAROUND_DMA_BEYOND_POC] + + Enable hypercalls to remap host pages as normal + non-cacheable at stage-2 and issue these hypercalls + when installing non-cacheable ptes at stage-1. This + is useful to work around coherency issues on systems + with DMA peripherals integrated beyond the Point of + Coherency (PoC). + + This option only applies when booting with + kvm-arm.mode=protected. + kvm-arm.mode= [KVM,ARM] Select one of KVM/arm64's modes of operation. diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 1ddfda7b9125..cab86cfa361e 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -1031,6 +1031,23 @@ config SOCIONEXT_SYNQUACER_PREITS If unsure, say Y. +config ANDROID_ARM64_WORKAROUND_DMA_BEYOND_POC + bool "Remove cacheable aliases of non-cacheable DMA buffers at stage-2" + default y + depends on KVM + help + Some SoCs integrate non-coherent DMA-capable peripherals beyond + the Point of Coherency (PoC), resulting in loss of coherency + with non-cacheable mappings on the CPU in the presence of a + cacheable alias. + + This workaround provides a mechanism (controlled by the kernel + command-line) to remap pages as non-cacheable in pKVM's stage-2 + mapping for the host, thereby removing any cacheable aliases + that may be present in the stage-1 mapping. + + If unsure, say Y. 
+ endmenu diff --git a/arch/arm64/include/asm/android_erratum_pgtable.h b/arch/arm64/include/asm/android_erratum_pgtable.h new file mode 100644 index 000000000000..515dda699eca --- /dev/null +++ b/arch/arm64/include/asm/android_erratum_pgtable.h @@ -0,0 +1,64 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2023 - Google LLC + * Author: Will Deacon + */ +#ifndef _ASM_ARM64_ANDROID_ERRATUM_PGTABLE_H +#define _ASM_ARM64_ANDROID_ERRATUM_PGTABLE_H + +#ifndef __ASM_PGTABLE_H +#error "Please don't include this header directly." +#endif + +#ifdef CONFIG_ANDROID_ARM64_WORKAROUND_DMA_BEYOND_POC +extern void pkvm_host_set_stage2_memattr(phys_addr_t addr, bool force_nc); +extern __init int pkvm_register_early_nc_mappings(void); + +DECLARE_STATIC_KEY_FALSE(pkvm_force_nc); +static inline bool prot_needs_stage2_update(pgprot_t prot) +{ + pteval_t val = pgprot_val(prot); + + if (!static_branch_unlikely(&pkvm_force_nc)) + return 0; + + return (val & PTE_ATTRINDX_MASK) == PTE_ATTRINDX(MT_NORMAL_NC); +} + +static inline void arm64_update_cacheable_aliases(pte_t *ptep, pte_t pte) +{ + pte_t old_pte = READ_ONCE(*ptep); + bool force_nc; + + if (!static_branch_unlikely(&pkvm_force_nc)) + return; + + if (pte_valid(old_pte) == pte_valid(pte)) + return; + + if (!pte_valid(pte)) { + force_nc = false; + pte = old_pte; + } else { + force_nc = true; + } + + if ((pte_val(pte) & PTE_ATTRINDX_MASK) == PTE_ATTRINDX(MT_NORMAL_NC)) + pkvm_host_set_stage2_memattr(__pte_to_phys(pte), force_nc); +} + +#define set_pmd_at(mm, addr, pmdp, pmd) do { \ + WARN_ON(prot_needs_stage2_update(__pgprot(pmd_val(pmd)))); \ + set_pte_at(mm, addr, (pte_t *)pmdp, pmd_pte(pmd)); \ +} while (0) + +#define set_pud_at(mm, addr, pudp, pud) do { \ + WARN_ON(prot_needs_stage2_update(__pgprot(pud_val(pud)))); \ + set_pte_at(mm, addr, (pte_t *)pudp, pud_pte(pud)); \ +} while (0) + +#else +static inline void arm64_update_cacheable_aliases(pte_t *ptep, pte_t pte) { } +static inline bool 
prot_needs_stage2_update(pgprot_t prot) { return false; } +#endif /* CONFIG_ANDROID_ARM64_WORKAROUND_DMA_BEYOND_POC */ +#endif /* _ASM_ARM64_ANDROID_ERRATUM_PGTABLE_H */ diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h index d231b3556f45..691dd9afc1e6 100644 --- a/arch/arm64/include/asm/kvm_asm.h +++ b/arch/arm64/include/asm/kvm_asm.h @@ -103,6 +103,9 @@ enum __kvm_host_smccc_func { __KVM_HOST_SMCCC_FUNC___pkvm_rb_swap_reader_page, __KVM_HOST_SMCCC_FUNC___pkvm_rb_update_footers, __KVM_HOST_SMCCC_FUNC___pkvm_enable_event, +#ifdef CONFIG_ANDROID_ARM64_WORKAROUND_DMA_BEYOND_POC + __KVM_HOST_SMCCC_FUNC___pkvm_host_set_stage2_memattr, +#endif /* * Start of the dynamically registered hypercalls. Start a bit diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 7e77f1d7f23b..35d1806ace9e 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -312,6 +312,8 @@ static inline void __check_racy_pte_update(struct mm_struct *mm, pte_t *ptep, __func__, pte_val(old_pte), pte_val(pte)); } +#include + static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte) { @@ -340,6 +342,7 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, __check_racy_pte_update(mm, ptep, pte); + arm64_update_cacheable_aliases(ptep, pte); set_pte(ptep, pte); } @@ -485,8 +488,13 @@ static inline pmd_t pmd_mkdevmap(pmd_t pmd) #define pud_pfn(pud) ((__pud_to_phys(pud) & PUD_MASK) >> PAGE_SHIFT) #define pfn_pud(pfn,prot) __pud(__phys_to_pud_val((phys_addr_t)(pfn) << PAGE_SHIFT) | pgprot_val(prot)) +#ifndef set_pmd_at #define set_pmd_at(mm, addr, pmdp, pmd) set_pte_at(mm, addr, (pte_t *)pmdp, pmd_pte(pmd)) +#endif + +#ifndef set_pud_at #define set_pud_at(mm, addr, pudp, pud) set_pte_at(mm, addr, (pte_t *)pudp, pud_pte(pud)) +#endif #define __p4d_to_phys(p4d) __pte_to_phys(p4d_pte(p4d)) #define __phys_to_p4d_val(phys) __phys_to_pte_val(phys) @@ -869,6 +877,7 
@@ static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma, static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long address, pte_t *ptep) { + arm64_update_cacheable_aliases(ptep, __pte(0)); return __pte(xchg_relaxed(&pte_val(*ptep), 0)); } diff --git a/arch/arm64/include/asm/vmalloc.h b/arch/arm64/include/asm/vmalloc.h index 1ed0aace67b4..665ff5fbad3d 100644 --- a/arch/arm64/include/asm/vmalloc.h +++ b/arch/arm64/include/asm/vmalloc.h @@ -9,6 +9,9 @@ #define arch_vmap_pud_supported arch_vmap_pud_supported static inline bool arch_vmap_pud_supported(pgprot_t prot) { + if (prot_needs_stage2_update(prot)) + return false; + /* * Only 4k granule supports level 1 block mappings. * SW table walks can't handle removal of intermediate entries. @@ -20,6 +23,9 @@ static inline bool arch_vmap_pud_supported(pgprot_t prot) #define arch_vmap_pmd_supported arch_vmap_pmd_supported static inline bool arch_vmap_pmd_supported(pgprot_t prot) { + if (prot_needs_stage2_update(prot)) + return false; + /* See arch_vmap_pud_supported() */ return !IS_ENABLED(CONFIG_PTDUMP_DEBUGFS); } diff --git a/arch/arm64/kernel/image-vars.h b/arch/arm64/kernel/image-vars.h index 8ca6df922da8..41a068e9ce5e 100644 --- a/arch/arm64/kernel/image-vars.h +++ b/arch/arm64/kernel/image-vars.h @@ -134,6 +134,10 @@ KVM_NVHE_ALIAS(__hyp_event_ids_end); /* pKVM static key */ KVM_NVHE_ALIAS(kvm_protected_mode_initialized); +#ifdef CONFIG_ANDROID_ARM64_WORKAROUND_DMA_BEYOND_POC +KVM_NVHE_ALIAS(pkvm_force_nc); +#endif + #endif /* CONFIG_KVM */ #endif /* __ARM64_KERNEL_IMAGE_VARS_H */ diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile index 21d8980762fc..b05c974aca87 100644 --- a/arch/arm64/kvm/Makefile +++ b/arch/arm64/kvm/Makefile @@ -25,6 +25,7 @@ kvm-y := $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o $(KVM)/eventfd.o \ vgic/vgic-its.o vgic/vgic-debug.o kvm-$(CONFIG_HW_PERF_EVENTS) += pmu-emul.o pmu.o +kvm-$(CONFIG_ANDROID_ARM64_WORKAROUND_DMA_BEYOND_POC) += 
android_erratum_pgtable.o kvm-$(CONFIG_TRACING) += hyp_events.o hyp_trace.o diff --git a/arch/arm64/kvm/android_erratum_pgtable.c b/arch/arm64/kvm/android_erratum_pgtable.c new file mode 100644 index 000000000000..301f24e9d811 --- /dev/null +++ b/arch/arm64/kvm/android_erratum_pgtable.c @@ -0,0 +1,108 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2023 - Google LLC + * Author: Will Deacon + */ +#include +#include + +#include +#include +#include + +DEFINE_STATIC_KEY_FALSE(pkvm_force_nc); +static int __init early_pkvm_force_nc_cfg(char *arg) +{ + static_branch_enable(&pkvm_force_nc); + return 0; +} +early_param("kvm-arm.force_nc", early_pkvm_force_nc_cfg); + +/* + * Update the stage-2 memory attributes (cacheability) for a page, usually + * in response to mapping or unmapping a normal non-cacheable region at stage-1. + * + * If 'force_nc' is set, the stage-2 entry is immediately made non-cacheable + * (and cleaned+invalidated to the PoC) otherwise the entry is unmapped and the + * cacheability determined based on the stage-1 attribute of the next access + * (with no cache maintenance being performed). 
+ */ +struct pkvm_host_nc_region { + phys_addr_t start; + phys_addr_t end; +}; + +#define PKVM_HOST_MAX_EARLY_NC_REGIONS 8 +static struct pkvm_host_nc_region +pkvm_host_early_nc_regions[PKVM_HOST_MAX_EARLY_NC_REGIONS]; + +static void pkvm_host_track_early_nc_mapping(phys_addr_t addr) +{ + static int idx /*= 0*/; + struct pkvm_host_nc_region *reg = &pkvm_host_early_nc_regions[idx]; + + if (reg->start == reg->end) { + reg->start = addr; + } else if (reg->end != addr) { + if (WARN_ON(idx == PKVM_HOST_MAX_EARLY_NC_REGIONS - 1)) + return; + + reg = &pkvm_host_early_nc_regions[++idx]; + reg->start = addr; + } + + reg->end = addr + PAGE_SIZE; +} + +void pkvm_host_set_stage2_memattr(phys_addr_t addr, bool force_nc) +{ + int err; + + if (kvm_get_mode() != KVM_MODE_PROTECTED) + return; + + /* + * Non-memory regions or carveouts marked as "no-map" are handled + * entirely by their corresponding driver, which should avoid the + * creation of a cacheable alias in the first place. + */ + if (!memblock_is_map_memory(addr)) + return; + + if (!is_pkvm_initialized()) { + if (!WARN_ON_ONCE(!force_nc)) + pkvm_host_track_early_nc_mapping(addr); + return; + } + + err = kvm_call_hyp_nvhe(__pkvm_host_set_stage2_memattr, addr, force_nc); + WARN_ON(err && err != -EAGAIN); +} +EXPORT_SYMBOL_GPL(pkvm_host_set_stage2_memattr); + +int __init pkvm_register_early_nc_mappings(void) +{ + int i; + + if (!is_pkvm_initialized()) + return 0; + + for (i = 0; i < PKVM_HOST_MAX_EARLY_NC_REGIONS; ++i) { + struct pkvm_host_nc_region *reg = &pkvm_host_early_nc_regions[i]; + + if (reg->start == reg->end) + return 0; + + while (reg->start != reg->end) { + int err; + + err = kvm_call_hyp_nvhe(__pkvm_host_set_stage2_memattr, reg->start, true); + if (err) + return err; + + reg->start += PAGE_SIZE; + } + } + + return 0; +} diff --git a/arch/arm64/kvm/hyp/include/nvhe/mm.h b/arch/arm64/kvm/hyp/include/nvhe/mm.h index ed2e8ab8d236..92278f8b2ab9 100644 --- a/arch/arm64/kvm/hyp/include/nvhe/mm.h +++ 
b/arch/arm64/kvm/hyp/include/nvhe/mm.h @@ -16,6 +16,7 @@ extern const struct pkvm_module_ops module_ops; int hyp_create_pcpu_fixmap(void); void *hyp_fixmap_map(phys_addr_t phys); +void *hyp_fixmap_map_nc(phys_addr_t phys); void hyp_fixmap_unmap(void); void hyp_poison_page(phys_addr_t phys); diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c index c854d1f674e5..8e291ac1a053 100644 --- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c +++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c @@ -1263,6 +1263,17 @@ static void handle___pkvm_enable_event(struct kvm_cpu_context *host_ctxt) cpu_reg(host_ctxt, 1) = __pkvm_enable_event(id, enable); } +#ifdef CONFIG_ANDROID_ARM64_WORKAROUND_DMA_BEYOND_POC +extern int __pkvm_host_set_stage2_memattr(phys_addr_t phys, bool force_nc); +static void handle___pkvm_host_set_stage2_memattr(struct kvm_cpu_context *host_ctxt) +{ + DECLARE_REG(phys_addr_t, phys, host_ctxt, 1); + DECLARE_REG(bool, force_nc, host_ctxt, 2); + + cpu_reg(host_ctxt, 1) = __pkvm_host_set_stage2_memattr(phys, force_nc); +} +#endif + typedef void (*hcall_t)(struct kvm_cpu_context *); #define HANDLE_FUNC(x) [__KVM_HOST_SMCCC_FUNC_##x] = (hcall_t)handle_##x @@ -1315,6 +1326,9 @@ static const hcall_t host_hcall[] = { HANDLE_FUNC(__pkvm_rb_swap_reader_page), HANDLE_FUNC(__pkvm_rb_update_footers), HANDLE_FUNC(__pkvm_enable_event), +#ifdef CONFIG_ANDROID_ARM64_WORKAROUND_DMA_BEYOND_POC + HANDLE_FUNC(__pkvm_host_set_stage2_memattr), +#endif }; unsigned long pkvm_priv_hcall_limit __ro_after_init = __KVM_HOST_SMCCC_FUNC___pkvm_prot_finalize; diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c index 4590e3d9d8dd..ddd14eb489bc 100644 --- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c +++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c @@ -539,23 +539,10 @@ static inline bool range_included(struct kvm_mem_range *child, return parent->start <= child->start && child->end <= parent->end; } -static int host_stage2_adjust_range(u64 addr, struct 
kvm_mem_range *range) +static int host_stage2_adjust_range(u64 addr, struct kvm_mem_range *range, + u32 level) { struct kvm_mem_range cur; - kvm_pte_t pte; - u32 level; - int ret; - - hyp_assert_lock_held(&host_mmu.lock); - ret = kvm_pgtable_get_leaf(&host_mmu.pgt, addr, &pte, &level); - if (ret) - return ret; - - if (kvm_pte_valid(pte)) - return -EAGAIN; - - if (pte) - return -EPERM; do { u64 granule = kvm_granule_size(level); @@ -641,15 +628,141 @@ static bool host_stage2_pte_is_counted(kvm_pte_t pte, u32 level) return (pte & KVM_HOST_S2_DEFAULT_MASK) != KVM_HOST_S2_DEFAULT_MMIO_PTE; } -static int host_stage2_idmap(u64 addr) +#define DEFERRED_MEMATTR_NOTE (1ULL << 24) +#ifdef CONFIG_ANDROID_ARM64_WORKAROUND_DMA_BEYOND_POC +static enum pkvm_page_state host_get_page_state(kvm_pte_t pte, u64 addr); + +int __pkvm_host_set_stage2_memattr(phys_addr_t phys, bool force_nc) +{ + kvm_pte_t pte; + int ret = 0; + + if (!static_branch_unlikely(&pkvm_force_nc)) + return -ENOENT; + + phys = ALIGN_DOWN(phys, PAGE_SIZE); + hyp_spin_lock(&host_mmu.lock); + + ret = kvm_pgtable_get_leaf(&host_mmu.pgt, phys, &pte, NULL); + if (ret) + goto unlock; + + if (!addr_is_memory(phys)) { + ret = -EIO; + goto unlock; + } + + if (!kvm_pte_valid(pte) && pte) { + switch (pte) { + case DEFERRED_MEMATTR_NOTE: + break; + default: + ret = -EPERM; + } + } else if (host_get_page_state(pte, phys) != PKVM_PAGE_OWNED) { + ret = -EPERM; + } + + if (ret) + goto unlock; + + if (force_nc) { + ret = host_stage2_idmap_locked(phys, PAGE_SIZE, + PKVM_HOST_MEM_PROT | + KVM_PGTABLE_PROT_NC, + false); + if (ret) + goto unlock; + + kvm_flush_dcache_to_poc(hyp_fixmap_map_nc(phys), PAGE_SIZE); + hyp_fixmap_unmap(); + } else { + ret = kvm_pgtable_stage2_annotate(&host_mmu.pgt, phys, + PAGE_SIZE, &host_s2_pool, + DEFERRED_MEMATTR_NOTE); + } +unlock: + hyp_spin_unlock(&host_mmu.lock); + return ret; +} + +static int handle_memattr_annotation(struct kvm_vcpu_fault_info *fault, + u64 addr, enum kvm_pgtable_prot *prot, + 
struct kvm_mem_range *range) +{ + u64 par, oldpar; + + /* If the S1 MMU is disabled, treat the access as cacheable */ + if (unlikely(!(read_sysreg(sctlr_el1) & SCTLR_ELx_M))) + return 0; + + /* If we took a fault on a PTW, then treat it as cacheable */ + if (fault->esr_el2 & ESR_ELx_S1PTW) + return 0; + + oldpar = read_sysreg_par(); + + if (!__kvm_at("s1e1r", fault->far_el2)) + par = read_sysreg_par(); + else + par = SYS_PAR_EL1_F; + + write_sysreg(oldpar, par_el1); + + if (unlikely(par & SYS_PAR_EL1_F)) + return -EAGAIN; + + if ((par >> 56) == MAIR_ATTR_NORMAL_NC) { + range->start = ALIGN_DOWN(addr, PAGE_SIZE); + range->end = range->start + PAGE_SIZE; + *prot |= KVM_PGTABLE_PROT_NC; + } + + return 0; +} +#else +static int handle_memattr_annotation(struct kvm_vcpu_fault_info *fault, + u64 addr, enum kvm_pgtable_prot *prot, + struct kvm_mem_range *range) +{ + return -EPERM; +} +#endif + +static int host_stage2_idmap(struct kvm_vcpu_fault_info *fault, u64 addr) { struct kvm_mem_range range; bool is_memory = !!find_mem_range(addr, &range); enum kvm_pgtable_prot prot = default_host_prot(is_memory); + kvm_pte_t pte; + u32 level; int ret; hyp_assert_lock_held(&host_mmu.lock); + ret = kvm_pgtable_get_leaf(&host_mmu.pgt, addr, &pte, &level); + if (ret) + return ret; + + if (kvm_pte_valid(pte)) + return -EAGAIN; + + if (pte) { + if (!is_memory) + return -EPERM; + + switch (pte) { + case DEFERRED_MEMATTR_NOTE: + ret = handle_memattr_annotation(fault, addr, &prot, + &range); + if (ret) + return ret; + break; + default: + return -EPERM; + } + } + /* * Adjust against IOMMU devices first. host_stage2_adjust_range() should * be called last for proper alignment. 
@@ -661,7 +774,7 @@ static int host_stage2_idmap(u64 addr) return ret; } - ret = host_stage2_adjust_range(addr, &range); + ret = host_stage2_adjust_range(addr, &range, level); if (ret) return ret; @@ -750,6 +863,7 @@ void handle_host_mem_abort(struct kvm_cpu_context *host_ctxt) esr = read_sysreg_el2(SYS_ESR); BUG_ON(!__get_fault_info(esr, &fault)); + fault.esr_el2 = esr; addr = (fault.hpfar_el2 & HPFAR_MASK) << 8; addr |= fault.far_el2 & FAR_MASK; @@ -763,7 +877,7 @@ void handle_host_mem_abort(struct kvm_cpu_context *host_ctxt) /* If not handled, attempt to map the page. */ if (ret == -EPERM) - ret = host_stage2_idmap(addr); + ret = host_stage2_idmap(&fault, addr); host_unlock_component(); diff --git a/arch/arm64/kvm/hyp/nvhe/mm.c b/arch/arm64/kvm/hyp/nvhe/mm.c index 672676c20e9c..4d07d5e874e0 100644 --- a/arch/arm64/kvm/hyp/nvhe/mm.c +++ b/arch/arm64/kvm/hyp/nvhe/mm.c @@ -309,12 +309,29 @@ void *hyp_fixmap_map(phys_addr_t phys) return (void *)slot->addr + offset_in_page(phys); } +#define KVM_PTE_LEAF_ATTR_LO_S1_ATTRIDX GENMASK(4, 2) +void *hyp_fixmap_map_nc(phys_addr_t phys) +{ + struct hyp_fixmap_slot *slot = this_cpu_ptr(&fixmap_slots); + kvm_pte_t pte, *ptep = slot->ptep; + + pte = *ptep; + pte &= ~kvm_phys_to_pte(KVM_PHYS_INVALID); + pte |= kvm_phys_to_pte(phys) | KVM_PTE_VALID | + FIELD_PREP(KVM_PTE_LEAF_ATTR_LO_S1_ATTRIDX, MT_NORMAL_NC); + WRITE_ONCE(*ptep, pte); + dsb(ishst); + + return (void *)slot->addr; +} + static void fixmap_clear_slot(struct hyp_fixmap_slot *slot) { kvm_pte_t *ptep = slot->ptep; u64 addr = slot->addr; - WRITE_ONCE(*ptep, *ptep & ~KVM_PTE_VALID); + /* Zap the memory type too. 
MT_NORMAL is 0 so the fixmap is cacheable by default */ + WRITE_ONCE(*ptep, *ptep & ~(KVM_PTE_VALID | KVM_PTE_LEAF_ATTR_LO_S1_ATTRIDX)); /* * Irritatingly, the architecture requires that we use inner-shareable diff --git a/arch/arm64/kvm/pkvm.c b/arch/arm64/kvm/pkvm.c index 3ec32f19b4a9..84eae2b5a2b2 100644 --- a/arch/arm64/kvm/pkvm.c +++ b/arch/arm64/kvm/pkvm.c @@ -514,6 +514,11 @@ static int __init finalize_pkvm(void) pkvm_firmware_rmem_clear(); } +#ifdef CONFIG_ANDROID_ARM64_WORKAROUND_DMA_BEYOND_POC + if (!ret) + ret = pkvm_register_early_nc_mappings(); +#endif + return ret; } device_initcall_sync(finalize_pkvm);