From a97793ae99adb2faba52271201a2917cefa4c1ab Mon Sep 17 00:00:00 2001 From: Quentin Perret Date: Thu, 21 Oct 2021 10:23:57 +0100 Subject: [PATCH] ANDROID: KVM: arm64: Prepare EL2 guest stage-2 page-table In preparation for managing the stage-2 page-table of guests at EL2 in nVHE protected mode, allocate memory for the guest PGDs and populate the shadow kvm_s2_mmu upon shadow creation. Signed-off-by: Quentin Perret Bug: 209580772 Change-Id: I39f9dec9dc1bb60fe66ec6923f9b4dedc3e37f3f Signed-off-by: Will Deacon --- arch/arm64/kvm/hyp/include/nvhe/mem_protect.h | 1 + arch/arm64/kvm/hyp/include/nvhe/pkvm.h | 14 +- arch/arm64/kvm/hyp/nvhe/cache.S | 10 ++ arch/arm64/kvm/hyp/nvhe/hyp-main.c | 3 +- arch/arm64/kvm/hyp/nvhe/mem_protect.c | 129 ++++++++++++++++++ arch/arm64/kvm/hyp/nvhe/pkvm.c | 37 +++-- arch/arm64/kvm/mmu.c | 4 +- arch/arm64/kvm/pkvm.c | 29 +++- 8 files changed, 205 insertions(+), 22 deletions(-) diff --git a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h index 7db0bdbb3389..28496fdd7550 100644 --- a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h +++ b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h @@ -65,6 +65,7 @@ bool addr_is_memory(phys_addr_t phys); int host_stage2_idmap_locked(phys_addr_t addr, u64 size, enum kvm_pgtable_prot prot); int host_stage2_set_owner_locked(phys_addr_t addr, u64 size, u8 owner_id); int kvm_host_prepare_stage2(void *pgt_pool_base); +int kvm_guest_prepare_stage2(struct kvm_shadow_vm *vm, void *pgd); void handle_host_mem_abort(struct kvm_cpu_context *host_ctxt); int hyp_pin_shared_mem(void *from, void *to); diff --git a/arch/arm64/kvm/hyp/include/nvhe/pkvm.h b/arch/arm64/kvm/hyp/include/nvhe/pkvm.h index 6ddd1df812cf..c1c0d79cbac5 100644 --- a/arch/arm64/kvm/hyp/include/nvhe/pkvm.h +++ b/arch/arm64/kvm/hyp/include/nvhe/pkvm.h @@ -9,6 +9,9 @@ #include +#include +#include + /* * A container for the vcpu state that hyp needs to maintain for protected VMs. 
*/ @@ -24,9 +27,6 @@ struct kvm_shadow_vm { /* A unique id to the shadow structs in the hyp shadow area. */ int shadow_handle; - /* A pointer to the s2 mmu for the protected vm.. */ - struct kvm_s2_mmu *mmu; - /* Number of vcpus for the vm. */ int created_vcpus; @@ -39,13 +39,19 @@ struct kvm_shadow_vm { /* The total size of the donated shadow area. */ size_t shadow_area_size; + struct kvm_arch arch; + struct kvm_pgtable pgt; + struct kvm_pgtable_mm_ops mm_ops; + struct hyp_pool pool; + hyp_spinlock_t lock; + /* Array of the shadow state per vcpu. */ struct shadow_vcpu_state shadow_vcpus[0]; }; extern struct kvm_shadow_vm **shadow_table; -int __pkvm_init_shadow(struct kvm *kvm, void *shadow_va, size_t size); +int __pkvm_init_shadow(struct kvm *kvm, void *shadow_va, size_t size, void *pgd); int __pkvm_teardown_shadow(struct kvm *kvm); struct kvm_vcpu *hyp_get_shadow_vcpu(const struct kvm_vcpu *host_vcpu); diff --git a/arch/arm64/kvm/hyp/nvhe/cache.S b/arch/arm64/kvm/hyp/nvhe/cache.S index 958734f4d6b0..4c447f2a31e1 100644 --- a/arch/arm64/kvm/hyp/nvhe/cache.S +++ b/arch/arm64/kvm/hyp/nvhe/cache.S @@ -11,3 +11,13 @@ SYM_FUNC_START_PI(dcache_clean_inval_poc) dcache_by_line_op civac, sy, x0, x1, x2, x3 ret SYM_FUNC_END_PI(dcache_clean_inval_poc) + +SYM_FUNC_START_PI(icache_inval_pou) +alternative_if ARM64_HAS_CACHE_DIC + isb + ret +alternative_else_nop_endif + + invalidate_icache_by_line x0, x1, x2, x3 + ret +SYM_FUNC_END_PI(icache_inval_pou) diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c index 871c4ffe7ed6..15edc7b99035 100644 --- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c +++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c @@ -788,9 +788,10 @@ static void handle___pkvm_init_shadow(struct kvm_cpu_context *host_ctxt) DECLARE_REG(struct kvm *, host_kvm, host_ctxt, 1); DECLARE_REG(void *, host_shadow_va, host_ctxt, 2); DECLARE_REG(size_t, shadow_size, host_ctxt, 3); + DECLARE_REG(void *, pgd, host_ctxt, 4); cpu_reg(host_ctxt, 1) = 
__pkvm_init_shadow(host_kvm, host_shadow_va, - shadow_size); + shadow_size, pgd); } static void handle___pkvm_teardown_shadow(struct kvm_cpu_context *host_ctxt) diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c index 52e909db1eff..afa707e1e31d 100644 --- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c +++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c @@ -19,6 +19,7 @@ #include #include #include +#include #define KVM_HOST_S2_FLAGS (KVM_PGTABLE_S2_NOFWB | KVM_PGTABLE_S2_IDMAP) @@ -30,6 +31,21 @@ static struct hyp_pool host_s2_pool; const u8 pkvm_host_id = 0; const u8 pkvm_hyp_id = 1; +static DEFINE_PER_CPU(struct kvm_shadow_vm *, __current_vm); +#define current_vm (*this_cpu_ptr(&__current_vm)) + +static void __guest_lock(struct kvm_shadow_vm *vm) +{ + hyp_spin_lock(&vm->lock); + current_vm = vm; +} + +static void __guest_unlock(struct kvm_shadow_vm *vm) +{ + current_vm = NULL; + hyp_spin_unlock(&vm->lock); +} + static void host_lock_component(void) { hyp_spin_lock(&host_kvm.lock); @@ -146,6 +162,119 @@ int kvm_host_prepare_stage2(void *pgt_pool_base) return 0; } +static bool guest_stage2_force_pte_cb(u64 addr, u64 end, enum kvm_pgtable_prot prot) +{ + return true; +} + +static void *guest_s2_zalloc_pages_exact(size_t size) +{ + void *addr = hyp_alloc_pages(&current_vm->pool, get_order(size)); + + WARN_ON(size != (PAGE_SIZE << get_order(size))); + hyp_split_page(hyp_virt_to_page(addr)); + + return addr; +} + +static void guest_s2_free_pages_exact(void *addr, unsigned long size) +{ + u8 order = get_order(size); + unsigned int i; + + for (i = 0; i < (1 << order); i++) + hyp_put_page(&current_vm->pool, addr + (i * PAGE_SIZE)); +} + +static void *guest_s2_zalloc_page(void *mc) +{ + struct hyp_page *p; + void *addr; + + addr = hyp_alloc_pages(&current_vm->pool, 0); + if (addr) + return addr; + + addr = pop_hyp_memcache(mc, hyp_phys_to_virt); + if (!addr) + return addr; + + memset(addr, 0, PAGE_SIZE); + p = hyp_virt_to_page(addr); + memset(p, 0, sizeof(*p)); + 
p->refcount = 1; + + return addr; +} + +static void guest_s2_get_page(void *addr) +{ + hyp_get_page(&current_vm->pool, addr); +} + +static void guest_s2_put_page(void *addr) +{ + hyp_put_page(&current_vm->pool, addr); +} + +static void clean_dcache_guest_page(void *va, size_t size) +{ + __clean_dcache_guest_page(hyp_fixmap_map(__hyp_pa(va)), size); + hyp_fixmap_unmap(); +} + +static void invalidate_icache_guest_page(void *va, size_t size) +{ + __invalidate_icache_guest_page(hyp_fixmap_map(__hyp_pa(va)), size); + hyp_fixmap_unmap(); +} + +int kvm_guest_prepare_stage2(struct kvm_shadow_vm *vm, void *pgd) +{ + struct kvm_s2_mmu *mmu = &vm->arch.mmu; + unsigned long nr_pages; + int ret; + + vm->arch.vtcr = host_kvm.arch.vtcr; + nr_pages = kvm_pgtable_stage2_pgd_size(vm->arch.vtcr) >> PAGE_SHIFT; + ret = __pkvm_host_donate_hyp(hyp_virt_to_pfn(pgd), nr_pages); + if (ret) + return ret; + + ret = hyp_pool_init(&vm->pool, hyp_virt_to_pfn(pgd), nr_pages, 0); + if (ret) + goto err; + + hyp_spin_lock_init(&vm->lock); + vm->mm_ops = (struct kvm_pgtable_mm_ops) { + .zalloc_pages_exact = guest_s2_zalloc_pages_exact, + .free_pages_exact = guest_s2_free_pages_exact, + .zalloc_page = guest_s2_zalloc_page, + .phys_to_virt = hyp_phys_to_virt, + .virt_to_phys = hyp_virt_to_phys, + .page_count = hyp_page_count, + .get_page = guest_s2_get_page, + .put_page = guest_s2_put_page, + .dcache_clean_inval_poc = clean_dcache_guest_page, + .icache_inval_pou = invalidate_icache_guest_page, + }; + + __guest_lock(vm); + ret = __kvm_pgtable_stage2_init(mmu->pgt, mmu, &vm->mm_ops, 0, + guest_stage2_force_pte_cb); + __guest_unlock(vm); + if (ret) + goto err; + + vm->arch.mmu.pgd_phys = __hyp_pa(vm->pgt.pgd); + + return 0; + +err: + WARN_ON(__pkvm_hyp_donate_host(hyp_virt_to_pfn(pgd), nr_pages)); + return ret; +} + int __pkvm_prot_finalize(void) { struct kvm_s2_mmu *mmu = &host_kvm.arch.mmu; diff --git a/arch/arm64/kvm/hyp/nvhe/pkvm.c b/arch/arm64/kvm/hyp/nvhe/pkvm.c index db83670992ae..dc8fe0ccaa45 100644 --- 
a/arch/arm64/kvm/hyp/nvhe/pkvm.c +++ b/arch/arm64/kvm/hyp/nvhe/pkvm.c @@ -342,8 +342,6 @@ static int init_shadow_structs(struct kvm *kvm, struct kvm_shadow_vm *vm, int nr int i; int ret; - /* TODO: initialize the protected MMU. For now, use the host's. */ - vm->mmu = &kvm->arch.mmu; vm->host_kvm = kvm; vm->created_vcpus = 0; @@ -372,8 +370,7 @@ static int init_shadow_structs(struct kvm *kvm, struct kvm_shadow_vm *vm, int nr vm->vcpus[i] = shadow_vcpu; shadow_state->vm = vm; - /* TODO - use &vm->arch.mmu when setup properly */ - shadow_vcpu->arch.hw_mmu = host_vcpu->arch.hw_mmu; + shadow_vcpu->arch.hw_mmu = &vm->arch.mmu; shadow_vcpu->arch.pkvm.shadow_handle = vm->shadow_handle; shadow_vcpu->arch.pkvm.host_vcpu = host_vcpu; shadow_vcpu->arch.pkvm.shadow_vm = vm; @@ -421,7 +418,9 @@ static bool exists_shadow(struct kvm *host_kvm) static int __insert_shadow_table(struct kvm *kvm, struct kvm_shadow_vm *vm, size_t shadow_size) { + struct kvm_s2_mmu *mmu = &vm->arch.mmu; int shadow_handle; + int vmid; if (unlikely(num_shadow_entries >= KVM_MAX_PVMS)) return -ENOMEM; @@ -445,6 +444,16 @@ static int __insert_shadow_table(struct kvm *kvm, struct kvm_shadow_vm *vm, vm->shadow_handle = shadow_handle; vm->shadow_area_size = shadow_size; + /* VMID 0 is reserved for the host */ + vmid = next_shadow_alloc + 1; + if (vmid > 0xff) + return -ENOMEM; + + mmu->vmid.vmid = vmid; + mmu->vmid.vmid_gen = 0; + mmu->arch = &vm->arch; + mmu->pgt = &vm->pgt; + shadow_table[next_shadow_alloc] = vm; next_shadow_alloc = (next_shadow_alloc + 1) % KVM_MAX_PVMS; num_shadow_entries++; @@ -517,7 +526,8 @@ static int check_shadow_size(int nr_vcpus, size_t shadow_size) */ int __pkvm_init_shadow(struct kvm *kvm, void *shadow_va, - size_t shadow_size) + size_t shadow_size, + void *pgd) { struct kvm_shadow_vm *vm = kern_hyp_va(shadow_va); phys_addr_t shadow_pa = hyp_virt_to_phys(vm); @@ -548,19 +558,28 @@ int __pkvm_init_shadow(struct kvm *kvm, /* Add the entry to the shadow table. 
*/ ret = insert_shadow_table(kvm, vm, shadow_size); if (ret < 0) - goto err_clear_shadow; + goto err_remove_mappings; ret = init_shadow_structs(kvm, vm, nr_vcpus); if (ret < 0) - goto err_clear_shadow; + goto err_remove_shadow_table; + + pgd = kern_hyp_va(pgd); + ret = kvm_guest_prepare_stage2(vm, pgd); + if (ret) + goto err_remove_shadow_table; return vm->shadow_handle; -err_clear_shadow: +err_remove_shadow_table: + remove_shadow_table(vm->shadow_handle); + +err_remove_mappings: unpin_host_vcpus(vm); /* Clear the donated shadow memory on failure to avoid data leaks. */ memset(vm, 0, shadow_size); - WARN_ON(__pkvm_hyp_donate_host(pfn, nr_pages)); + WARN_ON(__pkvm_hyp_donate_host(hyp_phys_to_pfn(shadow_pa), + shadow_size >> PAGE_SHIFT)); err: hyp_unpin_shared_mem(kvm, kvm + 1); diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index af43d76f27d9..c1e94df0fab8 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -636,7 +636,9 @@ int kvm_init_stage2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu, unsigned long t return -EINVAL; phys_shift = KVM_VM_TYPE_ARM_IPA_SIZE(type); - if (phys_shift) { + if (is_protected_kvm_enabled()) { + phys_shift = kvm_ipa_limit; + } else if (phys_shift) { if (phys_shift > kvm_ipa_limit || phys_shift < ARM64_MIN_PARANGE_BITS) return -EINVAL; diff --git a/arch/arm64/kvm/pkvm.c b/arch/arm64/kvm/pkvm.c index 4481e841388d..50a00a1e03d7 100644 --- a/arch/arm64/kvm/pkvm.c +++ b/arch/arm64/kvm/pkvm.c @@ -117,9 +117,9 @@ static void update_vcpu_state(struct kvm_vcpu *vcpu, int shadow_handle) */ static int create_el2_shadow(struct kvm *kvm) { + size_t pgd_sz, shadow_sz; + void *pgd, *shadow_addr; int shadow_handle; - void *shadow_addr; - size_t shadow_sz; int ret, i; if (kvm->arch.pkvm.shadow_handle) @@ -128,17 +128,30 @@ static int create_el2_shadow(struct kvm *kvm) if (kvm->created_vcpus < 1) return -EINVAL; + pgd_sz = kvm_pgtable_stage2_pgd_size(kvm->arch.vtcr); + /* + * The PGD pages will be reclaimed using a hyp_memcache which 
implies + * page granularity. So, use alloc_pages_exact() to get individual + * refcounts. + */ + pgd = alloc_pages_exact(pgd_sz, GFP_KERNEL_ACCOUNT); + if (!pgd) + return -ENOMEM; + /* Allocate memory to donate to hyp for the kvm and vcpu state. */ shadow_sz = PAGE_ALIGN(KVM_SHADOW_VM_SIZE + SHADOW_VCPU_STATE_SIZE * kvm->created_vcpus); shadow_addr = alloc_pages_exact(shadow_sz, GFP_KERNEL_ACCOUNT); - if (!shadow_addr) - return -ENOMEM; + if (!shadow_addr) { + ret = -ENOMEM; + goto free_pgd; + } /* Donate the shadow memory to hyp and let hyp initialize it. */ - ret = kvm_call_hyp_nvhe(__pkvm_init_shadow, kvm, shadow_addr, shadow_sz); + ret = kvm_call_hyp_nvhe(__pkvm_init_shadow, kvm, shadow_addr, shadow_sz, + pgd); if (ret < 0) - goto err; + goto free_shadow; shadow_handle = ret; @@ -151,8 +164,10 @@ static int create_el2_shadow(struct kvm *kvm) return 0; -err: +free_shadow: free_pages_exact(shadow_addr, shadow_sz); +free_pgd: + free_pages_exact(pgd, pgd_sz); return ret; }