From e86aeb3febeb809fbc2cfe9ac6a8dc3ae0331bcc Mon Sep 17 00:00:00 2001
From: Quentin Perret
Date: Thu, 21 Oct 2021 11:38:46 +0100
Subject: [PATCH] ANDROID: KVM: arm64: Call __pkvm_host_share_guest on memory abort

Now that EL2 is ready to manage guest page-tables in protected mode, use
the recently introduced hypercall to share pages with guests from the
memory abort path, instead of manipulating their page-tables directly.

Signed-off-by: Quentin Perret
Bug: 209580772
Change-Id: I05ed8283d0eed19b2cfd6314cfcafbe3f689937c
Signed-off-by: Will Deacon
---
 arch/arm64/include/asm/kvm_host.h |  6 ++
 arch/arm64/kvm/arm.c              | 21 ++++++-
 arch/arm64/kvm/mmu.c              | 96 ++++++++++++++++++++++++++++++-
 3 files changed, 121 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 7b779271654d..93dd0a11f76e 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -156,10 +156,16 @@ struct kvm_s2_mmu {
 struct kvm_arch_memory_slot {
 };
 
+struct kvm_pinned_page {
+	struct list_head link;	/* entry in kvm_protected_vm.pinned_pages */
+	struct page *page;	/* page pinned by pkvm_mem_abort() */
+};
+
 struct kvm_protected_vm {
 	bool enabled;
 	int shadow_handle;
 	struct kvm_hyp_memcache teardown_mc;
+	struct list_head pinned_pages;	/* list of struct kvm_pinned_page */
 };
 
 struct kvm_arch {
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index 5252deddde96..144091443ff9 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -164,6 +164,10 @@ vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
 void free_hyp_memcache(struct kvm_hyp_memcache *mc);
 static void kvm_shadow_destroy(struct kvm *kvm)
 {
+	struct kvm_pinned_page *ppage, *tmp;
+	struct mm_struct *mm = current->mm; /* NOTE(review): same mm as at pin time? */
+	struct list_head *ppages;
+
 	if (!kvm_vm_is_protected(kvm))
 		return;
 
@@ -171,6 +175,14 @@ static void kvm_shadow_destroy(struct kvm *kvm)
 
 	WARN_ON(kvm_call_hyp_nvhe(__pkvm_teardown_shadow, kvm));
 	free_hyp_memcache(&kvm->arch.pkvm.teardown_mc);
+
+	ppages = &kvm->arch.pkvm.pinned_pages;
+	list_for_each_entry_safe(ppage, tmp, ppages, link) {
+		account_locked_vm(mm, 1, false); /* undo pkvm_mem_abort()'s charge */
+		unpin_user_pages_dirty_lock(&ppage->page, 1, true);
+		list_del(&ppage->link);
+		kfree(ppage);
+	}
 }
 
 /**
@@ -434,7 +446,10 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
 	if (vcpu_has_run_once(vcpu) && unlikely(!irqchip_in_kernel(vcpu->kvm)))
 		static_branch_dec(&userspace_irqchip_in_use);
 
-	kvm_mmu_free_memory_cache(&vcpu->arch.mmu_page_cache);
+	if (is_protected_kvm_enabled())
+		free_hyp_memcache(&vcpu->arch.pkvm_memcache);
+	else
+		kvm_mmu_free_memory_cache(&vcpu->arch.mmu_page_cache);
 	kvm_timer_vcpu_terminate(vcpu);
 	kvm_pmu_vcpu_destroy(vcpu);
 
@@ -475,6 +490,9 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 	struct kvm_s2_mmu *mmu;
 	int *last_ran;
 
+	if (is_protected_kvm_enabled())
+		goto nommu;	/* skip the hw_mmu / last_vcpu_ran handling */
+
 	mmu = vcpu->arch.hw_mmu;
 	last_ran = this_cpu_ptr(mmu->last_vcpu_ran);
 
@@ -492,6 +510,7 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 		*last_ran = vcpu->vcpu_id;
 	}
 
+nommu:
 	vcpu->cpu = cpu;
 
 	kvm_vgic_load(vcpu);
diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
index 67f9788060a5..761a737844e4 100644
--- a/arch/arm64/kvm/mmu.c
+++ b/arch/arm64/kvm/mmu.c
@@ -654,6 +654,11 @@ int kvm_init_stage2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu, unsigned long t
 	mmfr0 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1);
 	mmfr1 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR1_EL1);
 	kvm->arch.vtcr = kvm_get_vtcr(mmfr0, mmfr1, phys_shift);
+	INIT_LIST_HEAD(&kvm->arch.pkvm.pinned_pages);
+	mmu->arch = &kvm->arch;
+
+	if (is_protected_kvm_enabled())
+		return 0;	/* skip the mmu->pgt handling below */
 
 	if (mmu->pgt != NULL) {
 		kvm_err("kvm_arch already initialized?\n");
@@ -763,6 +768,9 @@ void kvm_free_stage2_pgd(struct kvm_s2_mmu *mmu)
 	struct kvm *kvm = kvm_s2_mmu_to_kvm(mmu);
 	struct kvm_pgtable *pgt = NULL;
 
+	if (is_protected_kvm_enabled())
+		return;	/* leave mmu->pgt untouched */
+
 	spin_lock(&kvm->mmu_lock);
 	pgt = mmu->pgt;
 	if (pgt) {
@@ -1117,6 +1125,88 @@ static int sanitise_mte_tags(struct kvm *kvm, kvm_pfn_t pfn,
 	return 0;
 }
 
+static int pkvm_host_share_guest(u64 pfn, u64 gfn, struct kvm_vcpu *vcpu)
+{
+	struct arm_smccc_res res;
+
+	arm_smccc_1_1_hvc(KVM_HOST_SMCCC_FUNC(__pkvm_host_share_guest),
+			  pfn, gfn, vcpu, &res);
+	WARN_ON(res.a0 != SMCCC_RET_SUCCESS);
+
+	/*
+	 * -EPERM here means the pfn was already shared, which should only
+	 * happen when two vCPUs fault on the same page and this one loses the
+	 * race. Map it to -EAGAIN; pkvm_mem_abort() then unpins and returns 0.
+	 */
+	return (res.a1 == -EPERM) ? -EAGAIN : res.a1;
+}
+
+static int pkvm_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
+			  unsigned long hva)
+{
+	struct kvm_hyp_memcache *hyp_memcache = &vcpu->arch.pkvm_memcache;
+	struct mm_struct *mm = current->mm;
+	unsigned int flags = FOLL_FORCE |
+			     FOLL_HWPOISON |
+			     FOLL_LONGTERM |
+			     FOLL_WRITE;
+	struct kvm_pinned_page *ppage;
+	struct kvm *kvm = vcpu->kvm;
+	struct page *page;
+	u64 pfn;
+	int ret;
+
+	ret = topup_hyp_memcache(hyp_memcache, kvm_mmu_cache_min_pages(kvm));
+	if (ret)
+		return -ENOMEM;
+
+	ppage = kmalloc(sizeof(*ppage), GFP_KERNEL_ACCOUNT);
+	if (!ppage)
+		return -ENOMEM;
+
+	ret = account_locked_vm(mm, 1, true); /* undone at dec_account on failure */
+	if (ret)
+		goto free_ppage;
+
+	mmap_read_lock(mm);
+	ret = pin_user_pages(hva, 1, flags, &page, NULL);
+	mmap_read_unlock(mm);
+
+	if (ret == -EHWPOISON) {
+		kvm_send_hwpoison_signal(hva, PAGE_SHIFT);
+		ret = 0;	/* signal sent above; not reported as an error */
+		goto dec_account;
+	} else if (ret != 1) {
+		ret = -EFAULT;
+		goto dec_account;
+	}
+
+	pfn = page_to_pfn(page);
+	ret = pkvm_host_share_guest(pfn, fault_ipa >> PAGE_SHIFT, vcpu);
+	if (ret) {
+		if (ret == -EAGAIN)
+			ret = 0;	/* lost the race; the page is already shared */
+		goto unpin;
+	}
+
+	ppage->page = page;	/* stays pinned until kvm_shadow_destroy() */
+	INIT_LIST_HEAD(&ppage->link);
+	spin_lock(&kvm->mmu_lock);
+	list_add(&ppage->link, &kvm->arch.pkvm.pinned_pages);
+	spin_unlock(&kvm->mmu_lock);
+
+	return 0;
+
+unpin:
+	unpin_user_pages(&page, 1);
+dec_account:
+	account_locked_vm(mm, 1, false);
+free_ppage:
+	kfree(ppage);
+
+	return ret;
+}
+
 static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 			  struct kvm_memory_slot *memslot, unsigned long hva,
 			  unsigned long fault_status)
@@ -1461,7 +1551,11 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu)
 		goto out_unlock;
 	}
 
-	ret = user_mem_abort(vcpu, fault_ipa, memslot, hva, fault_status);
+	/* Under pKVM, pages are shared with the guest via an EL2 hypercall. */
+	if (is_protected_kvm_enabled())
+		ret = pkvm_mem_abort(vcpu, fault_ipa, hva);
+	else
+		ret = user_mem_abort(vcpu, fault_ipa, memslot, hva, fault_status);
 	if (ret == 0)
 		ret = 1;
 out: