From f780e698c787b7cac163736a4e5451dfc704f0da Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@kernel.org>
Date: Thu, 2 Dec 2021 14:56:00 +0000
Subject: [PATCH] ANDROID: KVM: arm64: pkvm: Store vcpus in donated PGD at
 shadow-creation time

As KVM is moving to using an xarray to hold the vcpus instead of
the fixed-size array that has been the norm so far, we are faced
with two options: either teach the EL2 code to parse an xarray
when building the shadow structures, or find an alternative way
of communicating the vcpus to the EL2 code.

An easy way to deal with the second approach is to use the page
that EL1 donates to HYP to hold the VM S2 PGD. Instead of just
giving the memory, let's copy the pointers to the vcpus into this
page. The overhead is acceptable (it happens only at VM creation
time), and in most cases we only have a handful of vcpus.

Signed-off-by: Marc Zyngier <maz@kernel.org>
Bug: 209580772
Change-Id: Id0264f0960821563c4b3c0dfcbc43598b85a1f3b
Signed-off-by: Will Deacon <willdeacon@google.com>
---
 arch/arm64/kvm/hyp/nvhe/mem_protect.c | 12 ++----------
 arch/arm64/kvm/hyp/nvhe/pkvm.c        | 20 +++++++++++++++-----
 arch/arm64/kvm/pkvm.c                 |  8 ++++++++
 3 files changed, 25 insertions(+), 15 deletions(-)

diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
index 5c22195e776f..6025f18c910a 100644
--- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c
+++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
@@ -249,15 +249,11 @@ int kvm_guest_prepare_stage2(struct kvm_shadow_vm *vm, void *pgd)
 	unsigned long nr_pages;
 	int ret;
 
-	vm->arch.vtcr = host_kvm.arch.vtcr;
 	nr_pages = kvm_pgtable_stage2_pgd_size(vm->arch.vtcr) >> PAGE_SHIFT;
-	ret = __pkvm_host_donate_hyp(hyp_virt_to_pfn(pgd), nr_pages);
-	if (ret)
-		return ret;
 
 	ret = hyp_pool_init(&vm->pool, hyp_virt_to_pfn(pgd), nr_pages, 0);
 	if (ret)
-		goto err;
+		return ret;
 
 	hyp_spin_lock_init(&vm->lock);
 	vm->mm_ops = (struct kvm_pgtable_mm_ops) {
@@ -278,15 +274,11 @@ int kvm_guest_prepare_stage2(struct kvm_shadow_vm *vm, void *pgd)
 					guest_stage2_force_pte_cb);
 	__guest_unlock(vm);
 	if (ret)
-		goto err;
+		return ret;
 
 	vm->arch.mmu.pgd_phys = __hyp_pa(vm->pgt.pgd);
 
 	return 0;
-
-err:
-	WARN_ON(__pkvm_hyp_donate_host(hyp_virt_to_pfn(pgd), nr_pages));
-	return ret;
 }
 
 static int reclaim_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
diff --git a/arch/arm64/kvm/hyp/nvhe/pkvm.c b/arch/arm64/kvm/hyp/nvhe/pkvm.c
index d7d3155b039f..a7f14e2df6e5 100644
--- a/arch/arm64/kvm/hyp/nvhe/pkvm.c
+++ b/arch/arm64/kvm/hyp/nvhe/pkvm.c
@@ -349,7 +349,8 @@ static void unpin_host_vcpus(struct kvm_shadow_vm *vm)
 	}
 }
 
-static int init_shadow_structs(struct kvm *kvm, struct kvm_shadow_vm *vm, int nr_vcpus)
+static int init_shadow_structs(struct kvm *kvm, struct kvm_shadow_vm *vm,
+			       struct kvm_vcpu **vcpu_array, int nr_vcpus)
 {
 	int i;
 	int ret;
@@ -359,7 +360,7 @@ static int init_shadow_structs(struct kvm *kvm, struct kvm_shadow_vm *vm, int nr
 	vm->arch.pkvm.pvmfw_load_addr = kvm->arch.pkvm.pvmfw_load_addr;
 
 	for (i = 0; i < nr_vcpus; i++) {
-		struct kvm_vcpu *host_vcpu = kern_hyp_va(kvm->vcpus[i]);
+		struct kvm_vcpu *host_vcpu = kern_hyp_va(vcpu_array[i]);
 		struct shadow_vcpu_state *shadow_state = &vm->shadow_vcpus[i];
 		struct kvm_vcpu *shadow_vcpu = &shadow_state->vcpu;
 
@@ -553,10 +554,12 @@ int __pkvm_init_shadow(struct kvm *kvm,
 	phys_addr_t shadow_pa = hyp_virt_to_phys(vm);
 	u64 pfn = hyp_phys_to_pfn(shadow_pa);
 	u64 nr_pages = shadow_size >> PAGE_SHIFT;
+	u64 pgd_size;
 	int nr_vcpus = 0;
 	int ret = 0;
 
 	kvm = kern_hyp_va(kvm);
+	pgd = kern_hyp_va(pgd);
 
 	ret = hyp_pin_shared_mem(kvm, kvm + 1);
 	if (ret)
@@ -574,17 +577,21 @@ int __pkvm_init_shadow(struct kvm *kvm,
 
 	/* Ensure we're working with a clean slate. */
 	memset(vm, 0, shadow_size);
+	vm->arch.vtcr = host_kvm.arch.vtcr;
+	pgd_size = kvm_pgtable_stage2_pgd_size(vm->arch.vtcr) >> PAGE_SHIFT;
+	ret =  __pkvm_host_donate_hyp(hyp_virt_to_pfn(pgd), pgd_size);
+	if (ret)
+		goto err_remove_mappings;
 
 	/* Add the entry to the shadow table. */
 	ret = insert_shadow_table(kvm, vm, shadow_size);
 	if (ret < 0)
-		goto err_remove_mappings;
+		goto err_remove_pgd;
 
-	ret = init_shadow_structs(kvm, vm, nr_vcpus);
+	ret = init_shadow_structs(kvm, vm, pgd, nr_vcpus);
 	if (ret < 0)
 		goto err_remove_shadow_table;
 
-	pgd = kern_hyp_va(pgd);
 	ret = kvm_guest_prepare_stage2(vm, pgd);
 	if (ret)
 		goto err_remove_shadow_table;
@@ -594,6 +601,9 @@ int __pkvm_init_shadow(struct kvm *kvm,
 err_remove_shadow_table:
 	remove_shadow_table(vm->shadow_handle);
 
+err_remove_pgd:
+	WARN_ON(__pkvm_hyp_donate_host(hyp_virt_to_pfn(pgd), pgd_size));
+
 err_remove_mappings:
 	unpin_host_vcpus(vm);
 	/* Clear the donated shadow memory on failure to avoid data leaks. */
diff --git a/arch/arm64/kvm/pkvm.c b/arch/arm64/kvm/pkvm.c
index 9bda3f033a31..7017771928e3 100644
--- a/arch/arm64/kvm/pkvm.c
+++ b/arch/arm64/kvm/pkvm.c
@@ -124,8 +124,10 @@ static void update_vcpu_state(struct kvm_vcpu *vcpu, int shadow_handle)
  */
 static int __create_el2_shadow(struct kvm *kvm)
 {
+	struct kvm_vcpu *vcpu, **vcpu_array;
 	size_t pgd_sz, shadow_sz;
 	void *pgd, *shadow_addr;
+	unsigned long idx;
 	int shadow_handle;
 	int ret, i;
 
@@ -151,6 +153,12 @@ static int __create_el2_shadow(struct kvm *kvm)
 		goto free_pgd;
 	}
 
+	/* Stash the vcpu pointers into the PGD */
+	BUILD_BUG_ON(KVM_MAX_VCPUS > (PAGE_SIZE / sizeof(u64)));
+	vcpu_array = pgd;
+	kvm_for_each_vcpu(idx, vcpu, kvm)
+		vcpu_array[idx] = vcpu;
+
 	/* Donate the shadow memory to hyp and let hyp initialize it. */
 	ret = kvm_call_hyp_nvhe(__pkvm_init_shadow, kvm, shadow_addr, shadow_sz,
 				pgd);