diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
index ef1cf41ea394..edca12fd852e 100644
--- a/arch/arm64/include/asm/kvm_asm.h
+++ b/arch/arm64/include/asm/kvm_asm.h
@@ -75,6 +75,7 @@ enum __kvm_host_smccc_func {
 	__KVM_HOST_SMCCC_FUNC___vgic_v3_save_vmcr_aprs,
 	__KVM_HOST_SMCCC_FUNC___vgic_v3_restore_vmcr_aprs,
 	__KVM_HOST_SMCCC_FUNC___pkvm_init_shadow,
+	__KVM_HOST_SMCCC_FUNC___pkvm_init_shadow_vcpu,
 	__KVM_HOST_SMCCC_FUNC___pkvm_teardown_shadow,
 	__KVM_HOST_SMCCC_FUNC___pkvm_vcpu_load,
 	__KVM_HOST_SMCCC_FUNC___pkvm_vcpu_put,
diff --git a/arch/arm64/kvm/hyp/include/nvhe/pkvm.h b/arch/arm64/kvm/hyp/include/nvhe/pkvm.h
index 80ecc831fc31..b8c39fe50ba9 100644
--- a/arch/arm64/kvm/hyp/include/nvhe/pkvm.h
+++ b/arch/arm64/kvm/hyp/include/nvhe/pkvm.h
@@ -42,6 +42,12 @@ struct kvm_shadow_vm {
 	/* The total size of the donated shadow area. */
 	size_t shadow_area_size;
 
+	/*
+	 * The number of vcpus initialized and ready to run in the shadow vm.
+	 * Modifying this is protected by shadow_lock.
+	 */
+	unsigned int nr_vcpus;
+
 	struct kvm_arch arch;
 	struct kvm_pgtable pgt;
 	struct kvm_pgtable_mm_ops mm_ops;
@@ -65,6 +71,7 @@ extern phys_addr_t pvmfw_size;
 
 void hyp_shadow_table_init(void *tbl);
 int __pkvm_init_shadow(struct kvm *kvm, void *shadow_va, size_t size, void *pgd);
+int __pkvm_init_shadow_vcpu(unsigned int shadow_handle, struct kvm_vcpu *host_vcpu);
 int __pkvm_teardown_shadow(int shadow_handle);
 struct kvm_vcpu *get_shadow_vcpu(int shadow_handle, unsigned int vcpu_idx);
 void put_shadow_vcpu(struct kvm_vcpu *vcpu);
diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
index 14f9da9f87b9..b88567cccc50 100644
--- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c
+++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
@@ -1006,6 +1006,14 @@ static void handle___pkvm_init_shadow(struct kvm_cpu_context *host_ctxt)
 						   shadow_size, pgd);
 }
 
+static void handle___pkvm_init_shadow_vcpu(struct kvm_cpu_context *host_ctxt)
+{
+	DECLARE_REG(unsigned int, shadow_handle, host_ctxt, 1);
+	DECLARE_REG(struct kvm_vcpu *, host_vcpu, host_ctxt, 2);
+
+	cpu_reg(host_ctxt, 1) = __pkvm_init_shadow_vcpu(shadow_handle, host_vcpu);
+}
+
 static void handle___pkvm_teardown_shadow(struct kvm_cpu_context *host_ctxt)
 {
 	DECLARE_REG(int, shadow_handle, host_ctxt, 1);
@@ -1079,6 +1087,7 @@ static const hcall_t host_hcall[] = {
 	HANDLE_FUNC(__vgic_v3_save_vmcr_aprs),
 	HANDLE_FUNC(__vgic_v3_restore_vmcr_aprs),
 	HANDLE_FUNC(__pkvm_init_shadow),
+	HANDLE_FUNC(__pkvm_init_shadow_vcpu),
 	HANDLE_FUNC(__pkvm_teardown_shadow),
 	HANDLE_FUNC(__pkvm_vcpu_load),
 	HANDLE_FUNC(__pkvm_vcpu_put),
diff --git a/arch/arm64/kvm/hyp/nvhe/pkvm.c b/arch/arm64/kvm/hyp/nvhe/pkvm.c
index 10323ac3daf0..56de73e7c378 100644
--- a/arch/arm64/kvm/hyp/nvhe/pkvm.c
+++ b/arch/arm64/kvm/hyp/nvhe/pkvm.c
@@ -276,7 +276,7 @@ struct kvm_vcpu *get_shadow_vcpu(int shadow_handle, unsigned int vcpu_idx)
 
 	hyp_spin_lock(&shadow_lock);
 	vm = find_shadow_by_handle(shadow_handle);
-	if (!vm || vm->created_vcpus <= vcpu_idx)
+	if (!vm || vm->nr_vcpus <= vcpu_idx)
 		goto unlock;
 	vcpu = &vm->shadow_vcpus[vcpu_idx].vcpu;
 
@@ -394,28 +394,6 @@ static void unpin_host_vcpus(struct shadow_vcpu_state *shadow_vcpus, int nr_vcpu
 		unpin_host_vcpu(&shadow_vcpus[i]);
 }
 
-static int set_host_vcpus(struct shadow_vcpu_state *shadow_vcpus, int nr_vcpus,
-			  struct kvm_vcpu **vcpu_array, size_t vcpu_array_size)
-{
-	int i;
-
-	if (vcpu_array_size < sizeof(*vcpu_array) * nr_vcpus)
-		return -EINVAL;
-
-	for (i = 0; i < nr_vcpus; i++) {
-		struct kvm_vcpu *host_vcpu = kern_hyp_va(vcpu_array[i]);
-
-		if (hyp_pin_shared_mem(host_vcpu, host_vcpu + 1)) {
-			unpin_host_vcpus(shadow_vcpus, i);
-			return -EBUSY;
-		}
-
-		shadow_vcpus[i].vcpu.arch.pkvm.host_vcpu = host_vcpu;
-	}
-
-	return 0;
-}
-
 static int init_ptrauth(struct kvm_vcpu *shadow_vcpu)
 {
 	int ret = 0;
@@ -435,23 +413,39 @@ static void init_shadow_vm(struct kvm *kvm, struct kvm_shadow_vm *vm,
 }
 
 static int init_shadow_vcpu(struct shadow_vcpu_state *shadow_state,
+			    struct kvm_vcpu *host_vcpu,
 			    struct kvm_shadow_vm *vm, int vcpu_idx)
 {
 	struct kvm_vcpu *shadow_vcpu = &shadow_state->vcpu;
-	struct kvm_vcpu *host_vcpu = shadow_vcpu->arch.pkvm.host_vcpu;
 	int ret;
 
+	host_vcpu = kern_hyp_va(host_vcpu);
+	if (hyp_pin_shared_mem(host_vcpu, host_vcpu + 1))
+		return -EBUSY;
+
+	/*
+	 * Record the pinned host vcpu before any check that can fail:
+	 * unpin_host_vcpu() on the error path is only given shadow_state,
+	 * so it can only find the pinned vcpu through this pointer.
+	 */
+	shadow_vcpu->arch.pkvm.host_vcpu = host_vcpu;
+
+	if (host_vcpu->vcpu_idx != vcpu_idx) {
+		ret = -EINVAL;
+		goto done;
+	}
+
 	shadow_vcpu->kvm = vm->host_kvm;
 	shadow_vcpu->vcpu_id = host_vcpu->vcpu_id;
 	shadow_vcpu->vcpu_idx = vcpu_idx;
 
 	ret = copy_features(shadow_vcpu, host_vcpu);
 	if (ret)
-		return ret;
+		goto done;
 
 	ret = init_ptrauth(shadow_vcpu);
 	if (ret)
-		return ret;
+		goto done;
 
 	if (test_bit(KVM_ARM_VCPU_SVE, shadow_vcpu->arch.features)) {
 		size_t sve_state_size;
@@ -470,7 +464,8 @@ static int init_shadow_vcpu(struct shadow_vcpu_state *shadow_state,
 			clear_bit(KVM_ARM_VCPU_SVE, shadow_vcpu->arch.features);
 			shadow_vcpu->arch.sve_state = NULL;
 			shadow_vcpu->arch.sve_max_vl = 0;
-			return -EINVAL;
+			ret = -EINVAL;
+			goto done;
 		}
 	}
 
@@ -489,8 +484,10 @@ static int init_shadow_vcpu(struct shadow_vcpu_state *shadow_state,
 		shadow_vcpu->arch.pkvm.power_state =
 			PSCI_0_2_AFFINITY_LEVEL_OFF;
 	} else if (pvm_has_pvmfw(vm)) {
-		if (vm->pvmfw_entry_vcpu)
-			return -EINVAL;
+		if (vm->pvmfw_entry_vcpu) {
+			ret = -EINVAL;
+			goto done;
+		}
 
 		vm->pvmfw_entry_vcpu = shadow_vcpu;
 		shadow_vcpu->arch.reset_state.reset = true;
@@ -507,24 +504,11 @@ static int init_shadow_vcpu(struct shadow_vcpu_state *shadow_state,
 			PSCI_0_2_AFFINITY_LEVEL_ON_PENDING;
 	}
 
-	return 0;
-}
+done:
+	if (ret)
+		unpin_host_vcpu(shadow_state);
 
-static int init_shadow_structs(struct kvm *kvm, struct kvm_shadow_vm *vm,
-			       int nr_vcpus)
-{
-	int i;
-
-	init_shadow_vm(kvm, vm, nr_vcpus);
-
-	for (i = 0; i < nr_vcpus; i++) {
-		int ret = init_shadow_vcpu(&vm->shadow_vcpus[i], vm, i);
-
-		if (ret)
-			return ret;
-	}
-
-	return 0;
+	return ret;
 }
 
 static bool __exists_shadow(struct kvm *host_kvm)
@@ -667,8 +651,6 @@ static void drain_shadow_vcpus(struct shadow_vcpu_state *shadow_vcpus,
  *	Must be a multiple of the page size.
  * pgd: The host va of the area being donated for the stage-2 PGD for the VM.
  *	Must be page aligned. Its size is implied by the VM's VTCR.
- * Note: An array to the host KVM VCPUs (host VA) is passed via the pgd, as to
- *	 not to be dependent on how the VCPU's are layed out in struct kvm.
  *
  * Return a unique handle to the protected VM on success,
  * negative error code on failure.
@@ -720,19 +702,13 @@ int __pkvm_init_shadow(struct kvm *kvm,
 	if (ret)
 		goto err_remove_mappings;
 
-	ret = set_host_vcpus(vm->shadow_vcpus, nr_vcpus, pgd, pgd_size);
-	if (ret)
-		goto err_remove_pgd;
-
-	ret = init_shadow_structs(kvm, vm, nr_vcpus);
-	if (ret < 0)
-		goto err_unpin_host_vcpus;
+	init_shadow_vm(kvm, vm, nr_vcpus);
 
 	/* Add the entry to the shadow table. */
 	hyp_spin_lock(&shadow_lock);
 	ret = insert_shadow_table(kvm, vm, shadow_size);
 	if (ret < 0)
-		goto err_unlock_unpin_host_vcpus;
+		goto err_unlock;
 
 	ret = kvm_guest_prepare_stage2(vm, pgd);
 	if (ret)
@@ -743,24 +719,63 @@ int __pkvm_init_shadow(struct kvm *kvm,
 
 err_remove_shadow_table:
 	remove_shadow_table(vm->shadow_handle);
-err_unlock_unpin_host_vcpus:
+err_unlock:
 	hyp_spin_unlock(&shadow_lock);
-err_unpin_host_vcpus:
-	unpin_host_vcpus(vm->shadow_vcpus, nr_vcpus);
-err_remove_pgd:
 	WARN_ON(__pkvm_hyp_donate_host(hyp_virt_to_pfn(pgd), nr_pgd_pages));
-
 err_remove_mappings:
 	/* Clear the donated shadow memory on failure to avoid data leaks. */
 	memset(vm, 0, shadow_size);
 	WARN_ON(__pkvm_hyp_donate_host(hyp_phys_to_pfn(shadow_pa),
 				       shadow_size >> PAGE_SHIFT));
-
 err:
 	hyp_unpin_shared_mem(kvm, kvm + 1);
 	return ret;
 }
 
+/*
+ * Initialize the protected vcpu state shadow copy in host-donated memory.
+ *
+ * Shadow vcpus are initialized one at a time, in vcpu_idx order: each call
+ * fills the next free slot (vm->nr_vcpus) and host_vcpu->vcpu_idx must match.
+ *
+ * shadow_handle: The handle for the protected vm.
+ * host_vcpu: A pointer to the corresponding host vcpu (host va).
+ *
+ * Return 0 on success, negative error code on failure.
+ */
+int __pkvm_init_shadow_vcpu(unsigned int shadow_handle,
+			    struct kvm_vcpu *host_vcpu)
+{
+	struct kvm_shadow_vm *vm;
+	struct shadow_vcpu_state *shadow_state;
+	unsigned int idx;
+	int ret;
+
+	hyp_spin_lock(&shadow_lock);
+
+	vm = find_shadow_by_handle(shadow_handle);
+	if (!vm) {
+		ret = -ENOENT;
+		goto unlock;
+	}
+
+	idx = vm->nr_vcpus;
+	if (idx >= vm->created_vcpus) {
+		ret = -EINVAL;
+		goto unlock;
+	}
+
+	shadow_state = &vm->shadow_vcpus[idx];
+	ret = init_shadow_vcpu(shadow_state, host_vcpu, vm, idx);
+	if (ret)
+		goto unlock;
+
+	vm->nr_vcpus++;
+unlock:
+	hyp_spin_unlock(&shadow_lock);
+	return ret;
+}
+
 static void teardown_donated_memory(struct kvm_hyp_memcache *mc, void *addr,
 				    size_t size)
 {
@@ -781,6 +796,7 @@ int __pkvm_teardown_shadow(int shadow_handle)
 	struct kvm_hyp_memcache *mc;
 	struct kvm_shadow_vm *vm;
 	struct kvm *host_kvm;
+	unsigned int nr_vcpus;
 	int err;
 	int i;
 
@@ -798,6 +814,7 @@ int __pkvm_teardown_shadow(int shadow_handle)
 	}
 
 	host_kvm = vm->host_kvm;
+	nr_vcpus = vm->nr_vcpus;
 
 	/*
 	 * Clear the tracking for last_loaded_vcpu for all cpus for this vm in
@@ -820,8 +837,8 @@ int __pkvm_teardown_shadow(int shadow_handle)
 	/* Reclaim guest pages, and page-table pages */
 	mc = &host_kvm->arch.pkvm.teardown_mc;
 	reclaim_guest_pages(vm, mc);
-	drain_shadow_vcpus(vm->shadow_vcpus, vm->created_vcpus, mc);
-	unpin_host_vcpus(vm->shadow_vcpus, vm->created_vcpus);
+	drain_shadow_vcpus(vm->shadow_vcpus, nr_vcpus, mc);
+	unpin_host_vcpus(vm->shadow_vcpus, nr_vcpus);
 
 	teardown_donated_memory(mc, vm, vm->shadow_area_size);
 
@@ -946,7 +963,7 @@ struct kvm_vcpu *pvm_mpidr_to_vcpu(struct kvm_shadow_vm *vm, unsigned long mpidr
 
 	mpidr &= MPIDR_HWID_BITMASK;
 
-	for (i = 0; i < vm->created_vcpus; i++) {
+	for (i = 0; i < READ_ONCE(vm->nr_vcpus); i++) {
 		vcpu = vm->vcpus[i];
 
 		if (mpidr == kvm_vcpu_get_mpidr_aff(vcpu))
@@ -1063,7 +1080,7 @@ static bool pvm_psci_vcpu_affinity_info(struct kvm_vcpu *vcpu)
 	 * then if at least one is PENDING_ON then return PENDING_ON.
 	 * Otherwise, return OFF.
 	 */
-	for (i = 0; i < vm->created_vcpus; i++) {
+	for (i = 0; i < READ_ONCE(vm->nr_vcpus); i++) {
 		tmp = vm->vcpus[i];
 		mpidr = kvm_vcpu_get_mpidr_aff(tmp);
 
diff --git a/arch/arm64/kvm/pkvm.c b/arch/arm64/kvm/pkvm.c
index f7f6b08dd371..d69865c166fc 100644
--- a/arch/arm64/kvm/pkvm.c
+++ b/arch/arm64/kvm/pkvm.c
@@ -120,7 +120,7 @@ void __init kvm_hyp_reserve(void)
  */
 static int __create_el2_shadow(struct kvm *kvm)
 {
-	struct kvm_vcpu *vcpu, **vcpu_array;
+	struct kvm_vcpu *vcpu;
 	size_t pgd_sz, shadow_sz;
 	void *pgd, *shadow_addr;
 	unsigned long idx;
@@ -149,12 +149,6 @@ static int __create_el2_shadow(struct kvm *kvm)
 		goto free_pgd;
 	}
 
-	/* Stash the vcpu pointers into the PGD */
-	BUILD_BUG_ON(KVM_MAX_VCPUS > (PAGE_SIZE / sizeof(u64)));
-	vcpu_array = pgd;
-	kvm_for_each_vcpu(idx, vcpu, kvm)
-		vcpu_array[idx] = vcpu;
-
 	/* Donate the shadow memory to hyp and let hyp initialize it. */
 	ret = kvm_call_hyp_nvhe(__pkvm_init_shadow, kvm, shadow_addr, shadow_sz,
 				pgd);
@@ -166,8 +160,25 @@ static int __create_el2_shadow(struct kvm *kvm)
 	/* Store the shadow handle given by hyp for future call reference. */
 	kvm->arch.pkvm.shadow_handle = shadow_handle;
 
+	/* Initialize the shadow vcpus. */
+	kvm_for_each_vcpu(idx, vcpu, kvm) {
+		/* The vcpus must be indexed sequentially, starting at 0. */
+		if (WARN_ON(vcpu->vcpu_idx != idx)) {
+			ret = -EINVAL;
+			goto destroy_vm;
+		}
+
+		ret = kvm_call_hyp_nvhe(__pkvm_init_shadow_vcpu, shadow_handle,
+					vcpu);
+		if (ret)
+			goto destroy_vm;
+	}
+
 	return 0;
 
+destroy_vm:
+	kvm_shadow_destroy(kvm);
+	return ret;
 free_shadow:
 	free_pages_exact(shadow_addr, shadow_sz);
 free_pgd: