diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
index 3a2055044968..64aa2f65ce4f 100644
--- a/arch/arm64/include/asm/kvm_asm.h
+++ b/arch/arm64/include/asm/kvm_asm.h
@@ -82,6 +82,7 @@ enum __kvm_host_smccc_func {
 	__KVM_HOST_SMCCC_FUNC___pkvm_vcpu_put,
 	__KVM_HOST_SMCCC_FUNC___pkvm_vcpu_sync_state,
 	__KVM_HOST_SMCCC_FUNC___pkvm_iommu_driver_init,
+	__KVM_HOST_SMCCC_FUNC___pkvm_iommu_register,	/* served by handle___pkvm_iommu_register() */
 };
 
 #define DECLARE_KVM_VHE_SYM(sym)	extern char sym[]
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index ea7e80bdb8a9..e42b318baa78 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -395,6 +395,8 @@ enum pkvm_iommu_driver_id {
 };
 
 int pkvm_iommu_driver_init(enum pkvm_iommu_driver_id drv_id, void *data, size_t size);
+int pkvm_iommu_register(struct device *dev, enum pkvm_iommu_driver_id drv_id,
+			phys_addr_t pa, size_t size);	/* pa, size: MMIO range of dev */
 
 struct vcpu_reset_state {
 	unsigned long pc;
diff --git a/arch/arm64/kvm/hyp/include/nvhe/iommu.h b/arch/arm64/kvm/hyp/include/nvhe/iommu.h
index 54f5ce5976d7..51935ab93efa 100644
--- a/arch/arm64/kvm/hyp/include/nvhe/iommu.h
+++ b/arch/arm64/kvm/hyp/include/nvhe/iommu.h
@@ -16,15 +16,32 @@ struct pkvm_iommu_ops {
 	 * Driver initialization lock held during callback.
 	 */
 	int (*init)(void *data, size_t size);
+
+	/*
+	 * Optional driver-specific check of the base and size given at registration;
+	 * a non-zero return rejects the device. Should be stateless: no locks held.
+	 */
+	int (*validate)(phys_addr_t base, size_t size);
+
+	/* Bytes of driver-private data reserved behind each struct pkvm_iommu. */
+	size_t data_size;
 };
 
 struct pkvm_iommu {
 	struct list_head list;
+	unsigned long id;	/* caller-supplied device ID, unique among registered IOMMUs */
+	const struct pkvm_iommu_ops *ops;	/* ops of the driver the device was registered with */
 	phys_addr_t pa;
+	void *va;	/* EL2 private mapping created for [pa, pa + size) */
 	size_t size;
+	char data[];	/* driver-private area, ops->data_size bytes */
 };
 
 int __pkvm_iommu_driver_init(enum pkvm_iommu_driver_id id, void *data, size_t size);
+int __pkvm_iommu_register(unsigned long dev_id,
+			  enum pkvm_iommu_driver_id drv_id,
+			  phys_addr_t dev_pa, size_t dev_size,
+			  void *kern_mem_va, size_t mem_size);	/* kern_mem_va, mem_size: optional page-aligned donation */
 int pkvm_iommu_host_stage2_adjust_range(phys_addr_t addr, phys_addr_t *start,
 					phys_addr_t *end);
diff --git a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h
index abc366c4e8ad..a42090ab120c 100644
--- a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h
+++ b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h
@@ -84,6 +84,7 @@ int __pkvm_host_unshare_ffa(u64 pfn, u64 nr_pages);
 bool addr_is_memory(phys_addr_t phys);
 int host_stage2_idmap_locked(phys_addr_t addr, u64 size, enum kvm_pgtable_prot prot);
 int host_stage2_set_owner_locked(phys_addr_t addr, u64 size, u8 owner_id);
+int host_stage2_unmap_dev_locked(phys_addr_t start, u64 size);	/* asserts that host_mmu.lock is held */
 int kvm_host_prepare_stage2(void *pgt_pool_base);
 int kvm_guest_prepare_stage2(struct pkvm_hyp_vm *vm, void *pgd);
 void handle_host_mem_abort(struct kvm_cpu_context *host_ctxt);
diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
index e758e81c3eb1..c9acd573b77a 100644
--- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c
+++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
@@ -1123,6 +1123,19 @@ static void handle___pkvm_iommu_driver_init(struct kvm_cpu_context *host_ctxt)
 	cpu_reg(host_ctxt, 1) = __pkvm_iommu_driver_init(id, data, size);
 }
 
+static void handle___pkvm_iommu_register(struct kvm_cpu_context *host_ctxt)
+{	/* Hypercall shim: arguments come from host_ctxt slots 1-6 via DECLARE_REG. */
+	DECLARE_REG(unsigned long, dev_id, host_ctxt, 1);
+	DECLARE_REG(enum pkvm_iommu_driver_id, drv_id, host_ctxt, 2);
+	DECLARE_REG(phys_addr_t, dev_pa, host_ctxt, 3);
+	DECLARE_REG(size_t, dev_size, host_ctxt, 4);
+	DECLARE_REG(void *, mem, host_ctxt, 5);
+	DECLARE_REG(size_t, mem_size, host_ctxt, 6);
+
+	cpu_reg(host_ctxt, 1) = __pkvm_iommu_register(dev_id, drv_id, dev_pa,
+						      dev_size, mem, mem_size);	/* result goes back in slot 1 */
+}
+
 typedef void (*hcall_t)(struct kvm_cpu_context *);
 
 #define HANDLE_FUNC(x)	[__KVM_HOST_SMCCC_FUNC_##x] = (hcall_t)handle_##x
@@ -1158,6 +1171,7 @@ static const hcall_t host_hcall[] = {
 	HANDLE_FUNC(__pkvm_vcpu_put),
 	HANDLE_FUNC(__pkvm_vcpu_sync_state),
 	HANDLE_FUNC(__pkvm_iommu_driver_init),
+	HANDLE_FUNC(__pkvm_iommu_register),
 };
 
 static void handle_host_hcall(struct kvm_cpu_context *host_ctxt)
diff --git a/arch/arm64/kvm/hyp/nvhe/iommu.c b/arch/arm64/kvm/hyp/nvhe/iommu.c
index f2b510fd2727..f2132c381766 100644
--- a/arch/arm64/kvm/hyp/nvhe/iommu.c
+++ b/arch/arm64/kvm/hyp/nvhe/iommu.c
@@ -9,8 +9,10 @@
 #include
 #include
 #include
+#include
 
 #include
+#include
 
 enum {
 	IOMMU_DRIVER_NOT_READY = 0,
@@ -33,6 +35,16 @@ static void assert_host_component_locked(void)
 	hyp_assert_lock_held(&host_mmu.lock);
 }
 
+static void host_lock_component(void)
+{	/* Acquire host_mmu.lock, the lock assert_host_component_locked() checks. */
+	hyp_spin_lock(&host_mmu.lock);
+}
+
+static void host_unlock_component(void)
+{	/* Release host_mmu.lock. */
+	hyp_spin_unlock(&host_mmu.lock);
+}
+
 /*
  * Find IOMMU driver by its ID. The input ID is treated as unstrusted
  * and is properly validated.
@@ -69,6 +81,91 @@ static inline void driver_release_init(struct pkvm_iommu_driver *drv,
 				 : IOMMU_DRIVER_NOT_READY);
 }
 
+static inline bool is_driver_ready(struct pkvm_iommu_driver *drv)
+{
+	return atomic_read(&drv->state) == IOMMU_DRIVER_READY;
+}
+
+/* Hand out IOMMU list entries from one global pool; returns NULL when the pool is too small. */
+static inline struct pkvm_iommu *
+alloc_iommu_list_entry(struct pkvm_iommu_driver *drv, void *mem, size_t mem_size)
+{
+	static void *pool;	/* next unused byte of the current pool */
+	static size_t remaining;	/* bytes left in the pool */
+	static DEFINE_HYP_SPINLOCK(lock);	/* held around every access to pool and remaining */
+	size_t size = sizeof(struct pkvm_iommu) + drv->ops->data_size;
+	void *ptr;
+
+	size = ALIGN(size, sizeof(unsigned long));	/* header plus driver data, rounded up */
+
+	hyp_spin_lock(&lock);
+
+	/*
+	 * If new memory is being provided, replace the existing pool with it.
+	 * Any remaining memory in the pool is discarded.
+	 */
+	if (mem && mem_size) {
+		pool = mem;
+		remaining = mem_size;
+	}
+
+	if (size <= remaining) {
+		ptr = pool;
+		pool += size;
+		remaining -= size;
+	} else {
+		ptr = NULL;
+	}
+
+	hyp_spin_unlock(&lock);
+	return ptr;
+}
+
+static bool is_overlap(phys_addr_t r1_start, size_t r1_size,
+		       phys_addr_t r2_start, size_t r2_size)
+{	/* True when [r1_start, r1_end) and [r2_start, r2_end) intersect; adjacent ranges do not. */
+	phys_addr_t r1_end = r1_start + r1_size;
+	phys_addr_t r2_end = r2_start + r2_size;
+
+	return (r1_start < r2_end) && (r2_start < r1_end);
+}
+
+static bool is_mmio_range(phys_addr_t base, size_t size)
+{	/* True when the range fits the host IPA space and intersects no hyp_memory[] region. */
+	struct memblock_region *reg;
+	phys_addr_t limit = BIT(host_mmu.pgt.ia_bits);
+	size_t i;
+
+	/* Reject empty ranges and ranges not fully below the host IPA limit. */
+	if ((base >= limit) || !size || (size > limit - base))
+		return false;
+
+	for (i = 0; i < hyp_memblock_nr; i++) {	/* any intersection with a memory region fails */
+		reg = &hyp_memory[i];
+		if (is_overlap(base, size, reg->base, reg->size))
+			return false;
+	}
+	return true;
+}
+
+static bool validate_against_existing_iommus(struct pkvm_iommu *dev)
+{	/* False if dev clashes with an entry on iommu_list; asserts host_mmu.lock is held. */
+	struct pkvm_iommu *other;
+
+	assert_host_component_locked();
+
+	list_for_each_entry(other, &iommu_list, list) {
+		/* Device ID must be unique. */
+		if (dev->id == other->id)
+			return false;
+
+		/* MMIO regions must not overlap. */
+		if (is_overlap(dev->pa, dev->size, other->pa, other->size))
+			return false;
+	}
+	return true;
+}
+
 /*
  * Initialize EL2 IOMMU driver.
  *
@@ -110,6 +207,88 @@ out:
 	return ret;
 }
 
+int __pkvm_iommu_register(unsigned long dev_id,
+			  enum pkvm_iommu_driver_id drv_id,
+			  phys_addr_t dev_pa, size_t dev_size,
+			  void *kern_mem_va, size_t mem_size)
+{
+	struct pkvm_iommu *dev = NULL;
+	struct pkvm_iommu_driver *drv;
+	void *dev_va, *mem_va = NULL;
+	int ret = 0;
+
+	drv = get_driver(drv_id);
+	if (!drv || !is_driver_ready(drv))
+		return -ENOENT;
+
+	if (!PAGE_ALIGNED(dev_pa) || !PAGE_ALIGNED(dev_size))
+		return -EINVAL;
+
+	if (!is_mmio_range(dev_pa, dev_size))
+		return -EINVAL;
+
+	if (drv->ops->validate) {
+		ret = drv->ops->validate(dev_pa, dev_size);
+		if (ret)
+			return ret;
+	}
+
+	/*
+	 * Accept memory donation if the host is providing new memory.
+	 * Note: the memory is not handed back, even if a later step fails.
+	 */
+	if (kern_mem_va && mem_size) {
+		mem_va = kern_hyp_va(kern_mem_va);
+
+		if (!PAGE_ALIGNED(mem_va) || !PAGE_ALIGNED(mem_size))
+			return -EINVAL;
+
+		ret = __pkvm_host_donate_hyp(hyp_virt_to_pfn(mem_va),
+					     mem_size >> PAGE_SHIFT);
+		if (ret)
+			return ret;
+	}
+
+	/* Take the device entry from the pool (replaced first if memory was donated). */
+	dev = alloc_iommu_list_entry(drv, mem_va, mem_size);
+	if (!dev)
+		return -ENOMEM;	/* the host wrapper reacts to this by retrying with a page */
+
+	/* Create EL2 mapping for the device. */
+	ret = __pkvm_create_private_mapping(dev_pa, dev_size,
+					    PAGE_HYP_DEVICE,(unsigned long *)&dev_va);
+	if (ret)
+		return ret;	/* NOTE(review): the pool entry taken above is not released here */
+
+	/* Populate the new device entry; the trailing data[] area is not written here. */
+	*dev = (struct pkvm_iommu){
+		.id = dev_id,
+		.ops = drv->ops,
+		.pa = dev_pa,
+		.va = dev_va,
+		.size = dev_size,
+	};
+
+	/* Take the host_mmu lock to block host stage-2 changes. */
+	host_lock_component();
+	if (!validate_against_existing_iommus(dev)) {
+		ret = -EBUSY;
+		goto out;	/* NOTE(review): dev and its EL2 mapping are not released on this path */
+	}
+
+	/* Unmap the device's MMIO range from host stage-2. */
+	ret = host_stage2_unmap_dev_locked(dev_pa, dev_size);
+	if (ret)
+		goto out;	/* NOTE(review): same as above, nothing is released before returning */
+
+	/* Register device and prevent host from mapping the MMIO range. */
+	list_add_tail(&dev->list, &iommu_list);
+
+out:
+	host_unlock_component();
+	return ret;
+}
+
 /*
  * Check host memory access against IOMMUs' MMIO regions.
  * Returns -EPERM if the address is within the bounds of a registered device.
diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
index 3711f626fb4a..392f862d76ff 100644
--- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c
+++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
@@ -414,6 +414,13 @@ int __pkvm_prot_finalize(void)
 	return 0;
 }
 
+int host_stage2_unmap_dev_locked(phys_addr_t start, u64 size)
+{
+	hyp_assert_lock_held(&host_mmu.lock);
+
+	return kvm_pgtable_stage2_unmap(&host_mmu.pgt, start, size);
+}
+
 static int host_stage2_unmap_dev_all(void)
 {
 	struct kvm_pgtable *pgt = &host_mmu.pgt;
@@ -424,11 +431,11 @@ static int host_stage2_unmap_dev_all(void)
 	/* Unmap all non-memory regions to recycle the pages */
 	for (i = 0; i < hyp_memblock_nr; i++, addr = reg->base + reg->size) {
 		reg = &hyp_memory[i];
-		ret = kvm_pgtable_stage2_unmap(pgt, addr, reg->base - addr);
+		ret = host_stage2_unmap_dev_locked(addr, reg->base - addr);
 		if (ret)
 			return ret;
 	}
-	return kvm_pgtable_stage2_unmap(pgt, addr, BIT(pgt->ia_bits) - addr);
+	return host_stage2_unmap_dev_locked(addr, BIT(pgt->ia_bits) - addr);
 }
 
 struct kvm_mem_range {
@@ -625,6 +632,7 @@ static int host_stage2_idmap(u64 addr)
 
 	prot = is_memory ? PKVM_HOST_MEM_PROT : PKVM_HOST_MMIO_PROT;
 
+	host_lock_component();	/* taken earlier so the IOMMU range adjustment below runs under the lock */
 	/*
 	 * Adjust against IOMMU devices first. host_stage2_adjust_range() should
 	 * be called last for proper alignment.
@@ -633,10 +641,9 @@ static int host_stage2_idmap(u64 addr)
 		ret = pkvm_iommu_host_stage2_adjust_range(addr, &range.start,
 							  &range.end);
 		if (ret)
-			return ret;
+			goto unlock;
 	}
 
-	host_lock_component();
 	ret = host_stage2_adjust_range(addr, &range);
 	if (ret)
 		goto unlock;
diff --git a/arch/arm64/kvm/iommu.c b/arch/arm64/kvm/iommu.c
index edd7316bd61b..a845be0c8fa9 100644
--- a/arch/arm64/kvm/iommu.c
+++ b/arch/arm64/kvm/iommu.c
@@ -6,7 +6,37 @@
 
 #include
 
+static unsigned long dev_to_id(struct device *dev)
+{
+	/* The struct device pointer value doubles as the unique device ID given to hyp. */
+	return (unsigned long)dev;
+}
+
 int pkvm_iommu_driver_init(enum pkvm_iommu_driver_id id, void *data, size_t size)
 {
 	return kvm_call_hyp_nvhe(__pkvm_iommu_driver_init, id, data, size);
 }
+
+int pkvm_iommu_register(struct device *dev, enum pkvm_iommu_driver_id drv_id,
+			phys_addr_t pa, size_t size)
+{	/* Returns the hypcall result, or -ENOMEM if the retry page cannot be allocated. */
+	void *mem;
+	int ret;
+
+	/*
+	 * Hypcall to register the device. It will return -ENOMEM if it needs
+	 * more memory. In that case allocate a page and retry.
+	 * We assume that hyp never allocates more than a page per hypcall.
+	 */
+	ret = kvm_call_hyp_nvhe(__pkvm_iommu_register, dev_to_id(dev),
+				drv_id, pa, size, NULL, 0);
+	if (ret == -ENOMEM) {
+		mem = (void *)__get_free_page(GFP_KERNEL);
+		if (!mem)
+			return -ENOMEM;
+
+		ret = kvm_call_hyp_nvhe(__pkvm_iommu_register, dev_to_id(dev),
+					drv_id, pa, size, mem, PAGE_SIZE);	/* NOTE(review): mem is never freed here, even if this retry fails */
+	}
+	return ret;
+}