From 51a84221b108408dfd76e08652698ccf371e60b9 Mon Sep 17 00:00:00 2001 From: Quentin Perret Date: Mon, 12 Dec 2022 18:09:47 +0000 Subject: [PATCH] ANDROID: KVM: arm64: Introduce concept of pKVM moveable regions The pKVM memory pool is currently sized to allow page-granularity mapping in the host stage-2 page-table of all the memory as well as up to 1GiB of MMIO range. Indeed, pKVM currently assumes that MMIO regions are completely and solely owned by the host for the entire lifetime of the system. As such, the pages used to map MMIO regions can always be recycled to allow forward progress if the memory pool ran out of pages -- pKVM can unmap MMIO ranges at stage-2 without fear of losing important information about the state of the underlying page, and those mappings can always be reconstructed later. In order to allow transitioning the ownership of non-memory regions, introduce a concept of pkvm 'moveable' regions, which represents regions of the physical address space which can be 'moved' from an ownership perspective. These moveable regions are used to size the hyp memory pool. In a first step, the list of moveable regions is equal to the memblock list, but it will be extended in subsequent changes. No functional changes intended. Bug: 244543039 Bug: 244373730 Change-Id: I7f451924b1eed9579868e6ff8c7adc7b4a5a0ae1 Signed-off-by: Quentin Perret --- arch/arm64/include/asm/kvm_pkvm.h | 28 ++++++--- arch/arm64/kvm/hyp/include/nvhe/mem_protect.h | 2 +- arch/arm64/kvm/hyp/nvhe/iommu.c | 2 +- arch/arm64/kvm/hyp/nvhe/mem_protect.c | 36 +++++++----- arch/arm64/kvm/pkvm.c | 57 +++++++++++++++++++ 5 files changed, 101 insertions(+), 24 deletions(-) diff --git a/arch/arm64/include/asm/kvm_pkvm.h b/arch/arm64/include/asm/kvm_pkvm.h index 7ac74dd69b34..49aa99fad1b3 100644 --- a/arch/arm64/include/asm/kvm_pkvm.h +++ b/arch/arm64/include/asm/kvm_pkvm.h @@ -242,6 +242,20 @@ static inline int pkvm_get_max_wrps(void) return num ? 
num + 1 : 0; } +enum pkvm_moveable_reg_type { + PKVM_MREG_MEMORY, +}; + +struct pkvm_moveable_reg { + phys_addr_t start; + u64 size; + enum pkvm_moveable_reg_type type; +}; + +#define PKVM_NR_MOVEABLE_REGS 512 +extern struct pkvm_moveable_reg kvm_nvhe_sym(pkvm_moveable_regs)[]; +extern unsigned int kvm_nvhe_sym(pkvm_moveable_regs_nr); + extern struct memblock_region kvm_nvhe_sym(hyp_memory)[]; extern unsigned int kvm_nvhe_sym(hyp_memblock_nr); @@ -292,13 +306,13 @@ static inline unsigned long __hyp_pgtable_max_pages(unsigned long nr_pages) return total; } -static inline unsigned long __hyp_pgtable_total_pages(void) +static inline unsigned long __hyp_pgtable_moveable_regs_pages(void) { unsigned long res = 0, i; - /* Cover all of memory with page-granularity */ - for (i = 0; i < kvm_nvhe_sym(hyp_memblock_nr); i++) { - struct memblock_region *reg = &kvm_nvhe_sym(hyp_memory)[i]; + /* Cover all of moveable regions with page-granularity */ + for (i = 0; i < kvm_nvhe_sym(pkvm_moveable_regs_nr); i++) { + struct pkvm_moveable_reg *reg = &kvm_nvhe_sym(pkvm_moveable_regs)[i]; res += __hyp_pgtable_max_pages(reg->size >> PAGE_SHIFT); } @@ -309,7 +323,7 @@ static inline unsigned long hyp_s1_pgtable_pages(void) { unsigned long res; - res = __hyp_pgtable_total_pages(); + res = __hyp_pgtable_moveable_regs_pages(); /* Allow 1 GiB for private mappings */ res += __hyp_pgtable_max_pages(SZ_1G >> PAGE_SHIFT); @@ -325,9 +339,9 @@ static inline unsigned long host_s2_pgtable_pages(void) * Include an extra 16 pages to safely upper-bound the worst case of * concatenated pgds. 
*/ - res = __hyp_pgtable_total_pages() + 16; + res = __hyp_pgtable_moveable_regs_pages() + 16; - /* Allow 1 GiB for MMIO mappings */ + /* Allow 1 GiB for non-moveable regions */ res += __hyp_pgtable_max_pages(SZ_1G >> PAGE_SHIFT); return res; diff --git a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h index 64d643355759..751f860770ac 100644 --- a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h +++ b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h @@ -87,7 +87,7 @@ bool addr_is_memory(phys_addr_t phys); int host_stage2_idmap_locked(phys_addr_t addr, u64 size, enum kvm_pgtable_prot prot, bool update_iommu); int host_stage2_set_owner_locked(phys_addr_t addr, u64 size, enum pkvm_component_id owner_id); -int host_stage2_unmap_dev_locked(phys_addr_t start, u64 size); +int host_stage2_unmap_reg_locked(phys_addr_t start, u64 size); int kvm_host_prepare_stage2(void *pgt_pool_base); int kvm_guest_prepare_stage2(struct pkvm_hyp_vm *vm, void *pgd); void handle_host_mem_abort(struct kvm_cpu_context *host_ctxt); diff --git a/arch/arm64/kvm/hyp/nvhe/iommu.c b/arch/arm64/kvm/hyp/nvhe/iommu.c index df974eba1e0f..a6073f443c8f 100644 --- a/arch/arm64/kvm/hyp/nvhe/iommu.c +++ b/arch/arm64/kvm/hyp/nvhe/iommu.c @@ -424,7 +424,7 @@ int __pkvm_iommu_register(unsigned long dev_id, unsigned long drv_id, * is successful, future attempts to re-map will be blocked by * pkvm_iommu_host_stage2_adjust_range. 
*/ - ret = host_stage2_unmap_dev_locked(dev_pa, dev_size); + ret = host_stage2_unmap_reg_locked(dev_pa, dev_size); if (ret) goto out_free; diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c index 53cc9b80b93d..c1f457a56176 100644 --- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c +++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c @@ -25,6 +25,9 @@ struct host_mmu host_mmu; +struct pkvm_moveable_reg pkvm_moveable_regs[PKVM_NR_MOVEABLE_REGS]; +unsigned int pkvm_moveable_regs_nr; + static struct hyp_pool host_s2_pool; static DEFINE_PER_CPU(struct pkvm_hyp_vm *, __current_vm); @@ -432,7 +435,7 @@ int __pkvm_prot_finalize(void) return 0; } -int host_stage2_unmap_dev_locked(phys_addr_t start, u64 size) +int host_stage2_unmap_reg_locked(phys_addr_t start, u64 size) { int ret; @@ -446,21 +449,24 @@ int host_stage2_unmap_dev_locked(phys_addr_t start, u64 size) return 0; } -static int host_stage2_unmap_dev_all(void) +static int host_stage2_unmap_unmoveable_regs(void) { struct kvm_pgtable *pgt = &host_mmu.pgt; - struct memblock_region *reg; + struct pkvm_moveable_reg *reg; u64 addr = 0; int i, ret; - /* Unmap all non-memory regions to recycle the pages */ - for (i = 0; i < hyp_memblock_nr; i++, addr = reg->base + reg->size) { - reg = &hyp_memory[i]; - ret = host_stage2_unmap_dev_locked(addr, reg->base - addr); - if (ret) - return ret; + /* Unmap all unmoveable regions to recycle the pages */ + for (i = 0; i < pkvm_moveable_regs_nr; i++) { + reg = &pkvm_moveable_regs[i]; + if (reg->start > addr) { + ret = host_stage2_unmap_reg_locked(addr, reg->start - addr); + if (ret) + return ret; + } + addr = max(addr, reg->start + reg->size); } - return host_stage2_unmap_dev_locked(addr, BIT(pgt->ia_bits) - addr); + return host_stage2_unmap_reg_locked(addr, BIT(pgt->ia_bits) - addr); } struct kvm_mem_range { @@ -552,10 +558,10 @@ static inline int __host_stage2_idmap(u64 start, u64 end, } /* - * The pool has been provided with enough pages to cover all of memory 
with - * page granularity, but it is difficult to know how much of the MMIO range - * we will need to cover upfront, so we may need to 'recycle' the pages if we - * run out. + * The pool has been provided with enough pages to cover all of moveable regions + * with page granularity, but it is difficult to know how much of the + * non-moveable regions we will need to cover upfront, so we may need to + * 'recycle' the pages if we run out. */ #define host_stage2_try(fn, ...) \ ({ \ @@ -563,7 +569,7 @@ static inline int __host_stage2_idmap(u64 start, u64 end, hyp_assert_lock_held(&host_mmu.lock); \ __ret = fn(__VA_ARGS__); \ if (__ret == -ENOMEM) { \ - __ret = host_stage2_unmap_dev_all(); \ + __ret = host_stage2_unmap_unmoveable_regs(); \ if (!__ret) \ __ret = fn(__VA_ARGS__); \ } \ diff --git a/arch/arm64/kvm/pkvm.c b/arch/arm64/kvm/pkvm.c index f53776f04a95..c2979cc77804 100644 --- a/arch/arm64/kvm/pkvm.c +++ b/arch/arm64/kvm/pkvm.c @@ -24,6 +24,7 @@ static struct reserved_mem *pkvm_firmware_mem; static phys_addr_t *pvmfw_base = &kvm_nvhe_sym(pvmfw_base); static phys_addr_t *pvmfw_size = &kvm_nvhe_sym(pvmfw_size); +static struct pkvm_moveable_reg *moveable_regs = kvm_nvhe_sym(pkvm_moveable_regs); static struct memblock_region *hyp_memory = kvm_nvhe_sym(hyp_memory); static unsigned int *hyp_memblock_nr_ptr = &kvm_nvhe_sym(hyp_memblock_nr); @@ -63,6 +64,55 @@ static int __init register_memblock_regions(void) return 0; } +static int cmp_moveable_reg(const void *p1, const void *p2) +{ + const struct pkvm_moveable_reg *r1 = p1; + const struct pkvm_moveable_reg *r2 = p2; + + /* + * Moveable regions may overlap, so put the largest one first when start + * addresses are equal to allow a simpler walk from e.g. + * host_stage2_unmap_unmoveable_regs(). 
+ */ + if (r1->start < r2->start) + return -1; + else if (r1->start > r2->start) + return 1; + else if (r1->size > r2->size) + return -1; + else if (r1->size < r2->size) + return 1; + return 0; +} + +static void __init sort_moveable_regs(void) +{ + sort(moveable_regs, + kvm_nvhe_sym(pkvm_moveable_regs_nr), + sizeof(struct pkvm_moveable_reg), + cmp_moveable_reg, + NULL); +} + +static int __init register_moveable_regions(void) +{ + struct memblock_region *reg; + int i = 0; + + for_each_mem_region(reg) { + if (i >= PKVM_NR_MOVEABLE_REGS) + return -ENOMEM; + moveable_regs[i].start = reg->base; + moveable_regs[i].size = reg->size; + moveable_regs[i].type = PKVM_MREG_MEMORY; + i++; + } + kvm_nvhe_sym(pkvm_moveable_regs_nr) = i; + sort_moveable_regs(); + + return 0; +} + void __init kvm_hyp_reserve(void) { u64 hyp_mem_pages = 0; @@ -81,6 +131,13 @@ void __init kvm_hyp_reserve(void) return; } + ret = register_moveable_regions(); + if (ret) { + *hyp_memblock_nr_ptr = 0; + kvm_err("Failed to register pkvm moveable regions: %d\n", ret); + return; + } + hyp_mem_pages += hyp_s1_pgtable_pages(); hyp_mem_pages += host_s2_pgtable_pages(); hyp_mem_pages += hyp_vm_table_pages();