diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h index 4f857405ae85..d150b1dcc4df 100644 --- a/arch/arm64/include/asm/kvm_pgtable.h +++ b/arch/arm64/include/asm/kvm_pgtable.h @@ -208,6 +208,24 @@ enum kvm_pgtable_prot { #define PKVM_HOST_MEM_PROT KVM_PGTABLE_PROT_RWX #define PKVM_HOST_MMIO_PROT KVM_PGTABLE_PROT_RW +#define KVM_HOST_S2_DEFAULT_ATTR (KVM_PTE_LEAF_ATTR_HI | \ + KVM_PTE_LEAF_ATTR_LO) + /* KVM_HOST_S2_DEFAULT_ATTR is the OR of the high and low leaf attribute fields and is used as a mask by host_stage2_pte_is_counted(); the two values that follow are what the masked attributes are compared against for memory and for MMIO. NOTE(review): the MEMATTR and SH terms look like multi-bit field masks used as values - confirm that all-ones is the intended encoding, especially for MMIO. */ +#define KVM_HOST_S2_DEFAULT_MEM_PTE \ + (KVM_PTE_LEAF_ATTR_LO_S2_MEMATTR | \ + KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R | \ + KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W | \ + KVM_PTE_LEAF_ATTR_LO_S2_SH | \ + KVM_PTE_LEAF_ATTR_LO_S2_AF) + /* Same bits as the memory default plus KVM_PTE_LEAF_ATTR_HI_S2_XN. */ +#define KVM_HOST_S2_DEFAULT_MMIO_PTE \ + (KVM_PTE_LEAF_ATTR_LO_S2_MEMATTR | \ + KVM_PTE_LEAF_ATTR_HI_S2_XN | \ + KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R | \ + KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W | \ + KVM_PTE_LEAF_ATTR_LO_S2_SH | \ + KVM_PTE_LEAF_ATTR_LO_S2_AF) + #define PAGE_HYP KVM_PGTABLE_PROT_RW #define PAGE_HYP_EXEC (KVM_PGTABLE_PROT_R | KVM_PGTABLE_PROT_X) #define PAGE_HYP_RO (KVM_PGTABLE_PROT_R) diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c index c201da97edad..d3a1a8d84640 100644 --- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c +++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c @@ -178,7 +178,7 @@ static bool guest_stage2_force_pte_cb(u64 addr, u64 end, static bool guest_stage2_pte_is_counted(kvm_pte_t pte, u32 level) { - return host_stage2_pte_is_counted(pte, level); + return !!pte; /* counts every non-zero PTE, valid or not; no longer delegates to the host callback */ } static void *guest_s2_zalloc_pages_exact(size_t size) @@ -660,12 +660,26 @@ static bool host_stage2_force_pte(u64 addr, u64 end, enum kvm_pgtable_prot prot) static bool host_stage2_pte_is_counted(kvm_pte_t pte, u32 level) { + u64 phys; + /* * The refcount tracks valid entries as well as invalid entries if they * encode ownership of a page to another entity than the page-table * owner, whose id is 0. 
*/ - return !!pte; + if (!kvm_pte_valid(pte)) + return !!pte; /* invalid entry: counted only when it is non-zero */ + /* Table entries are always counted. */ + if (kvm_pte_table(pte, level)) + return true; + /* A valid leaf is counted only when its attributes, masked with KVM_HOST_S2_DEFAULT_ATTR, differ from the default value for its kind: the MMIO default when addr_is_memory() is false for the address taken from the PTE, the memory default otherwise. */ + phys = kvm_pte_to_phys(pte); + if (!addr_is_memory(phys)) + return (pte & KVM_HOST_S2_DEFAULT_ATTR) != + KVM_HOST_S2_DEFAULT_MMIO_PTE; + else + return (pte & KVM_HOST_S2_DEFAULT_ATTR) != + KVM_HOST_S2_DEFAULT_MEM_PTE; } static int host_stage2_idmap(u64 addr) diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c index 82b55bd8d570..7808591e2a09 100644 --- a/arch/arm64/kvm/hyp/pgtable.c +++ b/arch/arm64/kvm/hyp/pgtable.c @@ -767,6 +767,13 @@ static int stage2_map_walk_leaf(u64 addr, u64 end, u32 level, kvm_pte_t *ptep, */ if (pte_ops->pte_is_counted_cb(pte, level)) stage2_put_pte(ptep, data->mmu, addr, level, mm_ops); + else { + /* + * On non-refcounted PTEs we just clear them out without + * dropping the refcount. + */ + stage2_clear_pte(ptep, data->mmu, addr, level); + } kvm_set_table_pte(ptep, childp, mm_ops); mm_ops->get_page(ptep); @@ -774,6 +781,35 @@ static int stage2_map_walk_leaf(u64 addr, u64 end, u32 level, kvm_pte_t *ptep, return 0; } +static void stage2_coalesce_walk_table_post(u64 addr, u64 end, u32 level, + kvm_pte_t *ptep, + struct stage2_map_data *data) +{ + struct kvm_pgtable_mm_ops *mm_ops = data->mm_ops; + kvm_pte_t *childp = kvm_pte_follow(*ptep, mm_ops); + /* Runs from stage2_map_walk_table_post() when data->anchor is not set. Does nothing when data->phys is valid or the level does not support block mappings; otherwise, if page_count() of the child table is 1, calls stage2_put_pte() on the table entry and put_page() on the child. */ + /* + * Decrement the refcount only on the set ownership path to avoid a + * loop situation when the following happens: + * 1. We take a host stage2 fault and we create a small mapping which + * has default attributes (is not refcounted). + * 2. On the way back we execute the post handler and we zap the + * table that holds our mapping. + */ + if (kvm_phys_is_valid(data->phys) || + !kvm_level_supports_block_mapping(level)) + return; + + /* + * Free a page that is not referenced anymore and drop the reference + * of the page table page. 
+ */ + if (mm_ops->page_count(childp) == 1) { + stage2_put_pte(ptep, data->mmu, addr, level, mm_ops); + mm_ops->put_page(childp); + } +} + static int stage2_map_walk_table_post(u64 addr, u64 end, u32 level, kvm_pte_t *ptep, struct stage2_map_data *data) @@ -782,8 +818,11 @@ static int stage2_map_walk_table_post(u64 addr, u64 end, u32 level, kvm_pte_t *childp; int ret = 0; - if (!data->anchor) + if (!data->anchor) { + stage2_coalesce_walk_table_post(addr, end, level, ptep, + data); /* no anchor: let the helper drop this table if nothing else references it */ return 0; + } if (data->anchor == ptep) { childp = data->childp;