From a4318b7d29105badbf9c18ac188209ef4801fb9a Mon Sep 17 00:00:00 2001 From: Simon Xue Date: Wed, 3 Jun 2020 17:11:13 +0800 Subject: [PATCH] iommu: rockchip: Add support iommu v2 Change-Id: I82dcbf5b9d24bd82d6127558c264226b32e7a7bd Signed-off-by: Simon Xue --- drivers/iommu/rockchip-iommu.c | 384 +++++++++++++++++++++++++++++++-- 1 file changed, 370 insertions(+), 14 deletions(-) diff --git a/drivers/iommu/rockchip-iommu.c b/drivers/iommu/rockchip-iommu.c index d5e52aabb8b0..48b8565281cc 100644 --- a/drivers/iommu/rockchip-iommu.c +++ b/drivers/iommu/rockchip-iommu.c @@ -79,6 +79,30 @@ */ #define RK_IOMMU_PGSIZE_BITMAP 0x007ff000 +#define DT_LO_MASK 0xfffff000 +#define DT_HI_MASK GENMASK_ULL(39, 32) +#define DT_SHIFT 28 + +#define DTE_BASE_HI_MASK GENMASK(11, 4) + +#define PAGE_DESC_LO_MASK 0xfffff000 +#define PAGE_DESC_HI1_LOWER 32 +#define PAGE_DESC_HI1_UPPER 34 +#define PAGE_DESC_HI2_LOWER 35 +#define PAGE_DESC_HI2_UPPER 39 +#define PAGE_DESC_HI_MASK1 GENMASK_ULL(PAGE_DESC_HI1_UPPER, PAGE_DESC_HI1_LOWER) +#define PAGE_DESC_HI_MASK2 GENMASK_ULL(PAGE_DESC_HI2_UPPER, PAGE_DESC_HI2_LOWER) + +#define DTE_HI1_LOWER 9 +#define DTE_HI1_UPPER 11 +#define DTE_HI2_LOWER 4 +#define DTE_HI2_UPPER 8 +#define DTE_HI_MASK1 GENMASK(DTE_HI1_UPPER, DTE_HI1_LOWER) +#define DTE_HI_MASK2 GENMASK(DTE_HI2_UPPER, DTE_HI2_LOWER) + +#define PAGE_DESC_HI_SHIFT1 (PAGE_DESC_HI1_LOWER - DTE_HI1_LOWER) +#define PAGE_DESC_HI_SHIFT2 (PAGE_DESC_HI2_LOWER - DTE_HI2_LOWER) + struct rk_iommu_domain { struct list_head iommus; u32 *dt; /* page directory table */ @@ -89,6 +113,10 @@ struct rk_iommu_domain { struct iommu_domain domain; }; +struct rockchip_iommu_data { + u32 version; +}; + /* list of clocks required by IOMMU */ static const char * const rk_iommu_clocks[] = { "aclk", "iface", @@ -107,6 +135,7 @@ struct rk_iommu { struct list_head node; /* entry in rk_iommu_domain.iommus */ struct iommu_domain *domain; /* domain to which iommu is attached */ struct iommu_group *group; + u32 version; }; struct rk_iommudata { @@ -174,11 +203,32 @@ static struct rk_iommu_domain *to_rk_domain(struct iommu_domain *dom) #define RK_DTE_PT_ADDRESS_MASK 0xfffff000 #define RK_DTE_PT_VALID BIT(0) +/* + * In v2: + * 31:12 - PT address bit 31:0 + * 11: 9 - PT address bit 34:32 + * 8: 4 - PT address bit 39:35 + * 3: 1 - Reserved + * 0 - 1 if PT @ PT address is valid + */ +#define RK_DTE_PT_ADDRESS_MASK_V2 0xfffffff0 + static inline phys_addr_t rk_dte_pt_address(u32 dte) { return (phys_addr_t)dte & RK_DTE_PT_ADDRESS_MASK; } +static inline phys_addr_t rk_dte_pt_address_v2(u32 dte) +{ + phys_addr_t dte_v2 = dte; + + dte_v2 = ((dte_v2 & DTE_HI_MASK2) << PAGE_DESC_HI_SHIFT2) | + ((dte_v2 & DTE_HI_MASK1) << PAGE_DESC_HI_SHIFT1) | + (dte_v2 & PAGE_DESC_LO_MASK); + + return dte_v2; +} + static inline bool rk_dte_is_pt_valid(u32 dte) { return dte & RK_DTE_PT_VALID; @@ -189,6 +239,15 @@ static inline u32 rk_mk_dte(dma_addr_t pt_dma) return (pt_dma & RK_DTE_PT_ADDRESS_MASK) | RK_DTE_PT_VALID; } +static inline u32 rk_mk_dte_v2(dma_addr_t pt_dma) +{ + pt_dma = (pt_dma & PAGE_DESC_LO_MASK) | + ((pt_dma & PAGE_DESC_HI_MASK1) >> PAGE_DESC_HI_SHIFT1) | + (pt_dma & PAGE_DESC_HI_MASK2) >> PAGE_DESC_HI_SHIFT2; + + return (pt_dma & RK_DTE_PT_ADDRESS_MASK_V2) | RK_DTE_PT_VALID; +} + /* * Each PTE has a Page address, some flags and a valid bit: * +---------------------+---+-------+-+ @@ -215,11 +274,37 @@ static inline u32 rk_mk_dte(dma_addr_t pt_dma) #define RK_PTE_PAGE_READABLE BIT(1) #define RK_PTE_PAGE_VALID BIT(0) +/* + * In v2: + * 31:12 - Page address bit 31:0 + * 11:9 - Page address bit 34:32 + * 8:4 - Page address bit 39:35 + * 3 - Security + * 2 - Readable + * 1 - Writable + * 0 - 1 if Page @ Page address is valid + */ +#define RK_PTE_PAGE_ADDRESS_MASK_V2 0xfffffff0 +#define RK_PTE_PAGE_FLAGS_MASK_V2 0x0000000e +#define RK_PTE_PAGE_READABLE_V2 BIT(2) +#define RK_PTE_PAGE_WRITABLE_V2 BIT(1) + static inline phys_addr_t rk_pte_page_address(u32 pte) { return (phys_addr_t)pte & RK_PTE_PAGE_ADDRESS_MASK; } +static inline phys_addr_t rk_pte_page_address_v2(u32 pte) +{ + phys_addr_t pte_v2 = pte; + + pte_v2 = ((pte_v2 & DTE_HI_MASK2) << PAGE_DESC_HI_SHIFT2) | + ((pte_v2 & DTE_HI_MASK1) << PAGE_DESC_HI_SHIFT1) | + (pte_v2 & PAGE_DESC_LO_MASK); + + return pte_v2; +} + static inline bool rk_pte_is_page_valid(u32 pte) { return pte & RK_PTE_PAGE_VALID; @@ -229,12 +314,26 @@ static inline bool rk_pte_is_page_valid(u32 pte) static u32 rk_mk_pte(phys_addr_t page, int prot) { u32 flags = 0; + flags |= (prot & IOMMU_READ) ? RK_PTE_PAGE_READABLE : 0; flags |= (prot & IOMMU_WRITE) ? RK_PTE_PAGE_WRITABLE : 0; page &= RK_PTE_PAGE_ADDRESS_MASK; return page | flags | RK_PTE_PAGE_VALID; } +static u32 rk_mk_pte_v2(phys_addr_t page, int prot) +{ + u32 flags = 0; + + flags |= (prot & IOMMU_READ) ? RK_PTE_PAGE_READABLE_V2 : 0; + flags |= (prot & IOMMU_WRITE) ? RK_PTE_PAGE_WRITABLE_V2 : 0; + page = (page & PAGE_DESC_LO_MASK) | + ((page & PAGE_DESC_HI_MASK1) >> PAGE_DESC_HI_SHIFT1) | + (page & PAGE_DESC_HI_MASK2) >> PAGE_DESC_HI_SHIFT2; + page &= RK_PTE_PAGE_ADDRESS_MASK_V2; + return page | flags | RK_PTE_PAGE_VALID; +} + static u32 rk_mk_pte_invalid(u32 pte) { return pte & ~RK_PTE_PAGE_VALID; @@ -463,6 +562,7 @@ static int rk_iommu_force_reset(struct rk_iommu *iommu) int ret, i; u32 dte_addr; bool val; + u32 address_mask; if (iommu->reset_disabled) return 0; @@ -474,11 +574,19 @@ static int rk_iommu_force_reset(struct rk_iommu *iommu) * Check if register DTE_ADDR is working by writing DTE_ADDR_DUMMY * and verifying that upper 5 nybbles are read back. */ + + /* + * In v2: upper 7 nybbles are read back. + */ for (i = 0; i < iommu->num_mmu; i++) { rk_iommu_write(iommu->bases[i], RK_MMU_DTE_ADDR, DTE_ADDR_DUMMY); + if (iommu->version >= 0x2) + address_mask = RK_DTE_PT_ADDRESS_MASK_V2; + else + address_mask = RK_DTE_PT_ADDRESS_MASK; dte_addr = rk_iommu_read(iommu->bases[i], RK_MMU_DTE_ADDR); - if (dte_addr != (DTE_ADDR_DUMMY & RK_DTE_PT_ADDRESS_MASK)) { + if (dte_addr != (DTE_ADDR_DUMMY & address_mask)) { dev_err(iommu->dev, "Error during raw reset. MMU_DTE_ADDR is not functioning\n"); return -EFAULT; } @@ -520,6 +628,10 @@ static void log_iova(struct rk_iommu *iommu, int index, dma_addr_t iova) mmu_dte_addr = rk_iommu_read(base, RK_MMU_DTE_ADDR); mmu_dte_addr_phys = (phys_addr_t)mmu_dte_addr; + if (iommu->version >= 0x2) { + mmu_dte_addr_phys = (mmu_dte_addr_phys & DT_LO_MASK) | + ((mmu_dte_addr_phys & DTE_BASE_HI_MASK) << DT_SHIFT); + } dte_addr_phys = mmu_dte_addr_phys + (4 * dte_index); dte_addr = phys_to_virt(dte_addr_phys); @@ -644,6 +756,34 @@ out: return phys; } +static phys_addr_t rk_iommu_iova_to_phys_v2(struct iommu_domain *domain, + dma_addr_t iova) +{ + struct rk_iommu_domain *rk_domain = to_rk_domain(domain); + unsigned long flags; + phys_addr_t pt_phys, phys = 0; + u32 dte, pte; + u32 *page_table; + + spin_lock_irqsave(&rk_domain->dt_lock, flags); + + dte = rk_domain->dt[rk_iova_dte_index(iova)]; + if (!rk_dte_is_pt_valid(dte)) + goto out; + + pt_phys = rk_dte_pt_address_v2(dte); + page_table = (u32 *)phys_to_virt(pt_phys); + pte = page_table[rk_iova_pte_index(iova)]; + if (!rk_pte_is_page_valid(pte)) + goto out; + + phys = rk_pte_page_address_v2(pte) + rk_iova_page_offset(iova); +out: + spin_unlock_irqrestore(&rk_domain->dt_lock, flags); + + return phys; +} + static void rk_iommu_zap_iova(struct rk_iommu_domain *rk_domain, dma_addr_t iova, size_t size) { @@ -720,6 +860,44 @@ done: return (u32 *)phys_to_virt(pt_phys); } +static u32 *rk_dte_get_page_table_v2(struct rk_iommu_domain *rk_domain, + dma_addr_t iova) +{ + u32 *page_table, *dte_addr; + u32 dte_index, dte; + phys_addr_t pt_phys; + dma_addr_t pt_dma; + + assert_spin_locked(&rk_domain->dt_lock); + + dte_index = rk_iova_dte_index(iova); + dte_addr = &rk_domain->dt[dte_index]; + dte = *dte_addr; + if (rk_dte_is_pt_valid(dte)) + goto done; + + page_table = (u32 *)get_zeroed_page(GFP_ATOMIC | GFP_DMA32); + if (!page_table) + return ERR_PTR(-ENOMEM); + + pt_dma = dma_map_single(dma_dev, page_table, SPAGE_SIZE, DMA_TO_DEVICE); + if (dma_mapping_error(dma_dev, pt_dma)) { + dev_err(dma_dev, "DMA mapping error while allocating page table\n"); + free_page((unsigned long)page_table); + return ERR_PTR(-ENOMEM); + } + + dte = rk_mk_dte_v2(pt_dma); + *dte_addr = dte; + + rk_table_flush(rk_domain, pt_dma, NUM_PT_ENTRIES); + rk_table_flush(rk_domain, + rk_domain->dt_dma + dte_index * sizeof(u32), 1); +done: + pt_phys = rk_dte_pt_address_v2(dte); + return (u32 *)phys_to_virt(pt_phys); +} + static size_t rk_iommu_unmap_iova(struct rk_iommu_domain *rk_domain, u32 *pte_addr, dma_addr_t pte_dma, size_t size) @@ -790,6 +968,54 @@ unwind: return -EADDRINUSE; } +static int rk_iommu_map_iova_v2(struct rk_iommu_domain *rk_domain, u32 *pte_addr, + dma_addr_t pte_dma, dma_addr_t iova, + phys_addr_t paddr, size_t size, int prot) +{ + unsigned int pte_count; + unsigned int pte_total = size / SPAGE_SIZE; + phys_addr_t page_phys; + + assert_spin_locked(&rk_domain->dt_lock); + + for (pte_count = 0; pte_count < pte_total; pte_count++) { + u32 pte = pte_addr[pte_count]; + + if (rk_pte_is_page_valid(pte)) + goto unwind; + + pte_addr[pte_count] = rk_mk_pte_v2(paddr, prot); + + paddr += SPAGE_SIZE; + } + + rk_table_flush(rk_domain, pte_dma, pte_total); + + /* + * Zap the first and last iova to evict from iotlb any previously + * mapped cachelines holding stale values for its dte and pte. + * We only zap the first and last iova, since only they could have + * dte or pte shared with an existing mapping. + */ + + /* Do not zap tlb cache line if IOMMU_TLB_SHOT_ENTIRE set */ + if (!(prot & IOMMU_TLB_SHOT_ENTIRE)) + rk_iommu_zap_iova_first_last(rk_domain, iova, size); + + return 0; +unwind: + /* Unmap the range of iovas that we just mapped */ + rk_iommu_unmap_iova(rk_domain, pte_addr, pte_dma, + pte_count * SPAGE_SIZE); + + iova += pte_count * SPAGE_SIZE; + page_phys = rk_pte_page_address_v2(pte_addr[pte_count]); + pr_err("iova: %pad already mapped to %pa cannot remap to phys: %pa prot: %#x\n", + &iova, &page_phys, &paddr, prot); + + return -EADDRINUSE; +} + static int rk_iommu_map(struct iommu_domain *domain, unsigned long _iova, phys_addr_t paddr, size_t size, int prot) { @@ -797,7 +1023,7 @@ static int rk_iommu_map(struct iommu_domain *domain, unsigned long _iova, unsigned long flags; dma_addr_t pte_dma, iova = (dma_addr_t)_iova; u32 *page_table, *pte_addr; - u32 dte_index, pte_index; + u32 dte, pte_index; int ret; spin_lock_irqsave(&rk_domain->dt_lock, flags); @@ -815,10 +1041,10 @@ static int rk_iommu_map(struct iommu_domain *domain, unsigned long _iova, return PTR_ERR(page_table); } - dte_index = rk_domain->dt[rk_iova_dte_index(iova)]; + dte = rk_domain->dt[rk_iova_dte_index(iova)]; pte_index = rk_iova_pte_index(iova); pte_addr = &page_table[pte_index]; - pte_dma = rk_dte_pt_address(dte_index) + pte_index * sizeof(u32); + pte_dma = rk_dte_pt_address(dte) + pte_index * sizeof(u32); ret = rk_iommu_map_iova(rk_domain, pte_addr, pte_dma, iova, paddr, size, prot); @@ -827,6 +1053,43 @@ static int rk_iommu_map(struct iommu_domain *domain, unsigned long _iova, return ret; } +static int rk_iommu_map_v2(struct iommu_domain *domain, unsigned long _iova, + phys_addr_t paddr, size_t size, int prot) +{ + struct rk_iommu_domain *rk_domain = to_rk_domain(domain); + unsigned long flags; + dma_addr_t pte_dma, iova = (dma_addr_t)_iova; + u32 *page_table, *pte_addr; + u32 dte, pte_index; + int ret; + + spin_lock_irqsave(&rk_domain->dt_lock, flags); + + /* + * pgsize_bitmap specifies iova sizes that fit in one page table + * (1024 4-KiB pages = 4 MiB). + * So, size will always be 4096 <= size <= 4194304. + * Since iommu_map() guarantees that both iova and size will be + * aligned, we will always only be mapping from a single dte here. + */ + page_table = rk_dte_get_page_table_v2(rk_domain, iova); + if (IS_ERR(page_table)) { + spin_unlock_irqrestore(&rk_domain->dt_lock, flags); + return PTR_ERR(page_table); + } + + dte = rk_domain->dt[rk_iova_dte_index(iova)]; + pte_index = rk_iova_pte_index(iova); + pte_addr = &page_table[pte_index]; + pte_dma = rk_dte_pt_address_v2(dte) + pte_index * sizeof(u32); + ret = rk_iommu_map_iova_v2(rk_domain, pte_addr, pte_dma, iova, + paddr, size, prot); + + spin_unlock_irqrestore(&rk_domain->dt_lock, flags); + + return ret; +} + static size_t rk_iommu_unmap(struct iommu_domain *domain, unsigned long _iova, size_t size) { @@ -867,6 +1130,46 @@ static size_t rk_iommu_unmap(struct iommu_domain *domain, unsigned long _iova, return unmap_size; } +static size_t rk_iommu_unmap_v2(struct iommu_domain *domain, unsigned long _iova, + size_t size) +{ + struct rk_iommu_domain *rk_domain = to_rk_domain(domain); + unsigned long flags; + dma_addr_t pte_dma, iova = (dma_addr_t)_iova; + phys_addr_t pt_phys; + u32 dte; + u32 *pte_addr; + size_t unmap_size; + + spin_lock_irqsave(&rk_domain->dt_lock, flags); + + /* + * pgsize_bitmap specifies iova sizes that fit in one page table + * (1024 4-KiB pages = 4 MiB). + * So, size will always be 4096 <= size <= 4194304. + * Since iommu_unmap() guarantees that both iova and size will be + * aligned, we will always only be unmapping from a single dte here. + */ + dte = rk_domain->dt[rk_iova_dte_index(iova)]; + /* Just return 0 if iova is unmapped */ + if (!rk_dte_is_pt_valid(dte)) { + spin_unlock_irqrestore(&rk_domain->dt_lock, flags); + return 0; + } + + pt_phys = rk_dte_pt_address_v2(dte); + pte_addr = (u32 *)phys_to_virt(pt_phys) + rk_iova_pte_index(iova); + pte_dma = pt_phys + rk_iova_pte_index(iova) * sizeof(u32); + unmap_size = rk_iommu_unmap_iova(rk_domain, pte_addr, pte_dma, size); + + spin_unlock_irqrestore(&rk_domain->dt_lock, flags); + + /* Shootdown iotlb entries for iova range that was just unmapped */ + rk_iommu_zap_iova(rk_domain, iova, unmap_size); + + return unmap_size; +} + static void rk_iommu_flush_tlb_all(struct iommu_domain *domain) { struct rk_iommu_domain *rk_domain = to_rk_domain(domain); @@ -926,6 +1229,7 @@ static int rk_iommu_enable(struct rk_iommu *iommu) struct iommu_domain *domain = iommu->domain; struct rk_iommu_domain *rk_domain = to_rk_domain(domain); int ret, i; + u32 dt_v2; ret = clk_bulk_enable(iommu->num_clocks, iommu->clocks); if (ret) @@ -940,8 +1244,14 @@ static int rk_iommu_enable(struct rk_iommu *iommu) goto out_disable_stall; for (i = 0; i < iommu->num_mmu; i++) { - rk_iommu_write(iommu->bases[i], RK_MMU_DTE_ADDR, - rk_domain->dt_dma); + if (iommu->version >= 0x2) { + dt_v2 = (rk_domain->dt_dma & DT_LO_MASK) | + ((rk_domain->dt_dma & DT_HI_MASK) >> DT_SHIFT); + rk_iommu_write(iommu->bases[i], RK_MMU_DTE_ADDR, dt_v2); + } else { + rk_iommu_write(iommu->bases[i], RK_MMU_DTE_ADDR, + rk_domain->dt_dma); + } rk_iommu_base_command(iommu->bases[i], RK_MMU_CMD_ZAP_CACHE); rk_iommu_write(iommu->bases[i], RK_MMU_INT_MASK, RK_MMU_IRQ_MASK); } @@ -1227,6 +1537,42 @@ static const struct iommu_ops rk_iommu_ops = { .of_xlate = rk_iommu_of_xlate, }; +static const struct iommu_ops rk_iommu_ops_v2 = { + .domain_alloc = rk_iommu_domain_alloc, + .domain_free = rk_iommu_domain_free, + .attach_dev = rk_iommu_attach_device, + .detach_dev = rk_iommu_detach_device, + .map = rk_iommu_map_v2, + .unmap = rk_iommu_unmap_v2, + .flush_iotlb_all = rk_iommu_flush_tlb_all, + .add_device = rk_iommu_add_device, + .remove_device = rk_iommu_remove_device, + .iova_to_phys = rk_iommu_iova_to_phys_v2, + .is_attach_deferred = rk_iommu_is_attach_deferred, + .device_group = rk_iommu_device_group, + .pgsize_bitmap = RK_IOMMU_PGSIZE_BITMAP, + .of_xlate = rk_iommu_of_xlate, +}; + +static const struct rockchip_iommu_data iommu_data_v1 = { + .version = 0x1, +}; + +static const struct rockchip_iommu_data iommu_data_v2 = { + .version = 0x2, +}; + +static const struct of_device_id rk_iommu_dt_ids[] = { + { .compatible = "rockchip,iommu", + .data = &iommu_data_v1, + }, { + .compatible = "rockchip,iommu-v2", + .data = &iommu_data_v2, + }, + { /* sentinel */ } +}; +MODULE_DEVICE_TABLE(of, rk_iommu_dt_ids); + static int rk_iommu_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; @@ -1234,11 +1580,21 @@ static int rk_iommu_probe(struct platform_device *pdev) struct resource *res; int num_res = pdev->num_resources; int err, i, irq; + const struct of_device_id *match; + struct rockchip_iommu_data *data; iommu = devm_kzalloc(dev, sizeof(*iommu), GFP_KERNEL); if (!iommu) return -ENOMEM; + match = of_match_device(rk_iommu_dt_ids, dev); + if (!match) + return -EINVAL; + + data = (struct rockchip_iommu_data *)match->data; + iommu->version = data->version; + dev_info(dev, "version = %x\n", iommu->version); + platform_set_drvdata(pdev, iommu); iommu->dev = dev; iommu->num_mmu = 0; @@ -1321,7 +1677,10 @@ static int rk_iommu_probe(struct platform_device *pdev) if (err) goto err_put_group; - iommu_device_set_ops(&iommu->iommu, &rk_iommu_ops); + if (iommu->version >= 0x2) + iommu_device_set_ops(&iommu->iommu, &rk_iommu_ops_v2); + else + iommu_device_set_ops(&iommu->iommu, &rk_iommu_ops); iommu_device_set_fwnode(&iommu->iommu, &dev->of_node->fwnode); err = iommu_device_register(&iommu->iommu); @@ -1336,7 +1695,10 @@ static int rk_iommu_probe(struct platform_device *pdev) if (!dma_dev) dma_dev = &pdev->dev; - bus_set_iommu(&platform_bus_type, &rk_iommu_ops); + if (iommu->version >= 0x2) + bus_set_iommu(&platform_bus_type, &rk_iommu_ops_v2); + else + bus_set_iommu(&platform_bus_type, &rk_iommu_ops); pm_runtime_enable(dev); @@ -1411,12 +1773,6 @@ static const struct dev_pm_ops rk_iommu_pm_ops = { pm_runtime_force_resume) }; -static const struct of_device_id rk_iommu_dt_ids[] = { - { .compatible = "rockchip,iommu" }, - { /* sentinel */ } -}; -MODULE_DEVICE_TABLE(of, rk_iommu_dt_ids); - static struct platform_driver rk_iommu_driver = { .probe = rk_iommu_probe, .shutdown = rk_iommu_shutdown,