diff --git a/drivers/rknpu/include/rknpu_drv.h b/drivers/rknpu/include/rknpu_drv.h index ce4a06fa9721..2e1260808c5a 100644 --- a/drivers/rknpu/include/rknpu_drv.h +++ b/drivers/rknpu/include/rknpu_drv.h @@ -29,7 +29,7 @@ #define DRIVER_NAME "rknpu" #define DRIVER_DESC "RKNPU driver" -#define DRIVER_DATE "20240322" +#define DRIVER_DATE "20240424" #define DRIVER_MAJOR 0 #define DRIVER_MINOR 9 #define DRIVER_PATCHLEVEL 6 diff --git a/drivers/rknpu/include/rknpu_ioctl.h b/drivers/rknpu/include/rknpu_ioctl.h index 76724f338f75..1f729186dd7c 100644 --- a/drivers/rknpu/include/rknpu_ioctl.h +++ b/drivers/rknpu/include/rknpu_ioctl.h @@ -39,10 +39,10 @@ #define RKNPU_STR_HELPER(x) #x -#define RKNPU_GET_DRV_VERSION_STRING(MAJOR, MINOR, PATCHLEVEL) \ - RKNPU_STR_HELPER(MAJOR) \ +#define RKNPU_GET_DRV_VERSION_STRING(MAJOR, MINOR, PATCHLEVEL) \ + RKNPU_STR_HELPER(MAJOR) \ "." RKNPU_STR_HELPER(MINOR) "." RKNPU_STR_HELPER(PATCHLEVEL) -#define RKNPU_GET_DRV_VERSION_CODE(MAJOR, MINOR, PATCHLEVEL) \ +#define RKNPU_GET_DRV_VERSION_CODE(MAJOR, MINOR, PATCHLEVEL) \ (MAJOR * 10000 + MINOR * 100 + PATCHLEVEL) #define RKNPU_GET_DRV_VERSION_MAJOR(CODE) (CODE / 10000) #define RKNPU_GET_DRV_VERSION_MINOR(CODE) ((CODE % 10000) / 100) @@ -62,7 +62,7 @@ enum e_rknpu_mem_type { RKNPU_MEM_WRITE_COMBINE = 1 << 2, /* dma attr kernel mapping */ RKNPU_MEM_KERNEL_MAPPING = 1 << 3, - /* iommu mapping */ + /* IOMMU mapping */ RKNPU_MEM_IOMMU = 1 << 4, /* zero mapping */ RKNPU_MEM_ZEROING = 1 << 5, @@ -74,19 +74,22 @@ enum e_rknpu_mem_type { RKNPU_MEM_TRY_ALLOC_SRAM = 1 << 8, /* request NBUF */ RKNPU_MEM_TRY_ALLOC_NBUF = 1 << 9, + /* IOMMU limiting IOVA alignment */ + RKNPU_MEM_IOMMU_LIMIT_IOVA_ALIGNMENT = 1 << 10, RKNPU_MEM_MASK = RKNPU_MEM_NON_CONTIGUOUS | RKNPU_MEM_CACHEABLE | RKNPU_MEM_WRITE_COMBINE | RKNPU_MEM_KERNEL_MAPPING | RKNPU_MEM_IOMMU | RKNPU_MEM_ZEROING | RKNPU_MEM_SECURE | RKNPU_MEM_DMA32 | - RKNPU_MEM_TRY_ALLOC_SRAM | RKNPU_MEM_TRY_ALLOC_NBUF + RKNPU_MEM_TRY_ALLOC_SRAM | RKNPU_MEM_TRY_ALLOC_NBUF | + RKNPU_MEM_IOMMU_LIMIT_IOVA_ALIGNMENT }; /* sync mode definitions. */ enum e_rknpu_mem_sync_mode { RKNPU_MEM_SYNC_TO_DEVICE = 1 << 0, RKNPU_MEM_SYNC_FROM_DEVICE = 1 << 1, - RKNPU_MEM_SYNC_MASK = - RKNPU_MEM_SYNC_TO_DEVICE | RKNPU_MEM_SYNC_FROM_DEVICE + RKNPU_MEM_SYNC_MASK = RKNPU_MEM_SYNC_TO_DEVICE | + RKNPU_MEM_SYNC_FROM_DEVICE }; /* job mode definitions. */ @@ -302,25 +305,25 @@ struct rknpu_action { #include -#define DRM_IOCTL_RKNPU_ACTION \ +#define DRM_IOCTL_RKNPU_ACTION \ DRM_IOWR(DRM_COMMAND_BASE + RKNPU_ACTION, struct rknpu_action) -#define DRM_IOCTL_RKNPU_SUBMIT \ +#define DRM_IOCTL_RKNPU_SUBMIT \ DRM_IOWR(DRM_COMMAND_BASE + RKNPU_SUBMIT, struct rknpu_submit) -#define DRM_IOCTL_RKNPU_MEM_CREATE \ +#define DRM_IOCTL_RKNPU_MEM_CREATE \ DRM_IOWR(DRM_COMMAND_BASE + RKNPU_MEM_CREATE, struct rknpu_mem_create) -#define DRM_IOCTL_RKNPU_MEM_MAP \ +#define DRM_IOCTL_RKNPU_MEM_MAP \ DRM_IOWR(DRM_COMMAND_BASE + RKNPU_MEM_MAP, struct rknpu_mem_map) -#define DRM_IOCTL_RKNPU_MEM_DESTROY \ +#define DRM_IOCTL_RKNPU_MEM_DESTROY \ DRM_IOWR(DRM_COMMAND_BASE + RKNPU_MEM_DESTROY, struct rknpu_mem_destroy) -#define DRM_IOCTL_RKNPU_MEM_SYNC \ +#define DRM_IOCTL_RKNPU_MEM_SYNC \ DRM_IOWR(DRM_COMMAND_BASE + RKNPU_MEM_SYNC, struct rknpu_mem_sync) #define IOCTL_RKNPU_ACTION RKNPU_IOWR(RKNPU_ACTION, struct rknpu_action) #define IOCTL_RKNPU_SUBMIT RKNPU_IOWR(RKNPU_SUBMIT, struct rknpu_submit) -#define IOCTL_RKNPU_MEM_CREATE \ +#define IOCTL_RKNPU_MEM_CREATE \ RKNPU_IOWR(RKNPU_MEM_CREATE, struct rknpu_mem_create) #define IOCTL_RKNPU_MEM_MAP RKNPU_IOWR(RKNPU_MEM_MAP, struct rknpu_mem_map) -#define IOCTL_RKNPU_MEM_DESTROY \ +#define IOCTL_RKNPU_MEM_DESTROY \ RKNPU_IOWR(RKNPU_MEM_DESTROY, struct rknpu_mem_destroy) #define IOCTL_RKNPU_MEM_SYNC RKNPU_IOWR(RKNPU_MEM_SYNC, struct rknpu_mem_sync) diff --git a/drivers/rknpu/include/rknpu_iommu.h b/drivers/rknpu/include/rknpu_iommu.h index 43d36db91e2d..75b77c63978e 100644 --- a/drivers/rknpu/include/rknpu_iommu.h +++ b/drivers/rknpu/include/rknpu_iommu.h @@ -32,10 +32,19 @@ struct rknpu_iommu_dma_cookie { }; dma_addr_t rknpu_iommu_dma_alloc_iova(struct iommu_domain *domain, size_t size, - u64 dma_limit, struct device *dev); + u64 dma_limit, struct device *dev, + bool size_aligned); void rknpu_iommu_dma_free_iova(struct rknpu_iommu_dma_cookie *cookie, - dma_addr_t iova, size_t size); + dma_addr_t iova, size_t size, bool size_aligned); + +int rknpu_iommu_dma_map_sg(struct device *dev, struct scatterlist *sg, + int nents, enum dma_data_direction dir, + bool iova_aligned); + +void rknpu_iommu_dma_unmap_sg(struct device *dev, struct scatterlist *sg, + int nents, enum dma_data_direction dir, + bool iova_aligned); int rknpu_iommu_init_domain(struct rknpu_device *rknpu_dev); int rknpu_iommu_switch_domain(struct rknpu_device *rknpu_dev, int domain_id); diff --git a/drivers/rknpu/rknpu_gem.c b/drivers/rknpu/rknpu_gem.c index 81fbb29334af..cebdf664903e 100644 --- a/drivers/rknpu/rknpu_gem.c +++ b/drivers/rknpu/rknpu_gem.c @@ -37,6 +37,8 @@ static int rknpu_gem_get_pages(struct rknpu_gem_object *rknpu_obj) dma_addr_t dma_addr = 0; dma_addr_t phys = 0; int ret = -EINVAL, i = 0; + bool iova_aligned = + !(rknpu_obj->flags & RKNPU_MEM_IOMMU_LIMIT_IOVA_ALIGNMENT); rknpu_obj->pages = drm_gem_get_pages(&rknpu_obj->base); if (IS_ERR(rknpu_obj->pages)) { @@ -60,8 +62,9 @@ static int rknpu_gem_get_pages(struct rknpu_gem_object *rknpu_obj) goto put_pages; } - ret = dma_map_sg(drm->dev, rknpu_obj->sgt->sgl, rknpu_obj->sgt->nents, - DMA_BIDIRECTIONAL); + ret = rknpu_iommu_dma_map_sg(drm->dev, rknpu_obj->sgt->sgl, + rknpu_obj->sgt->nents, DMA_BIDIRECTIONAL, + iova_aligned); if (ret == 0) { ret = -EFAULT; LOG_DEV_ERROR(drm->dev, "%s: dma map %zu fail\n", __func__, @@ -95,8 +98,9 @@ static int rknpu_gem_get_pages(struct rknpu_gem_object *rknpu_obj) return 0; unmap_sg: - dma_unmap_sg(drm->dev, rknpu_obj->sgt->sgl, rknpu_obj->sgt->nents, - DMA_BIDIRECTIONAL); + rknpu_iommu_dma_unmap_sg(drm->dev, rknpu_obj->sgt->sgl, + rknpu_obj->sgt->nents, DMA_BIDIRECTIONAL, + iova_aligned); free_sgt: sg_free_table(rknpu_obj->sgt); @@ -111,6 +115,8 @@ put_pages: static void rknpu_gem_put_pages(struct rknpu_gem_object *rknpu_obj) { struct drm_device *drm = rknpu_obj->base.dev; + bool iova_aligned = + !(rknpu_obj->flags & RKNPU_MEM_IOMMU_LIMIT_IOVA_ALIGNMENT); if (rknpu_obj->flags & RKNPU_MEM_KERNEL_MAPPING) { vunmap(rknpu_obj->kv_addr); @@ -118,8 +124,9 @@ static void rknpu_gem_put_pages(struct rknpu_gem_object *rknpu_obj) } if (rknpu_obj->sgt != NULL) { - dma_unmap_sg(drm->dev, rknpu_obj->sgt->sgl, - rknpu_obj->sgt->nents, DMA_BIDIRECTIONAL); + rknpu_iommu_dma_unmap_sg(drm->dev, rknpu_obj->sgt->sgl, + rknpu_obj->sgt->nents, + DMA_BIDIRECTIONAL, iova_aligned); sg_free_table(rknpu_obj->sgt); kfree(rknpu_obj->sgt); } @@ -198,9 +205,9 @@ static int rknpu_gem_alloc_buf(struct rknpu_gem_object *rknpu_obj) return -ENOMEM; } - rknpu_obj->cookie = - dma_alloc_attrs(drm->dev, rknpu_obj->size, &rknpu_obj->dma_addr, - gfp_mask, rknpu_obj->dma_attrs); + rknpu_obj->cookie = dma_alloc_attrs(drm->dev, rknpu_obj->size, + &rknpu_obj->dma_addr, gfp_mask, + rknpu_obj->dma_attrs); if (!rknpu_obj->cookie) { /* * when RKNPU_MEM_CONTIGUOUS and IOMMU is available @@ -214,10 +221,9 @@ static int rknpu_gem_alloc_buf(struct rknpu_gem_object *rknpu_obj) rknpu_obj->size); rknpu_obj->dma_attrs &= ~DMA_ATTR_FORCE_CONTIGUOUS; rknpu_obj->flags |= RKNPU_MEM_NON_CONTIGUOUS; - rknpu_obj->cookie = - dma_alloc_attrs(drm->dev, rknpu_obj->size, - &rknpu_obj->dma_addr, gfp_mask, - rknpu_obj->dma_attrs); + rknpu_obj->cookie = dma_alloc_attrs( + drm->dev, rknpu_obj->size, &rknpu_obj->dma_addr, + gfp_mask, rknpu_obj->dma_attrs); if (!rknpu_obj->cookie) { LOG_DEV_ERROR( drm->dev, @@ -429,6 +435,8 @@ static int rknpu_gem_alloc_buf_with_cache(struct rknpu_gem_object *rknpu_obj, phys_addr_t cache_start = 0; unsigned long cache_offset = 0; unsigned long cache_size = 0; + bool iova_aligned = + !(rknpu_obj->flags & RKNPU_MEM_IOMMU_LIMIT_IOVA_ALIGNMENT); switch (cache_type) { case RKNPU_CACHE_SRAM: @@ -458,7 +466,8 @@ static int rknpu_gem_alloc_buf_with_cache(struct rknpu_gem_object *rknpu_obj, iovad = &cookie->iovad; rknpu_obj->iova_size = iova_align(iovad, cache_size + rknpu_obj->size); rknpu_obj->iova_start = rknpu_iommu_dma_alloc_iova( - domain, rknpu_obj->iova_size, dma_get_mask(drm->dev), drm->dev); + domain, rknpu_obj->iova_size, dma_get_mask(drm->dev), drm->dev, + iova_aligned); if (!rknpu_obj->iova_start) { LOG_ERROR("iommu_dma_alloc_iova failed\n"); return -ENOMEM; @@ -567,7 +576,8 @@ cache_unmap: free_iova: rknpu_iommu_dma_free_iova((void *)domain->iova_cookie, - rknpu_obj->iova_start, rknpu_obj->iova_size); + rknpu_obj->iova_start, rknpu_obj->iova_size, + iova_aligned); return ret; } @@ -579,6 +589,8 @@ static void rknpu_gem_free_buf_with_cache(struct rknpu_gem_object *rknpu_obj, struct rknpu_device *rknpu_dev = drm->dev_private; struct iommu_domain *domain = NULL; unsigned long cache_size = 0; + bool iova_aligned = + !(rknpu_obj->flags & RKNPU_MEM_IOMMU_LIMIT_IOVA_ALIGNMENT); switch (cache_type) { case RKNPU_CACHE_SRAM: @@ -600,7 +612,7 @@ static void rknpu_gem_free_buf_with_cache(struct rknpu_gem_object *rknpu_obj, rknpu_obj->size); rknpu_iommu_dma_free_iova((void *)domain->iova_cookie, rknpu_obj->iova_start, - rknpu_obj->iova_size); + rknpu_obj->iova_size, iova_aligned); } if (rknpu_obj->pages) @@ -648,6 +660,9 @@ struct rknpu_gem_object *rknpu_gem_object_create(struct drm_device *drm, "non-contiguous allocation is not supported without IOMMU, falling back to contiguous buffer\n"); } + /* set memory type and cache attribute from user side. */ + rknpu_obj->flags = flags; + if (IS_ENABLED(CONFIG_ROCKCHIP_RKNPU_SRAM) && (flags & RKNPU_MEM_TRY_ALLOC_SRAM) && rknpu_dev->sram_size > 0) { size_t sram_free_size = 0; @@ -656,9 +671,6 @@ struct rknpu_gem_object *rknpu_gem_object_create(struct drm_device *drm, if (sram_size != 0) sram_size = round_up(sram_size, PAGE_SIZE); - /* set memory type and cache attribute from user side. */ - rknpu_obj->flags = flags; - sram_free_size = rknpu_dev->sram_mm->free_chunks * rknpu_dev->sram_mm->chunk_size; if (sram_free_size > 0) { @@ -696,9 +708,6 @@ struct rknpu_gem_object *rknpu_gem_object_create(struct drm_device *drm, remain_ddr_size : rknpu_dev->nbuf_size; - /* set memory type and cache attribute from user side. */ - rknpu_obj->flags = flags; - if (nbuf_size > 0) { rknpu_obj->nbuf_size = nbuf_size; @@ -711,9 +720,6 @@ struct rknpu_gem_object *rknpu_gem_object_create(struct drm_device *drm, } if (remain_ddr_size > 0) { - /* set memory type and cache attribute from user side. */ - rknpu_obj->flags = flags; - ret = rknpu_gem_alloc_buf(rknpu_obj); if (ret < 0) goto gem_release; @@ -1471,8 +1477,7 @@ int rknpu_gem_sync_ioctl(struct drm_device *dev, void *data, RKNPU_CACHE_NBUF); } - for_each_sg(rknpu_obj->sgt->sgl, sg, rknpu_obj->sgt->nents, - i) { + for_each_sg(rknpu_obj->sgt->sgl, sg, rknpu_obj->sgt->nents, i) { if (length == 0) break; diff --git a/drivers/rknpu/rknpu_iommu.c b/drivers/rknpu/rknpu_iommu.c index 53bca78953ac..4797f0fec598 100644 --- a/drivers/rknpu/rknpu_iommu.c +++ b/drivers/rknpu/rknpu_iommu.c @@ -4,17 +4,20 @@ * Author: Felix Zeng */ +#include + #include "rknpu_iommu.h" dma_addr_t rknpu_iommu_dma_alloc_iova(struct iommu_domain *domain, size_t size, - u64 dma_limit, struct device *dev) + u64 dma_limit, struct device *dev, + bool size_aligned) { struct rknpu_iommu_dma_cookie *cookie = (void *)domain->iova_cookie; struct iova_domain *iovad = &cookie->iovad; unsigned long shift, iova_len, iova = 0; -#if (KERNEL_VERSION(5, 4, 0) > LINUX_VERSION_CODE) - dma_addr_t limit; -#endif + unsigned long limit_pfn; + struct iova *new_iova = NULL; + bool alloc_fast = size_aligned; shift = iova_shift(iovad); iova_len = size >> shift; @@ -42,22 +45,319 @@ dma_addr_t rknpu_iommu_dma_alloc_iova(struct iommu_domain *domain, size_t size, min_t(u64, dma_limit, domain->geometry.aperture_end); #if (KERNEL_VERSION(5, 4, 0) <= LINUX_VERSION_CODE) - iova = alloc_iova_fast(iovad, iova_len, dma_limit >> shift, true); + limit_pfn = dma_limit >> shift; #else - limit = min_t(dma_addr_t, dma_limit >> shift, iovad->end_pfn); - - iova = alloc_iova_fast(iovad, iova_len, limit, true); + limit_pfn = min_t(dma_addr_t, dma_limit >> shift, iovad->end_pfn); #endif + if (alloc_fast) { + iova = alloc_iova_fast(iovad, iova_len, limit_pfn, true); + } else { + new_iova = alloc_iova(iovad, iova_len, limit_pfn, size_aligned); + if (!new_iova) + return 0; + iova = new_iova->pfn_lo; + } + return (dma_addr_t)iova << shift; } void rknpu_iommu_dma_free_iova(struct rknpu_iommu_dma_cookie *cookie, - dma_addr_t iova, size_t size) + dma_addr_t iova, size_t size, bool size_aligned) { struct iova_domain *iovad = &cookie->iovad; + bool alloc_fast = size_aligned; - free_iova_fast(iovad, iova_pfn(iovad, iova), size >> iova_shift(iovad)); + if (alloc_fast) + free_iova_fast(iovad, iova_pfn(iovad, iova), + size >> iova_shift(iovad)); + else + free_iova(iovad, iova_pfn(iovad, iova)); +} + +static int rknpu_dma_info_to_prot(enum dma_data_direction dir, bool coherent) +{ + int prot = coherent ? IOMMU_CACHE : 0; + + switch (dir) { + case DMA_BIDIRECTIONAL: + return prot | IOMMU_READ | IOMMU_WRITE; + case DMA_TO_DEVICE: + return prot | IOMMU_READ; + case DMA_FROM_DEVICE: + return prot | IOMMU_WRITE; + default: + return 0; + } +} + +/* + * Prepare a successfully-mapped scatterlist to give back to the caller. + * + * At this point the segments are already laid out by iommu_dma_map_sg() to + * avoid individually crossing any boundaries, so we merely need to check a + * segment's start address to avoid concatenating across one. + */ +static int __finalise_sg(struct device *dev, struct scatterlist *sg, int nents, + dma_addr_t dma_addr) +{ + struct scatterlist *s, *cur = sg; + unsigned long seg_mask = dma_get_seg_boundary(dev); + unsigned int cur_len = 0, max_len = dma_get_max_seg_size(dev); + int i, count = 0; + + for_each_sg(sg, s, nents, i) { + /* Restore this segment's original unaligned fields first */ +#if KERNEL_VERSION(6, 1, 0) <= LINUX_VERSION_CODE + dma_addr_t s_dma_addr = sg_dma_address(s); +#endif + unsigned int s_iova_off = sg_dma_address(s); + unsigned int s_length = sg_dma_len(s); + unsigned int s_iova_len = s->length; + + sg_dma_address(s) = DMA_MAPPING_ERROR; + sg_dma_len(s) = 0; + +#if KERNEL_VERSION(6, 1, 0) <= LINUX_VERSION_CODE + if (sg_is_dma_bus_address(s)) { + if (i > 0) + cur = sg_next(cur); + + sg_dma_unmark_bus_address(s); + sg_dma_address(cur) = s_dma_addr; + sg_dma_len(cur) = s_length; + sg_dma_mark_bus_address(cur); + count++; + cur_len = 0; + continue; + } +#endif + + s->offset += s_iova_off; + s->length = s_length; + + /* + * Now fill in the real DMA data. If... + * - there is a valid output segment to append to + * - and this segment starts on an IOVA page boundary + * - but doesn't fall at a segment boundary + * - and wouldn't make the resulting output segment too long + */ + if (cur_len && !s_iova_off && (dma_addr & seg_mask) && + (max_len - cur_len >= s_length)) { + /* ...then concatenate it with the previous one */ + cur_len += s_length; + } else { + /* Otherwise start the next output segment */ + if (i > 0) + cur = sg_next(cur); + cur_len = s_length; + count++; + + sg_dma_address(cur) = dma_addr + s_iova_off; + } + + sg_dma_len(cur) = cur_len; + dma_addr += s_iova_len; + + if (s_length + s_iova_off < s_iova_len) + cur_len = 0; + } + return count; +} + +/* + * If mapping failed, then just restore the original list, + * but making sure the DMA fields are invalidated. + */ +static void __invalidate_sg(struct scatterlist *sg, int nents) +{ + struct scatterlist *s; + int i; + +#if KERNEL_VERSION(6, 1, 0) <= LINUX_VERSION_CODE + for_each_sg(sg, s, nents, i) { + if (sg_is_dma_bus_address(s)) { + sg_dma_unmark_bus_address(s); + } else { + if (sg_dma_address(s) != DMA_MAPPING_ERROR) + s->offset += sg_dma_address(s); + if (sg_dma_len(s)) + s->length = sg_dma_len(s); + } + sg_dma_address(s) = DMA_MAPPING_ERROR; + sg_dma_len(s) = 0; + } +#else + for_each_sg(sg, s, nents, i) { + if (sg_dma_address(s) != DMA_MAPPING_ERROR) + s->offset += sg_dma_address(s); + if (sg_dma_len(s)) + s->length = sg_dma_len(s); + sg_dma_address(s) = DMA_MAPPING_ERROR; + sg_dma_len(s) = 0; + } +#endif +} + +int rknpu_iommu_dma_map_sg(struct device *dev, struct scatterlist *sg, + int nents, enum dma_data_direction dir, + bool iova_aligned) +{ + struct iommu_domain *domain = iommu_get_domain_for_dev(dev); + struct rknpu_iommu_dma_cookie *cookie = (void *)domain->iova_cookie; + struct iova_domain *iovad = &cookie->iovad; + struct scatterlist *s = NULL, *prev = NULL; + int prot = rknpu_dma_info_to_prot(dir, dev_is_dma_coherent(dev)); + dma_addr_t iova; + unsigned long iova_len = 0; + unsigned long mask = dma_get_seg_boundary(dev); + ssize_t ret = -EINVAL; + int i = 0; + + if (iova_aligned) + return dma_map_sg(dev, sg, nents, dir); + + /* + * Work out how much IOVA space we need, and align the segments to + * IOVA granules for the IOMMU driver to handle. With some clever + * trickery we can modify the list in-place, but reversibly, by + * stashing the unaligned parts in the as-yet-unused DMA fields. + */ + for_each_sg(sg, s, nents, i) { + size_t s_iova_off = iova_offset(iovad, s->offset); + size_t s_length = s->length; + size_t pad_len = (mask - iova_len + 1) & mask; + + sg_dma_address(s) = s_iova_off; + sg_dma_len(s) = s_length; + s->offset -= s_iova_off; + s_length = iova_align(iovad, s_length + s_iova_off); + s->length = s_length; + + /* + * Due to the alignment of our single IOVA allocation, we can + * depend on these assumptions about the segment boundary mask: + * - If mask size >= IOVA size, then the IOVA range cannot + * possibly fall across a boundary, so we don't care. + * - If mask size < IOVA size, then the IOVA range must start + * exactly on a boundary, therefore we can lay things out + * based purely on segment lengths without needing to know + * the actual addresses beforehand. + * - The mask must be a power of 2, so pad_len == 0 if + * iova_len == 0, thus we cannot dereference prev the first + * time through here (i.e. before it has a meaningful value). + */ + if (pad_len && pad_len < s_length - 1) { + prev->length += pad_len; + iova_len += pad_len; + } + + iova_len += s_length; + prev = s; + } + + if (!iova_len) { + ret = __finalise_sg(dev, sg, nents, 0); + goto out; + } + + iova = rknpu_iommu_dma_alloc_iova(domain, iova_len, dma_get_mask(dev), + dev, iova_aligned); + if (!iova) { + ret = -ENOMEM; + LOG_ERROR("failed to allocate IOVA: %zd\n", ret); + goto out_restore_sg; + } + + ret = iommu_map_sg(domain, iova, sg, nents, prot); + if (ret < 0 || ret < iova_len) { + LOG_ERROR("failed to map SG: %zd\n", ret); + goto out_free_iova; + } + + return __finalise_sg(dev, sg, nents, iova); + +out_free_iova: + rknpu_iommu_dma_free_iova(cookie, iova, iova_len, iova_aligned); +out_restore_sg: + __invalidate_sg(sg, nents); +out: + + if (ret < 0) + ret = 0; + + return ret; +} + +void rknpu_iommu_dma_unmap_sg(struct device *dev, struct scatterlist *sg, + int nents, enum dma_data_direction dir, + bool iova_aligned) +{ + struct iommu_domain *domain = iommu_get_domain_for_dev(dev); + struct rknpu_iommu_dma_cookie *cookie = (void *)domain->iova_cookie; + struct iova_domain *iovad = &cookie->iovad; + size_t iova_off = 0; + dma_addr_t end = 0, start = 0; + struct scatterlist *tmp = NULL; + dma_addr_t dma_addr = 0; + size_t size = 0; + int i = 0; + + if (iova_aligned) + return dma_unmap_sg(dev, sg, nents, dir); + +#if KERNEL_VERSION(6, 1, 0) <= LINUX_VERSION_CODE + /* + * The scatterlist segments are mapped into a single + * contiguous IOVA allocation, the start and end points + * just have to be determined. + */ + for_each_sg(sg, tmp, nents, i) { + if (sg_is_dma_bus_address(tmp)) { + sg_dma_unmark_bus_address(tmp); + continue; + } + + if (sg_dma_len(tmp) == 0) + break; + + start = sg_dma_address(tmp); + break; + } + + nents -= i; + for_each_sg(tmp, tmp, nents, i) { + if (sg_is_dma_bus_address(tmp)) { + sg_dma_unmark_bus_address(tmp); + continue; + } + + if (sg_dma_len(tmp) == 0) + break; + + end = sg_dma_address(tmp) + sg_dma_len(tmp); + } +#else + start = sg_dma_address(sg); + for_each_sg(sg_next(sg), tmp, nents - 1, i) { + if (sg_dma_len(tmp) == 0) + break; + sg = tmp; + } + end = sg_dma_address(sg) + sg_dma_len(sg); +#endif + + dma_addr = start; + size = end - start; + iova_off = iova_offset(iovad, start); + + if (end) { + dma_addr -= iova_off; + size = iova_align(iovad, size + iova_off); + iommu_unmap(domain, dma_addr, size); + rknpu_iommu_dma_free_iova(cookie, dma_addr, size, iova_aligned); + } } #if defined(CONFIG_IOMMU_API) && defined(CONFIG_NO_GKI)