diff --git a/drivers/rknpu/Kconfig b/drivers/rknpu/Kconfig index 8cb2b777ae61..c3343eece9c6 100644 --- a/drivers/rknpu/Kconfig +++ b/drivers/rknpu/Kconfig @@ -29,6 +29,12 @@ config ROCKCHIP_RKNPU_FENCE help Enable fence support for RKNPU. +config ROCKCHIP_RKNPU_SRAM + bool "RKNPU SRAM" + depends on NO_GKI + help + Enable RKNPU SRAM support + choice prompt "RKNPU memory manager" default ROCKCHIP_RKNPU_DRM_GEM diff --git a/drivers/rknpu/Makefile b/drivers/rknpu/Makefile index 2e18da1d0d61..41dacc93157c 100644 --- a/drivers/rknpu/Makefile +++ b/drivers/rknpu/Makefile @@ -9,6 +9,7 @@ rknpu-y += rknpu_drv.o rknpu-y += rknpu_reset.o rknpu-y += rknpu_job.o rknpu-y += rknpu_debugger.o +rknpu-$(CONFIG_ROCKCHIP_RKNPU_SRAM) += rknpu_mm.o rknpu-$(CONFIG_ROCKCHIP_RKNPU_FENCE) += rknpu_fence.o rknpu-$(CONFIG_ROCKCHIP_RKNPU_DRM_GEM) += rknpu_gem.o rknpu-$(CONFIG_ROCKCHIP_RKNPU_DMA_HEAP) += rknpu_mem.o diff --git a/drivers/rknpu/include/rknpu_debugger.h b/drivers/rknpu/include/rknpu_debugger.h index 61369706690b..3f4420d443e1 100644 --- a/drivers/rknpu/include/rknpu_debugger.h +++ b/drivers/rknpu/include/rknpu_debugger.h @@ -1,12 +1,14 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* - * Copyright (C) Fuzhou Rockchip Electronics Co.Ltd + * Copyright (C) Rockchip Electronics Co.Ltd * Author: Felix Zeng */ #ifndef __LINUX_RKNPU_DEBUGGER_H_ #define __LINUX_RKNPU_DEBUGGER_H_ +#include + /* * struct rknpu_debugger - rknpu debugger information * @@ -78,6 +80,8 @@ struct rknpu_debugger_node { struct list_head list; }; +struct rknpu_device; + int rknpu_debugger_init(struct rknpu_device *rknpu_dev); int rknpu_debugger_remove(struct rknpu_device *rknpu_dev); diff --git a/drivers/rknpu/include/rknpu_drv.h b/drivers/rknpu/include/rknpu_drv.h index 57979f9b9dfb..126b9e857b38 100644 --- a/drivers/rknpu/include/rknpu_drv.h +++ b/drivers/rknpu/include/rknpu_drv.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* - * Copyright (C) Fuzhou Rockchip Electronics Co.Ltd + * Copyright (C) Rockchip 
Electronics Co.Ltd * Author: Felix Zeng */ @@ -26,13 +26,14 @@ #include "rknpu_job.h" #include "rknpu_fence.h" #include "rknpu_debugger.h" +#include "rknpu_mm.h" #define DRIVER_NAME "rknpu" #define DRIVER_DESC "RKNPU driver" -#define DRIVER_DATE "20220428" +#define DRIVER_DATE "20220803" #define DRIVER_MAJOR 0 -#define DRIVER_MINOR 7 -#define DRIVER_PATCHLEVEL 2 +#define DRIVER_MINOR 8 +#define DRIVER_PATCHLEVEL 0 #define LOG_TAG "RKNPU" @@ -81,7 +82,7 @@ struct rknpu_subcore_data { struct list_head todo_list; wait_queue_head_t job_done_wq; struct rknpu_job *job; - uint64_t task_num; + int64_t task_num; struct rknpu_timer timer; }; @@ -106,6 +107,7 @@ struct rknpu_device { spinlock_t lock; spinlock_t irq_lock; struct mutex power_lock; + struct mutex reset_lock; struct rknpu_subcore_data subcore_datas[RKNPU_MAX_CORES]; const struct rknpu_config *config; void __iomem *bw_priority_base; @@ -130,6 +132,7 @@ struct rknpu_device { unsigned long current_volt; int bypass_irq_handler; int bypass_soft_reset; + bool soft_reseting; struct device *genpd_dev_npu0; struct device *genpd_dev_npu1; struct device *genpd_dev_npu2; @@ -142,6 +145,11 @@ struct rknpu_device { struct rknpu_debugger debugger; struct hrtimer timer; ktime_t kt; + phys_addr_t sram_start; + phys_addr_t sram_end; + uint32_t sram_size; + void __iomem *sram_base_io; + struct rknpu_mm *sram_mm; }; int rknpu_action(struct rknpu_device *rknpu_dev, struct rknpu_action *args); diff --git a/drivers/rknpu/include/rknpu_fence.h b/drivers/rknpu/include/rknpu_fence.h index c0f9d73e107a..164f6de4116b 100644 --- a/drivers/rknpu/include/rknpu_fence.h +++ b/drivers/rknpu/include/rknpu_fence.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* - * Copyright (C) Fuzhou Rockchip Electronics Co.Ltd + * Copyright (C) Rockchip Electronics Co.Ltd * Author: Felix Zeng */ diff --git a/drivers/rknpu/include/rknpu_gem.h b/drivers/rknpu/include/rknpu_gem.h index 5d911e98d412..954586607b16 100644 --- 
a/drivers/rknpu/include/rknpu_gem.h +++ b/drivers/rknpu/include/rknpu_gem.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* - * Copyright (C) Fuzhou Rockchip Electronics Co.Ltd + * Copyright (C) Rockchip Electronics Co.Ltd * Author: Felix Zeng */ @@ -19,6 +19,8 @@ #include #endif +#include "rknpu_mm.h" + #define to_rknpu_obj(x) container_of(x, struct rknpu_gem_object, base) /* @@ -45,6 +47,10 @@ struct rknpu_gem_object { struct drm_gem_object base; unsigned int flags; unsigned long size; + unsigned long sram_size; + struct rknpu_mm_obj *sram_obj; + dma_addr_t iova_start; + unsigned long iova_size; void *cookie; void __iomem *kv_addr; dma_addr_t dma_addr; @@ -58,7 +64,8 @@ struct rknpu_gem_object { /* create a new buffer with gem object */ struct rknpu_gem_object *rknpu_gem_object_create(struct drm_device *dev, unsigned int flags, - unsigned long size); + unsigned long size, + unsigned long sram_size); /* destroy a buffer with gem object */ void rknpu_gem_object_destroy(struct rknpu_gem_object *rknpu_obj); diff --git a/drivers/rknpu/include/rknpu_ioctl.h b/drivers/rknpu/include/rknpu_ioctl.h index 43ab93d5ce83..49d4442e62f7 100644 --- a/drivers/rknpu/include/rknpu_ioctl.h +++ b/drivers/rknpu/include/rknpu_ioctl.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* - * Copyright (C) Fuzhou Rockchip Electronics Co.Ltd + * Copyright (C) Rockchip Electronics Co.Ltd * Author: Felix Zeng */ @@ -19,6 +19,7 @@ #endif #define RKNPU_OFFSET_VERSION 0x0 +#define RKNPU_OFFSET_VERSION_NUM 0x4 #define RKNPU_OFFSET_PC_OP_EN 0x8 #define RKNPU_OFFSET_PC_DATA_ADDR 0x10 #define RKNPU_OFFSET_PC_DATA_AMOUNT 0x14 @@ -75,10 +76,13 @@ enum e_rknpu_mem_type { RKNPU_MEM_SECURE = 1 << 6, /* allocate from non-dma32 zone */ RKNPU_MEM_NON_DMA32 = 1 << 7, + /* request SRAM */ + RKNPU_MEM_TRY_ALLOC_SRAM = 1 << 8, RKNPU_MEM_MASK = RKNPU_MEM_NON_CONTIGUOUS | RKNPU_MEM_CACHEABLE | RKNPU_MEM_WRITE_COMBINE | RKNPU_MEM_KERNEL_MAPPING | RKNPU_MEM_IOMMU | RKNPU_MEM_ZEROING | - 
RKNPU_MEM_SECURE | RKNPU_MEM_NON_DMA32 + RKNPU_MEM_SECURE | RKNPU_MEM_NON_DMA32 | + RKNPU_MEM_TRY_ALLOC_SRAM }; /* sync mode definitions. */ @@ -127,6 +131,8 @@ enum e_rknpu_action { RKNPU_SET_PROC_NICE = 19, RKNPU_POWER_ON = 20, RKNPU_POWER_OFF = 21, + RKNPU_GET_TOTAL_SRAM_SIZE = 22, + RKNPU_GET_FREE_SRAM_SIZE = 23, }; /** @@ -138,6 +144,8 @@ enum e_rknpu_action { * - this size value would be page-aligned internally. * @obj_addr: address of RKNPU memory object. * @dma_addr: dma address that access by rknpu. + * @sram_size: user-desired sram memory allocation size. + * - this size value would be page-aligned internally. */ struct rknpu_mem_create { __u32 handle; @@ -145,6 +153,7 @@ struct rknpu_mem_create { __u64 size; __u64 obj_addr; __u64 dma_addr; + __u64 sram_size; }; /** diff --git a/drivers/rknpu/include/rknpu_job.h b/drivers/rknpu/include/rknpu_job.h index 6a1161b5932c..6ef52d439277 100644 --- a/drivers/rknpu/include/rknpu_job.h +++ b/drivers/rknpu/include/rknpu_job.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* - * Copyright (C) Fuzhou Rockchip Electronics Co.Ltd + * Copyright (C) Rockchip Electronics Co.Ltd * Author: Felix Zeng */ @@ -19,6 +19,7 @@ #define RKNPU_JOB_DONE (1 << 0) #define RKNPU_JOB_ASYNC (1 << 1) +#define RKNPU_JOB_DETACHED (1 << 2) #define RKNPU_CORE_AUTO_MASK 0x00 #define RKNPU_CORE0_MASK 0x01 @@ -29,6 +30,8 @@ struct rknpu_job { struct rknpu_device *rknpu_dev; struct list_head head[RKNPU_MAX_CORES]; struct work_struct cleanup_work; + bool in_queue[RKNPU_MAX_CORES]; + bool irq_entry[RKNPU_MAX_CORES]; unsigned int flags; int ret; struct rknpu_submit *args; diff --git a/drivers/rknpu/include/rknpu_mem.h b/drivers/rknpu/include/rknpu_mem.h index 828b88e45589..925535c85f06 100644 --- a/drivers/rknpu/include/rknpu_mem.h +++ b/drivers/rknpu/include/rknpu_mem.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* - * Copyright (C) Fuzhou Rockchip Electronics Co.Ltd + * Copyright (C) Rockchip Electronics Co.Ltd * Author: Felix 
Zeng */ diff --git a/drivers/rknpu/include/rknpu_mm.h b/drivers/rknpu/include/rknpu_mm.h new file mode 100644 index 000000000000..b764892d1dc4 --- /dev/null +++ b/drivers/rknpu/include/rknpu_mm.h @@ -0,0 +1,61 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) Rockchip Electronics Co.Ltd + * Author: Felix Zeng + */ + +#ifndef __LINUX_RKNPU_MM_H +#define __LINUX_RKNPU_MM_H + +#include +#include +#include +#include +#include + +#include "rknpu_drv.h" + +struct rknpu_mm { + void *bitmap; + struct mutex lock; + unsigned int chunk_size; + unsigned int total_chunks; + unsigned int free_chunks; +}; + +struct rknpu_mm_obj { + uint32_t range_start; + uint32_t range_end; +}; + +int rknpu_mm_create(unsigned int mem_size, unsigned int chunk_size, + struct rknpu_mm **mm); + +void rknpu_mm_destroy(struct rknpu_mm *mm); + +int rknpu_mm_alloc(struct rknpu_mm *mm, unsigned int size, + struct rknpu_mm_obj **mm_obj); + +int rknpu_mm_free(struct rknpu_mm *mm, struct rknpu_mm_obj *mm_obj); + +int rknpu_mm_dump(struct seq_file *m, void *data); + +enum iommu_dma_cookie_type { + IOMMU_DMA_IOVA_COOKIE, + IOMMU_DMA_MSI_COOKIE, +}; + +struct rknpu_iommu_dma_cookie { + enum iommu_dma_cookie_type type; + + /* Full allocator for IOMMU_DMA_IOVA_COOKIE */ + struct iova_domain iovad; +}; + +dma_addr_t rknpu_iommu_dma_alloc_iova(struct iommu_domain *domain, size_t size, + u64 dma_limit, struct device *dev); + +void rknpu_iommu_dma_free_iova(struct rknpu_iommu_dma_cookie *cookie, + dma_addr_t iova, size_t size); + +#endif diff --git a/drivers/rknpu/include/rknpu_reset.h b/drivers/rknpu/include/rknpu_reset.h index 05c9622e526c..b80e29b321b0 100644 --- a/drivers/rknpu/include/rknpu_reset.h +++ b/drivers/rknpu/include/rknpu_reset.h @@ -1,9 +1,12 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* - * Copyright (C) Fuzhou Rockchip Electronics Co.Ltd + * Copyright (C) Rockchip Electronics Co.Ltd * Author: Felix Zeng */ +#ifndef __LINUX_RKNPU_RESET_H +#define __LINUX_RKNPU_RESET_H + #include 
#include "rknpu_drv.h" @@ -11,3 +14,5 @@ int rknpu_reset_get(struct rknpu_device *rknpu_dev); int rknpu_soft_reset(struct rknpu_device *rknpu_dev); + +#endif diff --git a/drivers/rknpu/rknpu_debugger.c b/drivers/rknpu/rknpu_debugger.c index 585983bf2ce0..49a22503d534 100644 --- a/drivers/rknpu/rknpu_debugger.c +++ b/drivers/rknpu/rknpu_debugger.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* - * Copyright (C) Fuzhou Rockchip Electronics Co.Ltd + * Copyright (C) Rockchip Electronics Co.Ltd * Author: Felix Zeng */ @@ -9,10 +9,10 @@ #include #include #include -#include #include -#include "rknpu_drv.h" +#include "rknpu_mm.h" +#include "rknpu_reset.h" #include "rknpu_debugger.h" #define RKNPU_DEBUGGER_ROOT_NAME "rknpu" @@ -88,7 +88,7 @@ static ssize_t rknpu_power_set(struct file *file, const char __user *ubuf, struct rknpu_device *rknpu_dev = container_of(debugger, struct rknpu_device, debugger); struct rknpu_action args; - char buf[10]; + char buf[8]; if (len > sizeof(buf) - 1) return -EINVAL; @@ -121,10 +121,55 @@ static ssize_t rknpu_power_set(struct file *file, const char __user *ubuf, return len; } -struct rknpu_debugger_list rknpu_debugger_root_list[] = { - { "driver_version", rknpu_version_show, NULL, NULL }, +static int rknpu_reset_show(struct seq_file *m, void *data) +{ + struct rknpu_debugger_node *node = m->private; + struct rknpu_debugger *debugger = node->debugger; + struct rknpu_device *rknpu_dev = + container_of(debugger, struct rknpu_device, debugger); + + if (!rknpu_dev->bypass_soft_reset) + seq_puts(m, "on\n"); + else + seq_puts(m, "off\n"); + + return 0; +} + +static ssize_t rknpu_reset_set(struct file *file, const char __user *ubuf, + size_t len, loff_t *offp) +{ + struct seq_file *priv = file->private_data; + struct rknpu_debugger_node *node = priv->private; + struct rknpu_debugger *debugger = node->debugger; + struct rknpu_device *rknpu_dev = + container_of(debugger, struct rknpu_device, debugger); + char buf[8]; + + if (len > sizeof(buf) 
- 1 || len == 0) + return -EINVAL; + if (copy_from_user(buf, ubuf, len)) + return -EFAULT; + buf[len - 1] = '\0'; /* len >= 1 is guaranteed above */ + + if (strcmp(buf, "1") == 0 && rknpu_dev->is_powered) + rknpu_soft_reset(rknpu_dev); + else if (strcmp(buf, "on") == 0) + rknpu_dev->bypass_soft_reset = 0; + else if (strcmp(buf, "off") == 0) + rknpu_dev->bypass_soft_reset = 1; + + return len; +} + +static struct rknpu_debugger_list rknpu_debugger_root_list[] = { + { "version", rknpu_version_show, NULL, NULL }, { "load", rknpu_load_show, NULL, NULL }, - { "power", rknpu_power_show, rknpu_power_set, NULL } + { "power", rknpu_power_show, rknpu_power_set, NULL }, + { "reset", rknpu_reset_show, rknpu_reset_set, NULL }, +#ifdef CONFIG_ROCKCHIP_RKNPU_SRAM + { "mm", rknpu_mm_dump, NULL, NULL }, +#endif }; static ssize_t rknpu_debugger_write(struct file *file, const char __user *ubuf, @@ -350,14 +395,14 @@ MALLOC_FAIL: return -1; } -int rknpu_procfs_remove(struct rknpu_debugger *debugger) +static int rknpu_procfs_remove(struct rknpu_debugger *debugger) { rknpu_procfs_remove_files(debugger); return 0; } -int rknpu_procfs_init(struct rknpu_debugger *debugger) +static int rknpu_procfs_init(struct rknpu_debugger *debugger) { int ret; diff --git a/drivers/rknpu/rknpu_drv.c b/drivers/rknpu/rknpu_drv.c index 0aa5add1e4d3..48a12595c90e 100644 --- a/drivers/rknpu/rknpu_drv.c +++ b/drivers/rknpu/rknpu_drv.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* - * Copyright (C) Fuzhou Rockchip Electronics Co.Ltd + * Copyright (C) Rockchip Electronics Co.Ltd * Author: Felix Zeng */ @@ -33,6 +33,8 @@ #include #include #include +#include +#include #ifndef FPGA_PLATFORM #include @@ -45,6 +47,7 @@ #include "rknpu_reset.h" #include "rknpu_fence.h" #include "rknpu_drv.h" +#include "rknpu_gem.h" #ifdef CONFIG_ROCKCHIP_RKNPU_DRM_GEM #include @@ -289,6 +292,22 @@ int rknpu_action(struct rknpu_device *rknpu_dev, struct rknpu_action *args) set_user_nice(current, *(int32_t *)&args->value); ret = 0; break; + case RKNPU_GET_TOTAL_SRAM_SIZE: + 
if (rknpu_dev->sram_mm) + args->value = rknpu_dev->sram_mm->total_chunks * + rknpu_dev->sram_mm->chunk_size; + else + args->value = 0; + ret = 0; + break; + case RKNPU_GET_FREE_SRAM_SIZE: + if (rknpu_dev->sram_mm) + args->value = rknpu_dev->sram_mm->free_chunks * + rknpu_dev->sram_mm->chunk_size; + else + args->value = 0; + ret = 0; + break; default: ret = -EINVAL; break; @@ -502,10 +521,6 @@ static void rknpu_cancel_timer(struct rknpu_device *rknpu_dev) hrtimer_cancel(&rknpu_dev->timer); } -#ifdef CONFIG_ROCKCHIP_RKNPU_DMA_HEAP - -#endif - static bool rknpu_is_iommu_enable(struct device *dev) { struct device_node *iommu = NULL; @@ -587,7 +602,7 @@ static int rknpu_power_on(struct rknpu_device *rknpu_dev) if (ret) { LOG_DEV_ERROR( dev, - "failed to enable vdd reg for rknpu, ret = %d\n", + "failed to enable vdd reg for rknpu, ret: %d\n", ret); return ret; } @@ -598,7 +613,7 @@ static int rknpu_power_on(struct rknpu_device *rknpu_dev) if (ret) { LOG_DEV_ERROR( dev, - "failed to enable mem reg for rknpu, ret = %d\n", + "failed to enable mem reg for rknpu, ret: %d\n", ret); return ret; } @@ -607,7 +622,7 @@ static int rknpu_power_on(struct rknpu_device *rknpu_dev) ret = clk_bulk_prepare_enable(rknpu_dev->num_clks, rknpu_dev->clks); if (ret) { - LOG_DEV_ERROR(dev, "failed to enable clk for rknpu, ret = %d\n", + LOG_DEV_ERROR(dev, "failed to enable clk for rknpu, ret: %d\n", ret); return ret; } @@ -629,7 +644,7 @@ static int rknpu_power_on(struct rknpu_device *rknpu_dev) if (ret < 0) { LOG_DEV_ERROR( dev, - "failed to get pm runtime for npu0, ret = %d\n", + "failed to get pm runtime for npu0, ret: %d\n", ret); goto out; } @@ -644,7 +659,7 @@ static int rknpu_power_on(struct rknpu_device *rknpu_dev) if (ret < 0) { LOG_DEV_ERROR( dev, - "failed to get pm runtime for npu1, ret = %d\n", + "failed to get pm runtime for npu1, ret: %d\n", ret); goto out; } @@ -659,7 +674,7 @@ static int rknpu_power_on(struct rknpu_device *rknpu_dev) if (ret < 0) { LOG_DEV_ERROR( dev, - "failed 
to get pm runtime for npu2, ret = %d\n", + "failed to get pm runtime for npu2, ret: %d\n", ret); goto out; } @@ -668,7 +683,7 @@ static int rknpu_power_on(struct rknpu_device *rknpu_dev) ret = pm_runtime_get_sync(dev); if (ret < 0) { LOG_DEV_ERROR(dev, - "failed to get pm runtime for rknpu, ret = %d\n", + "failed to get pm runtime for rknpu, ret: %d\n", ret); } @@ -821,7 +836,7 @@ static int npu_opp_helper(struct dev_pm_set_opp_data *data) rockchip_set_read_margin(dev, opp_info, target_rm, is_set_rm); if (is_set_clk && clk_set_rate(clk, new_freq)) { ret = -EINVAL; - LOG_DEV_ERROR(dev, "failed to set clk rate\n"); + LOG_DEV_ERROR(dev, "failed to set clk rate: %d\n", ret); goto restore_rm; } /* Scaling down? Scale voltage after frequency */ @@ -831,7 +846,7 @@ static int npu_opp_helper(struct dev_pm_set_opp_data *data) rockchip_set_read_margin(dev, opp_info, target_rm, is_set_rm); if (is_set_clk && clk_set_rate(clk, new_freq)) { ret = -EINVAL; - LOG_DEV_ERROR(dev, "failed to set clk rate\n"); + LOG_DEV_ERROR(dev, "failed to set clk rate: %d\n", ret); goto restore_rm; } ret = regulator_set_voltage(vdd_reg, new_supply_vdd->u_volt, @@ -1334,6 +1349,56 @@ static int rknpu_register_irq(struct platform_device *pdev, return 0; } +static int rknpu_find_sram_resource(struct rknpu_device *rknpu_dev) +{ + struct device *dev = rknpu_dev->dev; + struct device_node *sram_node = NULL; + struct resource sram_res; + uint32_t sram_size = 0; + int ret = -EINVAL; + + /* get sram device node */ + sram_node = of_parse_phandle(dev->of_node, "rockchip,sram", 0); + rknpu_dev->sram_size = 0; + if (!sram_node) + return -EINVAL; + + /* get sram start and size */ + ret = of_address_to_resource(sram_node, 0, &sram_res); + of_node_put(sram_node); + if (ret) + return ret; + + /* check sram start and size is PAGE_SIZE align */ + rknpu_dev->sram_start = round_up(sram_res.start, PAGE_SIZE); + rknpu_dev->sram_end = round_down( + sram_res.start + resource_size(&sram_res), PAGE_SIZE); + if 
(rknpu_dev->sram_end <= rknpu_dev->sram_start) { + LOG_DEV_WARN( + dev, + "invalid sram resource, sram start %pa, sram end %pa\n", + &rknpu_dev->sram_start, &rknpu_dev->sram_end); + return -EINVAL; + } + + sram_size = rknpu_dev->sram_end - rknpu_dev->sram_start; + + rknpu_dev->sram_base_io = + devm_ioremap(dev, rknpu_dev->sram_start, sram_size); + if (!rknpu_dev->sram_base_io) { + LOG_DEV_ERROR(dev, "failed to remap sram base io!\n"); + /* not fatal: carry on without a kernel mapping of the SRAM */ + } + + rknpu_dev->sram_size = sram_size; + + LOG_DEV_INFO(dev, "sram region: [%pa, %pa), sram size: %#x\n", + &rknpu_dev->sram_start, &rknpu_dev->sram_end, + rknpu_dev->sram_size); + + return 0; +} + +static int rknpu_probe(struct platform_device *pdev) { struct resource *res = NULL; @@ -1428,6 +1493,7 @@ static int rknpu_probe(struct platform_device *pdev) spin_lock_init(&rknpu_dev->lock); spin_lock_init(&rknpu_dev->irq_lock); mutex_init(&rknpu_dev->power_lock); + mutex_init(&rknpu_dev->reset_lock); for (i = 0; i < config->num_irqs; i++) { INIT_LIST_HEAD(&rknpu_dev->subcore_datas[i].todo_list); init_waitqueue_head(&rknpu_dev->subcore_datas[i].job_done_wq); @@ -1557,6 +1623,18 @@ static int rknpu_probe(struct platform_device *pdev) } INIT_DEFERRABLE_WORK(&rknpu_dev->power_off_work, rknpu_power_off_delay_work); + + if (IS_ENABLED(CONFIG_ROCKCHIP_RKNPU_SRAM) && rknpu_dev->iommu_en) { + if (!rknpu_find_sram_resource(rknpu_dev)) { + ret = rknpu_mm_create(rknpu_dev->sram_size, PAGE_SIZE, + &rknpu_dev->sram_mm); + if (ret != 0) + goto err_remove_wq; + } else { + LOG_DEV_WARN(dev, "could not find sram resource!\n"); + } + } + rknpu_power_off(rknpu_dev); rknpu_dev->is_powered = false; atomic_set(&rknpu_dev->power_refcount, 0); @@ -1588,6 +1666,9 @@ static int rknpu_remove(struct platform_device *pdev) cancel_delayed_work_sync(&rknpu_dev->power_off_work); destroy_workqueue(rknpu_dev->power_off_wq); + if (IS_ENABLED(CONFIG_ROCKCHIP_RKNPU_SRAM) && rknpu_dev->sram_mm) + 
rknpu_mm_destroy(rknpu_dev->sram_mm); + rknpu_debugger_remove(rknpu_dev); rknpu_cancel_timer(rknpu_dev); diff --git a/drivers/rknpu/rknpu_fence.c b/drivers/rknpu/rknpu_fence.c index 529a98ef33ba..dc22ea1c4e12 100644 --- a/drivers/rknpu/rknpu_fence.c +++ b/drivers/rknpu/rknpu_fence.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* - * Copyright (C) Fuzhou Rockchip Electronics Co.Ltd + * Copyright (C) Rockchip Electronics Co.Ltd * Author: Felix Zeng */ diff --git a/drivers/rknpu/rknpu_gem.c b/drivers/rknpu/rknpu_gem.c index e1f2e4cbb3c8..6e0e4428517d 100644 --- a/drivers/rknpu/rknpu_gem.c +++ b/drivers/rknpu/rknpu_gem.c @@ -1,17 +1,22 @@ // SPDX-License-Identifier: GPL-2.0 /* - * Copyright (C) Fuzhou Rockchip Electronics Co.Ltd + * Copyright (C) Rockchip Electronics Co.Ltd * Author: Felix Zeng */ #include #include #include +#include +#include #include #include +#include +#include #include #include +#include #if KERNEL_VERSION(5, 10, 0) <= LINUX_VERSION_CODE #include @@ -29,13 +34,17 @@ static int rknpu_gem_get_pages(struct rknpu_gem_object *rknpu_obj) struct drm_device *drm = rknpu_obj->base.dev; struct scatterlist *s = NULL; dma_addr_t dma_addr = 0; + dma_addr_t phys = 0; int ret = -EINVAL, i = 0; rknpu_obj->pages = drm_gem_get_pages(&rknpu_obj->base); - if (IS_ERR(rknpu_obj->pages)) - return PTR_ERR(rknpu_obj->pages); + if (IS_ERR(rknpu_obj->pages)) { + ret = PTR_ERR(rknpu_obj->pages); + LOG_ERROR("failed to get pages: %d\n", ret); + return ret; + } - rknpu_obj->num_pages = rknpu_obj->base.size >> PAGE_SHIFT; + rknpu_obj->num_pages = rknpu_obj->size >> PAGE_SHIFT; #if KERNEL_VERSION(5, 10, 0) <= LINUX_VERSION_CODE rknpu_obj->sgt = drm_prime_pages_to_sg(drm, rknpu_obj->pages, @@ -46,6 +55,7 @@ static int rknpu_gem_get_pages(struct rknpu_gem_object *rknpu_obj) #endif if (IS_ERR(rknpu_obj->sgt)) { ret = PTR_ERR(rknpu_obj->sgt); + LOG_ERROR("failed to allocate sgt: %d\n", ret); goto put_pages; } @@ -54,7 +64,7 @@ static int rknpu_gem_get_pages(struct 
rknpu_gem_object *rknpu_obj) if (ret == 0) { ret = -EFAULT; LOG_DEV_ERROR(drm->dev, "%s: dma map %zu fail\n", __func__, - rknpu_obj->base.size); + rknpu_obj->size); goto free_sgt; } @@ -63,6 +73,7 @@ static int rknpu_gem_get_pages(struct rknpu_gem_object *rknpu_obj) VM_MAP, PAGE_KERNEL); if (!rknpu_obj->cookie) { ret = -ENOMEM; + LOG_ERROR("failed to vmap: %d\n", ret); goto unmap_sg; } rknpu_obj->kv_addr = rknpu_obj->cookie; @@ -73,9 +84,10 @@ static int rknpu_gem_get_pages(struct rknpu_gem_object *rknpu_obj) for_each_sg(rknpu_obj->sgt->sgl, s, rknpu_obj->sgt->nents, i) { dma_addr += s->length; + phys = sg_phys(s); LOG_DEBUG( - "gem pages alloc sgt[%d], dma_address: %#llx, length: %#x\n", - i, (__u64)dma_addr, s->length); + "gem pages alloc sgt[%d], dma_address: %pad, length: %#x, phys: %pad, virt: %p\n", + i, &dma_addr, s->length, &phys, sg_virt(s)); } return 0; @@ -107,8 +119,11 @@ static void rknpu_gem_put_pages(struct rknpu_gem_object *rknpu_obj) DMA_BIDIRECTIONAL); drm_gem_put_pages(&rknpu_obj->base, rknpu_obj->pages, true, true); - sg_free_table(rknpu_obj->sgt); - kfree(rknpu_obj->sgt); + + if (rknpu_obj->sgt != NULL) { + sg_free_table(rknpu_obj->sgt); + kfree(rknpu_obj->sgt); + } } #endif @@ -234,9 +249,8 @@ static int rknpu_gem_alloc_buf(struct rknpu_gem_object *rknpu_obj) for_each_sg(sgt->sgl, s, sgt->nents, i) { sg_dma_address(s) = sg_phys(s); - LOG_DEBUG( - "dma alloc sgt[%d], phys_address: %#llx, length: %u\n", - i, (__u64)s->dma_address, s->length); + LOG_DEBUG("dma alloc sgt[%d], phys_address: %pad, length: %u\n", + i, &s->dma_address, s->length); } if (drm_prime_sg_to_page_addr_arrays(sgt, rknpu_obj->pages, NULL, @@ -307,7 +321,7 @@ static int rknpu_gem_handle_create(struct drm_gem_object *obj, if (ret) return ret; - LOG_DEBUG("gem handle = %#x\n", *handle); + LOG_DEBUG("gem handle: %#x\n", *handle); /* drop reference from allocate - handle holds it now. 
*/ rknpu_gem_object_put(obj); @@ -333,7 +347,6 @@ static struct rknpu_gem_object *rknpu_gem_init(struct drm_device *drm, if (!rknpu_obj) return ERR_PTR(-ENOMEM); - rknpu_obj->size = size; obj = &rknpu_obj->base; ret = drm_gem_object_init(drm, obj, size); @@ -343,6 +356,8 @@ static struct rknpu_gem_object *rknpu_gem_init(struct drm_device *drm, return ERR_PTR(ret); } + rknpu_obj->size = rknpu_obj->base.size; + gfp_mask = mapping_gfp_mask(obj->filp->f_mapping); if (rknpu_obj->flags & RKNPU_MEM_ZEROING) @@ -355,39 +370,204 @@ static struct rknpu_gem_object *rknpu_gem_init(struct drm_device *drm, mapping_set_gfp_mask(obj->filp->f_mapping, gfp_mask); - ret = drm_gem_create_mmap_offset(obj); - if (ret < 0) { - drm_gem_object_release(obj); - kfree(rknpu_obj); - return ERR_PTR(ret); + return rknpu_obj; +} + +static void rknpu_gem_release(struct rknpu_gem_object *rknpu_obj) +{ + /* release file pointer to gem object. */ + drm_gem_object_release(&rknpu_obj->base); + kfree(rknpu_obj); +} + +static int rknpu_gem_alloc_buf_with_sram(struct rknpu_gem_object *rknpu_obj) +{ + struct drm_device *drm = rknpu_obj->base.dev; + struct rknpu_device *rknpu_dev = drm->dev_private; + struct iommu_domain *domain = NULL; + struct rknpu_iommu_dma_cookie *cookie = NULL; + struct iova_domain *iovad = NULL; + struct scatterlist *s = NULL; + unsigned long length = 0; + unsigned long size = 0; + unsigned long offset = 0; + int i = 0; + int ret = -EINVAL; + + /* iova map to sram; the domain's iova cookie is dereferenced below */ + domain = iommu_get_domain_for_dev(rknpu_dev->dev); + if (!domain || !domain->iova_cookie) { + LOG_ERROR("failed to get iommu domain!\n"); + return -EINVAL; + } - return rknpu_obj; + cookie = domain->iova_cookie; + iovad = &cookie->iovad; + rknpu_obj->iova_size = + iova_align(iovad, rknpu_obj->sram_size + rknpu_obj->size); + rknpu_obj->iova_start = rknpu_iommu_dma_alloc_iova( + domain, rknpu_obj->iova_size, dma_get_mask(drm->dev), drm->dev); + if (!rknpu_obj->iova_start) { + LOG_ERROR("iommu_dma_alloc_iova failed\n"); + return -ENOMEM; + } + + 
LOG_INFO("allocate iova start: %pad, size: %lu\n", + &rknpu_obj->iova_start, rknpu_obj->iova_size); + + /* + * Overview SRAM + DDR map to IOVA + * -------- + * sram_size: rknpu_obj->sram_size + * - allocate from SRAM, this size value has been page-aligned + * size: rknpu_obj->size + * - allocate from DDR pages, this size value has been page-aligned + * iova_size: rknpu_obj->iova_size + * - from iova_align(sram_size + size) + * - it may be larger than the (sram_size + size), and the larger part is not mapped + * -------- + * + * |<- sram_size ->| |<- - - - size - - - ->| + * +---------------+ +----------------------+ + * | SRAM | | DDR | + * +---------------+ +----------------------+ + * | | + * | V | V | + * +---------------------------------------+ + * | IOVA range | + * +---------------------------------------+ + * |<- - - - - - - iova_size - - - - - - ->| + * + */ + offset = rknpu_obj->sram_obj->range_start * + rknpu_dev->sram_mm->chunk_size; + ret = iommu_map(domain, rknpu_obj->iova_start, + rknpu_dev->sram_start + offset, rknpu_obj->sram_size, + IOMMU_READ | IOMMU_WRITE); + if (ret) { + LOG_ERROR("sram iommu_map error: %d\n", ret); + goto free_iova; + } + + rknpu_obj->dma_addr = rknpu_obj->iova_start; + + if (rknpu_obj->size == 0) { + LOG_INFO("allocate sram size: %lu\n", rknpu_obj->sram_size); + return 0; + } + + rknpu_obj->pages = drm_gem_get_pages(&rknpu_obj->base); + if (IS_ERR(rknpu_obj->pages)) { + ret = PTR_ERR(rknpu_obj->pages); + LOG_ERROR("failed to get pages: %d\n", ret); + goto sram_unmap; + } + + rknpu_obj->num_pages = rknpu_obj->size >> PAGE_SHIFT; + +#if KERNEL_VERSION(5, 10, 0) <= LINUX_VERSION_CODE + rknpu_obj->sgt = drm_prime_pages_to_sg(drm, rknpu_obj->pages, + rknpu_obj->num_pages); +#else + rknpu_obj->sgt = + drm_prime_pages_to_sg(rknpu_obj->pages, rknpu_obj->num_pages); +#endif + if (IS_ERR(rknpu_obj->sgt)) { + ret = PTR_ERR(rknpu_obj->sgt); + LOG_ERROR("failed to allocate sgt: %d\n", ret); + goto put_pages; + } + + length = 
rknpu_obj->size; + offset = rknpu_obj->iova_start + rknpu_obj->sram_size; + + for_each_sg(rknpu_obj->sgt->sgl, s, rknpu_obj->sgt->nents, i) { + size = (length < s->length) ? length : s->length; + + ret = iommu_map(domain, offset, sg_phys(s), size, + IOMMU_READ | IOMMU_WRITE); + if (ret) { + LOG_ERROR("ddr iommu_map error: %d\n", ret); + goto sgl_unmap; + } + + length -= size; + offset += size; + + if (length == 0) + break; + } + + LOG_INFO("allocate size: %lu with sram size: %lu\n", rknpu_obj->size, + rknpu_obj->sram_size); + + return 0; + +sgl_unmap: + iommu_unmap(domain, rknpu_obj->iova_start + rknpu_obj->sram_size, + rknpu_obj->size - length); + sg_free_table(rknpu_obj->sgt); + kfree(rknpu_obj->sgt); + +put_pages: + drm_gem_put_pages(&rknpu_obj->base, rknpu_obj->pages, false, false); + +sram_unmap: + iommu_unmap(domain, rknpu_obj->iova_start, rknpu_obj->sram_size); + +free_iova: + rknpu_iommu_dma_free_iova(domain->iova_cookie, rknpu_obj->iova_start, + rknpu_obj->iova_size); + + return ret; +} + +static void rknpu_gem_free_buf_with_sram(struct rknpu_gem_object *rknpu_obj) +{ + struct drm_device *drm = rknpu_obj->base.dev; + struct rknpu_device *rknpu_dev = drm->dev_private; + struct iommu_domain *domain = NULL; + + domain = iommu_get_domain_for_dev(rknpu_dev->dev); + if (domain) { + iommu_unmap(domain, rknpu_obj->iova_start, + rknpu_obj->sram_size); + if (rknpu_obj->size > 0) + iommu_unmap(domain, + rknpu_obj->iova_start + + rknpu_obj->sram_size, + rknpu_obj->size); + rknpu_iommu_dma_free_iova(domain->iova_cookie, + rknpu_obj->iova_start, + rknpu_obj->iova_size); + } + + if (rknpu_obj->pages) + drm_gem_put_pages(&rknpu_obj->base, rknpu_obj->pages, true, + true); + + if (rknpu_obj->sgt != NULL) { + sg_free_table(rknpu_obj->sgt); + kfree(rknpu_obj->sgt); + } } struct rknpu_gem_object *rknpu_gem_object_create(struct drm_device *drm, unsigned int flags, - unsigned long size) + unsigned long size, + unsigned long sram_size) { struct rknpu_device *rknpu_dev = 
drm->dev_private; struct rknpu_gem_object *rknpu_obj = NULL; + size_t remain_ddr_size = 0; int ret = -EINVAL; - if (flags & ~(RKNPU_MEM_MASK)) { - LOG_DEV_ERROR(drm->dev, "invalid buffer flags: %u\n", flags); - return ERR_PTR(-EINVAL); - } - if (!size) { LOG_DEV_ERROR(drm->dev, "invalid buffer size: %lu\n", size); return ERR_PTR(-EINVAL); } - size = roundup(size, PAGE_SIZE); - - rknpu_obj = rknpu_gem_init(drm, size); - if (IS_ERR(rknpu_obj)) - return rknpu_obj; + remain_ddr_size = round_up(size, PAGE_SIZE); if (!rknpu_dev->iommu_en && (flags & RKNPU_MEM_NON_CONTIGUOUS)) { /* @@ -399,22 +579,82 @@ struct rknpu_gem_object *rknpu_gem_object_create(struct drm_device *drm, "non-contiguous allocation is not supported without IOMMU, falling back to contiguous buffer\n"); } - /* set memory type and cache attribute from user side. */ - rknpu_obj->flags = flags; + if (IS_ENABLED(CONFIG_ROCKCHIP_RKNPU_SRAM) && + (flags & RKNPU_MEM_TRY_ALLOC_SRAM) && rknpu_dev->sram_size > 0) { + size_t sram_free_size = 0; + size_t real_sram_size = 0; - ret = rknpu_gem_alloc_buf(rknpu_obj); - if (ret < 0) { - drm_gem_object_release(&rknpu_obj->base); - kfree(rknpu_obj); - return ERR_PTR(ret); + if (sram_size != 0) + sram_size = round_up(sram_size, PAGE_SIZE); + + rknpu_obj = rknpu_gem_init(drm, remain_ddr_size); + if (IS_ERR(rknpu_obj)) + return rknpu_obj; + + /* set memory type and cache attribute from user side. 
*/ + rknpu_obj->flags = flags; + + sram_free_size = rknpu_dev->sram_mm->free_chunks * + rknpu_dev->sram_mm->chunk_size; + if (sram_free_size > 0) { + real_sram_size = remain_ddr_size; + if (sram_size != 0 && remain_ddr_size > sram_size) + real_sram_size = sram_size; + if (real_sram_size > sram_free_size) + real_sram_size = sram_free_size; + ret = rknpu_mm_alloc(rknpu_dev->sram_mm, real_sram_size, + &rknpu_obj->sram_obj); + if (ret != 0) { + sram_free_size = + rknpu_dev->sram_mm->free_chunks * + rknpu_dev->sram_mm->chunk_size; + LOG_WARN( + "mm allocate %zu failed, ret: %d, free size: %zu\n", + real_sram_size, ret, sram_free_size); + real_sram_size = 0; + } + } + + if (real_sram_size > 0) { + rknpu_obj->sram_size = real_sram_size; + + ret = rknpu_gem_alloc_buf_with_sram(rknpu_obj); + if (ret < 0) + goto mm_free; + remain_ddr_size = 0; + } + } - LOG_DEBUG( - "create dma addr = %#llx, cookie = 0x%p, size = %lu, attrs = %#lx, flags = %#x\n", - (__u64)rknpu_obj->dma_addr, rknpu_obj->cookie, rknpu_obj->size, - rknpu_obj->dma_attrs, rknpu_obj->flags); + if (remain_ddr_size > 0) { + if (!rknpu_obj) /* SRAM fallback: reuse the object above */ + rknpu_obj = rknpu_gem_init(drm, remain_ddr_size); + if (IS_ERR(rknpu_obj)) + return rknpu_obj; + /* set memory type and cache attribute from user side. 
*/ + rknpu_obj->flags = flags; + + ret = rknpu_gem_alloc_buf(rknpu_obj); + if (ret < 0) + goto gem_release; + } + + if (rknpu_obj) + LOG_DEBUG( + "created dma addr: %pad, cookie: %p, ddr size: %lu, sram size: %lu, attrs: %#lx, flags: %#x\n", + &rknpu_obj->dma_addr, rknpu_obj->cookie, rknpu_obj->size, + rknpu_obj->sram_size, rknpu_obj->dma_attrs, rknpu_obj->flags); return rknpu_obj; + +mm_free: + if (IS_ENABLED(CONFIG_ROCKCHIP_RKNPU_SRAM) && + rknpu_obj->sram_obj != NULL) + rknpu_mm_free(rknpu_dev->sram_mm, rknpu_obj->sram_obj); + +gem_release: + rknpu_gem_release(rknpu_obj); + + return ERR_PTR(ret); } void rknpu_gem_object_destroy(struct rknpu_gem_object *rknpu_obj) @@ -422,8 +662,8 @@ void rknpu_gem_object_destroy(struct rknpu_gem_object *rknpu_obj) struct drm_gem_object *obj = &rknpu_obj->base; LOG_DEBUG( - "destroy dma addr = %#llx, cookie = 0x%p, size = %lu, attrs = %#lx, flags = %#x, handle count = %d\n", - (__u64)rknpu_obj->dma_addr, rknpu_obj->cookie, rknpu_obj->size, + "destroy dma addr: %pad, cookie: %p, size: %lu, attrs: %#lx, flags: %#x, handle count: %d\n", + &rknpu_obj->dma_addr, rknpu_obj->cookie, rknpu_obj->size, rknpu_obj->dma_attrs, rknpu_obj->flags, obj->handle_count); /* @@ -436,13 +676,20 @@ void rknpu_gem_object_destroy(struct rknpu_gem_object *rknpu_obj) drm_prime_gem_destroy(obj, rknpu_obj->sgt); rknpu_gem_free_page(rknpu_obj->pages); } else { - rknpu_gem_free_buf(rknpu_obj); + if (IS_ENABLED(CONFIG_ROCKCHIP_RKNPU_SRAM) && + rknpu_obj->sram_size > 0) { + struct rknpu_device *rknpu_dev = obj->dev->dev_private; + + if (rknpu_obj->sram_obj != NULL) + rknpu_mm_free(rknpu_dev->sram_mm, + rknpu_obj->sram_obj); + rknpu_gem_free_buf_with_sram(rknpu_obj); + } else { + rknpu_gem_free_buf(rknpu_obj); + } } - /* release file pointer to gem object. 
*/ - drm_gem_object_release(obj); - - kfree(rknpu_obj); + rknpu_gem_release(rknpu_obj); } int rknpu_gem_create_ioctl(struct drm_device *dev, void *data, @@ -454,8 +701,8 @@ int rknpu_gem_create_ioctl(struct drm_device *dev, void *data, rknpu_obj = rknpu_gem_object_find(file_priv, args->handle); if (!rknpu_obj) { - rknpu_obj = - rknpu_gem_object_create(dev, args->flags, args->size); + rknpu_obj = rknpu_gem_object_create( + dev, args->flags, args->size, args->sram_size); if (IS_ERR(rknpu_obj)) return PTR_ERR(rknpu_obj); @@ -470,6 +717,7 @@ int rknpu_gem_create_ioctl(struct drm_device *dev, void *data, // rknpu_gem_object_get(&rknpu_obj->base); args->size = rknpu_obj->size; + args->sram_size = rknpu_obj->sram_size; args->obj_addr = (__u64)(uintptr_t)rknpu_obj; args->dma_addr = rknpu_obj->dma_addr; @@ -584,6 +832,39 @@ static int rknpu_gem_mmap_buffer(struct rknpu_gem_object *rknpu_obj, if (vm_size > rknpu_obj->size) return -EINVAL; + if (rknpu_obj->sram_size > 0) { + unsigned long offset = 0; + unsigned long num_pages = 0; + int i = 0; + + vma->vm_flags |= VM_MIXEDMAP; + + offset = rknpu_obj->sram_obj->range_start * + rknpu_dev->sram_mm->chunk_size; + vma->vm_pgoff = __phys_to_pfn(rknpu_dev->sram_start + offset); + + ret = remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff, + rknpu_obj->sram_size, vma->vm_page_prot); + if (ret) + return -EAGAIN; + + if (rknpu_obj->size == 0) + return 0; + + offset = rknpu_obj->sram_size; + + num_pages = (vm_size - rknpu_obj->sram_size) / PAGE_SIZE; + for (i = 0; i < num_pages; ++i) { + ret = vm_insert_page(vma, vma->vm_start + offset, + rknpu_obj->pages[i]); + if (ret < 0) + return ret; + offset += PAGE_SIZE; + } + + return 0; + } + #if RKNPU_GEM_ALLOC_FROM_PAGES if ((rknpu_obj->flags & RKNPU_MEM_NON_CONTIGUOUS) && rknpu_dev->iommu_en) { @@ -595,7 +876,7 @@ static int rknpu_gem_mmap_buffer(struct rknpu_gem_object *rknpu_obj, rknpu_obj->dma_addr, rknpu_obj->size, rknpu_obj->dma_attrs); if (ret < 0) { - LOG_DEV_ERROR(drm->dev, "failed to 
mmap, ret = %d\n", ret); + LOG_DEV_ERROR(drm->dev, "failed to mmap, ret: %d\n", ret); return ret; } @@ -628,7 +909,7 @@ int rknpu_gem_dumb_create(struct drm_file *file_priv, struct drm_device *drm, else flags = RKNPU_MEM_CONTIGUOUS | RKNPU_MEM_WRITE_COMBINE; - rknpu_obj = rknpu_gem_object_create(drm, flags, args->size); + rknpu_obj = rknpu_gem_object_create(drm, flags, args->size, 0); if (IS_ERR(rknpu_obj)) { LOG_DEV_ERROR(drm->dev, "gem object allocate failed.\n"); return PTR_ERR(rknpu_obj); @@ -655,7 +936,7 @@ int rknpu_gem_dumb_map_offset(struct drm_file *file_priv, rknpu_obj = rknpu_gem_object_find(file_priv, handle); if (!rknpu_obj) - return -EINVAL; + return 0; /* Don't allow imported objects to be mapped */ obj = &rknpu_obj->base; @@ -770,7 +1051,7 @@ static int rknpu_gem_mmap_obj(struct drm_gem_object *obj, struct rknpu_gem_object *rknpu_obj = to_rknpu_obj(obj); int ret = -EINVAL; - LOG_DEBUG("flags = %#x\n", rknpu_obj->flags); + LOG_DEBUG("flags: %#x\n", rknpu_obj->flags); /* non-cacheable as default. */ if (rknpu_obj->flags & RKNPU_MEM_CACHEABLE) { @@ -803,7 +1084,7 @@ int rknpu_gem_mmap(struct file *filp, struct vm_area_struct *vma) /* set vm_area_struct. 
*/ ret = drm_gem_mmap(filp, vma); if (ret < 0) { - LOG_ERROR("failed to mmap, ret = %d\n", ret); + LOG_ERROR("failed to mmap, ret: %d\n", ret); return ret; } @@ -887,8 +1168,7 @@ rknpu_gem_prime_import_sg_table(struct drm_device *dev, err_free_large: rknpu_gem_free_page(rknpu_obj->pages); err: - drm_gem_object_release(&rknpu_obj->base); - kfree(rknpu_obj); + rknpu_gem_release(rknpu_obj); return ERR_PTR(ret); } @@ -953,8 +1233,42 @@ int rknpu_gem_sync_ioctl(struct drm_device *dev, void *data, length = args->size; offset = args->offset; + if (IS_ENABLED(CONFIG_ROCKCHIP_RKNPU_SRAM) && rknpu_obj->sram_size > 0) { + struct drm_gem_object *obj = &rknpu_obj->base; + struct rknpu_device *rknpu_dev = obj->dev->dev_private; + unsigned long sram_offset = + rknpu_obj->sram_obj->range_start * + rknpu_dev->sram_mm->chunk_size; + if ((offset + length) <= rknpu_obj->sram_size) { + __dma_map_area(rknpu_dev->sram_base_io + + offset + sram_offset, + length, DMA_TO_DEVICE); + __dma_unmap_area(rknpu_dev->sram_base_io + + offset + sram_offset, + length, DMA_FROM_DEVICE); + length = 0; + offset = 0; + } else if (offset >= rknpu_obj->sram_size) { + offset -= rknpu_obj->sram_size; + } else { + unsigned long sram_length = + rknpu_obj->sram_size - offset; + __dma_map_area(rknpu_dev->sram_base_io + + offset + sram_offset, + sram_length, DMA_TO_DEVICE); + __dma_unmap_area(rknpu_dev->sram_base_io + + offset + sram_offset, + sram_length, DMA_FROM_DEVICE); + length -= sram_length; + offset = 0; + } + } + for_each_sg(rknpu_obj->sgt->sgl, sg, rknpu_obj->sgt->nents, i) { + if (length == 0) + break; + len += sg->length; if (len <= offset) continue; @@ -974,9 +1288,6 @@ int rknpu_gem_sync_ioctl(struct drm_device *dev, void *data, offset += size; length -= size; - - if (length == 0) - break; } } diff --git a/drivers/rknpu/rknpu_job.c b/drivers/rknpu/rknpu_job.c index 0232b4541b0b..6f601ab5fe6d 100644 --- a/drivers/rknpu/rknpu_job.c +++ b/drivers/rknpu/rknpu_job.c @@ -1,6 +1,6 @@ // 
SPDX-License-Identifier: GPL-2.0 /* - * Copyright (C) Fuzhou Rockchip Electronics Co.Ltd + * Copyright (C) Rockchip Electronics Co.Ltd * Author: Felix Zeng */ @@ -159,16 +159,33 @@ static inline int rknpu_job_wait(struct rknpu_job *job) struct rknpu_subcore_data *subcore_data = NULL; void __iomem *rknpu_core_base = NULL; int core_index = rknpu_core_index(job->args->core_mask); + unsigned long flags; + int wait_count = 0; int ret = -EINVAL; subcore_data = &rknpu_dev->subcore_datas[core_index]; - ret = wait_event_interruptible_timeout(subcore_data->job_done_wq, - job->flags & RKNPU_JOB_DONE, - msecs_to_jiffies(args->timeout)); + + do { + ret = wait_event_interruptible_timeout( + subcore_data->job_done_wq, + job->flags & RKNPU_JOB_DONE || rknpu_dev->soft_reseting, + msecs_to_jiffies(args->timeout)); + if (++wait_count >= 3) + break; + } while (ret == 0 && job->in_queue[core_index]); + + if (job->in_queue[core_index]) { + spin_lock_irqsave(&rknpu_dev->lock, flags); + list_del_init(&job->head[core_index]); + subcore_data->task_num -= rknn_get_task_number(job, core_index); + job->in_queue[core_index] = false; + spin_unlock_irqrestore(&rknpu_dev->lock, flags); + return ret < 0 ? ret : -EINVAL; + } last_task = job->last_task; if (!last_task) - return -EINVAL; + return ret < 0 ? ret : -EINVAL; last_task->int_status = job->int_status[core_index]; @@ -182,9 +199,18 @@ static inline int rknpu_job_wait(struct rknpu_job *job) (task_status & rknpu_dev->config->pc_task_number_mask); } + + LOG_ERROR( + "failed to wait job, task counter: %d, flags: %#x, ret = %d, elapsed time: %lldus\n", + args->task_counter, args->flags, ret, + ktime_to_us(ktime_sub(ktime_get(), job->timestamp))); + return ret < 0 ? 
ret : -ETIMEDOUT; } + if (!(job->flags & RKNPU_JOB_DONE)) + return -EINVAL; + args->task_counter = args->task_number; return 0; @@ -304,6 +330,9 @@ static void rknpu_job_next(struct rknpu_device *rknpu_dev, int core_index) struct rknpu_subcore_data *subcore_data = NULL; unsigned long flags; + if (rknpu_dev->soft_reseting) + return; + subcore_data = &rknpu_dev->subcore_datas[core_index]; spin_lock_irqsave(&rknpu_dev->irq_lock, flags); @@ -317,7 +346,7 @@ static void rknpu_job_next(struct rknpu_device *rknpu_dev, int core_index) head[core_index]); list_del_init(&job->head[core_index]); - + job->in_queue[core_index] = false; subcore_data->job = job; job->run_count--; job->hw_recoder_time = ktime_get(); @@ -338,36 +367,37 @@ static void rknpu_job_done(struct rknpu_job *job, int ret, int core_index) struct rknpu_device *rknpu_dev = job->rknpu_dev; struct rknpu_subcore_data *subcore_data = NULL; unsigned long flags; - int task_num = 0; ktime_t now = ktime_get(); subcore_data = &rknpu_dev->subcore_datas[core_index]; - task_num = rknn_get_task_number(job, core_index); + spin_lock_irqsave(&rknpu_dev->irq_lock, flags); subcore_data->job = NULL; - subcore_data->task_num = subcore_data->task_num - task_num; + subcore_data->task_num -= rknn_get_task_number(job, core_index); job->interrupt_count--; subcore_data->timer.busy_time += ktime_us_delta(now, job->hw_recoder_time); spin_unlock_irqrestore(&rknpu_dev->irq_lock, flags); if (job->interrupt_count == 0) { + int use_core_num = job->use_core_num; + job->flags |= RKNPU_JOB_DONE; job->ret = ret; if (job->fence) dma_fence_signal(job->fence); - if (job->use_core_num > 1) + if (job->flags & RKNPU_JOB_ASYNC) + schedule_work(&job->cleanup_work); + + if (use_core_num > 1) wake_up(&(&rknpu_dev->subcore_datas[0])->job_done_wq); else wake_up(&subcore_data->job_done_wq); } rknpu_job_next(rknpu_dev, core_index); - - if (job->flags & RKNPU_JOB_ASYNC) - schedule_work(&job->cleanup_work); } static void rknpu_job_schedule(struct rknpu_job *job) 
@@ -377,7 +407,6 @@ static void rknpu_job_schedule(struct rknpu_job *job) int i = 0, core_index = 0; unsigned long flags; int task_num_list[3] = { 0, 1, 2 }; - int task_num = 0; int tmp = 0; if ((job->args->core_mask & 0x07) == RKNPU_CORE_AUTO_MASK) { @@ -414,17 +443,16 @@ static void rknpu_job_schedule(struct rknpu_job *job) job->run_count = 1; } - spin_lock_irqsave(&rknpu_dev->irq_lock, flags); for (i = 0; i < rknpu_dev->config->num_irqs; i++) { if (job->args->core_mask & rknpu_core_mask(i)) { subcore_data = &rknpu_dev->subcore_datas[i]; - task_num = rknn_get_task_number(job, i); - subcore_data->task_num = - subcore_data->task_num + task_num; + spin_lock_irqsave(&rknpu_dev->irq_lock, flags); list_add_tail(&job->head[i], &subcore_data->todo_list); + subcore_data->task_num += rknn_get_task_number(job, i); + job->in_queue[i] = true; + spin_unlock_irqrestore(&rknpu_dev->irq_lock, flags); } } - spin_unlock_irqrestore(&rknpu_dev->irq_lock, flags); for (i = 0; i < rknpu_dev->config->num_irqs; i++) { if (job->args->core_mask & rknpu_core_mask(i)) @@ -440,34 +468,39 @@ static void rknpu_job_abort(struct rknpu_job *job) void __iomem *rknpu_core_base = rknpu_dev->base[core_index]; unsigned long flags; int i = 0; - int task_num = 0; msleep(100); - if (job->ret == -ETIMEDOUT) { - LOG_ERROR( - "job timeout, irq status: %#x, raw status: %#x, require mask: %#x, task counter: %#x\n", - REG_READ(RKNPU_OFFSET_INT_STATUS), - REG_READ(RKNPU_OFFSET_INT_RAW_STATUS), - job->int_mask[core_index], - (REG_READ(RKNPU_OFFSET_PC_TASK_STATUS) & - rknpu_dev->config->pc_task_number_mask)); - rknpu_soft_reset(rknpu_dev); - } + for (i = 0; i < rknpu_dev->config->num_irqs; i++) { if (job->args->core_mask & rknpu_core_mask(i)) { subcore_data = &rknpu_dev->subcore_datas[i]; - if (job == subcore_data->job) { - spin_lock_irqsave(&rknpu_dev->irq_lock, flags); - task_num = rknn_get_task_number(job, i); + spin_lock_irqsave(&rknpu_dev->irq_lock, flags); + if (job == subcore_data->job && !job->irq_entry[i]) 
{ subcore_data->job = NULL; - subcore_data->task_num = - subcore_data->task_num - task_num; - spin_unlock_irqrestore(&rknpu_dev->irq_lock, - flags); + subcore_data->task_num -= + rknn_get_task_number(job, i); } + spin_unlock_irqrestore(&rknpu_dev->irq_lock, flags); } } + if (job->ret == -ETIMEDOUT) { + LOG_ERROR( + "job timeout, flags: %#x, irq status: %#x, raw status: %#x, require mask: %#x, task counter: %#x, elapsed time: %lldus\n", + job->flags, REG_READ(RKNPU_OFFSET_INT_STATUS), + REG_READ(RKNPU_OFFSET_INT_RAW_STATUS), + job->int_mask[core_index], + (REG_READ(RKNPU_OFFSET_PC_TASK_STATUS) & + rknpu_dev->config->pc_task_number_mask), + ktime_to_us(ktime_sub(ktime_get(), job->timestamp))); + rknpu_soft_reset(rknpu_dev); + } else { + LOG_ERROR( + "job abort, flags: %#x, ret: %d, elapsed time: %lldus\n", + job->flags, job->ret, + ktime_to_us(ktime_sub(ktime_get(), job->timestamp))); + } + rknpu_job_cleanup(job); } @@ -499,12 +532,24 @@ static inline uint32_t rknpu_fuzz_status(uint32_t status) static inline irqreturn_t rknpu_irq_handler(int irq, void *data, int core_index) { struct rknpu_device *rknpu_dev = data; - struct rknpu_job *job = rknpu_dev->subcore_datas[core_index].job; void __iomem *rknpu_core_base = rknpu_dev->base[core_index]; + struct rknpu_subcore_data *subcore_data = NULL; + struct rknpu_job *job = NULL; uint32_t status = 0; + unsigned long flags; - if (!job) + subcore_data = &rknpu_dev->subcore_datas[core_index]; + + spin_lock_irqsave(&rknpu_dev->irq_lock, flags); + job = subcore_data->job; + if (!job) { + spin_unlock_irqrestore(&rknpu_dev->irq_lock, flags); + REG_WRITE(RKNPU_INT_CLEAR, RKNPU_OFFSET_INT_CLEAR); + rknpu_job_next(rknpu_dev, core_index); return IRQ_HANDLED; + } + job->irq_entry[core_index] = true; + spin_unlock_irqrestore(&rknpu_dev->irq_lock, flags); status = REG_READ(RKNPU_OFFSET_INT_STATUS); @@ -579,6 +624,7 @@ static void rknpu_job_timeout_clean(struct rknpu_device *rknpu_dev, struct rknpu_job, head[i]); 
list_del_init(&job->head[i]); + job->in_queue[i] = false; } else { job = NULL; } @@ -615,7 +661,7 @@ static int rknpu_submit(struct rknpu_device *rknpu_dev, in_fence = sync_file_get_fence(args->fence_fd); if (!in_fence) { - LOG_ERROR("invalid fence in fd, fd = %d\n", + LOG_ERROR("invalid fence in fd, fd: %d\n", args->fence_fd); return -EINVAL; } @@ -729,8 +775,11 @@ int rknpu_get_hw_version(struct rknpu_device *rknpu_dev, uint32_t *version) { void __iomem *rknpu_core_base = rknpu_dev->base[0]; - if (version != NULL) - *version = REG_READ(RKNPU_OFFSET_VERSION); + if (version == NULL) + return -EINVAL; + + *version = REG_READ(RKNPU_OFFSET_VERSION) + + REG_READ(RKNPU_OFFSET_VERSION_NUM); return 0; } diff --git a/drivers/rknpu/rknpu_mem.c b/drivers/rknpu/rknpu_mem.c index b8ba36b27a29..715cfad2e340 100644 --- a/drivers/rknpu/rknpu_mem.c +++ b/drivers/rknpu/rknpu_mem.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* - * Copyright (C) Fuzhou Rockchip Electronics Co.Ltd + * Copyright (C) Rockchip Electronics Co.Ltd * Author: Felix Zeng */ @@ -96,7 +96,7 @@ int rknpu_mem_create_ioctl(struct rknpu_device *rknpu_dev, unsigned long data) phys = sg_dma_address(sgl); page = sg_page(sgl); length = sg_dma_len(sgl); - LOG_DEBUG("%s, %d, phys = %pad, length = 0x%x\n", __func__, + LOG_DEBUG("%s, %d, phys: %pad, length: %u\n", __func__, __LINE__, &phys, length); } @@ -126,7 +126,7 @@ int rknpu_mem_create_ioctl(struct rknpu_device *rknpu_dev, unsigned long data) args.handle = fd; LOG_DEBUG( - "args.handle = %d, args.size = %lld, rknpu_obj = %#llx, rknpu_obj->dma_addr = %#llx\n", + "args.handle: %d, args.size: %lld, rknpu_obj: %#llx, rknpu_obj->dma_addr: %#llx\n", args.handle, args.size, (__u64)(uintptr_t)rknpu_obj, (__u64)rknpu_obj->dma_addr); @@ -190,7 +190,7 @@ int rknpu_mem_destroy_ioctl(struct rknpu_device *rknpu_dev, unsigned long data) rknpu_obj = (struct rknpu_mem_object *)(uintptr_t)args.obj_addr; dmabuf = rknpu_obj->dmabuf; LOG_DEBUG( - "free args.handle = %d, 
rknpu_obj = %#llx, rknpu_obj->dma_addr = %#llx\n", + "free args.handle: %d, rknpu_obj: %#llx, rknpu_obj->dma_addr: %#llx\n", args.handle, (__u64)(uintptr_t)rknpu_obj, (__u64)rknpu_obj->dma_addr); diff --git a/drivers/rknpu/rknpu_mm.c b/drivers/rknpu/rknpu_mm.c new file mode 100644 index 000000000000..9a13c3e256a4 --- /dev/null +++ b/drivers/rknpu/rknpu_mm.c @@ -0,0 +1,289 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) Rockchip Electronics Co.Ltd + * Author: Felix Zeng + */ + +#include "rknpu_debugger.h" +#include "rknpu_mm.h" + +int rknpu_mm_create(unsigned int mem_size, unsigned int chunk_size, + struct rknpu_mm **mm) +{ + unsigned int num_of_longs; + int ret = -EINVAL; + + if (WARN_ON(mem_size < chunk_size)) + return -EINVAL; + if (WARN_ON(mem_size == 0)) + return -EINVAL; + if (WARN_ON(chunk_size == 0)) + return -EINVAL; + + *mm = kzalloc(sizeof(struct rknpu_mm), GFP_KERNEL); + if (!(*mm)) + return -ENOMEM; + + (*mm)->chunk_size = chunk_size; + (*mm)->total_chunks = mem_size / chunk_size; + (*mm)->free_chunks = (*mm)->total_chunks; + + num_of_longs = + ((*mm)->total_chunks + BITS_PER_LONG - 1) / BITS_PER_LONG; + + (*mm)->bitmap = kcalloc(num_of_longs, sizeof(long), GFP_KERNEL); + if (!(*mm)->bitmap) { + ret = -ENOMEM; + goto free_mm; + } + + mutex_init(&(*mm)->lock); + + LOG_DEBUG("total_chunks: %d, bitmap: %p\n", (*mm)->total_chunks, + (*mm)->bitmap); + + return 0; + +free_mm: + kfree(mm); + return ret; +} + +void rknpu_mm_destroy(struct rknpu_mm *mm) +{ + if (mm != NULL) { + mutex_destroy(&mm->lock); + kfree(mm->bitmap); + kfree(mm); + } +} + +int rknpu_mm_alloc(struct rknpu_mm *mm, unsigned int size, + struct rknpu_mm_obj **mm_obj) +{ + unsigned int found, start_search, cur_size; + + if (size == 0) + return -EINVAL; + + if (size > mm->total_chunks * mm->chunk_size) + return -ENOMEM; + + *mm_obj = kzalloc(sizeof(struct rknpu_mm_obj), GFP_KERNEL); + if (!(*mm_obj)) + return -ENOMEM; + + start_search = 0; + + mutex_lock(&mm->lock); + 
+mm_restart_search: + /* Find the first chunk that is free */ + found = find_next_zero_bit(mm->bitmap, mm->total_chunks, start_search); + + /* If there wasn't any free chunk, bail out */ + if (found == mm->total_chunks) + goto mm_no_free_chunk; + + /* Update fields of mm_obj */ + (*mm_obj)->range_start = found; + (*mm_obj)->range_end = found; + + /* If we need only one chunk, mark it as allocated and get out */ + if (size <= mm->chunk_size) { + set_bit(found, mm->bitmap); + goto mm_out; + } + + /* Otherwise, try to see if we have enough contiguous chunks */ + cur_size = size - mm->chunk_size; + do { + (*mm_obj)->range_end = find_next_zero_bit( + mm->bitmap, mm->total_chunks, ++found); + /* + * If next free chunk is not contiguous than we need to + * restart our search from the last free chunk we found (which + * wasn't contiguous to the previous ones + */ + if ((*mm_obj)->range_end != found) { + start_search = found; + goto mm_restart_search; + } + + /* + * If we reached end of buffer, bail out with error + */ + if (found == mm->total_chunks) + goto mm_no_free_chunk; + + /* Check if we don't need another chunk */ + if (cur_size <= mm->chunk_size) + cur_size = 0; + else + cur_size -= mm->chunk_size; + + } while (cur_size > 0); + + /* Mark the chunks as allocated */ + for (found = (*mm_obj)->range_start; found <= (*mm_obj)->range_end; + found++) + set_bit(found, mm->bitmap); + +mm_out: + mm->free_chunks -= ((*mm_obj)->range_end - (*mm_obj)->range_start + 1); + mutex_unlock(&mm->lock); + + LOG_DEBUG("mm allocate, mm_obj: %p, range_start: %d, range_end: %d\n", + *mm_obj, (*mm_obj)->range_start, (*mm_obj)->range_end); + + return 0; + +mm_no_free_chunk: + mutex_unlock(&mm->lock); + kfree(*mm_obj); + + return -ENOMEM; +} + +int rknpu_mm_free(struct rknpu_mm *mm, struct rknpu_mm_obj *mm_obj) +{ + unsigned int bit; + + /* Act like kfree when trying to free a NULL object */ + if (!mm_obj) + return 0; + + LOG_DEBUG("mm free, mem_obj: %p, range_start: %d, range_end: %d\n", + 
mm_obj, mm_obj->range_start, mm_obj->range_end); + + mutex_lock(&mm->lock); + + /* Mark the chunks as free */ + for (bit = mm_obj->range_start; bit <= mm_obj->range_end; bit++) + clear_bit(bit, mm->bitmap); + + mm->free_chunks += (mm_obj->range_end - mm_obj->range_start + 1); + + mutex_unlock(&mm->lock); + + kfree(mm_obj); + + return 0; +} + +int rknpu_mm_dump(struct seq_file *m, void *data) +{ + struct rknpu_debugger_node *node = m->private; + struct rknpu_debugger *debugger = node->debugger; + struct rknpu_device *rknpu_dev = + container_of(debugger, struct rknpu_device, debugger); + struct rknpu_mm *mm = NULL; + int cur = 0, rbot = 0, rtop = 0; + size_t ret = 0; + char buf[64]; + size_t size = sizeof(buf); + int seg_chunks = 32, seg_id = 0; + int free_size = 0; + int i = 0; + + mm = rknpu_dev->sram_mm; + if (mm == NULL) + return 0; + + seq_printf(m, "SRAM bitmap: \"*\" - used, \".\" - free (1bit = %dKB)\n", + mm->chunk_size / 1024); + + rbot = cur = find_first_bit(mm->bitmap, mm->total_chunks); + for (i = 0; i < cur; ++i) { + ret += scnprintf(buf + ret, size - ret, "."); + if (ret >= seg_chunks) { + seq_printf(m, "[%03d] [%s]\n", seg_id++, buf); + ret = 0; + } + } + while (cur < mm->total_chunks) { + rtop = cur; + cur = find_next_bit(mm->bitmap, mm->total_chunks, cur + 1); + if (cur < mm->total_chunks && cur <= rtop + 1) + continue; + + for (i = rbot; i <= rtop; ++i) { + ret += scnprintf(buf + ret, size - ret, "*"); + if (ret >= seg_chunks) { + seq_printf(m, "[%03d] [%s]\n", seg_id++, buf); + ret = 0; + } + } + + for (i = rtop + 1; i < cur; ++i) { + ret += scnprintf(buf + ret, size - ret, "."); + if (ret >= seg_chunks) { + seq_printf(m, "[%03d] [%s]\n", seg_id++, buf); + ret = 0; + } + } + + rbot = cur; + } + + if (ret > 0) + seq_printf(m, "[%03d] [%s]\n", seg_id++, buf); + + free_size = mm->free_chunks * mm->chunk_size; + seq_printf(m, "SRAM total size: %d, used: %d, free: %d\n", + rknpu_dev->sram_size, rknpu_dev->sram_size - free_size, + free_size); + + return 
0; +} + +dma_addr_t rknpu_iommu_dma_alloc_iova(struct iommu_domain *domain, size_t size, + u64 dma_limit, struct device *dev) +{ + struct rknpu_iommu_dma_cookie *cookie = domain->iova_cookie; + struct iova_domain *iovad = &cookie->iovad; + unsigned long shift, iova_len, iova = 0; +#if (KERNEL_VERSION(5, 4, 0) > LINUX_VERSION_CODE) + dma_addr_t limit; +#endif + + shift = iova_shift(iovad); + iova_len = size >> shift; + /* + * Freeing non-power-of-two-sized allocations back into the IOVA caches + * will come back to bite us badly, so we have to waste a bit of space + * rounding up anything cacheable to make sure that can't happen. The + * order of the unadjusted size will still match upon freeing. + */ + if (iova_len < (1 << (IOVA_RANGE_CACHE_MAX_SIZE - 1))) + iova_len = roundup_pow_of_two(iova_len); + +#if (KERNEL_VERSION(5, 10, 0) <= LINUX_VERSION_CODE) + dma_limit = min_not_zero(dma_limit, dev->bus_dma_limit); +#else + if (dev->bus_dma_mask) + dma_limit &= dev->bus_dma_mask; +#endif + + if (domain->geometry.force_aperture) + dma_limit = + min_t(u64, dma_limit, domain->geometry.aperture_end); + +#if (KERNEL_VERSION(5, 4, 0) <= LINUX_VERSION_CODE) + iova = alloc_iova_fast(iovad, iova_len, dma_limit >> shift, true); +#else + limit = min_t(dma_addr_t, dma_limit >> shift, iovad->end_pfn); + + iova = alloc_iova_fast(iovad, iova_len, limit, true); +#endif + + return (dma_addr_t)iova << shift; +} + +void rknpu_iommu_dma_free_iova(struct rknpu_iommu_dma_cookie *cookie, + dma_addr_t iova, size_t size) +{ + struct iova_domain *iovad = &cookie->iovad; + + free_iova_fast(iovad, iova_pfn(iovad, iova), size >> iova_shift(iovad)); +} diff --git a/drivers/rknpu/rknpu_reset.c b/drivers/rknpu/rknpu_reset.c index 681647afd964..91c9b75d68e7 100644 --- a/drivers/rknpu/rknpu_reset.c +++ b/drivers/rknpu/rknpu_reset.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* - * Copyright (C) Fuzhou Rockchip Electronics Co.Ltd + * Copyright (C) Rockchip Electronics Co.Ltd * Author: Felix 
Zeng */ @@ -92,6 +92,7 @@ int rknpu_soft_reset(struct rknpu_device *rknpu_dev) { #ifndef FPGA_PLATFORM struct iommu_domain *domain = NULL; + struct rknpu_subcore_data *subcore_data = NULL; int ret = -EINVAL, i = 0; if (rknpu_dev->bypass_soft_reset) { @@ -99,6 +100,18 @@ int rknpu_soft_reset(struct rknpu_device *rknpu_dev) return 0; } + if (!mutex_trylock(&rknpu_dev->reset_lock)) + return 0; + + rknpu_dev->soft_reseting = true; + + msleep(100); + + for (i = 0; i < rknpu_dev->config->num_irqs; ++i) { + subcore_data = &rknpu_dev->subcore_datas[i]; + wake_up(&subcore_data->job_done_wq); + } + LOG_INFO("soft reset\n"); for (i = 0; i < rknpu_dev->config->num_resets; i++) { @@ -114,6 +127,7 @@ int rknpu_soft_reset(struct rknpu_device *rknpu_dev) if (ret) { LOG_DEV_ERROR(rknpu_dev->dev, "failed to soft reset for rknpu: %d\n", ret); + mutex_unlock(&rknpu_dev->reset_lock); return ret; } @@ -124,6 +138,10 @@ int rknpu_soft_reset(struct rknpu_device *rknpu_dev) iommu_detach_device(domain, rknpu_dev->dev); iommu_attach_device(domain, rknpu_dev->dev); } + + rknpu_dev->soft_reseting = false; + + mutex_unlock(&rknpu_dev->reset_lock); #endif return 0;