diff --git a/drivers/rknpu/Kconfig b/drivers/rknpu/Kconfig index 42cb4718436e..8cb2b777ae61 100644 --- a/drivers/rknpu/Kconfig +++ b/drivers/rknpu/Kconfig @@ -1,10 +1,54 @@ # SPDX-License-Identifier: GPL-2.0 menu "RKNPU" - depends on ARCH_ROCKCHIP && DRM + depends on ARCH_ROCKCHIP config ROCKCHIP_RKNPU tristate "ROCKCHIP_RKNPU" + depends on DRM || DMABUF_HEAPS_ROCKCHIP_CMA_HEAP help rknpu module. +if ROCKCHIP_RKNPU + +config ROCKCHIP_RKNPU_DEBUG_FS + bool "RKNPU debugfs" + depends on DEBUG_FS + default y + help + Enable debugfs to debug RKNPU usage. + +config ROCKCHIP_RKNPU_PROC_FS + bool "RKNPU procfs" + depends on PROC_FS + help + Enable procfs to debug RKNPU usage. + +config ROCKCHIP_RKNPU_FENCE + bool "RKNPU fence" + depends on SYNC_FILE + help + Enable fence support for RKNPU. + +choice + prompt "RKNPU memory manager" + default ROCKCHIP_RKNPU_DRM_GEM + help + Select RKNPU memory manager + +config ROCKCHIP_RKNPU_DRM_GEM + bool "RKNPU DRM GEM" + depends on DRM + help + Enable RKNPU memory manager by DRM GEM. + +config ROCKCHIP_RKNPU_DMA_HEAP + bool "RKNPU DMA heap" + depends on DMABUF_HEAPS_ROCKCHIP_CMA_HEAP + help + Enable RKNPU memory manager by DMA Heap. 
+ +endchoice + +endif + endmenu diff --git a/drivers/rknpu/Makefile b/drivers/rknpu/Makefile index 9cfe0421edf4..2e18da1d0d61 100644 --- a/drivers/rknpu/Makefile +++ b/drivers/rknpu/Makefile @@ -2,10 +2,13 @@ obj-$(CONFIG_ROCKCHIP_RKNPU) += rknpu.o ccflags-y += -I$(srctree)/$(src)/include +ccflags-y += -I$(src)/include ccflags-y += -Werror rknpu-y += rknpu_drv.o rknpu-y += rknpu_reset.o rknpu-y += rknpu_job.o -rknpu-y += rknpu_gem.o -rknpu-y += rknpu_fence.o +rknpu-y += rknpu_debugger.o +rknpu-$(CONFIG_ROCKCHIP_RKNPU_FENCE) += rknpu_fence.o +rknpu-$(CONFIG_ROCKCHIP_RKNPU_DRM_GEM) += rknpu_gem.o +rknpu-$(CONFIG_ROCKCHIP_RKNPU_DMA_HEAP) += rknpu_mem.o diff --git a/drivers/rknpu/include/rknpu_debugger.h b/drivers/rknpu/include/rknpu_debugger.h new file mode 100644 index 000000000000..ef1cd44d6ea3 --- /dev/null +++ b/drivers/rknpu/include/rknpu_debugger.h @@ -0,0 +1,86 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) Fuzhou Rockchip Electronics Co.Ltd + * Author: Felix Zeng + */ + +#ifndef __LINUX_RKNPU_DEBUGGER_H_ +#define __LINUX_RKNPU_DEBUGGER_H_ + +/* + * struct rknpu_debugger - rknpu debugger information + * + * This structure represents a debugger to be created by the rknpu driver + * or core. + */ +struct rknpu_debugger { +#ifdef CONFIG_ROCKCHIP_RKNPU_DEBUG_FS + /* Directory of debugfs file */ + struct dentry *debugfs_dir; + struct list_head debugfs_entry_list; + struct mutex debugfs_lock; +#endif +#ifdef CONFIG_ROCKCHIP_RKNPU_PROC_FS + /* Directory of procfs file */ + struct proc_dir_entry *procfs_dir; + struct list_head procfs_entry_list; + struct mutex procfs_lock; +#endif +}; + +/* + * struct rknpu_debugger_list - debugfs/procfs info list entry + * + * This structure represents a debugfs/procfs file to be created by the npu + * driver or core. + */ +struct rknpu_debugger_list { + /* File name */ + const char *name; + /* + * Show callback. 
&seq_file->private will be set to the &struct + * rknpu_debugger_node corresponding to the instance of this info + * on a given &struct rknpu_debugger. + */ + int (*show)(struct seq_file *seq, void *data); + /* + * Write callback. &seq_file->private will be set to the &struct + * rknpu_debugger_node corresponding to the instance of this info + * on a given &struct rknpu_debugger. + */ + ssize_t (*write)(struct file *file, const char __user *ubuf, + size_t len, loff_t *offp); + /* Procfs/Debugfs private data. */ + void *data; +}; + +/* + * struct rknpu_debugger_node - Nodes for debugfs/procfs + * + * This structure represents each instance of procfs/debugfs created from the + * template. + */ +struct rknpu_debugger_node { + struct rknpu_debugger *debugger; + + /* template for this node. */ + const struct rknpu_debugger_list *info_ent; + + /* Each Procfs/Debugfs file. */ +#ifdef CONFIG_ROCKCHIP_RKNPU_DEBUG_FS + struct dentry *dent; +#endif + +#ifdef CONFIG_ROCKCHIP_RKNPU_PROC_FS + struct proc_dir_entry *pent; +#endif + + struct list_head list; +}; + + +int rknpu_debugger_init(struct rknpu_device *rknpu_dev); +int rknpu_debugger_remove(struct rknpu_device *rknpu_dev); + + +#endif /* __LINUX_RKNPU_DEBUGGER_H_ */ diff --git a/drivers/rknpu/include/rknpu_drv.h b/drivers/rknpu/include/rknpu_drv.h index 2d67234a8846..f9d776e4b11e 100644 --- a/drivers/rknpu/include/rknpu_drv.h +++ b/drivers/rknpu/include/rknpu_drv.h @@ -14,33 +14,43 @@ #include #include #include +#include +#include + +#ifndef FPGA_PLATFORM +#if KERNEL_VERSION(5, 10, 0) <= LINUX_VERSION_CODE #include +#endif +#endif #include "rknpu_job.h" #include "rknpu_fence.h" +#include "rknpu_debugger.h" #define DRIVER_NAME "rknpu" #define DRIVER_DESC "RKNPU driver" -#define DRIVER_DATE "20211227" +#define DRIVER_DATE "20220328" #define DRIVER_MAJOR 0 -#define DRIVER_MINOR 6 -#define DRIVER_PATCHLEVEL 4 +#define DRIVER_MINOR 7 +#define DRIVER_PATCHLEVEL 0 #define LOG_TAG "RKNPU" +/* sample interval: 1000ms */ +#define 
RKNPU_LOAD_INTERVAL 1000000000 + #define LOG_INFO(fmt, args...) pr_info(LOG_TAG ": " fmt, ##args) #if KERNEL_VERSION(5, 5, 0) <= LINUX_VERSION_CODE #define LOG_WARN(fmt, args...) pr_warn(LOG_TAG ": " fmt, ##args) #else #define LOG_WARN(fmt, args...) pr_warning(LOG_TAG ": " fmt, ##args) #endif -#define LOG_DEBUG(fmt, args...) DRM_DEBUG_DRIVER(LOG_TAG ": " fmt, ##args) +#define LOG_DEBUG(fmt, args...) pr_devel(LOG_TAG ": " fmt, ##args) #define LOG_ERROR(fmt, args...) pr_err(LOG_TAG ": " fmt, ##args) #define LOG_DEV_INFO(dev, fmt, args...) dev_info(dev, LOG_TAG ": " fmt, ##args) #define LOG_DEV_WARN(dev, fmt, args...) dev_warn(dev, LOG_TAG ": " fmt, ##args) -#define LOG_DEV_DEBUG(dev, fmt, args...) \ - DRM_DEV_DEBUG_DRIVER(dev, LOG_TAG ": " fmt, ##args) +#define LOG_DEV_DEBUG(dev, fmt, args...) dev_dbg(dev, LOG_TAG ": " fmt, ##args) #define LOG_DEV_ERROR(dev, fmt, args...) dev_err(dev, LOG_TAG ": " fmt, ##args) struct npu_reset_data { @@ -52,7 +62,9 @@ struct rknpu_config { __u32 bw_priority_addr; __u32 bw_priority_length; __u64 dma_mask; - __u32 pc_data_extra_amount; + __u32 pc_data_amount_scale; + __u32 pc_task_number_bits; + __u32 pc_task_number_mask; __u32 bw_enable; const struct npu_irqs_data *irqs; const struct npu_reset_data *resets; @@ -60,11 +72,17 @@ struct rknpu_config { int num_resets; }; +struct rknpu_timer { + __u32 busy_time; + __u32 busy_time_record; +}; + struct rknpu_subcore_data { struct list_head todo_list; wait_queue_head_t job_done_wq; struct rknpu_job *job; uint64_t task_num; + struct rknpu_timer timer; }; /** @@ -77,11 +95,17 @@ struct rknpu_subcore_data { struct rknpu_device { void __iomem *base[RKNPU_MAX_CORES]; struct device *dev; - struct device *fake_dev; +#ifdef CONFIG_ROCKCHIP_RKNPU_DRM_GEM struct drm_device *drm_dev; +#endif +#ifdef CONFIG_ROCKCHIP_RKNPU_DMA_HEAP + struct miscdevice miscdev; + struct rk_dma_heap *heap; +#endif atomic_t sequence; spinlock_t lock; spinlock_t irq_lock; + struct mutex power_lock; struct rknpu_subcore_data 
subcore_datas[RKNPU_MAX_CORES]; const struct rknpu_config *config; void __iomem *bw_priority_base; @@ -97,7 +121,11 @@ struct rknpu_device { struct ipa_power_model_data *model_data; struct thermal_cooling_device *devfreq_cooling; struct devfreq *devfreq; +#ifndef FPGA_PLATFORM +#if KERNEL_VERSION(5, 10, 0) <= LINUX_VERSION_CODE struct rockchip_opp_info opp_info; +#endif +#endif unsigned long current_freq; unsigned long current_volt; int bypass_irq_handler; @@ -106,6 +134,13 @@ struct rknpu_device { struct device *genpd_dev_npu1; struct device *genpd_dev_npu2; bool multiple_domains; + atomic_t power_refcount; + struct delayed_work power_off_work; + struct workqueue_struct *power_off_wq; + bool is_powered; + struct rknpu_debugger debugger; + struct hrtimer timer; + ktime_t kt; }; #endif /* __LINUX_RKNPU_DRV_H_ */ diff --git a/drivers/rknpu/include/rknpu_ioctl.h b/drivers/rknpu/include/rknpu_ioctl.h index a63540d46ccd..a8f727c23d75 100644 --- a/drivers/rknpu/include/rknpu_ioctl.h +++ b/drivers/rknpu/include/rknpu_ioctl.h @@ -14,6 +14,10 @@ #define __user #endif +#ifndef __packed +#define __packed __attribute__((packed)) +#endif + #define RKNPU_OFFSET_VERSION 0x0 #define RKNPU_OFFSET_PC_OP_EN 0x8 #define RKNPU_OFFSET_PC_DATA_ADDR 0x10 @@ -36,6 +40,8 @@ #define RKNPU_INT_CLEAR 0x1ffff +#define RKNPU_PC_DATA_EXTRA_AMOUNT 4 + #define RKNPU_STR_HELPER(x) #x #define RKNPU_GET_DRV_VERSION_STRING(MAJOR, MINOR, PATCHLEVEL) \ @@ -119,6 +125,8 @@ enum e_rknpu_action { RKNPU_GET_TOTAL_RW_AMOUNT = 17, RKNPU_GET_IOMMU_EN = 18, RKNPU_SET_PROC_NICE = 19, + RKNPU_POWER_ON = 20, + RKNPU_POWER_OFF = 21, }; /** @@ -157,10 +165,12 @@ struct rknpu_mem_map { * * @handle: handle of the buffer. * @reserved: reserved for padding. + * @obj_addr: rknpu_mem_object addr. 
*/ struct rknpu_mem_destroy { __u32 handle; __u32 reserved; + __u64 obj_addr; }; /** @@ -192,7 +202,7 @@ struct rknpu_mem_sync { * @int_status: interrupt status * @regcfg_amount: register config number * @regcfg_offset: offset for register config - * @regcmd_data: data for register command + * @regcmd_addr: address for register command * */ struct rknpu_task { @@ -204,7 +214,7 @@ struct rknpu_task { __u32 int_status; __u32 regcfg_amount; __u32 regcfg_offset; - __u64 regcmd_data; + __u64 regcmd_addr; } __packed; /** @@ -217,7 +227,6 @@ struct rknpu_task { struct rknpu_subcore_task { __u32 task_start; __u32 task_number; - __u32 task_end; }; /** @@ -231,10 +240,11 @@ struct rknpu_subcore_task { * @priority: submit priority * @task_obj_addr: address of task object * @regcfg_obj_addr: address of register config object + * @task_base_addr: task base address * @user_data: (optional) user data - * @sequence: submit sequence * @core_mask: core mask of rknpu * @fence_fd: dma fence fd + * @subcore_task: subcore task * */ struct rknpu_submit { @@ -246,8 +256,8 @@ struct rknpu_submit { __s32 priority; __u64 task_obj_addr; __u64 regcfg_obj_addr; + __u64 task_base_addr; __u64 user_data; - __u64 sequence; __u32 core_mask; __s32 fence_fd; struct rknpu_subcore_task subcore_task[5]; @@ -272,6 +282,11 @@ struct rknpu_action { #define RKNPU_MEM_DESTROY 0x04 #define RKNPU_MEM_SYNC 0x05 +#define RKNPU_IOC_MAGIC 'r' +#define RKNPU_IOW(nr, type) _IOW(RKNPU_IOC_MAGIC, nr, type) +#define RKNPU_IOR(nr, type) _IOR(RKNPU_IOC_MAGIC, nr, type) +#define RKNPU_IOWR(nr, type) _IOWR(RKNPU_IOC_MAGIC, nr, type) + #if defined(__arm__) || defined(__aarch64__) #include @@ -289,6 +304,15 @@ struct rknpu_action { #define DRM_IOCTL_RKNPU_MEM_SYNC \ DRM_IOWR(DRM_COMMAND_BASE + RKNPU_MEM_SYNC, struct rknpu_mem_sync) +#define IOCTL_RKNPU_ACTION RKNPU_IOWR(RKNPU_ACTION, struct rknpu_action) +#define IOCTL_RKNPU_SUBMIT RKNPU_IOWR(RKNPU_SUBMIT, struct rknpu_submit) +#define IOCTL_RKNPU_MEM_CREATE \ + 
RKNPU_IOWR(RKNPU_MEM_CREATE, struct rknpu_mem_create) +#define IOCTL_RKNPU_MEM_MAP RKNPU_IOWR(RKNPU_MEM_MAP, struct rknpu_mem_map) +#define IOCTL_RKNPU_MEM_DESTROY \ + RKNPU_IOWR(RKNPU_MEM_DESTROY, struct rknpu_mem_destroy) +#define IOCTL_RKNPU_MEM_SYNC RKNPU_IOWR(RKNPU_MEM_SYNC, struct rknpu_mem_sync) + #endif #endif diff --git a/drivers/rknpu/include/rknpu_job.h b/drivers/rknpu/include/rknpu_job.h index ef3628f8da4b..6a1161b5932c 100644 --- a/drivers/rknpu/include/rknpu_job.h +++ b/drivers/rknpu/include/rknpu_job.h @@ -9,6 +9,7 @@ #include #include +#include #include @@ -41,14 +42,20 @@ struct rknpu_job { uint32_t use_core_num; uint32_t run_count; uint32_t interrupt_count; + ktime_t hw_recoder_time; }; irqreturn_t rknpu_core0_irq_handler(int irq, void *data); irqreturn_t rknpu_core1_irq_handler(int irq, void *data); irqreturn_t rknpu_core2_irq_handler(int irq, void *data); +#ifdef CONFIG_ROCKCHIP_RKNPU_DRM_GEM int rknpu_submit_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); +#endif +#ifdef CONFIG_ROCKCHIP_RKNPU_DMA_HEAP +int rknpu_submit_ioctl(struct rknpu_device *rknpu_dev, unsigned long data); +#endif int rknpu_get_hw_version(struct rknpu_device *rknpu_dev, uint32_t *version); diff --git a/drivers/rknpu/include/rknpu_mem.h b/drivers/rknpu/include/rknpu_mem.h new file mode 100644 index 000000000000..828b88e45589 --- /dev/null +++ b/drivers/rknpu/include/rknpu_mem.h @@ -0,0 +1,43 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) Fuzhou Rockchip Electronics Co.Ltd + * Author: Felix Zeng + */ + +#ifndef __LINUX_RKNPU_MEM_H +#define __LINUX_RKNPU_MEM_H + +#include +#include + +/* + * rknpu DMA buffer structure. + * + * @flags: indicate memory type to allocated buffer and cache attribute. + * @size: size requested from user, in bytes and this size is aligned + * in page unit. + * @kv_addr: kernel virtual address to allocated memory region. + * @dma_addr: bus address(accessed by dma) to allocated memory region. 
+ * - this address could be physical address without IOMMU and + * device address with IOMMU. + * @pages: Array of backing pages. + * @sgt: Imported sg_table. + * @dmabuf: buffer for this attachment. + * @owner: Is this memory internally allocated. + */ +struct rknpu_mem_object { + unsigned long flags; + unsigned long size; + void __iomem *kv_addr; + dma_addr_t dma_addr; + struct page **pages; + struct sg_table *sgt; + struct dma_buf *dmabuf; + unsigned int owner; +}; + +int rknpu_mem_create_ioctl(struct rknpu_device *rknpu_dev, unsigned long data); +int rknpu_mem_destroy_ioctl(struct rknpu_device *rknpu_dev, unsigned long data); +int rknpu_mem_sync_ioctl(struct rknpu_device *rknpu_dev, unsigned long data); + +#endif diff --git a/drivers/rknpu/rknpu_debugger.c b/drivers/rknpu/rknpu_debugger.c new file mode 100644 index 000000000000..74367817a283 --- /dev/null +++ b/drivers/rknpu/rknpu_debugger.c @@ -0,0 +1,354 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) Fuzhou Rockchip Electronics Co.Ltd + * Author: Felix Zeng + */ + +#include +#include +#include +#include +#include +#include +#include + +#include "rknpu_drv.h" +#include "rknpu_debugger.h" + +#define RKNPU_DEBUGGER_ROOT_NAME "rknpu" + +static int rknpu_version_show(struct seq_file *m, void *data) +{ + seq_printf(m, "%s: v%d.%d.%d\n", DRIVER_DESC, DRIVER_MAJOR, + DRIVER_MINOR, DRIVER_PATCHLEVEL); + + return 0; +} + +static int rknpu_load_show(struct seq_file *m, void *data) +{ + struct rknpu_debugger_node *node = m->private; + struct rknpu_debugger *debugger = node->debugger; + struct rknpu_device *rknpu_dev = + container_of(debugger, struct rknpu_device, debugger); + struct rknpu_subcore_data *subcore_data = NULL; + unsigned long flags; + int i; + int load; + uint64_t busy_time_total, div_value; + + seq_puts(m, "NPU load: "); + for (i = 0; i < rknpu_dev->config->num_irqs; i++) { + subcore_data = &rknpu_dev->subcore_datas[i]; + + if (rknpu_dev->config->num_irqs > 1) + seq_printf(m, " Core%d: ", i); 
+ + spin_lock_irqsave(&rknpu_dev->irq_lock, flags); + + busy_time_total = subcore_data->timer.busy_time_record; + + spin_unlock_irqrestore(&rknpu_dev->irq_lock, flags); + + div_value = (RKNPU_LOAD_INTERVAL / 100000); + do_div(busy_time_total, div_value); + load = busy_time_total; + + if (rknpu_dev->config->num_irqs > 1) + seq_printf(m, "%2.d%%,", load); + else + seq_printf(m, "%2.d%%", load); + } + seq_puts(m, "\n"); + + return 0; +} + +struct rknpu_debugger_list rknpu_debugger_root_list[] = { + { "driver_version", rknpu_version_show, NULL, NULL }, + { "load", rknpu_load_show, NULL, NULL }, +}; + +static ssize_t rknpu_debugger_write(struct file *file, const char __user *ubuf, + size_t len, loff_t *offp) +{ + struct seq_file *priv = file->private_data; + struct rknpu_debugger_node *node = priv->private; + + if (node->info_ent->write) + return node->info_ent->write(file, ubuf, len, offp); + else + return len; +} + +static int rknpu_debugfs_open(struct inode *inode, struct file *file) +{ + struct rknpu_debugger_node *node = inode->i_private; + + return single_open(file, node->info_ent->show, node); +} + +static const struct file_operations rknpu_debugfs_fops = { + .owner = THIS_MODULE, + .open = rknpu_debugfs_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, + .write = rknpu_debugger_write, +}; + +#ifdef CONFIG_ROCKCHIP_RKNPU_DEBUG_FS +static int rknpu_debugfs_remove_files(struct rknpu_debugger *debugger) +{ + struct rknpu_debugger_node *pos, *q; + struct list_head *entry_list; + + mutex_lock(&debugger->debugfs_lock); + + /* Delete debugfs entry list */ + entry_list = &debugger->debugfs_entry_list; + list_for_each_entry_safe(pos, q, entry_list, list) { + if (pos->dent == NULL) + continue; + list_del(&pos->list); + kfree(pos); + pos = NULL; + } + + /* Delete all debugfs node in this directory */ + debugfs_remove_recursive(debugger->debugfs_dir); + debugger->debugfs_dir = NULL; + + mutex_unlock(&debugger->debugfs_lock); + + return 0; +} + 
+static int rknpu_debugfs_create_files(const struct rknpu_debugger_list *files, + int count, struct dentry *root, + struct rknpu_debugger *debugger) +{ + int i; + struct dentry *ent; + struct rknpu_debugger_node *tmp; + + for (i = 0; i < count; i++) { + tmp = kmalloc(sizeof(struct rknpu_debugger_node), GFP_KERNEL); + if (tmp == NULL) { + LOG_ERROR( + "Cannot alloc node path /sys/kernel/debug/%pd/%s\n", + root, files[i].name); + goto MALLOC_FAIL; + } + + tmp->info_ent = &files[i]; + tmp->debugger = debugger; + + ent = debugfs_create_file(files[i].name, S_IFREG | S_IRUGO, + root, tmp, &rknpu_debugfs_fops); + if (!ent) { + LOG_ERROR("Cannot create /sys/kernel/debug/%pd/%s\n", + root, files[i].name); + goto CREATE_FAIL; + } + + tmp->dent = ent; + + mutex_lock(&debugger->debugfs_lock); + list_add_tail(&tmp->list, &debugger->debugfs_entry_list); + mutex_unlock(&debugger->debugfs_lock); + } + + return 0; + +CREATE_FAIL: + kfree(tmp); +MALLOC_FAIL: + rknpu_debugfs_remove_files(debugger); + + return -1; +} + +static int rknpu_debugfs_remove(struct rknpu_debugger *debugger) +{ + rknpu_debugfs_remove_files(debugger); + + return 0; +} + +static int rknpu_debugfs_init(struct rknpu_debugger *debugger) +{ + int ret; + + debugger->debugfs_dir = + debugfs_create_dir(RKNPU_DEBUGGER_ROOT_NAME, NULL); + if (IS_ERR_OR_NULL(debugger->debugfs_dir)) { + LOG_ERROR("failed on mkdir /sys/kernel/debug/%s\n", + RKNPU_DEBUGGER_ROOT_NAME); + debugger->debugfs_dir = NULL; + return -EIO; + } + + ret = rknpu_debugfs_create_files(rknpu_debugger_root_list, + ARRAY_SIZE(rknpu_debugger_root_list), + debugger->debugfs_dir, debugger); + if (ret) { + LOG_ERROR( + "Could not install rknpu_debugger_root_list debugfs\n"); + goto CREATE_FAIL; + } + + return 0; + +CREATE_FAIL: + rknpu_debugfs_remove(debugger); + + return ret; +} +#endif /* #ifdef CONFIG_ROCKCHIP_RKNPU_DEBUG_FS */ + +#ifdef CONFIG_ROCKCHIP_RKNPU_PROC_FS +static int rknpu_procfs_open(struct inode *inode, struct file *file) +{ + struct 
rknpu_debugger_node *node = PDE_DATA(inode); + + return single_open(file, node->info_ent->show, node); +} + +static const struct proc_ops rknpu_procfs_fops = { + .proc_open = rknpu_procfs_open, + .proc_read = seq_read, + .proc_lseek = seq_lseek, + .proc_release = single_release, + .proc_write = rknpu_debugger_write, +}; + +static int rknpu_procfs_remove_files(struct rknpu_debugger *debugger) +{ + struct rknpu_debugger_node *pos, *q; + struct list_head *entry_list; + + mutex_lock(&debugger->procfs_lock); + + /* Delete procfs entry list */ + entry_list = &debugger->procfs_entry_list; + list_for_each_entry_safe(pos, q, entry_list, list) { + if (pos->pent == NULL) + continue; + list_del(&pos->list); + kfree(pos); + pos = NULL; + } + + /* Delete all procfs node in this directory */ + proc_remove(debugger->procfs_dir); + debugger->procfs_dir = NULL; + + mutex_unlock(&debugger->procfs_lock); + + return 0; +} + +static int rknpu_procfs_create_files(const struct rknpu_debugger_list *files, + int count, struct proc_dir_entry *root, + struct rknpu_debugger *debugger) +{ + int i; + struct proc_dir_entry *ent; + struct rknpu_debugger_node *tmp; + + for (i = 0; i < count; i++) { + tmp = kmalloc(sizeof(struct rknpu_debugger_node), GFP_KERNEL); + if (tmp == NULL) { + LOG_ERROR("Cannot alloc node path for /proc/%s/%s\n", + RKNPU_DEBUGGER_ROOT_NAME, files[i].name); + goto MALLOC_FAIL; + } + + tmp->info_ent = &files[i]; + tmp->debugger = debugger; + + ent = proc_create_data(files[i].name, S_IFREG | S_IRUGO, root, + &rknpu_procfs_fops, tmp); + if (!ent) { + LOG_ERROR("Cannot create /proc/%s/%s\n", + RKNPU_DEBUGGER_ROOT_NAME, files[i].name); + goto CREATE_FAIL; + } + + tmp->pent = ent; + + mutex_lock(&debugger->procfs_lock); + list_add_tail(&tmp->list, &debugger->procfs_entry_list); + mutex_unlock(&debugger->procfs_lock); + } + + return 0; + +CREATE_FAIL: + kfree(tmp); +MALLOC_FAIL: + rknpu_procfs_remove_files(debugger); + return -1; +} + +int rknpu_procfs_remove(struct rknpu_debugger 
*debugger) +{ + rknpu_procfs_remove_files(debugger); + + return 0; +} + +int rknpu_procfs_init(struct rknpu_debugger *debugger) +{ + int ret; + + debugger->procfs_dir = proc_mkdir(RKNPU_DEBUGGER_ROOT_NAME, NULL); + if (IS_ERR_OR_NULL(debugger->procfs_dir)) { + pr_err("failed on mkdir /proc/%s\n", RKNPU_DEBUGGER_ROOT_NAME); + debugger->procfs_dir = NULL; + return -EIO; + } + + ret = rknpu_procfs_create_files(rknpu_debugger_root_list, + ARRAY_SIZE(rknpu_debugger_root_list), + debugger->procfs_dir, debugger); + if (ret) { + pr_err("Could not install rknpu_debugger_root_list procfs\n"); + goto CREATE_FAIL; + } + + return 0; + +CREATE_FAIL: + rknpu_procfs_remove(debugger); + + return ret; +} +#endif /* #ifdef CONFIG_ROCKCHIP_RKNPU_PROC_FS */ + +int rknpu_debugger_init(struct rknpu_device *rknpu_dev) +{ +#ifdef CONFIG_ROCKCHIP_RKNPU_DEBUG_FS + mutex_init(&rknpu_dev->debugger.debugfs_lock); + INIT_LIST_HEAD(&rknpu_dev->debugger.debugfs_entry_list); + rknpu_debugfs_init(&rknpu_dev->debugger); +#endif +#ifdef CONFIG_ROCKCHIP_RKNPU_PROC_FS + mutex_init(&rknpu_dev->debugger.procfs_lock); + INIT_LIST_HEAD(&rknpu_dev->debugger.procfs_entry_list); + rknpu_procfs_init(&rknpu_dev->debugger); +#endif + return 0; +} + +int rknpu_debugger_remove(struct rknpu_device *rknpu_dev) +{ +#ifdef CONFIG_ROCKCHIP_RKNPU_DEBUG_FS + rknpu_debugfs_remove(&rknpu_dev->debugger); +#endif +#ifdef CONFIG_ROCKCHIP_RKNPU_PROC_FS + rknpu_procfs_remove(&rknpu_dev->debugger); +#endif + return 0; +} diff --git a/drivers/rknpu/rknpu_drv.c b/drivers/rknpu/rknpu_drv.c index 30f8f9c1dd06..626907176f53 100644 --- a/drivers/rknpu/rknpu_drv.c +++ b/drivers/rknpu/rknpu_drv.c @@ -33,11 +33,6 @@ #include #include -#include -#include -#include -#include - #ifndef FPGA_PLATFORM #include #include @@ -46,11 +41,23 @@ #include "rknpu_ioctl.h" #include "rknpu_reset.h" -#include "rknpu_gem.h" #include "rknpu_fence.h" #include "rknpu_drv.h" -#define POWER_DOWN_FREQ 200000000 +#ifdef CONFIG_ROCKCHIP_RKNPU_DRM_GEM +#include 
+#include +#include +#include +#include "rknpu_gem.h" +#endif + +#ifdef CONFIG_ROCKCHIP_RKNPU_DMA_HEAP +#include +#include "rknpu_mem.h" +#endif + +#define POWER_DOWN_FREQ 200000000 static int bypass_irq_handler; module_param(bypass_irq_handler, int, 0644); @@ -68,7 +75,7 @@ struct npu_irqs_data { }; static const struct npu_irqs_data rk356x_npu_irqs[] = { - { "npu0_irq", rknpu_core0_irq_handler } + { "npu_irq", rknpu_core0_irq_handler } }; static const struct npu_irqs_data rk3588_npu_irqs[] = { @@ -77,6 +84,10 @@ static const struct npu_irqs_data rk3588_npu_irqs[] = { { "npu2_irq", rknpu_core2_irq_handler } }; +static const struct npu_irqs_data rv110x_npu_irqs[] = { + { "npu_irq", rknpu_core0_irq_handler } +}; + static const struct npu_reset_data rk356x_npu_resets[] = { { "srst_a", "srst_h" } }; @@ -86,11 +97,16 @@ static const struct npu_reset_data rk3588_npu_resets[] = { { "srst_a2", "srst_h2" } }; +static const struct npu_reset_data rv110x_npu_resets[] = { { "srst_a", + "srst_h" } }; + static const struct rknpu_config rk356x_rknpu_config = { .bw_priority_addr = 0xfe180008, .bw_priority_length = 0x10, .dma_mask = DMA_BIT_MASK(32), - .pc_data_extra_amount = 4, + .pc_data_amount_scale = 1, + .pc_task_number_bits = 12, + .pc_task_number_mask = 0xfff, .bw_enable = 1, .irqs = rk356x_npu_irqs, .resets = rk356x_npu_resets, @@ -102,7 +118,9 @@ static const struct rknpu_config rk3588_rknpu_config = { .bw_priority_addr = 0x0, .bw_priority_length = 0x0, .dma_mask = DMA_BIT_MASK(40), - .pc_data_extra_amount = 2, + .pc_data_amount_scale = 2, + .pc_task_number_bits = 12, + .pc_task_number_mask = 0xfff, .bw_enable = 0, .irqs = rk3588_npu_irqs, .resets = rk3588_npu_resets, @@ -110,6 +128,20 @@ static const struct rknpu_config rk3588_rknpu_config = { .num_resets = ARRAY_SIZE(rk3588_npu_resets) }; +static const struct rknpu_config rv1106_rknpu_config = { + .bw_priority_addr = 0x0, + .bw_priority_length = 0x0, + .dma_mask = DMA_BIT_MASK(32), + .pc_data_amount_scale = 2, + 
.pc_task_number_bits = 16, + .pc_task_number_mask = 0xffff, + .bw_enable = 1, + .irqs = rv110x_npu_irqs, + .resets = rv110x_npu_resets, + .num_irqs = ARRAY_SIZE(rv110x_npu_irqs), + .num_resets = ARRAY_SIZE(rv110x_npu_resets) +}; + /* driver probe and init */ static const struct of_device_id rknpu_of_match[] = { { @@ -124,83 +156,13 @@ static const struct of_device_id rknpu_of_match[] = { .compatible = "rockchip,rk3588-rknpu", .data = &rk3588_rknpu_config, }, + { + .compatible = "rockchip,rv1106-rknpu", + .data = &rv1106_rknpu_config, + }, {}, }; -static const struct vm_operations_struct rknpu_gem_vm_ops = { - .fault = rknpu_gem_fault, - .open = drm_gem_vm_open, - .close = drm_gem_vm_close, -}; - -static int rknpu_action_ioctl(struct drm_device *dev, void *data, - struct drm_file *file_priv); - -static const struct drm_ioctl_desc rknpu_ioctls[] = { - DRM_IOCTL_DEF_DRV(RKNPU_ACTION, rknpu_action_ioctl, DRM_RENDER_ALLOW), - DRM_IOCTL_DEF_DRV(RKNPU_SUBMIT, rknpu_submit_ioctl, DRM_RENDER_ALLOW), - DRM_IOCTL_DEF_DRV(RKNPU_MEM_CREATE, rknpu_gem_create_ioctl, - DRM_RENDER_ALLOW), - DRM_IOCTL_DEF_DRV(RKNPU_MEM_MAP, rknpu_gem_map_ioctl, DRM_RENDER_ALLOW), - DRM_IOCTL_DEF_DRV(RKNPU_MEM_DESTROY, rknpu_gem_destroy_ioctl, - DRM_RENDER_ALLOW), - DRM_IOCTL_DEF_DRV(RKNPU_MEM_SYNC, rknpu_gem_sync_ioctl, - DRM_RENDER_ALLOW), -}; - -static const struct file_operations rknpu_drm_driver_fops = { - .owner = THIS_MODULE, - .open = drm_open, - .mmap = rknpu_gem_mmap, - .poll = drm_poll, - .read = drm_read, - .unlocked_ioctl = drm_ioctl, -#ifdef CONFIG_COMPAT - .compat_ioctl = drm_compat_ioctl, -#endif - .release = drm_release, - .llseek = noop_llseek, -}; - -static struct drm_driver rknpu_drm_driver = { -#if KERNEL_VERSION(5, 4, 0) <= LINUX_VERSION_CODE - .driver_features = DRIVER_GEM | DRIVER_RENDER, -#else - .driver_features = DRIVER_GEM | DRIVER_PRIME | DRIVER_RENDER, -#endif - .gem_free_object_unlocked = rknpu_gem_free_object, - .gem_vm_ops = &rknpu_gem_vm_ops, - .dumb_create = 
rknpu_gem_dumb_create, -#if KERNEL_VERSION(4, 19, 0) > LINUX_VERSION_CODE - .dumb_map_offset = rknpu_gem_dumb_map_offset, -#else - .dumb_map_offset = drm_gem_dumb_map_offset, -#endif - .dumb_destroy = drm_gem_dumb_destroy, - .prime_handle_to_fd = drm_gem_prime_handle_to_fd, - .prime_fd_to_handle = drm_gem_prime_fd_to_handle, - .gem_prime_export = drm_gem_prime_export, -#if KERNEL_VERSION(4, 13, 0) <= LINUX_VERSION_CODE - .gem_prime_import = rknpu_gem_prime_import, -#else - .gem_prime_import = drm_gem_prime_import, -#endif - .gem_prime_get_sg_table = rknpu_gem_prime_get_sg_table, - .gem_prime_import_sg_table = rknpu_gem_prime_import_sg_table, - .gem_prime_vmap = rknpu_gem_prime_vmap, - .gem_prime_vunmap = rknpu_gem_prime_vunmap, - .gem_prime_mmap = rknpu_gem_prime_mmap, - .ioctls = rknpu_ioctls, - .num_ioctls = ARRAY_SIZE(rknpu_ioctls), - .fops = &rknpu_drm_driver_fops, - .name = DRIVER_NAME, - .desc = DRIVER_DESC, - .date = DRIVER_DATE, - .major = DRIVER_MAJOR, - .minor = DRIVER_MINOR, - .patchlevel = DRIVER_PATCHLEVEL, -}; - static int rknpu_get_drv_version(uint32_t *version) { *version = RKNPU_GET_DRV_VERSION_CODE(DRIVER_MAJOR, DRIVER_MINOR, @@ -208,13 +170,56 @@ static int rknpu_get_drv_version(uint32_t *version) return 0; } -static int rknpu_action_ioctl(struct drm_device *dev, void *data, - struct drm_file *file_priv) +static int rknpu_power_on(struct rknpu_device *rknpu_dev); +static int rknpu_power_off(struct rknpu_device *rknpu_dev); + +static void rknpu_power_off_delay_work(struct work_struct *power_off_work) +{ + struct rknpu_device *rknpu_dev = + container_of(to_delayed_work(power_off_work), + struct rknpu_device, power_off_work); + mutex_lock(&rknpu_dev->power_lock); + if (atomic_read(&rknpu_dev->power_refcount) == 0 && + rknpu_dev->is_powered) { + rknpu_dev->is_powered = false; + rknpu_power_off(rknpu_dev); + } + mutex_unlock(&rknpu_dev->power_lock); +} + +static int rknpu_action(struct rknpu_device *rknpu_dev, + struct rknpu_action *args) { - struct 
rknpu_device *rknpu_dev = dev_get_drvdata(dev->dev); - struct rknpu_action *args = data; int ret = -EINVAL; + switch (args->flags) { + case RKNPU_POWER_ON: + atomic_inc(&rknpu_dev->power_refcount); + mutex_lock(&rknpu_dev->power_lock); + if (!rknpu_dev->is_powered) { + rknpu_dev->is_powered = true; + ret = rknpu_power_on(rknpu_dev); + } + mutex_unlock(&rknpu_dev->power_lock); + break; + case RKNPU_POWER_OFF: + if (rknpu_dev->is_powered && + atomic_dec_if_positive(&rknpu_dev->power_refcount) == 0) + queue_delayed_work(rknpu_dev->power_off_wq, + &rknpu_dev->power_off_work, + msecs_to_jiffies(1000)); + break; + default: + /* default open rknpu power to compatible with librknnrt.so version before 1.2.0 */ + mutex_lock(&rknpu_dev->power_lock); + if (!rknpu_dev->is_powered) { + atomic_inc(&rknpu_dev->power_refcount); + rknpu_dev->is_powered = true; + ret = rknpu_power_on(rknpu_dev); + } + mutex_unlock(&rknpu_dev->power_lock); + } + switch (args->flags) { case RKNPU_GET_HW_VERSION: ret = rknpu_get_hw_version(rknpu_dev, &args->value); @@ -289,6 +294,215 @@ static int rknpu_action_ioctl(struct drm_device *dev, void *data, return ret; } +#ifdef CONFIG_ROCKCHIP_RKNPU_DMA_HEAP +static int rknpu_open(struct inode *inode, struct file *file) +{ + return nonseekable_open(inode, file); +} + +static int rknpu_release(struct inode *inode, struct file *file) +{ + return 0; +} + +static int rknpu_action_ioctl(struct rknpu_device *rknpu_dev, + unsigned long data) +{ + struct rknpu_action args; + int ret = -EINVAL; + + if (unlikely(copy_from_user(&args, (struct rknpu_action *)data, + sizeof(struct rknpu_action)))) { + LOG_ERROR("%s: copy_from_user failed\n", __func__); + ret = -EFAULT; + return ret; + } + + ret = rknpu_action(rknpu_dev, &args); + + if (unlikely(copy_to_user((struct rknpu_action *)data, &args, + sizeof(struct rknpu_action)))) { + LOG_ERROR("%s: copy_to_user failed\n", __func__); + ret = -EFAULT; + return ret; + } + + return ret; +} + +static long rknpu_ioctl(struct file 
*file, uint32_t cmd, unsigned long arg) +{ + long ret = 0; + struct rknpu_device *rknpu_dev = + container_of(file->private_data, struct rknpu_device, miscdev); + switch (cmd) { + case IOCTL_RKNPU_ACTION: + ret = rknpu_action_ioctl(rknpu_dev, arg); + break; + case IOCTL_RKNPU_SUBMIT: + ret = rknpu_submit_ioctl(rknpu_dev, arg); + break; + case IOCTL_RKNPU_MEM_CREATE: + ret = rknpu_mem_create_ioctl(rknpu_dev, arg); + break; + case RKNPU_MEM_MAP: + break; + case IOCTL_RKNPU_MEM_DESTROY: + ret = rknpu_mem_destroy_ioctl(rknpu_dev, arg); + break; + case IOCTL_RKNPU_MEM_SYNC: + ret = rknpu_mem_sync_ioctl(rknpu_dev, arg); + break; + default: + break; + } + return ret; +} +const struct file_operations rknpu_fops = { + .owner = THIS_MODULE, + .open = rknpu_open, + .release = rknpu_release, + .unlocked_ioctl = rknpu_ioctl, +#ifdef CONFIG_COMPAT + .compat_ioctl = rknpu_ioctl, +#endif +}; +#endif + +#ifdef CONFIG_ROCKCHIP_RKNPU_DRM_GEM +static const struct vm_operations_struct rknpu_gem_vm_ops = { + .fault = rknpu_gem_fault, + .open = drm_gem_vm_open, + .close = drm_gem_vm_close, +}; + +static int rknpu_action_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv); + +static const struct drm_ioctl_desc rknpu_ioctls[] = { + DRM_IOCTL_DEF_DRV(RKNPU_ACTION, rknpu_action_ioctl, DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(RKNPU_SUBMIT, rknpu_submit_ioctl, DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(RKNPU_MEM_CREATE, rknpu_gem_create_ioctl, + DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(RKNPU_MEM_MAP, rknpu_gem_map_ioctl, DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(RKNPU_MEM_DESTROY, rknpu_gem_destroy_ioctl, + DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(RKNPU_MEM_SYNC, rknpu_gem_sync_ioctl, + DRM_RENDER_ALLOW), +}; + +static const struct file_operations rknpu_drm_driver_fops = { + .owner = THIS_MODULE, + .open = drm_open, + .mmap = rknpu_gem_mmap, + .poll = drm_poll, + .read = drm_read, + .unlocked_ioctl = drm_ioctl, +#ifdef CONFIG_COMPAT + .compat_ioctl = drm_compat_ioctl, +#endif + .release 
= drm_release, + .llseek = noop_llseek, +}; + +static struct drm_driver rknpu_drm_driver = { +#if KERNEL_VERSION(5, 4, 0) <= LINUX_VERSION_CODE + .driver_features = DRIVER_GEM | DRIVER_RENDER, +#else + .driver_features = DRIVER_GEM | DRIVER_PRIME | DRIVER_RENDER, +#endif + .gem_free_object_unlocked = rknpu_gem_free_object, + .gem_vm_ops = &rknpu_gem_vm_ops, + .dumb_create = rknpu_gem_dumb_create, +#if KERNEL_VERSION(4, 19, 0) > LINUX_VERSION_CODE + .dumb_map_offset = rknpu_gem_dumb_map_offset, +#else + .dumb_map_offset = drm_gem_dumb_map_offset, +#endif + .dumb_destroy = drm_gem_dumb_destroy, + .prime_handle_to_fd = drm_gem_prime_handle_to_fd, + .prime_fd_to_handle = drm_gem_prime_fd_to_handle, + .gem_prime_export = drm_gem_prime_export, +#if KERNEL_VERSION(4, 13, 0) <= LINUX_VERSION_CODE + .gem_prime_import = rknpu_gem_prime_import, +#else + .gem_prime_import = drm_gem_prime_import, +#endif + .gem_prime_get_sg_table = rknpu_gem_prime_get_sg_table, + .gem_prime_import_sg_table = rknpu_gem_prime_import_sg_table, + .gem_prime_vmap = rknpu_gem_prime_vmap, + .gem_prime_vunmap = rknpu_gem_prime_vunmap, + .gem_prime_mmap = rknpu_gem_prime_mmap, + .ioctls = rknpu_ioctls, + .num_ioctls = ARRAY_SIZE(rknpu_ioctls), + .fops = &rknpu_drm_driver_fops, + .name = DRIVER_NAME, + .desc = DRIVER_DESC, + .date = DRIVER_DATE, + .major = DRIVER_MAJOR, + .minor = DRIVER_MINOR, + .patchlevel = DRIVER_PATCHLEVEL, +}; + +static int rknpu_action_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv) +{ + struct rknpu_device *rknpu_dev = dev_get_drvdata(dev->dev); + + return rknpu_action(rknpu_dev, (struct rknpu_action *)data); +} + +#endif + +static enum hrtimer_restart hrtimer_handler(struct hrtimer *timer) +{ + struct rknpu_device *rknpu_dev = + container_of(timer, struct rknpu_device, timer); + struct rknpu_subcore_data *subcore_data = NULL; + struct rknpu_job *job = NULL; + ktime_t now = ktime_get(); + unsigned long flags; + int i; + + for (i = 0; i < 
rknpu_dev->config->num_irqs; i++) { + subcore_data = &rknpu_dev->subcore_datas[i]; + + spin_lock_irqsave(&rknpu_dev->irq_lock, flags); + + job = subcore_data->job; + if (job) { + subcore_data->timer.busy_time += + ktime_us_delta(now, job->hw_recoder_time); + job->hw_recoder_time = ktime_get(); + } + + subcore_data->timer.busy_time_record = + subcore_data->timer.busy_time; + subcore_data->timer.busy_time = 0; + spin_unlock_irqrestore(&rknpu_dev->irq_lock, flags); + } + + hrtimer_forward_now(timer, rknpu_dev->kt); + return HRTIMER_RESTART; +} + +static void rknpu_init_timer(struct rknpu_device *rknpu_dev) +{ + rknpu_dev->kt = ktime_set(0, RKNPU_LOAD_INTERVAL); + hrtimer_init(&rknpu_dev->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + rknpu_dev->timer.function = hrtimer_handler; + hrtimer_start(&rknpu_dev->timer, rknpu_dev->kt, HRTIMER_MODE_REL); +} + +static void rknpu_cancel_timer(struct rknpu_device *rknpu_dev) +{ + hrtimer_cancel(&rknpu_dev->timer); +} + +#ifdef CONFIG_ROCKCHIP_RKNPU_DMA_HEAP + +#endif + static bool rknpu_is_iommu_enable(struct device *dev) { struct device_node *iommu = NULL; @@ -314,42 +528,7 @@ static bool rknpu_is_iommu_enable(struct device *dev) return true; } -static int drm_fake_dev_register(struct rknpu_device *rknpu_dev) -{ - const struct platform_device_info rknpu_dev_info = { - .name = "rknpu_dev", - .id = PLATFORM_DEVID_AUTO, - .dma_mask = rknpu_dev->config->dma_mask, - }; - struct platform_device *pdev = NULL; - int ret = -EINVAL; - - pdev = platform_device_register_full(&rknpu_dev_info); - if (pdev) { - ret = of_dma_configure(&pdev->dev, NULL, true); - if (ret) { - platform_device_unregister(pdev); - pdev = NULL; - } - } - - rknpu_dev->fake_dev = pdev ? 
&pdev->dev : NULL; - - return ret; -} - -static void drm_fake_dev_unregister(struct rknpu_device *rknpu_dev) -{ - struct platform_device *pdev = NULL; - - if (!rknpu_dev->fake_dev) - return; - - pdev = to_platform_device(rknpu_dev->fake_dev); - - platform_device_unregister(pdev); -} - +#ifdef CONFIG_ROCKCHIP_RKNPU_DRM_GEM static int rknpu_drm_probe(struct rknpu_device *rknpu_dev) { struct device *dev = rknpu_dev->dev; @@ -368,8 +547,6 @@ static int rknpu_drm_probe(struct rknpu_device *rknpu_dev) drm_dev->dev_private = rknpu_dev; rknpu_dev->drm_dev = drm_dev; - drm_fake_dev_register(rknpu_dev); - return 0; err_free_drm: @@ -386,8 +563,6 @@ static void rknpu_drm_remove(struct rknpu_device *rknpu_dev) { struct drm_device *drm_dev = rknpu_dev->drm_dev; - drm_fake_dev_unregister(rknpu_dev); - drm_dev_unregister(drm_dev); #if KERNEL_VERSION(4, 15, 0) <= LINUX_VERSION_CODE @@ -396,6 +571,7 @@ static void rknpu_drm_remove(struct rknpu_device *rknpu_dev) drm_dev_unref(drm_dev); #endif } +#endif static int rknpu_power_on(struct rknpu_device *rknpu_dev) { @@ -403,12 +579,15 @@ static int rknpu_power_on(struct rknpu_device *rknpu_dev) int ret = -EINVAL; #ifndef FPGA_PLATFORM - ret = regulator_enable(rknpu_dev->vdd); - if (ret) { - LOG_DEV_ERROR(dev, - "failed to enable vdd reg for rknpu, ret = %d\n", - ret); - return ret; + if (rknpu_dev->vdd) { + ret = regulator_enable(rknpu_dev->vdd); + if (ret) { + LOG_DEV_ERROR( + dev, + "failed to enable vdd reg for rknpu, ret = %d\n", + ret); + return ret; + } } if (rknpu_dev->mem) { @@ -432,8 +611,12 @@ static int rknpu_power_on(struct rknpu_device *rknpu_dev) if (rknpu_dev->multiple_domains) { if (rknpu_dev->genpd_dev_npu0) { +#if KERNEL_VERSION(5, 4, 0) < LINUX_VERSION_CODE ret = pm_runtime_resume_and_get( rknpu_dev->genpd_dev_npu0); +#else + ret = pm_runtime_get_sync(rknpu_dev->genpd_dev_npu0); +#endif if (ret < 0) { LOG_DEV_ERROR( dev, @@ -443,8 +626,12 @@ static int rknpu_power_on(struct rknpu_device *rknpu_dev) } } if 
(rknpu_dev->genpd_dev_npu1) { +#if KERNEL_VERSION(5, 4, 0) < LINUX_VERSION_CODE ret = pm_runtime_resume_and_get( rknpu_dev->genpd_dev_npu1); +#else + ret = pm_runtime_get_sync(rknpu_dev->genpd_dev_npu1); +#endif if (ret < 0) { LOG_DEV_ERROR( dev, @@ -454,8 +641,12 @@ static int rknpu_power_on(struct rknpu_device *rknpu_dev) } } if (rknpu_dev->genpd_dev_npu2) { +#if KERNEL_VERSION(5, 4, 0) < LINUX_VERSION_CODE ret = pm_runtime_resume_and_get( rknpu_dev->genpd_dev_npu2); +#else + ret = pm_runtime_get_sync(rknpu_dev->genpd_dev_npu2); +#endif if (ret < 0) { LOG_DEV_ERROR( dev, @@ -482,6 +673,15 @@ static int rknpu_power_off(struct rknpu_device *rknpu_dev) pm_runtime_put_sync(dev); + /* + * Because IOMMU's runtime suspend callback is asynchronous, + * So it may be executed after the NPU is turned off after PD/CLK/VD, + * and the runtime suspend callback has a register access. + * If the PD/VD/CLK is closed, the register access will crash. + * So add a delay to avoid this problem. + */ + msleep(20); + if (rknpu_dev->multiple_domains) { if (rknpu_dev->genpd_dev_npu2) pm_runtime_put_sync(rknpu_dev->genpd_dev_npu2); @@ -494,7 +694,8 @@ static int rknpu_power_off(struct rknpu_device *rknpu_dev) clk_bulk_disable_unprepare(rknpu_dev->num_clks, rknpu_dev->clks); #ifndef FPGA_PLATFORM - regulator_disable(rknpu_dev->vdd); + if (rknpu_dev->vdd) + regulator_disable(rknpu_dev->vdd); if (rknpu_dev->mem) regulator_disable(rknpu_dev->mem); @@ -508,9 +709,12 @@ static struct monitor_dev_profile npu_mdevp = { .type = MONITOR_TPYE_DEV, .low_temp_adjust = rockchip_monitor_dev_low_temp_adjust, .high_temp_adjust = rockchip_monitor_dev_high_temp_adjust, +#if KERNEL_VERSION(5, 10, 0) <= LINUX_VERSION_CODE .update_volt = rockchip_monitor_check_rate_volt, +#endif }; +#if KERNEL_VERSION(5, 10, 0) <= LINUX_VERSION_CODE static int npu_opp_helper(struct dev_pm_set_opp_data *data) { struct device *dev = data->dev; @@ -528,7 +732,7 @@ static int npu_opp_helper(struct dev_pm_set_opp_data *data) u32 
target_rm = UINT_MAX; int ret = 0; - ret = clk_bulk_prepare_enable(opp_info->num_clks, opp_info->clks); + ret = clk_bulk_prepare_enable(opp_info->num_clks, opp_info->clks); if (ret < 0) { LOG_DEV_ERROR(dev, "failed to enable opp clks\n"); return ret; @@ -565,7 +769,7 @@ static int npu_opp_helper(struct dev_pm_set_opp_data *data) LOG_DEV_ERROR(dev, "failed to set clk rate: %d\n", ret); goto restore_rm; } - /* Scaling down? Scale voltage after frequency */ + /* Scaling down? Scale voltage after frequency */ } else { rockchip_set_intermediate_rate(dev, opp_info, clk, old_freq, new_freq, false, true); @@ -644,6 +848,86 @@ static int npu_devfreq_target(struct device *dev, unsigned long *freq, return ret; } +#else + +static int npu_devfreq_target(struct device *dev, unsigned long *target_freq, + u32 flags) +{ + struct rknpu_device *rknpu_dev = dev_get_drvdata(dev); + struct dev_pm_opp *opp = NULL; + unsigned long freq = *target_freq; + unsigned long old_freq = rknpu_dev->current_freq; + unsigned long volt, old_volt = rknpu_dev->current_volt; + int ret = -EINVAL; + +#if KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE + rcu_read_lock(); +#endif + + opp = devfreq_recommended_opp(dev, &freq, flags); + if (IS_ERR(opp)) { +#if KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE + rcu_read_unlock(); +#endif + LOG_DEV_ERROR(dev, "failed to get opp (%ld)\n", PTR_ERR(opp)); + return PTR_ERR(opp); + } + volt = dev_pm_opp_get_voltage(opp); +#if KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE + rcu_read_unlock(); +#endif + + /* + * Only update if there is a change of frequency + */ + if (old_freq == freq) { + *target_freq = freq; + if (old_volt == volt) + return 0; + ret = regulator_set_voltage(rknpu_dev->vdd, volt, INT_MAX); + if (ret) { + LOG_DEV_ERROR(dev, "failed to set volt %lu\n", volt); + return ret; + } + rknpu_dev->current_volt = volt; + return 0; + } + + if (rknpu_dev->vdd && old_volt != volt && old_freq < freq) { + ret = regulator_set_voltage(rknpu_dev->vdd, volt, INT_MAX); + if 
(ret) { + LOG_DEV_ERROR(dev, "failed to increase volt %lu\n", + volt); + return ret; + } + } + LOG_DEV_DEBUG(dev, "%luHz %luuV -> %luHz %luuV\n", old_freq, old_volt, + freq, volt); + ret = clk_set_rate(rknpu_dev->clks[0].clk, freq); + if (ret) { + LOG_DEV_ERROR(dev, "failed to set clock %lu\n", freq); + return ret; + } + *target_freq = freq; + rknpu_dev->current_freq = freq; + + if (rknpu_dev->devfreq) + rknpu_dev->devfreq->last_status.current_frequency = freq; + + if (rknpu_dev->vdd && old_volt != volt && old_freq > freq) { + ret = regulator_set_voltage(rknpu_dev->vdd, volt, INT_MAX); + if (ret) { + LOG_DEV_ERROR(dev, "failed to decrease volt %lu\n", + volt); + return ret; + } + } + rknpu_dev->current_volt = volt; + + return ret; +} +#endif + static int npu_devfreq_get_dev_status(struct device *dev, struct devfreq_dev_status *stat) { @@ -682,6 +966,7 @@ static struct devfreq_cooling_power npu_cooling_power = { .get_static_power = &npu_get_static_power, }; +#if KERNEL_VERSION(5, 10, 0) <= LINUX_VERSION_CODE static int rk3588_npu_set_read_margin(struct device *dev, struct rockchip_opp_info *opp_info, u32 rm) @@ -752,8 +1037,8 @@ static int rknpu_devfreq_init(struct rknpu_device *rknpu_dev) } rockchip_get_opp_data(rockchip_npu_of_match, &rknpu_dev->opp_info); - ret = rockchip_init_opp_table(dev, &rknpu_dev->opp_info, - "npu_leakage", "rknpu"); + ret = rockchip_init_opp_table(dev, &rknpu_dev->opp_info, "npu_leakage", + "rknpu"); if (ret) { LOG_DEV_ERROR(dev, "failed to init_opp_table\n"); return ret; @@ -817,6 +1102,125 @@ static int rknpu_devfreq_init(struct rknpu_device *rknpu_dev) out: return 0; } + +#else + +static int npu_devfreq_adjust_current_freq_volt(struct device *dev, + struct rknpu_device *rknpu_dev) +{ + unsigned long volt, old_freq, freq; + struct dev_pm_opp *opp = NULL; + int ret = -EINVAL; + + old_freq = clk_get_rate(rknpu_dev->clks[0].clk); + freq = old_freq; + +#if KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE + rcu_read_lock(); +#endif + + opp = 
devfreq_recommended_opp(dev, &freq, 0); + volt = dev_pm_opp_get_voltage(opp); + +#if KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE + rcu_read_unlock(); +#endif + + if (freq >= old_freq && rknpu_dev->vdd) { + ret = regulator_set_voltage(rknpu_dev->vdd, volt, INT_MAX); + if (ret) { + LOG_DEV_ERROR(dev, "failed to set volt %lu\n", volt); + return ret; + } + } + LOG_DEV_DEBUG(dev, "adjust current freq=%luHz, volt=%luuV\n", freq, + volt); + ret = clk_set_rate(rknpu_dev->clks[0].clk, freq); + if (ret) { + LOG_DEV_ERROR(dev, "failed to set clock %lu\n", freq); + return ret; + } + if (freq < old_freq && rknpu_dev->vdd) { + ret = regulator_set_voltage(rknpu_dev->vdd, volt, INT_MAX); + if (ret) { + LOG_DEV_ERROR(dev, "failed to set volt %lu\n", volt); + return ret; + } + } + rknpu_dev->current_freq = freq; + rknpu_dev->current_volt = volt; + + return 0; +} + +static int rknpu_devfreq_init(struct rknpu_device *rknpu_dev) +{ + struct device *dev = rknpu_dev->dev; + struct devfreq_dev_profile *dp = &npu_devfreq_profile; + int ret = -EINVAL; + + ret = rockchip_init_opp_table(dev, NULL, "npu_leakage", "rknpu"); + + if (ret) { + LOG_DEV_ERROR(dev, "failed to init_opp_table\n"); + return ret; + } + + ret = npu_devfreq_adjust_current_freq_volt(dev, rknpu_dev); + if (ret) { + LOG_DEV_ERROR(dev, "failed to adjust current freq volt\n"); + return ret; + } + dp->initial_freq = rknpu_dev->current_freq; + + rknpu_dev->devfreq = + devm_devfreq_add_device(dev, dp, "userspace", NULL); + if (IS_ERR(rknpu_dev->devfreq)) { + LOG_DEV_ERROR(dev, "failed to add devfreq\n"); + return PTR_ERR(rknpu_dev->devfreq); + } + devm_devfreq_register_opp_notifier(dev, rknpu_dev->devfreq); + + rknpu_dev->devfreq->last_status.current_frequency = dp->initial_freq; + rknpu_dev->devfreq->last_status.total_time = 1; + rknpu_dev->devfreq->last_status.busy_time = 1; + + npu_mdevp.data = rknpu_dev->devfreq; + rknpu_dev->mdev_info = + rockchip_system_monitor_register(dev, &npu_mdevp); + if (IS_ERR(rknpu_dev->mdev_info)) 
{ + LOG_DEV_DEBUG(dev, "without system monitor\n"); + rknpu_dev->mdev_info = NULL; + } + rknpu_dev->current_freq = clk_get_rate(rknpu_dev->clks[0].clk); + rknpu_dev->current_volt = regulator_get_voltage(rknpu_dev->vdd); + + of_property_read_u32(dev->of_node, "dynamic-power-coefficient", + (u32 *)&npu_cooling_power.dyn_power_coeff); + rknpu_dev->model_data = + rockchip_ipa_power_model_init(dev, "npu_leakage"); + if (IS_ERR_OR_NULL(rknpu_dev->model_data)) { + rknpu_dev->model_data = NULL; + LOG_DEV_ERROR(dev, "failed to initialize power model\n"); + } else if (rknpu_dev->model_data->dynamic_coefficient) { + npu_cooling_power.dyn_power_coeff = + rknpu_dev->model_data->dynamic_coefficient; + } + + if (!npu_cooling_power.dyn_power_coeff) { + LOG_DEV_ERROR(dev, "failed to get dynamic-coefficient\n"); + goto out; + } + + rknpu_dev->devfreq_cooling = of_devfreq_cooling_register_power( + dev->of_node, rknpu_dev->devfreq, &npu_cooling_power); + if (IS_ERR_OR_NULL(rknpu_dev->devfreq_cooling)) + LOG_DEV_ERROR(dev, "failed to register cooling device\n"); + +out: + return 0; +} +#endif #endif static int rknpu_register_irq(struct platform_device *pdev, @@ -920,10 +1324,19 @@ static int rknpu_probe(struct platform_device *pdev) rknpu_reset_get(rknpu_dev); rknpu_dev->num_clks = devm_clk_bulk_get_all(dev, &rknpu_dev->clks); - if (strstr(__clk_get_name(rknpu_dev->clks[0].clk), "scmi")) - rknpu_dev->opp_info.scmi_clk = rknpu_dev->clks[0].clk; + if (rknpu_dev->num_clks < 1) { + LOG_DEV_ERROR(dev, "failed to get clk source for rknpu\n"); +#ifndef FPGA_PLATFORM + return -ENODEV; +#endif + } #ifndef FPGA_PLATFORM +#if KERNEL_VERSION(5, 10, 0) <= LINUX_VERSION_CODE + if (strstr(__clk_get_name(rknpu_dev->clks[0].clk), "scmi")) + rknpu_dev->opp_info.scmi_clk = rknpu_dev->clks[0].clk; +#endif + rknpu_dev->vdd = devm_regulator_get_optional(dev, "rknpu"); if (IS_ERR(rknpu_dev->vdd)) { if (PTR_ERR(rknpu_dev->vdd) != -ENODEV) { @@ -953,6 +1366,7 @@ static int rknpu_probe(struct platform_device 
*pdev) spin_lock_init(&rknpu_dev->lock); spin_lock_init(&rknpu_dev->irq_lock); + mutex_init(&rknpu_dev->power_lock); for (i = 0; i < config->num_irqs; i++) { INIT_LIST_HEAD(&rknpu_dev->subcore_datas[i].todo_list); init_waitqueue_head(&rknpu_dev->subcore_datas[i].job_done_wq); @@ -990,21 +1404,56 @@ static int rknpu_probe(struct platform_device *pdev) } } - if (!rknpu_dev->bypass_irq_handler) - rknpu_register_irq(pdev, rknpu_dev); + if (!rknpu_dev->bypass_irq_handler) { + ret = rknpu_register_irq(pdev, rknpu_dev); + if (ret) + return ret; + } else { + LOG_DEV_WARN(dev, "bypass irq handler!\n"); + } +#ifdef CONFIG_ROCKCHIP_RKNPU_DRM_GEM ret = rknpu_drm_probe(rknpu_dev); if (ret) { LOG_DEV_ERROR(dev, "failed to probe device for rknpu\n"); return ret; } +#endif +#ifdef CONFIG_ROCKCHIP_RKNPU_DMA_HEAP + rknpu_dev->miscdev.minor = MISC_DYNAMIC_MINOR; + rknpu_dev->miscdev.name = "rknpu"; + rknpu_dev->miscdev.fops = &rknpu_fops; + ret = misc_register(&rknpu_dev->miscdev); + if (ret) { + LOG_DEV_ERROR(dev, "cannot register miscdev (%d)\n", ret); + return ret; + } + + rknpu_dev->heap = rk_dma_heap_find("rk-dma-heap-cma"); + if (!rknpu_dev->heap) { + LOG_DEV_ERROR(dev, "failed to find cma heap\n"); + return -ENOMEM; + } + rk_dma_heap_set_dev(dev); + LOG_DEV_INFO(dev, "Initialized %s: v%d.%d.%d for %s\n", DRIVER_DESC, + DRIVER_MAJOR, DRIVER_MINOR, DRIVER_PATCHLEVEL, + DRIVER_DATE); +#endif + +#ifdef CONFIG_ROCKCHIP_RKNPU_FENCE ret = rknpu_fence_context_alloc(rknpu_dev); if (ret) { LOG_DEV_ERROR(dev, "failed to allocate fence context for rknpu\n"); +#ifdef CONFIG_ROCKCHIP_RKNPU_DRM_GEM goto err_remove_drm; +#endif +#ifdef CONFIG_ROCKCHIP_RKNPU_DMA_HEAP + goto err_remove_misc; +#endif } +#endif platform_set_drvdata(pdev, rknpu_dev); @@ -1025,18 +1474,47 @@ static int rknpu_probe(struct platform_device *pdev) } ret = rknpu_power_on(rknpu_dev); - if (ret) + if (ret) { +#ifdef CONFIG_ROCKCHIP_RKNPU_DRM_GEM goto err_remove_drm; +#endif +#ifdef CONFIG_ROCKCHIP_RKNPU_DMA_HEAP + goto 
err_remove_misc; +#endif + } #ifndef FPGA_PLATFORM rknpu_devfreq_init(rknpu_dev); #endif + rknpu_dev->power_off_wq = + create_freezable_workqueue("rknpu_power_off_wq"); + if (!rknpu_dev->power_off_wq) { + LOG_DEV_ERROR(dev, "rknpu couldn't create power_off workqueue"); + ret = -ENOMEM; + goto err_remove_wq; + } + INIT_DEFERRABLE_WORK(&rknpu_dev->power_off_work, + rknpu_power_off_delay_work); + rknpu_dev->is_powered = false; + atomic_set(&rknpu_dev->power_refcount, 0); + rknpu_power_off(rknpu_dev); + + rknpu_debugger_init(rknpu_dev); + rknpu_init_timer(rknpu_dev); + return 0; +#ifdef CONFIG_ROCKCHIP_RKNPU_DRM_GEM err_remove_drm: rknpu_drm_remove(rknpu_dev); - +#endif +#ifdef CONFIG_ROCKCHIP_RKNPU_DMA_HEAP +err_remove_misc: + misc_deregister(&(rknpu_dev->miscdev)); +#endif +err_remove_wq: + destroy_workqueue(rknpu_dev->power_off_wq); return ret; } @@ -1045,14 +1523,25 @@ static int rknpu_remove(struct platform_device *pdev) struct rknpu_device *rknpu_dev = platform_get_drvdata(pdev); int i = 0; + cancel_delayed_work_sync(&rknpu_dev->power_off_work); + destroy_workqueue(rknpu_dev->power_off_wq); + + rknpu_debugger_remove(rknpu_dev); + rknpu_cancel_timer(rknpu_dev); + for (i = 0; i < rknpu_dev->config->num_irqs; i++) { WARN_ON(rknpu_dev->subcore_datas[i].job); WARN_ON(!list_empty(&rknpu_dev->subcore_datas[i].todo_list)); } +#ifdef CONFIG_ROCKCHIP_RKNPU_DRM_GEM rknpu_drm_remove(rknpu_dev); - - rknpu_power_off(rknpu_dev); +#endif +#ifdef CONFIG_ROCKCHIP_RKNPU_DMA_HEAP + misc_deregister(&(rknpu_dev->miscdev)); +#endif + if (rknpu_dev->is_powered) + rknpu_power_off(rknpu_dev); if (rknpu_dev->multiple_domains) { if (rknpu_dev->genpd_dev_npu0) @@ -1068,6 +1557,8 @@ static int rknpu_remove(struct platform_device *pdev) return 0; } +#ifndef FPGA_PLATFORM +#if KERNEL_VERSION(5, 10, 0) <= LINUX_VERSION_CODE static int rknpu_runtime_suspend(struct device *dev) { struct rknpu_device *rknpu_dev = dev_get_drvdata(dev); @@ -1091,7 +1582,7 @@ static int rknpu_runtime_resume(struct 
device *dev) if (!rknpu_dev->current_freq || !rknpu_dev->current_volt) return 0; - ret = clk_bulk_prepare_enable(opp_info->num_clks, opp_info->clks); + ret = clk_bulk_prepare_enable(opp_info->num_clks, opp_info->clks); if (ret) { LOG_DEV_ERROR(dev, "failed to enable opp clks\n"); return ret; @@ -1113,8 +1604,11 @@ static int rknpu_runtime_resume(struct device *dev) static const struct dev_pm_ops rknpu_pm_ops = { SET_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend, pm_runtime_force_resume) - SET_RUNTIME_PM_OPS(rknpu_runtime_suspend, rknpu_runtime_resume, NULL) + SET_RUNTIME_PM_OPS(rknpu_runtime_suspend, rknpu_runtime_resume, + NULL) }; +#endif +#endif static struct platform_driver rknpu_driver = { .probe = rknpu_probe, @@ -1122,7 +1616,11 @@ static struct platform_driver rknpu_driver = { .driver = { .owner = THIS_MODULE, .name = "RKNPU", +#ifndef FPGA_PLATFORM +#if KERNEL_VERSION(5, 4, 0) < LINUX_VERSION_CODE .pm = &rknpu_pm_ops, +#endif +#endif .of_match_table = of_match_ptr(rknpu_of_match), }, }; diff --git a/drivers/rknpu/rknpu_gem.c b/drivers/rknpu/rknpu_gem.c index 15b954ea67d4..0fd8204268a7 100644 --- a/drivers/rknpu/rknpu_gem.c +++ b/drivers/rknpu/rknpu_gem.c @@ -21,13 +21,14 @@ #include "rknpu_ioctl.h" #include "rknpu_gem.h" -#define RKNPU_GEM_ALLOC_FROM_PAGES 0 +#define RKNPU_GEM_ALLOC_FROM_PAGES 1 #if RKNPU_GEM_ALLOC_FROM_PAGES static int rknpu_gem_get_pages(struct rknpu_gem_object *rknpu_obj) { struct drm_device *drm = rknpu_obj->base.dev; struct scatterlist *s = NULL; + dma_addr_t dma_addr = 0; int ret = -EINVAL, i = 0; rknpu_obj->pages = drm_gem_get_pages(&rknpu_obj->base); @@ -48,35 +49,44 @@ static int rknpu_gem_get_pages(struct rknpu_gem_object *rknpu_obj) goto put_pages; } - for_each_sg(rknpu_obj->sgt->sgl, s, rknpu_obj->sgt->nents, i) { - sg_dma_address(s) = sg_phys(s); - LOG_DEBUG( - "gem pages alloc sgt[%d], phys_address: %#llx, length: %#x\n", - i, (__u64)s->dma_address, s->length); - } - - ret = dma_map_sg_attrs(drm->dev, rknpu_obj->sgt->sgl, - 
rknpu_obj->sgt->nents, DMA_BIDIRECTIONAL, - rknpu_obj->dma_attrs); + ret = dma_map_sg(drm->dev, rknpu_obj->sgt->sgl, rknpu_obj->sgt->nents, + DMA_BIDIRECTIONAL); if (ret == 0) { - LOG_DEV_ERROR(drm->dev, "failed to map sg table.\n"); ret = -EFAULT; goto free_sgt; } + dma_sync_sg_for_device(drm->dev, rknpu_obj->sgt->sgl, + rknpu_obj->sgt->nents, DMA_TO_DEVICE); + if (rknpu_obj->flags & RKNPU_MEM_KERNEL_MAPPING) { - rknpu_obj->kv_addr = - vmap(rknpu_obj->pages, rknpu_obj->num_pages, VM_MAP, - PAGE_KERNEL); + rknpu_obj->cookie = vmap(rknpu_obj->pages, rknpu_obj->num_pages, + VM_MAP, PAGE_KERNEL); + if (!rknpu_obj->cookie) + goto unmap_sg; + rknpu_obj->kv_addr = rknpu_obj->cookie; } - rknpu_obj->dma_addr = (__u64)sg_dma_address(rknpu_obj->sgt->sgl); + dma_addr = sg_dma_address(rknpu_obj->sgt->sgl); + rknpu_obj->dma_addr = dma_addr; + + for_each_sg(rknpu_obj->sgt->sgl, s, rknpu_obj->sgt->nents, i) { + dma_addr += s->length; + LOG_DEBUG( + "gem pages alloc sgt[%d], dma_address: %#llx, length: %#x\n", + i, (__u64)dma_addr, s->length); + } return 0; +unmap_sg: + dma_unmap_sg(drm->dev, rknpu_obj->sgt->sgl, rknpu_obj->sgt->nents, + DMA_BIDIRECTIONAL); + free_sgt: sg_free_table(rknpu_obj->sgt); kfree(rknpu_obj->sgt); + put_pages: drm_gem_put_pages(&rknpu_obj->base, rknpu_obj->pages, false, false); @@ -90,8 +100,9 @@ static void rknpu_gem_put_pages(struct rknpu_gem_object *rknpu_obj) if (rknpu_obj->flags & RKNPU_MEM_KERNEL_MAPPING) vunmap(rknpu_obj->kv_addr); - dma_map_sg_attrs(drm->dev, rknpu_obj->sgt->sgl, rknpu_obj->sgt->nents, - DMA_BIDIRECTIONAL, rknpu_obj->dma_attrs); + dma_unmap_sg(drm->dev, rknpu_obj->sgt->sgl, rknpu_obj->sgt->nents, + DMA_BIDIRECTIONAL); + drm_gem_put_pages(&rknpu_obj->base, rknpu_obj->pages, true, true); sg_free_table(rknpu_obj->sgt); kfree(rknpu_obj->sgt); @@ -911,9 +922,8 @@ int rknpu_gem_sync_ioctl(struct drm_device *dev, void *data, struct rknpu_gem_object *rknpu_obj = NULL; struct rknpu_mem_sync *args = data; struct scatterlist *sg; - dma_addr_t 
sg_dma_addr; unsigned long length, offset = 0; - unsigned long sg_offset, sg_left, size = 0; + unsigned long sg_left, size = 0; unsigned long len = 0; int i; @@ -937,11 +947,6 @@ int rknpu_gem_sync_ioctl(struct drm_device *dev, void *data, DMA_FROM_DEVICE); } } else { - struct drm_device *drm = rknpu_obj->base.dev; - struct rknpu_device *rknpu_dev = drm->dev_private; - - WARN_ON(!rknpu_dev->fake_dev); - length = args->size; offset = args->offset; @@ -951,21 +956,17 @@ int rknpu_gem_sync_ioctl(struct drm_device *dev, void *data, if (len <= offset) continue; - sg_dma_addr = sg_dma_address(sg); sg_left = len - offset; - sg_offset = sg->length - sg_left; size = (length < sg_left) ? length : sg_left; if (args->flags & RKNPU_MEM_SYNC_TO_DEVICE) { - dma_sync_single_range_for_device( - rknpu_dev->fake_dev, sg_dma_addr, - sg_offset, size, DMA_TO_DEVICE); + dma_sync_sg_for_device(dev->dev, sg, 1, + DMA_TO_DEVICE); } if (args->flags & RKNPU_MEM_SYNC_FROM_DEVICE) { - dma_sync_single_range_for_cpu( - rknpu_dev->fake_dev, sg_dma_addr, - sg_offset, size, DMA_FROM_DEVICE); + dma_sync_sg_for_cpu(dev->dev, sg, 1, + DMA_FROM_DEVICE); } offset += size; diff --git a/drivers/rknpu/rknpu_job.c b/drivers/rknpu/rknpu_job.c index 67a4b9d76734..8d4de7322ecf 100644 --- a/drivers/rknpu/rknpu_job.c +++ b/drivers/rknpu/rknpu_job.c @@ -15,6 +15,7 @@ #include "rknpu_gem.h" #include "rknpu_fence.h" #include "rknpu_job.h" +#include "rknpu_mem.h" #define _REG_READ(base, offset) readl(base + (offset)) #define _REG_WRITE(base, value, offset) writel(value, base + (offset)) @@ -57,15 +58,17 @@ static int rknn_get_task_number(struct rknpu_job *job, int core_index) static void rknpu_job_free(struct rknpu_job *job) { +#ifdef CONFIG_ROCKCHIP_RKNPU_DRM_GEM struct rknpu_gem_object *task_obj = NULL; - if (job->fence) - dma_fence_put(job->fence); - task_obj = (struct rknpu_gem_object *)(uintptr_t)job->args->task_obj_addr; if (task_obj) rknpu_gem_object_put(&task_obj->base); +#endif + + if (job->fence) + 
dma_fence_put(job->fence); if (job->args_owner) kfree(job->args); @@ -92,8 +95,9 @@ static inline struct rknpu_job *rknpu_job_alloc(struct rknpu_device *rknpu_dev, struct rknpu_submit *args) { struct rknpu_job *job = NULL; +#ifdef CONFIG_ROCKCHIP_RKNPU_DRM_GEM struct rknpu_gem_object *task_obj = NULL; - +#endif if (rknpu_dev->config->num_irqs == 1) args->core_mask = RKNPU_CORE0_MASK; @@ -108,10 +112,11 @@ static inline struct rknpu_job *rknpu_job_alloc(struct rknpu_device *rknpu_dev, ((args->core_mask & RKNPU_CORE2_MASK) >> 2); job->run_count = job->use_core_num; job->interrupt_count = job->use_core_num; - +#ifdef CONFIG_ROCKCHIP_RKNPU_DRM_GEM task_obj = (struct rknpu_gem_object *)(uintptr_t)args->task_obj_addr; if (task_obj) rknpu_gem_object_get(&task_obj->base); +#endif if (!(args->flags & RKNPU_JOB_NONBLOCK)) { job->args = args; @@ -159,7 +164,9 @@ static inline int rknpu_job_wait(struct rknpu_job *job) if (args->flags & RKNPU_JOB_PC) { uint32_t task_status = REG_READ(RKNPU_OFFSET_PC_TASK_STATUS); - args->task_counter = (task_status & 0xfff); + args->task_counter = + (task_status & + rknpu_dev->config->pc_task_number_mask); } return ret < 0 ? 
ret : -ETIMEDOUT; } @@ -173,8 +180,14 @@ static inline int rknpu_job_commit_pc(struct rknpu_job *job, int core_index) { struct rknpu_device *rknpu_dev = job->rknpu_dev; struct rknpu_submit *args = job->args; +#ifdef CONFIG_ROCKCHIP_RKNPU_DRM_GEM struct rknpu_gem_object *task_obj = (struct rknpu_gem_object *)(uintptr_t)args->task_obj_addr; +#endif +#ifdef CONFIG_ROCKCHIP_RKNPU_DMA_HEAP + struct rknpu_mem_object *task_obj = + (struct rknpu_mem_object *)(uintptr_t)args->task_obj_addr; +#endif struct rknpu_task *task_base = NULL; struct rknpu_task *first_task = NULL; struct rknpu_task *last_task = NULL; @@ -183,33 +196,45 @@ static inline int rknpu_job_commit_pc(struct rknpu_job *job, int core_index) int task_end = args->task_start + args->task_number - 1; int task_number = args->task_number; int task_pp_en = args->flags & RKNPU_JOB_PINGPONG ? 1 : 0; + int pc_data_amount_scale = rknpu_dev->config->pc_data_amount_scale; + int pc_task_number_bits = rknpu_dev->config->pc_task_number_bits; int i = 0; - for (i = 0; i < rknpu_dev->config->num_irqs; i++) { - if (i == core_index) { - REG_WRITE((0xe + 0x10000000 * i), 0x1004); - REG_WRITE((0xe + 0x10000000 * i), 0x3004); - } - } - if (!task_obj) return -EINVAL; - if (job->use_core_num == 1) { - task_start = args->subcore_task[core_index].task_start; - task_end = args->subcore_task[core_index].task_start + - args->subcore_task[core_index].task_end - 1; - task_number = args->subcore_task[core_index].task_number; - } else if (job->use_core_num == 2) { - task_start = args->subcore_task[core_index].task_start; - task_end = args->subcore_task[core_index].task_start + - args->subcore_task[core_index].task_end - 1; - task_number = args->subcore_task[core_index].task_number; - } else if (job->use_core_num == 3) { - task_start = args->subcore_task[core_index + 2].task_start; - task_end = args->subcore_task[core_index + 2].task_start + - args->subcore_task[core_index + 2].task_end - 1; - task_number = args->subcore_task[core_index + 
2].task_number; + if (rknpu_dev->config->num_irqs > 1) { + for (i = 0; i < rknpu_dev->config->num_irqs; i++) { + if (i == core_index) { + REG_WRITE((0xe + 0x10000000 * i), 0x1004); + REG_WRITE((0xe + 0x10000000 * i), 0x3004); + } + } + + if (job->use_core_num == 1) { + task_start = args->subcore_task[core_index].task_start; + task_end = args->subcore_task[core_index].task_start + + args->subcore_task[core_index].task_number - + 1; + task_number = + args->subcore_task[core_index].task_number; + } else if (job->use_core_num == 2) { + task_start = args->subcore_task[core_index].task_start; + task_end = args->subcore_task[core_index].task_start + + args->subcore_task[core_index].task_number - + 1; + task_number = + args->subcore_task[core_index].task_number; + } else if (job->use_core_num == 3) { + task_start = + args->subcore_task[core_index + 2].task_start; + task_end = + args->subcore_task[core_index + 2].task_start + + args->subcore_task[core_index + 2].task_number - + 1; + task_number = + args->subcore_task[core_index + 2].task_number; + } } task_base = task_obj->kv_addr; @@ -217,20 +242,22 @@ static inline int rknpu_job_commit_pc(struct rknpu_job *job, int core_index) first_task = &task_base[task_start]; last_task = &task_base[task_end]; - REG_WRITE(first_task->regcmd_data, RKNPU_OFFSET_PC_DATA_ADDR); + REG_WRITE(first_task->regcmd_addr, RKNPU_OFFSET_PC_DATA_ADDR); - REG_WRITE(first_task->regcfg_amount + - rknpu_dev->config->pc_data_extra_amount - 1, + REG_WRITE((first_task->regcfg_amount + RKNPU_PC_DATA_EXTRA_AMOUNT + + pc_data_amount_scale - 1) / + pc_data_amount_scale - + 1, RKNPU_OFFSET_PC_DATA_AMOUNT); REG_WRITE(last_task->int_mask, RKNPU_OFFSET_INT_MASK); REG_WRITE(first_task->int_mask, RKNPU_OFFSET_INT_CLEAR); - REG_WRITE(((0x6 | task_pp_en) << 12) | task_number, + REG_WRITE(((0x6 | task_pp_en) << pc_task_number_bits) | task_number, RKNPU_OFFSET_PC_TASK_CONTROL); - REG_WRITE(0x0, RKNPU_OFFSET_PC_DMA_BASE_ADDR); + REG_WRITE(args->task_base_addr, 
RKNPU_OFFSET_PC_DMA_BASE_ADDR); job->first_task = first_task; job->last_task = last_task; @@ -279,7 +306,7 @@ static void rknpu_job_next(struct rknpu_device *rknpu_dev, int core_index) subcore_data->job = job; job->run_count--; - + job->hw_recoder_time = ktime_get(); spin_unlock_irqrestore(&rknpu_dev->irq_lock, flags); if (job->run_count == 0) { @@ -298,6 +325,7 @@ static void rknpu_job_done(struct rknpu_job *job, int ret, int core_index) struct rknpu_subcore_data *subcore_data = NULL; unsigned long flags; int task_num = 0; + ktime_t now = ktime_get(); subcore_data = &rknpu_dev->subcore_datas[core_index]; task_num = rknn_get_task_number(job, core_index); @@ -305,6 +333,8 @@ static void rknpu_job_done(struct rknpu_job *job, int ret, int core_index) subcore_data->job = NULL; subcore_data->task_num = subcore_data->task_num - task_num; job->interrupt_count--; + subcore_data->timer.busy_time += + ktime_us_delta(now, job->hw_recoder_time); spin_unlock_irqrestore(&rknpu_dev->irq_lock, flags); if (job->interrupt_count == 0) { @@ -401,10 +431,12 @@ static void rknpu_job_abort(struct rknpu_job *job) msleep(100); if (job->ret == -ETIMEDOUT) { LOG_ERROR( - "job timeout, irq status: %#x, raw status: %#x, require mask: %#x\n", + "job timeout, irq status: %#x, raw status: %#x, require mask: %#x, task counter: %#x\n", REG_READ(RKNPU_OFFSET_INT_STATUS), REG_READ(RKNPU_OFFSET_INT_RAW_STATUS), - job->int_mask[core_index]); + job->int_mask[core_index], + (REG_READ(RKNPU_OFFSET_PC_TASK_STATUS) & + rknpu_dev->config->pc_task_number_mask)); rknpu_soft_reset(rknpu_dev); } for (i = 0; i < rknpu_dev->config->num_irqs; i++) { @@ -461,18 +493,22 @@ static inline irqreturn_t rknpu_irq_handler(int irq, void *data, int core_index) return IRQ_HANDLED; status = REG_READ(RKNPU_OFFSET_INT_STATUS); - REG_WRITE(RKNPU_INT_CLEAR, RKNPU_OFFSET_INT_CLEAR); job->int_status[core_index] = status; if (rknpu_fuzz_status(status) != job->int_mask[core_index]) { LOG_ERROR( - "invalid irq status: %#x, raw status: 
%#x, require mask: %#x\n", + "invalid irq status: %#x, raw status: %#x, require mask: %#x, task counter: %#x\n", status, REG_READ(RKNPU_OFFSET_INT_RAW_STATUS), - job->int_mask[core_index]); + job->int_mask[core_index], + (REG_READ(RKNPU_OFFSET_PC_TASK_STATUS) & + rknpu_dev->config->pc_task_number_mask)); + REG_WRITE(RKNPU_INT_CLEAR, RKNPU_OFFSET_INT_CLEAR); return IRQ_HANDLED; } + REG_WRITE(RKNPU_INT_CLEAR, RKNPU_OFFSET_INT_CLEAR); + rknpu_job_done(job, 0, core_index); return IRQ_HANDLED; @@ -541,6 +577,7 @@ static void rknpu_job_timeout_clean(struct rknpu_device *rknpu_dev, } } +#ifdef CONFIG_ROCKCHIP_RKNPU_DRM_GEM int rknpu_submit_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv) { @@ -561,6 +598,7 @@ int rknpu_submit_ioctl(struct drm_device *dev, void *data, } if (args->flags & RKNPU_JOB_FENCE_IN) { +#ifdef CONFIG_ROCKCHIP_RKNPU_FENCE struct dma_fence *in_fence; in_fence = sync_file_get_fence(args->fence_fd); @@ -589,9 +627,16 @@ int rknpu_submit_ioctl(struct drm_device *dev, void *data, return ret; } +#else + LOG_ERROR( + "failed to use rknpu fence, please enable rknpu fence config!\n"); + rknpu_job_free(job); + return -EINVAL; +#endif } if (args->flags & RKNPU_JOB_FENCE_OUT) { +#ifdef CONFIG_ROCKCHIP_RKNPU_FENCE ret = rknpu_fence_alloc(job); if (ret) { rknpu_job_free(job); @@ -599,6 +644,12 @@ int rknpu_submit_ioctl(struct drm_device *dev, void *data, } job->args->fence_fd = rknpu_fence_get_fd(job); args->fence_fd = job->args->fence_fd; +#else + LOG_ERROR( + "failed to use rknpu fence, please enable rknpu fence config!\n"); + rknpu_job_free(job); + return -EINVAL; +#endif } if (args->flags & RKNPU_JOB_NONBLOCK) { @@ -625,6 +676,120 @@ int rknpu_submit_ioctl(struct drm_device *dev, void *data, return ret; } +#endif + +#ifdef CONFIG_ROCKCHIP_RKNPU_DMA_HEAP +int rknpu_submit_ioctl(struct rknpu_device *rknpu_dev, unsigned long data) +{ + struct rknpu_submit args; + struct rknpu_job *job = NULL; + int ret = -EINVAL; + + if 
(unlikely(copy_from_user(&args, (struct rknpu_submit *)data, + sizeof(struct rknpu_submit)))) { + LOG_ERROR("%s: copy_from_user failed\n", __func__); + ret = -EFAULT; + return ret; + } + + if (args.task_number == 0) { + LOG_ERROR("invalid rknpu task number!\n"); + return -EINVAL; + } + + job = rknpu_job_alloc(rknpu_dev, &args); + if (!job) { + LOG_ERROR("failed to allocate rknpu job!\n"); + return -ENOMEM; + } + + if (args.flags & RKNPU_JOB_FENCE_IN) { +#ifdef CONFIG_ROCKCHIP_RKNPU_FENCE + struct dma_fence *in_fence; + + in_fence = sync_file_get_fence(args.fence_fd); + + if (!in_fence) { + LOG_ERROR("invalid fence in fd, fd = %d\n", + args.fence_fd); + return -EINVAL; + } + args.fence_fd = -1; + + /* + * Wait if the fence is from a foreign context, or if the fence + * array contains any fence from a foreign context. + */ + ret = 0; + if (!dma_fence_match_context(in_fence, + rknpu_dev->fence_ctx->context)) + ret = dma_fence_wait_timeout(in_fence, true, + args.timeout); + dma_fence_put(in_fence); + if (ret < 0) { + if (ret != -ERESTARTSYS) + LOG_ERROR("Error (%d) waiting for fence!\n", + ret); + + return ret; + } +#else + LOG_ERROR( + "failed to use rknpu fence, please enable rknpu fence config!\n"); + rknpu_job_free(job); + return -EINVAL; +#endif + } + + if (args.flags & RKNPU_JOB_FENCE_OUT) { +#ifdef CONFIG_ROCKCHIP_RKNPU_FENCE + ret = rknpu_fence_alloc(job); + if (ret) { + rknpu_job_free(job); + return ret; + } + job->args->fence_fd = rknpu_fence_get_fd(job); + args.fence_fd = job->args->fence_fd; +#else + LOG_ERROR( + "failed to use rknpu fence, please enable rknpu fence config!\n"); + rknpu_job_free(job); + return -EINVAL; +#endif + } + + if (args.flags & RKNPU_JOB_NONBLOCK) { + job->flags |= RKNPU_JOB_ASYNC; + rknpu_job_timeout_clean(rknpu_dev, job->args->core_mask); + rknpu_job_schedule(job); + ret = job->ret; + if (ret) { + rknpu_job_abort(job); + return ret; + } + } else { + rknpu_job_schedule(job); + if (args.flags & RKNPU_JOB_PC) + job->ret = 
rknpu_job_wait(job); + + args.task_counter = job->args->task_counter; + ret = job->ret; + if (!ret) + rknpu_job_cleanup(job); + else + rknpu_job_abort(job); + } + + if (unlikely(copy_to_user((struct rknpu_submit *)data, &args, + sizeof(struct rknpu_submit)))) { + LOG_ERROR("%s: copy_to_user failed\n", __func__); + ret = -EFAULT; + return ret; + } + + return ret; +} +#endif int rknpu_get_hw_version(struct rknpu_device *rknpu_dev, uint32_t *version) { @@ -717,6 +882,7 @@ int rknpu_get_rw_amount(struct rknpu_device *rknpu_dev, uint32_t *dt_wr, uint32_t *dt_rd, uint32_t *wd_rd) { void __iomem *rknpu_core_base = rknpu_dev->base[0]; + int amount_scale = rknpu_dev->config->pc_data_amount_scale; if (!rknpu_dev->config->bw_enable) { LOG_WARN("Get rw_amount is not supported on this device!\n"); @@ -726,13 +892,13 @@ int rknpu_get_rw_amount(struct rknpu_device *rknpu_dev, uint32_t *dt_wr, spin_lock(&rknpu_dev->lock); if (dt_wr != NULL) - *dt_wr = REG_READ(RKNPU_OFFSET_DT_WR_AMOUNT); + *dt_wr = REG_READ(RKNPU_OFFSET_DT_WR_AMOUNT) * amount_scale; if (dt_rd != NULL) - *dt_rd = REG_READ(RKNPU_OFFSET_DT_RD_AMOUNT); + *dt_rd = REG_READ(RKNPU_OFFSET_DT_RD_AMOUNT) * amount_scale; if (wd_rd != NULL) - *wd_rd = REG_READ(RKNPU_OFFSET_WT_RD_AMOUNT); + *wd_rd = REG_READ(RKNPU_OFFSET_WT_RD_AMOUNT) * amount_scale; spin_unlock(&rknpu_dev->lock); diff --git a/drivers/rknpu/rknpu_mem.c b/drivers/rknpu/rknpu_mem.c new file mode 100644 index 000000000000..539d7c1ae6cf --- /dev/null +++ b/drivers/rknpu/rknpu_mem.c @@ -0,0 +1,205 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) Fuzhou Rockchip Electronics Co.Ltd + * Author: Felix Zeng + */ + +#include +#include + +#if KERNEL_VERSION(5, 10, 0) <= LINUX_VERSION_CODE +#include +#endif + +#include "rknpu_drv.h" +#include "rknpu_ioctl.h" +#include "rknpu_mem.h" + +int rknpu_mem_create_ioctl(struct rknpu_device *rknpu_dev, unsigned long data) +{ + struct rknpu_mem_create args; + int ret = -EINVAL; + struct dma_buf_attachment *attachment; + 
struct sg_table *table; + struct scatterlist *sgl; + dma_addr_t phys; + struct dma_buf *dmabuf; + struct page **pages; + struct page *page; + struct rknpu_mem_object *rknpu_obj = NULL; + int i, fd; + unsigned int length, page_count; + + if (unlikely(copy_from_user(&args, (struct rknpu_mem_create *)data, + sizeof(struct rknpu_mem_create)))) { + LOG_ERROR("%s: copy_from_user failed\n", __func__); + ret = -EFAULT; + return ret; + } + + if (args.flags & RKNPU_MEM_NON_CONTIGUOUS) { + LOG_ERROR("%s: malloc iommu memory unsupported in current!\n", + __func__); + ret = -EFAULT; + return ret; + } + + rknpu_obj = kzalloc(sizeof(*rknpu_obj), GFP_KERNEL); + if (!rknpu_obj) + return PTR_ERR(rknpu_obj); + + if (args.handle > 0) { + fd = args.handle; + + dmabuf = dma_buf_get(fd); + if (IS_ERR(dmabuf)) { + ret = PTR_ERR(dmabuf); + goto err_free_obj; + } + + rknpu_obj->dmabuf = dmabuf; + rknpu_obj->owner = 0; + } else { + /* Start test kernel alloc/free dma buf */ + dmabuf = rk_dma_heap_buffer_alloc(rknpu_dev->heap, args.size, + O_CLOEXEC | O_RDWR, 0x0, + dev_name(rknpu_dev->dev)); + if (IS_ERR(dmabuf)) + return PTR_ERR(dmabuf); + + rknpu_obj->dmabuf = dmabuf; + rknpu_obj->owner = 1; + + fd = dma_buf_fd(dmabuf, O_CLOEXEC | O_RDWR); + if (fd < 0) { + ret = -EFAULT; + goto err_free_dma_buf; + } + } + + attachment = dma_buf_attach(dmabuf, rknpu_dev->dev); + if (IS_ERR(attachment)) { + ret = PTR_ERR(attachment); + goto err_free_dma_buf; + } + + table = dma_buf_map_attachment(attachment, DMA_BIDIRECTIONAL); + if (IS_ERR(table)) { + ret = PTR_ERR(table); + goto err_free_dma_buf; + } + + for_each_sgtable_sg(table, sgl, i) { + phys = sg_dma_address(sgl); + page = sg_page(sgl); + LOG_DEBUG("%s, %d, phys = %pad, length = 0x%x\n", __func__, + __LINE__, &phys, sg_dma_len(sgl)); + length = sg_dma_len(sgl); + } + + page_count = length >> PAGE_SHIFT; + pages = kmalloc_array(page_count, sizeof(struct page), GFP_KERNEL); + for (i = 0; i < page_count; i++) + pages[i] = &page[i]; + + 
rknpu_obj->kv_addr = vmap(pages, page_count, VM_MAP, PAGE_KERNEL); + kfree(pages); + + dma_buf_unmap_attachment(attachment, table, DMA_BIDIRECTIONAL); + dma_buf_detach(dmabuf, attachment); + + rknpu_obj->size = PAGE_ALIGN(args.size); + rknpu_obj->dma_addr = phys; + rknpu_obj->sgt = table; + + args.size = rknpu_obj->size; + args.obj_addr = (__u64)(uintptr_t)rknpu_obj; + args.dma_addr = rknpu_obj->dma_addr; + args.handle = fd; + + LOG_DEBUG( + "args.handle = %d, args.size = %lld, rknpu_obj = %#llx, rknpu_obj->dma_addr = %#llx\n", + args.handle, args.size, (__u64)(uintptr_t)rknpu_obj, + (__u64)rknpu_obj->dma_addr); + + if (unlikely(copy_to_user((struct rknpu_mem_create *)data, &args, + sizeof(struct rknpu_mem_create)))) { + LOG_ERROR("%s: copy_to_user failed\n", __func__); + ret = -EFAULT; + goto err_free_dma_buf; + } + return 0; + +err_free_dma_buf: + dma_buf_put(dmabuf); + if (rknpu_obj->owner) + rk_dma_heap_buffer_free(dmabuf); + return ret; + +err_free_obj: + kfree(rknpu_obj); + return ret; +} + +int rknpu_mem_destroy_ioctl(struct rknpu_device *rknpu_dev, unsigned long data) +{ + struct rknpu_mem_object *rknpu_obj = NULL; + struct rknpu_mem_destroy args; + struct dma_buf *dmabuf; + int ret = -EFAULT; + + if (unlikely(copy_from_user(&args, (struct rknpu_mem_destroy *)data, + sizeof(struct rknpu_mem_destroy)))) { + LOG_ERROR("%s: copy_from_user failed\n", __func__); + ret = -EFAULT; + return ret; + } + + if (!kern_addr_valid(args.obj_addr)) { + LOG_ERROR("%s: invalid params, unknown obj_addr\n", __func__); + ret = -EFAULT; + return ret; + } + + rknpu_obj = (struct rknpu_mem_object *)(uintptr_t)args.obj_addr; + dmabuf = rknpu_obj->dmabuf; + LOG_DEBUG( + "free args.handle = %d, rknpu_obj = %#llx, rknpu_obj->dma_addr = %#llx\n", + args.handle, (__u64)(uintptr_t)rknpu_obj, + (__u64)rknpu_obj->dma_addr); + + vunmap(rknpu_obj->kv_addr); + + kfree(rknpu_obj); + + return 0; +} + +int rknpu_mem_sync_ioctl(struct rknpu_device *rknpu_dev, unsigned long data) +{ + struct 
rknpu_mem_object *rknpu_obj = NULL; + struct rknpu_mem_sync args; + struct dma_buf *dmabuf; + int ret = -EFAULT; + + if (unlikely(copy_from_user(&args, (struct rknpu_mem_sync *)data, + sizeof(struct rknpu_mem_sync)))) { + LOG_ERROR("%s: copy_from_user failed\n", __func__); + ret = -EFAULT; + return ret; + } + + rknpu_obj = (struct rknpu_mem_object *)(uintptr_t)args.obj_addr; + dmabuf = rknpu_obj->dmabuf; + + if (args.flags & RKNPU_MEM_SYNC_TO_DEVICE) { + dmabuf->ops->end_cpu_access_partial(dmabuf, DMA_TO_DEVICE, + args.offset, args.size); + } + if (args.flags & RKNPU_MEM_SYNC_FROM_DEVICE) { + dmabuf->ops->begin_cpu_access_partial(dmabuf, DMA_FROM_DEVICE, + args.offset, args.size); + } + + return 0; +}