From ae35fd7eec8faba1a2ac787ee86056e749ef0ae6 Mon Sep 17 00:00:00 2001 From: Yu Qiaowei Date: Thu, 7 Aug 2025 15:04:31 +0800 Subject: [PATCH] video: rockchip: rga3: use genpool/dmapool to manage cmd_buf on each scheduler In some scenarios with high timing requirements, frequent calls to dma_alloc_coherent may trigger memory reclamation with low probability, thereby increasing the overall time consumption per frame. Change-Id: I28ffe47c5db40c82a54254b056f117931efbe38e Signed-off-by: Yu Qiaowei --- drivers/video/rockchip/rga3/Kconfig | 23 +++ .../video/rockchip/rga3/include/rga_dma_buf.h | 24 +++ drivers/video/rockchip/rga3/include/rga_drv.h | 2 + .../rockchip/rga3/include/rga_hw_config.h | 2 + drivers/video/rockchip/rga3/include/rga_job.h | 2 - drivers/video/rockchip/rga3/rga_dma_buf.c | 145 ++++++++++++++++++ drivers/video/rockchip/rga3/rga_drv.c | 37 +++++ drivers/video/rockchip/rga3/rga_hw_config.c | 16 ++ drivers/video/rockchip/rga3/rga_job.c | 7 +- 9 files changed, 253 insertions(+), 5 deletions(-) diff --git a/drivers/video/rockchip/rga3/Kconfig b/drivers/video/rockchip/rga3/Kconfig index c8c96b2d67ff..caa3fb7e2f63 100644 --- a/drivers/video/rockchip/rga3/Kconfig +++ b/drivers/video/rockchip/rga3/Kconfig @@ -34,4 +34,27 @@ config ROCKCHIP_RGA_DEBUGGER help Enabling the debugger of multi RGA, you can use procfs and debugfs for debugging. +config ROCKCHIP_RGA_GENPOOL + bool "Use genpool to manage cmd_buf" + default n + help + Use genpool to manage cmd_buf on each scheduler; dmapool is used by default. + + When using genpool to manage cmd_buf, the total cmd_buf count is + ROCKCHIP_RGA_CMD_BUF_COUNT, which represents the maximum number of + jobs that can be allocated. If the number of active jobs exceeds + this number, new jobs cannot be allocated. + + Therefore, this config should be used with caution; the default + dmapool does not cause job allocation to fail in this way.
+ +config ROCKCHIP_RGA_CMD_BUF_COUNT + int "Number of pre-allocated command buffers for RGA" + depends on ROCKCHIP_RGA_GENPOOL + range 4 256 + default 32 + help + This option configures the number of command buffers (cmd_buf) that are + pre-allocated by the RGA driver during initialization. + endif diff --git a/drivers/video/rockchip/rga3/include/rga_dma_buf.h b/drivers/video/rockchip/rga3/include/rga_dma_buf.h index 05a801c0705c..2caead96275f 100644 --- a/drivers/video/rockchip/rga3/include/rga_dma_buf.h +++ b/drivers/video/rockchip/rga3/include/rga_dma_buf.h @@ -9,8 +9,26 @@ #ifndef __RGA3_DMA_BUF_H__ #define __RGA3_DMA_BUF_H__ +#ifdef CONFIG_ROCKCHIP_RGA_GENPOOL +#include <linux/genalloc.h> +#else +#include <linux/dmapool.h> +#endif + #include "rga_drv.h" +struct rga_dma_buf_pool { +#ifdef CONFIG_ROCKCHIP_RGA_GENPOOL + struct gen_pool *pool; +#else + struct dma_pool *pool; +#endif + struct rga_dma_buffer *dma_buf; + size_t block_size; + + struct rga_scheduler_t *scheduler; +}; + #ifndef for_each_sgtable_sg /* * Loop over each sg element in the given sg_table object.
@@ -45,5 +63,11 @@ void rga_dma_sync_flush_range(void *pstart, void *pend, struct rga_scheduler_t * struct rga_dma_buffer *rga_dma_alloc_coherent(struct rga_scheduler_t *scheduler, int size); int rga_dma_free(struct rga_dma_buffer *buffer); +struct rga_dma_buf_pool *rga_dma_buf_pool_init(struct rga_scheduler_t *scheduler, + int block_size); +void rga_dma_buf_pool_destroy(struct rga_dma_buf_pool *pool); +struct rga_dma_buffer *rga_dma_buf_pool_alloc(struct rga_dma_buf_pool *pool); +int rga_dma_buf_pool_free(struct rga_dma_buf_pool *pool, struct rga_dma_buffer *buffer); + #endif /* #ifndef __RGA3_DMA_BUF_H__ */ diff --git a/drivers/video/rockchip/rga3/include/rga_drv.h b/drivers/video/rockchip/rga3/include/rga_drv.h index 45b13b221292..73b8d482479e 100644 --- a/drivers/video/rockchip/rga3/include/rga_drv.h +++ b/drivers/video/rockchip/rga3/include/rga_drv.h @@ -370,6 +370,8 @@ struct rga_scheduler_t { int core; struct rga_timer timer; + + struct rga_dma_buf_pool *cmd_buf_pool; }; struct rga_request { diff --git a/drivers/video/rockchip/rga3/include/rga_hw_config.h b/drivers/video/rockchip/rga3/include/rga_hw_config.h index 34f48e00472e..12e7e81e3f47 100644 --- a/drivers/video/rockchip/rga3/include/rga_hw_config.h +++ b/drivers/video/rockchip/rga3/include/rga_hw_config.h @@ -71,6 +71,8 @@ struct rga_hw_data { unsigned int win_size; enum rga_mmu mmu; + + uint32_t cmd_reg_size; }; extern const struct rga_hw_data rga3_data; diff --git a/drivers/video/rockchip/rga3/include/rga_job.h b/drivers/video/rockchip/rga3/include/rga_job.h index 28bf35bacc55..e63659bb0e8e 100644 --- a/drivers/video/rockchip/rga3/include/rga_job.h +++ b/drivers/video/rockchip/rga3/include/rga_job.h @@ -13,8 +13,6 @@ #include "rga_drv.h" -#define RGA_CMD_REG_SIZE 256 /* 32 * 8 bit */ - enum job_flags { RGA_JOB_DONE = 1 << 0, RGA_JOB_ASYNC = 1 << 1, diff --git a/drivers/video/rockchip/rga3/rga_dma_buf.c b/drivers/video/rockchip/rga3/rga_dma_buf.c index 2ec2c0e14725..bb9aa3a7c5d3 100644 --- 
a/drivers/video/rockchip/rga3/rga_dma_buf.c +++ b/drivers/video/rockchip/rga3/rga_dma_buf.c @@ -484,3 +484,148 @@ fail_dma_alloc: return NULL; } + +struct rga_dma_buf_pool *rga_dma_buf_pool_init(struct rga_scheduler_t *scheduler, int block_size) +{ + int ret; + struct rga_dma_buf_pool *pool; + + pool = kzalloc(sizeof(*pool), GFP_KERNEL); + if (!pool) { + rga_err("Failed to allocate memory for rga_dma_buf_pool.\n"); + return ERR_PTR(-ENOMEM); + } + +#ifdef CONFIG_ROCKCHIP_RGA_GENPOOL + block_size = ALIGN(block_size, scheduler->data->byte_stride_align); + + pool->dma_buf = rga_dma_alloc_coherent(scheduler, + block_size * CONFIG_ROCKCHIP_RGA_CMD_BUF_COUNT); + if (pool->dma_buf == NULL) { + rga_err("Failed to allocate coherent memory for dma_buf_pool.\n"); + ret = -ENOMEM; + goto err_free_pool; + } + + pool->pool = gen_pool_create(ilog2(block_size), -1); + if (!pool->pool) { + rga_err("Failed to create memory pool.\n"); + ret = -ENOMEM; + goto err_free_dma_buf; + } + + ret = gen_pool_add_virt(pool->pool, (unsigned long)pool->dma_buf->vaddr, + pool->dma_buf->dma_addr, pool->dma_buf->size, -1); + if (ret < 0) { + rga_err("Failed to add memory to gen_pool.\n"); + goto err_destroy_pool; + } +#else + pool->pool = dma_pool_create("rga_cmd_buf_pool", + scheduler->iommu_info ? 
scheduler->iommu_info->default_dev : + scheduler->dev, + block_size, scheduler->data->byte_stride_align, 0); + if (!pool->pool) { + rga_err("Failed to create dma pool.\n"); + ret = -ENOMEM; + goto err_free_pool; + } +#endif + + pool->scheduler = scheduler; + pool->block_size = block_size; + + return pool; + +#ifdef CONFIG_ROCKCHIP_RGA_GENPOOL +err_destroy_pool: + gen_pool_destroy(pool->pool); + pool->pool = NULL; + +err_free_dma_buf: + rga_dma_free(pool->dma_buf); + pool->dma_buf = NULL; +#endif + +err_free_pool: + kfree(pool); + + return ERR_PTR(ret); +} + +void rga_dma_buf_pool_destroy(struct rga_dma_buf_pool *pool) +{ + if (!pool) + return; + + if (pool->pool) { +#ifdef CONFIG_ROCKCHIP_RGA_GENPOOL + gen_pool_destroy(pool->pool); +#else + dma_pool_destroy(pool->pool); +#endif + pool->pool = NULL; + } + + if (pool->dma_buf) { + rga_dma_free(pool->dma_buf); + pool->dma_buf = NULL; + } + + kfree(pool); +} + +struct rga_dma_buffer *rga_dma_buf_pool_alloc(struct rga_dma_buf_pool *pool) +{ + struct rga_dma_buffer *buffer; + + if (!pool || !pool->pool) { + rga_err("rga_dma_buf_pool is not initialized.\n"); + return NULL; + } + + buffer = kzalloc(sizeof(*buffer), GFP_KERNEL); + if (!buffer) { + rga_err("Failed to allocate memory for rga_dma_buffer.\n"); + return NULL; + } + +#ifdef CONFIG_ROCKCHIP_RGA_GENPOOL + buffer->vaddr = gen_pool_dma_zalloc(pool->pool, pool->block_size, &buffer->dma_addr); +#else + buffer->vaddr = dma_pool_zalloc(pool->pool, GFP_KERNEL, &buffer->dma_addr); +#endif + if (!buffer->vaddr) { + rga_err("Failed to allocate memory from gen_pool.\n"); + kfree(buffer); + return NULL; + } + + buffer->size = pool->block_size; + buffer->map_dev = pool->scheduler->dev; + if (pool->scheduler->data->mmu == RGA_IOMMU) + buffer->iova = buffer->dma_addr; + + return buffer; +} + +int rga_dma_buf_pool_free(struct rga_dma_buf_pool *pool, struct rga_dma_buffer *buffer) +{ + if (!pool || !pool->pool || !buffer || !buffer->vaddr) { + rga_err("Invalid pool or buffer.\n"); 
+ return -EINVAL; + } + +#ifdef CONFIG_ROCKCHIP_RGA_GENPOOL + gen_pool_free(pool->pool, (unsigned long)buffer->vaddr, buffer->size); +#else + dma_pool_free(pool->pool, buffer->vaddr, buffer->dma_addr); +#endif + buffer->vaddr = NULL; + buffer->dma_addr = 0; + buffer->iova = 0; + + kfree(buffer); + + return 0; +} diff --git a/drivers/video/rockchip/rga3/rga_drv.c b/drivers/video/rockchip/rga3/rga_drv.c index 9ef013bb651c..b764ae6ed460 100644 --- a/drivers/video/rockchip/rga3/rga_drv.c +++ b/drivers/video/rockchip/rga3/rga_drv.c @@ -1556,6 +1556,7 @@ static struct platform_driver rga2_driver = { static int __init rga_init(void) { int ret; + int i; rga_drvdata = kzalloc(sizeof(struct rga_drvdata_t), GFP_KERNEL); if (rga_drvdata == NULL) { @@ -1590,6 +1591,21 @@ static int __init rga_init(void) goto err_unbind_iommu; } + /* init cmd reg buffer pool */ + for (i = 0; i < rga_drvdata->num_of_scheduler; i++) { + struct rga_scheduler_t *scheduler = rga_drvdata->scheduler[i]; + + scheduler->cmd_buf_pool = + rga_dma_buf_pool_init(scheduler, + scheduler->data->cmd_reg_size * sizeof(uint32_t)); + if (IS_ERR(scheduler->cmd_buf_pool)) { + dev_err(scheduler->dev, "failed to init cmd buf pool\n"); + ret = PTR_ERR(scheduler->cmd_buf_pool); + scheduler->cmd_buf_pool = NULL; + goto err_destroy_buf_pool; + } + } + rga_init_timer(); rga_mm_init(&rga_drvdata->mm); @@ -1610,6 +1626,16 @@ static int __init rga_init(void) return 0; +err_destroy_buf_pool: + for (i = 0; i < rga_drvdata->num_of_scheduler; i++) { + struct rga_scheduler_t *scheduler = rga_drvdata->scheduler[i]; + + if (scheduler->cmd_buf_pool) { + rga_dma_buf_pool_destroy(scheduler->cmd_buf_pool); + scheduler->cmd_buf_pool = NULL; + } + } + err_unbind_iommu: rga_iommu_unbind(); @@ -1627,6 +1653,8 @@ err_free_drvdata: static void __exit rga_exit(void) { + int i; + #ifdef CONFIG_ROCKCHIP_RGA_DEBUGGER rga_debugger_remove(&rga_drvdata->debugger); #endif @@ -1643,6 +1671,15 @@ static void __exit rga_exit(void) rga_cancel_timer(); + 
for (i = 0; i < rga_drvdata->num_of_scheduler; i++) { + struct rga_scheduler_t *scheduler = rga_drvdata->scheduler[i]; + + if (scheduler->cmd_buf_pool) { + rga_dma_buf_pool_destroy(scheduler->cmd_buf_pool); + scheduler->cmd_buf_pool = NULL; + } + } + rga_iommu_unbind(); platform_driver_unregister(&rga3_driver); diff --git a/drivers/video/rockchip/rga3/rga_hw_config.c b/drivers/video/rockchip/rga3/rga_hw_config.c index fadf251cfe0d..581693e82cb9 100644 --- a/drivers/video/rockchip/rga3/rga_hw_config.c +++ b/drivers/video/rockchip/rga3/rga_hw_config.c @@ -589,6 +589,8 @@ const struct rga_hw_data rga3_data = { .csc_y2r_mode = RGA_MODE_CSC_BT601L | RGA_MODE_CSC_BT601F | RGA_MODE_CSC_BT709 | RGA_MODE_CSC_BT2020, .mmu = RGA_IOMMU, + + .cmd_reg_size = 48, //0x100:0x1bc }; const struct rga_hw_data rga2e_data = { @@ -613,6 +615,8 @@ const struct rga_hw_data rga2e_data = { .csc_y2r_mode = RGA_MODE_CSC_BT601L | RGA_MODE_CSC_BT601F | RGA_MODE_CSC_BT709, .mmu = RGA_MMU, + + .cmd_reg_size = 32, //0x100:0x17c }; const struct rga_hw_data rga2e_1106_data = { @@ -639,6 +643,8 @@ const struct rga_hw_data rga2e_1106_data = { .csc_y2r_mode = RGA_MODE_CSC_BT601L | RGA_MODE_CSC_BT601F | RGA_MODE_CSC_BT709, .mmu = RGA_NONE_MMU, + + .cmd_reg_size = 32, //0x100:0x17c }; const struct rga_hw_data rga2e_3506_data = { @@ -663,6 +669,8 @@ const struct rga_hw_data rga2e_3506_data = { .csc_y2r_mode = RGA_MODE_CSC_BT601L | RGA_MODE_CSC_BT601F | RGA_MODE_CSC_BT709, .mmu = RGA_NONE_MMU, + + .cmd_reg_size = 32, //0x100:0x17c }; const struct rga_hw_data rga2e_iommu_data = { @@ -689,6 +697,8 @@ const struct rga_hw_data rga2e_iommu_data = { .csc_y2r_mode = RGA_MODE_CSC_BT601L | RGA_MODE_CSC_BT601F | RGA_MODE_CSC_BT709, .mmu = RGA_IOMMU, + + .cmd_reg_size = 32, //0x100:0x17c }; const struct rga_hw_data rga2p_iommu_data = { @@ -715,6 +725,8 @@ const struct rga_hw_data rga2p_iommu_data = { .csc_y2r_mode = RGA_MODE_CSC_BT601L | RGA_MODE_CSC_BT601F | RGA_MODE_CSC_BT709, .mmu = RGA_IOMMU, + + .cmd_reg_size = 
32, //0x100:0x17c }; const struct rga_hw_data rga2p_lite_1103b_data = { @@ -739,6 +751,8 @@ const struct rga_hw_data rga2p_lite_1103b_data = { .csc_y2r_mode = RGA_MODE_CSC_BT601L | RGA_MODE_CSC_BT601F | RGA_MODE_CSC_BT709, .mmu = RGA_NONE_MMU, + + .cmd_reg_size = 32, //0x100:0x17c }; const struct rga_hw_data rga2p_iommu_non_fbc_data = { @@ -765,4 +779,6 @@ const struct rga_hw_data rga2p_iommu_non_fbc_data = { .csc_y2r_mode = RGA_MODE_CSC_BT601L | RGA_MODE_CSC_BT601F | RGA_MODE_CSC_BT709, .mmu = RGA_IOMMU, + + .cmd_reg_size = 32, //0x100:0x17c }; diff --git a/drivers/video/rockchip/rga3/rga_job.c b/drivers/video/rockchip/rga3/rga_job.c index 93e47be000c3..78f1837625e4 100644 --- a/drivers/video/rockchip/rga3/rga_job.c +++ b/drivers/video/rockchip/rga3/rga_job.c @@ -16,7 +16,7 @@ static void rga_job_free(struct rga_job *job) { if (job->cmd_buf) - rga_dma_free(job->cmd_buf); + rga_dma_buf_pool_free(job->scheduler->cmd_buf_pool, job->cmd_buf); kfree(job); } @@ -425,9 +425,10 @@ int rga_job_commit(struct rga_req *rga_command_base, struct rga_request *request goto err_free_job; } - job->cmd_buf = rga_dma_alloc_coherent(scheduler, RGA_CMD_REG_SIZE); + job->cmd_buf = rga_dma_buf_pool_alloc(scheduler->cmd_buf_pool); if (job->cmd_buf == NULL) { rga_job_err(job, "failed to alloc command buffer.\n"); + job->ret = -ENOMEM; goto err_free_job; } @@ -467,7 +468,7 @@ err_power_disable: rga_power_disable(scheduler); err_free_cmd_buf: - rga_dma_free(job->cmd_buf); + rga_dma_buf_pool_free(scheduler->cmd_buf_pool, job->cmd_buf); job->cmd_buf = NULL; err_free_job: