video: rockchip: rga3: use genpool/dmapool to manage cmd_buf on each scheduler

In some scenarios with high timing requirements, frequent calls to
dma_alloc_coherent may trigger memory reclamation with low probability,
thereby increasing the overall time consumption per frame.

Change-Id: I28ffe47c5db40c82a54254b056f117931efbe38e
Signed-off-by: Yu Qiaowei <cerf.yu@rock-chips.com>
This commit is contained in:
Yu Qiaowei
2025-08-07 15:04:31 +08:00
committed by Tao Huang
parent ea07a6e6fe
commit ae35fd7eec
9 changed files with 253 additions and 5 deletions

View File

@@ -34,4 +34,27 @@ config ROCKCHIP_RGA_DEBUGGER
help
Enabling the debugger of multi RGA, you can use procfs and debugfs for debugging.
config ROCKCHIP_RGA_GENPOOL
bool "Use genpool to manage cmd_buf"
default n
help
Use genpool to manage cmd_buf on each scheduler; by default dmapool is used.
When using genpool to manage cmd_buf, the total cmd_buf count is
ROCKCHIP_RGA_CMD_BUF_COUNT, which represents the maximum number of
jobs that can be allocated. If the number of active jobs exceeds
this number, new jobs cannot be allocated.
Therefore, this config should be used with caution; with the default
dmapool, job allocation is not limited in this way.
config ROCKCHIP_RGA_CMD_BUF_COUNT
int "Number of pre-allocated command buffers for RGA"
depends on ROCKCHIP_RGA_GENPOOL
range 4 256
default 32
help
This option configures the number of command buffers (cmd_buf) that are
pre-allocated by the RGA driver during initialization.
endif

View File

@@ -9,8 +9,26 @@
#ifndef __RGA3_DMA_BUF_H__
#define __RGA3_DMA_BUF_H__
#ifdef CONFIG_ROCKCHIP_RGA_GENPOOL
#include <linux/genalloc.h>
#else
#include <linux/dmapool.h>
#endif
#include "rga_drv.h"
/*
 * Pool of fixed-size DMA buffers owned by one scheduler.
 * The backing allocator is chosen at build time by CONFIG_ROCKCHIP_RGA_GENPOOL.
 */
struct rga_dma_buf_pool {
/* Allocator handle: a gen_pool when genpool is enabled, otherwise a dma_pool. */
#ifdef CONFIG_ROCKCHIP_RGA_GENPOOL
struct gen_pool *pool;
#else
struct dma_pool *pool;
#endif
/*
 * Coherent region handed to the gen_pool. Only the genpool build assigns it;
 * the dma_pool build leaves it NULL.
 */
struct rga_dma_buffer *dma_buf;
/* Size in bytes reported for every buffer taken from the pool. */
size_t block_size;
/* Scheduler this pool was created for. */
struct rga_scheduler_t *scheduler;
};
#ifndef for_each_sgtable_sg
/*
* Loop over each sg element in the given sg_table object.
@@ -45,5 +63,11 @@ void rga_dma_sync_flush_range(void *pstart, void *pend, struct rga_scheduler_t *
struct rga_dma_buffer *rga_dma_alloc_coherent(struct rga_scheduler_t *scheduler, int size);
int rga_dma_free(struct rga_dma_buffer *buffer);
struct rga_dma_buf_pool *rga_dma_buf_pool_init(struct rga_scheduler_t *scheduler,
int block_size);
void rga_dma_buf_pool_destroy(struct rga_dma_buf_pool *pool);
struct rga_dma_buffer *rga_dma_buf_pool_alloc(struct rga_dma_buf_pool *pool);
int rga_dma_buf_pool_free(struct rga_dma_buf_pool *pool, struct rga_dma_buffer *buffer);
#endif /* #ifndef __RGA3_DMA_BUF_H__ */

View File

@@ -370,6 +370,8 @@ struct rga_scheduler_t {
int core;
struct rga_timer timer;
struct rga_dma_buf_pool *cmd_buf_pool;
};
struct rga_request {

View File

@@ -71,6 +71,8 @@ struct rga_hw_data {
unsigned int win_size;
enum rga_mmu mmu;
uint32_t cmd_reg_size;
};
extern const struct rga_hw_data rga3_data;

View File

@@ -13,8 +13,6 @@
#include "rga_drv.h"
#define RGA_CMD_REG_SIZE 256 /* 32 * 8 bit */
enum job_flags {
RGA_JOB_DONE = 1 << 0,
RGA_JOB_ASYNC = 1 << 1,

View File

@@ -484,3 +484,148 @@ fail_dma_alloc:
return NULL;
}
struct rga_dma_buf_pool *rga_dma_buf_pool_init(struct rga_scheduler_t *scheduler, int block_size)
{
int ret;
struct rga_dma_buf_pool *pool;
pool = kzalloc(sizeof(*pool), GFP_KERNEL);
if (!pool) {
rga_err("Failed to allocate memory for rga_dma_buf_pool.\n");
return ERR_PTR(-ENOMEM);
}
#ifdef CONFIG_ROCKCHIP_RGA_GENPOOL
block_size = ALIGN(block_size, scheduler->data->byte_stride_align);
pool->dma_buf = rga_dma_alloc_coherent(scheduler,
block_size * CONFIG_ROCKCHIP_RGA_CMD_BUF_COUNT);
if (pool->dma_buf == NULL) {
rga_err("Failed to allocate coherent memory for dma_buf_pool.\n");
ret = -ENOMEM;
goto err_free_pool;
}
pool->pool = gen_pool_create(ilog2(block_size), -1);
if (!pool->pool) {
rga_err("Failed to create memory pool.\n");
ret = -ENOMEM;
goto err_free_dma_buf;
}
ret = gen_pool_add_virt(pool->pool, (unsigned long)pool->dma_buf->vaddr,
pool->dma_buf->dma_addr, pool->dma_buf->size, -1);
if (ret < 0) {
rga_err("Failed to add memory to gen_pool.\n");
goto err_destroy_pool;
}
#else
pool->pool = dma_pool_create("rga_cmd_buf_pool",
scheduler->iommu_info ? scheduler->iommu_info->default_dev :
scheduler->dev,
block_size, scheduler->data->byte_stride_align, 0);
if (!pool->pool) {
rga_err("Failed to create dma pool.\n");
ret = -ENOMEM;
goto err_free_pool;
}
#endif
pool->scheduler = scheduler;
pool->block_size = block_size;
return pool;
#ifdef CONFIG_ROCKCHIP_RGA_GENPOOL
err_destroy_pool:
gen_pool_destroy(pool->pool);
pool->pool = NULL;
err_free_dma_buf:
rga_dma_free(pool->dma_buf);
pool->dma_buf = NULL;
#endif
err_free_pool:
kfree(pool);
return ERR_PTR(ret);
}
/**
 * rga_dma_buf_pool_destroy - tear down a pool created by rga_dma_buf_pool_init()
 * @pool: pool to destroy; NULL is accepted and ignored
 *
 * Destroys the backing allocator (gen_pool or dma_pool, depending on
 * CONFIG_ROCKCHIP_RGA_GENPOOL), releases the backing DMA buffer when one is
 * attached, and finally frees the pool descriptor itself.
 */
void rga_dma_buf_pool_destroy(struct rga_dma_buf_pool *pool)
{
	if (pool == NULL)
		return;

#ifdef CONFIG_ROCKCHIP_RGA_GENPOOL
	if (pool->pool != NULL)
		gen_pool_destroy(pool->pool);
#else
	if (pool->pool != NULL)
		dma_pool_destroy(pool->pool);
#endif
	pool->pool = NULL;

	if (pool->dma_buf != NULL)
		rga_dma_free(pool->dma_buf);
	pool->dma_buf = NULL;

	kfree(pool);
}
/**
 * rga_dma_buf_pool_alloc - take one zeroed buffer from the pool
 * @pool: pool returned by rga_dma_buf_pool_init()
 *
 * Allocates a descriptor and one zero-initialised block from the backing
 * allocator (gen_pool or dma_pool, depending on CONFIG_ROCKCHIP_RGA_GENPOOL).
 * The descriptor's size is the pool's block size, map_dev is the scheduler's
 * device, and iova mirrors dma_addr only when the scheduler's hardware data
 * reports RGA_IOMMU.
 *
 * Return: the buffer descriptor, or NULL when the pool is not initialised or
 * an allocation fails. Release it with rga_dma_buf_pool_free().
 */
struct rga_dma_buffer *rga_dma_buf_pool_alloc(struct rga_dma_buf_pool *pool)
{
	struct rga_dma_buffer *buffer;

	if (!pool || !pool->pool) {
		rga_err("rga_dma_buf_pool is not initialized.\n");
		return NULL;
	}

	buffer = kzalloc(sizeof(*buffer), GFP_KERNEL);
	if (!buffer) {
		rga_err("Failed to allocate memory for rga_dma_buffer.\n");
		return NULL;
	}

#ifdef CONFIG_ROCKCHIP_RGA_GENPOOL
	buffer->vaddr = gen_pool_dma_zalloc(pool->pool, pool->block_size, &buffer->dma_addr);
#else
	buffer->vaddr = dma_pool_zalloc(pool->pool, GFP_KERNEL, &buffer->dma_addr);
#endif
	if (!buffer->vaddr) {
		/* Name the allocator that actually failed so the log is not misleading. */
#ifdef CONFIG_ROCKCHIP_RGA_GENPOOL
		rga_err("Failed to allocate memory from gen_pool.\n");
#else
		rga_err("Failed to allocate memory from dma_pool.\n");
#endif
		kfree(buffer);
		return NULL;
	}

	buffer->size = pool->block_size;
	buffer->map_dev = pool->scheduler->dev;

	/* iova stays zero (from kzalloc) for every other MMU type. */
	if (pool->scheduler->data->mmu == RGA_IOMMU)
		buffer->iova = buffer->dma_addr;

	return buffer;
}
/**
 * rga_dma_buf_pool_free - return a buffer to the pool it was taken from
 * @pool: pool the buffer was allocated from
 * @buffer: descriptor obtained from rga_dma_buf_pool_alloc()
 *
 * Gives the block back to the backing allocator, clears the address fields
 * and frees the descriptor. Nothing is released when validation fails.
 *
 * Return: 0 on success, -EINVAL when the pool is missing or uninitialised, or
 * when the buffer is NULL or has no virtual address.
 */
int rga_dma_buf_pool_free(struct rga_dma_buf_pool *pool, struct rga_dma_buffer *buffer)
{
	if (!pool || !pool->pool)
		goto err_invalid;
	if (!buffer || !buffer->vaddr)
		goto err_invalid;

#ifdef CONFIG_ROCKCHIP_RGA_GENPOOL
	gen_pool_free(pool->pool, (unsigned long)buffer->vaddr, buffer->size);
#else
	dma_pool_free(pool->pool, buffer->vaddr, buffer->dma_addr);
#endif

	/* Scrub the addresses before the descriptor goes away. */
	buffer->iova = 0;
	buffer->dma_addr = 0;
	buffer->vaddr = NULL;
	kfree(buffer);

	return 0;

err_invalid:
	rga_err("Invalid pool or buffer.\n");
	return -EINVAL;
}

View File

@@ -1556,6 +1556,7 @@ static struct platform_driver rga2_driver = {
static int __init rga_init(void)
{
int ret;
int i;
rga_drvdata = kzalloc(sizeof(struct rga_drvdata_t), GFP_KERNEL);
if (rga_drvdata == NULL) {
@@ -1590,6 +1591,21 @@ static int __init rga_init(void)
goto err_unbind_iommu;
}
/* init cmd reg buffer pool */
for (i = 0; i < rga_drvdata->num_of_scheduler; i++) {
struct rga_scheduler_t *scheduler = rga_drvdata->scheduler[i];
scheduler->cmd_buf_pool =
rga_dma_buf_pool_init(scheduler,
scheduler->data->cmd_reg_size * sizeof(uint32_t));
if (IS_ERR(scheduler->cmd_buf_pool)) {
dev_err(scheduler->dev, "failed to init cmd buf pool\n");
ret = PTR_ERR(scheduler->cmd_buf_pool);
scheduler->cmd_buf_pool = NULL;
goto err_destroy_buf_pool;
}
}
rga_init_timer();
rga_mm_init(&rga_drvdata->mm);
@@ -1610,6 +1626,16 @@ static int __init rga_init(void)
return 0;
err_destroy_buf_pool:
for (i = 0; i < rga_drvdata->num_of_scheduler; i++) {
struct rga_scheduler_t *scheduler = rga_drvdata->scheduler[i];
if (scheduler->cmd_buf_pool) {
rga_dma_buf_pool_destroy(scheduler->cmd_buf_pool);
scheduler->cmd_buf_pool = NULL;
}
}
err_unbind_iommu:
rga_iommu_unbind();
@@ -1627,6 +1653,8 @@ err_free_drvdata:
static void __exit rga_exit(void)
{
int i;
#ifdef CONFIG_ROCKCHIP_RGA_DEBUGGER
rga_debugger_remove(&rga_drvdata->debugger);
#endif
@@ -1643,6 +1671,15 @@ static void __exit rga_exit(void)
rga_cancel_timer();
for (i = 0; i < rga_drvdata->num_of_scheduler; i++) {
struct rga_scheduler_t *scheduler = rga_drvdata->scheduler[i];
if (scheduler->cmd_buf_pool) {
rga_dma_buf_pool_destroy(scheduler->cmd_buf_pool);
scheduler->cmd_buf_pool = NULL;
}
}
rga_iommu_unbind();
platform_driver_unregister(&rga3_driver);

View File

@@ -589,6 +589,8 @@ const struct rga_hw_data rga3_data = {
.csc_y2r_mode = RGA_MODE_CSC_BT601L | RGA_MODE_CSC_BT601F |
RGA_MODE_CSC_BT709 | RGA_MODE_CSC_BT2020,
.mmu = RGA_IOMMU,
.cmd_reg_size = 48, //0x100:0x1bc
};
const struct rga_hw_data rga2e_data = {
@@ -613,6 +615,8 @@ const struct rga_hw_data rga2e_data = {
.csc_y2r_mode = RGA_MODE_CSC_BT601L | RGA_MODE_CSC_BT601F |
RGA_MODE_CSC_BT709,
.mmu = RGA_MMU,
.cmd_reg_size = 32, //0x100:0x17c
};
const struct rga_hw_data rga2e_1106_data = {
@@ -639,6 +643,8 @@ const struct rga_hw_data rga2e_1106_data = {
.csc_y2r_mode = RGA_MODE_CSC_BT601L | RGA_MODE_CSC_BT601F |
RGA_MODE_CSC_BT709,
.mmu = RGA_NONE_MMU,
.cmd_reg_size = 32, //0x100:0x17c
};
const struct rga_hw_data rga2e_3506_data = {
@@ -663,6 +669,8 @@ const struct rga_hw_data rga2e_3506_data = {
.csc_y2r_mode = RGA_MODE_CSC_BT601L | RGA_MODE_CSC_BT601F |
RGA_MODE_CSC_BT709,
.mmu = RGA_NONE_MMU,
.cmd_reg_size = 32, //0x100:0x17c
};
const struct rga_hw_data rga2e_iommu_data = {
@@ -689,6 +697,8 @@ const struct rga_hw_data rga2e_iommu_data = {
.csc_y2r_mode = RGA_MODE_CSC_BT601L | RGA_MODE_CSC_BT601F |
RGA_MODE_CSC_BT709,
.mmu = RGA_IOMMU,
.cmd_reg_size = 32, //0x100:0x17c
};
const struct rga_hw_data rga2p_iommu_data = {
@@ -715,6 +725,8 @@ const struct rga_hw_data rga2p_iommu_data = {
.csc_y2r_mode = RGA_MODE_CSC_BT601L | RGA_MODE_CSC_BT601F |
RGA_MODE_CSC_BT709,
.mmu = RGA_IOMMU,
.cmd_reg_size = 32, //0x100:0x17c
};
const struct rga_hw_data rga2p_lite_1103b_data = {
@@ -739,6 +751,8 @@ const struct rga_hw_data rga2p_lite_1103b_data = {
.csc_y2r_mode = RGA_MODE_CSC_BT601L | RGA_MODE_CSC_BT601F |
RGA_MODE_CSC_BT709,
.mmu = RGA_NONE_MMU,
.cmd_reg_size = 32, //0x100:0x17c
};
const struct rga_hw_data rga2p_iommu_non_fbc_data = {
@@ -765,4 +779,6 @@ const struct rga_hw_data rga2p_iommu_non_fbc_data = {
.csc_y2r_mode = RGA_MODE_CSC_BT601L | RGA_MODE_CSC_BT601F |
RGA_MODE_CSC_BT709,
.mmu = RGA_IOMMU,
.cmd_reg_size = 32, //0x100:0x17c
};

View File

@@ -16,7 +16,7 @@
static void rga_job_free(struct rga_job *job)
{
if (job->cmd_buf)
rga_dma_free(job->cmd_buf);
rga_dma_buf_pool_free(job->scheduler->cmd_buf_pool, job->cmd_buf);
kfree(job);
}
@@ -425,9 +425,10 @@ int rga_job_commit(struct rga_req *rga_command_base, struct rga_request *request
goto err_free_job;
}
job->cmd_buf = rga_dma_alloc_coherent(scheduler, RGA_CMD_REG_SIZE);
job->cmd_buf = rga_dma_buf_pool_alloc(scheduler->cmd_buf_pool);
if (job->cmd_buf == NULL) {
rga_job_err(job, "failed to alloc command buffer.\n");
job->ret = -ENOMEM;
goto err_free_job;
}
@@ -467,7 +468,7 @@ err_power_disable:
rga_power_disable(scheduler);
err_free_cmd_buf:
rga_dma_free(job->cmd_buf);
rga_dma_buf_pool_free(scheduler->cmd_buf_pool, job->cmd_buf);
job->cmd_buf = NULL;
err_free_job: