rk3288-vpu: run a dummy encode before encoding right after a decode

Workaround (WAR) for corrupted hardware state when encoding directly after
certain decoding runs: if the previously run context was a decoder and the
next ready context is an encoder, a driver-internal dummy encode context
(64x64 YUYV source, VP8 destination, zero-filled buffers) is run first and
the real context stays on the ready list.

Review fixes folded into this patch:
 * dummy context: set src_crop.height (src_crop.left was assigned twice,
   leaving the crop height at zero);
 * drop the stray ';' after the closing brace of rk3288_vpu_enc_exit();
 * "Desintation" -> "Destination" in the rk3288_vpu_dev kernel-doc;
 * add short comments on the new helpers.

The hunks were re-split into one diff line per text line; whitespace inside
context lines is reconstructed and the "index" blob IDs were not regenerated,
so use "git apply --ignore-whitespace" if an exact match fails.

diff --git a/drivers/media/platform/rk3288-vpu/rk3288_vpu.c b/drivers/media/platform/rk3288-vpu/rk3288_vpu.c
index 17728371896b..dc92f2b9471e 100644
--- a/drivers/media/platform/rk3288-vpu/rk3288_vpu.c
+++ b/drivers/media/platform/rk3288-vpu/rk3288_vpu.c
@@ -93,6 +93,21 @@ static void __rk3288_vpu_dequeue_run_locked(struct rk3288_vpu_ctx *ctx)
 	ctx->run.dst = dst;
 }
 
+/*
+ * Pick the context to actually run: the device's dummy encode context when
+ * the previous run was a decode and @ctx is an encoder, otherwise @ctx itself.
+ */
+static struct rk3288_vpu_ctx *
+rk3288_vpu_encode_after_decode_war(struct rk3288_vpu_ctx *ctx)
+{
+	struct rk3288_vpu_dev *dev = ctx->dev;
+
+	if (dev->was_decoding && rk3288_vpu_ctx_is_encoder(ctx))
+		return dev->dummy_encode_ctx;
+
+	return ctx;
+}
+
 static void rk3288_vpu_try_run(struct rk3288_vpu_dev *dev)
 {
 	struct rk3288_vpu_ctx *ctx = NULL;
@@ -115,10 +130,24 @@ static void rk3288_vpu_try_run(struct rk3288_vpu_dev *dev)
 		goto out;
 
 	ctx = list_entry(dev->ready_ctxs.next, struct rk3288_vpu_ctx, list);
-	list_del_init(&ctx->list);
+
+	/*
+	 * WAR for corrupted hardware state when encoding directly after
+	 * certain decoding runs.
+	 *
+	 * If previous context was decoding and currently picked one is
+	 * encoding then we need to execute a dummy encode with proper
+	 * settings to reinitialize certain internal hardware state.
+	 */
+	ctx = rk3288_vpu_encode_after_decode_war(ctx);
+
+	if (!rk3288_vpu_ctx_is_dummy_encode(ctx)) {
+		list_del_init(&ctx->list);
+		__rk3288_vpu_dequeue_run_locked(ctx);
+	}
 
 	dev->current_ctx = ctx;
-	__rk3288_vpu_dequeue_run_locked(ctx);
+	dev->was_decoding = !rk3288_vpu_ctx_is_encoder(ctx);
 
 out:
 	spin_unlock_irqrestore(&dev->irqlock, flags);
@@ -145,8 +174,6 @@ static void __rk3288_vpu_try_context_locked(struct rk3288_vpu_dev *dev,
 void rk3288_vpu_run_done(struct rk3288_vpu_ctx *ctx,
 			 enum vb2_buffer_state result)
 {
-	struct vb2_buffer *src = &ctx->run.src->b;
-	struct vb2_buffer *dst = &ctx->run.dst->b;
 	struct rk3288_vpu_dev *dev = ctx->dev;
 	unsigned long flags;
 
@@ -155,9 +182,14 @@ void rk3288_vpu_run_done(struct rk3288_vpu_ctx *ctx,
 	if (ctx->run_ops->run_done)
 		ctx->run_ops->run_done(ctx, result);
 
-	dst->v4l2_buf.timestamp = src->v4l2_buf.timestamp;
-	vb2_buffer_done(&ctx->run.src->b, result);
-	vb2_buffer_done(&ctx->run.dst->b, result);
+	if (!rk3288_vpu_ctx_is_dummy_encode(ctx)) {
+		struct vb2_buffer *src = &ctx->run.src->b;
+		struct vb2_buffer *dst = &ctx->run.dst->b;
+
+		dst->v4l2_buf.timestamp = src->v4l2_buf.timestamp;
+		vb2_buffer_done(src, result);
+		vb2_buffer_done(dst, result);
+	}
 
 	dev->current_ctx = NULL;
 	wake_up_all(&dev->run_wq);
@@ -603,6 +635,12 @@ static int rk3288_vpu_probe(struct platform_device *pdev)
 
 	platform_set_drvdata(pdev, vpu);
 
+	ret = rk3288_vpu_enc_init_dummy_ctx(vpu);
+	if (ret) {
+		dev_err(&pdev->dev, "Failed to create dummy encode context\n");
+		goto err_dummy_enc;
+	}
+
 	/* encoder */
 	vfd = video_device_alloc();
 	if (!vfd) {
@@ -674,6 +712,8 @@ err_dec_alloc:
 err_enc_reg:
 	video_device_release(vpu->vfd_enc);
 err_enc_alloc:
+	rk3288_vpu_enc_free_dummy_ctx(vpu);
+err_dummy_enc:
 	v4l2_device_unregister(&vpu->v4l2_dev);
 err_v4l2_dev_reg:
 	vb2_dma_contig_cleanup_ctx(vpu->alloc_ctx_vm);
@@ -704,6 +744,7 @@ static int rk3288_vpu_remove(struct platform_device *pdev)
 
 	video_unregister_device(vpu->vfd_dec);
 	video_unregister_device(vpu->vfd_enc);
+	rk3288_vpu_enc_free_dummy_ctx(vpu);
 	v4l2_device_unregister(&vpu->v4l2_dev);
 	vb2_dma_contig_cleanup_ctx(vpu->alloc_ctx_vm);
 	vb2_dma_contig_cleanup_ctx(vpu->alloc_ctx);
diff --git a/drivers/media/platform/rk3288-vpu/rk3288_vpu_common.h b/drivers/media/platform/rk3288-vpu/rk3288_vpu_common.h
index 155693cb79e4..9ac44e82c42f 100644
--- a/drivers/media/platform/rk3288-vpu/rk3288_vpu_common.h
+++ b/drivers/media/platform/rk3288-vpu/rk3288_vpu_common.h
@@ -156,6 +156,11 @@ enum rk3288_vpu_state {
  * @current_ctx:	Context being currently processed by hardware.
  * @run_wq:		Wait queue to wait for run completion.
  * @watchdog_work:	Delayed work for hardware timeout handling.
+ * @dummy_encode_ctx:	Context used to run dummy frame encoding to initialize
+ *			encoder hardware state.
+ * @dummy_encode_src:	Source buffers used for dummy frame encoding.
+ * @dummy_encode_dst:	Destination buffer used for dummy frame encoding.
+ * @was_decoding:	Indicates whether last run context was a decoder.
  */
 struct rk3288_vpu_dev {
 	struct v4l2_device v4l2_dev;
@@ -180,6 +185,10 @@ struct rk3288_vpu_dev {
 	struct rk3288_vpu_ctx *current_ctx;
 	wait_queue_head_t run_wq;
 	struct delayed_work watchdog_work;
+	struct rk3288_vpu_ctx *dummy_encode_ctx;
+	struct rk3288_vpu_aux_buf dummy_encode_src[VIDEO_MAX_PLANES];
+	struct rk3288_vpu_aux_buf dummy_encode_dst;
+	bool was_decoding;
 };
 
 /**
@@ -439,6 +448,14 @@ static inline bool rk3288_vpu_ctx_is_encoder(struct rk3288_vpu_ctx *ctx)
 	return ctx->vpu_dst_fmt->codec_mode != RK_VPU_CODEC_NONE;
 }
 
+/* True if @ctx is the dummy encode context of the device it belongs to. */
+static inline bool rk3288_vpu_ctx_is_dummy_encode(struct rk3288_vpu_ctx *ctx)
+{
+	struct rk3288_vpu_dev *dev = ctx->dev;
+
+	return ctx == dev->dummy_encode_ctx;
+}
+
 int rk3288_vpu_ctrls_setup(struct rk3288_vpu_ctx *ctx,
 			   const struct v4l2_ctrl_ops *ctrl_ops,
 			   struct rk3288_vpu_control *controls,
diff --git a/drivers/media/platform/rk3288-vpu/rk3288_vpu_enc.c b/drivers/media/platform/rk3288-vpu/rk3288_vpu_enc.c
index 9a0a6ac9e5fc..1f6e9225a2e8 100644
--- a/drivers/media/platform/rk3288-vpu/rk3288_vpu_enc.c
+++ b/drivers/media/platform/rk3288-vpu/rk3288_vpu_enc.c
@@ -1391,4 +1391,143 @@ void rk3288_vpu_enc_exit(struct rk3288_vpu_ctx *ctx)
 
 	rk3288_vpu_aux_buf_free(vpu, &ctx->run.priv_dst);
 	rk3288_vpu_aux_buf_free(vpu, &ctx->run.priv_src);
+}
+
+/*
+ * WAR for encoder state corruption after decoding
+ */
+
+static const struct rk3288_vpu_run_ops dummy_encode_run_ops = {
+	/* No ops needed for dummy encoding. */
+};
+
+#define DUMMY_W		64
+#define DUMMY_H		64
+#define DUMMY_SRC_FMT	V4L2_PIX_FMT_YUYV
+#define DUMMY_DST_FMT	V4L2_PIX_FMT_VP8
+#define DUMMY_DST_SIZE	(32 * 1024)
+
+/**
+ * rk3288_vpu_enc_init_dummy_ctx() - Set up the context used for dummy encodes.
+ * @dev: VPU device that owns the dummy context and its buffers.
+ *
+ * Builds a DUMMY_W x DUMMY_H encode context with zero-filled source and
+ * destination buffers and stores it in @dev->dummy_encode_ctx.
+ *
+ * Return: 0 on success, -ENOMEM or the error code of the failing helper.
+ */
+int rk3288_vpu_enc_init_dummy_ctx(struct rk3288_vpu_dev *dev)
+{
+	struct rk3288_vpu_ctx *ctx;
+	int ret;
+	int i;
+
+	ctx = devm_kzalloc(dev->dev, sizeof(*ctx), GFP_KERNEL);
+	if (!ctx)
+		return -ENOMEM;
+
+	ctx->dev = dev;
+
+	ctx->vpu_src_fmt = find_format(DUMMY_SRC_FMT, false);
+	ctx->src_fmt.width = DUMMY_W;
+	ctx->src_fmt.height = DUMMY_H;
+	ctx->src_fmt.pixelformat = ctx->vpu_src_fmt->fourcc;
+	ctx->src_fmt.num_planes = ctx->vpu_src_fmt->num_planes;
+
+	calculate_plane_sizes(ctx->vpu_src_fmt, ctx->src_fmt.width,
+				ctx->src_fmt.height, &ctx->src_fmt);
+
+	ctx->vpu_dst_fmt = find_format(DUMMY_DST_FMT, true);
+	ctx->dst_fmt.width = ctx->src_fmt.width;
+	ctx->dst_fmt.height = ctx->src_fmt.height;
+	ctx->dst_fmt.pixelformat = ctx->vpu_dst_fmt->fourcc;
+	ctx->dst_fmt.plane_fmt[0].sizeimage = DUMMY_DST_SIZE;
+	ctx->dst_fmt.plane_fmt[0].bytesperline = 0;
+	ctx->dst_fmt.num_planes = 1;
+
+	INIT_LIST_HEAD(&ctx->src_queue);
+
+	ctx->src_crop.left = 0;
+	ctx->src_crop.top = 0;
+	ctx->src_crop.width = ctx->src_fmt.width;
+	ctx->src_crop.height = ctx->src_fmt.height;
+
+	INIT_LIST_HEAD(&ctx->dst_queue);
+	INIT_LIST_HEAD(&ctx->list);
+
+	ctx->run.vp8e.reg_params = rk3288_vpu_vp8e_get_dummy_params();
+	ctx->run_ops = &dummy_encode_run_ops;
+
+	ctx->run.dst = devm_kzalloc(dev->dev, sizeof(*ctx->run.dst),
+					GFP_KERNEL);
+	if (!ctx->run.dst)
+		return -ENOMEM;
+
+	ret = rk3288_vpu_aux_buf_alloc(dev, &ctx->run.priv_src,
+					RK3288_HW_PARAMS_SIZE);
+	if (ret)
+		return ret;
+
+	ret = rk3288_vpu_aux_buf_alloc(dev, &ctx->run.priv_dst,
+					RK3288_RET_PARAMS_SIZE);
+	if (ret)
+		goto err_free_priv_src;
+
+	for (i = 0; i < ctx->src_fmt.num_planes; ++i) {
+		ret = rk3288_vpu_aux_buf_alloc(dev, &dev->dummy_encode_src[i],
+					ctx->src_fmt.plane_fmt[i].sizeimage);
+		if (ret)
+			goto err_free_src;
+
+		memset(dev->dummy_encode_src[i].cpu, 0,
+				dev->dummy_encode_src[i].size);
+	}
+
+	ret = rk3288_vpu_aux_buf_alloc(dev, &dev->dummy_encode_dst,
+					ctx->dst_fmt.plane_fmt[0].sizeimage);
+	if (ret)
+		goto err_free_src;
+
+	memset(dev->dummy_encode_dst.cpu, 0, dev->dummy_encode_dst.size);
+
+	ret = rk3288_vpu_init(ctx);
+	if (ret)
+		goto err_free_dst;
+
+	dev->dummy_encode_ctx = ctx;
+
+	return 0;
+
+err_free_dst:
+	rk3288_vpu_aux_buf_free(dev, &dev->dummy_encode_dst);
+err_free_src:
+	for (i = 0; i < ctx->src_fmt.num_planes; ++i)
+		if (dev->dummy_encode_src[i].cpu)
+			rk3288_vpu_aux_buf_free(dev, &dev->dummy_encode_src[i]);
+	rk3288_vpu_aux_buf_free(dev, &ctx->run.priv_dst);
+err_free_priv_src:
+	rk3288_vpu_aux_buf_free(dev, &ctx->run.priv_src);
+
+	return ret;
+}
+
+/**
+ * rk3288_vpu_enc_free_dummy_ctx() - Tear down the dummy encode context.
+ * @dev: VPU device whose dummy encode context is torn down.
+ *
+ * Dereferences @dev->dummy_encode_ctx unconditionally, so it must only be
+ * called after rk3288_vpu_enc_init_dummy_ctx() has succeeded.
+ */
+void rk3288_vpu_enc_free_dummy_ctx(struct rk3288_vpu_dev *dev)
+{
+	struct rk3288_vpu_ctx *ctx = dev->dummy_encode_ctx;
+	int i;
+
+	rk3288_vpu_deinit(ctx);
+
+	for (i = 0; i < ctx->src_fmt.num_planes; ++i)
+		rk3288_vpu_aux_buf_free(dev, &dev->dummy_encode_src[i]);
+	rk3288_vpu_aux_buf_free(dev, &dev->dummy_encode_dst);
+	rk3288_vpu_aux_buf_free(dev, &ctx->run.priv_src);
+	rk3288_vpu_aux_buf_free(dev, &ctx->run.priv_dst);
 }
diff --git a/drivers/media/platform/rk3288-vpu/rk3288_vpu_enc.h b/drivers/media/platform/rk3288-vpu/rk3288_vpu_enc.h
index 80b71c2a5979..4b1979d5d2ef 100644
--- a/drivers/media/platform/rk3288-vpu/rk3288_vpu_enc.h
+++ b/drivers/media/platform/rk3288-vpu/rk3288_vpu_enc.h
@@ -30,5 +30,7 @@ const struct v4l2_ioctl_ops *get_enc_v4l2_ioctl_ops(void);
 struct rk3288_vpu_fmt *get_enc_def_fmt(bool src);
 int rk3288_vpu_enc_init(struct rk3288_vpu_ctx *ctx);
 void rk3288_vpu_enc_exit(struct rk3288_vpu_ctx *ctx);
+int rk3288_vpu_enc_init_dummy_ctx(struct rk3288_vpu_dev *dev);
+void rk3288_vpu_enc_free_dummy_ctx(struct rk3288_vpu_dev *dev);
 
 #endif /* RK3288_VPU_ENC_H_ */
diff --git a/drivers/media/platform/rk3288-vpu/rk3288_vpu_hw.h b/drivers/media/platform/rk3288-vpu/rk3288_vpu_hw.h
index ee5a1466bb05..f8325536295b 100644
--- a/drivers/media/platform/rk3288-vpu/rk3288_vpu_hw.h
+++ b/drivers/media/platform/rk3288-vpu/rk3288_vpu_hw.h
@@ -171,6 +171,7 @@ void rk3288_vpu_vp8e_exit(struct rk3288_vpu_ctx *ctx);
 void rk3288_vpu_vp8e_run(struct rk3288_vpu_ctx *ctx);
 void rk3288_vpu_vp8e_done(struct rk3288_vpu_ctx *ctx,
 			  enum vb2_buffer_state result);
+const struct rk3288_vp8e_reg_params *rk3288_vpu_vp8e_get_dummy_params(void);
 
 void rk3288_vpu_vp8e_assemble_bitstream(struct rk3288_vpu_ctx *ctx,
 					struct rk3288_vpu_buf *dst_buf);
diff --git a/drivers/media/platform/rk3288-vpu/rk3288_vpu_hw_vp8e.c b/drivers/media/platform/rk3288-vpu/rk3288_vpu_hw_vp8e.c
index 25684d32c233..ce02712dc9fc 100644
--- a/drivers/media/platform/rk3288-vpu/rk3288_vpu_hw_vp8e.c
+++ b/drivers/media/platform/rk3288-vpu/rk3288_vpu_hw_vp8e.c
@@ -201,8 +201,13 @@ static void rk3288_vpu_vp8e_set_buffers(struct rk3288_vpu_dev *vpu,
 		rec_buf_dma += rounded_size * 3 / 2;
 	ctx->hw.vp8e.ref_rec_ptr ^= 1;
 
-	dst_dma = vb2_dma_contig_plane_dma_addr(&ctx->run.dst->b, 0);
-	dst_size = vb2_plane_size(&ctx->run.dst->b, 0);
+	if (rk3288_vpu_ctx_is_dummy_encode(ctx)) {
+		dst_dma = vpu->dummy_encode_dst.dma;
+		dst_size = vpu->dummy_encode_dst.size;
+	} else {
+		dst_dma = vb2_dma_contig_plane_dma_addr(&ctx->run.dst->b, 0);
+		dst_size = vb2_plane_size(&ctx->run.dst->b, 0);
+	}
 
 	/*
 	 * stream addr-->|
@@ -266,12 +271,24 @@ static void rk3288_vpu_vp8e_set_buffers(struct rk3288_vpu_dev *vpu,
 				VEPU_REG_ADDR_REC_CHROMA);
 
 	/* Source buffer. */
-	vepu_write_relaxed(vpu, vb2_dma_contig_plane_dma_addr(&ctx->run.src->b,
-				PLANE_Y), VEPU_REG_ADDR_IN_LUMA);
-	vepu_write_relaxed(vpu, vb2_dma_contig_plane_dma_addr(&ctx->run.src->b,
-				PLANE_CB), VEPU_REG_ADDR_IN_CB);
-	vepu_write_relaxed(vpu, vb2_dma_contig_plane_dma_addr(&ctx->run.src->b,
-				PLANE_CR), VEPU_REG_ADDR_IN_CR);
+	if (rk3288_vpu_ctx_is_dummy_encode(ctx)) {
+		vepu_write_relaxed(vpu, vpu->dummy_encode_src[PLANE_Y].dma,
+					VEPU_REG_ADDR_IN_LUMA);
+		vepu_write_relaxed(vpu, vpu->dummy_encode_src[PLANE_CB].dma,
+					VEPU_REG_ADDR_IN_CB);
+		vepu_write_relaxed(vpu, vpu->dummy_encode_src[PLANE_CR].dma,
+					VEPU_REG_ADDR_IN_CR);
+	} else {
+		vepu_write_relaxed(vpu, vb2_dma_contig_plane_dma_addr(
+					&ctx->run.src->b, PLANE_Y),
+					VEPU_REG_ADDR_IN_LUMA);
+		vepu_write_relaxed(vpu, vb2_dma_contig_plane_dma_addr(
+					&ctx->run.src->b, PLANE_CB),
+					VEPU_REG_ADDR_IN_CB);
+		vepu_write_relaxed(vpu, vb2_dma_contig_plane_dma_addr(
+					&ctx->run.src->b, PLANE_CR),
+					VEPU_REG_ADDR_IN_CR);
+	}
 
 	/* Source parameters. */
 	vepu_write_relaxed(vpu, enc_in_img_ctrl(ctx), VEPU_REG_IN_IMG_CTRL);
@@ -408,3 +425,111 @@ void rk3288_vpu_vp8e_done(struct rk3288_vpu_ctx *ctx,
 
 	rk3288_vpu_run_done(ctx, result);
 }
+
+/*
+ * WAR for encoder state corruption after decoding
+ */
+
+static const struct rk3288_vp8e_reg_params dummy_encode_reg_params = {
+	/* 00000014 */ .hdr_len = 0x00000000,
+	/* 00000038 */ .enc_ctrl = VEPU_REG_ENC_CTRL_KEYFRAME_BIT,
+	/* 00000040 */ .enc_ctrl0 = 0x00000000,
+	/* 00000044 */ .enc_ctrl1 = 0x00000000,
+	/* 00000048 */ .enc_ctrl2 = 0x00040014,
+	/* 0000004c */ .enc_ctrl3 = 0x404083c0,
+	/* 00000050 */ .enc_ctrl5 = 0x01006bff,
+	/* 00000054 */ .enc_ctrl4 = 0x00000039,
+	/* 00000058 */ .str_hdr_rem_msb = 0x85848805,
+	/* 0000005c */ .str_hdr_rem_lsb = 0x02000000,
+	/* 00000064 */ .mad_ctrl = 0x00000000,
+	/* 0000006c */ .qp_val = {
+		/* 0000006c */ 0x020213b1,
+		/* 00000070 */ 0x02825249,
+		/* 00000074 */ 0x048409d8,
+		/* 00000078 */ 0x03834c30,
+		/* 0000007c */ 0x020213b1,
+		/* 00000080 */ 0x02825249,
+		/* 00000084 */ 0x00340e0d,
+		/* 00000088 */ 0x401c1a15,
+	},
+	/* 0000008c */ .bool_enc = 0x00018140,
+	/* 00000090 */ .vp8_ctrl0 = 0x000695c0,
+	/* 00000094 */ .rlc_ctrl = 0x14000000,
+	/* 00000098 */ .mb_ctrl = 0x00000000,
+	/* 000000d4 */ .rgb_yuv_coeff = {
+		/* 000000d4 */ 0x962b4c85,
+		/* 000000d8 */ 0x90901d50,
+	},
+	/* 000000dc */ .rgb_mask_msb = 0x0000b694,
+	/* 000000e0 */ .intra_area_ctrl = 0xffffffff,
+	/* 000000e4 */ .cir_intra_ctrl = 0x00000000,
+	/* 000000f0 */ .first_roi_area = 0xffffffff,
+	/* 000000f4 */ .second_roi_area = 0xffffffff,
+	/* 000000f8 */ .mvc_ctrl = 0x01780000,
+	/* 00000100 */ .intra_penalty = {
+		/* 00000100 */ 0x00010005,
+		/* 00000104 */ 0x00015011,
+		/* 00000108 */ 0x0000c005,
+		/* 0000010c */ 0x00016010,
+		/* 00000110 */ 0x0001a018,
+		/* 00000114 */ 0x00018015,
+		/* 00000118 */ 0x0001d01a,
+	},
+	/* 00000120 */ .seg_qp = {
+		/* 00000120 */ 0x020213b1,
+		/* 00000124 */ 0x02825249,
+		/* 00000128 */ 0x048409d8,
+		/* 0000012c */ 0x03834c30,
+		/* 00000130 */ 0x020213b1,
+		/* 00000134 */ 0x02825249,
+		/* 00000138 */ 0x00340e0d,
+		/* 0000013c */ 0x341c1a15,
+		/* 00000140 */ 0x020213b1,
+		/* 00000144 */ 0x02825249,
+		/* 00000148 */ 0x048409d8,
+		/* 0000014c */ 0x03834c30,
+		/* 00000150 */ 0x020213b1,
+		/* 00000154 */ 0x02825249,
+		/* 00000158 */ 0x00340e0d,
+		/* 0000015c */ 0x341c1a15,
+		/* 00000160 */ 0x020213b1,
+		/* 00000164 */ 0x02825249,
+		/* 00000168 */ 0x048409d8,
+		/* 0000016c */ 0x03834c30,
+		/* 00000170 */ 0x020213b1,
+		/* 00000174 */ 0x02825249,
+		/* 00000178 */ 0x00340e0d,
+		/* 0000017c */ 0x341c1a15,
+	},
+	/* 00000180 */ .dmv_4p_1p_penalty = {
+		/* 00000180 */ 0x00020406,
+		/* 00000184 */ 0x080a0c0e,
+		/* 00000188 */ 0x10121416,
+		/* 0000018c */ 0x181a1c1e,
+		/* 00000190 */ 0x20222426,
+		/* 00000194 */ 0x282a2c2e,
+		/* 00000198 */ 0x30323436,
+		/* 0000019c */ 0x383a3c3e,
+		/* 000001a0 */ 0x40424446,
+		/* 000001a4 */ 0x484a4c4e,
+		/* 000001a8 */ 0x50525456,
+		/* 000001ac */ 0x585a5c5e,
+		/* 000001b0 */ 0x60626466,
+		/* 000001b4 */ 0x686a6c6e,
+		/* 000001b8 */ 0x70727476,
+		/* NOTE: Further 17 registers set to 0. */
+	},
+	/*
+	 * NOTE: Following registers all set to 0:
+	 *  - dmv_qpel_penalty,
+	 *  - vp8_ctrl1,
+	 *  - bit_cost_golden,
+	 *  - loop_flt_delta.
+	 */
+};
+
+/* Accessor for the constant register parameters used by the dummy encode. */
+const struct rk3288_vp8e_reg_params *rk3288_vpu_vp8e_get_dummy_params(void)
+{
+	return &dummy_encode_reg_params;
+}