From 953f382ba3c75fe2d23c9ec307324edaed3d2c9c Mon Sep 17 00:00:00 2001 From: Andy Yan Date: Fri, 27 Sep 2024 10:42:49 +0800 Subject: [PATCH] drm/rockchip: Report drm error event to userspace Userspace can do an error recovery by disabling/enabling the whole display pipeline when it gets an error event such as IOMMU_FAULT. Change-Id: I4fb5bc7f6f1c04eb3559462ef5ffee5960507d2f Signed-off-by: Andy Yan --- drivers/gpu/drm/rockchip/rockchip_drm_drv.c | 150 ++++++++++++++++++- drivers/gpu/drm/rockchip/rockchip_drm_drv.h | 17 +++ drivers/gpu/drm/rockchip/rockchip_drm_vop2.c | 30 ++++ include/uapi/drm/rockchip_drm.h | 6 + 4 files changed, 202 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_drv.c b/drivers/gpu/drm/rockchip/rockchip_drm_drv.c index 6c3f083d4b8e..30772aa63c16 100644 --- a/drivers/gpu/drm/rockchip/rockchip_drm_drv.c +++ b/drivers/gpu/drm/rockchip/rockchip_drm_drv.c @@ -18,7 +18,9 @@ #include #include #include +#include #include +#include #include #include @@ -1370,6 +1372,28 @@ void rockchip_unregister_crtc_funcs(struct drm_crtc *crtc) priv->crtc_funcs[pipe] = NULL; } +/* + * a high frequency of page faults will follow up, if + * there is an iommu fault, so it's better to limit the + * registers dump frequency to save log buffer + * + * Report no more than once every 10s, give userspace time + * to do recovery process, as for a serdes based display + * pipeline, the disable/enable time may be very long. 
+ */ +static DEFINE_RATELIMIT_STATE(fault_handler_rate, 10 * HZ, 1); + +static int fault_handler_rate_limit(void) +{ + return __ratelimit(&fault_handler_rate); +} + +void rockchip_drm_reset_iommu_fault_handler_rate_limit(void) +{ + fault_handler_rate.begin = 0; + fault_handler_rate.printed = 0; +} + static int rockchip_drm_fault_handler(struct iommu_domain *iommu, struct device *dev, unsigned long iova, int flags, void *arg) @@ -1377,11 +1401,27 @@ static int rockchip_drm_fault_handler(struct iommu_domain *iommu, struct drm_device *drm_dev = arg; struct rockchip_drm_private *priv = drm_dev->dev_private; struct drm_crtc *crtc; + bool handled = false; + + DRM_ERROR("iommu fault handler flags: 0x%x: count: %lld\n", + flags, ++priv->iommu_fault_count); + + if (!fault_handler_rate_limit()) + return 0; - DRM_ERROR("iommu fault handler flags: 0x%x\n", flags); drm_for_each_crtc(crtc, drm_dev) { int pipe = drm_crtc_index(crtc); + /* + * Only need to call iommu fault handler once for one iommu fault + */ + if (priv->crtc_funcs[pipe] && + priv->crtc_funcs[pipe]->iommu_fault_handler && + !handled) { + priv->crtc_funcs[pipe]->iommu_fault_handler(crtc, iommu); + handled = true; + } + if (priv->crtc_funcs[pipe] && priv->crtc_funcs[pipe]->regs_dump) priv->crtc_funcs[pipe]->regs_dump(crtc, NULL); @@ -1784,6 +1824,111 @@ static void rockchip_drm_sysfs_fini(struct drm_device *drm_dev) } } +void rockchip_drm_send_error_event(struct rockchip_drm_private *priv, + enum rockchip_drm_error_event_type event) +{ + struct rockchip_drm_error_event *error_event = &priv->error_event; + struct drm_event_vblank *e; + struct timespec64 tv; + unsigned long flags; + + /* + * Maybe the error thread has not been created. 
+ */ + if (IS_ERR_OR_NULL(priv->error_event.thread)) + return; + + spin_lock_irqsave(&error_event->lock, flags); + tv = ktime_to_timespec64(ktime_get()); + e = &error_event->event; + e->base.type = event; + e->base.length = sizeof(*e); + e->tv_sec = tv.tv_sec; + e->tv_usec = tv.tv_nsec / 1000; + e->sequence++; + error_event->error_state = true; + spin_unlock_irqrestore(&error_event->lock, flags); + + wake_up_interruptible_all(&error_event->wait); +} + +static int rockchip_drm_error_event_thread(void *data) +{ + struct drm_device *drm_dev = data; + struct rockchip_drm_private *priv = drm_dev->dev_private; + struct rockchip_drm_error_event *error_event = &priv->error_event; + struct drm_event_vblank *e; + int ret = 0; + int cnt = 0; + + while (!kthread_should_stop()) { + e = &error_event->event; + + error_event->error_state = false; + ret = wait_event_interruptible(error_event->wait, error_event->error_state); + if (!ret) { + sysfs_notify(&drm_dev->dev->kobj, NULL, "error_event"); + drm_info(drm_dev, "rockchipdrm send_error_event_type: 0x%x, count:%d\n", + e->base.type, ++cnt); + } + } + + return 0; +} + +static ssize_t rockchip_drm_error_event_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct drm_device *drm_dev = dev_get_drvdata(dev); + struct rockchip_drm_private *priv = drm_dev->dev_private; + struct rockchip_drm_error_event *error_event = &priv->error_event; + struct drm_event_vblank *e; + uint32_t length = sizeof(*e); + unsigned long flags; + + spin_lock_irqsave(&error_event->lock, flags); + e = &error_event->event; + memcpy(buf, e, length); + spin_unlock_irqrestore(&error_event->lock, flags); + + return length; +} +static DEVICE_ATTR(error_event, 0444, rockchip_drm_error_event_show, NULL); + +static void rockchip_drm_error_event_init(struct drm_device *drm_dev) +{ + struct rockchip_drm_private *priv = drm_dev->dev_private; + struct sched_param sched_param = { .sched_priority = MAX_RT_PRIO - 1 }; + int ret; + + ret = 
device_create_file(drm_dev->dev, &dev_attr_error_event); + if (ret) { + dev_warn(drm_dev->dev, "failed to create vcnt event file\n"); + return; + } + + init_waitqueue_head(&priv->error_event.wait); + spin_lock_init(&priv->error_event.lock); + priv->error_event.thread = kthread_run(rockchip_drm_error_event_thread, + drm_dev, "display-error-event-thread"); + if (IS_ERR(priv->error_event.thread)) { + priv->error_event.thread = NULL; + drm_err(drm_dev, "failed to run display error_event thread\n"); + } else { + sched_setscheduler(priv->error_event.thread, SCHED_FIFO, &sched_param); + drm_info(drm_dev, "run display error_event monitor\n"); + } +} + +static void rockchip_drm_error_event_fini(struct drm_device *drm_dev) +{ + struct rockchip_drm_private *priv = drm_dev->dev_private; + + if (priv->error_event.thread) + kthread_stop(priv->error_event.thread); + device_remove_file(drm_dev->dev, &dev_attr_error_event); +} + static int rockchip_drm_bind(struct device *dev) { struct drm_device *drm_dev; @@ -1891,6 +2036,8 @@ static int rockchip_drm_bind(struct device *dev) if (ret) goto err_drm_fbdev_fini; + rockchip_drm_error_event_init(drm_dev); + return 0; err_drm_fbdev_fini: rockchip_drm_fbdev_fini(drm_dev); @@ -1915,6 +2062,7 @@ static void rockchip_drm_unbind(struct device *dev) { struct drm_device *drm_dev = dev_get_drvdata(dev); + rockchip_drm_error_event_fini(drm_dev); rockchip_drm_sysfs_fini(drm_dev); rockchip_drm_fbdev_fini(drm_dev); drm_dev_unregister(drm_dev); diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_drv.h b/drivers/gpu/drm/rockchip/rockchip_drm_drv.h index 1a7837c8fe6f..021901edb6f3 100644 --- a/drivers/gpu/drm/rockchip/rockchip_drm_drv.h +++ b/drivers/gpu/drm/rockchip/rockchip_drm_drv.h @@ -349,6 +349,15 @@ struct rockchip_drm_vcnt { int pipe; }; +struct rockchip_drm_error_event { + wait_queue_head_t wait; + struct task_struct *thread; + struct list_head event_list; + struct drm_event_vblank event; + bool error_state; + spinlock_t lock; +}; + struct 
rockchip_logo { dma_addr_t dma_addr; struct drm_mm_node logo_reserved_node; @@ -511,6 +520,7 @@ struct rockchip_crtc_funcs { int (*crtc_set_color_bar)(struct drm_crtc *crtc, enum rockchip_color_bar_mode mode); int (*set_aclk)(struct drm_crtc *crtc, enum rockchip_drm_vop_aclk_mode aclk_mode, struct dmcfreq_vop_info *vop_bw_info); int (*get_crc)(struct drm_crtc *crtc); + void (*iommu_fault_handler)(struct drm_crtc *crtc, struct iommu_domain *iommu); }; struct rockchip_dclk_pll { @@ -558,6 +568,8 @@ struct rockchip_drm_private { const struct rockchip_crtc_funcs *crtc_funcs[ROCKCHIP_MAX_CRTC]; + uint64_t iommu_fault_count; + struct rockchip_dclk_pll default_pll; struct rockchip_dclk_pll hdmi_pll; @@ -568,6 +580,8 @@ struct rockchip_drm_private { struct mutex ovl_lock; struct rockchip_drm_vcnt vcnt[ROCKCHIP_MAX_CRTC]; + struct rockchip_drm_error_event error_event; + /** * @loader_protect * ignore restore_fbdev_mode_atomic when in logo on state @@ -647,6 +661,9 @@ int rockchip_drm_dclk_set_rate(u32 version, struct clk *dclk, unsigned long rate bool rockchip_drm_is_afbc(struct drm_plane *plane, u64 modifier); bool rockchip_drm_is_rfbc(struct drm_plane *plane, u64 modifier); const char *rockchip_drm_modifier_to_string(uint64_t modifier); +void rockchip_drm_reset_iommu_fault_handler_rate_limit(void); +void rockchip_drm_send_error_event(struct rockchip_drm_private *priv, + enum rockchip_drm_error_event_type event); __printf(3, 4) void rockchip_drm_dbg(const struct device *dev, diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c b/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c index f049d6ea2e1d..8dd65acb0881 100644 --- a/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c +++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c @@ -882,6 +882,11 @@ struct vop2 { */ bool skip_ref_fb; + /* + * report iommu fault event to userspace + */ + bool report_iommu_fault; + bool loader_protect; bool aclk_rate_reset; @@ -962,6 +967,7 @@ struct vop2 { unsigned long aclk_target_freq; u32 
aclk_mode_rate[ROCKCHIP_VOP_ACLK_MAX_MODE]; #endif + bool iommu_fault_in_progress; /* aclk auto cs div */ u32 csu_div; @@ -4605,6 +4611,13 @@ static void vop2_disable(struct drm_crtc *crtc) vop2_set_aclk_rate(crtc, ROCKCHIP_VOP_ACLK_RESET_MODE, NULL); rockchip_drm_dma_detach_device(vop2->drm_dev, vop2->dev); vop2->is_iommu_enabled = false; + vop2->iommu_fault_in_progress = false; + /* + * Reset fault handler rate limit state, so that we can + * immediately report the error event again if an error occurs + * shortly after the recovery (disable then enable) process is done. + */ + rockchip_drm_reset_iommu_fault_handler_rate_limit(); } if (vop2->version == VOP_VERSION_RK3588 || vop2->version == VOP_VERSION_RK3576) vop2_power_off_all_pd(vop2); @@ -7958,6 +7971,21 @@ static int vop2_crtc_get_crc(struct drm_crtc *crtc) return 0; } +static void vop2_iommu_fault_handler(struct drm_crtc *crtc, struct iommu_domain *iommu) +{ + struct vop2_video_port *vp = to_vop2_video_port(crtc); + struct vop2 *vop2 = vp->vop2; + struct drm_device *drm_dev = vop2->drm_dev; + struct rockchip_drm_private *private = drm_dev->dev_private; + + if (!vop2->report_iommu_fault) + return; + + vop2->iommu_fault_in_progress = true; + + rockchip_drm_send_error_event(private, ROCKCHIP_DRM_ERROR_EVENT_IOMMU_FAULT); +} + static const struct rockchip_crtc_funcs private_crtc_funcs = { .loader_protect = vop2_crtc_loader_protect, .cancel_pending_vblank = vop2_crtc_cancel_pending_vblank, @@ -7979,6 +8007,7 @@ static const struct rockchip_crtc_funcs private_crtc_funcs = { .crtc_set_color_bar = vop2_crtc_set_color_bar, .set_aclk = vop2_set_aclk_rate, .get_crc = vop2_crtc_get_crc, + .iommu_fault_handler = vop2_iommu_fault_handler, }; static bool vop2_crtc_mode_fixup(struct drm_crtc *crtc, @@ -14344,6 +14373,7 @@ static int vop2_bind(struct device *dev, struct device *master, void *data) vop2->disable_afbc_win = of_property_read_bool(dev->of_node, "disable-afbc-win"); vop2->disable_win_move = 
of_property_read_bool(dev->of_node, "disable-win-move"); vop2->skip_ref_fb = of_property_read_bool(dev->of_node, "skip-ref-fb"); + vop2->report_iommu_fault = of_property_read_bool(dev->of_node, "rockchip,report-iommu-fault"); if (!is_vop3(vop2) || vop2->version == VOP_VERSION_RK3528 || vop2->version == VOP_VERSION_RK3562) vop2->merge_irq = true; diff --git a/include/uapi/drm/rockchip_drm.h b/include/uapi/drm/rockchip_drm.h index 2ac71b43dba0..f9bf16d5f403 100644 --- a/include/uapi/drm/rockchip_drm.h +++ b/include/uapi/drm/rockchip_drm.h @@ -111,6 +111,12 @@ enum rockchip_cabc_mode { ROCKCHIP_DRM_CABC_MODE_USERSPACE, }; +enum rockchip_drm_error_event_type { + ROCKCHIP_DRM_ERROR_EVENT_IOMMU_FAULT = (1 << 0), + ROCKCHIP_DRM_ERROR_EVENT_POST_BUF_EMPTY = (1 << 1), + ROCKCHIP_DRM_ERROR_EVENT_REQUEST_RESET = (1 << 2), +}; + #define DRM_ROCKCHIP_GEM_CREATE 0x00 #define DRM_ROCKCHIP_GEM_MAP_OFFSET 0x01 #define DRM_ROCKCHIP_GEM_CPU_ACQUIRE 0x02