mirror of
https://github.com/hardkernel/linux.git
synced 2026-06-06 19:08:57 +09:00
media: venus: Rework error fail recover logic
[ Upstream commit 4cba5473c5 ]

The Venus code has a sort of watchdog that attempts to recover from IP errors, implemented as a delayed work job, which calls venus_sys_error_handler().

Right now, it has several issues:

1. It assumes that PM runtime resume never fails

2. It internally runs two while() loops that also assume that PM runtime will never fail to go idle:

	while (pm_runtime_active(core->dev_dec) || pm_runtime_active(core->dev_enc))
		msleep(10);

	...

	while (core->pmdomains[0] && pm_runtime_active(core->pmdomains[0]))
		usleep_range(1000, 1500);

3. It uses an OR to merge all return codes and then report to the user

4. If the hardware never recovers, it keeps running on every 10ms, flooding the syslog with 2 messages (so, up to 200 messages per second).

Rework the code, in order to prevent that, by:

1. check the return code from PM runtime resume;
2. don't let the while() loops run forever;
3. store the failed event;
4. use warn ratelimited when it fails to recover.

Fixes: af2c3834c8 ("[media] media: venus: adding core part and helper functions")
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Signed-off-by: Sasha Levin <sashal@kernel.org>
This commit is contained in:
committed by
Greg Kroah-Hartman
parent
08d0aa16a1
commit
e23dc4a3e8
@@ -48,52 +48,86 @@ static const struct hfi_core_ops venus_core_ops = {
|
|||||||
.event_notify = venus_event_notify,
|
.event_notify = venus_event_notify,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
#define RPM_WAIT_FOR_IDLE_MAX_ATTEMPTS 10
|
||||||
|
|
||||||
static void venus_sys_error_handler(struct work_struct *work)
|
static void venus_sys_error_handler(struct work_struct *work)
|
||||||
{
|
{
|
||||||
struct venus_core *core =
|
struct venus_core *core =
|
||||||
container_of(work, struct venus_core, work.work);
|
container_of(work, struct venus_core, work.work);
|
||||||
int ret = 0;
|
int ret, i, max_attempts = RPM_WAIT_FOR_IDLE_MAX_ATTEMPTS;
|
||||||
|
const char *err_msg = "";
|
||||||
|
bool failed = false;
|
||||||
|
|
||||||
pm_runtime_get_sync(core->dev);
|
ret = pm_runtime_get_sync(core->dev);
|
||||||
|
if (ret < 0) {
|
||||||
|
err_msg = "resume runtime PM";
|
||||||
|
max_attempts = 0;
|
||||||
|
failed = true;
|
||||||
|
}
|
||||||
|
|
||||||
hfi_core_deinit(core, true);
|
hfi_core_deinit(core, true);
|
||||||
|
|
||||||
dev_warn(core->dev, "system error has occurred, starting recovery!\n");
|
|
||||||
|
|
||||||
mutex_lock(&core->lock);
|
mutex_lock(&core->lock);
|
||||||
|
|
||||||
while (pm_runtime_active(core->dev_dec) || pm_runtime_active(core->dev_enc))
|
for (i = 0; i < max_attempts; i++) {
|
||||||
|
if (!pm_runtime_active(core->dev_dec) && !pm_runtime_active(core->dev_enc))
|
||||||
|
break;
|
||||||
msleep(10);
|
msleep(10);
|
||||||
|
}
|
||||||
|
|
||||||
venus_shutdown(core);
|
venus_shutdown(core);
|
||||||
|
|
||||||
pm_runtime_put_sync(core->dev);
|
pm_runtime_put_sync(core->dev);
|
||||||
|
|
||||||
while (core->pmdomains[0] && pm_runtime_active(core->pmdomains[0]))
|
for (i = 0; i < max_attempts; i++) {
|
||||||
|
if (!core->pmdomains[0] || !pm_runtime_active(core->pmdomains[0]))
|
||||||
|
break;
|
||||||
usleep_range(1000, 1500);
|
usleep_range(1000, 1500);
|
||||||
|
}
|
||||||
|
|
||||||
hfi_reinit(core);
|
hfi_reinit(core);
|
||||||
|
|
||||||
pm_runtime_get_sync(core->dev);
|
ret = pm_runtime_get_sync(core->dev);
|
||||||
|
if (ret < 0) {
|
||||||
|
err_msg = "resume runtime PM";
|
||||||
|
failed = true;
|
||||||
|
}
|
||||||
|
|
||||||
ret |= venus_boot(core);
|
ret = venus_boot(core);
|
||||||
ret |= hfi_core_resume(core, true);
|
if (ret && !failed) {
|
||||||
|
err_msg = "boot Venus";
|
||||||
|
failed = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
ret = hfi_core_resume(core, true);
|
||||||
|
if (ret && !failed) {
|
||||||
|
err_msg = "resume HFI";
|
||||||
|
failed = true;
|
||||||
|
}
|
||||||
|
|
||||||
enable_irq(core->irq);
|
enable_irq(core->irq);
|
||||||
|
|
||||||
mutex_unlock(&core->lock);
|
mutex_unlock(&core->lock);
|
||||||
|
|
||||||
ret |= hfi_core_init(core);
|
ret = hfi_core_init(core);
|
||||||
|
if (ret && !failed) {
|
||||||
|
err_msg = "init HFI";
|
||||||
|
failed = true;
|
||||||
|
}
|
||||||
|
|
||||||
pm_runtime_put_sync(core->dev);
|
pm_runtime_put_sync(core->dev);
|
||||||
|
|
||||||
if (ret) {
|
if (failed) {
|
||||||
disable_irq_nosync(core->irq);
|
disable_irq_nosync(core->irq);
|
||||||
dev_warn(core->dev, "recovery failed (%d)\n", ret);
|
dev_warn_ratelimited(core->dev,
|
||||||
|
"System error has occurred, recovery failed to %s\n",
|
||||||
|
err_msg);
|
||||||
schedule_delayed_work(&core->work, msecs_to_jiffies(10));
|
schedule_delayed_work(&core->work, msecs_to_jiffies(10));
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
dev_warn(core->dev, "system error has occurred (recovered)\n");
|
||||||
|
|
||||||
mutex_lock(&core->lock);
|
mutex_lock(&core->lock);
|
||||||
core->sys_error = false;
|
core->sys_error = false;
|
||||||
mutex_unlock(&core->lock);
|
mutex_unlock(&core->lock);
|
||||||
|
|||||||
Reference in New Issue
Block a user