From aef810d2c1568b760cab2efa5b4e83a2fce1ebb9 Mon Sep 17 00:00:00 2001 From: Zhen Chen Date: Mon, 11 Jul 2022 09:45:58 +0800 Subject: [PATCH] MALI: bifrost: from ARM: do a faster & cleaner reset for unrecoverable CS_FATAL This is part of the workaround for HW_issue_of_fragment_endpoint_TERMINATED_responses, though it is not expected to be triggered often. Change-Id: I6b0cf15b8050ebacd3099f52af1a03d786c6ff44 Signed-off-by: Zhen Chen --- drivers/gpu/arm/bifrost/csf/mali_kbase_csf.c | 6 ++++++ drivers/gpu/arm/bifrost/csf/mali_kbase_csf_defs.h | 3 +++ drivers/gpu/arm/bifrost/csf/mali_kbase_csf_registers.h | 1 + drivers/gpu/arm/bifrost/csf/mali_kbase_csf_scheduler.c | 3 ++- .../gpu/arm/bifrost/gpu/backend/mali_kbase_gpu_fault_csf.c | 5 ++++- 5 files changed, 16 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf.c b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf.c index cc1369ce8766..80e37a36ca76 100644 --- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf.c +++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf.c @@ -1359,6 +1359,7 @@ static int create_queue_group(struct kbase_context *const kctx, kbase_csf_priority_check(kctx->kbdev, create->in.priority)); group->doorbell_nr = KBASEP_USER_DB_NR_INVALID; group->faulted = false; + group->cs_unrecoverable = false; group->reevaluate_idle_status = false; @@ -2429,6 +2430,11 @@ handle_fatal_event(struct kbase_queue *const queue, CS_FATAL_EXCEPTION_TYPE_FIRMWARE_INTERNAL_ERROR) { queue_work(system_wq, &kbdev->csf.fw_error_work); } else { + if (cs_fatal_exception_type == CS_FATAL_EXCEPTION_TYPE_CS_UNRECOVERABLE) { + queue->group->cs_unrecoverable = true; + if (kbase_prepare_to_reset_gpu(queue->kctx->kbdev, RESET_FLAGS_NONE)) + kbase_reset_gpu(queue->kctx->kbdev); + } get_queue(queue); queue->cs_fatal = cs_fatal; queue->cs_fatal_info = cs_fatal_info; diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_defs.h b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_defs.h index 89055e95a02f..27aa53de110d 100644 --- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_defs.h +++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_defs.h @@ -477,6 +477,8 @@ struct kbase_protected_suspend_buffer { * @faulted: Indicates that a GPU fault occurred for the queue group. * This flag persists until the fault has been queued to be * reported to userspace. + * @cs_unrecoverable: Flag to unblock the thread waiting for CSG termination in + * case of CS_FATAL_EXCEPTION_TYPE_CS_UNRECOVERABLE * @reevaluate_idle_status : Flag set when work is submitted for the normal group * or it becomes unblocked during protected mode. The * flag helps Scheduler confirm if the group actually @@ -522,6 +524,7 @@ struct kbase_queue_group { u32 prepared_seq_num; u32 scan_seq_num; bool faulted; + bool cs_unrecoverable; bool reevaluate_idle_status; struct kbase_queue *bound_queues[MAX_SUPPORTED_STREAMS_PER_GROUP]; diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_registers.h b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_registers.h index c797bcbc461a..177569bfb427 100644 --- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_registers.h +++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_registers.h @@ -699,6 +699,7 @@ (((value) << CS_FATAL_EXCEPTION_TYPE_SHIFT) & CS_FATAL_EXCEPTION_TYPE_MASK)) /* CS_FATAL_EXCEPTION_TYPE values */ #define CS_FATAL_EXCEPTION_TYPE_CS_CONFIG_FAULT 0x40 +#define CS_FATAL_EXCEPTION_TYPE_CS_UNRECOVERABLE 0x41 #define CS_FATAL_EXCEPTION_TYPE_CS_ENDPOINT_FAULT 0x44 #define CS_FATAL_EXCEPTION_TYPE_CS_BUS_FAULT 0x48 #define CS_FATAL_EXCEPTION_TYPE_CS_INVALID_INSTRUCTION 0x49 diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_scheduler.c b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_scheduler.c index c0f63fe75a0f..af3b6912845d 100644 --- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_scheduler.c +++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_scheduler.c @@ -2645,7 +2645,8 @@ static int term_group_sync(struct kbase_queue_group *group) term_csg_slot(group); remaining = wait_event_timeout(kbdev->csf.event_wait, - csg_slot_stopped_locked(kbdev, group->csg_nr), remaining); + group->cs_unrecoverable || csg_slot_stopped_locked(kbdev, group->csg_nr), + remaining); if (!remaining) { dev_warn(kbdev->dev, "[%llu] term request timeout (%d ms) for group %d of context %d_%d on slot %d", diff --git a/drivers/gpu/arm/bifrost/gpu/backend/mali_kbase_gpu_fault_csf.c b/drivers/gpu/arm/bifrost/gpu/backend/mali_kbase_gpu_fault_csf.c index 893a3352a908..15bfd0375f0b 100644 --- a/drivers/gpu/arm/bifrost/gpu/backend/mali_kbase_gpu_fault_csf.c +++ b/drivers/gpu/arm/bifrost/gpu/backend/mali_kbase_gpu_fault_csf.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -86,6 +86,9 @@ const char *kbase_gpu_exception_name(u32 const exception_code) case CS_FATAL_EXCEPTION_TYPE_FIRMWARE_INTERNAL_ERROR: e = "FIRMWARE_INTERNAL_ERROR"; break; + case CS_FATAL_EXCEPTION_TYPE_CS_UNRECOVERABLE: + e = "CS_UNRECOVERABLE"; + break; case CS_FAULT_EXCEPTION_TYPE_RESOURCE_EVICTION_TIMEOUT: e = "RESOURCE_EVICTION_TIMEOUT"; break;