MALI: rockchip: upgrade bifrost DDK to g10p0-01eac0, from g9p0-01eac0

Change-Id: If5e4683a1da37e00eeaa5a16463206f6f45ecfb4 Signed-off-by: Zhen Chen <chenzhen@rock-chips.com>
2026-06-05 18:41:58 +09:00 · 2022-01-04 15:07:30 +08:00
parent c3ad28a4aa
commit 3f89b26931
124 changed files with 7459 additions and 1997 deletions
--- a/drivers/base/arm/dma_buf_lock/src/Makefile
+++ b/drivers/base/arm/dma_buf_lock/src/Makefile
@@ -1,39 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
-#
-# (C) COPYRIGHT 2012, 2020-2021 ARM Limited. All rights reserved.
-#
-# This program is free software and is provided to you under the terms of the
-# GNU General Public License version 2 as published by the Free Software
-# Foundation, and any use by you of this program is subject to the terms
-# of such GNU license.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, you can access it online at
-# http://www.gnu.org/licenses/gpl-2.0.html.
-#
-#
-
-# linux build system bootstrap for out-of-tree module
-
-# default to building for the host
-ARCH ?= $(shell uname -m)
-
-# Handle Android Common Kernel source naming
-KERNEL_SRC ?= /lib/modules/$(shell uname -r)/build
-KDIR ?= $(KERNEL_SRC)
-
-all: dma_buf_lock
-
-dma_buf_lock:
-	$(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) EXTRA_CFLAGS="-I$(CURDIR)/../../../../../include"
-
-clean:
-	$(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) clean
-
-modules_install:
-	$(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) modules_install
--- a/drivers/base/arm/dma_buf_lock/src/build.bp
+++ b/drivers/base/arm/dma_buf_lock/src/build.bp
@@ -0,0 +1,37 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ *
+ * (C) COPYRIGHT 2021 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+bob_kernel_module {
+    name: "dma_buf_lock",
+    defaults: [
+        "kernel_defaults"
+    ],
+    srcs: [
+        "Kbuild",
+        "dma_buf_lock.c",
+        "dma_buf_lock.h",
+    ],
+    enabled: false,
+    dma_buf_lock: {
+        kbuild_options: ["CONFIG_DMA_BUF_LOCK=y"],
+        enabled: true,
+    },
+}
--- a/drivers/base/arm/dma_buf_lock/src/dma_buf_lock.c
+++ b/drivers/base/arm/dma_buf_lock/src/dma_buf_lock.c
@@ -902,4 +902,4 @@ module_init(dma_buf_lock_init);
 module_exit(dma_buf_lock_exit);

 MODULE_LICENSE("GPL");
-
+MODULE_INFO(import_ns, "DMA_BUF");
--- a/drivers/base/arm/dma_buf_test_exporter/dma-buf-test-exporter.c
+++ b/drivers/base/arm/dma_buf_test_exporter/dma-buf-test-exporter.c
@@ -822,3 +822,4 @@ static void __exit dma_buf_te_exit(void)
 module_init(dma_buf_te_init);
 module_exit(dma_buf_te_exit);
 MODULE_LICENSE("GPL");
+MODULE_INFO(import_ns, "DMA_BUF");
--- a/drivers/base/arm/protected_memory_allocator/protected_memory_allocator.c
+++ b/drivers/base/arm/protected_memory_allocator/protected_memory_allocator.c
@@ -62,9 +62,10 @@ struct simple_pma_device {
 };

 /**
- * Number of elements in array 'allocated_pages_bitfield_arr'. If the number of
- * pages required does not divide exactly by PAGES_PER_BITFIELD_ELEM, adds an
- * extra page for the remainder.
+ * ALLOC_PAGES_BITFIELD_ARR_SIZE() - Number of elements in array
+ *                                   'allocated_pages_bitfield_arr'
+ * If the number of pages required does not divide exactly by
+ * PAGES_PER_BITFIELD_ELEM, adds an extra page for the remainder.
 * @num_pages: number of pages
 */
 #define ALLOC_PAGES_BITFIELD_ARR_SIZE(num_pages) \
--- a/drivers/gpu/arm/bifrost/Kbuild
+++ b/drivers/gpu/arm/bifrost/Kbuild
@@ -71,7 +71,7 @@ endif
 #

 # Driver version string which is returned to userspace via an ioctl
-MALI_RELEASE_NAME ?= '"g9p0-01eac0"'
+MALI_RELEASE_NAME ?= '"g10p0-01eac0"'
 # Set up defaults if not defined by build system
 ifeq ($(CONFIG_MALI_BIFROST_DEBUG), y)
    MALI_UNIT_TEST = 1
@@ -82,8 +82,6 @@ else
 endif
 MALI_COVERAGE ?= 0

-CONFIG_MALI_PLATFORM_NAME ?= "devicetree"
-
 # Kconfig passes in the name with quotes for in-tree builds - remove them.
 MALI_PLATFORM_DIR := $(shell echo $(CONFIG_MALI_PLATFORM_NAME))

@@ -122,7 +120,6 @@ ccflags-y = \
    -DMALI_RELEASE_NAME=$(MALI_RELEASE_NAME) \
    -DMALI_JIT_PRESSURE_LIMIT_BASE=$(MALI_JIT_PRESSURE_LIMIT_BASE) \
    -DMALI_INCREMENTAL_RENDERING=$(MALI_INCREMENTAL_RENDERING) \
-    -DMALI_KBASE_BUILD \
    -DMALI_PLATFORM_DIR=$(MALI_PLATFORM_DIR)


@@ -165,7 +162,6 @@ bifrost_kbase-y := \
    mali_kbase_hwcnt.o \
    mali_kbase_hwcnt_gpu.o \
    mali_kbase_hwcnt_gpu_narrow.o \
-    mali_kbase_hwcnt_legacy.o \
    mali_kbase_hwcnt_types.o \
    mali_kbase_hwcnt_virtualizer.o \
    mali_kbase_softjobs.o \
@@ -205,6 +201,7 @@ bifrost_kbase-$(CONFIG_SYNC_FILE) += \
 ifeq ($(CONFIG_MALI_CSF_SUPPORT),y)
    bifrost_kbase-y += \
        mali_kbase_hwcnt_backend_csf.o \
+        mali_kbase_hwcnt_watchdog_if_timer.o \
        mali_kbase_hwcnt_backend_csf_if_fw.o
 else
    bifrost_kbase-y += \
--- a/drivers/gpu/arm/bifrost/Makefile
+++ b/drivers/gpu/arm/bifrost/Makefile
@@ -34,12 +34,21 @@ endif

 CONFIG_MALI_BIFROST ?= m
 ifeq ($(CONFIG_MALI_BIFROST),m)
+    CONFIG_MALI_PLATFORM_NAME ?= "devicetree"
    CONFIG_MALI_BIFROST_GATOR_SUPPORT ?= y
    CONFIG_MALI_ARBITRATION ?= n
    CONFIG_MALI_PARTITION_MANAGER ?= n

+    ifeq ($(origin CONFIG_MALI_ARBITER_MODULES), undefined)
+        CONFIG_MALI_ARBITER_MODULES := $(CONFIG_MALI_ARBITRATION)
+    endif
+
+    ifeq ($(origin CONFIG_MALI_GPU_POWER_MODULES), undefined)
+        CONFIG_MALI_GPU_POWER_MODULES := $(CONFIG_MALI_ARBITRATION)
+    endif
+
    ifneq ($(CONFIG_MALI_BIFROST_NO_MALI),y)
-        # Prevent misuse when CONFIG_MALI_BIFROST_NO_MALI=y
+        # Prevent misuse when CONFIG_MALI_BIFROST_NO_MALI
        CONFIG_MALI_REAL_HW ?= y
    endif

@@ -135,6 +144,8 @@ ifeq ($(CONFIG_MALI_BIFROST),m)
 else
    # Prevent misuse when CONFIG_MALI_BIFROST=n
    CONFIG_MALI_ARBITRATION = n
+    CONFIG_MALI_ARBITER_MODULES = n
+    CONFIG_MALI_GPU_POWER_MODULES = n
    CONFIG_MALI_KUTF = n
    CONFIG_MALI_KUTF_IRQ_TEST = n
    CONFIG_MALI_KUTF_CLK_RATE_TRACE = n
@@ -148,6 +159,8 @@ CONFIGS := \
    CONFIG_MALI_BIFROST_DMA_FENCE \
    CONFIG_MALI_ARBITER_SUPPORT \
    CONFIG_MALI_ARBITRATION \
+    CONFIG_MALI_ARBITER_MODULES \
+    CONFIG_MALI_GPU_POWER_MODULES \
    CONFIG_MALI_PARTITION_MANAGER \
    CONFIG_MALI_REAL_HW \
    CONFIG_MALI_GEM5_BUILD \
@@ -191,6 +204,8 @@ MAKE_ARGS := $(foreach config,$(CONFIGS), \
                        $(value config)=$(value $(value config)), \
                        $(value config)=n))

+MAKE_ARGS += CONFIG_MALI_PLATFORM_NAME=$(CONFIG_MALI_PLATFORM_NAME)
+
 #
 # EXTRA_CFLAGS to define the custom CONFIGs on out-of-tree build
 #
@@ -201,6 +216,8 @@ EXTRA_CFLAGS := $(foreach config,$(CONFIGS), \
                    $(if $(filter y m,$(value $(value config))), \
                        -D$(value config)=1))

+EXTRA_CFLAGS += -DCONFIG_MALI_PLATFORM_NAME=$(CONFIG_MALI_PLATFORM_NAME)
+
 #
 # KBUILD_EXTRA_SYMBOLS to prevent warnings about unknown functions
 #
--- a/drivers/gpu/arm/bifrost/arbiter/mali_kbase_arbiter_pm.c
+++ b/drivers/gpu/arm/bifrost/arbiter/mali_kbase_arbiter_pm.c
@@ -1053,8 +1053,8 @@ void kbase_arbiter_pm_update_gpu_freq(struct kbase_arbiter_freq *arb_freq,

 	mutex_lock(&arb_freq->arb_freq_lock);
 	if (arb_freq->arb_freq != freq) {
-		ndata.new_rate = freq * KHZ_TO_HZ;
-		ndata.old_rate = arb_freq->arb_freq * KHZ_TO_HZ;
+		ndata.new_rate = (unsigned long)freq * KHZ_TO_HZ;
+		ndata.old_rate = (unsigned long)arb_freq->arb_freq * KHZ_TO_HZ;
 		ndata.gpu_clk_handle = arb_freq;
 		arb_freq->arb_freq = freq;
 		arb_freq->freq_updated = true;
--- a/drivers/gpu/arm/bifrost/arbitration/Kconfig
+++ b/drivers/gpu/arm/bifrost/arbitration/Kconfig
@@ -27,5 +27,23 @@ config MALI_XEN
 	  virtualization setup for Mali
 	  If unsure, say N.

+config MALI_ARBITER_MODULES
+	tristate "Enable mali arbiter modules"
+	depends on MALI_ARBITRATION
+	default y
+	help
+	  Enables the build of the arbiter modules used in the reference
+	  virtualization setup for Mali
+	  If unsure, say N
+
+config MALI_GPU_POWER_MODULES
+	tristate "Enable gpu power modules"
+	depends on MALI_ARBITRATION
+	default y
+	help
+	  Enables the build of the gpu power modules used in the reference
+	  virtualization setup for Mali
+	  If unsure, say N
+

 source "drivers/gpu/arm/bifrost/arbitration/ptm/Kconfig"
--- a/drivers/gpu/arm/bifrost/backend/gpu/Kbuild
+++ b/drivers/gpu/arm/bifrost/backend/gpu/Kbuild
@@ -47,3 +47,8 @@ endif
 bifrost_kbase-$(CONFIG_MALI_BIFROST_DEVFREQ) += \
    backend/gpu/mali_kbase_devfreq.o

+# Dummy model
+bifrost_kbase-$(CONFIG_MALI_BIFROST_NO_MALI) += backend/gpu/mali_kbase_model_dummy.o
+bifrost_kbase-$(CONFIG_MALI_BIFROST_NO_MALI) += backend/gpu/mali_kbase_model_linux.o
+# HW error simulation
+bifrost_kbase-$(CONFIG_MALI_BIFROST_NO_MALI) += backend/gpu/mali_kbase_model_error_generator.o
--- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_clk_rate_trace_mgr.h
+++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_clk_rate_trace_mgr.h
@@ -64,13 +64,12 @@ int kbase_clk_rate_trace_manager_init(struct kbase_device *kbdev);
 * kbase_init_lowest_gpu_freq() - Find the lowest frequency that the GPU can
 *                                run as using the device tree, and save this
 *                                within kbdev.
+ * @kbdev: Pointer to kbase device.
 *
 * This function could be called from kbase_clk_rate_trace_manager_init,
 * but is left separate as it can be called as soon as
 * dev_pm_opp_of_add_table() has been called to initialize the OPP table.
 *
- * @kbdev: Pointer to kbase device.
- *
 * Return: 0 in any case.
 */
 int kbase_lowest_gpu_freq_init(struct kbase_device *kbdev);
--- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_devfreq.c
+++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_devfreq.c
@@ -61,7 +61,7 @@ static struct monitor_dev_profile mali_mdevp = {
 * This function will be called only when the opp table which is compatible with
 * "operating-points-v2-mali", is not present in the devicetree for GPU device.
 *
- * Return: Voltage value in uV, 0 in case of error.
+ * Return: Voltage value in micro volts, 0 in case of error.
 */
 static unsigned long get_voltage(struct kbase_device *kbdev, unsigned long freq)
 {
@@ -87,7 +87,7 @@ static unsigned long get_voltage(struct kbase_device *kbdev, unsigned long freq)
 	rcu_read_unlock();
 #endif

-	/* Return the voltage in uV. */
+	/* Return the voltage in micro volts */
 	return voltage;
 }

--- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_devfreq.h
+++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_devfreq.h
@@ -57,6 +57,7 @@ void kbase_devfreq_enqueue_work(struct kbase_device *kbdev,
 * This function will only perform translation if an operating-points-v2-mali
 * table is present in devicetree. If one is not present then it will return an
 * untranslated frequency (and corresponding voltage) and all cores enabled.
+ * The voltages returned are in micro Volts (uV).
 */
 void kbase_devfreq_opp_translate(struct kbase_device *kbdev, unsigned long freq,
 	u64 *core_mask, unsigned long *freqs, unsigned long *volts);
--- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_gpuprops_backend.c
+++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_gpuprops_backend.c
@@ -46,10 +46,10 @@ int kbase_backend_gpuprops_get(struct kbase_device *kbdev,
 	registers.core_features = kbase_reg_read(kbdev,
 				GPU_CONTROL_REG(CORE_FEATURES));
 #else /* !MALI_USE_CSF */
-	if (((registers.gpu_id & GPU_ID2_PRODUCT_MODEL) ==
-	     GPU_ID2_PRODUCT_TGRX) ||
-	    ((registers.gpu_id & GPU_ID2_PRODUCT_MODEL) ==
-	     GPU_ID2_PRODUCT_TVAX))
+	if (!(((registers.gpu_id & GPU_ID2_PRODUCT_MODEL) ==
+	       GPU_ID2_PRODUCT_TDUX) ||
+	      ((registers.gpu_id & GPU_ID2_PRODUCT_MODEL) ==
+	       GPU_ID2_PRODUCT_TODX)))
 		registers.core_features =
 			kbase_reg_read(kbdev, GPU_CONTROL_REG(CORE_FEATURES));
 #endif /* MALI_USE_CSF */
--- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_instr_backend.c
+++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_instr_backend.c
@@ -119,29 +119,62 @@ int kbase_instr_hwcnt_enable_internal(struct kbase_device *kbdev,
 	return err;
 }

+static void kbasep_instr_hwc_disable_hw_prfcnt(struct kbase_device *kbdev)
+{
+	u32 irq_mask;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+	lockdep_assert_held(&kbdev->hwcnt.lock);
+
+	if (kbase_is_gpu_removed(kbdev))
+		/* GPU has been removed by Arbiter: skip all register access */
+		return;
+
+	/* Mask the PRFCNT_SAMPLE_COMPLETED interrupt in GPU_IRQ_MASK */
+	irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK));
+
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), irq_mask & ~PRFCNT_SAMPLE_COMPLETED);
+
+	/* Disable the counters by writing 0 to PRFCNT_CONFIG */
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG), 0);
+	/* Reset the hwcnt bookkeeping (context, address, size) held in kbdev */
+	kbdev->hwcnt.kctx = NULL;
+	kbdev->hwcnt.addr = 0ULL;
+	kbdev->hwcnt.addr_bytes = 0ULL;
+}
+
 int kbase_instr_hwcnt_disable_internal(struct kbase_context *kctx)
 {
 	unsigned long flags, pm_flags;
 	int err = -EINVAL;
-	u32 irq_mask;
 	struct kbase_device *kbdev = kctx->kbdev;

 	while (1) {
 		spin_lock_irqsave(&kbdev->hwaccess_lock, pm_flags);
 		spin_lock_irqsave(&kbdev->hwcnt.lock, flags);

+		if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_UNRECOVERABLE_ERROR) {
+			/* Instrumentation is in unrecoverable error state,
+			 * there is nothing for us to do.
+			 */
+			spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+			spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags);
+			/* Already disabled, return no error. */
+			return 0;
+		}
+
 		if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_DISABLED) {
 			/* Instrumentation is not enabled */
 			spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
 			spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags);
-			goto out;
+			return err;
 		}

 		if (kbdev->hwcnt.kctx != kctx) {
 			/* Instrumentation has been setup for another context */
 			spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
 			spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags);
-			goto out;
+			return err;
 		}

 		if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_IDLE)
@@ -158,25 +191,7 @@ int kbase_instr_hwcnt_disable_internal(struct kbase_context *kctx)
 	kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_DISABLED;
 	kbdev->hwcnt.backend.triggered = 0;

-	if (kbase_is_gpu_removed(kbdev)) {
-		/* GPU has been removed by Arbiter */
-		spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
-		spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags);
-		err = 0;
-		goto out;
-	}
-
-	/* Disable interrupt */
-	irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK));
-	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK),
-				irq_mask & ~PRFCNT_SAMPLE_COMPLETED);
-
-	/* Disable the counters */
-	kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG), 0);
-
-	kbdev->hwcnt.kctx = NULL;
-	kbdev->hwcnt.addr = 0ULL;
-	kbdev->hwcnt.addr_bytes = 0ULL;
+	kbasep_instr_hwc_disable_hw_prfcnt(kbdev);

 	spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
 	spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags);
@@ -184,9 +199,7 @@ int kbase_instr_hwcnt_disable_internal(struct kbase_context *kctx)
 	dev_dbg(kbdev->dev, "HW counters dumping disabled for context %pK",
 									kctx);

-	err = 0;
- out:
-	return err;
+	return 0;
 }

 int kbase_instr_hwcnt_request_dump(struct kbase_context *kctx)
@@ -204,7 +217,7 @@ int kbase_instr_hwcnt_request_dump(struct kbase_context *kctx)

 	if (kbdev->hwcnt.backend.state != KBASE_INSTR_STATE_IDLE) {
 		/* HW counters are disabled or another dump is ongoing, or we're
-		 * resetting
+		 * resetting, or we are in unrecoverable error state.
 		 */
 		goto unlock;
 	}
@@ -274,6 +287,10 @@ void kbase_instr_hwcnt_sample_done(struct kbase_device *kbdev)

 	spin_lock_irqsave(&kbdev->hwcnt.lock, flags);

+	/* If the state is unrecoverable error, the waiter has already been
+	 * woken up, so no action is needed when the sample is done.
+	 */
+
 	if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_FAULT) {
 		kbdev->hwcnt.backend.triggered = 1;
 		wake_up(&kbdev->hwcnt.backend.wait);
@@ -302,6 +319,8 @@ int kbase_instr_hwcnt_wait_for_dump(struct kbase_context *kctx)
 	if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_FAULT) {
 		err = -EINVAL;
 		kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_IDLE;
+	} else if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_UNRECOVERABLE_ERROR) {
+		err = -EIO;
 	} else {
 		/* Dump done */
 		KBASE_DEBUG_ASSERT(kbdev->hwcnt.backend.state ==
@@ -322,8 +341,8 @@ int kbase_instr_hwcnt_clear(struct kbase_context *kctx)

 	spin_lock_irqsave(&kbdev->hwcnt.lock, flags);

-	/* Check it's the context previously set up and we're not already
-	 * dumping
+	/* Check it's the context previously set up and that the backend is
+	 * in IDLE state.
 	 */
 	if (kbdev->hwcnt.kctx != kctx || kbdev->hwcnt.backend.state !=
 							KBASE_INSTR_STATE_IDLE)
@@ -347,6 +366,48 @@ out:
 }
 KBASE_EXPORT_SYMBOL(kbase_instr_hwcnt_clear);

+void kbase_instr_hwcnt_on_unrecoverable_error(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+
+	/* If we are already in unrecoverable error state, return early. */
+	if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_UNRECOVERABLE_ERROR) {
+		spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+		return;
+	}
+
+	/* Disable HW if needed; test the old state before it is overwritten. */
+	if (kbdev->hwcnt.backend.state != KBASE_INSTR_STATE_DISABLED)
+		kbasep_instr_hwc_disable_hw_prfcnt(kbdev);
+
+	kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_UNRECOVERABLE_ERROR;
+
+	/* Wake up any waiters. */
+	kbdev->hwcnt.backend.triggered = 1;
+	wake_up(&kbdev->hwcnt.backend.wait);
+
+	spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+}
+KBASE_EXPORT_SYMBOL(kbase_instr_hwcnt_on_unrecoverable_error);
+
+void kbase_instr_hwcnt_on_before_reset(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+
+	/* A reset is the only way to exit the unrecoverable error state */
+	if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_UNRECOVERABLE_ERROR)
+		kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_DISABLED;
+
+	spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+}
+KBASE_EXPORT_SYMBOL(kbase_instr_hwcnt_on_before_reset);
+
 int kbase_instr_backend_init(struct kbase_device *kbdev)
 {
 	spin_lock_init(&kbdev->hwcnt.lock);
--- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_instr_defs.h
+++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_instr_defs.h
@@ -38,8 +38,12 @@ enum kbase_instr_state {
 	KBASE_INSTR_STATE_IDLE,
 	/* Hardware is currently dumping a frame. */
 	KBASE_INSTR_STATE_DUMPING,
-	/* An error has occured during DUMPING (page fault). */
-	KBASE_INSTR_STATE_FAULT
+	/* An error has occurred during DUMPING (page fault). */
+	KBASE_INSTR_STATE_FAULT,
+	/* An unrecoverable error has occurred, a reset is the only way to exit
+	 * from unrecoverable error state.
+	 */
+	KBASE_INSTR_STATE_UNRECOVERABLE_ERROR,
 };

 /* Structure used for instrumentation and HW counters dumping */
--- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_defs.h
+++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_defs.h
@@ -38,10 +38,31 @@ struct rb_entry {
 	struct kbase_jd_atom *katom;
 };

+/* SLOT_RB_TAG_PURGED assumes a value that is different from
+ * NULL (SLOT_RB_NULL_TAG_VAL) and will not be the result of
+ * any valid pointer via macro translation: SLOT_RB_TAG_KCTX(x).
+ */
+#define SLOT_RB_TAG_PURGED ((u64)(1 << 1))
+#define SLOT_RB_NULL_TAG_VAL ((u64)0)
+
+/**
+ * SLOT_RB_TAG_KCTX() - convert a context pointer to a u64 tagged value
+ * @kctx: Pointer to kbase context; it is only cast, never dereferenced.
+ */
+#define SLOT_RB_TAG_KCTX(kctx) ((u64)(uintptr_t)(kctx))
 /**
 * struct slot_rb - Slot ringbuffer
 * @entries:		Ringbuffer entries
- * @last_context:	The last context to submit a job on this slot
+ * @last_kctx_tagged:	The last context that submitted a job to the slot's
+ *			HEAD_NEXT register. The value is a tagged variant so
+ *			must not be dereferenced. It is used in operation to
+ *			track when shader core L1 caches might contain a
+ *			previous context's data, and so must only be set to
+ *			SLOT_RB_NULL_TAG_VAL after reset/powerdown of the
+ *			cores. In slot job submission, if there is a kctx
+ *			change, and the relevant katom is configured with
+ *			BASE_JD_REQ_SKIP_CACHE_START, a L1 read only cache
+ *			maintenance operation is enforced.
 * @read_idx:		Current read index of buffer
 * @write_idx:		Current write index of buffer
 * @job_chain_flag:	Flag used to implement jobchain disambiguation
@@ -49,7 +70,7 @@ struct rb_entry {
 struct slot_rb {
 	struct rb_entry entries[SLOT_RB_SIZE];

-	struct kbase_context *last_context;
+	u64 last_kctx_tagged;

 	u8 read_idx;
 	u8 write_idx;
--- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_hw.c
+++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_hw.c
@@ -33,6 +33,7 @@
 #include <mali_kbase_reset_gpu.h>
 #include <mali_kbase_ctx_sched.h>
 #include <mali_kbase_kinstr_jm.h>
+#include <mali_kbase_hwaccess_instr.h>
 #include <mali_kbase_hwcnt_context.h>
 #include <device/mali_kbase_device.h>
 #include <backend/gpu/mali_kbase_irq_internal.h>
@@ -198,7 +199,9 @@ void kbase_job_hw_submit(struct kbase_device *kbdev,
 	u32 cfg;
 	u64 const jc_head = select_job_chain(katom);
 	u64 affinity;
+	struct slot_rb *ptr_slot_rb = &kbdev->hwaccess.backend.slot_rb[js];

+	lockdep_assert_held(&kbdev->hwaccess_lock);
 	KBASE_DEBUG_ASSERT(kbdev);
 	KBASE_DEBUG_ASSERT(katom);

@@ -227,9 +230,23 @@ void kbase_job_hw_submit(struct kbase_device *kbdev,
 			!(kbdev->serialize_jobs & KBASE_SERIALIZE_RESET))
 		cfg |= JS_CONFIG_ENABLE_FLUSH_REDUCTION;

-	if (0 != (katom->core_req & BASE_JD_REQ_SKIP_CACHE_START))
-		cfg |= JS_CONFIG_START_FLUSH_NO_ACTION;
-	else
+	if (0 != (katom->core_req & BASE_JD_REQ_SKIP_CACHE_START)) {
+		/* Force a cache maintenance operation if the newly submitted
+		 * katom to the slot is from a different kctx. For a JM GPU
+		 * that has the feature BASE_HW_FEATURE_FLUSH_INV_SHADER_OTHER,
+		 * applies a FLUSH_INV_SHADER_OTHER. Otherwise, do a
+		 * FLUSH_CLEAN_INVALIDATE.
+		 */
+		u64 tagged_kctx = ptr_slot_rb->last_kctx_tagged;
+
+		if (tagged_kctx != SLOT_RB_NULL_TAG_VAL && tagged_kctx != SLOT_RB_TAG_KCTX(kctx)) {
+			if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_FLUSH_INV_SHADER_OTHER))
+				cfg |= JS_CONFIG_START_FLUSH_INV_SHADER_OTHER;
+			else
+				cfg |= JS_CONFIG_START_FLUSH_CLEAN_INVALIDATE;
+		} else
+			cfg |= JS_CONFIG_START_FLUSH_NO_ACTION;
+	} else
 		cfg |= JS_CONFIG_START_FLUSH_CLEAN_INVALIDATE;

 	if (0 != (katom->core_req & BASE_JD_REQ_SKIP_CACHE_END) &&
@@ -246,13 +263,13 @@ void kbase_job_hw_submit(struct kbase_device *kbdev,
 	    (katom->core_req & BASE_JD_REQ_END_RENDERPASS))
 		cfg |= JS_CONFIG_DISABLE_DESCRIPTOR_WR_BK;

-	if (!kbdev->hwaccess.backend.slot_rb[js].job_chain_flag) {
+	if (!ptr_slot_rb->job_chain_flag) {
 		cfg |= JS_CONFIG_JOB_CHAIN_FLAG;
 		katom->atom_flags |= KBASE_KATOM_FLAGS_JOBCHAIN;
-		kbdev->hwaccess.backend.slot_rb[js].job_chain_flag = true;
+		ptr_slot_rb->job_chain_flag = true;
 	} else {
 		katom->atom_flags &= ~KBASE_KATOM_FLAGS_JOBCHAIN;
-		kbdev->hwaccess.backend.slot_rb[js].job_chain_flag = false;
+		ptr_slot_rb->job_chain_flag = false;
 	}

 	kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_CONFIG_NEXT), cfg);
@@ -290,6 +307,10 @@ void kbase_job_hw_submit(struct kbase_device *kbdev,
 			&kbdev->gpu_props.props.raw_props.js_features[js],
 			"ctx_nr,atom_nr");
 	kbase_kinstr_jm_atom_hw_submit(katom);
+
+	/* Update the slot's last katom submission kctx */
+	ptr_slot_rb->last_kctx_tagged = SLOT_RB_TAG_KCTX(kctx);
+
 #if IS_ENABLED(CONFIG_GPU_TRACEPOINTS)
 	if (!kbase_backend_nr_atoms_submitted(kbdev, js)) {
 		/* If this is the only job on the slot, trace it as starting */
@@ -300,7 +321,6 @@ void kbase_job_hw_submit(struct kbase_device *kbdev,
 						sizeof(js_string)),
 				ktime_to_ns(katom->start_timestamp),
 				(u32)katom->kctx->id, 0, katom->work_id);
-		kbdev->hwaccess.backend.slot_rb[js].last_context = katom->kctx;
 	}
 #endif

@@ -823,7 +843,7 @@ void kbase_jm_wait_for_zero_jobs(struct kbase_context *kctx)
 	if (timeout != 0)
 		goto exit;

-	if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_NONE)) {
+	if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_HWC_UNRECOVERABLE_ERROR)) {
 		dev_err(kbdev->dev,
 			"Issuing GPU soft-reset because jobs failed to be killed (within %d ms) as part of context termination (e.g. process exit)\n",
 			ZAP_TIMEOUT);
@@ -938,6 +958,7 @@ void kbase_job_slot_hardstop(struct kbase_context *kctx, int js,
 	stopped = kbase_backend_soft_hard_stop_slot(kbdev, kctx, js,
 							target_katom,
 							JS_COMMAND_HARD_STOP);
+	CSTD_UNUSED(stopped);
 }

 /**
@@ -1177,6 +1198,13 @@ static void kbasep_reset_timeout_worker(struct work_struct *data)
 	kbase_pm_metrics_update(kbdev, NULL);
 	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);

+	/* Tell hardware counters a reset is about to occur.
+	 * If the instr backend is in an unrecoverable error state (e.g. due to
+	 * HW being unresponsive), this will transition the backend out of
+	 * it, on the assumption a reset will fix whatever problem there was.
+	 */
+	kbase_instr_hwcnt_on_before_reset(kbdev);
+
 	/* Reset the GPU */
 	kbase_pm_init_hw(kbdev, 0);

@@ -1309,7 +1337,7 @@ static void kbasep_try_reset_gpu_early(struct kbase_device *kbdev)
 * @kbdev: kbase device
 * @flags: Bitfield indicating impact of reset (see flag defines)
 *
- * This function just soft-stops all the slots to ensure that as many jobs as
+ * This function soft-stops all the slots to ensure that as many jobs as
 * possible are saved.
 *
 * Return:
@@ -1323,7 +1351,6 @@ bool kbase_prepare_to_reset_gpu_locked(struct kbase_device *kbdev,
 {
 	int i;

-	CSTD_UNUSED(flags);
 	KBASE_DEBUG_ASSERT(kbdev);

 #ifdef CONFIG_MALI_ARBITER_SUPPORT
@@ -1335,6 +1362,9 @@ bool kbase_prepare_to_reset_gpu_locked(struct kbase_device *kbdev,
 	}
 #endif

+	if (flags & RESET_FLAGS_HWC_UNRECOVERABLE_ERROR)
+		kbase_instr_hwcnt_on_unrecoverable_error(kbdev);
+
 	if (atomic_cmpxchg(&kbdev->hwaccess.backend.reset_gpu,
 						KBASE_RESET_GPU_NOT_PENDING,
 						KBASE_RESET_GPU_PREPARED) !=
--- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_rb.c
+++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_rb.c
@@ -760,6 +760,13 @@ static int kbase_jm_exit_protected_mode(struct kbase_device *kbdev,
 		/* ***TRANSITION TO HIGHER STATE*** */
 		fallthrough;
 	case KBASE_ATOM_EXIT_PROTECTED_RESET:
+		/* L2 cache has been turned off (which is needed prior to the reset of GPU
+		 * to exit the protected mode), so the override flag can be safely cleared.
+		 * Even if L2 cache is powered up again before the actual reset, it should
+		 * not be an issue (there are no jobs running on the GPU).
+		 */
+		kbase_pm_protected_override_disable(kbdev);
+
 		/* Issue the reset to the GPU */
 		err = kbase_gpu_protected_mode_reset(kbdev);

@@ -768,7 +775,6 @@ static int kbase_jm_exit_protected_mode(struct kbase_device *kbdev,

 		if (err) {
 			kbdev->protected_mode_transition = false;
-			kbase_pm_protected_override_disable(kbdev);

 			/* Failed to exit protected mode, fail atom */
 			katom[idx]->event_code = BASE_JD_EVENT_JOB_INVALID;
@@ -1069,9 +1075,9 @@ kbase_rb_atom_might_depend(const struct kbase_jd_atom *katom_a,
 /**
 * kbase_gpu_irq_evict - evict a slot's JSn_HEAD_NEXT atom from the HW if it is
 *                       related to a failed JSn_HEAD atom
- * @kbdev kbase device
- * @js job slot to check
- * @completion_code completion code of the failed atom
+ * @kbdev: kbase device
+ * @js: job slot to check
+ * @completion_code: completion code of the failed atom
 *
 * Note: 'STOPPED' atoms are considered 'failed', as they are in the HW, but
 * unlike other failure codes we _can_ re-run them.
@@ -1129,6 +1135,14 @@ bool kbase_gpu_irq_evict(struct kbase_device *kbdev, int js,
 		if (next_katom->core_req & BASE_JD_REQ_PERMON)
 			kbase_pm_release_gpu_cycle_counter_nolock(kbdev);

+		/* On evicting the next_katom, the last submission kctx on the
+		 * given job slot then reverts back to the one that owns katom.
+		 * The aim is to enable the next submission that can determine
+		 * if the read only shader core L1 cache should be invalidated.
+		 */
+		kbdev->hwaccess.backend.slot_rb[js].last_kctx_tagged =
+			SLOT_RB_TAG_KCTX(katom->kctx);
+
 		return true;
 	}

@@ -1137,11 +1151,11 @@ bool kbase_gpu_irq_evict(struct kbase_device *kbdev, int js,

 /**
 * kbase_gpu_complete_hw - complete the atom in a slot's JSn_HEAD
- * @kbdev kbase device
- * @js job slot to check
- * @completion_code completion code of the completed atom
- * @job_tail value read from JSn_TAIL, for STOPPED atoms
- * @end_timestamp pointer to approximate ktime value when the katom completed
+ * @kbdev: kbase device
+ * @js: job slot to check
+ * @completion_code: completion code of the completed atom
+ * @job_tail: value read from JSn_TAIL, for STOPPED atoms
+ * @end_timestamp: pointer to approximate ktime value when the katom completed
 *
 * Among other operations, this also executes step 2 of a 2-step process of
 * removing any related atoms from a slot's JSn_HEAD_NEXT (ringbuffer index 1),
@@ -1323,8 +1337,6 @@ void kbase_gpu_complete_hw(struct kbase_device *kbdev, int js,
 						ktime_to_ns(*end_timestamp),
 						(u32)next_katom->kctx->id, 0,
 						next_katom->work_id);
-			kbdev->hwaccess.backend.slot_rb[js].last_context =
-							next_katom->kctx;
 		} else {
 			char js_string[16];

@@ -1333,7 +1345,6 @@ void kbase_gpu_complete_hw(struct kbase_device *kbdev, int js,
 							sizeof(js_string)),
 						ktime_to_ns(ktime_get()), 0, 0,
 						0);
-			kbdev->hwaccess.backend.slot_rb[js].last_context = 0;
 		}
 	}
 #endif
@@ -1427,6 +1438,9 @@ void kbase_backend_reset(struct kbase_device *kbdev, ktime_t *end_timestamp)
 			katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
 			kbase_jm_complete(kbdev, katom, end_timestamp);
 		}
+
+		/* Clear the slot's last katom submission kctx on reset */
+		kbdev->hwaccess.backend.slot_rb[js].last_kctx_tagged = SLOT_RB_NULL_TAG_VAL;
 	}

 	/* Re-enable GPU hardware counters if we're resetting from protected
@@ -1649,6 +1663,11 @@ bool kbase_backend_soft_hard_stop_slot(struct kbase_device *kbdev,
 						kbase_gpu_remove_atom(kbdev,
 								katom_idx1,
 								action, true);
+						/* Revert last_kctx_tagged to idx0's context. */
+						kbdev->hwaccess.backend.slot_rb[js]
+							.last_kctx_tagged =
+							SLOT_RB_TAG_KCTX(katom_idx0->kctx);
+
 						stop_x_dep_idx1 =
 					should_stop_x_dep_slot(katom_idx1);

@@ -1724,6 +1743,10 @@ bool kbase_backend_soft_hard_stop_slot(struct kbase_device *kbdev,
 					kbase_gpu_remove_atom(kbdev, katom_idx1,
 									action,
 									false);
+					/* Revert last_kctx_tagged to idx0's context, or mark as purged */
+					kbdev->hwaccess.backend.slot_rb[js].last_kctx_tagged =
+						kctx_idx0 ? SLOT_RB_TAG_KCTX(katom_idx0->kctx) :
+							    SLOT_RB_TAG_PURGED;
 				} else {
 					/* idx0 has already completed - stop
 					 * idx1
@@ -1753,7 +1776,8 @@ void kbase_backend_cache_clean(struct kbase_device *kbdev,
 		struct kbase_jd_atom *katom)
 {
 	if (katom->need_cache_flush_cores_retained) {
-		kbase_gpu_start_cache_clean(kbdev);
+		kbase_gpu_start_cache_clean(kbdev,
+					    GPU_COMMAND_CACHE_CLN_INV_FULL);
 		kbase_gpu_wait_cache_clean(kbdev);

 		katom->need_cache_flush_cores_retained = false;
@@ -1811,3 +1835,34 @@ void kbase_gpu_dump_slots(struct kbase_device *kbdev)

 	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
 }
+
+/*
+ * Forget @kctx in the per-slot "last submitted context" tracking.
+ *
+ * Every job slot whose last_kctx_tagged matches @kctx is re-tagged as
+ * SLOT_RB_TAG_PURGED. If at least one slot matched, every slot that is still
+ * at SLOT_RB_NULL_TAG_VAL is re-tagged as purged too.
+ *
+ * The caller must hold kbdev->hwaccess_lock.
+ */
+void kbase_backend_slot_kctx_purge_locked(struct kbase_device *kbdev, struct kbase_context *kctx)
+{
+	bool kctx_was_tracked = false;
+	int slot;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	/* First pass: purge the slots that last ran a job from this context */
+	for (slot = 0; slot < kbdev->gpu_props.num_job_slots; slot++) {
+		u64 slot_tag = kbdev->hwaccess.backend.slot_rb[slot].last_kctx_tagged;
+
+		if (slot_tag != SLOT_RB_TAG_KCTX(kctx))
+			continue;
+
+		kbdev->hwaccess.backend.slot_rb[slot].last_kctx_tagged = SLOT_RB_TAG_PURGED;
+		kctx_was_tracked = true;
+	}
+
+	/* The context was not recorded on any slot: nothing more to do */
+	if (!kctx_was_tracked)
+		return;
+
+	/* Second pass: the context had run some jobs before the purge, so
+	 * slots still holding SLOT_RB_NULL_TAG_VAL are marked as purged too.
+	 */
+	for (slot = 0; slot < kbdev->gpu_props.num_job_slots; slot++) {
+		if (kbdev->hwaccess.backend.slot_rb[slot].last_kctx_tagged != SLOT_RB_NULL_TAG_VAL)
+			continue;
+
+		kbdev->hwaccess.backend.slot_rb[slot].last_kctx_tagged = SLOT_RB_TAG_PURGED;
+	}
+}
--- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_l2_mmu_config.c
+++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_l2_mmu_config.c
@@ -26,7 +26,7 @@
 #include "mali_kbase_l2_mmu_config.h"

 /**
- * struct l2_mmu_config_limit_region
+ * struct l2_mmu_config_limit_region - L2 MMU limit field
 *
 * @value:    The default value to load into the L2_MMU_CONFIG register
 * @mask:     The shifted mask of the field in the L2_MMU_CONFIG register
@@ -39,7 +39,7 @@ struct l2_mmu_config_limit_region {
 };

 /**
- * struct l2_mmu_config_limit
+ * struct l2_mmu_config_limit - L2 MMU read and write limit
 *
 * @product_model:    The GPU for which this entry applies
 * @read:             Values for the read limit field
--- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_model_dummy.c
+++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_model_dummy.c
--- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_model_dummy.h
+++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_model_dummy.h
@@ -0,0 +1,177 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ *
+ * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+/*
+ * Dummy Model interface
+ */
+
+#ifndef _KBASE_MODEL_DUMMY_H_
+#define _KBASE_MODEL_DUMMY_H_
+
+#include <uapi/gpu/arm/bifrost/backend/gpu/mali_kbase_model_dummy.h>
+
+#define model_error_log(module, ...) pr_err(__VA_ARGS__)
+
+#define NUM_SLOTS 4		/*number of job slots */
+
+/*Errors Mask Codes*/
+/* each bit of errors_mask is associated to a specific error:
+ * NON FAULT STATUS CODES: only the following are implemented since the others
+ * represent normal working statuses
+ */
+#define KBASE_JOB_INTERRUPTED         (1<<0)
+#define KBASE_JOB_STOPPED             (1<<1)
+#define KBASE_JOB_TERMINATED          (1<<2)
+
+/* JOB EXCEPTIONS: */
+#define KBASE_JOB_CONFIG_FAULT        (1<<3)
+#define KBASE_JOB_POWER_FAULT         (1<<4)
+#define KBASE_JOB_READ_FAULT          (1<<5)
+#define KBASE_JOB_WRITE_FAULT         (1<<6)
+#define KBASE_JOB_AFFINITY_FAULT      (1<<7)
+#define KBASE_JOB_BUS_FAULT           (1<<8)
+#define KBASE_INSTR_INVALID_PC        (1<<9)
+#define KBASE_INSTR_INVALID_ENC       (1<<10)
+#define KBASE_INSTR_TYPE_MISMATCH     (1<<11)
+#define KBASE_INSTR_OPERAND_FAULT     (1<<12)
+#define KBASE_INSTR_TLS_FAULT         (1<<13)
+#define KBASE_INSTR_BARRIER_FAULT     (1<<14)
+#define KBASE_INSTR_ALIGN_FAULT       (1<<15)
+#define KBASE_DATA_INVALID_FAULT      (1<<16)
+#define KBASE_TILE_RANGE_FAULT        (1<<17)
+#define KBASE_ADDR_RANGE_FAULT        (1<<18)
+#define KBASE_OUT_OF_MEMORY           (1<<19)
+#define KBASE_UNKNOWN                 (1<<20)
+
+/* GPU EXCEPTIONS:*/
+#define KBASE_DELAYED_BUS_FAULT       (1<<21)
+#define KBASE_SHAREABILITY_FAULT      (1<<22)
+
+/* MMU EXCEPTIONS:*/
+#define KBASE_TRANSLATION_FAULT       (1<<23)
+#define KBASE_PERMISSION_FAULT        (1<<24)
+#define KBASE_TRANSTAB_BUS_FAULT      (1<<25)
+#define KBASE_ACCESS_FLAG             (1<<26)
+
+/* generic useful bitmasks */
+#define IS_A_JOB_ERROR ((KBASE_UNKNOWN << 1) - KBASE_JOB_INTERRUPTED)
+#define IS_A_MMU_ERROR ((KBASE_ACCESS_FLAG << 1) - KBASE_TRANSLATION_FAULT)
+#define IS_A_GPU_ERROR (KBASE_DELAYED_BUS_FAULT|KBASE_SHAREABILITY_FAULT)
+
+/* number of possible MMU address spaces */
+#define NUM_MMU_AS 16 /* total number of MMU address spaces as in
+		       * MMU_IRQ_RAWSTAT register
+		       */
+
+/* Forward declaration */
+struct kbase_device;
+
+/*
+ * the structure below carries the parameters used to trigger the simulation
+ * of a faulty HW condition for a specific job chain atom
+ */
+
+/* Description of one fault to inject, as passed to job_atom_inject_error() */
+struct kbase_error_params {
+	u64 jc;			/* matched against hw_error_status.current_jc */
+	u32 errors_mask;	/* copied into hw_error_status.errors_mask on a match */
+	u32 mmu_table_level;	/* copied into hw_error_status.mmu_table_level on a match */
+	u16 faulty_mmu_as;	/* copied into hw_error_status.faulty_mmu_as on a match */
+	u16 padding[3];		/* not copied by job_atom_inject_error() */
+};
+
+enum kbase_model_control_command {
+	/* Disable/Enable job completion in the dummy model */
+	KBASE_MC_DISABLE_JOBS
+};
+
+/* struct to control dummy model behavior */
+struct kbase_model_control_params {
+	s32 command;
+	s32 value;
+};
+
+/* struct to track faulty atoms: one node of the circular, singly linked
+ * list built by job_atom_inject_error()
+ */
+struct kbase_error_atom {
+	struct kbase_error_params params;	/* fault description for this atom */
+	struct kbase_error_atom *next;		/* next node; the last node points back to the first */
+};
+
+/* struct to track the system error state */
+struct error_status_t {
+	/* Bitmask of the KBASE_* error codes currently being simulated */
+	u32 errors_mask;
+	/* MMU table level of the simulated fault; the random generator picks
+	 * a value from 1 to MAX_MMU_TABLE_LEVEL
+	 */
+	u32 mmu_table_level;
+	/* MMU address space of the simulated fault; the random generator
+	 * picks a value below NUM_MMU_AS
+	 */
+	int faulty_mmu_as;
+
+	/* Job chain address compared against the injected atoms' jc */
+	u64 current_jc;
+	/* Job slot recorded by midgard_set_error() when an atom matches */
+	int current_job_slot;
+
+	/* NOTE(review): the fields below presumably mirror the like-named
+	 * model registers - confirm against the dummy model implementation
+	 */
+	u32 job_irq_rawstat;
+	u32 job_irq_status;
+	u32 js_status[NUM_SLOTS];	/* one entry per job slot */
+
+	u32 mmu_irq_mask;
+	u32 mmu_irq_rawstat;
+
+	u32 gpu_error_irq;
+	u32 gpu_fault_status;
+
+	/* One entry per MMU address space */
+	u32 as_faultstatus[NUM_MMU_AS];
+	u32 as_command[NUM_MMU_AS];
+	u64 as_transtab[NUM_MMU_AS];
+};
+
+void *midgard_model_create(const void *config);
+void midgard_model_destroy(void *h);
+u8 midgard_model_write_reg(void *h, u32 addr, u32 value);
+u8 midgard_model_read_reg(void *h, u32 addr,
+							u32 * const value);
+void gpu_generate_error(void);
+void midgard_set_error(int job_slot);
+int job_atom_inject_error(struct kbase_error_params *params);
+int gpu_model_control(void *h,
+				struct kbase_model_control_params *params);
+
+void gpu_model_set_dummy_prfcnt_sample(u32 *usr_data, u32 usr_data_size);
+void gpu_model_set_dummy_prfcnt_kernel_sample(u64 *usr_data, u32 usr_data_size);
+void gpu_model_get_dummy_prfcnt_cores(struct kbase_device *kbdev,
+		u64 *l2_present, u64 *shader_present);
+void gpu_model_set_dummy_prfcnt_cores(struct kbase_device *kbdev,
+		u64 l2_present, u64 shader_present);
+void gpu_model_set_dummy_prfcnt_base_cpu(u32 *base, struct kbase_device *kbdev,
+					 struct tagged_addr *pages,
+					 size_t page_count);
+/* Clear the counter values array maintained by the dummy model */
+void gpu_model_clear_prfcnt_values(void);
+
+/* Interrupt line selector for gpu_device_raise_irq() */
+enum gpu_dummy_irq {
+	GPU_DUMMY_JOB_IRQ,	/* served by reading JOB_IRQ_STATUS */
+	GPU_DUMMY_GPU_IRQ,	/* served by reading GPU_IRQ_STATUS */
+	GPU_DUMMY_MMU_IRQ	/* served by reading MMU_IRQ_STATUS */
+};
+
+void gpu_device_raise_irq(void *model,
+						enum gpu_dummy_irq irq);
+void gpu_device_set_data(void *model, void *data);
+void *gpu_device_get_data(void *model);
+
+extern struct error_status_t hw_error_status;
+
+#endif
--- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_model_error_generator.c
+++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_model_error_generator.c
@@ -0,0 +1,174 @@
+// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
+/*
+ *
+ * (C) COPYRIGHT 2018-2021 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+#include <mali_kbase.h>
+#include <linux/random.h>
+#include "backend/gpu/mali_kbase_model_dummy.h"
+
+/* all the error conditions supported by the model */
+#define TOTAL_FAULTS 27
+/* maximum number of levels in the MMU translation table tree */
+#define MAX_MMU_TABLE_LEVEL 4
+/* worst case scenario is <1 MMU fault + 1 job fault + 1 GPU fault> */
+#define MAX_CONCURRENT_FAULTS 3
+
+static struct kbase_error_atom *error_track_list;
+
+unsigned int rand_seed;
+
+/* the following error probabilities are set quite high in order to stress the driver */
+unsigned int error_probability = 50;	/* to be set between 0 and 100 */
+/* probability of having multiple errors given that there is an error */
+unsigned int multiple_error_probability = 50;
+
+/*
+ * Randomly populate hw_error_status with simulated faults.
+ *
+ * With probability error_probability (percent) a first fault is chosen,
+ * together with a faulty MMU address space and an MMU table level. Then,
+ * with probability multiple_error_probability (percent), one or two further
+ * faults are added. A candidate is redrawn when it falls in a fault class
+ * (job, MMU or GPU) that is already present in the mask.
+ */
+void gpu_generate_error(void)
+{
+	unsigned int extra_faults;
+
+	/* No fault at all this time */
+	if ((prandom_u32() % 100) >= error_probability)
+		return;
+
+	/* Pick the faulty MMU address space, the table level and the first
+	 * fault bit, in that order
+	 */
+	hw_error_status.faulty_mmu_as = prandom_u32() % NUM_MMU_AS;
+	hw_error_status.mmu_table_level =
+		1 + (prandom_u32() % MAX_MMU_TABLE_LEVEL);
+	hw_error_status.errors_mask =
+		(u32)(1 << (prandom_u32() % TOTAL_FAULTS));
+
+	/* A single fault only */
+	if ((prandom_u32() % 100) >= multiple_error_probability)
+		return;
+
+	extra_faults = 1 + (prandom_u32() % (MAX_CONCURRENT_FAULTS - 1));
+
+	while (extra_faults > 0) {
+		const u32 candidate =
+			(u32)(1 << (prandom_u32() % TOTAL_FAULTS));
+		const u32 present = hw_error_status.errors_mask;
+
+		/* Reject a candidate whose fault class is already present,
+		 * or whose bit is already set, and draw again without
+		 * consuming one of the extra faults.
+		 */
+		if (((candidate & IS_A_JOB_ERROR) && (present & IS_A_JOB_ERROR)) ||
+		    ((candidate & IS_A_MMU_ERROR) && (present & IS_A_MMU_ERROR)) ||
+		    ((candidate & IS_A_GPU_ERROR) && (present & IS_A_GPU_ERROR)) ||
+		    ((candidate & ~present) == 0))
+			continue;
+
+		hw_error_status.errors_mask |= candidate;
+		extra_faults--;
+	}
+}
+
+/*
+ * Queue a fault description at the end of the circular error_track_list.
+ *
+ * @params: fault to record; its jc, errors_mask, mmu_table_level and
+ *          faulty_mmu_as fields are copied into a newly allocated node.
+ *
+ * Return: 0 on success, -ENOMEM if the node cannot be allocated.
+ */
+int job_atom_inject_error(struct kbase_error_params *params)
+{
+	struct kbase_error_atom *node;
+	struct kbase_error_atom *tail;
+
+	KBASE_DEBUG_ASSERT(params);
+
+	node = kzalloc(sizeof(*node), GFP_KERNEL);
+	if (node == NULL) {
+		model_error_log(KBASE_CORE,
+			"\njob_atom_inject_error: kzalloc failed for new_elem\n");
+		return -ENOMEM;
+	}
+
+	node->params.jc = params->jc;
+	node->params.errors_mask = params->errors_mask;
+	node->params.mmu_table_level = params->mmu_table_level;
+	node->params.faulty_mmu_as = params->faulty_mmu_as;
+
+	/* Empty list: the only node links to itself */
+	if (error_track_list == NULL) {
+		node->next = node;
+		error_track_list = node;
+		return 0;
+	}
+
+	/* Find the last node, i.e. the one that links back to the head */
+	for (tail = error_track_list; tail->next != error_track_list;
+	     tail = tail->next)
+		;
+
+	/* Append the new node and close the circle again */
+	node->next = error_track_list;
+	tail->next = node;
+
+	return 0;
+}
+
+/*
+ * Set up the simulated error state for the job about to run on @job_slot.
+ *
+ * With CONFIG_MALI_ERROR_INJECT_RANDOM the faults are generated randomly and
+ * @job_slot is not used. Otherwise the circular error_track_list is searched
+ * for a node whose jc equals hw_error_status.current_jc. The first match is
+ * copied into hw_error_status, unlinked from the list and freed.
+ */
+void midgard_set_error(int job_slot)
+{
+#ifdef CONFIG_MALI_ERROR_INJECT_RANDOM
+	gpu_generate_error();
+#else
+	struct kbase_error_atom *walker, *auxiliar;
+
+	if (error_track_list != NULL) {
+		/* auxiliar is always the predecessor of walker, which is
+		 * needed to unlink a node. The walk therefore starts at the
+		 * node after the head and visits the head last.
+		 */
+		walker = error_track_list->next;
+		auxiliar = error_track_list;
+		do {
+			if (walker->params.jc == hw_error_status.current_jc) {
+				/* found a faulty atom matching with the
+				 * current one
+				 */
+				hw_error_status.errors_mask =
+						walker->params.errors_mask;
+				hw_error_status.mmu_table_level =
+						walker->params.mmu_table_level;
+				hw_error_status.faulty_mmu_as =
+						walker->params.faulty_mmu_as;
+				hw_error_status.current_job_slot = job_slot;
+
+				if (walker->next == walker) {
+					/* only one element */
+					kfree(error_track_list);
+					error_track_list = NULL;
+				} else {
+					auxiliar->next = walker->next;
+					if (walker == error_track_list)
+						error_track_list = walker->next;
+
+					kfree(walker);
+				}
+				break;
+			}
+			auxiliar = walker;
+			walker = walker->next;
+			/* Stop once the head itself has been examined, i.e.
+			 * when auxiliar has come back round to it. Testing
+			 * auxiliar->next here would end the walk one node
+			 * early and never compare the head of a list with
+			 * more than one element.
+			 */
+		} while (auxiliar != error_track_list);
+	}
+#endif				/* CONFIG_MALI_ERROR_INJECT_RANDOM */
+}
--- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_model_linux.c
+++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_model_linux.c
@@ -0,0 +1,254 @@
+// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
+/*
+ *
+ * (C) COPYRIGHT 2010, 2012-2015, 2017-2021 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+/*
+ * Model interface
+ */
+
+#include <mali_kbase.h>
+#include <gpu/mali_kbase_gpu_regmap.h>
+#include <backend/gpu/mali_kbase_model_dummy.h>
+#include "backend/gpu/mali_kbase_model_linux.h"
+#include "device/mali_kbase_device.h"
+#include "mali_kbase_irq_internal.h"
+
+#include <linux/kthread.h>
+
+/* Per-raise bookkeeping for a simulated interrupt: allocated from
+ * kbdev->irq_slab by gpu_device_raise_irq() and freed by the worker that
+ * serves it.
+ */
+struct model_irq_data {
+	struct kbase_device *kbdev;	/* device the interrupt was raised for */
+	struct work_struct work;	/* work item queued on kbdev->irq_workq */
+};
+
+/*
+ * Worker serving a simulated job interrupt.
+ *
+ * Consumes the serving_job_irq flag set by gpu_device_raise_irq() and, for as
+ * long as JOB_IRQ_STATUS reads non-zero, passes the status to the job
+ * interrupt handler under hwaccess_lock. The whole sequence repeats while the
+ * flag has been set again in the meantime. The work's model_irq_data is
+ * freed before returning.
+ */
+static void serve_job_irq(struct work_struct *work)
+{
+	struct model_irq_data *irq_data =
+		container_of(work, struct model_irq_data, work);
+	struct kbase_device *kbdev = irq_data->kbdev;
+	unsigned long irq_flags;
+	u32 irq_status;
+
+	/* Claim the pending flag; loop again if it was re-raised meanwhile */
+	while (atomic_cmpxchg(&kbdev->serving_job_irq, 1, 0) == 1) {
+		for (;;) {
+			irq_status = kbase_reg_read(kbdev,
+					JOB_CONTROL_REG(JOB_IRQ_STATUS));
+			if (!irq_status)
+				break;
+
+			/* Handle the IRQ */
+			spin_lock_irqsave(&kbdev->hwaccess_lock, irq_flags);
+#if MALI_USE_CSF
+			kbase_csf_interrupt(kbdev, irq_status);
+#else
+			kbase_job_done(kbdev, irq_status);
+#endif
+			spin_unlock_irqrestore(&kbdev->hwaccess_lock,
+					       irq_flags);
+		}
+	}
+
+	kmem_cache_free(kbdev->irq_slab, irq_data);
+}
+
+/*
+ * Worker serving a simulated GPU interrupt.
+ *
+ * Consumes the serving_gpu_irq flag set by gpu_device_raise_irq() and passes
+ * every non-zero GPU_IRQ_STATUS value to kbase_gpu_interrupt(). The whole
+ * sequence repeats while the flag has been set again in the meantime. The
+ * work's model_irq_data is freed before returning.
+ */
+static void serve_gpu_irq(struct work_struct *work)
+{
+	struct model_irq_data *irq_data =
+		container_of(work, struct model_irq_data, work);
+	struct kbase_device *kbdev = irq_data->kbdev;
+	u32 irq_status;
+
+	/* Claim the pending flag; loop again if it was re-raised meanwhile */
+	while (atomic_cmpxchg(&kbdev->serving_gpu_irq, 1, 0) == 1) {
+		for (;;) {
+			irq_status = kbase_reg_read(kbdev,
+					GPU_CONTROL_REG(GPU_IRQ_STATUS));
+			if (!irq_status)
+				break;
+
+			/* Handle the IRQ */
+			kbase_gpu_interrupt(kbdev, irq_status);
+		}
+	}
+
+	kmem_cache_free(kbdev->irq_slab, irq_data);
+}
+
+/*
+ * Worker serving a simulated MMU interrupt.
+ *
+ * If the serving_mmu_irq flag set by gpu_device_raise_irq() is still
+ * pending, it is consumed and every non-zero MMU_IRQ_STATUS value is passed
+ * to kbase_mmu_interrupt(). Unlike the job and GPU workers, the flag is
+ * checked once only. The work's model_irq_data is freed before returning.
+ */
+static void serve_mmu_irq(struct work_struct *work)
+{
+	struct model_irq_data *irq_data =
+		container_of(work, struct model_irq_data, work);
+	struct kbase_device *kbdev = irq_data->kbdev;
+	u32 irq_status;
+
+	/* Claim the pending flag, if it is set */
+	if (atomic_cmpxchg(&kbdev->serving_mmu_irq, 1, 0) == 1) {
+		for (;;) {
+			irq_status = kbase_reg_read(kbdev,
+					MMU_REG(MMU_IRQ_STATUS));
+			if (!irq_status)
+				break;
+
+			/* Handle the IRQ */
+			kbase_mmu_interrupt(kbdev, irq_status);
+		}
+	}
+
+	kmem_cache_free(kbdev->irq_slab, irq_data);
+}
+
+/*
+ * Raise a simulated interrupt from the model.
+ *
+ * @model: model handle; the owning kbase device is looked up with
+ *         gpu_device_get_data()
+ * @irq:   which interrupt line to raise
+ *
+ * A work item is allocated with GFP_ATOMIC, bound to the worker matching
+ * @irq, and queued on kbdev->irq_workq after the corresponding serving_*_irq
+ * flag has been set. If the allocation fails, or @irq is not a known value,
+ * nothing is queued.
+ */
+void gpu_device_raise_irq(void *model,
+				enum gpu_dummy_irq irq)
+{
+	struct model_irq_data *data;
+	struct kbase_device *kbdev = gpu_device_get_data(model);
+
+	KBASE_DEBUG_ASSERT(kbdev);
+
+	data = kmem_cache_alloc(kbdev->irq_slab, GFP_ATOMIC);
+	if (data == NULL)
+		return;
+
+	data->kbdev = kbdev;
+
+	switch (irq) {
+	case GPU_DUMMY_JOB_IRQ:
+		INIT_WORK(&data->work, serve_job_irq);
+		atomic_set(&kbdev->serving_job_irq, 1);
+		break;
+	case GPU_DUMMY_GPU_IRQ:
+		INIT_WORK(&data->work, serve_gpu_irq);
+		atomic_set(&kbdev->serving_gpu_irq, 1);
+		break;
+	case GPU_DUMMY_MMU_IRQ:
+		INIT_WORK(&data->work, serve_mmu_irq);
+		atomic_set(&kbdev->serving_mmu_irq, 1);
+		break;
+	default:
+		dev_warn(kbdev->dev, "Unknown IRQ");
+		kmem_cache_free(kbdev->irq_slab, data);
+		/* data has just been freed and its work item was never
+		 * initialised, so it must not reach queue_work() below.
+		 */
+		return;
+	}
+	queue_work(kbdev->irq_workq, &data->work);
+}
+
+/*
+ * Write @value to the model register at @offset.
+ *
+ * The access is forwarded to midgard_model_write_reg() with kbdev->reg_op_lock
+ * held and interrupts disabled. The model's return value is not checked.
+ */
+void kbase_reg_write(struct kbase_device *kbdev, u32 offset, u32 value)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->reg_op_lock, flags);
+	midgard_model_write_reg(kbdev->model, offset, value);
+	spin_unlock_irqrestore(&kbdev->reg_op_lock, flags);
+}
+
+KBASE_EXPORT_TEST_API(kbase_reg_write);
+
+/*
+ * Read the model register at @offset.
+ *
+ * The access is forwarded to midgard_model_read_reg() with kbdev->reg_op_lock
+ * held and interrupts disabled. The model's return value is not checked.
+ *
+ * Return: the value stored by the model, or 0 if the model stored nothing.
+ */
+u32 kbase_reg_read(struct kbase_device *kbdev, u32 offset)
+{
+	unsigned long flags;
+	/* Start from 0 so that stack garbage is never returned should the
+	 * model leave the output untouched - NOTE(review): whether it can do
+	 * so is not visible here.
+	 */
+	u32 val = 0;
+
+	spin_lock_irqsave(&kbdev->reg_op_lock, flags);
+	midgard_model_read_reg(kbdev->model, offset, &val);
+	spin_unlock_irqrestore(&kbdev->reg_op_lock, flags);
+
+	return val;
+}
+
+KBASE_EXPORT_TEST_API(kbase_reg_read);
+
+/**
+ * kbase_is_gpu_removed - Has the GPU been removed.
+ * @kbdev:    Kbase device pointer
+ *
+ * This function would return true if the GPU has been removed.
+ * It is stubbed here: @kbdev is not inspected.
+ *
+ * Return: Always false
+ */
+bool kbase_is_gpu_removed(struct kbase_device *kbdev)
+{
+	return false;
+}
+
+/*
+ * Set up the resources used to deliver simulated interrupts.
+ *
+ * Clears the three serving_*_irq flags, then creates the ordered workqueue
+ * on which the IRQ workers run and the slab cache their model_irq_data is
+ * allocated from.
+ *
+ * Return: 0 on success, -ENOMEM if either the workqueue or the slab cache
+ * cannot be created (the workqueue is destroyed again in the latter case).
+ */
+int kbase_install_interrupts(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev);
+
+	atomic_set(&kbdev->serving_job_irq, 0);
+	atomic_set(&kbdev->serving_gpu_irq, 0);
+	atomic_set(&kbdev->serving_mmu_irq, 0);
+
+	kbdev->irq_workq = alloc_ordered_workqueue("dummy irq queue", 0);
+	if (!kbdev->irq_workq)
+		goto fail;
+
+	kbdev->irq_slab = kmem_cache_create("dummy_irq_slab",
+				sizeof(struct model_irq_data), 0, 0, NULL);
+	if (!kbdev->irq_slab)
+		goto fail_destroy_workq;
+
+	return 0;
+
+fail_destroy_workq:
+	destroy_workqueue(kbdev->irq_workq);
+fail:
+	return -ENOMEM;
+}
+
+/*
+ * Release the resources created by kbase_install_interrupts(): the IRQ
+ * workqueue is destroyed first, then the slab cache its work items come from.
+ */
+void kbase_release_interrupts(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev);
+	destroy_workqueue(kbdev->irq_workq);
+	kmem_cache_destroy(kbdev->irq_slab);
+}
+
+/*
+ * Wait for the simulated interrupt work already queued on kbdev->irq_workq
+ * to finish, by flushing that workqueue.
+ */
+void kbase_synchronize_irqs(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev);
+	flush_workqueue(kbdev->irq_workq);
+}
+
+KBASE_EXPORT_TEST_API(kbase_synchronize_irqs);
+
+/*
+ * Stub for the model build: no handler is installed and all arguments are
+ * ignored.
+ *
+ * Return: always 0.
+ */
+int kbase_set_custom_irq_handler(struct kbase_device *kbdev,
+					irq_handler_t custom_handler,
+					int irq_type)
+{
+	return 0;
+}
+
+KBASE_EXPORT_TEST_API(kbase_set_custom_irq_handler);
+
+/*
+ * Test IRQ handler for the model build; @irq and @data are not used.
+ *
+ * Return: IRQ_NONE when @val is zero, IRQ_HANDLED otherwise.
+ */
+irqreturn_t kbase_gpu_irq_test_handler(int irq, void *data, u32 val)
+{
+	return val ? IRQ_HANDLED : IRQ_NONE;
+}
+
+KBASE_EXPORT_TEST_API(kbase_gpu_irq_test_handler);
+
+/*
+ * Create the dummy GPU model and attach it to @kbdev.
+ *
+ * The model handle is stored in kbdev->model (NULL on failure), @kbdev is
+ * registered as the model's data pointer and the register access lock is
+ * initialised.
+ *
+ * Return: 0 on success, -ENOMEM if the model cannot be created.
+ */
+int kbase_gpu_device_create(struct kbase_device *kbdev)
+{
+	void *model_handle = midgard_model_create(NULL);
+
+	kbdev->model = model_handle;
+	if (!model_handle)
+		return -ENOMEM;
+
+	gpu_device_set_data(model_handle, kbdev);
+
+	spin_lock_init(&kbdev->reg_op_lock);
+
+	dev_warn(kbdev->dev, "Using Dummy Model");
+
+	return 0;
+}
+
+/* Destroy the dummy GPU model stored in kbdev->model; the pointer itself is
+ * left unchanged.
+ */
+void kbase_gpu_device_destroy(struct kbase_device *kbdev)
+{
+	midgard_model_destroy(kbdev->model);
+}
--- a/include/uapi/gpu/arm/bifrost/csf/mali_gpu_csf_control_registers.h
+++ b/include/uapi/gpu/arm/bifrost/csf/mali_gpu_csf_control_registers.h
@@ -20,13 +20,13 @@
 */

 /*
- * This header was autogenerated, it should not be edited.
+ * Model interface
 */

-#ifndef _UAPI_GPU_CSF_CONTROL_REGISTERS_H_
-#define _UAPI_GPU_CSF_CONTROL_REGISTERS_H_
+#ifndef _KBASE_MODEL_LINUX_H_
+#define _KBASE_MODEL_LINUX_H_

-/* GPU_REGISTERS register offsets */
-#define GPU_CONTROL_MCU 0x3000 /* () MCU control registers */
+int kbase_gpu_device_create(struct kbase_device *kbdev);
+void kbase_gpu_device_destroy(struct kbase_device *kbdev);

-#endif /* _UAPI_GPU_CSF_CONTROL_REGISTERS_H_ */
+#endif				/* _KBASE_MODEL_LINUX_H_ */
--- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_backend.c
+++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_backend.c
@@ -568,11 +568,14 @@ static void kbase_pm_hwcnt_disable_worker(struct work_struct *data)
 * when system suspend takes place.
 * The function first waits for the @gpu_poweroff_wait_work to complete, which
 * could have been enqueued after the last PM reference was released.
+ *
+ * Return: 0 on success, negative value otherwise.
 */
-static void kbase_pm_do_poweroff_sync(struct kbase_device *kbdev)
+static int kbase_pm_do_poweroff_sync(struct kbase_device *kbdev)
 {
 	struct kbase_pm_backend_data *backend = &kbdev->pm.backend;
 	unsigned long flags;
+	int ret = 0;

 	WARN_ON(kbdev->pm.active_count);

@@ -581,8 +584,8 @@ static void kbase_pm_do_poweroff_sync(struct kbase_device *kbdev)
 	kbase_pm_lock(kbdev);
 	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
 	WARN_ON(backend->poweroff_wait_in_progress);
+	WARN_ON(backend->gpu_sleep_mode_active);
 	if (backend->gpu_powered) {
-		int ret;

 		backend->mcu_desired = false;
 		backend->l2_desired = false;
@@ -591,17 +594,11 @@ static void kbase_pm_do_poweroff_sync(struct kbase_device *kbdev)

 		ret = kbase_pm_wait_for_desired_state(kbdev);
 		if (ret) {
-			dev_warn(kbdev->dev, "Wait failed on synchronous power off");
-			kbase_pm_unlock(kbdev);
-			/* Wait for the completion of reset, triggered due to
-			 * the previous failure.
-			 */
-			kbase_reset_gpu_wait(kbdev);
-			/* Wait again for the poweroff work which could have
-			 * been enqueued by the GPU reset worker.
-			 */
-			kbase_pm_wait_for_poweroff_work_complete(kbdev);
-			kbase_pm_lock(kbdev);
+			dev_warn(
+				kbdev->dev,
+				"Wait for pm state change failed on synchronous power off");
+			ret = -EBUSY;
+			goto out;
 		}

 		/* Due to the power policy, GPU could have been kept active
@@ -614,12 +611,19 @@ static void kbase_pm_do_poweroff_sync(struct kbase_device *kbdev)
 			backend->gpu_idled = true;
 		}

-		kbase_pm_clock_off(kbdev);
+		if (!kbase_pm_clock_off(kbdev)) {
+			dev_warn(
+				kbdev->dev,
+				"Failed to turn off GPU clocks on synchronous power off, MMU faults pending");
+			ret = -EBUSY;
+		}
 	} else {
 		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
 	}

+out:
 	kbase_pm_unlock(kbdev);
+	return ret;
 }
 #endif

@@ -793,7 +797,7 @@ void kbase_hwaccess_pm_halt(struct kbase_device *kbdev)
 	KBASE_DEBUG_ASSERT(kbdev != NULL);

 #if MALI_USE_CSF && defined(KBASE_PM_RUNTIME)
-	kbase_pm_do_poweroff_sync(kbdev);
+	WARN_ON(kbase_pm_do_poweroff_sync(kbdev));
 #else
 	mutex_lock(&kbdev->pm.lock);
 	kbase_pm_do_poweroff(kbdev);
@@ -902,10 +906,14 @@ void kbase_hwaccess_pm_gpu_idle(struct kbase_device *kbdev)
 	kbase_pm_update_active(kbdev);
 }

-void kbase_hwaccess_pm_suspend(struct kbase_device *kbdev)
+int kbase_hwaccess_pm_suspend(struct kbase_device *kbdev)
 {
+	int ret = 0;
+
 #if MALI_USE_CSF && defined(KBASE_PM_RUNTIME)
-	kbase_pm_do_poweroff_sync(kbdev);
+	ret = kbase_pm_do_poweroff_sync(kbdev);
+	if (ret)
+		return ret;
 #else
 	/* Force power off the GPU and all cores (regardless of policy), only
 	 * after the PM active count reaches zero (otherwise, we risk turning it
@@ -929,6 +937,8 @@ void kbase_hwaccess_pm_suspend(struct kbase_device *kbdev)

 	if (kbdev->pm.backend.callback_power_suspend)
 		kbdev->pm.backend.callback_power_suspend(kbdev);
+
+	return ret;
 }

 void kbase_hwaccess_pm_resume(struct kbase_device *kbdev)
@@ -1044,7 +1054,12 @@ static int pm_handle_mcu_sleep_on_runtime_suspend(struct kbase_device *kbdev)

 	ret = kbase_pm_force_mcu_wakeup_after_sleep(kbdev);
 	if (ret) {
-		dev_warn(kbdev->dev, "Wait for MCU wake up failed on runtime suspend");
+		spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+		dev_warn(
+			kbdev->dev,
+			"Waiting for MCU to wake up failed on runtime suspend");
+		kbdev->pm.backend.gpu_wakeup_override = false;
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
 		return ret;
 	}

--- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_ca.c
+++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_ca.c
@@ -26,6 +26,9 @@
 #include <mali_kbase.h>
 #include <mali_kbase_pm.h>
 #include <backend/gpu/mali_kbase_pm_internal.h>
+#if IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI)
+#include <backend/gpu/mali_kbase_model_dummy.h>
+#endif /* CONFIG_MALI_BIFROST_NO_MALI */
 #include <mali_kbase_dummy_job_wa.h>

 int kbase_pm_ca_init(struct kbase_device *kbdev)
@@ -120,7 +123,9 @@ u64 kbase_pm_ca_get_instr_core_mask(struct kbase_device *kbdev)
 {
 	lockdep_assert_held(&kbdev->hwaccess_lock);

-#if   MALI_USE_CSF
+#if IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI)
+	return (((1ull) << KBASE_DUMMY_MODEL_MAX_SHADER_CORES) - 1);
+#elif MALI_USE_CSF
 	return kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_SHADER);
 #else
 	return kbdev->pm.backend.pm_shaders_core_mask;
--- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_ca.h
+++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_ca.h
@@ -29,10 +29,10 @@
 /**
 * kbase_pm_ca_init - Initialize core availability framework
 *
- * Must be called before calling any other core availability function
- *
 * @kbdev: The kbase device structure for the device (must be a valid pointer)
 *
+ * Must be called before calling any other core availability function
+ *
 * Return: 0 if the core availability framework was successfully initialized,
 *         -errno otherwise
 */
--- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_ca_devfreq.h
+++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_ca_devfreq.h
@@ -30,12 +30,12 @@
 /**
 * struct kbasep_pm_ca_policy_devfreq - Private structure for devfreq ca policy
 *
- * This contains data that is private to the devfreq core availability
- * policy.
- *
 * @cores_desired: Cores that the policy wants to be available
 * @cores_enabled: Cores that the policy is currently returning as available
 * @cores_used: Cores currently powered or transitioning
+ *
+ * This contains data that is private to the devfreq core availability
+ * policy.
 */
 struct kbasep_pm_ca_policy_devfreq {
 	u64 cores_desired;
--- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_coarse_demand.h
+++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_coarse_demand.h
@@ -52,10 +52,8 @@
 /**
 * struct kbasep_pm_policy_coarse_demand - Private structure for coarse demand
 *                                         policy
- *
- * This contains data that is private to the coarse demand power policy.
- *
 * @dummy: Dummy member - no state needed
+ *
+ * This contains data that is private to the coarse demand power policy.
 */
 struct kbasep_pm_policy_coarse_demand {
 	int dummy;
--- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_defs.h
+++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_defs.h
@@ -40,6 +40,11 @@ struct kbase_jd_atom;
 /**
 * enum kbase_pm_core_type - The types of core in a GPU.
 *
+ * @KBASE_PM_CORE_L2: The L2 cache
+ * @KBASE_PM_CORE_SHADER: Shader cores
+ * @KBASE_PM_CORE_TILER: Tiler cores
+ * @KBASE_PM_CORE_STACK: Core stacks
+ *
 * These enumerated values are used in calls to
 * - kbase_pm_get_present_cores()
 * - kbase_pm_get_active_cores()
@@ -49,11 +54,6 @@ struct kbase_jd_atom;
 * They specify which type of core should be acted on.  These values are set in
 * a manner that allows core_type_to_reg() function to be simpler and more
 * efficient.
- *
- * @KBASE_PM_CORE_L2: The L2 cache
- * @KBASE_PM_CORE_SHADER: Shader cores
- * @KBASE_PM_CORE_TILER: Tiler cores
- * @KBASE_PM_CORE_STACK: Core stacks
 */
 enum kbase_pm_core_type {
 	KBASE_PM_CORE_L2 = L2_PRESENT_LO,
@@ -215,9 +215,6 @@ union kbase_pm_policy_data {
 /**
 * struct kbase_pm_backend_data - Data stored per device for power management.
 *
- * This structure contains data for the power management framework. There is one
- * instance of this structure per device in the system.
- *
 * @pm_current_policy: The policy that is currently actively controlling the
 *                     power state.
 * @pm_policy_data:    Private data for current PM policy. This is automatically
@@ -324,6 +321,10 @@ union kbase_pm_policy_data {
 * @policy_change_lock: Used to serialize the policy change calls. In CSF case,
 *                      the change of policy may involve the scheduler to
 *                      suspend running CSGs and then reconfigure the MCU.
+ * @core_idle_wq: Workqueue for executing the @core_idle_work.
+ * @core_idle_work: Work item used to wait for undesired cores to become inactive.
+ *                  The work item is enqueued when Host controls the power for
+ *                  shader cores and down scaling of cores is performed.
 * @gpu_sleep_supported: Flag to indicate that if GPU sleep feature can be
 *                       supported by the kernel driver or not. If this
 *                       flag is not set, then HW state is directly saved
@@ -389,6 +390,9 @@ union kbase_pm_policy_data {
 * @gpu_clock_control_work: work item to set GPU clock during L2 power cycle
 *                          using gpu_clock_control
 *
+ * This structure contains data for the power management framework. There is one
+ * instance of this structure per device in the system.
+ *
 * Note:
 * During an IRQ, @pm_current_policy can be NULL when the policy is being
 * changed with kbase_pm_set_policy(). The change is protected under
@@ -455,6 +459,8 @@ struct kbase_pm_backend_data {
 	bool policy_change_clamp_state_to_off;
 	unsigned int csf_pm_sched_flags;
 	struct mutex policy_change_lock;
+	struct workqueue_struct *core_idle_wq;
+	struct work_struct core_idle_work;

 #ifdef KBASE_PM_RUNTIME
 	bool gpu_sleep_supported;
@@ -547,9 +553,6 @@ enum kbase_pm_policy_event {
 /**
 * struct kbase_pm_policy - Power policy structure.
 *
- * Each power policy exposes a (static) instance of this structure which
- * contains function pointers to the policy's methods.
- *
 * @name:               The name of this policy
 * @init:               Function called when the policy is selected
 * @term:               Function called when the policy is unselected
@@ -567,6 +570,8 @@ enum kbase_pm_policy_event {
 *                  Pre-defined required flags exist for each of the
 *                  ARM released policies, such as 'always_on', 'coarse_demand'
 *                  and etc.
+ *
+ * Each power policy exposes a (static) instance of this structure which
+ * contains function pointers to the policy's methods.
 */
 struct kbase_pm_policy {
 	char *name;
--- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_driver.c
+++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_driver.c
@@ -54,6 +54,10 @@
 #include <csf/ipa_control/mali_kbase_csf_ipa_control.h>
 #endif

+#if MALI_USE_CSF
+#include <linux/delay.h>
+#endif
+
 #include <linux/of.h>

 #ifdef CONFIG_MALI_CORESTACK
@@ -72,16 +76,16 @@ KBASE_EXPORT_TEST_API(corestack_driver_control);
 /**
 * enum kbasep_pm_action - Actions that can be performed on a core.
 *
- * This enumeration is private to the file. Its values are set to allow
- * core_type_to_reg() function, which decodes this enumeration, to be simpler
- * and more efficient.
- *
 * @ACTION_PRESENT: The cores that are present
 * @ACTION_READY: The cores that are ready
 * @ACTION_PWRON: Power on the cores specified
 * @ACTION_PWROFF: Power off the cores specified
 * @ACTION_PWRTRANS: The cores that are transitioning
 * @ACTION_PWRACTIVE: The cores that are active
+ *
+ * This enumeration is private to the file. Its values are set to allow
+ * core_type_to_reg() function, which decodes this enumeration, to be simpler
+ * and more efficient.
 */
 enum kbasep_pm_action {
 	ACTION_PRESENT = 0,
@@ -221,14 +225,14 @@ void kbase_pm_protected_l2_override(struct kbase_device *kbdev, bool override)
 /**
 * core_type_to_reg - Decode a core type and action to a register.
 *
+ * @core_type: The type of core
+ * @action:    The type of action
+ *
 * Given a core type (defined by kbase_pm_core_type) and an action (defined
 * by kbasep_pm_action) this function will return the register offset that
 * will perform the action on the core type. The register returned is the _LO
 * register and an offset must be applied to use the _HI register.
 *
- * @core_type: The type of core
- * @action:    The type of action
- *
 * Return: The register offset of the _LO register that performs an action of
 * type @action on a core of type @core_type.
 */
@@ -291,14 +295,14 @@ static void mali_cci_flush_l2(struct kbase_device *kbdev)
 /**
 * kbase_pm_invoke - Invokes an action on a core set
 *
- * This function performs the action given by @action on a set of cores of a
- * type given by @core_type. It is a static function used by
- * kbase_pm_transition_core_type()
- *
 * @kbdev:     The kbase device structure of the device
 * @core_type: The type of core that the action should be performed on
 * @cores:     A bit mask of cores to perform the action on (low 32 bits)
 * @action:    The action to perform on the cores
+ *
+ * This function performs the action given by @action on a set of cores of a
+ * type given by @core_type. It is a static function used by
+ * kbase_pm_transition_core_type()
 */
 static void kbase_pm_invoke(struct kbase_device *kbdev,
 					enum kbase_pm_core_type core_type,
@@ -376,15 +380,15 @@ static void kbase_pm_invoke(struct kbase_device *kbdev,
 /**
 * kbase_pm_get_state - Get information about a core set
 *
+ * @kbdev:     The kbase device structure of the device
+ * @core_type: The type of core that should be queried
+ * @action:    The property of the cores to query
+ *
 * This function gets information (chosen by @action) about a set of cores of
 * a type given by @core_type. It is a static function used by
 * kbase_pm_get_active_cores(), kbase_pm_get_trans_cores() and
 * kbase_pm_get_ready_cores().
 *
- * @kbdev:     The kbase device structure of the device
- * @core_type: The type of core that the should be queried
- * @action:    The property of the cores to query
- *
 * Return: A bit mask specifying the state of the cores
 */
 static u64 kbase_pm_get_state(struct kbase_device *kbdev,
@@ -753,17 +757,17 @@ static int kbase_pm_mcu_update_state(struct kbase_device *kbdev)
 			if (!kbase_pm_is_mcu_desired(kbdev))
 				backend->mcu_state = KBASE_MCU_ON_HWCNT_DISABLE;
 			else if (kbdev->csf.firmware_hctl_core_pwr) {
-				/* Host control add additional Cores to be active */
-				if (backend->shaders_desired_mask & ~shaders_ready) {
+				/* Host control scale up/down cores as needed */
+				if (backend->shaders_desired_mask != shaders_ready) {
 					backend->hwcnt_desired = false;
 					if (!backend->hwcnt_disabled)
 						kbase_pm_trigger_hwcnt_disable(kbdev);
 					backend->mcu_state =
 						KBASE_MCU_HCTL_MCU_ON_RECHECK;
 				}
-			} else if (kbase_pm_handle_mcu_core_attr_update(kbdev))
-				kbdev->pm.backend.mcu_state =
-					KBASE_MCU_ON_CORE_ATTR_UPDATE_PEND;
+			} else if (kbase_pm_handle_mcu_core_attr_update(kbdev)) {
+				backend->mcu_state = KBASE_MCU_ON_CORE_ATTR_UPDATE_PEND;
+			}
 			break;

 		case KBASE_MCU_HCTL_MCU_ON_RECHECK:
@@ -787,16 +791,54 @@ static int kbase_pm_mcu_update_state(struct kbase_device *kbdev)
 						ACTION_PWRON);
 				backend->mcu_state =
 					KBASE_MCU_HCTL_SHADERS_PEND_ON;
+
+			} else if (~backend->shaders_desired_mask & shaders_ready) {
+				kbase_csf_firmware_update_core_attr(kbdev, false, true,
+								    backend->shaders_desired_mask);
+				backend->mcu_state = KBASE_MCU_HCTL_CORES_DOWN_SCALE_NOTIFY_PEND;
 			} else {
 				backend->mcu_state =
 					KBASE_MCU_HCTL_SHADERS_PEND_ON;
 			}
 			break;

+		case KBASE_MCU_HCTL_CORES_DOWN_SCALE_NOTIFY_PEND:
+			if (kbase_csf_firmware_core_attr_updated(kbdev)) {
+				/* wait in queue until cores idle */
+				queue_work(backend->core_idle_wq, &backend->core_idle_work);
+				backend->mcu_state = KBASE_MCU_HCTL_CORE_INACTIVE_PEND;
+			}
+			break;
+
+		case KBASE_MCU_HCTL_CORE_INACTIVE_PEND:
+			{
+				u64 active_cores = kbase_pm_get_active_cores(
+							kbdev,
+							KBASE_PM_CORE_SHADER);
+				u64 cores_to_disable = shaders_ready &
+							~backend->shaders_desired_mask;
+
+				if (!(cores_to_disable & active_cores)) {
+					kbase_pm_invoke(kbdev, KBASE_PM_CORE_SHADER,
+							cores_to_disable,
+							ACTION_PWROFF);
+					backend->shaders_avail = backend->shaders_desired_mask;
+					backend->mcu_state = KBASE_MCU_HCTL_SHADERS_CORE_OFF_PEND;
+				}
+			}
+			break;
+
+		case KBASE_MCU_HCTL_SHADERS_CORE_OFF_PEND:
+			if (!shaders_trans && shaders_ready == backend->shaders_avail) {
+				/* Cores now stable */
+				backend->pm_shaders_core_mask = shaders_ready;
+				backend->mcu_state = KBASE_MCU_ON_HWCNT_ENABLE;
+			}
+			break;
+
 		case KBASE_MCU_ON_CORE_ATTR_UPDATE_PEND:
 			if (kbase_csf_firmware_core_attr_updated(kbdev)) {
-				backend->shaders_avail =
-					backend->shaders_desired_mask;
+				backend->shaders_avail = backend->shaders_desired_mask;
 				backend->mcu_state = KBASE_MCU_ON;
 			}
 			break;
@@ -832,6 +874,8 @@ static int kbase_pm_mcu_update_state(struct kbase_device *kbdev)

 		case KBASE_MCU_ON_PEND_HALT:
 			if (kbase_csf_firmware_mcu_halted(kbdev)) {
+				KBASE_KTRACE_ADD(kbdev, MCU_HALTED, NULL,
+					kbase_csf_ktrace_gpu_cycle_cnt(kbdev));
 				if (kbdev->csf.firmware_hctl_core_pwr)
 					backend->mcu_state =
 						KBASE_MCU_HCTL_SHADERS_READY_OFF;
@@ -875,6 +919,8 @@ static int kbase_pm_mcu_update_state(struct kbase_device *kbdev)

 		case KBASE_MCU_ON_PEND_SLEEP:
 			if (kbase_csf_firmware_is_mcu_in_sleep(kbdev)) {
+				KBASE_KTRACE_ADD(kbdev, MCU_IN_SLEEP, NULL,
+					kbase_csf_ktrace_gpu_cycle_cnt(kbdev));
 				backend->mcu_state = KBASE_MCU_IN_SLEEP;
 				kbase_pm_enable_db_mirror_interrupt(kbdev);
 				kbase_csf_scheduler_reval_idleness_post_sleep(kbdev);
@@ -884,6 +930,8 @@ static int kbase_pm_mcu_update_state(struct kbase_device *kbdev)
 		case KBASE_MCU_IN_SLEEP:
 			if (kbase_pm_is_mcu_desired(kbdev) &&
 			    backend->l2_state == KBASE_L2_ON) {
+				KBASE_TLSTREAM_TL_KBASE_CSFFW_FW_REQUEST_WAKEUP(
+					kbdev, kbase_backend_get_cycle_cnt(kbdev));
 				kbase_pm_enable_mcu_db_notification(kbdev);
 				kbase_pm_disable_db_mirror_interrupt(kbdev);
 				backend->mcu_state = KBASE_MCU_ON_HWCNT_ENABLE;
@@ -910,6 +958,50 @@ static int kbase_pm_mcu_update_state(struct kbase_device *kbdev)

 	return 0;
 }
+
+/**
+ * core_idle_worker - Poll until the shader cores to be powered off are idle
+ * @work: Work item embedded as pm.backend.core_idle_work in a kbase device
+ *
+ * Runs while the GPU is powered and the MCU state machine is in
+ * KBASE_MCU_HCTL_CORE_INACTIVE_PEND. Each iteration reads the active and ready
+ * shader core masks; once no ready core outside shaders_desired_mask is still
+ * active, kbase_pm_update_state() is called and the loop ends. Otherwise
+ * hwaccess_lock is dropped, the worker sleeps and the check is repeated.
+ */
+static void core_idle_worker(struct work_struct *work)
+{
+	struct kbase_device *kbdev =
+		container_of(work, struct kbase_device, pm.backend.core_idle_work);
+	struct kbase_pm_backend_data *backend = &kbdev->pm.backend;
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	while (backend->gpu_powered && (backend->mcu_state == KBASE_MCU_HCTL_CORE_INACTIVE_PEND)) {
+		const unsigned int core_inactive_wait_ms = 1;
+		u64 active_cores = kbase_pm_get_active_cores(kbdev, KBASE_PM_CORE_SHADER);
+		u64 shaders_ready = kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_SHADER);
+		u64 cores_to_disable = shaders_ready & ~backend->shaders_desired_mask;
+
+		/* None of the cores to be disabled is active: hand over to kbase_pm_update_state() */
+		if (!(cores_to_disable & active_cores)) {
+			kbase_pm_update_state(kbdev);
+			break;
+		}
+
+		/* The spinlock is released around the sleep and re-taken before
+		 * the loop condition is evaluated again.
+		 * NOTE(review): there is no upper bound on the number of retries,
+		 * and msleep() of a few ms may sleep longer than requested
+		 * (see kernel timers documentation) - confirm this is acceptable.
+		 */
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+		msleep(core_inactive_wait_ms);
+		spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	}
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+}
 #endif

 static const char *kbase_l2_core_state_to_string(enum kbase_l2_core_state state)
@@ -925,6 +1000,29 @@ static const char *kbase_l2_core_state_to_string(enum kbase_l2_core_state state)
 		return strings[state];
 }

+#if !MALI_USE_CSF
+/**
+ * kbase_pm_l2_clear_backend_slot_submit_kctx - Clear slots' last submission kctx
+ * @kbdev: The kbase device structure of the device
+ *
+ * On powering on the L2, the tracked kctx becomes stale and can be cleared.
+ * This enables the backend to spare the START_FLUSH.INV_SHADER_OTHER
+ * operation on the first submitted katom after the L2 powering on.
+ *
+ * Caller must hold hwaccess_lock.
+ */
+static void kbase_pm_l2_clear_backend_slot_submit_kctx(struct kbase_device *kbdev)
+{
+	int js;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	/* Clear the slots' last katom submission kctx */
+	for (js = 0; js < kbdev->gpu_props.num_job_slots; js++)
+		kbdev->hwaccess.backend.slot_rb[js].last_kctx_tagged = SLOT_RB_NULL_TAG_VAL;
+}
+#endif
+
 static int kbase_pm_l2_update_state(struct kbase_device *kbdev)
 {
 	struct kbase_pm_backend_data *backend = &kbdev->pm.backend;
@@ -1015,6 +1107,8 @@ static int kbase_pm_l2_update_state(struct kbase_device *kbdev)
 					kbase_pm_invoke(kbdev, KBASE_PM_CORE_L2,
 							l2_present & ~1,
 							ACTION_PWRON);
+				/* Clear backend slot submission kctx */
+				kbase_pm_l2_clear_backend_slot_submit_kctx(kbdev);
 #else
 				/* With CSF firmware, Host driver doesn't need to
 				 * handle power management with both shader and tiler cores.
@@ -1217,7 +1311,7 @@ static int kbase_pm_l2_update_state(struct kbase_device *kbdev)
 				 * powered off.
 				 */
 				kbase_gpu_start_cache_clean_nolock(
-						kbdev);
+					kbdev, GPU_COMMAND_CACHE_CLN_INV_L2);
 #if !MALI_USE_CSF
 			KBASE_KTRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE_TILER, NULL, 0u);
 #else
@@ -1594,10 +1688,12 @@ static int kbase_pm_shaders_update_state(struct kbase_device *kbdev)
 			break;

 		case KBASE_SHADERS_WAIT_FINISHED_CORESTACK_ON:
-			shader_poweroff_timer_queue_cancel(kbdev);
+			if (!backend->partial_shaderoff)
+				shader_poweroff_timer_queue_cancel(kbdev);

 			if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TTRX_921)) {
-				kbase_gpu_start_cache_clean_nolock(kbdev);
+				kbase_gpu_start_cache_clean_nolock(
+					kbdev, GPU_COMMAND_CACHE_CLN_INV_L2);
 				backend->shaders_state =
 					KBASE_SHADERS_L2_FLUSHING_CORESTACK_ON;
 			} else {
@@ -1895,11 +1991,25 @@ int kbase_pm_state_machine_init(struct kbase_device *kbdev)
 	stt->default_ticks = DEFAULT_PM_POWEROFF_TICK_SHADER;
 	stt->configured_ticks = stt->default_ticks;

+#if MALI_USE_CSF
+	kbdev->pm.backend.core_idle_wq = alloc_workqueue("coreoff_wq", WQ_HIGHPRI | WQ_UNBOUND, 1);
+	if (!kbdev->pm.backend.core_idle_wq) {
+		destroy_workqueue(stt->wq);
+		return -ENOMEM;
+	}
+
+	INIT_WORK(&kbdev->pm.backend.core_idle_work, core_idle_worker);
+#endif
+
 	return 0;
 }

 void kbase_pm_state_machine_term(struct kbase_device *kbdev)
 {
+#if MALI_USE_CSF
+	/* Destroy the workqueue on which core_idle_work (core_idle_worker()) is queued */
+	destroy_workqueue(kbdev->pm.backend.core_idle_wq);
+#endif
 	hrtimer_cancel(&kbdev->pm.backend.shader_tick_timer.timer);
 	destroy_workqueue(kbdev->pm.backend.shader_tick_timer.wq);
 }
@@ -2419,9 +2528,9 @@ void kbase_pm_reset_done(struct kbase_device *kbdev)
 /**
 * kbase_pm_wait_for_reset - Wait for a reset to happen
 *
- * Wait for the %RESET_COMPLETED IRQ to occur, then reset the waiting state.
- *
 * @kbdev: Kbase device
+ *
+ * Wait for the %RESET_COMPLETED IRQ to occur, then reset the waiting state.
 */
 static void kbase_pm_wait_for_reset(struct kbase_device *kbdev)
 {
@@ -2889,6 +2998,7 @@ exit:

 /**
 * kbase_pm_request_gpu_cycle_counter_do_request - Request cycle counters
+ * @kbdev:     The kbase device structure of the device
 *
 * Increase the count of cycle counter users and turn the cycle counters on if
 * they were previously off
@@ -2899,8 +3009,6 @@ exit:
 *
 * When this function is called the l2 cache must be on - i.e., the GPU must be
 * on.
- *
- * @kbdev:     The kbase device structure of the device
 */
 static void
 kbase_pm_request_gpu_cycle_counter_do_request(struct kbase_device *kbdev)
@@ -2918,11 +3026,13 @@ kbase_pm_request_gpu_cycle_counter_do_request(struct kbase_device *kbdev)
 		/* This might happen after GPU reset.
 		 * Then counter needs to be kicked.
 		 */
+#if !IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI)
 		if (!(kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_STATUS)) &
 		      GPU_STATUS_CYCLE_COUNT_ACTIVE)) {
 			kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
 					GPU_COMMAND_CYCLE_COUNT_START);
 		}
+#endif
 	}

 	spin_unlock_irqrestore(
--- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_internal.h
+++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_internal.h
@@ -35,18 +35,18 @@
 /**
 * kbase_pm_dev_idle - The GPU is idle.
 *
- * The OS may choose to turn off idle devices
- *
 * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * The OS may choose to turn off idle devices
 */
 void kbase_pm_dev_idle(struct kbase_device *kbdev);

 /**
 * kbase_pm_dev_activate - The GPU is active.
 *
- * The OS should avoid opportunistically turning off the GPU while it is active
- *
 * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * The OS should avoid opportunistically turning off the GPU while it is active
 */
 void kbase_pm_dev_activate(struct kbase_device *kbdev);

@@ -54,14 +54,14 @@ void kbase_pm_dev_activate(struct kbase_device *kbdev);
 * kbase_pm_get_present_cores - Get details of the cores that are present in
 *                              the device.
 *
- * This function can be called by the active power policy to return a bitmask of
- * the cores (of a specified type) present in the GPU device and also a count of
- * the number of cores.
- *
 * @kbdev: The kbase device structure for the device (must be a valid
 *         pointer)
 * @type:  The type of core (see the enum kbase_pm_core_type enumeration)
 *
+ * This function can be called by the active power policy to return a bitmask of
+ * the cores (of a specified type) present in the GPU device and also a count of
+ * the number of cores.
+ *
 * Return: The bit mask of cores present
 */
 u64 kbase_pm_get_present_cores(struct kbase_device *kbdev,
@@ -71,13 +71,13 @@ u64 kbase_pm_get_present_cores(struct kbase_device *kbdev,
 * kbase_pm_get_active_cores - Get details of the cores that are currently
 *                             active in the device.
 *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ * @type:  The type of core (see the enum kbase_pm_core_type enumeration)
+ *
 * This function can be called by the active power policy to return a bitmask of
 * the cores (of a specified type) that are actively processing work (i.e.
 * turned on *and* busy).
 *
- * @kbdev: The kbase device structure for the device (must be a valid pointer)
- * @type:  The type of core (see the enum kbase_pm_core_type enumeration)
- *
 * Return: The bit mask of active cores
 */
 u64 kbase_pm_get_active_cores(struct kbase_device *kbdev,
@@ -87,13 +87,13 @@ u64 kbase_pm_get_active_cores(struct kbase_device *kbdev,
 * kbase_pm_get_trans_cores - Get details of the cores that are currently
 *                            transitioning between power states.
 *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ * @type:  The type of core (see the enum kbase_pm_core_type enumeration)
+ *
 * This function can be called by the active power policy to return a bitmask of
 * the cores (of a specified type) that are currently transitioning between
 * power states.
 *
- * @kbdev: The kbase device structure for the device (must be a valid pointer)
- * @type:  The type of core (see the enum kbase_pm_core_type enumeration)
- *
 * Return: The bit mask of transitioning cores
 */
 u64 kbase_pm_get_trans_cores(struct kbase_device *kbdev,
@@ -103,13 +103,13 @@ u64 kbase_pm_get_trans_cores(struct kbase_device *kbdev,
 * kbase_pm_get_ready_cores - Get details of the cores that are currently
 *                            powered and ready for jobs.
 *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ * @type:  The type of core (see the enum kbase_pm_core_type enumeration)
+ *
 * This function can be called by the active power policy to return a bitmask of
 * the cores (of a specified type) that are powered and ready for jobs (they may
 * or may not be currently executing jobs).
 *
- * @kbdev: The kbase device structure for the device (must be a valid pointer)
- * @type:  The type of core (see the enum kbase_pm_core_type enumeration)
- *
 * Return: The bit mask of ready cores
 */
 u64 kbase_pm_get_ready_cores(struct kbase_device *kbdev,
@@ -119,13 +119,13 @@ u64 kbase_pm_get_ready_cores(struct kbase_device *kbdev,
 * kbase_pm_clock_on - Turn the clock for the device on, and enable device
 *                     interrupts.
 *
- * This function can be used by a power policy to turn the clock for the GPU on.
- * It should be modified during integration to perform the necessary actions to
- * ensure that the GPU is fully powered and clocked.
- *
 * @kbdev:     The kbase device structure for the device (must be a valid
 *             pointer)
 * @is_resume: true if clock on due to resume after suspend, false otherwise
+ *
+ * This function can be used by a power policy to turn the clock for the GPU on.
+ * It should be modified during integration to perform the necessary actions to
+ * ensure that the GPU is fully powered and clocked.
 */
 void kbase_pm_clock_on(struct kbase_device *kbdev, bool is_resume);

@@ -133,6 +133,9 @@ void kbase_pm_clock_on(struct kbase_device *kbdev, bool is_resume);
 * kbase_pm_clock_off - Disable device interrupts, and turn the clock for the
 *                      device off.
 *
+ * @kbdev:      The kbase device structure for the device (must be a valid
+ *              pointer)
+ *
 * This function can be used by a power policy to turn the clock for the GPU
 * off. It should be modified during integration to perform the necessary
 * actions to turn the clock off (if this is possible in the integration).
@@ -141,9 +144,6 @@ void kbase_pm_clock_on(struct kbase_device *kbdev, bool is_resume);
 * then this function would usually be invoked from the runtime suspend
 * callback function.
 *
- * @kbdev:      The kbase device structure for the device (must be a valid
- *              pointer)
- *
 * Return: true  if clock was turned off, or
 *         false if clock can not be turned off due to pending page/bus fault
 *               workers. Caller must flush MMU workqueues and retry
@@ -153,22 +153,22 @@ bool kbase_pm_clock_off(struct kbase_device *kbdev);
 /**
 * kbase_pm_enable_interrupts - Enable interrupts on the device.
 *
- * Interrupts are also enabled after a call to kbase_pm_clock_on().
- *
 * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Interrupts are also enabled after a call to kbase_pm_clock_on().
 */
 void kbase_pm_enable_interrupts(struct kbase_device *kbdev);

 /**
 * kbase_pm_disable_interrupts - Disable interrupts on the device.
 *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
 * This prevents delivery of Power Management interrupts to the CPU so that
 * kbase_pm_update_state() will not be called from the IRQ handler
 * until kbase_pm_enable_interrupts() or kbase_pm_clock_on() is called.
 *
 * Interrupts are also disabled after a call to kbase_pm_clock_off().
- *
- * @kbdev: The kbase device structure for the device (must be a valid pointer)
 */
 void kbase_pm_disable_interrupts(struct kbase_device *kbdev);

@@ -176,9 +176,9 @@ void kbase_pm_disable_interrupts(struct kbase_device *kbdev);
 * kbase_pm_disable_interrupts_nolock - Version of kbase_pm_disable_interrupts()
 *                                      that does not take the hwaccess_lock
 *
- * Caller must hold the hwaccess_lock.
- *
 * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Caller must hold the hwaccess_lock.
 */
 void kbase_pm_disable_interrupts_nolock(struct kbase_device *kbdev);

@@ -197,12 +197,11 @@ int kbase_pm_init_hw(struct kbase_device *kbdev, unsigned int flags);

 /**
 * kbase_pm_reset_done - The GPU has been reset successfully.
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
 *
 * This function must be called by the GPU interrupt handler when the
 * RESET_COMPLETED bit is set. It signals to the power management initialization
 * code that the GPU has been successfully reset.
- *
- * @kbdev: The kbase device structure for the device (must be a valid pointer)
 */
 void kbase_pm_reset_done(struct kbase_device *kbdev);

@@ -210,6 +209,7 @@ void kbase_pm_reset_done(struct kbase_device *kbdev);
 /**
 * kbase_pm_wait_for_desired_state - Wait for the desired power state to be
 *                                   reached
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
 *
 * Wait for the L2 and MCU state machines to reach the states corresponding
 * to the values of 'kbase_pm_is_l2_desired' and 'kbase_pm_is_mcu_desired'.
@@ -224,8 +224,6 @@ void kbase_pm_reset_done(struct kbase_device *kbdev);
 * power off in progress and kbase_pm_context_active() was called instead of
 * kbase_csf_scheduler_pm_active().
 *
- * @kbdev: The kbase device structure for the device (must be a valid pointer)
- *
 * Return: 0 on success, error code on error
 */
 int kbase_pm_wait_for_desired_state(struct kbase_device *kbdev);
@@ -233,6 +231,7 @@ int kbase_pm_wait_for_desired_state(struct kbase_device *kbdev);
 /**
 * kbase_pm_wait_for_desired_state - Wait for the desired power state to be
 *                                   reached
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
 *
 * Wait for the L2 and shader power state machines to reach the states
 * corresponding to the values of 'l2_desired' and 'shaders_desired'.
@@ -248,8 +247,6 @@ int kbase_pm_wait_for_desired_state(struct kbase_device *kbdev);
 * must ensure that this is not the case by, for example, calling
 * kbase_pm_wait_for_poweroff_work_complete()
 *
- * @kbdev: The kbase device structure for the device (must be a valid pointer)
- *
 * Return: 0 on success, error code on error
 */
 int kbase_pm_wait_for_desired_state(struct kbase_device *kbdev);
@@ -258,6 +255,8 @@ int kbase_pm_wait_for_desired_state(struct kbase_device *kbdev);
 /**
 * kbase_pm_wait_for_l2_powered - Wait for the L2 cache to be powered on
 *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
 * Wait for the L2 to be powered on, and for the L2 and the state machines of
 * its dependent stack components to stabilise.
 *
@@ -266,8 +265,6 @@ int kbase_pm_wait_for_desired_state(struct kbase_device *kbdev);
 * Unlike kbase_pm_update_state(), the caller must not hold hwaccess_lock,
 * because this function will take that lock itself.
 *
- * @kbdev: The kbase device structure for the device (must be a valid pointer)
- *
 * Return: 0 on success, error code on error
 */
 int kbase_pm_wait_for_l2_powered(struct kbase_device *kbdev);
@@ -276,13 +273,12 @@ int kbase_pm_wait_for_l2_powered(struct kbase_device *kbdev);
 * kbase_pm_update_dynamic_cores_onoff - Update the L2 and shader power state
 *                                       machines after changing shader core
 *                                       availability
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
 *
 * It can be called in any status, so need to check the l2 and shader core
 * power status in this function or it will break shader/l2 state machine
 *
 * Caller must hold hwaccess_lock
- *
- * @kbdev: The kbase device structure for the device (must be a valid pointer)
 */
 void kbase_pm_update_dynamic_cores_onoff(struct kbase_device *kbdev);

@@ -318,22 +314,21 @@ void kbase_pm_state_machine_term(struct kbase_device *kbdev);
 * kbase_pm_update_cores_state - Update the desired state of shader cores from
 *                               the Power Policy, and begin any power
 *                               transitions.
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
 *
 * This function will update the desired_xx_state members of
 * struct kbase_pm_device_data by calling into the current Power Policy. It will
 * then begin power transitions to make the hardware acheive the desired shader
 * core state.
- *
- * @kbdev: The kbase device structure for the device (must be a valid pointer)
 */
 void kbase_pm_update_cores_state(struct kbase_device *kbdev);

 /**
 * kbasep_pm_metrics_init - Initialize the metrics gathering framework.
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
 *
 * This must be called before other metric gathering APIs are called.
 *
- * @kbdev: The kbase device structure for the device (must be a valid pointer)
 *
 * Return: 0 on success, error code on error
 */
@@ -341,29 +336,27 @@ int kbasep_pm_metrics_init(struct kbase_device *kbdev);

 /**
 * kbasep_pm_metrics_term - Terminate the metrics gathering framework.
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
 *
 * This must be called when metric gathering is no longer required. It is an
 * error to call any metrics gathering function (other than
 * kbasep_pm_metrics_init()) after calling this function.
- *
- * @kbdev: The kbase device structure for the device (must be a valid pointer)
 */
 void kbasep_pm_metrics_term(struct kbase_device *kbdev);

 /**
 * kbase_pm_report_vsync - Function to be called by the frame buffer driver to
 *                         update the vsync metric.
+ * @kbdev:          The kbase device structure for the device (must be a
+ *                  valid pointer)
+ * @buffer_updated: True if the buffer has been updated on this VSync,
+ *                  false otherwise
 *
 * This function should be called by the frame buffer driver to update whether
 * the system is hitting the vsync target or not. buffer_updated should be true
 * if the vsync corresponded with a new frame being displayed, otherwise it
 * should be false. This function does not need to be called every vsync, but
 * only when the value of @buffer_updated differs from a previous call.
- *
- * @kbdev:          The kbase device structure for the device (must be a
- *                  valid pointer)
- * @buffer_updated: True if the buffer has been updated on this VSync,
- *                  false otherwise
 */
 void kbase_pm_report_vsync(struct kbase_device *kbdev, int buffer_updated);

@@ -381,6 +374,7 @@ void kbase_pm_get_dvfs_action(struct kbase_device *kbdev);
 /**
 * kbase_pm_request_gpu_cycle_counter - Mark that the GPU cycle counter is
 *                                      needed
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
 *
 * If the caller is the first caller then the GPU cycle counters will be enabled
 * along with the l2 cache
@@ -388,13 +382,13 @@ void kbase_pm_get_dvfs_action(struct kbase_device *kbdev);
 * The GPU must be powered when calling this function (i.e.
 * kbase_pm_context_active() must have been called).
 *
- * @kbdev: The kbase device structure for the device (must be a valid pointer)
 */
 void kbase_pm_request_gpu_cycle_counter(struct kbase_device *kbdev);

 /**
 * kbase_pm_request_gpu_cycle_counter_l2_is_on - Mark GPU cycle counter is
 *                                               needed (l2 cache already on)
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
 *
 * This is a version of the above function
 * (kbase_pm_request_gpu_cycle_counter()) suitable for being called when the
@@ -405,14 +399,13 @@ void kbase_pm_request_gpu_cycle_counter(struct kbase_device *kbdev);
 * The GPU must be powered when calling this function (i.e.
 * kbase_pm_context_active() must have been called) and the l2 cache must be
 * powered on.
- *
- * @kbdev: The kbase device structure for the device (must be a valid pointer)
 */
 void kbase_pm_request_gpu_cycle_counter_l2_is_on(struct kbase_device *kbdev);

 /**
 * kbase_pm_release_gpu_cycle_counter - Mark that the GPU cycle counter is no
 *                                      longer in use
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
 *
 * If the caller is the last caller then the GPU cycle counters will be
 * disabled. A request must have been made before a call to this.
@@ -420,18 +413,15 @@ void kbase_pm_request_gpu_cycle_counter_l2_is_on(struct kbase_device *kbdev);
 * Caller must not hold the hwaccess_lock, as it will be taken in this function.
 * If the caller is already holding this lock then
 * kbase_pm_release_gpu_cycle_counter_nolock() must be used instead.
- *
- * @kbdev: The kbase device structure for the device (must be a valid pointer)
 */
 void kbase_pm_release_gpu_cycle_counter(struct kbase_device *kbdev);

 /**
 * kbase_pm_release_gpu_cycle_counter_nolock - Version of kbase_pm_release_gpu_cycle_counter()
 *                                             that does not take hwaccess_lock
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
 *
 * Caller must hold the hwaccess_lock.
- *
- * @kbdev: The kbase device structure for the device (must be a valid pointer)
 */
 void kbase_pm_release_gpu_cycle_counter_nolock(struct kbase_device *kbdev);

@@ -458,12 +448,11 @@ void kbase_pm_wait_for_gpu_power_down(struct kbase_device *kbdev);

 /**
 * kbase_pm_runtime_init - Initialize runtime-pm for Mali GPU platform device
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
 *
 * Setup the power management callbacks and initialize/enable the runtime-pm
 * for the Mali GPU platform device, using the callback function. This must be
 * called before the kbase_pm_register_access_enable() function.
- *
- * @kbdev: The kbase device structure for the device (must be a valid pointer)
 */
 int kbase_pm_runtime_init(struct kbase_device *kbdev);

@@ -476,6 +465,7 @@ void kbase_pm_runtime_term(struct kbase_device *kbdev);

 /**
 * kbase_pm_register_access_enable - Enable access to GPU registers
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
 *
 * Enables access to the GPU registers before power management has powered up
 * the GPU with kbase_pm_powerup().
@@ -486,13 +476,12 @@ void kbase_pm_runtime_term(struct kbase_device *kbdev);
 *
 * This should only be used before power management is powered up with
 * kbase_pm_powerup()
- *
- * @kbdev: The kbase device structure for the device (must be a valid pointer)
 */
 void kbase_pm_register_access_enable(struct kbase_device *kbdev);

 /**
 * kbase_pm_register_access_disable - Disable early register access
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
 *
 * Disables access to the GPU registers enabled earlier by a call to
 * kbase_pm_register_access_enable().
@@ -503,8 +492,6 @@ void kbase_pm_register_access_enable(struct kbase_device *kbdev);
 *
 * This should only be used before power management is powered up with
 * kbase_pm_powerup()
- *
- * @kbdev: The kbase device structure for the device (must be a valid pointer)
 */
 void kbase_pm_register_access_disable(struct kbase_device *kbdev);

@@ -515,6 +502,7 @@ void kbase_pm_register_access_disable(struct kbase_device *kbdev);
 /**
 * kbase_pm_metrics_is_active - Check if the power management metrics
 *                              collection is active.
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
 *
 * Note that this returns if the power management metrics collection was
 * active at the time of calling, it is possible that after the call the metrics
@@ -522,7 +510,6 @@ void kbase_pm_register_access_disable(struct kbase_device *kbdev);
 *
 * The caller must handle the consequence that the state may have changed.
 *
- * @kbdev: The kbase device structure for the device (must be a valid pointer)
 * Return: true if metrics collection was active else false.
 */
 bool kbase_pm_metrics_is_active(struct kbase_device *kbdev);
@@ -558,12 +545,13 @@ void kbase_pm_get_dvfs_metrics(struct kbase_device *kbdev,
 /**
 * kbase_platform_dvfs_event - Report utilisation to DVFS code for CSF GPU
 *
- * Function provided by platform specific code when DVFS is enabled to allow
- * the power management metrics system to report utilisation.
- *
 * @kbdev:         The kbase device structure for the device (must be a
 *                 valid pointer)
 * @utilisation:   The current calculated utilisation by the metrics system.
+ *
+ * Function provided by platform specific code when DVFS is enabled to allow
+ * the power management metrics system to report utilisation.
+ *
 * Return:         Returns 0 on failure and non zero on success.
 */
 int kbase_platform_dvfs_event(struct kbase_device *kbdev, u32 utilisation);
@@ -571,15 +559,16 @@ int kbase_platform_dvfs_event(struct kbase_device *kbdev, u32 utilisation);
 /**
 * kbase_platform_dvfs_event - Report utilisation to DVFS code for JM GPU
 *
- * Function provided by platform specific code when DVFS is enabled to allow
- * the power management metrics system to report utilisation.
- *
 * @kbdev:         The kbase device structure for the device (must be a
 *                 valid pointer)
 * @utilisation:   The current calculated utilisation by the metrics system.
 * @util_gl_share: The current calculated gl share of utilisation.
 * @util_cl_share: The current calculated cl share of utilisation per core
 *                 group.
+ *
+ * Function provided by platform specific code when DVFS is enabled to allow
+ * the power management metrics system to report utilisation.
+ *
 * Return:         Returns 0 on failure and non zero on success.
 */
 int kbase_platform_dvfs_event(struct kbase_device *kbdev, u32 utilisation,
--- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_mcu_states.h
+++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_mcu_states.h
@@ -25,37 +25,47 @@
 * this header file. This header file can be included multiple times in the
 * same compilation unit with different definitions of KBASEP_MCU_STATE().
 *
- * @OFF:                      The MCU is powered off.
- * @PEND_ON_RELOAD:           The warm boot of MCU or cold boot of MCU (with
- *                            firmware reloading) is in progress.
- * @ON_GLB_REINIT_PEND:       The MCU is enabled and Global configuration
- *                            requests have been sent to the firmware.
- * @ON_HWCNT_ENABLE:          The Global requests have completed and MCU is now
- *                            ready for use and hwcnt is being enabled.
- * @ON:                       The MCU is active and hwcnt has been enabled.
- * @ON_CORE_ATTR_UPDATE_PEND: The MCU is active and mask of enabled shader cores
- *                            is being updated.
- * @ON_HWCNT_DISABLE:         The MCU is on and hwcnt is being disabled.
- * @ON_HALT:                  The MCU is on and hwcnt has been disabled, MCU
- *                            halt would be triggered.
- * @ON_PEND_HALT:             MCU halt in progress, confirmation pending.
- * @POWER_DOWN:               MCU halted operations, pending being disabled.
- * @PEND_OFF:                 MCU is being disabled, pending on powering off.
- * @RESET_WAIT:               The GPU is resetting, MCU state is unknown.
- * @HCTL_SHADERS_PEND_ON:     Global configuration requests sent to the firmware
- *                            have completed and shaders have been requested to
- *                            power on.
- * @HCTL_CORES_NOTIFY_PEND:   Shader cores have powered up and firmware is being
- *                            notified of the mask of enabled shader cores.
- * @HCTL_MCU_ON_RECHECK:      MCU is on and hwcnt disabling is triggered
- *                            and checks are done to increase the number of
- *                            enabled cores.
- * @HCTL_SHADERS_READY_OFF:   MCU has halted and cores need to be powered down
- * @HCTL_SHADERS_PEND_OFF:    Cores are transitioning to power down.
- * @ON_SLEEP_INITIATE:        MCU is on and hwcnt has been disabled and MCU
- *                            is being put to sleep.
- * @ON_PEND_SLEEP:            MCU sleep is in progress.
- * @IN_SLEEP:                 Sleep request is completed and MCU has halted.
+ * @OFF:                                The MCU is powered off.
+ * @PEND_ON_RELOAD:                     The warm boot of MCU or cold boot of MCU (with
+ *                                      firmware reloading) is in progress.
+ * @ON_GLB_REINIT_PEND:                 The MCU is enabled and Global configuration
+ *                                      requests have been sent to the firmware.
+ * @ON_HWCNT_ENABLE:                    The Global requests have completed and MCU is now
+ *                                      ready for use and hwcnt is being enabled.
+ * @ON:                                 The MCU is active and hwcnt has been enabled.
+ * @ON_CORE_ATTR_UPDATE_PEND:           The MCU is active and mask of enabled shader cores
+ *                                      is being updated.
+ * @ON_HWCNT_DISABLE:                   The MCU is on and hwcnt is being disabled.
+ * @ON_HALT:                            The MCU is on and hwcnt has been disabled, MCU
+ *                                      halt would be triggered.
+ * @ON_PEND_HALT:                       MCU halt in progress, confirmation pending.
+ * @POWER_DOWN:                         MCU halted operations, pending being disabled.
+ * @PEND_OFF:                           MCU is being disabled, pending on powering off.
+ * @RESET_WAIT:                         The GPU is resetting, MCU state is unknown.
+ * @HCTL_SHADERS_PEND_ON:               Global configuration requests sent to the firmware
+ *                                      have completed and shaders have been requested to
+ *                                      power on.
+ * @HCTL_CORES_NOTIFY_PEND:             Shader cores have powered up and firmware is being
+ *                                      notified of the mask of enabled shader cores.
+ * @HCTL_MCU_ON_RECHECK:                MCU is on and hwcnt disabling is triggered
+ *                                      and checks are done to update the number of
+ *                                      enabled cores.
+ * @HCTL_SHADERS_READY_OFF:             MCU has halted and cores need to be powered down
+ * @HCTL_SHADERS_PEND_OFF:              Cores are transitioning to power down.
+ * @HCTL_CORES_DOWN_SCALE_NOTIFY_PEND:  Firmware has been informed to stop using
+ *                                      specific cores, due to a core_mask change request.
+ *                                      After the ACK from FW, there is a wait for the
+ *                                      undesired cores to become inactive.
+ * @HCTL_CORE_INACTIVE_PEND:            Waiting for specific cores to become inactive.
+ *                                      Once the cores become inactive their power down
+ *                                      will be initiated.
+ * @HCTL_SHADERS_CORE_OFF_PEND:         Waiting for specific cores to complete the
+ *                                      transition to power down. Once powered down,
+ *                                      HW counters will be re-enabled.
+ * @ON_SLEEP_INITIATE:                  MCU is on and hwcnt has been disabled and MCU
+ *                                      is being put to sleep.
+ * @ON_PEND_SLEEP:                      MCU sleep is in progress.
+ * @IN_SLEEP:                           Sleep request is completed and MCU has halted.
 */
 KBASEP_MCU_STATE(OFF)
 KBASEP_MCU_STATE(PEND_ON_RELOAD)
@@ -75,6 +85,9 @@ KBASEP_MCU_STATE(HCTL_CORES_NOTIFY_PEND)
 KBASEP_MCU_STATE(HCTL_MCU_ON_RECHECK)
 KBASEP_MCU_STATE(HCTL_SHADERS_READY_OFF)
 KBASEP_MCU_STATE(HCTL_SHADERS_PEND_OFF)
+KBASEP_MCU_STATE(HCTL_CORES_DOWN_SCALE_NOTIFY_PEND)
+KBASEP_MCU_STATE(HCTL_CORE_INACTIVE_PEND)
+KBASEP_MCU_STATE(HCTL_SHADERS_CORE_OFF_PEND)
 /* Additional MCU states to support GPU sleep feature */
 KBASEP_MCU_STATE(ON_SLEEP_INITIATE)
 KBASEP_MCU_STATE(ON_PEND_SLEEP)
--- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_policy.c
+++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_policy.c
@@ -36,8 +36,13 @@
 #include <linux/of.h>

 static const struct kbase_pm_policy *const all_policy_list[] = {
+#if IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI)
+	&kbase_pm_always_on_policy_ops,
 	&kbase_pm_coarse_demand_policy_ops,
-	&kbase_pm_always_on_policy_ops
+#else /* CONFIG_MALI_BIFROST_NO_MALI */
+	&kbase_pm_coarse_demand_policy_ops,
+	&kbase_pm_always_on_policy_ops,
+#endif /* CONFIG_MALI_BIFROST_NO_MALI */
 };

 void kbase_pm_policy_init(struct kbase_device *kbdev)
--- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_time.c
+++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_time.c
@@ -67,6 +67,9 @@ void kbase_backend_get_gpu_time_norequest(struct kbase_device *kbdev,
 */
 static bool timedwait_cycle_count_active(struct kbase_device *kbdev)
 {
+#if IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI)
+	return true;
+#else
 	bool success = false;
 	const unsigned int timeout = 100;
 	const unsigned long remaining = jiffies + msecs_to_jiffies(timeout);
@@ -79,6 +82,7 @@ static bool timedwait_cycle_count_active(struct kbase_device *kbdev)
 		}
 	}
 	return success;
+#endif
 }
 #endif

--- a/drivers/gpu/arm/bifrost/context/backend/mali_kbase_context_csf.c
+++ b/drivers/gpu/arm/bifrost/context/backend/mali_kbase_context_csf.c
@@ -48,6 +48,7 @@ void kbase_context_debugfs_init(struct kbase_context *const kctx)
 	kbase_csf_queue_group_debugfs_init(kctx);
 	kbase_csf_kcpu_debugfs_init(kctx);
 	kbase_csf_tiler_heap_debugfs_init(kctx);
+	kbase_csf_tiler_heap_total_debugfs_init(kctx);
 	kbase_csf_cpu_queue_debugfs_init(kctx);
 }
 KBASE_EXPORT_SYMBOL(kbase_context_debugfs_init);
--- a/drivers/gpu/arm/bifrost/context/mali_kbase_context.c
+++ b/drivers/gpu/arm/bifrost/context/mali_kbase_context.c
@@ -163,8 +163,6 @@ int kbase_context_common_init(struct kbase_context *kctx)

 	kctx->id = atomic_add_return(1, &(kctx->kbdev->ctx_num)) - 1;

-	mutex_init(&kctx->legacy_hwcnt_lock);
-
 	mutex_lock(&kctx->kbdev->kctx_list_lock);

 	err = kbase_insert_kctx_to_process(kctx);
--- a/drivers/gpu/arm/bifrost/csf/Kbuild
+++ b/drivers/gpu/arm/bifrost/csf/Kbuild
@@ -33,10 +33,12 @@ bifrost_kbase-y += \
    csf/mali_kbase_csf_kcpu_debugfs.o \
    csf/mali_kbase_csf_protected_memory.o \
    csf/mali_kbase_csf_tiler_heap_debugfs.o \
-    csf/mali_kbase_csf_cpu_queue_debugfs.o
+    csf/mali_kbase_csf_cpu_queue_debugfs.o \
+    csf/mali_kbase_csf_event.o

 bifrost_kbase-$(CONFIG_MALI_REAL_HW) += csf/mali_kbase_csf_firmware.o

+bifrost_kbase-$(CONFIG_MALI_BIFROST_NO_MALI) += csf/mali_kbase_csf_firmware_no_mali.o

 ifeq ($(KBUILD_EXTMOD),)
 # in-tree
--- a/drivers/gpu/arm/bifrost/csf/ipa_control/mali_kbase_csf_ipa_control.c
+++ b/drivers/gpu/arm/bifrost/csf/ipa_control/mali_kbase_csf_ipa_control.c
@@ -43,7 +43,7 @@
 #define COMMAND_PROTECTED_ACK ((u32)4)
 #define COMMAND_RESET_ACK ((u32)5)

-/**
+/*
 * Default value for the TIMER register of the IPA Control interface,
 * expressed in milliseconds.
 *
@@ -53,22 +53,22 @@
 */
 #define TIMER_DEFAULT_VALUE_MS ((u32)10) /* 10 milliseconds */

-/**
+/*
 * Number of timer events per second.
 */
 #define TIMER_EVENTS_PER_SECOND ((u32)1000 / TIMER_DEFAULT_VALUE_MS)

-/**
+/*
 * Maximum number of loops polling the GPU before we assume the GPU has hung.
 */
 #define IPA_INACTIVE_MAX_LOOPS ((unsigned int)8000000)

-/**
+/*
 * Number of bits used to configure a performance counter in SELECT registers.
 */
 #define IPA_CONTROL_SELECT_BITS_PER_CNT ((u64)8)

-/**
+/*
 * Maximum value of a performance counter.
 */
 #define MAX_PRFCNT_VALUE (((u64)1 << 48) - 1)
@@ -251,9 +251,15 @@ static inline void calc_prfcnt_delta(struct kbase_device *kbdev,

 	delta_value *= prfcnt->scaling_factor;

-	if (!WARN_ON_ONCE(kbdev->csf.ipa_control.cur_gpu_rate == 0))
-		if (prfcnt->gpu_norm)
-			delta_value = div_u64(delta_value, kbdev->csf.ipa_control.cur_gpu_rate);
+	if (kbdev->csf.ipa_control.cur_gpu_rate == 0) {
+		static bool warned;
+
+		if (!warned) {
+			dev_warn(kbdev->dev, "%s: GPU freq is unexpectedly 0", __func__);
+			warned = true;
+		}
+	} else if (prfcnt->gpu_norm)
+		delta_value = div_u64(delta_value, kbdev->csf.ipa_control.cur_gpu_rate);

 	prfcnt->latest_raw_value = raw_value;

@@ -791,7 +797,7 @@ int kbase_ipa_control_query(struct kbase_device *kbdev, const void *client,
 	ipa_ctrl = &kbdev->csf.ipa_control;
 	session = (struct kbase_ipa_control_session *)client;

-	if (WARN_ON(!session->active)) {
+	if (!session->active) {
 		dev_err(kbdev->dev,
 			"%s: attempt to query inactive session", __func__);
 		return -EINVAL;
--- a/drivers/gpu/arm/bifrost/csf/ipa_control/mali_kbase_csf_ipa_control.h
+++ b/drivers/gpu/arm/bifrost/csf/ipa_control/mali_kbase_csf_ipa_control.h
@@ -24,7 +24,7 @@

 #include <mali_kbase.h>

-/**
+/*
 * Maximum index accepted to configure an IPA Control performance counter.
 */
 #define KBASE_IPA_CONTROL_CNT_MAX_IDX ((u8)64 * 3)
--- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf.c
+++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf.c
@@ -33,30 +33,12 @@
 #include "mali_kbase_csf_timeout.h"
 #include <csf/ipa_control/mali_kbase_csf_ipa_control.h>
 #include <mali_kbase_hwaccess_time.h>
+#include "mali_kbase_csf_event.h"

 #define CS_REQ_EXCEPTION_MASK (CS_REQ_FAULT_MASK | CS_REQ_FATAL_MASK)
 #define CS_ACK_EXCEPTION_MASK (CS_ACK_FAULT_MASK | CS_ACK_FATAL_MASK)
 #define POWER_DOWN_LATEST_FLUSH_VALUE ((u32)1)

-/**
- * struct kbase_csf_event - CSF event callback.
- *
- * This structure belongs to the list of events which is part of a Kbase
- * context, and describes a callback function with a custom parameter to pass
- * to it when a CSF event is signalled.
- *
- * @link:      Link to the rest of the list.
- * @kctx:      Pointer to the Kbase context this event belongs to.
- * @callback:  Callback function to call when a CSF event is signalled.
- * @param:     Parameter to pass to the callback function.
- */
-struct kbase_csf_event {
-	struct list_head link;
-	struct kbase_context *kctx;
-	kbase_csf_event_callback *callback;
-	void *param;
-};
-
 const u8 kbasep_csf_queue_group_priority_to_relative[BASE_QUEUE_GROUP_PRIORITY_COUNT] = {
 	KBASE_QUEUE_GROUP_PRIORITY_HIGH,
 	KBASE_QUEUE_GROUP_PRIORITY_MEDIUM,
@@ -530,24 +512,24 @@ static int csf_queue_register_internal(struct kbase_context *kctx,
 	if (reg_ex && reg_ex->ex_buffer_size) {
 		int buf_pages = (reg_ex->ex_buffer_size +
 				 (1 << PAGE_SHIFT) - 1) >> PAGE_SHIFT;
+		struct kbase_va_region *region_ex =
+			kbase_region_tracker_find_region_enclosing_address(kctx,
+									   reg_ex->ex_buffer_base);

-		region = kbase_region_tracker_find_region_enclosing_address(
-				kctx, reg_ex->ex_buffer_base);
-		if (kbase_is_region_invalid_or_free(region)) {
+		if (kbase_is_region_invalid_or_free(region_ex)) {
 			ret = -ENOENT;
 			goto out_unlock_vm;
 		}

-		if (buf_pages > (region->nr_pages -
-				 ((reg_ex->ex_buffer_base >> PAGE_SHIFT) -
-				 region->start_pfn))) {
+		if (buf_pages > (region_ex->nr_pages -
+				 ((reg_ex->ex_buffer_base >> PAGE_SHIFT) - region_ex->start_pfn))) {
 			ret = -EINVAL;
 			goto out_unlock_vm;
 		}

-		region = kbase_region_tracker_find_region_enclosing_address(
-				kctx, reg_ex->ex_offset_var_addr);
-		if (kbase_is_region_invalid_or_free(region)) {
+		region_ex = kbase_region_tracker_find_region_enclosing_address(
+			kctx, reg_ex->ex_offset_var_addr);
+		if (kbase_is_region_invalid_or_free(region_ex)) {
 			ret = -ENOENT;
 			goto out_unlock_vm;
 		}
@@ -582,6 +564,8 @@ static int csf_queue_register_internal(struct kbase_context *kctx,
 	queue->sb_status = 0;
 	queue->blocked_reason = CS_STATUS_BLOCKED_REASON_REASON_UNBLOCKED;

+	atomic_set(&queue->pending, 0);
+
 	INIT_LIST_HEAD(&queue->link);
 	INIT_LIST_HEAD(&queue->error.link);
 	INIT_WORK(&queue->oom_event_work, oom_event_worker);
@@ -589,6 +573,7 @@ static int csf_queue_register_internal(struct kbase_context *kctx,
 	list_add(&queue->link, &kctx->csf.queue_list);

 	region->flags |= KBASE_REG_NO_USER_FREE;
+	region->user_data = queue;

 	/* Initialize the cs_trace configuration parameters, When buffer_size
 	 * is 0, trace is disabled. Here we only update the fields when
@@ -669,8 +654,6 @@ void kbase_csf_queue_terminate(struct kbase_context *kctx,
 	queue = find_queue(kctx, term->buffer_gpu_addr);

 	if (queue) {
-		unsigned long flags;
-
 		/* As the GPU queue has been terminated by the
 		 * user space, undo the actions that were performed when the
 		 * queue was registered i.e. remove the queue from the per
@@ -687,19 +670,18 @@ void kbase_csf_queue_terminate(struct kbase_context *kctx,
 			/* After this the Userspace would be able to free the
 			 * memory for GPU queue. In case the Userspace missed
 			 * terminating the queue, the cleanup will happen on
-			 * context termination where teardown of region tracker
+			 * context termination where tear down of region tracker
 			 * would free up the GPU queue memory.
 			 */
 			queue->queue_reg->flags &= ~KBASE_REG_NO_USER_FREE;
+			queue->queue_reg->user_data = NULL;
 		}
 		kbase_gpu_vm_unlock(kctx);

-		spin_lock_irqsave(&kctx->csf.event_lock, flags);
 		dev_dbg(kctx->kbdev->dev,
 			"Remove any pending command queue fatal from context %pK\n",
 			(void *)kctx);
-		list_del_init(&queue->error.link);
-		spin_unlock_irqrestore(&kctx->csf.event_lock, flags);
+		kbase_csf_event_remove_error(kctx, &queue->error);

 		release_queue(queue);
 	}
@@ -781,6 +763,48 @@ static struct kbase_queue_group *get_bound_queue_group(
 	return group;
 }

+/**
+ * pending_submission_worker() - Work item to process pending kicked GPU command queues.
+ *
+ * @work: Pointer to the csf.pending_submission_work member of a struct kbase_context.
+ *
+ * This function starts all pending queues, for which the work was previously submitted
+ * via ioctl call from application thread. If the queue is already scheduled and resident,
+ * it will be started right away, otherwise once the group is made resident. A pending queue
+ * not bound to a group is skipped; if kbase_reset_gpu_prevent_and_wait() fails, none start.
+ */
+static void pending_submission_worker(struct work_struct *work)
+{
+	struct kbase_context *kctx =
+		container_of(work, struct kbase_context, csf.pending_submission_work);
+	struct kbase_device *kbdev = kctx->kbdev;
+	struct kbase_queue *queue;
+	int err = kbase_reset_gpu_prevent_and_wait(kbdev);
+
+	if (err) {
+		dev_err(kbdev->dev, "Unsuccessful GPU reset detected when kicking queue ");
+		return;
+	}
+
+	mutex_lock(&kctx->csf.lock);
+
+	/* Atomically clear each queue's pending flag; start the bound queues that had it set. */
+	list_for_each_entry(queue, &kctx->csf.queue_list, link) {
+		if (atomic_cmpxchg(&queue->pending, 1, 0) == 1) {
+			struct kbase_queue_group *group = get_bound_queue_group(queue);
+
+			if (!group || queue->bind_state != KBASE_CSF_QUEUE_BOUND)
+				dev_dbg(kbdev->dev, "queue is not bound to a group");
+			else
+				WARN_ON(kbase_csf_scheduler_queue_start(queue));
+		}
+	}
+
+	mutex_unlock(&kctx->csf.lock);
+
+	kbase_reset_gpu_allow(kbdev);
+}
+
 void kbase_csf_ring_csg_doorbell(struct kbase_device *kbdev, int slot)
 {
 	if (WARN_ON(slot < 0))
@@ -846,40 +870,44 @@ void kbase_csf_ring_cs_kernel_doorbell(struct kbase_device *kbdev,
 		kbase_csf_ring_csg_doorbell(kbdev, csg_nr);
 }

+static void enqueue_gpu_submission_work(struct kbase_context *const kctx)
+{
+	queue_work(system_highpri_wq, &kctx->csf.pending_submission_work);
+}
+
 int kbase_csf_queue_kick(struct kbase_context *kctx,
 			 struct kbase_ioctl_cs_queue_kick *kick)
 {
 	struct kbase_device *kbdev = kctx->kbdev;
-	struct kbase_queue_group *group;
-	struct kbase_queue *queue;
+	bool trigger_submission = false;
+	struct kbase_va_region *region;
 	int err = 0;

-	err = kbase_reset_gpu_prevent_and_wait(kbdev);
-	if (err) {
-		dev_warn(
-			kbdev->dev,
-			"Unsuccessful GPU reset detected when kicking queue (buffer_addr=0x%.16llx)",
-			kick->buffer_gpu_addr);
-		return err;
-	}
+	/* GPU work submission happens asynchronously, so that the application thread is not
+	 * blocked by contention on the scheduler lock. For this reason, the vm_lock is used
+	 * here to get the reference to the queue, based on its buffer_gpu_addr, from the
+	 * context's list of active va_regions.
+	 * Once the target queue is found, its pending flag is atomically set to one, which
+	 * avoids a race between the submission ioctl thread and the work item.
+	 */
+	kbase_gpu_vm_lock(kctx);
+	region = kbase_region_tracker_find_region_enclosing_address(kctx, kick->buffer_gpu_addr);
+	if (!kbase_is_region_invalid_or_free(region)) {
+		struct kbase_queue *queue = region->user_data;

-	mutex_lock(&kctx->csf.lock);
-	queue = find_queue(kctx, kick->buffer_gpu_addr);
-	if (!queue)
-		err = -EINVAL;
-
-	if (!err) {
-		group = get_bound_queue_group(queue);
-		if (!group) {
-			dev_err(kctx->kbdev->dev, "queue not bound\n");
-			err = -EINVAL;
+		if (queue) {
+			atomic_cmpxchg(&queue->pending, 0, 1);
+			trigger_submission = true;
 		}
+	} else {
+		dev_dbg(kbdev->dev,
+			"Attempt to kick GPU queue without a valid command buffer region");
+		err = -EFAULT;
 	}
+	kbase_gpu_vm_unlock(kctx);

-	if (!err)
-		err = kbase_csf_scheduler_queue_start(queue);
-	mutex_unlock(&kctx->csf.lock);
-	kbase_reset_gpu_allow(kbdev);
+	if (likely(trigger_submission))
+		enqueue_gpu_submission_work(kctx);

 	return err;
 }
@@ -1310,6 +1338,7 @@ static int create_queue_group(struct kbase_context *const kctx,
 			group->doorbell_nr = KBASEP_USER_DB_NR_INVALID;
 			group->faulted = false;

+
 			group->group_uid = generate_group_uid();
 			create->out.group_uid = group->group_uid;

@@ -1343,6 +1372,7 @@ static int create_queue_group(struct kbase_context *const kctx,
 	return group_handle;
 }

+
 int kbase_csf_queue_group_create(struct kbase_context *const kctx,
 			union kbase_ioctl_cs_queue_group_create *const create)
 {
@@ -1368,6 +1398,9 @@ int kbase_csf_queue_group_create(struct kbase_context *const kctx,
 			"No CSG has at least %d CSs",
 			create->in.cs_min);
 		err = -EINVAL;
+	} else if (create->in.reserved) {
+		dev_warn(kctx->kbdev->dev, "Reserved field was set to non-0");
+		err = -EINVAL;
 	} else {
 		/* For the CSG which satisfies the condition for having
 		 * the needed number of CSs, check whether it also conforms
@@ -1517,6 +1550,19 @@ static void cancel_queue_group_events(struct kbase_queue_group *group)
 	cancel_work_sync(&group->protm_event_work);
 }

+static void remove_pending_group_fatal_error(struct kbase_queue_group *group)
+{
+	struct kbase_context *kctx = group->kctx;
+
+	dev_dbg(kctx->kbdev->dev,
+		"Remove any pending group fatal error from context %pK\n",
+		(void *)group->kctx);
+
+	kbase_csf_event_remove_error(kctx, &group->error_tiler_oom);
+	kbase_csf_event_remove_error(kctx, &group->error_timeout);
+	kbase_csf_event_remove_error(kctx, &group->error_fatal);
+}
+
 void kbase_csf_queue_group_terminate(struct kbase_context *kctx,
 				     u8 group_handle)
 {
@@ -1539,19 +1585,7 @@ void kbase_csf_queue_group_terminate(struct kbase_context *kctx,
 	group = find_queue_group(kctx, group_handle);

 	if (group) {
-		unsigned long flags;
-
-		spin_lock_irqsave(&kctx->csf.event_lock, flags);
-
-		dev_dbg(kbdev->dev,
-			"Remove any pending group fatal error from context %pK\n",
-			(void *)group->kctx);
-
-		list_del_init(&group->error_tiler_oom.link);
-		list_del_init(&group->error_timeout.link);
-		list_del_init(&group->error_fatal.link);
-		spin_unlock_irqrestore(&kctx->csf.event_lock, flags);
-
+		remove_pending_group_fatal_error(group);
 		term_queue_group(group);
 		kctx->csf.queue_groups[group_handle] = NULL;
 	}
@@ -1603,48 +1637,6 @@ int kbase_csf_queue_group_suspend(struct kbase_context *kctx,
 	return err;
 }

-/**
- * add_error() - Add an error to the list of errors to report to user space
- *
- * @kctx:  Address of a base context associated with a GPU address space.
- * @error: Address of the item to be added to the context's pending error list.
- * @data:  Error data to be returned to userspace.
- *
- * Does not wake up the event queue blocking a user thread in kbase_poll. This
- * is to make it more efficient to add multiple errors.
- *
- * The added error must not already be on the context's list of errors waiting
- * to be reported (e.g. because a previous error concerning the same object has
- * not yet been reported).
- */
-static void add_error(struct kbase_context *const kctx,
-		      struct kbase_csf_notification *const error,
-		      struct base_csf_notification const *const data)
-{
-	unsigned long flags;
-
-	if (WARN_ON(!kctx))
-		return;
-
-	if (WARN_ON(!error))
-		return;
-
-	if (WARN_ON(!data))
-		return;
-
-	spin_lock_irqsave(&kctx->csf.event_lock, flags);
-
-	if (!WARN_ON(!list_empty(&error->link))) {
-		error->data = *data;
-		list_add_tail(&error->link, &kctx->csf.error_list);
-		dev_dbg(kctx->kbdev->dev,
-			"Added error %pK of type %d in context %pK\n",
-			(void *)error, data->type, (void *)kctx);
-	}
-
-	spin_unlock_irqrestore(&kctx->csf.event_lock, flags);
-}
-
 void kbase_csf_add_group_fatal_error(
 	struct kbase_queue_group *const group,
 	struct base_gpu_queue_group_error const *const err_payload)
@@ -1667,7 +1659,7 @@ void kbase_csf_add_group_fatal_error(
 		}
 	};

-	add_error(group->kctx, &group->error_fatal, &error);
+	kbase_csf_event_add_error(group->kctx, &group->error_fatal, &error);
 }

 void kbase_csf_active_queue_groups_reset(struct kbase_device *kbdev,
@@ -1708,12 +1700,11 @@ int kbase_csf_ctx_init(struct kbase_context *kctx)
 	struct kbase_device *kbdev = kctx->kbdev;
 	int err = -ENOMEM;

-	INIT_LIST_HEAD(&kctx->csf.event_callback_list);
 	INIT_LIST_HEAD(&kctx->csf.queue_list);
 	INIT_LIST_HEAD(&kctx->csf.link);
-	INIT_LIST_HEAD(&kctx->csf.error_list);

-	spin_lock_init(&kctx->csf.event_lock);
+	kbase_csf_event_init(kctx);
+
 	kctx->csf.user_reg_vma = NULL;
 	mutex_lock(&kbdev->pm.lock);
 	/* The inode information for /dev/malixx file is not available at the
@@ -1744,9 +1735,11 @@ int kbase_csf_ctx_init(struct kbase_context *kctx)
 			if (likely(!err)) {
 				err = kbase_csf_tiler_heap_context_init(kctx);

-				if (likely(!err))
+				if (likely(!err)) {
 					mutex_init(&kctx->csf.lock);
-				else
+					INIT_WORK(&kctx->csf.pending_submission_work,
+						  pending_submission_worker);
+				} else
 					kbase_csf_kcpu_queue_context_term(kctx);
 			}

@@ -1829,7 +1822,6 @@ void kbase_csf_ctx_term(struct kbase_context *kctx)
 	 * for queue groups & kcpu queues, hence no need to explicitly remove
 	 * those debugfs files.
 	 */
-	kbase_csf_event_wait_remove_all(kctx);

 	/* Wait for a GPU reset if it is happening, prevent it if not happening */
 	err = kbase_reset_gpu_prevent_and_wait(kbdev);
@@ -1841,13 +1833,20 @@ void kbase_csf_ctx_term(struct kbase_context *kctx)
 	else
 		reset_prevented = true;

+	cancel_work_sync(&kctx->csf.pending_submission_work);
+
 	mutex_lock(&kctx->csf.lock);
+
 	/* Iterate through the queue groups that were not terminated by
 	 * userspace and issue the term request to firmware for them.
 	 */
 	for (i = 0; i < MAX_QUEUE_GROUP_NUM; i++) {
-		if (kctx->csf.queue_groups[i])
-			term_queue_group(kctx->csf.queue_groups[i]);
+		struct kbase_queue_group *group = kctx->csf.queue_groups[i];
+
+		if (group) {
+			remove_pending_group_fatal_error(group);
+			term_queue_group(group);
+		}
 	}
 	mutex_unlock(&kctx->csf.lock);

@@ -1910,185 +1909,19 @@ void kbase_csf_ctx_term(struct kbase_context *kctx)
 	kbase_csf_tiler_heap_context_term(kctx);
 	kbase_csf_kcpu_queue_context_term(kctx);
 	kbase_csf_scheduler_context_term(kctx);
+	kbase_csf_event_term(kctx);

 	mutex_destroy(&kctx->csf.lock);
 }

-int kbase_csf_event_wait_add(struct kbase_context *kctx,
-			     kbase_csf_event_callback *callback, void *param)
-{
-	int err = -ENOMEM;
-	struct kbase_csf_event *event =
-		kzalloc(sizeof(struct kbase_csf_event), GFP_KERNEL);
-
-	if (event) {
-		unsigned long flags;
-
-		event->kctx = kctx;
-		event->callback = callback;
-		event->param = param;
-
-		spin_lock_irqsave(&kctx->csf.event_lock, flags);
-		list_add_tail(&event->link, &kctx->csf.event_callback_list);
-		dev_dbg(kctx->kbdev->dev,
-			"Added event handler %pK with param %pK\n", event,
-			event->param);
-		spin_unlock_irqrestore(&kctx->csf.event_lock, flags);
-
-		err = 0;
-	}
-
-	return err;
-}
-
-void kbase_csf_event_wait_remove(struct kbase_context *kctx,
-		kbase_csf_event_callback *callback, void *param)
-{
-	struct kbase_csf_event *event;
-	unsigned long flags;
-
-	spin_lock_irqsave(&kctx->csf.event_lock, flags);
-
-	list_for_each_entry(event, &kctx->csf.event_callback_list, link) {
-		if ((event->callback == callback) && (event->param == param)) {
-			list_del(&event->link);
-			dev_dbg(kctx->kbdev->dev,
-				"Removed event handler %pK with param %pK\n",
-				event, event->param);
-			kfree(event);
-			break;
-		}
-	}
-	spin_unlock_irqrestore(&kctx->csf.event_lock, flags);
-}
-
-bool kbase_csf_read_error(struct kbase_context *kctx,
-		struct base_csf_notification *event_data)
-{
-	bool got_event = true;
-	struct kbase_csf_notification *error_data = NULL;
-	unsigned long flags;
-
-	spin_lock_irqsave(&kctx->csf.event_lock, flags);
-
-	if (likely(!list_empty(&kctx->csf.error_list))) {
-		error_data = list_first_entry(&kctx->csf.error_list,
-			struct kbase_csf_notification, link);
-		list_del_init(&error_data->link);
-		*event_data = error_data->data;
-		dev_dbg(kctx->kbdev->dev, "Dequeued error %pK in context %pK\n",
-			(void *)error_data, (void *)kctx);
-	} else {
-		got_event = false;
-	}
-
-	spin_unlock_irqrestore(&kctx->csf.event_lock, flags);
-
-	return got_event;
-}
-
-bool kbase_csf_error_pending(struct kbase_context *kctx)
-{
-	bool event_pended = false;
-	unsigned long flags;
-
-	spin_lock_irqsave(&kctx->csf.event_lock, flags);
-	event_pended = !list_empty(&kctx->csf.error_list);
-	dev_dbg(kctx->kbdev->dev, "%s error is pending in context %pK\n",
-		event_pended ? "An" : "No", (void *)kctx);
-	spin_unlock_irqrestore(&kctx->csf.event_lock, flags);
-
-	return event_pended;
-}
-
-static void sync_update_notify_gpu(struct kbase_context *kctx)
-{
-	bool can_notify_gpu;
-	unsigned long flags;
-
-	spin_lock_irqsave(&kctx->kbdev->hwaccess_lock, flags);
-	can_notify_gpu = kctx->kbdev->pm.backend.gpu_powered;
-#ifdef KBASE_PM_RUNTIME
-	if (kctx->kbdev->pm.backend.gpu_sleep_mode_active)
-		can_notify_gpu = false;
-#endif
-
-	if (can_notify_gpu) {
-		kbase_csf_ring_doorbell(kctx->kbdev, CSF_KERNEL_DOORBELL_NR);
-		KBASE_KTRACE_ADD(kctx->kbdev, SYNC_UPDATE_EVENT_NOTIFY_GPU, kctx, 0u);
-	}
-
-	spin_unlock_irqrestore(&kctx->kbdev->hwaccess_lock, flags);
-}
-
-void kbase_csf_event_signal(struct kbase_context *kctx, bool notify_gpu)
-{
-	struct kbase_csf_event *event, *next_event;
-	unsigned long flags;
-
-	dev_dbg(kctx->kbdev->dev,
-		"Signal event (%s GPU notify) for context %pK\n",
-		notify_gpu ? "with" : "without", (void *)kctx);
-
-	/* First increment the signal count and wake up event thread.
-	 */
-	atomic_set(&kctx->event_count, 1);
-	kbase_event_wakeup(kctx);
-
-	/* Signal the CSF firmware. This is to ensure that pending command
-	 * stream synch object wait operations are re-evaluated.
-	 * Write to GLB_DOORBELL would suffice as spec says that all pending
-	 * synch object wait operations are re-evaluated on a write to any
-	 * CS_DOORBELL/GLB_DOORBELL register.
-	 */
-	if (notify_gpu)
-		sync_update_notify_gpu(kctx);
-
-	/* Now invoke the callbacks registered on backend side.
-	 * Allow item removal inside the loop, if requested by the callback.
-	 */
-	spin_lock_irqsave(&kctx->csf.event_lock, flags);
-
-	list_for_each_entry_safe(
-		event, next_event, &kctx->csf.event_callback_list, link) {
-		enum kbase_csf_event_callback_action action;
-
-		dev_dbg(kctx->kbdev->dev,
-			"Calling event handler %pK with param %pK\n",
-			(void *)event, event->param);
-		action = event->callback(event->param);
-		if (action == KBASE_CSF_EVENT_CALLBACK_REMOVE) {
-			list_del(&event->link);
-			kfree(event);
-		}
-	}
-
-	spin_unlock_irqrestore(&kctx->csf.event_lock, flags);
-}
-
-void kbase_csf_event_wait_remove_all(struct kbase_context *kctx)
-{
-	struct kbase_csf_event *event, *next_event;
-	unsigned long flags;
-
-	spin_lock_irqsave(&kctx->csf.event_lock, flags);
-
-	list_for_each_entry_safe(
-		event, next_event, &kctx->csf.event_callback_list, link) {
-		list_del(&event->link);
-		dev_dbg(kctx->kbdev->dev,
-			"Removed event handler %pK with param %pK\n",
-			(void *)event, event->param);
-		kfree(event);
-	}
-
-	spin_unlock_irqrestore(&kctx->csf.event_lock, flags);
-}
-
 /**
 * handle_oom_event - Handle the OoM event generated by the firmware for the
 *                    CSI.
 *
+ * @kctx: Pointer to the kbase context in which the tiler heap was initialized.
+ * @stream: Pointer to the structure containing info provided by the firmware
+ *          about the CSI.
+ *
 * This function will handle the OoM event request from the firmware for the
 * CS. It will retrieve the address of heap context and heap's
 * statistics (like number of render passes in-flight) from the CS's kernel
@@ -2097,10 +1930,6 @@ void kbase_csf_event_wait_remove_all(struct kbase_context *kctx)
 * It will also update the CS's kernel input page with the address
 * of a new chunk that was allocated.
 *
- * @kctx: Pointer to the kbase context in which the tiler heap was initialized.
- * @stream: Pointer to the structure containing info provided by the firmware
- *          about the CSI.
- *
 * Return: 0 if successfully handled the request, otherwise a negative error
 *         code on failure.
 */
@@ -2171,7 +2000,9 @@ static void report_tiler_oom_error(struct kbase_queue_group *group)
 							  BASE_GPU_QUEUE_GROUP_ERROR_TILER_HEAP_OOM,
 					  } } } };

-	add_error(group->kctx, &group->error_tiler_oom, &error);
+	kbase_csf_event_add_error(group->kctx,
+				  &group->error_tiler_oom,
+				  &error);
 	kbase_event_wakeup(group->kctx);
 }

@@ -2316,7 +2147,7 @@ static void report_group_timeout_error(struct kbase_queue_group *const group)
 		 "Notify the event notification thread, forward progress timeout (%llu cycles)\n",
 		 kbase_csf_timeout_get(group->kctx->kbdev));

-	add_error(group->kctx, &group->error_timeout, &error);
+	kbase_csf_event_add_error(group->kctx, &group->error_timeout, &error);
 	kbase_event_wakeup(group->kctx);
 }

@@ -2452,7 +2283,7 @@ static void report_queue_fatal_error(struct kbase_queue *const queue,
 		}
 	};

-	add_error(queue->kctx, &queue->error, &error);
+	kbase_csf_event_add_error(queue->kctx, &queue->error, &error);
 	kbase_event_wakeup(queue->kctx);
 }

@@ -3008,6 +2839,7 @@ void kbase_csf_interrupt(struct kbase_device *kbdev, u32 val)
 			if ((glb_req ^ glb_ack) & GLB_REQ_IDLE_EVENT_MASK) {
 				int non_idle_offslot_grps;
 				bool can_suspend_on_idle;
+
 				dev_dbg(kbdev->dev, "Idle-hysteresis event flagged");
 				kbase_csf_firmware_global_input_mask(
 						global_iface, GLB_REQ, glb_ack,
--- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf.h
+++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf.h
@@ -26,6 +26,7 @@
 #include "mali_kbase_csf_scheduler.h"
 #include "mali_kbase_csf_firmware.h"
 #include "mali_kbase_csf_protected_memory.h"
+#include "mali_kbase_hwaccess_time.h"

 /* Indicate invalid CS h/w interface
 */
@@ -46,129 +47,6 @@
 /* Idle hysteresis time can be scaled down when GPU sleep feature is used */
 #define FIRMWARE_IDLE_HYSTERESIS_GPU_SLEEP_SCALER (5)

-/**
- * enum kbase_csf_event_callback_action - return type for CSF event callbacks.
- *
- * @KBASE_CSF_EVENT_CALLBACK_FIRST: Never set explicitly.
- * It doesn't correspond to any action or type of event callback.
- *
- * @KBASE_CSF_EVENT_CALLBACK_KEEP: The callback will remain registered.
- *
- * @KBASE_CSF_EVENT_CALLBACK_REMOVE: The callback will be removed
- * immediately upon return.
- *
- * @KBASE_CSF_EVENT_CALLBACK_LAST: Never set explicitly.
- * It doesn't correspond to any action or type of event callback.
- */
-enum kbase_csf_event_callback_action {
-	KBASE_CSF_EVENT_CALLBACK_FIRST = 0,
-	KBASE_CSF_EVENT_CALLBACK_KEEP,
-	KBASE_CSF_EVENT_CALLBACK_REMOVE,
-	KBASE_CSF_EVENT_CALLBACK_LAST,
-};
-
-/**
- * kbase_csf_event_callback_action - type for callback functions to be
- *                                   called upon CSF events.
- *
- * This is the type of callback functions that can be registered
- * for CSF events. These function calls shall be triggered by any call
- * to kbase_csf_event_signal.
- *
- * @param:   Generic parameter to pass to the callback function.
- *
- * Return: KBASE_CSF_EVENT_CALLBACK_KEEP if the callback should remain
- * registered, or KBASE_CSF_EVENT_CALLBACK_REMOVE if it should be removed.
- */
-typedef enum kbase_csf_event_callback_action kbase_csf_event_callback(void *param);
-
-/**
- * kbase_csf_event_wait_add - Add a CSF event callback
- *
- * This function adds an event callback to the list of CSF event callbacks
- * belonging to a given Kbase context, to be triggered when a CSF event is
- * signalled by kbase_csf_event_signal.
- *
- * @kctx:      The Kbase context the @callback should be registered to.
- * @callback:  The callback function to register.
- * @param:     Custom parameter to be passed to the @callback function.
- *
- * Return: 0 on success, or negative on failure.
- */
-int kbase_csf_event_wait_add(struct kbase_context *kctx,
-		kbase_csf_event_callback *callback, void *param);
-
-/**
- * kbase_csf_event_wait_remove - Remove a CSF event callback
- *
- * This function removes an event callback from the list of CSF event callbacks
- * belonging to a given Kbase context.
- *
- * @kctx:      The kbase context the @callback should be removed from.
- * @callback:  The callback function to remove.
- * @param:     Custom parameter that would have been passed to the @p callback
- *             function.
- */
-void kbase_csf_event_wait_remove(struct kbase_context *kctx,
-		kbase_csf_event_callback *callback, void *param);
-
-/**
- * kbase_csf_event_wait_remove_all - Removes all CSF event callbacks
- *
- * This function empties the list of CSF event callbacks belonging to a given
- * Kbase context.
- *
- * @kctx:  The kbase context for which CSF event callbacks have to be removed.
- */
-void kbase_csf_event_wait_remove_all(struct kbase_context *kctx);
-
-/**
- * kbase_csf_read_error - Read CS fatal error
- *
- * This function takes the CS fatal error from context's ordered
- * error_list, copies its contents to @event_data.
- *
- * @kctx:       The kbase context to read fatal error from
- * @event_data: Caller-provided buffer to copy the fatal error to
- *
- * Return: true if fatal error is read successfully.
- */
-bool kbase_csf_read_error(struct kbase_context *kctx,
-		struct base_csf_notification *event_data);
-
-/**
- * kbase_csf_error_pending - Check whether fatal error is pending
- *
- * @kctx:  The kbase context to check fatal error upon.
- *
- * Return: true if fatal error is pending.
- */
-bool kbase_csf_error_pending(struct kbase_context *kctx);
-
-/**
- * kbase_csf_event_signal - Signal a CSF event
- *
- * This function triggers all the CSF event callbacks that are registered to
- * a given Kbase context, and also signals the event handling thread of
- * userspace driver waiting for the CSF event.
- *
- * @kctx:  The kbase context whose CSF event callbacks shall be triggered.
- * @notify_gpu: Flag to indicate if CSF firmware should be notified of the
- *              signaling of event that happened on the Driver side, either
- *              the signal came from userspace or from kcpu queues.
- */
-void kbase_csf_event_signal(struct kbase_context *kctx, bool notify_gpu);
-
-static inline void kbase_csf_event_signal_notify_gpu(struct kbase_context *kctx)
-{
-	kbase_csf_event_signal(kctx, true);
-}
-
-static inline void kbase_csf_event_signal_cpu_only(struct kbase_context *kctx)
-{
-	kbase_csf_event_signal(kctx, false);
-}
-
 /**
 * kbase_csf_ctx_init - Initialize the CSF interface for a GPU address space.
 *
@@ -182,11 +60,11 @@ int kbase_csf_ctx_init(struct kbase_context *kctx);
 * kbase_csf_ctx_handle_fault - Terminate queue groups & notify fault upon
 *                              GPU bus fault, MMU page fault or similar.
 *
- * This function terminates all GPU command queue groups in the context and
- * notifies the event notification thread of the fault.
- *
 * @kctx:       Pointer to faulty kbase context.
 * @fault:      Pointer to the fault.
+ *
+ * This function terminates all GPU command queue groups in the context and
+ * notifies the event notification thread of the fault.
 */
 void kbase_csf_ctx_handle_fault(struct kbase_context *kctx,
 		struct kbase_fault *fault);
@@ -194,10 +72,10 @@ void kbase_csf_ctx_handle_fault(struct kbase_context *kctx,
 /**
 * kbase_csf_ctx_term - Terminate the CSF interface for a GPU address space.
 *
+ * @kctx:	Pointer to the kbase context which is being terminated.
+ *
 * This function terminates any remaining CSGs and CSs which weren't destroyed
 * before context termination.
- *
- * @kctx:	Pointer to the kbase context which is being terminated.
 */
 void kbase_csf_ctx_term(struct kbase_context *kctx);

@@ -246,14 +124,14 @@ void kbase_csf_queue_terminate(struct kbase_context *kctx,
 * kbase_csf_alloc_command_stream_user_pages - Allocate resources for a
 *                                             GPU command queue.
 *
- * This function allocates a pair of User mode input/output pages for a
- * GPU command queue and maps them in the shared interface segment of MCU
- * firmware address space. Also reserves a hardware doorbell page for the queue.
- *
 * @kctx:	Pointer to the kbase context within which the resources
 *		for the queue are being allocated.
 * @queue:	Pointer to the queue for which to allocate resources.
 *
+ * This function allocates a pair of User mode input/output pages for a
+ * GPU command queue and maps them in the shared interface segment of MCU
+ * firmware address space. Also reserves a hardware doorbell page for the queue.
+ *
 * Return:	0 on success, or negative on failure.
 */
 int kbase_csf_alloc_command_stream_user_pages(struct kbase_context *kctx,
@@ -294,9 +172,9 @@ void kbase_csf_queue_unbind_stopped(struct kbase_queue *queue);
 /**
 * kbase_csf_queue_kick - Schedule a GPU command queue on the firmware
 *
- * @kctx:	The kbase context.
- * @kick:	Pointer to the struct which specifies the queue
- *		that needs to be scheduled.
+ * @kctx:   The kbase context.
+ * @kick:   Pointer to the struct which specifies the queue
+ *          that needs to be scheduled.
 *
 * Return:	0 on success, or negative on failure.
 */
@@ -307,12 +185,12 @@ int kbase_csf_queue_kick(struct kbase_context *kctx,
 * kbase_csf_queue_group_handle_is_valid - Find if the given queue group handle
 *                                         is valid.
 *
- * This function is used to determine if the queue group handle is valid.
- *
 * @kctx:		The kbase context under which the queue group exists.
 * @group_handle:	Handle for the group which uniquely identifies it within
 *			the context with which it was created.
 *
+ * This function is used to determine if the queue group handle is valid.
+ *
 * Return:		0 on success, or negative on failure.
 */
 int kbase_csf_queue_group_handle_is_valid(struct kbase_context *kctx,
@@ -359,8 +237,6 @@ void kbase_csf_term_descheduled_queue_group(struct kbase_queue_group *group);
 /**
 * kbase_csf_queue_group_suspend - Suspend a GPU command queue group
 *
- * This function is used to suspend a queue group and copy the suspend buffer.
- *
 * @kctx:		The kbase context for which the queue group is to be
 *			suspended.
 * @sus_buf:		Pointer to the structure which contains details of the
@@ -368,6 +244,8 @@ void kbase_csf_term_descheduled_queue_group(struct kbase_queue_group *group);
 * @group_handle:	Handle for the group which uniquely identifies it within
 *			the context within which it was created.
 *
+ * This function is used to suspend a queue group and copy the suspend buffer.
+ *
 * Return:		0 on success or negative value if failed to suspend
 *			queue group and copy suspend buffer contents.
 */
@@ -397,12 +275,12 @@ void kbase_csf_interrupt(struct kbase_device *kbdev, u32 val);
 *                                   the update of userspace mapping of HW
 *                                   doorbell page.
 *
+ * @kbdev: Instance of a GPU platform device that implements a CSF interface.
+ *
 * The function creates a file and allocates a dummy page to facilitate the
 * update of userspace mapping to point to the dummy page instead of the real
 * HW doorbell page after the suspend of queue group.
 *
- * @kbdev: Instance of a GPU platform device that implements a CSF interface.
- *
 * Return: 0 on success, or negative on failure.
 */
 int kbase_csf_doorbell_mapping_init(struct kbase_device *kbdev);
@@ -420,14 +298,14 @@ void kbase_csf_doorbell_mapping_term(struct kbase_device *kbdev);
 *                                       instead of the User register page after
 *                                       the GPU power down.
 *
+ * @kbdev: Instance of a GPU platform device that implements a CSF interface.
+ *
 * The function allocates a dummy page which is used to replace the User
 * register page in the userspace mapping after the power down of GPU.
 * On the power up of GPU, the mapping is updated to point to the real
 * User register page. The mapping is used to allow access to LATEST_FLUSH
 * register from userspace.
 *
- * @kbdev: Instance of a GPU platform device that implements a CSF interface.
- *
 * Return: 0 on success, or negative on failure.
 */
 int kbase_csf_setup_dummy_user_reg_page(struct kbase_device *kbdev);
@@ -443,10 +321,10 @@ void kbase_csf_free_dummy_user_reg_page(struct kbase_device *kbdev);
 /**
 * kbase_csf_ring_csg_doorbell - ring the doorbell for a CSG interface.
 *
- * The function kicks a notification on the CSG interface to firmware.
- *
 * @kbdev: Instance of a GPU platform device that implements a CSF interface.
 * @slot: Index of CSG interface for ringing the door-bell.
+ *
+ * The function kicks a notification on the CSG interface to firmware.
 */
 void kbase_csf_ring_csg_doorbell(struct kbase_device *kbdev, int slot);

@@ -454,10 +332,10 @@ void kbase_csf_ring_csg_doorbell(struct kbase_device *kbdev, int slot);
 * kbase_csf_ring_csg_slots_doorbell - ring the doorbell for a set of CSG
 *                                     interfaces.
 *
- * The function kicks a notification on a set of CSG interfaces to firmware.
- *
 * @kbdev: Instance of a GPU platform device that implements a CSF interface.
 * @slot_bitmap: bitmap for the given slots, slot-0 on bit-0, etc.
+ *
+ * The function kicks a notification on a set of CSG interfaces to firmware.
 */
 void kbase_csf_ring_csg_slots_doorbell(struct kbase_device *kbdev,
 				       u32 slot_bitmap);
@@ -466,9 +344,6 @@ void kbase_csf_ring_csg_slots_doorbell(struct kbase_device *kbdev,
 * kbase_csf_ring_cs_kernel_doorbell - ring the kernel doorbell for a CSI
 *                                     assigned to a GPU queue
 *
- * The function sends a doorbell interrupt notification to the firmware for
- * a CSI assigned to a GPU queue.
- *
 * @kbdev: Instance of a GPU platform device that implements a CSF interface.
 * @csi_index: ID of the CSI assigned to the GPU queue.
 * @csg_nr:    Index of the CSG slot assigned to the queue
@@ -479,6 +354,9 @@ void kbase_csf_ring_csg_slots_doorbell(struct kbase_device *kbdev,
 *                     The flag is supposed be false only when the input page
 *                     for bound GPU queues is programmed at the time of
 *                     starting/resuming the group on a CSG slot.
+ *
+ * The function sends a doorbell interrupt notification to the firmware for
+ * a CSI assigned to a GPU queue.
 */
 void kbase_csf_ring_cs_kernel_doorbell(struct kbase_device *kbdev,
 				       int csi_index, int csg_nr,
@@ -488,11 +366,11 @@ void kbase_csf_ring_cs_kernel_doorbell(struct kbase_device *kbdev,
 * kbase_csf_ring_cs_user_doorbell - ring the user doorbell allocated for a
 *                                   queue.
 *
- * The function kicks a notification to the firmware on the doorbell assigned
- * to the queue.
- *
 * @kbdev: Instance of a GPU platform device that implements a CSF interface.
 * @queue: Pointer to the queue for ringing the door-bell.
+ *
+ * The function kicks a notification to the firmware on the doorbell assigned
+ * to the queue.
 */
 void kbase_csf_ring_cs_user_doorbell(struct kbase_device *kbdev,
 			struct kbase_queue *queue);
@@ -563,5 +441,23 @@ static inline u8 kbase_csf_priority_queue_group_priority_to_relative(u8 priority
 	return kbasep_csf_queue_group_priority_to_relative[priority];
 }

-
+/**
+ * kbase_csf_ktrace_gpu_cycle_cnt - Wrapper to retrieve the GPU cycle counter
+ *                                  value for Ktrace purpose.
+ *
+ * @kbdev: Instance of a GPU platform device that implements a CSF interface.
+ *
+ * This function is just a wrapper to retrieve the GPU cycle counter value, to
+ * avoid any overhead on Release builds where Ktrace is disabled by default.
+ *
+ * Return: Snapshot of the GPU cycle count register.
+ */
+static inline u64 kbase_csf_ktrace_gpu_cycle_cnt(struct kbase_device *kbdev)
+{
+#if KBASE_KTRACE_ENABLE
+	return kbase_backend_get_cycle_cnt(kbdev);
+#else
+	return 0;
+#endif
+}
 #endif /* _KBASE_CSF_H_ */
--- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_defs.h
+++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_defs.h
@@ -30,6 +30,7 @@
 #include <linux/wait.h>

 #include "mali_kbase_csf_firmware.h"
+#include "mali_kbase_csf_event.h"

 /* Maximum number of KCPU command queues to be created per GPU address space.
 */
@@ -331,6 +332,7 @@ struct kbase_csf_notification {
 *                    queue.
 * @cs_fatal_info:    Records additional information about the CS fatal event.
 * @cs_fatal:         Records information about the CS fatal event.
+ * @pending:          Indicates whether the queue has newly submitted work.
 */
 struct kbase_queue {
 	struct kbase_context *kctx;
@@ -364,6 +366,7 @@ struct kbase_queue {
 	struct work_struct fatal_event_work;
 	u64 cs_fatal_info;
 	u32 cs_fatal;
+	atomic_t pending;
 };

 /**
@@ -487,6 +490,7 @@ struct kbase_queue_group {
 	struct kbase_csf_notification error_tiler_oom;

 	struct work_struct timer_event_work;
+
 };

 /**
@@ -538,10 +542,6 @@ struct kbase_csf_cpu_queue_context {
 /**
 * struct kbase_csf_heap_context_allocator - Allocator of heap contexts
 *
- * Heap context structures are allocated by the kernel for use by the firmware.
- * The current implementation subdivides a single GPU memory region for use as
- * a sparse array.
- *
 * @kctx:     Pointer to the kbase context with which this allocator is
 *            associated.
 * @region:   Pointer to a GPU memory region from which heap context structures
@@ -552,6 +552,10 @@ struct kbase_csf_cpu_queue_context {
 * @lock:     Lock preventing concurrent access to the @in_use bitmap.
 * @in_use:   Bitmap that indicates which heap context structures are currently
 *            allocated (in @region).
+ *
+ * Heap context structures are allocated by the kernel for use by the firmware.
+ * The current implementation subdivides a single GPU memory region for use as
+ * a sparse array.
 */
 struct kbase_csf_heap_context_allocator {
 	struct kbase_context *kctx;
@@ -565,10 +569,6 @@ struct kbase_csf_heap_context_allocator {
 * struct kbase_csf_tiler_heap_context - Object representing the tiler heaps
 *                                       context for a GPU address space.
 *
- * This contains all of the CSF state relating to chunked tiler heaps for one
- * @kbase_context. It is not the same as a heap context structure allocated by
- * the kernel for use by the firmware.
- *
 * @lock:        Lock to prevent the concurrent access to tiler heaps (after the
 *               initialization), a tiler heap can be terminated whilst an OoM
 *               event is being handled for it.
@@ -576,6 +576,10 @@ struct kbase_csf_heap_context_allocator {
 * @ctx_alloc:   Allocator for heap context structures.
 * @nr_of_heaps: Total number of tiler heaps that were added during the
 *               life time of the context.
+ *
+ * This contains all of the CSF state relating to chunked tiler heaps for one
+ * @kbase_context. It is not the same as a heap context structure allocated by
+ * the kernel for use by the firmware.
 */
 struct kbase_csf_tiler_heap_context {
 	struct mutex lock;
@@ -616,6 +620,43 @@ struct kbase_csf_scheduler_context {
 	u32 ngrp_to_schedule;
 };

+/**
+ * enum kbase_csf_event_callback_action - return type for CSF event callbacks.
+ *
+ * @KBASE_CSF_EVENT_CALLBACK_FIRST: Never set explicitly.
+ * It doesn't correspond to any action or type of event callback.
+ *
+ * @KBASE_CSF_EVENT_CALLBACK_KEEP: The callback will remain registered.
+ *
+ * @KBASE_CSF_EVENT_CALLBACK_REMOVE: The callback will be removed
+ * immediately upon return.
+ *
+ * @KBASE_CSF_EVENT_CALLBACK_LAST: Never set explicitly.
+ * It doesn't correspond to any action or type of event callback.
+ */
+enum kbase_csf_event_callback_action {
+	KBASE_CSF_EVENT_CALLBACK_FIRST = 0,
+	KBASE_CSF_EVENT_CALLBACK_KEEP,
+	KBASE_CSF_EVENT_CALLBACK_REMOVE,
+	KBASE_CSF_EVENT_CALLBACK_LAST,
+};
+
+/**
+ * struct kbase_csf_event - Object representing CSF event and error
+ *
+ * @callback_list:	List of callbacks which are registered to serve CSF
+ *			events.
+ * @error_list:		List for CS fatal errors in CSF context.
+ *			Link of fatal error is &struct_kbase_csf_notification.link.
+ * @lock:		Lock protecting access to @callback_list and
+ *			@error_list.
+ */
+struct kbase_csf_event {
+	struct list_head callback_list;
+	struct list_head error_list;
+	spinlock_t lock;
+};
+
 /**
 * struct kbase_csf_context - Object representing CSF for a GPU address space.
 *
@@ -647,10 +688,7 @@ struct kbase_csf_scheduler_context {
 *                    userspace mapping created for them on bind operation
 *                    hasn't been removed.
 * @kcpu_queues:      Kernel CPU command queues.
- * @event_lock:       Lock protecting access to @event_callback_list and
- *                    @error_list.
- * @event_callback_list: List of callbacks which are registered to serve CSF
- *                       events.
+ * @event:            CSF event object.
 * @tiler_heaps:      Chunked tiler memory heaps.
 * @wq:               Dedicated workqueue to process work items corresponding
 *                    to the OoM events raised for chunked tiler heaps being
@@ -661,10 +699,7 @@ struct kbase_csf_scheduler_context {
 *                    of the USER register page. Currently used only for sanity
 *                    checking.
 * @sched:            Object representing the scheduler's context
- * @error_list:       List for CS fatal errors in this context.
- *                    Link of fatal error is
- *                    &struct_kbase_csf_notification.link.
- *                    @event_lock needs to be held to access this list.
+ * @pending_submission_work: Work item to process pending kicked GPU command queues.
 * @cpu_queue:        CPU queue information. Only be available when DEBUG_FS
 *                    is enabled.
 */
@@ -677,14 +712,13 @@ struct kbase_csf_context {
 	struct kbase_queue_group *queue_groups[MAX_QUEUE_GROUP_NUM];
 	struct list_head queue_list;
 	struct kbase_csf_kcpu_queue_context kcpu_queues;
-	spinlock_t event_lock;
-	struct list_head event_callback_list;
+	struct kbase_csf_event event;
 	struct kbase_csf_tiler_heap_context tiler_heaps;
 	struct workqueue_struct *wq;
 	struct list_head link;
 	struct vm_area_struct *user_reg_vma;
 	struct kbase_csf_scheduler_context sched;
-	struct list_head error_list;
+	struct work_struct pending_submission_work;
 #if IS_ENABLED(CONFIG_DEBUG_FS)
 	struct kbase_csf_cpu_queue_context cpu_queue;
 #endif
@@ -882,12 +916,12 @@ struct kbase_csf_scheduler {
 	bool tick_timer_active;
 };

-/**
+/*
 * Number of GPU cycles per unit of the global progress timeout.
 */
 #define GLB_PROGRESS_TIMER_TIMEOUT_SCALE ((u64)1024)

-/**
+/*
 * Maximum value of the global progress timeout.
 */
 #define GLB_PROGRESS_TIMER_TIMEOUT_MAX \
@@ -895,12 +929,12 @@ struct kbase_csf_scheduler {
 		GLB_PROGRESS_TIMER_TIMEOUT_SHIFT) * \
 	GLB_PROGRESS_TIMER_TIMEOUT_SCALE)

-/**
+/*
 * Default GLB_PWROFF_TIMER_TIMEOUT value in unit of micro-seconds.
 */
 #define DEFAULT_GLB_PWROFF_TIMEOUT_US (800)

-/**
+/*
 * In typical operations, the management of the shader core power transitions
 * is delegated to the MCU/firmware. However, if the host driver is configured
 * to take direct control, one needs to disable the MCU firmware GLB_PWROFF
@@ -911,7 +945,7 @@ struct kbase_csf_scheduler {
 /* Index of the GPU_ACTIVE counter within the CSHW counter block */
 #define GPU_ACTIVE_CNT_IDX (4)

-/**
+/*
 * Maximum number of sessions that can be managed by the IPA Control component.
 */
 #if MALI_UNIT_TEST
@@ -937,13 +971,13 @@ enum kbase_ipa_core_type {
 	KBASE_IPA_CORE_TYPE_NUM
 };

-/**
+/*
 * Number of configurable counters per type of block on the IPA Control
 * interface.
 */
 #define KBASE_IPA_CONTROL_NUM_BLOCK_COUNTERS ((size_t)8)

-/**
+/*
 * Total number of configurable counters existing on the IPA Control interface.
 */
 #define KBASE_IPA_CONTROL_MAX_COUNTERS                                         \
--- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_event.c
+++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_event.c
@@ -0,0 +1,253 @@
+// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
+/*
+ *
+ * (C) COPYRIGHT 2021 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+#include <mali_kbase.h>
+#include "mali_kbase_csf_event.h"
+
+/**
+ * struct kbase_csf_event_cb - CSF event callback.
+ *
+ * @link:      Link to the rest of the list.
+ * @kctx:      Pointer to the Kbase context this event belongs to.
+ * @callback:  Callback function to call when a CSF event is signalled.
+ * @param:     Parameter to pass to the callback function.
+ *
+ * This structure belongs to the list of events which is part of a Kbase
+ * context, and describes a callback function with a custom parameter to pass
+ * to it when a CSF event is signalled.
+ */
+struct kbase_csf_event_cb {
+	struct list_head link;
+	struct kbase_context *kctx;
+	kbase_csf_event_callback *callback;
+	void *param;
+};
+
+int kbase_csf_event_wait_add(struct kbase_context *kctx,
+			     kbase_csf_event_callback *callback, void *param)
+{
+	int err = -ENOMEM;
+	struct kbase_csf_event_cb *event_cb =
+		kzalloc(sizeof(struct kbase_csf_event_cb), GFP_KERNEL);
+
+	if (event_cb) {
+		unsigned long flags;
+
+		event_cb->kctx = kctx;
+		event_cb->callback = callback;
+		event_cb->param = param;
+
+		spin_lock_irqsave(&kctx->csf.event.lock, flags);
+		list_add_tail(&event_cb->link, &kctx->csf.event.callback_list);
+		dev_dbg(kctx->kbdev->dev,
+			"Added event handler %pK with param %pK\n", event_cb,
+			event_cb->param);
+		spin_unlock_irqrestore(&kctx->csf.event.lock, flags);
+
+		err = 0;
+	}
+
+	return err;
+}
+
+void kbase_csf_event_wait_remove(struct kbase_context *kctx,
+		kbase_csf_event_callback *callback, void *param)
+{
+	struct kbase_csf_event_cb *event_cb;
+	unsigned long flags;
+
+	spin_lock_irqsave(&kctx->csf.event.lock, flags);
+
+	list_for_each_entry(event_cb, &kctx->csf.event.callback_list, link) {
+		if ((event_cb->callback == callback) && (event_cb->param == param)) {
+			list_del(&event_cb->link);
+			dev_dbg(kctx->kbdev->dev,
+				"Removed event handler %pK with param %pK\n",
+				event_cb, event_cb->param);
+			kfree(event_cb);
+			break;
+		}
+	}
+	spin_unlock_irqrestore(&kctx->csf.event.lock, flags);
+}
+
+static void sync_update_notify_gpu(struct kbase_context *kctx)
+{
+	bool can_notify_gpu;
+	unsigned long flags;
+
+	spin_lock_irqsave(&kctx->kbdev->hwaccess_lock, flags);
+	can_notify_gpu = kctx->kbdev->pm.backend.gpu_powered;
+#ifdef KBASE_PM_RUNTIME
+	if (kctx->kbdev->pm.backend.gpu_sleep_mode_active)
+		can_notify_gpu = false;
+#endif
+
+	if (can_notify_gpu) {
+		kbase_csf_ring_doorbell(kctx->kbdev, CSF_KERNEL_DOORBELL_NR);
+		KBASE_KTRACE_ADD(kctx->kbdev, SYNC_UPDATE_EVENT_NOTIFY_GPU, kctx, 0u);
+	}
+
+	spin_unlock_irqrestore(&kctx->kbdev->hwaccess_lock, flags);
+}
+
+void kbase_csf_event_signal(struct kbase_context *kctx, bool notify_gpu)
+{
+	struct kbase_csf_event_cb *event_cb, *next_event_cb;
+	unsigned long flags;
+
+	dev_dbg(kctx->kbdev->dev,
+		"Signal event (%s GPU notify) for context %pK\n",
+		notify_gpu ? "with" : "without", (void *)kctx);
+
+	/* First set the signal count and wake up event thread.
+	 */
+	atomic_set(&kctx->event_count, 1);
+	kbase_event_wakeup(kctx);
+
+	/* Signal the CSF firmware. This is to ensure that pending command
+	 * stream synch object wait operations are re-evaluated.
+	 * Write to GLB_DOORBELL would suffice as spec says that all pending
+	 * synch object wait operations are re-evaluated on a write to any
+	 * CS_DOORBELL/GLB_DOORBELL register.
+	 */
+	if (notify_gpu)
+		sync_update_notify_gpu(kctx);
+
+	/* Now invoke the callbacks registered on backend side.
+	 * Allow item removal inside the loop, if requested by the callback.
+	 */
+	spin_lock_irqsave(&kctx->csf.event.lock, flags);
+
+	list_for_each_entry_safe(
+		event_cb, next_event_cb, &kctx->csf.event.callback_list, link) {
+		enum kbase_csf_event_callback_action action;
+
+		dev_dbg(kctx->kbdev->dev,
+			"Calling event handler %pK with param %pK\n",
+			(void *)event_cb, event_cb->param);
+		action = event_cb->callback(event_cb->param);
+		if (action == KBASE_CSF_EVENT_CALLBACK_REMOVE) {
+			list_del(&event_cb->link);
+			kfree(event_cb);
+		}
+	}
+
+	spin_unlock_irqrestore(&kctx->csf.event.lock, flags);
+}
+
+void kbase_csf_event_term(struct kbase_context *kctx)
+{
+	struct kbase_csf_event_cb *event_cb, *next_event_cb;
+	unsigned long flags;
+
+	spin_lock_irqsave(&kctx->csf.event.lock, flags);
+
+	list_for_each_entry_safe(
+		event_cb, next_event_cb, &kctx->csf.event.callback_list, link) {
+		list_del(&event_cb->link);
+		dev_warn(kctx->kbdev->dev,
+			"Removed event handler %pK with param %pK\n",
+			(void *)event_cb, event_cb->param);
+		kfree(event_cb);
+	}
+
+	WARN_ON(!list_empty(&kctx->csf.event.error_list));
+
+	spin_unlock_irqrestore(&kctx->csf.event.lock, flags);
+}
+
+void kbase_csf_event_init(struct kbase_context *const kctx)
+{
+	INIT_LIST_HEAD(&kctx->csf.event.callback_list);
+	INIT_LIST_HEAD(&kctx->csf.event.error_list);
+	spin_lock_init(&kctx->csf.event.lock);
+}
+
+void kbase_csf_event_remove_error(struct kbase_context *kctx,
+				  struct kbase_csf_notification *error)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&kctx->csf.event.lock, flags);
+	list_del_init(&error->link);
+	spin_unlock_irqrestore(&kctx->csf.event.lock, flags);
+}
+
+bool kbase_csf_event_read_error(struct kbase_context *kctx,
+				struct base_csf_notification *event_data)
+{
+	struct kbase_csf_notification *error_data = NULL;
+	unsigned long flags;
+
+	spin_lock_irqsave(&kctx->csf.event.lock, flags);
+	if (likely(!list_empty(&kctx->csf.event.error_list))) {
+		error_data = list_first_entry(&kctx->csf.event.error_list,
+			struct kbase_csf_notification, link);
+		list_del_init(&error_data->link);
+		*event_data = error_data->data;
+		dev_dbg(kctx->kbdev->dev, "Dequeued error %pK in context %pK\n",
+			(void *)error_data, (void *)kctx);
+	}
+	spin_unlock_irqrestore(&kctx->csf.event.lock, flags);
+	return !!error_data;
+}
+
+void kbase_csf_event_add_error(struct kbase_context *const kctx,
+			struct kbase_csf_notification *const error,
+			struct base_csf_notification const *const data)
+{
+	unsigned long flags;
+
+	if (WARN_ON(!kctx))
+		return;
+
+	if (WARN_ON(!error))
+		return;
+
+	if (WARN_ON(!data))
+		return;
+
+	spin_lock_irqsave(&kctx->csf.event.lock, flags);
+	if (!WARN_ON(!list_empty(&error->link))) {
+		error->data = *data;
+		list_add_tail(&error->link, &kctx->csf.event.error_list);
+		dev_dbg(kctx->kbdev->dev,
+			"Added error %pK of type %d in context %pK\n",
+			(void *)error, data->type, (void *)kctx);
+	}
+	spin_unlock_irqrestore(&kctx->csf.event.lock, flags);
+}
+
+bool kbase_csf_event_error_pending(struct kbase_context *kctx)
+{
+	bool error_pending = false;
+	unsigned long flags;
+
+	spin_lock_irqsave(&kctx->csf.event.lock, flags);
+	error_pending = !list_empty(&kctx->csf.event.error_list);
+
+	dev_dbg(kctx->kbdev->dev, "%s error is pending in context %pK\n",
+		error_pending ? "An" : "No", (void *)kctx);
+
+	spin_unlock_irqrestore(&kctx->csf.event.lock, flags);
+
+	return error_pending;
+}
--- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_event.h
+++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_event.h
@@ -0,0 +1,171 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ *
+ * (C) COPYRIGHT 2021 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+#ifndef _KBASE_CSF_EVENT_H_
+#define _KBASE_CSF_EVENT_H_
+
+#include <linux/types.h>
+#include <linux/wait.h>
+
+struct kbase_context;
+struct kbase_csf_event;
+enum kbase_csf_event_callback_action;
+
+/**
+ * typedef kbase_csf_event_callback - type for callback functions to be
+ *                                    called upon CSF events.
+ * @param:   Generic parameter to pass to the callback function.
+ *
+ * This is the type of callback functions that can be registered
+ * for CSF events. These function calls shall be triggered by any call
+ * to kbase_csf_event_signal.
+ *
+ * Return: KBASE_CSF_EVENT_CALLBACK_KEEP if the callback should remain
+ * registered, or KBASE_CSF_EVENT_CALLBACK_REMOVE if it should be removed.
+ */
+typedef enum kbase_csf_event_callback_action kbase_csf_event_callback(void *param);
+
+/**
+ * kbase_csf_event_wait_add - Add a CSF event callback
+ *
+ * @kctx:      The Kbase context the @callback should be registered to.
+ * @callback:  The callback function to register.
+ * @param:     Custom parameter to be passed to the @callback function.
+ *
+ * This function adds an event callback to the list of CSF event callbacks
+ * belonging to a given Kbase context, to be triggered when a CSF event is
+ * signalled by kbase_csf_event_signal.
+ *
+ * Return: 0 on success, or negative on failure.
+ */
+int kbase_csf_event_wait_add(struct kbase_context *kctx,
+		kbase_csf_event_callback *callback, void *param);
+
+/**
+ * kbase_csf_event_wait_remove - Remove a CSF event callback
+ *
+ * @kctx:      The kbase context the @callback should be removed from.
+ * @callback:  The callback function to remove.
+ * @param:     Custom parameter that would have been passed to the @callback
+ *             function.
+ *
+ * This function removes an event callback from the list of CSF event callbacks
+ * belonging to a given Kbase context.
+ */
+void kbase_csf_event_wait_remove(struct kbase_context *kctx,
+		kbase_csf_event_callback *callback, void *param);
+
+/**
+ * kbase_csf_event_term - Removes all CSF event callbacks
+ *
+ * @kctx:  The kbase context for which CSF event callbacks have to be removed.
+ *
+ * This function empties the list of CSF event callbacks belonging to a given
+ * Kbase context.
+ */
+void kbase_csf_event_term(struct kbase_context *kctx);
+
+/**
+ * kbase_csf_event_signal - Signal a CSF event
+ *
+ * @kctx:  The kbase context whose CSF event callbacks shall be triggered.
+ * @notify_gpu: Flag to indicate if CSF firmware should be notified of the
+ *              signaling of an event that happened on the Driver side, whether
+ *              the signal came from userspace or from kcpu queues.
+ *
+ * This function triggers all the CSF event callbacks that are registered to
+ * a given Kbase context, and also signals the event handling thread of
+ * userspace driver waiting for the CSF event.
+ */
+void kbase_csf_event_signal(struct kbase_context *kctx, bool notify_gpu);
+
+static inline void kbase_csf_event_signal_notify_gpu(struct kbase_context *kctx)
+{
+	kbase_csf_event_signal(kctx, true); /* notify_gpu: CSF firmware is notified too */
+}
+
+static inline void kbase_csf_event_signal_cpu_only(struct kbase_context *kctx)
+{
+	kbase_csf_event_signal(kctx, false); /* notify_gpu = false: firmware is not notified */
+}
+
+/**
+ * kbase_csf_event_init - Initialize event object
+ *
+ * This function initializes the event object.
+ *
+ * @kctx: The kbase context whose event object will be initialized.
+ */
+void kbase_csf_event_init(struct kbase_context *const kctx);
+
+struct kbase_csf_notification;
+struct base_csf_notification;
+/**
+ * kbase_csf_event_read_error - Read and remove an error from error list in event
+ *
+ * @kctx: The kbase context.
+ * @event_data: Caller-provided buffer to copy the fatal error to
+ *
+ * This function takes the first CS fatal error off the context's ordered
+ * error_list and copies its contents to @event_data.
+ *
+ * Return: true if error is read out or false if there is no error in error list.
+ */
+bool kbase_csf_event_read_error(struct kbase_context *kctx,
+				struct base_csf_notification *event_data);
+
+/**
+ * kbase_csf_event_add_error - Add an error into event error list
+ *
+ * @kctx:  Address of a base context associated with a GPU address space.
+ * @error: Address of the item to be added to the context's pending error list.
+ * @data:  Error data to be returned to userspace.
+ *
+ * Does not wake up the event queue blocking a user thread in kbase_poll. This
+ * is to make it more efficient to add multiple errors.
+ *
+ * The added error must not already be on the context's list of errors waiting
+ * to be reported (e.g. because a previous error concerning the same object has
+ * not yet been reported).
+ *
+ */
+void kbase_csf_event_add_error(struct kbase_context *const kctx,
+			struct kbase_csf_notification *const error,
+			struct base_csf_notification const *const data);
+
+/**
+ * kbase_csf_event_remove_error - Remove an error from event error list
+ *
+ * @kctx:  Address of a base context associated with a GPU address space.
+ * @error: Address of the item to be removed from the context's event error list.
+ */
+void kbase_csf_event_remove_error(struct kbase_context *kctx,
+				  struct kbase_csf_notification *error);
+
+/**
+ * kbase_csf_event_error_pending - Check the error pending status
+ *
+ * @kctx: The kbase context to check fatal error upon.
+ *
+ * Return: true if there is at least one error in the list.
+ */
+bool kbase_csf_event_error_pending(struct kbase_context *kctx);
+#endif /* _KBASE_CSF_EVENT_H_ */
--- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware.c
+++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware.c
@@ -31,6 +31,7 @@
 #include "device/mali_kbase_device.h"
 #include "backend/gpu/mali_kbase_pm_internal.h"
 #include "tl/mali_kbase_timeline_priv.h"
+#include "tl/mali_kbase_tracepoints.h"
 #include "mali_kbase_csf_tl_reader.h"
 #include "backend/gpu/mali_kbase_clk_rate_trace_mgr.h"
 #include <csf/ipa_control/mali_kbase_csf_ipa_control.h>
@@ -157,8 +158,7 @@ static bool entry_optional(u32 header)
 }

 /**
- * struct firmware_timeline_metadata -
- * Timeline metadata item within the MCU firmware
+ * struct firmware_timeline_metadata - Timeline metadata item within the MCU firmware
 *
 * @node: List head linking all timeline metadata to
 *        kbase_device:csf.firmware_timeline_metadata.
@@ -217,10 +217,11 @@ static int wait_mcu_status_value(struct kbase_device *kbdev, u32 val)
 	return (max_loops == 0) ? -1 : 0;
 }

-void kbase_csf_firmware_disable_mcu_wait(struct kbase_device *kbdev)
+void kbase_csf_firmware_disable_mcu(struct kbase_device *kbdev)
 {
-	if (wait_mcu_status_value(kbdev, MCU_CNTRL_DISABLE) < 0)
-		dev_err(kbdev->dev, "MCU failed to get disabled");
+	KBASE_TLSTREAM_TL_KBASE_CSFFW_FW_DISABLING(kbdev, kbase_backend_get_cycle_cnt(kbdev));
+
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(MCU_CONTROL), MCU_CNTRL_DISABLE);
 }

 static void wait_for_firmware_stop(struct kbase_device *kbdev)
@@ -229,6 +230,13 @@ static void wait_for_firmware_stop(struct kbase_device *kbdev)
 		/* This error shall go away once MIDJM-2371 is closed */
 		dev_err(kbdev->dev, "Firmware failed to stop");
 	}
+
+	KBASE_TLSTREAM_TL_KBASE_CSFFW_FW_OFF(kbdev, kbase_backend_get_cycle_cnt(kbdev));
+}
+
+void kbase_csf_firmware_disable_mcu_wait(struct kbase_device *kbdev)
+{
+	wait_for_firmware_stop(kbdev);
 }

 static void stop_csf_firmware(struct kbase_device *kbdev)
@@ -463,16 +471,16 @@ out:
 /**
 * parse_memory_setup_entry() - Process an "interface memory setup" section
 *
+ * @kbdev: Kbase device structure
+ * @fw: The firmware image containing the section
+ * @entry: Pointer to the start of the section
+ * @size: Size (in bytes) of the section
+ *
 * Read an "interface memory setup" section from the firmware image and create
 * the necessary memory region including the MMU page tables. If successful
 * the interface will be added to the kbase_device:csf.firmware_interfaces list.
 *
 * Return: 0 if successful, negative error code on failure
- *
- * @kbdev: Kbase device structure
- * @fw: The firmware image containing the section
- * @entry: Pointer to the start of the section
- * @size: Size (in bytes) of the section
 */
 static int parse_memory_setup_entry(struct kbase_device *kbdev,
 		const struct firmware *fw,
@@ -724,6 +732,11 @@ static int parse_timeline_metadata_entry(struct kbase_device *kbdev,
 /**
 * load_firmware_entry() - Process an entry from a firmware image
 *
+ * @kbdev:  Kbase device
+ * @fw:     Firmware image containing the entry
+ * @offset: Byte offset within the image of the entry to load
+ * @header: Header word of the entry
+ *
 * Read an entry from a firmware image and do any necessary work (e.g. loading
 * the data into page accessible to the MCU).
 *
@@ -731,11 +744,6 @@ static int parse_timeline_metadata_entry(struct kbase_device *kbdev,
 * otherwise the function will fail with -EINVAL
 *
 * Return: 0 if successful, negative error code on failure
- *
- * @kbdev:  Kbase device
- * @fw:     Firmware image containing the entry
- * @offset: Byte offset within the image of the entry to load
- * @header: Header word of the entry
 */
 static int load_firmware_entry(struct kbase_device *kbdev,
 		const struct firmware *fw,
@@ -784,18 +792,6 @@ static int load_firmware_entry(struct kbase_device *kbdev,
 		}
 		return kbase_csf_firmware_cfg_option_entry_parse(
 			kbdev, fw, entry, size, updatable);
-	case CSF_FIRMWARE_ENTRY_TYPE_FUTF_TEST:
-#ifndef MALI_KBASE_BUILD
-		/* FW UTF option */
-		if (size < 2*sizeof(*entry)) {
-			dev_err(kbdev->dev, "FW UTF entry too short (size=%u)\n",
-					size);
-			return -EINVAL;
-		}
-		return mali_kutf_process_fw_utf_entry(kbdev, fw->data,
-						      fw->size, entry);
-#endif
-		break;
 	case CSF_FIRMWARE_ENTRY_TYPE_TRACE_BUFFER:
 		/* Trace buffer */
 		if (size < TRACE_BUFFER_ENTRY_NAME_OFFSET + sizeof(*entry)) {
@@ -1170,6 +1166,7 @@ u32 kbase_csf_firmware_csg_output(
 	dev_dbg(kbdev->dev, "csg output r: reg %08x val %08x\n", offset, val);
 	return val;
 }
+KBASE_EXPORT_TEST_API(kbase_csf_firmware_csg_output);

 void kbase_csf_firmware_global_input(
 	const struct kbase_csf_global_iface *const iface, const u32 offset,
@@ -1180,6 +1177,7 @@ void kbase_csf_firmware_global_input(
 	dev_dbg(kbdev->dev, "glob input w: reg %08x val %08x\n", offset, value);
 	input_page_write(iface->input, offset, value);
 }
+KBASE_EXPORT_TEST_API(kbase_csf_firmware_global_input);

 void kbase_csf_firmware_global_input_mask(
 	const struct kbase_csf_global_iface *const iface, const u32 offset,
@@ -1191,6 +1189,7 @@ void kbase_csf_firmware_global_input_mask(
 			offset, value, mask);
 	input_page_partial_write(iface->input, offset, value, mask);
 }
+KBASE_EXPORT_TEST_API(kbase_csf_firmware_global_input_mask);

 u32 kbase_csf_firmware_global_input_read(
 	const struct kbase_csf_global_iface *const iface, const u32 offset)
@@ -1211,6 +1210,7 @@ u32 kbase_csf_firmware_global_output(
 	dev_dbg(kbdev->dev, "glob output r: reg %08x val %08x\n", offset, val);
 	return val;
 }
+KBASE_EXPORT_TEST_API(kbase_csf_firmware_global_output);

 /**
 * handle_internal_firmware_fatal - Handler for CS internal firmware fault.
@@ -1484,8 +1484,7 @@ bool kbase_csf_firmware_core_attr_updated(struct kbase_device *kbdev)
 }

 /**
- * kbase_csf_firmware_reload_worker() -
- * reload the fw image and re-enable the MCU
+ * kbase_csf_firmware_reload_worker() - reload the fw image and re-enable the MCU
 * @work: CSF Work item for reloading the firmware.
 *
 * This helper function will reload the firmware image and re-enable the MCU.
@@ -1505,6 +1504,8 @@ static void kbase_csf_firmware_reload_worker(struct work_struct *work)

 	dev_info(kbdev->dev, "reloading firmware");

+	KBASE_TLSTREAM_TL_KBASE_CSFFW_FW_RELOADING(kbdev, kbase_backend_get_cycle_cnt(kbdev));
+
 	/* Reload just the data sections from firmware binary image */
 	err = reload_fw_data_sections(kbdev);
 	if (err)
@@ -2017,10 +2018,6 @@ void kbase_csf_firmware_term(struct kbase_device *kbdev)
 		kfree(metadata);
 	}

-#ifndef MALI_KBASE_BUILD
-	mali_kutf_fw_utf_entry_cleanup(kbdev);
-#endif
-
 	/* This will also free up the region allocated for the shared interface
 	 * entry parsed from the firmware image.
 	 */
@@ -2144,6 +2141,8 @@ void kbase_csf_firmware_trigger_mcu_halt(struct kbase_device *kbdev)
 	struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface;
 	unsigned long flags;

+	KBASE_TLSTREAM_TL_KBASE_CSFFW_FW_REQUEST_HALT(kbdev, kbase_backend_get_cycle_cnt(kbdev));
+
 	kbase_csf_scheduler_spin_lock(kbdev, &flags);
 	/* Validate there are no on-slot groups when sending the
 	 * halt request to firmware.
@@ -2155,12 +2154,25 @@ void kbase_csf_firmware_trigger_mcu_halt(struct kbase_device *kbdev)
 	kbase_csf_scheduler_spin_unlock(kbdev, flags);
 }

+void kbase_csf_firmware_enable_mcu(struct kbase_device *kbdev)
+{
+	KBASE_TLSTREAM_TL_KBASE_CSFFW_FW_ENABLING(kbdev, kbase_backend_get_cycle_cnt(kbdev));
+
+	/* Trigger the boot of the MCU firmware. AUTO mode is used because
+	 * otherwise, on a fast reset performed to exit protected mode, the MCU
+	 * would not reboot by itself to enter normal mode.
+	 */
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(MCU_CONTROL), MCU_CNTRL_AUTO);
+}
+
 #ifdef KBASE_PM_RUNTIME
 void kbase_csf_firmware_trigger_mcu_sleep(struct kbase_device *kbdev)
 {
 	struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface;
 	unsigned long flags;

+	KBASE_TLSTREAM_TL_KBASE_CSFFW_FW_REQUEST_SLEEP(kbdev, kbase_backend_get_cycle_cnt(kbdev));
+
 	kbase_csf_scheduler_spin_lock(kbdev, &flags);
 	set_global_request(global_iface, GLB_REQ_SLEEP_MASK);
 	dev_dbg(kbdev->dev, "Sending sleep request to MCU");
--- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware.h
+++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware.h
@@ -346,14 +346,14 @@ static inline void kbase_csf_ring_doorbell(struct kbase_device *kbdev,
 /**
 * kbase_csf_read_firmware_memory - Read a value in a GPU address
 *
+ * @kbdev:     Device pointer
+ * @gpu_addr:  GPU address to read
+ * @value:     output pointer to which the read value will be written.
+ *
 * This function read a value in a GPU address that belongs to
 * a private firmware memory region. The function assumes that the location
 * is not permanently mapped on the CPU address space, therefore it maps it
 * and then unmaps it to access it independently.
- *
- * @kbdev:     Device pointer
- * @gpu_addr:  GPU address to read
- * @value:     output pointer to which the read value will be written.
 */
 void kbase_csf_read_firmware_memory(struct kbase_device *kbdev,
 	u32 gpu_addr, u32 *value);
@@ -361,14 +361,14 @@ void kbase_csf_read_firmware_memory(struct kbase_device *kbdev,
 /**
 * kbase_csf_update_firmware_memory - Write a value in a GPU address
 *
+ * @kbdev:     Device pointer
+ * @gpu_addr:  GPU address to write
+ * @value:     Value to write
+ *
 * This function writes a given value in a GPU address that belongs to
 * a private firmware memory region. The function assumes that the destination
 * is not permanently mapped on the CPU address space, therefore it maps it
 * and then unmaps it to access it independently.
- *
- * @kbdev:     Device pointer
- * @gpu_addr:  GPU address to write
- * @value:     Value to write
 */
 void kbase_csf_update_firmware_memory(struct kbase_device *kbdev,
 	u32 gpu_addr, u32 value);
@@ -404,20 +404,20 @@ void kbase_csf_firmware_term(struct kbase_device *kbdev);
 /**
 * kbase_csf_firmware_ping - Send the ping request to firmware.
 *
- * The function sends the ping request to firmware.
- *
 * @kbdev: Instance of a GPU platform device that implements a CSF interface.
+ *
+ * The function sends the ping request to firmware.
 */
 void kbase_csf_firmware_ping(struct kbase_device *kbdev);

 /**
 * kbase_csf_firmware_ping_wait - Send the ping request to firmware and waits.
 *
+ * @kbdev: Instance of a GPU platform device that implements a CSF interface.
+ *
 * The function sends the ping request to firmware and waits to confirm it is
 * alive.
 *
- * @kbdev: Instance of a GPU platform device that implements a CSF interface.
- *
 * Return: 0 on success, or negative on failure.
 */
 int kbase_csf_firmware_ping_wait(struct kbase_device *kbdev);
@@ -462,8 +462,12 @@ void kbase_csf_wait_protected_mode_enter(struct kbase_device *kbdev);

 static inline bool kbase_csf_firmware_mcu_halted(struct kbase_device *kbdev)
 {
+#if IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI)
+	return true;
+#else
 	return (kbase_reg_read(kbdev, GPU_CONTROL_REG(MCU_STATUS)) ==
 		MCU_STATUS_HALTED);
+#endif /* CONFIG_MALI_BIFROST_NO_MALI */
 }

 /**
@@ -481,24 +485,14 @@ void kbase_csf_firmware_trigger_mcu_halt(struct kbase_device *kbdev);
 *
 * @kbdev: Instance of a GPU platform device that implements a CSF interface.
 */
-static inline void kbase_csf_firmware_enable_mcu(struct kbase_device *kbdev)
-{
-	/* Trigger the boot of MCU firmware, Use the AUTO mode as
-	 * otherwise on fast reset, to exit protected mode, MCU will
-	 * not reboot by itself to enter normal mode.
-	 */
-	kbase_reg_write(kbdev, GPU_CONTROL_REG(MCU_CONTROL), MCU_CNTRL_AUTO);
-}
+void kbase_csf_firmware_enable_mcu(struct kbase_device *kbdev);

 /**
 * kbase_csf_firmware_disable_mcu - Send the command to disable MCU
 *
 * @kbdev: Instance of a GPU platform device that implements a CSF interface.
 */
-static inline void kbase_csf_firmware_disable_mcu(struct kbase_device *kbdev)
-{
-	kbase_reg_write(kbdev, GPU_CONTROL_REG(MCU_CONTROL), MCU_CNTRL_DISABLE);
-}
+void kbase_csf_firmware_disable_mcu(struct kbase_device *kbdev);

 /**
 * kbase_csf_firmware_disable_mcu_wait - Wait for the MCU to reach disabled
@@ -560,9 +554,9 @@ void kbase_csf_firmware_global_reinit(struct kbase_device *kbdev,
 *                      requests, sent after the reboot of MCU firmware, have
 *                      completed or not.
 *
- * Return: true if the Global configuration requests completed otherwise false.
- *
 * @kbdev: Instance of a GPU platform device that implements a CSF interface.
+ *
+ * Return: true if the Global configuration requests completed otherwise false.
 */
 bool kbase_csf_firmware_global_reinit_complete(struct kbase_device *kbdev);

@@ -587,17 +581,16 @@ void kbase_csf_firmware_update_core_attr(struct kbase_device *kbdev,
 *                  request has completed or not, that was sent to update
 *                  the core attributes.
 *
+ * @kbdev: Instance of a GPU platform device that implements a CSF interface.
+ *
 * Return: true if the Global configuration request to update the core
 *         attributes has completed, otherwise false.
- *
- * @kbdev: Instance of a GPU platform device that implements a CSF interface.
 */
 bool kbase_csf_firmware_core_attr_updated(struct kbase_device *kbdev);

 /**
- * Request the global control block of CSF interface capabilities
- *
- * Return: Total number of CSs, summed across all groups.
+ * kbase_csf_firmware_get_glb_iface - Request the global control block of CSF
+ *                                      interface capabilities
 *
 * @kbdev:                 Kbase device.
 * @group_data:            Pointer where to store all the group data
@@ -620,6 +613,8 @@ bool kbase_csf_firmware_core_attr_updated(struct kbase_device *kbdev);
 * @instr_features:        Instrumentation features. Bits 7:4 hold the max size
 *                         of events. Bits 3:0 hold the offset update rate.
 *                         (csf >= 1,1,0)
+ *
+ * Return: Total number of CSs, summed across all groups.
 */
 u32 kbase_csf_firmware_get_glb_iface(
 	struct kbase_device *kbdev, struct basep_cs_group_control *group_data,
@@ -628,20 +623,26 @@ u32 kbase_csf_firmware_get_glb_iface(
 	u32 *group_num, u32 *prfcnt_size, u32 *instr_features);

 /**
- * Get CSF firmware header timeline metadata content
- *
- * Return: The firmware timeline metadata content which match @p name.
+ * kbase_csf_firmware_get_timeline_metadata - Get CSF firmware header timeline
+ *                                            metadata content
 *
 * @kbdev:        Kbase device.
 * @name:         Name of the metadata which metadata content to be returned.
 * @size:         Metadata size if specified metadata found.
+ *
+ * Return: The firmware timeline metadata content which matches @name.
 */
 const char *kbase_csf_firmware_get_timeline_metadata(struct kbase_device *kbdev,
 	const char *name, size_t *size);

 /**
- * kbase_csf_firmware_mcu_shared_mapping_init -
- * Allocate and map MCU shared memory.
+ * kbase_csf_firmware_mcu_shared_mapping_init - Allocate and map MCU shared memory.
+ *
+ * @kbdev:              Kbase device the memory mapping shall belong to.
+ * @num_pages:          Number of memory pages to map.
+ * @cpu_map_properties: Either PROT_READ or PROT_WRITE.
+ * @gpu_map_properties: Either KBASE_REG_GPU_RD or KBASE_REG_GPU_WR.
+ * @csf_mapping:        Object where to write metadata for the memory mapping.
 *
 * This helper function allocates memory and maps it on both the CPU
 * and the GPU address spaces. Most of the properties of the mapping
@@ -653,12 +654,6 @@ const char *kbase_csf_firmware_get_timeline_metadata(struct kbase_device *kbdev,
 * will be ignored by the function.
 *
 * Return: 0 if success, or an error code on failure.
- *
- * @kbdev:              Kbase device the memory mapping shall belong to.
- * @num_pages:          Number of memory pages to map.
- * @cpu_map_properties: Either PROT_READ or PROT_WRITE.
- * @gpu_map_properties: Either KBASE_REG_GPU_RD or KBASE_REG_GPU_WR.
- * @csf_mapping:        Object where to write metadata for the memory mapping.
 */
 int kbase_csf_firmware_mcu_shared_mapping_init(
 		struct kbase_device *kbdev,
@@ -676,36 +671,7 @@ int kbase_csf_firmware_mcu_shared_mapping_init(
 void kbase_csf_firmware_mcu_shared_mapping_term(
 		struct kbase_device *kbdev, struct kbase_csf_mapping *csf_mapping);

-#ifndef MALI_KBASE_BUILD
-/**
- * mali_kutf_process_fw_utf_entry() - Process the "Firmware UTF tests" section
- *
- * Read "Firmware UTF tests" section from the firmware image and create
- * necessary kutf app+suite+tests.
- *
- * Return: 0 if successful, negative error code on failure. In both cases
- * caller will have to invoke mali_kutf_fw_utf_entry_cleanup for the cleanup
- *
- * @kbdev: Kbase device structure
- * @fw_data: Pointer to the start of firmware binary image loaded from disk
- * @fw_size: Size (in bytes) of the firmware image
- * @entry: Pointer to the start of the section
- */
-int mali_kutf_process_fw_utf_entry(struct kbase_device *kbdev,
-	const void *fw_data, size_t fw_size, const u32 *entry);
-
-/**
- * mali_kutf_fw_utf_entry_cleanup() - Remove the Fw UTF tests debugfs entries
- *
- * Destroy the kutf apps+suites+tests created on parsing "Firmware UTF tests"
- * section from the firmware image.
- *
- * @kbdev: Kbase device structure
- */
-void mali_kutf_fw_utf_entry_cleanup(struct kbase_device *kbdev);
-#endif
-
-#ifdef CONFIG_MALI_BIFROST_DEBUG
+#ifdef CONFIG_MALI_BIFROST_DEBUG
 extern bool fw_debug;
 #endif

@@ -722,11 +688,11 @@ static inline long kbase_csf_timeout_in_jiffies(const unsigned int msecs)
 * kbase_csf_firmware_enable_gpu_idle_timer() - Activate the idle hysteresis
 *                                              monitoring operation
 *
+ * @kbdev: Kbase device structure
+ *
 * Program the firmware interface with its configured hysteresis count value
 * and enable the firmware to act on it. The Caller is
 * assumed to hold the kbdev->csf.scheduler.interrupt_lock.
- *
- * @kbdev: Kbase device structure
 */
 void kbase_csf_firmware_enable_gpu_idle_timer(struct kbase_device *kbdev);

@@ -734,10 +700,10 @@ void kbase_csf_firmware_enable_gpu_idle_timer(struct kbase_device *kbdev);
 * kbase_csf_firmware_disable_gpu_idle_timer() - Disable the idle time
 *                                             hysteresis monitoring operation
 *
+ * @kbdev: Kbase device structure
+ *
 * Program the firmware interface to disable the idle hysteresis timer. The
 * Caller is assumed to hold the kbdev->csf.scheduler.interrupt_lock.
- *
- * @kbdev: Kbase device structure
 */
 void kbase_csf_firmware_disable_gpu_idle_timer(struct kbase_device *kbdev);

--- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware_cfg.c
+++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware_cfg.c
@@ -29,10 +29,6 @@
 /**
 * struct firmware_config - Configuration item within the MCU firmware
 *
- * The firmware may expose configuration options. Each option has a name, the
- * address where the option is controlled and the minimum and maximum values
- * that the option can take.
- *
 * @node:        List head linking all options to
 *               kbase_device:csf.firmware_config
 * @kbdev:       Pointer to the Kbase device
@@ -47,6 +43,10 @@
 * @min:         The lowest legal value of the configuration option
 * @max:         The maximum legal value of the configuration option
 * @cur_val:     The current value of the configuration option
+ *
+ * The firmware may expose configuration options. Each option has a name, the
+ * address where the option is controlled and the minimum and maximum values
+ * that the option can take.
 */
 struct firmware_config {
 	struct list_head node;
--- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware_cfg.h
+++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware_cfg.h
@@ -32,12 +32,12 @@
 * kbase_csf_firmware_cfg_init - Create the sysfs directory for configuration
 *                               options present in firmware image.
 *
+ * @kbdev: Pointer to the Kbase device
+ *
 * This function would create a sysfs directory and populate it with a
 * sub-directory, that would contain a file per attribute, for every
 * configuration option parsed from firmware image.
 *
- * @kbdev: Pointer to the Kbase device
- *
 * Return: The initialization error code.
 */
 int kbase_csf_firmware_cfg_init(struct kbase_device *kbdev);
@@ -55,16 +55,16 @@ void kbase_csf_firmware_cfg_term(struct kbase_device *kbdev);
 * kbase_csf_firmware_cfg_option_entry_parse() - Process a
 *                                               "configuration option" section.
 *
- * Read a "configuration option" section adding it to the
- * kbase_device:csf.firmware_config list.
- *
- * Return: 0 if successful, negative error code on failure
- *
 * @kbdev:     Kbase device structure
 * @fw:        Firmware image containing the section
 * @entry:     Pointer to the section
 * @size:      Size (in bytes) of the section
 * @updatable: Indicates if entry can be updated with FIRMWARE_CONFIG_UPDATE
+ *
+ * Read a "configuration option" section adding it to the
+ * kbase_device:csf.firmware_config list.
+ *
+ * Return: 0 if successful, negative error code on failure
 */
 int kbase_csf_firmware_cfg_option_entry_parse(struct kbase_device *kbdev,
 					      const struct firmware *fw,
--- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware_no_mali.c
+++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware_no_mali.c
@@ -136,13 +136,13 @@ static inline void output_page_write(u32 *const output, const u32 offset,
 /**
 * invent_memory_setup_entry() - Invent an "interface memory setup" section
 *
+ * @kbdev: Kbase device structure
+ *
 * Invent an "interface memory setup" section similar to one from a firmware
 * image. If successful the interface will be added to the
 * kbase_device:csf.firmware_interfaces list.
 *
 * Return: 0 if successful, negative error code on failure
- *
- * @kbdev: Kbase device structure
 */
 static int invent_memory_setup_entry(struct kbase_device *kbdev)
 {
@@ -371,6 +371,7 @@ u32 kbase_csf_firmware_csg_output(
 	dev_dbg(kbdev->dev, "csg output r: reg %08x val %08x\n", offset, val);
 	return val;
 }
+KBASE_EXPORT_TEST_API(kbase_csf_firmware_csg_output);

 static void
 csf_firmware_prfcnt_process(const struct kbase_csf_global_iface *const iface,
@@ -418,6 +419,7 @@ void kbase_csf_firmware_global_input(
 		output_page_write(iface->output, GLB_ACK, value);
 	}
 }
+KBASE_EXPORT_TEST_API(kbase_csf_firmware_global_input);

 void kbase_csf_firmware_global_input_mask(
 	const struct kbase_csf_global_iface *const iface, const u32 offset,
@@ -431,6 +433,7 @@ void kbase_csf_firmware_global_input_mask(
 	/* NO_MALI: Go through kbase_csf_firmware_global_input to capture writes */
 	kbase_csf_firmware_global_input(iface, offset, (input_page_read(iface->input, offset) & ~mask) | (value & mask));
 }
+KBASE_EXPORT_TEST_API(kbase_csf_firmware_global_input_mask);

 u32 kbase_csf_firmware_global_input_read(
 	const struct kbase_csf_global_iface *const iface, const u32 offset)
@@ -451,6 +454,7 @@ u32 kbase_csf_firmware_global_output(
 	dev_dbg(kbdev->dev, "glob output r: reg %08x val %08x\n", offset, val);
 	return val;
 }
+KBASE_EXPORT_TEST_API(kbase_csf_firmware_global_output);

 /**
 * handle_internal_firmware_fatal - Handler for CS internal firmware fault.
@@ -1020,10 +1024,6 @@ void kbase_csf_firmware_term(struct kbase_device *kbdev)

 	/* NO_MALI: No trace buffers to terminate */

-#ifndef MALI_KBASE_BUILD
-	mali_kutf_fw_utf_entry_cleanup(kbdev);
-#endif
-
 	mutex_destroy(&kbdev->csf.reg_lock);

 	/* This will also free up the region allocated for the shared interface
@@ -1154,6 +1154,15 @@ void kbase_csf_firmware_trigger_mcu_halt(struct kbase_device *kbdev)
 	kbase_csf_scheduler_spin_unlock(kbdev, flags);
 }

+void kbase_csf_firmware_enable_mcu(struct kbase_device *kbdev)
+{
+	/* Trigger the boot of the MCU firmware. AUTO mode is used because
+	 * otherwise, on a fast reset performed to exit protected mode, the MCU
+	 * would not reboot by itself to enter normal mode.
+	 */
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(MCU_CONTROL), MCU_CNTRL_AUTO);
+}
+
 #ifdef KBASE_PM_RUNTIME
 void kbase_csf_firmware_trigger_mcu_sleep(struct kbase_device *kbdev)
 {
@@ -1290,6 +1299,11 @@ const char *kbase_csf_firmware_get_timeline_metadata(
 	return NULL;
 }

+void kbase_csf_firmware_disable_mcu(struct kbase_device *kbdev)
+{
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(MCU_CONTROL), MCU_CNTRL_DISABLE);
+}
+
 void kbase_csf_firmware_disable_mcu_wait(struct kbase_device *kbdev)
 {
 	/* NO_MALI: Nothing to do here */
--- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_heap_context_alloc.h
+++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_heap_context_alloc.h
@@ -47,11 +47,11 @@ void kbase_csf_heap_context_allocator_term(
 /**
 * kbase_csf_heap_context_allocator_alloc - Allocate a heap context structure
 *
+ * @ctx_alloc: Pointer to the heap context allocator.
+ *
 * If this function is successful then it returns the address of a
 * zero-initialized heap context structure for use by the firmware.
 *
- * @ctx_alloc: Pointer to the heap context allocator.
- *
 * Return: GPU virtual address of the allocated heap context or 0 on failure.
 */
 u64 kbase_csf_heap_context_allocator_alloc(
@@ -60,13 +60,13 @@ u64 kbase_csf_heap_context_allocator_alloc(
 /**
 * kbase_csf_heap_context_allocator_free - Free a heap context structure
 *
- * This function returns a heap context structure to the free pool of unused
- * contexts for possible reuse by a future call to
- * @kbase_csf_heap_context_allocator_alloc.
- *
 * @ctx_alloc:   Pointer to the heap context allocator.
 * @heap_gpu_va: The GPU virtual address of a heap context structure that
 *               was allocated for the firmware.
+ *
+ * This function returns a heap context structure to the free pool of unused
+ * contexts for possible reuse by a future call to
+ * kbase_csf_heap_context_allocator_alloc().
 */
 void kbase_csf_heap_context_allocator_free(
 	struct kbase_csf_heap_context_allocator *const ctx_alloc,
--- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_kcpu.c
+++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_kcpu.c
@@ -190,6 +190,12 @@ static void kbase_jit_add_to_pending_alloc_list(
 *
 * @queue: The queue containing this JIT allocation
 * @cmd:   The JIT allocation command
+ *
+ * Return:
+ * * 0       - allocation OK
+ * * -EINVAL - missing info or JIT ID still in use
+ * * -EAGAIN - Retry
+ * * -ENOMEM - no memory. unable to allocate
 */
 static int kbase_kcpu_jit_allocate_process(
 		struct kbase_kcpu_command_queue *queue,
@@ -289,8 +295,8 @@ static int kbase_kcpu_jit_allocate_process(
 		 * Write the address of the JIT allocation to the user provided
 		 * GPU allocation.
 		 */
-		ptr = kbase_vmap(kctx, info->gpu_alloc_addr, sizeof(*ptr),
-				&mapping);
+		ptr = kbase_vmap_prot(kctx, info->gpu_alloc_addr, sizeof(*ptr),
+				KBASE_REG_CPU_WR, &mapping);
 		if (!ptr) {
 			ret = -ENOMEM;
 			goto fail;
@@ -570,9 +576,11 @@ static int kbase_csf_queue_group_suspend_prepare(
 {
 	struct kbase_context *const kctx = kcpu_queue->kctx;
 	struct kbase_suspend_copy_buffer *sus_buf = NULL;
+	const u32 csg_suspend_buf_size =
+		kctx->kbdev->csf.global_iface.groups[0].suspend_size;
 	u64 addr = suspend_buf->buffer;
 	u64 page_addr = addr & PAGE_MASK;
-	u64 end_addr = addr + suspend_buf->size - 1;
+	u64 end_addr = addr + csg_suspend_buf_size - 1;
 	u64 last_page_addr = end_addr & PAGE_MASK;
 	int nr_pages = (last_page_addr - page_addr) / PAGE_SIZE + 1;
 	int pinned_pages = 0, ret = 0;
@@ -580,8 +588,7 @@ static int kbase_csf_queue_group_suspend_prepare(

 	lockdep_assert_held(&kctx->csf.kcpu_queues.lock);

-	if (suspend_buf->size <
-			kctx->kbdev->csf.global_iface.groups[0].suspend_size)
+	if (suspend_buf->size < csg_suspend_buf_size)
 		return -EINVAL;

 	ret = kbase_csf_queue_group_handle_is_valid(kctx,
@@ -593,7 +600,7 @@ static int kbase_csf_queue_group_suspend_prepare(
 	if (!sus_buf)
 		return -ENOMEM;

-	sus_buf->size = suspend_buf->size;
+	sus_buf->size = csg_suspend_buf_size;
 	sus_buf->nr_pages = nr_pages;
 	sus_buf->offset = addr & ~PAGE_MASK;

--- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_kcpu.h
+++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_kcpu.h
@@ -294,6 +294,8 @@ struct kbase_kcpu_command_queue {
 *		queue will be created.
 * @newq:	Pointer to the structure which contains information about
 *		the new KCPU command queue to be created.
+ *
+ * Return: 0 if successful or a negative error code on failure.
 */
 int kbase_csf_kcpu_queue_new(struct kbase_context *kctx,
 			 struct kbase_ioctl_kcpu_queue_new *newq);
@@ -307,6 +309,8 @@ int kbase_csf_kcpu_queue_new(struct kbase_context *kctx,
 *		queue is to be deleted.
 * @del:	Pointer to the structure which specifies the KCPU command
 *		queue to be deleted.
+ *
+ * Return: 0 if successful or a negative error code on failure.
 */
 int kbase_csf_kcpu_queue_delete(struct kbase_context *kctx,
 			    struct kbase_ioctl_kcpu_queue_delete *del);
@@ -320,6 +324,8 @@ int kbase_csf_kcpu_queue_delete(struct kbase_context *kctx,
 * @enq:	Pointer to the structure which specifies the KCPU command
 *		as well as the KCPU command queue into which the command
 *		is to be enqueued.
+ *
+ * Return: 0 if successful or a negative error code on failure.
 */
 int kbase_csf_kcpu_queue_enqueue(struct kbase_context *kctx,
 				 struct kbase_ioctl_kcpu_queue_enqueue *enq);
@@ -337,11 +343,11 @@ int kbase_csf_kcpu_queue_context_init(struct kbase_context *kctx);
 /**
 * kbase_csf_kcpu_queue_context_term - Terminate the kernel CPU queues context
 *                                     for a GPU address space
+ * @kctx: Pointer to the kbase context being terminated.
 *
 * This function deletes any kernel CPU queues that weren't deleted before
 * context termination.
 *
- * @kctx: Pointer to the kbase context being terminated.
 */
 void kbase_csf_kcpu_queue_context_term(struct kbase_context *kctx);

--- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_reset_gpu.c
+++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_reset_gpu.c
@@ -576,6 +576,7 @@ int kbase_reset_gpu_silent(struct kbase_device *kbdev)

 	return 0;
 }
+KBASE_EXPORT_TEST_API(kbase_reset_gpu_silent);

 bool kbase_reset_gpu_is_active(struct kbase_device *kbdev)
 {
--- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_scheduler.c
+++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_scheduler.c
@@ -164,12 +164,14 @@ static int wait_for_scheduler_to_exit_sleep(struct kbase_device *kbdev)
 * This function will force the Scheduler to exit the sleep state by doing the
 * wake up of MCU and suspension of on-slot groups. It is called at the time of
 * system suspend.
+ *
+ * Return: 0 on success.
 */
-static void force_scheduler_to_exit_sleep(struct kbase_device *kbdev)
+static int force_scheduler_to_exit_sleep(struct kbase_device *kbdev)
 {
 	struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
 	unsigned long flags;
-	int ret;
+	int ret = 0;

 	lockdep_assert_held(&scheduler->lock);
 	WARN_ON(scheduler->state != SCHED_SLEEPING);
@@ -177,12 +179,18 @@ static void force_scheduler_to_exit_sleep(struct kbase_device *kbdev)

 	kbase_pm_lock(kbdev);
 	ret = kbase_pm_force_mcu_wakeup_after_sleep(kbdev);
-	if (ret)
-		dev_warn(kbdev->dev, "[%llu] Wait for MCU wake up failed on forced scheduler suspend",
-			kbase_backend_get_cycle_cnt(kbdev));
 	kbase_pm_unlock(kbdev);
+	if (ret) {
+		dev_warn(kbdev->dev,
+			 "[%llu] Wait for MCU wake up failed on forced scheduler suspend",
+			 kbase_backend_get_cycle_cnt(kbdev));
+		goto out;
+	}

-	suspend_active_groups_on_powerdown(kbdev, true);
+	/* Keep the error code, else the 'out' path would return 0 (success). */
+	ret = suspend_active_groups_on_powerdown(kbdev, true);
+	if (ret)
+		goto out;

 	kbase_pm_lock(kbdev);
 	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
@@ -191,12 +197,26 @@ static void force_scheduler_to_exit_sleep(struct kbase_device *kbdev)
 	kbase_pm_update_state(kbdev);
 	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
 	ret = kbase_pm_wait_for_desired_state(kbdev);
-	if (ret)
-		dev_warn(kbdev->dev, "[%llu] Wait for pm state change failed on forced scheduler suspend",
-			kbase_backend_get_cycle_cnt(kbdev));
 	kbase_pm_unlock(kbdev);
+	if (ret) {
+		dev_warn(kbdev->dev,
+			 "[%llu] Wait for pm state change failed on forced scheduler suspend",
+			 kbase_backend_get_cycle_cnt(kbdev));
+		goto out;
+	}

 	scheduler->state = SCHED_SUSPENDED;
+
+	return 0;
+
+out:
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	kbdev->pm.backend.exit_gpu_sleep_mode = true;
+	kbdev->pm.backend.gpu_wakeup_override = false;
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+	kbase_csf_scheduler_invoke_tick(kbdev);
+
+	return ret;
 }
 #endif

@@ -445,6 +465,13 @@ static bool queue_group_idle_locked(struct kbase_queue_group *group)
 		group->run_state == KBASE_CSF_GROUP_SUSPENDED_ON_IDLE);
 }

+static bool on_slot_group_idle_locked(struct kbase_queue_group *group)
+{
+	lockdep_assert_held(&group->kctx->kbdev->csf.scheduler.lock);
+
+	return (group->run_state == KBASE_CSF_GROUP_IDLE);
+}
+
 static bool queue_group_scheduled(struct kbase_queue_group *group)
 {
 	return (group->run_state != KBASE_CSF_GROUP_INACTIVE &&
@@ -582,6 +609,8 @@ static void disable_gpu_idle_fw_timer(struct kbase_device *kbdev)
 * This function is usually called when Scheduler needs to be activated.
 * The PM reference count is acquired for the Scheduler and the power on
 * of GPU is initiated.
+ *
+ * Return: 0 if successful or a negative error code on failure.
 */
 static int scheduler_pm_active_handle_suspend(struct kbase_device *kbdev,
 				enum kbase_pm_suspend_handler suspend_handler)
@@ -1243,8 +1272,16 @@ int kbase_csf_scheduler_queue_stop(struct kbase_queue *queue)

 static void update_hw_active(struct kbase_queue *queue, bool active)
 {
+#if IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI)
+	if (queue && queue->enabled) {
+		u32 *output_addr = (u32 *)(queue->user_io_addr + PAGE_SIZE);
+
+		output_addr[CS_ACTIVE / sizeof(u32)] = active;
+	}
+#else
 	CSTD_UNUSED(queue);
 	CSTD_UNUSED(active);
+#endif
 }

 static void program_cs_extract_init(struct kbase_queue *queue)
@@ -2099,6 +2136,10 @@ static void save_csg_slot(struct kbase_queue_group *group)
 		bool sync_wait = false;
 		bool idle = kbase_csf_firmware_csg_output(ginfo, CSG_STATUS_STATE) &
 			    CSG_STATUS_STATE_IDLE_MASK;
+#if IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI)
+		for (i = 0; i < max_streams; i++)
+			update_hw_active(group->bound_queues[i], false);
+#endif /* CONFIG_MALI_BIFROST_NO_MALI */
 		for (i = 0; idle && i < max_streams; i++) {
 			struct kbase_queue *const queue =
 					group->bound_queues[i];
@@ -2385,6 +2426,7 @@ static void program_csg_slot(struct kbase_queue_group *group, s8 slot,
 			protm_suspend_buf >> 32);
 	}

+
 	/* Enable all interrupts for now */
 	kbase_csf_firmware_csg_input(ginfo, CSG_ACK_IRQ_MASK, ~((u32)0));

@@ -2414,7 +2456,7 @@ static void program_csg_slot(struct kbase_queue_group *group, s8 slot,
 	/* Trace the programming of the CSG on the slot */
 	KBASE_TLSTREAM_TL_KBASE_DEVICE_PROGRAM_CSG(
 		kbdev, kbdev->gpu_props.props.raw_props.gpu_id, group->kctx->id,
-		group->handle, slot);
+		group->handle, slot, (state == CSG_REQ_STATE_RESUME) ? 1 : 0);

 	dev_dbg(kbdev->dev, "Starting group %d of context %d_%d on slot %d with priority %u\n",
 		group->handle, kctx->tgid, kctx->id, slot, prio);
@@ -3166,15 +3208,15 @@ static void wait_csg_slots_start(struct kbase_device *kbdev)
 *                           flagged after the completion of a CSG status
 *                           update command
 *
+ * @kbdev:  Pointer to the GPU device.
+ * @slot:   The given slot for checking an occupying resident group's idle
+ *          state.
+ *
 * This function is called at the start of scheduling tick to check the
 * idle status of a queue group resident on a CSG slot.
 * The caller must make sure the corresponding status update command has
 * been called and completed before checking this status.
 *
- * @kbdev:  Pointer to the GPU device.
- * @slot:   The given slot for checking an occupying resident group's idle
- *          state.
- *
 * Return: true if the group resident on slot is idle, otherwise false.
 */
 static bool group_on_slot_is_idle(struct kbase_device *kbdev,
@@ -3194,16 +3236,16 @@ static bool group_on_slot_is_idle(struct kbase_device *kbdev,
 * slots_update_state_changed() -  Check the handshake state of a subset of
 *                                 command group slots.
 *
- * Checks the state of a subset of slots selected through the slots_mask
- * bit_map. Records which slots' handshake completed and send it back in the
- * slots_done bit_map.
- *
 * @kbdev:          The GPU device.
 * @field_mask:     The field mask for checking the state in the csg_req/ack.
 * @slots_mask:     A bit_map specifying the slots to check.
 * @slots_done:     A cleared bit_map for returning the slots that
 *                  have finished update.
 *
+ * Checks the state of a subset of slots selected through the slots_mask
+ * bit_map. Records which slots' handshake completed and sends it back in the
+ * slots_done bit_map.
+ *
 * Return: true if the slots_done is set for at least one slot.
 *         Otherwise false.
 */
@@ -3237,10 +3279,6 @@ bool slots_update_state_changed(struct kbase_device *kbdev, u32 field_mask,
 * wait_csg_slots_handshake_ack - Wait the req/ack handshakes to complete on
 *                                the specified groups.
 *
- * This function waits for the acknowledgement of the request that have
- * already been placed for the CSG slots by the caller. Currently used for
- * the CSG priority update and status update requests.
- *
 * @kbdev:           Pointer to the GPU device.
 * @field_mask:      The field mask for checking the state in the csg_req/ack.
 * @slot_mask:       Bitmap reflecting the slots, the function will modify
@@ -3248,6 +3286,10 @@ bool slots_update_state_changed(struct kbase_device *kbdev, u32 field_mask,
 *                   bits.
 * @wait_in_jiffies: Wait duration in jiffies, controlling the time-out.
 *
+ * This function waits for the acknowledgment of the requests that have
+ * already been placed for the CSG slots by the caller. Currently used for
+ * the CSG priority update and status update requests.
+ *
 * Return: 0 on all specified slots acknowledged; otherwise -ETIMEDOUT. For
 *         timed out condition with unacknowledged slots, their bits remain
 *         set in the slot_mask.
@@ -3349,14 +3391,14 @@ void kbase_csf_scheduler_evict_ctx_slots(struct kbase_device *kbdev,
 * scheduler_slot_protm_ack - Acknowledging the protected region requests
 * from the resident group on a given slot.
 *
- * The function assumes that the given slot is in stable running state and
- * has already been judged by the caller on that any pending protected region
- * requests of the resident group should be acknowledged.
- *
 * @kbdev:  Pointer to the GPU device.
 * @group:  Pointer to the resident group on the given slot.
 * @slot:   The slot that the given group is actively operating on.
 *
+ * The function assumes that the given slot is in stable running state and
+ * has already been judged by the caller on that any pending protected region
+ * requests of the resident group should be acknowledged.
+ *
 * Return: true if the group has pending protm request(s) and is acknowledged.
 *         The caller should arrange to enter the protected mode for servicing
 *         it. Otherwise return false, indicating the group has no pending protm
@@ -3426,15 +3468,15 @@ static bool scheduler_slot_protm_ack(struct kbase_device *const kbdev,
 * scheduler_group_check_protm_enter - Request the given group to be evaluated
 * for triggering the protected mode.
 *
+ * @kbdev:     Pointer to the GPU device.
+ * @input_grp: Pointer to the GPU queue group.
+ *
 * The function assumes the given group is either an active running group or
 * the scheduler internally maintained field scheduler->top_grp.
 *
 * If the GPU is not already running in protected mode and the input group
 * has protected region requests from its bound queues, the requests are
 * acknowledged and the GPU is instructed to enter the protected mode.
- *
- * @kbdev:     Pointer to the GPU device.
- * @input_grp: Pointer to the GPU queue group.
 */
 static void scheduler_group_check_protm_enter(struct kbase_device *const kbdev,
 				struct kbase_queue_group *const input_grp)
@@ -3538,7 +3580,7 @@ static void scheduler_apply(struct kbase_device *kbdev)
 		}
 	}

-	/* Initialize the remaining avialable csg slots for the tick/tock */
+	/* Initialize the remaining available csg slots for the tick/tock */
 	scheduler->remaining_tick_slots = available_csg_slots;

 	/* If there are spare slots, apply heads in the list */
@@ -3615,8 +3657,9 @@ static void scheduler_ctx_scan_groups(struct kbase_device *kbdev,
 		group->scan_seq_num = scheduler->csg_scan_count_for_tick++;

 		if (queue_group_idle_locked(group)) {
-			list_add_tail(&group->link_to_schedule,
-				      &scheduler->idle_groups_to_schedule);
+			if (on_slot_group_idle_locked(group))
+				list_add_tail(&group->link_to_schedule,
+					&scheduler->idle_groups_to_schedule);
 			continue;
 		}

@@ -3640,6 +3683,8 @@ static void scheduler_ctx_scan_groups(struct kbase_device *kbdev,
 *                             fairness of scheduling within a single
 *                             kbase_context.
 *
+ * @kbdev:    Pointer to the GPU device.
+ *
 * Since only kbase_csf_scheduler's top_grp (i.e. the queue group assigned
 * the highest slot priority) is guaranteed to get the resources that it
 * needs we only rotate the kbase_context corresponding to it -
@@ -3678,8 +3723,6 @@ static void scheduler_ctx_scan_groups(struct kbase_device *kbdev,
 * the kbase_csf_scheduler's groups_to_schedule list. In this example, it will
 * be for a group in the next lowest priority level or in absence of those the
 * next kbase_context's queue groups.
- *
- * @kbdev:    Pointer to the GPU device.
 */
 static void scheduler_rotate_groups(struct kbase_device *kbdev)
 {
@@ -3750,17 +3793,17 @@ static void scheduler_rotate_ctxs(struct kbase_device *kbdev)
 *                       slots for which the IDLE notification was received
 *                        previously.
 *
- * This function sends a CSG status update request for all the CSG slots
- * present in the bitmap scheduler->csg_slots_idle_mask and wait for the
- * request to complete.
- * The bits set in the scheduler->csg_slots_idle_mask bitmap are cleared by
- * this function.
- *
 * @kbdev:             Pointer to the GPU device.
 * @csg_bitmap:        Bitmap of the CSG slots for which
 *                     the status update request completed successfully.
 * @failed_csg_bitmap: Bitmap of the CSG slots for which
 *                     the status update request timedout.
+ *
+ * This function sends a CSG status update request for all the CSG slots
+ * present in the bitmap scheduler->csg_slots_idle_mask and waits for the
+ * request to complete.
+ * The bits set in the scheduler->csg_slots_idle_mask bitmap are cleared by
+ * this function.
 */
 static void scheduler_update_idle_slots_status(struct kbase_device *kbdev,
 		unsigned long *csg_bitmap, unsigned long *failed_csg_bitmap)
@@ -3832,6 +3875,8 @@ static void scheduler_update_idle_slots_status(struct kbase_device *kbdev,
 *                    resident on CSG slots for which the
 *                    IDLE notification was received previously.
 *
+ * @kbdev:  Pointer to the GPU device.
+ *
 * This function is called at the start of scheduling tick/tock to reconfirm
 * the idle status of queue groups resident on CSG slots for
 * which idle notification was received previously, i.e. all the CSG slots
@@ -3845,8 +3890,6 @@ static void scheduler_update_idle_slots_status(struct kbase_device *kbdev,
 * updated accordingly.
 * The bits corresponding to slots for which the status update request timedout
 * remain set in scheduler->csg_slots_idle_mask.
- *
- * @kbdev:  Pointer to the GPU device.
 */
 static void scheduler_handle_idle_slots(struct kbase_device *kbdev)
 {
@@ -3901,7 +3944,7 @@ static void scheduler_scan_idle_groups(struct kbase_device *kbdev)
 	list_for_each_entry_safe(group, n, &scheduler->idle_groups_to_schedule,
 				 link_to_schedule) {

-		WARN_ON(!queue_group_idle_locked(group));
+		WARN_ON(!on_slot_group_idle_locked(group));

 		if (!scheduler->ngrp_to_schedule) {
 			/* keep the top csg's origin */
@@ -3955,6 +3998,18 @@ static struct kbase_queue_group *get_tock_top_group(
 	return NULL;
 }

+/**
+ * suspend_active_groups_on_powerdown() - Suspend active CSG groups upon
+ *                                        suspend or GPU IDLE.
+ *
+ * @kbdev:          Pointer to the device
+ * @system_suspend: Flag to indicate it's for system suspend.
+ *
+ * This function will suspend all active CSG groups upon either
+ * system suspend, runtime suspend or GPU IDLE.
+ *
+ * Return: 0 on success, -1 otherwise.
+ */
 static int suspend_active_groups_on_powerdown(struct kbase_device *kbdev,
 					      bool system_suspend)
 {
@@ -3964,8 +4019,8 @@ static int suspend_active_groups_on_powerdown(struct kbase_device *kbdev,
 	int ret = suspend_active_queue_groups(kbdev, slot_mask);

 	if (ret) {
-		/* The suspend of CSGs failed, trigger the GPU reset and wait
-		 * for it to complete to be in a deterministic state.
+		/* The suspend of CSGs failed,
+		 * trigger the GPU reset to be in a deterministic state.
 		 */
 		dev_warn(kbdev->dev, "[%llu] Timeout (%d ms) waiting for CSG slots to suspend on power down, slot_mask: 0x%*pb\n",
 			 kbase_backend_get_cycle_cnt(kbdev),
@@ -3975,13 +4030,6 @@ static int suspend_active_groups_on_powerdown(struct kbase_device *kbdev,
 		if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_NONE))
 			kbase_reset_gpu(kbdev);

-		if (system_suspend) {
-			mutex_unlock(&scheduler->lock);
-			kbase_reset_gpu_allow(kbdev);
-			kbase_reset_gpu_wait(kbdev);
-			kbase_reset_gpu_prevent_and_wait(kbdev);
-			mutex_lock(&scheduler->lock);
-		}
 		return -1;
 	}

@@ -4059,6 +4107,8 @@ static void scheduler_sleep_on_idle(struct kbase_device *kbdev)
 * This function is called on GPU idle notification to trigger the power down of
 * GPU. Scheduler's state is changed to suspended and all the active queue
 * groups are suspended before halting the MCU firmware.
+ *
+ * Return: true if scheduler will be suspended or false if suspend is aborted.
 */
 static bool scheduler_suspend_on_idle(struct kbase_device *kbdev)
 {
@@ -4104,6 +4154,8 @@ static void gpu_idle_worker(struct work_struct *work)
 	disable_gpu_idle_fw_timer(kbdev);
 	scheduler_is_idle_suspendable = scheduler_idle_suspendable(kbdev);
 	if (scheduler_is_idle_suspendable) {
+		KBASE_KTRACE_ADD(kbdev, GPU_IDLE_HANDLING_START, NULL,
+				 kbase_csf_ktrace_gpu_cycle_cnt(kbdev));
 #ifdef KBASE_PM_RUNTIME
 		if (kbase_pm_gpu_sleep_allowed(kbdev) &&
 		    scheduler->total_runnable_grps)
@@ -4174,8 +4226,7 @@ static int scheduler_prepare(struct kbase_device *kbdev)
 	/* Adds those idle but runnable groups to the scanout list */
 	scheduler_scan_idle_groups(kbdev);

-	/* After adding the idle CSGs, the two counts should be the same */
-	WARN_ON(scheduler->csg_scan_count_for_tick != scheduler->ngrp_to_schedule);
+	WARN_ON(scheduler->csg_scan_count_for_tick < scheduler->ngrp_to_schedule);

 	KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_TOP_GRP, scheduler->top_grp,
 			scheduler->num_active_address_spaces |
@@ -4705,8 +4756,11 @@ static int suspend_active_queue_groups_on_reset(struct kbase_device *kbdev)
 	 * due to the extra context ref-count, which prevents the
 	 * L2 powering down cache clean operation in the non racing
 	 * case.
+	 * LSC is being flushed together to cover buslogging usecase,
+	 * where GPU reset is done regularly to avoid the log buffer
+	 * overflow.
 	 */
-	kbase_gpu_start_cache_clean(kbdev);
+	kbase_gpu_start_cache_clean(kbdev, GPU_COMMAND_CACHE_CLN_INV_L2_LSC);
 	ret2 = kbase_gpu_wait_cache_clean_timeout(kbdev,
 			kbdev->reset_timeout_ms);
 	if (ret2) {
@@ -5055,13 +5109,18 @@ int kbase_csf_scheduler_group_copy_suspend_buf(struct kbase_queue_group *group,
 		unsigned int target_page_nr = 0, i = 0;
 		u64 offset = sus_buf->offset;
 		size_t to_copy = sus_buf->size;
+		const u32 csg_suspend_buf_nr_pages =
+			PFN_UP(kbdev->csf.global_iface.groups[0].suspend_size);

 		if (scheduler->state != SCHED_SUSPENDED) {
 			/* Similar to the case of HW counters, need to flush
-			 * the GPU cache before reading from the suspend buffer
+			 * the GPU L2 cache before reading from the suspend buffer
 			 * pages as they are mapped and cached on GPU side.
+			 * Flushing LSC is not done here, since only the flush of
+			 * CSG suspend buffer contents is needed from the L2 cache.
 			 */
-			kbase_gpu_start_cache_clean(kbdev);
+			kbase_gpu_start_cache_clean(
+				kbdev, GPU_COMMAND_CACHE_CLN_INV_L2);
 			kbase_gpu_wait_cache_clean(kbdev);
 		} else {
 			/* Make sure power down transitions have completed,
@@ -5073,7 +5132,7 @@ int kbase_csf_scheduler_group_copy_suspend_buf(struct kbase_queue_group *group,
 			kbase_pm_wait_for_desired_state(kbdev);
 		}

-		for (i = 0; i < PFN_UP(sus_buf->size) &&
+		for (i = 0; i < csg_suspend_buf_nr_pages &&
 				target_page_nr < sus_buf->nr_pages; i++) {
 			struct page *pg =
 				as_page(group->normal_suspend_buf.phy[i]);
@@ -5252,7 +5311,7 @@ void kbase_csf_scheduler_group_protm_enter(struct kbase_queue_group *group)
 * This function will evaluate the sync condition, if any, of all the queues
 * bound to the given group.
 *
- * Return true if the sync condition of at least one queue has been satisfied.
+ * Return: true if the sync condition of at least one queue has been satisfied.
 */
 static bool check_sync_update_for_on_slot_group(
 		struct kbase_queue_group *group)
@@ -5341,7 +5400,7 @@ static bool check_sync_update_for_on_slot_group(
 * protected mode that has a higher priority than the active protected mode
 * group.
 *
- * Return true if the sync condition of at least one queue in a group has been
+ * Return: true if the sync condition of at least one queue in a group has been
 * satisfied.
 */
 static bool check_sync_update_for_idle_groups_protm(struct kbase_device *kbdev)
@@ -5604,8 +5663,14 @@ void kbase_csf_scheduler_term(struct kbase_device *kbdev)
 		flush_work(&kbdev->csf.scheduler.gpu_idle_work);
 		mutex_lock(&kbdev->csf.scheduler.lock);

-		if (WARN_ON(kbdev->csf.scheduler.state != SCHED_SUSPENDED))
+		if (kbdev->csf.scheduler.state != SCHED_SUSPENDED) {
+			/* The power policy could prevent the Scheduler from
+			 * getting suspended when GPU becomes idle.
+			 */
+			WARN_ON(kbase_pm_idle_groups_sched_suspendable(kbdev));
 			scheduler_suspend(kbdev);
+		}
+
 		mutex_unlock(&kbdev->csf.scheduler.lock);
 		cancel_delayed_work_sync(&kbdev->csf.scheduler.ping_work);
 		cancel_tick_timer(kbdev);
@@ -5692,12 +5757,16 @@ void kbase_csf_scheduler_timer_set_enabled(struct kbase_device *kbdev,
 		 * available, so need to drop the lock before cancellation.
 		 */
 		cancel_work_sync(&scheduler->tick_work);
-	} else if (!currently_enabled && enable) {
+		return;
+	}
+
+	if (!currently_enabled && enable) {
 		scheduler->timer_enabled = true;

 		scheduler_enable_tick_timer_nolock(kbdev);
-		mutex_unlock(&scheduler->lock);
 	}
+
+	mutex_unlock(&scheduler->lock);
 }

 void kbase_csf_scheduler_kick(struct kbase_device *kbdev)
@@ -5718,18 +5787,20 @@ out:
 	mutex_unlock(&scheduler->lock);
 }

-void kbase_csf_scheduler_pm_suspend(struct kbase_device *kbdev)
+int kbase_csf_scheduler_pm_suspend(struct kbase_device *kbdev)
 {
+	int result = 0;
 	struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;

 	/* Cancel any potential queued delayed work(s) */
 	cancel_work_sync(&scheduler->tick_work);
 	cancel_tock_work(scheduler);

-	if (kbase_reset_gpu_prevent_and_wait(kbdev)) {
+	result = kbase_reset_gpu_prevent_and_wait(kbdev);
+	if (result) {
 		dev_warn(kbdev->dev,
 			 "Stop PM suspending for failing to prevent gpu reset.\n");
-		return;
+		return result;
 	}

 	mutex_lock(&scheduler->lock);
@@ -5742,18 +5813,31 @@ void kbase_csf_scheduler_pm_suspend(struct kbase_device *kbdev)
 	 */
 	if (scheduler->state == SCHED_SLEEPING) {
 		dev_info(kbdev->dev, "Activating MCU out of sleep on system suspend");
-		force_scheduler_to_exit_sleep(kbdev);
+		result = force_scheduler_to_exit_sleep(kbdev);
+		if (result) {
+			dev_warn(kbdev->dev, "Scheduler failed to exit from sleep");
+			goto exit;
+		}
 	}
 #endif
 	if (scheduler->state != SCHED_SUSPENDED) {
-		suspend_active_groups_on_powerdown(kbdev, true);
-		dev_info(kbdev->dev, "Scheduler PM suspend");
-		scheduler_suspend(kbdev);
-		cancel_tick_timer(kbdev);
+		result = suspend_active_groups_on_powerdown(kbdev, true);
+		if (result) {
+			dev_warn(kbdev->dev, "failed to suspend active groups");
+			goto exit;
+		} else {
+			dev_info(kbdev->dev, "Scheduler PM suspend");
+			scheduler_suspend(kbdev);
+			cancel_tick_timer(kbdev);
+		}
 	}
+
+exit:
 	mutex_unlock(&scheduler->lock);

 	kbase_reset_gpu_allow(kbdev);
+
+	return result;
 }
 KBASE_EXPORT_TEST_API(kbase_csf_scheduler_pm_suspend);

--- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_scheduler.h
+++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_scheduler.h
@@ -23,6 +23,7 @@
 #define _KBASE_CSF_SCHEDULER_H_

 #include "mali_kbase_csf.h"
+#include "mali_kbase_csf_event.h"

 /**
 * kbase_csf_scheduler_queue_start() - Enable the running of GPU command queue
@@ -250,14 +251,14 @@ void kbase_csf_scheduler_enable_tick_timer(struct kbase_device *kbdev);
 * kbase_csf_scheduler_group_copy_suspend_buf - Suspend a queue
 *		group and copy suspend buffer.
 *
- * This function is called to suspend a queue group and copy the suspend_buffer
- * contents to the input buffer provided.
- *
 * @group:	Pointer to the queue group to be suspended.
 * @sus_buf:	Pointer to the structure which contains details of the
 *		user buffer and its kernel pinned pages to which we need to copy
 *		the group suspend buffer.
 *
+ * This function is called to suspend a queue group and copy the suspend_buffer
+ * contents to the input buffer provided.
+ *
 * Return:	0 on success, or negative on failure.
 */
 int kbase_csf_scheduler_group_copy_suspend_buf(struct kbase_queue_group *group,
@@ -425,8 +426,10 @@ void kbase_csf_scheduler_pm_resume(struct kbase_device *kbdev);
 *
 * This function will make the scheduler suspend all the running queue groups
 * and drop its power managemenet reference.
+ *
+ * Return: 0 on success, otherwise a non-zero error code.
 */
-void kbase_csf_scheduler_pm_suspend(struct kbase_device *kbdev);
+int kbase_csf_scheduler_pm_suspend(struct kbase_device *kbdev);

 /**
 * kbase_csf_scheduler_all_csgs_idle() - Check if the scheduler internal
--- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tiler_heap.c
+++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tiler_heap.c
@@ -28,13 +28,13 @@
 /**
 * encode_chunk_ptr - Encode the address and size of a chunk as an integer.
 *
+ * @chunk_size: Size of a tiler heap chunk, in bytes.
+ * @chunk_addr: GPU virtual address of the same tiler heap chunk.
+ *
 * The size and address of the next chunk in a list are packed into a single
 * 64-bit value for storage in a chunk's header. This function returns that
 * value.
 *
- * @chunk_size: Size of a tiler heap chunk, in bytes.
- * @chunk_addr: GPU virtual address of the same tiler heap chunk.
- *
 * Return: Next chunk pointer suitable for writing into a chunk header.
 */
 static u64 encode_chunk_ptr(u32 const chunk_size, u64 const chunk_addr)
@@ -76,14 +76,14 @@ static struct kbase_csf_tiler_heap_chunk *get_last_chunk(
 /**
 * link_chunk - Link a chunk into a tiler heap
 *
+ * @heap:  Pointer to the tiler heap.
+ * @chunk: Pointer to the heap chunk to be linked.
+ *
 * Unless the @chunk is the first in the kernel's list of chunks belonging to
 * a given tiler heap, this function stores the size and address of the @chunk
 * in the header of the preceding chunk. This requires the GPU memory region
 * containing the header to be be mapped temporarily, which can fail.
 *
- * @heap:  Pointer to the tiler heap.
- * @chunk: Pointer to the heap chunk to be linked.
- *
 * Return: 0 if successful or a negative error code on failure.
 */
 static int link_chunk(struct kbase_csf_tiler_heap *const heap,
@@ -118,15 +118,15 @@ static int link_chunk(struct kbase_csf_tiler_heap *const heap,
 /**
 * init_chunk - Initialize and link a tiler heap chunk
 *
- * Zero-initialize a new chunk's header (including its pointer to the next
- * chunk, which doesn't exist yet) and then update the previous chunk's
- * header to link the new chunk into the chunk list.
- *
 * @heap:  Pointer to the tiler heap.
 * @chunk: Pointer to the heap chunk to be initialized and linked.
 * @link_with_prev: Flag to indicate if the new chunk needs to be linked with
 *                  the previously allocated chunk.
 *
+ * Zero-initialize a new chunk's header (including its pointer to the next
+ * chunk, which doesn't exist yet) and then update the previous chunk's
+ * header to link the new chunk into the chunk list.
+ *
 * Return: 0 if successful or a negative error code on failure.
 */
 static int init_chunk(struct kbase_csf_tiler_heap *const heap,
@@ -163,14 +163,14 @@ static int init_chunk(struct kbase_csf_tiler_heap *const heap,
 /**
 * create_chunk - Create a tiler heap chunk
 *
- * This function allocates a chunk of memory for a tiler heap and adds it to
- * the end of the list of chunks associated with that heap. The size of the
- * chunk is not a parameter because it is configured per-heap not per-chunk.
- *
 * @heap: Pointer to the tiler heap for which to allocate memory.
 * @link_with_prev: Flag to indicate if the chunk to be allocated needs to be
 *                  linked with the previously allocated chunk.
 *
+ * This function allocates a chunk of memory for a tiler heap and adds it to
+ * the end of the list of chunks associated with that heap. The size of the
+ * chunk is not a parameter because it is configured per-heap not per-chunk.
+ *
 * Return: 0 if successful or a negative error code on failure.
 */
 static int create_chunk(struct kbase_csf_tiler_heap *const heap,
@@ -237,15 +237,15 @@ static int create_chunk(struct kbase_csf_tiler_heap *const heap,
 /**
 * delete_chunk - Delete a tiler heap chunk
 *
+ * @heap:  Pointer to the tiler heap for which @chunk was allocated.
+ * @chunk: Pointer to a chunk to be deleted.
+ *
 * This function frees a tiler heap chunk previously allocated by @create_chunk
 * and removes it from the list of chunks associated with the heap.
 *
 * WARNING: The deleted chunk is not unlinked from the list of chunks used by
 *          the GPU, therefore it is only safe to use this function when
 *          deleting a heap.
- *
- * @heap:  Pointer to the tiler heap for which @chunk was allocated.
- * @chunk: Pointer to a chunk to be deleted.
 */
 static void delete_chunk(struct kbase_csf_tiler_heap *const heap,
 	struct kbase_csf_tiler_heap_chunk *const chunk)
@@ -264,10 +264,10 @@ static void delete_chunk(struct kbase_csf_tiler_heap *const heap,
 /**
 * delete_all_chunks - Delete all chunks belonging to a tiler heap
 *
+ * @heap: Pointer to a tiler heap.
+ *
 * This function empties the list of chunks associated with a tiler heap by
 * freeing all chunks previously allocated by @create_chunk.
- *
- * @heap: Pointer to a tiler heap.
 */
 static void delete_all_chunks(struct kbase_csf_tiler_heap *heap)
 {
@@ -284,12 +284,12 @@ static void delete_all_chunks(struct kbase_csf_tiler_heap *heap)
 /**
 * create_initial_chunks - Create the initial list of chunks for a tiler heap
 *
- * This function allocates a given number of chunks for a tiler heap and
- * adds them to the list of chunks associated with that heap.
- *
 * @heap:    Pointer to the tiler heap for which to allocate memory.
 * @nchunks: Number of chunks to create.
 *
+ * This function allocates a given number of chunks for a tiler heap and
+ * adds them to the list of chunks associated with that heap.
+ *
 * Return: 0 if successful or a negative error code on failure.
 */
 static int create_initial_chunks(struct kbase_csf_tiler_heap *const heap,
@@ -310,12 +310,12 @@ static int create_initial_chunks(struct kbase_csf_tiler_heap *const heap,
 /**
 * delete_heap - Delete a tiler heap
 *
+ * @heap: Pointer to a tiler heap to be deleted.
+ *
 * This function frees any chunks allocated for a tiler heap previously
 * initialized by @kbase_csf_tiler_heap_init and removes it from the list of
 * heaps associated with the kbase context. The heap context structure used by
 * the firmware is also freed.
- *
- * @heap: Pointer to a tiler heap to be deleted.
 */
 static void delete_heap(struct kbase_csf_tiler_heap *heap)
 {
@@ -346,15 +346,15 @@ static void delete_heap(struct kbase_csf_tiler_heap *heap)
 /**
 * find_tiler_heap - Find a tiler heap from the address of its heap context
 *
+ * @kctx:        Pointer to the kbase context to search for a tiler heap.
+ * @heap_gpu_va: GPU virtual address of a heap context structure.
+ *
 * Each tiler heap managed by the kernel has an associated heap context
 * structure used by the firmware. This function finds a tiler heap object from
 * the GPU virtual address of its associated heap context. The heap context
 * should have been allocated by @kbase_csf_heap_context_allocator_alloc in the
 * same @kctx.
 *
- * @kctx:        Pointer to the kbase context to search for a tiler heap.
- * @heap_gpu_va: GPU virtual address of a heap context structure.
- *
 * Return: pointer to the tiler heap object, or NULL if not found.
 */
 static struct kbase_csf_tiler_heap *find_tiler_heap(
@@ -495,8 +495,11 @@ int kbase_csf_tiler_heap_init(struct kbase_context *const kctx,
 		dev_dbg(kctx->kbdev->dev, "Created tiler heap 0x%llX\n",
 			heap->gpu_va);
 		mutex_unlock(&kctx->csf.tiler_heaps.lock);
+		kctx->running_total_tiler_heap_nr_chunks += heap->chunk_count;
+		kctx->running_total_tiler_heap_memory += heap->chunk_size * heap->chunk_count;
+		if (kctx->running_total_tiler_heap_memory > kctx->peak_total_tiler_heap_memory)
+			kctx->peak_total_tiler_heap_memory = kctx->running_total_tiler_heap_memory;
 	}
-
 	return err;
 }

@@ -505,27 +508,36 @@ int kbase_csf_tiler_heap_term(struct kbase_context *const kctx,
 {
 	int err = 0;
 	struct kbase_csf_tiler_heap *heap = NULL;
+	u32 chunk_count = 0;
+	u64 heap_size = 0;

 	mutex_lock(&kctx->csf.tiler_heaps.lock);

 	heap = find_tiler_heap(kctx, heap_gpu_va);
-	if (likely(heap))
+	if (likely(heap)) {
+		chunk_count = heap->chunk_count;
+		/* Widen first so the product is computed in 64 bits. */
+		heap_size = (u64)heap->chunk_size * chunk_count;
 		delete_heap(heap);
-	else
+	} else
 		err = -EINVAL;

+	/* Adjust the context totals while tiler_heaps.lock is still held. */
+	if (likely(kctx->running_total_tiler_heap_memory >= heap_size))
+		kctx->running_total_tiler_heap_memory -= heap_size;
+	else
+		dev_warn(kctx->kbdev->dev, "Running total tiler heap memory lower than expected!");
+	if (likely(kctx->running_total_tiler_heap_nr_chunks >= chunk_count))
+		kctx->running_total_tiler_heap_nr_chunks -= chunk_count;
+	else
+		dev_warn(kctx->kbdev->dev, "Running total tiler chunk count lower than expected!");
 	mutex_unlock(&kctx->csf.tiler_heaps.lock);
-
 	return err;
 }

 /**
 * alloc_new_chunk - Allocate a new chunk for the tiler heap.
 *
- * This function will allocate a new chunk for the chunked tiler heap depending
- * on the settings provided by userspace when the heap was created and the
- * heap's statistics (like number of render passes in-flight).
- *
 * @heap:               Pointer to the tiler heap.
 * @nr_in_flight:       Number of render passes that are in-flight, must not be zero.
 * @pending_frag_count: Number of render passes in-flight with completed vertex/tiler stage.
@@ -534,6 +546,10 @@ int kbase_csf_tiler_heap_term(struct kbase_context *const kctx,
 * @new_chunk_ptr:      Where to store the GPU virtual address & size of the new
 *                      chunk allocated for the heap.
 *
+ * This function will allocate a new chunk for the chunked tiler heap depending
+ * on the settings provided by userspace when the heap was created and the
+ * heap's statistics (like number of render passes in-flight).
+ *
 * Return: 0 if a new chunk was allocated otherwise an appropriate negative
 *         error code.
 */
--- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tiler_heap.h
+++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tiler_heap.h
@@ -38,10 +38,10 @@ int kbase_csf_tiler_heap_context_init(struct kbase_context *kctx);
 * kbase_csf_tiler_heap_context_term - Terminate the tiler heaps context for a
 *                                     GPU address space
 *
+ * @kctx: Pointer to the kbase context being terminated.
+ *
 * This function deletes any chunked tiler heaps that weren't deleted before
 * context termination.
- *
- * @kctx: Pointer to the kbase context being terminated.
 */
 void kbase_csf_tiler_heap_context_term(struct kbase_context *kctx);

@@ -74,15 +74,15 @@ int kbase_csf_tiler_heap_init(struct kbase_context *kctx,
 /**
 * kbasep_cs_tiler_heap_term - Terminate a chunked tiler memory heap.
 *
+ * @kctx: Pointer to the kbase context in which the tiler heap was initialized.
+ * @gpu_heap_va: The GPU virtual address of the context that was set up for the
+ *               tiler heap.
+ *
 * This function will terminate a chunked tiler heap and cause all the chunks
 * (initial and those added during out-of-memory processing) to be freed.
 * It is the caller's responsibility to ensure no further operations on this
 * heap will happen before calling this function.
 *
- * @kctx: Pointer to the kbase context in which the tiler heap was initialized.
- * @gpu_heap_va: The GPU virtual address of the context that was set up for the
- *               tiler heap.
- *
 * Return: 0 if successful or a negative error code on failure.
 */
 int kbase_csf_tiler_heap_term(struct kbase_context *kctx, u64 gpu_heap_va);
@@ -90,12 +90,6 @@ int kbase_csf_tiler_heap_term(struct kbase_context *kctx, u64 gpu_heap_va);
 /**
 * kbase_csf_tiler_heap_alloc_new_chunk - Allocate a new chunk for tiler heap.
 *
- * This function will allocate a new chunk for the chunked tiler heap depending
- * on the settings provided by userspace when the heap was created and the
- * heap's statistics (like number of render passes in-flight).
- * It would return an appropriate error code if a new chunk couldn't be
- * allocated.
- *
 * @kctx:               Pointer to the kbase context in which the tiler heap was initialized.
 * @gpu_heap_va:        GPU virtual address of the heap context.
 * @nr_in_flight:       Number of render passes that are in-flight, must not be zero.
@@ -105,6 +99,12 @@ int kbase_csf_tiler_heap_term(struct kbase_context *kctx, u64 gpu_heap_va);
 * @new_chunk_ptr:      Where to store the GPU virtual address & size of the new
 *                      chunk allocated for the heap.
 *
+ * This function will allocate a new chunk for the chunked tiler heap depending
+ * on the settings provided by userspace when the heap was created and the
+ * heap's statistics (like number of render passes in-flight).
+ * It would return an appropriate error code if a new chunk couldn't be
+ * allocated.
+ *
 * Return: 0 if a new chunk was allocated otherwise an appropriate negative
 *         error code (like -EBUSY when a free chunk is expected to be
 *         available upon completion of a render pass and -EINVAL when
--- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tiler_heap_debugfs.c
+++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tiler_heap_debugfs.c
@@ -32,7 +32,7 @@
 * @file: The seq_file for printing to
 * @data: The debugfs dentry private data, a pointer to kbase_context
 *
- * Return: Negative error code or 0 on success.
+ * Return: 0 in any case.
 */
 static int kbasep_csf_tiler_heap_debugfs_show(struct seq_file *file, void *data)
 {
@@ -65,11 +65,41 @@ static int kbasep_csf_tiler_heap_debugfs_show(struct seq_file *file, void *data)
 	return 0;
 }

+/**
+ * kbasep_csf_tiler_heap_total_debugfs_show() - Print the total memory allocated
+ *                                              for all tiler heaps in a context.
+ *
+ * @file: The seq_file for printing to
+ * @data: The debugfs dentry private data, a pointer to kbase_context
+ *
+ * Return: 0 in any case.
+ */
+static int kbasep_csf_tiler_heap_total_debugfs_show(struct seq_file *file, void *data)
+{
+	struct kbase_context *kctx = file->private;
+
+	seq_printf(file, "MALI_CSF_TILER_HEAP_DEBUGFS_VERSION: v%u\n",
+		   MALI_CSF_TILER_HEAP_DEBUGFS_VERSION);
+	seq_printf(file, "Total number of chunks of all heaps in the context: %lu\n",
+		   (unsigned long)kctx->running_total_tiler_heap_nr_chunks);
+	seq_printf(file, "Total allocated memory of all heaps in the context: %llu\n",
+		   (unsigned long long)kctx->running_total_tiler_heap_memory);
+	seq_printf(file, "Peak allocated tiler heap memory in the context: %llu\n",
+		   (unsigned long long)kctx->peak_total_tiler_heap_memory);
+
+	return 0;
+}
+
 static int kbasep_csf_tiler_heap_debugfs_open(struct inode *in, struct file *file)
 {
 	return single_open(file, kbasep_csf_tiler_heap_debugfs_show, in->i_private);
 }

+static int kbasep_csf_tiler_heap_total_debugfs_open(struct inode *in, struct file *file)
+{
+	return single_open(file, kbasep_csf_tiler_heap_total_debugfs_show, in->i_private);
+}
+
 static const struct file_operations kbasep_csf_tiler_heap_debugfs_fops = {
 	.open = kbasep_csf_tiler_heap_debugfs_open,
 	.read = seq_read,
@@ -77,6 +107,13 @@ static const struct file_operations kbasep_csf_tiler_heap_debugfs_fops = {
 	.release = single_release,
 };

+static const struct file_operations kbasep_csf_tiler_heap_total_debugfs_fops = {
+	.open = kbasep_csf_tiler_heap_total_debugfs_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = single_release,
+};
+
 void kbase_csf_tiler_heap_debugfs_init(struct kbase_context *kctx)
 {
 	struct dentry *file;
@@ -93,6 +130,21 @@ void kbase_csf_tiler_heap_debugfs_init(struct kbase_context *kctx)
 	}
 }

+void kbase_csf_tiler_heap_total_debugfs_init(struct kbase_context *kctx)
+{
+	struct dentry *file;
+
+	if (WARN_ON(!kctx || IS_ERR_OR_NULL(kctx->kctx_dentry)))
+		return;
+
+	file = debugfs_create_file("tiler_heaps_total", 0444, kctx->kctx_dentry,
+				   kctx, &kbasep_csf_tiler_heap_total_debugfs_fops);
+
+	if (IS_ERR_OR_NULL(file)) {
+		dev_warn(kctx->kbdev->dev,
+			"Unable to create total tiler heap allocated memory debugfs entry");
+	}
+}

 #else
 /*
@@ -102,5 +154,9 @@ void kbase_csf_tiler_heap_debugfs_init(struct kbase_context *kctx)
 {
 }

+void kbase_csf_tiler_heap_total_debugfs_init(struct kbase_context *kctx)
+{
+}
+
 #endif /* CONFIG_DEBUG_FS */

--- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tiler_heap_debugfs.h
+++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tiler_heap_debugfs.h
@@ -34,4 +34,11 @@ struct kbase_context;
 */
 void kbase_csf_tiler_heap_debugfs_init(struct kbase_context *kctx);

+/**
+ * kbase_csf_tiler_heap_total_debugfs_init() - Create a debugfs entry for per-context tiler heap totals
+ *
+ * @kctx: The kbase_context for which to create the debugfs entry
+ */
+void kbase_csf_tiler_heap_total_debugfs_init(struct kbase_context *kctx);
+
 #endif /* _KBASE_CSF_TILER_HEAP_DEBUGFS_H_ */
--- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tl_reader.c
+++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tl_reader.c
@@ -171,8 +171,8 @@ static int kbase_ts_converter_init(
 *
 * Return: The CPU timestamp.
 */
-void kbase_ts_converter_convert(const struct kbase_ts_converter *self,
-				u64 *gpu_ts)
+static void __maybe_unused
+kbase_ts_converter_convert(const struct kbase_ts_converter *self, u64 *gpu_ts)
 {
 	u64 old_gpu_ts = *gpu_ts;
 	*gpu_ts = div64_u64(old_gpu_ts * self->multiplier, self->divisor) +
@@ -477,7 +477,14 @@ int kbase_csf_tl_reader_start(struct kbase_csf_tl_reader *self,
 		return 0;

 	if (tl_reader_init_late(self, kbdev)) {
+#if IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI)
+		dev_warn(
+			kbdev->dev,
+			"CSFFW timeline is not available for MALI_BIFROST_NO_MALI builds!");
+		return 0;
+#else
 		return -EINVAL;
+#endif
 	}

 	tl_reader_reset(self);
@@ -521,14 +528,5 @@ void kbase_csf_tl_reader_stop(struct kbase_csf_tl_reader *self)

 void kbase_csf_tl_reader_reset(struct kbase_csf_tl_reader *self)
 {
-	u64 gpu_cycle = 0;
-	struct kbase_device *kbdev = self->kbdev;
-
-	if (!kbdev)
-		return;
-
 	kbase_csf_tl_reader_flush_buffer(self);
-
-	get_cpu_gpu_time(kbdev, NULL, NULL, &gpu_cycle);
-	KBASE_TLSTREAM_TL_KBASE_CSFFW_RESET(kbdev, gpu_cycle);
 }
--- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tl_reader.h
+++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tl_reader.h
@@ -40,8 +40,7 @@ struct kbase_tlstream;
 struct kbase_device;

 /**
- * struct kbase_ts_converter -
- * System timestamp to CPU timestamp converter state.
+ * struct kbase_ts_converter - System timestamp to CPU timestamp converter state.
 *
 * @multiplier:		Numerator of the converter's fraction.
 * @divisor:		Denominator of the converter's fraction.
@@ -145,8 +144,7 @@ void kbase_csf_tl_reader_term(struct kbase_csf_tl_reader *self);
 int kbase_csf_tl_reader_flush_buffer(struct kbase_csf_tl_reader *self);

 /**
- * kbase_csf_tl_reader_start() -
- *	Start asynchronous copying of CSFFW timeline stream.
+ * kbase_csf_tl_reader_start() - Start asynchronous copying of CSFFW timeline stream.
 *
 * @self:	CSFFW TL Reader instance.
 * @kbdev:	Kbase device.
@@ -157,8 +155,7 @@ int kbase_csf_tl_reader_start(struct kbase_csf_tl_reader *self,
 	struct kbase_device *kbdev);

 /**
- * kbase_csf_tl_reader_stop() -
- *	Stop asynchronous copying of CSFFW timeline stream.
+ * kbase_csf_tl_reader_stop() - Stop asynchronous copying of CSFFW timeline stream.
 *
 * @self:	CSFFW TL Reader instance.
 */
@@ -166,8 +163,7 @@ void kbase_csf_tl_reader_stop(struct kbase_csf_tl_reader *self);

 #if IS_ENABLED(CONFIG_DEBUG_FS)
 /**
- * kbase_csf_tl_reader_debugfs_init() -
- *	Initialize debugfs for CSFFW Timelime Stream Reader.
+ * kbase_csf_tl_reader_debugfs_init() - Initialize debugfs for CSFFW Timeline Stream Reader.
 *
 * @kbdev:	Kbase device.
 */
@@ -175,8 +171,7 @@ void kbase_csf_tl_reader_debugfs_init(struct kbase_device *kbdev);
 #endif

 /**
- * kbase_csf_tl_reader_reset() -
- *	Reset CSFFW timeline reader, it should be called before reset CSFFW.
+ * kbase_csf_tl_reader_reset() - Reset CSFFW timeline reader; call it before resetting CSFFW.
 *
 * @self:	CSFFW TL Reader instance.
 */
--- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_trace_buffer.c
+++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_trace_buffer.c
@@ -38,12 +38,6 @@
 /**
 * struct firmware_trace_buffer - Trace Buffer within the MCU firmware
 *
- * The firmware relays information to the host by writing on memory buffers
- * which are allocated and partially configured by the host. These buffers
- * are called Trace Buffers: each of them has a specific purpose and is
- * identified by a name and a set of memory addresses where the host can
- * set pointers to host-allocated structures.
- *
 * @kbdev:        Pointer to the Kbase device.
 * @node:         List head linking all trace buffers to
 *                kbase_device:csf.firmware_trace_buffers
@@ -73,6 +67,12 @@
 * @num_pages: Size of the data buffer, in pages.
 * @trace_enable_init_mask: Initial value for the trace enable bit mask.
 * @name:  NULL terminated string which contains the name of the trace buffer.
+ *
+ * The firmware relays information to the host by writing on memory buffers
+ * which are allocated and partially configured by the host. These buffers
+ * are called Trace Buffers: each of them has a specific purpose and is
+ * identified by a name and a set of memory addresses where the host can
+ * set pointers to host-allocated structures.
 */
 struct firmware_trace_buffer {
 	struct kbase_device *kbdev;
@@ -100,14 +100,14 @@ struct firmware_trace_buffer {
 /**
 * struct firmware_trace_buffer_data - Configuration data for trace buffers
 *
- * Describe how to set up a trace buffer interface.
- * Trace buffers are identified by name and they require a data buffer and
- * an initial mask of values for the trace enable bits.
- *
 * @name: Name identifier of the trace buffer
 * @trace_enable_init_mask: Initial value to assign to the trace enable bits
 * @size: Size of the data buffer to allocate for the trace buffer, in pages.
 *        The size of a data buffer must always be a power of 2.
+ *
+ * Describe how to set up a trace buffer interface.
+ * Trace buffers are identified by name and they require a data buffer and
+ * an initial mask of values for the trace enable bits.
 */
 struct firmware_trace_buffer_data {
 	char name[64];
@@ -121,14 +121,13 @@ struct firmware_trace_buffer_data {
 * This table contains the configuration data for the trace buffers that are
 * expected to be parsed from the firmware.
 */
-static const struct firmware_trace_buffer_data
-trace_buffer_data[] = {
-#ifndef MALI_KBASE_BUILD
-	{ "fwutf", {0}, 1 },
+static const struct firmware_trace_buffer_data trace_buffer_data[] = {
+#if MALI_UNIT_TEST
+	{ "fwutf", { 0 }, 1 },
 #endif
-	{ FW_TRACE_BUF_NAME, {0}, 4 },
-	{ "benchmark", {0}, 2 },
-	{ "timeline",  {0}, KBASE_CSF_TL_BUFFER_NR_PAGES },
+	{ FW_TRACE_BUF_NAME, { 0 }, 4 },
+	{ "benchmark", { 0 }, 2 },
+	{ "timeline", { 0 }, KBASE_CSF_TL_BUFFER_NR_PAGES },
 };

 int kbase_csf_firmware_trace_buffers_init(struct kbase_device *kbdev)
--- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_trace_buffer.h
+++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_trace_buffer.h
@@ -34,6 +34,8 @@ struct kbase_device;
 /**
 * kbase_csf_firmware_trace_buffers_init - Initialize trace buffers
 *
+ * @kbdev: Device pointer
+ *
 * Allocate resources for trace buffers. In particular:
 * - One memory page of GPU-readable, CPU-writable memory is used for
 *   the Extract variables of all trace buffers.
@@ -52,8 +54,6 @@ struct kbase_device;
 * populated with data from the firmware image parsing.
 *
 * Return: 0 if success, or an error code on failure.
- *
- * @kbdev: Device pointer
 */
 int kbase_csf_firmware_trace_buffers_init(struct kbase_device *kbdev);

@@ -67,6 +67,11 @@ void kbase_csf_firmware_trace_buffers_term(struct kbase_device *kbdev);
 /**
 * kbase_csf_firmware_parse_trace_buffer_entry - Process a "trace buffer" section
 *
+ * @kbdev:     Kbase device structure
+ * @entry:     Pointer to the section
+ * @size:      Size (in bytes) of the section
+ * @updatable: Indicates whether config items can be updated with FIRMWARE_CONFIG_UPDATE
+ *
 * Read a "trace buffer" section adding metadata for the related trace buffer
 * to the kbase_device:csf.firmware_trace_buffers list.
 *
@@ -74,11 +79,6 @@ void kbase_csf_firmware_trace_buffers_term(struct kbase_device *kbdev);
 * will not be initialized.
 *
 * Return: 0 if successful, negative error code on failure.
- *
- * @kbdev:     Kbase device structure
- * @entry:     Pointer to the section
- * @size:      Size (in bytes) of the section
- * @updatable: Indicates whether config items can be updated with FIRMWARE_CONFIG_UPDATE
 */
 int kbase_csf_firmware_parse_trace_buffer_entry(struct kbase_device *kbdev,
 						const u32 *entry,
@@ -86,8 +86,9 @@ int kbase_csf_firmware_parse_trace_buffer_entry(struct kbase_device *kbdev,
 						bool updatable);

 /**
- * kbase_csf_firmware_reload_trace_buffers_data -
- * Reload trace buffers data for firmware reboot
+ * kbase_csf_firmware_reload_trace_buffers_data - Reload trace buffers data for firmware reboot
+ *
+ * @kbdev: Device pointer
 *
 * Helper function used when rebooting the firmware to reload the initial setup
 * for all the trace buffers which have been previously parsed and initialized.
@@ -99,44 +100,40 @@ int kbase_csf_firmware_parse_trace_buffer_entry(struct kbase_device *kbdev,
 *
 * In other words, the re-initialization done by this function will be
 * equivalent but not necessarily identical to the original initialization.
- *
- * @kbdev: Device pointer
 */
 void kbase_csf_firmware_reload_trace_buffers_data(struct kbase_device *kbdev);

 /**
 * kbase_csf_firmware_get_trace_buffer - Get a trace buffer
 *
- * Return: handle to a trace buffer, given the name, or NULL if a trace buffer
- *         with that name couldn't be found.
- *
 * @kbdev: Device pointer
 * @name:  Name of the trace buffer to find
+ *
+ * Return: handle to a trace buffer, given the name, or NULL if a trace buffer
+ *         with that name couldn't be found.
 */
 struct firmware_trace_buffer *kbase_csf_firmware_get_trace_buffer(
 	struct kbase_device *kbdev, const char *name);

 /**
- * kbase_csf_firmware_trace_buffer_get_trace_enable_bits_count -
- * Get number of trace enable bits for a trace buffer
- *
- * Return: Number of trace enable bits in a trace buffer.
+ * kbase_csf_firmware_trace_buffer_get_trace_enable_bits_count - Get number of trace enable bits for a trace buffer
 *
 * @trace_buffer: Trace buffer handle
+ *
+ * Return: Number of trace enable bits in a trace buffer.
 */
 unsigned int kbase_csf_firmware_trace_buffer_get_trace_enable_bits_count(
 	const struct firmware_trace_buffer *trace_buffer);

 /**
- * kbase_csf_firmware_trace_buffer_update_trace_enable_bit -
- * Update a trace enable bit
- *
- * Update the value of a given trace enable bit.
+ * kbase_csf_firmware_trace_buffer_update_trace_enable_bit - Update a trace enable bit
 *
 * @trace_buffer: Trace buffer handle
 * @bit:          Bit to update
 * @value:        New value for the given bit
 *
+ * Update the value of a given trace enable bit.
+ *
 * Return: 0 if successful, negative error code on failure.
 */
 int kbase_csf_firmware_trace_buffer_update_trace_enable_bit(
@@ -146,9 +143,9 @@ int kbase_csf_firmware_trace_buffer_update_trace_enable_bit(
 /**
 * kbase_csf_firmware_trace_buffer_is_empty - Empty trace buffer predicate
 *
- * Return: True if the trace buffer is empty, or false otherwise.
- *
 * @trace_buffer: Trace buffer handle
+ *
+ * Return: True if the trace buffer is empty, or false otherwise.
 */
 bool kbase_csf_firmware_trace_buffer_is_empty(
 	const struct firmware_trace_buffer *trace_buffer);
@@ -156,14 +153,14 @@ bool kbase_csf_firmware_trace_buffer_is_empty(
 /**
 * kbase_csf_firmware_trace_buffer_read_data - Read data from a trace buffer
 *
+ * @trace_buffer: Trace buffer handle
+ * @data:         Pointer to a client-allocated buffer where data shall be written.
+ * @num_bytes:    Maximum number of bytes to read from the trace buffer.
+ *
 * Read available data from a trace buffer. The client provides a data buffer
 * of a given size and the maximum number of bytes to read.
 *
 * Return: Number of bytes read from the trace buffer.
- *
- * @trace_buffer: Trace buffer handle
- * @data:         Pointer to a client-allocated where data shall be written.
- * @num_bytes:    Maximum number of bytes to read from the trace buffer.
 */
 unsigned int kbase_csf_firmware_trace_buffer_read_data(
 	struct firmware_trace_buffer *trace_buffer, u8 *data, unsigned int num_bytes);
--- a/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_ktrace_codes_csf.h
+++ b/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_ktrace_codes_csf.h
@@ -97,6 +97,13 @@ int dummy_array[] = {
 	/* info_val = bitmask of slots that gave an ACK for STATUS_UPDATE */
 	KBASE_KTRACE_CODE_MAKE_CODE(SLOTS_STATUS_UPDATE_ACK),

+	/* info_val[63:0] = GPU cycle counter, used mainly for benchmarking
+	 * purpose.
+	 */
+	KBASE_KTRACE_CODE_MAKE_CODE(GPU_IDLE_HANDLING_START),
+	KBASE_KTRACE_CODE_MAKE_CODE(MCU_HALTED),
+	KBASE_KTRACE_CODE_MAKE_CODE(MCU_IN_SLEEP),
+
 	/*
 	 * Group events
 	 */
--- a/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_linux_ktrace_csf.h
+++ b/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_linux_ktrace_csf.h
@@ -58,6 +58,9 @@ DEFINE_MALI_ADD_EVENT(IDLE_WORKER_END);
 DEFINE_MALI_ADD_EVENT(GROUP_SYNC_UPDATE_WORKER_BEGIN);
 DEFINE_MALI_ADD_EVENT(GROUP_SYNC_UPDATE_WORKER_END);
 DEFINE_MALI_ADD_EVENT(SLOTS_STATUS_UPDATE_ACK);
+DEFINE_MALI_ADD_EVENT(GPU_IDLE_HANDLING_START);
+DEFINE_MALI_ADD_EVENT(MCU_HALTED);
+DEFINE_MALI_ADD_EVENT(MCU_IN_SLEEP);

 DECLARE_EVENT_CLASS(mali_csf_grp_q_template,
 	TP_PROTO(struct kbase_device *kbdev, struct kbase_queue_group *group,
--- a/drivers/gpu/arm/bifrost/debug/mali_kbase_debug_ktrace.h
+++ b/drivers/gpu/arm/bifrost/debug/mali_kbase_debug_ktrace.h
@@ -49,6 +49,7 @@
 /**
 * kbase_ktrace_init - initialize kbase ktrace.
 * @kbdev: kbase device
+ * Return: 0 if successful or a negative error code on failure.
 */
 int kbase_ktrace_init(struct kbase_device *kbdev);

--- a/drivers/gpu/arm/bifrost/debug/mali_kbase_debug_ktrace_internal.h
+++ b/drivers/gpu/arm/bifrost/debug/mali_kbase_debug_ktrace_internal.h
@@ -63,6 +63,8 @@ void kbasep_ktrace_backend_format_msg(struct kbase_ktrace_msg *trace_msg,
 * @ktrace: kbase device's ktrace
 *
 * This may also empty the oldest entry in the ringbuffer to make space.
+ *
+ * Return: ktrace message
 */
 struct kbase_ktrace_msg *kbasep_ktrace_reserve(struct kbase_ktrace *ktrace);

--- a/drivers/gpu/arm/bifrost/device/backend/mali_kbase_device_csf.c
+++ b/drivers/gpu/arm/bifrost/device/backend/mali_kbase_device_csf.c
@@ -24,11 +24,15 @@

 #include <mali_kbase_hwaccess_backend.h>
 #include <mali_kbase_hwcnt_backend_csf_if_fw.h>
+#include <mali_kbase_hwcnt_watchdog_if_timer.h>
 #include <mali_kbase_ctx_sched.h>
 #include <mali_kbase_reset_gpu.h>
 #include <csf/mali_kbase_csf.h>
 #include <csf/ipa_control/mali_kbase_csf_ipa_control.h>

+#if IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI)
+#include <backend/gpu/mali_kbase_model_linux.h>
+#endif

 #include <mali_kbase.h>
 #include <backend/gpu/mali_kbase_irq_internal.h>
@@ -195,10 +199,32 @@ static void kbase_csf_early_term(struct kbase_device *kbdev)
 	kbase_csf_scheduler_early_term(kbdev);
 }

+/**
+ * kbase_device_hwcnt_watchdog_if_init - Create hardware counter watchdog interface.
+ * @kbdev:	Device pointer
+ * Return: Status returned by kbase_hwcnt_watchdog_if_timer_create().
+ */
+static int kbase_device_hwcnt_watchdog_if_init(struct kbase_device *kbdev)
+{
+	return kbase_hwcnt_watchdog_if_timer_create(
+		&kbdev->hwcnt_watchdog_timer);
+}
+
+/**
+ * kbase_device_hwcnt_watchdog_if_term - Terminate hardware counter watchdog
+ *                                       interface.
+ * @kbdev:	Device pointer
+ */
+static void kbase_device_hwcnt_watchdog_if_term(struct kbase_device *kbdev)
+{
+	kbase_hwcnt_watchdog_if_timer_destroy(&kbdev->hwcnt_watchdog_timer);
+}
+
 /**
 * kbase_device_hwcnt_backend_csf_if_init - Create hardware counter backend
 *                                          firmware interface.
 * @kbdev:	Device pointer
+ * Return: 0 if successful or a negative error code on failure.
 */
 static int kbase_device_hwcnt_backend_csf_if_init(struct kbase_device *kbdev)
 {
@@ -226,7 +252,7 @@ static int kbase_device_hwcnt_backend_csf_init(struct kbase_device *kbdev)
 	return kbase_hwcnt_backend_csf_create(
 		&kbdev->hwcnt_backend_csf_if_fw,
 		KBASE_HWCNT_BACKEND_CSF_RING_BUFFER_COUNT,
-		&kbdev->hwcnt_gpu_iface);
+		&kbdev->hwcnt_watchdog_timer, &kbdev->hwcnt_gpu_iface);
 }

 /**
@@ -239,8 +265,13 @@ static void kbase_device_hwcnt_backend_csf_term(struct kbase_device *kbdev)
 }

 static const struct kbase_device_init dev_init[] = {
+#if IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI)
+	{ kbase_gpu_device_create, kbase_gpu_device_destroy,
+	  "Dummy model initialization failed" },
+#else
 	{ assign_irqs, NULL, "IRQ search failed" },
 	{ registers_map, registers_unmap, "Register map failed" },
+#endif
 	{ power_control_init, power_control_term,
 	  "Power control initialization failed" },
 	{ kbase_device_io_history_init, kbase_device_io_history_term,
@@ -270,6 +301,9 @@ static const struct kbase_device_init dev_init[] = {
 	  "Clock rate trace manager initialization failed" },
 	{ kbase_lowest_gpu_freq_init, NULL,
 	  "Lowest freq initialization failed" },
+	{ kbase_device_hwcnt_watchdog_if_init,
+	  kbase_device_hwcnt_watchdog_if_term,
+	  "GPU hwcnt backend watchdog interface creation failed" },
 	{ kbase_device_hwcnt_backend_csf_if_init,
 	  kbase_device_hwcnt_backend_csf_if_term,
 	  "GPU hwcnt backend CSF interface creation failed" },
@@ -283,7 +317,6 @@ static const struct kbase_device_init dev_init[] = {
 	{ kbase_csf_early_init, kbase_csf_early_term,
 	  "Early CSF initialization failed" },
 	{ NULL, kbase_device_firmware_hwcnt_term, NULL },
-#ifdef MALI_KBASE_BUILD
 	{ kbase_device_debugfs_init, kbase_device_debugfs_term,
 	  "DebugFS initialization failed" },
 	/* Sysfs init needs to happen before registering the device with
@@ -305,7 +338,6 @@ static const struct kbase_device_init dev_init[] = {
 	  "GPU property population failed" },
 	{ kbase_device_late_init, kbase_device_late_term,
 	  "Late device initialization failed" },
-#endif
 };

 static void kbase_device_term_partial(struct kbase_device *kbdev,
@@ -476,3 +508,4 @@ out:

 	return ret;
 }
+KBASE_EXPORT_TEST_API(kbase_device_firmware_init_once);
--- a/drivers/gpu/arm/bifrost/device/backend/mali_kbase_device_jm.c
+++ b/drivers/gpu/arm/bifrost/device/backend/mali_kbase_device_jm.c
@@ -28,6 +28,9 @@
 #include <mali_kbase_ctx_sched.h>
 #include <mali_kbase_reset_gpu.h>

+#if IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI)
+#include <backend/gpu/mali_kbase_model_linux.h>
+#endif /* CONFIG_MALI_BIFROST_NO_MALI */

 #ifdef CONFIG_MALI_ARBITER_SUPPORT
 #include <arbiter/mali_kbase_arbiter_pm.h>
@@ -156,8 +159,13 @@ static void kbase_device_hwcnt_backend_jm_term(struct kbase_device *kbdev)
 }

 static const struct kbase_device_init dev_init[] = {
+#if IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI)
+	{ kbase_gpu_device_create, kbase_gpu_device_destroy,
+	  "Dummy model initialization failed" },
+#else
 	{ assign_irqs, NULL, "IRQ search failed" },
 	{ registers_map, registers_unmap, "Register map failed" },
+#endif
 	{ kbase_device_io_history_init, kbase_device_io_history_term,
 	  "Register access history initialization failed" },
 	{ kbase_device_pm_init, kbase_device_pm_term,
@@ -203,7 +211,6 @@ static const struct kbase_device_init dev_init[] = {
 	  "Performance counter instrumentation initialization failed" },
 	{ kbase_backend_late_init, kbase_backend_late_term,
 	  "Late backend initialization failed" },
-#ifdef MALI_KBASE_BUILD
 	{ kbase_debug_job_fault_dev_init, kbase_debug_job_fault_dev_term,
 	  "Job fault debug initialization failed" },
 	{ kbase_device_debugfs_init, kbase_device_debugfs_term,
@@ -225,7 +232,6 @@ static const struct kbase_device_init dev_init[] = {
 	  "Misc device registration failed" },
 	{ kbase_gpuprops_populate_user_buffer, kbase_gpuprops_free_user_buffer,
 	  "GPU property population failed" },
-#endif
 	{ NULL, kbase_dummy_job_wa_cleanup, NULL },
 	{ kbase_device_late_init, kbase_device_late_term,
 	  "Late device initialization failed" },
--- a/drivers/gpu/arm/bifrost/device/mali_kbase_device.c
+++ b/drivers/gpu/arm/bifrost/device/mali_kbase_device.c
@@ -275,6 +275,7 @@ int kbase_device_misc_init(struct kbase_device * const kbdev)
 	if (err)
 		goto dma_set_mask_failed;

+
 	/* There is no limit for Mali, so set to max. We only do this if dma_parms
 	 * is already allocated by the platform.
 	 */
@@ -345,6 +346,7 @@ void kbase_device_misc_term(struct kbase_device *kbdev)

 	kbase_device_all_as_term(kbdev);

+
 	if (kbdev->oom_notifier_block.notifier_call)
 		unregister_oom_notifier(&kbdev->oom_notifier_block);
 }
--- a/drivers/gpu/arm/bifrost/device/mali_kbase_device.h
+++ b/drivers/gpu/arm/bifrost/device/mali_kbase_device.h
@@ -117,23 +117,43 @@ u32 kbase_reg_read(struct kbase_device *kbdev, u32 offset);
 */
 bool kbase_is_gpu_removed(struct kbase_device *kbdev);

+/**
+ * kbase_gpu_cache_flush_and_busy_wait - Start a cache flush and busy wait
+ * @kbdev: Kbase device
+ * @flush_op: Flush command register value to be sent to HW
+ *
+ * Issue a cache flush command to hardware, then busy-wait on the irq status.
+ * This function clears the CLEAN_CACHES_COMPLETED irq mask bit set by other
+ * threads through kbase_gpu_start_cache_clean(), and wakes them up manually
+ * after the busy-wait is done. Any pending cache flush commands raised by
+ * other threads are handled in this function.
+ * hwaccess_lock must be held by the caller.
+ *
+ * Return: 0 if successful or a negative error code on failure.
+ */
+int kbase_gpu_cache_flush_and_busy_wait(struct kbase_device *kbdev,
+					u32 flush_op);
+
 /**
 * kbase_gpu_start_cache_clean - Start a cache clean
 * @kbdev: Kbase device
+ * @flush_op: Flush command register value to be sent to HW
 *
- * Issue a cache clean and invalidate command to hardware. This function will
- * take hwaccess_lock.
+ * Issue a given cache flush command to hardware.
+ * This function will take hwaccess_lock.
 */
-void kbase_gpu_start_cache_clean(struct kbase_device *kbdev);
+void kbase_gpu_start_cache_clean(struct kbase_device *kbdev, u32 flush_op);

 /**
 * kbase_gpu_start_cache_clean_nolock - Start a cache clean
 * @kbdev: Kbase device
+ * @flush_op: Flush command register value to be sent to HW
 *
- * Issue a cache clean and invalidate command to hardware. hwaccess_lock
- * must be held by the caller.
+ * Issue a given cache flush command to hardware.
+ * hwaccess_lock must be held by the caller.
 */
-void kbase_gpu_start_cache_clean_nolock(struct kbase_device *kbdev);
+void kbase_gpu_start_cache_clean_nolock(struct kbase_device *kbdev,
+					u32 flush_op);

 /**
 * kbase_gpu_wait_cache_clean - Wait for cache cleaning to finish
--- a/drivers/gpu/arm/bifrost/device/mali_kbase_device_hw.c
+++ b/drivers/gpu/arm/bifrost/device/mali_kbase_device_hw.c
@@ -38,7 +38,98 @@ bool kbase_is_gpu_removed(struct kbase_device *kbdev)
 }
 #endif /* !IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) */

-void kbase_gpu_start_cache_clean_nolock(struct kbase_device *kbdev)
+static int busy_wait_cache_clean_irq(struct kbase_device *kbdev)
+{
+	/* Previously the MMU-AS command was used for L2 cache flush on page-table
+	 * update. The same max-loops count is reused for the GPU command, because
+	 * the L2 cache flush overhead is the same for both.
+	 */
+	unsigned int max_loops = KBASE_AS_INACTIVE_MAX_LOOPS;
+
+	/* Spin until CLEAN_CACHES_COMPLETED is raised or max_loops runs out */
+	while (--max_loops &&
+	       !(kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_RAWSTAT)) &
+		 CLEAN_CACHES_COMPLETED)) {
+		;
+	}
+
+	/* Timed out: log the error, try to reset the GPU and return -EBUSY */
+	if (max_loops == 0) {
+		dev_err(kbdev->dev,
+			"CLEAN_CACHES_COMPLETED bit stuck, might be caused by slow/unstable GPU clock or possible faulty FPGA connector\n");
+		if (kbase_prepare_to_reset_gpu_locked(kbdev, RESET_FLAGS_NONE))
+			kbase_reset_gpu_locked(kbdev);
+		return -EBUSY;
+	}
+
+	/* Clear the CLEAN_CACHES_COMPLETED interrupt bit now that it was seen. */
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR),
+			CLEAN_CACHES_COMPLETED);
+
+	return 0;
+}
+
+int kbase_gpu_cache_flush_and_busy_wait(struct kbase_device *kbdev,
+					u32 flush_op)
+{
+	u32 irq_mask;
+	int need_to_wake_up = 0;
+	int ret = 0;
+
+	/* hwaccess_lock must be held to avoid any sync issue with
+	 * kbase_gpu_start_cache_clean() / kbase_clean_caches_done()
+	 */
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	/* 1. Check whether the CLEAN_CACHES_COMPLETED irq mask bit is set.
+	 *    If it is set, there are threads waiting for the
+	 *    CLEAN_CACHES_COMPLETED irq to be raised.
+	 *    Clear the irq mask bit and busy-wait for the cache clean
+	 *    operation to complete before submitting the cache clean
+	 *    command required after the GPU page table update.
+	 *    Any queued flush command is merged into the requested one.
+	 */
+	irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK));
+	if (irq_mask & CLEAN_CACHES_COMPLETED) {
+		/* disable the irq first */
+		kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK),
+				irq_mask & ~CLEAN_CACHES_COMPLETED);
+
+		/* busy-wait for the irq status bit to be raised */
+		ret = busy_wait_cache_clean_irq(kbdev);
+		if (ret)
+			return ret;
+
+		/* merge the queued command, if there is any */
+		flush_op = GPU_COMMAND_FLUSH_CACHE_MERGE(
+			kbdev->cache_clean_queued, flush_op);
+
+		/* remember to wake up the waiting threads once we are done */
+		need_to_wake_up = 1;
+	} else {
+		/* Clear the interrupt CLEAN_CACHES_COMPLETED bit. */
+		kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR),
+				CLEAN_CACHES_COMPLETED);
+	}
+
+	/* 2. Issue GPU_CONTROL.COMMAND.FLUSH_CACHE operation. */
+	KBASE_KTRACE_ADD(kbdev, CORE_GPU_CLEAN_INV_CACHES, NULL, flush_op);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), flush_op);
+
+	/* 3. Busy-wait for the irq status bit to be raised. */
+	ret = busy_wait_cache_clean_irq(kbdev);
+	if (ret)
+		return ret;
+
+	/* 4. Wake up blocked threads, if there are any. */
+	if (need_to_wake_up)
+		kbase_gpu_cache_clean_wait_complete(kbdev);
+
+	return ret;
+}
+
+void kbase_gpu_start_cache_clean_nolock(struct kbase_device *kbdev,
+					u32 flush_op)
 {
 	u32 irq_mask;

@@ -47,10 +138,11 @@ void kbase_gpu_start_cache_clean_nolock(struct kbase_device *kbdev)
 	if (kbdev->cache_clean_in_progress) {
 		/* If this is called while another clean is in progress, we
 		 * can't rely on the current one to flush any new changes in
-		 * the cache. Instead, trigger another cache clean immediately
-		 * after this one finishes.
+		 * the cache. Instead, accumulate all cache clean operations
+		 * and trigger that immediately after this one finishes.
 		 */
-		kbdev->cache_clean_queued = true;
+		kbdev->cache_clean_queued = GPU_COMMAND_FLUSH_CACHE_MERGE(
+			kbdev->cache_clean_queued, flush_op);
 		return;
 	}

@@ -59,19 +151,18 @@ void kbase_gpu_start_cache_clean_nolock(struct kbase_device *kbdev)
 	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK),
 				irq_mask | CLEAN_CACHES_COMPLETED);

-	KBASE_KTRACE_ADD(kbdev, CORE_GPU_CLEAN_INV_CACHES, NULL, 0);
-	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
-			GPU_COMMAND_CACHE_CLN_INV_L2);
+	KBASE_KTRACE_ADD(kbdev, CORE_GPU_CLEAN_INV_CACHES, NULL, flush_op);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), flush_op);

 	kbdev->cache_clean_in_progress = true;
 }

-void kbase_gpu_start_cache_clean(struct kbase_device *kbdev)
+void kbase_gpu_start_cache_clean(struct kbase_device *kbdev, u32 flush_op)
 {
 	unsigned long flags;

 	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
-	kbase_gpu_start_cache_clean_nolock(kbdev);
+	kbase_gpu_start_cache_clean_nolock(kbdev, flush_op);
 	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
 }

@@ -79,7 +170,7 @@ void kbase_gpu_cache_clean_wait_complete(struct kbase_device *kbdev)
 {
 	lockdep_assert_held(&kbdev->hwaccess_lock);

-	kbdev->cache_clean_queued = false;
+	kbdev->cache_clean_queued = 0;
 	kbdev->cache_clean_in_progress = false;
 	wake_up(&kbdev->cache_clean_wait);
 }
@@ -92,11 +183,14 @@ void kbase_clean_caches_done(struct kbase_device *kbdev)
 	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);

 	if (kbdev->cache_clean_queued) {
-		kbdev->cache_clean_queued = false;
+		u32 pended_flush_op = kbdev->cache_clean_queued;

-		KBASE_KTRACE_ADD(kbdev, CORE_GPU_CLEAN_INV_CACHES, NULL, 0);
+		kbdev->cache_clean_queued = 0;
+
+		KBASE_KTRACE_ADD(kbdev, CORE_GPU_CLEAN_INV_CACHES, NULL,
+				 pended_flush_op);
 		kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
-				GPU_COMMAND_CACHE_CLN_INV_L2);
+				pended_flush_op);
 	} else {
 		/* Disable interrupt */
 		irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK));
--- a/drivers/gpu/arm/bifrost/ipa/backend/mali_kbase_ipa_counter_jm.c
+++ b/drivers/gpu/arm/bifrost/ipa/backend/mali_kbase_ipa_counter_jm.c
@@ -24,6 +24,9 @@
 #include "mali_kbase_ipa_counter_common_jm.h"
 #include "mali_kbase.h"

+#if IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI)
+#include <backend/gpu/mali_kbase_model_dummy.h>
+#endif /* CONFIG_MALI_BIFROST_NO_MALI */

 /* Performance counter blocks base offsets */
 #define JM_BASE             (0 * KBASE_IPA_NR_BYTES_PER_BLOCK)
@@ -94,9 +97,15 @@ static u32 kbase_g7x_power_model_get_memsys_counter(struct kbase_ipa_model_vinst
 static u32 kbase_g7x_power_model_get_sc_counter(struct kbase_ipa_model_vinstr_data *model_data,
 						u32 counter_block_offset)
 {
+#if IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI)
+	const u32 sc_base = MEMSYS_BASE +
+		(KBASE_DUMMY_MODEL_MAX_MEMSYS_BLOCKS *
+		 KBASE_IPA_NR_BYTES_PER_BLOCK);
+#else
 	const u32 sc_base = MEMSYS_BASE +
 		(model_data->kbdev->gpu_props.props.l2_props.num_l2_slices *
 		 KBASE_IPA_NR_BYTES_PER_BLOCK);
+#endif
 	return sc_base + counter_block_offset;
 }

--- a/drivers/gpu/arm/bifrost/ipa/mali_kbase_ipa.c
+++ b/drivers/gpu/arm/bifrost/ipa/mali_kbase_ipa.c
@@ -537,18 +537,34 @@ static void opp_translate_freq_voltage(struct kbase_device *kbdev,
 				       unsigned long *freqs,
 				       unsigned long *volts)
 {
+#if IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI)
+	/* An arbitrary voltage and frequency value can be chosen for testing
+	 * in no mali configuration which may not match with any OPP level.
+	 */
+	freqs[KBASE_IPA_BLOCK_TYPE_TOP_LEVEL] = nominal_freq;
+	volts[KBASE_IPA_BLOCK_TYPE_TOP_LEVEL] = nominal_voltage;
+
+	freqs[KBASE_IPA_BLOCK_TYPE_SHADER_CORES] = nominal_freq;
+	volts[KBASE_IPA_BLOCK_TYPE_SHADER_CORES] = nominal_voltage;
+#else
 	u64 core_mask;
+	unsigned int i;

 	kbase_devfreq_opp_translate(kbdev, nominal_freq, &core_mask,
 				    freqs, volts);
 	CSTD_UNUSED(core_mask);

+	/* Convert micro volts to milli volts */
+	for (i = 0; i < kbdev->nr_clocks; i++)
+		volts[i] /= 1000;
+
 	if (kbdev->nr_clocks == 1) {
 		freqs[KBASE_IPA_BLOCK_TYPE_SHADER_CORES] =
 			freqs[KBASE_IPA_BLOCK_TYPE_TOP_LEVEL];
 		volts[KBASE_IPA_BLOCK_TYPE_SHADER_CORES] =
 			volts[KBASE_IPA_BLOCK_TYPE_TOP_LEVEL];
 	}
+#endif
 }

 #if KERNEL_VERSION(5, 10, 0) > LINUX_VERSION_CODE
--- a/drivers/gpu/arm/bifrost/jm/mali_kbase_jm_defs.h
+++ b/drivers/gpu/arm/bifrost/jm/mali_kbase_jm_defs.h
@@ -653,8 +653,8 @@ static inline bool kbase_jd_katom_is_protected(

 /**
 * kbase_atom_is_younger - query if one atom is younger by age than another
- * @katom_a the first atom
- * @katom_a the second atom
+ * @katom_a: the first atom
+ * @katom_b: the second atom
 *
 * Return: true if the first atom is strictly younger than the second, false
 * otherwise.
--- a/drivers/gpu/arm/bifrost/mali_base_hwconfig_features.h
+++ b/drivers/gpu/arm/bifrost/mali_base_hwconfig_features.h
@@ -37,41 +37,42 @@ enum base_hw_feature {
 	BASE_HW_FEATURE_L2_CONFIG,
 	BASE_HW_FEATURE_ASN_HASH,
 	BASE_HW_FEATURE_GPU_SLEEP,
+	BASE_HW_FEATURE_FLUSH_INV_SHADER_OTHER,
 	BASE_HW_FEATURE_END
 };

-static const enum base_hw_feature base_hw_features_generic[] = {
+__attribute__((unused)) static const enum base_hw_feature base_hw_features_generic[] = {
 	BASE_HW_FEATURE_END
 };

-static const enum base_hw_feature base_hw_features_tMIx[] = {
+__attribute__((unused)) static const enum base_hw_feature base_hw_features_tMIx[] = {
 	BASE_HW_FEATURE_THREAD_GROUP_SPLIT,
 	BASE_HW_FEATURE_FLUSH_REDUCTION,
 	BASE_HW_FEATURE_END
 };

-static const enum base_hw_feature base_hw_features_tHEx[] = {
+__attribute__((unused)) static const enum base_hw_feature base_hw_features_tHEx[] = {
 	BASE_HW_FEATURE_THREAD_GROUP_SPLIT,
 	BASE_HW_FEATURE_FLUSH_REDUCTION,
 	BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
 	BASE_HW_FEATURE_END
 };

-static const enum base_hw_feature base_hw_features_tSIx[] = {
+__attribute__((unused)) static const enum base_hw_feature base_hw_features_tSIx[] = {
 	BASE_HW_FEATURE_THREAD_GROUP_SPLIT,
 	BASE_HW_FEATURE_FLUSH_REDUCTION,
 	BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
 	BASE_HW_FEATURE_END
 };

-static const enum base_hw_feature base_hw_features_tDVx[] = {
+__attribute__((unused)) static const enum base_hw_feature base_hw_features_tDVx[] = {
 	BASE_HW_FEATURE_THREAD_GROUP_SPLIT,
 	BASE_HW_FEATURE_FLUSH_REDUCTION,
 	BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
 	BASE_HW_FEATURE_END
 };

-static const enum base_hw_feature base_hw_features_tNOx[] = {
+__attribute__((unused)) static const enum base_hw_feature base_hw_features_tNOx[] = {
 	BASE_HW_FEATURE_THREAD_GROUP_SPLIT,
 	BASE_HW_FEATURE_FLUSH_REDUCTION,
 	BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
@@ -80,7 +81,7 @@ static const enum base_hw_feature base_hw_features_tNOx[] = {
 	BASE_HW_FEATURE_END
 };

-static const enum base_hw_feature base_hw_features_tGOx[] = {
+__attribute__((unused)) static const enum base_hw_feature base_hw_features_tGOx[] = {
 	BASE_HW_FEATURE_THREAD_GROUP_SPLIT,
 	BASE_HW_FEATURE_FLUSH_REDUCTION,
 	BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
@@ -89,50 +90,55 @@ static const enum base_hw_feature base_hw_features_tGOx[] = {
 	BASE_HW_FEATURE_END
 };

-static const enum base_hw_feature base_hw_features_tTRx[] = {
+__attribute__((unused)) static const enum base_hw_feature base_hw_features_tTRx[] = {
 	BASE_HW_FEATURE_FLUSH_REDUCTION,
 	BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
 	BASE_HW_FEATURE_IDVS_GROUP_SIZE,
 	BASE_HW_FEATURE_CLEAN_ONLY_SAFE,
+	BASE_HW_FEATURE_FLUSH_INV_SHADER_OTHER,
 	BASE_HW_FEATURE_END
 };

-static const enum base_hw_feature base_hw_features_tNAx[] = {
+__attribute__((unused)) static const enum base_hw_feature base_hw_features_tNAx[] = {
 	BASE_HW_FEATURE_FLUSH_REDUCTION,
 	BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
 	BASE_HW_FEATURE_IDVS_GROUP_SIZE,
 	BASE_HW_FEATURE_CLEAN_ONLY_SAFE,
+	BASE_HW_FEATURE_FLUSH_INV_SHADER_OTHER,
 	BASE_HW_FEATURE_END
 };

-static const enum base_hw_feature base_hw_features_tBEx[] = {
+__attribute__((unused)) static const enum base_hw_feature base_hw_features_tBEx[] = {
 	BASE_HW_FEATURE_FLUSH_REDUCTION,
 	BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
 	BASE_HW_FEATURE_IDVS_GROUP_SIZE,
 	BASE_HW_FEATURE_L2_CONFIG,
 	BASE_HW_FEATURE_CLEAN_ONLY_SAFE,
+	BASE_HW_FEATURE_FLUSH_INV_SHADER_OTHER,
 	BASE_HW_FEATURE_END
 };

-static const enum base_hw_feature base_hw_features_tBAx[] = {
+__attribute__((unused)) static const enum base_hw_feature base_hw_features_tBAx[] = {
 	BASE_HW_FEATURE_FLUSH_REDUCTION,
 	BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
 	BASE_HW_FEATURE_IDVS_GROUP_SIZE,
 	BASE_HW_FEATURE_L2_CONFIG,
 	BASE_HW_FEATURE_CLEAN_ONLY_SAFE,
+	BASE_HW_FEATURE_FLUSH_INV_SHADER_OTHER,
 	BASE_HW_FEATURE_END
 };

-static const enum base_hw_feature base_hw_features_tDUx[] = {
+__attribute__((unused)) static const enum base_hw_feature base_hw_features_tDUx[] = {
 	BASE_HW_FEATURE_FLUSH_REDUCTION,
 	BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
 	BASE_HW_FEATURE_IDVS_GROUP_SIZE,
 	BASE_HW_FEATURE_L2_CONFIG,
 	BASE_HW_FEATURE_CLEAN_ONLY_SAFE,
+	BASE_HW_FEATURE_FLUSH_INV_SHADER_OTHER,
 	BASE_HW_FEATURE_END
 };

-static const enum base_hw_feature base_hw_features_tODx[] = {
+__attribute__((unused)) static const enum base_hw_feature base_hw_features_tODx[] = {
 	BASE_HW_FEATURE_FLUSH_REDUCTION,
 	BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
 	BASE_HW_FEATURE_L2_CONFIG,
@@ -140,7 +146,7 @@ static const enum base_hw_feature base_hw_features_tODx[] = {
 	BASE_HW_FEATURE_END
 };

-static const enum base_hw_feature base_hw_features_tGRx[] = {
+__attribute__((unused)) static const enum base_hw_feature base_hw_features_tGRx[] = {
 	BASE_HW_FEATURE_FLUSH_REDUCTION,
 	BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
 	BASE_HW_FEATURE_L2_CONFIG,
@@ -148,7 +154,7 @@ static const enum base_hw_feature base_hw_features_tGRx[] = {
 	BASE_HW_FEATURE_END
 };

-static const enum base_hw_feature base_hw_features_tVAx[] = {
+__attribute__((unused)) static const enum base_hw_feature base_hw_features_tVAx[] = {
 	BASE_HW_FEATURE_FLUSH_REDUCTION,
 	BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
 	BASE_HW_FEATURE_L2_CONFIG,
@@ -156,7 +162,7 @@ static const enum base_hw_feature base_hw_features_tVAx[] = {
 	BASE_HW_FEATURE_END
 };

-static const enum base_hw_feature base_hw_features_tTUx[] = {
+__attribute__((unused)) static const enum base_hw_feature base_hw_features_tTUx[] = {
 	BASE_HW_FEATURE_FLUSH_REDUCTION,
 	BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
 	BASE_HW_FEATURE_L2_CONFIG,
--- a/drivers/gpu/arm/bifrost/mali_base_hwconfig_issues.h
+++ b/drivers/gpu/arm/bifrost/mali_base_hwconfig_issues.h
@@ -63,11 +63,11 @@ enum base_hw_issue {
 	BASE_HW_ISSUE_END
 };

-static const enum base_hw_issue base_hw_issues_generic[] = {
+__attribute__((unused)) static const enum base_hw_issue base_hw_issues_generic[] = {
 	BASE_HW_ISSUE_END
 };

-static const enum base_hw_issue base_hw_issues_tMIx_r0p0_05dev0[] = {
+__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tMIx_r0p0_05dev0[] = {
 	BASE_HW_ISSUE_9435,
 	BASE_HW_ISSUE_10682,
 	BASE_HW_ISSUE_11054,
@@ -87,7 +87,7 @@ static const enum base_hw_issue base_hw_issues_tMIx_r0p0_05dev0[] = {
 	BASE_HW_ISSUE_END
 };

-static const enum base_hw_issue base_hw_issues_tMIx_r0p0[] = {
+__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tMIx_r0p0[] = {
 	BASE_HW_ISSUE_9435,
 	BASE_HW_ISSUE_10682,
 	BASE_HW_ISSUE_11054,
@@ -107,7 +107,7 @@ static const enum base_hw_issue base_hw_issues_tMIx_r0p0[] = {
 	BASE_HW_ISSUE_END
 };

-static const enum base_hw_issue base_hw_issues_tMIx_r0p1[] = {
+__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tMIx_r0p1[] = {
 	BASE_HW_ISSUE_9435,
 	BASE_HW_ISSUE_10682,
 	BASE_HW_ISSUE_11054,
@@ -127,7 +127,7 @@ static const enum base_hw_issue base_hw_issues_tMIx_r0p1[] = {
 	BASE_HW_ISSUE_END
 };

-static const enum base_hw_issue base_hw_issues_model_tMIx[] = {
+__attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tMIx[] = {
 	BASE_HW_ISSUE_5736,
 	BASE_HW_ISSUE_9435,
 	BASE_HW_ISSUE_TMIX_7891,
@@ -142,7 +142,7 @@ static const enum base_hw_issue base_hw_issues_model_tMIx[] = {
 	BASE_HW_ISSUE_END
 };

-static const enum base_hw_issue base_hw_issues_tHEx_r0p0[] = {
+__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tHEx_r0p0[] = {
 	BASE_HW_ISSUE_9435,
 	BASE_HW_ISSUE_10682,
 	BASE_HW_ISSUE_11054,
@@ -155,7 +155,7 @@ static const enum base_hw_issue base_hw_issues_tHEx_r0p0[] = {
 	BASE_HW_ISSUE_END
 };

-static const enum base_hw_issue base_hw_issues_tHEx_r0p1[] = {
+__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tHEx_r0p1[] = {
 	BASE_HW_ISSUE_9435,
 	BASE_HW_ISSUE_10682,
 	BASE_HW_ISSUE_11054,
@@ -168,7 +168,7 @@ static const enum base_hw_issue base_hw_issues_tHEx_r0p1[] = {
 	BASE_HW_ISSUE_END
 };

-static const enum base_hw_issue base_hw_issues_tHEx_r0p2[] = {
+__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tHEx_r0p2[] = {
 	BASE_HW_ISSUE_9435,
 	BASE_HW_ISSUE_10682,
 	BASE_HW_ISSUE_11054,
@@ -181,7 +181,7 @@ static const enum base_hw_issue base_hw_issues_tHEx_r0p2[] = {
 	BASE_HW_ISSUE_END
 };

-static const enum base_hw_issue base_hw_issues_tHEx_r0p3[] = {
+__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tHEx_r0p3[] = {
 	BASE_HW_ISSUE_9435,
 	BASE_HW_ISSUE_10682,
 	BASE_HW_ISSUE_TMIX_7891,
@@ -193,7 +193,7 @@ static const enum base_hw_issue base_hw_issues_tHEx_r0p3[] = {
 	BASE_HW_ISSUE_END
 };

-static const enum base_hw_issue base_hw_issues_model_tHEx[] = {
+__attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tHEx[] = {
 	BASE_HW_ISSUE_5736,
 	BASE_HW_ISSUE_9435,
 	BASE_HW_ISSUE_TMIX_7891,
@@ -203,7 +203,7 @@ static const enum base_hw_issue base_hw_issues_model_tHEx[] = {
 	BASE_HW_ISSUE_END
 };

-static const enum base_hw_issue base_hw_issues_tSIx_r0p0[] = {
+__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tSIx_r0p0[] = {
 	BASE_HW_ISSUE_9435,
 	BASE_HW_ISSUE_11054,
 	BASE_HW_ISSUE_TMIX_8133,
@@ -216,7 +216,7 @@ static const enum base_hw_issue base_hw_issues_tSIx_r0p0[] = {
 	BASE_HW_ISSUE_END
 };

-static const enum base_hw_issue base_hw_issues_tSIx_r0p1[] = {
+__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tSIx_r0p1[] = {
 	BASE_HW_ISSUE_9435,
 	BASE_HW_ISSUE_11054,
 	BASE_HW_ISSUE_TMIX_8133,
@@ -229,7 +229,7 @@ static const enum base_hw_issue base_hw_issues_tSIx_r0p1[] = {
 	BASE_HW_ISSUE_END
 };

-static const enum base_hw_issue base_hw_issues_tSIx_r1p0[] = {
+__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tSIx_r1p0[] = {
 	BASE_HW_ISSUE_9435,
 	BASE_HW_ISSUE_11054,
 	BASE_HW_ISSUE_TMIX_8133,
@@ -241,7 +241,7 @@ static const enum base_hw_issue base_hw_issues_tSIx_r1p0[] = {
 	BASE_HW_ISSUE_END
 };

-static const enum base_hw_issue base_hw_issues_tSIx_r1p1[] = {
+__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tSIx_r1p1[] = {
 	BASE_HW_ISSUE_9435,
 	BASE_HW_ISSUE_TMIX_8133,
 	BASE_HW_ISSUE_TSIX_1116,
@@ -252,7 +252,7 @@ static const enum base_hw_issue base_hw_issues_tSIx_r1p1[] = {
 	BASE_HW_ISSUE_END
 };

-static const enum base_hw_issue base_hw_issues_model_tSIx[] = {
+__attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tSIx[] = {
 	BASE_HW_ISSUE_5736,
 	BASE_HW_ISSUE_9435,
 	BASE_HW_ISSUE_TMIX_8133,
@@ -262,7 +262,7 @@ static const enum base_hw_issue base_hw_issues_model_tSIx[] = {
 	BASE_HW_ISSUE_END
 };

-static const enum base_hw_issue base_hw_issues_tDVx_r0p0[] = {
+__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tDVx_r0p0[] = {
 	BASE_HW_ISSUE_9435,
 	BASE_HW_ISSUE_TMIX_8133,
 	BASE_HW_ISSUE_TSIX_1116,
@@ -273,7 +273,7 @@ static const enum base_hw_issue base_hw_issues_tDVx_r0p0[] = {
 	BASE_HW_ISSUE_END
 };

-static const enum base_hw_issue base_hw_issues_model_tDVx[] = {
+__attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tDVx[] = {
 	BASE_HW_ISSUE_5736,
 	BASE_HW_ISSUE_9435,
 	BASE_HW_ISSUE_TMIX_8133,
@@ -283,7 +283,7 @@ static const enum base_hw_issue base_hw_issues_model_tDVx[] = {
 	BASE_HW_ISSUE_END
 };

-static const enum base_hw_issue base_hw_issues_tNOx_r0p0[] = {
+__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tNOx_r0p0[] = {
 	BASE_HW_ISSUE_9435,
 	BASE_HW_ISSUE_TMIX_8133,
 	BASE_HW_ISSUE_TSIX_1116,
@@ -295,7 +295,7 @@ static const enum base_hw_issue base_hw_issues_tNOx_r0p0[] = {
 	BASE_HW_ISSUE_END
 };

-static const enum base_hw_issue base_hw_issues_model_tNOx[] = {
+__attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tNOx[] = {
 	BASE_HW_ISSUE_5736,
 	BASE_HW_ISSUE_9435,
 	BASE_HW_ISSUE_TMIX_8133,
@@ -305,7 +305,7 @@ static const enum base_hw_issue base_hw_issues_model_tNOx[] = {
 	BASE_HW_ISSUE_END
 };

-static const enum base_hw_issue base_hw_issues_tGOx_r0p0[] = {
+__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tGOx_r0p0[] = {
 	BASE_HW_ISSUE_9435,
 	BASE_HW_ISSUE_TMIX_8133,
 	BASE_HW_ISSUE_TSIX_1116,
@@ -317,7 +317,7 @@ static const enum base_hw_issue base_hw_issues_tGOx_r0p0[] = {
 	BASE_HW_ISSUE_END
 };

-static const enum base_hw_issue base_hw_issues_tGOx_r1p0[] = {
+__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tGOx_r1p0[] = {
 	BASE_HW_ISSUE_9435,
 	BASE_HW_ISSUE_TMIX_8133,
 	BASE_HW_ISSUE_TSIX_1116,
@@ -329,7 +329,7 @@ static const enum base_hw_issue base_hw_issues_tGOx_r1p0[] = {
 	BASE_HW_ISSUE_END
 };

-static const enum base_hw_issue base_hw_issues_model_tGOx[] = {
+__attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tGOx[] = {
 	BASE_HW_ISSUE_5736,
 	BASE_HW_ISSUE_9435,
 	BASE_HW_ISSUE_TMIX_8133,
@@ -339,7 +339,7 @@ static const enum base_hw_issue base_hw_issues_model_tGOx[] = {
 	BASE_HW_ISSUE_END
 };

-static const enum base_hw_issue base_hw_issues_tTRx_r0p0[] = {
+__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tTRx_r0p0[] = {
 	BASE_HW_ISSUE_9435,
 	BASE_HW_ISSUE_TSIX_2033,
 	BASE_HW_ISSUE_TTRX_1337,
@@ -355,7 +355,7 @@ static const enum base_hw_issue base_hw_issues_tTRx_r0p0[] = {
 	BASE_HW_ISSUE_END
 };

-static const enum base_hw_issue base_hw_issues_tTRx_r0p1[] = {
+__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tTRx_r0p1[] = {
 	BASE_HW_ISSUE_9435,
 	BASE_HW_ISSUE_TSIX_2033,
 	BASE_HW_ISSUE_TTRX_1337,
@@ -371,7 +371,7 @@ static const enum base_hw_issue base_hw_issues_tTRx_r0p1[] = {
 	BASE_HW_ISSUE_END
 };

-static const enum base_hw_issue base_hw_issues_tTRx_r0p2[] = {
+__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tTRx_r0p2[] = {
 	BASE_HW_ISSUE_9435,
 	BASE_HW_ISSUE_TSIX_2033,
 	BASE_HW_ISSUE_TTRX_1337,
@@ -386,7 +386,7 @@ static const enum base_hw_issue base_hw_issues_tTRx_r0p2[] = {
 	BASE_HW_ISSUE_END
 };

-static const enum base_hw_issue base_hw_issues_model_tTRx[] = {
+__attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tTRx[] = {
 	BASE_HW_ISSUE_5736,
 	BASE_HW_ISSUE_9435,
 	BASE_HW_ISSUE_TSIX_2033,
@@ -398,7 +398,7 @@ static const enum base_hw_issue base_hw_issues_model_tTRx[] = {
 	BASE_HW_ISSUE_END
 };

-static const enum base_hw_issue base_hw_issues_tNAx_r0p0[] = {
+__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tNAx_r0p0[] = {
 	BASE_HW_ISSUE_9435,
 	BASE_HW_ISSUE_TSIX_2033,
 	BASE_HW_ISSUE_TTRX_1337,
@@ -414,7 +414,7 @@ static const enum base_hw_issue base_hw_issues_tNAx_r0p0[] = {
 	BASE_HW_ISSUE_END
 };

-static const enum base_hw_issue base_hw_issues_tNAx_r0p1[] = {
+__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tNAx_r0p1[] = {
 	BASE_HW_ISSUE_9435,
 	BASE_HW_ISSUE_TSIX_2033,
 	BASE_HW_ISSUE_TTRX_1337,
@@ -429,7 +429,7 @@ static const enum base_hw_issue base_hw_issues_tNAx_r0p1[] = {
 	BASE_HW_ISSUE_END
 };

-static const enum base_hw_issue base_hw_issues_model_tNAx[] = {
+__attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tNAx[] = {
 	BASE_HW_ISSUE_5736,
 	BASE_HW_ISSUE_9435,
 	BASE_HW_ISSUE_TSIX_2033,
@@ -441,7 +441,7 @@ static const enum base_hw_issue base_hw_issues_model_tNAx[] = {
 	BASE_HW_ISSUE_END
 };

-static const enum base_hw_issue base_hw_issues_tBEx_r0p0[] = {
+__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tBEx_r0p0[] = {
 	BASE_HW_ISSUE_9435,
 	BASE_HW_ISSUE_TSIX_2033,
 	BASE_HW_ISSUE_TTRX_1337,
@@ -455,7 +455,7 @@ static const enum base_hw_issue base_hw_issues_tBEx_r0p0[] = {
 	BASE_HW_ISSUE_END
 };

-static const enum base_hw_issue base_hw_issues_tBEx_r0p1[] = {
+__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tBEx_r0p1[] = {
 	BASE_HW_ISSUE_9435,
 	BASE_HW_ISSUE_TSIX_2033,
 	BASE_HW_ISSUE_TTRX_1337,
@@ -468,7 +468,7 @@ static const enum base_hw_issue base_hw_issues_tBEx_r0p1[] = {
 	BASE_HW_ISSUE_END
 };

-static const enum base_hw_issue base_hw_issues_tBEx_r1p0[] = {
+__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tBEx_r1p0[] = {
 	BASE_HW_ISSUE_9435,
 	BASE_HW_ISSUE_TSIX_2033,
 	BASE_HW_ISSUE_TTRX_1337,
@@ -481,7 +481,7 @@ static const enum base_hw_issue base_hw_issues_tBEx_r1p0[] = {
 	BASE_HW_ISSUE_END
 };

-static const enum base_hw_issue base_hw_issues_tBEx_r1p1[] = {
+__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tBEx_r1p1[] = {
 	BASE_HW_ISSUE_9435,
 	BASE_HW_ISSUE_TSIX_2033,
 	BASE_HW_ISSUE_TTRX_1337,
@@ -494,7 +494,7 @@ static const enum base_hw_issue base_hw_issues_tBEx_r1p1[] = {
 	BASE_HW_ISSUE_END
 };

-static const enum base_hw_issue base_hw_issues_model_tBEx[] = {
+__attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tBEx[] = {
 	BASE_HW_ISSUE_5736,
 	BASE_HW_ISSUE_9435,
 	BASE_HW_ISSUE_TSIX_2033,
@@ -506,7 +506,7 @@ static const enum base_hw_issue base_hw_issues_model_tBEx[] = {
 	BASE_HW_ISSUE_END
 };

-static const enum base_hw_issue base_hw_issues_lBEx_r1p0[] = {
+__attribute__((unused)) static const enum base_hw_issue base_hw_issues_lBEx_r1p0[] = {
 	BASE_HW_ISSUE_9435,
 	BASE_HW_ISSUE_TSIX_2033,
 	BASE_HW_ISSUE_TTRX_1337,
@@ -520,7 +520,7 @@ static const enum base_hw_issue base_hw_issues_lBEx_r1p0[] = {
 	BASE_HW_ISSUE_END
 };

-static const enum base_hw_issue base_hw_issues_lBEx_r1p1[] = {
+__attribute__((unused)) static const enum base_hw_issue base_hw_issues_lBEx_r1p1[] = {
 	BASE_HW_ISSUE_9435,
 	BASE_HW_ISSUE_TSIX_2033,
 	BASE_HW_ISSUE_TTRX_1337,
@@ -533,7 +533,7 @@ static const enum base_hw_issue base_hw_issues_lBEx_r1p1[] = {
 	BASE_HW_ISSUE_END
 };

-static const enum base_hw_issue base_hw_issues_tBAx_r0p0[] = {
+__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tBAx_r0p0[] = {
 	BASE_HW_ISSUE_9435,
 	BASE_HW_ISSUE_TSIX_2033,
 	BASE_HW_ISSUE_TTRX_1337,
@@ -546,7 +546,7 @@ static const enum base_hw_issue base_hw_issues_tBAx_r0p0[] = {
 	BASE_HW_ISSUE_END
 };

-static const enum base_hw_issue base_hw_issues_tBAx_r1p0[] = {
+__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tBAx_r1p0[] = {
 	BASE_HW_ISSUE_9435,
 	BASE_HW_ISSUE_TSIX_2033,
 	BASE_HW_ISSUE_TTRX_1337,
@@ -559,7 +559,7 @@ static const enum base_hw_issue base_hw_issues_tBAx_r1p0[] = {
 	BASE_HW_ISSUE_END
 };

-static const enum base_hw_issue base_hw_issues_model_tBAx[] = {
+__attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tBAx[] = {
 	BASE_HW_ISSUE_5736,
 	BASE_HW_ISSUE_9435,
 	BASE_HW_ISSUE_TSIX_2033,
@@ -571,7 +571,7 @@ static const enum base_hw_issue base_hw_issues_model_tBAx[] = {
 	BASE_HW_ISSUE_END
 };

-static const enum base_hw_issue base_hw_issues_tDUx_r0p0[] = {
+__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tDUx_r0p0[] = {
 	BASE_HW_ISSUE_9435,
 	BASE_HW_ISSUE_TSIX_2033,
 	BASE_HW_ISSUE_TTRX_1337,
@@ -581,7 +581,7 @@ static const enum base_hw_issue base_hw_issues_tDUx_r0p0[] = {
 	BASE_HW_ISSUE_END
 };

-static const enum base_hw_issue base_hw_issues_model_tDUx[] = {
+__attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tDUx[] = {
 	BASE_HW_ISSUE_5736,
 	BASE_HW_ISSUE_9435,
 	BASE_HW_ISSUE_TSIX_2033,
@@ -591,7 +591,7 @@ static const enum base_hw_issue base_hw_issues_model_tDUx[] = {
 	BASE_HW_ISSUE_END
 };

-static const enum base_hw_issue base_hw_issues_tODx_r0p0[] = {
+__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tODx_r0p0[] = {
 	BASE_HW_ISSUE_9435,
 	BASE_HW_ISSUE_TSIX_2033,
 	BASE_HW_ISSUE_TTRX_1337,
@@ -599,7 +599,7 @@ static const enum base_hw_issue base_hw_issues_tODx_r0p0[] = {
 	BASE_HW_ISSUE_END
 };

-static const enum base_hw_issue base_hw_issues_model_tODx[] = {
+__attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tODx[] = {
 	BASE_HW_ISSUE_5736,
 	BASE_HW_ISSUE_9435,
 	BASE_HW_ISSUE_TSIX_2033,
@@ -608,14 +608,14 @@ static const enum base_hw_issue base_hw_issues_model_tODx[] = {
 	BASE_HW_ISSUE_END
 };

-static const enum base_hw_issue base_hw_issues_tGRx_r0p0[] = {
+__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tGRx_r0p0[] = {
 	BASE_HW_ISSUE_9435,
 	BASE_HW_ISSUE_TSIX_2033,
 	BASE_HW_ISSUE_TTRX_1337,
 	BASE_HW_ISSUE_END
 };

-static const enum base_hw_issue base_hw_issues_model_tGRx[] = {
+__attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tGRx[] = {
 	BASE_HW_ISSUE_5736,
 	BASE_HW_ISSUE_9435,
 	BASE_HW_ISSUE_TSIX_2033,
@@ -623,14 +623,14 @@ static const enum base_hw_issue base_hw_issues_model_tGRx[] = {
 	BASE_HW_ISSUE_END
 };

-static const enum base_hw_issue base_hw_issues_tVAx_r0p0[] = {
+__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tVAx_r0p0[] = {
 	BASE_HW_ISSUE_9435,
 	BASE_HW_ISSUE_TSIX_2033,
 	BASE_HW_ISSUE_TTRX_1337,
 	BASE_HW_ISSUE_END
 };

-static const enum base_hw_issue base_hw_issues_model_tVAx[] = {
+__attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tVAx[] = {
 	BASE_HW_ISSUE_5736,
 	BASE_HW_ISSUE_9435,
 	BASE_HW_ISSUE_TSIX_2033,
@@ -638,7 +638,7 @@ static const enum base_hw_issue base_hw_issues_model_tVAx[] = {
 	BASE_HW_ISSUE_END
 };

-static const enum base_hw_issue base_hw_issues_model_tTUx[] = {
+__attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tTUx[] = {
 	BASE_HW_ISSUE_5736,
 	BASE_HW_ISSUE_9435,
 	BASE_HW_ISSUE_TSIX_2033,
@@ -646,7 +646,7 @@ static const enum base_hw_issue base_hw_issues_model_tTUx[] = {
 	BASE_HW_ISSUE_END
 };

-static const enum base_hw_issue base_hw_issues_tTUx_r0p0[] = {
+__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tTUx_r0p0[] = {
 	BASE_HW_ISSUE_9435,
 	BASE_HW_ISSUE_TSIX_2033,
 	BASE_HW_ISSUE_TTRX_1337,
--- a/drivers/gpu/arm/bifrost/mali_kbase_core_linux.c
+++ b/drivers/gpu/arm/bifrost/mali_kbase_core_linux.c
@@ -31,6 +31,10 @@
 #include <ipa/mali_kbase_ipa_debugfs.h>
 #endif /* CONFIG_DEVFREQ_THERMAL */
 #endif /* CONFIG_MALI_BIFROST_DEVFREQ */
+#if IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI)
+#include "backend/gpu/mali_kbase_model_linux.h"
+#include <backend/gpu/mali_kbase_model_dummy.h>
+#endif /* CONFIG_MALI_BIFROST_NO_MALI */
 #include "mali_kbase_mem_profile_debugfs_buf_size.h"
 #include "mali_kbase_mem.h"
 #include "mali_kbase_mem_pool_debugfs.h"
@@ -52,7 +56,6 @@
 #endif
 #include "mali_kbase_hwcnt_context.h"
 #include "mali_kbase_hwcnt_virtualizer.h"
-#include "mali_kbase_hwcnt_legacy.h"
 #include "mali_kbase_kinstr_prfcnt.h"
 #include "mali_kbase_vinstr.h"
 #if MALI_USE_CSF
@@ -60,6 +63,7 @@
 #include "csf/mali_kbase_csf_tiler_heap.h"
 #include "csf/mali_kbase_csf_csg_debugfs.h"
 #include "csf/mali_kbase_csf_cpu_queue_debugfs.h"
+#include "csf/mali_kbase_csf_event.h"
 #endif
 #ifdef CONFIG_MALI_ARBITER_SUPPORT
 #include "arbiter/mali_kbase_arbiter_pm.h"
@@ -343,15 +347,6 @@ static void kbase_file_delete(struct kbase_file *const kfile)
 #if IS_ENABLED(CONFIG_DEBUG_FS)
 		kbasep_mem_profile_debugfs_remove(kctx);
 #endif
-
-		mutex_lock(&kctx->legacy_hwcnt_lock);
-		/* If this client was performing hardware counter dumping and
-		 * did not explicitly detach itself, destroy it now
-		 */
-		kbase_hwcnt_legacy_client_destroy(kctx->legacy_hwcnt_cli);
-		kctx->legacy_hwcnt_cli = NULL;
-		mutex_unlock(&kctx->legacy_hwcnt_lock);
-
 		kbase_context_debugfs_term(kctx);

 		kbase_destroy_context(kctx);
@@ -906,62 +901,6 @@ static int kbase_api_hwcnt_reader_setup(struct kbase_context *kctx,
 	return kbase_vinstr_hwcnt_reader_setup(kctx->kbdev->vinstr_ctx, setup);
 }

-static int kbase_api_hwcnt_enable(struct kbase_context *kctx,
-		struct kbase_ioctl_hwcnt_enable *enable)
-{
-	int ret;
-
-	mutex_lock(&kctx->legacy_hwcnt_lock);
-	if (enable->dump_buffer != 0) {
-		/* Non-zero dump buffer, so user wants to create the client */
-		if (kctx->legacy_hwcnt_cli == NULL) {
-			ret = kbase_hwcnt_legacy_client_create(
-				kctx->kbdev->hwcnt_gpu_virt,
-				enable,
-				&kctx->legacy_hwcnt_cli);
-		} else {
-			/* This context already has a client */
-			ret = -EBUSY;
-		}
-	} else {
-		/* Zero dump buffer, so user wants to destroy the client */
-		if (kctx->legacy_hwcnt_cli != NULL) {
-			kbase_hwcnt_legacy_client_destroy(
-				kctx->legacy_hwcnt_cli);
-			kctx->legacy_hwcnt_cli = NULL;
-			ret = 0;
-		} else {
-			/* This context has no client to destroy */
-			ret = -EINVAL;
-		}
-	}
-	mutex_unlock(&kctx->legacy_hwcnt_lock);
-
-	return ret;
-}
-
-static int kbase_api_hwcnt_dump(struct kbase_context *kctx)
-{
-	int ret;
-
-	mutex_lock(&kctx->legacy_hwcnt_lock);
-	ret = kbase_hwcnt_legacy_client_dump(kctx->legacy_hwcnt_cli);
-	mutex_unlock(&kctx->legacy_hwcnt_lock);
-
-	return ret;
-}
-
-static int kbase_api_hwcnt_clear(struct kbase_context *kctx)
-{
-	int ret;
-
-	mutex_lock(&kctx->legacy_hwcnt_lock);
-	ret = kbase_hwcnt_legacy_client_clear(kctx->legacy_hwcnt_cli);
-	mutex_unlock(&kctx->legacy_hwcnt_lock);
-
-	return ret;
-}
-
 static int kbase_api_get_cpu_gpu_timeinfo(struct kbase_context *kctx,
 		union kbase_ioctl_get_cpu_gpu_timeinfo *timeinfo)
 {
@@ -993,6 +932,17 @@ static int kbase_api_get_cpu_gpu_timeinfo(struct kbase_context *kctx,
 	return 0;
 }

+#if IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI)
+static int kbase_api_hwcnt_set(struct kbase_context *kctx,
+		struct kbase_ioctl_hwcnt_values *values)
+{
+	gpu_model_set_dummy_prfcnt_sample(
+			(u32 __user *)(uintptr_t)values->data,
+			values->size);
+
+	return 0;
+}
+#endif /* CONFIG_MALI_BIFROST_NO_MALI */

 static int kbase_api_disjoint_query(struct kbase_context *kctx,
 		struct kbase_ioctl_disjoint_query *query)
@@ -1416,6 +1366,30 @@ static int kbasep_cs_queue_kick(struct kbase_context *kctx,
 	return kbase_csf_queue_kick(kctx, kick);
 }

+static int kbasep_cs_queue_group_create_1_6(
+	struct kbase_context *kctx,
+	union kbase_ioctl_cs_queue_group_create_1_6 *create)
+{
+	union kbase_ioctl_cs_queue_group_create
+		new_create = { .in = {
+				       .tiler_mask = create->in.tiler_mask,
+				       .fragment_mask =
+					       create->in.fragment_mask,
+				       .compute_mask = create->in.compute_mask,
+				       .cs_min = create->in.cs_min,
+				       .priority = create->in.priority,
+				       .tiler_max = create->in.tiler_max,
+				       .fragment_max = create->in.fragment_max,
+				       .compute_max = create->in.compute_max,
+			       } };
+
+	int ret = kbase_csf_queue_group_create(kctx, &new_create);
+
+	create->out.group_handle = new_create.out.group_handle;
+	create->out.group_uid = new_create.out.group_uid;
+
+	return ret;
+}
 static int kbasep_cs_queue_group_create(struct kbase_context *kctx,
 			     union kbase_ioctl_cs_queue_group_create *create)
 {
@@ -1874,28 +1848,20 @@ static long kbase_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 				struct kbase_ioctl_hwcnt_reader_setup,
 				kctx);
 		break;
-	case KBASE_IOCTL_HWCNT_ENABLE:
-		KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_HWCNT_ENABLE,
-				kbase_api_hwcnt_enable,
-				struct kbase_ioctl_hwcnt_enable,
-				kctx);
-		break;
-	case KBASE_IOCTL_HWCNT_DUMP:
-		KBASE_HANDLE_IOCTL(KBASE_IOCTL_HWCNT_DUMP,
-				kbase_api_hwcnt_dump,
-				kctx);
-		break;
-	case KBASE_IOCTL_HWCNT_CLEAR:
-		KBASE_HANDLE_IOCTL(KBASE_IOCTL_HWCNT_CLEAR,
-				kbase_api_hwcnt_clear,
-				kctx);
-		break;
 	case KBASE_IOCTL_GET_CPU_GPU_TIMEINFO:
 		KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_GET_CPU_GPU_TIMEINFO,
 				kbase_api_get_cpu_gpu_timeinfo,
 				union kbase_ioctl_get_cpu_gpu_timeinfo,
 				kctx);
 		break;
+#if IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI)
+	case KBASE_IOCTL_HWCNT_SET:
+		KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_HWCNT_SET,
+				kbase_api_hwcnt_set,
+				struct kbase_ioctl_hwcnt_values,
+				kctx);
+		break;
+#endif /* CONFIG_MALI_BIFROST_NO_MALI */
 #ifdef CONFIG_MALI_CINSTR_GWT
 	case KBASE_IOCTL_CINSTR_GWT_START:
 		KBASE_HANDLE_IOCTL(KBASE_IOCTL_CINSTR_GWT_START,
@@ -1950,6 +1916,12 @@ static long kbase_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 				struct kbase_ioctl_cs_queue_kick,
 				kctx);
 		break;
+	case KBASE_IOCTL_CS_QUEUE_GROUP_CREATE_1_6:
+		KBASE_HANDLE_IOCTL_INOUT(
+			KBASE_IOCTL_CS_QUEUE_GROUP_CREATE_1_6,
+			kbasep_cs_queue_group_create_1_6,
+			union kbase_ioctl_cs_queue_group_create_1_6, kctx);
+		break;
 	case KBASE_IOCTL_CS_QUEUE_GROUP_CREATE:
 		KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_CS_QUEUE_GROUP_CREATE,
 				kbasep_cs_queue_group_create,
@@ -2049,7 +2021,7 @@ static ssize_t kbase_read(struct file *filp, char __user *buf, size_t count, lof
 	if (atomic_read(&kctx->event_count))
 		read_event = true;
 	else
-		read_error = kbase_csf_read_error(kctx, &event_data);
+		read_error = kbase_csf_event_read_error(kctx, &event_data);

 	if (!read_event && !read_error) {
 		bool dump = kbase_csf_cpu_queue_read_dump_req(kctx,
@@ -2154,7 +2126,7 @@ int kbase_event_pending(struct kbase_context *ctx)
 	WARN_ON_ONCE(!ctx);

 	return (atomic_read(&ctx->event_count) != 0) ||
-		kbase_csf_error_pending(ctx) ||
+		kbase_csf_event_error_pending(ctx) ||
 		kbase_csf_cpu_queue_dump_needed(ctx);
 }
 #else
@@ -3911,8 +3883,6 @@ static DEVICE_ATTR(js_ctx_scheduling_mode, S_IRUGO | S_IWUSR,
 		show_js_ctx_scheduling_mode,
 		set_js_ctx_scheduling_mode);

-#ifdef MALI_KBASE_BUILD
-
 /* Number of entries in serialize_jobs_settings[] */
 #define NR_SERIALIZE_JOBS_SETTINGS 5
 /* Maximum string length in serialize_jobs_settings[].name */
@@ -4127,7 +4097,6 @@ static ssize_t store_serialize_jobs_sysfs(struct device *dev,

 static DEVICE_ATTR(serialize_jobs, 0600, show_serialize_jobs_sysfs,
 		   store_serialize_jobs_sysfs);
-#endif /* MALI_KBASE_BUILD */
 #endif /* !MALI_USE_CSF */

 static void kbasep_protected_mode_hwcnt_disable_worker(struct work_struct *data)
@@ -4223,6 +4192,15 @@ void kbase_protected_mode_term(struct kbase_device *kbdev)
 	kfree(kbdev->protected_dev);
 }

+#if IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI)
+static int kbase_common_reg_map(struct kbase_device *kbdev)
+{
+	return 0;
+}
+static void kbase_common_reg_unmap(struct kbase_device * const kbdev)
+{
+}
+#else /* CONFIG_MALI_BIFROST_NO_MALI */
 static int kbase_common_reg_map(struct kbase_device *kbdev)
 {
 	int err = 0;
@@ -4258,6 +4236,7 @@ static void kbase_common_reg_unmap(struct kbase_device * const kbdev)
 		kbdev->reg_size = 0;
 	}
 }
+#endif /* CONFIG_MALI_BIFROST_NO_MALI */

 int registers_map(struct kbase_device * const kbdev)
 {
@@ -4595,7 +4574,6 @@ void power_control_term(struct kbase_device *kbdev)
 #endif
 }

-#ifdef MALI_KBASE_BUILD
 #if IS_ENABLED(CONFIG_DEBUG_FS)

 static void trigger_reset(struct kbase_device *kbdev)
@@ -4868,7 +4846,6 @@ void kbase_device_debugfs_term(struct kbase_device *kbdev)
 	debugfs_remove_recursive(kbdev->mali_debugfs_directory);
 }
 #endif /* CONFIG_DEBUG_FS */
-#endif /* MALI_KBASE_BUILD */

 int kbase_device_coherency_init(struct kbase_device *kbdev)
 {
@@ -5259,10 +5236,8 @@ static int kbase_platform_device_probe(struct platform_device *pdev)
 		dev_set_drvdata(kbdev->dev, NULL);
 		kbase_device_free(kbdev);
 	} else {
-#ifdef MALI_KBASE_BUILD
 		dev_info(kbdev->dev,
 			"Probed as %s\n", dev_name(kbdev->mdev.this_device));
-#endif /* MALI_KBASE_BUILD */
 		kbase_increment_device_id();
 #ifdef CONFIG_MALI_ARBITER_SUPPORT
 		mutex_lock(&kbdev->pm.lock);
@@ -5283,7 +5258,7 @@ static int kbase_platform_device_probe(struct platform_device *pdev)
 *
 * @dev:  The device to suspend
 *
- * Return: A standard Linux error code
+ * Return: A standard Linux error code on failure, 0 otherwise.
 */
 static int kbase_device_suspend(struct device *dev)
 {
@@ -5292,7 +5267,10 @@ static int kbase_device_suspend(struct device *dev)
 	if (!kbdev)
 		return -ENODEV;

-	kbase_pm_suspend(kbdev);
+	if (kbase_pm_suspend(kbdev)) {
+		dev_warn(kbdev->dev, "Abort suspend as GPU suspension failed");
+		return -EBUSY;
+	}

 #ifdef CONFIG_MALI_BIFROST_DVFS
 	kbase_pm_metrics_stop(kbdev);
@@ -5531,6 +5509,7 @@ MODULE_VERSION(MALI_RELEASE_NAME " (UK version " \
 		__stringify(BASE_UK_VERSION_MAJOR) "." \
 		__stringify(BASE_UK_VERSION_MINOR) ")");
 MODULE_SOFTDEP("pre: memory_group_manager");
+MODULE_INFO(import_ns, "DMA_BUF");

 #define CREATE_TRACE_POINTS
 /* Create the trace points (otherwise we just get code to call a tracepoint) */
--- a/drivers/gpu/arm/bifrost/mali_kbase_ctx_sched.c
+++ b/drivers/gpu/arm/bifrost/mali_kbase_ctx_sched.c
@@ -23,6 +23,9 @@
 #include <mali_kbase_defs.h>
 #include "mali_kbase_ctx_sched.h"
 #include "tl/mali_kbase_tracepoints.h"
+#if !MALI_USE_CSF
+#include <mali_kbase_hwaccess_jm.h>
+#endif

 /* Helper for ktrace */
 #if KBASE_KTRACE_ENABLE
@@ -124,7 +127,6 @@ int kbase_ctx_sched_retain_ctx(struct kbase_context *kctx)
 						kbdev, prev_kctx->id);
 					prev_kctx->as_nr = KBASEP_AS_NR_INVALID;
 				}
-
 				kctx->as_nr = free_as;
 				kbdev->as_to_kctx[free_as] = kctx;
 				KBASE_TLSTREAM_TL_KBASE_CTX_ASSIGN_AS(
@@ -173,6 +175,9 @@ void kbase_ctx_sched_release_ctx(struct kbase_context *kctx)
 			kbdev->as_to_kctx[kctx->as_nr] = NULL;
 			kctx->as_nr = KBASEP_AS_NR_INVALID;
 			kbase_ctx_flag_clear(kctx, KCTX_AS_DISABLED_ON_FAULT);
+#if !MALI_USE_CSF
+			kbase_backend_slot_kctx_purge_locked(kbdev, kctx);
+#endif
 		}
 	}

--- a/drivers/gpu/arm/bifrost/mali_kbase_defs.h
+++ b/drivers/gpu/arm/bifrost/mali_kbase_defs.h
@@ -746,6 +746,7 @@ struct kbase_process {
 * @hwcnt.addr:            HW counter address
 * @hwcnt.addr_bytes:      HW counter size in bytes
 * @hwcnt.backend:         Kbase instrumentation backend
+ * @hwcnt_watchdog_timer:  Hardware counter watchdog interface.
 * @hwcnt_gpu_iface:       Backend interface for GPU hardware counter access.
 * @hwcnt_gpu_ctx:         Context for GPU hardware counter access.
 *                         @hwaccess_lock must be held when calling
@@ -774,8 +775,8 @@ struct kbase_process {
 * @cache_clean_in_progress: Set when a cache clean has been started, and
 *                         cleared when it has finished. This prevents multiple
 *                         cache cleans being done simultaneously.
- * @cache_clean_queued:    Set if a cache clean is invoked while another is in
- *                         progress. If this happens, another cache clean needs
+ * @cache_clean_queued:    Pending cache clean operations invoked while another is
+ *                         in progress. If this is not 0, another cache clean needs
 *                         to be triggered immediately after completion of the
 *                         current one.
 * @cache_clean_wait:      Signalled when a cache clean has finished.
@@ -984,6 +985,15 @@ struct kbase_device {
 	char devname[DEVNAME_SIZE];
 	u32  id;

+#if IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI)
+	void *model;
+	struct kmem_cache *irq_slab;
+	struct workqueue_struct *irq_workq;
+	atomic_t serving_job_irq;
+	atomic_t serving_gpu_irq;
+	atomic_t serving_mmu_irq;
+	spinlock_t reg_op_lock;
+#endif /* CONFIG_MALI_BIFROST_NO_MALI */
 	struct kbase_pm_device_data pm;

 	struct kbase_mem_pool_group mem_pools;
@@ -1013,6 +1023,7 @@ struct kbase_device {

 #if MALI_USE_CSF
 	struct kbase_hwcnt_backend_csf_if hwcnt_backend_csf_if_fw;
+	struct kbase_hwcnt_watchdog_interface hwcnt_watchdog_timer;
 #else
 	struct kbase_hwcnt {
 		spinlock_t lock;
@@ -1042,7 +1053,7 @@ struct kbase_device {
 	u64 lowest_gpu_freq_khz;

 	bool cache_clean_in_progress;
-	bool cache_clean_queued;
+	u32 cache_clean_queued;
 	wait_queue_head_t cache_clean_wait;

 	void *platform_context;
@@ -1213,6 +1224,7 @@ struct kbase_device {
 	struct priority_control_manager_device *pcm_dev;

 	struct notifier_block oom_notifier_block;
+
 };

 /**
@@ -1570,6 +1582,12 @@ struct kbase_sub_alloc {
 *                        pages used for GPU allocations, done for the context,
 *                        to the memory consumed by the process.
 * @gpu_va_end:           End address of the GPU va space (in 4KB page units)
+ * @running_total_tiler_heap_nr_chunks: Running total of number of chunks in all
+ *                        tiler heaps of the kbase context.
+ * @running_total_tiler_heap_memory: Running total of the tiler heap memory in the
+ *                        kbase context.
+ * @peak_total_tiler_heap_memory: Peak value of the total tiler heap memory in the
+ *                        kbase context.
 * @jit_va:               Indicates if a JIT_VA zone has been created.
 * @mem_profile_data:     Buffer containing the profiling information provided by
 *                        Userspace, can be read through the mem_profile debugfs file.
@@ -1596,11 +1614,6 @@ struct kbase_sub_alloc {
 * @slots_pullable:       Bitmask of slots, indicating the slots for which the
 *                        context has pullable atoms in the runnable tree.
 * @work:                 Work structure used for deferred ASID assignment.
- * @legacy_hwcnt_cli:     Pointer to the legacy userspace hardware counters
- *                        client, there can be only such client per kbase
- *                        context.
- * @legacy_hwcnt_lock:    Lock used to prevent concurrent access to
- *                        @legacy_hwcnt_cli.
 * @completed_jobs:       List containing completed atoms for which base_jd_event is
 *                        to be posted.
 * @work_count:           Number of work items, corresponding to atoms, currently
@@ -1783,6 +1796,11 @@ struct kbase_context {
 	spinlock_t         mm_update_lock;
 	struct mm_struct __rcu *process_mm;
 	u64 gpu_va_end;
+#if MALI_USE_CSF
+	u32 running_total_tiler_heap_nr_chunks;
+	u64 running_total_tiler_heap_memory;
+	u64 peak_total_tiler_heap_memory;
+#endif
 	bool jit_va;

 #if IS_ENABLED(CONFIG_DEBUG_FS)
@@ -1796,10 +1814,6 @@ struct kbase_context {
 	struct list_head job_fault_resume_event_list;

 #endif /* CONFIG_DEBUG_FS */
-
-	struct kbase_hwcnt_legacy_client *legacy_hwcnt_cli;
-	struct mutex legacy_hwcnt_lock;
-
 	struct kbase_va_region *jit_alloc[1 + BASE_JIT_ALLOC_COUNT];
 	u8 jit_max_allocations;
 	u8 jit_current_allocations;
--- a/drivers/gpu/arm/bifrost/mali_kbase_gpuprops.c
+++ b/drivers/gpu/arm/bifrost/mali_kbase_gpuprops.c
@@ -371,6 +371,7 @@ static void kbase_gpuprops_calculate_props(
 	gpu_id = kbdev->gpu_props.props.raw_props.gpu_id;

 #if MALI_USE_CSF
+	CSTD_UNUSED(gpu_id);
 	gpu_props->thread_props.max_registers =
 		KBASE_UBFX32(gpu_props->raw_props.thread_features,
 			     0U, 22);
--- a/drivers/gpu/arm/bifrost/mali_kbase_hwaccess_instr.h
+++ b/drivers/gpu/arm/bifrost/mali_kbase_hwaccess_instr.h
@@ -144,4 +144,27 @@ void kbase_instr_backend_term(struct kbase_device *kbdev);
 void kbase_instr_backend_debugfs_init(struct kbase_device *kbdev);
 #endif

+/**
+ * kbase_instr_hwcnt_on_unrecoverable_error() - JM HWC instr backend function
+ *                                              called when unrecoverable errors
+ *                                              are detected.
+ * @kbdev: Kbase device
+ *
+ * This should be called on encountering errors that can only be recovered from
+ * with reset, or that may put HWC logic in a state that could result in a hang.
+ * For example, when HW becomes unresponsive.
+ *
+ * Caller requires kbdev->hwaccess_lock held.
+ */
+void kbase_instr_hwcnt_on_unrecoverable_error(struct kbase_device *kbdev);
+
+/**
+ * kbase_instr_hwcnt_on_before_reset() - JM HWC instr backend function to be
+ *                                       called immediately before a reset.
+ *                                       Takes us out of the unrecoverable
+ *                                       error state, if we were in it.
+ * @kbdev: Kbase device
+ */
+void kbase_instr_hwcnt_on_before_reset(struct kbase_device *kbdev);
+
 #endif /* _KBASE_HWACCESS_INSTR_H_ */
--- a/drivers/gpu/arm/bifrost/mali_kbase_hwaccess_jm.h
+++ b/drivers/gpu/arm/bifrost/mali_kbase_hwaccess_jm.h
@@ -299,4 +299,21 @@ void kbase_job_slot_hardstop(struct kbase_context *kctx, int js,
 */
 bool kbase_gpu_atoms_submitted_any(struct kbase_device *kbdev);

+/**
+ * kbase_backend_slot_kctx_purge_locked - Perform a purge on the slot_rb tracked
+ *                                        kctx
+ *
+ * @kbdev:	Device pointer
+ * @kctx:	The kbase context that needs to be purged from slot_rb[]
+ *
+ * For JM GPUs, the L1 read only caches may need a start_flush invalidation,
+ * potentially on all slots (even if the kctx was only using a single slot),
+ * following a context termination or address-space ID recycle. This function
+ * performs a clean-up purge on the given kctx if it has been tracked by
+ * slot_rb[] objects.
+ *
+ * Caller must hold kbase_device->hwaccess_lock.
+ */
+void kbase_backend_slot_kctx_purge_locked(struct kbase_device *kbdev, struct kbase_context *kctx);
+
 #endif /* _KBASE_HWACCESS_JM_H_ */
--- a/drivers/gpu/arm/bifrost/mali_kbase_hwaccess_pm.h
+++ b/drivers/gpu/arm/bifrost/mali_kbase_hwaccess_pm.h
@@ -85,8 +85,10 @@ void kbase_hwaccess_pm_halt(struct kbase_device *kbdev);
 * Perform any backend-specific actions to suspend the GPU
 *
 * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Return: 0 if suspend was successful.
 */
-void kbase_hwaccess_pm_suspend(struct kbase_device *kbdev);
+int kbase_hwaccess_pm_suspend(struct kbase_device *kbdev);

 /**
 * Perform any backend-specific actions to resume the GPU from a suspend
--- a/drivers/gpu/arm/bifrost/mali_kbase_hwcnt.c
+++ b/drivers/gpu/arm/bifrost/mali_kbase_hwcnt.c
@@ -158,7 +158,6 @@ int kbase_hwcnt_context_init(

 	return 0;

-	destroy_workqueue(hctx->wq);
 err_alloc_workqueue:
 	kfree(hctx);
 err_alloc_hctx:
--- a/drivers/gpu/arm/bifrost/mali_kbase_hwcnt_backend_csf.c
+++ b/drivers/gpu/arm/bifrost/mali_kbase_hwcnt_backend_csf.c
@@ -36,16 +36,24 @@
 #define BASE_MAX_NR_CLOCKS_REGULATORS 4
 #endif

+/* Backend watchdog timer interval in milliseconds: 1 second. */
+#define HWCNT_BACKEND_WATCHDOG_TIMER_INTERVAL_MS ((u32)1000)
+
 /**
 * enum kbase_hwcnt_backend_csf_dump_state - HWC CSF backend dumping states.
 *
 * @KBASE_HWCNT_BACKEND_CSF_DUMP_IDLE: Initial state, or the state if there is
 * an error.
 *
- * @KBASE_HWCNT_BACKEND_CSF_DUMP_REQUESTED: A dump has been requested and we are
- * waiting for an ACK, this ACK could come from either PRFCNT_ACK,
+ * @KBASE_HWCNT_BACKEND_CSF_DUMP_REQUESTED: A user dump has been requested and
+ * we are waiting for an ACK, this ACK could come from either PRFCNT_ACK,
 * PROTMODE_ENTER_ACK, or if an error occurs.
 *
+ * @KBASE_HWCNT_BACKEND_CSF_DUMP_WATCHDOG_REQUESTED: A watchdog dump has been
+ * requested and we're waiting for an ACK - this ACK could come from either
+ * PRFCNT_ACK, or if an error occurs. PROTMODE_ENTER_ACK is not applied here
+ * since a watchdog request can't be triggered in protected mode.
+ *
 * @KBASE_HWCNT_BACKEND_CSF_DUMP_QUERYING_INSERT: Checking the insert
 * immediately after receiving the ACK, so we know which index corresponds to
 * the buffer we requested.
@@ -60,18 +68,25 @@
 * @KBASE_HWCNT_BACKEND_CSF_DUMP_COMPLETED: The dump completed successfully.
 *
 * Valid state transitions:
- * IDLE -> REQUESTED (on dump request)
- * REQUESTED -> QUERYING_INSERT (on dump ack)
+ * IDLE -> REQUESTED (on user dump request)
+ * IDLE -> WATCHDOG_REQUESTED (on watchdog request)
+ * IDLE -> QUERYING_INSERT (on user dump request in protected mode)
+ * REQUESTED -> QUERYING_INSERT (on dump acknowledged from firmware)
+ * WATCHDOG_REQUESTED -> REQUESTED (on user dump request)
+ * WATCHDOG_REQUESTED -> COMPLETED (on dump acknowledged from firmware for watchdog request)
 * QUERYING_INSERT -> WORKER_LAUNCHED (on worker submission)
 * WORKER_LAUNCHED -> ACCUMULATING (while the worker is accumulating)
 * ACCUMULATING -> COMPLETED (on accumulation completion)
- * COMPLETED -> REQUESTED (on dump request)
+ * COMPLETED -> QUERYING_INSERT (on user dump request in protected mode)
+ * COMPLETED -> REQUESTED (on user dump request)
+ * COMPLETED -> WATCHDOG_REQUESTED (on watchdog request)
 * COMPLETED -> IDLE (on disable)
 * ANY -> IDLE (on error)
 */
 enum kbase_hwcnt_backend_csf_dump_state {
 	KBASE_HWCNT_BACKEND_CSF_DUMP_IDLE,
 	KBASE_HWCNT_BACKEND_CSF_DUMP_REQUESTED,
+	KBASE_HWCNT_BACKEND_CSF_DUMP_WATCHDOG_REQUESTED,
 	KBASE_HWCNT_BACKEND_CSF_DUMP_QUERYING_INSERT,
 	KBASE_HWCNT_BACKEND_CSF_DUMP_WORKER_LAUNCHED,
 	KBASE_HWCNT_BACKEND_CSF_DUMP_ACCUMULATING,
@@ -136,6 +151,7 @@ enum kbase_hwcnt_backend_csf_enable_state {
 * @counter_set:                  The performance counter set to use.
 * @metadata:                     Hardware counter metadata.
 * @prfcnt_info:                  Performance counter information.
+ * @watchdog_if:                  Watchdog interface object pointer.
 */
 struct kbase_hwcnt_backend_csf_info {
 	struct kbase_hwcnt_backend_csf *backend;
@@ -146,6 +162,7 @@ struct kbase_hwcnt_backend_csf_info {
 	enum kbase_hwcnt_set counter_set;
 	const struct kbase_hwcnt_metadata *metadata;
 	struct kbase_hwcnt_backend_csf_if_prfcnt_info prfcnt_info;
+	struct kbase_hwcnt_watchdog_interface *watchdog_if;
 };

 /**
@@ -192,6 +209,10 @@ struct kbase_hwcnt_csf_physical_layout {
 * @old_sample_buf:             HWC sample buffer to save the previous values
 *                              for delta calculation, size
 *                              prfcnt_info.dump_bytes.
+ * @watchdog_last_seen_insert_idx: The insert index which watchdog has last
+ *                                 seen, to check any new firmware automatic
+ *                                 samples generated during the watchdog
+ *                                 period.
 * @ring_buf:                   Opaque pointer for ring buffer object.
 * @ring_buf_cpu_base:          CPU base address of the allocated ring buffer.
 * @clk_enable_map:             The enable map specifying enabled clock domains.
@@ -204,6 +225,8 @@ struct kbase_hwcnt_csf_physical_layout {
 *                              it is completed accumulating up to the
 *                              insert_index_to_accumulate.
 *                              Should be initialized to the "complete" state.
+ * @user_requested:             Flag to indicate a dump_request called from
+ *                              user.
 * @hwc_dump_workq:             Single threaded work queue for HWC workers
 *                              execution.
 * @hwc_dump_work:              Worker to accumulate samples.
@@ -219,6 +242,7 @@ struct kbase_hwcnt_backend_csf {
 	u64 *to_user_buf;
 	u64 *accum_buf;
 	u32 *old_sample_buf;
+	u32 watchdog_last_seen_insert_idx;
 	struct kbase_hwcnt_backend_csf_if_ring_buf *ring_buf;
 	void *ring_buf_cpu_base;
 	u64 clk_enable_map;
@@ -226,6 +250,7 @@ struct kbase_hwcnt_backend_csf {
 	u64 prev_cycle_count[BASE_MAX_NR_CLOCKS_REGULATORS];
 	struct kbase_hwcnt_csf_physical_layout phys_layout;
 	struct completion dump_completed;
+	bool user_requested;
 	struct workqueue_struct *hwc_dump_workq;
 	struct work_struct hwc_dump_work;
 	struct work_struct hwc_threshold_work;
@@ -594,6 +619,10 @@ static void kbasep_hwcnt_backend_csf_accumulate_samples(
 	backend_csf->info->csf_if->lock(backend_csf->info->csf_if->ctx, &flags);
 	backend_csf->info->csf_if->set_extract_index(
 		backend_csf->info->csf_if->ctx, insert_index_to_stop);
+	/* Update the watchdog last seen index to check any new FW auto samples
+	 * in next watchdog callback.
+	 */
+	backend_csf->watchdog_last_seen_insert_idx = insert_index_to_stop;
 	backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx,
 					  flags);
 }
@@ -612,6 +641,67 @@ static void kbasep_hwcnt_backend_csf_change_es_and_wake_waiters(
 	}
 }

+static void kbasep_hwcnt_backend_watchdog_timer_cb(void *info)
+{
+	struct kbase_hwcnt_backend_csf_info *csf_info = info;
+	struct kbase_hwcnt_backend_csf *backend_csf;
+	unsigned long flags;
+
+	csf_info->csf_if->lock(csf_info->csf_if->ctx, &flags);
+
+	if (WARN_ON(!kbasep_hwcnt_backend_csf_backend_exists(csf_info))) {
+		csf_info->csf_if->unlock(csf_info->csf_if->ctx, flags);
+		return;
+	}
+
+	backend_csf = csf_info->backend;
+
+	/* Only do watchdog request when all conditions are met: */
+	if (/* 1. Backend is enabled. */
+	    (backend_csf->enable_state == KBASE_HWCNT_BACKEND_CSF_ENABLED) &&
+	    /* 2. FW is not in protected mode. */
+	    (!csf_info->fw_in_protected_mode) &&
+	    /* 3. dump state indicates no other dumping is in progress. */
+	    ((backend_csf->dump_state == KBASE_HWCNT_BACKEND_CSF_DUMP_IDLE) ||
+	     (backend_csf->dump_state ==
+	      KBASE_HWCNT_BACKEND_CSF_DUMP_COMPLETED))) {
+		u32 extract_index;
+		u32 insert_index;
+
+		/* Read the raw extract and insert indexes from the CSF interface. */
+		csf_info->csf_if->get_indexes(csf_info->csf_if->ctx,
+					      &extract_index, &insert_index);
+
+		/* Do watchdog request if no new FW auto samples. */
+		if (insert_index ==
+		    backend_csf->watchdog_last_seen_insert_idx) {
+			/* Trigger the watchdog request. */
+			csf_info->csf_if->dump_request(csf_info->csf_if->ctx);
+
+			/* A watchdog dump is required, change the state to
+			 * start the request process.
+			 */
+			backend_csf->dump_state =
+				KBASE_HWCNT_BACKEND_CSF_DUMP_WATCHDOG_REQUESTED;
+		}
+	}
+
+	/* Must schedule another callback when in the transitional state because
+	 * this function can be called for the first time before the performance
+	 * counter enabled interrupt.
+	 */
+	if ((backend_csf->enable_state == KBASE_HWCNT_BACKEND_CSF_ENABLED) ||
+	    (backend_csf->enable_state ==
+	     KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_ENABLED)) {
+		/* Reschedule the timer for next watchdog callback. */
+		csf_info->watchdog_if->modify(
+			csf_info->watchdog_if->timer,
+			HWCNT_BACKEND_WATCHDOG_TIMER_INTERVAL_MS);
+	}
+
+	csf_info->csf_if->unlock(csf_info->csf_if->ctx, flags);
+}
+
 /**
 * kbasep_hwcnt_backend_csf_dump_worker() - HWC dump worker.
 * @work: Work structure.
@@ -826,6 +916,7 @@ static int kbasep_hwcnt_backend_csf_dump_enable_nolock(
 	struct kbase_hwcnt_backend_csf *backend_csf =
 		(struct kbase_hwcnt_backend_csf *)backend;
 	struct kbase_hwcnt_backend_csf_if_enable enable;
+	int err;

 	if (!backend_csf || !enable_map ||
 	    (enable_map->metadata != backend_csf->info->metadata))
@@ -841,6 +932,13 @@ static int kbasep_hwcnt_backend_csf_dump_enable_nolock(
 	if (backend_csf->enable_state != KBASE_HWCNT_BACKEND_CSF_DISABLED)
 		return -EIO;

+	err = backend_csf->info->watchdog_if->enable(
+		backend_csf->info->watchdog_if->timer,
+		HWCNT_BACKEND_WATCHDOG_TIMER_INTERVAL_MS,
+		kbasep_hwcnt_backend_watchdog_timer_cb, backend_csf->info);
+	if (err)
+		return err;
+
 	backend_csf->dump_state = KBASE_HWCNT_BACKEND_CSF_DUMP_IDLE;
 	WARN_ON(!completion_done(&backend_csf->dump_completed));
 	kbasep_hwcnt_backend_csf_change_es_and_wake_waiters(
@@ -948,6 +1046,13 @@ kbasep_hwcnt_backend_csf_dump_disable(struct kbase_hwcnt_backend *backend)
 	backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx,
 					  flags);

+	/* Deregister the timer and block until any timer callback has completed.
+	 * We've transitioned out of the ENABLED state so we can guarantee it
+	 * won't reschedule itself.
+	 */
+	backend_csf->info->watchdog_if->disable(
+		backend_csf->info->watchdog_if->timer);
+
 	/* Block until any async work has completed. We have transitioned out of
 	 * the ENABLED state so we can guarantee no new work will concurrently
 	 * be submitted.
@@ -978,6 +1083,9 @@ kbasep_hwcnt_backend_csf_dump_disable(struct kbase_hwcnt_backend *backend)
 		break;
 	}

+	backend_csf->user_requested = false;
+	backend_csf->watchdog_last_seen_insert_idx = 0;
+
 	backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx,
 					  flags);

@@ -1006,6 +1114,7 @@ kbasep_hwcnt_backend_csf_dump_request(struct kbase_hwcnt_backend *backend,
 	struct kbase_hwcnt_backend_csf *backend_csf =
 		(struct kbase_hwcnt_backend_csf *)backend;
 	bool do_request = false;
+	bool watchdog_dumping = false;

 	if (!backend_csf)
 		return -EINVAL;
@@ -1022,6 +1131,7 @@ kbasep_hwcnt_backend_csf_dump_request(struct kbase_hwcnt_backend *backend,
 			KBASE_HWCNT_BACKEND_CSF_DUMP_COMPLETED;
 		*dump_time_ns = kbasep_hwcnt_backend_csf_timestamp_ns(backend);
 		kbasep_hwcnt_backend_csf_cc_update(backend_csf);
+		backend_csf->user_requested = true;
 		backend_csf->info->csf_if->unlock(
 			backend_csf->info->csf_if->ctx, flags);
 		return 0;
@@ -1035,11 +1145,21 @@ kbasep_hwcnt_backend_csf_dump_request(struct kbase_hwcnt_backend *backend,
 	}

 	/* Make sure that this is either the first request since enable or the
-	 * previous dump has completed, so we can avoid midway through a dump.
+	 * previous user dump has completed or a watchdog dump is in progress,
+	 * so that we never start in the middle of another user dump.
+	 * If a user request arrives while a watchdog dump is in progress,
+	 * the user request takes ownership of the watchdog's sample by
+	 * changing the dump_state, so that the interrupt for the watchdog
+	 * request is processed instead of being ignored.
 	 */
 	if ((backend_csf->dump_state != KBASE_HWCNT_BACKEND_CSF_DUMP_IDLE) &&
 	    (backend_csf->dump_state !=
-	     KBASE_HWCNT_BACKEND_CSF_DUMP_COMPLETED)) {
+	     KBASE_HWCNT_BACKEND_CSF_DUMP_COMPLETED) &&
+	    (backend_csf->dump_state !=
+	     KBASE_HWCNT_BACKEND_CSF_DUMP_WATCHDOG_REQUESTED)) {
+		/* HWC is disabled or another user dump is ongoing,
+		 * or we're on fault.
+		 */
 		backend_csf->info->csf_if->unlock(
 			backend_csf->info->csf_if->ctx, flags);
 		/* HWC is disabled or another dump is ongoing, or we are on
@@ -1051,6 +1171,10 @@ kbasep_hwcnt_backend_csf_dump_request(struct kbase_hwcnt_backend *backend,
 	/* Reset the completion so dump_wait() has something to wait on. */
 	reinit_completion(&backend_csf->dump_completed);

+	if (backend_csf->dump_state ==
+	    KBASE_HWCNT_BACKEND_CSF_DUMP_WATCHDOG_REQUESTED)
+		watchdog_dumping = true;
+
 	if ((backend_csf->enable_state == KBASE_HWCNT_BACKEND_CSF_ENABLED) &&
 	    !backend_csf->info->fw_in_protected_mode) {
 		/* Only do the request if we are fully enabled and not in
@@ -1078,15 +1202,29 @@ kbasep_hwcnt_backend_csf_dump_request(struct kbase_hwcnt_backend *backend,

 	*dump_time_ns = kbasep_hwcnt_backend_csf_timestamp_ns(backend);
 	kbasep_hwcnt_backend_csf_cc_update(backend_csf);
+	backend_csf->user_requested = true;

-	if (do_request)
-		backend_csf->info->csf_if->dump_request(
-			backend_csf->info->csf_if->ctx);
-	else
+	if (do_request) {
+		/* If a watchdog dumping is in progress, don't need to do
+		 * another request, just update the dump_state and take the
+		 * ownership of the sample which watchdog requested.
+		 */
+		if (!watchdog_dumping)
+			backend_csf->info->csf_if->dump_request(
+				backend_csf->info->csf_if->ctx);
+	} else
 		kbase_hwcnt_backend_csf_submit_dump_worker(backend_csf->info);

 	backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx,
 					  flags);
+
+	/* A dump has just been requested, so push the watchdog's next
+	 * periodic check back by a full interval.
+	 */
+	backend_csf->info->watchdog_if->modify(
+		backend_csf->info->watchdog_if->timer,
+		HWCNT_BACKEND_WATCHDOG_TIMER_INTERVAL_MS);
+
 	return 0;
 }

@@ -1105,11 +1243,18 @@ kbasep_hwcnt_backend_csf_dump_wait(struct kbase_hwcnt_backend *backend)
 	wait_for_completion(&backend_csf->dump_completed);

 	backend_csf->info->csf_if->lock(backend_csf->info->csf_if->ctx, &flags);
-	/* Make sure the last dump actually succeeded. */
-	errcode = (backend_csf->dump_state ==
-		   KBASE_HWCNT_BACKEND_CSF_DUMP_COMPLETED) ?
-			  0 :
-			  -EIO;
+	/* Make sure the last dump actually succeeded when user requested is
+	 * set.
+	 */
+	if (backend_csf->user_requested &&
+	    ((backend_csf->dump_state ==
+	      KBASE_HWCNT_BACKEND_CSF_DUMP_COMPLETED) ||
+	     (backend_csf->dump_state ==
+	      KBASE_HWCNT_BACKEND_CSF_DUMP_WATCHDOG_REQUESTED)))
+		errcode = 0;
+	else
+		errcode = -EIO;
+
 	backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx,
 					  flags);

@@ -1155,13 +1300,16 @@ static int kbasep_hwcnt_backend_csf_dump_get(
 	    (dst_enable_map->metadata != dst->metadata))
 		return -EINVAL;

+	/* Extract elapsed cycle count for each clock domain if enabled. */
 	kbase_hwcnt_metadata_for_each_clock(dst_enable_map->metadata, clk) {
 		if (!kbase_hwcnt_clk_enable_map_enabled(
 			    dst_enable_map->clk_enable_map, clk))
 			continue;

-		/* Extract elapsed cycle count for each clock domain. */
-		dst->clk_cnt_buf[clk] = backend_csf->cycle_count_elapsed[clk];
+		/* Reset the counter to zero if accumulation is off. */
+		if (!accumulate)
+			dst->clk_cnt_buf[clk] = 0;
+		dst->clk_cnt_buf[clk] += backend_csf->cycle_count_elapsed[clk];
 	}

 	/* We just return the user buffer without checking the current state,
@@ -1279,6 +1427,8 @@ kbasep_hwcnt_backend_csf_create(struct kbase_hwcnt_backend_csf_info *csf_info,
 	backend_csf->enable_state = KBASE_HWCNT_BACKEND_CSF_DISABLED;
 	backend_csf->dump_state = KBASE_HWCNT_BACKEND_CSF_DUMP_IDLE;
 	complete_all(&backend_csf->dump_completed);
+	backend_csf->user_requested = false;
+	backend_csf->watchdog_last_seen_insert_idx = 0;

 	*out_backend = backend_csf;
 	return 0;
@@ -1401,38 +1551,41 @@ static void kbasep_hwcnt_backend_csf_info_destroy(
 *                 used to create backend interface.
 * @ring_buf_cnt: The buffer count of the CSF hwcnt backend ring buffer.
 *                MUST be power of 2.
+ * @watchdog_if:  Non-NULL pointer to a hwcnt watchdog interface structure used to create
+ *                backend interface.
 * @out_info:     Non-NULL pointer to where info is stored on success.
 * @return 0 on success, else error code.
 */
 static int kbasep_hwcnt_backend_csf_info_create(
 	struct kbase_hwcnt_backend_csf_if *csf_if, u32 ring_buf_cnt,
+	struct kbase_hwcnt_watchdog_interface *watchdog_if,
 	const struct kbase_hwcnt_backend_csf_info **out_info)
 {
 	struct kbase_hwcnt_backend_csf_info *info = NULL;

-	WARN_ON(!csf_if);
-	WARN_ON(!out_info);
-	WARN_ON(!is_power_of_2(ring_buf_cnt));
+	if (WARN_ON(!csf_if) || WARN_ON(!watchdog_if) || WARN_ON(!out_info) ||
+	    WARN_ON(!is_power_of_2(ring_buf_cnt)))
+		return -EINVAL;

-	info = kzalloc(sizeof(*info), GFP_KERNEL);
+	info = kmalloc(sizeof(*info), GFP_KERNEL);
 	if (!info)
 		return -ENOMEM;

+	*info = (struct kbase_hwcnt_backend_csf_info)
+	{
 #if defined(CONFIG_MALI_BIFROST_PRFCNT_SET_SECONDARY)
-	info->counter_set = KBASE_HWCNT_SET_SECONDARY;
+		.counter_set = KBASE_HWCNT_SET_SECONDARY,
 #elif defined(CONFIG_MALI_PRFCNT_SET_TERTIARY)
-	info->counter_set = KBASE_HWCNT_SET_TERTIARY;
+		.counter_set = KBASE_HWCNT_SET_TERTIARY,
 #else
-	/* Default to primary */
-	info->counter_set = KBASE_HWCNT_SET_PRIMARY;
+		/* Default to primary */
+		.counter_set = KBASE_HWCNT_SET_PRIMARY,
 #endif
-
-	info->backend = NULL;
-	info->csf_if = csf_if;
-	info->ring_buf_cnt = ring_buf_cnt;
-	info->fw_in_protected_mode = false;
-	info->unrecoverable_error_happened = false;
-
+		.backend = NULL, .csf_if = csf_if, .ring_buf_cnt = ring_buf_cnt,
+		.fw_in_protected_mode = false,
+		.unrecoverable_error_happened = false,
+		.watchdog_if = watchdog_if,
+	};
 	*out_info = info;

 	return 0;
@@ -1653,6 +1806,14 @@ void kbase_hwcnt_backend_csf_on_prfcnt_sample(
 		return;
 	backend_csf = csf_info->backend;

+	/* Skip the dump_work if it's a watchdog request. */
+	if (backend_csf->dump_state ==
+	    KBASE_HWCNT_BACKEND_CSF_DUMP_WATCHDOG_REQUESTED) {
+		backend_csf->dump_state =
+			KBASE_HWCNT_BACKEND_CSF_DUMP_COMPLETED;
+		return;
+	}
+
 	/* If the current state is not REQUESTED, this HWC sample will be
 	 * skipped and processed in next dump_request.
 	 */
@@ -1831,14 +1992,15 @@ void kbase_hwcnt_backend_csf_metadata_term(
 	}
 }

-int kbase_hwcnt_backend_csf_create(struct kbase_hwcnt_backend_csf_if *csf_if,
-				   u32 ring_buf_cnt,
-				   struct kbase_hwcnt_backend_interface *iface)
+int kbase_hwcnt_backend_csf_create(
+	struct kbase_hwcnt_backend_csf_if *csf_if, u32 ring_buf_cnt,
+	struct kbase_hwcnt_watchdog_interface *watchdog_if,
+	struct kbase_hwcnt_backend_interface *iface)
 {
 	int errcode;
 	const struct kbase_hwcnt_backend_csf_info *info = NULL;

-	if (!iface || !csf_if)
+	if (!iface || !csf_if || !watchdog_if)
 		return -EINVAL;

 	/* The buffer count must be power of 2 */
@@ -1846,7 +2008,7 @@ int kbase_hwcnt_backend_csf_create(struct kbase_hwcnt_backend_csf_if *csf_if,
 		return -EINVAL;

 	errcode = kbasep_hwcnt_backend_csf_info_create(csf_if, ring_buf_cnt,
-						       &info);
+						       watchdog_if, &info);
 	if (errcode)
 		return errcode;

--- a/drivers/gpu/arm/bifrost/mali_kbase_hwcnt_backend_csf.h
+++ b/drivers/gpu/arm/bifrost/mali_kbase_hwcnt_backend_csf.h
@@ -29,6 +29,7 @@

 #include "mali_kbase_hwcnt_backend.h"
 #include "mali_kbase_hwcnt_backend_csf_if.h"
+#include "mali_kbase_hwcnt_watchdog_if.h"

 /**
 * kbase_hwcnt_backend_csf_create() - Create a CSF hardware counter backend
@@ -37,6 +38,8 @@
 *                used to create backend interface.
 * @ring_buf_cnt: The buffer count of CSF hwcnt backend, used when allocate ring
 *                buffer, MUST be power of 2.
+ * @watchdog_if:  Non-NULL pointer to a hwcnt watchdog interface structure used
+ *                to create backend interface.
 * @iface:        Non-NULL pointer to backend interface structure that is filled
 *                in on creation success.
 *
@@ -44,9 +47,10 @@
 *
 * Return: 0 on success, else error code.
 */
-int kbase_hwcnt_backend_csf_create(struct kbase_hwcnt_backend_csf_if *csf_if,
-				   u32 ring_buf_cnt,
-				   struct kbase_hwcnt_backend_interface *iface);
+int kbase_hwcnt_backend_csf_create(
+	struct kbase_hwcnt_backend_csf_if *csf_if, u32 ring_buf_cnt,
+	struct kbase_hwcnt_watchdog_interface *watchdog_if,
+	struct kbase_hwcnt_backend_interface *iface);

 /**
 * kbase_hwcnt_backend_csf_metadata_init() - Initialize the metadata for a CSF
--- a/drivers/gpu/arm/bifrost/mali_kbase_hwcnt_backend_csf_if_fw.c
+++ b/drivers/gpu/arm/bifrost/mali_kbase_hwcnt_backend_csf_if_fw.c
@@ -38,6 +38,9 @@
 #include <linux/log2.h>
 #include "mali_kbase_ccswe.h"

+#if IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI)
+#include <backend/gpu/mali_kbase_model_dummy.h>
+#endif /* CONFIG_MALI_BIFROST_NO_MALI */

 /** The number of nanoseconds in a second. */
 #define NSECS_IN_SEC 1000000000ull /* ns */
@@ -217,6 +220,26 @@ static void kbasep_hwcnt_backend_csf_if_fw_get_prfcnt_info(
 	struct kbase_hwcnt_backend_csf_if_ctx *ctx,
 	struct kbase_hwcnt_backend_csf_if_prfcnt_info *prfcnt_info)
 {
+#if IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI)
+	size_t dummy_model_blk_count;
+	struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx =
+		(struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx;
+
+	prfcnt_info->l2_count = KBASE_DUMMY_MODEL_MAX_MEMSYS_BLOCKS;
+	prfcnt_info->core_mask =
+		(1ull << KBASE_DUMMY_MODEL_MAX_SHADER_CORES) - 1;
+	/* 1 FE block + 1 Tiler block + l2_count blocks + shader_core blocks */
+	dummy_model_blk_count =
+		2 + prfcnt_info->l2_count + fls64(prfcnt_info->core_mask);
+	prfcnt_info->dump_bytes =
+		dummy_model_blk_count * KBASE_DUMMY_MODEL_BLOCK_SIZE;
+	prfcnt_info->prfcnt_block_size =
+		KBASE_HWCNT_V5_DEFAULT_VALUES_PER_BLOCK *
+		KBASE_HWCNT_VALUE_HW_BYTES;
+	prfcnt_info->clk_cnt = 1;
+	prfcnt_info->clearing_samples = true;
+	fw_ctx->buf_bytes = prfcnt_info->dump_bytes;
+#else
 	struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx;
 	struct kbase_device *kbdev;
 	u32 prfcnt_size;
@@ -261,6 +284,7 @@ static void kbasep_hwcnt_backend_csf_if_fw_get_prfcnt_info(
 	/* Total size must be multiple of block size. */
 	WARN_ON((prfcnt_info->dump_bytes % prfcnt_info->prfcnt_block_size) !=
 		0);
+#endif
 }

 static int kbasep_hwcnt_backend_csf_if_fw_ring_buf_alloc(
@@ -355,6 +379,11 @@ static int kbasep_hwcnt_backend_csf_if_fw_ring_buf_alloc(
 	*out_ring_buf =
 		(struct kbase_hwcnt_backend_csf_if_ring_buf *)fw_ring_buf;

+#if IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI)
+	/* The dummy model needs the CPU mapping. */
+	gpu_model_set_dummy_prfcnt_base_cpu(fw_ring_buf->cpu_dump_base, kbdev,
+					    phys, num_pages);
+#endif /* CONFIG_MALI_BIFROST_NO_MALI */

 	return 0;

--- a/drivers/gpu/arm/bifrost/mali_kbase_hwcnt_backend_jm.c
+++ b/drivers/gpu/arm/bifrost/mali_kbase_hwcnt_backend_jm.c
@@ -28,6 +28,9 @@
 #include "mali_kbase_hwaccess_time.h"
 #include "mali_kbase_ccswe.h"

+#if IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI)
+#include "backend/gpu/mali_kbase_model_dummy.h"
+#endif /* CONFIG_MALI_BIFROST_NO_MALI */
 #include "backend/gpu/mali_kbase_clk_rate_trace_mgr.h"

 #include "backend/gpu/mali_kbase_pm_internal.h"
@@ -140,6 +143,11 @@ kbasep_hwcnt_backend_jm_gpu_info_init(struct kbase_device *kbdev,
 	if (!kbdev || !info)
 		return -EINVAL;

+#if IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI)
+	info->l2_count = KBASE_DUMMY_MODEL_MAX_MEMSYS_BLOCKS;
+	info->core_mask = (1ull << KBASE_DUMMY_MODEL_MAX_SHADER_CORES) - 1;
+	info->prfcnt_values_per_block = KBASE_HWCNT_V5_DEFAULT_VALUES_PER_BLOCK;
+#else /* CONFIG_MALI_BIFROST_NO_MALI */
 	{
 		const struct base_gpu_props *props = &kbdev->gpu_props.props;
 		const size_t l2_count = props->l2_props.num_l2_slices;
@@ -151,6 +159,7 @@ kbasep_hwcnt_backend_jm_gpu_info_init(struct kbase_device *kbdev,
 		info->prfcnt_values_per_block =
 			KBASE_HWCNT_V5_DEFAULT_VALUES_PER_BLOCK;
 	}
+#endif /* CONFIG_MALI_BIFROST_NO_MALI */

 	/* Determine the number of available clock domains. */
 	for (clk = 0; clk < BASE_MAX_NR_CLOCKS_REGULATORS; clk++) {
@@ -569,6 +578,11 @@ static int kbasep_hwcnt_backend_jm_dump_get(
 	struct kbase_hwcnt_backend_jm *backend_jm =
 		(struct kbase_hwcnt_backend_jm *)backend;
 	size_t clk;
+#if IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI)
+	struct kbase_device *kbdev;
+	unsigned long flags;
+	int errcode;
+#endif /* CONFIG_MALI_BIFROST_NO_MALI */

 	if (!backend_jm || !dst || !dst_enable_map ||
 	    (backend_jm->info->metadata != dst->metadata) ||
@@ -582,15 +596,32 @@ static int kbasep_hwcnt_backend_jm_dump_get(
 	/* Dump sample to the internal 64-bit user buffer. */
 	kbasep_hwcnt_backend_jm_dump_sample(backend_jm);

+	/* Extract elapsed cycle count for each clock domain if enabled. */
 	kbase_hwcnt_metadata_for_each_clock(dst_enable_map->metadata, clk) {
 		if (!kbase_hwcnt_clk_enable_map_enabled(
 			dst_enable_map->clk_enable_map, clk))
 			continue;

-		/* Extract elapsed cycle count for each clock domain. */
-		dst->clk_cnt_buf[clk] = backend_jm->cycle_count_elapsed[clk];
+		/* Reset the counter to zero if accumulation is off. */
+		if (!accumulate)
+			dst->clk_cnt_buf[clk] = 0;
+		dst->clk_cnt_buf[clk] += backend_jm->cycle_count_elapsed[clk];
 	}

+#if IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI)
+	kbdev = backend_jm->kctx->kbdev;
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	/* Update the current configuration information. */
+	errcode = kbasep_hwcnt_gpu_update_curr_config(kbdev,
+		&backend_jm->curr_config);
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	if (errcode)
+		return errcode;
+#endif /* CONFIG_MALI_BIFROST_NO_MALI */
 	return kbase_hwcnt_jm_dump_get(dst, backend_jm->to_user_buf,
 				       dst_enable_map, backend_jm->pm_core_mask,
 				       &backend_jm->curr_config, accumulate);
@@ -700,6 +731,9 @@ static int kbasep_hwcnt_backend_jm_create(
 	int errcode;
 	struct kbase_device *kbdev;
 	struct kbase_hwcnt_backend_jm *backend = NULL;
+#if IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI)
+	size_t page_count;
+#endif

 	WARN_ON(!info);
 	WARN_ON(!out_backend);
@@ -739,6 +773,13 @@ static int kbasep_hwcnt_backend_jm_create(
 	kbase_ccswe_init(&backend->ccswe_shader_cores);
 	backend->rate_listener.notify = kbasep_hwcnt_backend_jm_on_freq_change;

+#if IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI)
+	/* The dummy model needs the CPU mapping. */
+	page_count = PFN_UP(info->dump_bytes);
+	gpu_model_set_dummy_prfcnt_base_cpu(backend->cpu_dump_va, kbdev,
+					    backend->vmap->cpu_pages,
+					    page_count);
+#endif /* CONFIG_MALI_BIFROST_NO_MALI */

 	*out_backend = backend;
 	return 0;
--- a/drivers/gpu/arm/bifrost/mali_kbase_hwcnt_legacy.c
+++ b/drivers/gpu/arm/bifrost/mali_kbase_hwcnt_legacy.c
@@ -1,179 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
-/*
- *
- * (C) COPYRIGHT 2018, 2020-2021 ARM Limited. All rights reserved.
- *
- * This program is free software and is provided to you under the terms of the
- * GNU General Public License version 2 as published by the Free Software
- * Foundation, and any use by you of this program is subject to the terms
- * of such GNU license.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, you can access it online at
- * http://www.gnu.org/licenses/gpl-2.0.html.
- *
- */
-
-#include "mali_kbase_hwcnt_legacy.h"
-#include "mali_kbase_hwcnt_virtualizer.h"
-#include "mali_kbase_hwcnt_types.h"
-#include "mali_kbase_hwcnt_gpu.h"
-#include "mali_kbase_hwcnt_gpu_narrow.h"
-#include <uapi/gpu/arm/bifrost/mali_kbase_ioctl.h>
-
-#include <linux/slab.h>
-#include <linux/uaccess.h>
-
-/**
- * struct kbase_hwcnt_legacy_client - Legacy hardware counter client.
- * @user_dump_buf: Pointer to a non-NULL user buffer, where dumps are returned.
- * @enable_map:    Counter enable map.
- * @dump_buf:      Dump buffer used to manipulate dumps from virtualizer.
- * @hvcli:         Hardware counter virtualizer client.
- * @dump_buf_user: Narrow dump buffer used to manipulate dumps before they are
- *                 copied to user.
- * @metadata_user: For compatibility with the user driver interface, this
- *                 contains a narrowed version of the hardware counter metadata
- *                 which is limited to 64 entries per block and 32-bit for each
- *                 entry.
- */
-struct kbase_hwcnt_legacy_client {
-	void __user *user_dump_buf;
-	struct kbase_hwcnt_enable_map enable_map;
-	struct kbase_hwcnt_dump_buffer dump_buf;
-	struct kbase_hwcnt_virtualizer_client *hvcli;
-	struct kbase_hwcnt_dump_buffer_narrow dump_buf_user;
-	const struct kbase_hwcnt_metadata_narrow *metadata_user;
-};
-
-int kbase_hwcnt_legacy_client_create(
-	struct kbase_hwcnt_virtualizer *hvirt,
-	struct kbase_ioctl_hwcnt_enable *enable,
-	struct kbase_hwcnt_legacy_client **out_hlcli)
-{
-	int errcode;
-	struct kbase_hwcnt_legacy_client *hlcli;
-	const struct kbase_hwcnt_metadata *metadata;
-	struct kbase_hwcnt_physical_enable_map phys_em;
-
-	if (!hvirt || !enable || !enable->dump_buffer || !out_hlcli)
-		return -EINVAL;
-
-	metadata = kbase_hwcnt_virtualizer_metadata(hvirt);
-
-	hlcli = kzalloc(sizeof(*hlcli), GFP_KERNEL);
-	if (!hlcli)
-		return -ENOMEM;
-
-	errcode = kbase_hwcnt_gpu_metadata_narrow_create(&hlcli->metadata_user,
-							 metadata);
-	if (errcode)
-		goto error;
-
-	errcode = kbase_hwcnt_dump_buffer_narrow_alloc(hlcli->metadata_user,
-						       &hlcli->dump_buf_user);
-	if (errcode)
-		goto error;
-
-	hlcli->user_dump_buf = (void __user *)(uintptr_t)enable->dump_buffer;
-
-	errcode = kbase_hwcnt_enable_map_alloc(metadata, &hlcli->enable_map);
-	if (errcode)
-		goto error;
-
-	/* Translate from the ioctl enable map to the internal one */
-	phys_em.fe_bm = enable->fe_bm;
-	phys_em.shader_bm = enable->shader_bm;
-	phys_em.tiler_bm = enable->tiler_bm;
-	phys_em.mmu_l2_bm = enable->mmu_l2_bm;
-	kbase_hwcnt_gpu_enable_map_from_physical(&hlcli->enable_map, &phys_em);
-
-	errcode = kbase_hwcnt_dump_buffer_alloc(metadata, &hlcli->dump_buf);
-	if (errcode)
-		goto error;
-
-	errcode = kbase_hwcnt_virtualizer_client_create(
-		hvirt, &hlcli->enable_map, &hlcli->hvcli);
-	if (errcode)
-		goto error;
-
-	*out_hlcli = hlcli;
-	return 0;
-
-error:
-	kbase_hwcnt_legacy_client_destroy(hlcli);
-	return errcode;
-}
-
-void kbase_hwcnt_legacy_client_destroy(struct kbase_hwcnt_legacy_client *hlcli)
-{
-	if (!hlcli)
-		return;
-
-	kbase_hwcnt_virtualizer_client_destroy(hlcli->hvcli);
-	kbase_hwcnt_dump_buffer_free(&hlcli->dump_buf);
-	kbase_hwcnt_enable_map_free(&hlcli->enable_map);
-	kbase_hwcnt_dump_buffer_narrow_free(&hlcli->dump_buf_user);
-	kbase_hwcnt_gpu_metadata_narrow_destroy(hlcli->metadata_user);
-	kfree(hlcli);
-}
-
-int kbase_hwcnt_legacy_client_dump(struct kbase_hwcnt_legacy_client *hlcli)
-{
-	int errcode;
-	u64 ts_start_ns;
-	u64 ts_end_ns;
-
-	if (!hlcli)
-		return -EINVAL;
-
-	/* Dump into the kernel buffer */
-	errcode = kbase_hwcnt_virtualizer_client_dump(hlcli->hvcli,
-		&ts_start_ns, &ts_end_ns, &hlcli->dump_buf);
-	if (errcode)
-		return errcode;
-
-	/* Patch the dump buf headers, to hide the counters that other hwcnt
-	 * clients are using.
-	 */
-	kbase_hwcnt_gpu_patch_dump_headers(
-		&hlcli->dump_buf, &hlcli->enable_map);
-
-	/* Copy the dump buffer to the userspace visible buffer. The strict
-	 * variant will explicitly zero any non-enabled counters to ensure
-	 * nothing except exactly what the user asked for is made visible.
-	 *
-	 * A narrow copy is required since virtualizer has a bigger buffer
-	 * but user only needs part of it.
-	 */
-	kbase_hwcnt_dump_buffer_copy_strict_narrow(
-		&hlcli->dump_buf_user, &hlcli->dump_buf, &hlcli->enable_map);
-
-	/* Copy into the user's buffer */
-	errcode = copy_to_user(hlcli->user_dump_buf,
-			       hlcli->dump_buf_user.dump_buf,
-			       hlcli->dump_buf_user.md_narrow->dump_buf_bytes);
-	/* Non-zero errcode implies user buf was invalid or too small */
-	if (errcode)
-		return -EFAULT;
-
-	return 0;
-}
-
-int kbase_hwcnt_legacy_client_clear(struct kbase_hwcnt_legacy_client *hlcli)
-{
-	u64 ts_start_ns;
-	u64 ts_end_ns;
-
-	if (!hlcli)
-		return -EINVAL;
-
-	/* Dump with a NULL buffer to clear this client's counters */
-	return kbase_hwcnt_virtualizer_client_dump(hlcli->hvcli,
-		&ts_start_ns, &ts_end_ns, NULL);
-}
--- a/drivers/gpu/arm/bifrost/mali_kbase_hwcnt_legacy.h
+++ b/drivers/gpu/arm/bifrost/mali_kbase_hwcnt_legacy.h
@@ -1,93 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
-/*
- *
- * (C) COPYRIGHT 2018, 2020-2021 ARM Limited. All rights reserved.
- *
- * This program is free software and is provided to you under the terms of the
- * GNU General Public License version 2 as published by the Free Software
- * Foundation, and any use by you of this program is subject to the terms
- * of such GNU license.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, you can access it online at
- * http://www.gnu.org/licenses/gpl-2.0.html.
- *
- */
-
-/*
- * Legacy hardware counter interface, giving userspace clients simple,
- * synchronous access to hardware counters.
- *
- * Any functions operating on an single legacy hardware counter client instance
- * must be externally synchronised.
- * Different clients may safely be used concurrently.
- */
-
-#ifndef _KBASE_HWCNT_LEGACY_H_
-#define _KBASE_HWCNT_LEGACY_H_
-
-struct kbase_hwcnt_legacy_client;
-struct kbase_ioctl_hwcnt_enable;
-struct kbase_hwcnt_virtualizer;
-
-/**
- * kbase_hwcnt_legacy_client_create() - Create a legacy hardware counter client.
- * @hvirt:     Non-NULL pointer to hardware counter virtualizer the client
- *             should be attached to.
- * @enable:    Non-NULL pointer to hwcnt_enable structure, containing a valid
- *             pointer to a user dump buffer large enough to hold a dump, and
- *             the counters that should be enabled.
- * @out_hlcli: Non-NULL pointer to where the pointer to the created client will
- *             be stored on success.
- *
- * Return: 0 on success, else error code.
- */
-int kbase_hwcnt_legacy_client_create(
-	struct kbase_hwcnt_virtualizer *hvirt,
-	struct kbase_ioctl_hwcnt_enable *enable,
-	struct kbase_hwcnt_legacy_client **out_hlcli);
-
-/**
- * kbase_hwcnt_legacy_client_destroy() - Destroy a legacy hardware counter
- *                                       client.
- * @hlcli: Pointer to the legacy hardware counter client.
- *
- * Will safely destroy a client in any partial state of construction.
- */
-void kbase_hwcnt_legacy_client_destroy(struct kbase_hwcnt_legacy_client *hlcli);
-
-/**
- * kbase_hwcnt_legacy_client_dump() - Perform a hardware counter dump into the
- *                                    client's user buffer.
- * @hlcli: Non-NULL pointer to the legacy hardware counter client.
- *
- * This function will synchronously dump hardware counters into the user buffer
- * specified on client creation, with the counters specified on client creation.
- *
- * The counters are automatically cleared after each dump, such that the next
- * dump performed will return the counter values accumulated between the time of
- * this function call and the next dump.
- *
- * Return: 0 on success, else error code.
- */
-int kbase_hwcnt_legacy_client_dump(struct kbase_hwcnt_legacy_client *hlcli);
-
-/**
- * kbase_hwcnt_legacy_client_clear() - Perform and discard a hardware counter
- *                                     dump.
- * @hlcli: Non-NULL pointer to the legacy hardware counter client.
- *
- * This function will synchronously clear the hardware counters, such that the
- * next dump performed will return the counter values accumulated between the
- * time of this function call and the next dump.
- *
- * Return: 0 on success, else error code.
- */
-int kbase_hwcnt_legacy_client_clear(struct kbase_hwcnt_legacy_client *hlcli);
-
-#endif /* _KBASE_HWCNT_LEGACY_H_ */
--- a/drivers/gpu/arm/bifrost/mali_kbase_hwcnt_watchdog_if.h
+++ b/drivers/gpu/arm/bifrost/mali_kbase_hwcnt_watchdog_if.h
@@ -0,0 +1,90 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ *
+ * (C) COPYRIGHT 2021 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+/*
+ * Virtual interface for hardware counter watchdog.
+ */
+
+#ifndef _KBASE_HWCNT_WATCHDOG_IF_H_
+#define _KBASE_HWCNT_WATCHDOG_IF_H_
+
+#include <linux/types.h>
+
+/*
+ * Opaque structure of information used to create a watchdog timer interface.
+ */
+struct kbase_hwcnt_watchdog_info;
+
+/**
+ * typedef kbase_hwcnt_watchdog_callback_fn - Callback function when watchdog timer is done
+ *
+ * @user_data: Pointer to the callback user data.
+ */
+typedef void kbase_hwcnt_watchdog_callback_fn(void *user_data);
+
+/**
+ * typedef kbase_hwcnt_watchdog_enable_fn - Enable watchdog timer
+ *
+ * @timer:     Non-NULL pointer to a watchdog timer interface context
+ * @period_ms: Period in milliseconds of the watchdog timer
+ * @callback:  Non-NULL pointer to a watchdog callback function
+ * @user_data: Pointer to the user data, used when watchdog timer callback is called
+ *
+ * Return: 0 if the watchdog timer enabled successfully, error code otherwise.
+ */
+typedef int kbase_hwcnt_watchdog_enable_fn(
+	const struct kbase_hwcnt_watchdog_info *timer, u32 period_ms,
+	kbase_hwcnt_watchdog_callback_fn *callback, void *user_data);
+
+/**
+ * typedef kbase_hwcnt_watchdog_disable_fn - Disable watchdog timer
+ *
+ * @timer: Non-NULL pointer to a watchdog timer interface context
+ */
+typedef void
+kbase_hwcnt_watchdog_disable_fn(const struct kbase_hwcnt_watchdog_info *timer);
+
+/**
+ * typedef kbase_hwcnt_watchdog_modify_fn - Modify watchdog timer's timeout
+ *
+ * @timer:    Non-NULL pointer to a watchdog timer interface context
+ * @delay_ms: Watchdog timer expiration in milliseconds
+ */
+typedef void
+kbase_hwcnt_watchdog_modify_fn(const struct kbase_hwcnt_watchdog_info *timer,
+			       u32 delay_ms);
+
+/**
+ * struct kbase_hwcnt_watchdog_interface - Hardware counter watchdog virtual interface.
+ *
+ * @timer:   Opaque, immutable timer context passed as first argument to the ops
+ * @enable:  Function ptr to enable the watchdog with a period and a callback
+ * @disable: Function ptr to disable the watchdog
+ * @modify:  Function ptr to change the watchdog's timeout
+ */
+struct kbase_hwcnt_watchdog_interface {
+	const struct kbase_hwcnt_watchdog_info *timer;
+	kbase_hwcnt_watchdog_enable_fn *enable;
+	kbase_hwcnt_watchdog_disable_fn *disable;
+	kbase_hwcnt_watchdog_modify_fn *modify;
+};
+
+#endif /* _KBASE_HWCNT_WATCHDOG_IF_H_ */
--- a/drivers/gpu/arm/bifrost/mali_kbase_hwcnt_watchdog_if_timer.c
+++ b/drivers/gpu/arm/bifrost/mali_kbase_hwcnt_watchdog_if_timer.c
@@ -0,0 +1,159 @@
+// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
+/*
+ *
+ * (C) COPYRIGHT 2021 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+#include "mali_kbase.h"
+#include "mali_kbase_hwcnt_watchdog_if.h"
+#include "mali_kbase_hwcnt_watchdog_if_timer.h"
+
+#include <linux/timer.h>
+#include <linux/slab.h>
+
+/**
+ * struct kbase_hwcnt_watchdog_if_timer_info - Timer-backed state behind a
+ *                                             hwcnt watchdog interface.
+ *
+ * @watchdog_timer: Kernel timer whose handler invokes @callback
+ * @timer_enabled:  Set to true by enable() and back to false by disable()
+ * @callback:       Function stored by enable(); NULL until then
+ * @user_data:      Opaque pointer stored by enable() and passed unchanged to
+ *                  @callback
+ */
+struct kbase_hwcnt_watchdog_if_timer_info {
+	struct timer_list watchdog_timer;
+	bool timer_enabled;
+	kbase_hwcnt_watchdog_callback_fn *callback;
+	void *user_data;
+};
+
+/**
+ * kbasep_hwcnt_watchdog_callback() - Timer expiry handler for the watchdog
+ * @timer: Timer embedded in a struct kbase_hwcnt_watchdog_if_timer_info
+ *
+ * Recovers the enclosing timer info from @timer and, if enable() has stored a
+ * callback, invokes it with the stored user data.
+ */
+static void kbasep_hwcnt_watchdog_callback(struct timer_list *const timer)
+{
+	struct kbase_hwcnt_watchdog_if_timer_info *const timer_info = container_of(
+		timer, struct kbase_hwcnt_watchdog_if_timer_info, watchdog_timer);
+
+	if (!timer_info->callback)
+		return;
+	timer_info->callback(timer_info->user_data);
+}
+
+/* Store the client's callback and arm the timer to expire after @period_ms. */
+static int kbasep_hwcnt_watchdog_if_timer_enable(
+	const struct kbase_hwcnt_watchdog_info *const timer,
+	u32 const period_ms, kbase_hwcnt_watchdog_callback_fn *const callback,
+	void *const user_data)
+{
+	struct kbase_hwcnt_watchdog_if_timer_info *const wd = (void *)timer;
+	unsigned long expires;
+
+	if (WARN_ON(!timer) || WARN_ON(!callback))
+		return -EINVAL;
+
+	/* Record what to call before the timer is armed. */
+	wd->callback = callback;
+	wd->user_data = user_data;
+	expires = jiffies + msecs_to_jiffies(period_ms);
+	mod_timer(&wd->watchdog_timer, expires);
+	wd->timer_enabled = true;
+	return 0;
+}
+
+/* Disable the watchdog; does nothing if it is not currently enabled. */
+static void kbasep_hwcnt_watchdog_if_timer_disable(
+	const struct kbase_hwcnt_watchdog_info *const timer)
+{
+	struct kbase_hwcnt_watchdog_if_timer_info *const wd = (void *)timer;
+
+	if (WARN_ON(!timer))
+		return;
+
+	if (wd->timer_enabled) {
+		/* del_timer_sync() waits for a running handler to finish. */
+		del_timer_sync(&wd->watchdog_timer);
+		wd->timer_enabled = false;
+	}
+}
+
+/* Re-arm an enabled watchdog so that it expires @delay_ms from now. */
+static void kbasep_hwcnt_watchdog_if_timer_modify(
+	const struct kbase_hwcnt_watchdog_info *const timer, u32 const delay_ms)
+{
+	struct kbase_hwcnt_watchdog_if_timer_info *const timer_info = (void *)timer;
+
+	/* mod_timer() also arms an inactive timer: leave a disabled one alone. */
+	if (WARN_ON(!timer) || !timer_info->timer_enabled)
+		return;
+	mod_timer(&timer_info->watchdog_timer,
+		  jiffies + msecs_to_jiffies(delay_ms));
+}
+
+/*
+ * Stop the watchdog timer, free its info and zero every field of @watchdog_if.
+ */
+void kbase_hwcnt_watchdog_if_timer_destroy(
+	struct kbase_hwcnt_watchdog_interface *const watchdog_if)
+{
+	struct kbase_hwcnt_watchdog_if_timer_info *const wd =
+		watchdog_if ? (void *)watchdog_if->timer : NULL;
+
+	if (WARN_ON(!watchdog_if) || WARN_ON(!wd))
+		return;
+
+	/* Wait out any running handler before releasing the state it uses. */
+	del_timer_sync(&wd->watchdog_timer);
+	kfree(wd);
+
+	memset(watchdog_if, 0, sizeof(*watchdog_if));
+}
+
+/*
+ * Allocate timer-backed watchdog state and point @watchdog_if's ops at it.
+ */
+int kbase_hwcnt_watchdog_if_timer_create(
+	struct kbase_hwcnt_watchdog_interface *const watchdog_if)
+{
+	struct kbase_hwcnt_watchdog_if_timer_info *timer_info;
+
+	if (WARN_ON(!watchdog_if))
+		return -EINVAL;
+
+	/* Zeroed: timer_enabled starts false, callback and user_data NULL. */
+	timer_info = kzalloc(sizeof(*timer_info), GFP_KERNEL);
+	if (!timer_info)
+		return -ENOMEM;
+
+	kbase_timer_setup(&timer_info->watchdog_timer,
+			  kbasep_hwcnt_watchdog_callback);
+
+	*watchdog_if = (struct kbase_hwcnt_watchdog_interface){
+		.timer = (void *)timer_info,
+		.enable = kbasep_hwcnt_watchdog_if_timer_enable,
+		.disable = kbasep_hwcnt_watchdog_if_timer_disable,
+		.modify = kbasep_hwcnt_watchdog_if_timer_modify,
+	};
+
+	return 0;
+}
--- a/drivers/gpu/arm/bifrost/mali_kbase_hwcnt_watchdog_if_timer.h
+++ b/drivers/gpu/arm/bifrost/mali_kbase_hwcnt_watchdog_if_timer.h
@@ -0,0 +1,50 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ *
+ * (C) COPYRIGHT 2021 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+/*
+ * Concrete implementation of kbase_hwcnt_watchdog_interface for HWC backend
+ */
+
+#ifndef _KBASE_HWCNT_WATCHDOG_IF_TIMER_H_
+#define _KBASE_HWCNT_WATCHDOG_IF_TIMER_H_
+
+struct kbase_hwcnt_watchdog_interface;
+
+/**
+ * kbase_hwcnt_watchdog_if_timer_create() - Create a watchdog interface of hardware counter backend.
+ *
+ * @watchdog_if: Non-NULL pointer to watchdog interface that is filled in on creation success
+ *
+ * On success the interface's timer handle and its enable, disable and modify
+ * callbacks are populated. The interface should be released with
+ * kbase_hwcnt_watchdog_if_timer_destroy() once it is no longer needed.
+ *
+ * Return: 0 on success, -EINVAL if @watchdog_if is NULL, -ENOMEM if the timer
+ *         state cannot be allocated.
+ */
+int kbase_hwcnt_watchdog_if_timer_create(
+	struct kbase_hwcnt_watchdog_interface *watchdog_if);
+
+/**
+ * kbase_hwcnt_watchdog_if_timer_destroy() - Destroy a watchdog interface of hardware counter
+ *                                           backend.
+ *
+ * @watchdog_if: Pointer to watchdog interface to destroy
+ *
+ * Removes the underlying timer, frees its state and zeroes @watchdog_if.
+ * A NULL @watchdog_if, or an interface whose timer handle is NULL, triggers a
+ * warning and is otherwise ignored.
+ */
+void kbase_hwcnt_watchdog_if_timer_destroy(
+	struct kbase_hwcnt_watchdog_interface *watchdog_if);
+
+#endif /* _KBASE_HWCNT_WATCHDOG_IF_TIMER_H_ */
--- a/drivers/gpu/arm/bifrost/mali_kbase_jd.c
+++ b/drivers/gpu/arm/bifrost/mali_kbase_jd.c
@@ -619,8 +619,8 @@ static void jd_update_jit_usage(struct kbase_jd_atom *katom)
 		else if (reg->flags & KBASE_REG_TILER_ALIGN_TOP)
 			size_to_read = sizeof(u64[COUNT]);

-		ptr = kbase_vmap(kctx, reg->heap_info_gpu_addr, size_to_read,
-				&mapping);
+		ptr = kbase_vmap_prot(kctx, reg->heap_info_gpu_addr, size_to_read,
+				KBASE_REG_CPU_RD, &mapping);

 		if (!ptr) {
 			dev_warn(kctx->kbdev->dev,
--- a/drivers/gpu/arm/bifrost/mali_kbase_kinstr_prfcnt.c
+++ b/drivers/gpu/arm/bifrost/mali_kbase_kinstr_prfcnt.c
--- a/drivers/gpu/arm/bifrost/mali_kbase_kinstr_prfcnt.h
+++ b/drivers/gpu/arm/bifrost/mali_kbase_kinstr_prfcnt.h
@@ -26,6 +26,8 @@
 #ifndef _KBASE_KINSTR_PRFCNT_H_
 #define _KBASE_KINSTR_PRFCNT_H_

+#include <uapi/gpu/arm/bifrost/mali_kbase_hwcnt_reader.h>
+
 struct kbase_kinstr_prfcnt_context;
 struct kbase_hwcnt_virtualizer;
 struct kbase_ioctl_hwcnt_reader_setup;
@@ -76,6 +78,49 @@ void kbase_kinstr_prfcnt_suspend(struct kbase_kinstr_prfcnt_context *kinstr_ctx)
 */
 void kbase_kinstr_prfcnt_resume(struct kbase_kinstr_prfcnt_context *kinstr_ctx);

+#if MALI_KERNEL_TEST_API
+/**
+ * kbasep_kinstr_prfcnt_get_block_info_list() - Get list of all block types
+ *                                              with their information.
+ * @metadata:  Non-NULL pointer to the hardware counter metadata.
+ * @block_set: Which SET the blocks will represent.
+ * @item_arr:  Non-NULL pointer to array of enumeration items to populate.
+ * @arr_idx:   Non-NULL pointer to index of array @item_arr.
+ *
+ * Populate list of counter blocks with information for enumeration.
+ *
+ * Return: 0 on success, else error code.
+ */
+int kbasep_kinstr_prfcnt_get_block_info_list(const struct kbase_hwcnt_metadata *metadata,
+					     size_t block_set, struct prfcnt_enum_item *item_arr,
+					     size_t *arr_idx);
+
+/**
+ * kbasep_kinstr_prfcnt_get_sample_md_count() - Get count of sample
+ *                                              metadata items.
+ * @metadata: Non-NULL pointer to the hardware counter metadata.
+ *
+ * Return: Number of metadata items for available blocks in each sample.
+ */
+size_t kbasep_kinstr_prfcnt_get_sample_md_count(const struct kbase_hwcnt_metadata *metadata);
+
+/**
+ * kbasep_kinstr_prfcnt_set_block_meta_items() - Populate a sample's block meta
+ *                                               item array.
+ * @dst:             Non-NULL pointer to the sample's dump buffer object.
+ * @block_meta_base: Non-NULL double pointer to the start of the block meta
+ *                   data items.
+ * @base_addr:       Address of allocated pages for array of samples. Used
+ *                   to calculate offset of block values.
+ * @counter_set:     The SET which blocks represent.
+ *
+ * Return: 0 on success, else error code.
+ */
+int kbasep_kinstr_prfcnt_set_block_meta_items(struct kbase_hwcnt_dump_buffer *dst,
+					      struct prfcnt_metadata **block_meta_base,
+					      u64 base_addr, u8 counter_set);
+#endif /* MALI_KERNEL_TEST_API */
+
 /**
 * kbase_kinstr_prfcnt_enum_info - Enumerate performance counter information.
 * @kinstr_ctx: Non-NULL pointer to the kinstr_prfcnt context.
--- a/drivers/gpu/arm/bifrost/mali_kbase_mem.c
+++ b/drivers/gpu/arm/bifrost/mali_kbase_mem.c
@@ -4468,8 +4468,8 @@ void kbase_trace_jit_report_gpu_mem_trace_enabled(struct kbase_context *kctx,

 	addr_start = reg->heap_info_gpu_addr - jit_report_gpu_mem_offset;

-	ptr = kbase_vmap(kctx, addr_start, KBASE_JIT_REPORT_GPU_MEM_SIZE,
-			&mapping);
+	ptr = kbase_vmap_prot(kctx, addr_start, KBASE_JIT_REPORT_GPU_MEM_SIZE,
+			KBASE_REG_CPU_RD, &mapping);
 	if (!ptr) {
 		dev_warn(kctx->kbdev->dev,
 				"%s: JIT start=0x%llx unable to map memory near end pointer %llx\n",
--- a/Show More
+++ b/Show More