diff --git a/Documentation/ABI/testing/sysfs-device-mali b/Documentation/ABI/testing/sysfs-device-mali
index 1ec265c5add4..12a1667feeb2 100644
--- a/Documentation/ABI/testing/sysfs-device-mali
+++ b/Documentation/ABI/testing/sysfs-device-mali
@@ -341,8 +341,7 @@ Description:
                 device-driver that supports a CSF GPU.
 
 		Used to enable firmware logs, logging levels valid values
-		are indicated using 'min and 'max' attribute values
-		values that are read-only.
+		are indicated using 'min' and 'max' attributes, which are read-only.
 
 		Log level can be set using the 'cur' read, write attribute,
 		we can use a valid log level value from min and max range values
diff --git a/Documentation/ABI/testing/sysfs-device-mali-coresight-source b/Documentation/ABI/testing/sysfs-device-mali-coresight-source
index 0f31a6acaa87..58d9085b8bb6 100644
--- a/Documentation/ABI/testing/sysfs-device-mali-coresight-source
+++ b/Documentation/ABI/testing/sysfs-device-mali-coresight-source
@@ -19,7 +19,7 @@ Description:
 
 What:		/sys/bus/coresight/devices/mali-source-etm/is_enabled
 Description:
-        Attribute used to check if Coresight Source ITM is enabled.
+        Attribute used to check if Coresight Source ETM is enabled.
 
 What:		/sys/bus/coresight/devices/mali-source-etm/trcconfigr
 Description:
diff --git a/Documentation/devicetree/bindings/arm/arm,coresight-mali-source.yaml b/Documentation/devicetree/bindings/arm/arm,coresight-mali-source.yaml
new file mode 100644
index 000000000000..d844ad10932c
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/arm,coresight-mali-source.yaml
@@ -0,0 +1,163 @@
+# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
+#
+# (C) COPYRIGHT 2022-2024 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU license.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+#
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/arm/arm,coresight-mali-source.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: ARM CoreSight Mali Source integration
+
+maintainers:
+  - ARM Ltd.
+
+description: |
+  See Documentation/trace/coresight/coresight.rst for detailed information
+  about Coresight.
+
+  This documentation will cover Mali specific devicetree integration.
+
+  References to Sink ports are given as examples. Access to Sink is specific
+  to an implementation and would require dedicated kernel modules.
+
+  Arm Mali GPUs support 3 different sources: ITM, ETM and ELA.
+
+  ELA source configuration via SysFS entries:
+
+    The register values used by CoreSight for ELA can be configured using SysFS
+    interfaces. This implicitly includes configuring the ELA for independent or
+    shared JCN request and response channels.
+
+properties:
+  compatible:
+    enum:
+      - arm,coresight-mali-source-itm
+      - arm,coresight-mali-source-etm
+      - arm,coresight-mali-source-ela
+
+  gpu:
+    minItems: 1
+    maxItems: 1
+    description:
+      Phandle to a Mali GPU definition
+
+  port:
+    description:
+      Output connection to CoreSight Sink Trace bus.
+
+      Legacy binding between Coresight Sources and CoreSight Sink.
+      For Linux kernel < v4.20.
+    $ref: /schemas/graph.yaml#/properties/port
+
+  out-ports:
+    description:
+      Binding between Coresight Sources and CoreSight Sink.
+      For Linux kernel >= v4.20.
+    $ref: /schemas/graph.yaml#/properties/ports
+
+    properties:
+      port:
+        description: Output connection to CoreSight Sink Trace bus.
+        $ref: /schemas/graph.yaml#/properties/port
+
+required:
+  - compatible
+  - gpu
+  - port
+  - out-ports
+
+additionalProperties: false
+
+examples:
+
+# Source nodes without legacy CoreSight connections
+  - |
+    mali-source-itm {
+        compatible = "arm,coresight-mali-source-itm";
+        gpu = <&gpu>;
+
+        out-ports {
+            port {
+                mali_source_itm_out_port0: endpoint {
+                    remote-endpoint = <&mali_sink_in_port0>;
+                };
+            };
+        };
+    };
+
+    mali-source-ela {
+        compatible = "arm,coresight-mali-source-ela";
+        gpu = <&gpu>;
+
+        out-ports {
+            port {
+                mali_source_ela_out_port0: endpoint {
+                    remote-endpoint = <&mali_sink_in_port1>;
+                };
+            };
+        };
+    };
+
+    mali-source-etm {
+        compatible = "arm,coresight-mali-source-etm";
+        gpu = <&gpu>;
+
+        out-ports {
+            port {
+                mali_source_etm_out_port0: endpoint {
+                    remote-endpoint = <&mali_sink_in_port2>;
+                };
+            };
+        };
+    };
+
+# Source nodes with legacy CoreSight connections
+  - |
+    mali-source-itm {
+        compatible = "arm,coresight-mali-source-itm";
+        gpu = <&gpu>;
+
+        port {
+            mali_source_itm_out_port0: endpoint {
+                remote-endpoint = <&mali_sink_in_port0>;
+            };
+        };
+    };
+
+    mali-source-etm {
+        compatible = "arm,coresight-mali-source-etm";
+        gpu = <&gpu>;
+
+        port {
+            mali_source_etm_out_port0: endpoint {
+                remote-endpoint = <&mali_sink_in_port1>;
+            };
+        };
+    };
+
+    mali-source-ela {
+        compatible = "arm,coresight-mali-source-ela";
+        gpu = <&gpu>;
+
+        port {
+            mali_source_ela_out_port0: endpoint {
+                remote-endpoint = <&mali_sink_in_port2>;
+            };
+        };
+    };
diff --git a/Documentation/devicetree/bindings/arm/mali-bifrost.txt b/Documentation/devicetree/bindings/arm/mali-bifrost.txt
index 85672c6c6258..8ada052ebe56 100644
--- a/Documentation/devicetree/bindings/arm/mali-bifrost.txt
+++ b/Documentation/devicetree/bindings/arm/mali-bifrost.txt
@@ -1,6 +1,6 @@
 # SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
 #
-# (C) COPYRIGHT 2013-2023 ARM Limited. All rights reserved.
+# (C) COPYRIGHT 2013-2024 ARM Limited. All rights reserved.
 #
 # This program is free software and is provided to you under the terms of the
 # GNU General Public License version 2 as published by the Free Software
@@ -111,7 +111,10 @@ for details.
 -  idvs-group-size : Override the IDVS group size value. Tasks are sent to
 		     cores in groups of N + 1, so i.e. 0xF means 16 tasks.
 		     Valid values are between 0 to 0x3F (including).
--  l2-size : Override L2 cache size on GPU that supports it
+-  l2-size : Override L2 cache size on GPU that supports it. Value should be larger than the minimum
+             size of 1KiB and smaller than the maximum size, which is hardware integration dependent.
+             The value passed should be log2 of the cache size in bytes.
+             For example, for a cache size of 1KiB, 0xa should be passed.
 -  l2-hash : Override L2 hash function on GPU that supports it
 -  l2-hash-values : Override L2 hash function using provided hash values, on GPUs that supports it.
 		    It is mutually exclusive with 'l2-hash'. Only one or the other must be
@@ -129,6 +132,10 @@ for details.
 		   set and the setting coresponding to the SYSC_ALLOC register.
 - propagate-bits: Used to write to L2_CONFIG.PBHA_HWU. This bitset establishes which
 		   PBHA bits are propagated on the AXI bus.
+- mma-wa-id: Sets the PBHA ID to be used for the PBHA override based MMA violation workaround.
+	     The read and write allocation override bits for the PBHA are set to NONCACHEABLE
+	     and the driver encodes the PBHA ID in the PTEs where this workaround is to be applied.
+	     Valid values are from 1 to 15.
 
 
 Example for a Mali GPU with 1 clock and 1 regulator:
@@ -237,7 +244,8 @@ gpu@0xfc010000 {
     ...
     pbha {
         int-id-override = <2 0x32>, <9 0x05>, <16 0x32>;
-        propagate-bits = /bits/ 4 <0x03>;
+        propagate-bits = /bits/ 8 <0x03>;
+        mma-wa-id = <2>;
     };
     ...
 };
diff --git a/drivers/base/arm/Kconfig b/drivers/base/arm/Kconfig
index e8bb8a40d2c5..c24a377723ca 100644
--- a/drivers/base/arm/Kconfig
+++ b/drivers/base/arm/Kconfig
@@ -1,6 +1,6 @@
 # SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
 #
-# (C) COPYRIGHT 2021-2023 ARM Limited. All rights reserved.
+# (C) COPYRIGHT 2021-2024 ARM Limited. All rights reserved.
 #
 # This program is free software and is provided to you under the terms of the
 # GNU General Public License version 2 as published by the Free Software
diff --git a/drivers/base/arm/Makefile b/drivers/base/arm/Makefile
index 4aa68f89d3d9..42071f769729 100644
--- a/drivers/base/arm/Makefile
+++ b/drivers/base/arm/Makefile
@@ -125,6 +125,8 @@ CFLAGS_MODULE += -Wno-sign-compare
 CFLAGS_MODULE += -Wno-shift-negative-value
 # This flag is needed to avoid build errors on older kernels
 CFLAGS_MODULE += $(call cc-option, -Wno-cast-function-type)
+# The following ensures the stack frame does not get larger than a page
+CFLAGS_MODULE += -Wframe-larger-than=4096
 
 KBUILD_CPPFLAGS += -DKBUILD_EXTRA_WARN1
 
diff --git a/drivers/base/arm/memory_group_manager/memory_group_manager.c b/drivers/base/arm/memory_group_manager/memory_group_manager.c
index 389b0f051f3a..da4a0c39e63a 100644
--- a/drivers/base/arm/memory_group_manager/memory_group_manager.c
+++ b/drivers/base/arm/memory_group_manager/memory_group_manager.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
 /*
  *
- * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2024 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -51,10 +51,6 @@ static inline vm_fault_t vmf_insert_pfn_prot(struct vm_area_struct *vma, unsigne
 }
 #endif
 
-#define PTE_PBHA_SHIFT (59)
-#define PTE_PBHA_MASK ((uint64_t)0xf << PTE_PBHA_SHIFT)
-#define PTE_RES_BIT_MULTI_AS_SHIFT (63)
-
 #define IMPORTED_MEMORY_ID (MEMORY_GROUP_MANAGER_NR_GROUPS - 1)
 
 /**
@@ -263,7 +259,7 @@ static struct page *example_mgm_alloc_page(struct memory_group_manager_device *m
 	} else {
 		struct mgm_groups *data = mgm_dev->data;
 
-		dev_err(data->dev, "alloc_pages failed\n");
+		dev_dbg(data->dev, "alloc_pages failed\n");
 	}
 
 	return p;
@@ -303,7 +299,8 @@ static int example_mgm_get_import_memory_id(struct memory_group_manager_device *
 }
 
 static u64 example_mgm_update_gpu_pte(struct memory_group_manager_device *const mgm_dev,
-				      unsigned int const group_id, int const mmu_level, u64 pte)
+				      unsigned int const group_id, unsigned int const pbha_id,
+				      unsigned int pte_flags, int const mmu_level, u64 pte)
 {
 	struct mgm_groups *const data = mgm_dev->data;
 
@@ -313,7 +310,10 @@ static u64 example_mgm_update_gpu_pte(struct memory_group_manager_device *const
 	if (WARN_ON(group_id >= MEMORY_GROUP_MANAGER_NR_GROUPS))
 		return pte;
 
-	pte |= ((u64)group_id << PTE_PBHA_SHIFT) & PTE_PBHA_MASK;
+	if (pte_flags & BIT(MMA_VIOLATION)) {
+		pr_warn_once("MMA violation! Applying PBHA override workaround to PTE\n");
+		pte |= ((u64)pbha_id << PTE_PBHA_SHIFT) & PTE_PBHA_MASK;
+	}
 
 	/* Address could be translated into a different bus address here */
 	pte |= ((u64)1 << PTE_RES_BIT_MULTI_AS_SHIFT);
@@ -366,6 +366,16 @@ static vm_fault_t example_mgm_vmf_insert_pfn_prot(struct memory_group_manager_de
 	return fault;
 }
 
+static bool example_mgm_get_import_memory_cached_access_permitted(
+	struct memory_group_manager_device *mgm_dev,
+	struct memory_group_manager_import_data *import_data)
+{
+	CSTD_UNUSED(mgm_dev);
+	CSTD_UNUSED(import_data);
+
+	return true;
+}
+
 static int mgm_initialize_data(struct mgm_groups *mgm_data)
 {
 	int i;
@@ -412,6 +422,8 @@ static int memory_group_manager_probe(struct platform_device *pdev)
 	mgm_dev->ops.mgm_vmf_insert_pfn_prot = example_mgm_vmf_insert_pfn_prot;
 	mgm_dev->ops.mgm_update_gpu_pte = example_mgm_update_gpu_pte;
 	mgm_dev->ops.mgm_pte_to_original_pte = example_mgm_pte_to_original_pte;
+	mgm_dev->ops.mgm_get_import_memory_cached_access_permitted =
+		example_mgm_get_import_memory_cached_access_permitted;
 
 	mgm_data = kzalloc(sizeof(*mgm_data), GFP_KERNEL);
 	if (!mgm_data) {
diff --git a/drivers/gpu/arm/bifrost/Kbuild b/drivers/gpu/arm/bifrost/Kbuild
index b35fcee88baa..d64c439fbabc 100644
--- a/drivers/gpu/arm/bifrost/Kbuild
+++ b/drivers/gpu/arm/bifrost/Kbuild
@@ -1,6 +1,6 @@
 # SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
 #
-# (C) COPYRIGHT 2012-2023 ARM Limited. All rights reserved.
+# (C) COPYRIGHT 2012-2024 ARM Limited. All rights reserved.
 #
 # This program is free software and is provided to you under the terms of the
 # GNU General Public License version 2 as published by the Free Software
@@ -69,7 +69,7 @@ endif
 #
 
 # Driver version string which is returned to userspace via an ioctl
-MALI_RELEASE_NAME ?= '"g22p0-01eac0"'
+MALI_RELEASE_NAME ?= '"g25p0-00eac0"'
 # Set up defaults if not defined by build system
 ifeq ($(CONFIG_MALI_BIFROST_DEBUG), y)
     MALI_UNIT_TEST = 1
@@ -104,7 +104,6 @@ endif
 #
 # Experimental features must default to disabled, e.g.:
 # MALI_EXPERIMENTAL_FEATURE ?= 0
-MALI_INCREMENTAL_RENDERING_JM ?= 0
 
 #
 # ccflags
@@ -117,7 +116,6 @@ ccflags-y = \
     -DMALI_COVERAGE=$(MALI_COVERAGE) \
     -DMALI_RELEASE_NAME=$(MALI_RELEASE_NAME) \
     -DMALI_JIT_PRESSURE_LIMIT_BASE=$(MALI_JIT_PRESSURE_LIMIT_BASE) \
-    -DMALI_INCREMENTAL_RENDERING_JM=$(MALI_INCREMENTAL_RENDERING_JM) \
     -DMALI_PLATFORM_DIR=$(MALI_PLATFORM_DIR)
 
 
@@ -212,6 +210,7 @@ endif
 
 
 INCLUDE_SUBDIR = \
+    $(src)/arbiter/Kbuild \
     $(src)/context/Kbuild \
     $(src)/debug/Kbuild \
     $(src)/device/Kbuild \
@@ -228,9 +227,6 @@ ifeq ($(CONFIG_MALI_CSF_SUPPORT),y)
     INCLUDE_SUBDIR += $(src)/csf/Kbuild
 endif
 
-ifeq ($(CONFIG_MALI_ARBITER_SUPPORT),y)
-    INCLUDE_SUBDIR += $(src)/arbiter/Kbuild
-endif
 
 ifeq ($(CONFIG_MALI_BIFROST_DEVFREQ),y)
     ifeq ($(CONFIG_DEVFREQ_THERMAL),y)
diff --git a/drivers/gpu/arm/bifrost/Kconfig b/drivers/gpu/arm/bifrost/Kconfig
index 22fdfe80405a..b8ceff10e250 100644
--- a/drivers/gpu/arm/bifrost/Kconfig
+++ b/drivers/gpu/arm/bifrost/Kconfig
@@ -1,6 +1,6 @@
 # SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
 #
-# (C) COPYRIGHT 2012-2023 ARM Limited. All rights reserved.
+# (C) COPYRIGHT 2012-2024 ARM Limited. All rights reserved.
 #
 # This program is free software and is provided to you under the terms of the
 # GNU General Public License version 2 as published by the Free Software
@@ -63,6 +63,8 @@ config MALI_BIFROST_NO_MALI
 	  All calls to the simulated hardware will complete immediately as if the hardware
 	  completed the task.
 
+endchoice
+
 config MALI_NO_MALI_DEFAULT_GPU
 	string "Default GPU for No Mali"
 	depends on MALI_BIFROST_NO_MALI
@@ -70,8 +72,12 @@ config MALI_NO_MALI_DEFAULT_GPU
 	help
 	  This option sets the default GPU to identify as for No Mali builds.
 
-
-endchoice
+config MALI_IS_FPGA
+	bool "Enable build of Mali kernel driver for FPGA"
+	depends on MALI_BIFROST
+	default n
+	help
+	  Enable this option to build the Mali kernel driver for an FPGA platform.
 
 menu "Platform specific options"
 source "$(MALI_KCONFIG_EXT_PREFIX)drivers/gpu/arm/bifrost/platform/Kconfig"
@@ -214,16 +220,6 @@ config MALI_CORESTACK
 
 	  If unsure, say N.
 
-comment "Platform options"
-	depends on MALI_BIFROST && MALI_BIFROST_EXPERT
-
-config MALI_BIFROST_ERROR_INJECT
-	bool "Enable No Mali error injection"
-	depends on MALI_BIFROST && MALI_BIFROST_EXPERT && MALI_BIFROST_NO_MALI
-	default n
-	help
-	  Enables insertion of errors to test module failure and recovery mechanisms.
-
 comment "Debug options"
 	depends on MALI_BIFROST && MALI_BIFROST_EXPERT
 
@@ -304,7 +300,7 @@ endchoice
 
 config MALI_PRFCNT_SET_SELECT_VIA_DEBUG_FS
 	bool "Enable runtime selection of performance counters set via debugfs"
-	depends on MALI_BIFROST && MALI_BIFROST_EXPERT && DEBUG_FS
+	depends on MALI_BIFROST && MALI_BIFROST_EXPERT && DEBUG_FS && !MALI_CSF_SUPPORT
 	default n
 	help
 	  Select this option to make the secondary set of performance counters
@@ -351,7 +347,7 @@ config MALI_PWRSOFT_765
 	  changes have been backported say Y to avoid compilation errors.
 
 config MALI_HW_ERRATA_1485982_NOT_AFFECTED
-	bool "Disable workaround for BASE_HW_ISSUE_GPU2017_1336"
+	bool "Disable workaround for KBASE_HW_ISSUE_GPU2017_1336"
 	depends on MALI_BIFROST && MALI_BIFROST_EXPERT
 	default n
 	help
@@ -363,7 +359,7 @@ config MALI_HW_ERRATA_1485982_NOT_AFFECTED
 	  coherency mode requires the L2 to be turned off.
 
 config MALI_HW_ERRATA_1485982_USE_CLOCK_ALTERNATIVE
-	bool "Use alternative workaround for BASE_HW_ISSUE_GPU2017_1336"
+	bool "Use alternative workaround for KBASE_HW_ISSUE_GPU2017_1336"
 	depends on MALI_BIFROST && MALI_BIFROST_EXPERT && !MALI_HW_ERRATA_1485982_NOT_AFFECTED
 	default n
 	help
diff --git a/drivers/gpu/arm/bifrost/Makefile b/drivers/gpu/arm/bifrost/Makefile
index 69dbe3750a10..e10033aabc57 100644
--- a/drivers/gpu/arm/bifrost/Makefile
+++ b/drivers/gpu/arm/bifrost/Makefile
@@ -1,6 +1,6 @@
 # SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
 #
-# (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved.
+# (C) COPYRIGHT 2010-2024 ARM Limited. All rights reserved.
 #
 # This program is free software and is provided to you under the terms of the
 # GNU General Public License version 2 as published by the Free Software
@@ -41,11 +41,12 @@ ifeq ($(MALI_KCONFIG_EXT_PREFIX),)
         CONFIG_MALI_BIFROST_GATOR_SUPPORT ?= y
         CONFIG_MALI_ARBITRATION ?= n
         CONFIG_MALI_PARTITION_MANAGER ?= n
-        CONFIG_MALI_64BIT_HW_ACCESS ?= n
+
 
         ifneq ($(CONFIG_MALI_BIFROST_NO_MALI),y)
-            # Prevent misuse when CONFIG_MALI_BIFROST_NO_MALI=y
+            # Prevent misuse when CONFIG_MALI_BIFROST_NO_MALI!=y
             CONFIG_MALI_REAL_HW ?= y
+        else
             CONFIG_MALI_CORESIGHT = n
         endif
 
@@ -76,7 +77,6 @@ ifeq ($(MALI_KCONFIG_EXT_PREFIX),)
             else
                 # Prevent misuse when CONFIG_MALI_BIFROST_NO_MALI=n
                 CONFIG_MALI_REAL_HW = y
-                CONFIG_MALI_BIFROST_ERROR_INJECT = n
             endif
 
 
@@ -108,7 +108,6 @@ ifeq ($(MALI_KCONFIG_EXT_PREFIX),)
             CONFIG_MALI_JOB_DUMP = n
             CONFIG_MALI_BIFROST_NO_MALI = n
             CONFIG_MALI_REAL_HW = y
-            CONFIG_MALI_BIFROST_ERROR_INJECT = n
             CONFIG_MALI_HW_ERRATA_1485982_NOT_AFFECTED = n
             CONFIG_MALI_HW_ERRATA_1485982_USE_CLOCK_ALTERNATIVE = n
             CONFIG_MALI_PRFCNT_SET_SELECT_VIA_DEBUG_FS = n
@@ -157,7 +156,6 @@ ifeq ($(MALI_KCONFIG_EXT_PREFIX),)
         CONFIG_MALI_BIFROST \
         CONFIG_MALI_CSF_SUPPORT \
         CONFIG_MALI_BIFROST_GATOR_SUPPORT \
-        CONFIG_MALI_ARBITER_SUPPORT \
         CONFIG_MALI_ARBITRATION \
         CONFIG_MALI_PARTITION_MANAGER \
         CONFIG_MALI_REAL_HW \
@@ -171,7 +169,7 @@ ifeq ($(MALI_KCONFIG_EXT_PREFIX),)
         CONFIG_MALI_PWRSOFT_765 \
         CONFIG_MALI_JOB_DUMP \
         CONFIG_MALI_BIFROST_NO_MALI \
-        CONFIG_MALI_BIFROST_ERROR_INJECT \
+        CONFIG_MALI_IS_FPGA \
         CONFIG_MALI_HW_ERRATA_1485982_NOT_AFFECTED \
         CONFIG_MALI_HW_ERRATA_1485982_USE_CLOCK_ALTERNATIVE \
         CONFIG_MALI_PRFCNT_SET_PRIMARY \
@@ -272,6 +270,8 @@ CFLAGS_MODULE += -Wmissing-field-initializers
 CFLAGS_MODULE += -Wno-type-limits
 CFLAGS_MODULE += $(call cc-option, -Wmaybe-uninitialized)
 CFLAGS_MODULE += $(call cc-option, -Wunused-macros)
+# The following ensures the stack frame does not get larger than a page
+CFLAGS_MODULE += -Wframe-larger-than=4096
 
 KBUILD_CPPFLAGS += -DKBUILD_EXTRA_WARN2
 
diff --git a/drivers/gpu/arm/bifrost/arbiter/Kbuild b/drivers/gpu/arm/bifrost/arbiter/Kbuild
index 2e6b111441ca..de339ccae394 100644
--- a/drivers/gpu/arm/bifrost/arbiter/Kbuild
+++ b/drivers/gpu/arm/bifrost/arbiter/Kbuild
@@ -1,6 +1,6 @@
 # SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
 #
-# (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
+# (C) COPYRIGHT 2019-2024 ARM Limited. All rights reserved.
 #
 # This program is free software and is provided to you under the terms of the
 # GNU General Public License version 2 as published by the Free Software
@@ -21,3 +21,4 @@
 bifrost_kbase-y += \
     arbiter/mali_kbase_arbif.o \
     arbiter/mali_kbase_arbiter_pm.o
+
diff --git a/drivers/gpu/arm/bifrost/arbiter/mali_kbase_arbif.c b/drivers/gpu/arm/bifrost/arbiter/mali_kbase_arbif.c
index c290dd6b086f..49b42a6ec2c0 100644
--- a/drivers/gpu/arm/bifrost/arbiter/mali_kbase_arbif.c
+++ b/drivers/gpu/arm/bifrost/arbiter/mali_kbase_arbif.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
 /*
  *
- * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2024 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -108,6 +108,7 @@ static void on_gpu_stop(struct device *dev)
 	}
 
 	KBASE_TLSTREAM_TL_ARBITER_STOP_REQUESTED(kbdev, kbdev);
+	KBASE_KTRACE_ADD(kbdev, ARB_GPU_STOP_REQUESTED, NULL, 0);
 	kbase_arbiter_pm_vm_event(kbdev, KBASE_VM_GPU_STOP_EVT);
 }
 
@@ -133,6 +134,7 @@ static void on_gpu_granted(struct device *dev)
 	}
 
 	KBASE_TLSTREAM_TL_ARBITER_GRANTED(kbdev, kbdev);
+	KBASE_KTRACE_ADD(kbdev, ARB_GPU_GRANTED, NULL, 0);
 	kbase_arbiter_pm_vm_event(kbdev, KBASE_VM_GPU_GRANTED_EVT);
 }
 
@@ -156,10 +158,98 @@ static void on_gpu_lost(struct device *dev)
 		dev_err(dev, "%s(): kbdev is NULL", __func__);
 		return;
 	}
-
+	KBASE_TLSTREAM_TL_ARBITER_LOST(kbdev, kbdev);
+	KBASE_KTRACE_ADD(kbdev, ARB_GPU_LOST, NULL, 0);
 	kbase_arbiter_pm_vm_event(kbdev, KBASE_VM_GPU_LOST_EVT);
 }
 
+/**
+ * kbase_arbif_of_init() - Look up the arbiter interface via the devicetree.
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Resolves the "arbiter-if" phandle of the GPU node (falling back to the
+ * "arbiter_if" spelling), takes a reference on the arbiter interface platform
+ * device and on its driver module, and stores the device and its driver data
+ * in kbdev->arb. The references are released by kbase_arbif_of_term().
+ *
+ * Return:
+ * * 0 - the arbiter interface was found and is ready for use
+ * * -ENODEV - CONFIG_OF is disabled or no arbiter interface is described
+ * * -EPROBE_DEFER - the arbiter interface device or its driver is not ready
+ */
+static int kbase_arbif_of_init(struct kbase_device *kbdev)
+{
+	struct arbiter_if_dev *arb_if;
+	struct device_node *arbiter_if_node;
+	struct platform_device *pdev;
+
+	if (!IS_ENABLED(CONFIG_OF)) {
+		/*
+		 * Return -ENODEV in the event CONFIG_OF is not available and let the
+		 * internal AW check for suitability for arbitration.
+		 */
+		return -ENODEV;
+	}
+
+	arbiter_if_node = of_parse_phandle(kbdev->dev->of_node, "arbiter-if", 0);
+	if (!arbiter_if_node)
+		arbiter_if_node = of_parse_phandle(kbdev->dev->of_node, "arbiter_if", 0);
+	if (!arbiter_if_node) {
+		dev_dbg(kbdev->dev, "No arbiter_if in Device Tree");
+		/* no arbiter interface defined in device tree */
+		kbdev->arb.arb_dev = NULL;
+		kbdev->arb.arb_if = NULL;
+		return -ENODEV;
+	}
+
+	pdev = of_find_device_by_node(arbiter_if_node);
+	/* The node reference taken by of_parse_phandle() is no longer needed */
+	of_node_put(arbiter_if_node);
+	if (!pdev) {
+		dev_err(kbdev->dev, "Failed to find arbiter_if device");
+		return -EPROBE_DEFER;
+	}
+
+	if (!pdev->dev.driver || !try_module_get(pdev->dev.driver->owner)) {
+		dev_err(kbdev->dev, "arbiter_if driver not available");
+		put_device(&pdev->dev);
+		return -EPROBE_DEFER;
+	}
+
+	arb_if = platform_get_drvdata(pdev);
+	if (!arb_if) {
+		dev_err(kbdev->dev, "arbiter_if driver not ready");
+		module_put(pdev->dev.driver->owner);
+		put_device(&pdev->dev);
+		return -EPROBE_DEFER;
+	}
+
+	/* Only publish the device once its references are known to be kept */
+	kbdev->arb.arb_dev = &pdev->dev;
+	kbdev->arb.arb_if = arb_if;
+	return 0;
+}
+
+/**
+ * kbase_arbif_of_term() - Release the arbiter interface device references.
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Drops the module and device references held through kbdev->arb.arb_dev, if
+ * set, and clears that pointer. Does nothing when CONFIG_OF is disabled.
+ */
+static void kbase_arbif_of_term(struct kbase_device *kbdev)
+{
+	if (!IS_ENABLED(CONFIG_OF))
+		return;
+
+	if (kbdev->arb.arb_dev) {
+		module_put(kbdev->arb.arb_dev->driver->owner);
+		put_device(kbdev->arb.arb_dev);
+	}
+	kbdev->arb.arb_dev = NULL;
+}
+
+
 /**
  * kbase_arbif_init() - Kbase Arbiter interface initialisation.
  * @kbdev: The kbase device structure for the device (must be a valid pointer)
@@ -174,47 +239,21 @@ static void on_gpu_lost(struct device *dev)
  */
 int kbase_arbif_init(struct kbase_device *kbdev)
 {
-#if IS_ENABLED(CONFIG_OF)
 	struct arbiter_if_arb_vm_ops ops;
 	struct arbiter_if_dev *arb_if;
-	struct device_node *arbiter_if_node;
-	struct platform_device *pdev;
-	int err;
+	int err = 0;
 
-	dev_dbg(kbdev->dev, "%s\n", __func__);
+	/* Tries to init with 'arbiter-if' if present in devicetree */
+	err = kbase_arbif_of_init(kbdev);
 
-	arbiter_if_node = of_parse_phandle(kbdev->dev->of_node, "arbiter-if", 0);
-	if (!arbiter_if_node)
-		arbiter_if_node = of_parse_phandle(kbdev->dev->of_node, "arbiter_if", 0);
-	if (!arbiter_if_node) {
-		dev_dbg(kbdev->dev, "No arbiter_if in Device Tree\n");
-		/* no arbiter interface defined in device tree */
-		kbdev->arb.arb_dev = NULL;
-		kbdev->arb.arb_if = NULL;
-		return 0;
+	if (err == -ENODEV) {
+		/* devicetree does not support arbitration */
+		return -EPERM;
 	}
 
-	pdev = of_find_device_by_node(arbiter_if_node);
-	if (!pdev) {
-		dev_err(kbdev->dev, "Failed to find arbiter_if device\n");
-		return -EPROBE_DEFER;
-	}
+	if (err)
+		return err;
 
-	if (!pdev->dev.driver || !try_module_get(pdev->dev.driver->owner)) {
-		dev_err(kbdev->dev, "arbiter_if driver not available\n");
-		put_device(&pdev->dev);
-		return -EPROBE_DEFER;
-	}
-	kbdev->arb.arb_dev = &pdev->dev;
-	arb_if = platform_get_drvdata(pdev);
-	if (!arb_if) {
-		dev_err(kbdev->dev, "arbiter_if driver not ready\n");
-		module_put(pdev->dev.driver->owner);
-		put_device(&pdev->dev);
-		return -EPROBE_DEFER;
-	}
-
-	kbdev->arb.arb_if = arb_if;
 	ops.arb_vm_gpu_stop = on_gpu_stop;
 	ops.arb_vm_gpu_granted = on_gpu_granted;
 	ops.arb_vm_gpu_lost = on_gpu_lost;
@@ -225,25 +264,35 @@ int kbase_arbif_init(struct kbase_device *kbdev)
 	kbdev->arb.arb_freq.freq_updated = false;
 	mutex_init(&kbdev->arb.arb_freq.arb_freq_lock);
 
-	/* register kbase arbiter_if callbacks */
-	if (arb_if->vm_ops.vm_arb_register_dev) {
-		err = arb_if->vm_ops.vm_arb_register_dev(arb_if, kbdev->dev, &ops);
-		if (err) {
-			dev_err(&pdev->dev, "Failed to register with arbiter. (err = %d)\n", err);
-			module_put(pdev->dev.driver->owner);
-			put_device(&pdev->dev);
-			if (err != -EPROBE_DEFER)
-				err = -EFAULT;
-			return err;
-		}
+	arb_if = kbdev->arb.arb_if;
+
+	if (arb_if == NULL) {
+		dev_err(kbdev->dev, "No arbiter interface present");
+		goto failure_term;
+	}
+
+	if (!arb_if->vm_ops.vm_arb_register_dev) {
+		dev_err(kbdev->dev, "arbiter_if registration callback not present");
+		goto failure_term;
+	}
+
+	/* register kbase arbiter_if callbacks */
+	err = arb_if->vm_ops.vm_arb_register_dev(arb_if, kbdev->dev, &ops);
+	if (err) {
+		dev_err(kbdev->dev, "Failed to register with arbiter. (err = %d)", err);
+		goto failure_term;
 	}
 
-#else /* CONFIG_OF */
-	dev_dbg(kbdev->dev, "No arbiter without Device Tree support\n");
-	kbdev->arb.arb_dev = NULL;
-	kbdev->arb.arb_if = NULL;
-#endif
 	return 0;
+
+failure_term:
+	{
+		kbase_arbif_of_term(kbdev);
+	}
+
+	if (err != -EPROBE_DEFER)
+		err = -EFAULT;
+	return err;
 }
 
 /**
@@ -256,16 +305,13 @@ void kbase_arbif_destroy(struct kbase_device *kbdev)
 {
 	struct arbiter_if_dev *arb_if = kbdev->arb.arb_if;
 
-	if (arb_if && arb_if->vm_ops.vm_arb_unregister_dev) {
-		dev_dbg(kbdev->dev, "%s\n", __func__);
+	if (arb_if && arb_if->vm_ops.vm_arb_unregister_dev)
 		arb_if->vm_ops.vm_arb_unregister_dev(kbdev->arb.arb_if);
+
+	{
+		kbase_arbif_of_term(kbdev);
 	}
 	kbdev->arb.arb_if = NULL;
-	if (kbdev->arb.arb_dev) {
-		module_put(kbdev->arb.arb_dev->driver->owner);
-		put_device(kbdev->arb.arb_dev);
-	}
-	kbdev->arb.arb_dev = NULL;
 }
 
 /**
@@ -278,10 +324,8 @@ void kbase_arbif_get_max_config(struct kbase_device *kbdev)
 {
 	struct arbiter_if_dev *arb_if = kbdev->arb.arb_if;
 
-	if (arb_if && arb_if->vm_ops.vm_arb_get_max_config) {
-		dev_dbg(kbdev->dev, "%s\n", __func__);
+	if (arb_if && arb_if->vm_ops.vm_arb_get_max_config)
 		arb_if->vm_ops.vm_arb_get_max_config(arb_if);
-	}
 }
 
 /**
@@ -295,8 +339,8 @@ void kbase_arbif_gpu_request(struct kbase_device *kbdev)
 	struct arbiter_if_dev *arb_if = kbdev->arb.arb_if;
 
 	if (arb_if && arb_if->vm_ops.vm_arb_gpu_request) {
-		dev_dbg(kbdev->dev, "%s\n", __func__);
 		KBASE_TLSTREAM_TL_ARBITER_REQUESTED(kbdev, kbdev);
+		KBASE_KTRACE_ADD(kbdev, ARB_GPU_REQUESTED, NULL, 0);
 		arb_if->vm_ops.vm_arb_gpu_request(arb_if);
 	}
 }
@@ -312,10 +356,12 @@ void kbase_arbif_gpu_stopped(struct kbase_device *kbdev, u8 gpu_required)
 	struct arbiter_if_dev *arb_if = kbdev->arb.arb_if;
 
 	if (arb_if && arb_if->vm_ops.vm_arb_gpu_stopped) {
-		dev_dbg(kbdev->dev, "%s\n", __func__);
 		KBASE_TLSTREAM_TL_ARBITER_STOPPED(kbdev, kbdev);
-		if (gpu_required)
+		KBASE_KTRACE_ADD(kbdev, ARB_GPU_STOPPED, NULL, 0);
+		if (gpu_required) {
 			KBASE_TLSTREAM_TL_ARBITER_REQUESTED(kbdev, kbdev);
+			KBASE_KTRACE_ADD(kbdev, ARB_GPU_REQUESTED, NULL, 0);
+		}
 		arb_if->vm_ops.vm_arb_gpu_stopped(arb_if, gpu_required);
 	}
 }
@@ -330,10 +376,8 @@ void kbase_arbif_gpu_active(struct kbase_device *kbdev)
 {
 	struct arbiter_if_dev *arb_if = kbdev->arb.arb_if;
 
-	if (arb_if && arb_if->vm_ops.vm_arb_gpu_active) {
-		dev_dbg(kbdev->dev, "%s\n", __func__);
+	if (arb_if && arb_if->vm_ops.vm_arb_gpu_active)
 		arb_if->vm_ops.vm_arb_gpu_active(arb_if);
-	}
 }
 
 /**
@@ -346,8 +390,6 @@ void kbase_arbif_gpu_idle(struct kbase_device *kbdev)
 {
 	struct arbiter_if_dev *arb_if = kbdev->arb.arb_if;
 
-	if (arb_if && arb_if->vm_ops.vm_arb_gpu_idle) {
-		dev_dbg(kbdev->dev, "vm_arb_gpu_idle\n");
+	if (arb_if && arb_if->vm_ops.vm_arb_gpu_idle)
 		arb_if->vm_ops.vm_arb_gpu_idle(arb_if);
-	}
 }
diff --git a/drivers/gpu/arm/bifrost/arbiter/mali_kbase_arbif.h b/drivers/gpu/arm/bifrost/arbiter/mali_kbase_arbif.h
index 701ffd42f6f7..c77792115e4d 100644
--- a/drivers/gpu/arm/bifrost/arbiter/mali_kbase_arbif.h
+++ b/drivers/gpu/arm/bifrost/arbiter/mali_kbase_arbif.h
@@ -1,7 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
 /*
  *
- * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2024 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -50,6 +50,7 @@ enum kbase_arbif_evt {
 	KBASE_VM_OS_RESUME_EVENT,
 };
 
+
 /**
  * kbase_arbif_init() - Initialize the arbiter interface functionality.
  * @kbdev: The kbase device structure for the device (must be a valid pointer)
diff --git a/drivers/gpu/arm/bifrost/arbiter/mali_kbase_arbiter_pm.c b/drivers/gpu/arm/bifrost/arbiter/mali_kbase_arbiter_pm.c
index 616b0a78cbe5..9b8551609dc7 100644
--- a/drivers/gpu/arm/bifrost/arbiter/mali_kbase_arbiter_pm.c
+++ b/drivers/gpu/arm/bifrost/arbiter/mali_kbase_arbiter_pm.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
 /*
  *
- * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2024 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -48,7 +48,7 @@ MODULE_PARM_DESC(
 	"On a virtualized platform, if the GPU is not granted within this time(ms) kbase will defer the probe");
 
 static void kbase_arbiter_pm_vm_wait_gpu_assignment(struct kbase_device *kbdev);
-static inline bool kbase_arbiter_pm_vm_gpu_assigned_lockheld(struct kbase_device *kbdev);
+static inline bool kbase_arbiter_pm_vm_gpu_assigned_locked(struct kbase_device *kbdev);
 
 /**
  * kbase_arbiter_pm_vm_state_str() - Helper function to get string
@@ -85,7 +85,6 @@ static inline const char *kbase_arbiter_pm_vm_state_str(enum kbase_vm_state stat
 	case KBASE_VM_STATE_SUSPEND_WAIT_FOR_GRANT:
 		return "KBASE_VM_STATE_SUSPEND_WAIT_FOR_GRANT";
 	default:
-		KBASE_DEBUG_ASSERT(false);
 		return "[UnknownState]";
 	}
 }
@@ -117,14 +116,13 @@ static inline const char *kbase_arbiter_pm_vm_event_str(enum kbase_arbif_evt evt
 	case KBASE_VM_REF_EVENT:
 		return "KBASE_VM_REF_EVENT";
 	default:
-		KBASE_DEBUG_ASSERT(false);
 		return "[UnknownEvent]";
 	}
 }
 
 /**
  * kbase_arbiter_pm_vm_set_state() - Sets new kbase_arbiter_vm_state
- * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ * @kbdev: The kbase device structure for the device
  * @new_state: kbase VM new state
  *
  * This function sets the new state for the VM
@@ -201,6 +199,7 @@ static void kbase_arbiter_pm_resume_wq(struct work_struct *data)
 	arb_vm_state->vm_arb_starting = false;
 	mutex_unlock(&arb_vm_state->vm_state_lock);
 	KBASE_TLSTREAM_TL_ARBITER_STARTED(kbdev, kbdev);
+	KBASE_KTRACE_ADD(kbdev, ARB_GPU_STARTED, NULL, 0);
 	dev_dbg(kbdev->dev, "<%s\n", __func__);
 }
 
@@ -229,7 +228,7 @@ static enum hrtimer_restart request_timer_callback(struct hrtimer *timer)
 
 /**
  * start_request_timer() - Start a timer after requesting GPU
- * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ * @kbdev: The kbase device structure for the device
  *
  * Start a timer to track when kbase is waiting for the GPU from the
  * Arbiter.  If the timer expires before GPU is granted, a warning in
@@ -245,7 +244,7 @@ static void start_request_timer(struct kbase_device *kbdev)
 
 /**
  * cancel_request_timer() - Stop the request timer
- * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ * @kbdev: The kbase device structure for the device
  *
  * Stops the request timer once GPU has been granted.  Safe to call
  * even if timer is no longer running.
@@ -260,7 +259,7 @@ static void cancel_request_timer(struct kbase_device *kbdev)
 /**
  * kbase_arbiter_pm_early_init() - Initialize arbiter for VM
  *                                 Paravirtualized use.
- * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ * @kbdev: The kbase device structure for the device
  *
  * Initialize the arbiter and other required resources during the runtime
  * and request the GPU for the VM for the first time.
@@ -272,7 +271,7 @@ int kbase_arbiter_pm_early_init(struct kbase_device *kbdev)
 	int err;
 	struct kbase_arbiter_vm_state *arb_vm_state = NULL;
 
-	arb_vm_state = kmalloc(sizeof(struct kbase_arbiter_vm_state), GFP_KERNEL);
+	arb_vm_state = kzalloc(sizeof(struct kbase_arbiter_vm_state), GFP_KERNEL);
 	if (arb_vm_state == NULL)
 		return -ENOMEM;
 
@@ -297,11 +296,13 @@ int kbase_arbiter_pm_early_init(struct kbase_device *kbdev)
 
 	err = kbase_arbif_init(kbdev);
 	if (err) {
-		dev_err(kbdev->dev, "Failed to initialise arbif module. (err = %d)\n", err);
+		if (err != -EPERM)
+			dev_err(kbdev->dev, "Failed to initialise arbif module. (err = %d)", err);
+
 		goto arbif_init_fail;
 	}
 
-	if (kbdev->arb.arb_if) {
+	if (kbase_has_arbiter(kbdev)) {
 		kbase_arbif_gpu_request(kbdev);
 		dev_dbg(kbdev->dev, "Waiting for initial GPU assignment...\n");
 
@@ -311,7 +312,7 @@ int kbase_arbiter_pm_early_init(struct kbase_device *kbdev)
 					 msecs_to_jiffies((unsigned int)gpu_req_timeout));
 
 		if (!err) {
-			dev_dbg(kbdev->dev,
+			dev_err(kbdev->dev,
 				"Kbase probe Deferred after waiting %d ms to receive GPU_GRANT\n",
 				gpu_req_timeout);
 
@@ -336,7 +337,7 @@ arbif_init_fail:
 
 /**
  * kbase_arbiter_pm_early_term() - Shutdown arbiter and free resources
- * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ * @kbdev: The kbase device structure for the device
  *
  * Clean up all the resources
  */
@@ -344,6 +345,14 @@ void kbase_arbiter_pm_early_term(struct kbase_device *kbdev)
 {
 	struct kbase_arbiter_vm_state *arb_vm_state = kbdev->pm.arb_vm_state;
 
+	if (arb_vm_state == NULL)
+		return;
+
+	if (!kbase_has_arbiter(kbdev))
+		return;
+
+	kbase_arbiter_pm_release_interrupts(kbdev);
+
 	cancel_request_timer(kbdev);
 	mutex_lock(&arb_vm_state->vm_state_lock);
 	if (arb_vm_state->vm_state > KBASE_VM_STATE_STOPPED_GPU_REQUESTED) {
@@ -358,12 +367,6 @@ void kbase_arbiter_pm_early_term(struct kbase_device *kbdev)
 	kbdev->pm.arb_vm_state = NULL;
 }
 
-/**
- * kbase_arbiter_pm_release_interrupts() - Release the GPU interrupts
- * @kbdev: The kbase device structure for the device (must be a valid pointer)
- *
- * Releases interrupts and set the interrupt flag to false
- */
 void kbase_arbiter_pm_release_interrupts(struct kbase_device *kbdev)
 {
 	struct kbase_arbiter_vm_state *arb_vm_state = kbdev->pm.arb_vm_state;
@@ -376,29 +379,25 @@ void kbase_arbiter_pm_release_interrupts(struct kbase_device *kbdev)
 	mutex_unlock(&arb_vm_state->vm_state_lock);
 }
 
-/**
- * kbase_arbiter_pm_install_interrupts() - Install the GPU interrupts
- * @kbdev: The kbase device structure for the device (must be a valid pointer)
- *
- * Install interrupts and set the interrupt_install flag to true.
- *
- * Return: 0 if success, or a Linux error code
- */
 int kbase_arbiter_pm_install_interrupts(struct kbase_device *kbdev)
 {
 	struct kbase_arbiter_vm_state *arb_vm_state = kbdev->pm.arb_vm_state;
-	int err;
+	int err = 0;
 
 	mutex_lock(&arb_vm_state->vm_state_lock);
-	arb_vm_state->interrupts_installed = true;
-	err = kbase_install_interrupts(kbdev);
+	if (arb_vm_state->interrupts_installed == false) {
+		arb_vm_state->interrupts_installed = true;
+		err = kbase_install_interrupts(kbdev);
+	} else {
+		dev_dbg(kbdev->dev, "%s: interrupts installed already", __func__);
+	}
 	mutex_unlock(&arb_vm_state->vm_state_lock);
 	return err;
 }
 
 /**
  * kbase_arbiter_pm_vm_stopped() - Handle stop state for the VM
- * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ * @kbdev: The kbase device structure for the device
  *
  * Handles a stop state for the VM
  */
@@ -416,7 +415,13 @@ void kbase_arbiter_pm_vm_stopped(struct kbase_device *kbdev)
 	dev_dbg(kbdev->dev, "%s %s\n", __func__,
 		kbase_arbiter_pm_vm_state_str(arb_vm_state->vm_state));
 
-	if (arb_vm_state->interrupts_installed) {
+	/*
+	 * Release the interrupts on external arb_if to address Xen requirements.
+	 * Interrupts are not released with internal arb_if as the IRQs are required
+	 * to handle messaging to/from Arbiter/Resource Group.
+	 */
+	if (arb_vm_state->interrupts_installed
+	) {
 		arb_vm_state->interrupts_installed = false;
 		kbase_release_interrupts(kbdev);
 	}
@@ -476,6 +481,12 @@ int kbase_arbiter_pm_gpu_assigned(struct kbase_device *kbdev)
 	if (!kbdev)
 		return result;
 
+	/* If there is no Arbiter, then there is no virtualization
+	 * and the current VM always has access to the GPU.
+	 */
+	if (!kbase_has_arbiter(kbdev))
+		return 1;
+
 	/* First check the GPU_LOST state */
 	kbase_pm_lock(kbdev);
 	if (kbase_pm_is_gpu_lost(kbdev)) {
@@ -507,7 +518,7 @@ int kbase_arbiter_pm_gpu_assigned(struct kbase_device *kbdev)
 
 /**
  * kbase_arbiter_pm_vm_gpu_start() - Handles the start state of the VM
- * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ * @kbdev: The kbase device structure for the device
  *
  * Handles the start state of the VM
  */
@@ -532,7 +543,15 @@ static void kbase_arbiter_pm_vm_gpu_start(struct kbase_device *kbdev)
 	case KBASE_VM_STATE_STOPPED_GPU_REQUESTED:
 		kbase_arbiter_pm_vm_set_state(kbdev, KBASE_VM_STATE_STARTING);
 		arb_vm_state->interrupts_installed = true;
-		kbase_install_interrupts(kbdev);
+		/*
+		 * Re-install interrupts that were released for external arb_if to
+		 * address Xen requirements. Interrupts are not released with internal
+		 * arb_if as the IRQs are required to handle messaging to/from
+		 * Arbiter/Resource Group.
+		 */
+		{
+			kbase_install_interrupts(kbdev);
+		}
 		/*
 		 * GPU GRANTED received while in stop can be a result of a
 		 * repartitioning.
@@ -561,7 +580,7 @@ static void kbase_arbiter_pm_vm_gpu_start(struct kbase_device *kbdev)
 
 /**
  * kbase_arbiter_pm_vm_gpu_stop() - Handles the stop state of the VM
- * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ * @kbdev: The kbase device structure for the device
  *
  * Handles the start state of the VM
  */
@@ -603,7 +622,7 @@ static void kbase_arbiter_pm_vm_gpu_stop(struct kbase_device *kbdev)
 
 /**
  * kbase_gpu_lost() - Kbase signals GPU is lost on a lost event signal
- * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ * @kbdev: The kbase device structure for the device
  *
  * On GPU lost event signals GPU_LOST to the aribiter
  */
@@ -658,7 +677,7 @@ static void kbase_gpu_lost(struct kbase_device *kbdev)
 /**
  * kbase_arbiter_pm_vm_os_suspend_ready_state() - checks if VM is ready
  *			to be moved to suspended state.
- * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ * @kbdev: The kbase device structure for the device
  *
  * Return: True if its ready to be suspended else False.
  */
@@ -678,10 +697,10 @@ static inline bool kbase_arbiter_pm_vm_os_suspend_ready_state(struct kbase_devic
 /**
  * kbase_arbiter_pm_vm_os_prepare_suspend() - Prepare OS to be in suspend state
  *                             until it receives the grant message from arbiter
- * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ * @kbdev: The kbase device structure for the device
  *
  * Prepares OS to be in suspend state until it receives GRANT message
- * from Arbiter asynchronously.
+ * from Arbiter asynchronously. This function assumes there is an active Arbiter.
  */
 static void kbase_arbiter_pm_vm_os_prepare_suspend(struct kbase_device *kbdev)
 {
@@ -689,10 +708,8 @@ static void kbase_arbiter_pm_vm_os_prepare_suspend(struct kbase_device *kbdev)
 	enum kbase_vm_state prev_state;
 
 	lockdep_assert_held(&arb_vm_state->vm_state_lock);
-	if (kbdev->arb.arb_if) {
-		if (kbdev->pm.arb_vm_state->vm_state == KBASE_VM_STATE_SUSPENDED)
-			return;
-	}
+	if (kbdev->pm.arb_vm_state->vm_state == KBASE_VM_STATE_SUSPENDED)
+		return;
 	/* Block suspend OS function until we are in a stable state
 	 * with vm_state_lock
 	 */
@@ -745,7 +762,7 @@ static void kbase_arbiter_pm_vm_os_prepare_suspend(struct kbase_device *kbdev)
 /**
  * kbase_arbiter_pm_vm_os_resume() - Resume OS function once it receives
  *                                   a grant message from arbiter
- * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ * @kbdev: The kbase device structure for the device
  *
  * Resume OS function once it receives GRANT message
  * from Arbiter asynchronously.
@@ -774,7 +791,7 @@ static void kbase_arbiter_pm_vm_os_resume(struct kbase_device *kbdev)
 
 /**
  * kbase_arbiter_pm_vm_event() - Dispatch VM event to the state machine.
- * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ * @kbdev: The kbase device structure for the device
  * @evt: VM event
  *
  * The state machine function. Receives events and transitions states
@@ -784,7 +801,7 @@ void kbase_arbiter_pm_vm_event(struct kbase_device *kbdev, enum kbase_arbif_evt
 {
 	struct kbase_arbiter_vm_state *arb_vm_state = kbdev->pm.arb_vm_state;
 
-	if (!kbdev->arb.arb_if)
+	if (!kbase_has_arbiter(kbdev))
 		return;
 
 	mutex_lock(&arb_vm_state->vm_state_lock);
@@ -853,7 +870,7 @@ void kbase_arbiter_pm_vm_event(struct kbase_device *kbdev, enum kbase_arbif_evt
 		break;
 
 	default:
-		dev_alert(kbdev->dev, "Got Unknown Event!");
+		dev_err(kbdev->dev, "Got Unknown Event!");
 		break;
 	}
 	mutex_unlock(&arb_vm_state->vm_state_lock);
@@ -863,7 +880,7 @@ KBASE_EXPORT_TEST_API(kbase_arbiter_pm_vm_event);
 
 /**
  * kbase_arbiter_pm_vm_wait_gpu_assignment() - VM wait for a GPU assignment.
- * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ * @kbdev: The kbase device structure for the device
  *
  * VM waits for a GPU assignment.
  */
@@ -879,14 +896,14 @@ static void kbase_arbiter_pm_vm_wait_gpu_assignment(struct kbase_device *kbdev)
 }
 
 /**
- * kbase_arbiter_pm_vm_gpu_assigned_lockheld() - Check if VM holds VM state lock
- * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ * kbase_arbiter_pm_vm_gpu_assigned_locked() - Check if the GPU is assigned to the VM
+ * @kbdev: The kbase device structure for the device
  *
  * Checks if the virtual machine holds VM state lock.
  *
  * Return: true if GPU is assigned, else false.
  */
-static inline bool kbase_arbiter_pm_vm_gpu_assigned_lockheld(struct kbase_device *kbdev)
+static inline bool kbase_arbiter_pm_vm_gpu_assigned_locked(struct kbase_device *kbdev)
 {
 	struct kbase_arbiter_vm_state *arb_vm_state = kbdev->pm.arb_vm_state;
 
@@ -898,13 +915,14 @@ static inline bool kbase_arbiter_pm_vm_gpu_assigned_lockheld(struct kbase_device
 /**
  * kbase_arbiter_pm_ctx_active_handle_suspend() - Handle suspend operation for
  *                                                arbitration mode
- * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ * @kbdev: The kbase device structure for the device
  * @suspend_handler: The handler code for how to handle a suspend
  *                   that might occur
  *
  * This function handles a suspend event from the driver,
  * communicating with the arbiter and waiting synchronously for the GPU
- * to be granted again depending on the VM state.
+ * to be granted again depending on the VM state. Returns immediately
+ * with success if there is no Arbiter.
  *
  * Return: 0 on success else 1 suspend handler isn not possible.
  */
@@ -914,58 +932,58 @@ int kbase_arbiter_pm_ctx_active_handle_suspend(struct kbase_device *kbdev,
 	struct kbase_arbiter_vm_state *arb_vm_state = kbdev->pm.arb_vm_state;
 	int res = 0;
 
-	if (kbdev->arb.arb_if) {
-		mutex_lock(&arb_vm_state->vm_state_lock);
-		while (!kbase_arbiter_pm_vm_gpu_assigned_lockheld(kbdev)) {
-			/* Update VM state since we have GPU work to do */
-			if (arb_vm_state->vm_state == KBASE_VM_STATE_STOPPING_IDLE)
-				kbase_arbiter_pm_vm_set_state(kbdev,
-							      KBASE_VM_STATE_STOPPING_ACTIVE);
-			else if (arb_vm_state->vm_state == KBASE_VM_STATE_STOPPED) {
-				kbase_arbiter_pm_vm_set_state(kbdev,
-							      KBASE_VM_STATE_STOPPED_GPU_REQUESTED);
-				kbase_arbif_gpu_request(kbdev);
-				start_request_timer(kbdev);
-			} else if (arb_vm_state->vm_state == KBASE_VM_STATE_INITIALIZING_WITH_GPU)
+	if (!kbase_has_arbiter(kbdev))
+		return res;
+
+	mutex_lock(&arb_vm_state->vm_state_lock);
+	while (!kbase_arbiter_pm_vm_gpu_assigned_locked(kbdev)) {
+		/* Update VM state since we have GPU work to do */
+		if (arb_vm_state->vm_state == KBASE_VM_STATE_STOPPING_IDLE)
+			kbase_arbiter_pm_vm_set_state(kbdev, KBASE_VM_STATE_STOPPING_ACTIVE);
+		else if (arb_vm_state->vm_state == KBASE_VM_STATE_STOPPED) {
+			kbase_arbiter_pm_vm_set_state(kbdev, KBASE_VM_STATE_STOPPED_GPU_REQUESTED);
+			kbase_arbif_gpu_request(kbdev);
+			start_request_timer(kbdev);
+		} else if (arb_vm_state->vm_state == KBASE_VM_STATE_INITIALIZING_WITH_GPU)
+			break;
+
+		if (suspend_handler != KBASE_PM_SUSPEND_HANDLER_NOT_POSSIBLE) {
+			/* In case of GPU lost, even if
+			 * active_count > 0, we no longer have GPU
+			 * access
+			 */
+			if (kbase_pm_is_gpu_lost(kbdev))
+				res = 1;
+
+			switch (suspend_handler) {
+			case KBASE_PM_SUSPEND_HANDLER_DONT_INCREASE:
+				res = 1;
 				break;
-
-			if (suspend_handler != KBASE_PM_SUSPEND_HANDLER_NOT_POSSIBLE) {
-				/* In case of GPU lost, even if
-				 * active_count > 0, we no longer have GPU
-				 * access
-				 */
-				if (kbase_pm_is_gpu_lost(kbdev))
+			case KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE:
+				if (kbdev->pm.active_count == 0)
 					res = 1;
-
-				switch (suspend_handler) {
-				case KBASE_PM_SUSPEND_HANDLER_DONT_INCREASE:
-					res = 1;
-					break;
-				case KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE:
-					if (kbdev->pm.active_count == 0)
-						res = 1;
-					break;
-				case KBASE_PM_SUSPEND_HANDLER_VM_GPU_GRANTED:
-					break;
-				default:
-					WARN(1, "Unknown suspend_handler\n");
-					res = 1;
-					break;
-				}
+				break;
+			case KBASE_PM_SUSPEND_HANDLER_VM_GPU_GRANTED:
+				break;
+			default:
+				WARN(1, "Unknown suspend_handler\n");
+				res = 1;
 				break;
 			}
-
-			/* Need to synchronously wait for GPU assignment */
-			atomic_inc(&kbdev->pm.gpu_users_waiting);
-			mutex_unlock(&arb_vm_state->vm_state_lock);
-			kbase_pm_unlock(kbdev);
-			kbase_arbiter_pm_vm_wait_gpu_assignment(kbdev);
-			kbase_pm_lock(kbdev);
-			mutex_lock(&arb_vm_state->vm_state_lock);
-			atomic_dec(&kbdev->pm.gpu_users_waiting);
+			break;
 		}
+
+		/* Need to synchronously wait for GPU assignment */
+		atomic_inc(&kbdev->pm.gpu_users_waiting);
 		mutex_unlock(&arb_vm_state->vm_state_lock);
+		kbase_pm_unlock(kbdev);
+		kbase_arbiter_pm_vm_wait_gpu_assignment(kbdev);
+		kbase_pm_lock(kbdev);
+		mutex_lock(&arb_vm_state->vm_state_lock);
+		atomic_dec(&kbdev->pm.gpu_users_waiting);
 	}
+	mutex_unlock(&arb_vm_state->vm_state_lock);
+
 	return res;
 }
 
diff --git a/drivers/gpu/arm/bifrost/arbiter/mali_kbase_arbiter_pm.h b/drivers/gpu/arm/bifrost/arbiter/mali_kbase_arbiter_pm.h
index 3734d32b6e2b..649f488d4f67 100644
--- a/drivers/gpu/arm/bifrost/arbiter/mali_kbase_arbiter_pm.h
+++ b/drivers/gpu/arm/bifrost/arbiter/mali_kbase_arbiter_pm.h
@@ -1,7 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
 /*
  *
- * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2024 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -102,7 +102,7 @@ void kbase_arbiter_pm_release_interrupts(struct kbase_device *kbdev);
  *
  * Install interrupts and set the interrupt_install flag to true.
  *
- * Return: 0 if success, or a Linux error code
+ * Return: 0 on success or if already installed, otherwise a Linux error code.
  */
 int kbase_arbiter_pm_install_interrupts(struct kbase_device *kbdev);
 
diff --git a/drivers/gpu/arm/bifrost/backend/gpu/Kbuild b/drivers/gpu/arm/bifrost/backend/gpu/Kbuild
index c3db14217c6d..ffec0417aa5c 100644
--- a/drivers/gpu/arm/bifrost/backend/gpu/Kbuild
+++ b/drivers/gpu/arm/bifrost/backend/gpu/Kbuild
@@ -1,6 +1,6 @@
 # SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
 #
-# (C) COPYRIGHT 2014-2022 ARM Limited. All rights reserved.
+# (C) COPYRIGHT 2014-2023 ARM Limited. All rights reserved.
 #
 # This program is free software and is provided to you under the terms of the
 # GNU General Public License version 2 as published by the Free Software
@@ -47,12 +47,7 @@ endif
 bifrost_kbase-$(CONFIG_MALI_BIFROST_DEVFREQ) += \
     backend/gpu/mali_kbase_devfreq.o
 
-ifneq ($(CONFIG_MALI_REAL_HW),y)
-    bifrost_kbase-y += backend/gpu/mali_kbase_model_linux.o
-endif
+bifrost_kbase-$(CONFIG_MALI_BIFROST_NO_MALI) += backend/gpu/mali_kbase_model_linux.o
 
 # NO_MALI Dummy model interface
 bifrost_kbase-$(CONFIG_MALI_BIFROST_NO_MALI) += backend/gpu/mali_kbase_model_dummy.o
-# HW error simulation
-bifrost_kbase-$(CONFIG_MALI_BIFROST_NO_MALI) += backend/gpu/mali_kbase_model_error_generator.o
-
diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_clk_rate_trace_mgr.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_clk_rate_trace_mgr.c
index e47dd440bff2..851e6feafd30 100644
--- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_clk_rate_trace_mgr.c
+++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_clk_rate_trace_mgr.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
 /*
  *
- * (C) COPYRIGHT 2020-2023 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2020-2024 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -50,14 +50,22 @@ static struct kbase_clk_rate_trace_op_conf *
 get_clk_rate_trace_callbacks(__maybe_unused struct kbase_device *kbdev)
 {
 	/* base case */
+	const void *arbiter_if_node;
 	struct kbase_clk_rate_trace_op_conf *callbacks =
 		(struct kbase_clk_rate_trace_op_conf *)CLK_RATE_TRACE_OPS;
-#if defined(CONFIG_MALI_ARBITER_SUPPORT) && defined(CONFIG_OF)
-	const void *arbiter_if_node;
+
+	/* Nothing left to do here if there is no Arbiter/virtualization or if
+	 * CONFIG_OF is not enabled.
+	 */
+	if (!IS_ENABLED(CONFIG_OF))
+		return callbacks;
 
 	if (WARN_ON(!kbdev) || WARN_ON(!kbdev->dev))
 		return callbacks;
 
+	if (!kbase_has_arbiter(kbdev))
+		return callbacks;
+
 	arbiter_if_node = of_get_property(kbdev->dev->of_node, "arbiter-if", NULL);
 	if (!arbiter_if_node)
 		arbiter_if_node = of_get_property(kbdev->dev->of_node, "arbiter_if", NULL);
@@ -69,8 +77,6 @@ get_clk_rate_trace_callbacks(__maybe_unused struct kbase_device *kbdev)
 		dev_dbg(kbdev->dev,
 			"Arbitration supported but disabled by platform. Leaving clk rate callbacks as default.\n");
 
-#endif
-
 	return callbacks;
 }
 
diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_devfreq.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_devfreq.c
index 2649f1815e9f..e223535d01f7 100644
--- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_devfreq.c
+++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_devfreq.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
 /*
  *
- * (C) COPYRIGHT 2014-2023 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2024 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -366,7 +366,7 @@ static int kbase_devfreq_init_core_mask_table(struct kbase_device *kbdev)
 		err = of_property_read_u64(node, "opp-hz-real", real_freqs);
 #endif
 		if (err < 0) {
-			dev_warn(kbdev->dev, "Failed to read opp-hz-real property with error %d\n",
+			dev_warn(kbdev->dev, "Failed to read opp-hz-real property with error %d",
 				 err);
 			continue;
 		}
@@ -374,8 +374,8 @@ static int kbase_devfreq_init_core_mask_table(struct kbase_device *kbdev)
 		err = of_property_read_u32_array(node, "opp-microvolt", opp_volts,
 						 kbdev->nr_regulators);
 		if (err < 0) {
-			dev_warn(kbdev->dev,
-				 "Failed to read opp-microvolt property with error %d\n", err);
+			dev_warn(kbdev->dev, "Failed to read opp-microvolt property with error %d",
+				 err);
 			continue;
 		}
 #endif
@@ -386,11 +386,12 @@ static int kbase_devfreq_init_core_mask_table(struct kbase_device *kbdev)
 
 			dev_warn(
 				kbdev->dev,
-				"Ignoring OPP %llu - Dynamic Core Scaling not supported on this GPU\n",
+				"Ignoring OPP %llu - Dynamic Core Scaling not supported on this GPU",
 				opp_freq);
 			continue;
 		}
 
+
 		core_count_p = of_get_property(node, "opp-core-count", NULL);
 		if (core_count_p) {
 			u64 remaining_core_mask = kbdev->gpu_props.shader_present;
diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_gpuprops_backend.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_gpuprops_backend.c
index 414ad546811a..a9b629ad7ea5 100644
--- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_gpuprops_backend.c
+++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_gpuprops_backend.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
 /*
  *
- * (C) COPYRIGHT 2014-2023 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2024 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -48,7 +48,7 @@ int kbase_backend_gpuprops_get(struct kbase_device *kbdev, struct kbasep_gpuprop
 	/* Not a valid register on TMIX */
 
 	/* TGOx specific register */
-	if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_THREAD_TLS_ALLOC))
+	if (kbase_hw_has_feature(kbdev, KBASE_HW_FEATURE_THREAD_TLS_ALLOC))
 		regdump->thread_tls_alloc =
 			kbase_reg_read32(kbdev, GPU_CONTROL_ENUM(THREAD_TLS_ALLOC));
 #endif /* !MALI_USE_CSF */
@@ -64,7 +64,7 @@ int kbase_backend_gpuprops_get(struct kbase_device *kbdev, struct kbasep_gpuprop
 	/* AMBA_FEATURES enum is mapped to COHERENCY_FEATURES enum */
 	regdump->coherency_features = KBASE_REG_READ(kbdev, GPU_CONTROL_ENUM(COHERENCY_FEATURES));
 
-	if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_CORE_FEATURES))
+	if (kbase_hw_has_feature(kbdev, KBASE_HW_FEATURE_CORE_FEATURES))
 		regdump->core_features = KBASE_REG_READ(kbdev, GPU_CONTROL_ENUM(CORE_FEATURES));
 
 #if MALI_USE_CSF
@@ -116,7 +116,7 @@ int kbase_backend_gpuprops_get_curr_config(struct kbase_device *kbdev,
 int kbase_backend_gpuprops_get_l2_features(struct kbase_device *kbdev,
 					   struct kbasep_gpuprops_regdump *regdump)
 {
-	if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_L2_CONFIG)) {
+	if (kbase_hw_has_feature(kbdev, KBASE_HW_FEATURE_L2_CONFIG)) {
 		regdump->l2_features = KBASE_REG_READ(kbdev, GPU_CONTROL_ENUM(L2_FEATURES));
 		regdump->l2_config = kbase_reg_read32(kbdev, GPU_CONTROL_ENUM(L2_CONFIG));
 
diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_instr_backend.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_instr_backend.c
index 131cfe32df9f..07960713f75a 100644
--- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_instr_backend.c
+++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_instr_backend.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
 /*
  *
- * (C) COPYRIGHT 2014-2023 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2024 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -29,6 +29,8 @@
 #include <device/mali_kbase_device.h>
 #include <backend/gpu/mali_kbase_instr_internal.h>
 
+#define WAIT_FOR_DUMP_TIMEOUT_MS 5000
+
 static int wait_prfcnt_ready(struct kbase_device *kbdev)
 {
 	u32 val;
@@ -163,6 +165,7 @@ int kbase_instr_hwcnt_disable_internal(struct kbase_context *kctx)
 {
 	unsigned long flags, pm_flags;
 	struct kbase_device *kbdev = kctx->kbdev;
+	const unsigned long timeout = msecs_to_jiffies(WAIT_FOR_DUMP_TIMEOUT_MS);
 
 	while (1) {
 		spin_lock_irqsave(&kbdev->hwaccess_lock, pm_flags);
@@ -199,7 +202,8 @@ int kbase_instr_hwcnt_disable_internal(struct kbase_context *kctx)
 		spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags);
 
 		/* Ongoing dump/setup - wait for its completion */
-		wait_event(kbdev->hwcnt.backend.wait, kbdev->hwcnt.backend.triggered != 0);
+		wait_event_timeout(kbdev->hwcnt.backend.wait, kbdev->hwcnt.backend.triggered != 0,
+				   timeout);
 	}
 
 	kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_DISABLED;
@@ -319,8 +323,19 @@ int kbase_instr_hwcnt_wait_for_dump(struct kbase_context *kctx)
 	unsigned long flags;
 	int err;
 
+	unsigned long remaining;
+	const unsigned long timeout = msecs_to_jiffies(WAIT_FOR_DUMP_TIMEOUT_MS);
+
 	/* Wait for dump & cache clean to complete */
-	wait_event(kbdev->hwcnt.backend.wait, kbdev->hwcnt.backend.triggered != 0);
+	remaining = wait_event_timeout(kbdev->hwcnt.backend.wait,
+				       kbdev->hwcnt.backend.triggered != 0, timeout);
+	if (remaining == 0) {
+		err = -ETIME;
+		/* Set the backend state so it's clear things have gone bad (could be a HW issue)
+		 */
+		kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_UNRECOVERABLE_ERROR;
+		goto timed_out;
+	}
 
 	spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
 
@@ -336,7 +351,7 @@ int kbase_instr_hwcnt_wait_for_dump(struct kbase_context *kctx)
 	}
 
 	spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
-
+timed_out:
 	return err;
 }
 
diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_irq_internal.h b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_irq_internal.h
index 34e8178d1d76..feb76757f955 100644
--- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_irq_internal.h
+++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_irq_internal.h
@@ -74,7 +74,7 @@ void kbase_synchronize_irqs(struct kbase_device *kbdev);
  * Return: 0 on success. Error code (negative) on failure.
  */
 int kbase_validate_interrupts(struct kbase_device *const kbdev);
-#endif /* CONFIG_MALI_REAL_HW */
+#endif /* IS_ENABLED(CONFIG_MALI_REAL_HW) */
 #endif /* CONFIG_MALI_BIFROST_DEBUG */
 
 /**
diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_irq_linux.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_irq_linux.c
index 9cb367508dde..152b140b5381 100644
--- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_irq_linux.c
+++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_irq_linux.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
 /*
  *
- * (C) COPYRIGHT 2014-2023 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2024 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -23,6 +23,7 @@
 #include <device/mali_kbase_device.h>
 #include <backend/gpu/mali_kbase_irq_internal.h>
 
+
 #include <linux/interrupt.h>
 
 #if IS_ENABLED(CONFIG_MALI_REAL_HW)
@@ -163,13 +164,9 @@ static irqreturn_t kbase_gpu_irq_handler(int irq, void *data)
 static irqreturn_t kbase_combined_irq_handler(int irq, void *data)
 {
 	irqreturn_t irq_state = IRQ_NONE;
-
-	if (kbase_job_irq_handler(irq, data) == IRQ_HANDLED)
-		irq_state = IRQ_HANDLED;
-	if (kbase_mmu_irq_handler(irq, data) == IRQ_HANDLED)
-		irq_state = IRQ_HANDLED;
-	if (kbase_gpu_irq_handler(irq, data) == IRQ_HANDLED)
-		irq_state = IRQ_HANDLED;
+	irq_state |= kbase_job_irq_handler(irq, data);
+	irq_state |= kbase_mmu_irq_handler(irq, data);
+	irq_state |= kbase_gpu_irq_handler(irq, data);
 
 	return irq_state;
 }
@@ -212,8 +209,7 @@ int kbase_set_custom_irq_handler(struct kbase_device *kbdev, irq_handler_t custo
 	if (!handler)
 		handler = kbase_get_interrupt_handler(kbdev, irq_tag);
 
-	if (request_irq(kbdev->irqs[irq].irq, handler,
-			kbdev->irqs[irq].flags | ((kbdev->nr_irqs == 1) ? 0 : IRQF_SHARED),
+	if (request_irq(kbdev->irqs[irq].irq, handler, kbdev->irqs[irq].flags | IRQF_SHARED,
 			dev_name(kbdev->dev), kbase_tag(kbdev, irq)) != 0) {
 		result = -EINVAL;
 		dev_err(kbdev->dev, "Can't request interrupt %u (index %u)\n", kbdev->irqs[irq].irq,
@@ -396,8 +392,8 @@ static int validate_interrupt(struct kbase_device *const kbdev, u32 tag)
 
 		/* restore original interrupt */
 		if (request_irq(kbdev->irqs[irq].irq, kbase_get_interrupt_handler(kbdev, tag),
-				kbdev->irqs[irq].flags | ((kbdev->nr_irqs == 1) ? 0 : IRQF_SHARED),
-				dev_name(kbdev->dev), kbase_tag(kbdev, irq))) {
+				kbdev->irqs[irq].flags | IRQF_SHARED, dev_name(kbdev->dev),
+				kbase_tag(kbdev, irq))) {
 			dev_err(kbdev->dev, "Can't restore original interrupt %u (index %u)\n",
 				kbdev->irqs[irq].irq, tag);
 			err = -EINVAL;
@@ -449,10 +445,10 @@ int kbase_install_interrupts(struct kbase_device *kbdev)
 	u32 i;
 
 	for (i = 0; i < kbdev->nr_irqs; i++) {
-		const int result = request_irq(
-			kbdev->irqs[i].irq, kbase_get_interrupt_handler(kbdev, i),
-			kbdev->irqs[i].flags | ((kbdev->nr_irqs == 1) ? 0 : IRQF_SHARED),
-			dev_name(kbdev->dev), kbase_tag(kbdev, i));
+		const int result = request_irq(kbdev->irqs[i].irq,
+					       kbase_get_interrupt_handler(kbdev, i),
+					       kbdev->irqs[i].flags | IRQF_SHARED,
+					       dev_name(kbdev->dev), kbase_tag(kbdev, i));
 		if (result) {
 			dev_err(kbdev->dev, "Can't request interrupt %u (index %u)\n",
 				kbdev->irqs[i].irq, i);
diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_hw.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_hw.c
index e822dc59977b..b251de4fc23e 100644
--- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_hw.c
+++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_hw.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
 /*
  *
- * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2024 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -98,82 +98,6 @@ static u64 kbase_job_write_affinity(struct kbase_device *kbdev, base_jd_core_req
 	return affinity;
 }
 
-/**
- * select_job_chain() - Select which job chain to submit to the GPU
- * @katom: Pointer to the atom about to be submitted to the GPU
- *
- * Selects one of the fragment job chains attached to the special atom at the
- * end of a renderpass, or returns the address of the single job chain attached
- * to any other type of atom.
- *
- * Which job chain is selected depends upon whether the tiling phase of the
- * renderpass completed normally or was soft-stopped because it used too
- * much memory. It also depends upon whether one of the fragment job chains
- * has already been run as part of the same renderpass.
- *
- * Return: GPU virtual address of the selected job chain
- */
-static u64 select_job_chain(struct kbase_jd_atom *katom)
-{
-	struct kbase_context *const kctx = katom->kctx;
-	u64 jc = katom->jc;
-	struct kbase_jd_renderpass *rp;
-
-	lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
-
-	if (!(katom->core_req & BASE_JD_REQ_END_RENDERPASS))
-		return jc;
-
-	compiletime_assert((1ull << (sizeof(katom->renderpass_id) * 8)) <=
-				   ARRAY_SIZE(kctx->jctx.renderpasses),
-			   "Should check invalid access to renderpasses");
-
-	rp = &kctx->jctx.renderpasses[katom->renderpass_id];
-	/* We can read a subset of renderpass state without holding
-	 * higher-level locks (but not end_katom, for example).
-	 * If the end-of-renderpass atom is running with as-yet indeterminate
-	 * OOM state then assume that the start atom was not soft-stopped.
-	 */
-	switch (rp->state) {
-	case KBASE_JD_RP_OOM:
-		/* Tiling ran out of memory.
-		 * Start of incremental rendering, used once.
-		 */
-		jc = katom->jc_fragment.norm_read_forced_write;
-		break;
-	case KBASE_JD_RP_START:
-	case KBASE_JD_RP_PEND_OOM:
-		/* Tiling completed successfully first time.
-		 * Single-iteration rendering, used once.
-		 */
-		jc = katom->jc_fragment.norm_read_norm_write;
-		break;
-	case KBASE_JD_RP_RETRY_OOM:
-		/* Tiling ran out of memory again.
-		 * Continuation of incremental rendering, used as
-		 * many times as required.
-		 */
-		jc = katom->jc_fragment.forced_read_forced_write;
-		break;
-	case KBASE_JD_RP_RETRY:
-	case KBASE_JD_RP_RETRY_PEND_OOM:
-		/* Tiling completed successfully this time.
-		 * End of incremental rendering, used once.
-		 */
-		jc = katom->jc_fragment.forced_read_norm_write;
-		break;
-	default:
-		WARN_ON(1);
-		break;
-	}
-
-	dev_dbg(kctx->kbdev->dev, "Selected job chain 0x%llx for end atom %pK in state %d\n", jc,
-		(void *)katom, (int)rp->state);
-
-	katom->jc = jc;
-	return jc;
-}
-
 static inline bool kbasep_jm_wait_js_free(struct kbase_device *kbdev, unsigned int js,
 					  struct kbase_context *kctx)
 {
@@ -196,7 +120,7 @@ int kbase_job_hw_submit(struct kbase_device *kbdev, struct kbase_jd_atom *katom,
 {
 	struct kbase_context *kctx;
 	u32 cfg;
-	u64 const jc_head = select_job_chain(katom);
+	u64 jc_head = katom->jc;
 	u64 affinity;
 	struct slot_rb *ptr_slot_rb = &kbdev->hwaccess.backend.slot_rb[js];
 
@@ -220,21 +144,21 @@ int kbase_job_hw_submit(struct kbase_device *kbdev, struct kbase_jd_atom *katom,
 	 */
 	cfg = (u32)kctx->as_nr;
 
-	if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_FLUSH_REDUCTION) &&
+	if (kbase_hw_has_feature(kbdev, KBASE_HW_FEATURE_FLUSH_REDUCTION) &&
 	    !(kbdev->serialize_jobs & KBASE_SERIALIZE_RESET))
 		cfg |= JS_CONFIG_ENABLE_FLUSH_REDUCTION;
 
 	if (0 != (katom->core_req & BASE_JD_REQ_SKIP_CACHE_START)) {
 		/* Force a cache maintenance operation if the newly submitted
 		 * katom to the slot is from a different kctx. For a JM GPU
-		 * that has the feature BASE_HW_FEATURE_FLUSH_INV_SHADER_OTHER,
+		 * that has the feature KBASE_HW_FEATURE_FLUSH_INV_SHADER_OTHER,
 		 * applies a FLUSH_INV_SHADER_OTHER. Otherwise, do a
 		 * FLUSH_CLEAN_INVALIDATE.
 		 */
 		u64 tagged_kctx = ptr_slot_rb->last_kctx_tagged;
 
 		if (tagged_kctx != SLOT_RB_NULL_TAG_VAL && tagged_kctx != SLOT_RB_TAG_KCTX(kctx)) {
-			if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_FLUSH_INV_SHADER_OTHER))
+			if (kbase_hw_has_feature(kbdev, KBASE_HW_FEATURE_FLUSH_INV_SHADER_OTHER))
 				cfg |= JS_CONFIG_START_FLUSH_INV_SHADER_OTHER;
 			else
 				cfg |= JS_CONFIG_START_FLUSH_CLEAN_INVALIDATE;
@@ -246,15 +170,14 @@ int kbase_job_hw_submit(struct kbase_device *kbdev, struct kbase_jd_atom *katom,
 	if (0 != (katom->core_req & BASE_JD_REQ_SKIP_CACHE_END) &&
 	    !(kbdev->serialize_jobs & KBASE_SERIALIZE_RESET))
 		cfg |= JS_CONFIG_END_FLUSH_NO_ACTION;
-	else if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_CLEAN_ONLY_SAFE))
+	else if (kbase_hw_has_feature(kbdev, KBASE_HW_FEATURE_CLEAN_ONLY_SAFE))
 		cfg |= JS_CONFIG_END_FLUSH_CLEAN;
 	else
 		cfg |= JS_CONFIG_END_FLUSH_CLEAN_INVALIDATE;
 
 	cfg |= JS_CONFIG_THREAD_PRI(8);
 
-	if ((katom->atom_flags & KBASE_KATOM_FLAG_PROTECTED) ||
-	    (katom->core_req & BASE_JD_REQ_END_RENDERPASS))
+	if (katom->atom_flags & KBASE_KATOM_FLAG_PROTECTED)
 		cfg |= JS_CONFIG_DISABLE_DESCRIPTOR_WR_BK;
 
 	if (!ptr_slot_rb->job_chain_flag) {
@@ -268,7 +191,7 @@ int kbase_job_hw_submit(struct kbase_device *kbdev, struct kbase_jd_atom *katom,
 
 	kbase_reg_write32(kbdev, JOB_SLOT_OFFSET(js, CONFIG_NEXT), cfg);
 
-	if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_FLUSH_REDUCTION))
+	if (kbase_hw_has_feature(kbdev, KBASE_HW_FEATURE_FLUSH_REDUCTION))
 		kbase_reg_write32(kbdev, JOB_SLOT_OFFSET(js, FLUSH_ID_NEXT), katom->flush_id);
 
 	/* Write an approximate start timestamp.
@@ -440,7 +363,7 @@ void kbase_job_done(struct kbase_device *kbdev, u32 done)
 				 * jobs to hang. Reset GPU before allowing
 				 * any other jobs on the slot to continue.
 				 */
-				if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TTRX_3076)) {
+				if (kbase_hw_has_issue(kbdev, KBASE_HW_ISSUE_TTRX_3076)) {
 					if (completion_code == BASE_JD_EVENT_JOB_BUS_FAULT) {
 						if (kbase_prepare_to_reset_gpu_locked(
 							    kbdev, RESET_FLAGS_NONE))
@@ -740,66 +663,6 @@ void kbase_job_slot_ctx_priority_check_locked(struct kbase_context *kctx,
 	}
 }
 
-static int softstop_start_rp_nolock(struct kbase_context *kctx, struct kbase_va_region *reg)
-{
-	struct kbase_device *const kbdev = kctx->kbdev;
-	struct kbase_jd_atom *katom;
-	struct kbase_jd_renderpass *rp;
-
-	lockdep_assert_held(&kbdev->hwaccess_lock);
-
-	katom = kbase_gpu_inspect(kbdev, 1, 0);
-
-	if (!katom) {
-		dev_dbg(kctx->kbdev->dev, "No atom on job slot\n");
-		return -ESRCH;
-	}
-
-	if (!(katom->core_req & BASE_JD_REQ_START_RENDERPASS)) {
-		dev_dbg(kctx->kbdev->dev, "Atom %pK on job slot is not start RP\n", (void *)katom);
-		return -EPERM;
-	}
-
-	compiletime_assert((1ull << (sizeof(katom->renderpass_id) * 8)) <=
-				   ARRAY_SIZE(kctx->jctx.renderpasses),
-			   "Should check invalid access to renderpasses");
-
-	rp = &kctx->jctx.renderpasses[katom->renderpass_id];
-	if (WARN_ON(rp->state != KBASE_JD_RP_START && rp->state != KBASE_JD_RP_RETRY))
-		return -EINVAL;
-
-	dev_dbg(kctx->kbdev->dev, "OOM in state %d with region %pK\n", (int)rp->state, (void *)reg);
-
-	if (WARN_ON(katom != rp->start_katom))
-		return -EINVAL;
-
-	dev_dbg(kctx->kbdev->dev, "Adding region %pK to list %pK\n", (void *)reg,
-		(void *)&rp->oom_reg_list);
-	list_move_tail(&reg->link, &rp->oom_reg_list);
-	dev_dbg(kctx->kbdev->dev, "Added region to list\n");
-
-	rp->state = (rp->state == KBASE_JD_RP_START ? KBASE_JD_RP_PEND_OOM :
-							    KBASE_JD_RP_RETRY_PEND_OOM);
-
-	kbase_job_slot_softstop(kbdev, 1, katom);
-
-	return 0;
-}
-
-int kbase_job_slot_softstop_start_rp(struct kbase_context *const kctx,
-				     struct kbase_va_region *const reg)
-{
-	struct kbase_device *const kbdev = kctx->kbdev;
-	int err;
-	unsigned long flags;
-
-	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
-	err = softstop_start_rp_nolock(kctx, reg);
-	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
-
-	return err;
-}
-
 void kbase_jm_wait_for_zero_jobs(struct kbase_context *kctx)
 {
 	struct kbase_device *kbdev = kctx->kbdev;
@@ -839,7 +702,7 @@ u32 kbase_backend_get_current_flush_id(struct kbase_device *kbdev)
 {
 	u32 flush_id = 0;
 
-	if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_FLUSH_REDUCTION)) {
+	if (kbase_hw_has_feature(kbdev, KBASE_HW_FEATURE_FLUSH_REDUCTION)) {
 		mutex_lock(&kbdev->pm.lock);
 		if (kbdev->pm.backend.gpu_powered)
 			flush_id = kbase_reg_read32(kbdev, GPU_CONTROL_ENUM(LATEST_FLUSH));
@@ -1085,7 +948,7 @@ static void kbasep_reset_timeout_worker(struct work_struct *data)
 	/* The flush has completed so reset the active indicator */
 	kbdev->irq_reset_flush = false;
 
-	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TMIX_8463)) {
+	if (kbase_hw_has_issue(kbdev, KBASE_HW_ISSUE_TMIX_8463)) {
 		u64 val;
 		const u32 timeout_us =
 			kbase_get_timeout_ms(kbdev, KBASE_CLEAN_CACHE_TIMEOUT) * USEC_PER_MSEC;
@@ -1268,14 +1131,12 @@ bool kbase_prepare_to_reset_gpu_locked(struct kbase_device *kbdev, unsigned int
 {
 	unsigned int i;
 
-#ifdef CONFIG_MALI_ARBITER_SUPPORT
 	if (kbase_pm_is_gpu_lost(kbdev)) {
 		/* GPU access has been removed, reset will be done by
 		 * Arbiter instead
 		 */
 		return false;
 	}
-#endif
 
 	if (flags & RESET_FLAGS_HWC_UNRECOVERABLE_ERROR)
 		kbase_instr_hwcnt_on_unrecoverable_error(kbdev);
@@ -1328,7 +1189,7 @@ void kbase_reset_gpu(struct kbase_device *kbdev)
 
 	if (!kbase_is_quick_reset_enabled(kbdev))
 		dev_err(kbdev->dev,
-			"Preparing to soft-reset GPU: Waiting (upto %d ms) for all jobs to complete soft-stop\n",
+			"Preparing to soft-reset GPU: Waiting (up to %d ms) for all jobs to complete soft-stop\n",
 			kbdev->reset_timeout_ms);
 
 	hrtimer_start(&kbdev->hwaccess.backend.reset_timer,
@@ -1350,7 +1211,7 @@ void kbase_reset_gpu_locked(struct kbase_device *kbdev)
 
 	if (!kbase_is_quick_reset_enabled(kbdev))
 		dev_err(kbdev->dev,
-			"Preparing to soft-reset GPU: Waiting (upto %d ms) for all jobs to complete soft-stop\n",
+			"Preparing to soft-reset GPU: Waiting (up to %d ms) for all jobs to complete soft-stop\n",
 			kbdev->reset_timeout_ms);
 	hrtimer_start(&kbdev->hwaccess.backend.reset_timer,
 		      HR_TIMER_DELAY_MSEC(kbdev->reset_timeout_ms), HRTIMER_MODE_REL);
diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_rb.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_rb.c
index 842209f9c049..a4a640a0fb92 100644
--- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_rb.c
+++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_rb.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
 /*
  *
- * (C) COPYRIGHT 2014-2023 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2024 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -425,7 +425,7 @@ static void kbase_gpu_release_atom(struct kbase_device *kbdev, struct kbase_jd_a
 			}
 		}
 
-		if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TGOX_R1_1234)) {
+		if (kbase_hw_has_issue(kbdev, KBASE_HW_ISSUE_TGOX_R1_1234)) {
 			if (katom->atom_flags & KBASE_KATOM_FLAG_HOLDING_L2_REF_PROT) {
 				kbase_pm_protected_l2_override(kbdev, false);
 				katom->atom_flags &= ~KBASE_KATOM_FLAG_HOLDING_L2_REF_PROT;
@@ -698,7 +698,7 @@ static int kbase_jm_enter_protected_mode(struct kbase_device *kbdev, struct kbas
 
 		kbase_pm_protected_entry_override_disable(kbdev);
 
-		if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TGOX_R1_1234)) {
+		if (kbase_hw_has_issue(kbdev, KBASE_HW_ISSUE_TGOX_R1_1234)) {
 			/*
 			 * Power on L2 caches; this will also result in the
 			 * correct value written to coherency enable register.
@@ -714,13 +714,13 @@ static int kbase_jm_enter_protected_mode(struct kbase_device *kbdev, struct kbas
 
 		katom[idx]->protected_state.enter = KBASE_ATOM_ENTER_PROTECTED_FINISHED;
 
-		if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TGOX_R1_1234))
+		if (kbase_hw_has_issue(kbdev, KBASE_HW_ISSUE_TGOX_R1_1234))
 			return -EAGAIN;
 
 		/* ***TRANSITION TO HIGHER STATE*** */
 		fallthrough;
 	case KBASE_ATOM_ENTER_PROTECTED_FINISHED:
-		if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TGOX_R1_1234)) {
+		if (kbase_hw_has_issue(kbdev, KBASE_HW_ISSUE_TGOX_R1_1234)) {
 			/*
 			 * Check that L2 caches are powered and, if so,
 			 * enter protected mode.
@@ -864,11 +864,7 @@ void kbase_backend_slot_update(struct kbase_device *kbdev)
 
 	lockdep_assert_held(&kbdev->hwaccess_lock);
 
-#ifdef CONFIG_MALI_ARBITER_SUPPORT
-	if (kbase_reset_gpu_is_active(kbdev) || kbase_is_gpu_removed(kbdev))
-#else
-	if (kbase_reset_gpu_is_active(kbdev))
-#endif
+	if (kbase_reset_gpu_is_active(kbdev) || (kbase_is_gpu_removed(kbdev)))
 		return;
 
 	for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) {
@@ -896,7 +892,7 @@ void kbase_backend_slot_update(struct kbase_device *kbdev)
 				break;
 
 			case KBASE_ATOM_GPU_RB_WAITING_BLOCKED:
-				if (kbase_js_atom_blocked_on_x_dep(katom[idx]))
+				if (katom[idx]->atom_flags & KBASE_KATOM_FLAG_X_DEP_BLOCKED)
 					break;
 
 				katom[idx]->gpu_rb_state =
@@ -1236,7 +1232,7 @@ void kbase_gpu_complete_hw(struct kbase_device *kbdev, unsigned int js, u32 comp
 	 * When a hard-stop is followed close after a soft-stop, the completion
 	 * code may be set to STOPPED, even though the job is terminated
 	 */
-	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TMIX_8438)) {
+	if (kbase_hw_has_issue(kbdev, KBASE_HW_ISSUE_TMIX_8438)) {
 		if (completion_code == BASE_JD_EVENT_STOPPED &&
 		    (katom->atom_flags & KBASE_KATOM_FLAG_BEEN_HARD_STOPPED)) {
 			completion_code = BASE_JD_EVENT_TERMINATED;
@@ -1331,6 +1327,9 @@ void kbase_gpu_complete_hw(struct kbase_device *kbdev, unsigned int js, u32 comp
 		dev_dbg(kbdev->dev, "Update job chain address of atom %pK to resume from 0x%llx\n",
 			(void *)katom, job_tail);
 
+		/* Some of the job has been executed, so we update the job chain address to where
+		 * we should resume from.
+		 */
 		katom->jc = job_tail;
 		KBASE_KTRACE_ADD_JM_SLOT(kbdev, JM_UPDATE_HEAD, katom->kctx, katom, job_tail, js);
 	}
@@ -1381,6 +1380,8 @@ void kbase_gpu_complete_hw(struct kbase_device *kbdev, unsigned int js, u32 comp
 		dev_dbg(kbdev->dev, "Cross-slot dependency %pK has become runnable.\n",
 			(void *)katom);
 
+		/* Cross-slot dependency has now become runnable. Try to submit it. */
+
 		/* Check if there are lower priority jobs to soft stop */
 		kbase_job_slot_ctx_priority_check_locked(kctx, katom);
 
@@ -1437,7 +1438,7 @@ void kbase_backend_reset(struct kbase_device *kbdev, ktime_t *end_timestamp)
 			 * then leave it in the RB and next time we're kicked
 			 * it will be processed again from the starting state.
 			 */
-			if (keep_in_jm_rb) {
+			if (!kbase_is_gpu_removed(kbdev) && keep_in_jm_rb) {
 				katom->protected_state.exit = KBASE_ATOM_EXIT_PROTECTED_CHECK;
 				/* As the atom was not removed, increment the
 				 * index so that we read the correct atom in the
diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_js_backend.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_js_backend.c
index 202671b323d5..99037c25bf08 100644
--- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_js_backend.c
+++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_js_backend.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
 /*
  *
- * (C) COPYRIGHT 2014-2023 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2024 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -46,7 +46,7 @@ static inline bool timer_callback_should_run(struct kbase_device *kbdev, int nr_
 	}
 #endif /* CONFIG_MALI_BIFROST_DEBUG */
 
-	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_9435)) {
+	if (kbase_hw_has_issue(kbdev, KBASE_HW_ISSUE_9435)) {
 		/* Timeouts would have to be 4x longer (due to micro-
 		 * architectural design) to support OpenCL conformance tests, so
 		 * only run the timer when there's:
@@ -100,7 +100,7 @@ static enum hrtimer_restart timer_callback(struct hrtimer *timer)
 			/* The current version of the model doesn't support
 			 * Soft-Stop
 			 */
-			if (!kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_5736)) {
+			if (!kbase_hw_has_issue(kbdev, KBASE_HW_ISSUE_5736)) {
 				u32 ticks = atom->ticks++;
 
 #if !defined(CONFIG_MALI_JOB_DUMP) && !defined(CONFIG_MALI_VECTOR_DUMP)
diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_model_dummy.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_model_dummy.c
index 41b9b37797d3..0f4a8cd096bb 100644
--- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_model_dummy.c
+++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_model_dummy.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
 /*
  *
- * (C) COPYRIGHT 2014-2023 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2024 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -25,42 +25,8 @@
  *   insmod'ing mali_kbase.ko with no arguments after a build with "scons
  *   gpu=tXYZ" will yield the expected GPU ID for tXYZ. This can always be
  *   overridden by passing the 'no_mali_gpu' argument to insmod.
- *
- * - if CONFIG_MALI_BIFROST_ERROR_INJECT is defined the error injection system is
- *   activated.
  */
 
-/* Implementation of failure injection system:
- *
- * Error conditions are generated by gpu_generate_error().
- * According to CONFIG_MALI_BIFROST_ERROR_INJECT definition gpu_generate_error() either
- * generates an error HW condition randomly (CONFIG_MALI_ERROR_INJECT_RANDOM) or
- * checks if there is (in error_track_list) an error configuration to be set for
- * the current job chain (CONFIG_MALI_ERROR_INJECT_RANDOM not defined).
- * Each error condition will trigger a specific "state" for a certain set of
- * registers as per Midgard Architecture Specifications doc.
- *
- * According to Midgard Architecture Specifications doc the following registers
- * are always affected by error conditions:
- *
- * JOB Exception:
- *				JOB_IRQ_RAWSTAT
- *				JOB<n> STATUS AREA
- *
- * MMU Exception:
- *				MMU_IRQ_RAWSTAT
- *				AS<n>_FAULTSTATUS
- *				AS<n>_FAULTADDRESS
- *
- * GPU Exception:
- *				GPU_IRQ_RAWSTAT
- *				GPU_FAULTSTATUS
- *				GPU_FAULTADDRESS
- *
- *	For further clarification on the model behaviour upon specific error
- *      conditions the user may refer to the Midgard Architecture Specification
- *      document
- */
 #include <mali_kbase.h>
 #include <device/mali_kbase_device.h>
 #include <hw_access/mali_kbase_hw_access_regmap.h>
@@ -126,7 +92,7 @@ struct error_status_t hw_error_status;
  */
 struct control_reg_values_t {
 	const char *name;
-	u32 gpu_id;
+	u64 gpu_id;
 	u32 as_present;
 	u32 thread_max_threads;
 	u32 thread_max_workgroup_size;
@@ -524,7 +490,7 @@ MODULE_PARM_DESC(no_mali_gpu, "GPU to identify as");
 static u32 gpu_model_get_prfcnt_value(enum kbase_ipa_core_type core_type, u32 cnt_idx,
 				      bool is_low_word)
 {
-	u64 *counters_data;
+	u64 *counters_data = NULL;
 	u32 core_count = 0;
 	u32 event_index;
 	u64 value = 0;
@@ -580,6 +546,9 @@ static u32 gpu_model_get_prfcnt_value(enum kbase_ipa_core_type core_type, u32 cn
 		break;
 	}
 
+	if (unlikely(counters_data == NULL))
+		return 0;
+
 	for (core = 0; core < core_count; core++) {
 		value += counters_data[event_index];
 		event_index += KBASE_DUMMY_MODEL_COUNTER_PER_CORE;
@@ -1172,9 +1141,6 @@ static void midgard_model_update(void *h)
 
 		/*this job is done assert IRQ lines */
 		signal_int(dummy, i);
-#ifdef CONFIG_MALI_BIFROST_ERROR_INJECT
-		midgard_set_error(i);
-#endif /* CONFIG_MALI_BIFROST_ERROR_INJECT */
 		update_register_statuses(dummy, i);
 		/*if this job slot returned failures we cannot use it */
 		if (hw_error_status.job_irq_rawstat & (1u << (i + 16))) {
@@ -1564,6 +1530,7 @@ void midgard_model_write_reg(void *h, u32 addr, u32 value)
 		case L2_PWROFF_HI:
 		case PWR_KEY:
 		case PWR_OVERRIDE0:
+		case PWR_OVERRIDE1:
 #if MALI_USE_CSF
 		case SHADER_PWRFEATURES:
 		case CSF_CONFIG:
@@ -1607,8 +1574,7 @@ void midgard_model_read_reg(void *h, u32 addr, u32 *const value)
 #else /* !MALI_USE_CSF */
 	if (addr == GPU_CONTROL_REG(GPU_ID)) {
 #endif /* !MALI_USE_CSF */
-
-		*value = dummy->control_reg_values->gpu_id;
+		*value = dummy->control_reg_values->gpu_id & U32_MAX;
 	} else if (addr == JOB_CONTROL_REG(JOB_IRQ_RAWSTAT)) {
 		*value = hw_error_status.job_irq_rawstat;
 		pr_debug("%s", "JS_IRQ_RAWSTAT being read");
@@ -1987,7 +1953,8 @@ void midgard_model_read_reg(void *h, u32 addr, u32 *const value)
 		*value = dummy->control_reg_values->gpu_features_lo;
 	} else if (addr == GPU_CONTROL_REG(GPU_FEATURES_HI)) {
 		*value = dummy->control_reg_values->gpu_features_hi;
-	} else {
+	}
+	else {
 		model_error_log(
 			KBASE_CORE,
 			"Dummy model register access: Reading unsupported register 0x%x. Returning 0\n",
@@ -2166,9 +2133,3 @@ int gpu_model_control(void *model, struct kbase_model_control_params *params)
 
 	return 0;
 }
-
-u64 midgard_model_arch_timer_get_cntfrq(void *h)
-{
-	CSTD_UNUSED(h);
-	return arch_timer_get_cntfrq();
-}
diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_model_error_generator.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_model_error_generator.c
deleted file mode 100644
index 86d4e26bd6b4..000000000000
--- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_model_error_generator.c
+++ /dev/null
@@ -1,172 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
-/*
- *
- * (C) COPYRIGHT 2014-2015, 2018-2023 ARM Limited. All rights reserved.
- *
- * This program is free software and is provided to you under the terms of the
- * GNU General Public License version 2 as published by the Free Software
- * Foundation, and any use by you of this program is subject to the terms
- * of such GNU license.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, you can access it online at
- * http://www.gnu.org/licenses/gpl-2.0.html.
- *
- */
-
-#include <mali_kbase.h>
-#include <linux/random.h>
-#include "backend/gpu/mali_kbase_model_linux.h"
-
-static struct kbase_error_atom *error_track_list;
-
-#ifdef CONFIG_MALI_ERROR_INJECT_RANDOM
-
-/** Kernel 6.1.0 has dropped prandom_u32(), use get_random_u32() */
-#if (KERNEL_VERSION(6, 1, 0) <= LINUX_VERSION_CODE)
-#define prandom_u32 get_random_u32
-#endif
-
-/*following error probability are set quite high in order to stress the driver*/
-static unsigned int error_probability = 50; /* to be set between 0 and 100 */
-/* probability to have multiple error give that there is an error */
-static unsigned int multiple_error_probability = 50;
-
-/* all the error conditions supported by the model */
-#define TOTAL_FAULTS 27
-/* maximum number of levels in the MMU translation table tree */
-#define MAX_MMU_TABLE_LEVEL 4
-/* worst case scenario is <1 MMU fault + 1 job fault + 2 GPU faults> */
-#define MAX_CONCURRENT_FAULTS 3
-
-/**
- * gpu_generate_error - Generate GPU error
- */
-static void gpu_generate_error(void)
-{
-	unsigned int errors_num = 0;
-
-	/*is there at least one error? */
-	if ((prandom_u32() % 100) < error_probability) {
-		/* pick up a faulty mmu address space */
-		hw_error_status.faulty_mmu_as = prandom_u32() % NUM_MMU_AS;
-		/* pick up an mmu table level */
-		hw_error_status.mmu_table_level = 1 + (prandom_u32() % MAX_MMU_TABLE_LEVEL);
-		hw_error_status.errors_mask = (u32)(1 << (prandom_u32() % TOTAL_FAULTS));
-
-		/*is there also one or more errors? */
-		if ((prandom_u32() % 100) < multiple_error_probability) {
-			errors_num = 1 + (prandom_u32() % (MAX_CONCURRENT_FAULTS - 1));
-			while (errors_num-- > 0) {
-				u32 temp_mask;
-
-				temp_mask = (u32)(1 << (prandom_u32() % TOTAL_FAULTS));
-				/* below we check that no bit of the same error
-				 * type is set again in the error mask
-				 */
-				if ((temp_mask & IS_A_JOB_ERROR) &&
-				    (hw_error_status.errors_mask & IS_A_JOB_ERROR)) {
-					errors_num++;
-					continue;
-				}
-				if ((temp_mask & IS_A_MMU_ERROR) &&
-				    (hw_error_status.errors_mask & IS_A_MMU_ERROR)) {
-					errors_num++;
-					continue;
-				}
-				if ((temp_mask & IS_A_GPU_ERROR) &&
-				    (hw_error_status.errors_mask & IS_A_GPU_ERROR)) {
-					errors_num++;
-					continue;
-				}
-				/* this error mask is already set */
-				if ((hw_error_status.errors_mask | temp_mask) ==
-				    hw_error_status.errors_mask) {
-					errors_num++;
-					continue;
-				}
-				hw_error_status.errors_mask |= temp_mask;
-			}
-		}
-	}
-}
-#endif
-
-int job_atom_inject_error(struct kbase_error_params *params)
-{
-	struct kbase_error_atom *new_elem;
-
-	KBASE_DEBUG_ASSERT(params);
-
-	new_elem = kzalloc(sizeof(*new_elem), GFP_KERNEL);
-
-	if (!new_elem) {
-		model_error_log(KBASE_CORE,
-				"\njob_atom_inject_error: kzalloc failed for new_elem\n");
-		return -ENOMEM;
-	}
-	new_elem->params.jc = params->jc;
-	new_elem->params.errors_mask = params->errors_mask;
-	new_elem->params.mmu_table_level = params->mmu_table_level;
-	new_elem->params.faulty_mmu_as = params->faulty_mmu_as;
-
-	/*circular list below */
-	if (error_track_list == NULL) { /*no elements */
-		error_track_list = new_elem;
-		new_elem->next = error_track_list;
-	} else {
-		struct kbase_error_atom *walker = error_track_list;
-
-		while (walker->next != error_track_list)
-			walker = walker->next;
-
-		new_elem->next = error_track_list;
-		walker->next = new_elem;
-	}
-	return 0;
-}
-
-void midgard_set_error(u32 job_slot)
-{
-#ifdef CONFIG_MALI_ERROR_INJECT_RANDOM
-	gpu_generate_error();
-#else
-	struct kbase_error_atom *walker, *auxiliar;
-
-	if (error_track_list != NULL) {
-		walker = error_track_list->next;
-		auxiliar = error_track_list;
-		do {
-			if (walker->params.jc == hw_error_status.current_jc) {
-				/* found a faulty atom matching with the
-				 * current one
-				 */
-				hw_error_status.errors_mask = walker->params.errors_mask;
-				hw_error_status.mmu_table_level = walker->params.mmu_table_level;
-				hw_error_status.faulty_mmu_as = walker->params.faulty_mmu_as;
-				hw_error_status.current_job_slot = job_slot;
-
-				if (walker->next == walker) {
-					/* only one element */
-					kfree(error_track_list);
-					error_track_list = NULL;
-				} else {
-					auxiliar->next = walker->next;
-					if (walker == error_track_list)
-						error_track_list = walker->next;
-
-					kfree(walker);
-				}
-				break;
-			}
-			auxiliar = walker;
-			walker = walker->next;
-		} while (auxiliar->next != error_track_list);
-	}
-#endif /* CONFIG_MALI_ERROR_INJECT_RANDOM */
-}
diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_model_linux.h b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_model_linux.h
index 77e089ef45c8..d38bb8891be1 100644
--- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_model_linux.h
+++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_model_linux.h
@@ -48,12 +48,8 @@
 /*
  * Include Model definitions
  */
-
-#if IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI)
 #include <backend/gpu/mali_kbase_model_dummy.h>
-#endif /* IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) */
 
-#if !IS_ENABLED(CONFIG_MALI_REAL_HW)
 /**
  * kbase_gpu_device_create() - Generic create function.
  *
@@ -116,15 +112,6 @@ void midgard_model_write_reg(void *h, u32 addr, u32 value);
  */
 void midgard_model_read_reg(void *h, u32 addr, u32 *const value);
 
-/**
- * midgard_model_arch_timer_get_cntfrq - Get Model specific System Timer Frequency
- *
- * @h: Model handle.
- *
- * Return: Frequency in Hz
- */
-u64 midgard_model_arch_timer_get_cntfrq(void *h);
-
 /**
  * gpu_device_raise_irq() - Private IRQ raise function.
  *
@@ -155,6 +142,5 @@ void gpu_device_set_data(void *model, void *data);
  * Return: Pointer to the data carried by model.
  */
 void *gpu_device_get_data(void *model);
-#endif /* !IS_ENABLED(CONFIG_MALI_REAL_HW) */
 
 #endif /* _KBASE_MODEL_LINUX_H_ */
diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_backend.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_backend.c
index ca4e73d3fbb7..e1941d50133a 100644
--- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_backend.c
+++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_backend.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
 /*
  *
- * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2024 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -36,6 +36,7 @@
 #include <linux/version_compat_defs.h>
 #include <linux/pm_runtime.h>
 #include <mali_kbase_reset_gpu.h>
+#include <csf/mali_kbase_csf_scheduler.h>
 #endif /* !MALI_USE_CSF */
 #include <hwcnt/mali_kbase_hwcnt_context.h>
 #include <backend/gpu/mali_kbase_pm_internal.h>
@@ -97,10 +98,8 @@ void kbase_pm_register_access_enable(struct kbase_device *kbdev)
 	if (callbacks)
 		callbacks->power_on_callback(kbdev);
 
-#ifdef CONFIG_MALI_ARBITER_SUPPORT
 	if (WARN_ON(kbase_pm_is_gpu_lost(kbdev)))
 		dev_err(kbdev->dev, "Attempting to power on while GPU lost\n");
-#endif
 
 	kbdev->pm.backend.gpu_powered = true;
 }
@@ -133,9 +132,7 @@ int kbase_hwaccess_pm_init(struct kbase_device *kbdev)
 	INIT_WORK(&kbdev->pm.backend.gpu_poweroff_wait_work, kbase_pm_gpu_poweroff_wait_wq);
 
 	kbdev->pm.backend.ca_cores_enabled = ~0ull;
-#ifdef CONFIG_MALI_ARBITER_SUPPORT
 	kbase_pm_set_gpu_lost(kbdev, false);
-#endif
 	init_waitqueue_head(&kbdev->pm.backend.gpu_in_desired_state_wait);
 
 #if !MALI_USE_CSF
@@ -177,15 +174,18 @@ int kbase_hwaccess_pm_init(struct kbase_device *kbdev)
 	kbase_hwcnt_context_disable(kbdev->hwcnt_gpu_ctx);
 
 #if MALI_USE_CSF && defined(KBASE_PM_RUNTIME)
-	kbdev->pm.backend.gpu_sleep_supported =
-		kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_GPU_SLEEP) &&
-		!kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TURSEHW_1997) &&
-		kbdev->pm.backend.callback_power_runtime_gpu_active &&
-		kbdev->pm.backend.callback_power_runtime_gpu_idle;
+	kbdev->pm.backend.gpu_sleep_allowed = 0;
+	if (kbase_hw_has_feature(kbdev, KBASE_HW_FEATURE_GPU_SLEEP) &&
+	    !kbase_hw_has_issue(kbdev, KBASE_HW_ISSUE_TURSEHW_1997) &&
+	    kbdev->pm.backend.callback_power_runtime_gpu_active &&
+	    kbdev->pm.backend.callback_power_runtime_gpu_idle)
+		set_bit(KBASE_GPU_SUPPORTS_GPU_SLEEP, &kbdev->pm.backend.gpu_sleep_allowed);
 
 	kbdev->pm.backend.apply_hw_issue_TITANHW_2938_wa =
-		kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TITANHW_2938) &&
-		kbdev->pm.backend.gpu_sleep_supported;
+		kbase_hw_has_issue(kbdev, KBASE_HW_ISSUE_TITANHW_2938) &&
+		test_bit(KBASE_GPU_SUPPORTS_GPU_SLEEP, &kbdev->pm.backend.gpu_sleep_allowed);
+
+	/* FW Sleep-on-Idle feature is kept disabled */
 #endif
 
 	if (IS_ENABLED(CONFIG_MALI_HW_ERRATA_1485982_NOT_AFFECTED))
@@ -193,14 +193,14 @@ int kbase_hwaccess_pm_init(struct kbase_device *kbdev)
 
 	/* WA1: L2 always_on for GPUs being affected by GPU2017-1336 */
 	if (!IS_ENABLED(CONFIG_MALI_HW_ERRATA_1485982_USE_CLOCK_ALTERNATIVE)) {
-		if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_GPU2017_1336))
+		if (kbase_hw_has_issue(kbdev, KBASE_HW_ISSUE_GPU2017_1336))
 			kbdev->pm.backend.l2_always_on = true;
 
 		return 0;
 	}
 
 	/* WA3: Clock slow down for GPUs being affected by GPU2017-1336 */
-	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_GPU2017_1336)) {
+	if (kbase_hw_has_issue(kbdev, KBASE_HW_ISSUE_GPU2017_1336)) {
 		kbdev->pm.backend.gpu_clock_slow_down_wa = true;
 		kbdev->pm.backend.gpu_clock_slow_down_desired = true;
 		INIT_WORK(&kbdev->pm.backend.gpu_clock_control_work,
@@ -345,13 +345,11 @@ static void pm_handle_power_off(struct kbase_device *kbdev)
 		 */
 		wait_for_mmu_fault_handling_in_gpu_poweroff_wait_wq(kbdev);
 
-#ifdef CONFIG_MALI_ARBITER_SUPPORT
 		/* poweron_required may have changed while pm lock
 		 * was released.
 		 */
 		if (kbase_pm_is_gpu_lost(kbdev))
 			backend->poweron_required = false;
-#endif
 
 		/* Turn off clock now that fault have been handled. We
 		 * dropped locks so poweron_required may have changed -
@@ -393,7 +391,7 @@ static void kbase_pm_gpu_poweroff_wait_wq(struct work_struct *data)
 		backend->poweron_required = false;
 		kbdev->pm.backend.l2_desired = true;
 #if MALI_USE_CSF
-		kbdev->pm.backend.mcu_desired = true;
+		kbdev->pm.backend.mcu_desired = kbdev->pm.backend.mcu_poweron_required;
 #endif
 		kbase_pm_update_state(kbdev);
 		kbase_pm_update_cores_state_nolock(kbdev);
@@ -860,9 +858,11 @@ void kbase_pm_set_debug_core_mask(struct kbase_device *kbdev, u64 new_core_mask)
 }
 KBASE_EXPORT_TEST_API(kbase_pm_set_debug_core_mask);
 #else
-void kbase_pm_set_debug_core_mask(struct kbase_device *kbdev, u64 new_core_mask_js0,
-				  u64 new_core_mask_js1, u64 new_core_mask_js2)
+void kbase_pm_set_debug_core_mask(struct kbase_device *kbdev, u64 *new_core_mask,
+				  size_t new_core_mask_size)
 {
+	size_t i;
+
 	lockdep_assert_held(&kbdev->hwaccess_lock);
 	lockdep_assert_held(&kbdev->pm.lock);
 
@@ -870,13 +870,14 @@ void kbase_pm_set_debug_core_mask(struct kbase_device *kbdev, u64 new_core_mask_
 		dev_warn_once(
 			kbdev->dev,
 			"Change of core mask not supported for slot 0 as dummy job WA is enabled");
-		new_core_mask_js0 = kbdev->pm.debug_core_mask[0];
+		new_core_mask[0] = kbdev->pm.debug_core_mask[0];
 	}
 
-	kbdev->pm.debug_core_mask[0] = new_core_mask_js0;
-	kbdev->pm.debug_core_mask[1] = new_core_mask_js1;
-	kbdev->pm.debug_core_mask[2] = new_core_mask_js2;
-	kbdev->pm.debug_core_mask_all = new_core_mask_js0 | new_core_mask_js1 | new_core_mask_js2;
+	kbdev->pm.debug_core_mask_all = 0;
+	for (i = 0; i < new_core_mask_size; i++) {
+		kbdev->pm.debug_core_mask[i] = new_core_mask[i];
+		kbdev->pm.debug_core_mask_all |= new_core_mask[i];
+	}
 
 	kbase_pm_update_dynamic_cores_onoff(kbdev);
 }
@@ -942,13 +943,11 @@ void kbase_hwaccess_pm_resume(struct kbase_device *kbdev)
 	/* System resume callback has begun */
 	kbdev->pm.resuming = true;
 	kbdev->pm.suspending = false;
-#ifdef CONFIG_MALI_ARBITER_SUPPORT
 	if (kbase_pm_is_gpu_lost(kbdev)) {
 		dev_dbg(kbdev->dev, "%s: GPU lost in progress\n", __func__);
 		kbase_pm_unlock(kbdev);
 		return;
 	}
-#endif
 	kbase_pm_do_poweron(kbdev, true);
 
 #if !MALI_USE_CSF
@@ -958,17 +957,20 @@ void kbase_hwaccess_pm_resume(struct kbase_device *kbdev)
 	kbase_pm_unlock(kbdev);
 }
 
-#ifdef CONFIG_MALI_ARBITER_SUPPORT
 void kbase_pm_handle_gpu_lost(struct kbase_device *kbdev)
 {
 	unsigned long flags;
-#if !MALI_USE_CSF
+#if MALI_USE_CSF
+	unsigned long flags_sched;
+#else
 	ktime_t end_timestamp = ktime_get_raw();
 #endif
 	struct kbase_arbiter_vm_state *arb_vm_state = kbdev->pm.arb_vm_state;
 
-	if (!kbdev->arb.arb_if)
+	if (!kbase_has_arbiter(kbdev)) {
+		dev_warn(kbdev->dev, "%s called with no active arbiter!\n", __func__);
 		return;
+	}
 
 	mutex_lock(&kbdev->pm.lock);
 	mutex_lock(&arb_vm_state->vm_state_lock);
@@ -981,24 +983,45 @@ void kbase_pm_handle_gpu_lost(struct kbase_device *kbdev)
 		 */
 		WARN(!kbase_is_gpu_removed(kbdev), "GPU is still available after GPU lost event\n");
 
-		/* Full GPU reset will have been done by hypervisor, so
-		 * cancel
-		 */
+#if MALI_USE_CSF
+		/* Full GPU reset will have been done by hypervisor, so cancel any pending reset */
+		if (kbase_reset_gpu_prevent_and_wait(kbdev))
+			dev_warn(kbdev->dev, "Failed to prevent GPU reset.");
+
+		spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+		kbase_csf_scheduler_spin_lock(kbdev, &flags_sched);
+		atomic_set(&kbdev->hwaccess.backend.reset_gpu, KBASE_RESET_GPU_NOT_PENDING);
+		kbase_csf_scheduler_spin_unlock(kbdev, flags_sched);
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+		kbase_synchronize_irqs(kbdev);
+
+		/* Scheduler reset happens outside of spinlock due to the mutex it acquires */
+		kbase_csf_scheduler_reset(kbdev);
+
+		/* Update kbase status */
+		spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+		kbdev->protected_mode = false;
+		kbase_pm_update_state(kbdev);
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+		/* Cancel any pending HWC dumps */
+		kbase_hwcnt_backend_csf_on_unrecoverable_error(&kbdev->hwcnt_gpu_iface);
+#else
+		/* Full GPU reset will have been done by hypervisor, so cancel any pending reset */
 		atomic_set(&kbdev->hwaccess.backend.reset_gpu, KBASE_RESET_GPU_NOT_PENDING);
 		hrtimer_cancel(&kbdev->hwaccess.backend.reset_timer);
+
 		kbase_synchronize_irqs(kbdev);
 
 		/* Clear all jobs running on the GPU */
 		spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
 		kbdev->protected_mode = false;
-#if !MALI_USE_CSF
 		kbase_backend_reset(kbdev, &end_timestamp);
 		kbase_pm_metrics_update(kbdev, NULL);
-#endif
 		kbase_pm_update_state(kbdev);
 		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
 
-#if !MALI_USE_CSF
 		/* Cancel any pending HWC dumps */
 		spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
 		if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_DUMPING ||
@@ -1008,14 +1031,12 @@ void kbase_pm_handle_gpu_lost(struct kbase_device *kbdev)
 			wake_up(&kbdev->hwcnt.backend.wait);
 		}
 		spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
-#endif
+#endif /* MALI_USE_CSF */
 	}
 	mutex_unlock(&arb_vm_state->vm_state_lock);
 	mutex_unlock(&kbdev->pm.lock);
 }
 
-#endif /* CONFIG_MALI_ARBITER_SUPPORT */
-
 #if MALI_USE_CSF && defined(KBASE_PM_RUNTIME)
 int kbase_pm_force_mcu_wakeup_after_sleep(struct kbase_device *kbdev)
 {
@@ -1063,26 +1084,15 @@ static int pm_handle_mcu_sleep_on_runtime_suspend(struct kbase_device *kbdev)
 	}
 
 	/* Check if a Doorbell mirror interrupt occurred meanwhile.
-	 * Also check if GPU idle work item is pending. If FW had sent the GPU idle notification
-	 * after the wake up of MCU then it can be assumed that Userspace submission didn't make
-	 * GPU non-idle, so runtime suspend doesn't need to be aborted.
 	 */
 	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
-	if (kbdev->pm.backend.gpu_sleep_mode_active && kbdev->pm.backend.exit_gpu_sleep_mode &&
-	    !work_pending(&kbdev->csf.scheduler.gpu_idle_work)) {
-		u32 glb_req =
-			kbase_csf_firmware_global_input_read(&kbdev->csf.global_iface, GLB_REQ);
-		u32 glb_ack = kbase_csf_firmware_global_output(&kbdev->csf.global_iface, GLB_ACK);
-
-		/* Only abort the runtime suspend if GPU idle event is not pending */
-		if (!((glb_req ^ glb_ack) & GLB_REQ_IDLE_EVENT_MASK)) {
-			dev_dbg(kbdev->dev,
-				"DB mirror interrupt occurred during runtime suspend after L2 power up");
-			kbdev->pm.backend.gpu_wakeup_override = false;
-			kbdev->pm.backend.runtime_suspend_abort_reason = ABORT_REASON_DB_MIRROR_IRQ;
-			spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
-			return -EBUSY;
-		}
+	if (kbdev->pm.backend.gpu_sleep_mode_active && kbdev->pm.backend.exit_gpu_sleep_mode) {
+		dev_dbg(kbdev->dev,
+			"DB mirror interrupt occurred during runtime suspend after L2 power up");
+		kbdev->pm.backend.gpu_wakeup_override = false;
+		kbdev->pm.backend.runtime_suspend_abort_reason = ABORT_REASON_DB_MIRROR_IRQ;
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+		return -EBUSY;
 	}
 	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
 	/* Need to release the kbdev->pm.lock to avoid lock ordering issue
@@ -1237,4 +1247,5 @@ out:
 
 	return ret;
 }
+
 #endif
diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_ca.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_ca.c
index 8daef13388a3..6522e5ca66e9 100644
--- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_ca.c
+++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_ca.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
 /*
  *
- * (C) COPYRIGHT 2013-2023 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2013-2024 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -55,11 +55,18 @@ void kbase_devfreq_set_core_mask(struct kbase_device *kbdev, u64 core_mask)
 	unsigned long flags;
 #if MALI_USE_CSF
 	u64 old_core_mask = 0;
-#endif
+	bool mmu_sync_needed = false;
 
+	if (!IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) &&
+	    kbase_hw_has_issue(kbdev, KBASE_HW_ISSUE_GPU2019_3901)) {
+		mmu_sync_needed = true;
+		down_write(&kbdev->csf.mmu_sync_sem);
+	}
+#endif
 	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
 
 #if MALI_USE_CSF
+
 	if (!(core_mask & kbdev->pm.debug_core_mask)) {
 		dev_err(kbdev->dev,
 			"OPP core mask 0x%llX does not intersect with debug mask 0x%llX\n",
@@ -98,6 +105,9 @@ void kbase_devfreq_set_core_mask(struct kbase_device *kbdev, u64 core_mask)
 				 old_core_mask, core_mask);
 		}
 	}
+
+	if (mmu_sync_needed)
+		up_write(&kbdev->csf.mmu_sync_sem);
 #endif
 
 	dev_dbg(kbdev->dev, "Devfreq policy : new core mask=%llX\n", pm_backend->ca_cores_enabled);
@@ -105,6 +115,10 @@ void kbase_devfreq_set_core_mask(struct kbase_device *kbdev, u64 core_mask)
 	return;
 unlock:
 	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+#if MALI_USE_CSF
+	if (mmu_sync_needed)
+		up_write(&kbdev->csf.mmu_sync_sem);
+#endif
 }
 KBASE_EXPORT_TEST_API(kbase_devfreq_set_core_mask);
 #endif
diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_defs.h b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_defs.h
index a0b8b9500077..a25fe6bdc912 100644
--- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_defs.h
+++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_defs.h
@@ -1,7 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
 /*
  *
- * (C) COPYRIGHT 2014-2023 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2024 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -114,6 +114,27 @@ enum kbase_pm_runtime_suspend_abort_reason {
 	ABORT_REASON_NON_IDLE_CGS
 };
 
+/* The following indices point to the corresponding bits stored in
+ * &kbase_pm_backend_data.gpu_sleep_allowed. They denote the conditions that
+ * are checked to determine the level of support for GPU sleep and firmware
+ * sleep-on-idle.
+ */
+#define KBASE_GPU_SUPPORTS_GPU_SLEEP ((uint8_t)0)
+#define KBASE_GPU_SUPPORTS_FW_SLEEP_ON_IDLE ((uint8_t)1)
+#define KBASE_GPU_PERF_COUNTERS_COLLECTION_ENABLED ((uint8_t)2)
+#define KBASE_GPU_IGNORE_IDLE_EVENT ((uint8_t)3)
+#define KBASE_GPU_NON_IDLE_OFF_SLOT_GROUPS_AVAILABLE ((uint8_t)4)
+
+/* FW sleep-on-idle could be enabled if
+ * &kbase_pm_backend_data.gpu_sleep_allowed is equal to this value.
+ */
+#define KBASE_GPU_FW_SLEEP_ON_IDLE_ALLOWED                             \
+	((uint8_t)((1 << KBASE_GPU_SUPPORTS_GPU_SLEEP) |               \
+		   (1 << KBASE_GPU_SUPPORTS_FW_SLEEP_ON_IDLE) |        \
+		   (0 << KBASE_GPU_PERF_COUNTERS_COLLECTION_ENABLED) | \
+		   (0 << KBASE_GPU_IGNORE_IDLE_EVENT) |                \
+		   (0 << KBASE_GPU_NON_IDLE_OFF_SLOT_GROUPS_AVAILABLE)))
+
 /**
  * struct kbasep_pm_metrics - Metrics data collected for use by the power
  *                            management framework.
@@ -304,7 +325,7 @@ union kbase_pm_policy_data {
  *                                     called previously.
  *                                     See &struct kbase_pm_callback_conf.
  * @ca_cores_enabled: Cores that are currently available
- * @apply_hw_issue_TITANHW_2938_wa: Indicates if the workaround for BASE_HW_ISSUE_TITANHW_2938
+ * @apply_hw_issue_TITANHW_2938_wa: Indicates if the workaround for KBASE_HW_ISSUE_TITANHW_2938
  *                                  needs to be applied when unmapping memory from GPU.
  * @mcu_state: The current state of the micro-control unit, only applicable
  *             to GPUs that have such a component
@@ -332,7 +353,11 @@ union kbase_pm_policy_data {
  *                   cores may be different, but there should be transitions in
  *                   progress that will eventually achieve this state (assuming
  *                   that the policy doesn't change its mind in the mean time).
- * @mcu_desired: True if the micro-control unit should be powered on
+ * @mcu_desired: True if the micro-control unit should be powered on by the MCU state
+ *               machine. Updated as per the value of @mcu_poweron_required.
+ * @mcu_poweron_required: Boolean flag updated mainly by the CSF Scheduler code,
+ *                        before updating the PM active count, to indicate to the
+ *                        PM code that micro-control unit needs to be powered up/down.
  * @policy_change_clamp_state_to_off: Signaling the backend is in PM policy
  *                change transition, needs the mcu/L2 to be brought back to the
  *                off state and remain in that state until the flag is cleared.
@@ -346,10 +371,9 @@ union kbase_pm_policy_data {
  * @core_idle_work: Work item used to wait for undesired cores to become inactive.
  *                  The work item is enqueued when Host controls the power for
  *                  shader cores and down scaling of cores is performed.
- * @gpu_sleep_supported: Flag to indicate that if GPU sleep feature can be
- *                       supported by the kernel driver or not. If this
- *                       flag is not set, then HW state is directly saved
- *                       when GPU idle notification is received.
+ * @gpu_sleep_allowed: Bitmask of the conditions, indexed by the KBASE_GPU_*
+ *                     bit positions defined in this file, that determine what
+ *                     support for GPU sleep is available.
  * @gpu_sleep_mode_active: Flag to indicate that the GPU needs to be in sleep
  *                         mode. It is set when the GPU idle notification is
  *                         received and is cleared when HW state has been
@@ -485,6 +509,7 @@ struct kbase_pm_backend_data {
 	u64 shaders_desired_mask;
 #if MALI_USE_CSF
 	bool mcu_desired;
+	bool mcu_poweron_required;
 	bool policy_change_clamp_state_to_off;
 	unsigned int csf_pm_sched_flags;
 	struct mutex policy_change_lock;
@@ -492,7 +517,7 @@ struct kbase_pm_backend_data {
 	struct work_struct core_idle_work;
 
 #ifdef KBASE_PM_RUNTIME
-	bool gpu_sleep_supported;
+	unsigned long gpu_sleep_allowed;
 	bool gpu_sleep_mode_active;
 	bool exit_gpu_sleep_mode;
 	bool gpu_idled;
diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_driver.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_driver.c
index 506e168f86d2..c6b6f3a8668a 100644
--- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_driver.c
+++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_driver.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
 /*
  *
- * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2024 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -47,9 +47,7 @@
 #include <backend/gpu/mali_kbase_pm_internal.h>
 #include <backend/gpu/mali_kbase_l2_mmu_config.h>
 #include <mali_kbase_dummy_job_wa.h>
-#ifdef CONFIG_MALI_ARBITER_SUPPORT
 #include <arbiter/mali_kbase_arbiter_pm.h>
-#endif /* CONFIG_MALI_ARBITER_SUPPORT */
 
 #if MALI_USE_CSF
 #include <linux/delay.h>
@@ -70,6 +68,19 @@ MODULE_PARM_DESC(corestack_driver_control,
 		 "to the Mali GPU is known to be problematic.");
 KBASE_EXPORT_TEST_API(corestack_driver_control);
 
+/**
+ * enum kbase_gpu_state - The state of data in the GPU.
+ *
+ * @GPU_STATE_INTACT: The GPU state is intact
+ * @GPU_STATE_LOST: The GPU state is lost
+ * @GPU_STATE_IN_RESET: The GPU is in reset state
+ *
+ * This enumeration is private to the file. Its values are
+ * the possible return values of the platform specific PM
+ * callback (*power_on_callback).
+ */
+enum kbase_gpu_state { GPU_STATE_INTACT = 0, GPU_STATE_LOST, GPU_STATE_IN_RESET };
+
 /**
  * enum kbasep_pm_action - Actions that can be performed on a core.
  *
@@ -110,7 +121,15 @@ bool kbase_pm_is_mcu_desired(struct kbase_device *kbdev)
 	if (kbdev->pm.backend.l2_force_off_after_mcu_halt)
 		return false;
 
-	if (kbdev->csf.scheduler.pm_active_count && kbdev->pm.backend.mcu_desired)
+	/* Check if policy changing transition needs MCU to be off. */
+	if (unlikely(kbdev->pm.backend.policy_change_clamp_state_to_off))
+		return false;
+
+	if (kbdev->pm.backend.mcu_desired)
+		return true;
+
+	/* For always_on policy, the MCU needs to be kept on */
+	if (kbase_pm_no_mcu_core_pwroff(kbdev))
 		return true;
 
 #ifdef KBASE_PM_RUNTIME
@@ -119,13 +138,7 @@ bool kbase_pm_is_mcu_desired(struct kbase_device *kbdev)
 		return true;
 #endif
 
-	/* MCU is supposed to be ON, only when scheduler.pm_active_count is
-	 * non zero. But for always_on policy, the MCU needs to be kept on,
-	 * unless policy changing transition needs it off.
-	 */
-
-	return (kbdev->pm.backend.mcu_desired && kbase_pm_no_mcu_core_pwroff(kbdev) &&
-		!kbdev->pm.backend.policy_change_clamp_state_to_off);
+	return false;
 }
 #endif
 
@@ -600,11 +613,11 @@ static void kbase_pm_l2_config_override(struct kbase_device *kbdev)
 	/*
 	 * Skip if it is not supported
 	 */
-	if (!kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_L2_CONFIG))
+	if (!kbase_hw_has_feature(kbdev, KBASE_HW_FEATURE_L2_CONFIG))
 		return;
 
 #if MALI_USE_CSF
-	if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PBHA_HWU)) {
+	if (kbase_hw_has_feature(kbdev, KBASE_HW_FEATURE_PBHA_HWU)) {
 		val = kbase_reg_read32(kbdev, GPU_CONTROL_ENUM(L2_CONFIG));
 		kbase_reg_write32(kbdev, GPU_CONTROL_ENUM(L2_CONFIG),
 				  L2_CONFIG_PBHA_HWU_SET(val, kbdev->pbha_propagate_bits));
@@ -728,16 +741,8 @@ bool kbase_pm_is_mcu_inactive(struct kbase_device *kbdev, enum kbase_mcu_state s
 }
 
 #ifdef KBASE_PM_RUNTIME
-/**
- * kbase_pm_enable_mcu_db_notification - Enable the Doorbell notification on
- *                                       MCU side
- *
- * @kbdev: Pointer to the device.
- *
- * This function is called to re-enable the Doorbell notification on MCU side
- * when MCU needs to beome active again.
- */
-static void kbase_pm_enable_mcu_db_notification(struct kbase_device *kbdev)
+
+void kbase_pm_enable_mcu_db_notification(struct kbase_device *kbdev)
 {
 	u32 val = kbase_reg_read32(kbdev, GPU_CONTROL_ENUM(MCU_CONTROL));
 
@@ -763,7 +768,7 @@ static void wait_mcu_as_inactive(struct kbase_device *kbdev)
 		kbase_get_timeout_ms(kbdev, KBASE_AS_INACTIVE_TIMEOUT) * USEC_PER_MSEC;
 	lockdep_assert_held(&kbdev->hwaccess_lock);
 
-	if (!kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TURSEHW_2716))
+	if (!kbase_hw_has_issue(kbdev, KBASE_HW_ISSUE_TURSEHW_2716))
 		return;
 
 	/* Wait for the AS_ACTIVE_INT bit to become 0 for the AS used by MCU FW */
@@ -912,6 +917,18 @@ static int kbase_pm_mcu_update_state(struct kbase_device *kbdev)
 			if (kbase_pm_is_mcu_desired(kbdev) &&
 			    !backend->policy_change_clamp_state_to_off &&
 			    backend->l2_state == KBASE_L2_ON) {
+				kbdev->csf.mcu_halted = false;
+
+				/* Ensure that FW would not go to sleep immediately after
+				 * resumption.
+				 */
+				kbase_csf_firmware_global_input_mask(&kbdev->csf.global_iface,
+								     GLB_REQ,
+								     GLB_REQ_REQ_IDLE_DISABLE,
+								     GLB_REQ_IDLE_DISABLE_MASK);
+				atomic_set(&kbdev->csf.scheduler.gpu_idle_timer_enabled, false);
+				atomic_set(&kbdev->csf.scheduler.fw_soi_enabled, false);
+
 				kbase_csf_firmware_trigger_reload(kbdev);
 				backend->mcu_state = KBASE_MCU_PEND_ON_RELOAD;
 			}
@@ -979,8 +996,8 @@ static int kbase_pm_mcu_update_state(struct kbase_device *kbdev)
 				kbase_hwcnt_backend_csf_set_hw_availability(
 					&kbdev->hwcnt_gpu_iface,
 					kbdev->gpu_props.curr_config.l2_slices,
-					kbdev->gpu_props.curr_config.shader_present &
-						kbdev->pm.debug_core_mask);
+					kbdev->gpu_props.curr_config.shader_present,
+					kbdev->pm.debug_core_mask);
 				kbase_hwcnt_context_enable(kbdev->hwcnt_gpu_ctx);
 				kbase_csf_scheduler_spin_unlock(kbdev, flags);
 				backend->hwcnt_disabled = false;
@@ -990,7 +1007,6 @@ static int kbase_pm_mcu_update_state(struct kbase_device *kbdev)
 
 		case KBASE_MCU_ON:
 			backend->shaders_desired_mask = kbase_pm_ca_get_core_mask(kbdev);
-
 			if (!kbase_pm_is_mcu_desired(kbdev))
 				backend->mcu_state = KBASE_MCU_ON_HWCNT_DISABLE;
 			else if (kbdev->csf.firmware_hctl_core_pwr) {
@@ -1170,7 +1186,7 @@ static int kbase_pm_mcu_update_state(struct kbase_device *kbdev)
 			break;
 
 		case KBASE_MCU_POWER_DOWN:
-			if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TITANHW_2922)) {
+			if (kbase_hw_has_issue(kbdev, KBASE_HW_ISSUE_TITANHW_2922)) {
 				if (!kbdev->csf.firmware_hctl_core_pwr)
 					kbasep_pm_toggle_power_interrupt(kbdev, true);
 				backend->mcu_state = KBASE_MCU_OFF;
@@ -1191,7 +1207,20 @@ static int kbase_pm_mcu_update_state(struct kbase_device *kbdev)
 #ifdef KBASE_PM_RUNTIME
 		case KBASE_MCU_ON_SLEEP_INITIATE:
 			if (!kbase_pm_is_mcu_desired(kbdev)) {
-				kbase_csf_firmware_trigger_mcu_sleep(kbdev);
+				bool db_notif_disabled = false;
+
+				if (likely(test_bit(KBASE_GPU_SUPPORTS_FW_SLEEP_ON_IDLE,
+						    &kbdev->pm.backend.gpu_sleep_allowed)))
+					db_notif_disabled =
+						kbase_reg_read32(kbdev,
+								 GPU_CONTROL_ENUM(MCU_CONTROL)) &
+						MCU_CNTRL_DOORBELL_DISABLE_MASK;
+
+				/* If DB notification is enabled on FW side then send a sleep
+				 * request to FW.
+				 */
+				if (!db_notif_disabled)
+					kbase_csf_firmware_trigger_mcu_sleep(kbdev);
 				backend->mcu_state = KBASE_MCU_ON_PEND_SLEEP;
 			} else
 				backend->mcu_state = KBASE_MCU_ON_HWCNT_ENABLE;
@@ -1225,6 +1254,16 @@ static int kbase_pm_mcu_update_state(struct kbase_device *kbdev)
 		case KBASE_MCU_IN_SLEEP:
 			if (kbase_pm_is_mcu_desired(kbdev) && backend->l2_state == KBASE_L2_ON) {
 				wait_mcu_as_inactive(kbdev);
+				/* Ensure that FW would not go to sleep immediately after
+				 * resumption.
+				 */
+				kbase_csf_firmware_global_input_mask(&kbdev->csf.global_iface,
+								     GLB_REQ,
+								     GLB_REQ_REQ_IDLE_DISABLE,
+								     GLB_REQ_IDLE_DISABLE_MASK);
+				atomic_set(&kbdev->csf.scheduler.gpu_idle_timer_enabled, false);
+				atomic_set(&kbdev->csf.scheduler.fw_soi_enabled, false);
+
 				KBASE_TLSTREAM_TL_KBASE_CSFFW_FW_REQUEST_WAKEUP(
 					kbdev, kbase_backend_get_cycle_cnt(kbdev));
 				kbase_pm_enable_mcu_db_notification(kbdev);
@@ -1342,6 +1381,8 @@ static void kbase_pm_l2_clear_backend_slot_submit_kctx(struct kbase_device *kbde
 
 static bool can_power_down_l2(struct kbase_device *kbdev)
 {
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
 	/* Defer the power-down if MMU is in process of page migration. */
 	return !kbdev->mmu_page_migrate_in_progress;
 }
@@ -1367,20 +1408,6 @@ static bool need_tiler_control(struct kbase_device *kbdev)
 #endif
 }
 
-/**
- * hctl_l2_power_down - Initiate power down of L2 cache
- *
- * @kbdev: The kbase device structure for the device.
- *
- * This function initiates the power down of L2 cache when Host controls the power
- * for Tiler block. The function expects that power down of Tiler to already have
- * been initiated and it triggers the L2 power down only after the power down for
- * Tiler is complete.
- * The function shall be called only if L2 is in ready state.
- */
-static void hctl_l2_power_down(struct kbase_device *kbdev)
-{
-}
 
 /**
  * hctl_tiler_power_up_done - Check and/or initiate power up of Tiler
@@ -1427,7 +1454,6 @@ static int kbase_pm_l2_update_state(struct kbase_device *kbdev)
 		u64 l2_trans = kbase_pm_get_trans_cores(kbdev, KBASE_PM_CORE_L2);
 		u64 l2_ready = kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_L2);
 
-#ifdef CONFIG_MALI_ARBITER_SUPPORT
 		/*
 		 * kbase_pm_get_ready_cores and kbase_pm_get_trans_cores
 		 * are vulnerable to corruption if gpu is lost
@@ -1456,7 +1482,6 @@ static int kbase_pm_l2_update_state(struct kbase_device *kbdev)
 			}
 			break;
 		}
-#endif
 
 		/* mask off ready from trans in case transitions finished
 		 * between the register reads
@@ -1557,7 +1582,7 @@ static int kbase_pm_l2_update_state(struct kbase_device *kbdev)
 
 		case KBASE_L2_RESTORE_CLOCKS:
 			/* We always assume only GPUs being affected by
-			 * BASE_HW_ISSUE_GPU2017_1336 fall into this state
+			 * KBASE_HW_ISSUE_GPU2017_1336 fall into this state
 			 */
 			WARN_ON_ONCE(!kbdev->pm.backend.gpu_clock_slow_down_wa);
 
@@ -1659,7 +1684,7 @@ static int kbase_pm_l2_update_state(struct kbase_device *kbdev)
 
 		case KBASE_L2_SLOW_DOWN_CLOCKS:
 			/* We always assume only GPUs being affected by
-			 * BASE_HW_ISSUE_GPU2017_1336 fall into this state
+			 * KBASE_HW_ISSUE_GPU2017_1336 fall into this state
 			 */
 			WARN_ON_ONCE(!kbdev->pm.backend.gpu_clock_slow_down_wa);
 
@@ -1708,11 +1733,6 @@ static int kbase_pm_l2_update_state(struct kbase_device *kbdev)
 
 		case KBASE_L2_PEND_OFF:
 			if (likely(!backend->l2_always_on)) {
-				if (need_tiler_control(kbdev) && l2_ready) {
-					hctl_l2_power_down(kbdev);
-					break;
-				}
-
 				if (l2_trans || l2_ready)
 					break;
 			} else if (kbdev->cache_clean_in_progress)
@@ -1727,11 +1747,10 @@ static int kbase_pm_l2_update_state(struct kbase_device *kbdev)
 			}
 #endif
 			/* Disabling MCU after L2 cache power down is to address
-			 * BASE_HW_ISSUE_TITANHW_2922 hardware issue.
+			 * KBASE_HW_ISSUE_TITANHW_2922 hardware issue.
 			 */
 			if (backend->l2_force_off_after_mcu_halt) {
-				kbase_csf_firmware_disable_mcu(kbdev);
-				kbase_csf_firmware_disable_mcu_wait(kbdev);
+				kbase_csf_stop_firmware_and_wait(kbdev);
 				WARN_ON_ONCE(backend->mcu_state != KBASE_MCU_OFF);
 				backend->l2_force_off_after_mcu_halt = false;
 			}
@@ -1878,12 +1897,7 @@ static int kbase_pm_shaders_update_state(struct kbase_device *kbdev)
 		 * kbase_pm_get_ready_cores and kbase_pm_get_trans_cores
 		 * are vulnerable to corruption if gpu is lost
 		 */
-		if (kbase_is_gpu_removed(kbdev)
-#ifdef CONFIG_MALI_ARBITER_SUPPORT
-		    || kbase_pm_is_gpu_lost(kbdev)) {
-#else
-		) {
-#endif
+		if (kbase_is_gpu_removed(kbdev) || kbase_pm_is_gpu_lost(kbdev)) {
 			backend->shaders_state = KBASE_SHADERS_OFF_CORESTACK_OFF;
 			dev_dbg(kbdev->dev, "GPU lost has occurred - shaders off\n");
 			break;
@@ -1988,9 +2002,8 @@ static int kbase_pm_shaders_update_state(struct kbase_device *kbdev)
 						kbdev, KBASE_PM_POLICY_EVENT_IDLE);
 
 				if (kbdev->pm.backend.protected_transition_override ||
-#ifdef CONFIG_MALI_ARBITER_SUPPORT
-				    kbase_pm_is_suspending(kbdev) || kbase_pm_is_gpu_lost(kbdev) ||
-#endif /* CONFIG_MALI_ARBITER_SUPPORT */
+				    (kbase_has_arbiter(kbdev) && (kbase_pm_is_suspending(kbdev) ||
+								  kbase_pm_is_gpu_lost(kbdev))) ||
 				    !stt->configured_ticks || WARN_ON(stt->cancel_queued)) {
 					backend->shaders_state =
 						KBASE_SHADERS_WAIT_FINISHED_CORESTACK_ON;
@@ -2057,10 +2070,9 @@ static int kbase_pm_shaders_update_state(struct kbase_device *kbdev)
 						kbdev, KBASE_PM_POLICY_EVENT_TIMER_MISS);
 
 				backend->shaders_state = KBASE_SHADERS_WAIT_FINISHED_CORESTACK_ON;
-#ifdef CONFIG_MALI_ARBITER_SUPPORT
-			} else if (kbase_pm_is_suspending(kbdev) || kbase_pm_is_gpu_lost(kbdev)) {
+			} else if (kbase_has_arbiter(kbdev) &&
+				   (kbase_pm_is_suspending(kbdev) || kbase_pm_is_gpu_lost(kbdev))) {
 				backend->shaders_state = KBASE_SHADERS_WAIT_FINISHED_CORESTACK_ON;
-#endif /* CONFIG_MALI_ARBITER_SUPPORT */
 			}
 			break;
 
@@ -2079,7 +2091,7 @@ static int kbase_pm_shaders_update_state(struct kbase_device *kbdev)
 			if (!backend->partial_shaderoff)
 				shader_poweroff_timer_queue_cancel(kbdev);
 
-			if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TTRX_921)) {
+			if (kbase_hw_has_issue(kbdev, KBASE_HW_ISSUE_TTRX_921)) {
 				kbase_gpu_start_cache_clean_nolock(kbdev,
 								   GPU_COMMAND_CACHE_CLN_INV_L2);
 				backend->shaders_state = KBASE_SHADERS_L2_FLUSHING_CORESTACK_ON;
@@ -2429,6 +2441,9 @@ void kbase_pm_reset_complete(struct kbase_device *kbdev)
 	backend->in_reset = false;
 #if MALI_USE_CSF && defined(KBASE_PM_RUNTIME)
 	backend->gpu_wakeup_override = false;
+	backend->db_mirror_interrupt_enabled = false;
+	backend->gpu_sleep_mode_active = false;
+	backend->exit_gpu_sleep_mode = false;
 #endif
 	kbase_pm_update_state(kbdev);
 
@@ -2653,12 +2668,9 @@ static int pm_wait_for_poweroff_work_complete(struct kbase_device *kbdev, bool k
 	const long timeout = kbase_csf_timeout_in_jiffies(
 		kbase_get_timeout_ms(kbdev, CSF_PM_TIMEOUT) + extra_wait_time_ms);
 #else
-#ifdef CONFIG_MALI_ARBITER_SUPPORT
 	/* Handling of timeout error isn't supported for arbiter builds */
-	const long timeout = MAX_SCHEDULE_TIMEOUT;
-#else
-	const long timeout = (long)msecs_to_jiffies(PM_TIMEOUT_MS);
-#endif
+	const long timeout = kbase_has_arbiter(kbdev) ? MAX_SCHEDULE_TIMEOUT :
+							      (long)msecs_to_jiffies(PM_TIMEOUT_MS);
 #endif
 	int err = 0;
 
@@ -2779,7 +2791,8 @@ static void update_user_reg_page_mapping(struct kbase_device *kbdev)
 		 * when the context (user process) needs to access to the page.
 		 */
 		unmap_mapping_range(kbdev->csf.user_reg.filp->f_inode->i_mapping,
-				    kctx->csf.user_reg.file_offset << PAGE_SHIFT, PAGE_SIZE, 1);
+				    (loff_t)kctx->csf.user_reg.file_offset << PAGE_SHIFT, PAGE_SIZE,
+				    1);
 		list_del_init(&kctx->csf.user_reg.link);
 		dev_dbg(kbdev->dev, "Updated USER Reg page mapping of ctx %d_%d", kctx->tgid,
 			kctx->id);
@@ -2797,7 +2810,7 @@ static void update_user_reg_page_mapping(struct kbase_device *kbdev)
 void kbase_pm_clock_on(struct kbase_device *kbdev, bool is_resume)
 {
 	struct kbase_pm_backend_data *backend = &kbdev->pm.backend;
-	bool reset_required = is_resume;
+	int ret = is_resume;
 	unsigned long flags;
 
 	KBASE_DEBUG_ASSERT(kbdev != NULL);
@@ -2806,12 +2819,10 @@ void kbase_pm_clock_on(struct kbase_device *kbdev, bool is_resume)
 #endif /* !MALI_USE_CSF */
 	lockdep_assert_held(&kbdev->pm.lock);
 
-#ifdef CONFIG_MALI_ARBITER_SUPPORT
 	if (WARN_ON(kbase_pm_is_gpu_lost(kbdev))) {
 		dev_err(kbdev->dev, "%s: Cannot power up while GPU lost", __func__);
 		return;
 	}
-#endif
 
 	if (backend->gpu_powered) {
 #if MALI_USE_CSF && defined(KBASE_PM_RUNTIME)
@@ -2836,7 +2847,7 @@ void kbase_pm_clock_on(struct kbase_device *kbdev, bool is_resume)
 		backend->callback_power_resume(kbdev);
 		return;
 	} else if (backend->callback_power_on) {
-		reset_required = backend->callback_power_on(kbdev);
+		ret = backend->callback_power_on(kbdev);
 	}
 
 	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
@@ -2849,15 +2860,18 @@ void kbase_pm_clock_on(struct kbase_device *kbdev, bool is_resume)
 #endif
 
 
-	if (reset_required) {
+	if (ret == GPU_STATE_IN_RESET) {
+		/* GPU is already in reset state after power on, so no
+		 * soft-reset is needed. Only reconfiguration is required.
+		 */
+		kbase_pm_init_hw(kbdev, PM_ENABLE_IRQS | PM_NO_RESET);
+	} else if (ret == GPU_STATE_LOST) {
 		/* GPU state was lost, reset GPU to ensure it is in a
 		 * consistent state
 		 */
 		kbase_pm_init_hw(kbdev, PM_ENABLE_IRQS);
-	}
-#ifdef CONFIG_MALI_ARBITER_SUPPORT
-	else {
-		if (kbdev->arb.arb_if) {
+	} else {
+		if (kbase_has_arbiter(kbdev)) {
 			struct kbase_arbiter_vm_state *arb_vm_state = kbdev->pm.arb_vm_state;
 
 			/* In the case that the GPU has just been granted by
@@ -2873,8 +2887,8 @@ void kbase_pm_clock_on(struct kbase_device *kbdev, bool is_resume)
 	 * that a repartitioning occurred. In this case the current config
 	 * should be read again.
 	 */
-	kbase_gpuprops_get_curr_config_props(kbdev, &kbdev->gpu_props.curr_config);
-#endif /* CONFIG_MALI_ARBITER_SUPPORT */
+	if (kbase_has_arbiter(kbdev))
+		kbase_gpuprops_get_curr_config_props(kbdev, &kbdev->gpu_props.curr_config);
 
 	mutex_lock(&kbdev->mmu_hw_mutex);
 	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
@@ -2898,7 +2912,7 @@ void kbase_pm_clock_on(struct kbase_device *kbdev, bool is_resume)
 	backend->l2_desired = true;
 #if MALI_USE_CSF
 	{
-		if (reset_required) {
+		if (ret != GPU_STATE_INTACT) {
 			/* GPU reset was done after the power on, so send the post
 			 * reset event instead. This is okay as GPU power off event
 			 * is same as pre GPU reset event.
@@ -2966,12 +2980,7 @@ bool kbase_pm_clock_off(struct kbase_device *kbdev)
 	}
 #endif
 
-	if (kbase_is_gpu_removed(kbdev)
-#ifdef CONFIG_MALI_ARBITER_SUPPORT
-	    || kbase_pm_is_gpu_lost(kbdev)) {
-#else
-	) {
-#endif
+	if (kbase_is_gpu_removed(kbdev) || kbase_pm_is_gpu_lost(kbdev)) {
 		/* Ensure we unblock any threads that are stuck waiting
 		 * for the GPU
 		 */
@@ -2989,10 +2998,7 @@ bool kbase_pm_clock_off(struct kbase_device *kbdev)
 	/* GPU is about to be turned off, switch to dummy page */
 	update_user_reg_page_mapping(kbdev);
 #endif
-
-#ifdef CONFIG_MALI_ARBITER_SUPPORT
 	kbase_arbiter_pm_vm_event(kbdev, KBASE_VM_GPU_IDLE_EVENT);
-#endif /* CONFIG_MALI_ARBITER_SUPPORT */
 
 	if (kbdev->pm.backend.callback_power_off)
 		kbdev->pm.backend.callback_power_off(kbdev);
@@ -3046,6 +3052,7 @@ static enum hrtimer_restart kbasep_reset_timeout(struct hrtimer *timer)
 	return HRTIMER_NORESTART;
 }
 
+
 static int kbase_set_gpu_quirks(struct kbase_device *kbdev)
 {
 #if MALI_USE_CSF
@@ -3075,7 +3082,7 @@ static int kbase_set_gpu_quirks(struct kbase_device *kbdev)
 	kbdev->hw_quirks_gpu = hw_quirks_gpu;
 
 #endif /* !MALI_USE_CSF */
-	if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_IDVS_GROUP_SIZE)) {
+	if (kbase_hw_has_feature(kbdev, KBASE_HW_FEATURE_IDVS_GROUP_SIZE)) {
 		u32 default_idvs_group_size = 0xF;
 		u32 group_size = 0;
 
@@ -3109,10 +3116,10 @@ static int kbase_set_sc_quirks(struct kbase_device *kbdev)
 	if (kbase_is_gpu_removed(kbdev))
 		return -EIO;
 
-	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TTRX_2968_TTRX_3162))
+	if (kbase_hw_has_issue(kbdev, KBASE_HW_ISSUE_TTRX_2968_TTRX_3162))
 		hw_quirks_sc |= SC_VAR_ALGORITHM;
 
-	if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_TLS_HASHING))
+	if (kbase_hw_has_feature(kbdev, KBASE_HW_FEATURE_TLS_HASHING))
 		hw_quirks_sc |= SC_TLS_HASH_ENABLE;
 
 	kbdev->hw_quirks_sc = hw_quirks_sc;
@@ -3131,7 +3138,7 @@ static int kbase_set_tiler_quirks(struct kbase_device *kbdev)
 		return -EIO;
 
 	/* Set tiler clock gate override if required */
-	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_T76X_3953))
+	if (kbase_hw_has_issue(kbdev, KBASE_HW_ISSUE_T76X_3953))
 		hw_quirks_tiler |= TC_CLOCK_GATE_OVERRIDE;
 
 	kbdev->hw_quirks_tiler = hw_quirks_tiler;
@@ -3139,6 +3146,7 @@ static int kbase_set_tiler_quirks(struct kbase_device *kbdev)
 	return 0;
 }
 
+
 static int kbase_pm_hw_issues_detect(struct kbase_device *kbdev)
 {
 	struct device_node *np = kbdev->dev->of_node;
@@ -3191,6 +3199,7 @@ static int kbase_pm_hw_issues_detect(struct kbase_device *kbdev)
 		error = kbase_set_mmu_quirks(kbdev);
 	}
 
+
 	return error;
 }
 
@@ -3210,6 +3219,7 @@ static void kbase_pm_hw_issues_apply(struct kbase_device *kbdev)
 #else
 	kbase_reg_write32(kbdev, GPU_CONTROL_ENUM(JM_CONFIG), kbdev->hw_quirks_gpu);
 #endif
+
 }
 
 void kbase_pm_cache_snoop_enable(struct kbase_device *kbdev)
@@ -3257,16 +3267,10 @@ static void reenable_protected_mode_hwcnt(struct kbase_device *kbdev)
 }
 #endif
 
-static int kbase_pm_do_reset(struct kbase_device *kbdev)
+static int kbase_pm_do_reset_soft(struct kbase_device *kbdev)
 {
-	struct kbasep_reset_timeout_data rtdata;
-	u32 reg_offset, reg_val;
 	int ret;
 
-	KBASE_KTRACE_ADD(kbdev, CORE_GPU_SOFT_RESET, NULL, 0);
-
-	KBASE_TLSTREAM_JD_GPU_SOFT_RESET(kbdev, kbdev);
-
 	if (kbdev->pm.backend.callback_soft_reset) {
 		ret = kbdev->pm.backend.callback_soft_reset(kbdev);
 		if (ret < 0)
@@ -3279,12 +3283,30 @@ static int kbase_pm_do_reset(struct kbase_device *kbdev)
 					  GPU_COMMAND_SOFT_RESET);
 		}
 	}
+	return 0;
+}
 
-	reg_offset = GPU_CONTROL_ENUM(GPU_IRQ_MASK);
-	reg_val = RESET_COMPLETED;
+static int kbase_pm_do_reset(struct kbase_device *kbdev)
+{
+	struct kbasep_reset_timeout_data rtdata;
+	u32 reg_offset, reg_val;
+	int ret;
 
-	/* Unmask the reset complete interrupt only */
-	kbase_reg_write32(kbdev, reg_offset, reg_val);
+	KBASE_KTRACE_ADD(kbdev, CORE_GPU_SOFT_RESET, NULL, 0);
+
+	KBASE_TLSTREAM_JD_GPU_SOFT_RESET(kbdev, kbdev);
+
+	{
+		ret = kbase_pm_do_reset_soft(kbdev);
+		if (ret)
+			return ret;
+
+		reg_offset = GPU_CONTROL_ENUM(GPU_IRQ_MASK);
+		reg_val = RESET_COMPLETED;
+
+		/* Unmask the reset complete interrupt only */
+		kbase_reg_write32(kbdev, reg_offset, reg_val);
+	}
 
 	/* Initialize a structure for tracking the status of the reset */
 	rtdata.kbdev = kbdev;
@@ -3333,9 +3355,8 @@ static int kbase_pm_do_reset(struct kbase_device *kbdev)
 	/* The GPU doesn't seem to be responding to the reset so try a hard
 	 * reset, but only when NOT in arbitration mode.
 	 */
-#ifdef CONFIG_MALI_ARBITER_SUPPORT
-	if (!kbdev->arb.arb_if) {
-#endif /* CONFIG_MALI_ARBITER_SUPPORT */
+
+	if (!kbase_has_arbiter(kbdev)) {
 		dev_err(kbdev->dev,
 			"Failed to soft-reset GPU (timed out after %d ms), now attempting a hard reset\n",
 			RESET_TIMEOUT);
@@ -3365,9 +3386,7 @@ static int kbase_pm_do_reset(struct kbase_device *kbdev)
 
 		dev_err(kbdev->dev, "Failed to hard-reset the GPU (timed out after %d ms)\n",
 			RESET_TIMEOUT);
-#ifdef CONFIG_MALI_ARBITER_SUPPORT
 	}
-#endif /* CONFIG_MALI_ARBITER_SUPPORT */
 
 	return -EINVAL;
 }
@@ -3418,9 +3437,7 @@ int kbase_pm_init_hw(struct kbase_device *kbdev, unsigned int flags)
 	spin_unlock_irqrestore(&kbdev->hwaccess_lock, irq_flags);
 
 	/* Soft reset the GPU */
-#ifdef CONFIG_MALI_ARBITER_SUPPORT
 	if (!(flags & PM_NO_RESET))
-#endif /* CONFIG_MALI_ARBITER_SUPPORT */
 		err = kbdev->protected_ops->protected_mode_disable(kbdev->protected_dev);
 
 	spin_lock_irqsave(&kbdev->hwaccess_lock, irq_flags);
@@ -3441,7 +3458,6 @@ int kbase_pm_init_hw(struct kbase_device *kbdev, unsigned int flags)
 	if (err)
 		goto exit;
 
-
 	if (flags & PM_HW_ISSUES_DETECT) {
 		err = kbase_pm_hw_issues_detect(kbdev);
 		if (err)
@@ -3451,6 +3467,10 @@ int kbase_pm_init_hw(struct kbase_device *kbdev, unsigned int flags)
 	kbase_pm_hw_issues_apply(kbdev);
 	kbase_cache_set_coherency_mode(kbdev, kbdev->system_coherency);
 	kbase_amba_set_shareable_cache_support(kbdev);
+#if MALI_USE_CSF
+	kbase_backend_update_gpu_timestamp_offset(kbdev);
+	kbdev->csf.compute_progress_timeout_cc = 0;
+#endif
 
 	/* Sanity check protected mode was left after reset */
 	WARN_ON(kbase_reg_read32(kbdev, GPU_CONTROL_ENUM(GPU_STATUS)) &
diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_internal.h b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_internal.h
index 033c80a7c6b4..a7fa191b89d1 100644
--- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_internal.h
+++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_internal.h
@@ -1,7 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
 /*
  *
- * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2024 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -821,6 +821,21 @@ bool kbase_pm_is_mcu_desired(struct kbase_device *kbdev);
  */
 bool kbase_pm_is_mcu_inactive(struct kbase_device *kbdev, enum kbase_mcu_state state);
 
+#ifdef KBASE_PM_RUNTIME
+
+/**
+ * kbase_pm_enable_mcu_db_notification - Enable the Doorbell notification on
+ *                                       MCU side
+ *
+ * @kbdev: Pointer to the device.
+ *
+ * This function is called to re-enable the Doorbell notification on MCU side
+ * when MCU needs to become active again.
+ */
+void kbase_pm_enable_mcu_db_notification(struct kbase_device *kbdev);
+
+#endif /* KBASE_PM_RUNTIME */
+
 /**
  * kbase_pm_idle_groups_sched_suspendable - Check whether the scheduler can be
  *                                        suspended to low power state when all
@@ -963,11 +978,29 @@ static inline bool kbase_pm_gpu_sleep_allowed(struct kbase_device *kbdev)
 	 * A high positive value of autosuspend_delay can be used to keep the
 	 * GPU in sleep state for a long time.
 	 */
-	if (unlikely(!kbdev->dev->power.autosuspend_delay ||
-		     (kbdev->dev->power.autosuspend_delay < 0)))
+	if (unlikely(kbdev->dev->power.autosuspend_delay <= 0))
 		return false;
 
-	return kbdev->pm.backend.gpu_sleep_supported;
+	return test_bit(KBASE_GPU_SUPPORTS_GPU_SLEEP, &kbdev->pm.backend.gpu_sleep_allowed);
+}
+
+/**
+ * kbase_pm_fw_sleep_on_idle_allowed - Check if FW sleep-on-idle could be enabled
+ *
+ * @kbdev: Device pointer
+ *
+ * Allowed only when the autosuspend delay is positive and gpu_sleep_allowed
+ * equals KBASE_GPU_FW_SLEEP_ON_IDLE_ALLOWED. Should be called whenever those
+ * conditions change so that FW sleep-on-idle can be enabled/disabled accordingly.
+ *
+ * Return: true if FW sleep-on-idle is allowed
+ */
+static inline bool kbase_pm_fw_sleep_on_idle_allowed(struct kbase_device *kbdev)
+{
+	if (unlikely(kbdev->dev->power.autosuspend_delay <= 0))
+		return false;
+
+	return kbdev->pm.backend.gpu_sleep_allowed == KBASE_GPU_FW_SLEEP_ON_IDLE_ALLOWED;
 }
 
 /**
diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_policy.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_policy.c
index 23e447b15767..457e91a0a978 100644
--- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_policy.c
+++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_policy.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
 /*
  *
- * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2024 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -77,7 +77,16 @@ void kbase_pm_policy_init(struct kbase_device *kbdev)
 	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
 	kbdev->pm.backend.pm_current_policy = default_policy;
 	kbdev->pm.backend.csf_pm_sched_flags = default_policy->pm_sched_flags;
+
+#ifdef KBASE_PM_RUNTIME
+	if (kbase_pm_idle_groups_sched_suspendable(kbdev))
+		clear_bit(KBASE_GPU_IGNORE_IDLE_EVENT, &kbdev->pm.backend.gpu_sleep_allowed);
+	else
+		set_bit(KBASE_GPU_IGNORE_IDLE_EVENT, &kbdev->pm.backend.gpu_sleep_allowed);
+#endif /* KBASE_PM_RUNTIME */
+
 	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
 #else
 	CSTD_UNUSED(flags);
 	kbdev->pm.backend.pm_current_policy = default_policy;
@@ -127,7 +136,7 @@ void kbase_pm_update_active(struct kbase_device *kbdev)
 			pm->backend.poweroff_wait_in_progress = false;
 			pm->backend.l2_desired = true;
 #if MALI_USE_CSF
-			pm->backend.mcu_desired = true;
+			pm->backend.mcu_desired = pm->backend.mcu_poweron_required;
 #endif
 
 			spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
@@ -400,6 +409,13 @@ void kbase_pm_set_policy(struct kbase_device *kbdev, const struct kbase_pm_polic
 	/* New policy in place, release the clamping on mcu/L2 off state */
 	kbdev->pm.backend.policy_change_clamp_state_to_off = false;
 	kbase_pm_update_state(kbdev);
+
+#ifdef KBASE_PM_RUNTIME
+	if (kbase_pm_idle_groups_sched_suspendable(kbdev))
+		clear_bit(KBASE_GPU_IGNORE_IDLE_EVENT, &kbdev->pm.backend.gpu_sleep_allowed);
+	else
+		set_bit(KBASE_GPU_IGNORE_IDLE_EVENT, &kbdev->pm.backend.gpu_sleep_allowed);
+#endif /* KBASE_PM_RUNTIME */
 #endif
 	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
 
diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_time.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_time.c
index 0bf0f5a062d3..d3715d97d23c 100644
--- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_time.c
+++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_time.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
 /*
  *
- * (C) COPYRIGHT 2014-2023 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2024 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -30,10 +30,7 @@
 #include <mali_kbase_config_defaults.h>
 #include <linux/version_compat_defs.h>
 #include <asm/arch_timer.h>
-
-#if !IS_ENABLED(CONFIG_MALI_REAL_HW)
-#include <backend/gpu/mali_kbase_model_linux.h>
-#endif
+#include <linux/mali_hw_access.h>
 
 struct kbase_timeout_info {
 	char *selector_str;
@@ -41,12 +38,15 @@ struct kbase_timeout_info {
 };
 
 #if MALI_USE_CSF
+
+#define GPU_TIMESTAMP_OFFSET_INVALID S64_MAX
+
 static struct kbase_timeout_info timeout_info[KBASE_TIMEOUT_SELECTOR_COUNT] = {
 	[CSF_FIRMWARE_TIMEOUT] = { "CSF_FIRMWARE_TIMEOUT", MIN(CSF_FIRMWARE_TIMEOUT_CYCLES,
 							       CSF_FIRMWARE_PING_TIMEOUT_CYCLES) },
 	[CSF_PM_TIMEOUT] = { "CSF_PM_TIMEOUT", CSF_PM_TIMEOUT_CYCLES },
 	[CSF_GPU_RESET_TIMEOUT] = { "CSF_GPU_RESET_TIMEOUT", CSF_GPU_RESET_TIMEOUT_CYCLES },
-	[CSF_CSG_SUSPEND_TIMEOUT] = { "CSF_CSG_SUSPEND_TIMEOUT", CSF_CSG_SUSPEND_TIMEOUT_CYCLES },
+	[CSF_CSG_TERM_TIMEOUT] = { "CSF_CSG_TERM_TIMEOUT", CSF_CSG_TERM_TIMEOUT_CYCLES },
 	[CSF_FIRMWARE_BOOT_TIMEOUT] = { "CSF_FIRMWARE_BOOT_TIMEOUT",
 					CSF_FIRMWARE_BOOT_TIMEOUT_CYCLES },
 	[CSF_FIRMWARE_PING_TIMEOUT] = { "CSF_FIRMWARE_PING_TIMEOUT",
@@ -82,6 +82,68 @@ static struct kbase_timeout_info timeout_info[KBASE_TIMEOUT_SELECTOR_COUNT] = {
 };
 #endif
 
+#if MALI_USE_CSF
+void kbase_backend_invalidate_gpu_timestamp_offset(struct kbase_device *kbdev)
+{
+	kbdev->backend_time.gpu_timestamp_offset = GPU_TIMESTAMP_OFFSET_INVALID;
+}
+KBASE_EXPORT_TEST_API(kbase_backend_invalidate_gpu_timestamp_offset);
+
+/**
+ * kbase_backend_compute_gpu_ts_offset() - Compute GPU TS offset.
+ *
+ * @kbdev:	Kbase device.
+ *
+ * This function computes the GPU/CPU TS offset, unless one is already cached:
+ *   - set the current TIMESTAMP_OFFSET register to zero
+ *   - read GPU TS
+ *   - read CPU TS and convert it to ticks
+ *   - calculate diff between CPU and GPU ticks
+ *   - cache the diff as the GPU TS offset
+ *
+ * To reduce delays, preemption must be disabled during reads of both CPU and GPU TS;
+ * this function does not do that itself. Access to GPU registers must be enabled.
+ */
+static inline void kbase_backend_compute_gpu_ts_offset(struct kbase_device *kbdev)
+{
+	s64 cpu_ts_ticks = 0;
+	s64 gpu_ts_ticks = 0;
+
+	if (kbdev->backend_time.gpu_timestamp_offset != GPU_TIMESTAMP_OFFSET_INVALID)
+		return;
+
+	kbase_reg_write64(kbdev, GPU_CONTROL_ENUM(TIMESTAMP_OFFSET), 0);
+
+	gpu_ts_ticks = kbase_reg_read64_coherent(kbdev, GPU_CONTROL_ENUM(TIMESTAMP));
+	cpu_ts_ticks = ktime_get_raw_ns();
+	cpu_ts_ticks = div64_u64(cpu_ts_ticks * kbdev->backend_time.divisor,
+				 kbdev->backend_time.multiplier);
+	kbdev->backend_time.gpu_timestamp_offset = cpu_ts_ticks - gpu_ts_ticks;
+}
+
+void kbase_backend_update_gpu_timestamp_offset(struct kbase_device *kbdev)
+{
+	lockdep_assert_held(&kbdev->pm.lock);
+
+	kbase_backend_compute_gpu_ts_offset(kbdev);
+
+	dev_dbg(kbdev->dev, "Setting GPU timestamp offset register to %lld (%lld ns)",
+		kbdev->backend_time.gpu_timestamp_offset,
+		div64_s64(kbdev->backend_time.gpu_timestamp_offset *
+				  (s64)kbdev->backend_time.multiplier,
+			  (s64)kbdev->backend_time.divisor));
+	kbase_reg_write64(kbdev, GPU_CONTROL_ENUM(TIMESTAMP_OFFSET),
+			  kbdev->backend_time.gpu_timestamp_offset);
+}
+#if MALI_UNIT_TEST
+u64 kbase_backend_read_gpu_timestamp_offset_reg(struct kbase_device *kbdev)
+{
+	return kbase_reg_read64_coherent(kbdev, GPU_CONTROL_ENUM(TIMESTAMP_OFFSET));
+}
+KBASE_EXPORT_TEST_API(kbase_backend_read_gpu_timestamp_offset_reg);
+#endif
+#endif
+
 void kbase_backend_get_gpu_time_norequest(struct kbase_device *kbdev, u64 *cycle_counter,
 					  u64 *system_time, struct timespec64 *ts)
 {
@@ -100,6 +162,7 @@ void kbase_backend_get_gpu_time_norequest(struct kbase_device *kbdev, u64 *cycle
 		ktime_get_raw_ts64(ts);
 #endif
 }
+KBASE_EXPORT_TEST_API(kbase_backend_get_gpu_time_norequest);
 
 #if !MALI_USE_CSF
 /**
@@ -143,6 +206,7 @@ void kbase_backend_get_gpu_time(struct kbase_device *kbdev, u64 *cycle_counter,
 	kbase_pm_release_gpu_cycle_counter(kbdev);
 #endif
 }
+KBASE_EXPORT_TEST_API(kbase_backend_get_gpu_time);
 
 static u64 kbase_device_get_scaling_frequency(struct kbase_device *kbdev)
 {
@@ -171,6 +235,15 @@ void kbase_device_set_timeout_ms(struct kbase_device *kbdev, enum kbase_timeout_
 	}
 	selector_str = timeout_info[selector].selector_str;
 
+#if MALI_USE_CSF
+	if (IS_ENABLED(CONFIG_MALI_REAL_HW) && !IS_ENABLED(CONFIG_MALI_IS_FPGA) &&
+	    unlikely(timeout_ms >= MAX_TIMEOUT_MS)) {
+		dev_warn(kbdev->dev, "%s is capped from %dms to %dms\n",
+			 timeout_info[selector].selector_str, timeout_ms, MAX_TIMEOUT_MS);
+		timeout_ms = MAX_TIMEOUT_MS;
+	}
+#endif
+
 	kbdev->backend_time.device_scaled_timeouts[selector] = timeout_ms;
 	dev_dbg(kbdev->dev, "\t%-35s: %ums\n", selector_str, timeout_ms);
 }
@@ -282,36 +355,14 @@ u64 __maybe_unused kbase_backend_time_convert_gpu_to_cpu(struct kbase_device *kb
 	if (WARN_ON(!kbdev))
 		return 0;
 
-	return div64_u64(gpu_ts * kbdev->backend_time.multiplier, kbdev->backend_time.divisor) +
-	       kbdev->backend_time.offset;
-}
-
-/**
- * get_cpu_gpu_time() - Get current CPU and GPU timestamps.
- *
- * @kbdev:	Kbase device.
- * @cpu_ts:	Output CPU timestamp.
- * @gpu_ts:	Output GPU timestamp.
- * @gpu_cycle:  Output GPU cycle counts.
- */
-static void get_cpu_gpu_time(struct kbase_device *kbdev, u64 *cpu_ts, u64 *gpu_ts, u64 *gpu_cycle)
-{
-	struct timespec64 ts;
-
-	kbase_backend_get_gpu_time(kbdev, gpu_cycle, gpu_ts, &ts);
-
-	if (cpu_ts)
-		*cpu_ts = (u64)(ts.tv_sec * NSEC_PER_SEC + ts.tv_nsec);
+	return div64_u64(gpu_ts * kbdev->backend_time.multiplier, kbdev->backend_time.divisor);
 }
+KBASE_EXPORT_TEST_API(kbase_backend_time_convert_gpu_to_cpu);
 #endif
 
 u64 kbase_arch_timer_get_cntfrq(struct kbase_device *kbdev)
 {
-	u64 freq = arch_timer_get_cntfrq();
-
-#if !IS_ENABLED(CONFIG_MALI_REAL_HW)
-	freq = midgard_model_arch_timer_get_cntfrq(kbdev->model);
-#endif
+	u64 freq = mali_arch_timer_get_cntfrq();
 
 	dev_dbg(kbdev->dev, "System Timer Freq = %lluHz", freq);
 
@@ -322,13 +373,10 @@ int kbase_backend_time_init(struct kbase_device *kbdev)
 {
 	int err = 0;
 #if MALI_USE_CSF
-	u64 cpu_ts = 0;
-	u64 gpu_ts = 0;
 	u64 freq;
 	u64 common_factor;
 
 	kbase_pm_register_access_enable(kbdev);
-	get_cpu_gpu_time(kbdev, &cpu_ts, &gpu_ts, NULL);
 	freq = kbase_arch_timer_get_cntfrq(kbdev);
 
 	if (!freq) {
@@ -348,9 +396,8 @@ int kbase_backend_time_init(struct kbase_device *kbdev)
 		goto disable_registers;
 	}
 
-	kbdev->backend_time.offset =
-		(s64)(cpu_ts - div64_u64(gpu_ts * kbdev->backend_time.multiplier,
-					 kbdev->backend_time.divisor));
+	kbase_backend_invalidate_gpu_timestamp_offset(
+		kbdev); /* force computation of GPU Timestamp offset */
 #endif
 
 	if (kbase_timeout_scaling_init(kbdev)) {
diff --git a/drivers/gpu/arm/bifrost/build.bp b/drivers/gpu/arm/bifrost/build.bp
index 9ee968af8de5..2ae771f5b546 100644
--- a/drivers/gpu/arm/bifrost/build.bp
+++ b/drivers/gpu/arm/bifrost/build.bp
@@ -1,7 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
 /*
  *
- * (C) COPYRIGHT 2017-2023 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2017-2024 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -71,18 +71,6 @@ bob_defaults {
     mali_real_hw: {
         kbuild_options: ["CONFIG_MALI_REAL_HW=y"],
     },
-    mali_error_inject_none: {
-        kbuild_options: ["CONFIG_MALI_ERROR_INJECT_NONE=y"],
-    },
-    mali_error_inject_track_list: {
-        kbuild_options: ["CONFIG_MALI_ERROR_INJECT_TRACK_LIST=y"],
-    },
-    mali_error_inject_random: {
-        kbuild_options: ["CONFIG_MALI_ERROR_INJECT_RANDOM=y"],
-    },
-    mali_error_inject: {
-        kbuild_options: ["CONFIG_MALI_BIFROST_ERROR_INJECT=y"],
-    },
     mali_debug: {
         kbuild_options: [
             "CONFIG_MALI_BIFROST_DEBUG=y",
@@ -125,7 +113,7 @@ bob_defaults {
     mali_hw_errata_1485982_use_clock_alternative: {
         kbuild_options: ["CONFIG_MALI_HW_ERRATA_1485982_USE_CLOCK_ALTERNATIVE=y"],
     },
-    platform_is_fpga: {
+    mali_is_fpga: {
         kbuild_options: ["CONFIG_MALI_IS_FPGA=y"],
     },
     mali_coresight: {
@@ -160,7 +148,6 @@ bob_defaults {
         // is an umbrella feature that would be open for inappropriate use
         // (catch-all for experimental CS code without separating it into
         // different features).
-        "MALI_INCREMENTAL_RENDERING_JM={{.incremental_rendering_jm}}",
         "MALI_BASE_CSF_PERFORMANCE_TESTS={{.base_csf_performance_tests}}",
     ],
 }
@@ -174,6 +161,9 @@ bob_kernel_module {
         "*.c",
         "*.h",
         "Kbuild",
+        "arbiter/*.c",
+        "arbiter/*.h",
+        "arbiter/Kbuild",
         "backend/gpu/*.c",
         "backend/gpu/*.h",
         "backend/gpu/Kbuild",
@@ -239,6 +229,7 @@ bob_kernel_module {
             "jm/*.h",
             "tl/backend/*_jm.c",
             "mmu/backend/*_jm.c",
+            "mmu/backend/*_jm.h",
             "ipa/backend/*_jm.c",
             "ipa/backend/*_jm.h",
         ],
@@ -263,17 +254,11 @@ bob_kernel_module {
             "hwcnt/backend/*_csf_*.h",
             "tl/backend/*_csf.c",
             "mmu/backend/*_csf.c",
+            "mmu/backend/*_csf.h",
             "ipa/backend/*_csf.c",
             "ipa/backend/*_csf.h",
         ],
     },
-    mali_arbiter_support: {
-        srcs: [
-            "arbiter/*.c",
-            "arbiter/*.h",
-            "arbiter/Kbuild",
-        ],
-    },
     kbuild_options: [
         "CONFIG_MALI_BIFROST=m",
         "CONFIG_MALI_KUTF=n",
diff --git a/drivers/gpu/arm/bifrost/context/backend/mali_kbase_context_csf.c b/drivers/gpu/arm/bifrost/context/backend/mali_kbase_context_csf.c
index 8b1410886b05..fe1dbfaca872 100644
--- a/drivers/gpu/arm/bifrost/context/backend/mali_kbase_context_csf.c
+++ b/drivers/gpu/arm/bifrost/context/backend/mali_kbase_context_csf.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
 /*
  *
- * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2024 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -116,8 +116,7 @@ static void kbase_context_term_partial(struct kbase_context *kctx, unsigned int
 
 struct kbase_context *kbase_create_context(struct kbase_device *kbdev, bool is_compat,
 					   base_context_create_flags const flags,
-					   unsigned long const api_version,
-					   struct kbase_file *const kfile)
+					   unsigned long const api_version, struct file *const filp)
 {
 	struct kbase_context *kctx;
 	unsigned int i = 0;
@@ -136,7 +135,7 @@ struct kbase_context *kbase_create_context(struct kbase_device *kbdev, bool is_c
 
 	kctx->kbdev = kbdev;
 	kctx->api_version = api_version;
-	kctx->kfile = kfile;
+	kctx->filp = filp;
 	kctx->create_flags = flags;
 
 	memcpy(kctx->comm, current->comm, sizeof(current->comm));
@@ -187,11 +186,15 @@ void kbase_destroy_context(struct kbase_context *kctx)
 	 * Customer side that a hang could occur if context termination is
 	 * not blocked until the resume of GPU device.
 	 */
+	if (kbase_has_arbiter(kbdev))
+		atomic_inc(&kbdev->pm.gpu_users_waiting);
 	while (kbase_pm_context_active_handle_suspend(kbdev,
 						      KBASE_PM_SUSPEND_HANDLER_DONT_INCREASE)) {
-		dev_info(kbdev->dev, "Suspend in progress when destroying context");
+		dev_dbg(kbdev->dev, "Suspend in progress when destroying context");
 		wait_event(kbdev->pm.resume_wait, !kbase_pm_is_suspending(kbdev));
 	}
+	if (kbase_has_arbiter(kbdev))
+		atomic_dec(&kbdev->pm.gpu_users_waiting);
 
 	/* Have synchronized against the System suspend and incremented the
 	 * pm.active_count. So any subsequent invocation of System suspend
diff --git a/drivers/gpu/arm/bifrost/context/backend/mali_kbase_context_jm.c b/drivers/gpu/arm/bifrost/context/backend/mali_kbase_context_jm.c
index f2eefe9ddcd0..ef474f625f63 100644
--- a/drivers/gpu/arm/bifrost/context/backend/mali_kbase_context_jm.c
+++ b/drivers/gpu/arm/bifrost/context/backend/mali_kbase_context_jm.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
 /*
  *
- * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2024 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -168,8 +168,7 @@ static void kbase_context_term_partial(struct kbase_context *kctx, unsigned int
 
 struct kbase_context *kbase_create_context(struct kbase_device *kbdev, bool is_compat,
 					   base_context_create_flags const flags,
-					   unsigned long const api_version,
-					   struct kbase_file *const kfile)
+					   unsigned long const api_version, struct file *const filp)
 {
 	struct kbase_context *kctx;
 	unsigned int i = 0;
@@ -188,7 +187,7 @@ struct kbase_context *kbase_create_context(struct kbase_device *kbdev, bool is_c
 
 	kctx->kbdev = kbdev;
 	kctx->api_version = api_version;
-	kctx->kfile = kfile;
+	kctx->filp = filp;
 	kctx->create_flags = flags;
 
 	if (is_compat)
@@ -232,14 +231,13 @@ void kbase_destroy_context(struct kbase_context *kctx)
 	if (WARN_ON(!kbdev))
 		return;
 
-		/* Context termination could happen whilst the system suspend of
+	/* Context termination could happen whilst the system suspend of
 	 * the GPU device is ongoing or has completed. It has been seen on
 	 * Customer side that a hang could occur if context termination is
 	 * not blocked until the resume of GPU device.
 	 */
-#ifdef CONFIG_MALI_ARBITER_SUPPORT
-	atomic_inc(&kbdev->pm.gpu_users_waiting);
-#endif /* CONFIG_MALI_ARBITER_SUPPORT */
+	if (kbase_has_arbiter(kbdev))
+		atomic_inc(&kbdev->pm.gpu_users_waiting);
 	while (kbase_pm_context_active_handle_suspend(kbdev,
 						      KBASE_PM_SUSPEND_HANDLER_DONT_INCREASE)) {
 		dev_dbg(kbdev->dev, "Suspend in progress when destroying context");
@@ -256,9 +254,8 @@ void kbase_destroy_context(struct kbase_context *kctx)
 	 */
 	wait_event(kbdev->pm.resume_wait, !kbase_pm_is_resuming(kbdev));
 
-#ifdef CONFIG_MALI_ARBITER_SUPPORT
-	atomic_dec(&kbdev->pm.gpu_users_waiting);
-#endif /* CONFIG_MALI_ARBITER_SUPPORT */
+	if (kbase_has_arbiter(kbdev))
+		atomic_dec(&kbdev->pm.gpu_users_waiting);
 
 	kbase_mem_pool_group_mark_dying(&kctx->mem_pools);
 
diff --git a/drivers/gpu/arm/bifrost/context/mali_kbase_context.c b/drivers/gpu/arm/bifrost/context/mali_kbase_context.c
index 36cfde3cdab1..2c7417bd6506 100644
--- a/drivers/gpu/arm/bifrost/context/mali_kbase_context.c
+++ b/drivers/gpu/arm/bifrost/context/mali_kbase_context.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
 /*
  *
- * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2024 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -141,7 +141,7 @@ int kbase_context_common_init(struct kbase_context *kctx)
 	kctx->pid = task_pid_vnr(current);
 
 	/* Check if this is a Userspace created context */
-	if (likely(kctx->kfile)) {
+	if (likely(kctx->filp)) {
 		struct pid *pid_struct;
 
 		rcu_read_lock();
@@ -184,6 +184,8 @@ int kbase_context_common_init(struct kbase_context *kctx)
 	spin_lock_init(&kctx->waiting_soft_jobs_lock);
 	INIT_LIST_HEAD(&kctx->waiting_soft_jobs);
 
+	init_waitqueue_head(&kctx->event_queue);
+
 	kbase_gpu_vm_lock(kctx);
 	bitmap_copy(kctx->cookies, &cookies_mask, BITS_PER_LONG);
 	kbase_gpu_vm_unlock(kctx);
@@ -195,7 +197,7 @@ int kbase_context_common_init(struct kbase_context *kctx)
 	mutex_unlock(&kctx->kbdev->kctx_list_lock);
 	if (err) {
 		dev_err(kctx->kbdev->dev, "(err:%d) failed to insert kctx to kbase_process", err);
-		if (likely(kctx->kfile)) {
+		if (likely(kctx->filp)) {
 			mmdrop(kctx->process_mm);
 			put_task_struct(kctx->task);
 		}
@@ -284,7 +286,7 @@ void kbase_context_common_term(struct kbase_context *kctx)
 	kbase_remove_kctx_from_process(kctx);
 	mutex_unlock(&kctx->kbdev->kctx_list_lock);
 
-	if (likely(kctx->kfile)) {
+	if (likely(kctx->filp)) {
 		mmdrop(kctx->process_mm);
 		put_task_struct(kctx->task);
 	}
diff --git a/drivers/gpu/arm/bifrost/context/mali_kbase_context.h b/drivers/gpu/arm/bifrost/context/mali_kbase_context.h
index e2295d020292..07c235fab11e 100644
--- a/drivers/gpu/arm/bifrost/context/mali_kbase_context.h
+++ b/drivers/gpu/arm/bifrost/context/mali_kbase_context.h
@@ -1,7 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
 /*
  *
- * (C) COPYRIGHT 2011-2023 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2011-2024 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -56,9 +56,9 @@ void kbase_context_debugfs_term(struct kbase_context *const kctx);
  *               BASEP_CONTEXT_CREATE_KERNEL_FLAGS.
  * @api_version: Application program interface version, as encoded in
  *               a single integer by the KBASE_API_VERSION macro.
- * @kfile:       Pointer to the object representing the /dev/malixx device
- *               file instance. Shall be passed as NULL for internally created
- *               contexts.
+ * @filp:        Pointer to the struct file corresponding to device file
+ *               /dev/malixx instance, passed to the file's open method.
+ *               Shall be passed as NULL for internally created contexts.
  *
  * Up to one context can be created for each client that opens the device file
  * /dev/malixx. Context creation is deferred until a special ioctl() system call
@@ -68,8 +68,7 @@ void kbase_context_debugfs_term(struct kbase_context *const kctx);
  */
 struct kbase_context *kbase_create_context(struct kbase_device *kbdev, bool is_compat,
 					   base_context_create_flags const flags,
-					   unsigned long api_version,
-					   struct kbase_file *const kfile);
+					   unsigned long api_version, struct file *filp);
 
 /**
  * kbase_destroy_context - Destroy a kernel base context.
diff --git a/drivers/gpu/arm/bifrost/csf/Kbuild b/drivers/gpu/arm/bifrost/csf/Kbuild
index 5df35864efc7..8159bc9d10e8 100644
--- a/drivers/gpu/arm/bifrost/csf/Kbuild
+++ b/drivers/gpu/arm/bifrost/csf/Kbuild
@@ -1,6 +1,6 @@
 # SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
 #
-# (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved.
+# (C) COPYRIGHT 2018-2024 ARM Limited. All rights reserved.
 #
 # This program is free software and is provided to you under the terms of the
 # GNU General Public License version 2 as published by the Free Software
@@ -48,8 +48,10 @@ bifrost_kbase-y += \
 
 ifeq ($(CONFIG_MALI_BIFROST_NO_MALI),y)
 bifrost_kbase-y += csf/mali_kbase_csf_firmware_no_mali.o
+bifrost_kbase-y += csf/mali_kbase_csf_fw_io_no_mali.o
 else
 bifrost_kbase-y += csf/mali_kbase_csf_firmware.o
+bifrost_kbase-y += csf/mali_kbase_csf_fw_io.o
 endif
 
 bifrost_kbase-$(CONFIG_DEBUG_FS) += csf/mali_kbase_debug_csf_fault.o
diff --git a/drivers/gpu/arm/bifrost/csf/ipa_control/mali_kbase_csf_ipa_control.c b/drivers/gpu/arm/bifrost/csf/ipa_control/mali_kbase_csf_ipa_control.c
index 61a4be9ccc94..ec47b88fac53 100644
--- a/drivers/gpu/arm/bifrost/csf/ipa_control/mali_kbase_csf_ipa_control.c
+++ b/drivers/gpu/arm/bifrost/csf/ipa_control/mali_kbase_csf_ipa_control.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
 /*
  *
- * (C) COPYRIGHT 2020-2023 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2020-2024 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -943,6 +943,8 @@ void kbase_ipa_control_protm_entered(struct kbase_device *kbdev)
 	struct kbase_ipa_control *ipa_ctrl = &kbdev->csf.ipa_control;
 
 	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+
 	ipa_ctrl->protm_start = ktime_get_raw_ns();
 }
 
@@ -955,6 +957,7 @@ void kbase_ipa_control_protm_exited(struct kbase_device *kbdev)
 
 	lockdep_assert_held(&kbdev->hwaccess_lock);
 
+
 	for (i = 0; i < KBASE_IPA_CONTROL_MAX_SESSIONS; i++) {
 		struct kbase_ipa_control_session *session = &ipa_ctrl->sessions[i];
 
diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf.c b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf.c
index 9dffe34f095b..d3300ea8dcde 100644
--- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf.c
+++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
 /*
  *
- * (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2018-2024 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -45,6 +45,9 @@
 #define CS_RING_BUFFER_MAX_SIZE ((uint32_t)(1 << 31)) /* 2GiB */
 #define CS_RING_BUFFER_MIN_SIZE ((uint32_t)4096)
 
+/* 0.2 seconds assuming a 600 MHz GPU clock, which is double the iterator disabling timeout */
+#define MAX_PROGRESS_TIMEOUT_EVENT_DELAY ((u32)120000000)
+
 #define PROTM_ALLOC_MAX_RETRIES ((u8)5)
 
 const u8 kbasep_csf_queue_group_priority_to_relative[BASE_QUEUE_GROUP_PRIORITY_COUNT] = {
@@ -539,6 +542,8 @@ static int csf_queue_register_internal(struct kbase_context *kctx,
 
 	queue->blocked_reason = CS_STATUS_BLOCKED_REASON_REASON_UNBLOCKED;
 
+	queue->clear_faults = true;
+
 	INIT_LIST_HEAD(&queue->link);
 	atomic_set(&queue->pending_kick, 0);
 	INIT_LIST_HEAD(&queue->pending_kick_link);
@@ -589,11 +594,19 @@ int kbase_csf_queue_register_ex(struct kbase_context *kctx,
 	u32 const glb_version = iface->version;
 	u32 instr = iface->instr_features;
 	u8 max_size = GLB_INSTR_FEATURES_EVENT_SIZE_MAX_GET(instr);
-	u32 min_buf_size =
-		(1u << reg->ex_event_size) * GLB_INSTR_FEATURES_OFFSET_UPDATE_RATE_GET(instr);
+	const u8 event_size = reg->ex_event_size;
+	u64 min_buf_size;
 
 	/* If cs_trace_command not supported, the call fails */
 	if (glb_version < kbase_csf_interface_version(1, 1, 0))
+		return -EPERM;
+
+	/* Sanity check to avoid shift-out-of-bounds */
+	if (event_size >= 32)
+		return -EINVAL;
+
+	min_buf_size = ((u64)1 << event_size) * GLB_INSTR_FEATURES_OFFSET_UPDATE_RATE_GET(instr);
+	if (min_buf_size > UINT32_MAX)
 		return -EINVAL;
 
 	/* Validate the ring buffer configuration parameters */
@@ -605,8 +618,8 @@ int kbase_csf_queue_register_ex(struct kbase_context *kctx,
 
 	/* Validate the cs_trace configuration parameters */
 	if (reg->ex_buffer_size &&
-	    ((reg->ex_event_size > max_size) || (reg->ex_buffer_size & (reg->ex_buffer_size - 1)) ||
-	     (reg->ex_buffer_size < min_buf_size)))
+	    ((event_size > max_size) || (reg->ex_buffer_size & (reg->ex_buffer_size - 1)) ||
+	     (reg->ex_buffer_size < (u32)min_buf_size)))
 		return -EINVAL;
 
 	return csf_queue_register_internal(kctx, NULL, reg);
@@ -734,7 +747,7 @@ out:
 }
 
 /**
- * get_bound_queue_group - Get the group to which a queue was bound
+ * get_bound_queue_group() - Get the group to which a queue was bound
  *
  * @queue: Pointer to the queue for this group
  *
@@ -847,6 +860,47 @@ void kbase_csf_ring_cs_kernel_doorbell(struct kbase_device *kbdev, int csi_index
 		kbase_csf_ring_csg_doorbell(kbdev, csg_nr);
 }
 
+int kbase_csf_queue_group_clear_faults(struct kbase_context *kctx,
+				       struct kbase_ioctl_queue_group_clear_faults *faults)
+{
+	void __user *user_bufs = u64_to_user_ptr(faults->addr);
+	u32 i;
+	struct kbase_device *kbdev = kctx->kbdev;
+	const u32 nr_queues = faults->nr_queues;
+
+	if (unlikely(nr_queues > kbdev->csf.global_iface.groups[0].stream_num)) {
+		dev_warn(kbdev->dev, "Invalid nr_queues %u", nr_queues);
+		return -EINVAL;
+	}
+
+	for (i = 0; i < nr_queues; ++i) {
+		u64 buf_gpu_addr;
+		struct kbase_va_region *region;
+		struct kbase_queue *queue = NULL;
+
+		if (copy_from_user(&buf_gpu_addr, user_bufs, sizeof(buf_gpu_addr)))
+			return -EFAULT;
+		mutex_lock(&kctx->csf.lock);
+		kbase_gpu_vm_lock(kctx);
+		region = kbase_region_tracker_find_region_enclosing_address(kctx, buf_gpu_addr);
+		/* user_data may be NULL; kbase_csf_queue_kick() checks for that as well */
+		if (likely(!kbase_is_region_invalid_or_free(region)))
+			queue = region->user_data;
+		if (likely(queue))
+			queue->clear_faults = true;
+		kbase_gpu_vm_unlock(kctx);
+		mutex_unlock(&kctx->csf.lock);
+		if (unlikely(!queue)) {
+			dev_warn(kbdev->dev, "GPU queue %u without a valid command buffer region",
+				 i);
+			return -EFAULT;
+		}
+		user_bufs = (void __user *)((uintptr_t)user_bufs + sizeof(buf_gpu_addr));
+	}
+
+	return 0;
+}
+
 int kbase_csf_queue_kick(struct kbase_context *kctx, struct kbase_ioctl_cs_queue_kick *kick)
 {
 	struct kbase_device *kbdev = kctx->kbdev;
@@ -868,7 +922,7 @@ int kbase_csf_queue_kick(struct kbase_context *kctx, struct kbase_ioctl_cs_queue
 		struct kbase_queue *queue = region->user_data;
 
 		if (queue && (queue->bind_state == KBASE_CSF_QUEUE_BOUND)) {
-			spin_lock(&kbdev->csf.pending_gpuq_kicks_lock);
+			spin_lock(&kbdev->csf.pending_gpuq_kick_queues_lock);
 			if (list_empty(&queue->pending_kick_link)) {
 				/* Queue termination shall block until this
 				 * kick has been handled.
@@ -876,10 +930,12 @@ int kbase_csf_queue_kick(struct kbase_context *kctx, struct kbase_ioctl_cs_queue
 				atomic_inc(&queue->pending_kick);
 				list_add_tail(
 					&queue->pending_kick_link,
-					&kbdev->csf.pending_gpuq_kicks[queue->group_priority]);
-				complete(&kbdev->csf.scheduler.kthread_signal);
+					&kbdev->csf.pending_gpuq_kick_queues[queue->group_priority]);
+				if (atomic_cmpxchg(&kbdev->csf.pending_gpuq_kicks, false, true) ==
+				    false)
+					complete(&kbdev->csf.scheduler.kthread_signal);
 			}
-			spin_unlock(&kbdev->csf.pending_gpuq_kicks_lock);
+			spin_unlock(&kbdev->csf.pending_gpuq_kick_queues_lock);
 		}
 	} else {
 		dev_dbg(kbdev->dev,
@@ -1095,12 +1151,11 @@ static int create_normal_suspend_buffer(struct kbase_context *const kctx,
 }
 
 static void timer_event_worker(struct work_struct *data);
-static void protm_event_worker(struct work_struct *data);
 static void term_normal_suspend_buffer(struct kbase_context *const kctx,
 				       struct kbase_normal_suspend_buffer *s_buf);
 
 /**
- * create_suspend_buffers - Setup normal and protected mode
+ * create_suspend_buffers() - Setup normal and protected mode
  *				suspend buffers.
  *
  * @kctx:	Address of the kbase context within which the queue group
@@ -1199,6 +1254,8 @@ static int create_queue_group(struct kbase_context *const kctx,
 			group->deschedule_deferred_cnt = 0;
 #endif
 
+			group->cs_fault_report_enable = create->in.cs_fault_report_enable;
+
 			group->group_uid = generate_group_uid();
 			create->out.group_uid = group->group_uid;
 
@@ -1206,7 +1263,9 @@ static int create_queue_group(struct kbase_context *const kctx,
 			INIT_LIST_HEAD(&group->link_to_schedule);
 			INIT_LIST_HEAD(&group->error_fatal.link);
 			INIT_WORK(&group->timer_event_work, timer_event_worker);
-			INIT_WORK(&group->protm_event_work, protm_event_worker);
+			INIT_LIST_HEAD(&group->protm_event_work);
+			group->progress_timer_state = 0;
+			atomic_set(&group->pending_protm_event_work, 0);
 			bitmap_zero(group->protm_pending_bitmap, MAX_SUPPORTED_STREAMS_PER_GROUP);
 
 			group->run_state = KBASE_CSF_GROUP_INACTIVE;
@@ -1251,14 +1310,6 @@ int kbase_csf_queue_group_create(struct kbase_context *const kctx,
 	const u32 tiler_count = hweight64(create->in.tiler_mask);
 	const u32 fragment_count = hweight64(create->in.fragment_mask);
 	const u32 compute_count = hweight64(create->in.compute_mask);
-	size_t i;
-
-	for (i = 0; i < ARRAY_SIZE(create->in.padding); i++) {
-		if (create->in.padding[i] != 0) {
-			dev_warn(kctx->kbdev->dev, "Invalid padding not 0 in queue group create\n");
-			return -EINVAL;
-		}
-	}
 
 	mutex_lock(&kctx->csf.lock);
 
@@ -1379,7 +1430,7 @@ void kbase_csf_term_descheduled_queue_group(struct kbase_queue_group *group)
 }
 
 /**
- * term_queue_group - Terminate a GPU command queue group.
+ * term_queue_group() - Terminate a GPU command queue group.
  *
  * @group: Pointer to GPU command queue group data.
  *
@@ -1407,8 +1458,8 @@ static void term_queue_group(struct kbase_queue_group *group)
 }
 
 /**
- * wait_group_deferred_deschedule_completion - Wait for refcount of the group to
- *         become 0 that was taken when the group deschedule had to be deferred.
+ * wait_group_deferred_deschedule_completion() - Wait for refcount of the group
+ *     to become 0 that was taken when the group deschedule had to be deferred.
  *
  * @group: Pointer to GPU command queue group that is being deleted.
  *
@@ -1437,7 +1488,10 @@ static void wait_group_deferred_deschedule_completion(struct kbase_queue_group *
 static void cancel_queue_group_events(struct kbase_queue_group *group)
 {
 	cancel_work_sync(&group->timer_event_work);
-	cancel_work_sync(&group->protm_event_work);
+
+	/* Drain a pending protected mode request if any */
+	kbase_csf_scheduler_wait_for_kthread_pending_work(group->kctx->kbdev,
+							  &group->pending_protm_event_work);
 }
 
 static void remove_pending_group_fatal_error(struct kbase_queue_group *group)
@@ -1592,6 +1646,7 @@ int kbase_csf_ctx_init(struct kbase_context *kctx)
 
 	INIT_LIST_HEAD(&kctx->csf.queue_list);
 	INIT_LIST_HEAD(&kctx->csf.link);
+	atomic_set(&kctx->csf.pending_sync_update, 0);
 
 	kbase_csf_event_init(kctx);
 
@@ -1827,7 +1882,7 @@ void kbase_csf_ctx_term(struct kbase_context *kctx)
 }
 
 /**
- * handle_oom_event - Handle the OoM event generated by the firmware for the
+ * handle_oom_event() - Handle the OoM event generated by the firmware for the
  *                    CSI.
  *
  * @group:  Pointer to the CSG group the oom-event belongs to.
@@ -1902,7 +1957,7 @@ static int handle_oom_event(struct kbase_queue_group *const group,
 }
 
 /**
- * report_tiler_oom_error - Report a CSG error due to a tiler heap OOM event
+ * report_tiler_oom_error() - Report a CSG error due to a tiler heap OOM event
  *
  * @group: Pointer to the GPU command queue group that encountered the error
  */
@@ -1945,7 +2000,7 @@ static void flush_gpu_cache_on_fatal_error(struct kbase_device *kbdev)
 }
 
 /**
- * kbase_queue_oom_event - Handle tiler out-of-memory for a GPU command queue.
+ * kbase_queue_oom_event() - Handle tiler out-of-memory for a GPU command queue.
  *
  * @queue: Pointer to queue for which out-of-memory event was received.
  *
@@ -2033,7 +2088,7 @@ unlock:
 }
 
 /**
- * oom_event_worker - Tiler out-of-memory handler called from a workqueue.
+ * oom_event_worker() - Tiler out-of-memory handler called from a workqueue.
  *
  * @data: Pointer to a work_struct embedded in GPU command queue data.
  *
@@ -2061,7 +2116,8 @@ static void oom_event_worker(struct work_struct *data)
 }
 
 /**
- * report_group_timeout_error - Report the timeout error for the group to userspace.
+ * report_group_timeout_error() - Report the timeout error for the group to
+ *                                userspace.
  *
  * @group: Pointer to the group for which timeout error occurred
  */
@@ -2085,7 +2141,7 @@ static void report_group_timeout_error(struct kbase_queue_group *const group)
 }
 
 /**
- * timer_event_worker - Handle the progress timeout error for the group
+ * timer_event_worker() - Handle the progress timeout error for the group
  *
  * @data: Pointer to a work_struct embedded in GPU command queue group data.
  *
@@ -2120,19 +2176,74 @@ static void timer_event_worker(struct work_struct *data)
 }
 
 /**
- * handle_progress_timer_event - Progress timer timeout event handler.
+ * handle_progress_timer_events() - Progress timer timeout events handler.
  *
- * @group: Pointer to GPU queue group for which the timeout event is received.
+ * @kbdev:     Instance of a GPU platform device that implements a CSF interface.
+ * @slot_mask: Bitmap reflecting the slots on which progress timer timeouts happen.
  *
  * Notify a waiting user space client of the timeout.
  * Enqueue a work item to terminate the group and notify the event notification
  * thread of progress timeout fault for the GPU command queue group.
+ * Ignore fragment timeout if it is following a compute timeout.
  */
-static void handle_progress_timer_event(struct kbase_queue_group *const group)
+static void handle_progress_timer_events(struct kbase_device *const kbdev, unsigned long *slot_mask)
 {
-	kbase_debug_csf_fault_notify(group->kctx->kbdev, group->kctx, DF_PROGRESS_TIMER_TIMEOUT);
+	u32 max_csg_slots = kbdev->csf.global_iface.group_num;
+	u32 csg_nr;
+	struct kbase_queue_group *group = NULL;
+	struct kbase_csf_cmd_stream_group_info *ginfo;
 
-	queue_work(group->kctx->csf.wq, &group->timer_event_work);
+	kbase_csf_scheduler_spin_lock_assert_held(kbdev);
+	if (likely(bitmap_empty(slot_mask, MAX_SUPPORTED_CSGS)))
+		return;
+
+	/* Log each timeout and record the cycle count of any compute progress timeout */
+	for_each_set_bit(csg_nr, slot_mask, max_csg_slots) {
+		group = kbdev->csf.scheduler.csg_slots[csg_nr].resident_group;
+		ginfo = &kbdev->csf.global_iface.groups[csg_nr];
+		group->progress_timer_state =
+			kbase_csf_firmware_csg_output(ginfo, CSG_PROGRESS_TIMER_STATE);
+
+		dev_info(
+			kbdev->dev,
+			"[%llu] Iterator PROGRESS_TIMER timeout notification received for group %u of ctx %d_%d on slot %u with state %x",
+			kbase_backend_get_cycle_cnt(kbdev), group->handle, group->kctx->tgid,
+			group->kctx->id, csg_nr, group->progress_timer_state);
+
+		if (CSG_PROGRESS_TIMER_STATE_GET(group->progress_timer_state) ==
+		    CSG_PROGRESS_TIMER_STATE_COMPUTE)
+			kbdev->csf.compute_progress_timeout_cc = kbase_backend_get_cycle_cnt(kbdev);
+	}
+
+	/* Skip a fragment timeout that closely follows a compute timeout. For every
+	 * other timeout, notify the fault and queue the group's timer_event_work.
+	 */
+	for_each_set_bit(csg_nr, slot_mask, max_csg_slots) {
+		group = kbdev->csf.scheduler.csg_slots[csg_nr].resident_group;
+
+		/* A fragment timeout within MAX_PROGRESS_TIMEOUT_EVENT_DELAY cycles after a
+		 * compute timeout is skipped, giving the fragment CSG a second chance.
+		 */
+		if (CSG_PROGRESS_TIMER_STATE_GET(group->progress_timer_state) ==
+		    CSG_PROGRESS_TIMER_STATE_FRAGMENT) {
+			u64 cycle_counter = kbase_backend_get_cycle_cnt(kbdev);
+			u64 compute_progress_timeout_cc = kbdev->csf.compute_progress_timeout_cc;
+
+			if (compute_progress_timeout_cc <= cycle_counter &&
+			    cycle_counter <= compute_progress_timeout_cc +
+						     MAX_PROGRESS_TIMEOUT_EVENT_DELAY) {
+				dev_info(
+					kbdev->dev,
+					"Ignored Fragment iterator timeout for group %d on slot %d",
+					group->handle, group->csg_nr);
+				continue;
+			}
+		}
+
+		kbase_debug_csf_fault_notify(group->kctx->kbdev, group->kctx,
+					     DF_PROGRESS_TIMER_TIMEOUT);
+		queue_work(group->kctx->csf.wq, &group->timer_event_work);
+	}
 }
 
 /**
@@ -2211,41 +2322,7 @@ static void report_group_fatal_error(struct kbase_queue_group *const group)
 }
 
 /**
- * protm_event_worker - Protected mode switch request event handler
- *			called from a workqueue.
- *
- * @data: Pointer to a work_struct embedded in GPU command queue group data.
- *
- * Request to switch to protected mode.
- */
-static void protm_event_worker(struct work_struct *data)
-{
-	struct kbase_queue_group *const group =
-		container_of(data, struct kbase_queue_group, protm_event_work);
-	struct kbase_protected_suspend_buffer *sbuf = &group->protected_suspend_buf;
-	int err = 0;
-
-	KBASE_KTRACE_ADD_CSF_GRP(group->kctx->kbdev, PROTM_EVENT_WORKER_START, group, 0u);
-
-	err = alloc_grp_protected_suspend_buffer_pages(group);
-	if (!err) {
-		kbase_csf_scheduler_group_protm_enter(group);
-	} else if (err == -ENOMEM && sbuf->alloc_retries <= PROTM_ALLOC_MAX_RETRIES) {
-		sbuf->alloc_retries++;
-		/* try again to allocate pages */
-		queue_work(group->kctx->csf.wq, &group->protm_event_work);
-	} else if (sbuf->alloc_retries >= PROTM_ALLOC_MAX_RETRIES || err != -ENOMEM) {
-		dev_err(group->kctx->kbdev->dev,
-			"Failed to allocate physical pages for Protected mode suspend buffer for the group %d of context %d_%d",
-			group->handle, group->kctx->tgid, group->kctx->id);
-		report_group_fatal_error(group);
-	}
-
-	KBASE_KTRACE_ADD_CSF_GRP(group->kctx->kbdev, PROTM_EVENT_WORKER_END, group, 0u);
-}
-
-/**
- * handle_fault_event - Handler for CS fault.
+ * handle_fault_event() - Handler for CS fault.
  *
  * @queue:  Pointer to queue for which fault event was received.
  * @cs_ack: Value of the CS_ACK register in the CS kernel input page used for
@@ -2267,14 +2344,14 @@ static void handle_fault_event(struct kbase_queue *const queue, const u32 cs_ack
 	const u8 cs_fault_exception_type = CS_FAULT_EXCEPTION_TYPE_GET(cs_fault);
 	const u32 cs_fault_exception_data = CS_FAULT_EXCEPTION_DATA_GET(cs_fault);
 	const u64 cs_fault_info_exception_data = CS_FAULT_INFO_EXCEPTION_DATA_GET(cs_fault_info);
-	bool use_old_log_format = true;
+	bool has_trace_info = false;
 	bool skip_fault_report = kbase_ctx_flag(queue->kctx, KCTX_PAGE_FAULT_REPORT_SKIP);
 
 
 	kbase_csf_scheduler_spin_lock_assert_held(kbdev);
 
 
-	if (use_old_log_format && !skip_fault_report)
+	if (!has_trace_info && !skip_fault_report)
 		dev_warn(kbdev->dev,
 			 "Ctx %d_%d Group %d CSG %d CSI: %d\n"
 			 "CS_FAULT.EXCEPTION_TYPE: 0x%x (%s)\n"
@@ -2286,47 +2363,32 @@ static void handle_fault_event(struct kbase_queue *const queue, const u32 cs_ack
 			 cs_fault_info_exception_data);
 
 
-#if IS_ENABLED(CONFIG_DEBUG_FS)
-	/* CS_RESOURCE_TERMINATED type fault event can be ignored from the
-	 * standpoint of dump on error. It is used to report fault for the CSIs
-	 * that are associated with the same CSG as the CSI for which the actual
-	 * fault was reported by the Iterator.
-	 * Dumping would be triggered when the actual fault is reported.
+	/* If dump-on-fault daemon is waiting for a fault, wake up the daemon.
+	 * Acknowledging the fault is deferred to the bottom-half until the wait
+	 * of the dump completion is done.
 	 *
-	 * CS_INHERIT_FAULT can also be ignored. It could happen due to the error
-	 * in other types of queues (cpu/kcpu). If a fault had occurred in some
-	 * other GPU queue then the dump would have been performed anyways when
-	 * that fault was reported.
+	 * Otherwise acknowledge the fault and ring the doorbell for the faulty queue
+	 * to enter into recoverable state.
 	 */
-	if ((cs_fault_exception_type != CS_FAULT_EXCEPTION_TYPE_CS_INHERIT_FAULT) &&
-	    (cs_fault_exception_type != CS_FAULT_EXCEPTION_TYPE_CS_RESOURCE_TERMINATED)) {
-		if (unlikely(kbase_debug_csf_fault_notify(kbdev, queue->kctx, DF_CS_FAULT))) {
-			queue->cs_error = cs_fault;
-			queue->cs_error_info = cs_fault_info;
-			queue->cs_error_fatal = false;
-			queue_work(queue->kctx->csf.wq, &queue->cs_error_work);
-			return;
-		}
-	}
-#endif
+	if (likely(!kbase_debug_csf_fault_notify(kbdev, queue->kctx, DF_CS_FAULT))) {
+		kbase_csf_firmware_cs_input_mask(stream, CS_REQ, cs_ack, CS_REQ_FAULT_MASK);
+		kbase_csf_ring_cs_kernel_doorbell(kbdev, queue->csi_index, queue->group->csg_nr,
+						  true);
+		queue->cs_error_acked = true;
+	} else
+		queue->cs_error_acked = false;
 
-	kbase_csf_firmware_cs_input_mask(stream, CS_REQ, cs_ack, CS_REQ_FAULT_MASK);
-	kbase_csf_ring_cs_kernel_doorbell(kbdev, queue->csi_index, queue->group->csg_nr, true);
+	queue->cs_error = cs_fault;
+	queue->cs_error_info = cs_fault_info;
+	queue->cs_error_fatal = false;
+	if (!queue_work(queue->kctx->csf.wq, &queue->cs_error_work))
+		dev_warn(kbdev->dev, "%s: failed to enqueue a work", __func__);
 }
 
-static void report_queue_fatal_error(struct kbase_queue *const queue, u32 cs_fatal,
-				     u64 cs_fatal_info, struct kbase_queue_group *group)
+static void report_queue_error(struct kbase_queue *const queue, u32 cs_error, u64 cs_error_info,
+			       struct kbase_queue_group *group, bool fatal)
 {
-	struct base_csf_notification
-		error = { .type = BASE_CSF_NOTIFICATION_GPU_QUEUE_GROUP_ERROR,
-			  .payload = {
-				  .csg_error = {
-					  .error = { .error_type =
-							     BASE_GPU_QUEUE_GROUP_QUEUE_ERROR_FATAL,
-						     .payload = { .fatal_queue = {
-									  .sideband = cs_fatal_info,
-									  .status = cs_fatal,
-								  } } } } } };
+	struct base_csf_notification error = { .type = BASE_CSF_NOTIFICATION_GPU_QUEUE_GROUP_ERROR };
 
 	if (!queue)
 		return;
@@ -2335,17 +2397,30 @@ static void report_queue_fatal_error(struct kbase_queue *const queue, u32 cs_fat
 		return;
 
 	error.payload.csg_error.handle = group->handle;
-	error.payload.csg_error.error.payload.fatal_queue.csi_index = (__u8)queue->csi_index;
+	if (fatal) {
+		error.payload.csg_error.error.error_type = BASE_GPU_QUEUE_GROUP_QUEUE_ERROR_FATAL;
+		error.payload.csg_error.error.payload.fatal_queue.sideband = cs_error_info;
+		error.payload.csg_error.error.payload.fatal_queue.status = cs_error;
+		error.payload.csg_error.error.payload.fatal_queue.csi_index = queue->csi_index;
+	} else {
+		error.payload.csg_error.error.error_type = BASE_GPU_QUEUE_GROUP_QUEUE_ERROR_FAULT;
+		error.payload.csg_error.error.payload.fault_queue.sideband = cs_error_info;
+		error.payload.csg_error.error.payload.fault_queue.status = cs_error;
+		error.payload.csg_error.error.payload.fault_queue.csi_index = queue->csi_index;
+	}
 	kbase_csf_event_add_error(queue->kctx, &group->error_fatal, &error);
 	kbase_event_wakeup(queue->kctx);
+
+	if (!fatal)
+		queue->clear_faults = false;
 }
 
 /**
- * cs_error_worker - Handle the CS_FATAL/CS_FAULT error for the GPU queue
+ * cs_error_worker() - Handle the CS_FATAL/CS_FAULT error for the GPU queue
  *
  * @data: Pointer to a work_struct embedded in GPU command queue.
  *
- * Terminate the CSG and report the error to userspace.
+ * Terminate the CSG for CS_FATAL and report the error to userspace.
  */
 static void cs_error_worker(struct work_struct *const data)
 {
@@ -2356,6 +2431,7 @@ static void cs_error_worker(struct work_struct *const data)
 	struct kbase_queue_group *group;
 	bool reset_prevented = false;
 	int err;
+	const bool cs_fatal = queue->cs_error_fatal;
 
 	kbase_debug_csf_fault_wait_completion(kbdev);
 	err = kbase_reset_gpu_prevent_and_wait(kbdev);
@@ -2371,45 +2447,57 @@ static void cs_error_worker(struct work_struct *const data)
 
 	group = get_bound_queue_group(queue);
 	if (!group) {
-		dev_warn(kbdev->dev, "queue not bound when handling fatal event");
+		dev_warn(kbdev->dev, "queue not bound when handling an error event");
 		goto unlock;
 	}
 
-#if IS_ENABLED(CONFIG_DEBUG_FS)
-	if (!queue->cs_error_fatal) {
-		unsigned long flags;
-		int slot_num;
+	if (!cs_fatal) {
+		if (group->cs_fault_report_enable && queue->clear_faults)
+			report_queue_error(queue, queue->cs_error, queue->cs_error_info, group,
+					   false);
+		if (unlikely(!queue->cs_error_acked)) {
+			unsigned long flags;
+			int slot_num;
 
-		kbase_csf_scheduler_spin_lock(kbdev, &flags);
-		slot_num = kbase_csf_scheduler_group_get_slot_locked(group);
-		if (slot_num >= 0) {
-			struct kbase_csf_cmd_stream_group_info const *ginfo =
-				&kbdev->csf.global_iface.groups[slot_num];
-			struct kbase_csf_cmd_stream_info const *stream =
-				&ginfo->streams[queue->csi_index];
-			u32 const cs_ack = kbase_csf_firmware_cs_output(stream, CS_ACK);
+			kbase_csf_scheduler_spin_lock(kbdev, &flags);
+			slot_num = kbase_csf_scheduler_group_get_slot_locked(group);
+			if (likely(slot_num >= 0)) {
+				struct kbase_csf_cmd_stream_group_info const *ginfo =
+					&kbdev->csf.global_iface.groups[slot_num];
+				struct kbase_csf_cmd_stream_info const *stream =
+					&ginfo->streams[queue->csi_index];
+				u32 const cs_ack = kbase_csf_firmware_cs_output(stream, CS_ACK);
+				u32 const cs_req = kbase_csf_firmware_cs_input_read(stream, CS_REQ);
 
-			kbase_csf_firmware_cs_input_mask(stream, CS_REQ, cs_ack, CS_REQ_FAULT_MASK);
-			kbase_csf_ring_cs_kernel_doorbell(kbdev, queue->csi_index, slot_num, true);
+				/* Acknowledge the fault and ring the doorbell for the queue
+				 * if that has not been done yet.
+				 */
+				if ((cs_ack & CS_ACK_FAULT_MASK) != (cs_req & CS_REQ_FAULT_MASK)) {
+					kbase_csf_firmware_cs_input_mask(stream, CS_REQ, cs_ack,
+									 CS_REQ_FAULT_MASK);
+					kbase_csf_ring_cs_kernel_doorbell(kbdev, queue->csi_index,
+									  slot_num, true);
+				}
+			}
+			kbase_csf_scheduler_spin_unlock(kbdev, flags);
 		}
-		kbase_csf_scheduler_spin_unlock(kbdev, flags);
-		goto unlock;
-	}
-#endif
-
-	term_queue_group(group);
-	flush_gpu_cache_on_fatal_error(kbdev);
-	/* For an invalid GPU page fault, CS_BUS_FAULT fatal error is expected after the
-	 * page fault handler disables the AS of faulty context. Need to skip reporting the
-	 * CS_BUS_FAULT fatal error to the Userspace as it doesn't have the full fault info.
-	 * Page fault handler will report the fatal error with full page fault info.
-	 */
-	if ((cs_fatal_exception_type == CS_FATAL_EXCEPTION_TYPE_CS_BUS_FAULT) && group->faulted) {
-		dev_dbg(kbdev->dev,
-			"Skipped reporting CS_BUS_FAULT for queue %d of group %d of ctx %d_%d",
-			queue->csi_index, group->handle, kctx->tgid, kctx->id);
 	} else {
-		report_queue_fatal_error(queue, queue->cs_error, queue->cs_error_info, group);
+		term_queue_group(group);
+		flush_gpu_cache_on_fatal_error(kbdev);
+		/* For an invalid GPU page fault, CS_BUS_FAULT fatal error is expected after the
+		 * page fault handler disables the AS of faulty context. Need to skip reporting the
+		 * CS_BUS_FAULT fatal error to the Userspace as it doesn't have the full fault info.
+		 * Page fault handler will report the fatal error with full page fault info.
+		 */
+		if ((cs_fatal_exception_type == CS_FATAL_EXCEPTION_TYPE_CS_BUS_FAULT) &&
+		    group->faulted) {
+			dev_dbg(kbdev->dev,
+				"Skipped reporting CS_BUS_FAULT for queue %d of group %d of ctx %d_%d",
+				queue->csi_index, group->handle, kctx->tgid, kctx->id);
+		} else {
+			report_queue_error(queue, queue->cs_error, queue->cs_error_info, group,
+					   true);
+		}
 	}
 
 unlock:
@@ -2419,7 +2507,7 @@ unlock:
 }
 
 /**
- * handle_fatal_event - Handler for CS fatal.
+ * handle_fatal_event() - Handler for CS fatal.
  *
  * @queue:    Pointer to queue for which fatal event was received.
  * @stream:   Pointer to the structure containing info provided by the
@@ -2443,13 +2531,13 @@ static void handle_fatal_event(struct kbase_queue *const queue,
 	const u32 cs_fatal_exception_type = CS_FATAL_EXCEPTION_TYPE_GET(cs_fatal);
 	const u32 cs_fatal_exception_data = CS_FATAL_EXCEPTION_DATA_GET(cs_fatal);
 	const u64 cs_fatal_info_exception_data = CS_FATAL_INFO_EXCEPTION_DATA_GET(cs_fatal_info);
-	bool use_old_log_format = true;
+	bool has_trace_info = false;
 	bool skip_fault_report = kbase_ctx_flag(queue->kctx, KCTX_PAGE_FAULT_REPORT_SKIP);
 
 
 	kbase_csf_scheduler_spin_lock_assert_held(kbdev);
 
-	if (use_old_log_format && !skip_fault_report)
+	if (!has_trace_info && !skip_fault_report)
 		dev_warn(kbdev->dev,
 			 "Ctx %d_%d Group %d CSG %d CSI: %d\n"
 			 "CS_FATAL.EXCEPTION_TYPE: 0x%x (%s)\n"
@@ -2481,7 +2569,7 @@ static void handle_fatal_event(struct kbase_queue *const queue,
 }
 
 /**
- * process_cs_interrupts - Process interrupts for a CS.
+ * process_cs_interrupts() - Process interrupts for a CS.
  *
  * @group:  Pointer to GPU command queue group data.
  * @ginfo:  The CSG interface provided by the firmware.
@@ -2595,7 +2683,7 @@ static void process_cs_interrupts(struct kbase_queue_group *const group,
 		}
 
 		if (!group->protected_suspend_buf.pma)
-			queue_work(group->kctx->csf.wq, &group->protm_event_work);
+			kbase_csf_scheduler_enqueue_protm_event_work(group);
 
 		if (test_bit(group->csg_nr, scheduler->csg_slots_idle_mask)) {
 			clear_bit(group->csg_nr, scheduler->csg_slots_idle_mask);
@@ -2608,12 +2696,14 @@ static void process_cs_interrupts(struct kbase_queue_group *const group,
 }
 
 /**
- * process_csg_interrupts - Process interrupts for a CSG.
+ * process_csg_interrupts() - Process interrupts for a CSG.
  *
  * @kbdev: Instance of a GPU platform device that implements a CSF interface.
  * @csg_nr: CSG number.
  * @track: Pointer that tracks the highest idle CSG and the newly possible viable
  *         protected mode requesting group, in current IRQ context.
+ * @progress_timeout_slot_mask: slot mask to indicate on which slot progress timeout
+ *         happens.
  *
  * Handles interrupts for a CSG and for CSs within it.
  *
@@ -2625,7 +2715,8 @@ static void process_cs_interrupts(struct kbase_queue_group *const group,
  * See process_cs_interrupts() for details of per-stream interrupt handling.
  */
 static void process_csg_interrupts(struct kbase_device *const kbdev, u32 const csg_nr,
-				   struct irq_idle_and_protm_track *track)
+				   struct irq_idle_and_protm_track *track,
+				   unsigned long *progress_timeout_slot_mask)
 {
 	struct kbase_csf_cmd_stream_group_info *ginfo;
 	struct kbase_queue_group *group = NULL;
@@ -2712,13 +2803,9 @@ static void process_csg_interrupts(struct kbase_device *const kbdev, u32 const c
 
 		KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_INTERRUPT_PROGRESS_TIMER_EVENT, group,
 					 req ^ ack);
-		dev_info(
-			kbdev->dev,
-			"[%llu] Iterator PROGRESS_TIMER timeout notification received for group %u of ctx %d_%d on slot %u\n",
-			kbase_backend_get_cycle_cnt(kbdev), group->handle, group->kctx->tgid,
-			group->kctx->id, csg_nr);
 
-		handle_progress_timer_event(group);
+		set_bit(csg_nr, progress_timeout_slot_mask);
+
 	}
 
 	process_cs_interrupts(group, ginfo, irqreq, irqack, track);
@@ -2728,7 +2815,7 @@ static void process_csg_interrupts(struct kbase_device *const kbdev, u32 const c
 }
 
 /**
- * process_prfcnt_interrupts - Process performance counter interrupts.
+ * process_prfcnt_interrupts() - Process performance counter interrupts.
  *
  * @kbdev:   Instance of a GPU platform device that implements a CSF interface.
  * @glb_req: Global request register value.
@@ -2800,7 +2887,7 @@ static void process_prfcnt_interrupts(struct kbase_device *kbdev, u32 glb_req, u
 }
 
 /**
- * check_protm_enter_req_complete - Check if PROTM_ENTER request completed
+ * check_protm_enter_req_complete() - Check if PROTM_ENTER request completed
  *
  * @kbdev: Instance of a GPU platform device that implements a CSF interface.
  * @glb_req: Global request register value.
@@ -2828,13 +2915,14 @@ static inline void check_protm_enter_req_complete(struct kbase_device *kbdev, u3
 	dev_dbg(kbdev->dev, "Protected mode entry interrupt received");
 
 	kbdev->protected_mode = true;
+
 	kbase_ipa_protection_mode_switch_event(kbdev);
 	kbase_ipa_control_protm_entered(kbdev);
 	kbase_hwcnt_backend_csf_protm_entered(&kbdev->hwcnt_gpu_iface);
 }
 
 /**
- * process_protm_exit - Handle the protected mode exit interrupt
+ * process_protm_exit() - Handle the protected mode exit interrupt
  *
  * @kbdev: Instance of a GPU platform device that implements a CSF interface.
  * @glb_ack: Global acknowledge register value.
@@ -2923,7 +3011,7 @@ static inline void process_tracked_info_for_protm(struct kbase_device *kbdev,
 		if (!tock_triggered) {
 			dev_dbg(kbdev->dev, "Group-%d on slot-%d start protm work\n", group->handle,
 				group->csg_nr);
-			queue_work(group->kctx->csf.wq, &group->protm_event_work);
+			kbase_csf_scheduler_enqueue_protm_event_work(group);
 		}
 	}
 }
@@ -2952,6 +3040,46 @@ static void order_job_irq_clear_with_iface_mem_read(void)
 	dmb(osh);
 }
 
+static const char *const glb_fatal_status_errors[GLB_FATAL_STATUS_VALUE_COUNT] = {
+	[GLB_FATAL_STATUS_VALUE_OK] = "OK",
+	[GLB_FATAL_STATUS_VALUE_ASSERT] = "Firmware assert triggered",
+	[GLB_FATAL_STATUS_VALUE_UNEXPECTED_EXCEPTION] =
+		"Hardware raised an exception firmware did not expect",
+	[GLB_FATAL_STATUS_VALUE_HANG] = "Firmware hangs and watchdog timer expired",
+};
+
+/**
+ * handle_glb_fatal_event() - Log GLB_FATAL_STATUS and reset the GPU for a known error value
+ *
+ * @kbdev:        Instance of GPU device.
+ * @global_iface: CSF global interface from which GLB_FATAL_STATUS is read.
+ */
+static void handle_glb_fatal_event(struct kbase_device *kbdev,
+				   const struct kbase_csf_global_iface *const global_iface)
+{
+	const char *error_string = NULL;
+	const u32 fatal_status = kbase_csf_firmware_global_output(global_iface, GLB_FATAL_STATUS);
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+	kbase_csf_scheduler_spin_lock_assert_held(kbdev);
+	dev_warn(kbdev->dev, "MCU encountered unrecoverable error");
+
+	if (fatal_status < GLB_FATAL_STATUS_VALUE_COUNT)
+		error_string = glb_fatal_status_errors[fatal_status];
+	else {
+		dev_err(kbdev->dev, "Invalid GLB_FATAL_STATUS (%u)", fatal_status);
+		return;
+	}
+
+	if (fatal_status == GLB_FATAL_STATUS_VALUE_OK)
+		dev_err(kbdev->dev, "GLB_FATAL_STATUS(OK) must be set with proper reason");
+	else {
+		dev_warn(kbdev->dev, "GLB_FATAL_STATUS: %s", error_string);
+		if (kbase_prepare_to_reset_gpu_locked(kbdev, RESET_FLAGS_NONE))
+			kbase_reset_gpu_locked(kbdev);
+	}
+}
+
 void kbase_csf_interrupt(struct kbase_device *kbdev, u32 val)
 {
 	bool deferred_handling_glb_idle_irq = false;
@@ -2972,18 +3100,25 @@ void kbase_csf_interrupt(struct kbase_device *kbdev, u32 val)
 			struct irq_idle_and_protm_track track = { .protm_grp = NULL,
 								  .idle_seq = U32_MAX,
 								  .idle_slot = S8_MAX };
+			DECLARE_BITMAP(progress_timeout_csgs, MAX_SUPPORTED_CSGS) = { 0 };
 
 			kbase_csf_scheduler_spin_lock(kbdev, &flags);
-			/* Looping through and track the highest idle and protm groups */
+			/* Loop through and track the highest idle and protm groups.
+			 * Also track the groups for which a progress timer timeout happened.
+			 */
 			while (csg_interrupts != 0) {
 				u32 const csg_nr = (u32)ffs((int)csg_interrupts) - 1;
 
-				process_csg_interrupts(kbdev, csg_nr, &track);
+				process_csg_interrupts(kbdev, csg_nr, &track,
+						       progress_timeout_csgs);
 				csg_interrupts &= ~(1U << csg_nr);
 			}
 
 			/* Handle protm from the tracked information */
 			process_tracked_info_for_protm(kbdev, &track);
+			/* Handle pending progress timeout(s) */
+			handle_progress_timer_events(kbdev, progress_timeout_csgs);
+
 			kbase_csf_scheduler_spin_unlock(kbdev, flags);
 		}
 
@@ -3012,11 +3147,28 @@ void kbase_csf_interrupt(struct kbase_device *kbdev, u32 val)
 
 				/* Handle IDLE Hysteresis notification event */
 				if ((glb_req ^ glb_ack) & GLB_REQ_IDLE_EVENT_MASK) {
+					u32 const glb_idle_timer_cfg =
+						kbase_csf_firmware_global_input_read(
+							global_iface, GLB_IDLE_TIMER_CONFIG);
+
 					dev_dbg(kbdev->dev, "Idle-hysteresis event flagged");
 					kbase_csf_firmware_global_input_mask(
 						global_iface, GLB_REQ, glb_ack,
 						GLB_REQ_IDLE_EVENT_MASK);
 
+					if (glb_idle_timer_cfg &
+					    GLB_IDLE_TIMER_CONFIG_SLEEP_ON_IDLE_MASK) {
+						/* The FW is going to sleep, we shall:
+						 * - Enable fast GPU idle handling to avoid
+						 *   confirming CSGs status in gpu_idle_worker().
+						 * - Enable doorbell mirroring to minimise the
+						 *   chance of KBase raising kernel doorbells which
+						 *   would cause the FW to be woken up.
+						 */
+						kbdev->csf.scheduler.fast_gpu_idle_handling = true;
+						kbase_pm_enable_db_mirror_interrupt(kbdev);
+					}
+
 					glb_idle_irq_received = true;
 					/* Defer handling this IRQ to account for a race condition
 					 * where the idle worker could be executed before we have
@@ -3026,6 +3178,9 @@ void kbase_csf_interrupt(struct kbase_device *kbdev, u32 val)
 					deferred_handling_glb_idle_irq = true;
 				}
 
+				if (glb_ack & GLB_ACK_FATAL_MASK)
+					handle_glb_fatal_event(kbdev, global_iface);
+
 				process_prfcnt_interrupts(kbdev, glb_req, glb_ack);
 
 				kbase_csf_scheduler_spin_unlock(kbdev, flags);
@@ -3050,13 +3205,10 @@ void kbase_csf_interrupt(struct kbase_device *kbdev, u32 val)
 
 	if (deferred_handling_glb_idle_irq) {
 		unsigned long flags;
-		bool invoke_pm_state_machine;
 
 		kbase_csf_scheduler_spin_lock(kbdev, &flags);
-		invoke_pm_state_machine = kbase_csf_scheduler_process_gpu_idle_event(kbdev);
+		kbase_csf_scheduler_process_gpu_idle_event(kbdev);
 		kbase_csf_scheduler_spin_unlock(kbdev, flags);
-		if (unlikely(invoke_pm_state_machine))
-			kbase_pm_update_state(kbdev);
 	}
 
 	wake_up_all(&kbdev->csf.event_wait);
@@ -3087,6 +3239,11 @@ void kbase_csf_doorbell_mapping_term(struct kbase_device *kbdev)
 	if (kbdev->csf.db_filp) {
 		struct page *page = as_page(kbdev->csf.dummy_db_page);
 
+		/* This is a shared dummy sink page used to avoid a potential segmentation
+		 * fault in the user-side library when a CSI is off slot. Additionally, the
+		 * call is on the module unload path, so the page can be left uncleared
+		 * before returning it back to the kbdev memory pool.
+		 */
 		kbase_mem_pool_free(&kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], page, false);
 
 		fput(kbdev->csf.db_filp);
@@ -3118,26 +3275,27 @@ int kbase_csf_doorbell_mapping_init(struct kbase_device *kbdev)
 	return 0;
 }
 
-void kbase_csf_pending_gpuq_kicks_init(struct kbase_device *kbdev)
+void kbase_csf_pending_gpuq_kick_queues_init(struct kbase_device *kbdev)
 {
 	size_t i;
 
-	for (i = 0; i != ARRAY_SIZE(kbdev->csf.pending_gpuq_kicks); ++i)
-		INIT_LIST_HEAD(&kbdev->csf.pending_gpuq_kicks[i]);
-	spin_lock_init(&kbdev->csf.pending_gpuq_kicks_lock);
+	atomic_set(&kbdev->csf.pending_gpuq_kicks, false);
+	for (i = 0; i != ARRAY_SIZE(kbdev->csf.pending_gpuq_kick_queues); ++i)
+		INIT_LIST_HEAD(&kbdev->csf.pending_gpuq_kick_queues[i]);
+	spin_lock_init(&kbdev->csf.pending_gpuq_kick_queues_lock);
 }
 
-void kbase_csf_pending_gpuq_kicks_term(struct kbase_device *kbdev)
+void kbase_csf_pending_gpuq_kick_queues_term(struct kbase_device *kbdev)
 {
 	size_t i;
 
-	spin_lock(&kbdev->csf.pending_gpuq_kicks_lock);
-	for (i = 0; i != ARRAY_SIZE(kbdev->csf.pending_gpuq_kicks); ++i) {
-		if (!list_empty(&kbdev->csf.pending_gpuq_kicks[i]))
+	spin_lock(&kbdev->csf.pending_gpuq_kick_queues_lock);
+	for (i = 0; i != ARRAY_SIZE(kbdev->csf.pending_gpuq_kick_queues); ++i) {
+		if (!list_empty(&kbdev->csf.pending_gpuq_kick_queues[i]))
 			dev_warn(kbdev->dev,
 				 "Some GPU queue kicks for priority %zu were not handled", i);
 	}
-	spin_unlock(&kbdev->csf.pending_gpuq_kicks_lock);
+	spin_unlock(&kbdev->csf.pending_gpuq_kick_queues_lock);
 }
 
 void kbase_csf_free_dummy_user_reg_page(struct kbase_device *kbdev)
@@ -3145,6 +3303,11 @@ void kbase_csf_free_dummy_user_reg_page(struct kbase_device *kbdev)
 	if (kbdev->csf.user_reg.filp) {
 		struct page *page = as_page(kbdev->csf.user_reg.dummy_page);
 
+		/* This is a shared dummy page used in place of the real USER Register page
+		 * just before the GPU is powered down. Additionally, the call is on the
+		 * module unload path, so the page can be left uncleared before returning
+		 * it back to the kbdev memory pool.
+		 */
 		kbase_mem_pool_free(&kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], page, false);
 		fput(kbdev->csf.user_reg.filp);
 	}
@@ -3227,17 +3390,17 @@ void kbase_csf_process_queue_kick(struct kbase_queue *queue)
 		if (err == -EBUSY) {
 			retry_kick = true;
 
-			spin_lock(&kbdev->csf.pending_gpuq_kicks_lock);
+			spin_lock(&kbdev->csf.pending_gpuq_kick_queues_lock);
 			if (list_empty(&queue->pending_kick_link)) {
 				/* A failed queue kick shall be pushed to the
 				 * back of the queue to avoid potential abuse.
 				 */
 				list_add_tail(
 					&queue->pending_kick_link,
-					&kbdev->csf.pending_gpuq_kicks[queue->group_priority]);
-				spin_unlock(&kbdev->csf.pending_gpuq_kicks_lock);
+					&kbdev->csf.pending_gpuq_kick_queues[queue->group_priority]);
+				spin_unlock(&kbdev->csf.pending_gpuq_kick_queues_lock);
 			} else {
-				spin_unlock(&kbdev->csf.pending_gpuq_kicks_lock);
+				spin_unlock(&kbdev->csf.pending_gpuq_kick_queues_lock);
 				WARN_ON(atomic_read(&queue->pending_kick) == 0);
 			}
 
@@ -3260,3 +3423,30 @@ out_release_queue:
 	WARN_ON(atomic_read(&queue->pending_kick) == 0);
 	atomic_dec(&queue->pending_kick);
 }
+
+void kbase_csf_process_protm_event_request(struct kbase_queue_group *group)
+{
+	struct kbase_protected_suspend_buffer *sbuf = &group->protected_suspend_buf;
+	int err = 0;
+
+	KBASE_KTRACE_ADD_CSF_GRP(group->kctx->kbdev, PROTM_EVENT_WORKER_START, group, 0u);
+
+	err = alloc_grp_protected_suspend_buffer_pages(group);
+	if (!err) {
+		kbase_csf_scheduler_group_protm_enter(group);
+	} else if (err == -ENOMEM && sbuf->alloc_retries <= PROTM_ALLOC_MAX_RETRIES) {
+		sbuf->alloc_retries++;
+		/* Allocation failed with -ENOMEM: queue the request again to retry. */
+		kbase_csf_scheduler_enqueue_protm_event_work(group);
+	} else {
+		/* Reached only when the error is not -ENOMEM, or when it is -ENOMEM
+		 * and alloc_retries already exceeds PROTM_ALLOC_MAX_RETRIES.
+		 */
+		dev_err(group->kctx->kbdev->dev,
+			"Failed to allocate physical pages for Protected mode suspend buffer for the group %d of context %d_%d",
+			group->handle, group->kctx->tgid, group->kctx->id);
+		report_group_fatal_error(group);
+	}
+
+	KBASE_KTRACE_ADD_CSF_GRP(group->kctx->kbdev, PROTM_EVENT_WORKER_END, group, 0u);
+}
diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf.h b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf.h
index b2f6ab2c4a27..566136342a06 100644
--- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf.h
+++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf.h
@@ -243,6 +243,19 @@ struct kbase_queue_group *kbase_csf_find_queue_group(struct kbase_context *kctx,
  */
 int kbase_csf_queue_group_handle_is_valid(struct kbase_context *kctx, u8 group_handle);
 
+/**
+ * kbase_csf_queue_group_clear_faults - Re-enable CS Fault reporting.
+ *
+ * @kctx:	Pointer to the kbase context within which the
+ *		CS Faults for the queues have to be re-enabled.
+ * @clear_faults:	Pointer to the structure which contains details of the
+ *		queues for which the CS Fault reporting has to be re-enabled.
+ *
+ * Return:	0 on success, or negative on failure.
+ */
+int kbase_csf_queue_group_clear_faults(struct kbase_context *kctx,
+				       struct kbase_ioctl_queue_group_clear_faults *clear_faults);
+
 /**
  * kbase_csf_queue_group_create - Create a GPU command queue group.
  *
@@ -379,20 +392,20 @@ int kbase_csf_setup_dummy_user_reg_page(struct kbase_device *kbdev);
 void kbase_csf_free_dummy_user_reg_page(struct kbase_device *kbdev);
 
 /**
- * kbase_csf_pending_gpuq_kicks_init - Initialize the data used for handling
- *                                     GPU queue kicks.
+ * kbase_csf_pending_gpuq_kick_queues_init - Initialize the data used for handling
+ *                                           GPU queue kicks.
  *
  * @kbdev: Instance of a GPU platform device that implements a CSF interface.
  */
-void kbase_csf_pending_gpuq_kicks_init(struct kbase_device *kbdev);
+void kbase_csf_pending_gpuq_kick_queues_init(struct kbase_device *kbdev);
 
 /**
- * kbase_csf_pending_gpuq_kicks_term - De-initialize the data used for handling
- *                                     GPU queue kicks.
+ * kbase_csf_pending_gpuq_kick_queues_term - De-initialize the data used for handling
+ *                                           GPU queue kicks.
  *
  * @kbdev: Instance of a GPU platform device that implements a CSF interface.
  */
-void kbase_csf_pending_gpuq_kicks_term(struct kbase_device *kbdev);
+void kbase_csf_pending_gpuq_kick_queues_term(struct kbase_device *kbdev);
 
 /**
  * kbase_csf_ring_csg_doorbell - ring the doorbell for a CSG interface.
@@ -546,4 +559,13 @@ static inline u64 kbase_csf_ktrace_gpu_cycle_cnt(struct kbase_device *kbdev)
  */
 void kbase_csf_process_queue_kick(struct kbase_queue *queue);
 
+/**
+ * kbase_csf_process_protm_event_request - Handle protected mode switch request
+ *
+ * @group: The group to handle protected mode request
+ *
+ * Request to switch to protected mode.
+ */
+void kbase_csf_process_protm_event_request(struct kbase_queue_group *group);
+
 #endif /* _KBASE_CSF_H_ */
diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_csg_debugfs.c b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_csg_debugfs.c
index c885845bc62e..32f33a58a6f7 100644
--- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_csg_debugfs.c
+++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_csg_debugfs.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
 /*
  *
- * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2024 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -28,6 +28,8 @@
 #include <mali_kbase.h>
 #include <linux/seq_file.h>
 #include <linux/version_compat_defs.h>
+#include <mali_kbase_reset_gpu.h>
+#include <mali_kbase_config_defaults.h>
 
 #define MAX_SCHED_STATE_STRING_LEN (16)
 /**
@@ -268,6 +270,87 @@ static const struct file_operations kbasep_csf_debugfs_scheduler_state_fops = {
 	.open = simple_open,
 	.llseek = default_llseek,
 };
+static int kbasep_csf_debugfs_eviction_timeout_get(void *data, u64 *val)
+{
+	struct kbase_device *const kbdev = data;
+	unsigned long flags;
+
+	kbase_csf_scheduler_spin_lock(kbdev, &flags);
+	*val = kbdev->csf.csg_suspend_timeout_ms - CSG_SUSPEND_TIMEOUT_HOST_ADDED_MS;
+	kbase_csf_scheduler_spin_unlock(kbdev, flags);
+
+	return 0;
+}
+
+static int kbasep_csf_debugfs_eviction_timeout_set(void *data, u64 val)
+{
+	struct kbase_device *const kbdev = data;
+	unsigned long flags_schd, flags_hw;
+	u64 dur_ms = val;
+	int ret = 0;
+
+	if (unlikely(dur_ms < CSG_SUSPEND_TIMEOUT_FIRMWARE_MS_MIN ||
+		     dur_ms > CSG_SUSPEND_TIMEOUT_FIRMWARE_MS_MAX)) {
+		dev_err(kbdev->dev, "Invalid CSG suspend timeout input (%llu)", dur_ms);
+		return -EFAULT;
+	}
+	dur_ms = dur_ms + CSG_SUSPEND_TIMEOUT_HOST_ADDED_MS;
+
+	/* The 'fw_load_lock' is taken to synchronize against the deferred loading
+	 * of FW; if FW is not loaded yet, only the host-side value is stored here.
+	 */
+	mutex_lock(&kbdev->fw_load_lock);
+	if (unlikely(!kbdev->csf.firmware_inited)) {
+		kbase_csf_scheduler_spin_lock(kbdev, &flags_schd);
+		kbdev->csf.csg_suspend_timeout_ms = (unsigned int)dur_ms;
+		kbase_csf_scheduler_spin_unlock(kbdev, flags_schd);
+		mutex_unlock(&kbdev->fw_load_lock);
+		dev_info(kbdev->dev, "CSF set csg suspend timeout deferred till fw is loaded");
+		goto end;
+	}
+	mutex_unlock(&kbdev->fw_load_lock);
+
+	/* Firmware reloading is triggered by a silent reset, after which the update takes effect.
+	 */
+	kbase_csf_scheduler_pm_active(kbdev);
+	if (kbase_csf_scheduler_killable_wait_mcu_active(kbdev)) {
+		dev_err(kbdev->dev,
+			"Unable to activate the MCU, the csg suspend timeout value shall remain unchanged");
+		kbase_csf_scheduler_pm_idle(kbdev);
+		ret = -EFAULT;
+		goto exit;
+	}
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags_hw);
+	if (kbase_reset_gpu_silent(kbdev)) {
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags_hw);
+		dev_err(kbdev->dev, "CSF set csg suspend timeout pending reset, try again");
+		kbase_csf_scheduler_pm_idle(kbdev);
+		ret = -EFAULT;
+		goto exit;
+	}
+	/* The GPU reset has been requested but will only run once hwaccess_lock is released,
+	 * so the host-side value is updated now: after the request and before the reset runs.
+	 */
+	kbase_csf_scheduler_spin_lock(kbdev, &flags_schd);
+	kbdev->csf.csg_suspend_timeout_ms = (unsigned int)dur_ms;
+	kbase_csf_scheduler_spin_unlock(kbdev, flags_schd);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags_hw);
+	/* Keep PM active until the reset has finished so that FW reloading can take place;
+	 * the update request will then be sent to FW during its initialization.
+	 */
+	kbase_reset_gpu_wait(kbdev);
+	kbase_csf_scheduler_pm_idle(kbdev);
+
+end:
+	dev_info(kbdev->dev, "CSF set csg suspend timeout: %u ms", (unsigned int)dur_ms);
+
+exit:
+	return ret;
+}
+
+DEFINE_DEBUGFS_ATTRIBUTE(kbasep_csf_debugfs_eviction_timeout_fops,
+			 &kbasep_csf_debugfs_eviction_timeout_get,
+			 &kbasep_csf_debugfs_eviction_timeout_set, "%llu\n");
 
 void kbase_csf_debugfs_init(struct kbase_device *kbdev)
 {
@@ -280,6 +363,8 @@ void kbase_csf_debugfs_init(struct kbase_device *kbdev)
 			    &kbasep_csf_debugfs_scheduling_timer_kick_fops);
 	debugfs_create_file("scheduler_state", 0644, kbdev->mali_debugfs_directory, kbdev,
 			    &kbasep_csf_debugfs_scheduler_state_fops);
+	debugfs_create_file("eviction_timeout_ms", 0644, kbdev->mali_debugfs_directory, kbdev,
+			    &kbasep_csf_debugfs_eviction_timeout_fops);
 
 	kbase_csf_tl_reader_debugfs_init(kbdev);
 }
diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_defs.h b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_defs.h
index 8d7c896e1051..38e7cb940d97 100644
--- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_defs.h
+++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_defs.h
@@ -1,7 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
 /*
  *
- * (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2018-2024 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -30,6 +30,7 @@
 #include "mali_kbase_csf_firmware.h"
 #include "mali_kbase_csf_event.h"
 #include <uapi/gpu/arm/bifrost/csf/mali_kbase_csf_errors_dumpfault.h>
+#include "mali_kbase_csf_fw_io.h"
 
 #include <linux/version_compat_defs.h>
 
@@ -267,7 +268,7 @@ enum kbase_queue_group_priority {
  * @CSF_PM_TIMEOUT: Timeout for GPU Power Management to reach the desired
  *                  Shader, L2 and MCU state.
  * @CSF_GPU_RESET_TIMEOUT: Waiting timeout for GPU reset to complete.
- * @CSF_CSG_SUSPEND_TIMEOUT: Timeout given for a CSG to be suspended.
+ * @CSF_CSG_TERM_TIMEOUT: Timeout given for a CSG to be terminated.
  * @CSF_FIRMWARE_BOOT_TIMEOUT: Maximum time to wait for firmware to boot.
  * @CSF_FIRMWARE_PING_TIMEOUT: Maximum time to wait for firmware to respond
  *                             to a ping from KBase.
@@ -289,7 +290,7 @@ enum kbase_timeout_selector {
 	CSF_FIRMWARE_TIMEOUT,
 	CSF_PM_TIMEOUT,
 	CSF_GPU_RESET_TIMEOUT,
-	CSF_CSG_SUSPEND_TIMEOUT,
+	CSF_CSG_TERM_TIMEOUT,
 	CSF_FIRMWARE_BOOT_TIMEOUT,
 	CSF_FIRMWARE_PING_TIMEOUT,
 	CSF_SCHED_PROTM_PROGRESS_TIMEOUT,
@@ -398,6 +399,10 @@ struct kbase_csf_notification {
  * @cs_error:         Records information about the CS fatal event or
  *                    about CS fault event if dump on fault is enabled.
  * @cs_error_fatal:   Flag to track if the CS fault or CS fatal event occurred.
+ * @cs_error_acked:   Flag to indicate that acknowledging the fault has been done
+ *                    at top-half of fault handler.
+ * @clear_faults:     Flag to track if the CS fault reporting is enabled for this queue.
+ *                    It's protected by &kbase_context.csf.lock.
  * @extract_ofs: The current EXTRACT offset, this is only updated when handling
  *               the GLB IDLE IRQ if the idle timeout value is non-0 in order
  *               to help detect a queue's true idle status.
@@ -441,6 +446,8 @@ struct kbase_queue {
 	u64 cs_error_info;
 	u32 cs_error;
 	bool cs_error_fatal;
+	bool cs_error_acked;
+	bool clear_faults;
 	u64 extract_ofs;
 	u64 saved_cmd_ptr;
 };
@@ -501,6 +508,8 @@ struct kbase_protected_suspend_buffer {
  * @compute_max:    Maximum number of compute endpoints the group is
  *                  allowed to use.
  * @csi_handlers:   Requested CSI exception handler flags for the group.
+ * @cs_fault_report_enable:	Indicates if reporting of CS_FAULTs to
+ *				userspace is enabled.
  * @tiler_mask:     Mask of tiler endpoints the group is allowed to use.
  * @fragment_mask:  Mask of fragment endpoints the group is allowed to use.
  * @compute_mask:   Mask of compute endpoints the group is allowed to use.
@@ -531,14 +540,21 @@ struct kbase_protected_suspend_buffer {
  * @bound_queues:   Array of registered queues bound to this queue group.
  * @doorbell_nr:    Index of the hardware doorbell page assigned to the
  *                  group.
- * @protm_event_work:   Work item corresponding to the protected mode entry
- *                      event for this queue.
+ * @protm_event_work: List item corresponding to the protected mode entry
+ *                    event for this group. This would be handled by
+ *                    kbase_csf_scheduler_kthread().
+ * @pending_protm_event_work: Indicates that kbase_csf_scheduler_kthread() should
+ *                            handle PROTM request for this group. This would
+ *                            be set to false when the work is done. This is used
+ *                            mainly for synchronisation with group termination.
  * @protm_pending_bitmap:  Bit array to keep a track of CSs that
  *                         have pending protected mode entry requests.
  * @error_fatal: An error of type BASE_GPU_QUEUE_GROUP_ERROR_FATAL to be
  *               returned to userspace if such an error has occurred.
  * @timer_event_work: Work item to handle the progress timeout fatal event
  *                    for the group.
+ * @progress_timer_state: Value of CSG_PROGRESS_TIMER_STATE register when progress
+ *                        timer timeout is reported for the group.
  * @deschedule_deferred_cnt: Counter keeping a track of the number of threads
  *                           that tried to deschedule the group and had to defer
  *                           the descheduling due to the dump on fault.
@@ -569,7 +585,7 @@ struct kbase_queue_group {
 	u8 compute_max;
 	u8 csi_handlers;
 
-
+	__u8 cs_fault_report_enable;
 	u64 tiler_mask;
 	u64 fragment_mask;
 	u64 compute_mask;
@@ -588,12 +604,14 @@ struct kbase_queue_group {
 	struct kbase_queue *bound_queues[MAX_SUPPORTED_STREAMS_PER_GROUP];
 
 	int doorbell_nr;
-	struct work_struct protm_event_work;
+	struct list_head protm_event_work;
+	atomic_t pending_protm_event_work;
 	DECLARE_BITMAP(protm_pending_bitmap, MAX_SUPPORTED_STREAMS_PER_GROUP);
 
 	struct kbase_csf_notification error_fatal;
 
 	struct work_struct timer_event_work;
+	u32 progress_timer_state;
 
 	/**
 	 * @dvs_buf: Address and size of scratch memory.
@@ -625,6 +643,9 @@ struct kbase_queue_group {
  * @cmd_seq_num:        The sequence number assigned to an enqueued command,
  *                      in incrementing order (older commands shall have a
  *                      smaller number).
+ * @kcpu_wq: Work queue to process KCPU commands for all queues in this
+ *           context. This would be used if the context is not prioritised,
+ *           otherwise it would be handled by kbase_csf_scheduler_kthread().
  * @jit_lock:           Lock to serialise JIT operations.
  * @jit_cmds_head:      A list of the just-in-time memory commands, both
  *                      allocate & free, in submission order, protected
@@ -640,6 +661,8 @@ struct kbase_csf_kcpu_queue_context {
 	DECLARE_BITMAP(in_use, KBASEP_MAX_KCPU_QUEUES);
 	atomic64_t cmd_seq_num;
 
+	struct workqueue_struct *kcpu_wq;
+
 	struct mutex jit_lock;
 	struct list_head jit_cmds_head;
 	struct list_head jit_blocked_queues;
@@ -747,15 +770,7 @@ struct kbase_csf_ctx_heap_reclaim_info {
  *                      GPU command queues are idle and at least one of them
  *                      is blocked on a sync wait operation.
  * @num_idle_wait_grps: Length of the @idle_wait_groups list.
- * @sync_update_wq_high_prio: high-priority work queue to process the
- *                            SYNC_UPDATE events by sync_set / sync_add
- *                            instruction execution on command streams bound to
- *                            groups of @idle_wait_groups list. This WQ would
- *                            be used if the context is prioritised.
- * @sync_update_wq_normal_prio: similar to sync_update_wq_high_prio, but this
- *                              WQ would be used if the context is not
- *                              prioritised.
- * @sync_update_work:   Work item to process the SYNC_UPDATE events.
+ * @sync_update_work:   List item to process the SYNC_UPDATE event.
  * @ngrp_to_schedule:	Number of groups added for the context to the
  *                      'groups_to_schedule' list of scheduler instance.
  * @heap_info:          Heap reclaim information data of the kctx. As the
@@ -768,9 +783,7 @@ struct kbase_csf_scheduler_context {
 	u32 num_runnable_grps;
 	struct list_head idle_wait_groups;
 	u32 num_idle_wait_grps;
-	struct workqueue_struct *sync_update_wq_high_prio;
-	struct workqueue_struct *sync_update_wq_normal_prio;
-	struct work_struct sync_update_work;
+	struct list_head sync_update_work;
 	u32 ngrp_to_schedule;
 	struct kbase_csf_ctx_heap_reclaim_info heap_info;
 };
@@ -865,17 +878,16 @@ struct kbase_csf_user_reg_context {
  * @wq:               Dedicated workqueue to process work items corresponding
  *                    to the OoM events raised for chunked tiler heaps being
  *                    used by GPU command queues, and progress timeout events.
- * @kcpu_wq_high_prio: High-priority work queue to process KCPU commands for
- *                     all queues in this context. This WQ would be used if
- *                     the context is prioritised.
- * @kcpu_wq_normal_prio: Similar to kcpu_wq_high_prio, but this WQ would be
- *                       used if the context is not prioritised.
  * @link:             Link to this csf context in the 'runnable_kctxs' list of
  *                    the scheduler instance
  * @sched:            Object representing the scheduler's context
  * @cpu_queue:        CPU queue information. Only be available when DEBUG_FS
  *                    is enabled.
  * @user_reg:         Collective information to support mapping to USER Register page.
+ * @pending_sync_update: Indicates that kbase_csf_scheduler_kthread() should
+ *                       handle SYNC_UPDATE event for this context. This would
+ *                       be set to false when the work is done. This is used
+ *                       mainly for synchronisation with context termination.
  */
 struct kbase_csf_context {
 	struct list_head event_pages_head;
@@ -888,12 +900,11 @@ struct kbase_csf_context {
 	struct kbase_csf_event event;
 	struct kbase_csf_tiler_heap_context tiler_heaps;
 	struct workqueue_struct *wq;
-	struct workqueue_struct *kcpu_wq_high_prio;
-	struct workqueue_struct *kcpu_wq_normal_prio;
 	struct list_head link;
 	struct kbase_csf_scheduler_context sched;
 	struct kbase_csf_cpu_queue_context cpu_queue;
 	struct kbase_csf_user_reg_context user_reg;
+	atomic_t pending_sync_update;
 };
 
 /**
@@ -922,13 +933,11 @@ struct kbase_csf_reset_gpu {
  *                             of CSG slots.
  * @resident_group:   pointer to the queue group that is resident on the CSG slot.
  * @state:            state of the slot as per enum @kbase_csf_csg_slot_state.
- * @trigger_jiffies:  value of jiffies when change in slot state is recorded.
  * @priority:         dynamic priority assigned to CSG slot.
  */
 struct kbase_csf_csg_slot {
 	struct kbase_queue_group *resident_group;
 	atomic_t state;
-	unsigned long trigger_jiffies;
 	u8 priority;
 };
 
@@ -936,14 +945,15 @@ struct kbase_csf_csg_slot {
  * struct kbase_csf_sched_heap_reclaim_mgr - Object for managing tiler heap reclaim
  *                                           kctx lists inside the CSF device's scheduler.
  *
- * @heap_reclaim:   Tiler heap reclaim shrinker object.
+ * @heap_reclaim:   Defines Tiler heap reclaim shrinker object.
  * @ctx_lists:      Array of kctx lists, size matching CSG defined priorities. The
  *                  lists track the kctxs attached to the reclaim manager.
  * @unused_pages:   Estimated number of unused pages from the @ctxlist array. The
  *                  number is indicative for use with reclaim shrinker's count method.
  */
 struct kbase_csf_sched_heap_reclaim_mgr {
-	struct shrinker heap_reclaim;
+	DEFINE_KBASE_SHRINKER heap_reclaim;
+
 	struct list_head ctx_lists[KBASE_QUEUE_GROUP_PRIORITY_COUNT];
 	atomic_t unused_pages;
 };
@@ -1042,10 +1052,29 @@ struct kbase_csf_mcu_shared_regions {
  *                          workqueue items (kernel-provided delayed_work
  *                          items do not use hrtimer and for some reason do
  *                          not provide sufficiently reliable periodicity).
- * @pending_tick_work:      Indicates that kbase_csf_scheduler_kthread() should perform
- *                          a scheduling tick.
- * @pending_tock_work:      Indicates that kbase_csf_scheduler_kthread() should perform
- *                          a scheduling tock.
+ * @pending_sync_update_works:  Indicates that kbase_csf_scheduler_kthread()
+ *                              should handle SYNC_UPDATE events.
+ * @sync_update_work_ctxs_lock: Lock protecting the list of contexts that
+ *                              require handling SYNC_UPDATE events.
+ * @sync_update_work_ctxs:      The list of contexts that require handling
+ *                              SYNC_UPDATE events.
+ * @pending_protm_event_works:  Indicates that kbase_csf_scheduler_kthread()
+ *                              should handle PROTM requests.
+ * @protm_event_work_grps_lock: Lock protecting the list of groups that
+ *                              have requested protected mode.
+ * @protm_event_work_grps:      The list of groups that have requested
+ *                              protected mode.
+ * @pending_kcpuq_works:    Indicates that kbase_csf_scheduler_kthread()
+ *                          should process pending KCPU queue works.
+ * @kcpuq_work_queues_lock: Lock protecting the list of KCPU queues that
+ *                          need to be processed.
+ * @kcpuq_work_queues:      The list of KCPU queues that need to be processed.
+ * @pending_tick_work:      Indicates that kbase_csf_scheduler_kthread() should
+ *                          perform a scheduling tick.
+ * @pending_tock_work:      Indicates that kbase_csf_scheduler_kthread() should
+ *                          perform a scheduling tock.
+ * @pending_gpu_idle_work:  Indicates that kbase_csf_scheduler_kthread() should
+ *                          handle the GPU IDLE event.
  * @ping_work:              Work item that would ping the firmware at regular
  *                          intervals, only if there is a single active CSG
  *                          slot, to check if firmware is alive and would
@@ -1063,10 +1092,6 @@ struct kbase_csf_mcu_shared_regions {
  *                          This pointer being set doesn't necessarily indicates
  *                          that GPU is in protected mode, kbdev->protected_mode
  *                          needs to be checked for that.
- * @idle_wq:                Workqueue for executing GPU idle notification
- *                          handler.
- * @gpu_idle_work:          Work item for facilitating the scheduler to bring
- *                          the GPU to a low-power mode on becoming idle.
  * @fast_gpu_idle_handling: Indicates whether to relax many of the checks
  *                          normally done in the GPU idle worker. This is
  *                          set to true when handling the GLB IDLE IRQ if the
@@ -1109,8 +1134,11 @@ struct kbase_csf_mcu_shared_regions {
  *                          thread when a queue needs attention.
  * @kthread_running:        Whether the GPU queue submission thread should keep
  *                          executing.
- * @gpuq_kthread:           High-priority thread used to handle GPU queue
+ * @gpuq_kthread:           Dedicated thread primarily used to handle
+ *                          latency-sensitive tasks such as GPU queue
  *                          submissions.
+ * @gpu_idle_timer_enabled: Tracks whether the GPU idle timer is enabled or disabled.
+ * @fw_soi_enabled:         True if FW Sleep-on-Idle is currently enabled.
  */
 struct kbase_csf_scheduler {
 	struct mutex lock;
@@ -1134,14 +1162,22 @@ struct kbase_csf_scheduler {
 	unsigned long last_schedule;
 	atomic_t timer_enabled;
 	struct hrtimer tick_timer;
+	atomic_t pending_sync_update_works;
+	spinlock_t sync_update_work_ctxs_lock;
+	struct list_head sync_update_work_ctxs;
+	atomic_t pending_protm_event_works;
+	spinlock_t protm_event_work_grps_lock;
+	struct list_head protm_event_work_grps;
+	atomic_t pending_kcpuq_works;
+	spinlock_t kcpuq_work_queues_lock;
+	struct list_head kcpuq_work_queues;
 	atomic_t pending_tick_work;
 	atomic_t pending_tock_work;
+	atomic_t pending_gpu_idle_work;
 	struct delayed_work ping_work;
 	struct kbase_context *top_kctx;
 	struct kbase_queue_group *top_grp;
 	struct kbase_queue_group *active_protm_grp;
-	struct workqueue_struct *idle_wq;
-	struct work_struct gpu_idle_work;
 	bool fast_gpu_idle_handling;
 	atomic_t gpu_no_longer_idle;
 	atomic_t non_idle_offslot_grps;
@@ -1180,6 +1216,8 @@ struct kbase_csf_scheduler {
 	 */
 	spinlock_t gpu_metrics_lock;
 #endif /* CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD */
+	atomic_t gpu_idle_timer_enabled;
+	atomic_t fw_soi_enabled;
 };
 
 /*
@@ -1643,6 +1681,7 @@ struct kbase_csf_user_reg {
  * @gpu_idle_dur_count_no_modifier: Update csffw_glb_req_idle_enable to make the shr(10)
  *                                  modifier conditional on the new flag
  *                                  in GLB_IDLE_TIMER_CONFIG.
+ * @csg_suspend_timeout_ms: Timeout given for a CSG to be suspended.
  *                          for any request sent to the firmware.
  * @hwcnt:                  Contain members required for handling the dump of
  *                          HW counters.
@@ -1653,12 +1692,29 @@ struct kbase_csf_user_reg {
  * @dof:                    Structure for dump on fault.
  * @user_reg:               Collective information to support the mapping to
  *                          USER Register page for user processes.
- * @pending_gpuq_kicks:     Lists of GPU queue that have been kicked but not
- *                          yet processed, categorised by queue group's priority.
- * @pending_gpuq_kicks_lock: Protect @pending_gpu_kicks and
- *                           kbase_queue.pending_kick_link.
+ * @pending_gpuq_kicks:            Indicates that kbase_csf_scheduler_kthread()
+ *                                 should handle GPU queue kicks.
+ * @pending_gpuq_kick_queues:      Lists of GPU queues that have been kicked but not
+ *                                 yet processed, categorised by queue group's priority.
+ * @pending_gpuq_kick_queues_lock: Protect @pending_gpuq_kick_queues and
+ *                                 kbase_queue.pending_kick_link.
  * @quirks_ext:             Pointer to an allocated buffer containing the firmware
  *                          workarounds configuration.
+ * @mmu_sync_sem:           RW Semaphore to defer MMU operations till the P.Mode entrance
+ *                          or DCS request has been completed.
+ * @pmode_sync_sem:         RW Semaphore to prevent MMU operations during P.Mode entrance.
+ * @page_fault_cnt_ptr_address: GPU VA of the location in FW data memory, extracted from the
+ *                              FW image header, that will store the GPU VA of FW visible
+ *                              memory location where the @page_fault_cnt value will be written to.
+ * @page_fault_cnt_ptr:         CPU VA of the FW visible memory location where the @page_fault_cnt
+ *                              value will be written to.
+ * @page_fault_cnt:             Counter that is incremented on every GPU page fault, just before the
+ *                              MMU is unblocked to retry the memory transaction that caused the GPU
+ *                              page fault. The access to counter is serialized appropriately.
+ * @mcu_halted:             Flag to inform MCU FSM that the MCU has already halted.
+ * @fw_io:                  Firmware I/O interface.
+ * @compute_progress_timeout_cc: Value of GPU cycle count register when progress
+ *                               timer timeout is reported for the compute iterator.
  */
 struct kbase_csf_device {
 	struct kbase_mmu_table mcu_mmu;
@@ -1696,6 +1752,7 @@ struct kbase_csf_device {
 	u64 gpu_idle_hysteresis_ns;
 	u32 gpu_idle_dur_count;
 	u32 gpu_idle_dur_count_no_modifier;
+	u32 csg_suspend_timeout_ms;
 	struct kbase_csf_hwcnt hwcnt;
 	struct kbase_csf_mcu_fw fw;
 	struct kbase_csf_firmware_log fw_log;
@@ -1710,9 +1767,18 @@ struct kbase_csf_device {
 	struct kbase_debug_coresight_device coresight;
 #endif /* IS_ENABLED(CONFIG_MALI_CORESIGHT) */
 	struct kbase_csf_user_reg user_reg;
-	struct list_head pending_gpuq_kicks[KBASE_QUEUE_GROUP_PRIORITY_COUNT];
-	spinlock_t pending_gpuq_kicks_lock;
+	atomic_t pending_gpuq_kicks;
+	struct list_head pending_gpuq_kick_queues[KBASE_QUEUE_GROUP_PRIORITY_COUNT];
+	spinlock_t pending_gpuq_kick_queues_lock;
 	u32 *quirks_ext;
+	struct rw_semaphore mmu_sync_sem;
+	struct rw_semaphore pmode_sync_sem;
+	u32 page_fault_cnt_ptr_address;
+	u32 *page_fault_cnt_ptr;
+	u32 page_fault_cnt;
+	bool mcu_halted;
+	struct kbase_csf_fw_io fw_io;
+	u64 compute_progress_timeout_cc;
 };
 
 /**
diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware.c b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware.c
index 952a9b9cdd94..2d8f96641181 100644
--- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware.c
+++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
 /*
  *
- * (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2018-2024 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -40,6 +40,7 @@
 #include "backend/gpu/mali_kbase_clk_rate_trace_mgr.h"
 #include <csf/ipa_control/mali_kbase_csf_ipa_control.h>
 #include <csf/mali_kbase_csf_registers.h>
+#include <csf/mali_kbase_csf_fw_io.h>
 #include <linux/list.h>
 #include <linux/slab.h>
 #include <linux/firmware.h>
@@ -55,6 +56,7 @@
 #include <linux/delay.h>
 #include <linux/version_compat_defs.h>
 
+#include <mali_kbase_config_defaults.h>
 #define MALI_MAX_DEFAULT_FIRMWARE_NAME_LEN ((size_t)64)
 
 #define DEFAULT_FW_NAME MALI_RELEASE_NAME".mali_csffw.bin"
@@ -68,6 +70,7 @@ static unsigned int csf_firmware_boot_timeout_ms;
 module_param(csf_firmware_boot_timeout_ms, uint, 0444);
 MODULE_PARM_DESC(csf_firmware_boot_timeout_ms, "Maximum time to wait for firmware to boot.");
 
+static bool kbase_iter_trace_enable;
 
 #ifdef CONFIG_MALI_BIFROST_DEBUG
 /* Makes Driver wait indefinitely for an acknowledgment for the different
@@ -97,6 +100,7 @@ MODULE_PARM_DESC(fw_debug, "Enables effective use of a debugger for debugging fi
 #define CSF_FIRMWARE_ENTRY_TYPE_TIMELINE_METADATA (4)
 #define CSF_FIRMWARE_ENTRY_TYPE_BUILD_INFO_METADATA (6)
 #define CSF_FIRMWARE_ENTRY_TYPE_FUNC_CALL_LIST (7)
+#define CSF_FIRMWARE_ENTRY_TYPE_PAGE_FAULT_CNT (8)
 #define CSF_FIRMWARE_ENTRY_TYPE_CORE_DUMP (9)
 
 #define CSF_FIRMWARE_CACHE_MODE_NONE (0ul << 3)
@@ -115,7 +119,8 @@ MODULE_PARM_DESC(fw_debug, "Enables effective use of a debugger for debugging fi
 
 #define CSF_GLB_REQ_CFG_MASK                                           \
 	(GLB_REQ_CFG_ALLOC_EN_MASK | GLB_REQ_CFG_PROGRESS_TIMER_MASK | \
-	 GLB_REQ_CFG_PWROFF_TIMER_MASK | GLB_REQ_IDLE_ENABLE_MASK)
+	 GLB_REQ_CFG_PWROFF_TIMER_MASK | GLB_REQ_IDLE_ENABLE_MASK |    \
+	 GLB_REQ_CFG_EVICTION_TIMER_MASK | GLB_REQ_ITER_TRACE_ENABLE_MASK)
 
 static inline u32 input_page_read(const u32 *const input, const u32 offset)
 {
@@ -179,6 +184,92 @@ struct firmware_timeline_metadata {
 	size_t size;
 };
 
+static void reinit_page_fault_cnt_firmware_memory(struct kbase_device *kbdev)
+{
+	if (!kbdev->csf.page_fault_cnt_ptr)
+		return;
+
+	/* Write into the FW data memory the GPU address of the shared memory location
+	 * (last 4 bytes of the mcu_rw page) where the page fault counter will be written.
+	 */
+	kbase_csf_update_firmware_memory(
+		kbdev, kbdev->csf.page_fault_cnt_ptr_address,
+		(u32)((kbdev->csf.firmware_trace_buffers.mcu_rw.va_reg->start_pfn << PAGE_SHIFT) +
+		      PAGE_SIZE - sizeof(u32)));
+
+	*kbdev->csf.page_fault_cnt_ptr = kbdev->csf.page_fault_cnt = 0;
+}
+
+static void init_page_fault_cnt_firmware_memory(struct kbase_device *kbdev)
+{
+	if (!kbdev->csf.page_fault_cnt_ptr_address)
+		return;
+
+	if (WARN_ON_ONCE(!kbdev->csf.firmware_trace_buffers.mcu_rw.va_reg))
+		return;
+
+	/* Save the CPU address of the shared memory location where the page fault
+	 * counter value will be written.
+	 * The shared memory location comes from the last 4 bytes of the page that
+	 * is allocated to maintain the extract offset value for different trace
+	 * buffers. Only the first 4 bytes of every cacheline are used for the extract
+	 * offset value.
+	 */
+	kbdev->csf.page_fault_cnt_ptr =
+		(u32 *)((u8 *)kbdev->csf.firmware_trace_buffers.mcu_rw.cpu_addr + PAGE_SIZE -
+			sizeof(u32));
+	reinit_page_fault_cnt_firmware_memory(kbdev);
+}
+
+/**
+ * set_iterator_trace_enable - Set the value for 'kbase_iter_trace_enable' global variable
+ *                             according to the value of GLB_FEATURES.ITER_TRACE_SUPPORTED bit,
+ *                             and the corresponding device tree entry.
+ * @kbdev: Kernel base device pointer
+ */
+static void set_iterator_trace_enable(struct kbase_device *kbdev)
+{
+	const struct kbase_csf_global_iface *iface = &kbdev->csf.global_iface;
+	bool dev_support_iter_trace = iface->features & GLB_FEATURES_ITER_TRACE_SUPPORTED_MASK;
+	const void *dt_iter_trace_param;
+	unsigned int val;
+
+	if (!dev_support_iter_trace) {
+		kbase_iter_trace_enable = false;
+		return;
+	}
+
+
+	/* Check the device tree for the "iter-trace-enable" property, falling back
+	 * to the "iter_trace_enable" spelling if the former is not present.
+	 */
+	dt_iter_trace_param = of_get_property(kbdev->dev->of_node, "iter-trace-enable", NULL);
+
+	if (!dt_iter_trace_param)
+		dt_iter_trace_param =
+			of_get_property(kbdev->dev->of_node, "iter_trace_enable", NULL);
+
+	val = (dt_iter_trace_param) ? be32_to_cpup(dt_iter_trace_param) : 0;
+	dev_dbg(kbdev->dev, "Iterator trace enable device-tree config value: %u", val);
+
+	kbase_iter_trace_enable = val ? true : false;
+}
+
+static void iterator_trace_reinit(struct kbase_device *kbdev)
+{
+	if (kbase_iter_trace_enable) {
+		kbase_csf_firmware_global_input_mask(&kbdev->csf.global_iface, GLB_REQ,
+						     GLB_REQ_ITER_TRACE_ENABLE_MASK,
+						     GLB_REQ_ITER_TRACE_ENABLE_MASK);
+	}
+}
+
+static void iterator_trace_init(struct kbase_device *kbdev)
+{
+	set_iterator_trace_enable(kbdev);
+	iterator_trace_reinit(kbdev);
+}
+
 /* The shared interface area, used for communicating with firmware, is managed
  * like a virtual memory zone. Reserve the virtual space from that zone
  * corresponding to shared interface entry parsed from the firmware image.
@@ -217,7 +308,7 @@ void kbase_csf_firmware_disable_mcu(struct kbase_device *kbdev)
 	kbase_reg_write32(kbdev, GPU_CONTROL_ENUM(MCU_CONTROL), MCU_CONTROL_REQ_DISABLE);
 }
 
-static void wait_for_firmware_stop(struct kbase_device *kbdev)
+void kbase_csf_firmware_disable_mcu_wait(struct kbase_device *kbdev)
 {
 	u32 val;
 	const u32 timeout_us =
@@ -232,17 +323,12 @@ static void wait_for_firmware_stop(struct kbase_device *kbdev)
 	KBASE_TLSTREAM_TL_KBASE_CSFFW_FW_OFF(kbdev, kbase_backend_get_cycle_cnt(kbdev));
 }
 
-void kbase_csf_firmware_disable_mcu_wait(struct kbase_device *kbdev)
-{
-	wait_for_firmware_stop(kbdev);
-}
-
-static void stop_csf_firmware(struct kbase_device *kbdev)
+void kbase_csf_stop_firmware_and_wait(struct kbase_device *kbdev)
 {
 	/* Stop the MCU firmware */
 	kbase_csf_firmware_disable_mcu(kbdev);
 
-	wait_for_firmware_stop(kbdev);
+	kbase_csf_firmware_disable_mcu_wait(kbdev);
 }
 
 static void wait_for_firmware_boot(struct kbase_device *kbdev)
@@ -261,7 +347,6 @@ static void wait_for_firmware_boot(struct kbase_device *kbdev)
 	 */
 	remaining = wait_event_timeout(kbdev->csf.event_wait, kbdev->csf.interrupt_received == true,
 				       wait_timeout);
-
 	if (!remaining)
 		dev_err(kbdev->dev, "Timed out waiting for fw boot completion");
 
@@ -485,6 +570,8 @@ static int reload_fw_image(struct kbase_device *kbdev)
 	kbdev->csf.firmware_full_reload_needed = false;
 
 	kbase_csf_firmware_reload_trace_buffers_data(kbdev);
+	reinit_page_fault_cnt_firmware_memory(kbdev);
+	iterator_trace_reinit(kbdev);
 out:
 	return ret;
 }
@@ -1043,6 +1130,14 @@ static int load_firmware_entry(struct kbase_device *kbdev, const struct kbase_cs
 		}
 		kbase_csf_firmware_log_parse_logging_call_list_entry(kbdev, entry);
 		return 0;
+	case CSF_FIRMWARE_ENTRY_TYPE_PAGE_FAULT_CNT:
+		/* Entry about the location of page fault counter */
+		if (size < sizeof(*entry)) {
+			dev_err(kbdev->dev, "Page fault counter entry too short (size=%u)", size);
+			return -EINVAL;
+		}
+		kbdev->csf.page_fault_cnt_ptr_address = *entry;
+		return 0;
 	case CSF_FIRMWARE_ENTRY_TYPE_CORE_DUMP:
 		/* Core Dump section */
 		if (size < CORE_DUMP_ENTRY_START_ADDR_OFFSET + sizeof(*entry)) {
@@ -1552,7 +1647,6 @@ static bool global_request_complete(struct kbase_device *const kbdev, u32 const
 	unsigned long flags;
 
 	kbase_csf_scheduler_spin_lock(kbdev, &flags);
-
 	if ((kbase_csf_firmware_global_output(global_iface, GLB_ACK) & req_mask) ==
 	    (kbase_csf_firmware_global_input_read(global_iface, GLB_REQ) & req_mask))
 		complete = true;
@@ -1644,9 +1738,27 @@ static void set_timeout_global(const struct kbase_csf_global_iface *const global
 	set_global_request(global_iface, GLB_REQ_CFG_PROGRESS_TIMER_MASK);
 }
 
+static inline void set_gpu_idle_timer_glb_req(struct kbase_device *const kbdev, bool set)
+{
+	struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface;
+
+	kbase_csf_scheduler_spin_lock_assert_held(kbdev);
+
+	if (set) {
+		kbase_csf_firmware_global_input_mask(global_iface, GLB_REQ, GLB_REQ_REQ_IDLE_ENABLE,
+						     GLB_REQ_IDLE_ENABLE_MASK);
+	} else {
+		kbase_csf_firmware_global_input_mask(
+			global_iface, GLB_REQ, GLB_REQ_REQ_IDLE_DISABLE, GLB_REQ_IDLE_DISABLE_MASK);
+	}
+
+	atomic_set(&kbdev->csf.scheduler.gpu_idle_timer_enabled, set);
+}
+
 static void enable_gpu_idle_timer(struct kbase_device *const kbdev)
 {
 	struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface;
+	bool const fw_soi_allowed = kbase_pm_fw_sleep_on_idle_allowed(kbdev);
 
 	kbase_csf_scheduler_spin_lock_assert_held(kbdev);
 
@@ -1654,15 +1766,114 @@ static void enable_gpu_idle_timer(struct kbase_device *const kbdev)
 					kbdev->csf.gpu_idle_dur_count);
 
 	kbase_csf_firmware_global_input_mask(global_iface, GLB_IDLE_TIMER_CONFIG,
-					     kbdev->csf.gpu_idle_dur_count_no_modifier,
+					     kbdev->csf.gpu_idle_dur_count_no_modifier
+						     << GLB_IDLE_TIMER_CONFIG_NO_MODIFIER_SHIFT,
 					     GLB_IDLE_TIMER_CONFIG_NO_MODIFIER_MASK);
+	kbase_csf_firmware_global_input_mask(global_iface, GLB_IDLE_TIMER_CONFIG,
+					     fw_soi_allowed
+						     << GLB_IDLE_TIMER_CONFIG_SLEEP_ON_IDLE_SHIFT,
+					     GLB_IDLE_TIMER_CONFIG_SLEEP_ON_IDLE_MASK);
 
-	kbase_csf_firmware_global_input_mask(global_iface, GLB_REQ, GLB_REQ_REQ_IDLE_ENABLE,
-					     GLB_REQ_IDLE_ENABLE_MASK);
+	set_gpu_idle_timer_glb_req(kbdev, true);
+	atomic_set(&kbdev->csf.scheduler.fw_soi_enabled, fw_soi_allowed);
 	dev_dbg(kbdev->dev, "Enabling GPU idle timer with count-value: 0x%.8x",
 		kbdev->csf.gpu_idle_dur_count);
 }
 
+/**
+ * convert_dur_to_suspend_count() - Convert CSG suspend timeout from ms to cycle count
+ * @kbdev:        Instance of a GPU platform device that implements a CSF interface
+ * @dur_ms:       Timeout value in ms
+ * @no_modifier:  Output; set to 0 when the count was right-shifted by 10 bits, 1 otherwise
+ *
+ * Convert CSG suspend timeout from ms to cycle count, then generate a register value
+ * combining cycle count and timer source
+ *
+ * Return:  Register value which will be stored into register GLB_EVICTION_TIMER.
+ */
+static u32 convert_dur_to_suspend_count(struct kbase_device *kbdev, const u64 dur_ms,
+					u32 *no_modifier)
+{
+	/* Get the cntfreq_el0 value, which drives the SYSTEM_TIMESTAMP */
+	u64 freq = kbase_arch_timer_get_cntfrq(kbdev);
+	u64 dur_val = dur_ms;
+	u32 cnt_val_u32, reg_val_u32;
+	const bool src_system_timestamp = freq > 0;
+	const u8 SUSPEND_VAL_UNIT_SHIFT = 10;
+
+	if (!src_system_timestamp) {
+		/* Get the cycle_counter source alternative */
+		spin_lock(&kbdev->pm.clk_rtm.lock);
+		if (kbdev->pm.clk_rtm.clks[0])
+			freq = kbdev->pm.clk_rtm.clks[0]->clock_val;
+		else
+			dev_err(kbdev->dev, "No GPU clock, unexpected intregration issue!");
+		spin_unlock(&kbdev->pm.clk_rtm.lock);
+
+		dev_info(kbdev->dev,
+			 "No timestamp frequency, use cycle counter for csg suspend timeout!");
+	}
+
+	/* Formula: dur_val = (dur_ms / 1e3) * freq_Hz */
+	dur_val = dur_val * freq;
+	dur_val = div_u64(dur_val, MSEC_PER_SEC);
+	if (dur_val < S32_MAX) {
+		*no_modifier = 1;
+	} else {
+		dur_val = dur_val >> SUSPEND_VAL_UNIT_SHIFT;
+		*no_modifier = 0;
+	}
+
+	/* Interface limits the value field to S32_MAX */
+	cnt_val_u32 = (dur_val > S32_MAX) ? S32_MAX : (u32)dur_val;
+
+	reg_val_u32 = GLB_EVICTION_TIMER_TIMEOUT_SET(0, cnt_val_u32);
+	/* add the source flag */
+	reg_val_u32 = GLB_EVICTION_TIMER_TIMER_SOURCE_SET(
+		reg_val_u32,
+		(src_system_timestamp ? GLB_EVICTION_TIMER_TIMER_SOURCE_SYSTEM_TIMESTAMP :
+					      GLB_EVICTION_TIMER_TIMER_SOURCE_GPU_COUNTER));
+
+	return reg_val_u32;
+}
+
+/**
+ * set_csg_suspend_timeout() - Update CSG suspend timeout setting on FW side
+ *
+ * @kbdev:        Instance of a GPU platform device that implements a CSF interface
+ */
+static void set_csg_suspend_timeout(struct kbase_device *const kbdev)
+{
+	u32 dur_ms, dur_val;
+	u32 no_modifier = 0;
+	struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface;
+
+	kbase_csf_scheduler_spin_lock_assert_held(kbdev);
+
+	dur_ms = kbdev->csf.csg_suspend_timeout_ms;
+	if (unlikely(dur_ms < CSG_SUSPEND_TIMEOUT_FIRMWARE_MS_MIN +
+				      CSG_SUSPEND_TIMEOUT_HOST_ADDED_MS ||
+		     dur_ms > CSG_SUSPEND_TIMEOUT_FIRMWARE_MS_MAX +
+				      CSG_SUSPEND_TIMEOUT_HOST_ADDED_MS)) {
+		dev_err(kbdev->dev, "Unexpected CSG suspend timeout: %ums, default to: %ums",
+			dur_ms, CSG_SUSPEND_TIMEOUT_MS);
+		kbdev->csf.csg_suspend_timeout_ms = CSG_SUSPEND_TIMEOUT_MS;
+		dur_ms = CSG_SUSPEND_TIMEOUT_MS;
+	}
+	dur_ms = dur_ms - CSG_SUSPEND_TIMEOUT_HOST_ADDED_MS;
+
+	dur_val = convert_dur_to_suspend_count(kbdev, dur_ms, &no_modifier);
+
+	kbase_csf_firmware_global_input(global_iface, GLB_EVICTION_TIMER, dur_val);
+
+	kbase_csf_firmware_global_input_mask(global_iface, GLB_EVICTION_TIMER_CONFIG, no_modifier,
+					     GLB_EVICTION_TIMER_CONFIG_NO_MODIFIER_MASK);
+
+	set_global_request(global_iface, GLB_REQ_CFG_EVICTION_TIMER_MASK);
+
+	dev_dbg(kbdev->dev, "Updating CSG suspend timeout with count-value: 0x%.8x", dur_val);
+}
+
 static bool global_debug_request_complete(struct kbase_device *const kbdev, u32 const req_mask)
 {
 	struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface;
@@ -1751,7 +1962,8 @@ static void global_init(struct kbase_device *const kbdev, u64 core_mask)
 		GLB_ACK_IRQ_MASK_CFG_PROGRESS_TIMER_MASK | GLB_ACK_IRQ_MASK_PROTM_ENTER_MASK |
 		GLB_ACK_IRQ_MASK_PROTM_EXIT_MASK | GLB_ACK_IRQ_MASK_FIRMWARE_CONFIG_UPDATE_MASK |
 		GLB_ACK_IRQ_MASK_CFG_PWROFF_TIMER_MASK | GLB_ACK_IRQ_MASK_IDLE_EVENT_MASK |
-		GLB_REQ_DEBUG_CSF_REQ_MASK | GLB_ACK_IRQ_MASK_IDLE_ENABLE_MASK;
+		GLB_REQ_DEBUG_CSF_REQ_MASK | GLB_ACK_IRQ_MASK_IDLE_ENABLE_MASK |
+		GLB_ACK_IRQ_MASK_CFG_EVICTION_TIMER_MASK | GLB_ACK_IRQ_MASK_ITER_TRACE_ENABLE_MASK;
 
 	const struct kbase_csf_global_iface *const global_iface = &kbdev->csf.global_iface;
 	unsigned long flags;
@@ -1766,11 +1978,10 @@ static void global_init(struct kbase_device *const kbdev, u64 core_mask)
 
 	set_timeout_global(global_iface, kbase_csf_timeout_get(kbdev));
 
-	/* The GPU idle timer is always enabled for simplicity. Checks will be
-	 * done before scheduling the GPU idle worker to see if it is
-	 * appropriate for the current power policy.
+	/* The CSG suspend timeout is always enabled so that the customer has the flexibility to
+	 * update it at any time.
 	 */
-	enable_gpu_idle_timer(kbdev);
+	set_csg_suspend_timeout(kbdev);
 
 	/* Unmask the interrupts */
 	kbase_csf_firmware_global_input(global_iface, GLB_ACK_IRQ_MASK, ack_irq_mask);
@@ -1890,6 +2101,7 @@ static void kbase_csf_firmware_reload_worker(struct work_struct *work)
 {
 	struct kbase_device *kbdev =
 		container_of(work, struct kbase_device, csf.firmware_reload_work);
+	unsigned long flags;
 	int err;
 
 	dev_info(kbdev->dev, "reloading firmware");
@@ -1908,7 +2120,9 @@ static void kbase_csf_firmware_reload_worker(struct work_struct *work)
 		return;
 
 	/* Reboot the firmware */
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
 	kbase_csf_firmware_enable_mcu(kbdev);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
 }
 
 void kbase_csf_firmware_trigger_reload(struct kbase_device *kbdev)
@@ -1945,6 +2159,7 @@ void kbase_csf_firmware_reload_completed(struct kbase_device *kbdev)
 
 	KBASE_KTRACE_ADD(kbdev, CSF_FIRMWARE_REBOOT, NULL, 0u);
 
+
 	/* Tell MCU state machine to transit to next state */
 	kbdev->csf.firmware_reloaded = true;
 	kbase_pm_update_state(kbdev);
@@ -2045,29 +2260,44 @@ u32 kbase_csf_firmware_set_gpu_idle_hysteresis_time(struct kbase_device *kbdev,
 		return kbdev->csf.gpu_idle_dur_count;
 	}
 
-	/* The 'reg_lock' is also taken and is held till the update is not
+	/* The scheduler lock is also taken and is held until the update is
 	 * complete, to ensure the update of idle timer value by multiple Users
 	 * gets serialized.
 	 */
-	mutex_lock(&kbdev->csf.reg_lock);
-	/* The firmware only reads the new idle timer value when the timer is
-	 * disabled.
-	 */
+	kbase_csf_scheduler_lock(kbdev);
+	while (atomic_read(&kbdev->csf.scheduler.pending_gpu_idle_work) > 0) {
+		kbase_csf_scheduler_unlock(kbdev);
+		kbase_csf_scheduler_wait_for_kthread_pending_work(
+			kbdev, &kbdev->csf.scheduler.pending_gpu_idle_work);
+		kbase_csf_scheduler_lock(kbdev);
+	}
 	kbase_csf_scheduler_spin_lock(kbdev, &flags);
-	kbase_csf_firmware_disable_gpu_idle_timer(kbdev);
-	kbase_csf_scheduler_spin_unlock(kbdev, flags);
-	/* Ensure that the request has taken effect */
-	wait_for_global_request(kbdev, GLB_REQ_IDLE_DISABLE_MASK);
 
-	kbase_csf_scheduler_spin_lock(kbdev, &flags);
 	kbdev->csf.gpu_idle_hysteresis_ns = dur_ns;
 	kbdev->csf.gpu_idle_dur_count = hysteresis_val;
 	kbdev->csf.gpu_idle_dur_count_no_modifier = no_modifier;
-	kbase_csf_firmware_enable_gpu_idle_timer(kbdev);
-	kbase_csf_scheduler_spin_unlock(kbdev, flags);
-	wait_for_global_request(kbdev, GLB_REQ_IDLE_ENABLE_MASK);
-	mutex_unlock(&kbdev->csf.reg_lock);
 
+	if (atomic_read(&kbdev->csf.scheduler.gpu_idle_timer_enabled)) {
+		/* Timer is already enabled. Disable the timer as FW only reads
+		 * the new idle timer value when timer is re-enabled.
+		 */
+		kbase_csf_firmware_disable_gpu_idle_timer(kbdev);
+		kbase_csf_scheduler_spin_unlock(kbdev, flags);
+		/* Ensure that the request has taken effect */
+		if (wait_for_global_request(kbdev, GLB_REQ_IDLE_DISABLE_MASK))
+			dev_err(kbdev->dev,
+				"Failed to disable GLB_IDLE timer when setting a new idle hysteresis timeout");
+		kbase_csf_scheduler_spin_lock(kbdev, &flags);
+		kbase_csf_firmware_enable_gpu_idle_timer(kbdev);
+		kbase_csf_scheduler_spin_unlock(kbdev, flags);
+		if (wait_for_global_request(kbdev, GLB_REQ_IDLE_ENABLE_MASK))
+			dev_err(kbdev->dev,
+				"Failed to re-enable GLB_IDLE timer when setting a new idle hysteresis timeout");
+	} else {
+		kbase_csf_scheduler_spin_unlock(kbdev, flags);
+	}
+
+	kbase_csf_scheduler_unlock(kbdev);
 	kbase_csf_scheduler_pm_idle(kbdev);
 	kbase_reset_gpu_allow(kbdev);
 end:
@@ -2168,78 +2398,6 @@ u32 kbase_csf_firmware_reset_mcu_core_pwroff_time(struct kbase_device *kbdev)
 	return kbase_csf_firmware_set_mcu_core_pwroff_time(kbdev, DEFAULT_GLB_PWROFF_TIMEOUT_NS);
 }
 
-/**
- * kbase_csf_get_iterator_trace_enable - Parsing the iterator_trace enable firstly from
- *                                       the module parameter, and then from device-tree.
- * @kbdev: Kernel base device pointer
- *
- * Return: true on enabled, otherwise false.
- */
-static bool kbase_csf_get_iterator_trace_enable(struct kbase_device *kbdev)
-{
-	const void *dt_iter_trace_param;
-	unsigned int val;
-
-
-	/* check device tree for iterator trace enable property and
-	 * fallback to "iter_trace_enable" if not found and try again
-	 */
-	dt_iter_trace_param = of_get_property(kbdev->dev->of_node, "iter-trace-enable", NULL);
-
-	if (!dt_iter_trace_param)
-		dt_iter_trace_param =
-			of_get_property(kbdev->dev->of_node, "iter_trace_enable", NULL);
-
-	val = (dt_iter_trace_param) ? be32_to_cpup(dt_iter_trace_param) : 0;
-	dev_dbg(kbdev->dev, "Iterator trace enable device-tree config value: %u", val);
-
-	return (val != 0);
-}
-
-/**
- * kbase_device_csf_iterator_trace_init - Send request to enable iterator
- *                                        trace port.
- * @kbdev: Kernel base device pointer
- *
- * Return: 0 on success (or if enable request is not sent), or error
- *         code -EINVAL on failure of GPU to acknowledge enable request.
- */
-static int kbase_device_csf_iterator_trace_init(struct kbase_device *kbdev)
-{
-	/* Enable the iterator trace port if supported by the GPU and is
-	 * configured to do so. The FW must advertise this feature in GLB_FEATURES.
-	 */
-	if (kbdev->pm.backend.gpu_powered) {
-		const struct kbase_csf_global_iface *iface = &kbdev->csf.global_iface;
-		bool dev_support_iter_trace = iface->features &
-					      GLB_FEATURES_ITER_TRACE_SUPPORTED_MASK;
-
-		dev_dbg(kbdev->dev, "Device supporting iterator trace: %s\n",
-			dev_support_iter_trace ? "true" : "false");
-		if (dev_support_iter_trace && kbase_csf_get_iterator_trace_enable(kbdev)) {
-			long ack_timeout = kbase_csf_timeout_in_jiffies(
-				kbase_get_timeout_ms(kbdev, CSF_FIRMWARE_TIMEOUT));
-
-			/* write enable request to global input */
-			kbase_csf_firmware_global_input_mask(iface, GLB_REQ,
-							     GLB_REQ_ITER_TRACE_ENABLE_MASK,
-							     GLB_REQ_ITER_TRACE_ENABLE_MASK);
-			/* Ring global doorbell */
-			kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR);
-
-			ack_timeout = wait_event_timeout(
-				kbdev->csf.event_wait,
-				!((kbase_csf_firmware_global_input_read(iface, GLB_REQ) ^
-				   kbase_csf_firmware_global_output(iface, GLB_ACK)) &
-				  GLB_REQ_ITER_TRACE_ENABLE_MASK),
-				ack_timeout);
-
-			return ack_timeout ? 0 : -EINVAL;
-		}
-	}
-	return 0;
-}
-
 int kbase_csf_firmware_early_init(struct kbase_device *kbdev)
 {
 	init_waitqueue_head(&kbdev->csf.event_wait);
@@ -2253,10 +2411,9 @@ int kbase_csf_firmware_early_init(struct kbase_device *kbdev)
 	INIT_WORK(&kbdev->csf.firmware_reload_work, kbase_csf_firmware_reload_worker);
 	INIT_WORK(&kbdev->csf.fw_error_work, firmware_error_worker);
 
-	kbdev->csf.glb_init_request_pending = true;
-
+	init_rwsem(&kbdev->csf.mmu_sync_sem);
 	mutex_init(&kbdev->csf.reg_lock);
-	kbase_csf_pending_gpuq_kicks_init(kbdev);
+	kbase_csf_pending_gpuq_kick_queues_init(kbdev);
 
 	kbdev->csf.fw = (struct kbase_csf_mcu_fw){ .data = NULL };
 
@@ -2265,7 +2422,7 @@ int kbase_csf_firmware_early_init(struct kbase_device *kbdev)
 
 void kbase_csf_firmware_early_term(struct kbase_device *kbdev)
 {
-	kbase_csf_pending_gpuq_kicks_term(kbdev);
+	kbase_csf_pending_gpuq_kick_queues_term(kbdev);
 	mutex_destroy(&kbdev->csf.reg_lock);
 }
 
@@ -2284,6 +2441,8 @@ int kbase_csf_firmware_late_init(struct kbase_device *kbdev)
 		convert_dur_to_idle_count(kbdev, kbdev->csf.gpu_idle_hysteresis_ns, &no_modifier);
 	kbdev->csf.gpu_idle_dur_count_no_modifier = no_modifier;
 
+	kbdev->csf.csg_suspend_timeout_ms = CSG_SUSPEND_TIMEOUT_MS;
+
 	return 0;
 }
 
@@ -2472,6 +2631,8 @@ int kbase_csf_firmware_load_init(struct kbase_device *kbdev)
 		goto err_out;
 	}
 
+	init_page_fault_cnt_firmware_memory(kbdev);
+
 	ret = kbase_csf_firmware_cfg_fw_wa_init(kbdev);
 	if (ret != 0) {
 		dev_err(kbdev->dev, "Failed to initialize firmware workarounds");
@@ -2492,6 +2653,8 @@ int kbase_csf_firmware_load_init(struct kbase_device *kbdev)
 	if (ret != 0)
 		goto err_out;
 
+	iterator_trace_init(kbdev);
+
 	ret = kbase_csf_doorbell_mapping_init(kbdev);
 	if (ret != 0)
 		goto err_out;
@@ -2522,10 +2685,6 @@ int kbase_csf_firmware_load_init(struct kbase_device *kbdev)
 	if (ret != 0)
 		goto err_out;
 
-	ret = kbase_device_csf_iterator_trace_init(kbdev);
-	if (ret != 0)
-		goto err_out;
-
 	if (kbdev->csf.fw_core_dump.available)
 		kbase_csf_firmware_core_dump_init(kbdev);
 
@@ -2575,7 +2734,7 @@ void kbase_csf_firmware_unload_term(struct kbase_device *kbdev)
 	kbdev->csf.firmware_inited = false;
 	if (WARN_ON(kbdev->pm.backend.mcu_state != KBASE_MCU_OFF)) {
 		kbdev->pm.backend.mcu_state = KBASE_MCU_OFF;
-		stop_csf_firmware(kbdev);
+		kbase_csf_stop_firmware_and_wait(kbdev);
 	}
 	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
 
@@ -2731,7 +2890,7 @@ int kbase_csf_firmware_mcu_register_poll(struct kbase_device *const kbdev, u32 c
 	unsigned long remaining =
 		kbase_csf_timeout_in_jiffies(kbase_get_timeout_ms(kbdev, CSF_FIRMWARE_TIMEOUT)) +
 		jiffies;
-	u32 read_val;
+	u32 read_val = 0;
 
 	dev_dbg(kbdev->dev, "p: reg %08x val %08x mask %08x", reg_addr, reg_val, val_mask);
 
@@ -2778,12 +2937,10 @@ void kbase_csf_firmware_enable_gpu_idle_timer(struct kbase_device *kbdev)
 
 void kbase_csf_firmware_disable_gpu_idle_timer(struct kbase_device *kbdev)
 {
-	struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface;
-
 	kbase_csf_scheduler_spin_lock_assert_held(kbdev);
 
-	kbase_csf_firmware_global_input_mask(global_iface, GLB_REQ, GLB_REQ_REQ_IDLE_DISABLE,
-					     GLB_REQ_IDLE_DISABLE_MASK);
+	set_gpu_idle_timer_glb_req(kbdev, false);
+	atomic_set(&kbdev->csf.scheduler.fw_soi_enabled, false);
 	dev_dbg(kbdev->dev, "Sending request to disable gpu idle timer");
 
 	kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR);
@@ -2807,6 +2964,7 @@ int kbase_csf_firmware_ping_wait(struct kbase_device *const kbdev, unsigned int
 	return wait_for_global_request_with_timeout(kbdev, GLB_REQ_PING_MASK, wait_timeout_ms);
 }
 
+
 int kbase_csf_firmware_set_timeout(struct kbase_device *const kbdev, u64 const timeout)
 {
 	const struct kbase_csf_global_iface *const global_iface = &kbdev->csf.global_iface;
@@ -2845,8 +3003,6 @@ int kbase_csf_wait_protected_mode_enter(struct kbase_device *kbdev)
 {
 	int err;
 
-	lockdep_assert_held(&kbdev->mmu_hw_mutex);
-
 	err = wait_for_global_request(kbdev, GLB_REQ_PROTM_ENTER_MASK);
 
 	if (!err) {
@@ -2912,6 +3068,7 @@ void kbase_csf_firmware_enable_mcu(struct kbase_device *kbdev)
 {
 	struct kbase_csf_global_iface *iface = &kbdev->csf.global_iface;
 
+	lockdep_assert_held(&kbdev->hwaccess_lock);
 		/* Clear the HALT bit before triggering the boot of MCU firmware */
 		kbase_csf_firmware_global_input_mask(iface, GLB_REQ, 0, GLB_REQ_HALT_MASK);
 
@@ -2935,11 +3092,23 @@ void kbase_csf_firmware_trigger_mcu_sleep(struct kbase_device *kbdev)
 
 bool kbase_csf_firmware_is_mcu_in_sleep(struct kbase_device *kbdev)
 {
+	bool db_notif_disabled;
+
 	lockdep_assert_held(&kbdev->hwaccess_lock);
 
 
-	return (global_request_complete(kbdev, GLB_REQ_SLEEP_MASK) &&
-		kbase_csf_firmware_mcu_halted(kbdev));
+	db_notif_disabled = kbase_reg_read32(kbdev, GPU_CONTROL_ENUM(MCU_CONTROL)) &
+			    MCU_CNTRL_DOORBELL_DISABLE_MASK;
+
+	if (!db_notif_disabled || !kbase_csf_firmware_mcu_halted(kbdev))
+		return false;
+
+	if (global_request_complete(kbdev, GLB_REQ_SLEEP_MASK))
+		return true;
+
+	kbase_pm_enable_mcu_db_notification(kbdev);
+	dev_dbg(kbdev->dev, "Enabled DB notification");
+	return false;
 }
 #endif
 
@@ -3191,6 +3360,9 @@ void kbase_csf_firmware_mcu_shared_mapping_term(struct kbase_device *kbdev,
 	}
 
 	if (csf_mapping->phys) {
+		/* This is on the module unload path, so the pages can be left uncleared
+		 * before returning them to the kbdev memory pool.
+		 */
 		kbase_mem_pool_free_pages(&kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW],
 					  csf_mapping->num_pages, csf_mapping->phys, false, false);
 	}
@@ -3198,3 +3370,127 @@ void kbase_csf_firmware_mcu_shared_mapping_term(struct kbase_device *kbdev,
 	vunmap(csf_mapping->cpu_addr);
 	kfree(csf_mapping->phys);
 }
+
+#ifdef KBASE_PM_RUNTIME
+
+void kbase_csf_firmware_soi_update(struct kbase_device *kbdev)
+{
+	struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
+	unsigned long flags;
+
+	/* There are 3 possibilities:
+	 * - Sleep-on-Idle allowed
+	 * - Sleep-on-Idle not allowed, GLB_IDLE timer disabled
+	 * - Sleep-on-Idle not allowed, GLB_IDLE timer enabled
+	 */
+	if (kbase_pm_fw_sleep_on_idle_allowed(kbdev)) {
+		if (likely(atomic_read(&kbdev->csf.scheduler.fw_soi_enabled)))
+			return;
+	} else {
+		if (test_bit(KBASE_GPU_NON_IDLE_OFF_SLOT_GROUPS_AVAILABLE,
+			     &kbdev->pm.backend.gpu_sleep_allowed)) {
+			if (likely(!atomic_read(&kbdev->csf.scheduler.gpu_idle_timer_enabled)))
+				return;
+		} else if (likely(atomic_read(&kbdev->csf.scheduler.gpu_idle_timer_enabled))) {
+			return;
+		}
+	}
+
+	if (kbase_reset_gpu_try_prevent(kbdev))
+		return;
+
+	kbase_csf_scheduler_lock(kbdev);
+
+	if (atomic_read(&scheduler->pending_gpu_idle_work) > 0)
+		goto out_unlock_scheduler_lock;
+
+	if ((scheduler->state == SCHED_SUSPENDED) || (scheduler->state == SCHED_SLEEPING))
+		goto out_unlock_scheduler_lock;
+
+	if (kbdev->pm.backend.mcu_state != KBASE_MCU_ON)
+		goto out_unlock_scheduler_lock;
+
+	/* Ensure that any outstanding DISABLE request has completed before
+	 * proceeding. Such requests may be issued without waiting for their
+	 * completion, for example when enabling the MCU.
+	 */
+	if (wait_for_global_request(kbdev, GLB_REQ_IDLE_DISABLE_MASK)) {
+		dev_err(kbdev->dev,
+			"Existing GLB_IDLE timer config change failed to complete in time (gpu_sleep_allowed:%lx)",
+			kbdev->pm.backend.gpu_sleep_allowed);
+		goto out_unlock_scheduler_lock;
+	}
+
+	/* Disable the GLB IDLE timer if it's currently enabled */
+	if (atomic_read(&kbdev->csf.scheduler.gpu_idle_timer_enabled)) {
+		kbase_csf_scheduler_spin_lock(kbdev, &flags);
+		kbase_csf_firmware_disable_gpu_idle_timer(kbdev);
+		kbase_csf_scheduler_spin_unlock(kbdev, flags);
+		if (wait_for_global_request(kbdev, GLB_REQ_IDLE_DISABLE_MASK)) {
+			dev_err(kbdev->dev,
+				"Failed to disable GLB_IDLE timer following FW Sleep-on-Idle config change (gpu_sleep_allowed:%lx)",
+				kbdev->pm.backend.gpu_sleep_allowed);
+			goto out_unlock_scheduler_lock;
+		}
+	}
+
+	/* The GLB IDLE timer and, consequently, FW Sleep-on-Idle could remain
+	 * disabled in certain cases. Otherwise, we shall re-enable GLB IDLE
+	 * timer with the new FW Sleep-on-Idle configuration.
+	 */
+	if (!test_bit(KBASE_GPU_NON_IDLE_OFF_SLOT_GROUPS_AVAILABLE,
+		      &kbdev->pm.backend.gpu_sleep_allowed)) {
+		kbase_csf_scheduler_spin_lock(kbdev, &flags);
+		kbase_csf_firmware_enable_gpu_idle_timer(kbdev);
+		kbase_csf_scheduler_spin_unlock(kbdev, flags);
+		if (wait_for_global_request(kbdev, GLB_REQ_IDLE_ENABLE_MASK)) {
+			dev_err(kbdev->dev,
+				"Failed to re-enable GLB_IDLE timer following FW Sleep-on-Idle config change (gpu_sleep_allowed:%lx)",
+				kbdev->pm.backend.gpu_sleep_allowed);
+			goto out_unlock_scheduler_lock;
+		}
+	}
+
+	if (atomic_read(&scheduler->fw_soi_enabled)) {
+		dev_dbg(kbdev->dev, "FW Sleep-on-Idle was enabled");
+		KBASE_KTRACE_ADD(kbdev, FIRMWARE_SLEEP_ON_IDLE_CHANGED, NULL, true);
+	} else {
+		dev_dbg(kbdev->dev, "FW Sleep-on-Idle was disabled");
+		KBASE_KTRACE_ADD(kbdev, FIRMWARE_SLEEP_ON_IDLE_CHANGED, NULL, false);
+	}
+
+out_unlock_scheduler_lock:
+	kbase_csf_scheduler_unlock(kbdev);
+	kbase_reset_gpu_allow(kbdev);
+}
+
+int kbase_csf_firmware_soi_disable_on_scheduler_suspend(struct kbase_device *kbdev)
+{
+	struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
+	unsigned long flags;
+
+	lockdep_assert_held(&scheduler->lock);
+
+	if (WARN_ON_ONCE(scheduler->state != SCHED_INACTIVE))
+		return 0;
+
+	if (!atomic_read(&kbdev->csf.scheduler.fw_soi_enabled))
+		return 0;
+
+	kbase_csf_scheduler_spin_lock(kbdev, &flags);
+	if (atomic_read(&scheduler->fw_soi_enabled)) {
+		kbase_csf_firmware_disable_gpu_idle_timer(kbdev);
+		kbase_csf_scheduler_spin_unlock(kbdev, flags);
+		if (wait_for_global_request(kbdev, GLB_REQ_IDLE_DISABLE_MASK)) {
+			dev_err(kbdev->dev, "Failed to disable Sleep-on-Idle config");
+			return -ETIMEDOUT;
+		}
+		KBASE_KTRACE_ADD(kbdev, FIRMWARE_SLEEP_ON_IDLE_CHANGED, NULL, false);
+	} else {
+		kbase_csf_scheduler_spin_unlock(kbdev, flags);
+	}
+
+	return 0;
+}
+
+#endif /* KBASE_PM_RUNTIME */
diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware.h b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware.h
index a2948a98e9a7..20cb03991bbe 100644
--- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware.h
+++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware.h
@@ -1,7 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
 /*
  *
- * (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2018-2024 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -591,13 +591,20 @@ void kbase_csf_firmware_enable_mcu(struct kbase_device *kbdev);
 void kbase_csf_firmware_disable_mcu(struct kbase_device *kbdev);
 
 /**
- * kbase_csf_firmware_disable_mcu_wait - Wait for the MCU to reach disabled
- *                                       status.
+ * kbase_csf_firmware_disable_mcu_wait - Wait for the MCU to reach disabled status.
  *
  * @kbdev: Instance of a GPU platform device that implements a CSF interface.
  */
 void kbase_csf_firmware_disable_mcu_wait(struct kbase_device *kbdev);
 
+/**
+ * kbase_csf_stop_firmware_and_wait - Disable firmware and wait for the MCU to reach
+ *                                    disabled status.
+ *
+ * @kbdev: Instance of a GPU platform device that implements a CSF interface.
+ */
+void kbase_csf_stop_firmware_and_wait(struct kbase_device *kbdev);
+
 #ifdef KBASE_PM_RUNTIME
 /**
  * kbase_csf_firmware_trigger_mcu_sleep - Send the command to put MCU in sleep
@@ -618,6 +625,7 @@ void kbase_csf_firmware_trigger_mcu_sleep(struct kbase_device *kbdev);
 bool kbase_csf_firmware_is_mcu_in_sleep(struct kbase_device *kbdev);
 #endif
 
+
 /**
  * kbase_csf_firmware_trigger_reload() - Trigger the reboot of MCU firmware, for
  *                                       the cold boot case firmware image would
@@ -926,4 +934,27 @@ int kbase_csf_trigger_firmware_config_update(struct kbase_device *kbdev);
  */
 int kbase_csf_firmware_req_core_dump(struct kbase_device *const kbdev);
 
+#ifdef KBASE_PM_RUNTIME
+
+/**
+ * kbase_csf_firmware_soi_update - Update FW Sleep-on-Idle config
+ *
+ * @kbdev: Device pointer
+ *
+ * This function reconfigures the FW Sleep-on-Idle configuration if necessary.
+ */
+void kbase_csf_firmware_soi_update(struct kbase_device *kbdev);
+
+/**
+ * kbase_csf_firmware_soi_disable_on_scheduler_suspend - Disable FW Sleep-on-Idle config
+ *                                                       on scheduler suspension
+ *
+ * @kbdev: Device pointer
+ *
+ * Return: 0 on success, -ETIMEDOUT if the disable request did not complete in time
+ */
+int kbase_csf_firmware_soi_disable_on_scheduler_suspend(struct kbase_device *kbdev);
+
+#endif /* KBASE_PM_RUNTIME */
+
 #endif
diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware_cfg.c b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware_cfg.c
index d08686f5829b..030a1ebf0ac6 100644
--- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware_cfg.c
+++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware_cfg.c
@@ -367,10 +367,10 @@ int kbase_csf_firmware_cfg_fw_wa_init(struct kbase_device *kbdev)
 	 */
 	entry_count = of_property_count_u32_elems(kbdev->dev->of_node, "quirks-ext");
 
-	if (entry_count == -EINVAL)
+	if (entry_count < 0)
 		entry_count = of_property_count_u32_elems(kbdev->dev->of_node, "quirks_ext");
 
-	if (entry_count == -EINVAL || entry_count == -ENODATA)
+	if (entry_count < 0)
 		return 0;
 
 	entry_bytes = (size_t)entry_count * sizeof(u32);
diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware_no_mali.c b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware_no_mali.c
index 90568f6fa09f..a206ed3da210 100644
--- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware_no_mali.c
+++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware_no_mali.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
 /*
  *
- * (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2018-2024 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -670,6 +670,23 @@ static void set_timeout_global(const struct kbase_csf_global_iface *const global
 	set_global_request(global_iface, GLB_REQ_CFG_PROGRESS_TIMER_MASK);
 }
 
+static inline void set_gpu_idle_timer_glb_req(struct kbase_device *const kbdev, bool set)
+{
+	struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface;
+
+	kbase_csf_scheduler_spin_lock_assert_held(kbdev);
+
+	if (set) {
+		kbase_csf_firmware_global_input_mask(global_iface, GLB_REQ, GLB_REQ_REQ_IDLE_ENABLE,
+						     GLB_REQ_IDLE_ENABLE_MASK);
+	} else {
+		kbase_csf_firmware_global_input_mask(
+			global_iface, GLB_REQ, GLB_REQ_REQ_IDLE_DISABLE, GLB_REQ_IDLE_DISABLE_MASK);
+	}
+
+	atomic_set(&kbdev->csf.scheduler.gpu_idle_timer_enabled, set);
+}
+
 static void enable_gpu_idle_timer(struct kbase_device *const kbdev)
 {
 	struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface;
@@ -678,8 +695,11 @@ static void enable_gpu_idle_timer(struct kbase_device *const kbdev)
 
 	kbase_csf_firmware_global_input(global_iface, GLB_IDLE_TIMER,
 					kbdev->csf.gpu_idle_dur_count);
-	kbase_csf_firmware_global_input_mask(global_iface, GLB_REQ, GLB_REQ_REQ_IDLE_ENABLE,
-					     GLB_REQ_IDLE_ENABLE_MASK);
+	kbase_csf_firmware_global_input_mask(global_iface, GLB_IDLE_TIMER_CONFIG,
+					     kbdev->csf.gpu_idle_dur_count_no_modifier,
+					     GLB_IDLE_TIMER_CONFIG_NO_MODIFIER_MASK);
+
+	set_gpu_idle_timer_glb_req(kbdev, true);
 	dev_dbg(kbdev->dev, "Enabling GPU idle timer with count-value: 0x%.8x",
 		kbdev->csf.gpu_idle_dur_count);
 }
@@ -768,12 +788,6 @@ static void global_init(struct kbase_device *const kbdev, u64 core_mask)
 
 	set_timeout_global(global_iface, kbase_csf_timeout_get(kbdev));
 
-	/* The GPU idle timer is always enabled for simplicity. Checks will be
-	 * done before scheduling the GPU idle worker to see if it is
-	 * appropriate for the current power policy.
-	 */
-	enable_gpu_idle_timer(kbdev);
-
 	/* Unmask the interrupts */
 	kbase_csf_firmware_global_input(global_iface, GLB_ACK_IRQ_MASK, ack_irq_mask);
 
@@ -857,11 +871,11 @@ static void kbase_csf_firmware_reload_worker(struct work_struct *work)
 		container_of(work, struct kbase_device, csf.firmware_reload_work);
 	unsigned long flags;
 
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
 	/* Reboot the firmware */
 	kbase_csf_firmware_enable_mcu(kbdev);
 
 	/* Tell MCU state machine to transit to next state */
-	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
 	kbdev->csf.firmware_reloaded = true;
 	kbase_pm_update_state(kbdev);
 	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
@@ -881,6 +895,7 @@ void kbase_csf_firmware_trigger_reload(struct kbase_device *kbdev)
 		kbdev->csf.firmware_reloaded = true;
 	}
 }
+KBASE_EXPORT_TEST_API(kbase_csf_firmware_trigger_reload);
 
 void kbase_csf_firmware_reload_completed(struct kbase_device *kbdev)
 {
@@ -889,6 +904,7 @@ void kbase_csf_firmware_reload_completed(struct kbase_device *kbdev)
 	if (unlikely(!kbdev->csf.firmware_inited))
 		return;
 
+
 	/* Tell MCU state machine to transit to next state */
 	kbdev->csf.firmware_reloaded = true;
 	kbase_pm_update_state(kbdev);
@@ -900,7 +916,7 @@ static u32 convert_dur_to_idle_count(struct kbase_device *kbdev, const u32 dur_n
 	/* Get the cntfreq_el0 value, which drives the SYSTEM_TIMESTAMP */
 	u64 freq = kbase_arch_timer_get_cntfrq(kbdev);
 	u64 dur_val = dur_ns;
-	u32 cnt_val_u32, reg_val_u32;
+	u32 cnt_val_u32, reg_val_u32, timer_src;
 	bool src_system_timestamp = freq > 0;
 
 	if (!src_system_timestamp) {
@@ -932,9 +948,9 @@ static u32 convert_dur_to_idle_count(struct kbase_device *kbdev, const u32 dur_n
 
 	reg_val_u32 = GLB_IDLE_TIMER_TIMEOUT_SET(0, cnt_val_u32);
 	/* add the source flag */
-	reg_val_u32 = GLB_IDLE_TIMER_TIMER_SOURCE_SET(
-		reg_val_u32, (src_system_timestamp ? GLB_IDLE_TIMER_TIMER_SOURCE_SYSTEM_TIMESTAMP :
-							  GLB_IDLE_TIMER_TIMER_SOURCE_GPU_COUNTER));
+	timer_src = src_system_timestamp ? GLB_IDLE_TIMER_TIMER_SOURCE_SYSTEM_TIMESTAMP :
+						 GLB_IDLE_TIMER_TIMER_SOURCE_GPU_COUNTER;
+	reg_val_u32 = GLB_IDLE_TIMER_TIMER_SOURCE_SET(reg_val_u32, timer_src);
 
 	return reg_val_u32;
 }
@@ -989,29 +1005,33 @@ u32 kbase_csf_firmware_set_gpu_idle_hysteresis_time(struct kbase_device *kbdev,
 		return kbdev->csf.gpu_idle_dur_count;
 	}
 
-	/* The 'reg_lock' is also taken and is held till the update is not
+	/* The scheduler lock is also taken and is held until the update is
 	 * complete, to ensure the update of idle timer value by multiple Users
 	 * gets serialized.
 	 */
-	mutex_lock(&kbdev->csf.reg_lock);
-	/* The firmware only reads the new idle timer value when the timer is
-	 * disabled.
-	 */
-	kbase_csf_scheduler_spin_lock(kbdev, &flags);
-	kbase_csf_firmware_disable_gpu_idle_timer(kbdev);
-	kbase_csf_scheduler_spin_unlock(kbdev, flags);
-	/* Ensure that the request has taken effect */
-	wait_for_global_request(kbdev, GLB_REQ_IDLE_DISABLE_MASK);
-
+	kbase_csf_scheduler_lock(kbdev);
 	kbase_csf_scheduler_spin_lock(kbdev, &flags);
 	kbdev->csf.gpu_idle_hysteresis_ns = dur_ns;
 	kbdev->csf.gpu_idle_dur_count = hysteresis_val;
 	kbdev->csf.gpu_idle_dur_count_no_modifier = no_modifier;
-	kbase_csf_firmware_enable_gpu_idle_timer(kbdev);
-	kbase_csf_scheduler_spin_unlock(kbdev, flags);
-	wait_for_global_request(kbdev, GLB_REQ_IDLE_ENABLE_MASK);
-	mutex_unlock(&kbdev->csf.reg_lock);
 
+	if (atomic_read(&kbdev->csf.scheduler.gpu_idle_timer_enabled)) {
+		/* Timer is already enabled. Disable the timer as FW only reads
+		 * the new idle timer value when timer is re-enabled.
+		 */
+		kbase_csf_firmware_disable_gpu_idle_timer(kbdev);
+		kbase_csf_scheduler_spin_unlock(kbdev, flags);
+		/* Ensure that the request has taken effect */
+		wait_for_global_request(kbdev, GLB_REQ_IDLE_DISABLE_MASK);
+		kbase_csf_scheduler_spin_lock(kbdev, &flags);
+		kbase_csf_firmware_enable_gpu_idle_timer(kbdev);
+		kbase_csf_scheduler_spin_unlock(kbdev, flags);
+		wait_for_global_request(kbdev, GLB_REQ_IDLE_ENABLE_MASK);
+	} else {
+		kbase_csf_scheduler_spin_unlock(kbdev, flags);
+	}
+
+	kbase_csf_scheduler_unlock(kbdev);
 	kbase_csf_scheduler_pm_idle(kbdev);
 	kbase_reset_gpu_allow(kbdev);
 end:
@@ -1118,15 +1138,16 @@ int kbase_csf_firmware_early_init(struct kbase_device *kbdev)
 	INIT_WORK(&kbdev->csf.firmware_reload_work, kbase_csf_firmware_reload_worker);
 	INIT_WORK(&kbdev->csf.fw_error_work, firmware_error_worker);
 
+	init_rwsem(&kbdev->csf.mmu_sync_sem);
 	mutex_init(&kbdev->csf.reg_lock);
-	kbase_csf_pending_gpuq_kicks_init(kbdev);
+	kbase_csf_pending_gpuq_kick_queues_init(kbdev);
 
 	return 0;
 }
 
 void kbase_csf_firmware_early_term(struct kbase_device *kbdev)
 {
-	kbase_csf_pending_gpuq_kicks_term(kbdev);
+	kbase_csf_pending_gpuq_kick_queues_term(kbdev);
 	mutex_destroy(&kbdev->csf.reg_lock);
 }
 
@@ -1185,6 +1206,7 @@ int kbase_csf_firmware_load_init(struct kbase_device *kbdev)
 
 	/* NO_MALI: Don't load the MMU tables or boot CSF firmware */
 
+
 	ret = invent_capabilities(kbdev);
 	if (ret != 0)
 		goto error;
@@ -1278,13 +1300,9 @@ void kbase_csf_firmware_enable_gpu_idle_timer(struct kbase_device *kbdev)
 
 void kbase_csf_firmware_disable_gpu_idle_timer(struct kbase_device *kbdev)
 {
-	struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface;
-
 	kbase_csf_scheduler_spin_lock_assert_held(kbdev);
 
-	kbase_csf_firmware_global_input_mask(global_iface, GLB_REQ, GLB_REQ_REQ_IDLE_DISABLE,
-					     GLB_REQ_IDLE_DISABLE_MASK);
-
+	set_gpu_idle_timer_glb_req(kbdev, false);
 	dev_dbg(kbdev->dev, "Sending request to disable gpu idle timer");
 
 	kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR);
@@ -1308,6 +1326,7 @@ int kbase_csf_firmware_ping_wait(struct kbase_device *const kbdev, unsigned int
 	return wait_for_global_request(kbdev, GLB_REQ_PING_MASK);
 }
 
+
 int kbase_csf_firmware_set_timeout(struct kbase_device *const kbdev, u64 const timeout)
 {
 	const struct kbase_csf_global_iface *const global_iface = &kbdev->csf.global_iface;
@@ -1370,6 +1389,8 @@ void kbase_csf_firmware_trigger_mcu_halt(struct kbase_device *kbdev)
 
 void kbase_csf_firmware_enable_mcu(struct kbase_device *kbdev)
 {
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
 	/* Trigger the boot of MCU firmware, Use the AUTO mode as
 	 * otherwise on fast reset, to exit protected mode, MCU will
 	 * not reboot by itself to enter normal mode.
@@ -1384,6 +1405,7 @@ void kbase_csf_firmware_trigger_mcu_sleep(struct kbase_device *kbdev)
 	unsigned long flags;
 
 	kbase_csf_scheduler_spin_lock(kbdev, &flags);
+	set_gpu_idle_timer_glb_req(kbdev, false);
 	set_global_request(global_iface, GLB_REQ_SLEEP_MASK);
 	dev_dbg(kbdev->dev, "Sending sleep request to MCU");
 	kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR);
@@ -1515,6 +1537,12 @@ void kbase_csf_firmware_disable_mcu(struct kbase_device *kbdev)
 	kbase_reg_write32(kbdev, GPU_CONTROL_ENUM(MCU_CONTROL), MCU_CONTROL_REQ_DISABLE);
 }
 
+void kbase_csf_stop_firmware_and_wait(struct kbase_device *kbdev)
+{
+	/* Stop the MCU firmware, no wait required on NO_MALI instance */
+	kbase_csf_firmware_disable_mcu(kbdev);
+}
+
 void kbase_csf_firmware_disable_mcu_wait(struct kbase_device *kbdev)
 {
 	/* NO_MALI: Nothing to do here */
@@ -1637,3 +1665,16 @@ void kbase_csf_firmware_mcu_shared_mapping_term(struct kbase_device *kbdev,
 	vunmap(csf_mapping->cpu_addr);
 	kfree(csf_mapping->phys);
 }
+
+#ifdef KBASE_PM_RUNTIME
+
+void kbase_csf_firmware_soi_update(struct kbase_device *kbdev)
+{
+}
+
+int kbase_csf_firmware_soi_disable_on_scheduler_suspend(struct kbase_device *kbdev)
+{
+	return 0;
+}
+
+#endif /* KBASE_PM_RUNTIME */
diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_fw_io.c b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_fw_io.c
new file mode 100644
index 000000000000..c65f837a9f72
--- /dev/null
+++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_fw_io.c
@@ -0,0 +1,251 @@
+// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
+/*
+ *
+ * (C) COPYRIGHT 2024 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+#include "mali_kbase.h"
+#include "mali_kbase_csf_fw_io.h"
+#include <mali_kbase_linux.h>
+
+#include <linux/mutex.h>
+
+static inline u32 input_page_read(const u32 *const input, const u32 offset)
+{
+	WARN_ON(offset % sizeof(u32));
+
+	return input[offset / sizeof(u32)];
+}
+
+static inline void input_page_write(u32 *const input, const u32 offset, const u32 value)
+{
+	WARN_ON(offset % sizeof(u32));
+
+	input[offset / sizeof(u32)] = value;
+}
+
+static inline void input_page_partial_write(u32 *const input, const u32 offset, u32 value, u32 mask)
+{
+	WARN_ON(offset % sizeof(u32));
+
+	input[offset / sizeof(u32)] = (input_page_read(input, offset) & ~mask) | (value & mask);
+}
+
+static inline u32 output_page_read(const u32 *const output, const u32 offset)
+{
+	WARN_ON(offset % sizeof(u32));
+
+	return output[offset / sizeof(u32)];
+}
+
+void kbase_csf_fw_io_init(struct kbase_csf_fw_io *fw_io)
+{
+	spin_lock_init(&fw_io->lock);
+	bitmap_zero(fw_io->status, KBASE_FW_IO_STATUS_NUM_BITS);
+}
+KBASE_EXPORT_TEST_API(kbase_csf_fw_io_init);
+
+void kbase_csf_fw_io_term(struct kbase_csf_fw_io *fw_io)
+{
+	/* Nothing to do. */
+}
+KBASE_EXPORT_TEST_API(kbase_csf_fw_io_term);
+
+void kbase_csf_fw_io_global_write(struct kbase_csf_fw_io *fw_io,
+				  const struct kbase_csf_global_iface *iface, u32 offset, u32 value)
+{
+	const struct kbase_device *const kbdev = iface->kbdev;
+
+	lockdep_assert_held(&fw_io->lock);
+
+	dev_dbg(kbdev->dev, "glob input w: reg %08x val %08x\n", offset, value);
+	input_page_write(iface->input, offset, value);
+}
+KBASE_EXPORT_TEST_API(kbase_csf_fw_io_global_write);
+
+void kbase_csf_fw_io_global_write_mask(struct kbase_csf_fw_io *fw_io,
+				       const struct kbase_csf_global_iface *iface, u32 offset,
+				       u32 value, u32 mask)
+{
+	const struct kbase_device *const kbdev = iface->kbdev;
+
+	lockdep_assert_held(&fw_io->lock);
+
+	dev_dbg(kbdev->dev, "glob input w: reg %08x val %08x mask %08x\n", offset, value, mask);
+	input_page_partial_write(iface->input, offset, value, mask);
+}
+KBASE_EXPORT_TEST_API(kbase_csf_fw_io_global_write_mask);
+
+u32 kbase_csf_fw_io_global_input_read(struct kbase_csf_fw_io *fw_io,
+				      const struct kbase_csf_global_iface *iface, u32 offset)
+{
+	const struct kbase_device *const kbdev = iface->kbdev;
+	u32 val;
+
+	lockdep_assert_held(&fw_io->lock);
+
+	val = input_page_read(iface->input, offset);
+	dev_dbg(kbdev->dev, "glob input r: reg %08x val %08x\n", offset, val);
+
+	return val;
+}
+KBASE_EXPORT_TEST_API(kbase_csf_fw_io_global_input_read);
+
+u32 kbase_csf_fw_io_global_read(struct kbase_csf_fw_io *fw_io,
+				const struct kbase_csf_global_iface *iface, u32 offset)
+{
+	const struct kbase_device *const kbdev = iface->kbdev;
+	u32 val;
+
+	lockdep_assert_held(&fw_io->lock);
+
+	val = output_page_read(iface->output, offset);
+	dev_dbg(kbdev->dev, "glob output r: reg %08x val %08x\n", offset, val);
+
+	return val;
+}
+
+void kbase_csf_fw_io_group_write(struct kbase_csf_fw_io *fw_io,
+				 const struct kbase_csf_cmd_stream_group_info *info, u32 offset,
+				 u32 value)
+{
+	const struct kbase_device *const kbdev = info->kbdev;
+
+	lockdep_assert_held(&fw_io->lock);
+
+	dev_dbg(kbdev->dev, "csg input w: reg %08x val %08x\n", offset, value);
+	input_page_write(info->input, offset, value);
+}
+KBASE_EXPORT_TEST_API(kbase_csf_fw_io_group_write);
+
+void kbase_csf_fw_io_group_write_mask(struct kbase_csf_fw_io *fw_io,
+				      const struct kbase_csf_cmd_stream_group_info *info,
+				      u32 offset, u32 value, u32 mask)
+{
+	const struct kbase_device *const kbdev = info->kbdev;
+
+	lockdep_assert_held(&fw_io->lock);
+
+	dev_dbg(kbdev->dev, "csg input w: reg %08x val %08x mask %08x\n", offset, value, mask);
+	input_page_partial_write(info->input, offset, value, mask);
+}
+KBASE_EXPORT_TEST_API(kbase_csf_fw_io_group_write_mask);
+
+u32 kbase_csf_fw_io_group_input_read(struct kbase_csf_fw_io *fw_io,
+				     const struct kbase_csf_cmd_stream_group_info *info, u32 offset)
+{
+	const struct kbase_device *const kbdev = info->kbdev;
+	u32 val;
+
+	lockdep_assert_held(&fw_io->lock);
+
+	val = input_page_read(info->input, offset);
+	dev_dbg(kbdev->dev, "csg input r: reg %08x val %08x\n", offset, val);
+
+	return val;
+}
+KBASE_EXPORT_TEST_API(kbase_csf_fw_io_group_input_read);
+
+u32 kbase_csf_fw_io_group_read(struct kbase_csf_fw_io *fw_io,
+			       const struct kbase_csf_cmd_stream_group_info *info, u32 offset)
+{
+	const struct kbase_device *const kbdev = info->kbdev;
+	u32 val;
+
+	lockdep_assert_held(&fw_io->lock);
+
+	val = output_page_read(info->output, offset);
+	dev_dbg(kbdev->dev, "csg output r: reg %08x val %08x\n", offset, val);
+
+	return val;
+}
+
+void kbase_csf_fw_io_stream_write(struct kbase_csf_fw_io *fw_io,
+				  const struct kbase_csf_cmd_stream_info *info, u32 offset,
+				  u32 value)
+{
+	const struct kbase_device *const kbdev = info->kbdev;
+
+	lockdep_assert_held(&fw_io->lock);
+
+	dev_dbg(kbdev->dev, "cs input w: reg %08x val %08x\n", offset, value);
+	input_page_write(info->input, offset, value);
+}
+KBASE_EXPORT_TEST_API(kbase_csf_fw_io_stream_write);
+
+void kbase_csf_fw_io_stream_write_mask(struct kbase_csf_fw_io *fw_io,
+				       const struct kbase_csf_cmd_stream_info *info, u32 offset,
+				       u32 value, u32 mask)
+{
+	const struct kbase_device *const kbdev = info->kbdev;
+
+	lockdep_assert_held(&fw_io->lock);
+
+	dev_dbg(kbdev->dev, "cs input w: reg %08x val %08x mask %08x\n", offset, value, mask);
+	input_page_partial_write(info->input, offset, value, mask);
+}
+KBASE_EXPORT_TEST_API(kbase_csf_fw_io_stream_write_mask);
+
+u32 kbase_csf_fw_io_stream_input_read(struct kbase_csf_fw_io *fw_io,
+				      const struct kbase_csf_cmd_stream_info *info, u32 offset)
+{
+	const struct kbase_device *const kbdev = info->kbdev;
+	u32 val;
+
+	lockdep_assert_held(&fw_io->lock);
+
+	val = input_page_read(info->input, offset);
+	dev_dbg(kbdev->dev, "cs input r: reg %08x val %08x\n", offset, val);
+
+	return val;
+}
+KBASE_EXPORT_TEST_API(kbase_csf_fw_io_stream_input_read);
+
+u32 kbase_csf_fw_io_stream_read(struct kbase_csf_fw_io *fw_io,
+				const struct kbase_csf_cmd_stream_info *info, u32 offset)
+{
+	const struct kbase_device *const kbdev = info->kbdev;
+	u32 val;
+
+	lockdep_assert_held(&fw_io->lock);
+
+	val = output_page_read(info->output, offset);
+	dev_dbg(kbdev->dev, "cs output r: reg %08x val %08x\n", offset, val);
+
+	return val;
+}
+
+void kbase_csf_fw_io_set_status(struct kbase_csf_fw_io *fw_io,
+				enum kbase_csf_fw_io_status_bits status_bit)
+{
+	set_bit(status_bit, fw_io->status);
+}
+KBASE_EXPORT_TEST_API(kbase_csf_fw_io_set_status);
+
+void kbase_csf_fw_io_clear_status(struct kbase_csf_fw_io *fw_io,
+				  enum kbase_csf_fw_io_status_bits status_bit)
+{
+	clear_bit(status_bit, fw_io->status);
+}
+
+bool kbase_csf_fw_io_test_status(struct kbase_csf_fw_io *fw_io,
+				 enum kbase_csf_fw_io_status_bits status_bit)
+{
+	return test_bit(status_bit, fw_io->status);
+}
+KBASE_EXPORT_TEST_API(kbase_csf_fw_io_test_status);
diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_fw_io.h b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_fw_io.h
new file mode 100644
index 000000000000..a8eb1ab51fbc
--- /dev/null
+++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_fw_io.h
@@ -0,0 +1,362 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ *
+ * (C) COPYRIGHT 2024 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+#ifndef _KBASE_CSF_FW_IO_H_
+#define _KBASE_CSF_FW_IO_H_
+
+#include <linux/sched.h>
+#include <linux/types.h>
+#include <linux/wait.h>
+#include <linux/spinlock.h>
+
+/** The wait completed because the GPU was lost. */
+#define KBASE_CSF_FW_IO_WAIT_GPU_LOST 1
+
+/** The wait was aborted because of an unexpected event. */
+#define KBASE_CSF_FW_IO_WAIT_UNSUPPORTED 255
+
+/**
+ * enum kbase_csf_fw_io_status_bits - Status bits for firmware I/O interface.
+ *
+ * @KBASE_FW_IO_STATUS_GPU_SUSPENDED: The GPU is suspended.
+ * @KBASE_FW_IO_STATUS_NUM_BITS: Number of bits used to encode the state.
+ */
+enum kbase_csf_fw_io_status_bits {
+	KBASE_FW_IO_STATUS_GPU_SUSPENDED = 0,
+	KBASE_FW_IO_STATUS_NUM_BITS,
+};
+
+/**
+ * struct kbase_csf_fw_io - Manager of firmware input/output interface.
+ *
+ * @lock: Spinlock to serialize access to the interface.
+ * @status: Bitmap of KBASE_FW_IO_STATUS_* bits describing the interface state.
+ */
+struct kbase_csf_fw_io {
+	spinlock_t lock;
+	DECLARE_BITMAP(status, KBASE_FW_IO_STATUS_NUM_BITS);
+};
+
+struct kbase_csf_global_iface;
+struct kbase_csf_cmd_stream_group_info;
+struct kbase_csf_cmd_stream_info;
+
+/**
+ * kbase_csf_fw_io_init() - Initialize manager of firmware input/output interface.
+ *
+ * @fw_io: Firmware I/O interface to initialize.
+ */
+void kbase_csf_fw_io_init(struct kbase_csf_fw_io *fw_io);
+
+/**
+ * kbase_csf_fw_io_term() - Terminate manager of firmware input/output interface.
+ *
+ * @fw_io: Firmware I/O interface to terminate.
+ */
+void kbase_csf_fw_io_term(struct kbase_csf_fw_io *fw_io);
+
+/**
+ * kbase_csf_fw_io_open() - Start a transaction with the firmware input/output interface.
+ *
+ * @fw_io: Firmware I/O interface to open.
+ *
+ * Return: 0 on success, with @fw_io->lock held; -KBASE_CSF_FW_IO_WAIT_GPU_LOST,
+ *         with the lock not taken, if the GPU_SUSPENDED status bit is set.
+ */
+static inline int kbase_csf_fw_io_open(struct kbase_csf_fw_io *fw_io)
+{
+	if (test_bit(KBASE_FW_IO_STATUS_GPU_SUSPENDED, fw_io->status))
+		return -KBASE_CSF_FW_IO_WAIT_GPU_LOST;
+
+	spin_lock(&fw_io->lock);
+
+	return 0;
+}
+
+/**
+ * kbase_csf_fw_io_open_force() - Force a transaction with the firmware input/output interface.
+ *
+ * @fw_io: Firmware I/O interface to open.
+ *
+ * Unlike kbase_csf_fw_io_open(), this function does not check the status bits:
+ * it takes @fw_io->lock unconditionally and therefore cannot fail.
+ */
+static inline void kbase_csf_fw_io_open_force(struct kbase_csf_fw_io *fw_io)
+{
+	spin_lock(&fw_io->lock);
+}
+
+/**
+ * kbase_csf_fw_io_close() - End a transaction with the firmware input/output interface.
+ *
+ * @fw_io: Firmware I/O interface to close.
+ */
+static inline void kbase_csf_fw_io_close(struct kbase_csf_fw_io *fw_io)
+{
+	spin_unlock(&fw_io->lock);
+}
+
+/**
+ * kbase_csf_fw_io_assert_opened() - Assert that a transaction with the firmware input/output
+ *                                   interface has been started.
+ *
+ * @fw_io: Firmware I/O interface.
+ */
+static inline void kbase_csf_fw_io_assert_opened(struct kbase_csf_fw_io *fw_io)
+{
+	lockdep_assert_held(&fw_io->lock);
+}
+
+/**
+ * kbase_csf_fw_io_global_write() - Write a word in the global input page.
+ *
+ * @fw_io:  Firmware I/O manager.
+ * @iface:  CSF interface provided by the firmware.
+ * @offset: Offset of the word to write, in bytes.
+ * @value:  Value to be written.
+ */
+void kbase_csf_fw_io_global_write(struct kbase_csf_fw_io *fw_io,
+				  const struct kbase_csf_global_iface *iface, u32 offset,
+				  u32 value);
+
+/**
+ * kbase_csf_fw_io_global_write_mask() - Write part of a word in the global input page.
+ *
+ * @fw_io:  Firmware I/O manager.
+ * @iface:  CSF interface provided by the firmware.
+ * @offset: Offset of the word to write, in bytes.
+ * @value:  Value to be written.
+ * @mask:   Bitmask with the bits to be modified set.
+ */
+void kbase_csf_fw_io_global_write_mask(struct kbase_csf_fw_io *fw_io,
+				       const struct kbase_csf_global_iface *iface, u32 offset,
+				       u32 value, u32 mask);
+
+/**
+ * kbase_csf_fw_io_global_input_read() - Read a word in the global input page.
+ *
+ * @fw_io:  Firmware I/O manager.
+ * @iface:  CSF interface provided by the firmware.
+ * @offset: Offset of the word to be read, in bytes.
+ *
+ * Return: Value of the word read from the global input page.
+ */
+u32 kbase_csf_fw_io_global_input_read(struct kbase_csf_fw_io *fw_io,
+				      const struct kbase_csf_global_iface *iface, u32 offset);
+
+/**
+ * kbase_csf_fw_io_global_read() - Read a word in the global output page.
+ *
+ * @fw_io:  Firmware I/O manager.
+ * @iface:  CSF interface provided by the firmware.
+ * @offset: Offset of the word to be read, in bytes.
+ *
+ * Return: Value of the word read from the global output page.
+ */
+u32 kbase_csf_fw_io_global_read(struct kbase_csf_fw_io *fw_io,
+				const struct kbase_csf_global_iface *iface, u32 offset);
+
+/**
+ * kbase_csf_fw_io_group_write() - Write a word in a CSG's input page.
+ *
+ * @fw_io:  Firmware I/O manager.
+ * @info:   CSG interface provided by the firmware.
+ * @offset: Offset of the word to write, in bytes.
+ * @value:  Value to be written.
+ */
+void kbase_csf_fw_io_group_write(struct kbase_csf_fw_io *fw_io,
+				 const struct kbase_csf_cmd_stream_group_info *info, u32 offset,
+				 u32 value);
+
+/**
+ * kbase_csf_fw_io_group_write_mask() - Write part of a word in a CSG's input page.
+ *
+ * @fw_io:  Firmware I/O manager.
+ * @info:   CSG interface provided by the firmware.
+ * @offset: Offset of the word to write, in bytes.
+ * @value:  Value to be written.
+ * @mask:   Bitmask with the bits to be modified set.
+ */
+void kbase_csf_fw_io_group_write_mask(struct kbase_csf_fw_io *fw_io,
+				      const struct kbase_csf_cmd_stream_group_info *info,
+				      u32 offset, u32 value, u32 mask);
+
+/**
+ * kbase_csf_fw_io_group_input_read() - Read a word in a CSG's input page.
+ *
+ * @fw_io:  Firmware I/O manager.
+ * @info:   CSG interface provided by the firmware.
+ * @offset: Offset of the word to be read, in bytes.
+ *
+ * Return: Value of the word read from a CSG's input page.
+ */
+u32 kbase_csf_fw_io_group_input_read(struct kbase_csf_fw_io *fw_io,
+				     const struct kbase_csf_cmd_stream_group_info *info,
+				     u32 offset);
+
+/**
+ * kbase_csf_fw_io_group_read() - Read a word in a CSG's output page.
+ *
+ * @fw_io:  Firmware I/O manager.
+ * @info:   CSG interface provided by the firmware.
+ * @offset: Offset of the word to be read, in bytes.
+ *
+ * Return: Value of the word read from the CSG's output page.
+ */
+u32 kbase_csf_fw_io_group_read(struct kbase_csf_fw_io *fw_io,
+			       const struct kbase_csf_cmd_stream_group_info *info, u32 offset);
+
+/**
+ * kbase_csf_fw_io_stream_write() - Write a word in a CS's input page.
+ *
+ * @fw_io:  Firmware I/O manager.
+ * @info:   CSI interface provided by the firmware.
+ * @offset: Offset of the word to write, in bytes.
+ * @value:  Value to be written.
+ */
+void kbase_csf_fw_io_stream_write(struct kbase_csf_fw_io *fw_io,
+				  const struct kbase_csf_cmd_stream_info *info, u32 offset,
+				  u32 value);
+
+/**
+ * kbase_csf_fw_io_stream_write_mask() - Write part of a word in a CS's input page.
+ *
+ * @fw_io:  Firmware I/O manager.
+ * @info:   CSI interface provided by the firmware.
+ * @offset: Offset of the word to write, in bytes.
+ * @value:  Value to be written.
+ * @mask:   Bitmask with the bits to be modified set.
+ */
+void kbase_csf_fw_io_stream_write_mask(struct kbase_csf_fw_io *fw_io,
+				       const struct kbase_csf_cmd_stream_info *info, u32 offset,
+				       u32 value, u32 mask);
+
+/**
+ * kbase_csf_fw_io_stream_input_read() - Read a word in a CS's input page.
+ *
+ * @fw_io:  Firmware I/O manager.
+ * @info:   CSI interface provided by the firmware.
+ * @offset: Offset of the word to be read, in bytes.
+ *
+ * Return: Value of the word read from a CS's input page.
+ */
+u32 kbase_csf_fw_io_stream_input_read(struct kbase_csf_fw_io *fw_io,
+				      const struct kbase_csf_cmd_stream_info *info, u32 offset);
+
+/**
+ * kbase_csf_fw_io_stream_read() - Read a word in a CS's output page.
+ *
+ * @fw_io:  Firmware I/O manager.
+ * @info:   CSI interface provided by the firmware.
+ * @offset: Offset of the word to be read, in bytes.
+ *
+ * Return: Value of the word read from the CS's output page.
+ */
+u32 kbase_csf_fw_io_stream_read(struct kbase_csf_fw_io *fw_io,
+				const struct kbase_csf_cmd_stream_info *info, u32 offset);
+
+/**
+ * kbase_csf_fw_io_set_status() - Set a FW I/O status bit.
+ *
+ * @fw_io:      Firmware I/O manager.
+ * @status_bit: Status bit to set.
+ */
+void kbase_csf_fw_io_set_status(struct kbase_csf_fw_io *fw_io,
+				enum kbase_csf_fw_io_status_bits status_bit);
+
+/**
+ * kbase_csf_fw_io_clear_status() - Clear a FW I/O status bit.
+ *
+ * @fw_io:      Firmware I/O manager.
+ * @status_bit: Status bit to clear.
+ */
+void kbase_csf_fw_io_clear_status(struct kbase_csf_fw_io *fw_io,
+				  enum kbase_csf_fw_io_status_bits status_bit);
+
+/**
+ * kbase_csf_fw_io_test_status() - Test a FW I/O status bit.
+ *
+ * @fw_io:      Firmware I/O manager.
+ * @status_bit: Status bit to test.
+ *
+ * Return: Value of the tested status bit.
+ */
+bool kbase_csf_fw_io_test_status(struct kbase_csf_fw_io *fw_io,
+				 enum kbase_csf_fw_io_status_bits status_bit);
+
+/**
+ * kbase_csf_fw_io_wait_event_timeout() - Wait until condition becomes true, the
+ * timeout elapses or a FW I/O status bit is set. Otherwise it behaves like
+ * wait_event_timeout().
+ *
+ * @fw_io:     Firmware I/O manager.
+ * @wq_head:   The waitqueue to wait on.
+ * @condition: C expression for the event to wait for.
+ * @timeout:   Timeout, in jiffies.
+ *
+ * Return: Remaining jiffies (at least 1) on success,
+ *         0 on timeout,
+ *         negative KBASE_CSF_FW_IO_WAIT_* error codes otherwise.
+ */
+#define kbase_csf_fw_io_wait_event_timeout(fw_io, wq_head, condition, timeout)                \
+	({                                                                                    \
+		int __wait_remaining = wait_event_timeout(                                    \
+			wq_head, (condition) || kbasep_csf_fw_io_check_status(fw_io),         \
+			timeout);                                                             \
+		int __ret = kbasep_csf_fw_io_handle_wait_result(fw_io, __wait_remaining);     \
+		__ret;                                                                        \
+	})
+
+/**
+ * kbasep_csf_fw_io_check_status() - Private function to check if any FW I/O status bit is set.
+ *
+ * @fw_io: Firmware I/O manager.
+ *
+ * Return: True if any FW I/O status bit is set, false otherwise.
+ */
+static inline bool kbasep_csf_fw_io_check_status(struct kbase_csf_fw_io *fw_io)
+{
+	return !bitmap_empty(fw_io->status, KBASE_FW_IO_STATUS_NUM_BITS);
+}
+
+/**
+ * kbasep_csf_fw_io_handle_wait_result() - Private helper translating the result of
+ * wait_event_timeout() into the value returned by kbase_csf_fw_io_wait_event_timeout().
+ *
+ * @fw_io:          Firmware I/O manager.
+ * @wait_remaining: Value returned by wait_event_timeout().
+ *
+ * Return: @wait_remaining unchanged if no FW I/O status bit is set,
+ *         -KBASE_CSF_FW_IO_WAIT_GPU_LOST if the GPU_SUSPENDED bit is set,
+ *         -KBASE_CSF_FW_IO_WAIT_UNSUPPORTED if only other status bits are set.
+ */
+static inline int kbasep_csf_fw_io_handle_wait_result(struct kbase_csf_fw_io *fw_io,
+						      int wait_remaining)
+{
+	/* No status bit raised: hand back the plain wait_event_timeout() result. */
+	if (!kbasep_csf_fw_io_check_status(fw_io))
+		return wait_remaining;
+
+	if (test_bit(KBASE_FW_IO_STATUS_GPU_SUSPENDED, fw_io->status))
+		return -KBASE_CSF_FW_IO_WAIT_GPU_LOST;
+	return -KBASE_CSF_FW_IO_WAIT_UNSUPPORTED;
+}
+#endif
diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_fw_io_no_mali.c b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_fw_io_no_mali.c
new file mode 100644
index 000000000000..0cffc8475654
--- /dev/null
+++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_fw_io_no_mali.c
@@ -0,0 +1,294 @@
+// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
+/*
+ *
+ * (C) COPYRIGHT 2024 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+#include "mali_kbase.h"
+#include "mali_kbase_csf_fw_io.h"
+#include <mali_kbase_linux.h>
+
+#include <linux/mutex.h>
+
+static inline u32 input_page_read(const u32 *const input, const u32 offset)
+{
+	/* @offset is a byte offset; warn if it is not 32-bit aligned. */
+	WARN_ON(offset % sizeof(*input));
+	return input[offset / sizeof(*input)];
+}
+
+static inline void input_page_write(u32 *const input, const u32 offset, const u32 value)
+{
+	/* @offset is a byte offset; warn if it is not 32-bit aligned. */
+	WARN_ON(offset % sizeof(*input));
+	input[offset / sizeof(*input)] = value;
+}
+
+static inline void input_page_partial_write(u32 *const input, const u32 offset, u32 value, u32 mask)
+{
+	u32 *const word = &input[offset / sizeof(*input)];
+
+	WARN_ON(offset % sizeof(*input));
+	*word = (input_page_read(input, offset) & ~mask) | (value & mask);
+}
+
+static inline u32 output_page_read(const u32 *const output, const u32 offset)
+{
+	/* @offset is a byte offset; warn if it is not 32-bit aligned. */
+	WARN_ON(offset % sizeof(*output));
+	return output[offset / sizeof(*output)];
+}
+
+static inline void output_page_write(u32 *const output, const u32 offset, const u32 value)
+{
+	/* @offset is a byte offset; warn if it is not 32-bit aligned. */
+	WARN_ON(offset % sizeof(*output));
+	output[offset / sizeof(*output)] = value;
+}
+
+void kbase_csf_fw_io_init(struct kbase_csf_fw_io *fw_io)
+{
+	spin_lock_init(&fw_io->lock);
+	bitmap_zero(fw_io->status, KBASE_FW_IO_STATUS_NUM_BITS);
+}
+KBASE_EXPORT_TEST_API(kbase_csf_fw_io_init);
+
+void kbase_csf_fw_io_term(struct kbase_csf_fw_io *fw_io)
+{
+	/* Nothing to do. */
+}
+KBASE_EXPORT_TEST_API(kbase_csf_fw_io_term);
+
+void kbase_csf_fw_io_global_write(struct kbase_csf_fw_io *fw_io,
+				  const struct kbase_csf_global_iface *iface, u32 offset, u32 value)
+{
+	/* Request bits that are not acknowledged here: PRFCNT_ENABLE and PRFCNT_SAMPLE. */
+	const u32 deferred = GLB_REQ_PRFCNT_ENABLE_MASK | GLB_REQ_PRFCNT_SAMPLE_MASK;
+	u32 cur_ack;
+
+	lockdep_assert_held(&fw_io->lock);
+
+	dev_dbg(iface->kbdev->dev, "glob input w: reg %08x val %08x\n", offset, value);
+	input_page_write(iface->input, offset, value);
+
+	if (offset != GLB_REQ)
+		return;
+
+	/* NO_MALI: Immediately acknowledge requests - except for PRFCNT_ENABLE
+	 * and PRFCNT_SAMPLE. These will be processed along with the
+	 * corresponding performance counter registers when the global doorbell
+	 * is rung in order to emulate the performance counter sampling behavior
+	 * of the real firmware.
+	 */
+	cur_ack = output_page_read(iface->output, GLB_ACK);
+	output_page_write(iface->output, GLB_ACK, cur_ack ^ ((value ^ cur_ack) & ~deferred));
+}
+KBASE_EXPORT_TEST_API(kbase_csf_fw_io_global_write);
+
+void kbase_csf_fw_io_global_write_mask(struct kbase_csf_fw_io *fw_io,
+				       const struct kbase_csf_global_iface *iface, u32 offset,
+				       u32 value, u32 mask)
+{
+	u32 merged;
+
+	lockdep_assert_held(&fw_io->lock);
+
+	dev_dbg(iface->kbdev->dev, "glob input w: reg %08x val %08x mask %08x\n", offset, value,
+		mask);
+
+	/* NO_MALI: Go through existing function to capture writes */
+	merged = (input_page_read(iface->input, offset) & ~mask) | (value & mask);
+	kbase_csf_fw_io_global_write(fw_io, iface, offset, merged);
+}
+KBASE_EXPORT_TEST_API(kbase_csf_fw_io_global_write_mask);
+
+u32 kbase_csf_fw_io_global_input_read(struct kbase_csf_fw_io *fw_io,
+				      const struct kbase_csf_global_iface *iface, u32 offset)
+{
+	u32 word;
+
+	/* Requires fw_io->lock to be held by the caller. */
+	lockdep_assert_held(&fw_io->lock);
+
+	word = input_page_read(iface->input, offset);
+	dev_dbg(iface->kbdev->dev, "glob input r: reg %08x val %08x\n", offset, word);
+
+	return word;
+}
+KBASE_EXPORT_TEST_API(kbase_csf_fw_io_global_input_read);
+
+u32 kbase_csf_fw_io_global_read(struct kbase_csf_fw_io *fw_io,
+				const struct kbase_csf_global_iface *iface, u32 offset)
+{
+	u32 word;
+
+	/* Requires fw_io->lock to be held by the caller. */
+	lockdep_assert_held(&fw_io->lock);
+
+	word = output_page_read(iface->output, offset);
+	dev_dbg(iface->kbdev->dev, "glob output r: reg %08x val %08x\n", offset, word);
+
+	return word;
+}
+
+void kbase_csf_fw_io_group_write(struct kbase_csf_fw_io *fw_io,
+				 const struct kbase_csf_cmd_stream_group_info *info, u32 offset,
+				 u32 value)
+{
+	/* Requires fw_io->lock to be held by the caller. */
+	lockdep_assert_held(&fw_io->lock);
+
+	dev_dbg(info->kbdev->dev, "csg input w: reg %08x val %08x\n", offset, value);
+	input_page_write(info->input, offset, value);
+
+	if (offset != CSG_REQ)
+		return;
+
+	/* NO_MALI: Immediately acknowledge requests by mirroring CSG_REQ into CSG_ACK */
+	output_page_write(info->output, CSG_ACK, value);
+}
+KBASE_EXPORT_TEST_API(kbase_csf_fw_io_group_write);
+
+void kbase_csf_fw_io_group_write_mask(struct kbase_csf_fw_io *fw_io,
+				      const struct kbase_csf_cmd_stream_group_info *info,
+				      u32 offset, u32 value, u32 mask)
+{
+	u32 merged;
+
+	lockdep_assert_held(&fw_io->lock);
+
+	dev_dbg(info->kbdev->dev, "csg input w: reg %08x val %08x mask %08x\n", offset, value,
+		mask);
+
+	/* NO_MALI: Go through existing function to capture writes */
+	merged = (input_page_read(info->input, offset) & ~mask) | (value & mask);
+	kbase_csf_fw_io_group_write(fw_io, info, offset, merged);
+}
+KBASE_EXPORT_TEST_API(kbase_csf_fw_io_group_write_mask);
+
+u32 kbase_csf_fw_io_group_input_read(struct kbase_csf_fw_io *fw_io,
+				     const struct kbase_csf_cmd_stream_group_info *info, u32 offset)
+{
+	u32 word;
+
+	/* Requires fw_io->lock to be held by the caller. */
+	lockdep_assert_held(&fw_io->lock);
+
+	word = input_page_read(info->input, offset);
+	dev_dbg(info->kbdev->dev, "csg input r: reg %08x val %08x\n", offset, word);
+
+	return word;
+}
+KBASE_EXPORT_TEST_API(kbase_csf_fw_io_group_input_read);
+
+u32 kbase_csf_fw_io_group_read(struct kbase_csf_fw_io *fw_io,
+			       const struct kbase_csf_cmd_stream_group_info *info, u32 offset)
+{
+	u32 word;
+
+	/* Requires fw_io->lock to be held by the caller. */
+	lockdep_assert_held(&fw_io->lock);
+
+	word = output_page_read(info->output, offset);
+	dev_dbg(info->kbdev->dev, "csg output r: reg %08x val %08x\n", offset, word);
+
+	return word;
+}
+
+void kbase_csf_fw_io_stream_write(struct kbase_csf_fw_io *fw_io,
+				  const struct kbase_csf_cmd_stream_info *info, u32 offset,
+				  u32 value)
+{
+	/* Requires fw_io->lock to be held by the caller. */
+	lockdep_assert_held(&fw_io->lock);
+
+	dev_dbg(info->kbdev->dev, "cs input w: reg %08x val %08x\n", offset, value);
+	input_page_write(info->input, offset, value);
+
+	if (offset != CS_REQ)
+		return;
+
+	/* NO_MALI: Immediately acknowledge requests by mirroring CS_REQ into CS_ACK */
+	output_page_write(info->output, CS_ACK, value);
+}
+KBASE_EXPORT_TEST_API(kbase_csf_fw_io_stream_write);
+
+void kbase_csf_fw_io_stream_write_mask(struct kbase_csf_fw_io *fw_io,
+				       const struct kbase_csf_cmd_stream_info *info, u32 offset,
+				       u32 value, u32 mask)
+{
+	u32 merged;
+
+	lockdep_assert_held(&fw_io->lock);
+
+	dev_dbg(info->kbdev->dev, "cs input w: reg %08x val %08x mask %08x\n", offset, value,
+		mask);
+
+	/* NO_MALI: Go through existing function to capture writes */
+	merged = (input_page_read(info->input, offset) & ~mask) | (value & mask);
+	kbase_csf_fw_io_stream_write(fw_io, info, offset, merged);
+}
+KBASE_EXPORT_TEST_API(kbase_csf_fw_io_stream_write_mask);
+
+u32 kbase_csf_fw_io_stream_input_read(struct kbase_csf_fw_io *fw_io,
+				      const struct kbase_csf_cmd_stream_info *info, u32 offset)
+{
+	u32 word;
+
+	/* Requires fw_io->lock to be held by the caller. */
+	lockdep_assert_held(&fw_io->lock);
+
+	word = input_page_read(info->input, offset);
+	dev_dbg(info->kbdev->dev, "cs input r: reg %08x val %08x\n", offset, word);
+
+	return word;
+}
+KBASE_EXPORT_TEST_API(kbase_csf_fw_io_stream_input_read);
+
+u32 kbase_csf_fw_io_stream_read(struct kbase_csf_fw_io *fw_io,
+				const struct kbase_csf_cmd_stream_info *info, u32 offset)
+{
+	u32 word;
+
+	/* Requires fw_io->lock to be held by the caller. */
+	lockdep_assert_held(&fw_io->lock);
+
+	word = output_page_read(info->output, offset);
+	dev_dbg(info->kbdev->dev, "cs output r: reg %08x val %08x\n", offset, word);
+
+	return word;
+}
+
+void kbase_csf_fw_io_set_status(struct kbase_csf_fw_io *fw_io,
+				enum kbase_csf_fw_io_status_bits status_bit)
+{
+	set_bit(status_bit, fw_io->status);
+}
+KBASE_EXPORT_TEST_API(kbase_csf_fw_io_set_status);
+
+void kbase_csf_fw_io_clear_status(struct kbase_csf_fw_io *fw_io,
+				  enum kbase_csf_fw_io_status_bits status_bit)
+{
+	clear_bit(status_bit, fw_io->status);
+}
+
+bool kbase_csf_fw_io_test_status(struct kbase_csf_fw_io *fw_io,
+				 enum kbase_csf_fw_io_status_bits status_bit)
+{
+	return test_bit(status_bit, fw_io->status);
+}
+KBASE_EXPORT_TEST_API(kbase_csf_fw_io_test_status);
diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_heap_context_alloc.c b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_heap_context_alloc.c
index 12a79b4852fb..fb181026719f 100644
--- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_heap_context_alloc.c
+++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_heap_context_alloc.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
 /*
  *
- * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2024 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -180,8 +180,9 @@ void kbase_csf_heap_context_allocator_term(struct kbase_csf_heap_context_allocat
 u64 kbase_csf_heap_context_allocator_alloc(struct kbase_csf_heap_context_allocator *const ctx_alloc)
 {
 	struct kbase_context *const kctx = ctx_alloc->kctx;
-	u64 flags = BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR | BASE_MEM_PROT_CPU_WR |
-		    BASEP_MEM_NO_USER_FREE | BASE_MEM_PROT_CPU_RD;
+	base_mem_alloc_flags flags = BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR |
+				     BASE_MEM_PROT_CPU_WR | BASEP_MEM_NO_USER_FREE |
+				     BASE_MEM_PROT_CPU_RD;
 	u64 nr_pages = PFN_UP(MAX_TILER_HEAPS * ctx_alloc->heap_context_size_aligned);
 	u64 heap_gpu_va = 0;
 
diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_kcpu.c b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_kcpu.c
index 76e42e847fc3..09c92f0bed4e 100644
--- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_kcpu.c
+++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_kcpu.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
 /*
  *
- * (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2018-2024 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -39,13 +39,7 @@
 static DEFINE_SPINLOCK(kbase_csf_fence_lock);
 #endif
 
-#ifdef CONFIG_MALI_BIFROST_FENCE_DEBUG
 #define FENCE_WAIT_TIMEOUT_MS 3000
-#endif
-
-static void kcpu_queue_process(struct kbase_kcpu_command_queue *kcpu_queue, bool drain_queue);
-
-static void kcpu_queue_process_worker(struct work_struct *data);
 
 static int kbase_kcpu_map_import_prepare(struct kbase_kcpu_command_queue *kcpu_queue,
 					 struct base_kcpu_command_import_info *import_info,
@@ -377,7 +371,7 @@ static int kbase_kcpu_jit_allocate_prepare(struct kbase_kcpu_command_queue *kcpu
 		goto out;
 	}
 
-	if (copy_from_user(info, data, sizeof(*info) * count) != 0) {
+	if (copy_from_user(info, data, size_mul(sizeof(*info), count)) != 0) {
 		ret = -EINVAL;
 		goto out_free;
 	}
@@ -445,6 +439,16 @@ static void kbase_kcpu_jit_allocate_finish(struct kbase_kcpu_command_queue *queu
 	kfree(cmd->info.jit_alloc.info);
 }
 
+static void enqueue_kcpuq_work(struct kbase_kcpu_command_queue *queue)
+{
+	struct kbase_context *const owner = queue->kctx;
+
+	if (atomic_read(&owner->prioritized))
+		kbase_csf_scheduler_enqueue_kcpuq_work(queue);
+	else
+		queue_work(owner->csf.kcpu_queues.kcpu_wq, &queue->work);
+}
+
 /**
  * kbase_kcpu_jit_retry_pending_allocs() - Retry blocked JIT_ALLOC commands
  *
@@ -464,9 +468,7 @@ static void kbase_kcpu_jit_retry_pending_allocs(struct kbase_context *kctx)
 	 * kbase_csf_kcpu_queue_context.jit_lock .
 	 */
 	list_for_each_entry(blocked_queue, &kctx->csf.kcpu_queues.jit_blocked_queues, jit_blocked)
-		queue_work(atomic_read(&kctx->prioritized) ? kctx->csf.kcpu_wq_high_prio :
-								   kctx->csf.kcpu_wq_normal_prio,
-			   &blocked_queue->work);
+		enqueue_kcpuq_work(blocked_queue);
 }
 
 static int kbase_kcpu_jit_free_process(struct kbase_kcpu_command_queue *queue,
@@ -561,7 +563,7 @@ static int kbase_kcpu_jit_free_prepare(struct kbase_kcpu_command_queue *kcpu_que
 		goto out_free;
 	}
 
-	if (copy_from_user(ids, data, sizeof(*ids) * count)) {
+	if (copy_from_user(ids, data, size_mul(sizeof(*ids), count))) {
 		ret = -EINVAL;
 		goto out_free;
 	}
@@ -717,11 +719,8 @@ static int kbase_csf_queue_group_suspend_process(struct kbase_context *kctx,
 static enum kbase_csf_event_callback_action event_cqs_callback(void *param)
 {
 	struct kbase_kcpu_command_queue *kcpu_queue = (struct kbase_kcpu_command_queue *)param;
-	struct kbase_context *kctx = kcpu_queue->kctx;
 
-	queue_work(atomic_read(&kctx->prioritized) ? kctx->csf.kcpu_wq_high_prio :
-							   kctx->csf.kcpu_wq_normal_prio,
-		   &kcpu_queue->work);
+	enqueue_kcpuq_work(kcpu_queue);
 
 	return KBASE_CSF_EVENT_CALLBACK_KEEP;
 }
@@ -853,7 +852,8 @@ static int kbase_kcpu_cqs_wait_prepare(struct kbase_kcpu_command_queue *queue,
 	if (!objs)
 		return -ENOMEM;
 
-	if (copy_from_user(objs, u64_to_user_ptr(cqs_wait_info->objs), nr_objs * sizeof(*objs))) {
+	if (copy_from_user(objs, u64_to_user_ptr(cqs_wait_info->objs),
+			   size_mul(nr_objs, sizeof(*objs)))) {
 		kfree(objs);
 		return -ENOMEM;
 	}
@@ -958,7 +958,8 @@ static int kbase_kcpu_cqs_set_prepare(struct kbase_kcpu_command_queue *kcpu_queu
 	if (!objs)
 		return -ENOMEM;
 
-	if (copy_from_user(objs, u64_to_user_ptr(cqs_set_info->objs), nr_objs * sizeof(*objs))) {
+	if (copy_from_user(objs, u64_to_user_ptr(cqs_set_info->objs),
+			   size_mul(nr_objs, sizeof(*objs)))) {
 		kfree(objs);
 		return -ENOMEM;
 	}
@@ -1116,7 +1117,7 @@ static int kbase_kcpu_cqs_wait_operation_prepare(
 		return -ENOMEM;
 
 	if (copy_from_user(objs, u64_to_user_ptr(cqs_wait_operation_info->objs),
-			   nr_objs * sizeof(*objs))) {
+			   size_mul(nr_objs, sizeof(*objs)))) {
 		kfree(objs);
 		return -ENOMEM;
 	}
@@ -1281,7 +1282,7 @@ static int kbase_kcpu_cqs_set_operation_prepare(
 		return -ENOMEM;
 
 	if (copy_from_user(objs, u64_to_user_ptr(cqs_set_operation_info->objs),
-			   nr_objs * sizeof(*objs))) {
+			   size_mul(nr_objs, sizeof(*objs)))) {
 		kfree(objs);
 		return -ENOMEM;
 	}
@@ -1322,9 +1323,7 @@ static void kbase_csf_fence_wait_callback(struct dma_fence *fence, struct dma_fe
 				  fence->seqno);
 
 	/* Resume kcpu command queue processing. */
-	queue_work(atomic_read(&kctx->prioritized) ? kctx->csf.kcpu_wq_high_prio :
-							   kctx->csf.kcpu_wq_normal_prio,
-		   &kcpu_queue->work);
+	enqueue_kcpuq_work(kcpu_queue);
 }
 
 static void kbasep_kcpu_fence_wait_cancel(struct kbase_kcpu_command_queue *kcpu_queue,
@@ -1360,7 +1359,6 @@ static void kbasep_kcpu_fence_wait_cancel(struct kbase_kcpu_command_queue *kcpu_
 	fence_info->fence = NULL;
 }
 
-#ifdef CONFIG_MALI_BIFROST_FENCE_DEBUG
 /**
  * fence_timeout_callback() - Timeout callback function for fence-wait
  *
@@ -1399,9 +1397,7 @@ static void fence_timeout_callback(struct timer_list *timer)
 	kbase_sync_fence_info_get(fence, &info);
 
 	if (info.status == 1) {
-		queue_work(atomic_read(&kctx->prioritized) ? kctx->csf.kcpu_wq_high_prio :
-								   kctx->csf.kcpu_wq_normal_prio,
-			   &kcpu_queue->work);
+		enqueue_kcpuq_work(kcpu_queue);
 	} else if (info.status == 0) {
 		dev_warn(kctx->kbdev->dev, "fence has not yet signalled in %ums",
 			 FENCE_WAIT_TIMEOUT_MS);
@@ -1430,7 +1426,6 @@ static void fence_wait_timeout_start(struct kbase_kcpu_command_queue *cmd)
 {
 	mod_timer(&cmd->fence_timeout, jiffies + msecs_to_jiffies(FENCE_WAIT_TIMEOUT_MS));
 }
-#endif
 
 /**
  * kbase_kcpu_fence_wait_process() - Process the kcpu fence wait command
@@ -1469,9 +1464,8 @@ static int kbase_kcpu_fence_wait_process(struct kbase_kcpu_command_queue *kcpu_q
 		fence_status = cb_err;
 		if (cb_err == 0) {
 			kcpu_queue->fence_wait_processed = true;
-#ifdef CONFIG_MALI_BIFROST_FENCE_DEBUG
-			fence_wait_timeout_start(kcpu_queue);
-#endif
+			if (IS_ENABLED(CONFIG_MALI_BIFROST_FENCE_DEBUG))
+				fence_wait_timeout_start(kcpu_queue);
 		} else if (cb_err == -ENOENT) {
 			fence_status = dma_fence_get_status(fence);
 			if (!fence_status) {
@@ -1692,9 +1686,7 @@ static void fence_signal_timeout_cb(struct timer_list *timer)
 		if (atomic_read(&kcpu_queue->fence_signal_pending_cnt) > 1)
 			fence_signal_timeout_start(kcpu_queue);
 
-		queue_work(atomic_read(&kctx->prioritized) ? kctx->csf.kcpu_wq_high_prio :
-								   kctx->csf.kcpu_wq_normal_prio,
-			   &kcpu_queue->timeout_work);
+		queue_work(kctx->csf.kcpu_queues.kcpu_wq, &kcpu_queue->timeout_work);
 	}
 }
 
@@ -1973,7 +1965,7 @@ static void kcpu_queue_process_worker(struct work_struct *data)
 		container_of(data, struct kbase_kcpu_command_queue, work);
 
 	mutex_lock(&queue->lock);
-	kcpu_queue_process(queue, false);
+	kbase_csf_kcpu_queue_process(queue, false);
 	mutex_unlock(&queue->lock);
 }
 
@@ -2006,7 +1998,7 @@ static int delete_queue(struct kbase_context *kctx, u32 id)
 		/* Drain the remaining work for this queue first and go past
 		 * all the waits.
 		 */
-		kcpu_queue_process(queue, true);
+		kbase_csf_kcpu_queue_process(queue, true);
 
 		/* All commands should have been processed */
 		WARN_ON(queue->num_pending_cmds);
@@ -2022,11 +2014,20 @@ static int delete_queue(struct kbase_context *kctx, u32 id)
 		mutex_unlock(&queue->lock);
 
 		cancel_work_sync(&queue->timeout_work);
+
+		/*
+		 * Drain a pending request to process this queue in
+		 * kbase_csf_scheduler_kthread() if any. By this point the
+		 * queue would be empty so this would be a no-op.
+		 */
+		kbase_csf_scheduler_wait_for_kthread_pending_work(kctx->kbdev,
+								  &queue->pending_kick);
+
 		cancel_work_sync(&queue->work);
 
 		mutex_destroy(&queue->lock);
 
-		kfree(queue);
+		vfree(queue);
 	} else {
 		dev_dbg(kctx->kbdev->dev, "Attempt to delete a non-existent KCPU queue");
 		mutex_unlock(&kctx->csf.kcpu_queues.lock);
@@ -2079,7 +2080,7 @@ KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_FREE_END(struct kbase_device *kbde
 	KBASE_TLSTREAM_TL_KBASE_ARRAY_END_KCPUQUEUE_EXECUTE_JIT_FREE_END(kbdev, queue);
 }
 
-static void kcpu_queue_process(struct kbase_kcpu_command_queue *queue, bool drain_queue)
+void kbase_csf_kcpu_queue_process(struct kbase_kcpu_command_queue *queue, bool drain_queue)
 {
 	struct kbase_device *kbdev = queue->kctx->kbdev;
 	bool process_next = true;
@@ -2199,10 +2200,10 @@ static void kcpu_queue_process(struct kbase_kcpu_command_queue *queue, bool drai
 				KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_MAP_IMPORT_START(kbdev,
 											   queue);
 
-				kbase_gpu_vm_lock(queue->kctx);
+				kbase_gpu_vm_lock_with_pmode_sync(queue->kctx);
 				meta = kbase_sticky_resource_acquire(queue->kctx,
-								     cmd->info.import.gpu_va);
-				kbase_gpu_vm_unlock(queue->kctx);
+								     cmd->info.import.gpu_va, NULL);
+				kbase_gpu_vm_unlock_with_pmode_sync(queue->kctx);
 
 				if (meta == NULL) {
 					queue->has_error = true;
@@ -2219,10 +2220,10 @@ static void kcpu_queue_process(struct kbase_kcpu_command_queue *queue, bool drai
 
 			KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_START(kbdev, queue);
 
-			kbase_gpu_vm_lock(queue->kctx);
+			kbase_gpu_vm_lock_with_pmode_sync(queue->kctx);
 			ret = kbase_sticky_resource_release(queue->kctx, NULL,
 							    cmd->info.import.gpu_va);
-			kbase_gpu_vm_unlock(queue->kctx);
+			kbase_gpu_vm_unlock_with_pmode_sync(queue->kctx);
 
 			if (!ret) {
 				queue->has_error = true;
@@ -2240,10 +2241,10 @@ static void kcpu_queue_process(struct kbase_kcpu_command_queue *queue, bool drai
 			KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_FORCE_START(kbdev,
 											   queue);
 
-			kbase_gpu_vm_lock(queue->kctx);
+			kbase_gpu_vm_lock_with_pmode_sync(queue->kctx);
 			ret = kbase_sticky_resource_release_force(queue->kctx, NULL,
 								  cmd->info.import.gpu_va);
-			kbase_gpu_vm_unlock(queue->kctx);
+			kbase_gpu_vm_unlock_with_pmode_sync(queue->kctx);
 
 			if (!ret) {
 				queue->has_error = true;
@@ -2642,7 +2643,7 @@ int kbase_csf_kcpu_queue_enqueue(struct kbase_context *kctx,
 		}
 
 		queue->num_pending_cmds += enq->nr_commands;
-		kcpu_queue_process(queue, false);
+		kbase_csf_kcpu_queue_process(queue, false);
 	}
 
 out:
@@ -2653,23 +2654,14 @@ out:
 
 int kbase_csf_kcpu_queue_context_init(struct kbase_context *kctx)
 {
-	kctx->csf.kcpu_wq_high_prio = alloc_workqueue("mali_kcpu_wq_%i_high_prio",
-						      WQ_UNBOUND | WQ_HIGHPRI, 0, kctx->tgid);
-	if (kctx->csf.kcpu_wq_high_prio == NULL) {
+	kctx->csf.kcpu_queues.kcpu_wq =
+		alloc_workqueue("mali_kcpu_wq_%i_%i", 0, 0, kctx->tgid, kctx->id);
+	if (kctx->csf.kcpu_queues.kcpu_wq == NULL) {
 		dev_err(kctx->kbdev->dev,
 			"Failed to initialize KCPU queue high-priority workqueue");
 		return -ENOMEM;
 	}
 
-	kctx->csf.kcpu_wq_normal_prio =
-		alloc_workqueue("mali_kcpu_wq_%i_normal_prio", 0, 0, kctx->tgid);
-	if (kctx->csf.kcpu_wq_normal_prio == NULL) {
-		dev_err(kctx->kbdev->dev,
-			"Failed to initialize KCPU queue normal-priority workqueue");
-		destroy_workqueue(kctx->csf.kcpu_wq_high_prio);
-		return -ENOMEM;
-	}
-
 	mutex_init(&kctx->csf.kcpu_queues.lock);
 
 	return 0;
@@ -2688,8 +2680,7 @@ void kbase_csf_kcpu_queue_context_term(struct kbase_context *kctx)
 
 	mutex_destroy(&kctx->csf.kcpu_queues.lock);
 
-	destroy_workqueue(kctx->csf.kcpu_wq_normal_prio);
-	destroy_workqueue(kctx->csf.kcpu_wq_high_prio);
+	destroy_workqueue(kctx->csf.kcpu_queues.kcpu_wq);
 }
 KBASE_EXPORT_TEST_API(kbase_csf_kcpu_queue_context_term);
 
@@ -2699,15 +2690,42 @@ int kbase_csf_kcpu_queue_delete(struct kbase_context *kctx,
 	return delete_queue(kctx, (u32)del->id);
 }
 
+static struct kbase_kcpu_dma_fence_meta *
+kbase_csf_kcpu_queue_metadata_new(struct kbase_context *kctx, u64 fence_context)
+{
+	int n;
+	struct kbase_kcpu_dma_fence_meta *metadata = kzalloc(sizeof(*metadata), GFP_KERNEL);
+
+	if (!metadata)
+		goto early_ret;
+
+	*metadata = (struct kbase_kcpu_dma_fence_meta){
+		.kbdev = kctx->kbdev,
+		.kctx_id = kctx->id,
+	};
+
+	/* Please update MAX_TIMELINE_NAME macro when making changes to the string. */
+	n = scnprintf(metadata->timeline_name, MAX_TIMELINE_NAME, "%u-%d_%u-%llu-kcpu",
+		      kctx->kbdev->id, kctx->tgid, kctx->id, fence_context);
+	if (WARN_ON(n >= MAX_TIMELINE_NAME)) {
+		kfree(metadata);
+		metadata = NULL;
+		goto early_ret;
+	}
+
+	kbase_refcount_set(&metadata->refcount, 1);
+
+early_ret:
+	return metadata;
+}
+KBASE_ALLOW_ERROR_INJECTION_TEST_API(kbase_csf_kcpu_queue_metadata_new, ERRNO_NULL);
+
 int kbase_csf_kcpu_queue_new(struct kbase_context *kctx, struct kbase_ioctl_kcpu_queue_new *newq)
 {
 	struct kbase_kcpu_command_queue *queue;
-	int idx;
-	int n;
-	int ret = 0;
-#if IS_ENABLED(CONFIG_SYNC_FILE)
 	struct kbase_kcpu_dma_fence_meta *metadata;
-#endif
+	int idx;
+	int ret = 0;
 	/* The queue id is of u8 type and we use the index of the kcpu_queues
 	 * array as an id, so the number of elements in the array can't be
 	 * more than 256.
@@ -2727,54 +2745,48 @@ int kbase_csf_kcpu_queue_new(struct kbase_context *kctx, struct kbase_ioctl_kcpu
 		goto out;
 	}
 
-	queue = kzalloc(sizeof(*queue), GFP_KERNEL);
-
+	queue = vzalloc(sizeof(*queue));
 	if (!queue) {
 		ret = -ENOMEM;
 		goto out;
 	}
 
+	*queue = (struct kbase_kcpu_command_queue)
+	{
+		.kctx = kctx, .start_offset = 0, .num_pending_cmds = 0, .enqueue_failed = false,
+		.command_started = false, .has_error = false, .id = idx,
+#if IS_ENABLED(CONFIG_SYNC_FILE)
+		.fence_context = dma_fence_context_alloc(1), .fence_seqno = 0,
+		.fence_wait_processed = false,
+#endif /* IS_ENABLED(CONFIG_SYNC_FILE) */
+	};
+
+	mutex_init(&queue->lock);
+	INIT_WORK(&queue->work, kcpu_queue_process_worker);
+	INIT_LIST_HEAD(&queue->high_prio_work);
+	atomic_set(&queue->pending_kick, 0);
+	INIT_WORK(&queue->timeout_work, kcpu_queue_timeout_worker);
+	INIT_LIST_HEAD(&queue->jit_blocked);
+
+	if (IS_ENABLED(CONFIG_SYNC_FILE)) {
+		metadata = kbase_csf_kcpu_queue_metadata_new(kctx, queue->fence_context);
+		if (!metadata) {
+			vfree(queue);
+			ret = -ENOMEM;
+			goto out;
+		}
+
+		queue->metadata = metadata;
+		atomic_inc(&kctx->kbdev->live_fence_metadata);
+		atomic_set(&queue->fence_signal_pending_cnt, 0);
+		kbase_timer_setup(&queue->fence_signal_timeout, fence_signal_timeout_cb);
+	}
+
+	if (IS_ENABLED(CONFIG_MALI_BIFROST_FENCE_DEBUG))
+		kbase_timer_setup(&queue->fence_timeout, fence_timeout_callback);
+
 	bitmap_set(kctx->csf.kcpu_queues.in_use, (unsigned int)idx, 1);
 	kctx->csf.kcpu_queues.array[idx] = queue;
-	mutex_init(&queue->lock);
-	queue->kctx = kctx;
-	queue->start_offset = 0;
-	queue->num_pending_cmds = 0;
-#if IS_ENABLED(CONFIG_SYNC_FILE)
-	queue->fence_context = dma_fence_context_alloc(1);
-	queue->fence_seqno = 0;
-	queue->fence_wait_processed = false;
-
-	metadata = kzalloc(sizeof(*metadata), GFP_KERNEL);
-	if (!metadata) {
-		kfree(queue);
-		ret = -ENOMEM;
-		goto out;
-	}
-
-	metadata->kbdev = kctx->kbdev;
-	metadata->kctx_id = kctx->id;
-	n = snprintf(metadata->timeline_name, MAX_TIMELINE_NAME, "%u-%d_%u-%llu-kcpu",
-		     kctx->kbdev->id, kctx->tgid, kctx->id, queue->fence_context);
-	if (WARN_ON(n >= MAX_TIMELINE_NAME)) {
-		kfree(queue);
-		kfree(metadata);
-		ret = -EINVAL;
-		goto out;
-	}
-
-	kbase_refcount_set(&metadata->refcount, 1);
-	queue->metadata = metadata;
-	atomic_inc(&kctx->kbdev->live_fence_metadata);
-#endif /* CONFIG_SYNC_FILE */
-	queue->enqueue_failed = false;
-	queue->command_started = false;
-	INIT_LIST_HEAD(&queue->jit_blocked);
-	queue->has_error = false;
-	INIT_WORK(&queue->work, kcpu_queue_process_worker);
-	INIT_WORK(&queue->timeout_work, kcpu_queue_timeout_worker);
-	queue->id = idx;
-
 	newq->id = idx;
 
 	/* Fire the tracepoint with the mutex held to enforce correct ordering
@@ -2784,14 +2796,6 @@ int kbase_csf_kcpu_queue_new(struct kbase_context *kctx, struct kbase_ioctl_kcpu
 					      queue->num_pending_cmds);
 
 	KBASE_KTRACE_ADD_CSF_KCPU(kctx->kbdev, KCPU_QUEUE_CREATE, queue, queue->fence_context, 0);
-#ifdef CONFIG_MALI_BIFROST_FENCE_DEBUG
-	kbase_timer_setup(&queue->fence_timeout, fence_timeout_callback);
-#endif
-
-#if IS_ENABLED(CONFIG_SYNC_FILE)
-	atomic_set(&queue->fence_signal_pending_cnt, 0);
-	kbase_timer_setup(&queue->fence_signal_timeout, fence_signal_timeout_cb);
-#endif
 out:
 	mutex_unlock(&kctx->csf.kcpu_queues.lock);
 
diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_kcpu.h b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_kcpu.h
index d1f18ed5caca..291509bef5a6 100644
--- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_kcpu.h
+++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_kcpu.h
@@ -243,7 +243,19 @@ struct kbase_kcpu_command {
  * @work:			struct work_struct which contains a pointer to
  *				the function which handles processing of kcpu
  *				commands enqueued into a kcpu command queue;
- *				part of kernel API for processing workqueues
+ *				part of kernel API for processing workqueues.
+ *				This would be used if the context is not
+ *				prioritised, otherwise it would be handled by
+ *				kbase_csf_scheduler_kthread().
+ * @high_prio_work:		A counterpart to @work. If the context is
+ *				prioritised, this queue would instead be added
+ *				to a list to be processed by
+ *				kbase_csf_scheduler_kthread().
+ * @pending_kick:		Indicates that kbase_csf_scheduler_kthread()
+ *				should re-evaluate pending commands for this
+ *				queue. This would be set to false when the work
+ *				is done. This is used mainly for
+ *				synchronisation with queue termination.
  * @timeout_work:		struct work_struct which contains a pointer to the
  *				function which handles post-timeout actions
  *				queue when a fence signal timeout occurs.
@@ -287,6 +299,8 @@ struct kbase_kcpu_command_queue {
 	struct kbase_context *kctx;
 	struct kbase_kcpu_command commands[KBASEP_KCPU_QUEUE_SIZE];
 	struct work_struct work;
+	struct list_head high_prio_work;
+	atomic_t pending_kick;
 	struct work_struct timeout_work;
 	u8 start_offset;
 	u8 id;
@@ -299,9 +313,7 @@ struct kbase_kcpu_command_queue {
 	bool command_started;
 	struct list_head jit_blocked;
 	bool has_error;
-#ifdef CONFIG_MALI_BIFROST_FENCE_DEBUG
 	struct timer_list fence_timeout;
-#endif /* CONFIG_MALI_BIFROST_FENCE_DEBUG */
 #if IS_ENABLED(CONFIG_SYNC_FILE)
 	struct kbase_kcpu_dma_fence_meta *metadata;
 #endif /* CONFIG_SYNC_FILE */
@@ -334,6 +346,18 @@ int kbase_csf_kcpu_queue_new(struct kbase_context *kctx, struct kbase_ioctl_kcpu
 int kbase_csf_kcpu_queue_delete(struct kbase_context *kctx,
 				struct kbase_ioctl_kcpu_queue_delete *del);
 
+/**
+ * kbase_csf_kcpu_queue_process - Process pending KCPU queue commands
+ *
+ * @queue:		The queue to process pending commands for
+ * @drain_queue:	Whether to skip all blocking commands in the queue.
+ *			This is expected to be set to true on queue
+ *			termination.
+ *
+ * This function does not return a value.
+ */
+void kbase_csf_kcpu_queue_process(struct kbase_kcpu_command_queue *queue, bool drain_queue);
+
 /**
  * kbase_csf_kcpu_queue_enqueue - Enqueue a KCPU command into a KCPU command
  *				  queue.
diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_registers.h b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_registers.h
index d01f3070cf5b..7e96a9d01fc7 100644
--- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_registers.h
+++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_registers.h
@@ -1,7 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
 /*
  *
- * (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2018-2024 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -172,6 +172,11 @@
 #define CSG_STATUS_EP_CURRENT 0x0010 /* () Endpoint allocation status register */
 #define CSG_STATUS_EP_REQ 0x0014 /* () Endpoint request status register */
 #define CSG_RESOURCE_DEP 0x001C /* () Current resource dependencies */
+/* TODO: GPUCORE-xxxx: Remove after spec alignment, use 0x1C as CSG_RESOURCE_DEP is deprecated */
+/* CSG_OUTPUT_BLOCK register offsets */
+#ifndef CSG_PROGRESS_TIMER_STATE
+#define CSG_PROGRESS_TIMER_STATE 0x001C /* () Current resource status */
+#endif
 
 /* GLB_CONTROL_BLOCK register offsets */
 #define GLB_VERSION 0x0000 /* () Global interface version */
@@ -250,7 +255,7 @@
 
 #define GLB_ACK 0x0000 /* () Global acknowledge */
 #define GLB_DB_ACK 0x0008 /* () Global doorbell acknowledge */
-#define GLB_HALT_STATUS 0x0010 /* () Global halt status */
+#define GLB_FATAL_STATUS 0x0010 /* () Global fatal error status */
 #define GLB_PRFCNT_STATUS 0x0014 /* () Performance counter status */
 #define GLB_PRFCNT_INSERT 0x0018 /* () Performance counter buffer insert index */
 #define GLB_DEBUG_FWUTF_RESULT GLB_DEBUG_ARG_OUT0 /* () Firmware debug test result */
@@ -1243,6 +1248,21 @@
 	  CSG_STATUS_EP_REQ_EXCLUSIVE_FRAGMENT_MASK))
 
 
+/* CSG_PROGRESS_TIMER_STATE register */
+#ifndef CSG_PROGRESS_TIMER_STATE_GET
+#define CSG_PROGRESS_TIMER_STATE_SHIFT 0
+#define CSG_PROGRESS_TIMER_STATE_MASK ((u32)0xFFFFFFFF << CSG_PROGRESS_TIMER_STATE_SHIFT)
+#define CSG_PROGRESS_TIMER_STATE_GET(reg_val) \
+	(((reg_val)&CSG_PROGRESS_TIMER_STATE_MASK) >> CSG_PROGRESS_TIMER_STATE_SHIFT)
+#define CSG_PROGRESS_TIMER_STATE_SET(reg_val, value)    \
+	(((reg_val) & ~CSG_PROGRESS_TIMER_STATE_MASK) | \
+	 (((value) << CSG_PROGRESS_TIMER_STATE_SHIFT) & CSG_PROGRESS_TIMER_STATE_MASK))
+/* CSG_PROGRESS_TIMER_STATE values */
+#define CSG_PROGRESS_TIMER_STATE_COMPUTE 0x0
+#define CSG_PROGRESS_TIMER_STATE_FRAGMENT 0x1
+#define CSG_PROGRESS_TIMER_STATE_TILER 0x2
+#define CSG_PROGRESS_TIMER_STATE_NEURAL 0x3
+#endif
 /* End of CSG_OUTPUT_BLOCK register set definitions */
 
 /* STREAM_CONTROL_BLOCK register set definitions */
@@ -1380,6 +1400,13 @@
 #define GLB_REQ_SLEEP_SET(reg_val, value)    \
 	(((reg_val) & ~GLB_REQ_SLEEP_MASK) | \
 	 (((value) << GLB_REQ_SLEEP_SHIFT) & GLB_REQ_SLEEP_MASK))
+#define GLB_REQ_CFG_EVICTION_TIMER_SHIFT 16
+#define GLB_REQ_CFG_EVICTION_TIMER_MASK (0x1 << GLB_REQ_CFG_EVICTION_TIMER_SHIFT)
+#define GLB_REQ_CFG_EVICTION_TIMER_GET(reg_val) \
+	(((reg_val)&GLB_REQ_CFG_EVICTION_TIMER_MASK) >> GLB_REQ_CFG_EVICTION_TIMER_SHIFT)
+#define GLB_REQ_CFG_EVICTION_TIMER_SET(reg_val, value)    \
+	(((reg_val) & ~GLB_REQ_CFG_EVICTION_TIMER_MASK) | \
+	 (((value) << GLB_REQ_CFG_EVICTION_TIMER_SHIFT) & GLB_REQ_CFG_EVICTION_TIMER_MASK))
 #define GLB_REQ_INACTIVE_COMPUTE_SHIFT 20
 #define GLB_REQ_INACTIVE_COMPUTE_MASK (0x1 << GLB_REQ_INACTIVE_COMPUTE_SHIFT)
 #define GLB_REQ_INACTIVE_COMPUTE_GET(reg_val) \
@@ -1422,6 +1449,12 @@
 #define GLB_REQ_PRFCNT_OVERFLOW_SET(reg_val, value)    \
 	(((reg_val) & ~GLB_REQ_PRFCNT_OVERFLOW_MASK) | \
 	 (((value) << GLB_REQ_PRFCNT_OVERFLOW_SHIFT) & GLB_REQ_PRFCNT_OVERFLOW_MASK))
+#define GLB_ACK_FATAL_SHIFT GPU_U(27)
+#define GLB_ACK_FATAL_MASK (GPU_U(0x1) << GLB_ACK_FATAL_SHIFT)
+#define GLB_ACK_FATAL_GET(reg_val) (((reg_val)&GLB_ACK_FATAL_MASK) >> GLB_ACK_FATAL_SHIFT)
+#define GLB_ACK_FATAL_SET(reg_val, value)     \
+	(~(~(reg_val) | GLB_ACK_FATAL_MASK) | \
+	 (((value) << GLB_ACK_FATAL_SHIFT) & GLB_ACK_FATAL_MASK))
 #define GLB_REQ_DEBUG_CSF_REQ_SHIFT 30
 #define GLB_REQ_DEBUG_CSF_REQ_MASK (0x1 << GLB_REQ_DEBUG_CSF_REQ_SHIFT)
 #define GLB_REQ_DEBUG_CSF_REQ_GET(reg_val) \
@@ -1518,6 +1551,17 @@
 	(((reg_val) & ~GLB_ACK_IRQ_MASK_FIRMWARE_CONFIG_UPDATE_MASK) | \
 	 (((value) << GLB_ACK_IRQ_MASK_FIRMWARE_CONFIG_UPDATE_SHIFT) & \
 	  GLB_ACK_IRQ_MASK_FIRMWARE_CONFIG_UPDATE_MASK))
+#define GLB_ACK_IRQ_MASK_ITER_TRACE_ENABLE_SHIFT 11
+#define GLB_ACK_IRQ_MASK_ITER_TRACE_ENABLE_MASK (0x1 << GLB_ACK_IRQ_MASK_ITER_TRACE_ENABLE_SHIFT)
+#define GLB_ACK_IRQ_MASK_CFG_EVICTION_TIMER_SHIFT 16
+#define GLB_ACK_IRQ_MASK_CFG_EVICTION_TIMER_MASK (0x1 << GLB_ACK_IRQ_MASK_CFG_EVICTION_TIMER_SHIFT)
+#define GLB_ACK_IRQ_MASK_CFG_EVICTION_TIMER_GET(reg_val)         \
+	(((reg_val)&GLB_ACK_IRQ_MASK_CFG_EVICTION_TIMER_MASK) >> \
+	 GLB_ACK_IRQ_MASK_CFG_EVICTION_TIMER_SHIFT)
+#define GLB_ACK_IRQ_MASK_CFG_EVICTION_TIMER_SET(reg_val, value)    \
+	(((reg_val) & ~GLB_ACK_IRQ_MASK_CFG_EVICTION_TIMER_MASK) | \
+	 (((value) << GLB_ACK_IRQ_MASK_CFG_EVICTION_TIMER_SHIFT) & \
+	  GLB_ACK_IRQ_MASK_CFG_EVICTION_TIMER_MASK))
 #define GLB_ACK_IRQ_MASK_INACTIVE_COMPUTE_SHIFT 20
 #define GLB_ACK_IRQ_MASK_INACTIVE_COMPUTE_MASK (0x1 << GLB_ACK_IRQ_MASK_INACTIVE_COMPUTE_SHIFT)
 #define GLB_ACK_IRQ_MASK_INACTIVE_COMPUTE_GET(reg_val)         \
@@ -1629,6 +1673,45 @@
 	  GLB_PWROFF_TIMER_CONFIG_NO_MODIFIER_MASK))
 #endif /* End of GLB_PWROFF_TIMER_CONFIG values */
 
+/* GLB_EVICTION_TIMER register */
+#ifndef GLB_EVICTION_TIMER
+#define GLB_EVICTION_TIMER 0x0090
+#define GLB_EVICTION_TIMER_TIMEOUT_SHIFT (0)
+#define GLB_EVICTION_TIMER_TIMEOUT_MASK ((0x7FFFFFFF) << GLB_EVICTION_TIMER_TIMEOUT_SHIFT)
+#define GLB_EVICTION_TIMER_TIMEOUT_GET(reg_val) \
+	(((reg_val)&GLB_EVICTION_TIMER_TIMEOUT_MASK) >> GLB_EVICTION_TIMER_TIMEOUT_SHIFT)
+#define GLB_EVICTION_TIMER_TIMEOUT_SET(reg_val, value)    \
+	(((reg_val) & ~GLB_EVICTION_TIMER_TIMEOUT_MASK) | \
+	 (((value) << GLB_EVICTION_TIMER_TIMEOUT_SHIFT) & GLB_EVICTION_TIMER_TIMEOUT_MASK))
+#define GLB_EVICTION_TIMER_TIMER_SOURCE_SHIFT (31)
+#define GLB_EVICTION_TIMER_TIMER_SOURCE_MASK ((0x1) << GLB_EVICTION_TIMER_TIMER_SOURCE_SHIFT)
+#define GLB_EVICTION_TIMER_TIMER_SOURCE_GET(reg_val) \
+	(((reg_val)&GLB_EVICTION_TIMER_TIMER_SOURCE_MASK) >> GLB_EVICTION_TIMER_TIMER_SOURCE_SHIFT)
+#define GLB_EVICTION_TIMER_TIMER_SOURCE_SET(reg_val, value)    \
+	(((reg_val) & ~GLB_EVICTION_TIMER_TIMER_SOURCE_MASK) | \
+	 (((value) << GLB_EVICTION_TIMER_TIMER_SOURCE_SHIFT) & \
+	  GLB_EVICTION_TIMER_TIMER_SOURCE_MASK))
+/* GLB_EVICTION_TIMER_TIMER_SOURCE values */
+#define GLB_EVICTION_TIMER_TIMER_SOURCE_SYSTEM_TIMESTAMP 0x0U
+#define GLB_EVICTION_TIMER_TIMER_SOURCE_GPU_COUNTER 0x1U
+/* End of GLB_EVICTION_TIMER_TIMER_SOURCE values */
+#endif /* End of GLB_EVICTION_TIMER */
+
+/* GLB_EVICTION_TIMER_CONFIG register */
+#ifndef GLB_EVICTION_TIMER_CONFIG
+#define GLB_EVICTION_TIMER_CONFIG 0x0094 /* () Configuration fields for GLB_EVICTION_TIMER */
+#define GLB_EVICTION_TIMER_CONFIG_NO_MODIFIER_SHIFT 0
+#define GLB_EVICTION_TIMER_CONFIG_NO_MODIFIER_MASK \
+	(0x1 << GLB_EVICTION_TIMER_CONFIG_NO_MODIFIER_SHIFT)
+#define GLB_EVICTION_TIMER_CONFIG_NO_MODIFIER_GET(reg_val)         \
+	(((reg_val)&GLB_EVICTION_TIMER_CONFIG_NO_MODIFIER_MASK) >> \
+	 GLB_EVICTION_TIMER_CONFIG_NO_MODIFIER_SHIFT)
+#define GLB_EVICTION_TIMER_CONFIG_NO_MODIFIER_SET(reg_val, value)    \
+	(((reg_val) & ~GLB_EVICTION_TIMER_CONFIG_NO_MODIFIER_MASK) | \
+	 (((value) << GLB_EVICTION_TIMER_CONFIG_NO_MODIFIER_SHIFT) & \
+	  GLB_EVICTION_TIMER_CONFIG_NO_MODIFIER_MASK))
+#endif /* End of GLB_EVICTION_TIMER_CONFIG values */
+
 /* GLB_ALLOC_EN register */
 #define GLB_ALLOC_EN_MASK_SHIFT 0
 #define GLB_ALLOC_EN_MASK_MASK (GPU_ULL(0xFFFFFFFFFFFFFFFF) << GLB_ALLOC_EN_MASK_SHIFT)
@@ -1711,6 +1794,15 @@
 	(((reg_val) & ~GLB_IDLE_TIMER_CONFIG_NO_MODIFIER_MASK) | \
 	 (((value) << GLB_IDLE_TIMER_CONFIG_NO_MODIFIER_SHIFT) & \
 	  GLB_IDLE_TIMER_CONFIG_NO_MODIFIER_MASK))
+#define GLB_IDLE_TIMER_CONFIG_SLEEP_ON_IDLE_SHIFT 9
+#define GLB_IDLE_TIMER_CONFIG_SLEEP_ON_IDLE_MASK (0x1 << GLB_IDLE_TIMER_CONFIG_SLEEP_ON_IDLE_SHIFT)
+#define GLB_IDLE_TIMER_CONFIG_SLEEP_ON_IDLE_GET(reg_val)         \
+	(((reg_val)&GLB_IDLE_TIMER_CONFIG_SLEEP_ON_IDLE_MASK) >> \
+	 GLB_IDLE_TIMER_CONFIG_SLEEP_ON_IDLE_SHIFT)
+#define GLB_IDLE_TIMER_CONFIG_SLEEP_ON_IDLE_SET(reg_val, value)    \
+	(((reg_val) & ~GLB_IDLE_TIMER_CONFIG_SLEEP_ON_IDLE_MASK) | \
+	 (((value) << GLB_IDLE_TIMER_CONFIG_SLEEP_ON_IDLE_SHIFT) & \
+	  GLB_IDLE_TIMER_CONFIG_SLEEP_ON_IDLE_MASK))
 #endif /* End of GLB_IDLE_TIMER_CONFIG values */
 
 /* GLB_INSTR_FEATURES register */
@@ -1822,6 +1914,20 @@
 	(((reg_val) & ~GLB_DEBUG_REQ_RUN_MODE_MASK) | \
 	 (((value) << GLB_DEBUG_REQ_RUN_MODE_SHIFT) & GLB_DEBUG_REQ_RUN_MODE_MASK))
 
+/* GLB_FATAL_STATUS register */
+#define GLB_FATAL_STATUS_VALUE_SHIFT GPU_U(0)
+#define GLB_FATAL_STATUS_VALUE_MASK (GPU_U(0xFFFFFFFF) << GLB_FATAL_STATUS_VALUE_SHIFT)
+#define GLB_FATAL_STATUS_VALUE_GET(reg_val) \
+	(((reg_val)&GLB_FATAL_STATUS_VALUE_MASK) >> GLB_FATAL_STATUS_VALUE_SHIFT)
+
+enum glb_fatal_status {
+	GLB_FATAL_STATUS_VALUE_OK,
+	GLB_FATAL_STATUS_VALUE_ASSERT,
+	GLB_FATAL_STATUS_VALUE_UNEXPECTED_EXCEPTION,
+	GLB_FATAL_STATUS_VALUE_HANG,
+	GLB_FATAL_STATUS_VALUE_COUNT
+};
+
 /* GLB_DEBUG_ACK register */
 #define GLB_DEBUG_ACK_DEBUG_RUN_SHIFT GPU_U(23)
 #define GLB_DEBUG_ACK_DEBUG_RUN_MASK (GPU_U(0x1) << GLB_DEBUG_ACK_DEBUG_RUN_SHIFT)
diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_reset_gpu.c b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_reset_gpu.c
index 240397ebc16d..ffd27318cba3 100644
--- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_reset_gpu.c
+++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_reset_gpu.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
 /*
  *
- * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2024 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -224,8 +224,11 @@ static void kbase_csf_reset_end_hw_access(struct kbase_device *kbdev, int err_du
 
 static void kbase_csf_debug_dump_registers(struct kbase_device *kbdev)
 {
+	unsigned long flags;
+
 	kbase_io_history_dump(kbdev);
 
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
 	dev_err(kbdev->dev, "Register state:");
 	dev_err(kbdev->dev, "  GPU_IRQ_RAWSTAT=0x%08x  GPU_STATUS=0x%08x MCU_STATUS=0x%08x",
 		kbase_reg_read32(kbdev, GPU_CONTROL_ENUM(GPU_IRQ_RAWSTAT)),
@@ -251,6 +254,7 @@ static void kbase_csf_debug_dump_registers(struct kbase_device *kbdev)
 			kbase_reg_read32(kbdev, GPU_CONTROL_ENUM(TILER_CONFIG)));
 	}
 
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
 }
 
 /**
@@ -396,6 +400,7 @@ static int kbase_csf_reset_gpu_now(struct kbase_device *kbdev, bool firmware_ini
 	 */
 	if (likely(firmware_inited))
 		kbase_csf_scheduler_reset(kbdev);
+
 	cancel_work_sync(&kbdev->csf.firmware_reload_work);
 
 	dev_dbg(kbdev->dev, "Disable GPU hardware counters.\n");
@@ -403,6 +408,7 @@ static int kbase_csf_reset_gpu_now(struct kbase_device *kbdev, bool firmware_ini
 	kbase_hwcnt_context_disable(kbdev->hwcnt_gpu_ctx);
 
 	ret = kbase_csf_reset_gpu_once(kbdev, firmware_inited, silent);
+
 	if (ret == SOFT_RESET_FAILED) {
 		dev_err(kbdev->dev, "Soft-reset failed");
 		goto err;
@@ -490,6 +496,11 @@ static void kbase_csf_reset_gpu_worker(struct work_struct *data)
 
 bool kbase_prepare_to_reset_gpu(struct kbase_device *kbdev, unsigned int flags)
 {
+	if (kbase_pm_is_gpu_lost(kbdev)) {
+		/* GPU access has been removed, reset will be done by Arbiter instead */
+		return false;
+	}
+
 	if (flags & RESET_FLAGS_HWC_UNRECOVERABLE_ERROR)
 		kbase_hwcnt_backend_csf_on_unrecoverable_error(&kbdev->hwcnt_gpu_iface);
 
diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_scheduler.c b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_scheduler.c
index 81ddeb667d06..cd6abd62f6c5 100644
--- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_scheduler.c
+++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_scheduler.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
 /*
  *
- * (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2018-2024 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -36,11 +36,13 @@
 #include "mali_kbase_csf_tiler_heap_reclaim.h"
 #include "mali_kbase_csf_mcu_shared_reg.h"
 #include <linux/version_compat_defs.h>
+#include <hwcnt/mali_kbase_hwcnt_context.h>
 #if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD)
 #include <mali_kbase_gpu_metrics.h>
 #include <csf/mali_kbase_csf_trace_buffer.h>
 #endif /* CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD */
 
+
 /* Value to indicate that a queue group is not groups_to_schedule list */
 #define KBASEP_GROUP_PREPARED_SEQ_NUM_INVALID (U32_MAX)
 
@@ -84,13 +86,19 @@ scheduler_get_protm_enter_async_group(struct kbase_device *const kbdev,
 				      struct kbase_queue_group *const group);
 static struct kbase_queue_group *get_tock_top_group(struct kbase_csf_scheduler *const scheduler);
 static void scheduler_enable_tick_timer_nolock(struct kbase_device *kbdev);
-static int suspend_active_queue_groups(struct kbase_device *kbdev, unsigned long *slot_mask);
+static int suspend_active_queue_groups(struct kbase_device *kbdev, unsigned long *slot_mask,
+				       bool reset);
 static int suspend_active_groups_on_powerdown(struct kbase_device *kbdev, bool system_suspend);
 static void schedule_in_cycle(struct kbase_queue_group *group, bool force);
 static bool queue_group_scheduled_locked(struct kbase_queue_group *group);
 
 #define kctx_as_enabled(kctx) (!kbase_ctx_flag(kctx, KCTX_AS_DISABLED_ON_FAULT))
 
+bool is_gpu_level_suspend_supported(struct kbase_device *const kbdev)
+{
+	return false;
+}
+
 #if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD)
 /**
  * gpu_metrics_ctx_init() - Take a reference on GPU metrics context if it exists,
@@ -119,7 +127,7 @@ static inline int gpu_metrics_ctx_init(struct kbase_context *kctx)
 	put_cred(cred);
 
 	/* Return early if this is not a Userspace created context */
-	if (unlikely(!kctx->kfile))
+	if (unlikely(!kctx->filp))
 		return 0;
 
 	/* Serialize against the other threads trying to create/destroy Kbase contexts. */
@@ -156,7 +164,7 @@ static inline int gpu_metrics_ctx_init(struct kbase_context *kctx)
 static inline void gpu_metrics_ctx_term(struct kbase_context *kctx)
 {
 	/* Return early if this is not a Userspace created context */
-	if (unlikely(!kctx->kfile))
+	if (unlikely(!kctx->filp))
 		return;
 
 	/* Serialize against the other threads trying to create/destroy Kbase contexts. */
@@ -458,11 +466,14 @@ static void wait_for_dump_complete_on_group_deschedule(struct kbase_queue_group
  *
  * This function notifies the Userspace client waiting for the faults and wait
  * for the Client to complete the dumping.
- * The function is called only from Scheduling tick/tock when a request sent by
- * the Scheduler to FW times out or from the protm event work item of the group
- * when the protected mode entry request times out.
- * In the latter case there is no wait done as scheduler lock would be released
- * immediately. In the former case the function waits and releases the scheduler
+ * The function is mainly called from Scheduling tick/tock when a request sent by
+ * the Scheduler to FW times out. It can be called outside the tick/tock when timeout
+ * happens in the following 3 cases :-
+ * - Entry to protected mode is initiated from protm event work item.
+ * - Forced exit from protected mode is triggered when GPU queue of an on-slot group is kicked.
+ * - CSG termination request is sent when Userspace tries to delete the queue group.
+ * In the latter 3 cases there is no wait done as scheduler lock would be released
+ * immediately. In the tick/tock case the function waits and releases the scheduler
  * lock before the wait. It has been ensured that the Scheduler view of the groups
  * won't change meanwhile, so no group can enter/exit the Scheduler, become
  * runnable or go off slot.
@@ -478,10 +489,9 @@ static void schedule_actions_trigger_df(struct kbase_device *kbdev, struct kbase
 	if (!kbase_debug_csf_fault_notify(kbdev, kctx, error))
 		return;
 
-	if (unlikely(scheduler->state != SCHED_BUSY)) {
-		WARN_ON(error != DF_PROTECTED_MODE_ENTRY_FAILURE);
+	/* Return early if the function was called outside the tick/tock */
+	if (unlikely(scheduler->state != SCHED_BUSY))
 		return;
-	}
 
 	mutex_unlock(&scheduler->lock);
 	kbase_debug_csf_fault_wait_completion(kbdev);
@@ -788,19 +798,20 @@ static void update_on_slot_queues_offsets(struct kbase_device *kbdev)
 static void enqueue_gpu_idle_work(struct kbase_csf_scheduler *const scheduler)
 {
 	atomic_set(&scheduler->gpu_no_longer_idle, false);
-	queue_work(scheduler->idle_wq, &scheduler->gpu_idle_work);
+	atomic_inc(&scheduler->pending_gpu_idle_work);
+	complete(&scheduler->kthread_signal);
 }
 
-bool kbase_csf_scheduler_process_gpu_idle_event(struct kbase_device *kbdev)
+void kbase_csf_scheduler_process_gpu_idle_event(struct kbase_device *kbdev)
 {
 	struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
 	bool can_suspend_on_idle;
-	bool invoke_pm_state_machine = false;
 
 	lockdep_assert_held(&kbdev->hwaccess_lock);
 	lockdep_assert_held(&scheduler->interrupt_lock);
 
-	can_suspend_on_idle = kbase_pm_idle_groups_sched_suspendable(kbdev);
+	can_suspend_on_idle = kbase_pm_idle_groups_sched_suspendable(kbdev) &&
+			      !kbase_pm_is_mcu_inactive(kbdev, kbdev->pm.backend.mcu_state);
 	KBASE_KTRACE_ADD(kbdev, SCHEDULER_GPU_IDLE_EVENT_CAN_SUSPEND, NULL,
 			 (((u64)can_suspend_on_idle) << 32));
 
@@ -812,23 +823,6 @@ bool kbase_csf_scheduler_process_gpu_idle_event(struct kbase_device *kbdev)
 		scheduler->fast_gpu_idle_handling = (kbdev->csf.gpu_idle_hysteresis_ns == 0) ||
 						    !kbase_csf_scheduler_all_csgs_idle(kbdev);
 
-		/* If GPU idle event occurred after the runtime suspend was aborted due to
-		 * DB_MIRROR irq then it suggests that Userspace submission didn't make GPU
-		 * non-idle. So the planned resumption of scheduling can be cancelled and
-		 * MCU can be put back to sleep state to re-trigger the runtime suspend.
-		 */
-		if (unlikely(kbdev->pm.backend.exit_gpu_sleep_mode &&
-			     kbdev->pm.backend.runtime_suspend_abort_reason ==
-				     ABORT_REASON_DB_MIRROR_IRQ)) {
-			/* Cancel the planned resumption of scheduling */
-			kbdev->pm.backend.exit_gpu_sleep_mode = false;
-			kbdev->pm.backend.runtime_suspend_abort_reason = ABORT_REASON_NONE;
-			/* PM state machine can be invoked to put MCU back to the sleep
-			 * state right away and thereby re-trigger the runtime suspend.
-			 */
-			invoke_pm_state_machine = true;
-		}
-
 		/* The GPU idle worker relies on update_on_slot_queues_offsets() to have
 		 * finished. It's queued before to reduce the time it takes till execution
 		 * but it'll eventually be blocked by the scheduler->interrupt_lock.
@@ -839,8 +833,6 @@ bool kbase_csf_scheduler_process_gpu_idle_event(struct kbase_device *kbdev)
 	/* The extract offsets are unused in fast GPU idle handling */
 	if (!scheduler->fast_gpu_idle_handling)
 		update_on_slot_queues_offsets(kbdev);
-
-	return invoke_pm_state_machine;
 }
 
 u32 kbase_csf_scheduler_get_nr_active_csgs_locked(struct kbase_device *kbdev)
@@ -1027,6 +1019,8 @@ static void scheduler_force_protm_exit(struct kbase_device *kbdev)
  *
  * @kbdev: Pointer to the device
  * @suspend_handler: Handler code for how to handle a suspend that might occur.
+ * @active_after_sleep: Flag to indicate that Scheduler is being activated from
+ *                      the sleeping state.
  *
  * This function is usually called when Scheduler needs to be activated.
  * The PM reference count is acquired for the Scheduler and the power on
@@ -1035,7 +1029,8 @@ static void scheduler_force_protm_exit(struct kbase_device *kbdev)
  * Return: 0 if successful or a negative error code on failure.
  */
 static int scheduler_pm_active_handle_suspend(struct kbase_device *kbdev,
-					      enum kbase_pm_suspend_handler suspend_handler)
+					      enum kbase_pm_suspend_handler suspend_handler,
+					      bool active_after_sleep)
 {
 	unsigned long flags;
 	u32 prev_count;
@@ -1043,24 +1038,35 @@ static int scheduler_pm_active_handle_suspend(struct kbase_device *kbdev,
 
 	lockdep_assert_held(&kbdev->csf.scheduler.lock);
 
-	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
 	prev_count = kbdev->csf.scheduler.pm_active_count;
 	if (!WARN_ON(prev_count == U32_MAX))
 		kbdev->csf.scheduler.pm_active_count++;
-	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
 
 	/* On 0 => 1, make a pm_ctx_active request */
 	if (!prev_count) {
-		ret = kbase_pm_context_active_handle_suspend(kbdev, suspend_handler);
-		/* Invoke the PM state machines again as the change in MCU
-		 * desired status, due to the update of scheduler.pm_active_count,
-		 * may be missed by the thread that called pm_wait_for_desired_state()
-		 */
-		spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
-		if (ret)
+		kbase_pm_lock(kbdev);
+		kbdev->pm.backend.mcu_poweron_required = true;
+		ret = kbase_pm_context_active_handle_suspend_locked(kbdev, suspend_handler);
+		if (ret) {
 			kbdev->csf.scheduler.pm_active_count--;
-		kbase_pm_update_state(kbdev);
-		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+			kbdev->pm.backend.mcu_poweron_required = false;
+		} else {
+			spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+			if (active_after_sleep) {
+				kbdev->pm.backend.runtime_suspend_abort_reason = ABORT_REASON_NONE;
+				kbdev->pm.backend.gpu_sleep_mode_active = false;
+			}
+			/* Check if the GPU is already active */
+			if (kbdev->pm.active_count > 1) {
+				/* GPU is already active, so need to invoke the PM state machines
+				 * explicitly to turn on the MCU.
+				 */
+				kbdev->pm.backend.mcu_desired = true;
+				kbase_pm_update_state(kbdev);
+			}
+			spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+		}
+		kbase_pm_unlock(kbdev);
 	}
 
 	return ret;
@@ -1072,8 +1078,6 @@ static int scheduler_pm_active_handle_suspend(struct kbase_device *kbdev,
  *                                     Scheduler
  *
  * @kbdev: Pointer to the device
- * @flags: Pointer to the flags variable containing the interrupt state
- *         when hwaccess lock was acquired.
  *
  * This function is called when Scheduler needs to be activated from the
  * sleeping state.
@@ -1081,42 +1085,15 @@ static int scheduler_pm_active_handle_suspend(struct kbase_device *kbdev,
  * MCU is initiated. It resets the flag that indicates to the MCU state
  * machine that MCU needs to be put in sleep state.
  *
- * Note: This function shall be called with hwaccess lock held and it may
- * release that lock and reacquire it.
- *
  * Return: zero when the PM reference was taken and non-zero when the
  * system is being suspending/suspended.
  */
-static int scheduler_pm_active_after_sleep(struct kbase_device *kbdev, unsigned long *flags)
+static int scheduler_pm_active_after_sleep(struct kbase_device *kbdev)
 {
-	u32 prev_count;
-	int ret = 0;
-
 	lockdep_assert_held(&kbdev->csf.scheduler.lock);
-	lockdep_assert_held(&kbdev->hwaccess_lock);
 
-	prev_count = kbdev->csf.scheduler.pm_active_count;
-	if (!WARN_ON(prev_count == U32_MAX))
-		kbdev->csf.scheduler.pm_active_count++;
-
-	kbdev->pm.backend.runtime_suspend_abort_reason = ABORT_REASON_NONE;
-
-	/* On 0 => 1, make a pm_ctx_active request */
-	if (!prev_count) {
-		spin_unlock_irqrestore(&kbdev->hwaccess_lock, *flags);
-
-		ret = kbase_pm_context_active_handle_suspend(
-			kbdev, KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE);
-
-		spin_lock_irqsave(&kbdev->hwaccess_lock, *flags);
-		if (ret)
-			kbdev->csf.scheduler.pm_active_count--;
-		else
-			kbdev->pm.backend.gpu_sleep_mode_active = false;
-		kbase_pm_update_state(kbdev);
-	}
-
-	return ret;
+	return scheduler_pm_active_handle_suspend(kbdev, KBASE_PM_SUSPEND_HANDLER_NOT_POSSIBLE,
+						  true);
 }
 #endif
 
@@ -1136,28 +1113,32 @@ static void scheduler_pm_idle(struct kbase_device *kbdev)
 
 	lockdep_assert_held(&kbdev->csf.scheduler.lock);
 
-	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
 	prev_count = kbdev->csf.scheduler.pm_active_count;
 	if (!WARN_ON(prev_count == 0))
 		kbdev->csf.scheduler.pm_active_count--;
-	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
 
 	if (prev_count == 1) {
-		kbase_pm_context_idle(kbdev);
-		/* Invoke the PM state machines again as the change in MCU
-		 * desired status, due to the update of scheduler.pm_active_count,
-		 * may be missed by the thread that called pm_wait_for_desired_state()
-		 */
-		spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
-		kbase_pm_update_state(kbdev);
-		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+		kbase_pm_lock(kbdev);
+		kbdev->pm.backend.mcu_poweron_required = false;
+		kbase_pm_context_idle_locked(kbdev);
+		/* Check if GPU is still active */
+		if (kbdev->pm.active_count) {
+			/* GPU is still active, so need to invoke the PM state machines
+			 * explicitly to turn off the MCU.
+			 */
+			spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+			kbdev->pm.backend.mcu_desired = false;
+			kbase_pm_update_state(kbdev);
+			spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+		}
+		kbase_pm_unlock(kbdev);
 	}
 }
 
 #ifdef KBASE_PM_RUNTIME
 /**
  * scheduler_pm_idle_before_sleep() - Release the PM reference count and
- *                                    trigger the tranistion to sleep state.
+ *                                    trigger the transition to sleep state.
  *
  * @kbdev: Pointer to the device
  *
@@ -1168,28 +1149,15 @@ static void scheduler_pm_idle(struct kbase_device *kbdev)
 static void scheduler_pm_idle_before_sleep(struct kbase_device *kbdev)
 {
 	unsigned long flags;
-	u32 prev_count;
 
 	lockdep_assert_held(&kbdev->csf.scheduler.lock);
 
 	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
-	prev_count = kbdev->csf.scheduler.pm_active_count;
-	if (!WARN_ON(prev_count == 0))
-		kbdev->csf.scheduler.pm_active_count--;
 	kbdev->pm.backend.gpu_sleep_mode_active = true;
 	kbdev->pm.backend.exit_gpu_sleep_mode = false;
 	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
 
-	if (prev_count == 1) {
-		kbase_pm_context_idle(kbdev);
-		/* Invoke the PM state machines again as the change in MCU
-		 * desired status, due to the update of scheduler.pm_active_count,
-		 * may be missed by the thread that called pm_wait_for_desired_state()
-		 */
-		spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
-		kbase_pm_update_state(kbdev);
-		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
-	}
+	scheduler_pm_idle(kbdev);
 }
 #endif
 
@@ -1205,8 +1173,8 @@ static void scheduler_wakeup(struct kbase_device *kbdev, bool kick)
 
 	if (scheduler->state == SCHED_SUSPENDED) {
 		dev_dbg(kbdev->dev, "Re-activating the Scheduler after suspend");
-		ret = scheduler_pm_active_handle_suspend(kbdev,
-							 KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE);
+		ret = scheduler_pm_active_handle_suspend(
+			kbdev, KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE, false);
 #if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD)
 		hrtimer_start(&scheduler->gpu_metrics_timer,
 			      HR_TIMER_DELAY_NSEC(kbase_gpu_metrics_get_tp_emit_interval()),
@@ -1214,13 +1182,8 @@ static void scheduler_wakeup(struct kbase_device *kbdev, bool kick)
 #endif
 	} else {
 #ifdef KBASE_PM_RUNTIME
-		unsigned long flags;
-
 		dev_dbg(kbdev->dev, "Re-activating the Scheduler out of sleep");
-
-		spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
-		ret = scheduler_pm_active_after_sleep(kbdev, &flags);
-		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+		ret = scheduler_pm_active_after_sleep(kbdev);
 #endif
 	}
 
@@ -1228,7 +1191,7 @@ static void scheduler_wakeup(struct kbase_device *kbdev, bool kick)
 		/* GPUCORE-29850 would add the handling for the case where
 		 * Scheduler could not be activated due to system suspend.
 		 */
-		dev_info(kbdev->dev, "Couldn't wakeup Scheduler due to system suspend");
+		dev_dbg(kbdev->dev, "Couldn't wakeup Scheduler due to system suspend");
 		return;
 	}
 
@@ -1239,13 +1202,20 @@ static void scheduler_wakeup(struct kbase_device *kbdev, bool kick)
 		scheduler_enable_tick_timer_nolock(kbdev);
 }
 
-static void scheduler_suspend(struct kbase_device *kbdev)
+static int scheduler_suspend(struct kbase_device *kbdev)
 {
 	struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
 
 	lockdep_assert_held(&scheduler->lock);
 
 	if (!WARN_ON(scheduler->state == SCHED_SUSPENDED)) {
+#ifdef KBASE_PM_RUNTIME
+		int ret;
+
+		ret = kbase_csf_firmware_soi_disable_on_scheduler_suspend(kbdev);
+		if (ret)
+			return ret;
+#endif /* KBASE_PM_RUNTIME */
 		dev_dbg(kbdev->dev, "Suspending the Scheduler");
 		scheduler_pm_idle(kbdev);
 		scheduler->state = SCHED_SUSPENDED;
@@ -1254,6 +1224,8 @@ static void scheduler_suspend(struct kbase_device *kbdev)
 #endif
 		KBASE_KTRACE_ADD(kbdev, SCHED_SUSPENDED, NULL, scheduler->state);
 	}
+
+	return 0;
 }
 
 /**
@@ -1505,7 +1477,7 @@ static int sched_halt_stream(struct kbase_queue *queue)
 	long remaining;
 	int slot;
 	int err = 0;
-	const u32 group_schedule_timeout = kbase_get_timeout_ms(kbdev, CSF_CSG_SUSPEND_TIMEOUT);
+	const u32 group_schedule_timeout = kbdev->csf.csg_suspend_timeout_ms;
 	const u32 fw_timeout_ms = kbase_get_timeout_ms(kbdev, CSF_FIRMWARE_TIMEOUT);
 
 	if (WARN_ON(!group))
@@ -1985,7 +1957,6 @@ static enum kbase_csf_csg_slot_state update_csg_slot_status(struct kbase_device
 		if ((state == CSG_ACK_STATE_START) || (state == CSG_ACK_STATE_RESUME)) {
 			slot_state = CSG_SLOT_RUNNING;
 			atomic_set(&csg_slot->state, slot_state);
-			csg_slot->trigger_jiffies = jiffies;
 			KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_RUNNING, csg_slot->resident_group,
 						 state);
 			dev_dbg(kbdev->dev, "Group %u running on slot %d\n",
@@ -1996,7 +1967,6 @@ static enum kbase_csf_csg_slot_state update_csg_slot_status(struct kbase_device
 		if ((state == CSG_ACK_STATE_SUSPEND) || (state == CSG_ACK_STATE_TERMINATE)) {
 			slot_state = CSG_SLOT_STOPPED;
 			atomic_set(&csg_slot->state, slot_state);
-			csg_slot->trigger_jiffies = jiffies;
 			KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_STOPPED, csg_slot->resident_group,
 						 state);
 			dev_dbg(kbdev->dev, "Group %u stopped on slot %d\n",
@@ -2095,7 +2065,6 @@ static void halt_csg_slot(struct kbase_queue_group *group, bool suspend)
 		kbase_csf_ring_csg_doorbell(kbdev, slot);
 		spin_unlock_irqrestore(&kbdev->csf.scheduler.interrupt_lock, flags);
 		atomic_set(&csg_slot[slot].state, CSG_SLOT_DOWN2STOP);
-		csg_slot[slot].trigger_jiffies = jiffies;
 		KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_STOP_REQ, group, halt_cmd);
 
 		KBASE_TLSTREAM_TL_KBASE_DEVICE_HALTING_CSG(kbdev, kbdev->id, (u32)slot, suspend);
@@ -2814,7 +2783,6 @@ static bool cleanup_csg_slot(struct kbase_queue_group *group)
 	spin_unlock_bh(&kbdev->csf.scheduler.gpu_metrics_lock);
 #endif
 
-	csg_slot->trigger_jiffies = jiffies;
 	atomic_set(&csg_slot->state, CSG_SLOT_READY);
 
 	KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_CLEANED, group, (u64)slot);
@@ -3028,7 +2996,6 @@ static void program_csg_slot(struct kbase_queue_group *group, s8 slot, u8 prio)
 
 	/* Update status before rings the door-bell, marking ready => run */
 	atomic_set(&csg_slot->state, CSG_SLOT_READY2RUN);
-	csg_slot->trigger_jiffies = jiffies;
 	csg_slot->priority = prio;
 
 	/* Trace the programming of the CSG on the slot */
@@ -3131,8 +3098,9 @@ static void sched_evict_group(struct kbase_queue_group *group, bool fault,
 static int term_group_sync(struct kbase_queue_group *group)
 {
 	struct kbase_device *kbdev = group->kctx->kbdev;
-	const unsigned int fw_timeout_ms = kbase_get_timeout_ms(kbdev, CSF_FIRMWARE_TIMEOUT);
-	long remaining = kbase_csf_timeout_in_jiffies(fw_timeout_ms);
+	const unsigned int group_term_timeout_ms =
+		kbase_get_timeout_ms(kbdev, CSF_CSG_TERM_TIMEOUT);
+	long remaining = kbase_csf_timeout_in_jiffies(group_term_timeout_ms);
 	int err = 0;
 
 	term_csg_slot(group);
@@ -3148,11 +3116,11 @@ static int term_group_sync(struct kbase_queue_group *group)
 		dev_warn(
 			kbdev->dev,
 			"[%llu] term request timeout (%d ms) for group %d of context %d_%d on slot %d",
-			kbase_backend_get_cycle_cnt(kbdev), fw_timeout_ms, group->handle,
+			kbase_backend_get_cycle_cnt(kbdev), group_term_timeout_ms, group->handle,
 			group->kctx->tgid, group->kctx->id, group->csg_nr);
 		if (kbase_csf_firmware_ping_wait(kbdev, FW_PING_AFTER_ERROR_TIMEOUT_MS))
 			error_type = DF_PING_REQUEST_TIMEOUT;
-		kbase_debug_csf_fault_notify(kbdev, group->kctx, error_type);
+		schedule_actions_trigger_df(kbdev, group->kctx, error_type);
 		if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_NONE))
 			kbase_reset_gpu(kbdev);
 
@@ -3593,8 +3561,7 @@ static void program_suspending_csg_slots(struct kbase_device *kbdev)
 
 	while (!bitmap_empty(slot_mask, MAX_SUPPORTED_CSGS)) {
 		DECLARE_BITMAP(changed, MAX_SUPPORTED_CSGS);
-		long remaining = kbase_csf_timeout_in_jiffies(
-			kbase_get_timeout_ms(kbdev, CSF_CSG_SUSPEND_TIMEOUT));
+		long remaining = kbase_csf_timeout_in_jiffies(kbdev->csf.csg_suspend_timeout_ms);
 
 		bitmap_copy(changed, slot_mask, MAX_SUPPORTED_CSGS);
 
@@ -4138,7 +4105,7 @@ static void scheduler_group_check_protm_enter(struct kbase_device *const kbdev,
 	 * entry to protected mode happens with a memory region being locked and
 	 * the same region is then accessed by the GPU in protected mode.
 	 */
-	mutex_lock(&kbdev->mmu_hw_mutex);
+	down_write(&kbdev->csf.mmu_sync_sem);
 	spin_lock_irqsave(&scheduler->interrupt_lock, flags);
 
 	/* Check if the previous transition to enter & exit the protected
@@ -4204,7 +4171,7 @@ static void scheduler_group_check_protm_enter(struct kbase_device *const kbdev,
 				spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
 
 				err = kbase_csf_wait_protected_mode_enter(kbdev);
-				mutex_unlock(&kbdev->mmu_hw_mutex);
+				up_write(&kbdev->csf.mmu_sync_sem);
 
 				if (err)
 					schedule_actions_trigger_df(
@@ -4219,7 +4186,7 @@ static void scheduler_group_check_protm_enter(struct kbase_device *const kbdev,
 	}
 
 	spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
-	mutex_unlock(&kbdev->mmu_hw_mutex);
+	up_write(&kbdev->csf.mmu_sync_sem);
 }
 
 /**
@@ -4798,25 +4765,27 @@ static int suspend_active_groups_on_powerdown(struct kbase_device *kbdev, bool s
 	struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
 	DECLARE_BITMAP(slot_mask, MAX_SUPPORTED_CSGS) = { 0 };
 
-	int ret = suspend_active_queue_groups(kbdev, slot_mask);
+	if (unlikely(suspend_active_queue_groups(kbdev, slot_mask, false))) {
+		if (!is_gpu_level_suspend_supported(kbdev)) {
+			const int csg_nr = ffs(slot_mask[0]) - 1;
+			struct kbase_queue_group *group;
+			enum dumpfault_error_type error_type = DF_CSG_SUSPEND_TIMEOUT;
 
-	if (unlikely(ret)) {
-		const int csg_nr = ffs(slot_mask[0]) - 1;
-		struct kbase_queue_group *group = scheduler->csg_slots[csg_nr].resident_group;
-		enum dumpfault_error_type error_type = DF_CSG_SUSPEND_TIMEOUT;
+			group = scheduler->csg_slots[csg_nr].resident_group;
 
-		/* The suspend of CSGs failed,
-		 * trigger the GPU reset to be in a deterministic state.
-		 */
-		dev_warn(
-			kbdev->dev,
-			"[%llu] Timeout (%d ms) waiting for CSG slots to suspend on power down, slot_mask: 0x%*pb\n",
-			kbase_backend_get_cycle_cnt(kbdev),
-			kbase_get_timeout_ms(kbdev, CSF_FIRMWARE_TIMEOUT),
-			kbdev->csf.global_iface.group_num, slot_mask);
-		if (kbase_csf_firmware_ping_wait(kbdev, FW_PING_AFTER_ERROR_TIMEOUT_MS))
-			error_type = DF_PING_REQUEST_TIMEOUT;
-		schedule_actions_trigger_df(kbdev, group->kctx, error_type);
+			/* The suspend of CSGs failed,
+			 * trigger the GPU reset to be in a deterministic state.
+			 */
+			dev_warn(
+				kbdev->dev,
+				"[%llu] Timeout (%d ms) waiting for CSG slots to suspend on power down, slot_mask: 0x%*pb\n",
+				kbase_backend_get_cycle_cnt(kbdev),
+				kbdev->csf.csg_suspend_timeout_ms,
+				kbdev->csf.global_iface.group_num, slot_mask);
+			if (kbase_csf_firmware_ping_wait(kbdev, FW_PING_AFTER_ERROR_TIMEOUT_MS))
+				error_type = DF_PING_REQUEST_TIMEOUT;
+			schedule_actions_trigger_df(kbdev, group->kctx, error_type);
+		}
 
 		if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_NONE))
 			kbase_reset_gpu(kbdev);
@@ -4824,6 +4793,8 @@ static int suspend_active_groups_on_powerdown(struct kbase_device *kbdev, bool s
 		return -1;
 	}
 
+	kbdev->csf.mcu_halted = false;
+
 	/* Check if the groups became active whilst the suspend was ongoing,
 	 * but only for the case where the system suspend is not in progress
 	 */
@@ -4841,7 +4812,7 @@ static int suspend_active_groups_on_powerdown(struct kbase_device *kbdev, bool s
  * Returns false if any of the queues inside any of the groups that have been
  * assigned a physical CSG slot have work to execute, or have executed work
  * since having received a GPU idle notification. This function is used to
- * handle a rance condition between firmware reporting GPU idle and userspace
+ * handle a race condition between firmware reporting GPU idle and userspace
  * submitting more work by directly ringing a doorbell.
  *
  * Return: false if any queue inside any resident group has work to be processed
@@ -4983,19 +4954,23 @@ static bool scheduler_suspend_on_idle(struct kbase_device *kbdev)
 	}
 
 	dev_dbg(kbdev->dev, "Scheduler to be suspended on GPU becoming idle");
-	scheduler_suspend(kbdev);
-	cancel_tick_work(scheduler);
-	return true;
+	ret = scheduler_suspend(kbdev);
+	if (!ret) {
+		cancel_tick_work(scheduler);
+		return true;
+	}
+
+	return false;
 }
 
-static void gpu_idle_worker(struct work_struct *work)
+static void gpu_idle_worker(struct kbase_device *kbdev)
 {
-	struct kbase_device *kbdev =
-		container_of(work, struct kbase_device, csf.scheduler.gpu_idle_work);
 	struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
 	bool scheduler_is_idle_suspendable = false;
 	bool all_groups_suspended = false;
 
+	WARN_ON_ONCE(atomic_read(&scheduler->pending_gpu_idle_work) == 0);
+
 	KBASE_KTRACE_ADD(kbdev, SCHEDULER_GPU_IDLE_WORKER_START, NULL, 0u);
 
 #define __ENCODE_KTRACE_INFO(reset, idle, all_suspend) \
@@ -5005,7 +4980,7 @@ static void gpu_idle_worker(struct work_struct *work)
 		dev_warn(kbdev->dev, "Quit idle for failing to prevent gpu reset.\n");
 		KBASE_KTRACE_ADD(kbdev, SCHEDULER_GPU_IDLE_WORKER_END, NULL,
 				 __ENCODE_KTRACE_INFO(true, false, false));
-		return;
+		goto exit;
 	}
 	kbase_debug_csf_fault_wait_completion(kbdev);
 	mutex_lock(&scheduler->lock);
@@ -5014,7 +4989,7 @@ static void gpu_idle_worker(struct work_struct *work)
 	if (unlikely(scheduler->state == SCHED_BUSY)) {
 		mutex_unlock(&scheduler->lock);
 		kbase_reset_gpu_allow(kbdev);
-		return;
+		goto exit;
 	}
 #endif
 
@@ -5039,6 +5014,9 @@ static void gpu_idle_worker(struct work_struct *work)
 			 __ENCODE_KTRACE_INFO(false, scheduler_is_idle_suspendable,
 					      all_groups_suspended));
 #undef __ENCODE_KTRACE_INFO
+
+exit:
+	atomic_dec(&scheduler->pending_gpu_idle_work);
 }
 
 static int scheduler_prepare(struct kbase_device *kbdev)
@@ -5226,8 +5204,7 @@ static int wait_csg_slots_suspend(struct kbase_device *kbdev, unsigned long *slo
 	bitmap_copy(slot_mask_local, slot_mask, MAX_SUPPORTED_CSGS);
 
 	while (!bitmap_empty(slot_mask_local, MAX_SUPPORTED_CSGS)) {
-		long remaining = kbase_csf_timeout_in_jiffies(
-			kbase_get_timeout_ms(kbdev, CSF_CSG_SUSPEND_TIMEOUT));
+		long remaining = kbase_csf_timeout_in_jiffies(kbdev->csf.csg_suspend_timeout_ms);
 		DECLARE_BITMAP(changed, MAX_SUPPORTED_CSGS);
 
 		bitmap_copy(changed, slot_mask_local, MAX_SUPPORTED_CSGS);
@@ -5417,8 +5394,9 @@ static void schedule_actions(struct kbase_device *kbdev, bool is_tick)
 		 * steps and thus extending the previous tick's arrangement,
 		 * in particular, no alterations to on-slot CSGs.
 		 */
-		if (keep_lru_on_slots(kbdev))
+		if (keep_lru_on_slots(kbdev)) {
 			return;
+		}
 	}
 
 	if (is_tick)
@@ -5511,6 +5489,15 @@ redo_local_tock:
 	}
 
 	evict_lru_or_blocked_csg(kbdev);
+
+#ifdef KBASE_PM_RUNTIME
+	if (atomic_read(&scheduler->non_idle_offslot_grps))
+		set_bit(KBASE_GPU_NON_IDLE_OFF_SLOT_GROUPS_AVAILABLE,
+			&kbdev->pm.backend.gpu_sleep_allowed);
+	else
+		clear_bit(KBASE_GPU_NON_IDLE_OFF_SLOT_GROUPS_AVAILABLE,
+			  &kbdev->pm.backend.gpu_sleep_allowed);
+#endif /* KBASE_PM_RUNTIME */
 }
 
 /**
@@ -5544,9 +5531,10 @@ static bool can_skip_scheduling(struct kbase_device *kbdev)
 
 		spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
 		if (kbdev->pm.backend.exit_gpu_sleep_mode) {
-			int ret = scheduler_pm_active_after_sleep(kbdev, &flags);
+			int ret;
 
 			spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+			ret = scheduler_pm_active_after_sleep(kbdev);
 			if (!ret) {
 				scheduler->state = SCHED_INACTIVE;
 				KBASE_KTRACE_ADD(kbdev, SCHED_INACTIVE, NULL, scheduler->state);
@@ -5662,25 +5650,28 @@ exit_no_schedule_unlock:
 	kbase_reset_gpu_allow(kbdev);
 }
 
-static int suspend_active_queue_groups(struct kbase_device *kbdev, unsigned long *slot_mask)
+
+static int suspend_active_queue_groups(struct kbase_device *kbdev, unsigned long *slot_mask,
+				       bool reset)
 {
 	struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
 	u32 num_groups = kbdev->csf.global_iface.group_num;
+	struct kbase_queue_group *group;
 	u32 slot_num;
 	int ret;
 
 	lockdep_assert_held(&scheduler->lock);
 
-	for (slot_num = 0; slot_num < num_groups; slot_num++) {
-		struct kbase_queue_group *group = scheduler->csg_slots[slot_num].resident_group;
+		for (slot_num = 0; slot_num < num_groups; slot_num++) {
+			group = scheduler->csg_slots[slot_num].resident_group;
 
-		if (group) {
-			suspend_queue_group(group);
-			set_bit(slot_num, slot_mask);
+			if (group) {
+				suspend_queue_group(group);
+				set_bit(slot_num, slot_mask);
+			}
 		}
-	}
 
-	ret = wait_csg_slots_suspend(kbdev, slot_mask);
+		ret = wait_csg_slots_suspend(kbdev, slot_mask);
 	return ret;
 }
 
@@ -5693,7 +5684,7 @@ static int suspend_active_queue_groups_on_reset(struct kbase_device *kbdev)
 
 	mutex_lock(&scheduler->lock);
 
-	ret = suspend_active_queue_groups(kbdev, slot_mask);
+	ret = suspend_active_queue_groups(kbdev, slot_mask, true);
 
 	if (ret) {
 		dev_warn(
@@ -5830,9 +5821,12 @@ static void scheduler_inner_reset(struct kbase_device *kbdev)
 	WARN_ON(kbase_csf_scheduler_get_nr_active_csgs(kbdev));
 
 	/* Cancel any potential queued delayed work(s) */
-	cancel_work_sync(&kbdev->csf.scheduler.gpu_idle_work);
 	cancel_tick_work(scheduler);
 	cancel_tock_work(scheduler);
+	/* gpu_idle_worker() might already be running at this point, which
+	 * could decrement the pending_gpu_idle_work counter to below 0.
+	 * It is safer to let it run if one has already been scheduled.
+	 */
 	cancel_delayed_work_sync(&scheduler->ping_work);
 
 	mutex_lock(&scheduler->lock);
@@ -5850,22 +5844,35 @@ static void scheduler_inner_reset(struct kbase_device *kbdev)
 	scheduler->top_kctx = NULL;
 	scheduler->top_grp = NULL;
 
+	atomic_set(&scheduler->gpu_idle_timer_enabled, false);
+	atomic_set(&scheduler->fw_soi_enabled, false);
+
 	KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_TOP_GRP, scheduler->top_grp,
 				 scheduler->num_active_address_spaces |
 					 (((u64)scheduler->total_runnable_grps) << 32));
 
+#ifdef KBASE_PM_RUNTIME
+	if (scheduler->state == SCHED_SLEEPING) {
+#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD)
+		hrtimer_cancel(&scheduler->gpu_metrics_timer);
+#endif
+		scheduler->state = SCHED_SUSPENDED;
+		KBASE_KTRACE_ADD(kbdev, SCHED_SUSPENDED, NULL, scheduler->state);
+	}
+#endif
 	mutex_unlock(&scheduler->lock);
 }
 
 void kbase_csf_scheduler_reset(struct kbase_device *kbdev)
 {
 	struct kbase_context *kctx;
-
 	WARN_ON(!kbase_reset_gpu_is_active(kbdev));
 
 	KBASE_KTRACE_ADD(kbdev, SCHEDULER_RESET_START, NULL, 0u);
 
-	kbase_debug_csf_fault_wait_completion(kbdev);
+	if (kbase_reset_gpu_is_active(kbdev))
+		kbase_debug_csf_fault_wait_completion(kbdev);
+
 
 	if (scheduler_handle_reset_in_protected_mode(kbdev) &&
 	    !suspend_active_queue_groups_on_reset(kbdev)) {
@@ -6453,8 +6460,8 @@ static void check_sync_update_in_sleep_mode(struct kbase_device *kbdev)
  * check_group_sync_update_worker() - Check the sync wait condition for all the
  *                                    blocked queue groups
  *
- * @work:    Pointer to the context-specific work item for evaluating the wait
- *           condition for all the queue groups in idle_wait_groups list.
+ * @kctx: The context to evaluate the wait condition for all the queue groups
+ *        in idle_wait_groups list.
  *
  * This function checks the gpu queues of all the groups present in both
  * idle_wait_groups list of a context and all on slot idle groups (if GPU
@@ -6464,27 +6471,14 @@ static void check_sync_update_in_sleep_mode(struct kbase_device *kbdev)
  * runnable groups so that Scheduler can consider scheduling the group
  * in next tick or exit protected mode.
  */
-static void check_group_sync_update_worker(struct work_struct *work)
+static void check_group_sync_update_worker(struct kbase_context *kctx)
 {
-	struct kbase_context *const kctx =
-		container_of(work, struct kbase_context, csf.sched.sync_update_work);
 	struct kbase_device *const kbdev = kctx->kbdev;
 	struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
 	bool sync_updated = false;
 
 	mutex_lock(&scheduler->lock);
 
-#if IS_ENABLED(CONFIG_DEBUG_FS)
-	if (unlikely(scheduler->state == SCHED_BUSY)) {
-		queue_work(atomic_read(&kctx->prioritized) ?
-					 kctx->csf.sched.sync_update_wq_high_prio :
-					 kctx->csf.sched.sync_update_wq_normal_prio,
-			   &kctx->csf.sched.sync_update_work);
-		mutex_unlock(&scheduler->lock);
-		return;
-	}
-#endif
-
 	KBASE_KTRACE_ADD(kbdev, SCHEDULER_GROUP_SYNC_UPDATE_WORKER_START, kctx, 0u);
 	if (kctx->csf.sched.num_idle_wait_grps != 0) {
 		struct kbase_queue_group *group, *temp;
@@ -6522,13 +6516,10 @@ static void check_group_sync_update_worker(struct work_struct *work)
 static enum kbase_csf_event_callback_action check_group_sync_update_cb(void *param)
 {
 	struct kbase_context *const kctx = param;
-	struct workqueue_struct *wq = atomic_read(&kctx->prioritized) ?
-						    kctx->csf.sched.sync_update_wq_high_prio :
-						    kctx->csf.sched.sync_update_wq_normal_prio;
 
 	KBASE_KTRACE_ADD(kctx->kbdev, SCHEDULER_GROUP_SYNC_UPDATE_EVENT, kctx, 0u);
 
-	queue_work(wq, &kctx->csf.sched.sync_update_work);
+	kbase_csf_scheduler_enqueue_sync_update_work(kctx);
 
 	return KBASE_CSF_EVENT_CALLBACK_KEEP;
 }
@@ -6539,6 +6530,8 @@ int kbase_csf_scheduler_context_init(struct kbase_context *kctx)
 	int err;
 	struct kbase_device *kbdev = kctx->kbdev;
 
+	WARN_ON_ONCE(!kbdev->csf.scheduler.kthread_running);
+
 #if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD)
 	err = gpu_metrics_ctx_init(kctx);
 	if (err)
@@ -6551,25 +6544,7 @@ int kbase_csf_scheduler_context_init(struct kbase_context *kctx)
 
 	INIT_LIST_HEAD(&kctx->csf.sched.idle_wait_groups);
 
-	kctx->csf.sched.sync_update_wq_high_prio = alloc_ordered_workqueue(
-		"mali_sync_wq_%i_high_prio", WQ_UNBOUND | WQ_HIGHPRI, kctx->tgid);
-	if (kctx->csf.sched.sync_update_wq_high_prio == NULL) {
-		dev_err(kbdev->dev,
-			"Failed to initialize scheduler context high-priority workqueue");
-		err = -ENOMEM;
-		goto alloc_high_prio_wq_failed;
-	}
-
-	kctx->csf.sched.sync_update_wq_normal_prio =
-		alloc_ordered_workqueue("mali_sync_wq_%i_normal_prio", 0, kctx->tgid);
-	if (kctx->csf.sched.sync_update_wq_normal_prio == NULL) {
-		dev_err(kbdev->dev,
-			"Failed to initialize scheduler context normal-priority workqueue");
-		err = -ENOMEM;
-		goto alloc_normal_prio_wq_failed;
-	}
-
-	INIT_WORK(&kctx->csf.sched.sync_update_work, check_group_sync_update_worker);
+	INIT_LIST_HEAD(&kctx->csf.sched.sync_update_work);
 
 	kbase_csf_tiler_heap_reclaim_ctx_init(kctx);
 
@@ -6583,10 +6558,6 @@ int kbase_csf_scheduler_context_init(struct kbase_context *kctx)
 	return err;
 
 event_wait_add_failed:
-	destroy_workqueue(kctx->csf.sched.sync_update_wq_normal_prio);
-alloc_normal_prio_wq_failed:
-	destroy_workqueue(kctx->csf.sched.sync_update_wq_high_prio);
-alloc_high_prio_wq_failed:
 	kbase_ctx_sched_remove_ctx(kctx);
 #if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD)
 	gpu_metrics_ctx_term(kctx);
@@ -6597,9 +6568,10 @@ alloc_high_prio_wq_failed:
 void kbase_csf_scheduler_context_term(struct kbase_context *kctx)
 {
 	kbase_csf_event_wait_remove(kctx, check_group_sync_update_cb, kctx);
-	cancel_work_sync(&kctx->csf.sched.sync_update_work);
-	destroy_workqueue(kctx->csf.sched.sync_update_wq_normal_prio);
-	destroy_workqueue(kctx->csf.sched.sync_update_wq_high_prio);
+
+	/* Drain a pending SYNC_UPDATE work if any */
+	kbase_csf_scheduler_wait_for_kthread_pending_work(kctx->kbdev,
+							  &kctx->csf.pending_sync_update);
 
 	kbase_ctx_sched_remove_ctx(kctx);
 #if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD)
@@ -6607,53 +6579,157 @@ void kbase_csf_scheduler_context_term(struct kbase_context *kctx)
 #endif /* CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD */
 }
 
+static void handle_pending_sync_update_works(struct kbase_csf_scheduler *scheduler)
+{
+	struct kbase_context *sync_update_ctx;
+
+	if (atomic_cmpxchg(&scheduler->pending_sync_update_works, true, false) == false)
+		return;
+
+	do {
+		unsigned long flags;
+
+		spin_lock_irqsave(&scheduler->sync_update_work_ctxs_lock, flags);
+		sync_update_ctx = NULL;
+		if (!list_empty(&scheduler->sync_update_work_ctxs)) {
+			sync_update_ctx = list_first_entry(&scheduler->sync_update_work_ctxs,
+							   struct kbase_context,
+							   csf.sched.sync_update_work);
+			list_del_init(&sync_update_ctx->csf.sched.sync_update_work);
+		}
+		spin_unlock_irqrestore(&scheduler->sync_update_work_ctxs_lock, flags);
+
+		if (sync_update_ctx != NULL) {
+			WARN_ON_ONCE(atomic_read(&sync_update_ctx->csf.pending_sync_update) == 0);
+			check_group_sync_update_worker(sync_update_ctx);
+			atomic_dec(&sync_update_ctx->csf.pending_sync_update);
+		}
+	} while (sync_update_ctx != NULL);
+}
+
+static void handle_pending_protm_requests(struct kbase_csf_scheduler *scheduler)
+{
+	struct kbase_queue_group *protm_grp;
+
+	if (atomic_cmpxchg(&scheduler->pending_protm_event_works, true, false) == false)
+		return;
+
+	do {
+		unsigned long flags;
+
+		spin_lock_irqsave(&scheduler->protm_event_work_grps_lock, flags);
+		protm_grp = NULL;
+		if (!list_empty(&scheduler->protm_event_work_grps)) {
+			protm_grp = list_first_entry(&scheduler->protm_event_work_grps,
+						     struct kbase_queue_group, protm_event_work);
+			list_del_init(&protm_grp->protm_event_work);
+		}
+		spin_unlock_irqrestore(&scheduler->protm_event_work_grps_lock, flags);
+
+		if (protm_grp != NULL) {
+			WARN_ON_ONCE(atomic_read(&protm_grp->pending_protm_event_work) == 0);
+			kbase_csf_process_protm_event_request(protm_grp);
+			atomic_dec(&protm_grp->pending_protm_event_work);
+		}
+	} while (protm_grp != NULL);
+}
+
+static void handle_pending_kcpuq_commands(struct kbase_csf_scheduler *scheduler)
+{
+	struct kbase_kcpu_command_queue *kcpuq;
+
+	if (atomic_cmpxchg(&scheduler->pending_kcpuq_works, true, false) == false)
+		return;
+
+	do {
+		unsigned long flags;
+
+		spin_lock_irqsave(&scheduler->kcpuq_work_queues_lock, flags);
+		kcpuq = NULL;
+		if (!list_empty(&scheduler->kcpuq_work_queues)) {
+			kcpuq = list_first_entry(&scheduler->kcpuq_work_queues,
+						 struct kbase_kcpu_command_queue, high_prio_work);
+			list_del_init(&kcpuq->high_prio_work);
+		}
+		spin_unlock_irqrestore(&scheduler->kcpuq_work_queues_lock, flags);
+
+		if (kcpuq != NULL) {
+			WARN_ON_ONCE(atomic_read(&kcpuq->pending_kick) == 0);
+
+			mutex_lock(&kcpuq->lock);
+			kbase_csf_kcpu_queue_process(kcpuq, false);
+			mutex_unlock(&kcpuq->lock);
+
+			atomic_dec(&kcpuq->pending_kick);
+		}
+	} while (kcpuq != NULL);
+}
+
+static void handle_pending_queue_kicks(struct kbase_device *kbdev)
+{
+	struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
+	struct kbase_queue *queue;
+
+	if (atomic_cmpxchg(&kbdev->csf.pending_gpuq_kicks, true, false) == false)
+		return;
+
+	do {
+		u8 prio;
+
+		spin_lock(&kbdev->csf.pending_gpuq_kick_queues_lock);
+		queue = NULL;
+		for (prio = 0; prio != KBASE_QUEUE_GROUP_PRIORITY_COUNT; ++prio) {
+			if (!list_empty(&kbdev->csf.pending_gpuq_kick_queues[prio])) {
+				queue = list_first_entry(&kbdev->csf.pending_gpuq_kick_queues[prio],
+							 struct kbase_queue, pending_kick_link);
+				list_del_init(&queue->pending_kick_link);
+				break;
+			}
+		}
+		spin_unlock(&kbdev->csf.pending_gpuq_kick_queues_lock);
+
+		if (queue != NULL) {
+			WARN_ONCE(
+				prio != queue->group_priority,
+				"Queue %pK has priority %u but instead its kick was handled at priority %u",
+				(void *)queue, queue->group_priority, prio);
+			WARN_ON_ONCE(atomic_read(&queue->pending_kick) == 0);
+
+			kbase_csf_process_queue_kick(queue);
+
+			/* Perform a scheduling tock for high-priority queue groups if
+			 * required.
+			 */
+			BUILD_BUG_ON(KBASE_QUEUE_GROUP_PRIORITY_REALTIME != 0);
+			BUILD_BUG_ON(KBASE_QUEUE_GROUP_PRIORITY_HIGH != 1);
+			if ((prio <= KBASE_QUEUE_GROUP_PRIORITY_HIGH) &&
+			    atomic_read(&scheduler->pending_tock_work))
+				schedule_on_tock(kbdev);
+		}
+	} while (queue != NULL);
+}
+
 static int kbase_csf_scheduler_kthread(void *data)
 {
 	struct kbase_device *const kbdev = data;
 	struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
 
 	while (scheduler->kthread_running) {
-		struct kbase_queue *queue;
-
 		if (wait_for_completion_interruptible(&scheduler->kthread_signal) != 0)
 			continue;
 		reinit_completion(&scheduler->kthread_signal);
 
-		/* Iterate through queues with pending kicks */
-		do {
-			u8 prio;
+		/*
+		 * The order in which these requests are handled is based on
+		 * how they would influence each other's decisions. As a
+		 * result, the tick & tock requests must be handled after all
+		 * other requests, but before the GPU IDLE work.
+		 */
 
-			spin_lock(&kbdev->csf.pending_gpuq_kicks_lock);
-			queue = NULL;
-			for (prio = 0; prio != KBASE_QUEUE_GROUP_PRIORITY_COUNT; ++prio) {
-				if (!list_empty(&kbdev->csf.pending_gpuq_kicks[prio])) {
-					queue = list_first_entry(
-						&kbdev->csf.pending_gpuq_kicks[prio],
-						struct kbase_queue, pending_kick_link);
-					list_del_init(&queue->pending_kick_link);
-					break;
-				}
-			}
-			spin_unlock(&kbdev->csf.pending_gpuq_kicks_lock);
-
-			if (queue != NULL) {
-				WARN_ONCE(
-					prio != queue->group_priority,
-					"Queue %pK has priority %hhu but instead its kick was handled at priority %hhu",
-					(void *)queue, queue->group_priority, prio);
-
-				kbase_csf_process_queue_kick(queue);
-
-				/* Perform a scheduling tock for high-priority queue groups if
-				 * required.
-				 */
-				BUILD_BUG_ON(KBASE_QUEUE_GROUP_PRIORITY_REALTIME != 0);
-				BUILD_BUG_ON(KBASE_QUEUE_GROUP_PRIORITY_HIGH != 1);
-				if ((prio <= KBASE_QUEUE_GROUP_PRIORITY_HIGH) &&
-				    atomic_read(&scheduler->pending_tock_work))
-					schedule_on_tock(kbdev);
-			}
-		} while (queue != NULL);
+		handle_pending_sync_update_works(scheduler);
+		handle_pending_protm_requests(scheduler);
+		handle_pending_kcpuq_commands(scheduler);
+		handle_pending_queue_kicks(kbdev);
 
 		/* Check if we need to perform a scheduling tick/tock. A tick
 		 * event shall override a tock event but not vice-versa.
@@ -6665,6 +6741,15 @@ static int kbase_csf_scheduler_kthread(void *data)
 			schedule_on_tock(kbdev);
 		}
 
+		/* Drain pending GPU idle works */
+		while (atomic_read(&scheduler->pending_gpu_idle_work) > 0)
+			gpu_idle_worker(kbdev);
+
+		/* Update GLB_IDLE timer/FW Sleep-on-Idle config (which might
+		 * have been disabled during FW boot et al.).
+		 */
+		kbase_csf_firmware_soi_update(kbdev);
+
 		dev_dbg(kbdev->dev, "Waking up for event after a scheduling iteration.");
 		wake_up_all(&kbdev->csf.event_wait);
 	}
@@ -6694,7 +6779,7 @@ int kbase_csf_scheduler_init(struct kbase_device *kbdev)
 	scheduler->kthread_running = true;
 	scheduler->gpuq_kthread =
 		kthread_run(&kbase_csf_scheduler_kthread, kbdev, "mali-gpuq-kthread");
-	if (!scheduler->gpuq_kthread) {
+	if (IS_ERR_OR_NULL(scheduler->gpuq_kthread)) {
 		kfree(scheduler->csg_slots);
 		scheduler->csg_slots = NULL;
 
@@ -6725,6 +6810,9 @@ int kbase_csf_scheduler_init(struct kbase_device *kbdev)
 	scheduler->gpu_metrics_timer.function = gpu_metrics_timer_callback;
 #endif /* CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD */
 
+	atomic_set(&scheduler->gpu_idle_timer_enabled, false);
+	atomic_set(&scheduler->fw_soi_enabled, false);
+
 	return kbase_csf_mcu_shared_regs_data_init(kbdev);
 }
 
@@ -6734,12 +6822,6 @@ int kbase_csf_scheduler_early_init(struct kbase_device *kbdev)
 
 	atomic_set(&scheduler->timer_enabled, true);
 
-	scheduler->idle_wq = alloc_ordered_workqueue("csf_scheduler_gpu_idle_wq", WQ_HIGHPRI);
-	if (!scheduler->idle_wq) {
-		dev_err(kbdev->dev, "Failed to allocate GPU idle scheduler workqueue\n");
-		return -ENOMEM;
-	}
-
 	INIT_DEFERRABLE_WORK(&scheduler->ping_work, firmware_aliveness_monitor);
 
 	mutex_init(&scheduler->lock);
@@ -6757,20 +6839,30 @@ int kbase_csf_scheduler_early_init(struct kbase_device *kbdev)
 	KBASE_KTRACE_ADD(kbdev, SCHED_SUSPENDED, NULL, scheduler->state);
 	scheduler->csg_scheduling_period_ms = CSF_SCHEDULER_TIME_TICK_MS;
 	scheduler_doorbell_init(kbdev);
-	INIT_WORK(&scheduler->gpu_idle_work, gpu_idle_worker);
 	hrtimer_init(&scheduler->tick_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
 	scheduler->tick_timer.function = tick_timer_callback;
 
-	kbase_csf_tiler_heap_reclaim_mgr_init(kbdev);
+	atomic_set(&scheduler->pending_sync_update_works, false);
+	spin_lock_init(&scheduler->sync_update_work_ctxs_lock);
+	INIT_LIST_HEAD(&scheduler->sync_update_work_ctxs);
+	atomic_set(&scheduler->pending_protm_event_works, false);
+	spin_lock_init(&scheduler->protm_event_work_grps_lock);
+	INIT_LIST_HEAD(&scheduler->protm_event_work_grps);
+	atomic_set(&scheduler->pending_kcpuq_works, false);
+	spin_lock_init(&scheduler->kcpuq_work_queues_lock);
+	INIT_LIST_HEAD(&scheduler->kcpuq_work_queues);
+	atomic_set(&scheduler->pending_tick_work, false);
+	atomic_set(&scheduler->pending_tock_work, false);
+	atomic_set(&scheduler->pending_gpu_idle_work, 0);
 
-	return 0;
+	return kbase_csf_tiler_heap_reclaim_mgr_init(kbdev);
 }
 
 void kbase_csf_scheduler_term(struct kbase_device *kbdev)
 {
 	struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
 
-	if (scheduler->gpuq_kthread) {
+	if (!IS_ERR_OR_NULL(scheduler->gpuq_kthread)) {
 		scheduler->kthread_running = false;
 		complete(&scheduler->kthread_signal);
 		kthread_stop(scheduler->gpuq_kthread);
@@ -6784,7 +6876,6 @@ void kbase_csf_scheduler_term(struct kbase_device *kbdev)
 		 * to be active at the time of Driver unload.
 		 */
 		WARN_ON(kbase_csf_scheduler_get_nr_active_csgs(kbdev));
-		flush_work(&kbdev->csf.scheduler.gpu_idle_work);
 		mutex_lock(&kbdev->csf.scheduler.lock);
 
 		if (kbdev->csf.scheduler.state != SCHED_SUSPENDED) {
@@ -6811,9 +6902,6 @@ void kbase_csf_scheduler_term(struct kbase_device *kbdev)
 
 void kbase_csf_scheduler_early_term(struct kbase_device *kbdev)
 {
-	if (kbdev->csf.scheduler.idle_wq)
-		destroy_workqueue(kbdev->csf.scheduler.idle_wq);
-
 	kbase_csf_tiler_heap_reclaim_mgr_term(kbdev);
 	mutex_destroy(&kbdev->csf.scheduler.lock);
 }
@@ -6926,9 +7014,10 @@ int kbase_csf_scheduler_pm_suspend_no_lock(struct kbase_device *kbdev)
 			dev_warn(kbdev->dev, "failed to suspend active groups");
 			goto exit;
 		} else {
-			dev_info(kbdev->dev, "Scheduler PM suspend");
-			scheduler_suspend(kbdev);
-			cancel_tick_work(scheduler);
+			dev_dbg(kbdev->dev, "Scheduler PM suspend");
+			result = scheduler_suspend(kbdev);
+			if (!result)
+				cancel_tick_work(scheduler);
 		}
 	}
 
@@ -6968,7 +7057,7 @@ void kbase_csf_scheduler_pm_resume_no_lock(struct kbase_device *kbdev)
 
 	lockdep_assert_held(&scheduler->lock);
 	if ((scheduler->total_runnable_grps > 0) && (scheduler->state == SCHED_SUSPENDED)) {
-		dev_info(kbdev->dev, "Scheduler PM resume");
+		dev_dbg(kbdev->dev, "Scheduler PM resume");
 		scheduler_wakeup(kbdev, true);
 	}
 }
@@ -6989,7 +7078,7 @@ void kbase_csf_scheduler_pm_active(struct kbase_device *kbdev)
 	 * the CSGs before powering down the GPU.
 	 */
 	mutex_lock(&kbdev->csf.scheduler.lock);
-	scheduler_pm_active_handle_suspend(kbdev, KBASE_PM_SUSPEND_HANDLER_NOT_POSSIBLE);
+	scheduler_pm_active_handle_suspend(kbdev, KBASE_PM_SUSPEND_HANDLER_NOT_POSSIBLE, false);
 	mutex_unlock(&kbdev->csf.scheduler.lock);
 }
 KBASE_EXPORT_TEST_API(kbase_csf_scheduler_pm_active);
@@ -7013,9 +7102,7 @@ static int scheduler_wait_mcu_active(struct kbase_device *kbdev, bool killable_w
 
 	kbase_pm_lock(kbdev);
 	WARN_ON(!kbdev->pm.active_count);
-	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
 	WARN_ON(!scheduler->pm_active_count);
-	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
 	kbase_pm_unlock(kbdev);
 
 	if (killable_wait)
@@ -7096,6 +7183,65 @@ int kbase_csf_scheduler_handle_runtime_suspend(struct kbase_device *kbdev)
 	return 0;
 }
 
+void kbase_csf_scheduler_enqueue_sync_update_work(struct kbase_context *kctx)
+{
+	struct kbase_csf_scheduler *const scheduler = &kctx->kbdev->csf.scheduler;
+	unsigned long flags;
+
+	spin_lock_irqsave(&scheduler->sync_update_work_ctxs_lock, flags);
+	if (list_empty(&kctx->csf.sched.sync_update_work)) { /* skip if already linked */
+		list_add_tail(&kctx->csf.sched.sync_update_work, &scheduler->sync_update_work_ctxs);
+		atomic_inc(&kctx->csf.pending_sync_update);
+		if (atomic_cmpxchg(&scheduler->pending_sync_update_works, false, true) == false)
+			complete(&scheduler->kthread_signal); /* first setter wakes kthread */
+	}
+	spin_unlock_irqrestore(&scheduler->sync_update_work_ctxs_lock, flags);
+}
+
+void kbase_csf_scheduler_enqueue_protm_event_work(struct kbase_queue_group *group)
+{
+	struct kbase_context *const kctx = group->kctx;
+	struct kbase_csf_scheduler *const scheduler = &kctx->kbdev->csf.scheduler;
+	unsigned long flags;
+
+	spin_lock_irqsave(&scheduler->protm_event_work_grps_lock, flags);
+	if (list_empty(&group->protm_event_work)) { /* skip if already linked */
+		list_add_tail(&group->protm_event_work, &scheduler->protm_event_work_grps);
+		atomic_inc(&group->pending_protm_event_work);
+		if (atomic_cmpxchg(&scheduler->pending_protm_event_works, false, true) == false)
+			complete(&scheduler->kthread_signal); /* first setter wakes kthread */
+	}
+	spin_unlock_irqrestore(&scheduler->protm_event_work_grps_lock, flags);
+}
+
+void kbase_csf_scheduler_enqueue_kcpuq_work(struct kbase_kcpu_command_queue *queue)
+{
+	struct kbase_csf_scheduler *const scheduler = &queue->kctx->kbdev->csf.scheduler;
+	unsigned long flags;
+
+	spin_lock_irqsave(&scheduler->kcpuq_work_queues_lock, flags);
+	if (list_empty(&queue->high_prio_work)) { /* skip if already linked */
+		list_add_tail(&queue->high_prio_work, &scheduler->kcpuq_work_queues);
+		atomic_inc(&queue->pending_kick);
+		if (atomic_cmpxchg(&scheduler->pending_kcpuq_works, false, true) == false)
+			complete(&scheduler->kthread_signal); /* first setter wakes kthread */
+	}
+	spin_unlock_irqrestore(&scheduler->kcpuq_work_queues_lock, flags);
+}
+
+void kbase_csf_scheduler_wait_for_kthread_pending_work(struct kbase_device *kbdev,
+						       atomic_t *pending)
+{
+	/*
+	 * Kick kbase_csf_scheduler_kthread() so that it runs an iteration,
+	 * then sleep until @pending reads zero. The kthread wakes up
+	 * event_wait at the end of each iteration, so the check is re-run.
+	 */
+
+	complete(&kbdev->csf.scheduler.kthread_signal);
+	wait_event(kbdev->csf.event_wait, atomic_read(pending) == 0);
+}
+
 void kbase_csf_scheduler_reval_idleness_post_sleep(struct kbase_device *kbdev)
 {
 	u32 csg_nr;
diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_scheduler.h b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_scheduler.h
index 5047092d6650..915945bb495e 100644
--- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_scheduler.h
+++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_scheduler.h
@@ -1,7 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
 /*
  *
- * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2024 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -235,7 +235,8 @@ void kbase_csf_scheduler_early_term(struct kbase_device *kbdev);
  * No explicit re-initialization is done for CSG & CS interface I/O pages;
  * instead, that happens implicitly on firmware reload.
  *
- * Should be called only after initiating the GPU reset.
+ * Should be called either after initiating the GPU reset or when an MCU reset
+ * is expected to follow, such as in the GPU_LOST case.
  */
 void kbase_csf_scheduler_reset(struct kbase_device *kbdev);
 
@@ -487,6 +488,48 @@ static inline bool kbase_csf_scheduler_all_csgs_idle(struct kbase_device *kbdev)
 			    kbdev->csf.global_iface.group_num);
 }
 
+/**
+ * kbase_csf_scheduler_enqueue_sync_update_work() - Add a context to the list
+ *                                                  of contexts to handle
+ *                                                  SYNC_UPDATE events.
+ *
+ * @kctx: The context to handle SYNC_UPDATE event
+ *
+ * This function wakes up kbase_csf_scheduler_kthread() to handle pending
+ * SYNC_UPDATE events for all contexts.
+ */
+void kbase_csf_scheduler_enqueue_sync_update_work(struct kbase_context *kctx);
+
+/**
+ * kbase_csf_scheduler_enqueue_protm_event_work() - Add a group to the list
+ *                                                  of groups to handle
+ *                                                  PROTM requests.
+ *
+ * @group: The group to handle protected mode request
+ *
+ * This function wakes up kbase_csf_scheduler_kthread() to handle pending
+ * protected mode requests for all groups.
+ */
+void kbase_csf_scheduler_enqueue_protm_event_work(struct kbase_queue_group *group);
+
+/**
+ * kbase_csf_scheduler_enqueue_kcpuq_work() - Wake up kbase_csf_scheduler_kthread() to process
+ *                                            pending commands for a KCPU queue.
+ *
+ * @queue: The queue to process pending commands for
+ */
+void kbase_csf_scheduler_enqueue_kcpuq_work(struct kbase_kcpu_command_queue *queue);
+
+/**
+ * kbase_csf_scheduler_wait_for_kthread_pending_work - Wait until a pending work has completed in
+ *                                                     kbase_csf_scheduler_kthread().
+ *
+ * @kbdev: Instance of a GPU platform device that implements a CSF interface
+ * @pending: Counter of pending work; the wait ends once it reads zero
+ */
+void kbase_csf_scheduler_wait_for_kthread_pending_work(struct kbase_device *kbdev,
+						       atomic_t *pending);
+
 /**
  * kbase_csf_scheduler_invoke_tick() - Invoke the scheduling tick
  *
@@ -591,11 +634,8 @@ int kbase_csf_scheduler_handle_runtime_suspend(struct kbase_device *kbdev);
  * @kbdev: Pointer to the device
  *
  * This function is called when a GPU idle IRQ has been raised.
- *
- * Return: true if the PM state machine needs to be invoked after the processing
- *         of GPU idle irq, otherwise false.
  */
-bool kbase_csf_scheduler_process_gpu_idle_event(struct kbase_device *kbdev);
+void kbase_csf_scheduler_process_gpu_idle_event(struct kbase_device *kbdev);
 
 /**
  * kbase_csf_scheduler_get_nr_active_csgs() - Get the number of active CSGs
@@ -653,4 +693,6 @@ void kbase_csf_scheduler_force_wakeup(struct kbase_device *kbdev);
 void kbase_csf_scheduler_force_sleep(struct kbase_device *kbdev);
 #endif
 
+bool is_gpu_level_suspend_supported(struct kbase_device *const kbdev);
+
 #endif /* _KBASE_CSF_SCHEDULER_H_ */
diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_sync.c b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_sync.c
index aa88b5f59d3b..27b792500bdf 100644
--- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_sync.c
+++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_sync.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
 /*
  *
- * (C) COPYRIGHT 2023 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2023-2024 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -117,13 +117,13 @@ static void kbasep_csf_sync_print_kcpu_fence_wait_or_signal(char *buffer, int *l
 	timeline_name = fence->ops->get_timeline_name(fence);
 	is_signaled = info.status > 0;
 
-	*length += snprintf(buffer + *length, CSF_SYNC_DUMP_SIZE - *length,
-			    "cmd:%s obj:0x%pK live_value:0x%.8x | ", cmd_name, fence, is_signaled);
+	*length += scnprintf(buffer + *length, CSF_SYNC_DUMP_SIZE - *length,
+			     "cmd:%s obj:0x%pK live_value:0x%.8x | ", cmd_name, fence, is_signaled);
 
 	/* Note: fence->seqno was u32 until 5.1 kernel, then u64 */
-	*length += snprintf(buffer + *length, CSF_SYNC_DUMP_SIZE - *length,
-			    "timeline_name:%s timeline_context:0x%.16llx fence_seqno:0x%.16llx",
-			    timeline_name, fence->context, (u64)fence->seqno);
+	*length += scnprintf(buffer + *length, CSF_SYNC_DUMP_SIZE - *length,
+			     "timeline_name:%s timeline_context:0x%.16llx fence_seqno:0x%.16llx",
+			     timeline_name, fence->context, (u64)fence->seqno);
 
 	kbase_fence_put(fence);
 }
@@ -149,19 +149,19 @@ static void kbasep_csf_sync_print_kcpu_cqs_wait(struct kbase_context *kctx, char
 		int ret = kbasep_csf_sync_get_cqs_live_u32(kctx, cqs_obj->addr, &live_val);
 		bool live_val_valid = (ret >= 0);
 
-		*length +=
-			snprintf(buffer + *length, CSF_SYNC_DUMP_SIZE - *length,
-				 "cmd:CQS_WAIT_OPERATION obj:0x%.16llx live_value:", cqs_obj->addr);
+		*length += scnprintf(
+			buffer + *length, CSF_SYNC_DUMP_SIZE - *length,
+			"cmd:CQS_WAIT_OPERATION obj:0x%.16llx live_value:", cqs_obj->addr);
 
 		if (live_val_valid)
-			*length += snprintf(buffer + *length, CSF_SYNC_DUMP_SIZE - *length,
-					    "0x%.16llx", (u64)live_val);
+			*length += scnprintf(buffer + *length, CSF_SYNC_DUMP_SIZE - *length,
+					     "0x%.16llx", (u64)live_val);
 		else
-			*length += snprintf(buffer + *length, CSF_SYNC_DUMP_SIZE - *length,
-					    CQS_UNREADABLE_LIVE_VALUE);
+			*length += scnprintf(buffer + *length, CSF_SYNC_DUMP_SIZE - *length,
+					     CQS_UNREADABLE_LIVE_VALUE);
 
-		*length += snprintf(buffer + *length, CSF_SYNC_DUMP_SIZE - *length,
-				    " | op:gt arg_value:0x%.8x", cqs_obj->val);
+		*length += scnprintf(buffer + *length, CSF_SYNC_DUMP_SIZE - *length,
+				     " | op:gt arg_value:0x%.8x", cqs_obj->val);
 	}
 }
 
@@ -187,18 +187,18 @@ static void kbasep_csf_sync_print_kcpu_cqs_set(struct kbase_context *kctx, char
 		bool live_val_valid = (ret >= 0);
 
 		*length +=
-			snprintf(buffer + *length, CSF_SYNC_DUMP_SIZE - *length,
-				 "cmd:CQS_SET_OPERATION obj:0x%.16llx live_value:", cqs_obj->addr);
+			scnprintf(buffer + *length, CSF_SYNC_DUMP_SIZE - *length,
+				  "cmd:CQS_SET_OPERATION obj:0x%.16llx live_value:", cqs_obj->addr);
 
 		if (live_val_valid)
-			*length += snprintf(buffer + *length, CSF_SYNC_DUMP_SIZE - *length,
-					    "0x%.16llx", (u64)live_val);
+			*length += scnprintf(buffer + *length, CSF_SYNC_DUMP_SIZE - *length,
+					     "0x%.16llx", (u64)live_val);
 		else
-			*length += snprintf(buffer + *length, CSF_SYNC_DUMP_SIZE - *length,
-					    CQS_UNREADABLE_LIVE_VALUE);
+			*length += scnprintf(buffer + *length, CSF_SYNC_DUMP_SIZE - *length,
+					     CQS_UNREADABLE_LIVE_VALUE);
 
-		*length += snprintf(buffer + *length, CSF_SYNC_DUMP_SIZE - *length,
-				    " | op:add arg_value:0x%.8x", 1);
+		*length += scnprintf(buffer + *length, CSF_SYNC_DUMP_SIZE - *length,
+				     " | op:add arg_value:0x%.8x", 1);
 	}
 }
 
@@ -277,19 +277,19 @@ static void kbasep_csf_sync_print_kcpu_cqs_wait_op(struct kbase_context *kctx, c
 
 		bool live_val_valid = (ret >= 0);
 
-		*length +=
-			snprintf(buffer + *length, CSF_SYNC_DUMP_SIZE - *length,
-				 "cmd:CQS_WAIT_OPERATION obj:0x%.16llx live_value:", wait_op->addr);
+		*length += scnprintf(
+			buffer + *length, CSF_SYNC_DUMP_SIZE - *length,
+			"cmd:CQS_WAIT_OPERATION obj:0x%.16llx live_value:", wait_op->addr);
 
 		if (live_val_valid)
-			*length += snprintf(buffer + *length, CSF_SYNC_DUMP_SIZE - *length,
-					    "0x%.16llx", live_val);
+			*length += scnprintf(buffer + *length, CSF_SYNC_DUMP_SIZE - *length,
+					     "0x%.16llx", live_val);
 		else
-			*length += snprintf(buffer + *length, CSF_SYNC_DUMP_SIZE - *length,
-					    CQS_UNREADABLE_LIVE_VALUE);
+			*length += scnprintf(buffer + *length, CSF_SYNC_DUMP_SIZE - *length,
+					     CQS_UNREADABLE_LIVE_VALUE);
 
-		*length += snprintf(buffer + *length, CSF_SYNC_DUMP_SIZE - *length,
-				    " | op:%s arg_value:0x%.16llx", op_name, wait_op->val);
+		*length += scnprintf(buffer + *length, CSF_SYNC_DUMP_SIZE - *length,
+				     " | op:%s arg_value:0x%.16llx", op_name, wait_op->val);
 	}
 }
 
@@ -319,18 +319,18 @@ static void kbasep_csf_sync_print_kcpu_cqs_set_op(struct kbase_context *kctx, ch
 		bool live_val_valid = (ret >= 0);
 
 		*length +=
-			snprintf(buffer + *length, CSF_SYNC_DUMP_SIZE - *length,
-				 "cmd:CQS_SET_OPERATION obj:0x%.16llx live_value:", set_op->addr);
+			scnprintf(buffer + *length, CSF_SYNC_DUMP_SIZE - *length,
+				  "cmd:CQS_SET_OPERATION obj:0x%.16llx live_value:", set_op->addr);
 
 		if (live_val_valid)
-			*length += snprintf(buffer + *length, CSF_SYNC_DUMP_SIZE - *length,
-					    "0x%.16llx", live_val);
+			*length += scnprintf(buffer + *length, CSF_SYNC_DUMP_SIZE - *length,
+					     "0x%.16llx", live_val);
 		else
-			*length += snprintf(buffer + *length, CSF_SYNC_DUMP_SIZE - *length,
-					    CQS_UNREADABLE_LIVE_VALUE);
+			*length += scnprintf(buffer + *length, CSF_SYNC_DUMP_SIZE - *length,
+					     CQS_UNREADABLE_LIVE_VALUE);
 
-		*length += snprintf(buffer + *length, CSF_SYNC_DUMP_SIZE - *length,
-				    " | op:%s arg_value:0x%.16llx", op_name, set_op->val);
+		*length += scnprintf(buffer + *length, CSF_SYNC_DUMP_SIZE - *length,
+				     " | op:%s arg_value:0x%.16llx", op_name, set_op->val);
 	}
 }
 
@@ -360,8 +360,8 @@ static void kbasep_csf_sync_kcpu_print_queue(struct kbase_context *kctx,
 		int length = 0;
 
 		started_or_pending = ((i == 0) && queue->command_started) ? 'S' : 'P';
-		length += snprintf(buffer, CSF_SYNC_DUMP_SIZE, "queue:KCPU-%d-%d exec:%c ",
-				   kctx->id, queue->id, started_or_pending);
+		length += scnprintf(buffer, CSF_SYNC_DUMP_SIZE, "queue:KCPU-%d-%d exec:%c ",
+				    kctx->id, queue->id, started_or_pending);
 
 		cmd = &queue->commands[(u8)(queue->start_offset + i)];
 		switch (cmd->type) {
@@ -388,12 +388,12 @@ static void kbasep_csf_sync_kcpu_print_queue(struct kbase_context *kctx,
 			kbasep_csf_sync_print_kcpu_cqs_set_op(kctx, buffer, &length, cmd);
 			break;
 		default:
-			length += snprintf(buffer + length, CSF_SYNC_DUMP_SIZE - length,
-					   ", U, Unknown blocking command");
+			length += scnprintf(buffer + length, CSF_SYNC_DUMP_SIZE - length,
+					    ", U, Unknown blocking command");
 			break;
 		}
 
-		length += snprintf(buffer + length, CSF_SYNC_DUMP_SIZE - length, "\n");
+		length += scnprintf(buffer + length, CSF_SYNC_DUMP_SIZE - length, "\n");
 		kbasep_print(kbpr, buffer);
 	}
 
diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tiler_heap.c b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tiler_heap.c
index 2d148eea025e..5a5a4c315396 100644
--- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tiler_heap.c
+++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tiler_heap.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
 /*
  *
- * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2024 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -218,7 +218,7 @@ static void remove_unlinked_chunk(struct kbase_context *kctx,
 	if (WARN_ON(!list_empty(&chunk->link)))
 		return;
 
-	kbase_gpu_vm_lock(kctx);
+	kbase_gpu_vm_lock_with_pmode_sync(kctx);
 	kbase_vunmap(kctx, &chunk->map);
 	/* KBASE_REG_DONT_NEED regions will be confused with ephemeral regions (inc freed JIT
 	 * regions), and so we must clear that flag too before freeing.
@@ -231,7 +231,7 @@ static void remove_unlinked_chunk(struct kbase_context *kctx,
 	chunk->region->flags &= ~KBASE_REG_DONT_NEED;
 #endif
 	kbase_mem_free_region(kctx, chunk->region);
-	kbase_gpu_vm_unlock(kctx);
+	kbase_gpu_vm_unlock_with_pmode_sync(kctx);
 
 	kfree(chunk);
 }
@@ -260,8 +260,9 @@ static struct kbase_csf_tiler_heap_chunk *alloc_new_chunk(struct kbase_context *
 							  u64 chunk_size)
 {
 	u64 nr_pages = PFN_UP(chunk_size);
-	u64 flags = BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR | BASE_MEM_PROT_CPU_WR |
-		    BASEP_MEM_NO_USER_FREE | BASE_MEM_COHERENT_LOCAL | BASE_MEM_PROT_CPU_RD;
+	base_mem_alloc_flags flags = BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR |
+				     BASE_MEM_PROT_CPU_WR | BASEP_MEM_NO_USER_FREE |
+				     BASE_MEM_COHERENT_LOCAL | BASE_MEM_PROT_CPU_RD;
 	struct kbase_csf_tiler_heap_chunk *chunk = NULL;
 	/* The chunk kernel mapping needs to be large enough to:
 	 * - initially zero the CHUNK_HDR_SIZE area
@@ -350,13 +351,14 @@ static struct kbase_csf_tiler_heap_chunk *alloc_new_chunk(struct kbase_context *
 	}
 
 	remove_external_chunk_mappings(kctx, chunk);
-	kbase_gpu_vm_unlock(kctx);
 
 	/* If page migration is enabled, we don't want to migrate tiler heap pages.
 	 * This does not change if the constituent pages are already marked as isolated.
 	 */
 	if (kbase_is_page_migration_enabled())
-		kbase_set_phy_alloc_page_status(chunk->region->gpu_alloc, NOT_MOVABLE);
+		kbase_set_phy_alloc_page_status(kctx, chunk->region->gpu_alloc, NOT_MOVABLE);
+
+	kbase_gpu_vm_unlock(kctx);
 
 	return chunk;
 
@@ -640,7 +642,7 @@ static bool kbasep_is_buffer_descriptor_region_suitable(struct kbase_context *co
 
 	if (!(reg->flags & KBASE_REG_CPU_RD) || kbase_is_region_shrinkable(reg) ||
 	    (reg->flags & KBASE_REG_PF_GROW)) {
-		dev_err(kctx->kbdev->dev, "Region has invalid flags: 0x%lX!\n", reg->flags);
+		dev_err(kctx->kbdev->dev, "Region has invalid flags: 0x%llX!\n", reg->flags);
 		return false;
 	}
 
@@ -737,7 +739,7 @@ int kbase_csf_tiler_heap_init(struct kbase_context *const kctx, u32 const chunk_
 					  KBASE_VMAP_FLAG_PERMANENT_MAP_ACCOUNTING);
 
 		if (kbase_is_page_migration_enabled())
-			kbase_set_phy_alloc_page_status(buf_desc_reg->gpu_alloc, NOT_MOVABLE);
+			kbase_set_phy_alloc_page_status(kctx, buf_desc_reg->gpu_alloc, NOT_MOVABLE);
 
 		kbase_gpu_vm_unlock(kctx);
 
@@ -1058,6 +1060,7 @@ static bool delete_chunk_physical_pages(struct kbase_csf_tiler_heap *heap, u64 c
 	struct kbase_csf_tiler_heap_chunk *chunk = NULL;
 
 	lockdep_assert_held(&heap->kctx->csf.tiler_heaps.lock);
+	lockdep_assert_held(&kctx->kbdev->csf.scheduler.lock);
 
 	chunk = find_chunk(heap, chunk_gpu_va);
 	if (unlikely(!chunk)) {
diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tiler_heap_reclaim.c b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tiler_heap_reclaim.c
index a2bb49422e98..df4feb77f0cd 100644
--- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tiler_heap_reclaim.c
+++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tiler_heap_reclaim.c
@@ -331,8 +331,8 @@ static unsigned long kbase_csf_tiler_heap_reclaim_scan_free_pages(struct kbase_d
 static unsigned long kbase_csf_tiler_heap_reclaim_count_objects(struct shrinker *s,
 								struct shrink_control *sc)
 {
-	struct kbase_device *kbdev =
-		container_of(s, struct kbase_device, csf.scheduler.reclaim_mgr.heap_reclaim);
+	struct kbase_device *kbdev = KBASE_GET_KBASE_DATA_FROM_SHRINKER(
+		s, struct kbase_device, csf.scheduler.reclaim_mgr.heap_reclaim);
 
 	return kbase_csf_tiler_heap_reclaim_count_free_pages(kbdev, sc);
 }
@@ -340,8 +340,8 @@ static unsigned long kbase_csf_tiler_heap_reclaim_count_objects(struct shrinker
 static unsigned long kbase_csf_tiler_heap_reclaim_scan_objects(struct shrinker *s,
 							       struct shrink_control *sc)
 {
-	struct kbase_device *kbdev =
-		container_of(s, struct kbase_device, csf.scheduler.reclaim_mgr.heap_reclaim);
+	struct kbase_device *kbdev = KBASE_GET_KBASE_DATA_FROM_SHRINKER(
+		s, struct kbase_device, csf.scheduler.reclaim_mgr.heap_reclaim);
 
 	return kbase_csf_tiler_heap_reclaim_scan_free_pages(kbdev, sc);
 }
@@ -352,11 +352,17 @@ void kbase_csf_tiler_heap_reclaim_ctx_init(struct kbase_context *kctx)
 	INIT_LIST_HEAD(&kctx->csf.sched.heap_info.mgr_link);
 }
 
-void kbase_csf_tiler_heap_reclaim_mgr_init(struct kbase_device *kbdev)
+int kbase_csf_tiler_heap_reclaim_mgr_init(struct kbase_device *kbdev)
 {
 	struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
-	struct shrinker *reclaim = &scheduler->reclaim_mgr.heap_reclaim;
 	u8 prio;
+	struct shrinker *reclaim;
+
+	reclaim =
+		KBASE_INIT_RECLAIM(&(scheduler->reclaim_mgr), heap_reclaim, "mali-csf-tiler-heap");
+	if (!reclaim)
+		return -ENOMEM;
+	KBASE_SET_RECLAIM(&(scheduler->reclaim_mgr), heap_reclaim, reclaim);
 
 	for (prio = KBASE_QUEUE_GROUP_PRIORITY_REALTIME; prio < KBASE_QUEUE_GROUP_PRIORITY_COUNT;
 	     prio++)
@@ -366,6 +372,11 @@ void kbase_csf_tiler_heap_reclaim_mgr_init(struct kbase_device *kbdev)
 	reclaim->scan_objects = kbase_csf_tiler_heap_reclaim_scan_objects;
 	reclaim->seeks = HEAP_SHRINKER_SEEKS;
 	reclaim->batch = HEAP_SHRINKER_BATCH;
+
+	if (!IS_ENABLED(CONFIG_MALI_VECTOR_DUMP))
+		KBASE_REGISTER_SHRINKER(reclaim, "mali-csf-tiler-heap", kbdev);
+
+	return 0;
 }
 
 void kbase_csf_tiler_heap_reclaim_mgr_term(struct kbase_device *kbdev)
@@ -373,6 +384,9 @@ void kbase_csf_tiler_heap_reclaim_mgr_term(struct kbase_device *kbdev)
 	struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
 	u8 prio;
 
+	if (!IS_ENABLED(CONFIG_MALI_VECTOR_DUMP))
+		KBASE_UNREGISTER_SHRINKER(scheduler->reclaim_mgr.heap_reclaim);
+
 	for (prio = KBASE_QUEUE_GROUP_PRIORITY_REALTIME; prio < KBASE_QUEUE_GROUP_PRIORITY_COUNT;
 	     prio++)
 		WARN_ON(!list_empty(&scheduler->reclaim_mgr.ctx_lists[prio]));
diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tiler_heap_reclaim.h b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tiler_heap_reclaim.h
index 7880de04c84f..d41b7baabd02 100644
--- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tiler_heap_reclaim.h
+++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tiler_heap_reclaim.h
@@ -66,8 +66,10 @@ void kbase_csf_tiler_heap_reclaim_ctx_init(struct kbase_context *kctx);
  * @kbdev: Pointer to the device.
  *
  * This function must be called only when a kbase device is initialized.
+ *
+ * Return: 0 if the reclaim manager was initialized successfully, otherwise an error code.
  */
-void kbase_csf_tiler_heap_reclaim_mgr_init(struct kbase_device *kbdev);
+int kbase_csf_tiler_heap_reclaim_mgr_init(struct kbase_device *kbdev);
 
 /**
  * kbase_csf_tiler_heap_reclaim_mgr_term - Termination call for the tiler heap reclaim manger.
diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tl_reader.c b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tl_reader.c
index 54054661f7a9..eb5c8a40b8c9 100644
--- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tl_reader.c
+++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tl_reader.c
@@ -151,13 +151,22 @@ static bool tl_reader_overflow_check(struct kbase_csf_tl_reader *self, u16 event
  *
  * Reset the reader to the default state, i.e. set all the
  * mutable fields to zero.
+ *
+ * NOTE: this function expects self->read_lock to be held by the caller.
  */
 static void tl_reader_reset(struct kbase_csf_tl_reader *self)
 {
+	lockdep_assert_held(&self->read_lock);
+
 	self->got_first_event = false;
 	self->is_active = false;
 	self->expected_event_id = 0;
 	self->tl_header.btc = 0;
+
+	/* There might be data left in the trace buffer from the previous
+	 * tracing session. We don't want it to leak into this session.
+	 */
+	kbase_csf_firmware_trace_buffer_discard_all(self->trace_buffer);
 }
 
 int kbase_csf_tl_reader_flush_buffer(struct kbase_csf_tl_reader *self)
@@ -324,21 +333,16 @@ static int tl_reader_update_enable_bit(struct kbase_csf_tl_reader *self, bool va
 
 void kbase_csf_tl_reader_init(struct kbase_csf_tl_reader *self, struct kbase_tlstream *stream)
 {
-	self->timer_interval = KBASE_CSF_TL_READ_INTERVAL_DEFAULT;
+	*self = (struct kbase_csf_tl_reader){
+		.timer_interval = KBASE_CSF_TL_READ_INTERVAL_DEFAULT,
+		.stream = stream,
+		.kbdev = NULL, /* This will be initialized by tl_reader_init_late() */
+		.is_active = false,
+	};
 
 	kbase_timer_setup(&self->read_timer, kbasep_csf_tl_reader_read_callback);
 
-	self->stream = stream;
-
-	/* This will be initialized by tl_reader_init_late() */
-	self->kbdev = NULL;
-	self->trace_buffer = NULL;
-	self->tl_header.data = NULL;
-	self->tl_header.size = 0;
-
 	spin_lock_init(&self->read_lock);
-
-	tl_reader_reset(self);
 }
 
 void kbase_csf_tl_reader_term(struct kbase_csf_tl_reader *self)
@@ -348,13 +352,19 @@ void kbase_csf_tl_reader_term(struct kbase_csf_tl_reader *self)
 
 int kbase_csf_tl_reader_start(struct kbase_csf_tl_reader *self, struct kbase_device *kbdev)
 {
+	unsigned long flags;
 	int rcode;
 
+	spin_lock_irqsave(&self->read_lock, flags);
+
 	/* If already running, early exit. */
-	if (self->is_active)
+	if (self->is_active) {
+		spin_unlock_irqrestore(&self->read_lock, flags);
 		return 0;
+	}
 
 	if (tl_reader_init_late(self, kbdev)) {
+		spin_unlock_irqrestore(&self->read_lock, flags);
 #if IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI)
 		dev_warn(kbdev->dev, "CSFFW timeline is not available for MALI_BIFROST_NO_MALI builds!");
 		return 0;
@@ -366,6 +376,9 @@ int kbase_csf_tl_reader_start(struct kbase_csf_tl_reader *self, struct kbase_dev
 	tl_reader_reset(self);
 
 	self->is_active = true;
+
+	spin_unlock_irqrestore(&self->read_lock, flags);
+
 	/* Set bytes to copy to the header size. This is to trigger copying
 	 * of the header to the user space.
 	 */
diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_trace_buffer.c b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_trace_buffer.c
index 8ed7c91553a6..a9469c5949b4 100644
--- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_trace_buffer.c
+++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_trace_buffer.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
 /*
  *
- * (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2018-2024 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -469,14 +469,15 @@ unsigned int kbase_csf_firmware_trace_buffer_read_data(struct firmware_trace_buf
 	} else {
 		unsigned int bytes_copied_head, bytes_copied_tail;
 
-		bytes_copied_tail = min_t(unsigned int, num_bytes, (buffer_size - extract_offset));
+		bytes_copied_tail =
+			min_t(unsigned int, num_bytes, size_sub(buffer_size, extract_offset));
 		memcpy(data, &data_cpu_va[extract_offset], bytes_copied_tail);
 
 		bytes_copied_head =
 			min_t(unsigned int, (num_bytes - bytes_copied_tail), insert_offset);
 		memcpy(&data[bytes_copied_tail], data_cpu_va, bytes_copied_head);
 
-		bytes_copied = bytes_copied_head + bytes_copied_tail;
+		bytes_copied = size_add(bytes_copied_head, bytes_copied_tail);
 		extract_offset += bytes_copied;
 		if (extract_offset >= buffer_size)
 			extract_offset = bytes_copied_head;
@@ -519,6 +520,14 @@ void kbase_csf_firmware_trace_buffer_discard(struct firmware_trace_buffer *trace
 }
 EXPORT_SYMBOL(kbase_csf_firmware_trace_buffer_discard);
 
+void kbase_csf_firmware_trace_buffer_discard_all(struct firmware_trace_buffer *trace_buffer)
+{
+	if (WARN_ON(!trace_buffer))
+		return;
+	/* Overwrite the extract value with the insert value so that nothing is left to read. */
+	*(trace_buffer->cpu_va.extract_cpu_va) = *(trace_buffer->cpu_va.insert_cpu_va);
+}
+
 static void update_trace_buffer_active_mask64(struct firmware_trace_buffer *tb, u64 mask)
 {
 	unsigned int i;
diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_trace_buffer.h b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_trace_buffer.h
index 90dfcb2699bc..35988eaf8f5a 100644
--- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_trace_buffer.h
+++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_trace_buffer.h
@@ -179,6 +179,15 @@ unsigned int kbase_csf_firmware_trace_buffer_read_data(struct firmware_trace_buf
  */
 void kbase_csf_firmware_trace_buffer_discard(struct firmware_trace_buffer *trace_buffer);
 
+/**
+ * kbase_csf_firmware_trace_buffer_discard_all - Discard all data from a trace buffer
+ *
+ * @trace_buffer: Trace buffer handle
+ *
+ * Discard all the data in the trace buffer to make it empty.
+ */
+void kbase_csf_firmware_trace_buffer_discard_all(struct firmware_trace_buffer *trace_buffer);
+
 /**
  * kbase_csf_firmware_trace_buffer_get_active_mask64 - Get trace buffer active mask
  *
diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_util.c b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_util.c
index 7dc32a11bb29..5f13672e70b8 100644
--- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_util.c
+++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_util.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
 /*
  *
- * (C) COPYRIGHT 2023 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2023-2024 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -115,7 +115,7 @@ struct kbasep_printer *kbasep_printer_buffer_init(struct kbase_device *kbdev,
 
 	if (kbpr) {
 		if (kfifo_alloc(&kbpr->fifo, KBASEP_PRINTER_BUFFER_MAX_SIZE, GFP_KERNEL)) {
-			kfree(kbpr);
+			vfree(kbpr);
 			return NULL;
 		}
 		kbpr->kbdev = kbdev;
@@ -224,7 +224,7 @@ __attribute__((format(__printf__, 2, 3))) void kbasep_print(struct kbasep_printe
 	va_list arglist;
 
 	va_start(arglist, fmt);
-	len = vsnprintf(buffer, KBASEP_PRINT_FORMAT_BUFFER_MAX_SIZE, fmt, arglist);
+	len = vscnprintf(buffer, KBASEP_PRINT_FORMAT_BUFFER_MAX_SIZE, fmt, arglist);
 	if (len <= 0) {
 		pr_err("message write to the buffer failed");
 		goto exit;
diff --git a/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_coresight_csf.c b/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_coresight_csf.c
index fe8201f7f7e6..da56d71f473f 100644
--- a/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_coresight_csf.c
+++ b/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_coresight_csf.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
 /*
  *
- * (C) COPYRIGHT 2022-2023 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2022-2024 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -443,7 +443,7 @@ kbase_debug_coresight_csf_config_create(void *client_data,
 	}
 
 	config = kzalloc(sizeof(struct kbase_debug_coresight_csf_config), GFP_KERNEL);
-	if (WARN_ON(!client))
+	if (WARN_ON(!config))
 		return NULL;
 
 	config->client = client;
diff --git a/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_ktrace_codes_csf.h b/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_ktrace_codes_csf.h
index 18520db15502..04da9c8b9057 100644
--- a/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_ktrace_codes_csf.h
+++ b/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_ktrace_codes_csf.h
@@ -1,7 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
 /*
  *
- * (C) COPYRIGHT 2020-2023 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2020-2024 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -212,6 +212,9 @@ KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_EVICT_CTX_SLOTS_START),
 	KBASE_KTRACE_CODE_MAKE_CODE(SCHED_BUSY), KBASE_KTRACE_CODE_MAKE_CODE(SCHED_INACTIVE),
 	KBASE_KTRACE_CODE_MAKE_CODE(SCHED_SUSPENDED), KBASE_KTRACE_CODE_MAKE_CODE(SCHED_SLEEPING),
 
+	/* info_val == true if FW Sleep-on-Idle is enabled, false otherwise */
+	KBASE_KTRACE_CODE_MAKE_CODE(FIRMWARE_SLEEP_ON_IDLE_CHANGED),
+
 /* info_val = mcu state */
 #define KBASEP_MCU_STATE(n) KBASE_KTRACE_CODE_MAKE_CODE(PM_MCU_##n),
 #include "backend/gpu/mali_kbase_pm_mcu_states.h"
diff --git a/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_ktrace_csf.c b/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_ktrace_csf.c
index ec5ca10e135b..b14ffc69c54c 100644
--- a/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_ktrace_csf.c
+++ b/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_ktrace_csf.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
 /*
  *
- * (C) COPYRIGHT 2020-2023 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2020-2024 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -27,8 +27,8 @@
 
 void kbasep_ktrace_backend_format_header(char *buffer, int sz, s32 *written)
 {
-	*written += MAX(snprintf(buffer + *written, (size_t)MAX(sz - *written, 0),
-				 "group,slot,prio,csi,kcpu"),
+	*written += MAX(scnprintf(buffer + *written, (size_t)MAX(sz - *written, 0),
+				  "group,slot,prio,csi,kcpu"),
 			0);
 }
 
@@ -44,38 +44,39 @@ void kbasep_ktrace_backend_format_msg(struct kbase_ktrace_msg *trace_msg, char *
 	if (be_msg->gpu.flags & KBASE_KTRACE_FLAG_CSF_GROUP) {
 		const s8 slot = be_msg->gpu.csg_nr;
 		/* group,slot, */
-		*written += MAX(snprintf(buffer + *written, (size_t)MAX(sz - *written, 0), "%u,%d,",
-					 be_msg->gpu.group_handle, slot),
+		*written += MAX(scnprintf(buffer + *written, (size_t)MAX(sz - *written, 0),
+					  "%u,%d,", be_msg->gpu.group_handle, slot),
 				0);
 
 		/* prio */
 		if (slot >= 0)
-			*written += MAX(snprintf(buffer + *written, (size_t)MAX(sz - *written, 0),
-						 "%u", be_msg->gpu.slot_prio),
+			*written += MAX(scnprintf(buffer + *written, (size_t)MAX(sz - *written, 0),
+						  "%u", be_msg->gpu.slot_prio),
 					0);
 
 		/* , */
-		*written += MAX(snprintf(buffer + *written, (size_t)MAX(sz - *written, 0), ","), 0);
+		*written +=
+			MAX(scnprintf(buffer + *written, (size_t)MAX(sz - *written, 0), ","), 0);
 	} else {
 		/* No group,slot,prio fields, but ensure ending with "," */
 		*written +=
-			MAX(snprintf(buffer + *written, (size_t)MAX(sz - *written, 0), ",,,"), 0);
+			MAX(scnprintf(buffer + *written, (size_t)MAX(sz - *written, 0), ",,,"), 0);
 	}
 
 	/* queue parts: csi */
 	if (trace_msg->backend.gpu.flags & KBASE_KTRACE_FLAG_CSF_QUEUE)
-		*written += MAX(snprintf(buffer + *written, (size_t)MAX(sz - *written, 0), "%d",
-					 be_msg->gpu.csi_index),
+		*written += MAX(scnprintf(buffer + *written, (size_t)MAX(sz - *written, 0), "%d",
+					  be_msg->gpu.csi_index),
 				0);
 
 	/* , */
-	*written += MAX(snprintf(buffer + *written, (size_t)MAX(sz - *written, 0), ","), 0);
+	*written += MAX(scnprintf(buffer + *written, (size_t)MAX(sz - *written, 0), ","), 0);
 
 	if (be_msg->gpu.flags & KBASE_KTRACE_FLAG_CSF_KCPU) {
 		/* kcpu data */
-		*written += MAX(snprintf(buffer + *written, (size_t)MAX(sz - *written, 0),
-					 "kcpu %d (0x%llx)", be_msg->kcpu.id,
-					 be_msg->kcpu.extra_info_val),
+		*written += MAX(scnprintf(buffer + *written, (size_t)MAX(sz - *written, 0),
+					  "kcpu %d (0x%llx)", be_msg->kcpu.id,
+					  be_msg->kcpu.extra_info_val),
 				0);
 	}
 
diff --git a/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_ktrace_jm.c b/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_ktrace_jm.c
index beac074f2035..39306e7d45e3 100644
--- a/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_ktrace_jm.c
+++ b/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_ktrace_jm.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
 /*
  *
- * (C) COPYRIGHT 2020-2023 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2020-2024 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -27,8 +27,8 @@
 
 void kbasep_ktrace_backend_format_header(char *buffer, int sz, s32 *written)
 {
-	*written += MAX(snprintf(buffer + *written, (size_t)MAX(sz - *written, 0),
-				 "katom,gpu_addr,jobslot,refcount"),
+	*written += MAX(scnprintf(buffer + *written, (size_t)MAX(sz - *written, 0),
+				  "katom,gpu_addr,jobslot,refcount"),
 			0);
 }
 
@@ -37,34 +37,34 @@ void kbasep_ktrace_backend_format_msg(struct kbase_ktrace_msg *trace_msg, char *
 {
 	/* katom */
 	if (trace_msg->backend.gpu.flags & KBASE_KTRACE_FLAG_JM_ATOM)
-		*written += MAX(snprintf(buffer + *written, (size_t)MAX(sz - *written, 0),
-					 "atom %u (ud: 0x%llx 0x%llx)",
-					 trace_msg->backend.gpu.atom_number,
-					 trace_msg->backend.gpu.atom_udata[0],
-					 trace_msg->backend.gpu.atom_udata[1]),
+		*written += MAX(scnprintf(buffer + *written, (size_t)MAX(sz - *written, 0),
+					  "atom %u (ud: 0x%llx 0x%llx)",
+					  trace_msg->backend.gpu.atom_number,
+					  trace_msg->backend.gpu.atom_udata[0],
+					  trace_msg->backend.gpu.atom_udata[1]),
 				0);
 
 	/* gpu_addr */
 	if (trace_msg->backend.gpu.flags & KBASE_KTRACE_FLAG_BACKEND)
-		*written += MAX(snprintf(buffer + *written, (size_t)MAX(sz - *written, 0),
-					 ",%.8llx,", trace_msg->backend.gpu.gpu_addr),
+		*written += MAX(scnprintf(buffer + *written, (size_t)MAX(sz - *written, 0),
+					  ",%.8llx,", trace_msg->backend.gpu.gpu_addr),
 				0);
 	else
 		*written +=
-			MAX(snprintf(buffer + *written, (size_t)MAX(sz - *written, 0), ",,"), 0);
+			MAX(scnprintf(buffer + *written, (size_t)MAX(sz - *written, 0), ",,"), 0);
 
 	/* jobslot */
 	if (trace_msg->backend.gpu.flags & KBASE_KTRACE_FLAG_JM_JOBSLOT)
-		*written += MAX(snprintf(buffer + *written, (size_t)MAX(sz - *written, 0), "%d",
-					 trace_msg->backend.gpu.jobslot),
+		*written += MAX(scnprintf(buffer + *written, (size_t)MAX(sz - *written, 0), "%d",
+					  trace_msg->backend.gpu.jobslot),
 				0);
 
-	*written += MAX(snprintf(buffer + *written, (size_t)MAX(sz - *written, 0), ","), 0);
+	*written += MAX(scnprintf(buffer + *written, (size_t)MAX(sz - *written, 0), ","), 0);
 
 	/* refcount */
 	if (trace_msg->backend.gpu.flags & KBASE_KTRACE_FLAG_JM_REFCOUNT)
-		*written += MAX(snprintf(buffer + *written, (size_t)MAX(sz - *written, 0), "%d",
-					 trace_msg->backend.gpu.refcount),
+		*written += MAX(scnprintf(buffer + *written, (size_t)MAX(sz - *written, 0), "%d",
+					  trace_msg->backend.gpu.refcount),
 				0);
 }
 
diff --git a/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_linux_ktrace_csf.h b/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_linux_ktrace_csf.h
index 0b0de2385f85..7c40f472a78b 100644
--- a/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_linux_ktrace_csf.h
+++ b/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_linux_ktrace_csf.h
@@ -1,7 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
 /*
  *
- * (C) COPYRIGHT 2020-2023 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2020-2024 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -69,6 +69,7 @@ DEFINE_MALI_ADD_EVENT(SCHED_BUSY);
 DEFINE_MALI_ADD_EVENT(SCHED_INACTIVE);
 DEFINE_MALI_ADD_EVENT(SCHED_SUSPENDED);
 DEFINE_MALI_ADD_EVENT(SCHED_SLEEPING);
+DEFINE_MALI_ADD_EVENT(FIRMWARE_SLEEP_ON_IDLE_CHANGED);
 #define KBASEP_MCU_STATE(n) DEFINE_MALI_ADD_EVENT(PM_MCU_##n);
 #include "backend/gpu/mali_kbase_pm_mcu_states.h"
 #undef KBASEP_MCU_STATE
diff --git a/drivers/gpu/arm/bifrost/debug/mali_kbase_debug_ktrace.c b/drivers/gpu/arm/bifrost/debug/mali_kbase_debug_ktrace.c
index 0842460bc08a..036d1f5968f6 100644
--- a/drivers/gpu/arm/bifrost/debug/mali_kbase_debug_ktrace.c
+++ b/drivers/gpu/arm/bifrost/debug/mali_kbase_debug_ktrace.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
 /*
  *
- * (C) COPYRIGHT 2020-2023 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2020-2024 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -71,15 +71,15 @@ static const char *const kbasep_ktrace_code_string[] = {
 
 static void kbasep_ktrace_format_header(char *buffer, int sz, s32 written)
 {
-	written += MAX(snprintf(buffer + written, (size_t)MAX(sz - written, 0),
-				"secs,thread_id,cpu,code,kctx,"),
+	written += MAX(scnprintf(buffer + written, (size_t)MAX(sz - written, 0),
+				 "secs,thread_id,cpu,code,kctx,"),
 		       0);
 
 	kbasep_ktrace_backend_format_header(buffer, sz, &written);
 
-	written += MAX(snprintf(buffer + written, (size_t)MAX(sz - written, 0),
-				",info_val,ktrace_version=%u.%u", KBASE_KTRACE_VERSION_MAJOR,
-				KBASE_KTRACE_VERSION_MINOR),
+	written += MAX(scnprintf(buffer + written, (size_t)MAX(sz - written, 0),
+				 ",info_val,ktrace_version=%u.%u", KBASE_KTRACE_VERSION_MAJOR,
+				 KBASE_KTRACE_VERSION_MINOR),
 		       0);
 
 	buffer[sz - 1] = 0;
@@ -93,21 +93,21 @@ static void kbasep_ktrace_format_msg(struct kbase_ktrace_msg *trace_msg, char *b
 	 *
 	 * secs,thread_id,cpu,code,
 	 */
-	written += MAX(snprintf(buffer + written, (size_t)MAX(sz - written, 0), "%d.%.6d,%d,%d,%s,",
-				(int)trace_msg->timestamp.tv_sec,
-				(int)(trace_msg->timestamp.tv_nsec / 1000), trace_msg->thread_id,
-				trace_msg->cpu,
-				kbasep_ktrace_code_string[trace_msg->backend.gpu.code]),
+	written += MAX(scnprintf(buffer + written, (size_t)MAX(sz - written, 0),
+				 "%d.%.6d,%d,%d,%s,", (int)trace_msg->timestamp.tv_sec,
+				 (int)(trace_msg->timestamp.tv_nsec / 1000), trace_msg->thread_id,
+				 trace_msg->cpu,
+				 kbasep_ktrace_code_string[trace_msg->backend.gpu.code]),
 		       0);
 
 	/* kctx part: */
 	if (trace_msg->kctx_tgid) {
-		written += MAX(snprintf(buffer + written, (size_t)MAX(sz - written, 0), "%d_%u",
-					trace_msg->kctx_tgid, trace_msg->kctx_id),
+		written += MAX(scnprintf(buffer + written, (size_t)MAX(sz - written, 0), "%d_%u",
+					 trace_msg->kctx_tgid, trace_msg->kctx_id),
 			       0);
 	}
 	/* Trailing comma */
-	written += MAX(snprintf(buffer + written, (size_t)MAX(sz - written, 0), ","), 0);
+	written += MAX(scnprintf(buffer + written, (size_t)MAX(sz - written, 0), ","), 0);
 
 	/* Backend parts */
 	kbasep_ktrace_backend_format_msg(trace_msg, buffer, sz, &written);
@@ -119,8 +119,8 @@ static void kbasep_ktrace_format_msg(struct kbase_ktrace_msg *trace_msg, char *b
 	 * Note that the last column is empty, it's simply to hold the ktrace
 	 * version in the header
 	 */
-	written += MAX(snprintf(buffer + written, (size_t)MAX(sz - written, 0), ",0x%.16llx",
-				(unsigned long long)trace_msg->info_val),
+	written += MAX(scnprintf(buffer + written, (size_t)MAX(sz - written, 0), ",0x%.16llx",
+				 (unsigned long long)trace_msg->info_val),
 		       0);
 	buffer[sz - 1] = 0;
 }
diff --git a/drivers/gpu/arm/bifrost/debug/mali_kbase_debug_ktrace_codes.h b/drivers/gpu/arm/bifrost/debug/mali_kbase_debug_ktrace_codes.h
index 991f70fe8540..d40eec013cb5 100644
--- a/drivers/gpu/arm/bifrost/debug/mali_kbase_debug_ktrace_codes.h
+++ b/drivers/gpu/arm/bifrost/debug/mali_kbase_debug_ktrace_codes.h
@@ -1,7 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
 /*
  *
- * (C) COPYRIGHT 2011-2023 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2011-2024 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -149,13 +149,17 @@ KBASE_KTRACE_CODE_MAKE_CODE(CORE_CTX_DESTROY),
 	KBASE_KTRACE_CODE_MAKE_CODE(SCHED_RETAIN_CTX_NOLOCK),
 	/* info_val == kctx->refcount */
 	KBASE_KTRACE_CODE_MAKE_CODE(SCHED_RELEASE_CTX),
-#ifdef CONFIG_MALI_ARBITER_SUPPORT
 	/*
 	 * Arbitration events
 	 */
-	KBASE_KTRACE_CODE_MAKE_CODE(ARB_GPU_LOST), KBASE_KTRACE_CODE_MAKE_CODE(ARB_VM_STATE),
+	KBASE_KTRACE_CODE_MAKE_CODE(ARB_VM_STATE),
 	KBASE_KTRACE_CODE_MAKE_CODE(ARB_VM_EVT),
-#endif
+	KBASE_KTRACE_CODE_MAKE_CODE(ARB_GPU_GRANTED),
+	KBASE_KTRACE_CODE_MAKE_CODE(ARB_GPU_LOST),
+	KBASE_KTRACE_CODE_MAKE_CODE(ARB_GPU_STARTED),
+	KBASE_KTRACE_CODE_MAKE_CODE(ARB_GPU_STOP_REQUESTED),
+	KBASE_KTRACE_CODE_MAKE_CODE(ARB_GPU_STOPPED),
+	KBASE_KTRACE_CODE_MAKE_CODE(ARB_GPU_REQUESTED),
 
 #if MALI_USE_CSF
 #include "debug/backend/mali_kbase_debug_ktrace_codes_csf.h"
diff --git a/drivers/gpu/arm/bifrost/debug/mali_kbase_debug_linux_ktrace.h b/drivers/gpu/arm/bifrost/debug/mali_kbase_debug_linux_ktrace.h
index 1ebddfa3f44f..acc78eb5b0b2 100644
--- a/drivers/gpu/arm/bifrost/debug/mali_kbase_debug_linux_ktrace.h
+++ b/drivers/gpu/arm/bifrost/debug/mali_kbase_debug_linux_ktrace.h
@@ -1,7 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
 /*
  *
- * (C) COPYRIGHT 2014-2023 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2024 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -95,13 +95,16 @@ DEFINE_MALI_ADD_EVENT(PM_RUNTIME_RESUME_CALLBACK);
 #undef KBASEP_L2_STATE
 DEFINE_MALI_ADD_EVENT(SCHED_RETAIN_CTX_NOLOCK);
 DEFINE_MALI_ADD_EVENT(SCHED_RELEASE_CTX);
-#ifdef CONFIG_MALI_ARBITER_SUPPORT
 
-DEFINE_MALI_ADD_EVENT(ARB_GPU_LOST);
 DEFINE_MALI_ADD_EVENT(ARB_VM_STATE);
 DEFINE_MALI_ADD_EVENT(ARB_VM_EVT);
+DEFINE_MALI_ADD_EVENT(ARB_GPU_GRANTED);
+DEFINE_MALI_ADD_EVENT(ARB_GPU_LOST);
+DEFINE_MALI_ADD_EVENT(ARB_GPU_STARTED);
+DEFINE_MALI_ADD_EVENT(ARB_GPU_STOP_REQUESTED);
+DEFINE_MALI_ADD_EVENT(ARB_GPU_STOPPED);
+DEFINE_MALI_ADD_EVENT(ARB_GPU_REQUESTED);
 
-#endif
 #if MALI_USE_CSF
 #include "backend/mali_kbase_debug_linux_ktrace_csf.h"
 #else
diff --git a/drivers/gpu/arm/bifrost/device/backend/mali_kbase_device_csf.c b/drivers/gpu/arm/bifrost/device/backend/mali_kbase_device_csf.c
index 52aa63330afe..218022ac3186 100644
--- a/drivers/gpu/arm/bifrost/device/backend/mali_kbase_device_csf.c
+++ b/drivers/gpu/arm/bifrost/device/backend/mali_kbase_device_csf.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
 /*
  *
- * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2024 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -132,11 +132,15 @@ static int kbase_backend_late_init(struct kbase_device *kbdev)
 
 fail_update_l2_features:
 	kbase_backend_devfreq_term(kbdev);
-fail_devfreq_init:
-	kbasep_pm_metrics_term(kbdev);
-fail_pm_metrics_init:
-	kbase_ipa_control_term(kbdev);
 
+fail_devfreq_init:
+	{
+		kbasep_pm_metrics_term(kbdev);
+	}
+fail_pm_metrics_init:
+	{
+		kbase_ipa_control_term(kbdev);
+	}
 #ifdef CONFIG_MALI_BIFROST_DEBUG
 #if IS_ENABLED(CONFIG_MALI_REAL_HW)
 fail_interrupt_test:
@@ -159,9 +163,11 @@ fail_reset_gpu_init:
  */
 static void kbase_backend_late_term(struct kbase_device *kbdev)
 {
-	kbase_backend_devfreq_term(kbdev);
-	kbasep_pm_metrics_term(kbdev);
-	kbase_ipa_control_term(kbdev);
+	{
+		kbase_backend_devfreq_term(kbdev);
+		kbasep_pm_metrics_term(kbdev);
+		kbase_ipa_control_term(kbdev);
+	}
 	kbase_hwaccess_pm_halt(kbdev);
 	kbase_reset_gpu_term(kbdev);
 	kbase_hwaccess_pm_term(kbdev);
@@ -279,10 +285,8 @@ static const struct kbase_device_init dev_init[] = {
 	{ kbase_gpu_device_create, kbase_gpu_device_destroy, "Dummy model initialization failed" },
 #else /* !IS_ENABLED(CONFIG_MALI_REAL_HW) */
 	{ kbase_get_irqs, NULL, "IRQ search failed" },
-#endif /* !IS_ENABLED(CONFIG_MALI_REAL_HW) */
-#if !IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI)
 	{ registers_map, registers_unmap, "Register map failed" },
-#endif /* !IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) */
+#endif /* !IS_ENABLED(CONFIG_MALI_REAL_HW) */
 #if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD)
 	{ kbase_gpu_metrics_init, kbase_gpu_metrics_term, "GPU metrics initialization failed" },
 #endif /* IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD) */
diff --git a/drivers/gpu/arm/bifrost/device/backend/mali_kbase_device_hw_csf.c b/drivers/gpu/arm/bifrost/device/backend/mali_kbase_device_hw_csf.c
index ab9df01610ab..3b27b87657a5 100644
--- a/drivers/gpu/arm/bifrost/device/backend/mali_kbase_device_hw_csf.c
+++ b/drivers/gpu/arm/bifrost/device/backend/mali_kbase_device_hw_csf.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
 /*
  *
- * (C) COPYRIGHT 2020-2023 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2020-2024 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -28,6 +28,16 @@
 #include <mali_kbase_reset_gpu.h>
 #include <mmu/mali_kbase_mmu.h>
 #include <mali_kbase_ctx_sched.h>
+#include <mmu/mali_kbase_mmu_faults_decoder.h>
+
+bool kbase_is_gpu_removed(struct kbase_device *kbdev)
+{
+	if (!kbase_has_arbiter(kbdev))
+		return false;
+
+	/* An arbiter is present: a GPU_ID read of zero is reported as "GPU removed". */
+	return (KBASE_REG_READ(kbdev, GPU_CONTROL_ENUM(GPU_ID)) == 0);
+}
 
 /**
  * kbase_report_gpu_fault - Report a GPU fault of the device.
@@ -78,6 +88,7 @@ static void kbase_gpu_fault_interrupt(struct kbase_device *kbdev)
 void kbase_gpu_interrupt(struct kbase_device *kbdev, u32 val)
 {
 	u32 power_changed_mask = (POWER_CHANGED_ALL | MCU_STATUS_GPU_IRQ);
+	struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
 
 
 	KBASE_KTRACE_ADD(kbdev, CORE_GPU_IRQ, NULL, val);
@@ -85,7 +96,6 @@ void kbase_gpu_interrupt(struct kbase_device *kbdev, u32 val)
 		kbase_gpu_fault_interrupt(kbdev);
 
 	if (val & GPU_PROTECTED_FAULT) {
-		struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
 		unsigned long flags;
 
 		dev_err_ratelimited(kbdev->dev, "GPU fault in protected mode");
@@ -139,10 +149,33 @@ void kbase_gpu_interrupt(struct kbase_device *kbdev, u32 val)
 		unsigned long flags;
 
 		dev_dbg(kbdev->dev, "Doorbell mirror interrupt received");
+
+		/* Assume that the doorbell was rung by userspace to
+		 * present new work, and so invalidate a possible GPU
+		 * idle event.
+		 * If the doorbell was instead raised by KBase, the FW will
+		 * handle the pending doorbell and then raise a 2nd GLB_IDLE
+		 * IRQ, which would allow us to put the GPU to sleep.
+		 */
+		atomic_set(&scheduler->gpu_no_longer_idle, true);
+
 		spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
 		kbase_pm_disable_db_mirror_interrupt(kbdev);
-		kbdev->pm.backend.exit_gpu_sleep_mode = true;
-		kbase_csf_scheduler_invoke_tick(kbdev);
+
+		if (likely(kbdev->pm.backend.mcu_state == KBASE_MCU_IN_SLEEP)) {
+			kbdev->pm.backend.exit_gpu_sleep_mode = true;
+			kbase_csf_scheduler_invoke_tick(kbdev);
+		} else if (likely(test_bit(KBASE_GPU_SUPPORTS_FW_SLEEP_ON_IDLE,
+					   &kbdev->pm.backend.gpu_sleep_allowed)) &&
+			   (kbdev->pm.backend.mcu_state != KBASE_MCU_ON_PEND_SLEEP)) {
+			/* The firmware is going to sleep on its own, but new
+			 * doorbells were rung before we managed to handle
+			 * the GLB_IDLE IRQ in the bottom half. Enable DB
+			 * notification so that the DB can be handled by FW.
+			 */
+			dev_dbg(kbdev->dev, "Re-enabling MCU immediately following DB_MIRROR IRQ");
+			kbase_pm_enable_mcu_db_notification(kbdev);
+		}
 		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
 	}
 #endif
@@ -169,10 +202,13 @@ void kbase_gpu_interrupt(struct kbase_device *kbdev, u32 val)
 		 * cores.
 		 */
 		if (kbdev->pm.backend.l2_always_on ||
-		    kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TTRX_921))
+		    kbase_hw_has_issue(kbdev, KBASE_HW_ISSUE_TTRX_921))
 			kbase_pm_power_changed(kbdev);
 	}
 
+	if (val & MCU_STATUS_GPU_IRQ)
+		wake_up_all(&kbdev->csf.event_wait);
+
 	KBASE_KTRACE_ADD(kbdev, CORE_GPU_IRQ_DONE, NULL, val);
 }
 KBASE_EXPORT_TEST_API(kbase_gpu_interrupt);
diff --git a/drivers/gpu/arm/bifrost/device/backend/mali_kbase_device_hw_jm.c b/drivers/gpu/arm/bifrost/device/backend/mali_kbase_device_hw_jm.c
index f971b3b939df..4dd9a228aa11 100644
--- a/drivers/gpu/arm/bifrost/device/backend/mali_kbase_device_hw_jm.c
+++ b/drivers/gpu/arm/bifrost/device/backend/mali_kbase_device_hw_jm.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
 /*
  *
- * (C) COPYRIGHT 2020-2023 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2020-2024 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -28,6 +28,14 @@
 #include <mali_kbase_reset_gpu.h>
 #include <mmu/mali_kbase_mmu.h>
 
+bool kbase_is_gpu_removed(struct kbase_device *kbdev)
+{
+	if (!kbase_has_arbiter(kbdev))
+		return false;
+	/* An arbiter is present: a GPU_ID read of zero is reported as "GPU removed". */
+	return (KBASE_REG_READ(kbdev, GPU_CONTROL_ENUM(GPU_ID)) == 0);
+}
+
 /**
  * kbase_report_gpu_fault - Report a GPU fault.
  * @kbdev:    Kbase device pointer
@@ -95,7 +103,7 @@ void kbase_gpu_interrupt(struct kbase_device *kbdev, u32 val)
 		 * cores.
 		 */
 		if (kbdev->pm.backend.l2_always_on ||
-		    kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TTRX_921))
+		    kbase_hw_has_issue(kbdev, KBASE_HW_ISSUE_TTRX_921))
 			kbase_pm_power_changed(kbdev);
 	}
 
diff --git a/drivers/gpu/arm/bifrost/device/backend/mali_kbase_device_jm.c b/drivers/gpu/arm/bifrost/device/backend/mali_kbase_device_jm.c
index ab46f858a542..8cdf26e28ac6 100644
--- a/drivers/gpu/arm/bifrost/device/backend/mali_kbase_device_jm.c
+++ b/drivers/gpu/arm/bifrost/device/backend/mali_kbase_device_jm.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
 /*
  *
- * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2024 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -32,10 +32,6 @@
 #include <hwcnt/backend/mali_kbase_hwcnt_backend_jm_watchdog.h>
 #include <backend/gpu/mali_kbase_model_linux.h>
 
-#ifdef CONFIG_MALI_ARBITER_SUPPORT
-#include <arbiter/mali_kbase_arbiter_pm.h>
-#endif
-
 #include <mali_kbase.h>
 #include <backend/gpu/mali_kbase_irq_internal.h>
 #include <backend/gpu/mali_kbase_jm_internal.h>
@@ -217,16 +213,14 @@ static const struct kbase_device_init dev_init[] = {
 	{ kbase_gpu_device_create, kbase_gpu_device_destroy, "Dummy model initialization failed" },
 #else /* !IS_ENABLED(CONFIG_MALI_REAL_HW) */
 	{ kbase_get_irqs, NULL, "IRQ search failed" },
-#endif /* !IS_ENABLED(CONFIG_MALI_REAL_HW) */
-#if !IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI)
 	{ registers_map, registers_unmap, "Register map failed" },
-#endif /* !IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) */
+#endif /* !IS_ENABLED(CONFIG_MALI_REAL_HW) */
 #if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD)
 	{ kbase_gpu_metrics_init, kbase_gpu_metrics_term, "GPU metrics initialization failed" },
 #endif /* IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD) */
+	{ power_control_init, power_control_term, "Power control initialization failed" },
 	{ kbase_device_io_history_init, kbase_device_io_history_term,
 	  "Register access history initialization failed" },
-	{ kbase_device_pm_init, kbase_device_pm_term, "Power management initialization failed" },
 	{ kbase_device_early_init, kbase_device_early_term, "Early device initialization failed" },
 	{ kbase_backend_time_init, NULL, "Time backend initialization failed" },
 	{ kbase_device_misc_init, kbase_device_misc_term,
diff --git a/drivers/gpu/arm/bifrost/device/mali_kbase_device.c b/drivers/gpu/arm/bifrost/device/mali_kbase_device.c
index b191c758c62f..e5bed33d1129 100644
--- a/drivers/gpu/arm/bifrost/device/mali_kbase_device.c
+++ b/drivers/gpu/arm/bifrost/device/mali_kbase_device.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
 /*
  *
- * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2024 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -51,10 +51,7 @@
 #include "backend/gpu/mali_kbase_irq_internal.h"
 #include "mali_kbase_regs_history_debugfs.h"
 #include "mali_kbase_pbha.h"
-
-#ifdef CONFIG_MALI_ARBITER_SUPPORT
 #include "arbiter/mali_kbase_arbiter_pm.h"
-#endif /* CONFIG_MALI_ARBITER_SUPPORT */
 
 #if defined(CONFIG_DEBUG_FS) && !IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI)
 
@@ -69,6 +66,22 @@ static DEFINE_MUTEX(kbase_dev_list_lock);
 static LIST_HEAD(kbase_dev_list);
 static unsigned int kbase_dev_nr;
 
+static unsigned int mma_wa_id; /* zero-initialised; 0 is outside the accepted 1..15 range */
+/* Parameter setter: forwards the bounds 1 and 15 to kbase_param_set_uint_minmax(). */
+static int set_mma_wa_id(const char *val, const struct kernel_param *kp)
+{
+	return kbase_param_set_uint_minmax(val, kp, 1, 15);
+}
+
+static const struct kernel_param_ops mma_wa_id_ops = {
+	.set = set_mma_wa_id,
+	.get = param_get_uint,
+};
+/* Permission 0444 exposes the parameter read-only in sysfs. */
+module_param_cb(mma_wa_id, &mma_wa_id_ops, &mma_wa_id, 0444);
+__MODULE_PARM_TYPE(mma_wa_id, "uint");
+MODULE_PARM_DESC(mma_wa_id, "PBHA ID for MMA workaround. Valid range is from 1 to 15.");
+
 struct kbase_device *kbase_device_alloc(void)
 {
 	return vzalloc(sizeof(struct kbase_device));
@@ -320,6 +333,10 @@ int kbase_device_misc_init(struct kbase_device *const kbdev)
 	if (err)
 		goto dma_set_mask_failed;
 
+	/* Set mma_wa_id if it has been passed in as a module parameter */
+	if ((kbdev->gpu_props.gpu_id.arch_id >= GPU_ID_ARCH_MAKE(14, 8, 0)) && mma_wa_id != 0)
+		kbdev->mma_wa_id = mma_wa_id;
+
 	err = kbase_pbha_read_dtb(kbdev);
 	if (err)
 		goto term_as;
@@ -556,14 +573,27 @@ int kbase_device_early_init(struct kbase_device *kbdev)
 	/* Ensure we can access the GPU registers */
 	kbase_pm_register_access_enable(kbdev);
 
-	/* Initialize GPU_ID props */
-	kbase_gpuprops_parse_gpu_id(&kbdev->gpu_props.gpu_id, kbase_reg_get_gpu_id(kbdev));
-
-	/* Initialize register mapping LUTs */
-	err = kbase_regmap_init(kbdev);
-	if (err)
+	/*
+	 * If -EPERM is returned, it means the device backend is not supported, but
+	 * device initialization can continue.
+	 */
+	err = kbase_device_backend_init(kbdev);
+	if (err != 0 && err != -EPERM)
 		goto pm_runtime_term;
 
+	/*
+	 * Initialize register mapping LUTs. This would have been initialized on HW
+	 * Arbitration but not on PV or non-arbitration devices.
+	 */
+	if (!kbase_reg_is_init(kbdev)) {
+		/* Initialize GPU_ID props */
+		kbase_gpuprops_parse_gpu_id(&kbdev->gpu_props.gpu_id, kbase_reg_get_gpu_id(kbdev));
+
+		err = kbase_regmap_init(kbdev);
+		if (err)
+			goto backend_term;
+	}
+
 	/* Set the list of features available on the current HW
 	 * (identified by the GPU_ID register)
 	 */
@@ -572,7 +602,7 @@ int kbase_device_early_init(struct kbase_device *kbdev)
 	/* Find out GPU properties based on the GPU feature registers. */
 	err = kbase_gpuprops_init(kbdev);
 	if (err)
-		goto regmap_term;
+		goto backend_term;
 
 	/* Get the list of workarounds for issues on the current HW
 	 * (identified by the GPU_ID register and impl_tech in THREAD_FEATURES)
@@ -584,14 +614,12 @@ int kbase_device_early_init(struct kbase_device *kbdev)
 	/* We're done accessing the GPU registers for now. */
 	kbase_pm_register_access_disable(kbdev);
 
-#ifdef CONFIG_MALI_ARBITER_SUPPORT
-	if (kbdev->arb.arb_if)
-		err = kbase_arbiter_pm_install_interrupts(kbdev);
-	else
+	if (kbase_has_arbiter(kbdev)) {
+		if (kbdev->pm.arb_vm_state)
+			err = kbase_arbiter_pm_install_interrupts(kbdev);
+	} else {
 		err = kbase_install_interrupts(kbdev);
-#else
-	err = kbase_install_interrupts(kbdev);
-#endif
+	}
 	if (err)
 		goto gpuprops_term;
 
@@ -599,9 +627,13 @@ int kbase_device_early_init(struct kbase_device *kbdev)
 
 gpuprops_term:
 	kbase_gpuprops_term(kbdev);
-regmap_term:
+backend_term:
+	kbase_device_backend_term(kbdev);
 	kbase_regmap_term(kbdev);
 pm_runtime_term:
+	if (kbdev->pm.backend.gpu_powered)
+		kbase_pm_register_access_disable(kbdev);
+
 	kbase_pm_runtime_term(kbdev);
 platform_device_term:
 	kbasep_platform_device_term(kbdev);
@@ -613,15 +645,13 @@ ktrace_term:
 
 void kbase_device_early_term(struct kbase_device *kbdev)
 {
-#ifdef CONFIG_MALI_ARBITER_SUPPORT
-	if (kbdev->arb.arb_if)
+	if (kbase_has_arbiter(kbdev))
 		kbase_arbiter_pm_release_interrupts(kbdev);
 	else
 		kbase_release_interrupts(kbdev);
-#else
-	kbase_release_interrupts(kbdev);
-#endif /* CONFIG_MALI_ARBITER_SUPPORT */
 	kbase_gpuprops_term(kbdev);
+	kbase_device_backend_term(kbdev);
+	kbase_regmap_term(kbdev);
 	kbase_pm_runtime_term(kbdev);
 	kbasep_platform_device_term(kbdev);
 	kbase_ktrace_term(kbdev);
diff --git a/drivers/gpu/arm/bifrost/device/mali_kbase_device.h b/drivers/gpu/arm/bifrost/device/mali_kbase_device.h
index 9cca6aff4554..1b15ff059194 100644
--- a/drivers/gpu/arm/bifrost/device/mali_kbase_device.h
+++ b/drivers/gpu/arm/bifrost/device/mali_kbase_device.h
@@ -1,7 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
 /*
  *
- * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2024 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -58,6 +58,9 @@ void kbase_increment_device_id(void);
  * When a device file is opened for the first time,
  * load firmware and initialize hardware counter components.
  *
+ * It is safe for this function to be called multiple times without ill
+ * effects. Only the first call would be effective.
+ *
  * Return: 0 on success. An error code on failure.
  */
 int kbase_device_firmware_init_once(struct kbase_device *kbdev);
diff --git a/drivers/gpu/arm/bifrost/device/mali_kbase_device_hw.c b/drivers/gpu/arm/bifrost/device/mali_kbase_device_hw.c
index da597af9c46e..91379ac6429d 100644
--- a/drivers/gpu/arm/bifrost/device/mali_kbase_device_hw.c
+++ b/drivers/gpu/arm/bifrost/device/mali_kbase_device_hw.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
 /*
  *
- * (C) COPYRIGHT 2014-2023 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2024 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -27,14 +27,6 @@
 #include <mali_kbase_reset_gpu.h>
 #include <mmu/mali_kbase_mmu.h>
 
-bool kbase_is_gpu_removed(struct kbase_device *kbdev)
-{
-	if (!IS_ENABLED(CONFIG_MALI_ARBITER_SUPPORT))
-		return false;
-
-	return (kbase_reg_read32(kbdev, GPU_CONTROL_ENUM(GPU_ID)) == 0);
-}
-
 /**
  * busy_wait_cache_operation - Wait for a pending cache flush to complete
  *
diff --git a/drivers/gpu/arm/bifrost/hw_access/backend/mali_kbase_hw_access_model_linux.c b/drivers/gpu/arm/bifrost/hw_access/backend/mali_kbase_hw_access_model_linux.c
index ca1ccbfb3dbe..9993b787ed21 100644
--- a/drivers/gpu/arm/bifrost/hw_access/backend/mali_kbase_hw_access_model_linux.c
+++ b/drivers/gpu/arm/bifrost/hw_access/backend/mali_kbase_hw_access_model_linux.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
 /*
  *
- * (C) COPYRIGHT 2023 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2023-2024 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -46,7 +46,7 @@ u32 kbase_reg_read32(struct kbase_device *kbdev, u32 reg_enum)
 	u32 val = 0;
 	u32 offset;
 
-	if (WARN_ON(!kbdev->pm.backend.gpu_powered))
+	if (WARN_ON(!kbase_reg_is_powered_access_allowed(kbdev, reg_enum)))
 		return 0;
 	if (unlikely(!kbase_reg_is_accessible(kbdev, reg_enum,
 					      KBASE_REGMAP_PERM_READ | KBASE_REGMAP_WIDTH_32_BIT)))
@@ -68,7 +68,7 @@ u64 kbase_reg_read64(struct kbase_device *kbdev, u32 reg_enum)
 	u32 val32[2] = { 0 };
 	u32 offset;
 
-	if (WARN_ON(!kbdev->pm.backend.gpu_powered))
+	if (WARN_ON(!kbase_reg_is_powered_access_allowed(kbdev, reg_enum)))
 		return 0;
 	if (unlikely(!kbase_reg_is_accessible(kbdev, reg_enum,
 					      KBASE_REGMAP_PERM_READ | KBASE_REGMAP_WIDTH_64_BIT)))
@@ -91,7 +91,7 @@ u64 kbase_reg_read64_coherent(struct kbase_device *kbdev, u32 reg_enum)
 	u32 hi1 = 0, hi2 = 0, lo = 0;
 	u32 offset;
 
-	if (WARN_ON(!kbdev->pm.backend.gpu_powered))
+	if (WARN_ON(!kbase_reg_is_powered_access_allowed(kbdev, reg_enum)))
 		return 0;
 	if (unlikely(!kbase_reg_is_accessible(kbdev, reg_enum,
 					      KBASE_REGMAP_PERM_READ | KBASE_REGMAP_WIDTH_64_BIT)))
@@ -116,7 +116,7 @@ void kbase_reg_write32(struct kbase_device *kbdev, u32 reg_enum, u32 value)
 	unsigned long flags;
 	u32 offset;
 
-	if (WARN_ON(!kbdev->pm.backend.gpu_powered))
+	if (WARN_ON(!kbase_reg_is_powered_access_allowed(kbdev, reg_enum)))
 		return;
 	if (unlikely(!kbase_reg_is_accessible(kbdev, reg_enum,
 					      KBASE_REGMAP_PERM_WRITE | KBASE_REGMAP_WIDTH_32_BIT)))
@@ -135,7 +135,7 @@ void kbase_reg_write64(struct kbase_device *kbdev, u32 reg_enum, u64 value)
 	unsigned long flags;
 	u32 offset;
 
-	if (WARN_ON(!kbdev->pm.backend.gpu_powered))
+	if (WARN_ON(!kbase_reg_is_powered_access_allowed(kbdev, reg_enum)))
 		return;
 	if (unlikely(!kbase_reg_is_accessible(kbdev, reg_enum,
 					      KBASE_REGMAP_PERM_WRITE | KBASE_REGMAP_WIDTH_64_BIT)))
diff --git a/drivers/gpu/arm/bifrost/hw_access/backend/mali_kbase_hw_access_real_hw.c b/drivers/gpu/arm/bifrost/hw_access/backend/mali_kbase_hw_access_real_hw.c
index f4afbf55e312..ecf58cb45d15 100644
--- a/drivers/gpu/arm/bifrost/hw_access/backend/mali_kbase_hw_access_real_hw.c
+++ b/drivers/gpu/arm/bifrost/hw_access/backend/mali_kbase_hw_access_real_hw.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
 /*
  *
- * (C) COPYRIGHT 2023 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2023-2024 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -24,12 +24,13 @@
 
 #include <mali_kbase.h>
 #include <hw_access/mali_kbase_hw_access.h>
+#include <linux/mali_hw_access.h>
 
 u64 kbase_reg_get_gpu_id(struct kbase_device *kbdev)
 {
 	u32 val[2] = { 0 };
 
-	val[0] = readl(kbdev->reg);
+	val[0] = mali_readl(kbdev->reg);
 
 
 	return (u64)val[0] | ((u64)val[1] << 32);
@@ -39,13 +40,13 @@ u32 kbase_reg_read32(struct kbase_device *kbdev, u32 reg_enum)
 {
 	u32 val;
 
-	if (WARN_ON(!kbdev->pm.backend.gpu_powered))
+	if (WARN_ON(!kbase_reg_is_powered_access_allowed(kbdev, reg_enum)))
 		return 0;
 	if (unlikely(!kbase_reg_is_accessible(kbdev, reg_enum,
 					      KBASE_REGMAP_PERM_READ | KBASE_REGMAP_WIDTH_32_BIT)))
 		return 0;
 
-	val = readl(kbdev->regmap.regs[reg_enum]);
+	val = mali_readl(kbdev->regmap.regs[reg_enum]);
 
 #if IS_ENABLED(CONFIG_DEBUG_FS)
 	if (unlikely(kbdev->io_history.enabled))
@@ -63,14 +64,13 @@ u64 kbase_reg_read64(struct kbase_device *kbdev, u32 reg_enum)
 {
 	u64 val;
 
-	if (WARN_ON(!kbdev->pm.backend.gpu_powered))
+	if (WARN_ON(!kbase_reg_is_powered_access_allowed(kbdev, reg_enum)))
 		return 0;
 	if (unlikely(!kbase_reg_is_accessible(kbdev, reg_enum,
 					      KBASE_REGMAP_PERM_READ | KBASE_REGMAP_WIDTH_64_BIT)))
 		return 0;
 
-	val = (u64)readl(kbdev->regmap.regs[reg_enum]) |
-	      ((u64)readl(kbdev->regmap.regs[reg_enum] + 4) << 32);
+	val = mali_readq(kbdev->regmap.regs[reg_enum]);
 
 #if IS_ENABLED(CONFIG_DEBUG_FS)
 	if (unlikely(kbdev->io_history.enabled)) {
@@ -90,23 +90,14 @@ KBASE_EXPORT_TEST_API(kbase_reg_read64);
 u64 kbase_reg_read64_coherent(struct kbase_device *kbdev, u32 reg_enum)
 {
 	u64 val;
-#if !IS_ENABLED(CONFIG_MALI_64BIT_HW_ACCESS)
-	u32 hi1, hi2, lo;
-#endif
 
-	if (WARN_ON(!kbdev->pm.backend.gpu_powered))
+	if (WARN_ON(!kbase_reg_is_powered_access_allowed(kbdev, reg_enum)))
 		return 0;
 	if (unlikely(!kbase_reg_is_accessible(kbdev, reg_enum,
 					      KBASE_REGMAP_PERM_READ | KBASE_REGMAP_WIDTH_64_BIT)))
 		return 0;
 
-	do {
-		hi1 = readl(kbdev->regmap.regs[reg_enum] + 4);
-		lo = readl(kbdev->regmap.regs[reg_enum]);
-		hi2 = readl(kbdev->regmap.regs[reg_enum] + 4);
-	} while (hi1 != hi2);
-
-	val = lo | (((u64)hi1) << 32);
+	val = mali_readq_coherent(kbdev->regmap.regs[reg_enum]);
 
 #if IS_ENABLED(CONFIG_DEBUG_FS)
 	if (unlikely(kbdev->io_history.enabled)) {
@@ -125,13 +116,13 @@ KBASE_EXPORT_TEST_API(kbase_reg_read64_coherent);
 
 void kbase_reg_write32(struct kbase_device *kbdev, u32 reg_enum, u32 value)
 {
-	if (WARN_ON(!kbdev->pm.backend.gpu_powered))
+	if (WARN_ON(!kbase_reg_is_powered_access_allowed(kbdev, reg_enum)))
 		return;
 	if (unlikely(!kbase_reg_is_accessible(kbdev, reg_enum,
 					      KBASE_REGMAP_PERM_WRITE | KBASE_REGMAP_WIDTH_32_BIT)))
 		return;
 
-	writel(value, kbdev->regmap.regs[reg_enum]);
+	mali_writel(value, kbdev->regmap.regs[reg_enum]);
 
 #if IS_ENABLED(CONFIG_DEBUG_FS)
 	if (unlikely(kbdev->io_history.enabled))
@@ -145,14 +136,13 @@ KBASE_EXPORT_TEST_API(kbase_reg_write32);
 
 void kbase_reg_write64(struct kbase_device *kbdev, u32 reg_enum, u64 value)
 {
-	if (WARN_ON(!kbdev->pm.backend.gpu_powered))
+	if (WARN_ON(!kbase_reg_is_powered_access_allowed(kbdev, reg_enum)))
 		return;
 	if (unlikely(!kbase_reg_is_accessible(kbdev, reg_enum,
 					      KBASE_REGMAP_PERM_WRITE | KBASE_REGMAP_WIDTH_64_BIT)))
 		return;
 
-	writel(value & 0xFFFFFFFF, kbdev->regmap.regs[reg_enum]);
-	writel(value >> 32, kbdev->regmap.regs[reg_enum] + 4);
+	mali_writeq(value, kbdev->regmap.regs[reg_enum]);
 
 #if IS_ENABLED(CONFIG_DEBUG_FS)
 	if (unlikely(kbdev->io_history.enabled)) {
diff --git a/drivers/gpu/arm/bifrost/hw_access/mali_kbase_hw_access.c b/drivers/gpu/arm/bifrost/hw_access/mali_kbase_hw_access.c
index 16a27c780d3b..d7dd6200d497 100644
--- a/drivers/gpu/arm/bifrost/hw_access/mali_kbase_hw_access.c
+++ b/drivers/gpu/arm/bifrost/hw_access/mali_kbase_hw_access.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
 /*
  *
- * (C) COPYRIGHT 2023 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2023-2024 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -24,9 +24,56 @@
 
 #include <mali_kbase.h>
 #include "mali_kbase_hw_access.h"
+#include "mali_kbase_hw_access_regmap.h"
 
 #include <uapi/gpu/arm/bifrost/gpu/mali_kbase_gpu_id.h>
 
+#define KBASE_REGMAP_ACCESS_ALWAYS_POWERED (1U << 16)
+
+/* Registers that may be accessed even while pm.backend.gpu_powered is false. */
+static const u32 always_powered_regs[] = {
+#if !MALI_USE_CSF
+	PTM_AW_IRQ_CLEAR,
+	PTM_AW_IRQ_INJECTION,
+	PTM_AW_IRQ_MASK,
+	PTM_AW_IRQ_RAWSTAT,
+	PTM_AW_IRQ_STATUS,
+	PTM_AW_MESSAGE__PTM_INCOMING_MESSAGE0,
+	PTM_AW_MESSAGE__PTM_INCOMING_MESSAGE1,
+	PTM_AW_MESSAGE__PTM_OUTGOING_MESSAGE0,
+	PTM_AW_MESSAGE__PTM_OUTGOING_MESSAGE1,
+	PTM_AW_MESSAGE__PTM_OUTGOING_MESSAGE_STATUS,
+	PTM_ID,
+#endif /* !MALI_USE_CSF */
+};
+
+/*
+ * Mark each valid entry of always_powered_regs[] with KBASE_REGMAP_ACCESS_ALWAYS_POWERED.
+ */
+static void kbasep_reg_setup_always_powered_registers(struct kbase_device *kbdev)
+{
+	u32 idx;
+
+#if !MALI_USE_CSF
+	if (kbdev->gpu_props.gpu_id.arch_id < GPU_ID_ARCH_MAKE(9, 14, 0))
+		return;
+#endif /* !MALI_USE_CSF */
+
+	for (idx = 0; idx < ARRAY_SIZE(always_powered_regs); idx++) {
+		const u32 reg_enum = always_powered_regs[idx];
+
+		if (kbase_reg_is_valid(kbdev, reg_enum))
+			kbdev->regmap.flags[reg_enum] |= KBASE_REGMAP_ACCESS_ALWAYS_POWERED;
+	}
+}
+
+bool kbase_reg_is_powered_access_allowed(struct kbase_device *kbdev, u32 reg_enum)
+{
+	/* Range-check reg_enum here: callers invoke this before kbase_reg_is_accessible(). */
+	return kbdev->pm.backend.gpu_powered || (reg_enum < kbdev->regmap.size &&
+		(kbdev->regmap.flags[reg_enum] & KBASE_REGMAP_ACCESS_ALWAYS_POWERED));
+}
+
 bool kbase_reg_is_size64(struct kbase_device *kbdev, u32 reg_enum)
 {
 	if (WARN_ON(reg_enum >= kbdev->regmap.size))
@@ -67,6 +114,11 @@ bool kbase_reg_is_accessible(struct kbase_device *kbdev, u32 reg_enum, u32 flags
 	return true;
 }
 
+bool kbase_reg_is_init(struct kbase_device *kbdev)
+{
+	return kbdev->regmap.regs && kbdev->regmap.flags;
+}
+
 int kbase_reg_get_offset(struct kbase_device *kbdev, u32 reg_enum, u32 *offset)
 {
 	if (unlikely(!kbase_reg_is_accessible(kbdev, reg_enum, 0)))
@@ -108,12 +160,12 @@ int kbase_regmap_init(struct kbase_device *kbdev)
 		return -ENOMEM;
 	}
 
+	kbasep_reg_setup_always_powered_registers(kbdev);
+
 	dev_info(kbdev->dev, "Register LUT %08x initialized for GPU arch 0x%08x\n", lut_arch_id,
 		 kbdev->gpu_props.gpu_id.arch_id);
 
-#if IS_ENABLED(CONFIG_MALI_64BIT_HW_ACCESS) && IS_ENABLED(CONFIG_MALI_REAL_HW)
-	dev_info(kbdev->dev, "64-bit HW access enabled\n");
-#endif
+
 	return 0;
 }
 
diff --git a/drivers/gpu/arm/bifrost/hw_access/mali_kbase_hw_access.h b/drivers/gpu/arm/bifrost/hw_access/mali_kbase_hw_access.h
index 40356596163d..654fb685fa06 100644
--- a/drivers/gpu/arm/bifrost/hw_access/mali_kbase_hw_access.h
+++ b/drivers/gpu/arm/bifrost/hw_access/mali_kbase_hw_access.h
@@ -1,7 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
 /*
  *
- * (C) COPYRIGHT 2023 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2023-2024 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -128,6 +128,25 @@ bool kbase_reg_is_valid(struct kbase_device *kbdev, u32 reg_enum);
  */
 bool kbase_reg_is_accessible(struct kbase_device *kbdev, u32 reg_enum, u32 flags);
 
+/**
+ * kbase_reg_is_powered_access_allowed - check if register is accessible given
+ * current power state
+ *
+ * @kbdev:    Kbase device pointer
+ * @reg_enum: Register enum
+ *
+ * Return: true if the register may be accessed in the current power state, false otherwise
+ */
+bool kbase_reg_is_powered_access_allowed(struct kbase_device *kbdev, u32 reg_enum);
+
+/**
+ * kbase_reg_is_init - check if regmap is initialized
+ *
+ * @kbdev:     Kbase device pointer
+ * Return:     true if the regmap register and flag tables are both allocated, else false
+ */
+bool kbase_reg_is_init(struct kbase_device *kbdev);
+
 /**
  * kbase_reg_get_offset - get register offset from enum
  * @kbdev:    Kbase device pointer
diff --git a/drivers/gpu/arm/bifrost/hw_access/mali_kbase_hw_access_regmap.h b/drivers/gpu/arm/bifrost/hw_access/mali_kbase_hw_access_regmap.h
index 97adb1322a35..591391c6a8a1 100644
--- a/drivers/gpu/arm/bifrost/hw_access/mali_kbase_hw_access_regmap.h
+++ b/drivers/gpu/arm/bifrost/hw_access/mali_kbase_hw_access_regmap.h
@@ -1,7 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
 /*
  *
- * (C) COPYRIGHT 2023 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2023-2024 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -308,6 +308,16 @@
 #define TC_CLOCK_GATE_OVERRIDE (1ul << 0)
 /* End TILER_CONFIG register */
 
+/* L2_FEATURES register */
+#define L2_FEATURES_CACHE_SIZE_SHIFT GPU_U(16)
+#define L2_FEATURES_CACHE_SIZE_MASK (GPU_U(0xFF) << L2_FEATURES_CACHE_SIZE_SHIFT)
+#define L2_FEATURES_CACHE_SIZE_GET(reg_val) \
+	(((reg_val)&L2_FEATURES_CACHE_SIZE_MASK) >> L2_FEATURES_CACHE_SIZE_SHIFT)
+#define L2_FEATURES_CACHE_SIZE_SET(reg_val, value)     \
+	(~(~(reg_val) | L2_FEATURES_CACHE_SIZE_MASK) | \
+	 (((value) << L2_FEATURES_CACHE_SIZE_SHIFT) & L2_FEATURES_CACHE_SIZE_MASK))
+/* End L2_FEATURES register */
+
 /* L2_CONFIG register */
 #define L2_CONFIG_SIZE_SHIFT 16
 #define L2_CONFIG_SIZE_MASK (0xFFul << L2_CONFIG_SIZE_SHIFT)
diff --git a/drivers/gpu/arm/bifrost/hw_access/mali_kbase_hw_access_regmap_legacy.h b/drivers/gpu/arm/bifrost/hw_access/mali_kbase_hw_access_regmap_legacy.h
index a62d1707ebb7..9392d44f684b 100644
--- a/drivers/gpu/arm/bifrost/hw_access/mali_kbase_hw_access_regmap_legacy.h
+++ b/drivers/gpu/arm/bifrost/hw_access/mali_kbase_hw_access_regmap_legacy.h
@@ -1,7 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
 /*
  *
- * (C) COPYRIGHT 2023 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2023-2024 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
diff --git a/drivers/gpu/arm/bifrost/hw_access/regmap/mali_kbase_regmap_csf_macros.h b/drivers/gpu/arm/bifrost/hw_access/regmap/mali_kbase_regmap_csf_macros.h
index c3d12ad04c4e..e0568d8f8c6d 100644
--- a/drivers/gpu/arm/bifrost/hw_access/regmap/mali_kbase_regmap_csf_macros.h
+++ b/drivers/gpu/arm/bifrost/hw_access/regmap/mali_kbase_regmap_csf_macros.h
@@ -1,7 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
 /*
  *
- * (C) COPYRIGHT 2023 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2023-2024 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -185,6 +185,7 @@
  */
 #define AS_MEMATTR_ATTRIBUTE0_MEMORY_TYPE_SHARED 0x0
 
+
 /* CSF_CONFIG register */
 #define CSF_CONFIG_FORCE_COHERENCY_FEATURES_SHIFT 2
 
diff --git a/drivers/gpu/arm/bifrost/hw_access/regmap/mali_kbase_regmap_jm.c b/drivers/gpu/arm/bifrost/hw_access/regmap/mali_kbase_regmap_jm.c
index 178d45501916..4f41693ff3c2 100644
--- a/drivers/gpu/arm/bifrost/hw_access/regmap/mali_kbase_regmap_jm.c
+++ b/drivers/gpu/arm/bifrost/hw_access/regmap/mali_kbase_regmap_jm.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
 /*
  *
- * (C) COPYRIGHT 2023 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2023-2024 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -2240,6 +2240,56 @@ static void kbase_regmap_v9_2_init(struct kbase_device *kbdev)
 	kbdev->regmap.regs[GPU_CONTROL__L2_CONFIG] = kbdev->reg + 0x48;
 }
 
+static void kbase_regmap_v9_14_init(struct kbase_device *kbdev)
+{
+	if (kbdev->regmap.regs == NULL && kbdev->regmap.flags == NULL) {
+		kbdev->regmap.size = NR_V9_14_REGS;
+		kbdev->regmap.regs =
+			kcalloc(kbdev->regmap.size, sizeof(void __iomem *), GFP_KERNEL);
+		kbdev->regmap.flags = kcalloc(kbdev->regmap.size, sizeof(u32), GFP_KERNEL);
+	}
+	/* Bail out (with a warning) if either table is still unallocated. */
+	if (WARN_ON(kbdev->regmap.regs == NULL))
+		return;
+	if (WARN_ON(kbdev->regmap.flags == NULL))
+		return;
+	/* Apply the v9.2 initialisation to the same tables before adding PTM_* entries. */
+	kbase_regmap_v9_2_init(kbdev);
+	/* Width and read/write permission flags of the PTM_* registers. */
+	kbdev->regmap.flags[PTM_AW_IRQ_CLEAR] = KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ |
+						KBASE_REGMAP_PERM_WRITE;
+	kbdev->regmap.flags[PTM_AW_IRQ_INJECTION] =
+		KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE;
+	kbdev->regmap.flags[PTM_AW_IRQ_MASK] = KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ |
+					       KBASE_REGMAP_PERM_WRITE;
+	kbdev->regmap.flags[PTM_AW_IRQ_RAWSTAT] = KBASE_REGMAP_WIDTH_32_BIT |
+						  KBASE_REGMAP_PERM_READ;
+	kbdev->regmap.flags[PTM_AW_IRQ_STATUS] = KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ;
+	kbdev->regmap.flags[PTM_AW_MESSAGE__PTM_INCOMING_MESSAGE0] = KBASE_REGMAP_WIDTH_32_BIT |
+								     KBASE_REGMAP_PERM_READ;
+	kbdev->regmap.flags[PTM_AW_MESSAGE__PTM_INCOMING_MESSAGE1] = KBASE_REGMAP_WIDTH_32_BIT |
+								     KBASE_REGMAP_PERM_READ;
+	kbdev->regmap.flags[PTM_AW_MESSAGE__PTM_OUTGOING_MESSAGE0] =
+		KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE;
+	kbdev->regmap.flags[PTM_AW_MESSAGE__PTM_OUTGOING_MESSAGE1] =
+		KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ | KBASE_REGMAP_PERM_WRITE;
+	kbdev->regmap.flags[PTM_AW_MESSAGE__PTM_OUTGOING_MESSAGE_STATUS] =
+		KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ;
+	kbdev->regmap.flags[PTM_ID] = KBASE_REGMAP_WIDTH_32_BIT | KBASE_REGMAP_PERM_READ;
+	/* Addresses of the PTM_* registers, as offsets from kbdev->reg. */
+	kbdev->regmap.regs[PTM_AW_IRQ_CLEAR] = kbdev->reg + 0x1ffc8;
+	kbdev->regmap.regs[PTM_AW_IRQ_INJECTION] = kbdev->reg + 0x1ffd4;
+	kbdev->regmap.regs[PTM_AW_IRQ_MASK] = kbdev->reg + 0x1ffcc;
+	kbdev->regmap.regs[PTM_AW_IRQ_RAWSTAT] = kbdev->reg + 0x1ffc4;
+	kbdev->regmap.regs[PTM_AW_IRQ_STATUS] = kbdev->reg + 0x1ffd0;
+	kbdev->regmap.regs[PTM_AW_MESSAGE__PTM_INCOMING_MESSAGE0] = kbdev->reg + 0x1ffd8;
+	kbdev->regmap.regs[PTM_AW_MESSAGE__PTM_INCOMING_MESSAGE1] = kbdev->reg + 0x1ffdc;
+	kbdev->regmap.regs[PTM_AW_MESSAGE__PTM_OUTGOING_MESSAGE0] = kbdev->reg + 0x1ffe4;
+	kbdev->regmap.regs[PTM_AW_MESSAGE__PTM_OUTGOING_MESSAGE1] = kbdev->reg + 0x1ffe8;
+	kbdev->regmap.regs[PTM_AW_MESSAGE__PTM_OUTGOING_MESSAGE_STATUS] = kbdev->reg + 0x1ffe0;
+	kbdev->regmap.regs[PTM_ID] = kbdev->reg + 0x1ffc0;
+}
+
 u32 kbase_regmap_backend_init(struct kbase_device *kbdev)
 {
 	int i = 0;
@@ -2254,6 +2304,7 @@ u32 kbase_regmap_backend_init(struct kbase_device *kbdev)
 		{ GPU_ID_ARCH_MAKE(7, 2, 0), kbase_regmap_v7_2_init },
 		{ GPU_ID_ARCH_MAKE(9, 0, 0), kbase_regmap_v9_0_init },
 		{ GPU_ID_ARCH_MAKE(9, 2, 0), kbase_regmap_v9_2_init },
+		{ GPU_ID_ARCH_MAKE(9, 14, 0), kbase_regmap_v9_14_init },
 	};
 
 	for (i = 0; i < ARRAY_SIZE(init_array) - 1; i++) {
@@ -2967,6 +3018,18 @@ static char *enum_strings[] = {
 	[GPU_CONTROL__CORE_FEATURES] = "GPU_CONTROL__CORE_FEATURES",
 	[GPU_CONTROL__THREAD_TLS_ALLOC] = "GPU_CONTROL__THREAD_TLS_ALLOC",
 	[GPU_CONTROL__L2_CONFIG] = "GPU_CONTROL__L2_CONFIG",
+	[PTM_AW_IRQ_CLEAR] = "PTM_AW_IRQ_CLEAR",
+	[PTM_AW_IRQ_INJECTION] = "PTM_AW_IRQ_INJECTION",
+	[PTM_AW_IRQ_MASK] = "PTM_AW_IRQ_MASK",
+	[PTM_AW_IRQ_RAWSTAT] = "PTM_AW_IRQ_RAWSTAT",
+	[PTM_AW_IRQ_STATUS] = "PTM_AW_IRQ_STATUS",
+	[PTM_AW_MESSAGE__PTM_INCOMING_MESSAGE0] = "PTM_AW_MESSAGE__PTM_INCOMING_MESSAGE0",
+	[PTM_AW_MESSAGE__PTM_INCOMING_MESSAGE1] = "PTM_AW_MESSAGE__PTM_INCOMING_MESSAGE1",
+	[PTM_AW_MESSAGE__PTM_OUTGOING_MESSAGE0] = "PTM_AW_MESSAGE__PTM_OUTGOING_MESSAGE0",
+	[PTM_AW_MESSAGE__PTM_OUTGOING_MESSAGE1] = "PTM_AW_MESSAGE__PTM_OUTGOING_MESSAGE1",
+	[PTM_AW_MESSAGE__PTM_OUTGOING_MESSAGE_STATUS] =
+		"PTM_AW_MESSAGE__PTM_OUTGOING_MESSAGE_STATUS",
+	[PTM_ID] = "PTM_ID",
 };
 
 const char *kbase_reg_get_enum_string(u32 reg_enum)
diff --git a/drivers/gpu/arm/bifrost/hw_access/regmap/mali_kbase_regmap_jm_enums.h b/drivers/gpu/arm/bifrost/hw_access/regmap/mali_kbase_regmap_jm_enums.h
index f5618c4794db..59d8745eaf4a 100644
--- a/drivers/gpu/arm/bifrost/hw_access/regmap/mali_kbase_regmap_jm_enums.h
+++ b/drivers/gpu/arm/bifrost/hw_access/regmap/mali_kbase_regmap_jm_enums.h
@@ -1,7 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
 /*
  *
- * (C) COPYRIGHT 2023 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2023-2024 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -759,4 +759,19 @@ enum kbase_regmap_enum_v9_2 {
 	NR_V9_2_REGS,
 };
 
+enum kbase_regmap_enum_v9_14 {
+	PTM_AW_IRQ_CLEAR = NR_V9_2_REGS, /* (RW) 32-bit 0x1FFC8 */
+	PTM_AW_IRQ_INJECTION, /* (RW) 32-bit 0x1FFD4 */
+	PTM_AW_IRQ_MASK, /* (RW) 32-bit 0x1FFCC */
+	PTM_AW_IRQ_RAWSTAT, /* (RO) 32-bit 0x1FFC4 */
+	PTM_AW_IRQ_STATUS, /* (RO) 32-bit 0x1FFD0 */
+	PTM_AW_MESSAGE__PTM_INCOMING_MESSAGE0, /* (RO) 32-bit 0x1FFD8 */
+	PTM_AW_MESSAGE__PTM_INCOMING_MESSAGE1, /* (RO) 32-bit 0x1FFDC */
+	PTM_AW_MESSAGE__PTM_OUTGOING_MESSAGE0, /* (RW) 32-bit 0x1FFE4 */
+	PTM_AW_MESSAGE__PTM_OUTGOING_MESSAGE1, /* (RW) 32-bit 0x1FFE8 */
+	PTM_AW_MESSAGE__PTM_OUTGOING_MESSAGE_STATUS, /* (RO) 32-bit 0x1FFE0 */
+	PTM_ID, /* (RO) 32-bit 0x1FFC0 */
+	NR_V9_14_REGS, /* Number of regmap enums up to and including v9.14 */
+};
+
 #endif /* _MALI_KBASE_REGMAP_JM_ENUMS_H_ */
diff --git a/drivers/gpu/arm/bifrost/hw_access/regmap/mali_kbase_regmap_jm_macros.h b/drivers/gpu/arm/bifrost/hw_access/regmap/mali_kbase_regmap_jm_macros.h
index c3bc0f3e9924..650ed9b31eea 100644
--- a/drivers/gpu/arm/bifrost/hw_access/regmap/mali_kbase_regmap_jm_macros.h
+++ b/drivers/gpu/arm/bifrost/hw_access/regmap/mali_kbase_regmap_jm_macros.h
@@ -1,7 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
 /*
  *
- * (C) COPYRIGHT 2023 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2023-2024 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -47,6 +47,8 @@
 #define MMU_AS_OFFSET(n, regname) ENUM_OFFSET(n, MMU_AS_ENUM(0, regname), MMU_AS_ENUM(1, regname))
 #define MMU_AS_BASE_OFFSET(n) MMU_AS_OFFSET(n, TRANSTAB)
 
+#define PTM_AW_MESSAGE_ENUM(regname) PTM_AW_MESSAGE__##regname
+
 /* register value macros */
 /* GPU_STATUS values */
 #define GPU_STATUS_PRFCNT_ACTIVE (1 << 2) /* Set if the performance counters are active. */
@@ -295,4 +297,11 @@
 	(GPU_FAULT | MULTIPLE_GPU_FAULTS | RESET_COMPLETED | POWER_CHANGED_ALL | \
 	 PRFCNT_SAMPLE_COMPLETED)
 
+#define WINDOW_IRQ_MESSAGE (1U << 0)
+#define WINDOW_IRQ_INVALID_ACCESS (1U << 1)
+#define WINDOW_IRQ_GPU (1U << 2)
+#define WINDOW_IRQ_JOB (1U << 3)
+#define WINDOW_IRQ_MMU (1U << 4)
+#define WINDOW_IRQ_EVENT (1U << 5)
+
 #endif /* _MALI_KBASE_REGMAP_JM_MACROS_H_ */
diff --git a/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend.h b/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend.h
index cc3ba98ab6fe..a6d418b8e82c 100644
--- a/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend.h
+++ b/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend.h
@@ -1,7 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
 /*
  *
- * (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2018-2024 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -78,6 +78,18 @@ typedef int kbase_hwcnt_backend_init_fn(const struct kbase_hwcnt_backend_info *i
  */
 typedef void kbase_hwcnt_backend_term_fn(struct kbase_hwcnt_backend *backend);
 
+/**
+ * typedef kbase_hwcnt_backend_acquire_fn - Enable counter collection.
+ * @backend: Non-NULL pointer to backend interface.
+ */
+typedef void kbase_hwcnt_backend_acquire_fn(const struct kbase_hwcnt_backend *backend);
+
+/**
+ * typedef kbase_hwcnt_backend_release_fn - Disable counter collection.
+ * @backend: Non-NULL pointer to backend interface.
+ */
+typedef void kbase_hwcnt_backend_release_fn(const struct kbase_hwcnt_backend *backend);
+
 /**
  * typedef kbase_hwcnt_backend_timestamp_ns_fn - Get the current backend
  *                                               timestamp.
@@ -206,6 +218,10 @@ typedef int kbase_hwcnt_backend_dump_get_fn(struct kbase_hwcnt_backend *backend,
  *                      metadata.
  * @init:               Function ptr to initialise an instance of the backend.
  * @term:               Function ptr to terminate an instance of the backend.
+ * @acquire:            Callback to indicate that counter collection has
+ *                      been enabled.
+ * @release:            Callback to indicate that counter collection has
+ *                      been disabled.
  * @timestamp_ns:       Function ptr to get the current backend timestamp.
  * @dump_enable:        Function ptr to enable dumping.
  * @dump_enable_nolock: Function ptr to enable dumping while the
@@ -222,6 +238,8 @@ struct kbase_hwcnt_backend_interface {
 	kbase_hwcnt_backend_metadata_fn *metadata;
 	kbase_hwcnt_backend_init_fn *init;
 	kbase_hwcnt_backend_term_fn *term;
+	kbase_hwcnt_backend_acquire_fn *acquire;
+	kbase_hwcnt_backend_release_fn *release;
 	kbase_hwcnt_backend_timestamp_ns_fn *timestamp_ns;
 	kbase_hwcnt_backend_dump_enable_fn *dump_enable;
 	kbase_hwcnt_backend_dump_enable_nolock_fn *dump_enable_nolock;
diff --git a/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend_csf.c b/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend_csf.c
index d605253752ca..b937c047a94a 100644
--- a/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend_csf.c
+++ b/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend_csf.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
 /*
  *
- * (C) COPYRIGHT 2021-2023 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2021-2024 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -21,7 +21,6 @@
 
 #include "hwcnt/backend/mali_kbase_hwcnt_backend_csf.h"
 #include "hwcnt/mali_kbase_hwcnt_gpu.h"
-#include "hwcnt/mali_kbase_hwcnt_types.h"
 
 #include <linux/log2.h>
 #include <linux/kernel.h>
@@ -31,6 +30,7 @@
 #include <linux/wait.h>
 #include <linux/workqueue.h>
 #include <linux/completion.h>
+#include <linux/version_compat_defs.h>
 
 #ifndef BASE_MAX_NR_CLOCKS_REGULATORS
 #define BASE_MAX_NR_CLOCKS_REGULATORS 4
@@ -255,7 +255,8 @@ struct kbase_hwcnt_csf_physical_layout {
  * @hwc_threshold_work:         Worker for consuming available samples when
  *                              threshold interrupt raised.
  * @num_l2_slices:              Current number of L2 slices allocated to the GPU.
- * @shader_present_bitmap:      Current shader-present bitmap that is allocated to the GPU.
+ * @powered_shader_core_mask:   The common mask between the debug_core_mask
+ *                              and the shader_present_bitmap.
  */
 struct kbase_hwcnt_backend_csf {
 	struct kbase_hwcnt_backend_csf_info *info;
@@ -283,7 +284,7 @@ struct kbase_hwcnt_backend_csf {
 	struct work_struct hwc_dump_work;
 	struct work_struct hwc_threshold_work;
 	size_t num_l2_slices;
-	u64 shader_present_bitmap;
+	u64 powered_shader_core_mask;
 };
 
 static bool kbasep_hwcnt_backend_csf_backend_exists(struct kbase_hwcnt_backend_csf_info *csf_info)
@@ -296,9 +297,11 @@ static bool kbasep_hwcnt_backend_csf_backend_exists(struct kbase_hwcnt_backend_c
 }
 
 void kbase_hwcnt_backend_csf_set_hw_availability(struct kbase_hwcnt_backend_interface *iface,
-						 size_t num_l2_slices, u64 shader_present_bitmap)
+						 size_t num_l2_slices, u64 shader_present,
+						 u64 power_core_mask)
 {
 	struct kbase_hwcnt_backend_csf_info *csf_info;
+	u64 norm_shader_present = power_core_mask & shader_present;
 
 	if (!iface)
 		return;
@@ -309,16 +312,17 @@ void kbase_hwcnt_backend_csf_set_hw_availability(struct kbase_hwcnt_backend_inte
 	if (!csf_info || !csf_info->backend)
 		return;
 
+
 	if (WARN_ON(csf_info->backend->enable_state != KBASE_HWCNT_BACKEND_CSF_DISABLED))
 		return;
 
 	if (WARN_ON(num_l2_slices > csf_info->backend->phys_layout.mmu_l2_cnt) ||
-	    WARN_ON((shader_present_bitmap & csf_info->backend->phys_layout.shader_avail_mask) !=
-		    shader_present_bitmap))
+	    WARN_ON((norm_shader_present & csf_info->backend->phys_layout.shader_avail_mask) !=
+		    norm_shader_present))
 		return;
 
 	csf_info->backend->num_l2_slices = num_l2_slices;
-	csf_info->backend->shader_present_bitmap = shader_present_bitmap;
+	csf_info->backend->powered_shader_core_mask = norm_shader_present;
 }
 
 /**
@@ -424,7 +428,7 @@ static void kbasep_hwcnt_backend_csf_init_layout(
 	WARN_ON(!prfcnt_info);
 	WARN_ON(!phys_layout);
 
-	shader_core_cnt = (size_t)fls64(prfcnt_info->core_mask);
+	shader_core_cnt = (size_t)fls64(prfcnt_info->sc_core_mask);
 	values_per_block = prfcnt_info->prfcnt_block_size / KBASE_HWCNT_VALUE_HW_BYTES;
 	fw_block_cnt = div_u64(prfcnt_info->prfcnt_fw_size, prfcnt_info->prfcnt_block_size);
 	hw_block_cnt = div_u64(prfcnt_info->prfcnt_hw_size, prfcnt_info->prfcnt_block_size);
@@ -445,7 +449,7 @@ static void kbasep_hwcnt_backend_csf_init_layout(
 		.fw_block_cnt = fw_block_cnt,
 		.hw_block_cnt = hw_block_cnt,
 		.block_cnt = fw_block_cnt + hw_block_cnt,
-		.shader_avail_mask = prfcnt_info->core_mask,
+		.shader_avail_mask = prfcnt_info->sc_core_mask,
 		.headers_per_block = KBASE_HWCNT_V5_HEADERS_PER_BLOCK,
 		.values_per_block = values_per_block,
 		.counters_per_block = values_per_block - KBASE_HWCNT_V5_HEADERS_PER_BLOCK,
@@ -454,17 +458,20 @@ static void kbasep_hwcnt_backend_csf_init_layout(
 }
 
 static void
-kbasep_hwcnt_backend_csf_reset_internal_buffers(struct kbase_hwcnt_backend_csf *backend_csf)
+kbasep_hwcnt_backend_csf_reset_internal_buffers(struct kbase_hwcnt_backend_csf *backend_csf,
+						bool user_bufs)
 {
 	size_t user_buf_bytes = backend_csf->info->metadata->dump_buf_bytes;
 	size_t block_state_bytes = backend_csf->phys_layout.block_cnt *
 				   KBASE_HWCNT_BLOCK_STATE_BYTES * KBASE_HWCNT_BLOCK_STATE_STRIDE;
 
-	memset(backend_csf->to_user_buf, 0, user_buf_bytes);
 	memset(backend_csf->accum_buf, 0, user_buf_bytes);
 	memset(backend_csf->old_sample_buf, 0, backend_csf->info->prfcnt_info.dump_bytes);
 	memset(backend_csf->block_states, 0, block_state_bytes);
-	memset(backend_csf->to_user_block_states, 0, block_state_bytes);
+	if (user_bufs) {
+		memset(backend_csf->to_user_buf, 0, user_buf_bytes);
+		memset(backend_csf->to_user_block_states, 0, block_state_bytes);
+	}
 }
 
 static void
@@ -517,34 +524,21 @@ static void kbasep_hwcnt_backend_csf_update_user_sample(struct kbase_hwcnt_backe
 	memset(backend_csf->block_states, 0, block_state_bytes);
 }
 
-/**
- * kbasep_hwcnt_backend_csf_update_block_state - Update block state of a block instance with
- *						   information from a sample.
- * @phys_layout:                Physical memory layout information of HWC
- *                              sample buffer.
- * @enable_mask:                Counter enable mask for the block whose state is being updated.
- * @enable_state:               The CSF backend internal enabled state.
- * @exiting_protm:              Whether or not the sample is taken when the GPU is exiting
- *                              protected mode.
- * @block_idx:                  Index of block within the ringbuffer.
- * @block_state:                Pointer to existing block state of the block whose state is being
- *                              updated.
- * @fw_in_protected_mode:       Whether or not GPU is in protected mode during sampling.
- */
-static void kbasep_hwcnt_backend_csf_update_block_state(
-	const struct kbase_hwcnt_csf_physical_layout *phys_layout, const u32 enable_mask,
-	enum kbase_hwcnt_backend_csf_enable_state enable_state, bool exiting_protm,
-	size_t block_idx, blk_stt_t *const block_state, bool fw_in_protected_mode)
+void kbasep_hwcnt_backend_csf_update_block_state(struct kbase_hwcnt_backend_csf *backend,
+						 const u32 enable_mask, bool exiting_protm,
+						 size_t block_idx, blk_stt_t *const block_state,
+						 bool fw_in_protected_mode)
 {
+	const struct kbase_hwcnt_csf_physical_layout *phys_layout = &backend->phys_layout;
 	/* Offset of shader core blocks from the start of the HW blocks in the sample */
 	size_t shader_core_block_offset =
-		(size_t)(phys_layout->hw_block_cnt - phys_layout->shader_cnt);
+		(size_t)(phys_layout->block_cnt - phys_layout->shader_cnt);
 	bool is_shader_core_block;
 
-	is_shader_core_block = block_idx >= shader_core_block_offset;
+	is_shader_core_block = (block_idx >= shader_core_block_offset);
 
 	/* Set power bits for the block state for the block, for the sample */
-	switch (enable_state) {
+	switch (backend->enable_state) {
 	/* Disabled states */
 	case KBASE_HWCNT_BACKEND_CSF_DISABLED:
 	case KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_ENABLED:
@@ -592,21 +586,45 @@ static void kbasep_hwcnt_backend_csf_update_block_state(
 								    KBASE_HWCNT_STATE_NORMAL);
 	else
 		kbase_hwcnt_block_state_append(block_state, KBASE_HWCNT_STATE_NORMAL);
+
+	/* powered_shader_core_mask stored in the backend is the intersection of
+	 * the shader present mask and the debug core mask, so explicit checking of the
+	 * core mask is not required here.
+	 */
+	if (is_shader_core_block) {
+		u64 current_shader_core = 1ULL << (block_idx - shader_core_block_offset);
+
+		WARN_ON_ONCE(backend->phys_layout.shader_cnt > 64);
+
+		if (current_shader_core & backend->info->backend->powered_shader_core_mask)
+			kbase_hwcnt_block_state_append(block_state, KBASE_HWCNT_STATE_AVAILABLE);
+		else if (current_shader_core & ~backend->info->backend->powered_shader_core_mask)
+			kbase_hwcnt_block_state_append(block_state, KBASE_HWCNT_STATE_UNAVAILABLE);
+		else
+			WARN_ON_ONCE(true);
+	}
+	else
+		kbase_hwcnt_block_state_append(block_state, KBASE_HWCNT_STATE_AVAILABLE);
 }
 
-static void kbasep_hwcnt_backend_csf_accumulate_sample(
-	const struct kbase_hwcnt_csf_physical_layout *phys_layout, size_t dump_bytes,
-	u64 *accum_buf, const u32 *old_sample_buf, const u32 *new_sample_buf,
-	blk_stt_t *const block_states, bool clearing_samples,
-	enum kbase_hwcnt_backend_csf_enable_state enable_state, bool fw_in_protected_mode)
+static void kbasep_hwcnt_backend_csf_accumulate_sample(struct kbase_hwcnt_backend_csf *backend,
+						       const u32 *old_sample_buf,
+						       const u32 *new_sample_buf)
 {
+	const struct kbase_hwcnt_csf_physical_layout *phys_layout = &backend->phys_layout;
+	const size_t dump_bytes = backend->info->prfcnt_info.dump_bytes;
+	const size_t values_per_block = phys_layout->values_per_block;
+	blk_stt_t *const block_states = backend->block_states;
+	const bool fw_in_protected_mode = backend->info->fw_in_protected_mode;
+	const bool clearing_samples = backend->info->prfcnt_info.clearing_samples;
+	u64 *accum_buf = backend->accum_buf;
+
 	size_t block_idx;
 	const u32 *old_block = old_sample_buf;
 	const u32 *new_block = new_sample_buf;
 	u64 *acc_block = accum_buf;
 	/* Flag to indicate whether current sample is exiting protected mode. */
 	bool exiting_protm = false;
-	const size_t values_per_block = phys_layout->values_per_block;
 
 	/* The block pointers now point to the first HW block, which is always a CSHW/front-end
 	 * block. The counter enable mask for this block can be checked to determine whether this
@@ -620,9 +638,8 @@ static void kbasep_hwcnt_backend_csf_accumulate_sample(
 		const u32 old_enable_mask = old_block[phys_layout->enable_mask_offset];
 		const u32 new_enable_mask = new_block[phys_layout->enable_mask_offset];
 		/* Update block state with information of the current sample */
-		kbasep_hwcnt_backend_csf_update_block_state(phys_layout, new_enable_mask,
-							    enable_state, exiting_protm, block_idx,
-							    &block_states[block_idx],
+		kbasep_hwcnt_backend_csf_update_block_state(backend, new_enable_mask, exiting_protm,
+							    block_idx, &block_states[block_idx],
 							    fw_in_protected_mode);
 
 		if (!(new_enable_mask & HWCNT_BLOCK_EMPTY_SAMPLE)) {
@@ -706,7 +723,6 @@ static void kbasep_hwcnt_backend_csf_accumulate_samples(struct kbase_hwcnt_backe
 	u8 *cpu_dump_base = (u8 *)backend_csf->ring_buf_cpu_base;
 	const size_t ring_buf_cnt = backend_csf->info->ring_buf_cnt;
 	const size_t buf_dump_bytes = backend_csf->info->prfcnt_info.dump_bytes;
-	bool clearing_samples = backend_csf->info->prfcnt_info.clearing_samples;
 	u32 *old_sample_buf = backend_csf->old_sample_buf;
 	u32 *new_sample_buf = old_sample_buf;
 	const struct kbase_hwcnt_csf_physical_layout *phys_layout = &backend_csf->phys_layout;
@@ -740,10 +756,8 @@ static void kbasep_hwcnt_backend_csf_accumulate_samples(struct kbase_hwcnt_backe
 		const u32 buf_idx = raw_idx & (ring_buf_cnt - 1);
 
 		new_sample_buf = (u32 *)&cpu_dump_base[buf_idx * buf_dump_bytes];
-		kbasep_hwcnt_backend_csf_accumulate_sample(
-			phys_layout, buf_dump_bytes, backend_csf->accum_buf, old_sample_buf,
-			new_sample_buf, backend_csf->block_states, clearing_samples,
-			backend_csf->enable_state, backend_csf->info->fw_in_protected_mode);
+		kbasep_hwcnt_backend_csf_accumulate_sample(backend_csf, old_sample_buf,
+							   new_sample_buf);
 
 		old_sample_buf = new_sample_buf;
 	}
@@ -1215,11 +1229,6 @@ static void kbasep_hwcnt_backend_csf_dump_disable(struct kbase_hwcnt_backend *ba
 						 backend_csf->ring_buf, 0,
 						 backend_csf->info->ring_buf_cnt, false);
 
-	/* Reset accumulator, old_sample_buf and user_sample to all-0 to prepare
-	 * for next enable.
-	 */
-	kbasep_hwcnt_backend_csf_reset_internal_buffers(backend_csf);
-
 	/* Disabling HWCNT is an indication that blocks have been powered off. This is important to
 	 * know for L2, CSHW, and Tiler blocks, as this is currently the only way a backend can
 	 * know if they are being powered off.
@@ -1255,6 +1264,12 @@ static void kbasep_hwcnt_backend_csf_dump_disable(struct kbase_hwcnt_backend *ba
 		kbase_hwcnt_block_state_set(&backend_csf->accum_all_blk_stt,
 					    KBASE_HWCNT_STATE_UNKNOWN);
 	}
+
+	/* Reset accumulator, old_sample_buf and block_states to all-0 to prepare for next enable.
+	 * Reset user buffers if ownership is transferred to the caller (i.e. dump_buffer
+	 * is provided).
+	 */
+	kbasep_hwcnt_backend_csf_reset_internal_buffers(backend_csf, dump_buffer);
 }
 
 /* CSF backend implementation of kbase_hwcnt_backend_dump_request_fn */
@@ -1279,6 +1294,11 @@ static int kbasep_hwcnt_backend_csf_dump_request(struct kbase_hwcnt_backend *bac
 		backend_csf->dump_state = KBASE_HWCNT_BACKEND_CSF_DUMP_COMPLETED;
 		*dump_time_ns = kbasep_hwcnt_backend_csf_timestamp_ns(backend);
 		kbasep_hwcnt_backend_csf_cc_update(backend_csf);
+		/* The backend may remain in the transitioning-to-enabled state across
+		 * multiple dump requests, so append the OFF state on each of them.
+		 */
+		kbase_hwcnt_block_state_append(&backend_csf->accum_all_blk_stt,
+					       KBASE_HWCNT_STATE_OFF);
 		backend_csf->user_requested = true;
 		backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, flags);
 		return 0;
@@ -1457,7 +1477,7 @@ static int kbasep_hwcnt_backend_csf_dump_get(struct kbase_hwcnt_backend *backend
 	ret = kbase_hwcnt_csf_dump_get(dst, backend_csf->to_user_buf,
 				       backend_csf->to_user_block_states, dst_enable_map,
 				       backend_csf->num_l2_slices,
-				       backend_csf->shader_present_bitmap, accumulate);
+				       backend_csf->powered_shader_core_mask, accumulate);
 
 	/* If no error occurred (zero ret value), then update block state for all blocks in the
 	 * accumulation with the current sample's block state.
@@ -1469,6 +1489,12 @@ static int kbasep_hwcnt_backend_csf_dump_get(struct kbase_hwcnt_backend *backend
 					    KBASE_HWCNT_STATE_UNKNOWN);
 	}
 
+	/* Clear consumed user buffers. */
+	memset(backend_csf->to_user_buf, 0, backend_csf->info->metadata->dump_buf_bytes);
+	memset(backend_csf->to_user_block_states, 0,
+	       backend_csf->phys_layout.block_cnt * KBASE_HWCNT_BLOCK_STATE_BYTES *
+		       KBASE_HWCNT_BLOCK_STATE_STRIDE);
+
 	return ret;
 }
 
@@ -1684,6 +1710,22 @@ static void kbasep_hwcnt_backend_csf_term(struct kbase_hwcnt_backend *backend)
 	kbasep_hwcnt_backend_csf_destroy(backend_csf);
 }
 
+static void kbasep_hwcnt_backend_csf_acquire(const struct kbase_hwcnt_backend *backend)
+{
+	const struct kbase_hwcnt_backend_csf *csf = (const struct kbase_hwcnt_backend_csf *)backend;
+
+	/* Relay the "collection enabled" notification to the CSF interface. */
+	csf->info->csf_if->acquire(csf->info->csf_if->ctx);
+}
+
+static void kbasep_hwcnt_backend_csf_release(const struct kbase_hwcnt_backend *backend)
+{
+	const struct kbase_hwcnt_backend_csf *csf = (const struct kbase_hwcnt_backend_csf *)backend;
+
+	/* Relay the "collection disabled" notification to the CSF interface. */
+	csf->info->csf_if->release(csf->info->csf_if->ctx);
+}
+
 /**
  * kbasep_hwcnt_backend_csf_info_destroy() - Destroy a CSF backend info.
  * @info: Pointer to info to destroy.
@@ -2098,7 +2140,7 @@ int kbase_hwcnt_backend_csf_metadata_init(struct kbase_hwcnt_backend_interface *
 	gpu_info.has_fw_counters = csf_info->prfcnt_info.prfcnt_fw_size > 0;
 	gpu_info.l2_count = csf_info->prfcnt_info.l2_count;
 	gpu_info.csg_cnt = csf_info->prfcnt_info.csg_count;
-	gpu_info.core_mask = csf_info->prfcnt_info.core_mask;
+	gpu_info.sc_core_mask = csf_info->prfcnt_info.sc_core_mask;
 	gpu_info.clk_cnt = csf_info->prfcnt_info.clk_cnt;
 	gpu_info.prfcnt_values_per_block =
 		csf_info->prfcnt_info.prfcnt_block_size / KBASE_HWCNT_VALUE_HW_BYTES;
@@ -2115,7 +2157,7 @@ void kbase_hwcnt_backend_csf_metadata_term(struct kbase_hwcnt_backend_interface
 
 	csf_info = (struct kbase_hwcnt_backend_csf_info *)iface->info;
 	if (csf_info->metadata) {
-		kbase_hwcnt_csf_metadata_destroy(csf_info->metadata);
+		kbase_hwcnt_metadata_destroy(csf_info->metadata);
 		csf_info->metadata = NULL;
 	}
 }
@@ -2142,6 +2184,8 @@ int kbase_hwcnt_backend_csf_create(struct kbase_hwcnt_backend_csf_if *csf_if, u3
 	iface->metadata = kbasep_hwcnt_backend_csf_metadata;
 	iface->init = kbasep_hwcnt_backend_csf_init;
 	iface->term = kbasep_hwcnt_backend_csf_term;
+	iface->acquire = kbasep_hwcnt_backend_csf_acquire;
+	iface->release = kbasep_hwcnt_backend_csf_release;
 	iface->timestamp_ns = kbasep_hwcnt_backend_csf_timestamp_ns;
 	iface->dump_enable = kbasep_hwcnt_backend_csf_dump_enable;
 	iface->dump_enable_nolock = kbasep_hwcnt_backend_csf_dump_enable_nolock;
diff --git a/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend_csf.h b/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend_csf.h
index 2487db272a35..104f9c77a945 100644
--- a/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend_csf.h
+++ b/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend_csf.h
@@ -1,7 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
 /*
  *
- * (C) COPYRIGHT 2021-2023 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2021-2024 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -30,8 +30,10 @@
 #include "hwcnt/backend/mali_kbase_hwcnt_backend.h"
 #include "hwcnt/backend/mali_kbase_hwcnt_backend_csf_if.h"
 #include "hwcnt/mali_kbase_hwcnt_watchdog_if.h"
+#include "hwcnt/mali_kbase_hwcnt_types.h"
 
 struct kbase_hwcnt_physical_enable_map;
+struct kbase_hwcnt_backend_csf;
 
 /**
  * kbase_hwcnt_backend_csf_create() - Create a CSF hardware counter backend
@@ -123,11 +125,12 @@ void kbase_hwcnt_backend_csf_on_before_reset(struct kbase_hwcnt_backend_interfac
  *                                                 this function is called.
  * @iface: Non-NULL pointer to HWC backend interface.
  * @num_l2_slices: Current number of L2 slices allocated to the GPU.
- * @shader_present_bitmap: Current shader-present bitmap that is allocated to the GPU.
+ * @shader_present: Shader-present bitmap of the current configuration.
+ * @power_core_mask: Mask containing changed shader core power state, ANDed with @shader_present.
  */
 void kbase_hwcnt_backend_csf_set_hw_availability(struct kbase_hwcnt_backend_interface *iface,
-						 size_t num_l2_slices,
-						 uint64_t shader_present_bitmap);
+						 size_t num_l2_slices, u64 shader_present,
+						 u64 power_core_mask);
 
 /** kbasep_hwcnt_backend_csf_process_enable_map() - Process the enable_map to
  *                                                  guarantee headers are
@@ -174,4 +177,21 @@ void kbase_hwcnt_backend_csf_on_prfcnt_enable(struct kbase_hwcnt_backend_interfa
  */
 void kbase_hwcnt_backend_csf_on_prfcnt_disable(struct kbase_hwcnt_backend_interface *iface);
 
+/**
+ * kbasep_hwcnt_backend_csf_update_block_state - Update block state of a block instance with
+ *                              information from a sample.
+ * @backend:                    CSF backend supplying layout, enable state and powered core mask.
+ * @enable_mask:                Counter enable mask for the block whose state is being updated.
+ * @exiting_protm:              Whether or not the sample is taken when the GPU is exiting
+ *                              protected mode.
+ * @block_idx:                  Index of block within the ringbuffer.
+ * @block_state:                Pointer to existing block state of the block whose state is being
+ *                              updated.
+ * @fw_in_protected_mode:       Whether or not GPU is in protected mode during sampling.
+ */
+void kbasep_hwcnt_backend_csf_update_block_state(struct kbase_hwcnt_backend_csf *backend,
+						 const u32 enable_mask, bool exiting_protm,
+						 size_t block_idx, blk_stt_t *const block_state,
+						 bool fw_in_protected_mode);
+
 #endif /* _KBASE_HWCNT_BACKEND_CSF_H_ */
diff --git a/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend_csf_if.h b/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend_csf_if.h
index 65bb965bcf9c..81f809fdc83a 100644
--- a/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend_csf_if.h
+++ b/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend_csf_if.h
@@ -1,7 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
 /*
  *
- * (C) COPYRIGHT 2021-2023 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2021-2024 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -68,7 +68,7 @@ struct kbase_hwcnt_backend_csf_if_enable {
  * @prfcnt_block_size: Bytes of each performance counter block.
  * @l2_count:          The MMU L2 cache count.
  * @csg_count:         The total number of CSGs in the system
- * @core_mask:         Shader core mask.
+ * @sc_core_mask:      Shader core mask.
  * @clk_cnt:           Clock domain count in the system.
  * @clearing_samples:  Indicates whether counters are cleared after each sample
  *                     is taken.
@@ -80,7 +80,7 @@ struct kbase_hwcnt_backend_csf_if_prfcnt_info {
 	size_t prfcnt_block_size;
 	size_t l2_count;
 	u32 csg_count;
-	u64 core_mask;
+	u64 sc_core_mask;
 	u8 clk_cnt;
 	bool clearing_samples;
 };
@@ -114,6 +114,20 @@ typedef void (*kbase_hwcnt_backend_csf_if_lock_fn)(struct kbase_hwcnt_backend_cs
 typedef void (*kbase_hwcnt_backend_csf_if_unlock_fn)(struct kbase_hwcnt_backend_csf_if_ctx *ctx,
 						     unsigned long flags);
 
+/**
+ * typedef kbase_hwcnt_backend_csf_if_acquire_fn - Notify that counter collection is enabled.
+ *
+ * @ctx:   Non-NULL pointer to a CSF context.
+ */
+typedef void (*kbase_hwcnt_backend_csf_if_acquire_fn)(struct kbase_hwcnt_backend_csf_if_ctx *ctx);
+
+/**
+ * typedef kbase_hwcnt_backend_csf_if_release_fn - Notify that counter collection is disabled.
+ *
+ * @ctx:   Non-NULL pointer to a CSF context.
+ */
+typedef void (*kbase_hwcnt_backend_csf_if_release_fn)(struct kbase_hwcnt_backend_csf_if_ctx *ctx);
+
 /**
  * typedef kbase_hwcnt_backend_csf_if_get_prfcnt_info_fn - Get performance
  *                                                         counter information.
@@ -272,6 +286,10 @@ typedef void (*kbase_hwcnt_backend_csf_if_get_gpu_cycle_count_fn)(
  * @assert_lock_held:    Function ptr to assert backend spinlock is held.
  * @lock:                Function ptr to acquire backend spinlock.
  * @unlock:              Function ptr to release backend spinlock.
+ * @acquire:             Callback to indicate that counter collection has
+ *                       been enabled.
+ * @release:             Callback to indicate that counter collection has
+ *                       been disabled.
  * @get_prfcnt_info:     Function ptr to get performance counter related
  *                       information.
  * @ring_buf_alloc:      Function ptr to allocate ring buffer for CSF HWC.
@@ -292,6 +310,8 @@ struct kbase_hwcnt_backend_csf_if {
 	kbase_hwcnt_backend_csf_if_assert_lock_held_fn assert_lock_held;
 	kbase_hwcnt_backend_csf_if_lock_fn lock;
 	kbase_hwcnt_backend_csf_if_unlock_fn unlock;
+	kbase_hwcnt_backend_csf_if_acquire_fn acquire;
+	kbase_hwcnt_backend_csf_if_release_fn release;
 	kbase_hwcnt_backend_csf_if_get_prfcnt_info_fn get_prfcnt_info;
 	kbase_hwcnt_backend_csf_if_ring_buf_alloc_fn ring_buf_alloc;
 	kbase_hwcnt_backend_csf_if_ring_buf_sync_fn ring_buf_sync;
diff --git a/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend_csf_if_fw.c b/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend_csf_if_fw.c
index d79a99e5e89f..29f8a2a8838d 100644
--- a/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend_csf_if_fw.c
+++ b/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend_csf_if_fw.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
 /*
  *
- * (C) COPYRIGHT 2021-2023 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2021-2024 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -131,6 +131,26 @@ static void kbasep_hwcnt_backend_csf_if_fw_unlock(struct kbase_hwcnt_backend_csf
 	kbase_csf_scheduler_spin_unlock(kbdev, flags);
 }
 
+static void kbasep_hwcnt_backend_csf_if_fw_acquire(struct kbase_hwcnt_backend_csf_if_ctx *ctx)
+{
+	struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx =
+		(struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx;
+
+	/* Mark performance counter collection as enabled in pm.backend.gpu_sleep_allowed */
+	set_bit(KBASE_GPU_PERF_COUNTERS_COLLECTION_ENABLED,
+		&fw_ctx->kbdev->pm.backend.gpu_sleep_allowed);
+}
+
+static void kbasep_hwcnt_backend_csf_if_fw_release(struct kbase_hwcnt_backend_csf_if_ctx *ctx)
+{
+	struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx =
+		(struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx;
+
+	/* Mark performance counter collection as disabled in pm.backend.gpu_sleep_allowed */
+	clear_bit(KBASE_GPU_PERF_COUNTERS_COLLECTION_ENABLED,
+		  &fw_ctx->kbdev->pm.backend.gpu_sleep_allowed);
+}
+
 /**
  * kbasep_hwcnt_backend_csf_if_fw_on_freq_change() - On freq change callback
  *
@@ -229,7 +249,7 @@ static void kbasep_hwcnt_backend_csf_if_fw_get_prfcnt_info(
 
 	*prfcnt_info = (struct kbase_hwcnt_backend_csf_if_prfcnt_info){
 		.l2_count = KBASE_DUMMY_MODEL_MAX_MEMSYS_BLOCKS,
-		.core_mask = (1ull << KBASE_DUMMY_MODEL_MAX_SHADER_CORES) - 1,
+		.sc_core_mask = (1ull << KBASE_DUMMY_MODEL_MAX_SHADER_CORES) - 1,
 		.prfcnt_hw_size =
 			KBASE_DUMMY_MODEL_MAX_NUM_HARDWARE_BLOCKS * KBASE_DUMMY_MODEL_BLOCK_SIZE,
 		.prfcnt_fw_size =
@@ -290,12 +310,13 @@ static void kbasep_hwcnt_backend_csf_if_fw_get_prfcnt_info(
 		.dump_bytes = fw_ctx->buf_bytes,
 		.prfcnt_block_size = prfcnt_block_size,
 		.l2_count = kbdev->gpu_props.num_l2_slices,
-		.core_mask = kbasep_hwcnt_backend_csf_core_mask(&kbdev->gpu_props),
+		.sc_core_mask = kbasep_hwcnt_backend_csf_core_mask(&kbdev->gpu_props),
 		.csg_count = fw_block_count > 1 ? csg_count : 0,
 		.clk_cnt = fw_ctx->clk_cnt,
 		.clearing_samples = true,
 	};
 
+
 	/* Block size must be multiple of counter size. */
 	WARN_ON((prfcnt_info->prfcnt_block_size % KBASE_HWCNT_VALUE_HW_BYTES) != 0);
 	/* Total size must be multiple of block size. */
@@ -513,10 +534,15 @@ kbasep_hwcnt_backend_csf_if_fw_ring_buf_free(struct kbase_hwcnt_backend_csf_if_c
 			fw_ring_buf->phys, fw_ring_buf->num_pages, fw_ring_buf->num_pages,
 			MCU_AS_NR));
 
+		/* Clear the dump ring_buf content to zeros */
+		memset(fw_ring_buf->cpu_dump_base, 0, fw_ring_buf->num_pages * PAGE_SIZE);
 		vunmap(fw_ring_buf->cpu_dump_base);
 
+		/* After zeroing, the ring_buf pages are dirty, so the 'dirty' flag must be
+		 * passed as true when freeing the pages to the Global pool.
+		 */
 		kbase_mem_pool_free_pages(&fw_ctx->kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW],
-					  fw_ring_buf->num_pages, fw_ring_buf->phys, false, false);
+					  fw_ring_buf->num_pages, fw_ring_buf->phys, true, false);
 
 		kfree(fw_ring_buf->phys);
 
@@ -807,6 +833,8 @@ int kbase_hwcnt_backend_csf_if_fw_create(struct kbase_device *kbdev,
 	if_fw->assert_lock_held = kbasep_hwcnt_backend_csf_if_fw_assert_lock_held;
 	if_fw->lock = kbasep_hwcnt_backend_csf_if_fw_lock;
 	if_fw->unlock = kbasep_hwcnt_backend_csf_if_fw_unlock;
+	if_fw->acquire = kbasep_hwcnt_backend_csf_if_fw_acquire;
+	if_fw->release = kbasep_hwcnt_backend_csf_if_fw_release;
 	if_fw->get_prfcnt_info = kbasep_hwcnt_backend_csf_if_fw_get_prfcnt_info;
 	if_fw->ring_buf_alloc = kbasep_hwcnt_backend_csf_if_fw_ring_buf_alloc;
 	if_fw->ring_buf_sync = kbasep_hwcnt_backend_csf_if_fw_ring_buf_sync;
diff --git a/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend_jm.c b/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend_jm.c
index 7fbef163976a..c3f2bcdbf256 100644
--- a/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend_jm.c
+++ b/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend_jm.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
 /*
  *
- * (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2018-2024 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -165,7 +165,7 @@ static int kbasep_hwcnt_backend_jm_gpu_info_init(struct kbase_device *kbdev,
 #endif
 
 	info->l2_count = l2_count;
-	info->core_mask = core_mask;
+	info->sc_core_mask = core_mask;
 	info->prfcnt_values_per_block = KBASE_HWCNT_V5_DEFAULT_VALUES_PER_BLOCK;
 
 	/* Determine the number of available clock domains. */
@@ -186,7 +186,7 @@ static void kbasep_hwcnt_backend_jm_init_layout(const struct kbase_hwcnt_gpu_inf
 	WARN_ON(!gpu_info);
 	WARN_ON(!phys_layout);
 
-	shader_core_cnt = fls64(gpu_info->core_mask);
+	shader_core_cnt = fls64(gpu_info->sc_core_mask);
 
 	*phys_layout = (struct kbase_hwcnt_jm_physical_layout){
 		.fe_cnt = KBASE_HWCNT_V5_FE_BLOCK_COUNT,
@@ -195,7 +195,7 @@ static void kbasep_hwcnt_backend_jm_init_layout(const struct kbase_hwcnt_gpu_inf
 		.shader_cnt = shader_core_cnt,
 		.block_cnt = KBASE_HWCNT_V5_FE_BLOCK_COUNT + KBASE_HWCNT_V5_TILER_BLOCK_COUNT +
 			     gpu_info->l2_count + shader_core_cnt,
-		.shader_avail_mask = gpu_info->core_mask,
+		.shader_avail_mask = gpu_info->sc_core_mask,
 		.headers_per_block = KBASE_HWCNT_V5_HEADERS_PER_BLOCK,
 		.values_per_block = gpu_info->prfcnt_values_per_block,
 		.counters_per_block =
@@ -384,14 +384,12 @@ kbasep_hwcnt_backend_jm_dump_enable_nolock(struct kbase_hwcnt_backend *backend,
 
 	enable = (struct kbase_instr_hwcnt_enable)
 	{
-		.fe_bm = phys_enable_map.fe_bm,
-		.shader_bm = phys_enable_map.shader_bm,
-		.tiler_bm = phys_enable_map.tiler_bm,
-		.mmu_l2_bm = phys_enable_map.mmu_l2_bm,
+		.fe_bm = phys_enable_map.fe_bm, .shader_bm = phys_enable_map.shader_bm,
+		.tiler_bm = phys_enable_map.tiler_bm, .mmu_l2_bm = phys_enable_map.mmu_l2_bm,
 		.counter_set = phys_counter_set,
 #if IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI)
 		/* The dummy model needs the CPU mapping. */
-		.dump_buffer = (uintptr_t)backend_jm->cpu_dump_va,
+			.dump_buffer = (uintptr_t)backend_jm->cpu_dump_va,
 #else
 		.dump_buffer = backend_jm->gpu_dump_va,
 #endif /* CONFIG_MALI_BIFROST_NO_MALI */
@@ -411,7 +409,7 @@ kbasep_hwcnt_backend_jm_dump_enable_nolock(struct kbase_hwcnt_backend *backend,
 
 	backend_jm->debug_core_mask = kbase_pm_ca_get_debug_core_mask(kbdev);
 	backend_jm->max_l2_slices = backend_jm->info->hwcnt_gpu_info.l2_count;
-	backend_jm->max_core_mask = backend_jm->info->hwcnt_gpu_info.core_mask;
+	backend_jm->max_core_mask = backend_jm->info->hwcnt_gpu_info.sc_core_mask;
 
 	backend_jm->pm_core_mask = kbase_pm_ca_get_instr_core_mask(kbdev);
 
@@ -660,8 +658,8 @@ static int kbasep_hwcnt_backend_jm_dump_get(struct kbase_hwcnt_backend *backend,
 #endif /* CONFIG_MALI_BIFROST_NO_MALI */
 	errcode = kbase_hwcnt_jm_dump_get(dst, backend_jm->to_user_buf, dst_enable_map,
 					  backend_jm->pm_core_mask, backend_jm->debug_core_mask,
-					  backend_jm->max_core_mask, backend_jm->max_l2_slices,
-					  &backend_jm->curr_config, accumulate);
+					  backend_jm->max_l2_slices, &backend_jm->curr_config,
+					  accumulate);
 
 	if (errcode)
 		return errcode;
@@ -685,7 +683,7 @@ static int kbasep_hwcnt_backend_jm_dump_alloc(const struct kbase_hwcnt_backend_j
 					      struct kbase_context *kctx, u64 *gpu_dump_va)
 {
 	struct kbase_va_region *reg;
-	u64 flags;
+	base_mem_alloc_flags flags;
 	u64 nr_pages;
 
 	/* Calls to this function are inherently asynchronous, with respect to
@@ -853,6 +851,14 @@ static void kbasep_hwcnt_backend_jm_term(struct kbase_hwcnt_backend *backend)
 	kbasep_hwcnt_backend_jm_destroy((struct kbase_hwcnt_backend_jm *)backend);
 }
 
+static void kbasep_hwcnt_backend_jm_acquire(const struct kbase_hwcnt_backend *backend)
+{
+}
+
+static void kbasep_hwcnt_backend_jm_release(const struct kbase_hwcnt_backend *backend)
+{
+}
+
 /**
  * kbasep_hwcnt_backend_jm_info_destroy() - Destroy a JM backend info.
  * @info: Pointer to info to destroy.
@@ -864,7 +870,7 @@ static void kbasep_hwcnt_backend_jm_info_destroy(const struct kbase_hwcnt_backen
 	if (!info)
 		return;
 
-	kbase_hwcnt_jm_metadata_destroy(info->metadata);
+	kbase_hwcnt_metadata_destroy(info->metadata);
 	kfree(info);
 }
 
@@ -934,6 +940,8 @@ int kbase_hwcnt_backend_jm_create(struct kbase_device *kbdev,
 	iface->metadata = kbasep_hwcnt_backend_jm_metadata;
 	iface->init = kbasep_hwcnt_backend_jm_init;
 	iface->term = kbasep_hwcnt_backend_jm_term;
+	iface->acquire = kbasep_hwcnt_backend_jm_acquire;
+	iface->release = kbasep_hwcnt_backend_jm_release;
 	iface->timestamp_ns = kbasep_hwcnt_backend_jm_timestamp_ns;
 	iface->dump_enable = kbasep_hwcnt_backend_jm_dump_enable;
 	iface->dump_enable_nolock = kbasep_hwcnt_backend_jm_dump_enable_nolock;
diff --git a/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend_jm_watchdog.c b/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend_jm_watchdog.c
index cf2a2e65bc25..88917e72ac58 100644
--- a/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend_jm_watchdog.c
+++ b/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend_jm_watchdog.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
 /*
  *
- * (C) COPYRIGHT 2021-2023 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2021-2024 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -317,6 +317,14 @@ kbasep_hwcnt_backend_jm_watchdog_term_partial(struct kbase_hwcnt_backend_jm_watc
 	kfree(wd_backend);
 }
 
+static void kbasep_hwcnt_backend_jm_watchdog_acquire(const struct kbase_hwcnt_backend *backend)
+{
+}
+
+static void kbasep_hwcnt_backend_jm_watchdog_release(const struct kbase_hwcnt_backend *backend)
+{
+}
+
 /* Job manager watchdog backend, implementation of kbase_hwcnt_backend_term_fn
  * Calling term does *not* destroy the interface
  */
@@ -807,6 +815,8 @@ int kbase_hwcnt_backend_jm_watchdog_create(struct kbase_hwcnt_backend_interface
 		.metadata = kbasep_hwcnt_backend_jm_watchdog_metadata,
 		.init = kbasep_hwcnt_backend_jm_watchdog_init,
 		.term = kbasep_hwcnt_backend_jm_watchdog_term,
+		.acquire = kbasep_hwcnt_backend_jm_watchdog_acquire,
+		.release = kbasep_hwcnt_backend_jm_watchdog_release,
 		.timestamp_ns = kbasep_hwcnt_backend_jm_watchdog_timestamp_ns,
 		.dump_enable = kbasep_hwcnt_backend_jm_watchdog_dump_enable,
 		.dump_enable_nolock = kbasep_hwcnt_backend_jm_watchdog_dump_enable_nolock,
diff --git a/drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt.c b/drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt.c
index 8b1de2e1cdaf..8d308f1138a7 100644
--- a/drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt.c
+++ b/drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
 /*
  *
- * (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2018-2024 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -599,6 +599,9 @@ int kbase_hwcnt_accumulator_acquire(struct kbase_hwcnt_context *hctx,
 		return errcode;
 	}
 
+	/* Inform the backend that counter collection has been enabled. */
+	hctx->iface->acquire(hctx->accum.backend);
+
 	spin_lock_irqsave(&hctx->state_lock, flags);
 
 	WARN_ON(hctx->disable_count == 0);
@@ -646,6 +649,9 @@ void kbase_hwcnt_accumulator_release(struct kbase_hwcnt_accumulator *accum)
 
 	mutex_unlock(&hctx->accum_lock);
 
+	/* Inform the backend that counter collection has been disabled. */
+	hctx->iface->release(hctx->accum.backend);
+
 	kbasep_hwcnt_accumulator_term(hctx);
 
 	mutex_lock(&hctx->accum_lock);
diff --git a/drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt_gpu.c b/drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt_gpu.c
index 5da564546608..7cd16a0de4ce 100644
--- a/drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt_gpu.c
+++ b/drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt_gpu.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
 /*
  *
- * (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2018-2024 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -169,7 +169,7 @@ static int kbasep_hwcnt_backend_gpu_metadata_create(const struct kbase_hwcnt_gpu
 	/* Calculate number of block instances that aren't cores */
 	non_core_block_count = 2 + gpu_info->l2_count;
 	/* Calculate number of block instances that are shader cores */
-	sc_block_count = (size_t)fls64(gpu_info->core_mask);
+	sc_block_count = (size_t)fls64(gpu_info->sc_core_mask);
 	/* Determine the total number of cores */
 	core_block_count = sc_block_count;
 
@@ -277,7 +277,7 @@ static int kbasep_hwcnt_backend_gpu_metadata_create(const struct kbase_hwcnt_gpu
 	kbase_hwcnt_set_avail_mask(&desc.avail_mask, 0, 0);
 	kbase_hwcnt_set_avail_mask_bits(&desc.avail_mask, 0, non_core_block_count, U64_MAX);
 	kbase_hwcnt_set_avail_mask_bits(&desc.avail_mask, non_core_block_count, sc_block_count,
-					gpu_info->core_mask);
+					gpu_info->sc_core_mask);
 
 
 	return kbase_hwcnt_metadata_create(&desc, metadata);
@@ -294,7 +294,7 @@ static size_t kbasep_hwcnt_backend_jm_dump_bytes(const struct kbase_hwcnt_gpu_in
 {
 	WARN_ON(!gpu_info);
 
-	return (2 + gpu_info->l2_count + (size_t)fls64(gpu_info->core_mask)) *
+	return (2 + gpu_info->l2_count + (size_t)fls64(gpu_info->sc_core_mask)) *
 	       gpu_info->prfcnt_values_per_block * KBASE_HWCNT_VALUE_HW_BYTES;
 }
 
@@ -338,14 +338,6 @@ int kbase_hwcnt_jm_metadata_create(const struct kbase_hwcnt_gpu_info *gpu_info,
 	return 0;
 }
 
-void kbase_hwcnt_jm_metadata_destroy(const struct kbase_hwcnt_metadata *metadata)
-{
-	if (!metadata)
-		return;
-
-	kbase_hwcnt_metadata_destroy(metadata);
-}
-
 int kbase_hwcnt_csf_metadata_create(const struct kbase_hwcnt_gpu_info *gpu_info,
 				    enum kbase_hwcnt_set counter_set,
 				    const struct kbase_hwcnt_metadata **out_metadata)
@@ -365,14 +357,6 @@ int kbase_hwcnt_csf_metadata_create(const struct kbase_hwcnt_gpu_info *gpu_info,
 	return 0;
 }
 
-void kbase_hwcnt_csf_metadata_destroy(const struct kbase_hwcnt_metadata *metadata)
-{
-	if (!metadata)
-		return;
-
-	kbase_hwcnt_metadata_destroy(metadata);
-}
-
 bool kbase_hwcnt_is_block_type_shader(const enum kbase_hwcnt_gpu_v5_block_type blk_type)
 {
 	if (blk_type == KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC ||
@@ -384,6 +368,7 @@ bool kbase_hwcnt_is_block_type_shader(const enum kbase_hwcnt_gpu_v5_block_type b
 	return false;
 }
 
+
 bool kbase_hwcnt_is_block_type_memsys(const enum kbase_hwcnt_gpu_v5_block_type blk_type)
 {
 	if (blk_type == KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS ||
@@ -416,7 +401,7 @@ bool kbase_hwcnt_is_block_type_fe(const enum kbase_hwcnt_gpu_v5_block_type blk_t
 
 int kbase_hwcnt_jm_dump_get(struct kbase_hwcnt_dump_buffer *dst, u64 *src,
 			    const struct kbase_hwcnt_enable_map *dst_enable_map, u64 pm_core_mask,
-			    u64 debug_core_mask, u64 max_core_mask, size_t max_l2_slices,
+			    u64 debug_core_mask, size_t max_l2_slices,
 			    const struct kbase_hwcnt_curr_config *curr_config, bool accumulate)
 {
 	const struct kbase_hwcnt_metadata *metadata;
@@ -466,9 +451,7 @@ int kbase_hwcnt_jm_dump_get(struct kbase_hwcnt_dump_buffer *dst, u64 *src,
 		else
 			hw_res_available = true;
 
-		/*
-		 * Skip block if no values in the destination block are enabled.
-		 */
+		/* Skip block if no values in the destination block are enabled. */
 		if (kbase_hwcnt_enable_map_block_enabled(dst_enable_map, blk, blk_inst)) {
 			u64 *dst_blk = kbase_hwcnt_dump_buffer_block_instance(dst, blk, blk_inst);
 			const u64 *src_blk = dump_src + src_offset;
@@ -581,7 +564,6 @@ int kbase_hwcnt_jm_dump_get(struct kbase_hwcnt_dump_buffer *dst, u64 *src,
 			/* Shift each core mask right by 1 */
 			core_mask >>= 1;
 			debug_core_mask >>= 1;
-			max_core_mask >>= 1;
 			shader_present >>= 1;
 		}
 	}
@@ -592,7 +574,7 @@ int kbase_hwcnt_jm_dump_get(struct kbase_hwcnt_dump_buffer *dst, u64 *src,
 int kbase_hwcnt_csf_dump_get(struct kbase_hwcnt_dump_buffer *dst, u64 *src,
 			     blk_stt_t *src_block_stt,
 			     const struct kbase_hwcnt_enable_map *dst_enable_map,
-			     size_t num_l2_slices, u64 shader_present_bitmap, bool accumulate)
+			     size_t num_l2_slices, u64 powered_shader_core_mask, bool accumulate)
 {
 	const struct kbase_hwcnt_metadata *metadata;
 	const u64 *dump_src = src;
@@ -614,9 +596,7 @@ int kbase_hwcnt_csf_dump_get(struct kbase_hwcnt_dump_buffer *dst, u64 *src,
 		blk_stt_t *dst_blk_stt =
 			kbase_hwcnt_dump_buffer_block_state_instance(dst, blk, blk_inst);
 
-		/*
-		 * Skip block if no values in the destination block are enabled.
-		 */
+		/* Skip block if no values in the destination block are enabled. */
 		if (kbase_hwcnt_enable_map_block_enabled(dst_enable_map, blk, blk_inst)) {
 			u64 *dst_blk = kbase_hwcnt_dump_buffer_block_instance(dst, blk, blk_inst);
 			const u64 *src_blk = dump_src + src_offset;
diff --git a/drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt_gpu.h b/drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt_gpu.h
index 4339fddd64e2..896f1389eb37 100644
--- a/drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt_gpu.h
+++ b/drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt_gpu.h
@@ -1,7 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
 /*
  *
- * (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2018-2024 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -169,7 +169,7 @@ enum kbase_hwcnt_physical_set {
 /**
  * struct kbase_hwcnt_gpu_info - Information about hwcnt blocks on the GPUs.
  * @l2_count:                L2 cache count.
- * @core_mask:               Shader core mask. May be sparse.
+ * @sc_core_mask:            Shader core mask. May be sparse.
  * @clk_cnt:                 Number of clock domains available.
  * @csg_cnt:                 Number of CSGs available.
  * @prfcnt_values_per_block: Total entries (header + counters) of performance
@@ -178,7 +178,7 @@ enum kbase_hwcnt_physical_set {
  */
 struct kbase_hwcnt_gpu_info {
 	size_t l2_count;
-	u64 core_mask;
+	u64 sc_core_mask;
 	u8 clk_cnt;
 	u8 csg_cnt;
 	size_t prfcnt_values_per_block;
@@ -261,13 +261,6 @@ int kbase_hwcnt_jm_metadata_create(const struct kbase_hwcnt_gpu_info *info,
 				   const struct kbase_hwcnt_metadata **out_metadata,
 				   size_t *out_dump_bytes);
 
-/**
- * kbase_hwcnt_jm_metadata_destroy() - Destroy JM GPU hardware counter metadata.
- *
- * @metadata: Pointer to metadata to destroy.
- */
-void kbase_hwcnt_jm_metadata_destroy(const struct kbase_hwcnt_metadata *metadata);
-
 /**
  * kbase_hwcnt_csf_metadata_create() - Create hardware counter metadata for the
  *                                     CSF GPUs.
@@ -282,13 +275,6 @@ int kbase_hwcnt_csf_metadata_create(const struct kbase_hwcnt_gpu_info *info,
 				    enum kbase_hwcnt_set counter_set,
 				    const struct kbase_hwcnt_metadata **out_metadata);
 
-/**
- * kbase_hwcnt_csf_metadata_destroy() - Destroy CSF GPU hardware counter
- *                                      metadata.
- * @metadata: Pointer to metadata to destroy.
- */
-void kbase_hwcnt_csf_metadata_destroy(const struct kbase_hwcnt_metadata *metadata);
-
 /**
  * kbase_hwcnt_jm_dump_get() - Copy or accumulate enabled counters from the raw
  *                             dump buffer in src into the dump buffer
@@ -300,9 +286,6 @@ void kbase_hwcnt_csf_metadata_destroy(const struct kbase_hwcnt_metadata *metadat
  * @dst_enable_map:  Non-NULL pointer to enable map specifying enabled values.
  * @pm_core_mask:    PM state synchronized shaders core mask with the dump.
  * @debug_core_mask: User-set mask of cores to be used by the GPU.
- * @max_core_mask:   Core mask of all cores allocated to the GPU (non
- *                   virtualized platforms) or resource group (virtualized
- *                   platforms).
  * @max_l2_slices:   Maximum number of L2 slices allocated to the GPU (non
  *                   virtualised platforms) or resource group (virtualized
  *                   platforms).
@@ -319,23 +302,23 @@ void kbase_hwcnt_csf_metadata_destroy(const struct kbase_hwcnt_metadata *metadat
  */
 int kbase_hwcnt_jm_dump_get(struct kbase_hwcnt_dump_buffer *dst, u64 *src,
 			    const struct kbase_hwcnt_enable_map *dst_enable_map,
-			    const u64 pm_core_mask, u64 debug_core_mask, u64 max_core_mask,
-			    size_t max_l2_slices, const struct kbase_hwcnt_curr_config *curr_config,
-			    bool accumulate);
+			    const u64 pm_core_mask, u64 debug_core_mask, size_t max_l2_slices,
+			    const struct kbase_hwcnt_curr_config *curr_config, bool accumulate);
 
 /**
  * kbase_hwcnt_csf_dump_get() - Copy or accumulate enabled counters from the raw
  *                              dump buffer in src into the dump buffer
  *                              abstraction in dst.
- * @dst:                   Non-NULL pointer to destination dump buffer.
- * @src:                   Non-NULL pointer to source raw dump buffer, of same length
- *                         as dump_buf_bytes in the metadata of dst dump buffer.
- * @src_block_stt:         Non-NULL pointer to source block state buffer.
- * @dst_enable_map:        Non-NULL pointer to enable map specifying enabled values.
- * @num_l2_slices:         Current number of L2 slices allocated to the GPU.
- * @shader_present_bitmap: Current shader-present bitmap that is allocated to the GPU.
- * @accumulate:            True if counters in src should be accumulated into
- *                         destination, rather than copied.
+ * @dst:                      Non-NULL pointer to destination dump buffer.
+ * @src:                      Non-NULL pointer to source raw dump buffer, of same length
+ *                            as dump_buf_bytes in the metadata of dst dump buffer.
+ * @src_block_stt:            Non-NULL pointer to source block state buffer.
+ * @dst_enable_map:           Non-NULL pointer to enable map specifying enabled values.
+ * @num_l2_slices:            Current number of L2 slices allocated to the GPU.
+ * @powered_shader_core_mask: Mask of the shader cores set in both the
+ *                            debug_core_mask and the shader_present_bitmap.
+ * @accumulate:               True if counters in src should be accumulated into
+ *                            destination, rather than copied.
  *
  * The dst and dst_enable_map MUST have been created from the same metadata as
  * returned from the call to kbase_hwcnt_csf_metadata_create as was used to get
@@ -346,7 +329,7 @@ int kbase_hwcnt_jm_dump_get(struct kbase_hwcnt_dump_buffer *dst, u64 *src,
 int kbase_hwcnt_csf_dump_get(struct kbase_hwcnt_dump_buffer *dst, u64 *src,
 			     blk_stt_t *src_block_stt,
 			     const struct kbase_hwcnt_enable_map *dst_enable_map,
-			     size_t num_l2_slices, u64 shader_present_bitmap, bool accumulate);
+			     size_t num_l2_slices, u64 powered_shader_core_mask, bool accumulate);
 
 /**
  * kbase_hwcnt_backend_gpu_block_map_to_physical() - Convert from a block
@@ -453,6 +436,7 @@ bool kbase_hwcnt_is_block_type_memsys(const enum kbase_hwcnt_gpu_v5_block_type b
 bool kbase_hwcnt_is_block_type_tiler(const enum kbase_hwcnt_gpu_v5_block_type blk_type);
 
 bool kbase_hwcnt_is_block_type_fe(const enum kbase_hwcnt_gpu_v5_block_type blk_type);
+
 /**
  * kbase_hwcnt_gpu_enable_map_from_cm() - Builds enable map abstraction from
  *                                        counter selection bitmasks.
diff --git a/drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt_types.c b/drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt_types.c
index 3d0ad5af7263..3d2fd5e088da 100644
--- a/drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt_types.c
+++ b/drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt_types.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
 /*
  *
- * (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2018-2024 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -125,6 +125,9 @@ int kbase_hwcnt_metadata_create(const struct kbase_hwcnt_description *desc,
 
 void kbase_hwcnt_metadata_destroy(const struct kbase_hwcnt_metadata *metadata)
 {
+	/* A NULL @metadata is accepted: kfree() is a no-op on NULL, so no
+	 * explicit guard is needed before freeing.
+	 */
 	kfree(metadata);
 }
 
diff --git a/drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt_types.h b/drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt_types.h
index c7afe173d426..45f67f7c9a1b 100644
--- a/drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt_types.h
+++ b/drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt_types.h
@@ -1,7 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
 /*
  *
- * (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2018-2024 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -19,65 +19,6 @@
  *
  */
 
-/*
- * Hardware counter types.
- * Contains structures for describing the physical layout of hardware counter
- * dump buffers and enable maps within a system.
- *
- * Also contains helper functions for manipulation of these dump buffers and
- * enable maps.
- *
- * Through use of these structures and functions, hardware counters can be
- * enabled, copied, accumulated, and generally manipulated in a generic way,
- * regardless of the physical counter dump layout.
- *
- * Terminology:
- *
- * Hardware Counter System:
- *   A collection of hardware counter blocks, making a full hardware counter
- *   system.
- * Hardware Counter Block:
- *   A block of hardware counters (e.g. shader block, tiler block).
- * Hardware Counter Block Instance:
- *   An instance of a Hardware Counter Block (e.g. an MP4 GPU might have
- *   4 shader block instances).
- *
- * Block Header:
- *   A header value inside a counter block. Headers don't count anything,
- *   so it is only valid to copy or zero them. Headers are always the first
- *   values in the block.
- * Block Counter:
- *   A counter value inside a counter block. Counters can be zeroed, copied,
- *   or accumulated. Counters are always immediately after the headers in the
- *   block.
- * Block Value:
- *   A catch-all term for block headers and block counters.
- *
- * Enable Map:
- *   An array of u64 bitfields, where each bit either enables exactly one
- *   block value, or is unused (padding). Note that this is derived from
- *   the client configuration, and is not obtained from the hardware.
- * Dump Buffer:
- *   An array of u64 values, where each u64 corresponds either to one block
- *   value, or is unused (padding).
- * Block State Buffer:
- *   An array of blk_stt_t values, where each blk_stt_t corresponds to one block
- *   instance and is used to track the on/off power state transitions, as well has
- *   hardware resource availability, and whether the block was operating
- *   in normal or protected mode.
- * Availability Mask:
- *   A bitfield, where each bit corresponds to whether a block instance is
- *   physically available (e.g. an MP3 GPU may have a sparse core mask of
- *   0b1011, meaning it only has 3 cores but for hardware counter dumps has the
- *   same dump buffer layout as an MP4 GPU with a core mask of 0b1111. In this
- *   case, the availability mask might be 0b1011111 (the exact layout will
- *   depend on the specific hardware architecture), with the 3 extra early bits
- *   corresponding to other block instances in the hardware counter system).
- * Metadata:
- *   Structure describing the physical layout of the enable map and dump buffers
- *   for a specific hardware counter system.
- */
-
 #ifndef _KBASE_HWCNT_TYPES_H_
 #define _KBASE_HWCNT_TYPES_H_
 
diff --git a/drivers/gpu/arm/bifrost/ipa/mali_kbase_ipa_debugfs.c b/drivers/gpu/arm/bifrost/ipa/mali_kbase_ipa_debugfs.c
index e4138580de20..9305747ff472 100644
--- a/drivers/gpu/arm/bifrost/ipa/mali_kbase_ipa_debugfs.c
+++ b/drivers/gpu/arm/bifrost/ipa/mali_kbase_ipa_debugfs.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
 /*
  *
- * (C) COPYRIGHT 2017-2023 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2017-2024 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -115,7 +115,7 @@ static ssize_t param_string_set(struct file *file, const char __user *user_buf,
 		goto end;
 	}
 
-	buf_size = min(param->size - 1, count);
+	buf_size = min(size_sub(param->size, 1), count);
 	if (copy_from_user(param->addr.str, user_buf, buf_size)) {
 		ret = -EFAULT;
 		goto end;
diff --git a/drivers/gpu/arm/bifrost/jm/mali_kbase_jm_defs.h b/drivers/gpu/arm/bifrost/jm/mali_kbase_jm_defs.h
index 373b9b1b73b3..23e919314333 100644
--- a/drivers/gpu/arm/bifrost/jm/mali_kbase_jm_defs.h
+++ b/drivers/gpu/arm/bifrost/jm/mali_kbase_jm_defs.h
@@ -1,7 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
 /*
  *
- * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2024 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -308,11 +308,11 @@ enum kbase_atom_gpu_rb_state {
  *                      powered down and GPU shall come out of fully
  *                      coherent mode before entering protected mode.
  * @KBASE_ATOM_ENTER_PROTECTED_SET_COHERENCY: Prepare coherency change;
- *                      for BASE_HW_ISSUE_TGOX_R1_1234 also request L2 power on
+ *                      for KBASE_HW_ISSUE_TGOX_R1_1234 also request L2 power on
  *                      so that coherency register contains correct value when
  *                      GPU enters protected mode.
  * @KBASE_ATOM_ENTER_PROTECTED_FINISHED: End state; for
- *                      BASE_HW_ISSUE_TGOX_R1_1234 check
+ *                      KBASE_HW_ISSUE_TGOX_R1_1234 check
  *                      that L2 is powered up and switch GPU to protected mode.
  */
 enum kbase_atom_enter_protected_state {
@@ -500,10 +500,6 @@ enum kbase_atom_exit_protected_state {
  *                         is snapshot of the age_count counter in kbase
  *                         context.
  * @jobslot: Job slot to use when BASE_JD_REQ_JOB_SLOT is specified.
- * @renderpass_id:Renderpass identifier used to associate an atom that has
- *                 BASE_JD_REQ_START_RENDERPASS set in its core requirements
- *                 with an atom that has BASE_JD_REQ_END_RENDERPASS set.
- * @jc_fragment:          Set of GPU fragment job chains
  */
 struct kbase_jd_atom {
 	struct work_struct work;
@@ -564,8 +560,6 @@ struct kbase_jd_atom {
 	enum base_jd_event_code event_code;
 	base_jd_core_req core_req;
 	u8 jobslot;
-	u8 renderpass_id;
-	struct base_jd_fragment jc_fragment;
 
 	u32 ticks;
 	int sched_priority;
@@ -676,71 +670,6 @@ static inline bool kbase_jd_atom_is_earlier(const struct kbase_jd_atom *katom_a,
 
 #define KBASE_JD_DEP_QUEUE_SIZE 256
 
-/**
- * enum kbase_jd_renderpass_state - State of a renderpass
- * @KBASE_JD_RP_COMPLETE: Unused or completed renderpass. Can only transition to
- *                        START.
- * @KBASE_JD_RP_START:    Renderpass making a first attempt at tiling.
- *                        Can transition to PEND_OOM or COMPLETE.
- * @KBASE_JD_RP_PEND_OOM: Renderpass whose first attempt at tiling used too much
- *                        memory and has a soft-stop pending. Can transition to
- *                        OOM or COMPLETE.
- * @KBASE_JD_RP_OOM:      Renderpass whose first attempt at tiling used too much
- *                        memory and therefore switched to incremental
- *                        rendering. The fragment job chain is forced to run.
- *                        Can only transition to RETRY.
- * @KBASE_JD_RP_RETRY:    Renderpass making a second or subsequent attempt at
- *                        tiling. Can transition to RETRY_PEND_OOM or COMPLETE.
- * @KBASE_JD_RP_RETRY_PEND_OOM: Renderpass whose second or subsequent attempt at
- *                              tiling used too much memory again and has a
- *                              soft-stop pending. Can transition to RETRY_OOM
- *                              or COMPLETE.
- * @KBASE_JD_RP_RETRY_OOM: Renderpass whose second or subsequent attempt at
- *                         tiling used too much memory again. The fragment job
- *                         chain is forced to run. Can only transition to RETRY.
- *
- * A state machine is used to control incremental rendering.
- */
-enum kbase_jd_renderpass_state {
-	KBASE_JD_RP_COMPLETE, /* COMPLETE => START */
-	KBASE_JD_RP_START, /* START => PEND_OOM or COMPLETE */
-	KBASE_JD_RP_PEND_OOM, /* PEND_OOM => OOM or COMPLETE */
-	KBASE_JD_RP_OOM, /* OOM => RETRY */
-	KBASE_JD_RP_RETRY, /* RETRY => RETRY_PEND_OOM or COMPLETE */
-	KBASE_JD_RP_RETRY_PEND_OOM, /* RETRY_PEND_OOM => RETRY_OOM or COMPLETE */
-	KBASE_JD_RP_RETRY_OOM /* RETRY_OOM => RETRY */
-};
-
-/**
- * struct kbase_jd_renderpass - Data for a renderpass
- * @state:        Current state of the renderpass. If KBASE_JD_RP_COMPLETE then
- *                all other members are invalid.
- *                Both the job dispatcher context and hwaccess_lock must be
- *                locked to modify this so that it can be read with either
- *                (or both) locked.
- * @start_katom:  Address of the atom that is the start of a renderpass.
- *                Both the job dispatcher context and hwaccess_lock must be
- *                locked to modify this so that it can be read with either
- *                (or both) locked.
- * @end_katom:    Address of the atom that is the end of a renderpass, or NULL
- *                if that atom hasn't been added to the job scheduler yet.
- *                The job dispatcher context and hwaccess_lock must be
- *                locked to modify this so that it can be read with either
- *                (or both) locked.
- * @oom_reg_list: A list of region structures which triggered out-of-memory.
- *                The hwaccess_lock must be locked to access this.
- *
- * Atoms tagged with BASE_JD_REQ_START_RENDERPASS or BASE_JD_REQ_END_RENDERPASS
- * are associated with an object of this type, which is created and maintained
- * by kbase to keep track of each renderpass.
- */
-struct kbase_jd_renderpass {
-	enum kbase_jd_renderpass_state state;
-	struct kbase_jd_atom *start_katom;
-	struct kbase_jd_atom *end_katom;
-	struct list_head oom_reg_list;
-};
-
 /**
  * struct kbase_jd_context  - per context object encapsulating all the
  *                            Job dispatcher related state.
@@ -751,9 +680,6 @@ struct kbase_jd_renderpass {
  * @atoms:                    Array of the objects representing atoms,
  *                            containing the complete state and attributes
  *                            of an atom.
- * @renderpasses:             Array of renderpass state for incremental
- *                            rendering, indexed by user-specified renderpass
- *                            ID.
  * @job_nr:                   Tracks the number of atoms being processed by the
  *                            kbase. This includes atoms that are not tracked by
  *                            scheduler: 'not ready to run' & 'dependency-only'
@@ -803,7 +729,6 @@ struct kbase_jd_context {
 	struct mutex lock;
 	struct kbasep_js_kctx_info sched_info;
 	struct kbase_jd_atom atoms[BASE_JD_ATOM_COUNT];
-	struct kbase_jd_renderpass renderpasses[BASE_JD_RP_COUNT];
 	struct workqueue_struct *job_done_wq;
 
 	wait_queue_head_t zero_jobs_wait;
diff --git a/drivers/gpu/arm/bifrost/jm/mali_kbase_jm_js.h b/drivers/gpu/arm/bifrost/jm/mali_kbase_jm_js.h
index 333ad2d2b150..65b54c68d8c7 100644
--- a/drivers/gpu/arm/bifrost/jm/mali_kbase_jm_js.h
+++ b/drivers/gpu/arm/bifrost/jm/mali_kbase_jm_js.h
@@ -1,7 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
 /*
  *
- * (C) COPYRIGHT 2020-2023 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2020-2024 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -567,22 +567,6 @@ bool kbase_js_complete_atom_wq(struct kbase_context *kctx, struct kbase_jd_atom
  */
 struct kbase_jd_atom *kbase_js_complete_atom(struct kbase_jd_atom *katom, ktime_t *end_timestamp);
 
-/**
- * kbase_js_atom_blocked_on_x_dep - Decide whether to ignore a cross-slot
- *                                  dependency
- * @katom:	Pointer to an atom in the slot ringbuffer
- *
- * A cross-slot dependency is ignored if necessary to unblock incremental
- * rendering. If the atom at the start of a renderpass used too much memory
- * and was soft-stopped then the atom at the end of a renderpass is submitted
- * to hardware regardless of its dependency on the start-of-renderpass atom.
- * This can happen multiple times for the same pair of atoms.
- *
- * Return: true to block the atom or false to allow it to be submitted to
- * hardware.
- */
-bool kbase_js_atom_blocked_on_x_dep(struct kbase_jd_atom *katom);
-
 /**
  * kbase_js_sched - Submit atoms from all available contexts.
  *
@@ -809,8 +793,7 @@ static inline bool
 kbasep_js_has_atom_finished(const struct kbasep_js_atom_retained_state *katom_retained_state)
 {
 	return (bool)(katom_retained_state->event_code != BASE_JD_EVENT_STOPPED &&
-		      katom_retained_state->event_code != BASE_JD_EVENT_REMOVED_FROM_NEXT &&
-		      katom_retained_state->event_code != BASE_JD_EVENT_END_RP_DONE);
+		      katom_retained_state->event_code != BASE_JD_EVENT_REMOVED_FROM_NEXT);
 }
 
 /**
diff --git a/drivers/gpu/arm/bifrost/mali_base_hwconfig_features.h b/drivers/gpu/arm/bifrost/mali_base_hwconfig_features.h
deleted file mode 100644
index 1f32fc9dd553..000000000000
--- a/drivers/gpu/arm/bifrost/mali_base_hwconfig_features.h
+++ /dev/null
@@ -1,161 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
-/*
- *
- * (C) COPYRIGHT 2014-2023 ARM Limited. All rights reserved.
- *
- * This program is free software and is provided to you under the terms of the
- * GNU General Public License version 2 as published by the Free Software
- * Foundation, and any use by you of this program is subject to the terms
- * of such GNU license.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, you can access it online at
- * http://www.gnu.org/licenses/gpl-2.0.html.
- *
- */
-
-/* AUTOMATICALLY GENERATED FILE. If you want to amend the issues/features,
- * please update base/tools/hwconfig_generator/hwc_{issues,features}.py
- * For more information see base/tools/docs/hwconfig_generator.md
- */
-
-#ifndef _BASE_HWCONFIG_FEATURES_H_
-#define _BASE_HWCONFIG_FEATURES_H_
-
-enum base_hw_feature {
-	BASE_HW_FEATURE_FLUSH_REDUCTION,
-	BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
-	BASE_HW_FEATURE_TLS_HASHING,
-	BASE_HW_FEATURE_THREAD_GROUP_SPLIT,
-	BASE_HW_FEATURE_CLEAN_ONLY_SAFE,
-	BASE_HW_FEATURE_IDVS_GROUP_SIZE,
-	BASE_HW_FEATURE_L2_CONFIG,
-	BASE_HW_FEATURE_L2_SLICE_HASH,
-	BASE_HW_FEATURE_GPU_SLEEP,
-	BASE_HW_FEATURE_FLUSH_INV_SHADER_OTHER,
-	BASE_HW_FEATURE_CORE_FEATURES,
-	BASE_HW_FEATURE_PBHA_HWU,
-	BASE_HW_FEATURE_LARGE_PAGE_ALLOC,
-	BASE_HW_FEATURE_THREAD_TLS_ALLOC,
-	BASE_HW_FEATURE_END
-};
-
-__attribute__((unused)) static const enum base_hw_feature base_hw_features_generic[] = {
-	BASE_HW_FEATURE_END
-};
-
-__attribute__((unused)) static const enum base_hw_feature base_hw_features_tMIx[] = {
-	BASE_HW_FEATURE_THREAD_GROUP_SPLIT, BASE_HW_FEATURE_FLUSH_REDUCTION, BASE_HW_FEATURE_END
-};
-
-__attribute__((unused)) static const enum base_hw_feature base_hw_features_tHEx[] = {
-	BASE_HW_FEATURE_THREAD_GROUP_SPLIT, BASE_HW_FEATURE_FLUSH_REDUCTION,
-	BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, BASE_HW_FEATURE_END
-};
-
-__attribute__((unused)) static const enum base_hw_feature base_hw_features_tSIx[] = {
-	BASE_HW_FEATURE_THREAD_GROUP_SPLIT, BASE_HW_FEATURE_FLUSH_REDUCTION,
-	BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, BASE_HW_FEATURE_END
-};
-
-__attribute__((unused)) static const enum base_hw_feature base_hw_features_tDVx[] = {
-	BASE_HW_FEATURE_THREAD_GROUP_SPLIT, BASE_HW_FEATURE_FLUSH_REDUCTION,
-	BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, BASE_HW_FEATURE_END
-};
-
-__attribute__((unused)) static const enum base_hw_feature base_hw_features_tNOx[] = {
-	BASE_HW_FEATURE_THREAD_GROUP_SPLIT,   BASE_HW_FEATURE_FLUSH_REDUCTION,
-	BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, BASE_HW_FEATURE_TLS_HASHING,
-	BASE_HW_FEATURE_IDVS_GROUP_SIZE,      BASE_HW_FEATURE_END
-};
-
-__attribute__((unused)) static const enum base_hw_feature base_hw_features_tGOx[] = {
-	BASE_HW_FEATURE_THREAD_GROUP_SPLIT,   BASE_HW_FEATURE_FLUSH_REDUCTION,
-	BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, BASE_HW_FEATURE_TLS_HASHING,
-	BASE_HW_FEATURE_IDVS_GROUP_SIZE,      BASE_HW_FEATURE_CORE_FEATURES,
-	BASE_HW_FEATURE_THREAD_TLS_ALLOC,     BASE_HW_FEATURE_END
-};
-
-__attribute__((unused)) static const enum base_hw_feature base_hw_features_tTRx[] = {
-	BASE_HW_FEATURE_FLUSH_REDUCTION,	BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
-	BASE_HW_FEATURE_IDVS_GROUP_SIZE,	BASE_HW_FEATURE_CLEAN_ONLY_SAFE,
-	BASE_HW_FEATURE_FLUSH_INV_SHADER_OTHER, BASE_HW_FEATURE_END
-};
-
-__attribute__((unused)) static const enum base_hw_feature base_hw_features_tNAx[] = {
-	BASE_HW_FEATURE_FLUSH_REDUCTION,	BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
-	BASE_HW_FEATURE_IDVS_GROUP_SIZE,	BASE_HW_FEATURE_CLEAN_ONLY_SAFE,
-	BASE_HW_FEATURE_FLUSH_INV_SHADER_OTHER, BASE_HW_FEATURE_END
-};
-
-__attribute__((unused)) static const enum base_hw_feature base_hw_features_tBEx[] = {
-	BASE_HW_FEATURE_FLUSH_REDUCTION,
-	BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
-	BASE_HW_FEATURE_IDVS_GROUP_SIZE,
-	BASE_HW_FEATURE_L2_CONFIG,
-	BASE_HW_FEATURE_CLEAN_ONLY_SAFE,
-	BASE_HW_FEATURE_FLUSH_INV_SHADER_OTHER,
-	BASE_HW_FEATURE_END
-};
-
-__attribute__((unused)) static const enum base_hw_feature base_hw_features_tBAx[] = {
-	BASE_HW_FEATURE_FLUSH_REDUCTION,
-	BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
-	BASE_HW_FEATURE_IDVS_GROUP_SIZE,
-	BASE_HW_FEATURE_L2_CONFIG,
-	BASE_HW_FEATURE_CLEAN_ONLY_SAFE,
-	BASE_HW_FEATURE_FLUSH_INV_SHADER_OTHER,
-	BASE_HW_FEATURE_END
-};
-
-__attribute__((unused)) static const enum base_hw_feature base_hw_features_tODx[] = {
-	BASE_HW_FEATURE_FLUSH_REDUCTION, BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
-	BASE_HW_FEATURE_L2_CONFIG, BASE_HW_FEATURE_CLEAN_ONLY_SAFE, BASE_HW_FEATURE_END
-};
-
-__attribute__((unused)) static const enum base_hw_feature base_hw_features_tGRx[] = {
-	BASE_HW_FEATURE_FLUSH_REDUCTION, BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
-	BASE_HW_FEATURE_L2_CONFIG,	 BASE_HW_FEATURE_CLEAN_ONLY_SAFE,
-	BASE_HW_FEATURE_CORE_FEATURES,	 BASE_HW_FEATURE_END
-};
-
-__attribute__((unused)) static const enum base_hw_feature base_hw_features_tVAx[] = {
-	BASE_HW_FEATURE_FLUSH_REDUCTION, BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
-	BASE_HW_FEATURE_L2_CONFIG,	 BASE_HW_FEATURE_CLEAN_ONLY_SAFE,
-	BASE_HW_FEATURE_CORE_FEATURES,	 BASE_HW_FEATURE_END
-};
-
-__attribute__((unused)) static const enum base_hw_feature base_hw_features_tTUx[] = {
-	BASE_HW_FEATURE_FLUSH_REDUCTION, BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
-	BASE_HW_FEATURE_L2_CONFIG,	 BASE_HW_FEATURE_CLEAN_ONLY_SAFE,
-	BASE_HW_FEATURE_L2_SLICE_HASH,	 BASE_HW_FEATURE_GPU_SLEEP,
-	BASE_HW_FEATURE_CORE_FEATURES,	 BASE_HW_FEATURE_END
-};
-
-__attribute__((unused)) static const enum base_hw_feature base_hw_features_tTIx[] = {
-	BASE_HW_FEATURE_FLUSH_REDUCTION,
-	BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
-	BASE_HW_FEATURE_L2_CONFIG,
-	BASE_HW_FEATURE_CLEAN_ONLY_SAFE,
-	BASE_HW_FEATURE_L2_SLICE_HASH,
-	BASE_HW_FEATURE_GPU_SLEEP,
-	BASE_HW_FEATURE_CORE_FEATURES,
-	BASE_HW_FEATURE_PBHA_HWU,
-	BASE_HW_FEATURE_END
-};
-
-__attribute__((unused)) static const enum base_hw_feature base_hw_features_tKRx[] = {
-	BASE_HW_FEATURE_FLUSH_REDUCTION,  BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
-	BASE_HW_FEATURE_L2_CONFIG,	  BASE_HW_FEATURE_CLEAN_ONLY_SAFE,
-	BASE_HW_FEATURE_L2_SLICE_HASH,	  BASE_HW_FEATURE_GPU_SLEEP,
-	BASE_HW_FEATURE_CORE_FEATURES,	  BASE_HW_FEATURE_PBHA_HWU,
-	BASE_HW_FEATURE_LARGE_PAGE_ALLOC, BASE_HW_FEATURE_END
-};
-
-
-#endif /* _BASE_HWCONFIG_FEATURES_H_ */
diff --git a/drivers/gpu/arm/bifrost/mali_base_hwconfig_issues.h b/drivers/gpu/arm/bifrost/mali_base_hwconfig_issues.h
deleted file mode 100644
index 4426bd743b4e..000000000000
--- a/drivers/gpu/arm/bifrost/mali_base_hwconfig_issues.h
+++ /dev/null
@@ -1,618 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
-/*
- *
- * (C) COPYRIGHT 2014-2023 ARM Limited. All rights reserved.
- *
- * This program is free software and is provided to you under the terms of the
- * GNU General Public License version 2 as published by the Free Software
- * Foundation, and any use by you of this program is subject to the terms
- * of such GNU license.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, you can access it online at
- * http://www.gnu.org/licenses/gpl-2.0.html.
- *
- */
-
-/* AUTOMATICALLY GENERATED FILE. If you want to amend the issues/features,
- * please update base/tools/hwconfig_generator/hwc_{issues,features}.py
- * For more information see base/tools/docs/hwconfig_generator.md
- */
-
-#ifndef _BASE_HWCONFIG_ISSUES_H_
-#define _BASE_HWCONFIG_ISSUES_H_
-
-enum base_hw_issue {
-	BASE_HW_ISSUE_5736,
-	BASE_HW_ISSUE_9435,
-	BASE_HW_ISSUE_10682,
-	BASE_HW_ISSUE_11054,
-	BASE_HW_ISSUE_T76X_3953,
-	BASE_HW_ISSUE_TMIX_7891,
-	BASE_HW_ISSUE_TMIX_7940,
-	BASE_HW_ISSUE_TMIX_8042,
-	BASE_HW_ISSUE_TMIX_8133,
-	BASE_HW_ISSUE_TMIX_8138,
-	BASE_HW_ISSUE_TMIX_8206,
-	BASE_HW_ISSUE_TMIX_8343,
-	BASE_HW_ISSUE_TMIX_8463,
-	BASE_HW_ISSUE_TMIX_8456,
-	BASE_HW_ISSUE_TSIX_1116,
-	BASE_HW_ISSUE_TSIX_2033,
-	BASE_HW_ISSUE_TMIX_8438,
-	BASE_HW_ISSUE_TNOX_1194,
-	BASE_HW_ISSUE_TGOX_R1_1234,
-	BASE_HW_ISSUE_TTRX_1337,
-	BASE_HW_ISSUE_TSIX_1792,
-	BASE_HW_ISSUE_TTRX_2968_TTRX_3162,
-	BASE_HW_ISSUE_TTRX_3076,
-	BASE_HW_ISSUE_TTRX_921,
-	BASE_HW_ISSUE_TTRX_3414,
-	BASE_HW_ISSUE_GPU2017_1336,
-	BASE_HW_ISSUE_TTRX_3083,
-	BASE_HW_ISSUE_TTRX_3470,
-	BASE_HW_ISSUE_TTRX_3464,
-	BASE_HW_ISSUE_TTRX_3485,
-	BASE_HW_ISSUE_GPU2019_3212,
-	BASE_HW_ISSUE_TURSEHW_1997,
-	BASE_HW_ISSUE_GPU2019_3878,
-	BASE_HW_ISSUE_TURSEHW_2716,
-	BASE_HW_ISSUE_GPU2019_3901,
-	BASE_HW_ISSUE_GPU2021PRO_290,
-	BASE_HW_ISSUE_TITANHW_2710,
-	BASE_HW_ISSUE_TITANHW_2679,
-	BASE_HW_ISSUE_GPU2022PRO_148,
-	BASE_HW_ISSUE_TITANHW_2922,
-	BASE_HW_ISSUE_TITANHW_2952,
-	BASE_HW_ISSUE_KRAKEHW_2151,
-	BASE_HW_ISSUE_TITANHW_2938,
-	BASE_HW_ISSUE_KRAKEHW_2269,
-	BASE_HW_ISSUE_END
-};
-
-__attribute__((
-	unused)) static const enum base_hw_issue base_hw_issues_generic[] = { BASE_HW_ISSUE_END };
-
-__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tMIx_r0p0_05dev0[] = {
-	BASE_HW_ISSUE_9435,	    BASE_HW_ISSUE_10682,	BASE_HW_ISSUE_11054,
-	BASE_HW_ISSUE_T76X_3953,    BASE_HW_ISSUE_TMIX_7891,	BASE_HW_ISSUE_TMIX_8042,
-	BASE_HW_ISSUE_TMIX_8133,    BASE_HW_ISSUE_TMIX_8138,	BASE_HW_ISSUE_TMIX_8206,
-	BASE_HW_ISSUE_TMIX_8343,    BASE_HW_ISSUE_TMIX_8463,	BASE_HW_ISSUE_TMIX_8456,
-	BASE_HW_ISSUE_TMIX_8438,    BASE_HW_ISSUE_TSIX_2033,	BASE_HW_ISSUE_TTRX_921,
-	BASE_HW_ISSUE_GPU2017_1336, BASE_HW_ISSUE_TITANHW_2710, BASE_HW_ISSUE_GPU2022PRO_148,
-	BASE_HW_ISSUE_TITANHW_2938, BASE_HW_ISSUE_END
-};
-
-__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tMIx_r0p0[] = {
-	BASE_HW_ISSUE_9435,	    BASE_HW_ISSUE_10682,	BASE_HW_ISSUE_11054,
-	BASE_HW_ISSUE_TMIX_7891,    BASE_HW_ISSUE_TMIX_7940,	BASE_HW_ISSUE_TMIX_8042,
-	BASE_HW_ISSUE_TMIX_8133,    BASE_HW_ISSUE_TMIX_8138,	BASE_HW_ISSUE_TMIX_8206,
-	BASE_HW_ISSUE_TMIX_8343,    BASE_HW_ISSUE_TMIX_8463,	BASE_HW_ISSUE_TMIX_8456,
-	BASE_HW_ISSUE_TMIX_8438,    BASE_HW_ISSUE_TSIX_2033,	BASE_HW_ISSUE_TTRX_921,
-	BASE_HW_ISSUE_GPU2017_1336, BASE_HW_ISSUE_TITANHW_2710, BASE_HW_ISSUE_GPU2022PRO_148,
-	BASE_HW_ISSUE_TITANHW_2938, BASE_HW_ISSUE_END
-};
-
-__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tMIx_r0p1[] = {
-	BASE_HW_ISSUE_9435,	    BASE_HW_ISSUE_10682,	BASE_HW_ISSUE_11054,
-	BASE_HW_ISSUE_TMIX_7891,    BASE_HW_ISSUE_TMIX_7940,	BASE_HW_ISSUE_TMIX_8042,
-	BASE_HW_ISSUE_TMIX_8133,    BASE_HW_ISSUE_TMIX_8138,	BASE_HW_ISSUE_TMIX_8206,
-	BASE_HW_ISSUE_TMIX_8343,    BASE_HW_ISSUE_TMIX_8463,	BASE_HW_ISSUE_TMIX_8456,
-	BASE_HW_ISSUE_TMIX_8438,    BASE_HW_ISSUE_TSIX_2033,	BASE_HW_ISSUE_TTRX_921,
-	BASE_HW_ISSUE_GPU2017_1336, BASE_HW_ISSUE_TITANHW_2710, BASE_HW_ISSUE_GPU2022PRO_148,
-	BASE_HW_ISSUE_TITANHW_2938, BASE_HW_ISSUE_END
-};
-
-__attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tMIx[] = {
-	BASE_HW_ISSUE_5736,	      BASE_HW_ISSUE_9435,	  BASE_HW_ISSUE_TMIX_7891,
-	BASE_HW_ISSUE_TMIX_7940,      BASE_HW_ISSUE_TMIX_8042,	  BASE_HW_ISSUE_TMIX_8133,
-	BASE_HW_ISSUE_TMIX_8138,      BASE_HW_ISSUE_TMIX_8206,	  BASE_HW_ISSUE_TMIX_8343,
-	BASE_HW_ISSUE_TMIX_8456,      BASE_HW_ISSUE_TSIX_2033,	  BASE_HW_ISSUE_TITANHW_2710,
-	BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_TITANHW_2938, BASE_HW_ISSUE_END
-};
-
-__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tHEx_r0p0[] = {
-	BASE_HW_ISSUE_9435,	    BASE_HW_ISSUE_10682,	  BASE_HW_ISSUE_11054,
-	BASE_HW_ISSUE_TMIX_7891,    BASE_HW_ISSUE_TMIX_8042,	  BASE_HW_ISSUE_TMIX_8133,
-	BASE_HW_ISSUE_TSIX_2033,    BASE_HW_ISSUE_TTRX_921,	  BASE_HW_ISSUE_GPU2017_1336,
-	BASE_HW_ISSUE_TITANHW_2710, BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_TITANHW_2938,
-	BASE_HW_ISSUE_END
-};
-
-__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tHEx_r0p1[] = {
-	BASE_HW_ISSUE_9435,	    BASE_HW_ISSUE_10682,	  BASE_HW_ISSUE_11054,
-	BASE_HW_ISSUE_TMIX_7891,    BASE_HW_ISSUE_TMIX_8042,	  BASE_HW_ISSUE_TMIX_8133,
-	BASE_HW_ISSUE_TSIX_2033,    BASE_HW_ISSUE_TTRX_921,	  BASE_HW_ISSUE_GPU2017_1336,
-	BASE_HW_ISSUE_TITANHW_2710, BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_TITANHW_2938,
-	BASE_HW_ISSUE_END
-};
-
-__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tHEx_r0p2[] = {
-	BASE_HW_ISSUE_9435,	    BASE_HW_ISSUE_10682,	  BASE_HW_ISSUE_11054,
-	BASE_HW_ISSUE_TMIX_7891,    BASE_HW_ISSUE_TMIX_8042,	  BASE_HW_ISSUE_TMIX_8133,
-	BASE_HW_ISSUE_TSIX_2033,    BASE_HW_ISSUE_TTRX_921,	  BASE_HW_ISSUE_GPU2017_1336,
-	BASE_HW_ISSUE_TITANHW_2710, BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_TITANHW_2938,
-	BASE_HW_ISSUE_END
-};
-
-__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tHEx_r0p3[] = {
-	BASE_HW_ISSUE_9435,	      BASE_HW_ISSUE_10682,	  BASE_HW_ISSUE_TMIX_7891,
-	BASE_HW_ISSUE_TMIX_8042,      BASE_HW_ISSUE_TMIX_8133,	  BASE_HW_ISSUE_TSIX_2033,
-	BASE_HW_ISSUE_TTRX_921,	      BASE_HW_ISSUE_GPU2017_1336, BASE_HW_ISSUE_TITANHW_2710,
-	BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_TITANHW_2938, BASE_HW_ISSUE_END
-};
-
-__attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tHEx[] = {
-	BASE_HW_ISSUE_5736,	    BASE_HW_ISSUE_9435,		  BASE_HW_ISSUE_TMIX_7891,
-	BASE_HW_ISSUE_TMIX_8042,    BASE_HW_ISSUE_TMIX_8133,	  BASE_HW_ISSUE_TSIX_2033,
-	BASE_HW_ISSUE_TITANHW_2710, BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_TITANHW_2938,
-	BASE_HW_ISSUE_END
-};
-
-__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tSIx_r0p0[] = {
-	BASE_HW_ISSUE_9435,	    BASE_HW_ISSUE_11054,	  BASE_HW_ISSUE_TMIX_8133,
-	BASE_HW_ISSUE_TSIX_1116,    BASE_HW_ISSUE_TSIX_2033,	  BASE_HW_ISSUE_TSIX_1792,
-	BASE_HW_ISSUE_TTRX_921,	    BASE_HW_ISSUE_GPU2017_1336,	  BASE_HW_ISSUE_TTRX_3464,
-	BASE_HW_ISSUE_TITANHW_2710, BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_TITANHW_2938,
-	BASE_HW_ISSUE_END
-};
-
-__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tSIx_r0p1[] = {
-	BASE_HW_ISSUE_9435,	    BASE_HW_ISSUE_11054,	  BASE_HW_ISSUE_TMIX_8133,
-	BASE_HW_ISSUE_TSIX_1116,    BASE_HW_ISSUE_TSIX_2033,	  BASE_HW_ISSUE_TSIX_1792,
-	BASE_HW_ISSUE_TTRX_921,	    BASE_HW_ISSUE_GPU2017_1336,	  BASE_HW_ISSUE_TTRX_3464,
-	BASE_HW_ISSUE_TITANHW_2710, BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_TITANHW_2938,
-	BASE_HW_ISSUE_END
-};
-
-__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tSIx_r1p0[] = {
-	BASE_HW_ISSUE_9435,	      BASE_HW_ISSUE_11054,	  BASE_HW_ISSUE_TMIX_8133,
-	BASE_HW_ISSUE_TSIX_1116,      BASE_HW_ISSUE_TSIX_2033,	  BASE_HW_ISSUE_TTRX_921,
-	BASE_HW_ISSUE_GPU2017_1336,   BASE_HW_ISSUE_TTRX_3464,	  BASE_HW_ISSUE_TITANHW_2710,
-	BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_TITANHW_2938, BASE_HW_ISSUE_END
-};
-
-__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tSIx_r1p1[] = {
-	BASE_HW_ISSUE_9435,	    BASE_HW_ISSUE_TMIX_8133,	BASE_HW_ISSUE_TSIX_1116,
-	BASE_HW_ISSUE_TSIX_2033,    BASE_HW_ISSUE_TTRX_921,	BASE_HW_ISSUE_GPU2017_1336,
-	BASE_HW_ISSUE_TTRX_3464,    BASE_HW_ISSUE_TITANHW_2710, BASE_HW_ISSUE_GPU2022PRO_148,
-	BASE_HW_ISSUE_TITANHW_2938, BASE_HW_ISSUE_END
-};
-
-__attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tSIx[] = {
-	BASE_HW_ISSUE_5736,	    BASE_HW_ISSUE_9435,		  BASE_HW_ISSUE_TMIX_8133,
-	BASE_HW_ISSUE_TSIX_1116,    BASE_HW_ISSUE_TSIX_2033,	  BASE_HW_ISSUE_TTRX_3464,
-	BASE_HW_ISSUE_TITANHW_2710, BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_TITANHW_2938,
-	BASE_HW_ISSUE_END
-};
-
-__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tDVx_r0p0[] = {
-	BASE_HW_ISSUE_9435,	    BASE_HW_ISSUE_TMIX_8133,	BASE_HW_ISSUE_TSIX_1116,
-	BASE_HW_ISSUE_TSIX_2033,    BASE_HW_ISSUE_TTRX_921,	BASE_HW_ISSUE_GPU2017_1336,
-	BASE_HW_ISSUE_TTRX_3464,    BASE_HW_ISSUE_TITANHW_2710, BASE_HW_ISSUE_GPU2022PRO_148,
-	BASE_HW_ISSUE_TITANHW_2938, BASE_HW_ISSUE_END
-};
-
-__attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tDVx[] = {
-	BASE_HW_ISSUE_5736,	    BASE_HW_ISSUE_9435,		  BASE_HW_ISSUE_TMIX_8133,
-	BASE_HW_ISSUE_TSIX_1116,    BASE_HW_ISSUE_TSIX_2033,	  BASE_HW_ISSUE_TTRX_3464,
-	BASE_HW_ISSUE_TITANHW_2710, BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_TITANHW_2938,
-	BASE_HW_ISSUE_END
-};
-
-__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tNOx_r0p0[] = {
-	BASE_HW_ISSUE_9435,	      BASE_HW_ISSUE_TMIX_8133,	  BASE_HW_ISSUE_TSIX_1116,
-	BASE_HW_ISSUE_TSIX_2033,      BASE_HW_ISSUE_TNOX_1194,	  BASE_HW_ISSUE_TTRX_921,
-	BASE_HW_ISSUE_GPU2017_1336,   BASE_HW_ISSUE_TTRX_3464,	  BASE_HW_ISSUE_TITANHW_2710,
-	BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_TITANHW_2938, BASE_HW_ISSUE_END
-};
-
-__attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tNOx[] = {
-	BASE_HW_ISSUE_5736,	    BASE_HW_ISSUE_9435,		  BASE_HW_ISSUE_TMIX_8133,
-	BASE_HW_ISSUE_TSIX_1116,    BASE_HW_ISSUE_TSIX_2033,	  BASE_HW_ISSUE_TTRX_3464,
-	BASE_HW_ISSUE_TITANHW_2710, BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_TITANHW_2938,
-	BASE_HW_ISSUE_END
-};
-
-__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tGOx_r0p0[] = {
-	BASE_HW_ISSUE_9435,	      BASE_HW_ISSUE_TMIX_8133,	  BASE_HW_ISSUE_TSIX_1116,
-	BASE_HW_ISSUE_TSIX_2033,      BASE_HW_ISSUE_TNOX_1194,	  BASE_HW_ISSUE_TTRX_921,
-	BASE_HW_ISSUE_GPU2017_1336,   BASE_HW_ISSUE_TTRX_3464,	  BASE_HW_ISSUE_TITANHW_2710,
-	BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_TITANHW_2938, BASE_HW_ISSUE_END
-};
-
-__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tGOx_r1p0[] = {
-	BASE_HW_ISSUE_9435,	      BASE_HW_ISSUE_TMIX_8133,	  BASE_HW_ISSUE_TSIX_1116,
-	BASE_HW_ISSUE_TSIX_2033,      BASE_HW_ISSUE_TGOX_R1_1234, BASE_HW_ISSUE_TTRX_921,
-	BASE_HW_ISSUE_GPU2017_1336,   BASE_HW_ISSUE_TTRX_3464,	  BASE_HW_ISSUE_TITANHW_2710,
-	BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_TITANHW_2938, BASE_HW_ISSUE_END
-};
-
-__attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tGOx[] = {
-	BASE_HW_ISSUE_5736,	    BASE_HW_ISSUE_9435,		  BASE_HW_ISSUE_TMIX_8133,
-	BASE_HW_ISSUE_TSIX_1116,    BASE_HW_ISSUE_TSIX_2033,	  BASE_HW_ISSUE_TTRX_3464,
-	BASE_HW_ISSUE_TITANHW_2710, BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_TITANHW_2938,
-	BASE_HW_ISSUE_END
-};
-
-__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tTRx_r0p0[] = {
-	BASE_HW_ISSUE_9435,	    BASE_HW_ISSUE_TSIX_2033,
-	BASE_HW_ISSUE_TTRX_1337,    BASE_HW_ISSUE_TTRX_2968_TTRX_3162,
-	BASE_HW_ISSUE_TTRX_3076,    BASE_HW_ISSUE_TTRX_921,
-	BASE_HW_ISSUE_TTRX_3414,    BASE_HW_ISSUE_GPU2017_1336,
-	BASE_HW_ISSUE_TTRX_3083,    BASE_HW_ISSUE_TTRX_3470,
-	BASE_HW_ISSUE_TTRX_3464,    BASE_HW_ISSUE_TTRX_3485,
-	BASE_HW_ISSUE_TITANHW_2710, BASE_HW_ISSUE_GPU2022PRO_148,
-	BASE_HW_ISSUE_TITANHW_2938, BASE_HW_ISSUE_END
-};
-
-__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tTRx_r0p1[] = {
-	BASE_HW_ISSUE_9435,	    BASE_HW_ISSUE_TSIX_2033,
-	BASE_HW_ISSUE_TTRX_1337,    BASE_HW_ISSUE_TTRX_2968_TTRX_3162,
-	BASE_HW_ISSUE_TTRX_3076,    BASE_HW_ISSUE_TTRX_921,
-	BASE_HW_ISSUE_TTRX_3414,    BASE_HW_ISSUE_GPU2017_1336,
-	BASE_HW_ISSUE_TTRX_3083,    BASE_HW_ISSUE_TTRX_3470,
-	BASE_HW_ISSUE_TTRX_3464,    BASE_HW_ISSUE_TTRX_3485,
-	BASE_HW_ISSUE_TITANHW_2710, BASE_HW_ISSUE_GPU2022PRO_148,
-	BASE_HW_ISSUE_TITANHW_2938, BASE_HW_ISSUE_END
-};
-
-__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tTRx_r0p2[] = {
-	BASE_HW_ISSUE_9435,
-	BASE_HW_ISSUE_TSIX_2033,
-	BASE_HW_ISSUE_TTRX_1337,
-	BASE_HW_ISSUE_TTRX_2968_TTRX_3162,
-	BASE_HW_ISSUE_TTRX_3076,
-	BASE_HW_ISSUE_TTRX_921,
-	BASE_HW_ISSUE_TTRX_3414,
-	BASE_HW_ISSUE_GPU2017_1336,
-	BASE_HW_ISSUE_TTRX_3083,
-	BASE_HW_ISSUE_TTRX_3470,
-	BASE_HW_ISSUE_TTRX_3464,
-	BASE_HW_ISSUE_TITANHW_2710,
-	BASE_HW_ISSUE_GPU2022PRO_148,
-	BASE_HW_ISSUE_TITANHW_2938,
-	BASE_HW_ISSUE_END
-};
-
-__attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tTRx[] = {
-	BASE_HW_ISSUE_5736,	      BASE_HW_ISSUE_9435,	  BASE_HW_ISSUE_TSIX_2033,
-	BASE_HW_ISSUE_TTRX_1337,      BASE_HW_ISSUE_TTRX_3414,	  BASE_HW_ISSUE_TTRX_3083,
-	BASE_HW_ISSUE_TTRX_3470,      BASE_HW_ISSUE_TTRX_3464,	  BASE_HW_ISSUE_TITANHW_2710,
-	BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_TITANHW_2938, BASE_HW_ISSUE_END
-};
-
-__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tNAx_r0p0[] = {
-	BASE_HW_ISSUE_9435,	    BASE_HW_ISSUE_TSIX_2033,
-	BASE_HW_ISSUE_TTRX_1337,    BASE_HW_ISSUE_TTRX_2968_TTRX_3162,
-	BASE_HW_ISSUE_TTRX_3076,    BASE_HW_ISSUE_TTRX_921,
-	BASE_HW_ISSUE_TTRX_3414,    BASE_HW_ISSUE_GPU2017_1336,
-	BASE_HW_ISSUE_TTRX_3083,    BASE_HW_ISSUE_TTRX_3470,
-	BASE_HW_ISSUE_TTRX_3464,    BASE_HW_ISSUE_TTRX_3485,
-	BASE_HW_ISSUE_TITANHW_2710, BASE_HW_ISSUE_GPU2022PRO_148,
-	BASE_HW_ISSUE_TITANHW_2938, BASE_HW_ISSUE_END
-};
-
-__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tNAx_r0p1[] = {
-	BASE_HW_ISSUE_9435,
-	BASE_HW_ISSUE_TSIX_2033,
-	BASE_HW_ISSUE_TTRX_1337,
-	BASE_HW_ISSUE_TTRX_2968_TTRX_3162,
-	BASE_HW_ISSUE_TTRX_3076,
-	BASE_HW_ISSUE_TTRX_921,
-	BASE_HW_ISSUE_TTRX_3414,
-	BASE_HW_ISSUE_GPU2017_1336,
-	BASE_HW_ISSUE_TTRX_3083,
-	BASE_HW_ISSUE_TTRX_3470,
-	BASE_HW_ISSUE_TTRX_3464,
-	BASE_HW_ISSUE_TITANHW_2710,
-	BASE_HW_ISSUE_GPU2022PRO_148,
-	BASE_HW_ISSUE_TITANHW_2938,
-	BASE_HW_ISSUE_END
-};
-
-__attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tNAx[] = {
-	BASE_HW_ISSUE_5736,	      BASE_HW_ISSUE_9435,	  BASE_HW_ISSUE_TSIX_2033,
-	BASE_HW_ISSUE_TTRX_1337,      BASE_HW_ISSUE_TTRX_3414,	  BASE_HW_ISSUE_TTRX_3083,
-	BASE_HW_ISSUE_TTRX_3470,      BASE_HW_ISSUE_TTRX_3464,	  BASE_HW_ISSUE_TITANHW_2710,
-	BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_TITANHW_2938, BASE_HW_ISSUE_END
-};
-
-__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tBEx_r0p0[] = {
-	BASE_HW_ISSUE_9435,	    BASE_HW_ISSUE_TSIX_2033,
-	BASE_HW_ISSUE_TTRX_1337,    BASE_HW_ISSUE_TTRX_2968_TTRX_3162,
-	BASE_HW_ISSUE_TTRX_921,	    BASE_HW_ISSUE_TTRX_3414,
-	BASE_HW_ISSUE_TTRX_3083,    BASE_HW_ISSUE_TTRX_3470,
-	BASE_HW_ISSUE_TTRX_3464,    BASE_HW_ISSUE_TTRX_3485,
-	BASE_HW_ISSUE_TITANHW_2710, BASE_HW_ISSUE_GPU2022PRO_148,
-	BASE_HW_ISSUE_TITANHW_2938, BASE_HW_ISSUE_END
-};
-
-__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tBEx_r0p1[] = {
-	BASE_HW_ISSUE_9435,
-	BASE_HW_ISSUE_TSIX_2033,
-	BASE_HW_ISSUE_TTRX_1337,
-	BASE_HW_ISSUE_TTRX_2968_TTRX_3162,
-	BASE_HW_ISSUE_TTRX_921,
-	BASE_HW_ISSUE_TTRX_3414,
-	BASE_HW_ISSUE_TTRX_3083,
-	BASE_HW_ISSUE_TTRX_3470,
-	BASE_HW_ISSUE_TTRX_3464,
-	BASE_HW_ISSUE_TITANHW_2710,
-	BASE_HW_ISSUE_GPU2022PRO_148,
-	BASE_HW_ISSUE_TITANHW_2938,
-	BASE_HW_ISSUE_END
-};
-
-__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tBEx_r1p0[] = {
-	BASE_HW_ISSUE_9435,
-	BASE_HW_ISSUE_TSIX_2033,
-	BASE_HW_ISSUE_TTRX_1337,
-	BASE_HW_ISSUE_TTRX_2968_TTRX_3162,
-	BASE_HW_ISSUE_TTRX_921,
-	BASE_HW_ISSUE_TTRX_3414,
-	BASE_HW_ISSUE_TTRX_3083,
-	BASE_HW_ISSUE_TTRX_3470,
-	BASE_HW_ISSUE_TTRX_3464,
-	BASE_HW_ISSUE_TITANHW_2710,
-	BASE_HW_ISSUE_GPU2022PRO_148,
-	BASE_HW_ISSUE_TITANHW_2938,
-	BASE_HW_ISSUE_END
-};
-
-__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tBEx_r1p1[] = {
-	BASE_HW_ISSUE_9435,
-	BASE_HW_ISSUE_TSIX_2033,
-	BASE_HW_ISSUE_TTRX_1337,
-	BASE_HW_ISSUE_TTRX_2968_TTRX_3162,
-	BASE_HW_ISSUE_TTRX_921,
-	BASE_HW_ISSUE_TTRX_3414,
-	BASE_HW_ISSUE_TTRX_3083,
-	BASE_HW_ISSUE_TTRX_3470,
-	BASE_HW_ISSUE_TTRX_3464,
-	BASE_HW_ISSUE_TITANHW_2710,
-	BASE_HW_ISSUE_GPU2022PRO_148,
-	BASE_HW_ISSUE_TITANHW_2938,
-	BASE_HW_ISSUE_END
-};
-
-__attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tBEx[] = {
-	BASE_HW_ISSUE_5736,	      BASE_HW_ISSUE_9435,	  BASE_HW_ISSUE_TSIX_2033,
-	BASE_HW_ISSUE_TTRX_1337,      BASE_HW_ISSUE_TTRX_3414,	  BASE_HW_ISSUE_TTRX_3083,
-	BASE_HW_ISSUE_TTRX_3470,      BASE_HW_ISSUE_TTRX_3464,	  BASE_HW_ISSUE_TITANHW_2710,
-	BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_TITANHW_2938, BASE_HW_ISSUE_END
-};
-
-__attribute__((unused)) static const enum base_hw_issue base_hw_issues_lBEx_r1p0[] = {
-	BASE_HW_ISSUE_9435,	    BASE_HW_ISSUE_TSIX_2033,
-	BASE_HW_ISSUE_TTRX_1337,    BASE_HW_ISSUE_TTRX_2968_TTRX_3162,
-	BASE_HW_ISSUE_TTRX_921,	    BASE_HW_ISSUE_TTRX_3414,
-	BASE_HW_ISSUE_TTRX_3083,    BASE_HW_ISSUE_TTRX_3470,
-	BASE_HW_ISSUE_TTRX_3464,    BASE_HW_ISSUE_TTRX_3485,
-	BASE_HW_ISSUE_TITANHW_2710, BASE_HW_ISSUE_GPU2022PRO_148,
-	BASE_HW_ISSUE_TITANHW_2938, BASE_HW_ISSUE_END
-};
-
-__attribute__((unused)) static const enum base_hw_issue base_hw_issues_lBEx_r1p1[] = {
-	BASE_HW_ISSUE_9435,
-	BASE_HW_ISSUE_TSIX_2033,
-	BASE_HW_ISSUE_TTRX_1337,
-	BASE_HW_ISSUE_TTRX_2968_TTRX_3162,
-	BASE_HW_ISSUE_TTRX_921,
-	BASE_HW_ISSUE_TTRX_3414,
-	BASE_HW_ISSUE_TTRX_3083,
-	BASE_HW_ISSUE_TTRX_3470,
-	BASE_HW_ISSUE_TTRX_3464,
-	BASE_HW_ISSUE_TITANHW_2710,
-	BASE_HW_ISSUE_GPU2022PRO_148,
-	BASE_HW_ISSUE_TITANHW_2938,
-	BASE_HW_ISSUE_END
-};
-
-__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tBAx_r0p0[] = {
-	BASE_HW_ISSUE_9435,
-	BASE_HW_ISSUE_TSIX_2033,
-	BASE_HW_ISSUE_TTRX_1337,
-	BASE_HW_ISSUE_TTRX_2968_TTRX_3162,
-	BASE_HW_ISSUE_TTRX_921,
-	BASE_HW_ISSUE_TTRX_3414,
-	BASE_HW_ISSUE_TTRX_3083,
-	BASE_HW_ISSUE_TTRX_3470,
-	BASE_HW_ISSUE_TTRX_3464,
-	BASE_HW_ISSUE_TITANHW_2710,
-	BASE_HW_ISSUE_GPU2022PRO_148,
-	BASE_HW_ISSUE_TITANHW_2938,
-	BASE_HW_ISSUE_END
-};
-
-__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tBAx_r0p1[] = {
-	BASE_HW_ISSUE_9435,
-	BASE_HW_ISSUE_TSIX_2033,
-	BASE_HW_ISSUE_TTRX_1337,
-	BASE_HW_ISSUE_TTRX_2968_TTRX_3162,
-	BASE_HW_ISSUE_TTRX_921,
-	BASE_HW_ISSUE_TTRX_3414,
-	BASE_HW_ISSUE_TTRX_3083,
-	BASE_HW_ISSUE_TTRX_3470,
-	BASE_HW_ISSUE_TTRX_3464,
-	BASE_HW_ISSUE_TITANHW_2710,
-	BASE_HW_ISSUE_GPU2022PRO_148,
-	BASE_HW_ISSUE_TITANHW_2938,
-	BASE_HW_ISSUE_END
-};
-
-__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tBAx_r0p2[] = {
-	BASE_HW_ISSUE_9435,
-	BASE_HW_ISSUE_TSIX_2033,
-	BASE_HW_ISSUE_TTRX_1337,
-	BASE_HW_ISSUE_TTRX_2968_TTRX_3162,
-	BASE_HW_ISSUE_TTRX_921,
-	BASE_HW_ISSUE_TTRX_3414,
-	BASE_HW_ISSUE_TTRX_3083,
-	BASE_HW_ISSUE_TTRX_3470,
-	BASE_HW_ISSUE_TTRX_3464,
-	BASE_HW_ISSUE_TITANHW_2710,
-	BASE_HW_ISSUE_GPU2022PRO_148,
-	BASE_HW_ISSUE_TITANHW_2938,
-	BASE_HW_ISSUE_END
-};
-
-__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tBAx_r1p0[] = {
-	BASE_HW_ISSUE_9435,
-	BASE_HW_ISSUE_TSIX_2033,
-	BASE_HW_ISSUE_TTRX_1337,
-	BASE_HW_ISSUE_TTRX_2968_TTRX_3162,
-	BASE_HW_ISSUE_TTRX_921,
-	BASE_HW_ISSUE_TTRX_3414,
-	BASE_HW_ISSUE_TTRX_3083,
-	BASE_HW_ISSUE_TTRX_3470,
-	BASE_HW_ISSUE_TTRX_3464,
-	BASE_HW_ISSUE_TITANHW_2710,
-	BASE_HW_ISSUE_GPU2022PRO_148,
-	BASE_HW_ISSUE_TITANHW_2938,
-	BASE_HW_ISSUE_END
-};
-
-__attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tBAx[] = {
-	BASE_HW_ISSUE_5736,	      BASE_HW_ISSUE_9435,	  BASE_HW_ISSUE_TSIX_2033,
-	BASE_HW_ISSUE_TTRX_1337,      BASE_HW_ISSUE_TTRX_3414,	  BASE_HW_ISSUE_TTRX_3083,
-	BASE_HW_ISSUE_TTRX_3470,      BASE_HW_ISSUE_TTRX_3464,	  BASE_HW_ISSUE_TITANHW_2710,
-	BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_TITANHW_2938, BASE_HW_ISSUE_END
-};
-
-__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tODx_r0p0[] = {
-	BASE_HW_ISSUE_TSIX_2033,      BASE_HW_ISSUE_TTRX_1337,	  BASE_HW_ISSUE_GPU2019_3212,
-	BASE_HW_ISSUE_GPU2019_3878,   BASE_HW_ISSUE_GPU2019_3901, BASE_HW_ISSUE_TITANHW_2710,
-	BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_TITANHW_2938, BASE_HW_ISSUE_END
-};
-
-__attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tODx[] = {
-	BASE_HW_ISSUE_TSIX_2033,      BASE_HW_ISSUE_TTRX_1337,	  BASE_HW_ISSUE_GPU2019_3212,
-	BASE_HW_ISSUE_GPU2019_3878,   BASE_HW_ISSUE_GPU2019_3901, BASE_HW_ISSUE_TITANHW_2710,
-	BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_TITANHW_2938, BASE_HW_ISSUE_END
-};
-
-__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tGRx_r0p0[] = {
-	BASE_HW_ISSUE_TSIX_2033,    BASE_HW_ISSUE_TTRX_1337,	BASE_HW_ISSUE_GPU2019_3878,
-	BASE_HW_ISSUE_GPU2019_3901, BASE_HW_ISSUE_TITANHW_2710, BASE_HW_ISSUE_GPU2022PRO_148,
-	BASE_HW_ISSUE_TITANHW_2938, BASE_HW_ISSUE_END
-};
-
-__attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tGRx[] = {
-	BASE_HW_ISSUE_TSIX_2033,    BASE_HW_ISSUE_TTRX_1337,	BASE_HW_ISSUE_GPU2019_3878,
-	BASE_HW_ISSUE_GPU2019_3901, BASE_HW_ISSUE_TITANHW_2710, BASE_HW_ISSUE_GPU2022PRO_148,
-	BASE_HW_ISSUE_TITANHW_2938, BASE_HW_ISSUE_END
-};
-
-__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tVAx_r0p0[] = {
-	BASE_HW_ISSUE_TSIX_2033,    BASE_HW_ISSUE_TTRX_1337,	BASE_HW_ISSUE_GPU2019_3878,
-	BASE_HW_ISSUE_GPU2019_3901, BASE_HW_ISSUE_TITANHW_2710, BASE_HW_ISSUE_GPU2022PRO_148,
-	BASE_HW_ISSUE_TITANHW_2938, BASE_HW_ISSUE_TITANHW_2938, BASE_HW_ISSUE_END
-};
-
-__attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tVAx[] = {
-	BASE_HW_ISSUE_TSIX_2033,    BASE_HW_ISSUE_TTRX_1337,	BASE_HW_ISSUE_GPU2019_3878,
-	BASE_HW_ISSUE_GPU2019_3901, BASE_HW_ISSUE_TITANHW_2710, BASE_HW_ISSUE_GPU2022PRO_148,
-	BASE_HW_ISSUE_TITANHW_2938, BASE_HW_ISSUE_END
-};
-
-__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tTUx_r0p0[] = {
-	BASE_HW_ISSUE_TSIX_2033,      BASE_HW_ISSUE_TTRX_1337,	  BASE_HW_ISSUE_TURSEHW_1997,
-	BASE_HW_ISSUE_GPU2019_3878,   BASE_HW_ISSUE_TURSEHW_2716, BASE_HW_ISSUE_GPU2019_3901,
-	BASE_HW_ISSUE_GPU2021PRO_290, BASE_HW_ISSUE_TITANHW_2710, BASE_HW_ISSUE_TITANHW_2679,
-	BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_TITANHW_2938, BASE_HW_ISSUE_END
-};
-
-__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tTUx_r0p1[] = {
-	BASE_HW_ISSUE_TSIX_2033,
-	BASE_HW_ISSUE_TTRX_1337,
-	BASE_HW_ISSUE_TURSEHW_1997,
-	BASE_HW_ISSUE_GPU2019_3878,
-	BASE_HW_ISSUE_TURSEHW_2716,
-	BASE_HW_ISSUE_GPU2019_3901,
-	BASE_HW_ISSUE_GPU2021PRO_290,
-	BASE_HW_ISSUE_TITANHW_2710,
-	BASE_HW_ISSUE_TITANHW_2679,
-	BASE_HW_ISSUE_GPU2022PRO_148,
-	BASE_HW_ISSUE_TITANHW_2922,
-	BASE_HW_ISSUE_TITANHW_2938,
-	BASE_HW_ISSUE_END
-};
-
-__attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tTUx[] = {
-	BASE_HW_ISSUE_TSIX_2033,    BASE_HW_ISSUE_TTRX_1337,	BASE_HW_ISSUE_GPU2019_3878,
-	BASE_HW_ISSUE_TURSEHW_2716, BASE_HW_ISSUE_GPU2019_3901, BASE_HW_ISSUE_GPU2021PRO_290,
-	BASE_HW_ISSUE_TITANHW_2710, BASE_HW_ISSUE_TITANHW_2679, BASE_HW_ISSUE_GPU2022PRO_148,
-	BASE_HW_ISSUE_TITANHW_2922, BASE_HW_ISSUE_TITANHW_2938, BASE_HW_ISSUE_END
-};
-
-__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tTUx_r1p0[] = {
-	BASE_HW_ISSUE_TSIX_2033,    BASE_HW_ISSUE_TTRX_1337,	BASE_HW_ISSUE_GPU2019_3878,
-	BASE_HW_ISSUE_TURSEHW_2716, BASE_HW_ISSUE_GPU2019_3901, BASE_HW_ISSUE_GPU2021PRO_290,
-	BASE_HW_ISSUE_TITANHW_2710, BASE_HW_ISSUE_TITANHW_2679, BASE_HW_ISSUE_GPU2022PRO_148,
-	BASE_HW_ISSUE_TITANHW_2922, BASE_HW_ISSUE_TITANHW_2938, BASE_HW_ISSUE_END
-};
-
-__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tTUx_r1p1[] = {
-	BASE_HW_ISSUE_TSIX_2033,    BASE_HW_ISSUE_TTRX_1337,	BASE_HW_ISSUE_GPU2019_3878,
-	BASE_HW_ISSUE_TURSEHW_2716, BASE_HW_ISSUE_GPU2019_3901, BASE_HW_ISSUE_GPU2021PRO_290,
-	BASE_HW_ISSUE_TITANHW_2710, BASE_HW_ISSUE_TITANHW_2679, BASE_HW_ISSUE_GPU2022PRO_148,
-	BASE_HW_ISSUE_TITANHW_2922, BASE_HW_ISSUE_TITANHW_2938, BASE_HW_ISSUE_END
-};
-
-__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tTUx_r1p2[] = {
-	BASE_HW_ISSUE_TSIX_2033,    BASE_HW_ISSUE_TTRX_1337,	BASE_HW_ISSUE_GPU2019_3878,
-	BASE_HW_ISSUE_TURSEHW_2716, BASE_HW_ISSUE_GPU2019_3901, BASE_HW_ISSUE_GPU2021PRO_290,
-	BASE_HW_ISSUE_TITANHW_2710, BASE_HW_ISSUE_TITANHW_2679, BASE_HW_ISSUE_GPU2022PRO_148,
-	BASE_HW_ISSUE_TITANHW_2922, BASE_HW_ISSUE_TITANHW_2938, BASE_HW_ISSUE_END
-};
-
-__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tTUx_r1p3[] = {
-	BASE_HW_ISSUE_TSIX_2033,    BASE_HW_ISSUE_TTRX_1337,	BASE_HW_ISSUE_GPU2019_3878,
-	BASE_HW_ISSUE_TURSEHW_2716, BASE_HW_ISSUE_GPU2019_3901, BASE_HW_ISSUE_GPU2021PRO_290,
-	BASE_HW_ISSUE_TITANHW_2710, BASE_HW_ISSUE_TITANHW_2679, BASE_HW_ISSUE_GPU2022PRO_148,
-	BASE_HW_ISSUE_TITANHW_2922, BASE_HW_ISSUE_TITANHW_2938, BASE_HW_ISSUE_END
-};
-
-__attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tTIx[] = {
-	BASE_HW_ISSUE_TSIX_2033,      BASE_HW_ISSUE_TTRX_1337,	  BASE_HW_ISSUE_TURSEHW_2716,
-	BASE_HW_ISSUE_GPU2021PRO_290, BASE_HW_ISSUE_TITANHW_2710, BASE_HW_ISSUE_TITANHW_2679,
-	BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_TITANHW_2922, BASE_HW_ISSUE_TITANHW_2952,
-	BASE_HW_ISSUE_TITANHW_2938,   BASE_HW_ISSUE_END
-};
-
-__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tTIx_r0p0[] = {
-	BASE_HW_ISSUE_TSIX_2033,      BASE_HW_ISSUE_TTRX_1337,	  BASE_HW_ISSUE_TURSEHW_2716,
-	BASE_HW_ISSUE_GPU2021PRO_290, BASE_HW_ISSUE_TITANHW_2710, BASE_HW_ISSUE_TITANHW_2679,
-	BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_TITANHW_2922, BASE_HW_ISSUE_TITANHW_2952,
-	BASE_HW_ISSUE_TITANHW_2938,   BASE_HW_ISSUE_END
-};
-
-__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tTIx_r0p1[] = {
-	BASE_HW_ISSUE_TSIX_2033,      BASE_HW_ISSUE_TTRX_1337,	  BASE_HW_ISSUE_TURSEHW_2716,
-	BASE_HW_ISSUE_GPU2021PRO_290, BASE_HW_ISSUE_TITANHW_2710, BASE_HW_ISSUE_TITANHW_2679,
-	BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_TITANHW_2938, BASE_HW_ISSUE_END
-};
-
-__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tKRx_r0p0[] = {
-	BASE_HW_ISSUE_TTRX_1337,    BASE_HW_ISSUE_TURSEHW_2716, BASE_HW_ISSUE_GPU2022PRO_148,
-	BASE_HW_ISSUE_KRAKEHW_2151, BASE_HW_ISSUE_KRAKEHW_2269, BASE_HW_ISSUE_END
-};
-
-__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tKRx_r0p1[] = {
-	BASE_HW_ISSUE_TTRX_1337, BASE_HW_ISSUE_TURSEHW_2716, BASE_HW_ISSUE_GPU2022PRO_148,
-	BASE_HW_ISSUE_KRAKEHW_2269, BASE_HW_ISSUE_END
-};
-
-__attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tKRx[] = {
-	BASE_HW_ISSUE_TTRX_1337,    BASE_HW_ISSUE_TURSEHW_2716, BASE_HW_ISSUE_GPU2022PRO_148,
-	BASE_HW_ISSUE_KRAKEHW_2151, BASE_HW_ISSUE_KRAKEHW_2269, BASE_HW_ISSUE_END
-};
-
-
-#endif /* _BASE_HWCONFIG_ISSUES_H_ */
diff --git a/drivers/gpu/arm/bifrost/mali_csffw.bin b/drivers/gpu/arm/bifrost/mali_csffw.bin
index 1f8413ba14d7..4319d6b74e3b 100644
Binary files a/drivers/gpu/arm/bifrost/mali_csffw.bin and b/drivers/gpu/arm/bifrost/mali_csffw.bin differ
diff --git a/drivers/gpu/arm/bifrost/mali_kbase.h b/drivers/gpu/arm/bifrost/mali_kbase.h
index 498d53f15f9e..4d845ea08adb 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase.h
+++ b/drivers/gpu/arm/bifrost/mali_kbase.h
@@ -1,7 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
 /*
  *
- * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2024 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -204,22 +204,24 @@ int kbase_protected_mode_init(struct kbase_device *kbdev);
 void kbase_protected_mode_term(struct kbase_device *kbdev);
 
 /**
- * kbase_device_pm_init() - Performs power management initialization and
- * Verifies device tree configurations.
+ * kbase_device_backend_init() - Performs backend initialization and performs
+ * devicetree validation.
  * @kbdev: The kbase device structure for the device (must be a valid pointer)
  *
  * Return: 0 if successful, otherwise a standard Linux error code
+ * If -EPERM is returned, it means the device backend is not supported, but
+ * device initialization can continue.
  */
-int kbase_device_pm_init(struct kbase_device *kbdev);
+int kbase_device_backend_init(struct kbase_device *kbdev);
 
 /**
- * kbase_device_pm_term() - Performs power management deinitialization and
- * Free resources.
+ * kbase_device_backend_term() - Performs backend deinitialization and frees
+ * resources.
  * @kbdev: The kbase device structure for the device (must be a valid pointer)
  *
  * Clean up all the resources
  */
-void kbase_device_pm_term(struct kbase_device *kbdev);
+void kbase_device_backend_term(struct kbase_device *kbdev);
 
 int power_control_init(struct kbase_device *kbdev);
 void power_control_term(struct kbase_device *kbdev);
@@ -343,21 +345,6 @@ void kbase_job_done(struct kbase_device *kbdev, u32 done);
 void kbase_job_slot_ctx_priority_check_locked(struct kbase_context *kctx,
 					      struct kbase_jd_atom *katom);
 
-/**
- * kbase_job_slot_softstop_start_rp() - Soft-stop the atom at the start
- *                                      of a renderpass.
- * @kctx: Pointer to a kernel base context.
- * @reg:  Reference of a growable GPU memory region in the same context.
- *        Takes ownership of the reference if successful.
- *
- * Used to switch to incremental rendering if we have nearly run out of
- * virtual address space in a growable memory region and the atom currently
- * executing on a job slot is the tiler job chain at the start of a renderpass.
- *
- * Return: 0 if successful, otherwise a negative error code.
- */
-int kbase_job_slot_softstop_start_rp(struct kbase_context *kctx, struct kbase_va_region *reg);
-
 /**
  * kbase_job_slot_softstop - Soft-stop the specified job slot
  *
@@ -494,9 +481,7 @@ void kbasep_as_do_poke(struct work_struct *work);
  * or a dmb was executed recently (to ensure the value is most up-to-date).
  * However, without a lock the value could change afterwards.
  *
- * Return:
- * * false if a suspend is not in progress
- * * !=false otherwise
+ * Return: false if a suspend is not in progress, true otherwise.
  */
 static inline bool kbase_pm_is_suspending(struct kbase_device *kbdev)
 {
@@ -519,21 +504,20 @@ static inline bool kbase_pm_is_resuming(struct kbase_device *kbdev)
 	return kbdev->pm.resuming;
 }
 
-#ifdef CONFIG_MALI_ARBITER_SUPPORT
 /*
  * Check whether a gpu lost is in progress
  *
  * @kbdev: The kbase device structure for the device (must be a valid pointer)
  *
  * Indicates whether a gpu lost has been received and jobs are no longer
- * being scheduled
+ * being scheduled.
  *
- * Return: false if gpu is lost
- * Return: != false otherwise
+ * Return: true if an Arbiter is present and the GPU has been lost, false otherwise
+ *         (if no Arbiter is present the GPU will always be present).
  */
 static inline bool kbase_pm_is_gpu_lost(struct kbase_device *kbdev)
 {
-	return (atomic_read(&kbdev->pm.gpu_lost) == 0 ? false : true);
+	return (kbdev->arb.arb_if && ((bool)atomic_read(&kbdev->pm.gpu_lost)));
 }
 
 /*
@@ -554,7 +538,6 @@ static inline void kbase_pm_set_gpu_lost(struct kbase_device *kbdev, bool gpu_lo
 	if (new_val != cur_val)
 		KBASE_KTRACE_ADD(kbdev, ARB_GPU_LOST, NULL, (u64)new_val);
 }
-#endif
 
 /**
  * kbase_pm_is_active - Determine whether the GPU is active
@@ -812,108 +795,8 @@ bool kbasep_adjust_prioritized_process(struct kbase_device *kbdev, bool add, uin
 #define UINT64_MAX ((uint64_t)0xFFFFFFFFFFFFFFFFULL)
 #endif
 
-/**
- * kbase_file_fops_count() - Get the kfile::fops_count value
- *
- * @kfile: Pointer to the object representing the mali device file.
- *
- * The value is read with kfile::lock held.
- *
- * Return: sampled value of kfile::fops_count.
- */
-static inline int kbase_file_fops_count(struct kbase_file *kfile)
-{
-	int fops_count;
-
-	spin_lock(&kfile->lock);
-	fops_count = kfile->fops_count;
-	spin_unlock(&kfile->lock);
-
-	return fops_count;
-}
-
-/**
- * kbase_file_inc_fops_count_unless_closed() - Increment the kfile::fops_count value if the
- *                                             kfile::owner is still set.
- *
- * @kfile: Pointer to the object representing the /dev/malixx device file instance.
- *
- * Return: true if the increment was done otherwise false.
- */
-static inline bool kbase_file_inc_fops_count_unless_closed(struct kbase_file *kfile)
-{
-	bool count_incremented = false;
-
-	spin_lock(&kfile->lock);
-	if (kfile->owner) {
-		kfile->fops_count++;
-		count_incremented = true;
-	}
-	spin_unlock(&kfile->lock);
-
-	return count_incremented;
-}
-
-/**
- * kbase_file_dec_fops_count() - Decrement the kfile::fops_count value
- *
- * @kfile: Pointer to the object representing the /dev/malixx device file instance.
- *
- * This function shall only be called to decrement kfile::fops_count if a successful call
- * to kbase_file_inc_fops_count_unless_closed() was made previously by the current thread.
- *
- * The function would enqueue the kfile::destroy_kctx_work if the process that originally
- * created the file instance has closed its copy and no Kbase handled file operations are
- * in progress and no memory mappings are present for the file instance.
- */
-static inline void kbase_file_dec_fops_count(struct kbase_file *kfile)
-{
-	spin_lock(&kfile->lock);
-	WARN_ON_ONCE(kfile->fops_count <= 0);
-	kfile->fops_count--;
-	if (unlikely(!kfile->fops_count && !kfile->owner && !kfile->map_count)) {
-		queue_work(system_wq, &kfile->destroy_kctx_work);
-#if IS_ENABLED(CONFIG_DEBUG_FS)
-		wake_up(&kfile->zero_fops_count_wait);
+#if !defined(UINT32_MAX)
+#define UINT32_MAX ((uint32_t)0xFFFFFFFFU)
 #endif
-	}
-	spin_unlock(&kfile->lock);
-}
-
-/**
- * kbase_file_inc_cpu_mapping_count() - Increment the kfile::map_count value.
- *
- * @kfile: Pointer to the object representing the /dev/malixx device file instance.
- *
- * This function shall be called when the memory mapping on /dev/malixx device file
- * instance is created. The kbase_file::setup_state shall be KBASE_FILE_COMPLETE.
- */
-static inline void kbase_file_inc_cpu_mapping_count(struct kbase_file *kfile)
-{
-	spin_lock(&kfile->lock);
-	kfile->map_count++;
-	spin_unlock(&kfile->lock);
-}
-
-/**
- * kbase_file_dec_cpu_mapping_count() - Decrement the kfile::map_count value
- *
- * @kfile: Pointer to the object representing the /dev/malixx device file instance.
- *
- * This function is called to decrement kfile::map_count value when the memory mapping
- * on /dev/malixx device file is closed.
- * The function would enqueue the kfile::destroy_kctx_work if the process that originally
- * created the file instance has closed its copy and there are no mappings present and no
- * Kbase handled file operations are in progress for the file instance.
- */
-static inline void kbase_file_dec_cpu_mapping_count(struct kbase_file *kfile)
-{
-	spin_lock(&kfile->lock);
-	WARN_ON_ONCE(kfile->map_count <= 0);
-	kfile->map_count--;
-	if (unlikely(!kfile->map_count && !kfile->owner && !kfile->fops_count))
-		queue_work(system_wq, &kfile->destroy_kctx_work);
-	spin_unlock(&kfile->lock);
-}
 
 #endif
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_caps.h b/drivers/gpu/arm/bifrost/mali_kbase_caps.h
index a92569d31f06..000e30e1ed84 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_caps.h
+++ b/drivers/gpu/arm/bifrost/mali_kbase_caps.h
@@ -1,7 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
 /*
  *
- * (C) COPYRIGHT 2020-2023 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2020-2024 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -33,15 +33,40 @@
  *
  * @MALI_KBASE_CAP_SYSTEM_MONITOR: System Monitor
  * @MALI_KBASE_CAP_JIT_PRESSURE_LIMIT: JIT Pressure limit
- * @MALI_KBASE_CAP_MEM_GROW_ON_GPF: Memory grow on page fault
- * @MALI_KBASE_CAP_MEM_PROTECTED: Protected memory
+ * @MALI_KBASE_CAP_QUERY_MEM_DONT_NEED: BASE_MEM_DONT_NEED is queryable
+ * @MALI_KBASE_CAP_QUERY_MEM_GROW_ON_GPF: BASE_MEM_GROW_ON_GPF is queryable
+ * @MALI_KBASE_CAP_QUERY_MEM_PROTECTED: BASE_MEM_PROTECTED is queryable
+ * @MALI_KBASE_CAP_QUERY_MEM_IMPORT_SYNC_ON_MAP_UNMAP: BASE_MEM_IMPORT_SYNC_ON_MAP_UNMAP is
+ *                                                     queryable
+ * @MALI_KBASE_CAP_QUERY_MEM_KERNEL_SYNC: BASE_MEM_KERNEL_SYNC is queryable
+ * @MALI_KBASE_CAP_QUERY_MEM_SAME_VA: BASE_MEM_SAME_VA is queryable
+ * @MALI_KBASE_CAP_REJECT_ALLOC_MEM_DONT_NEED: BASE_MEM_DONT_NEED is not allocatable
+ * @MALI_KBASE_CAP_REJECT_ALLOC_MEM_PROTECTED_IN_UNPROTECTED_ALLOCS: BASE_MEM_PROTECTED is not
+ *                                                                   allocatable in functions other
+ *                                                                   than base_mem_protected
+ * @MALI_KBASE_CAP_REJECT_ALLOC_MEM_UNUSED_BIT_8: BASE_MEM_UNUSED_BIT_8 is not allocatable
+ * @MALI_KBASE_CAP_REJECT_ALLOC_MEM_UNUSED_BIT_19: BASE_MEM_UNUSED_BIT_19 is not allocatable
+ * @MALI_KBASE_CAP_REJECT_ALLOC_MEM_UNUSED_BIT_20: BASE_MEM_UNUSED_BIT_20 is not allocatable
+ * @MALI_KBASE_CAP_REJECT_ALLOC_MEM_UNUSED_BIT_27: BASE_MEM_UNUSED_BIT_27 is not allocatable
  * @MALI_KBASE_NUM_CAPS: Delimiter
+ *
+ * New enumerators must be non-negative and smaller than @MALI_KBASE_NUM_CAPS.
  */
 enum mali_kbase_cap {
 	MALI_KBASE_CAP_SYSTEM_MONITOR = 0,
 	MALI_KBASE_CAP_JIT_PRESSURE_LIMIT,
-	MALI_KBASE_CAP_MEM_GROW_ON_GPF,
-	MALI_KBASE_CAP_MEM_PROTECTED,
+	MALI_KBASE_CAP_QUERY_MEM_DONT_NEED,
+	MALI_KBASE_CAP_QUERY_MEM_GROW_ON_GPF,
+	MALI_KBASE_CAP_QUERY_MEM_PROTECTED,
+	MALI_KBASE_CAP_QUERY_MEM_IMPORT_SYNC_ON_MAP_UNMAP,
+	MALI_KBASE_CAP_QUERY_MEM_KERNEL_SYNC,
+	MALI_KBASE_CAP_QUERY_MEM_SAME_VA,
+	MALI_KBASE_CAP_REJECT_ALLOC_MEM_DONT_NEED,
+	MALI_KBASE_CAP_REJECT_ALLOC_MEM_PROTECTED_IN_UNPROTECTED_ALLOCS,
+	MALI_KBASE_CAP_REJECT_ALLOC_MEM_UNUSED_BIT_8,
+	MALI_KBASE_CAP_REJECT_ALLOC_MEM_UNUSED_BIT_19,
+	MALI_KBASE_CAP_REJECT_ALLOC_MEM_UNUSED_BIT_20,
+	MALI_KBASE_CAP_REJECT_ALLOC_MEM_UNUSED_BIT_27,
 	MALI_KBASE_NUM_CAPS
 };
 
@@ -57,14 +82,67 @@ static inline bool mali_kbase_supports_jit_pressure_limit(unsigned long api_vers
 	return mali_kbase_supports_cap(api_version, MALI_KBASE_CAP_JIT_PRESSURE_LIMIT);
 }
 
-static inline bool mali_kbase_supports_mem_grow_on_gpf(unsigned long api_version)
+static inline bool mali_kbase_supports_query_mem_dont_need(unsigned long api_version)
 {
-	return mali_kbase_supports_cap(api_version, MALI_KBASE_CAP_MEM_GROW_ON_GPF);
+	return mali_kbase_supports_cap(api_version, MALI_KBASE_CAP_QUERY_MEM_DONT_NEED);
 }
 
-static inline bool mali_kbase_supports_mem_protected(unsigned long api_version)
+static inline bool mali_kbase_supports_query_mem_grow_on_gpf(unsigned long api_version)
 {
-	return mali_kbase_supports_cap(api_version, MALI_KBASE_CAP_MEM_PROTECTED);
+	return mali_kbase_supports_cap(api_version, MALI_KBASE_CAP_QUERY_MEM_GROW_ON_GPF);
+}
+
+static inline bool mali_kbase_supports_query_mem_protected(unsigned long api_version)
+{
+	return mali_kbase_supports_cap(api_version, MALI_KBASE_CAP_QUERY_MEM_PROTECTED);
+}
+
+static inline bool mali_kbase_supports_query_mem_import_sync_on_map_unmap(unsigned long api_version)
+{
+	return mali_kbase_supports_cap(api_version,
+				       MALI_KBASE_CAP_QUERY_MEM_IMPORT_SYNC_ON_MAP_UNMAP);
+}
+
+static inline bool mali_kbase_supports_query_mem_kernel_sync(unsigned long api_version)
+{
+	return mali_kbase_supports_cap(api_version, MALI_KBASE_CAP_QUERY_MEM_KERNEL_SYNC);
+}
+
+static inline bool mali_kbase_supports_query_mem_same_va(unsigned long api_version)
+{
+	return mali_kbase_supports_cap(api_version, MALI_KBASE_CAP_QUERY_MEM_SAME_VA);
+}
+
+static inline bool mali_kbase_supports_reject_alloc_mem_dont_need(unsigned long api_version)
+{
+	return mali_kbase_supports_cap(api_version, MALI_KBASE_CAP_REJECT_ALLOC_MEM_DONT_NEED);
+}
+
+static inline bool
+mali_kbase_supports_reject_alloc_mem_protected_in_unprotected_allocs(unsigned long api_version)
+{
+	return mali_kbase_supports_cap(
+		api_version, MALI_KBASE_CAP_REJECT_ALLOC_MEM_PROTECTED_IN_UNPROTECTED_ALLOCS);
+}
+
+static inline bool mali_kbase_supports_reject_alloc_mem_unused_bit_8(unsigned long api_version)
+{
+	return mali_kbase_supports_cap(api_version, MALI_KBASE_CAP_REJECT_ALLOC_MEM_UNUSED_BIT_8);
+}
+
+static inline bool mali_kbase_supports_reject_alloc_mem_unused_bit_19(unsigned long api_version)
+{
+	return mali_kbase_supports_cap(api_version, MALI_KBASE_CAP_REJECT_ALLOC_MEM_UNUSED_BIT_19);
+}
+
+static inline bool mali_kbase_supports_reject_alloc_mem_unused_bit_20(unsigned long api_version)
+{
+	return mali_kbase_supports_cap(api_version, MALI_KBASE_CAP_REJECT_ALLOC_MEM_UNUSED_BIT_20);
+}
+
+static inline bool mali_kbase_supports_reject_alloc_mem_unused_bit_27(unsigned long api_version)
+{
+	return mali_kbase_supports_cap(api_version, MALI_KBASE_CAP_REJECT_ALLOC_MEM_UNUSED_BIT_27);
 }
 
 #endif /* __KBASE_CAPS_H_ */
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_config.h b/drivers/gpu/arm/bifrost/mali_kbase_config.h
index 7233e2dd3920..2f9e28aaec9a 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_config.h
+++ b/drivers/gpu/arm/bifrost/mali_kbase_config.h
@@ -1,7 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
 /*
  *
- * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2024 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -166,8 +166,9 @@ struct kbase_pm_callback_conf {
 	 *
 	 * The system integrator can decide whether to either do nothing, just switch off
 	 * the clocks to the GPU, or to completely power down the GPU.
-	 * The platform specific private pointer kbase_device::platform_context can be accessed and modified in here. It is the
-	 * platform \em callbacks responsibility to initialize and terminate this pointer if used (see @ref kbase_platform_funcs_conf).
+	 * The platform specific private pointer kbase_device::platform_context can be
+	 * accessed and modified in here. It is the platform \em callbacks responsibility
+	 * to initialize and terminate this pointer if used (see @ref kbase_platform_funcs_conf).
 	 *
 	 * If runtime PM is enabled and @power_runtime_gpu_idle_callback is used
 	 * then this callback should power off the GPU (or switch off the clocks
@@ -179,15 +180,18 @@ struct kbase_pm_callback_conf {
 
 	/** Callback for when the GPU is about to become active and power must be supplied.
 	 *
-	 * This function must not return until the GPU is powered and clocked sufficiently for register access to
-	 * succeed.  The return value specifies whether the GPU was powered down since the call to power_off_callback.
-	 * If the GPU state has been lost then this function must return 1, otherwise it should return 0.
-	 * The platform specific private pointer kbase_device::platform_context can be accessed and modified in here. It is the
-	 * platform \em callbacks responsibility to initialize and terminate this pointer if used (see @ref kbase_platform_funcs_conf).
+	 * This function must not return until the GPU is powered and clocked sufficiently
+	 * for register access to succeed. The return value specifies whether the GPU was
+	 * powered down since the call to power_off_callback.
+	 * If the GPU is in reset state it should return 2, if the GPU state has been lost
+	 * then this function must return 1, otherwise it should return 0.
+	 * The platform specific private pointer kbase_device::platform_context can be
+	 * accessed and modified in here. It is the platform \em callbacks responsibility
+	 * to initialize and terminate this pointer if used (see @ref kbase_platform_funcs_conf).
 	 *
 	 * The return value of the first call to this function is ignored.
 	 *
-	 * @return 1 if the GPU state may have been lost, 0 otherwise.
+	 * @return 2 if GPU in reset state, 1 if the GPU state may have been lost, 0 otherwise.
 	 */
 	int (*power_on_callback)(struct kbase_device *kbdev);
 
@@ -223,9 +227,11 @@ struct kbase_pm_callback_conf {
 
 	/** Callback for handling runtime power management initialization.
 	 *
-	 * The runtime power management callbacks @ref power_runtime_off_callback and @ref power_runtime_on_callback
-	 * will become active from calls made to the OS from within this function.
-	 * The runtime calls can be triggered by calls from @ref power_off_callback and @ref power_on_callback.
+	 * The runtime power management callbacks @ref power_runtime_off_callback
+	 * and @ref power_runtime_on_callback will become active from calls made
+	 * to the OS from within this function.
+	 * The runtime calls can be triggered by calls from @ref power_off_callback
+	 * and @ref power_on_callback.
 	 * Note: for linux the kernel must have CONFIG_PM_RUNTIME enabled to use this feature.
 	 *
 	 * @return 0 on success, else int error code.
@@ -234,8 +240,9 @@ struct kbase_pm_callback_conf {
 
 	/** Callback for handling runtime power management termination.
 	 *
-	 * The runtime power management callbacks @ref power_runtime_off_callback and @ref power_runtime_on_callback
-	 * should no longer be called by the OS on completion of this function.
+	 * The runtime power management callbacks @ref power_runtime_off_callback
+	 * and @ref power_runtime_on_callback should no longer be called by the
+	 * OS on completion of this function.
 	 * Note: for linux the kernel must have CONFIG_PM_RUNTIME enabled to use this feature.
 	 */
 	void (*power_runtime_term_callback)(struct kbase_device *kbdev);
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_config_defaults.h b/drivers/gpu/arm/bifrost/mali_kbase_config_defaults.h
index 20003c852863..7657c25d565c 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_config_defaults.h
+++ b/drivers/gpu/arm/bifrost/mali_kbase_config_defaults.h
@@ -1,7 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
 /*
  *
- * (C) COPYRIGHT 2013-2023 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2013-2024 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -194,9 +194,22 @@ enum {
  */
 #define CSF_CSG_SUSPEND_TIMEOUT_CYCLES (3100000000ull)
 
+/* Waiting timeout in clock cycles for GPU suspend to complete. */
+#define CSF_GPU_SUSPEND_TIMEOUT_CYCLES (CSF_CSG_SUSPEND_TIMEOUT_CYCLES)
+
 /* Waiting timeout in clock cycles for GPU reset to complete. */
 #define CSF_GPU_RESET_TIMEOUT_CYCLES (CSF_CSG_SUSPEND_TIMEOUT_CYCLES * 2)
 
+/* Waiting timeout in clock cycles for a CSG to be terminated.
+ *
+ * Based on 0.6s timeout at 100MHz, scaled from 0.1s at 600MHz GPU frequency,
+ * which is the timeout defined in FW to wait for the iterator to complete the
+ * transition to the DISABLED state.
+ * More cycles (0.4s @ 100MHz = 40000000) are added to ensure that the
+ * host timeout is always bigger than the FW timeout.
+ */
+#define CSF_CSG_TERM_TIMEOUT_CYCLES (100000000)
+
 /* Waiting timeout in clock cycles for GPU firmware to boot.
  *
  * Based on 250ms timeout at 100MHz, scaled from a 50MHz GPU system.
@@ -213,7 +226,10 @@ enum {
  *
  * Based on 10s timeout at 100MHz, scaled from a 50MHz GPU system.
  */
-#if IS_ENABLED(CONFIG_MALI_IS_FPGA)
+#if IS_ENABLED(CONFIG_MALI_VECTOR_DUMP)
+/* Set a large value to avoid timing out while vector dumping */
+#define KCPU_FENCE_SIGNAL_TIMEOUT_CYCLES (250000000000ull)
+#elif IS_ENABLED(CONFIG_MALI_IS_FPGA)
 #define KCPU_FENCE_SIGNAL_TIMEOUT_CYCLES (2500000000ull)
 #else
 #define KCPU_FENCE_SIGNAL_TIMEOUT_CYCLES (1000000000ull)
@@ -239,6 +255,42 @@ enum {
  */
 #define DEFAULT_PROGRESS_TIMEOUT_CYCLES (2500000000ull)
 
+/* Minimum value of the iterators' suspend timeout */
+#define CSG_SUSPEND_TIMEOUT_FIRMWARE_MS_MIN (200)
+#if CSG_SUSPEND_TIMEOUT_FIRMWARE_MS_MIN <= 0
+#error "CSG_SUSPEND_TIMEOUT_FIRMWARE_MS_MIN should be larger than 0"
+#endif
+
+/* Maximum value of the iterators' suspend timeout */
+#define CSG_SUSPEND_TIMEOUT_FIRMWARE_MS_MAX (60000)
+#if CSG_SUSPEND_TIMEOUT_FIRMWARE_MS_MAX >= (0xFFFFFFFF)
+#error "CSG_SUSPEND_TIMEOUT_FIRMWARE_MS_MAX should be less than U32_MAX"
+#endif
+
+/* Firmware iterators' suspend timeout: default 4000ms on real (non-FPGA) hardware,
+ * 31000ms otherwise. Customer can update this by using debugfs -- csg_suspend_timeout
+ */
+#if IS_ENABLED(CONFIG_MALI_REAL_HW) && !IS_ENABLED(CONFIG_MALI_IS_FPGA)
+#define CSG_SUSPEND_TIMEOUT_FIRMWARE_MS (4000)
+#else
+#define CSG_SUSPEND_TIMEOUT_FIRMWARE_MS (31000)
+#endif
+#if (CSG_SUSPEND_TIMEOUT_FIRMWARE_MS < CSG_SUSPEND_TIMEOUT_FIRMWARE_MS_MIN) || \
+	(CSG_SUSPEND_TIMEOUT_FIRMWARE_MS > CSG_SUSPEND_TIMEOUT_FIRMWARE_MS_MAX)
+#error "CSG_SUSPEND_TIMEOUT_FIRMWARE_MS is out of range"
+#endif
+
+/* Additional time in milliseconds added to the firmware iterators' suspend timeout,
+ * default 100ms
+ */
+#define CSG_SUSPEND_TIMEOUT_HOST_ADDED_MS (100)
+
+/* Host side CSG suspend timeout */
+#define CSG_SUSPEND_TIMEOUT_MS (CSG_SUSPEND_TIMEOUT_FIRMWARE_MS + CSG_SUSPEND_TIMEOUT_HOST_ADDED_MS)
+
+/* MAX allowed timeout value(ms) on host side, should be less than ANR timeout */
+#define MAX_TIMEOUT_MS (4500)
+
 #else /* MALI_USE_CSF */
 
 /* A default timeout in clock cycles to be used when an invalid timeout
@@ -311,14 +363,6 @@ enum {
  */
 #define DEFAULT_PROGRESS_TIMEOUT ((u64)5 * 500 * 1024 * 1024)
 
-/* Default threshold at which to switch to incremental rendering
- *
- * Fraction of the maximum size of an allocation that grows on GPU page fault
- * that can be used up before the driver switches to incremental rendering,
- * in 256ths. 0 means disable incremental rendering.
- */
-#define DEFAULT_IR_THRESHOLD (192)
-
 /* Waiting time in clock cycles for the completion of a MMU operation.
  *
  * Ideally 1.6M GPU cycles required for the L2 cache (512KiB slice) flush.
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_core_linux.c b/drivers/gpu/arm/bifrost/mali_kbase_core_linux.c
index 237a3b829be9..9f88d4f3fbc6 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_core_linux.c
+++ b/drivers/gpu/arm/bifrost/mali_kbase_core_linux.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
 /*
  *
- * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2024 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -62,9 +62,7 @@
 #include "csf/mali_kbase_csf_cpu_queue.h"
 #include "csf/mali_kbase_csf_event.h"
 #endif
-#ifdef CONFIG_MALI_ARBITER_SUPPORT
 #include "arbiter/mali_kbase_arbiter_pm.h"
-#endif
 
 #include "mali_kbase_cs_experimental.h"
 
@@ -76,6 +74,7 @@
 #if IS_ENABLED(CONFIG_DEBUG_FS)
 #include "mali_kbase_pbha_debugfs.h"
 #endif
+#include "mali_kbase_ioctl_helpers.h"
 
 #include <linux/module.h>
 #include <linux/init.h>
@@ -106,6 +105,7 @@
 #include <linux/clk-provider.h>
 #include <linux/delay.h>
 #include <linux/log2.h>
+#include <linux/mali_hw_access.h>
 
 #include <mali_kbase_config.h>
 
@@ -152,13 +152,29 @@ static const struct mali_kbase_capability_def kbase_caps_table[MALI_KBASE_NUM_CA
 #if MALI_USE_CSF
 	{ 1, 0 }, /* SYSTEM_MONITOR */
 	{ 1, 0 }, /* JIT_PRESSURE_LIMIT */
-	{ 1, 0 }, /* MEM_GROW_ON_GPF */
-	{ 1, 0 } /* MEM_PROTECTED */
+	{ 1, 22 }, /* QUERY_MEM_DONT_NEED */
+	{ 1, 0 }, /* QUERY_MEM_GROW_ON_GPF */
+	{ 1, 0 }, /* QUERY_MEM_PROTECTED */
+	{ 1, 26 }, /* QUERY_MEM_IMPORT_SYNC_ON_MAP_UNMAP */
+	{ 1, 26 }, /* QUERY_MEM_KERNEL_SYNC */
+	{ 1, 28 }, /* QUERY_MEM_SAME_VA */
+	{ 1, 31 }, /* REJECT_ALLOC_MEM_DONT_NEED */
+	{ 1, 31 }, /* REJECT_ALLOC_MEM_PROTECTED_IN_UNPROTECTED_ALLOCS */
+	[MALI_KBASE_CAP_REJECT_ALLOC_MEM_UNUSED_BIT_20] = { 1, 31 }, /* skips BIT_8/BIT_19 */
+	[MALI_KBASE_CAP_REJECT_ALLOC_MEM_UNUSED_BIT_27] = { 1, 31 }
 #else
 	{ 11, 15 }, /* SYSTEM_MONITOR */
 	{ 11, 25 }, /* JIT_PRESSURE_LIMIT */
-	{ 11, 2 }, /* MEM_GROW_ON_GPF */
-	{ 11, 2 } /* MEM_PROTECTED */
+	{ 11, 40 }, /* QUERY_MEM_DONT_NEED */
+	{ 11, 2 }, /* QUERY_MEM_GROW_ON_GPF */
+	{ 11, 2 }, /* QUERY_MEM_PROTECTED */
+	{ 11, 43 }, /* QUERY_MEM_IMPORT_SYNC_ON_MAP_UNMAP */
+	{ 11, 43 }, /* QUERY_MEM_KERNEL_SYNC */
+	{ 11, 44 }, /* QUERY_MEM_SAME_VA */
+	{ 11, 46 }, /* REJECT_ALLOC_MEM_DONT_NEED */
+	{ 11, 46 }, /* REJECT_ALLOC_MEM_PROTECTED_IN_UNPROTECTED_ALLOCS */
+	[MALI_KBASE_CAP_REJECT_ALLOC_MEM_UNUSED_BIT_8] = { 11, 46 },
+	[MALI_KBASE_CAP_REJECT_ALLOC_MEM_UNUSED_BIT_19] = { 11, 46 }
 #endif
 };
 
@@ -167,13 +183,11 @@ static const struct mali_kbase_capability_def kbase_caps_table[MALI_KBASE_NUM_CA
 static struct mutex kbase_probe_mutex;
 #endif
 
-static void kbase_file_destroy_kctx_worker(struct work_struct *work);
-
 /**
  * mali_kbase_supports_cap - Query whether a kbase capability is supported
  *
  * @api_version: API version to convert
- * @cap:         Capability to query for - see mali_kbase_caps.h
+ * @cap:         Capability to query for - see mali_kbase_caps.h. Shouldn't be negative.
  *
  * Return: true if the capability is supported
  */
@@ -184,13 +198,10 @@ bool mali_kbase_supports_cap(unsigned long api_version, enum mali_kbase_cap cap)
 
 	struct mali_kbase_capability_def const *cap_def;
 
-	if (WARN_ON(cap < 0))
-		return false;
-
 	if (WARN_ON(cap >= MALI_KBASE_NUM_CAPS))
 		return false;
 
-	cap_def = &kbase_caps_table[(int)cap];
+	cap_def = &kbase_caps_table[cap];
 	required_ver = KBASE_API_VERSION(cap_def->required_major, cap_def->required_minor);
 	supported = (api_version >= required_ver);
 
@@ -212,7 +223,7 @@ bool mali_kbase_supports_cap(unsigned long api_version, enum mali_kbase_cap cap)
  * Return: Address of an object representing a simulated device file, or NULL
  *         on failure.
  *
- * Note: This function always gets called in Userspace context.
+ * Note: This function shall always be called in Userspace context.
  */
 static struct kbase_file *kbase_file_new(struct kbase_device *const kbdev, struct file *const filp)
 {
@@ -224,17 +235,6 @@ static struct kbase_file *kbase_file_new(struct kbase_device *const kbdev, struc
 		kfile->kctx = NULL;
 		kfile->api_version = 0;
 		atomic_set(&kfile->setup_state, KBASE_FILE_NEED_VSN);
-		/* Store the pointer to the file table structure of current process. */
-		kfile->owner = current->files;
-		INIT_WORK(&kfile->destroy_kctx_work, kbase_file_destroy_kctx_worker);
-		spin_lock_init(&kfile->lock);
-		kfile->fops_count = 0;
-		kfile->map_count = 0;
-		typecheck(typeof(kfile->map_count), typeof(current->mm->map_count));
-#if IS_ENABLED(CONFIG_DEBUG_FS)
-		init_waitqueue_head(&kfile->zero_fops_count_wait);
-#endif
-		init_waitqueue_head(&kfile->event_queue);
 	}
 	return kfile;
 }
@@ -313,33 +313,6 @@ static unsigned long kbase_file_get_api_version(struct kbase_file *const kfile)
  */
 static int kbase_file_create_kctx(struct kbase_file *kfile, base_context_create_flags flags);
 
-/**
- * kbase_file_inc_fops_count_if_allowed - Increment the kfile::fops_count value if the file
- *                                        operation is allowed for the current process.
- *
- * @kfile: Pointer to the object representing the /dev/malixx device file instance.
- *
- * The function shall be called at the beginning of certain file operation methods
- * implemented for @kbase_fops, like ioctl, poll, read and mmap.
- *
- * kbase_file_dec_fops_count() shall be called if the increment was done.
- *
- * Return: true if the increment was done otherwise false.
- *
- * Note: This function shall always be called in Userspace context.
- */
-static bool kbase_file_inc_fops_count_if_allowed(struct kbase_file *const kfile)
-{
-	/* Disallow file operations from the other process that shares the instance
-	 * of /dev/malixx file i.e. 'kfile' or disallow file operations if parent
-	 * process has closed the file instance.
-	 */
-	if (unlikely(kfile->owner != current->files))
-		return false;
-
-	return kbase_file_inc_fops_count_unless_closed(kfile);
-}
-
 /**
  * kbase_file_get_kctx_if_setup_complete - Get a kernel base context
  *                                         pointer from a device file
@@ -352,8 +325,6 @@ static bool kbase_file_inc_fops_count_if_allowed(struct kbase_file *const kfile)
  *
  * Return: Address of the kernel base context associated with the @kfile, or
  *         NULL if no context exists.
- *
- * Note: This function shall always be called in Userspace context.
  */
 static struct kbase_context *kbase_file_get_kctx_if_setup_complete(struct kbase_file *const kfile)
 {
@@ -364,103 +335,38 @@ static struct kbase_context *kbase_file_get_kctx_if_setup_complete(struct kbase_
 	return kfile->kctx;
 }
 
-/**
- * kbase_file_destroy_kctx - Destroy the Kbase context created for @kfile.
- *
- * @kfile: A device file created by kbase_file_new()
- */
-static void kbase_file_destroy_kctx(struct kbase_file *const kfile)
-{
-	if (atomic_cmpxchg(&kfile->setup_state, KBASE_FILE_COMPLETE, KBASE_FILE_DESTROY_CTX) !=
-	    KBASE_FILE_COMPLETE)
-		return;
-
-#if IS_ENABLED(CONFIG_DEBUG_FS)
-	kbasep_mem_profile_debugfs_remove(kfile->kctx);
-	kbase_context_debugfs_term(kfile->kctx);
-#endif
-
-	kbase_destroy_context(kfile->kctx);
-	dev_dbg(kfile->kbdev->dev, "Deleted kbase context");
-}
-
-/**
- * kbase_file_destroy_kctx_worker - Work item to destroy the Kbase context.
- *
- * @work: Pointer to the kfile::destroy_kctx_work.
- *
- * The work item shall only be enqueued if the context termination could not
- * be done from @kbase_flush().
- */
-static void kbase_file_destroy_kctx_worker(struct work_struct *work)
-{
-	struct kbase_file *kfile = container_of(work, struct kbase_file, destroy_kctx_work);
-
-	WARN_ON_ONCE(kfile->owner);
-	WARN_ON_ONCE(kfile->map_count);
-	WARN_ON_ONCE(kfile->fops_count);
-
-	kbase_file_destroy_kctx(kfile);
-}
-
-/**
- * kbase_file_destroy_kctx_on_flush - Try destroy the Kbase context from the flush()
- *                                    method of @kbase_fops.
- *
- * @kfile: A device file created by kbase_file_new()
- */
-static void kbase_file_destroy_kctx_on_flush(struct kbase_file *const kfile)
-{
-	bool can_destroy_context = false;
-
-	spin_lock(&kfile->lock);
-	kfile->owner = NULL;
-	/* To destroy the context from flush() method, unlike the release()
-	 * method, need to synchronize manually against the other threads in
-	 * the current process that could be operating on the /dev/malixx file.
-	 *
-	 * Only destroy the context if all the memory mappings on the
-	 * /dev/malixx file instance have been closed. If there are mappings
-	 * present then the context would be destroyed later when the last
-	 * mapping is closed.
-	 * Also, only destroy the context if no file operations are in progress.
-	 */
-	can_destroy_context = !kfile->map_count && !kfile->fops_count;
-	spin_unlock(&kfile->lock);
-
-	if (likely(can_destroy_context)) {
-		WARN_ON_ONCE(work_pending(&kfile->destroy_kctx_work));
-		kbase_file_destroy_kctx(kfile);
-	}
-}
-
 /**
  * kbase_file_delete - Destroy an object representing a device file
  *
  * @kfile: A device file created by kbase_file_new()
  *
- * If any context was created for the @kfile and is still alive, then it is destroyed.
+ * If any context was created for the @kfile then it is destroyed.
  */
 static void kbase_file_delete(struct kbase_file *const kfile)
 {
+	struct kbase_device *kbdev = NULL;
+
 	if (WARN_ON(!kfile))
 		return;
 
-	/* All the CPU mappings on the device file should have been closed */
-	WARN_ON_ONCE(kfile->map_count);
-#if IS_ENABLED(CONFIG_DEBUG_FS)
-	/* There could still be file operations due to the debugfs file (mem_view) */
-	wait_event(kfile->zero_fops_count_wait, !kbase_file_fops_count(kfile));
-#else
-	/* There shall not be any file operations in progress on the device file */
-	WARN_ON_ONCE(kfile->fops_count);
-#endif
-
 	kfile->filp->private_data = NULL;
-	cancel_work_sync(&kfile->destroy_kctx_work);
-	/* Destroy the context if it wasn't done earlier from the flush() method. */
-	kbase_file_destroy_kctx(kfile);
-	kbase_release_device(kfile->kbdev);
+	kbdev = kfile->kbdev;
+
+	if (atomic_read(&kfile->setup_state) == KBASE_FILE_COMPLETE) {
+		struct kbase_context *kctx = kfile->kctx;
+
+#if IS_ENABLED(CONFIG_DEBUG_FS)
+		kbasep_mem_profile_debugfs_remove(kctx);
+#endif
+		kbase_context_debugfs_term(kctx);
+
+		kbase_destroy_context(kctx);
+
+		dev_dbg(kbdev->dev, "deleted base context\n");
+	}
+
+	kbase_release_device(kbdev);
+
 	kfree(kfile);
 }
 
@@ -585,6 +491,9 @@ int kbase_get_irqs(struct kbase_device *kbdev)
 
 	kbdev->nr_irqs = 0;
 	result = get_irqs(kbdev, pdev);
+	if (!result)
+		return result;
+
 	if (result)
 		dev_err(kbdev->dev, "Invalid or No interrupt resources");
 
@@ -736,7 +645,8 @@ static int kbase_file_create_kctx(struct kbase_file *const kfile,
 
 	kbdev = kfile->kbdev;
 
-	kctx = kbase_create_context(kbdev, in_compat_syscall(), flags, kfile->api_version, kfile);
+	kctx = kbase_create_context(kbdev, in_compat_syscall(), flags, kfile->api_version,
+				    kfile->filp);
 
 	/* if bad flags, will stay stuck in setup mode */
 	if (!kctx)
@@ -823,36 +733,6 @@ static int kbase_release(struct inode *inode, struct file *filp)
 	return 0;
 }
 
-/**
- * kbase_flush - Function implementing the flush() method of @kbase_fops.
- *
- * @filp: Pointer to the /dev/malixx device file instance.
- * @id:   Pointer to the file table structure of current process.
- *        If @filp is being shared by multiple processes then @id can differ
- *        from kfile::owner.
- *
- * This function is called everytime the copy of @filp is closed. So if 3 processes
- * are sharing the @filp then this function would be called 3 times and only after
- * that kbase_release() would get called.
- *
- * Return: 0 if successful, otherwise a negative error code.
- *
- * Note: This function always gets called in Userspace context when the
- *       file is closed.
- */
-static int kbase_flush(struct file *filp, fl_owner_t id)
-{
-	struct kbase_file *const kfile = filp->private_data;
-
-	/* Try to destroy the context if the flush() method has been called for the
-	 * process that created the instance of /dev/malixx file i.e. 'kfile'.
-	 */
-	if (kfile->owner == id)
-		kbase_file_destroy_kctx_on_flush(kfile);
-
-	return 0;
-}
-
 static int kbase_api_set_flags(struct kbase_file *kfile, struct kbase_ioctl_set_flags *flags)
 {
 	int err = 0;
@@ -946,7 +826,7 @@ static int kbase_api_mem_alloc_ex(struct kbase_context *kctx,
 				  union kbase_ioctl_mem_alloc_ex *alloc_ex)
 {
 	struct kbase_va_region *reg;
-	u64 flags = alloc_ex->in.flags;
+	base_mem_alloc_flags flags = alloc_ex->in.flags;
 	u64 gpu_va;
 
 	/* Calls to this function are inherently asynchronous, with respect to
@@ -1056,7 +936,7 @@ static int kbase_api_mem_alloc(struct kbase_context *kctx, union kbase_ioctl_mem
 static int kbase_api_mem_alloc(struct kbase_context *kctx, union kbase_ioctl_mem_alloc *alloc)
 {
 	struct kbase_va_region *reg;
-	u64 flags = alloc->in.flags;
+	base_mem_alloc_flags flags = alloc->in.flags;
 	u64 gpu_va;
 
 	/* Calls to this function are inherently asynchronous, with respect to
@@ -1182,16 +1062,6 @@ static int kbase_api_get_ddk_version(struct kbase_context *kctx,
 static int kbase_api_mem_jit_init(struct kbase_context *kctx,
 				  struct kbase_ioctl_mem_jit_init *jit_init)
 {
-	size_t i;
-
-	for (i = 0; i < sizeof(jit_init->padding); i++) {
-		/* Ensure all padding bytes are 0 for potential future
-		 * extension
-		 */
-		if (jit_init->padding[i])
-			return -EINVAL;
-	}
-
 	return kbase_region_tracker_init_jit(kctx, jit_init->va_pages, jit_init->max_allocations,
 					     jit_init->trim_level, jit_init->group_id,
 					     jit_init->phys_pages);
@@ -1257,7 +1127,7 @@ static int kbase_api_mem_commit(struct kbase_context *kctx, struct kbase_ioctl_m
 static int kbase_api_mem_alias(struct kbase_context *kctx, union kbase_ioctl_mem_alias *alias)
 {
 	struct base_mem_aliasing_info *ai;
-	u64 flags;
+	base_mem_alloc_flags flags;
 	int err;
 
 	if (alias->in.nents == 0 || alias->in.nents > BASE_MEM_ALIAS_MAX_ENTS)
@@ -1268,7 +1138,7 @@ static int kbase_api_mem_alias(struct kbase_context *kctx, union kbase_ioctl_mem
 		return -ENOMEM;
 
 	err = copy_from_user(ai, u64_to_user_ptr(alias->in.aliasing_info),
-			     sizeof(*ai) * alias->in.nents);
+			     size_mul(sizeof(*ai), alias->in.nents));
 	if (err) {
 		vfree(ai);
 		return -EFAULT;
@@ -1296,7 +1166,7 @@ static int kbase_api_mem_alias(struct kbase_context *kctx, union kbase_ioctl_mem
 static int kbase_api_mem_import(struct kbase_context *kctx, union kbase_ioctl_mem_import *import)
 {
 	int ret;
-	u64 flags = import->in.flags;
+	base_mem_alloc_flags flags = import->in.flags;
 
 	if (flags & BASEP_MEM_FLAGS_KERNEL_ONLY)
 		return -ENOMEM;
@@ -1408,15 +1278,17 @@ static int kbase_api_sticky_resource_map(struct kbase_context *kctx,
 	if (!map->count || map->count > BASE_EXT_RES_COUNT_MAX)
 		return -EOVERFLOW;
 
-	ret = copy_from_user(gpu_addr, u64_to_user_ptr(map->address), sizeof(u64) * map->count);
+	ret = copy_from_user(gpu_addr, u64_to_user_ptr(map->address),
+			     size_mul(sizeof(u64), map->count));
 
 	if (ret != 0)
 		return -EFAULT;
 
-	kbase_gpu_vm_lock(kctx);
+	down_read(kbase_mem_get_process_mmap_lock());
+	kbase_gpu_vm_lock_with_pmode_sync(kctx);
 
 	for (i = 0; i < map->count; i++) {
-		if (!kbase_sticky_resource_acquire(kctx, gpu_addr[i])) {
+		if (!kbase_sticky_resource_acquire(kctx, gpu_addr[i], current->mm)) {
 			/* Invalid resource */
 			ret = -EINVAL;
 			break;
@@ -1430,7 +1302,8 @@ static int kbase_api_sticky_resource_map(struct kbase_context *kctx,
 		}
 	}
 
-	kbase_gpu_vm_unlock(kctx);
+	kbase_gpu_vm_unlock_with_pmode_sync(kctx);
+	up_read(kbase_mem_get_process_mmap_lock());
 
 	return ret;
 }
@@ -1445,12 +1318,13 @@ static int kbase_api_sticky_resource_unmap(struct kbase_context *kctx,
 	if (!unmap->count || unmap->count > BASE_EXT_RES_COUNT_MAX)
 		return -EOVERFLOW;
 
-	ret = copy_from_user(gpu_addr, u64_to_user_ptr(unmap->address), sizeof(u64) * unmap->count);
+	ret = copy_from_user(gpu_addr, u64_to_user_ptr(unmap->address),
+			     size_mul(sizeof(u64), unmap->count));
 
 	if (ret != 0)
 		return -EFAULT;
 
-	kbase_gpu_vm_lock(kctx);
+	kbase_gpu_vm_lock_with_pmode_sync(kctx);
 
 	for (i = 0; i < unmap->count; i++) {
 		if (!kbase_sticky_resource_release_force(kctx, NULL, gpu_addr[i])) {
@@ -1459,7 +1333,7 @@ static int kbase_api_sticky_resource_unmap(struct kbase_context *kctx,
 		}
 	}
 
-	kbase_gpu_vm_unlock(kctx);
+	kbase_gpu_vm_unlock_with_pmode_sync(kctx);
 
 	return ret;
 }
@@ -1517,11 +1391,16 @@ static int kbasep_cs_queue_kick(struct kbase_context *kctx, struct kbase_ioctl_c
 	return kbase_csf_queue_kick(kctx, kick);
 }
 
+static int kbasep_queue_group_clear_faults(struct kbase_context *kctx,
+					   struct kbase_ioctl_queue_group_clear_faults *faults)
+{
+	return kbase_csf_queue_group_clear_faults(kctx, faults);
+}
+
 static int kbasep_cs_queue_group_create_1_6(struct kbase_context *kctx,
 					    union kbase_ioctl_cs_queue_group_create_1_6 *create)
 {
 	int ret;
-	size_t i;
 	union kbase_ioctl_cs_queue_group_create
 		new_create = { .in = {
 				       .tiler_mask = create->in.tiler_mask,
@@ -1534,15 +1413,7 @@ static int kbasep_cs_queue_group_create_1_6(struct kbase_context *kctx,
 				       .compute_max = create->in.compute_max,
 			       } };
 
-	for (i = 0; i < ARRAY_SIZE(create->in.padding); i++) {
-		if (create->in.padding[i] != 0) {
-			dev_warn(kctx->kbdev->dev, "Invalid padding not 0 in queue group create\n");
-			return -EINVAL;
-		}
-	}
-
 	ret = kbase_csf_queue_group_create(kctx, &new_create);
-
 	create->out.group_handle = new_create.out.group_handle;
 	create->out.group_uid = new_create.out.group_uid;
 
@@ -1553,7 +1424,6 @@ static int kbasep_cs_queue_group_create_1_18(struct kbase_context *kctx,
 					     union kbase_ioctl_cs_queue_group_create_1_18 *create)
 {
 	int ret;
-	size_t i;
 	union kbase_ioctl_cs_queue_group_create
 		new_create = { .in = {
 				       .tiler_mask = create->in.tiler_mask,
@@ -1568,15 +1438,7 @@ static int kbasep_cs_queue_group_create_1_18(struct kbase_context *kctx,
 				       .dvs_buf = create->in.dvs_buf,
 			       } };
 
-	for (i = 0; i < ARRAY_SIZE(create->in.padding); i++) {
-		if (create->in.padding[i] != 0) {
-			dev_warn(kctx->kbdev->dev, "Invalid padding not 0 in queue group create\n");
-			return -EINVAL;
-		}
-	}
-
 	ret = kbase_csf_queue_group_create(kctx, &new_create);
-
 	create->out.group_handle = new_create.out.group_handle;
 	create->out.group_uid = new_create.out.group_uid;
 
@@ -1586,6 +1448,8 @@ static int kbasep_cs_queue_group_create_1_18(struct kbase_context *kctx,
 static int kbasep_cs_queue_group_create(struct kbase_context *kctx,
 					union kbase_ioctl_cs_queue_group_create *create)
 {
+	/* create->in.reserved only present pre-TDRX configuration. */
+
 	if (create->in.reserved != 0) {
 		dev_warn(kctx->kbdev->dev, "Invalid reserved field not 0 in queue group create\n");
 		return -EINVAL;
@@ -1701,14 +1565,15 @@ static int kbase_ioctl_cs_get_glb_iface(struct kbase_context *kctx,
 			&param->out.prfcnt_size, &param->out.instr_features);
 
 		if (copy_to_user(user_groups, group_data,
-				 MIN(max_group_num, param->out.group_num) * sizeof(*group_data)))
+				 size_mul(MIN(max_group_num, param->out.group_num),
+					  sizeof(*group_data))))
 			err = -EFAULT;
 	}
 
 	if (!err)
 		if (copy_to_user(user_streams, stream_data,
-				 MIN(max_total_stream_num, param->out.total_stream_num) *
-					 sizeof(*stream_data)))
+				 size_mul(MIN(max_total_stream_num, param->out.total_stream_num),
+					  sizeof(*stream_data))))
 			err = -EFAULT;
 
 	kfree(group_data);
@@ -1732,10 +1597,6 @@ static int kbase_ioctl_read_user_page(struct kbase_context *kctx,
 	if (unlikely(user_page->in.offset != LATEST_FLUSH))
 		return -EINVAL;
 
-	/* Validating padding that must be zero */
-	if (unlikely(user_page->in.padding != 0))
-		return -EINVAL;
-
 	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
 	if (!kbdev->pm.backend.gpu_powered)
 		user_page->out.val_lo = POWER_DOWN_LATEST_FLUSH_VALUE;
@@ -1762,83 +1623,33 @@ kbasep_ioctl_context_priority_check(struct kbase_context *kctx,
 	return 0;
 }
 
-#define KBASE_HANDLE_IOCTL(cmd, function, arg)                                         \
-	do {                                                                           \
-		int ret;                                                               \
-		BUILD_BUG_ON(_IOC_DIR(cmd) != _IOC_NONE);                              \
-		dev_dbg(arg->kbdev->dev, "Enter ioctl %s\n", #function);               \
-		ret = function(arg);                                                   \
-		dev_dbg(arg->kbdev->dev, "Return %d from ioctl %s\n", ret, #function); \
-		return ret;                                                            \
-	} while (0)
-
-#define KBASE_HANDLE_IOCTL_IN(cmd, function, type, arg)                                \
-	do {                                                                           \
-		type param;                                                            \
-		int ret, err;                                                          \
-		dev_dbg(arg->kbdev->dev, "Enter ioctl %s\n", #function);               \
-		BUILD_BUG_ON(_IOC_DIR(cmd) != _IOC_WRITE);                             \
-		BUILD_BUG_ON(sizeof(param) != _IOC_SIZE(cmd));                         \
-		err = copy_from_user(&param, uarg, sizeof(param));                     \
-		if (err)                                                               \
-			return -EFAULT;                                                \
-		ret = function(arg, &param);                                           \
-		dev_dbg(arg->kbdev->dev, "Return %d from ioctl %s\n", ret, #function); \
-		return ret;                                                            \
-	} while (0)
-
-#define KBASE_HANDLE_IOCTL_OUT(cmd, function, type, arg)                               \
-	do {                                                                           \
-		type param;                                                            \
-		int ret, err;                                                          \
-		dev_dbg(arg->kbdev->dev, "Enter ioctl %s\n", #function);               \
-		BUILD_BUG_ON(_IOC_DIR(cmd) != _IOC_READ);                              \
-		BUILD_BUG_ON(sizeof(param) != _IOC_SIZE(cmd));                         \
-		memset(&param, 0, sizeof(param));                                      \
-		ret = function(arg, &param);                                           \
-		err = copy_to_user(uarg, &param, sizeof(param));                       \
-		if (err)                                                               \
-			return -EFAULT;                                                \
-		dev_dbg(arg->kbdev->dev, "Return %d from ioctl %s\n", ret, #function); \
-		return ret;                                                            \
-	} while (0)
-
-#define KBASE_HANDLE_IOCTL_INOUT(cmd, function, type, arg)                             \
-	do {                                                                           \
-		type param;                                                            \
-		int ret, err;                                                          \
-		dev_dbg(arg->kbdev->dev, "Enter ioctl %s\n", #function);               \
-		BUILD_BUG_ON(_IOC_DIR(cmd) != (_IOC_WRITE | _IOC_READ));               \
-		BUILD_BUG_ON(sizeof(param) != _IOC_SIZE(cmd));                         \
-		err = copy_from_user(&param, uarg, sizeof(param));                     \
-		if (err)                                                               \
-			return -EFAULT;                                                \
-		ret = function(arg, &param);                                           \
-		err = copy_to_user(uarg, &param, sizeof(param));                       \
-		if (err)                                                               \
-			return -EFAULT;                                                \
-		dev_dbg(arg->kbdev->dev, "Return %d from ioctl %s\n", ret, #function); \
-		return ret;                                                            \
-	} while (0)
-
 static int kbasep_ioctl_set_limited_core_count(
 	struct kbase_context *kctx,
 	struct kbase_ioctl_set_limited_core_count *set_limited_core_count)
 {
 	const u64 shader_core_mask = kbase_pm_get_present_cores(kctx->kbdev, KBASE_PM_CORE_SHADER);
-	const u64 limited_core_mask = ((u64)1 << (set_limited_core_count->max_core_count)) - 1;
+	const u8 max_core_count = set_limited_core_count->max_core_count;
+	u64 limited_core_mask = 0;
 
-	if ((shader_core_mask & limited_core_mask) == 0) {
-		/* At least one shader core must be available after applying the mask */
+	/* Sanity check to avoid shift-out-of-bounds */
+	if (max_core_count > 64)
+		return -EINVAL;
+	else if (max_core_count == 64)
+		limited_core_mask = UINT64_MAX;
+	else
+		limited_core_mask = ((u64)1 << max_core_count) - 1;
+
+	/* At least one shader core must be available after applying the mask */
+	if ((shader_core_mask & limited_core_mask) == 0)
 		return -EINVAL;
-	}
 
 	kctx->limited_core_mask = limited_core_mask;
 	return 0;
 }
 
-static long kbase_kfile_ioctl(struct kbase_file *kfile, unsigned int cmd, unsigned long arg)
+static long kbase_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 {
+	struct kbase_file *const kfile = filp->private_data;
 	struct kbase_context *kctx = NULL;
 	struct kbase_device *kbdev = kfile->kbdev;
 	void __user *uarg = (void __user *)arg;
@@ -2087,6 +1898,11 @@ static long kbase_kfile_ioctl(struct kbase_file *kfile, unsigned int cmd, unsign
 		KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_KCPU_QUEUE_ENQUEUE, kbasep_kcpu_queue_enqueue,
 				      struct kbase_ioctl_kcpu_queue_enqueue, kctx);
 		break;
+	case KBASE_IOCTL_QUEUE_GROUP_CLEAR_FAULTS:
+		KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_QUEUE_GROUP_CLEAR_FAULTS,
+				      kbasep_queue_group_clear_faults,
+				      struct kbase_ioctl_queue_group_clear_faults, kctx);
+		break;
 	case KBASE_IOCTL_CS_TILER_HEAP_INIT:
 		KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_CS_TILER_HEAP_INIT, kbasep_cs_tiler_heap_init,
 					 union kbase_ioctl_cs_tiler_heap_init, kctx);
@@ -2137,45 +1953,22 @@ static long kbase_kfile_ioctl(struct kbase_file *kfile, unsigned int cmd, unsign
 	return -ENOIOCTLCMD;
 }
 
-static long kbase_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
-{
-	struct kbase_file *const kfile = filp->private_data;
-	long ioctl_ret;
-
-	if (unlikely(!kbase_file_inc_fops_count_if_allowed(kfile)))
-		return -EPERM;
-
-	ioctl_ret = kbase_kfile_ioctl(kfile, cmd, arg);
-	kbase_file_dec_fops_count(kfile);
-
-	return ioctl_ret;
-}
-
 #if MALI_USE_CSF
 static ssize_t kbase_read(struct file *filp, char __user *buf, size_t count, loff_t *f_pos)
 {
 	struct kbase_file *const kfile = filp->private_data;
-	struct kbase_context *kctx;
+	struct kbase_context *const kctx = kbase_file_get_kctx_if_setup_complete(kfile);
 	struct base_csf_notification event_data = { .type = BASE_CSF_NOTIFICATION_EVENT };
 	const size_t data_size = sizeof(event_data);
 	bool read_event = false, read_error = false;
-	ssize_t err = 0;
 
 	CSTD_UNUSED(f_pos);
 
-	if (unlikely(!kbase_file_inc_fops_count_if_allowed(kfile)))
+	if (unlikely(!kctx))
 		return -EPERM;
 
-	kctx = kbase_file_get_kctx_if_setup_complete(kfile);
-	if (unlikely(!kctx)) {
-		err = -EPERM;
-		goto out;
-	}
-
-	if (count < data_size) {
-		err = -ENOBUFS;
-		goto out;
-	}
+	if (count < data_size)
+		return -ENOBUFS;
 
 	if (atomic_read(&kctx->event_count))
 		read_event = true;
@@ -2196,41 +1989,29 @@ static ssize_t kbase_read(struct file *filp, char __user *buf, size_t count, lof
 
 	if (copy_to_user(buf, &event_data, data_size) != 0) {
 		dev_warn(kctx->kbdev->dev, "Failed to copy data\n");
-		err = -EFAULT;
-		goto out;
+		return -EFAULT;
 	}
 
 	if (read_event)
 		atomic_set(&kctx->event_count, 0);
 
-out:
-	kbase_file_dec_fops_count(kfile);
-	return err ? err : (ssize_t)data_size;
+	return data_size;
 }
 #else /* MALI_USE_CSF */
 static ssize_t kbase_read(struct file *filp, char __user *buf, size_t count, loff_t *f_pos)
 {
 	struct kbase_file *const kfile = filp->private_data;
-	struct kbase_context *kctx;
+	struct kbase_context *const kctx = kbase_file_get_kctx_if_setup_complete(kfile);
 	struct base_jd_event_v2 uevent;
-	size_t out_count = 0;
-	ssize_t err = 0;
+	int out_count = 0;
 
 	CSTD_UNUSED(f_pos);
 
-	if (unlikely(!kbase_file_inc_fops_count_if_allowed(kfile)))
+	if (unlikely(!kctx))
 		return -EPERM;
 
-	kctx = kbase_file_get_kctx_if_setup_complete(kfile);
-	if (unlikely(!kctx)) {
-		err = -EPERM;
-		goto out;
-	}
-
-	if (count < sizeof(uevent)) {
-		err = -ENOBUFS;
-		goto out;
-	}
+	if (count < sizeof(uevent))
+		return -ENOBUFS;
 
 	memset(&uevent, 0, sizeof(uevent));
 
@@ -2239,29 +2020,21 @@ static ssize_t kbase_read(struct file *filp, char __user *buf, size_t count, lof
 			if (out_count > 0)
 				goto out;
 
-			if (filp->f_flags & O_NONBLOCK) {
-				err = -EAGAIN;
-				goto out;
-			}
+			if (filp->f_flags & O_NONBLOCK)
+				return -EAGAIN;
 
-			if (wait_event_interruptible(kfile->event_queue,
-						     kbase_event_pending(kctx)) != 0) {
-				err = -ERESTARTSYS;
-				goto out;
-			}
+			if (wait_event_interruptible(kctx->event_queue,
+						     kbase_event_pending(kctx)) != 0)
+				return -ERESTARTSYS;
 		}
 		if (uevent.event_code == BASE_JD_EVENT_DRV_TERMINATED) {
-			if (out_count == 0) {
-				err = -EPIPE;
-				goto out;
-			}
+			if (out_count == 0)
+				return -EPIPE;
 			goto out;
 		}
 
-		if (copy_to_user(buf, &uevent, sizeof(uevent)) != 0) {
-			err = -EFAULT;
-			goto out;
-		}
+		if (copy_to_user(buf, &uevent, sizeof(uevent)) != 0)
+			return -EFAULT;
 
 		buf += sizeof(uevent);
 		out_count++;
@@ -2269,59 +2042,40 @@ static ssize_t kbase_read(struct file *filp, char __user *buf, size_t count, lof
 	} while (count >= sizeof(uevent));
 
 out:
-	kbase_file_dec_fops_count(kfile);
-	return err ? err : (ssize_t)(out_count * sizeof(uevent));
+	return out_count * sizeof(uevent);
 }
 #endif /* MALI_USE_CSF */
 
 static __poll_t kbase_poll(struct file *filp, poll_table *wait)
 {
 	struct kbase_file *const kfile = filp->private_data;
-	struct kbase_context *kctx;
-	__poll_t ret = 0;
+	struct kbase_context *const kctx = kbase_file_get_kctx_if_setup_complete(kfile);
 
-	if (unlikely(!kbase_file_inc_fops_count_if_allowed(kfile))) {
-#if (KERNEL_VERSION(4, 19, 0) > LINUX_VERSION_CODE)
-		ret = POLLNVAL;
-#else
-		ret = EPOLLNVAL;
-#endif
-		return ret;
-	}
-
-	kctx = kbase_file_get_kctx_if_setup_complete(kfile);
 	if (unlikely(!kctx)) {
 #if (KERNEL_VERSION(4, 19, 0) > LINUX_VERSION_CODE)
-		ret = POLLERR;
+		return POLLERR;
 #else
-		ret = EPOLLERR;
+		return EPOLLERR;
 #endif
-		goto out;
 	}
 
-	poll_wait(filp, &kfile->event_queue, wait);
+	poll_wait(filp, &kctx->event_queue, wait);
 	if (kbase_event_pending(kctx)) {
 #if (KERNEL_VERSION(4, 19, 0) > LINUX_VERSION_CODE)
-		ret = POLLIN | POLLRDNORM;
+		return POLLIN | POLLRDNORM;
 #else
-		ret = EPOLLIN | EPOLLRDNORM;
+		return EPOLLIN | EPOLLRDNORM;
 #endif
 	}
 
-out:
-	kbase_file_dec_fops_count(kfile);
-	return ret;
+	return 0;
 }
 
 void kbase_event_wakeup(struct kbase_context *kctx)
 {
 	KBASE_DEBUG_ASSERT(kctx);
 	dev_dbg(kctx->kbdev->dev, "Waking event queue for context %pK\n", (void *)kctx);
-#ifdef CONFIG_MALI_BIFROST_DEBUG
-	if (WARN_ON_ONCE(!kctx->kfile))
-		return;
-#endif
-	wake_up_interruptible(&kctx->kfile->event_queue);
+	wake_up_interruptible(&kctx->event_queue);
 }
 
 KBASE_EXPORT_TEST_API(kbase_event_wakeup);
@@ -2354,20 +2108,12 @@ KBASE_EXPORT_TEST_API(kbase_event_pending);
 static int kbase_mmap(struct file *const filp, struct vm_area_struct *const vma)
 {
 	struct kbase_file *const kfile = filp->private_data;
-	struct kbase_context *kctx;
-	int ret;
+	struct kbase_context *const kctx = kbase_file_get_kctx_if_setup_complete(kfile);
 
-	if (unlikely(!kbase_file_inc_fops_count_if_allowed(kfile)))
+	if (unlikely(!kctx))
 		return -EPERM;
 
-	kctx = kbase_file_get_kctx_if_setup_complete(kfile);
-	if (likely(kctx))
-		ret = kbase_context_mmap(kctx, vma);
-	else
-		ret = -EPERM;
-
-	kbase_file_dec_fops_count(kfile);
-	return ret;
+	return kbase_context_mmap(kctx, vma);
 }
 
 static int kbase_check_flags(int flags)
@@ -2386,26 +2132,17 @@ static unsigned long kbase_get_unmapped_area(struct file *const filp, const unsi
 					     const unsigned long flags)
 {
 	struct kbase_file *const kfile = filp->private_data;
-	struct kbase_context *kctx;
-	unsigned long address;
+	struct kbase_context *const kctx = kbase_file_get_kctx_if_setup_complete(kfile);
 
-	if (unlikely(!kbase_file_inc_fops_count_if_allowed(kfile)))
+	if (unlikely(!kctx))
 		return -EPERM;
 
-	kctx = kbase_file_get_kctx_if_setup_complete(kfile);
-	if (likely(kctx))
-		address = kbase_context_get_unmapped_area(kctx, addr, len, pgoff, flags);
-	else
-		address = -EPERM;
-
-	kbase_file_dec_fops_count(kfile);
-	return address;
+	return kbase_context_get_unmapped_area(kctx, addr, len, pgoff, flags);
 }
 
 static const struct file_operations kbase_fops = {
 	.owner = THIS_MODULE,
 	.open = kbase_open,
-	.flush = kbase_flush,
 	.release = kbase_release,
 	.read = kbase_read,
 	.poll = kbase_poll,
@@ -2544,6 +2281,9 @@ static ssize_t core_mask_show(struct device *dev, struct device_attribute *attr,
 	struct kbase_device *kbdev;
 	unsigned long flags;
 	ssize_t ret = 0;
+#if !MALI_USE_CSF
+	size_t i;
+#endif
 
 	CSTD_UNUSED(attr);
 
@@ -2562,22 +2302,173 @@ static ssize_t core_mask_show(struct device *dev, struct device_attribute *attr,
 	ret += scnprintf(buf + ret, (size_t)(PAGE_SIZE - ret),
 			 "Current in use core mask : 0x%llX\n", kbdev->pm.backend.shaders_avail);
 #else
-	ret += scnprintf(buf + ret, (size_t)(PAGE_SIZE - ret), "Current core mask (JS0) : 0x%llX\n",
-			 kbdev->pm.debug_core_mask[0]);
-	ret += scnprintf(buf + ret, (size_t)(PAGE_SIZE - ret), "Current core mask (JS1) : 0x%llX\n",
-			 kbdev->pm.debug_core_mask[1]);
-	ret += scnprintf(buf + ret, (size_t)(PAGE_SIZE - ret), "Current core mask (JS2) : 0x%llX\n",
-			 kbdev->pm.debug_core_mask[2]);
+	for (i = 0; i < BASE_JM_MAX_NR_SLOTS; i++) {
+		if (PAGE_SIZE < ret)
+			goto out_unlock;
+
+		ret += scnprintf(buf + ret, (size_t)(PAGE_SIZE - ret),
+				 "Current core mask (JS%zu) : 0x%llX\n", i,
+				 kbdev->pm.debug_core_mask[i]);
+	}
 #endif /* MALI_USE_CSF */
 
 	ret += scnprintf(buf + ret, (size_t)(PAGE_SIZE - ret), "Available core mask : 0x%llX\n",
 			 kbdev->gpu_props.shader_present);
-
+#if !MALI_USE_CSF
+out_unlock:
+#endif
 	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
 
 	return ret;
 }
 
+#if MALI_USE_CSF
+struct kbase_core_mask {
+	u64 new_core_mask;
+};
+
+static int core_mask_parse(struct kbase_device *const kbdev, const char *const buf,
+			   struct kbase_core_mask *const mask)
+{
+	int err = kstrtou64(buf, 0, &mask->new_core_mask);
+
+	if (err)
+		dev_err(kbdev->dev, "Couldn't process core mask write operation.\n");
+
+	return err;
+}
+
+static int core_mask_set(struct kbase_device *kbdev, struct kbase_core_mask *const new_mask)
+{
+	u64 new_core_mask = new_mask->new_core_mask;
+	u64 shader_present;
+	unsigned long flags;
+	int ret = 0;
+
+	kbase_csf_scheduler_lock(kbdev);
+	kbase_pm_lock(kbdev);
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	shader_present = kbdev->gpu_props.shader_present;
+
+	if ((new_core_mask & shader_present) != new_core_mask) {
+		dev_err(kbdev->dev,
+			"Invalid requested core mask 0x%llX: Includes non-existent cores (present = 0x%llX)",
+			new_core_mask, shader_present);
+		ret = -EINVAL;
+		goto exit;
+	} else if (!(new_core_mask & shader_present & kbdev->pm.backend.ca_cores_enabled)) {
+		dev_err(kbdev->dev,
+			"Invalid requested core mask 0x%llX: No intersection with currently available cores (present = 0x%llX, CA enabled = 0x%llX)",
+			new_core_mask, kbdev->gpu_props.shader_present,
+			kbdev->pm.backend.ca_cores_enabled);
+		ret = -EINVAL;
+		goto exit;
+	}
+
+
+	if (kbdev->pm.debug_core_mask != new_core_mask)
+		kbase_pm_set_debug_core_mask(kbdev, new_core_mask);
+
+exit:
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+	kbase_pm_unlock(kbdev);
+	kbase_csf_scheduler_unlock(kbdev);
+
+	return ret;
+}
+#else
+struct kbase_core_mask {
+	u64 new_core_mask[BASE_JM_MAX_NR_SLOTS];
+};
+
+static int core_mask_parse(struct kbase_device *const kbdev, const char *const buf,
+			   struct kbase_core_mask *const mask)
+{
+	int items;
+
+	items = sscanf(buf, "%llx %llx %llx", &mask->new_core_mask[0], &mask->new_core_mask[1],
+		       &mask->new_core_mask[2]);
+
+	if (items != 1 && items != BASE_JM_MAX_NR_SLOTS) {
+		dev_err(kbdev->dev, "Couldn't process core mask write operation.\n"
+				    "Use format <core_mask>\n"
+				    "or <core_mask_js0> <core_mask_js1> <core_mask_js2>\n");
+		return -EINVAL;
+	}
+
+	/* If only one value was provided, set all other core masks equal to the value. */
+	if (items == 1) {
+		size_t i;
+
+		for (i = 1; i < BASE_JM_MAX_NR_SLOTS; i++)
+			mask->new_core_mask[i] = mask->new_core_mask[0];
+	}
+
+	return 0;
+}
+
+static int core_mask_set(struct kbase_device *kbdev, struct kbase_core_mask *const new_mask)
+{
+	u64 shader_present = kbdev->gpu_props.shader_present;
+	u64 group_core_mask = kbdev->gpu_props.coherency_info.group.core_mask;
+	u64 *new_core_mask;
+	unsigned long flags;
+	int ret = 0;
+	size_t i;
+
+	kbase_pm_lock(kbdev);
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	new_core_mask = &new_mask->new_core_mask[0];
+
+	for (i = 0; i < BASE_JM_MAX_NR_SLOTS; ++i) {
+		if ((new_core_mask[i] & shader_present) != new_core_mask[i]) {
+			dev_err(kbdev->dev,
+				"Invalid core mask 0x%llX for JS %zu: Includes non-existent cores (present = 0x%llX)",
+				new_core_mask[i], i, shader_present);
+			ret = -EINVAL;
+			goto exit;
+
+		} else if (!(new_core_mask[i] & shader_present &
+			     kbdev->pm.backend.ca_cores_enabled)) {
+			dev_err(kbdev->dev,
+				"Invalid core mask 0x%llX for JS %zu: No intersection with currently available cores (present = 0x%llX, CA enabled = 0x%llX)",
+				new_core_mask[i], i, kbdev->gpu_props.shader_present,
+				kbdev->pm.backend.ca_cores_enabled);
+			ret = -EINVAL;
+			goto exit;
+		} else if (!(new_core_mask[i] & group_core_mask)) {
+			dev_err(kbdev->dev,
+				"Invalid core mask 0x%llX for JS %zu: No intersection with group 0 core mask 0x%llX",
+				new_core_mask[i], i, group_core_mask);
+			ret = -EINVAL;
+			goto exit;
+		} else if (!(new_core_mask[i] & kbdev->gpu_props.curr_config.shader_present)) {
+			dev_err(kbdev->dev,
+				"Invalid core mask 0x%llX for JS %zu: No intersection with current core mask 0x%llX",
+				new_core_mask[i], i, kbdev->gpu_props.curr_config.shader_present);
+			ret = -EINVAL;
+			goto exit;
+		}
+	}
+
+	for (i = 0; i < BASE_JM_MAX_NR_SLOTS; i++) {
+		if (kbdev->pm.debug_core_mask[i] != new_core_mask[i]) {
+			kbase_pm_set_debug_core_mask(kbdev, new_core_mask, BASE_JM_MAX_NR_SLOTS);
+			break;
+		}
+	}
+
+exit:
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+	kbase_pm_unlock(kbdev);
+
+	return ret;
+}
+
+#endif
+
 /**
  * core_mask_store - Store callback for the core_mask sysfs file.
  *
@@ -2594,18 +2485,9 @@ static ssize_t core_mask_store(struct device *dev, struct device_attribute *attr
 			       size_t count)
 {
 	struct kbase_device *kbdev;
-#if MALI_USE_CSF
-	u64 new_core_mask;
-#else
-	u64 new_core_mask[3];
-	u64 group_core_mask;
-	int i;
-#endif /* MALI_USE_CSF */
+	struct kbase_core_mask core_mask = {};
 
-	int items;
-	ssize_t err = (ssize_t)count;
-	unsigned long flags;
-	u64 shader_present;
+	int err;
 
 	CSTD_UNUSED(attr);
 
@@ -2614,102 +2496,16 @@ static ssize_t core_mask_store(struct device *dev, struct device_attribute *attr
 	if (!kbdev)
 		return -ENODEV;
 
-#if MALI_USE_CSF
-	items = sscanf(buf, "%llx", &new_core_mask);
+	err = core_mask_parse(kbdev, buf, &core_mask);
+	if (err)
+		return err;
 
-	if (items != 1) {
-		dev_err(kbdev->dev, "Couldn't process core mask write operation.\n"
-				    "Use format <core_mask>\n");
-		err = -EINVAL;
-		goto end;
-	}
-#else
-	items = sscanf(buf, "%llx %llx %llx", &new_core_mask[0], &new_core_mask[1],
-		       &new_core_mask[2]);
+	err = core_mask_set(kbdev, &core_mask);
 
-	if (items != 1 && items != 3) {
-		dev_err(kbdev->dev, "Couldn't process core mask write operation.\n"
-				    "Use format <core_mask>\n"
-				    "or <core_mask_js0> <core_mask_js1> <core_mask_js2>\n");
-		err = -EINVAL;
-		goto end;
-	}
+	if (err)
+		return err;
 
-	if (items == 1)
-		new_core_mask[1] = new_core_mask[2] = new_core_mask[0];
-#endif
-
-	mutex_lock(&kbdev->pm.lock);
-	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
-
-	shader_present = kbdev->gpu_props.shader_present;
-
-#if MALI_USE_CSF
-	if ((new_core_mask & shader_present) != new_core_mask) {
-		dev_err(dev,
-			"Invalid core mask 0x%llX: Includes non-existent cores (present = 0x%llX)",
-			new_core_mask, shader_present);
-		err = -EINVAL;
-		goto unlock;
-
-	} else if (!(new_core_mask & shader_present & kbdev->pm.backend.ca_cores_enabled)) {
-		dev_err(dev,
-			"Invalid core mask 0x%llX: No intersection with currently available cores (present = 0x%llX, CA enabled = 0x%llX\n",
-			new_core_mask, kbdev->gpu_props.shader_present,
-			kbdev->pm.backend.ca_cores_enabled);
-		err = -EINVAL;
-		goto unlock;
-	}
-
-	if (kbdev->pm.debug_core_mask != new_core_mask)
-		kbase_pm_set_debug_core_mask(kbdev, new_core_mask);
-#else
-	group_core_mask = kbdev->gpu_props.coherency_info.group.core_mask;
-
-	for (i = 0; i < 3; ++i) {
-		if ((new_core_mask[i] & shader_present) != new_core_mask[i]) {
-			dev_err(dev,
-				"Invalid core mask 0x%llX for JS %d: Includes non-existent cores (present = 0x%llX)",
-				new_core_mask[i], i, shader_present);
-			err = -EINVAL;
-			goto unlock;
-
-		} else if (!(new_core_mask[i] & shader_present &
-			     kbdev->pm.backend.ca_cores_enabled)) {
-			dev_err(dev,
-				"Invalid core mask 0x%llX for JS %d: No intersection with currently available cores (present = 0x%llX, CA enabled = 0x%llX\n",
-				new_core_mask[i], i, kbdev->gpu_props.shader_present,
-				kbdev->pm.backend.ca_cores_enabled);
-			err = -EINVAL;
-			goto unlock;
-		} else if (!(new_core_mask[i] & group_core_mask)) {
-			dev_err(dev,
-				"Invalid core mask 0x%llX for JS %d: No intersection with group 0 core mask 0x%llX\n",
-				new_core_mask[i], i, group_core_mask);
-			err = -EINVAL;
-			goto unlock;
-		} else if (!(new_core_mask[i] & kbdev->gpu_props.curr_config.shader_present)) {
-			dev_err(dev,
-				"Invalid core mask 0x%llX for JS %d: No intersection with current core mask 0x%llX\n",
-				new_core_mask[i], i, kbdev->gpu_props.curr_config.shader_present);
-			err = -EINVAL;
-			goto unlock;
-		}
-	}
-
-	if (kbdev->pm.debug_core_mask[0] != new_core_mask[0] ||
-	    kbdev->pm.debug_core_mask[1] != new_core_mask[1] ||
-	    kbdev->pm.debug_core_mask[2] != new_core_mask[2]) {
-		kbase_pm_set_debug_core_mask(kbdev, new_core_mask[0], new_core_mask[1],
-					     new_core_mask[2]);
-	}
-#endif /* MALI_USE_CSF */
-
-unlock:
-	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
-	mutex_unlock(&kbdev->pm.lock);
-end:
-	return err;
+	return count;
 }
 
 /*
@@ -3478,12 +3274,8 @@ int kbase_pm_gpu_freq_init(struct kbase_device *kbdev)
 		/* convert found frequency to KHz */
 		found_freq /= 1000;
 
-		/* If lowest frequency in OPP table is still higher
-		 * than the reference, then keep the reference frequency
-		 * as the one to use for scaling .
-		 */
-		if (found_freq < lowest_freq_khz)
-			lowest_freq_khz = found_freq;
+		/* Always use the lowest frequency from the OPP table */
+		lowest_freq_khz = found_freq;
 	}
 #else
 	dev_err(kbdev->dev, "No operating-points-v2 node or operating-points property in DT");
@@ -4466,7 +4258,7 @@ static int kbase_common_reg_map(struct kbase_device *kbdev)
 		goto out_region;
 	}
 
-	kbdev->reg = ioremap(kbdev->reg_start, kbdev->reg_size);
+	kbdev->reg = mali_ioremap(kbdev->reg_start, kbdev->reg_size);
 	if (!kbdev->reg) {
 		dev_err(kbdev->dev, "Can't remap register window\n");
 		err = -EINVAL;
@@ -4484,7 +4276,7 @@ out_region:
 static void kbase_common_reg_unmap(struct kbase_device *const kbdev)
 {
 	if (kbdev->reg) {
-		iounmap(kbdev->reg);
+		mali_iounmap(kbdev->reg);
 		release_mem_region(kbdev->reg_start, kbdev->reg_size);
 		kbdev->reg = NULL;
 		kbdev->reg_start = 0;
@@ -4535,7 +4327,7 @@ void registers_unmap(struct kbase_device *kbdev)
 	kbase_common_reg_unmap(kbdev);
 }
 
-#if defined(CONFIG_MALI_ARBITER_SUPPORT) && defined(CONFIG_OF)
+#if defined(CONFIG_OF)
 
 static bool kbase_is_pm_enabled(const struct device_node *gpu_node)
 {
@@ -4562,17 +4354,6 @@ static bool kbase_is_pm_enabled(const struct device_node *gpu_node)
 	return is_pm_enable;
 }
 
-static bool kbase_is_pv_enabled(const struct device_node *gpu_node)
-{
-	const void *arbiter_if_node;
-
-	arbiter_if_node = of_get_property(gpu_node, "arbiter-if", NULL);
-	if (!arbiter_if_node)
-		arbiter_if_node = of_get_property(gpu_node, "arbiter_if", NULL);
-
-	return arbiter_if_node ? true : false;
-}
-
 static bool kbase_is_full_coherency_enabled(const struct device_node *gpu_node)
 {
 	const void *coherency_dts;
@@ -4586,72 +4367,61 @@ static bool kbase_is_full_coherency_enabled(const struct device_node *gpu_node)
 	}
 	return false;
 }
+#endif /* defined(CONFIG_OF) */
 
-#endif /* CONFIG_MALI_ARBITER_SUPPORT && CONFIG_OF */
-
-int kbase_device_pm_init(struct kbase_device *kbdev)
+int kbase_device_backend_init(struct kbase_device *kbdev)
 {
 	int err = 0;
 
-#if defined(CONFIG_MALI_ARBITER_SUPPORT) && defined(CONFIG_OF)
-	u32 product_model;
+#if defined(CONFIG_OF)
+	/*
+	 * Attempt to initialize arbitration.
+	 * If the platform is not suitable for arbitration, return -EPERM.
+	 * The device initialization should not fail but kbase will
+	 * not support arbitration.
+	 */
+	if (kbase_is_pm_enabled(kbdev->dev->of_node)) {
+		/* Arbitration AND power management invalid */
+		dev_dbg(kbdev->dev, "Arbitration not supported with power management");
+		return -EPERM;
+	}
 
-	if (kbase_is_pv_enabled(kbdev->dev->of_node)) {
-		dev_info(kbdev->dev, "Arbitration interface enabled\n");
-		if (kbase_is_pm_enabled(kbdev->dev->of_node)) {
-			/* Arbitration AND power management invalid */
-			dev_err(kbdev->dev,
-				"Invalid combination of arbitration AND power management\n");
-			return -EPERM;
-		}
-		if (kbase_is_full_coherency_enabled(kbdev->dev->of_node)) {
-			/* Arbitration AND full coherency invalid */
-			dev_err(kbdev->dev,
-				"Invalid combination of arbitration AND full coherency\n");
-			return -EPERM;
-		}
-		err = kbase_arbiter_pm_early_init(kbdev);
-		if (err == 0) {
-			/* Check if Arbitration is running on
-			 * supported GPU platform
-			 */
-			kbase_pm_register_access_enable(kbdev);
+	if (kbase_is_full_coherency_enabled(kbdev->dev->of_node)) {
+		/* Arbitration AND full coherency invalid */
+		dev_dbg(kbdev->dev, "Arbitration not supported with full coherency");
+		return -EPERM;
+	}
+
+	err = kbase_arbiter_pm_early_init(kbdev);
+	if (err == 0) {
+#if !MALI_USE_CSF
+		u32 product_model;
+
+		/*
+		 * Attempt to obtain and parse gpu_id in the event an external AW module
+		 * is used for messaging. We should have access to GPU at this point.
+		 */
+		if (kbdev->gpu_props.gpu_id.arch_major == 0)
 			kbase_gpuprops_parse_gpu_id(&kbdev->gpu_props.gpu_id,
 						    kbase_reg_get_gpu_id(kbdev));
-			kbase_pm_register_access_disable(kbdev);
-			product_model = kbdev->gpu_props.gpu_id.product_model;
 
-			if (product_model != GPU_ID_PRODUCT_TGOX &&
-			    product_model != GPU_ID_PRODUCT_TNOX &&
-			    product_model != GPU_ID_PRODUCT_TBAX) {
-				kbase_arbiter_pm_early_term(kbdev);
-				dev_err(kbdev->dev, "GPU platform not suitable for arbitration\n");
-				return -EPERM;
-			}
+		product_model = kbdev->gpu_props.gpu_id.product_model;
+		if (product_model != GPU_ID_PRODUCT_TGOX && product_model != GPU_ID_PRODUCT_TNOX &&
+		    product_model != GPU_ID_PRODUCT_TBAX) {
+			kbase_arbiter_pm_early_term(kbdev);
+			dev_dbg(kbdev->dev, "GPU platform not suitable for arbitration");
+			return -EPERM;
 		}
-	} else {
-		kbdev->arb.arb_if = NULL;
-		kbdev->arb.arb_dev = NULL;
-		err = power_control_init(kbdev);
+#endif /* !MALI_USE_CSF */
+		dev_info(kbdev->dev, "Arbitration interface enabled");
 	}
-#else
-	err = power_control_init(kbdev);
-#endif /* CONFIG_MALI_ARBITER_SUPPORT && CONFIG_OF */
+#endif /* defined(CONFIG_OF) */
 	return err;
 }
 
-void kbase_device_pm_term(struct kbase_device *kbdev)
+void kbase_device_backend_term(struct kbase_device *kbdev)
 {
-#ifdef CONFIG_MALI_ARBITER_SUPPORT
-#if IS_ENABLED(CONFIG_OF)
-	if (kbase_is_pv_enabled(kbdev->dev->of_node))
-		kbase_arbiter_pm_early_term(kbdev);
-	else
-		power_control_term(kbdev);
-#endif /* CONFIG_OF */
-#else
-	power_control_term(kbdev);
-#endif
+	kbase_arbiter_pm_early_term(kbdev);
 }
 
 int power_control_init(struct kbase_device *kbdev)
@@ -5064,11 +4834,12 @@ static struct dentry *init_debugfs(struct kbase_device *kbdev)
 		return dentry;
 	}
 
+
 	dentry = debugfs_ctx_defaults_init(kbdev);
 	if (IS_ERR_OR_NULL(dentry))
 		return dentry;
 
-	if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PROTECTED_DEBUG_MODE)) {
+	if (kbase_hw_has_feature(kbdev, KBASE_HW_FEATURE_PROTECTED_DEBUG_MODE)) {
 		dentry = debugfs_create_file("protected_debug_mode", 0444,
 					     kbdev->mali_debugfs_directory, kbdev,
 					     &fops_protected_debug_mode);
@@ -5955,11 +5726,11 @@ static int kbase_platform_device_probe(struct platform_device *pdev)
 #if (KERNEL_VERSION(5, 3, 0) <= LINUX_VERSION_CODE)
 		mutex_unlock(&kbase_probe_mutex);
 #endif
-#ifdef CONFIG_MALI_ARBITER_SUPPORT
-		mutex_lock(&kbdev->pm.lock);
-		kbase_arbiter_pm_vm_event(kbdev, KBASE_VM_GPU_INITIALIZED_EVT);
-		mutex_unlock(&kbdev->pm.lock);
-#endif
+		if (kbase_has_arbiter(kbdev)) {
+			mutex_lock(&kbdev->pm.lock);
+			kbase_arbiter_pm_vm_event(kbdev, KBASE_VM_GPU_INITIALIZED_EVT);
+			mutex_unlock(&kbdev->pm.lock);
+		}
 	}
 
 	return err;
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_cs_experimental.h b/drivers/gpu/arm/bifrost/mali_kbase_cs_experimental.h
index 0c794e2e90bc..e6222979b72c 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_cs_experimental.h
+++ b/drivers/gpu/arm/bifrost/mali_kbase_cs_experimental.h
@@ -1,7 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
 /*
  *
- * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2024 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -30,9 +30,6 @@
  */
 static inline void mali_kbase_print_cs_experimental(void)
 {
-#if MALI_INCREMENTAL_RENDERING_JM
-	pr_info("mali_kbase: INCREMENTAL_RENDERING_JM (experimental) enabled");
-#endif /* MALI_INCREMENTAL_RENDERING_JM */
 }
 
 #endif /* _KBASE_CS_EXPERIMENTAL_H_ */
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_debug_mem_allocs.c b/drivers/gpu/arm/bifrost/mali_kbase_debug_mem_allocs.c
index c92fb9e0957e..4b7f6a186ac0 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_debug_mem_allocs.c
+++ b/drivers/gpu/arm/bifrost/mali_kbase_debug_mem_allocs.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
 /*
  *
- * (C) COPYRIGHT 2022-2023 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2022-2024 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -58,7 +58,7 @@ static void debug_zone_mem_allocs_show(struct kbase_reg_zone *zone, struct seq_f
 	for (p = rb_first(rbtree); p; p = rb_next(p)) {
 		reg = rb_entry(p, struct kbase_va_region, rblink);
 		if (!(reg->flags & KBASE_REG_FREE)) {
-			seq_printf(sfile, "%16llx, %16zx, %16zx, %8lx, %s\n",
+			seq_printf(sfile, "%16llx, %16zx, %16zx, %8llx, %s\n",
 				   reg->start_pfn << PAGE_SHIFT, reg->nr_pages << PAGE_SHIFT,
 				   kbase_reg_current_backed_size(reg) << PAGE_SHIFT, reg->flags,
 				   type_names[reg->gpu_alloc->type]);
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_debug_mem_view.c b/drivers/gpu/arm/bifrost/mali_kbase_debug_mem_view.c
index dd8f8ff6fe79..48469cdcc34e 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_debug_mem_view.c
+++ b/drivers/gpu/arm/bifrost/mali_kbase_debug_mem_view.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
 /*
  *
- * (C) COPYRIGHT 2013-2023 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2013-2024 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -237,7 +237,11 @@ static int debug_mem_open(struct inode *i, struct file *file)
 	int ret;
 	enum kbase_memory_zone idx;
 
-	if (!kbase_file_inc_fops_count_unless_closed(kctx->kfile))
+#if (KERNEL_VERSION(6, 7, 0) > LINUX_VERSION_CODE)
+	if (get_file_rcu(kctx->filp) == 0)
+#else
+	if (get_file_rcu(&kctx->filp) == 0)
+#endif
 		return -ENOENT;
 
 	/* Check if file was opened in write mode. GPU memory contents
@@ -297,7 +301,7 @@ out:
 	}
 	seq_release(i, file);
 open_fail:
-	kbase_file_dec_fops_count(kctx->kfile);
+	fput(kctx->filp);
 
 	return ret;
 }
@@ -327,7 +331,7 @@ static int debug_mem_release(struct inode *inode, struct file *file)
 		kfree(mem_data);
 	}
 
-	kbase_file_dec_fops_count(kctx->kfile);
+	fput(kctx->filp);
 
 	return 0;
 }
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_defs.h b/drivers/gpu/arm/bifrost/mali_kbase_defs.h
index 13a5c30dcb61..b97df15f7a17 100755
--- a/drivers/gpu/arm/bifrost/mali_kbase_defs.h
+++ b/drivers/gpu/arm/bifrost/mali_kbase_defs.h
@@ -1,7 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
 /*
  *
- * (C) COPYRIGHT 2011-2023 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2011-2024 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -28,8 +28,8 @@
 #define _KBASE_DEFS_H_
 
 #include <mali_kbase_config.h>
-#include <mali_base_hwconfig_features.h>
-#include <mali_base_hwconfig_issues.h>
+#include <mali_kbase_hwconfig_features.h>
+#include <mali_kbase_hwconfig_issues.h>
 #include <mali_kbase_mem_lowlevel.h>
 #include <mmu/mali_kbase_mmu_hw.h>
 #include <backend/gpu/mali_kbase_instr_defs.h>
@@ -52,10 +52,6 @@
 #include <linux/version_compat_defs.h>
 
 
-#if IS_ENABLED(CONFIG_DEBUG_FS)
-#include <linux/debugfs.h>
-#endif /* CONFIG_DEBUG_FS */
-
 #ifdef CONFIG_MALI_BIFROST_DEVFREQ
 #include <linux/devfreq.h>
 #endif /* CONFIG_MALI_BIFROST_DEVFREQ */
@@ -64,9 +60,7 @@
 #include <linux/devfreq_cooling.h>
 #endif
 
-#ifdef CONFIG_MALI_ARBITER_SUPPORT
 #include <arbiter/mali_kbase_arbiter_defs.h>
-#endif /* CONFIG_MALI_ARBITER_SUPPORT */
 
 #include <linux/memory_group_manager.h>
 #include <soc/rockchip/rockchip_opp_select.h>
@@ -78,6 +72,7 @@
 #include <linux/file.h>
 #include <linux/sizes.h>
 #include <linux/clk.h>
+#include <linux/debugfs.h>
 #include <linux/regulator/consumer.h>
 
 /** Number of milliseconds before we time out on a GPU soft/hard reset */
@@ -178,16 +173,11 @@ struct kbase_gpu_metrics {
  *
  * @link:                    Links the object in kbase_device::gpu_metrics::active_list
  *                           or kbase_device::gpu_metrics::inactive_list.
- * @first_active_start_time: Records the time at which the application first became
+ * @active_start_time:       Records the time at which the application first became
  *                           active in the current work period.
- * @last_active_start_time:  Records the time at which the application last became
- *                           active in the current work period.
- * @last_active_end_time:    Records the time at which the application last became
- *                           inactive in the current work period.
- * @total_active:            Tracks the time for which application has been active
- *                           in the current work period.
- * @prev_wp_active_end_time: Records the time at which the application last became
- *                           inactive in the previous work period.
+ * @active_end_time:         Records the time at which the application last became
+ *                           inactive in the current work period, or the time of the end of
+ *                           previous work period if the application remained active.
  * @aid:                     Unique identifier for an application.
  * @kctx_count:              Counter to keep a track of the number of Kbase contexts
  *                           created for an application. There may be multiple Kbase
@@ -195,19 +185,14 @@ struct kbase_gpu_metrics {
  *                           metrics context.
  * @active_cnt:              Counter that is updated every time the GPU activity starts
  *                           and ends in the current work period for an application.
- * @flags:                   Flags to track the state of GPU metrics context.
  */
 struct kbase_gpu_metrics_ctx {
 	struct list_head link;
-	u64 first_active_start_time;
-	u64 last_active_start_time;
-	u64 last_active_end_time;
-	u64 total_active;
-	u64 prev_wp_active_end_time;
+	u64 active_start_time;
+	u64 active_end_time;
 	unsigned int aid;
 	unsigned int kctx_count;
 	u8 active_cnt;
-	u8 flags;
 };
 #endif
 
@@ -307,24 +292,33 @@ struct kbase_fault {
 #define MAX_PAGES_FOR_FREE_PGDS ((size_t)9)
 
 /* Maximum number of pointers to free PGDs */
-#define MAX_FREE_PGDS ((PAGE_SIZE / sizeof(struct page *)) * MAX_PAGES_FOR_FREE_PGDS)
+#define MAX_FREE_PGDS ((PAGE_SIZE / sizeof(phys_addr_t)) * MAX_PAGES_FOR_FREE_PGDS)
 
 /**
  * struct kbase_mmu_table  - object representing a set of GPU page tables
- * @mmu_lock:             Lock to serialize the accesses made to multi level GPU
- *                        page tables
- * @pgd:                  Physical address of the page allocated for the top
- *                        level page table of the context, this is used for
- *                        MMU HW programming as the address translation will
- *                        start from the top level page table.
- * @group_id:             A memory group ID to be passed to a platform-specific
- *                        memory group manager.
- *                        Valid range is 0..(MEMORY_GROUP_MANAGER_NR_GROUPS-1).
- * @kctx:                 If this set of MMU tables belongs to a context then
- *                        this is a back-reference to the context, otherwise
- *                        it is NULL.
- * @scratch_mem:          Scratch memory used for MMU operations, which are
- *                        serialized by the @mmu_lock.
+ * @mmu_lock:                Lock to serialize the accesses made to multi level GPU
+ *                           page tables
+ * @pgd:                     Physical address of the page allocated for the top
+ *                           level page table of the context, this is used for
+ *                           MMU HW programming as the address translation will
+ *                           start from the top level page table.
+ * @group_id:                A memory group ID to be passed to a platform-specific
+ *                           memory group manager.
+ *                           Valid range is 0..(MEMORY_GROUP_MANAGER_NR_GROUPS-1).
+ * @kctx:                    If this set of MMU tables belongs to a context then
+ *                           this is a back-reference to the context, otherwise
+ *                           it is NULL.
+ * @scratch_mem:             Scratch memory used for MMU operations, which are
+ *                           serialized by the @mmu_lock.
+ * @pgd_pages_list:          List head to link all 16K/64K pages allocated for the PGDs of mmut.
+ *                           These pages will be used to allocate 4KB PGD pages for
+ *                           the GPU page table.
+ *                           Linked with &kbase_page_metadata.data.pt_mapped.pgd_link.
+ * @last_allocated_pgd_page: Pointer to PGD page from where the last sub page
+ *                           was allocated for mmut.
+ * @last_freed_pgd_page:     Pointer to PGD page to which the last freed 4K sub page
+ *                           was returned for mmut.
+ * @num_free_pgd_sub_pages:  The total number of free 4K PGD pages in the mmut.
  */
 struct kbase_mmu_table {
 	struct mutex mmu_lock;
@@ -342,7 +336,7 @@ struct kbase_mmu_table {
 			 * @levels: Array of PGD pages, large enough to copy one PGD
 			 *          for each level of the MMU table.
 			 */
-			u64 levels[MIDGARD_MMU_BOTTOMLEVEL][PAGE_SIZE / sizeof(u64)];
+			u64 levels[MIDGARD_MMU_BOTTOMLEVEL][GPU_PAGE_SIZE / sizeof(u64)];
 		} teardown_pages;
 		/**
 		 * @free_pgds: Scratch memory used for insertion, update and teardown
@@ -351,11 +345,18 @@ struct kbase_mmu_table {
 		 */
 		struct {
 			/** @pgds: Array of pointers to PGDs to free. */
-			struct page *pgds[MAX_FREE_PGDS];
+			phys_addr_t pgds[MAX_FREE_PGDS];
 			/** @head_index: Index of first free element in the PGDs array. */
 			size_t head_index;
 		} free_pgds;
 	} scratch_mem;
+
+#if GPU_PAGES_PER_CPU_PAGE > 1
+	struct list_head pgd_pages_list;
+	struct page *last_allocated_pgd_page;
+	struct page *last_freed_pgd_page;
+	u32 num_free_pgd_sub_pages;
+#endif
 };
 
 #if MALI_USE_CSF
@@ -381,14 +382,9 @@ static inline int kbase_as_has_page_fault(struct kbase_as *as, struct kbase_faul
  *
  * @used_pages:   Tracks usage of OS shared memory. Updated when OS memory is
  *                allocated/freed.
- * @ir_threshold: Fraction of the maximum size of an allocation that grows
- *                on GPU page fault that can be used before the driver
- *                switches to incremental rendering, in 1/256ths.
- *                0 means disabled.
  */
 struct kbasep_mem_device {
 	atomic_t used_pages;
-	atomic_t ir_threshold;
 };
 
 struct kbase_clk_rate_listener;
@@ -493,9 +489,7 @@ struct kbase_pm_device_data {
 #if MALI_USE_CSF
 	bool runtime_active;
 #endif
-#ifdef CONFIG_MALI_ARBITER_SUPPORT
 	atomic_t gpu_lost;
-#endif /* CONFIG_MALI_ARBITER_SUPPORT */
 	wait_queue_head_t zero_active_count_wait;
 	wait_queue_head_t resume_wait;
 
@@ -511,10 +505,8 @@ struct kbase_pm_device_data {
 	void (*callback_power_runtime_term)(struct kbase_device *kbdev);
 	u32 dvfs_period;
 	struct kbase_pm_backend_data backend;
-#ifdef CONFIG_MALI_ARBITER_SUPPORT
 	struct kbase_arbiter_vm_state *arb_vm_state;
 	atomic_t gpu_users_waiting;
-#endif /* CONFIG_MALI_ARBITER_SUPPORT */
 	struct kbase_clk_rate_trace_manager clk_rtm;
 };
 
@@ -555,7 +547,7 @@ struct kbase_mem_pool {
 	u8 group_id;
 	spinlock_t pool_lock;
 	struct list_head page_list;
-	struct shrinker reclaim;
+	DEFINE_KBASE_SHRINKER reclaim;
 	atomic_t isolation_in_progress_cnt;
 
 	struct kbase_mem_pool *next_pool;
@@ -847,8 +839,6 @@ struct kbase_mem_migrate {
  * @as_free:               Bitpattern of free/available GPU address spaces.
  * @mmu_mask_change:       Lock to serialize the access to MMU interrupt mask
  *                         register used in the handling of Bus & Page faults.
- * @pagesize_2mb:          Boolean to determine whether 2MiB page sizes are
- *                         supported and used where possible.
  * @gpu_props:             Object containing complete information about the
  *                         configuration/properties of GPU HW device in use.
  * @hw_issues_mask:        List of SW workarounds for HW issues
@@ -959,7 +949,7 @@ struct kbase_mem_migrate {
  * @ipa.last_sample_time:  Records the time when counters, used for dynamic
  *                         energy estimation, were last sampled.
  * @previous_frequency:    Previous frequency of GPU clock used for
- *                         BASE_HW_ISSUE_GPU2017_1336 workaround, This clock is
+ *                         KBASE_HW_ISSUE_GPU2017_1336 workaround, This clock is
  *                         restored when L2 is powered on.
  * @job_fault_debug:       Flag to control the dumping of debug data for job faults,
  *                         set when the 'job_fault' debugfs file is opened.
@@ -1081,7 +1071,8 @@ struct kbase_mem_migrate {
  *                          KCPU queue. These structures may outlive kbase module
  *                          itself. Therefore, in such a case, a warning should be
  *                          be produced.
- * @va_region_slab:         kmem_cache (slab) for allocated kbase_va_region structures.
+ * @va_region_slab:         kmem_cache (slab) for allocated &struct kbase_va_region structures.
+ * @page_metadata_slab:     kmem_cache (slab) for allocated &struct kbase_page_metadata structures.
  * @fence_signal_timeout_enabled: Global flag for whether fence signal timeout tracking
  *                                is enabled.
  * @pcm_prioritized_process_nb: Notifier block for the Priority Control Manager
@@ -1144,12 +1135,10 @@ struct kbase_device {
 
 	spinlock_t mmu_mask_change;
 
-	bool pagesize_2mb;
-
 	struct kbase_gpu_props gpu_props;
 
-	unsigned long hw_issues_mask[(BASE_HW_ISSUE_END + BITS_PER_LONG - 1) / BITS_PER_LONG];
-	unsigned long hw_features_mask[(BASE_HW_FEATURE_END + BITS_PER_LONG - 1) / BITS_PER_LONG];
+	unsigned long hw_issues_mask[(KBASE_HW_ISSUE_END + BITS_PER_LONG - 1) / BITS_PER_LONG];
+	unsigned long hw_features_mask[(KBASE_HW_FEATURE_END + BITS_PER_LONG - 1) / BITS_PER_LONG];
 
 	struct {
 		atomic_t count;
@@ -1165,6 +1154,12 @@ struct kbase_device {
 	 */
 	u8 pbha_propagate_bits;
 
+	/**
+	 * @mma_wa_id: The PBHA ID to use for the PBHA OVERRIDE based workaround for MMA violation.
+	 *
+	 */
+	u32 mma_wa_id;
+
 #if MALI_USE_CSF
 	struct kbase_hwcnt_backend_csf_if hwcnt_backend_csf_if_fw;
 #else
@@ -1256,7 +1251,6 @@ struct kbase_device {
 	atomic_t job_fault_debug;
 #endif /* !MALI_USE_CSF */
 
-#if IS_ENABLED(CONFIG_DEBUG_FS)
 	struct dentry *mali_debugfs_directory;
 	struct dentry *debugfs_ctx_directory;
 	struct dentry *debugfs_instr_directory;
@@ -1278,7 +1272,6 @@ struct kbase_device {
 		u32 reg_offset;
 	} regs_dump_debugfs_data;
 #endif /* !MALI_CUSTOMER_RELEASE */
-#endif /* CONFIG_DEBUG_FS */
 
 	atomic_t ctx_num;
 
@@ -1369,9 +1362,7 @@ struct kbase_device {
 	} dummy_job_wa;
 	bool dummy_job_wa_loaded;
 
-#ifdef CONFIG_MALI_ARBITER_SUPPORT
 	struct kbase_arbiter_device arb;
-#endif
 	/* Priority Control Manager device */
 	struct priority_control_manager_device *pcm_dev;
 
@@ -1396,6 +1387,9 @@ struct kbase_device {
 	atomic_t live_fence_metadata;
 #endif
 	struct kmem_cache *va_region_slab;
+#if GPU_PAGES_PER_CPU_PAGE > 1
+	struct kmem_cache *page_metadata_slab;
+#endif
 
 #if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD)
 	/**
@@ -1424,9 +1418,6 @@ struct kbase_device {
  * @KBASE_FILE_COMPLETE:        Indicates if the setup for context has
  *                              completed, i.e. flags have been set for the
  *                              context.
- * @KBASE_FILE_DESTROY_CTX:     Indicates that destroying of context has begun or
- *                              is complete. This state can only be reached after
- *                              @KBASE_FILE_COMPLETE.
  *
  * The driver allows only limited interaction with user-space until setup
  * is complete.
@@ -1436,8 +1427,7 @@ enum kbase_file_state {
 	KBASE_FILE_VSN_IN_PROGRESS,
 	KBASE_FILE_NEED_CTX,
 	KBASE_FILE_CTX_IN_PROGRESS,
-	KBASE_FILE_COMPLETE,
-	KBASE_FILE_DESTROY_CTX
+	KBASE_FILE_COMPLETE
 };
 
 /**
@@ -1447,12 +1437,6 @@ enum kbase_file_state {
  *                       allocated from the probe method of the Mali driver.
  * @filp:                Pointer to the struct file corresponding to device file
  *                       /dev/malixx instance, passed to the file's open method.
- * @owner:               Pointer to the file table structure of a process that
- *                       created the instance of /dev/malixx device file. Set to
- *                       NULL when that process closes the file instance. No more
- *                       file operations would be allowed once set to NULL.
- *                       It would be updated only in the Userspace context, i.e.
- *                       when @kbase_open or @kbase_flush is called.
  * @kctx:                Object representing an entity, among which GPU is
  *                       scheduled and which gets its own GPU address space.
  *                       Invalid until @setup_state is KBASE_FILE_COMPLETE.
@@ -1461,44 +1445,13 @@ enum kbase_file_state {
  *                       @setup_state is KBASE_FILE_NEED_CTX.
  * @setup_state:         Initialization state of the file. Values come from
  *                       the kbase_file_state enumeration.
- * @destroy_kctx_work:   Work item for destroying the @kctx, enqueued only when
- *                       @fops_count and @map_count becomes zero after /dev/malixx
- *                       file was previously closed by the @owner.
- * @lock:                Lock to serialize the access to members like @owner, @fops_count,
- *                       @map_count.
- * @fops_count:          Counter that is incremented at the beginning of a method
- *                       defined for @kbase_fops and is decremented at the end.
- *                       So the counter keeps a track of the file operations in progress
- *                       for /dev/malixx file, that are being handled by the Kbase.
- *                       The counter is needed to defer the context termination as
- *                       Userspace can close the /dev/malixx file and flush() method
- *                       can get called when some other file operation is in progress.
- * @map_count:           Counter to keep a track of the memory mappings present on
- *                       /dev/malixx file instance. The counter is needed to defer the
- *                       context termination as Userspace can close the /dev/malixx
- *                       file and flush() method can get called when mappings are still
- *                       present.
- * @zero_fops_count_wait: Waitqueue used to wait for the @fops_count to become 0.
- *                        Currently needed only for the "mem_view" debugfs file.
- * @event_queue:          Wait queue used for blocking the thread, which consumes
- *                        the base_jd_event corresponding to an atom, when there
- *                        are no more posted events.
  */
 struct kbase_file {
 	struct kbase_device *kbdev;
 	struct file *filp;
-	fl_owner_t owner;
 	struct kbase_context *kctx;
 	unsigned long api_version;
 	atomic_t setup_state;
-	struct work_struct destroy_kctx_work;
-	spinlock_t lock;
-	int fops_count;
-	int map_count;
-#if IS_ENABLED(CONFIG_DEBUG_FS)
-	wait_queue_head_t zero_fops_count_wait;
-#endif
-	wait_queue_head_t event_queue;
 };
 #if MALI_JIT_PRESSURE_LIMIT_BASE
 /**
@@ -1680,8 +1633,8 @@ struct kbase_sub_alloc {
 /**
  * struct kbase_context - Kernel base context
  *
- * @kfile:                Pointer to the object representing the /dev/malixx device
- *                        file instance.
+ * @filp:                 Pointer to the struct file corresponding to device file
+ *                        /dev/malixx instance, passed to the file's open method.
  * @kbdev:                Pointer to the Kbase device for which the context is created.
  * @kctx_list_link:       Node into Kbase device list of contexts.
  * @mmu:                  Structure holding details of the MMU tables for this
@@ -1734,6 +1687,9 @@ struct kbase_sub_alloc {
  *                        used in conjunction with @cookies bitmask mainly for
  *                        providing a mechansim to have the same value for CPU &
  *                        GPU virtual address.
+ * @event_queue:          Wait queue used for blocking the thread, which consumes
+ *                        the base_jd_event corresponding to an atom, when there
+ *                        are no more posted events.
  * @tgid:                 Thread group ID of the process whose thread created
  *                        the context (by calling KBASE_IOCTL_VERSION_CHECK or
  *                        KBASE_IOCTL_SET_FLAGS, depending on the @api_version).
@@ -1945,7 +1901,7 @@ struct kbase_sub_alloc {
  * is made on the device file.
  */
 struct kbase_context {
-	struct kbase_file *kfile;
+	struct file *filp;
 	struct kbase_device *kbdev;
 	struct list_head kctx_list_link;
 	struct kbase_mmu_table mmu;
@@ -1997,6 +1953,7 @@ struct kbase_context {
 	DECLARE_BITMAP(cookies, BITS_PER_LONG);
 	struct kbase_va_region *pending_regions[BITS_PER_LONG];
 
+	wait_queue_head_t event_queue;
 	pid_t tgid;
 	pid_t pid;
 	atomic_t prioritized;
@@ -2006,7 +1963,8 @@ struct kbase_context {
 
 	struct kbase_mem_pool_group mem_pools;
 
-	struct shrinker reclaim;
+	DEFINE_KBASE_SHRINKER reclaim;
+
 	struct list_head evict_list;
 	atomic_t evict_nents;
 
@@ -2181,6 +2139,18 @@ static inline u64 kbase_get_lock_region_min_size_log2(struct kbase_gpu_props con
 	return 15; /* 32 kB */
 }
 
+/**
+ * kbase_has_arbiter - Check whether GPU has an arbiter.
+ *
+ * @kbdev: KBase device.
+ *
+ * Return: True if there is an arbiter, False otherwise.
+ */
+static inline bool kbase_has_arbiter(struct kbase_device *kbdev)
+{
+	return (bool)kbdev->arb.arb_if;
+}
+
 /* Conversion helpers for setting up high resolution timers */
 #define HR_TIMER_DELAY_MSEC(x) (ns_to_ktime(((u64)(x)) * 1000000U))
 #define HR_TIMER_DELAY_NSEC(x) (ns_to_ktime(x))
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_dummy_job_wa.c b/drivers/gpu/arm/bifrost/mali_kbase_dummy_job_wa.c
index 9c39f0e20f76..7b578c81af60 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_dummy_job_wa.c
+++ b/drivers/gpu/arm/bifrost/mali_kbase_dummy_job_wa.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
 /*
  *
- * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2024 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -218,7 +218,7 @@ static bool wa_blob_load_needed(struct kbase_device *kbdev)
 	if (of_machine_is_compatible("arm,juno"))
 		return false;
 
-	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TTRX_3485))
+	if (kbase_hw_has_issue(kbdev, KBASE_HW_ISSUE_TTRX_3485))
 		return true;
 
 	return false;
@@ -311,7 +311,7 @@ int kbase_dummy_job_wa_load(struct kbase_device *kbdev)
 	while (blob_offset) {
 		const struct wa_blob *blob;
 		size_t nr_pages;
-		u64 flags;
+		base_mem_alloc_flags flags;
 		u64 gpu_va;
 		struct kbase_va_region *va_region;
 
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_fence.h b/drivers/gpu/arm/bifrost/mali_kbase_fence.h
index 06690d4f17bb..d45a0fec4104 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_fence.h
+++ b/drivers/gpu/arm/bifrost/mali_kbase_fence.h
@@ -35,8 +35,37 @@
 #include <linux/version_compat_defs.h>
 
 #if MALI_USE_CSF
+/* Number of digits needed to express the max value of given unsigned type.
+ *
+ * Details: the number of decimal digits needed for the max value t_max of a type is
+ * floor(log10(t_max)) + 1, where log10(t_max) == log2(t_max) / log2(10).
+ * log2(t_max) is approx sizeof(t) * BITS_PER_BYTE.
+ * 1 / log2(10) is approx 1233 / 4096, i.e. multiply by 1233 and shift right by 12.
+ * Hence, number of digits for given type ==
+ * ((sizeof(t) * BITS_PER_BYTE * 1233) >> 12) + 1
+ */
+#define MAX_DIGITS_FOR_UNSIGNED_TYPE(t) ((((sizeof(t) * BITS_PER_BYTE) * 1233) >> 12) + 1)
+
+/* Number of digits needed to express the max value of given signed type,
+ * including the sign character.
+ */
+#define MAX_DIGITS_FOR_SIGNED_TYPE(t) (MAX_DIGITS_FOR_UNSIGNED_TYPE(t) + 1)
+
+/* Max number of characters for id member of kbase_device struct. */
+#define MAX_KBDEV_ID_LEN MAX_DIGITS_FOR_UNSIGNED_TYPE(u32)
+/* Max number of characters for tgid member of kbase_context struct. */
+#define MAX_KCTX_TGID_LEN MAX_DIGITS_FOR_SIGNED_TYPE(pid_t)
+/* Max number of characters for id member of kbase_context struct. */
+#define MAX_KCTX_ID_LEN MAX_DIGITS_FOR_UNSIGNED_TYPE(u32)
+/* Max number of characters for fence_context member of kbase_kcpu_command_queue struct. */
+#define MAX_KCTX_QUEUE_FENCE_CTX_LEN MAX_DIGITS_FOR_UNSIGNED_TYPE(u64)
+/* Max number of characters for timeline name fixed format, including null character. */
+#define FIXED_FORMAT_LEN (9)
+
 /* Maximum number of characters in DMA fence timeline name. */
-#define MAX_TIMELINE_NAME (32)
+#define MAX_TIMELINE_NAME                                                                        \
+	(MAX_KBDEV_ID_LEN + MAX_KCTX_TGID_LEN + MAX_KCTX_ID_LEN + MAX_KCTX_QUEUE_FENCE_CTX_LEN + \
+	 FIXED_FORMAT_LEN)
 
 /**
  * struct kbase_kcpu_dma_fence_meta - Metadata structure for dma fence objects containing
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_gpu_metrics.c b/drivers/gpu/arm/bifrost/mali_kbase_gpu_metrics.c
index 3a5b97db7c04..60ad1c272f84 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_gpu_metrics.c
+++ b/drivers/gpu/arm/bifrost/mali_kbase_gpu_metrics.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
 /*
  *
- * (C) COPYRIGHT 2023 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2023-2024 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -29,96 +29,46 @@
 #include <linux/module.h>
 #include <linux/slab.h>
 
-/**
- * enum gpu_metrics_ctx_flags - Flags for the GPU metrics context
- *
- * @ACTIVE_INTERVAL_IN_WP: Flag set when the application first becomes active in
- *                         the current work period.
- *
- * @INSIDE_ACTIVE_LIST:    Flag to track if object is in kbase_device::gpu_metrics::active_list
- *
- * All members need to be separate bits. This enum is intended for use in a
- * bitmask where multiple values get OR-ed together.
- */
-enum gpu_metrics_ctx_flags {
-	ACTIVE_INTERVAL_IN_WP = 1 << 0,
-	INSIDE_ACTIVE_LIST = 1 << 1,
-};
-
 static unsigned long gpu_metrics_tp_emit_interval_ns = DEFAULT_GPU_METRICS_TP_EMIT_INTERVAL_NS;
 
 module_param(gpu_metrics_tp_emit_interval_ns, ulong, 0444);
 MODULE_PARM_DESC(gpu_metrics_tp_emit_interval_ns,
 		 "Time interval in nano seconds at which GPU metrics tracepoints are emitted");
 
-static inline bool gpu_metrics_ctx_flag(struct kbase_gpu_metrics_ctx *gpu_metrics_ctx,
-					enum gpu_metrics_ctx_flags flag)
-{
-	return (gpu_metrics_ctx->flags & flag);
-}
-
-static inline void gpu_metrics_ctx_flag_set(struct kbase_gpu_metrics_ctx *gpu_metrics_ctx,
-					    enum gpu_metrics_ctx_flags flag)
-{
-	gpu_metrics_ctx->flags |= flag;
-}
-
-static inline void gpu_metrics_ctx_flag_clear(struct kbase_gpu_metrics_ctx *gpu_metrics_ctx,
-					      enum gpu_metrics_ctx_flags flag)
-{
-	gpu_metrics_ctx->flags &= ~flag;
-}
-
 static inline void validate_tracepoint_data(struct kbase_gpu_metrics_ctx *gpu_metrics_ctx,
 					    u64 start_time, u64 end_time, u64 total_active)
 {
 #if 0
-	WARN(total_active > NSEC_PER_SEC, "total_active %llu > 1 second for aid %u active_cnt %u",
-	     total_active, gpu_metrics_ctx->aid, gpu_metrics_ctx->active_cnt);
-
 	WARN(start_time >= end_time, "start_time %llu >= end_time %llu for aid %u active_cnt %u",
 	     start_time, end_time, gpu_metrics_ctx->aid, gpu_metrics_ctx->active_cnt);
 
 	WARN(total_active > (end_time - start_time),
 	     "total_active %llu > end_time %llu - start_time %llu for aid %u active_cnt %u",
 	     total_active, end_time, start_time, gpu_metrics_ctx->aid, gpu_metrics_ctx->active_cnt);
-
-	WARN(gpu_metrics_ctx->prev_wp_active_end_time > start_time,
-	     "prev_wp_active_end_time %llu > start_time %llu for aid %u active_cnt %u",
-	     gpu_metrics_ctx->prev_wp_active_end_time, start_time, gpu_metrics_ctx->aid,
-	     gpu_metrics_ctx->active_cnt);
 #endif
 }
 
 static void emit_tracepoint_for_active_gpu_metrics_ctx(
 	struct kbase_device *kbdev, struct kbase_gpu_metrics_ctx *gpu_metrics_ctx, u64 current_time)
 {
-	const u64 start_time = gpu_metrics_ctx->first_active_start_time;
-	u64 total_active = gpu_metrics_ctx->total_active;
-	u64 end_time;
+	const u64 start_time = gpu_metrics_ctx->active_start_time;
+	u64 total_active, end_time = current_time;
 
 	/* Check if the GPU activity is currently ongoing */
 	if (gpu_metrics_ctx->active_cnt) {
 		/* The following check is to handle the race on CSF GPUs that can happen between
 		 * the draining of trace buffer and FW emitting the ACT=1 event .
 		 */
-		if (unlikely(current_time == gpu_metrics_ctx->last_active_start_time))
-			current_time++;
-		end_time = current_time;
-		total_active += end_time - gpu_metrics_ctx->last_active_start_time;
-
-		gpu_metrics_ctx->first_active_start_time = current_time;
-		gpu_metrics_ctx->last_active_start_time = current_time;
-	} else {
-		end_time = gpu_metrics_ctx->last_active_end_time;
-		gpu_metrics_ctx_flag_clear(gpu_metrics_ctx, ACTIVE_INTERVAL_IN_WP);
+		if (unlikely(end_time == start_time))
+			end_time++;
+		gpu_metrics_ctx->active_start_time = end_time;
 	}
 
+	total_active = end_time - start_time;
 	trace_gpu_work_period(kbdev->id, gpu_metrics_ctx->aid, start_time, end_time, total_active);
 
 	validate_tracepoint_data(gpu_metrics_ctx, start_time, end_time, total_active);
-	gpu_metrics_ctx->prev_wp_active_end_time = end_time;
-	gpu_metrics_ctx->total_active = 0;
+	gpu_metrics_ctx->active_end_time = end_time;
 }
 
 void kbase_gpu_metrics_ctx_put(struct kbase_device *kbdev,
@@ -131,7 +81,8 @@ void kbase_gpu_metrics_ctx_put(struct kbase_device *kbdev,
 	if (gpu_metrics_ctx->kctx_count)
 		return;
 
-	if (gpu_metrics_ctx_flag(gpu_metrics_ctx, ACTIVE_INTERVAL_IN_WP))
+	/* Generate a tracepoint if there's still activity */
+	if (gpu_metrics_ctx->active_cnt)
 		emit_tracepoint_for_active_gpu_metrics_ctx(kbdev, gpu_metrics_ctx,
 							   ktime_get_raw_ns());
 
@@ -166,12 +117,11 @@ struct kbase_gpu_metrics_ctx *kbase_gpu_metrics_ctx_get(struct kbase_device *kbd
 void kbase_gpu_metrics_ctx_init(struct kbase_device *kbdev,
 				struct kbase_gpu_metrics_ctx *gpu_metrics_ctx, unsigned int aid)
 {
+	gpu_metrics_ctx->active_start_time = 0;
+	gpu_metrics_ctx->active_end_time = 0;
 	gpu_metrics_ctx->aid = aid;
-	gpu_metrics_ctx->total_active = 0;
 	gpu_metrics_ctx->kctx_count = 1;
 	gpu_metrics_ctx->active_cnt = 0;
-	gpu_metrics_ctx->prev_wp_active_end_time = 0;
-	gpu_metrics_ctx->flags = 0;
 	list_add_tail(&gpu_metrics_ctx->link, &kbdev->gpu_metrics.inactive_list);
 }
 
@@ -180,17 +130,9 @@ void kbase_gpu_metrics_ctx_start_activity(struct kbase_context *kctx, u64 timest
 	struct kbase_gpu_metrics_ctx *gpu_metrics_ctx = kctx->gpu_metrics_ctx;
 
 	gpu_metrics_ctx->active_cnt++;
-	if (gpu_metrics_ctx->active_cnt == 1)
-		gpu_metrics_ctx->last_active_start_time = timestamp_ns;
-
-	if (!gpu_metrics_ctx_flag(gpu_metrics_ctx, ACTIVE_INTERVAL_IN_WP)) {
-		gpu_metrics_ctx->first_active_start_time = timestamp_ns;
-		gpu_metrics_ctx_flag_set(gpu_metrics_ctx, ACTIVE_INTERVAL_IN_WP);
-	}
-
-	if (!gpu_metrics_ctx_flag(gpu_metrics_ctx, INSIDE_ACTIVE_LIST)) {
+	if (gpu_metrics_ctx->active_cnt == 1) {
+		gpu_metrics_ctx->active_start_time = timestamp_ns;
 		list_move_tail(&gpu_metrics_ctx->link, &kctx->kbdev->gpu_metrics.active_list);
-		gpu_metrics_ctx_flag_set(gpu_metrics_ctx, INSIDE_ACTIVE_LIST);
 	}
 }
 
@@ -201,22 +143,22 @@ void kbase_gpu_metrics_ctx_end_activity(struct kbase_context *kctx, u64 timestam
 	if (WARN_ON_ONCE(!gpu_metrics_ctx->active_cnt))
 		return;
 
+	/* Do not emit tracepoint if GPU activity still continues. */
 	if (--gpu_metrics_ctx->active_cnt)
 		return;
 
-	if (likely(timestamp_ns > gpu_metrics_ctx->last_active_start_time)) {
-		gpu_metrics_ctx->last_active_end_time = timestamp_ns;
-		gpu_metrics_ctx->total_active +=
-			timestamp_ns - gpu_metrics_ctx->last_active_start_time;
+	if (likely(timestamp_ns > gpu_metrics_ctx->active_start_time)) {
+		emit_tracepoint_for_active_gpu_metrics_ctx(kctx->kbdev, gpu_metrics_ctx,
+							   timestamp_ns);
 		return;
 	}
 
 	/* Due to conversion from system timestamp to CPU timestamp (which involves rounding)
 	 * the value for start and end timestamp could come as same on CSF GPUs.
 	 */
-	if (timestamp_ns == gpu_metrics_ctx->last_active_start_time) {
-		gpu_metrics_ctx->last_active_end_time = timestamp_ns + 1;
-		gpu_metrics_ctx->total_active += 1;
+	if (timestamp_ns == gpu_metrics_ctx->active_start_time) {
+		emit_tracepoint_for_active_gpu_metrics_ctx(kctx->kbdev, gpu_metrics_ctx,
+							   timestamp_ns + 1);
 		return;
 	}
 
@@ -224,12 +166,9 @@ void kbase_gpu_metrics_ctx_end_activity(struct kbase_context *kctx, u64 timestam
 	 * visible to the Kbase even though the system timestamp value sampled by FW was less than
 	 * the system timestamp value sampled by Kbase just before the draining of trace buffer.
 	 */
-	if (gpu_metrics_ctx->last_active_start_time == gpu_metrics_ctx->first_active_start_time &&
-	    gpu_metrics_ctx->prev_wp_active_end_time == gpu_metrics_ctx->first_active_start_time) {
-		WARN_ON_ONCE(gpu_metrics_ctx->total_active);
-		gpu_metrics_ctx->last_active_end_time =
-			gpu_metrics_ctx->prev_wp_active_end_time + 1;
-		gpu_metrics_ctx->total_active = 1;
+	if (gpu_metrics_ctx->active_end_time == gpu_metrics_ctx->active_start_time) {
+		emit_tracepoint_for_active_gpu_metrics_ctx(kctx->kbdev, gpu_metrics_ctx,
+							   gpu_metrics_ctx->active_end_time + 1);
 		return;
 	}
 
@@ -242,15 +181,12 @@ void kbase_gpu_metrics_emit_tracepoint(struct kbase_device *kbdev, u64 ts)
 	struct kbase_gpu_metrics_ctx *gpu_metrics_ctx, *tmp;
 
 	list_for_each_entry_safe(gpu_metrics_ctx, tmp, &gpu_metrics->active_list, link) {
-		if (!gpu_metrics_ctx_flag(gpu_metrics_ctx, ACTIVE_INTERVAL_IN_WP)) {
-			WARN_ON(!gpu_metrics_ctx_flag(gpu_metrics_ctx, INSIDE_ACTIVE_LIST));
-			WARN_ON(gpu_metrics_ctx->active_cnt);
-			list_move_tail(&gpu_metrics_ctx->link, &gpu_metrics->inactive_list);
-			gpu_metrics_ctx_flag_clear(gpu_metrics_ctx, INSIDE_ACTIVE_LIST);
+		if (gpu_metrics_ctx->active_cnt) {
+			emit_tracepoint_for_active_gpu_metrics_ctx(kbdev, gpu_metrics_ctx, ts);
 			continue;
 		}
 
-		emit_tracepoint_for_active_gpu_metrics_ctx(kbdev, gpu_metrics_ctx, ts);
+		list_move_tail(&gpu_metrics_ctx->link, &gpu_metrics->inactive_list);
 	}
 }
 
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_gpu_metrics.h b/drivers/gpu/arm/bifrost/mali_kbase_gpu_metrics.h
index c445dff32dc9..658cf1c164c5 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_gpu_metrics.h
+++ b/drivers/gpu/arm/bifrost/mali_kbase_gpu_metrics.h
@@ -106,7 +106,7 @@ void kbase_gpu_metrics_ctx_init(struct kbase_device *kbdev,
  * @kctx:         Pointer to the Kbase context contributing data to the GPU metrics context.
  * @timestamp_ns: CPU timestamp at which the GPU activity started.
  *
- * The provided timestamp would be later used as the "start_time_ns" for the
+ * The provided timestamp is used as the "start_time_ns" for the
  * power/gpu_work_period tracepoint if this is the first GPU activity for the GPU
  * metrics context in the current work period.
  *
@@ -122,9 +122,9 @@ void kbase_gpu_metrics_ctx_start_activity(struct kbase_context *kctx, u64 timest
  * @kctx:         Pointer to the Kbase context contributing data to the GPU metrics context.
  * @timestamp_ns: CPU timestamp at which the GPU activity ended.
  *
- * The provided timestamp would be later used as the "end_time_ns" for the
- * power/gpu_work_period tracepoint if this is the last GPU activity for the GPU
- * metrics context in the current work period.
+ * The provided timestamp is used as the "end_time_ns" for the power/gpu_work_period
+ * tracepoint if this is the last GPU activity for the GPU metrics context
+ * in the current work period.
  *
  * Note: The caller must appropriately serialize the call to this function with the
  *       call to other GPU metrics functions declared in this file.
@@ -138,8 +138,8 @@ void kbase_gpu_metrics_ctx_end_activity(struct kbase_context *kctx, u64 timestam
  * @kbdev: Pointer to the GPU device.
  * @ts:    Timestamp at which the tracepoint is being emitted.
  *
- * This function would loop through all the active GPU metrics contexts and emit a
- * power/gpu_work_period tracepoint for them.
+ * This function would loop through all GPU metrics contexts in the active list and
+ * emit a power/gpu_work_period tracepoint if the GPU work in the context is still active.
  * The GPU metrics context that is found to be inactive since the last tracepoint
  * was emitted would be moved to the inactive list.
  * The current work period would be considered as over and a new work period would
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_gpuprops.c b/drivers/gpu/arm/bifrost/mali_kbase_gpuprops.c
index 10b3b506e84e..9719580837cc 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_gpuprops.c
+++ b/drivers/gpu/arm/bifrost/mali_kbase_gpuprops.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
 /*
  *
- * (C) COPYRIGHT 2011-2023 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2011-2024 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -357,6 +357,7 @@ enum l2_config_override_result {
 /**
  * kbase_read_l2_config_from_dt - Read L2 configuration
  * @kbdev: The kbase device for which to get the L2 configuration.
+ * @regdump: Pointer to struct kbasep_gpuprops_regdump structure.
  *
  * Check for L2 configuration overrides in module parameters and device tree.
  * Override values in module parameters take priority over override values in
@@ -366,9 +367,16 @@ enum l2_config_override_result {
  *         overridden, L2_CONFIG_OVERRIDE_NONE if no overrides are provided.
  *         L2_CONFIG_OVERRIDE_FAIL otherwise.
  */
-static enum l2_config_override_result kbase_read_l2_config_from_dt(struct kbase_device *const kbdev)
+static enum l2_config_override_result
+kbase_read_l2_config_from_dt(struct kbase_device *const kbdev,
+			     struct kbasep_gpuprops_regdump *regdump)
 {
 	struct device_node *np = kbdev->dev->of_node;
+	/*
+	 * After reset/power-up, the CACHE_SIZE bit field of the L2_FEATURES register holds the
+	 * maximum size of the cache that can be programmed in the L2_CONFIG register.
+	 */
+	const u8 l2_size_max = L2_FEATURES_CACHE_SIZE_GET(regdump->l2_features);
 
 	if (!np)
 		return L2_CONFIG_OVERRIDE_NONE;
@@ -378,8 +386,12 @@ static enum l2_config_override_result kbase_read_l2_config_from_dt(struct kbase_
 	else if (of_property_read_u8(np, "l2-size", &kbdev->l2_size_override))
 		kbdev->l2_size_override = 0;
 
-	if (kbdev->l2_size_override != 0 && kbdev->l2_size_override < OVERRIDE_L2_SIZE_MIN_LOG2)
+	if (kbdev->l2_size_override != 0 && (kbdev->l2_size_override < OVERRIDE_L2_SIZE_MIN_LOG2 ||
+					     kbdev->l2_size_override > l2_size_max)) {
+		dev_err(kbdev->dev, "Invalid Cache Size in %s",
+			override_l2_size ? "Module parameters" : "Device tree node");
 		return L2_CONFIG_OVERRIDE_FAIL;
+	}
 
 	/* Check overriding value is supported, if not will result in
 	 * undefined behavior.
@@ -425,11 +437,11 @@ int kbase_gpuprops_update_l2_features(struct kbase_device *kbdev)
 {
 	int err = 0;
 
-	if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_L2_CONFIG)) {
+	if (kbase_hw_has_feature(kbdev, KBASE_HW_FEATURE_L2_CONFIG)) {
 		struct kbasep_gpuprops_regdump *regdump = &PRIV_DATA_REGDUMP(kbdev);
 
 		/* Check for L2 cache size & hash overrides */
-		switch (kbase_read_l2_config_from_dt(kbdev)) {
+		switch (kbase_read_l2_config_from_dt(kbdev, regdump)) {
 		case L2_CONFIG_OVERRIDE_FAIL:
 			err = -EIO;
 			goto exit;
@@ -687,7 +699,7 @@ static void kbase_populate_user_data(struct kbase_device *kbdev, struct gpu_prop
 		data->thread_props.max_thread_group_split = THREAD_MTGS_DEFAULT;
 	}
 
-	if (!kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_THREAD_GROUP_SPLIT))
+	if (!kbase_hw_has_feature(kbdev, KBASE_HW_FEATURE_THREAD_GROUP_SPLIT))
 		data->thread_props.max_thread_group_split = 0;
 
 	/* Raw Register Values */
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_gwt.c b/drivers/gpu/arm/bifrost/mali_kbase_gwt.c
index c92d54c9e663..99558b82ba7b 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_gwt.c
+++ b/drivers/gpu/arm/bifrost/mali_kbase_gwt.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
 /*
  *
- * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2024 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -30,9 +30,10 @@
 #include <linux/module.h>
 
 static inline void kbase_gpu_gwt_setup_page_permission(struct kbase_context *kctx,
-						       unsigned long flag, struct rb_node *node)
+						       unsigned long flag,
+						       struct kbase_reg_zone *zone)
 {
-	struct rb_node *rbnode = node;
+	struct rb_node *rbnode = rb_first(&zone->reg_rbtree);
 
 	while (rbnode) {
 		struct kbase_va_region *reg;
@@ -55,17 +56,15 @@ static inline void kbase_gpu_gwt_setup_page_permission(struct kbase_context *kct
 
 static void kbase_gpu_gwt_setup_pages(struct kbase_context *kctx, unsigned long flag)
 {
-	kbase_gpu_gwt_setup_page_permission(kctx, flag,
-					    rb_first(&kctx->reg_zone[SAME_VA_ZONE].reg_rbtree));
-	kbase_gpu_gwt_setup_page_permission(kctx, flag,
-					    rb_first(&kctx->reg_zone[CUSTOM_VA_ZONE].reg_rbtree));
+	kbase_gpu_gwt_setup_page_permission(kctx, flag, &kctx->reg_zone[SAME_VA_ZONE]);
+	kbase_gpu_gwt_setup_page_permission(kctx, flag, &kctx->reg_zone[CUSTOM_VA_ZONE]);
 }
 
 int kbase_gpu_gwt_start(struct kbase_context *kctx)
 {
-	kbase_gpu_vm_lock(kctx);
+	kbase_gpu_vm_lock_with_pmode_sync(kctx);
 	if (kctx->gwt_enabled) {
-		kbase_gpu_vm_unlock(kctx);
+		kbase_gpu_vm_unlock_with_pmode_sync(kctx);
 		return -EBUSY;
 	}
 
@@ -91,7 +90,7 @@ int kbase_gpu_gwt_start(struct kbase_context *kctx)
 
 	kbase_gpu_gwt_setup_pages(kctx, ~KBASE_REG_GPU_WR);
 
-	kbase_gpu_vm_unlock(kctx);
+	kbase_gpu_vm_unlock_with_pmode_sync(kctx);
 	return 0;
 }
 
@@ -179,6 +178,10 @@ int kbase_gpu_gwt_dump(struct kbase_context *kctx, union kbase_ioctl_cinstr_gwt_
 	__user void *user_addr = (__user void *)(uintptr_t)gwt_dump->in.addr_buffer;
 	__user void *user_sizes = (__user void *)(uintptr_t)gwt_dump->in.size_buffer;
 
+	/* We don't have any valid user space buffer to copy the write modified addresses. */
+	if (!gwt_dump->in.len || !gwt_dump->in.addr_buffer || !gwt_dump->in.size_buffer)
+		return -EINVAL;
+
 	kbase_gpu_vm_lock(kctx);
 
 	if (!kctx->gwt_enabled) {
@@ -187,14 +190,6 @@ int kbase_gpu_gwt_dump(struct kbase_context *kctx, union kbase_ioctl_cinstr_gwt_
 		return -EPERM;
 	}
 
-	if (!gwt_dump->in.len || !gwt_dump->in.addr_buffer || !gwt_dump->in.size_buffer) {
-		kbase_gpu_vm_unlock(kctx);
-		/* We don't have any valid user space buffer to copy the
-		 * write modified addresses.
-		 */
-		return -EINVAL;
-	}
-
 	if (list_empty(&kctx->gwt_snapshot_list) && !list_empty(&kctx->gwt_current_list)) {
 		list_replace_init(&kctx->gwt_current_list, &kctx->gwt_snapshot_list);
 
@@ -228,14 +223,14 @@ int kbase_gpu_gwt_dump(struct kbase_context *kctx, union kbase_ioctl_cinstr_gwt_
 
 		if (count) {
 			err = copy_to_user((user_addr + (ubuf_count * sizeof(u64))),
-					   (void *)addr_buffer, count * sizeof(u64));
+					   (void *)addr_buffer, size_mul(count, sizeof(u64)));
 			if (err) {
 				dev_err(kctx->kbdev->dev, "Copy to user failure\n");
 				kbase_gpu_vm_unlock(kctx);
 				return err;
 			}
 			err = copy_to_user((user_sizes + (ubuf_count * sizeof(u64))),
-					   (void *)num_page_buffer, count * sizeof(u64));
+					   (void *)num_page_buffer, size_mul(count, sizeof(u64)));
 			if (err) {
 				dev_err(kctx->kbdev->dev, "Copy to user failure\n");
 				kbase_gpu_vm_unlock(kctx);
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_hw.c b/drivers/gpu/arm/bifrost/mali_kbase_hw.c
index 7d4200e96fd3..e04aad2422c7 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_hw.c
+++ b/drivers/gpu/arm/bifrost/mali_kbase_hw.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
 /*
  *
- * (C) COPYRIGHT 2012-2023 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2012-2024 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -23,8 +23,8 @@
  * Run-time work-arounds helpers
  */
 
-#include <mali_base_hwconfig_features.h>
-#include <mali_base_hwconfig_issues.h>
+#include <mali_kbase_hwconfig_features.h>
+#include <mali_kbase_hwconfig_issues.h>
 #include <hw_access/mali_kbase_hw_access_regmap.h>
 #include "mali_kbase.h"
 #include "mali_kbase_hw.h"
@@ -92,7 +92,7 @@ void kbase_hw_set_features_mask(struct kbase_device *kbdev)
 		break;
 	}
 
-	for (; *features != BASE_HW_FEATURE_END; features++)
+	for (; *features != KBASE_HW_FEATURE_END; features++)
 		set_bit(*features, &kbdev->hw_features_mask[0]);
 
 #if defined(CONFIG_MALI_VECTOR_DUMP)
@@ -103,8 +103,8 @@ void kbase_hw_set_features_mask(struct kbase_device *kbdev)
 	 * in the implementation of flush reduction optimization due to
 	 * unclear or ambiguous ARCH spec.
 	 */
-	if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_CLEAN_ONLY_SAFE))
-		clear_bit(BASE_HW_FEATURE_FLUSH_REDUCTION, &kbdev->hw_features_mask[0]);
+	if (kbase_hw_has_feature(kbdev, KBASE_HW_FEATURE_CLEAN_ONLY_SAFE))
+		clear_bit(KBASE_HW_FEATURE_FLUSH_REDUCTION, &kbdev->hw_features_mask[0]);
 #endif
 }
 
@@ -113,7 +113,7 @@ void kbase_hw_set_features_mask(struct kbase_device *kbdev)
  * @kbdev: Device pointer
  *
  * Return: pointer to an array of hardware issues, terminated by
- * BASE_HW_ISSUE_END.
+ * KBASE_HW_ISSUE_END.
  *
  * In debugging versions of the driver, unknown versions of a known GPU will
  * be treated as the most recent known version not later than the actual
@@ -225,6 +225,8 @@ static const enum base_hw_issue *kbase_hw_get_issues_for_new_id(struct kbase_dev
 
 		{ GPU_ID_PRODUCT_TVAX,
 		  { { GPU_ID_VERSION_MAKE(0, 0, 0), base_hw_issues_tVAx_r0p0 },
+		    { GPU_ID_VERSION_MAKE(0, 0, 5), base_hw_issues_tVAx_r0p0 },
+		    { GPU_ID_VERSION_MAKE(0, 1, 0), base_hw_issues_tVAx_r0p1 },
 		    { U32_MAX, NULL } } },
 
 		{ GPU_ID_PRODUCT_TTUX,
@@ -334,6 +336,8 @@ static const enum base_hw_issue *kbase_hw_get_issues_for_new_id(struct kbase_dev
 			gpu_id->version_id = fallback_version;
 		}
 	}
+
+
 	return issues;
 }
 
@@ -420,7 +424,7 @@ int kbase_hw_set_issues_mask(struct kbase_device *kbdev)
 		 gpu_id->product_major, gpu_id->arch_major, gpu_id->arch_minor, gpu_id->arch_rev,
 		 gpu_id->version_major, gpu_id->version_minor, gpu_id->version_status);
 
-	for (; *issues != BASE_HW_ISSUE_END; issues++)
+	for (; *issues != KBASE_HW_ISSUE_END; issues++)
 		set_bit(*issues, &kbdev->hw_issues_mask[0]);
 
 	return 0;
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_hw.h b/drivers/gpu/arm/bifrost/mali_kbase_hw.h
index 44e1ee4a4a50..f14e5fb6d9ab 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_hw.h
+++ b/drivers/gpu/arm/bifrost/mali_kbase_hw.h
@@ -1,7 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
 /*
  *
- * (C) COPYRIGHT 2012-2023 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2012-2024 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -23,8 +23,8 @@
  * DOC: Run-time work-arounds helpers
  */
 
-#ifndef _KBASE_HW_H_
-#define _KBASE_HW_H_
+#ifndef _MALI_KBASE_HW_H_
+#define _MALI_KBASE_HW_H_
 
 #include "mali_kbase_defs.h"
 
@@ -47,7 +47,7 @@
  * @kbdev: Device pointer
  */
 #define kbase_hw_has_l2_slice_hash_feature(kbdev) \
-	test_bit(BASE_HW_FEATURE_L2_SLICE_HASH, &(kbdev)->hw_features_mask[0])
+	test_bit(KBASE_HW_FEATURE_L2_SLICE_HASH, &(kbdev)->hw_features_mask[0])
 
 /**
  * kbase_hw_set_issues_mask - Set the hardware issues mask based on the GPU ID
@@ -73,4 +73,4 @@ int kbase_hw_set_issues_mask(struct kbase_device *kbdev);
  */
 void kbase_hw_set_features_mask(struct kbase_device *kbdev);
 
-#endif /* _KBASE_HW_H_ */
+#endif /* _MALI_KBASE_HW_H_ */
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_hwaccess_pm.h b/drivers/gpu/arm/bifrost/mali_kbase_hwaccess_pm.h
index 7a0ea49099ba..982547d16022 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_hwaccess_pm.h
+++ b/drivers/gpu/arm/bifrost/mali_kbase_hwaccess_pm.h
@@ -129,14 +129,14 @@ void kbase_pm_set_debug_core_mask(struct kbase_device *kbdev, u64 new_core_mask)
  * kbase_pm_set_debug_core_mask - Set the debug core mask.
  *
  * @kbdev: The kbase device structure for the device (must be a valid pointer)
- * @new_core_mask_js0: The core mask to use for job slot 0
- * @new_core_mask_js1: The core mask to use for job slot 1
- * @new_core_mask_js2: The core mask to use for job slot 2
+ * @new_core_mask: The core mask to use, as an array where each element refers
+ *                 to a job slot.
+ * @new_core_mask_size: Number of elements in the core mask array.
  *
  * This determines which cores the power manager is allowed to use.
  */
-void kbase_pm_set_debug_core_mask(struct kbase_device *kbdev, u64 new_core_mask_js0,
-				  u64 new_core_mask_js1, u64 new_core_mask_js2);
+void kbase_pm_set_debug_core_mask(struct kbase_device *kbdev, u64 *new_core_mask,
+				  size_t new_core_mask_size);
 #endif /* MALI_USE_CSF */
 
 /**
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_hwaccess_time.h b/drivers/gpu/arm/bifrost/mali_kbase_hwaccess_time.h
index 0630dfa6db3a..222ff2001e56 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_hwaccess_time.h
+++ b/drivers/gpu/arm/bifrost/mali_kbase_hwaccess_time.h
@@ -1,7 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
 /*
  *
- * (C) COPYRIGHT 2014, 2018-2021, 2023 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014, 2018-2021, 2024 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -27,7 +27,8 @@
  *
  * @multiplier:		Numerator of the converter's fraction.
  * @divisor:		Denominator of the converter's fraction.
- * @offset:		Converter's offset term.
+ * @gpu_timestamp_offset: Cached CPU to GPU TS offset computed whenever whole system
+ *                        enters into standby mode where CPU Monotonic time is suspended.
  * @device_scaled_timeouts: Timeouts in milliseconds that were scaled to be
  *                          consistent with the minimum MCU frequency. This
  *                          array caches the results of all of the conversions
@@ -55,7 +56,7 @@ struct kbase_backend_time {
 #if MALI_USE_CSF
 	u64 multiplier;
 	u64 divisor;
-	s64 offset;
+	s64 gpu_timestamp_offset;
 #endif
 	unsigned int device_scaled_timeouts[KBASE_TIMEOUT_SELECTOR_COUNT];
 };
@@ -70,6 +71,40 @@ struct kbase_backend_time {
  * Return: The CPU timestamp.
  */
 u64 __maybe_unused kbase_backend_time_convert_gpu_to_cpu(struct kbase_device *kbdev, u64 gpu_ts);
+
+/**
+ * kbase_backend_update_gpu_timestamp_offset() - Updates GPU timestamp offset register with the
+ *                                               cached value.
+ *
+ * @kbdev:	Kbase device pointer
+ *
+ * Compute the new cached value for GPU timestamp offset if the previously cached value has been
+ * invalidated and update the GPU timestamp offset register with the cached value.
+ */
+void kbase_backend_update_gpu_timestamp_offset(struct kbase_device *kbdev);
+
+/**
+ * kbase_backend_invalidate_gpu_timestamp_offset() - Invalidate cached GPU timestamp offset value
+ *
+ * @kbdev:	Kbase device pointer
+ *
+ * This function invalidates cached GPU timestamp offset value whenever system suspend
+ * is about to happen where CPU TS counter will be stopped.
+ */
+void kbase_backend_invalidate_gpu_timestamp_offset(struct kbase_device *kbdev);
+
+#if MALI_UNIT_TEST
+/**
+ * kbase_backend_read_gpu_timestamp_offset_reg() - Read GPU TIMESTAMP OFFSET Register
+ *
+ * @kbdev:	Kbase device pointer
+ *
+ * This function reads GPU TIMESTAMP OFFSET Register with proper register access
+ *
+ * Return: GPU TIMESTAMP OFFSET Register value, as unsigned 64 bit value
+ */
+u64 kbase_backend_read_gpu_timestamp_offset_reg(struct kbase_device *kbdev);
+#endif
 #endif
 
 /**
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_hwconfig_features.h b/drivers/gpu/arm/bifrost/mali_kbase_hwconfig_features.h
new file mode 100644
index 000000000000..265cb9585cc6
--- /dev/null
+++ b/drivers/gpu/arm/bifrost/mali_kbase_hwconfig_features.h
@@ -0,0 +1,158 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ *
+ * (C) COPYRIGHT 2014-2024 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+#ifndef _KBASE_HWCONFIG_FEATURES_H_
+#define _KBASE_HWCONFIG_FEATURES_H_
+
+#include <linux/version_compat_defs.h>
+
+enum base_hw_feature {
+	KBASE_HW_FEATURE_FLUSH_REDUCTION,
+	KBASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
+	KBASE_HW_FEATURE_TLS_HASHING,
+	KBASE_HW_FEATURE_THREAD_GROUP_SPLIT,
+	KBASE_HW_FEATURE_CLEAN_ONLY_SAFE,
+	KBASE_HW_FEATURE_IDVS_GROUP_SIZE,
+	KBASE_HW_FEATURE_L2_CONFIG,
+	KBASE_HW_FEATURE_L2_SLICE_HASH,
+	KBASE_HW_FEATURE_GPU_SLEEP,
+	KBASE_HW_FEATURE_FLUSH_INV_SHADER_OTHER,
+	KBASE_HW_FEATURE_CORE_FEATURES,
+	KBASE_HW_FEATURE_PBHA_HWU,
+	KBASE_HW_FEATURE_LARGE_PAGE_ALLOC,
+	KBASE_HW_FEATURE_THREAD_TLS_ALLOC,
+	KBASE_HW_FEATURE_END
+};
+
+__maybe_unused static const enum base_hw_feature base_hw_features_generic[] = {
+	KBASE_HW_FEATURE_END
+};
+
+__maybe_unused static const enum base_hw_feature base_hw_features_tMIx[] = {
+	KBASE_HW_FEATURE_THREAD_GROUP_SPLIT, KBASE_HW_FEATURE_FLUSH_REDUCTION, KBASE_HW_FEATURE_END
+};
+
+__maybe_unused static const enum base_hw_feature base_hw_features_tHEx[] = {
+	KBASE_HW_FEATURE_THREAD_GROUP_SPLIT, KBASE_HW_FEATURE_FLUSH_REDUCTION,
+	KBASE_HW_FEATURE_PROTECTED_DEBUG_MODE, KBASE_HW_FEATURE_END
+};
+
+__maybe_unused static const enum base_hw_feature base_hw_features_tSIx[] = {
+	KBASE_HW_FEATURE_THREAD_GROUP_SPLIT, KBASE_HW_FEATURE_FLUSH_REDUCTION,
+	KBASE_HW_FEATURE_PROTECTED_DEBUG_MODE, KBASE_HW_FEATURE_END
+};
+
+__maybe_unused static const enum base_hw_feature base_hw_features_tDVx[] = {
+	KBASE_HW_FEATURE_THREAD_GROUP_SPLIT, KBASE_HW_FEATURE_FLUSH_REDUCTION,
+	KBASE_HW_FEATURE_PROTECTED_DEBUG_MODE, KBASE_HW_FEATURE_END
+};
+
+__maybe_unused static const enum base_hw_feature base_hw_features_tNOx[] = {
+	KBASE_HW_FEATURE_THREAD_GROUP_SPLIT,   KBASE_HW_FEATURE_FLUSH_REDUCTION,
+	KBASE_HW_FEATURE_PROTECTED_DEBUG_MODE, KBASE_HW_FEATURE_TLS_HASHING,
+	KBASE_HW_FEATURE_IDVS_GROUP_SIZE,      KBASE_HW_FEATURE_END
+};
+
+__maybe_unused static const enum base_hw_feature base_hw_features_tGOx[] = {
+	KBASE_HW_FEATURE_THREAD_GROUP_SPLIT,   KBASE_HW_FEATURE_FLUSH_REDUCTION,
+	KBASE_HW_FEATURE_PROTECTED_DEBUG_MODE, KBASE_HW_FEATURE_TLS_HASHING,
+	KBASE_HW_FEATURE_IDVS_GROUP_SIZE,      KBASE_HW_FEATURE_CORE_FEATURES,
+	KBASE_HW_FEATURE_THREAD_TLS_ALLOC,     KBASE_HW_FEATURE_END
+};
+
+__maybe_unused static const enum base_hw_feature base_hw_features_tTRx[] = {
+	KBASE_HW_FEATURE_FLUSH_REDUCTION,	 KBASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
+	KBASE_HW_FEATURE_IDVS_GROUP_SIZE,	 KBASE_HW_FEATURE_CLEAN_ONLY_SAFE,
+	KBASE_HW_FEATURE_FLUSH_INV_SHADER_OTHER, KBASE_HW_FEATURE_END
+};
+
+__maybe_unused static const enum base_hw_feature base_hw_features_tNAx[] = {
+	KBASE_HW_FEATURE_FLUSH_REDUCTION,	 KBASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
+	KBASE_HW_FEATURE_IDVS_GROUP_SIZE,	 KBASE_HW_FEATURE_CLEAN_ONLY_SAFE,
+	KBASE_HW_FEATURE_FLUSH_INV_SHADER_OTHER, KBASE_HW_FEATURE_END
+};
+
+__maybe_unused static const enum base_hw_feature base_hw_features_tBEx[] = {
+	KBASE_HW_FEATURE_FLUSH_REDUCTION,
+	KBASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
+	KBASE_HW_FEATURE_IDVS_GROUP_SIZE,
+	KBASE_HW_FEATURE_L2_CONFIG,
+	KBASE_HW_FEATURE_CLEAN_ONLY_SAFE,
+	KBASE_HW_FEATURE_FLUSH_INV_SHADER_OTHER,
+	KBASE_HW_FEATURE_END
+};
+
+__maybe_unused static const enum base_hw_feature base_hw_features_tBAx[] = {
+	KBASE_HW_FEATURE_FLUSH_REDUCTION,
+	KBASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
+	KBASE_HW_FEATURE_IDVS_GROUP_SIZE,
+	KBASE_HW_FEATURE_L2_CONFIG,
+	KBASE_HW_FEATURE_CLEAN_ONLY_SAFE,
+	KBASE_HW_FEATURE_FLUSH_INV_SHADER_OTHER,
+	KBASE_HW_FEATURE_END
+};
+
+__maybe_unused static const enum base_hw_feature base_hw_features_tODx[] = {
+	KBASE_HW_FEATURE_FLUSH_REDUCTION, KBASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
+	KBASE_HW_FEATURE_L2_CONFIG, KBASE_HW_FEATURE_CLEAN_ONLY_SAFE, KBASE_HW_FEATURE_END
+};
+
+__maybe_unused static const enum base_hw_feature base_hw_features_tGRx[] = {
+	KBASE_HW_FEATURE_FLUSH_REDUCTION, KBASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
+	KBASE_HW_FEATURE_L2_CONFIG,	  KBASE_HW_FEATURE_CLEAN_ONLY_SAFE,
+	KBASE_HW_FEATURE_CORE_FEATURES,	  KBASE_HW_FEATURE_END
+};
+
+__maybe_unused static const enum base_hw_feature base_hw_features_tVAx[] = {
+	KBASE_HW_FEATURE_FLUSH_REDUCTION, KBASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
+	KBASE_HW_FEATURE_L2_CONFIG,	  KBASE_HW_FEATURE_CLEAN_ONLY_SAFE,
+	KBASE_HW_FEATURE_CORE_FEATURES,	  KBASE_HW_FEATURE_END
+};
+
+__maybe_unused static const enum base_hw_feature base_hw_features_tTUx[] = {
+	KBASE_HW_FEATURE_FLUSH_REDUCTION, KBASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
+	KBASE_HW_FEATURE_L2_CONFIG,	  KBASE_HW_FEATURE_CLEAN_ONLY_SAFE,
+	KBASE_HW_FEATURE_L2_SLICE_HASH,	  KBASE_HW_FEATURE_GPU_SLEEP,
+	KBASE_HW_FEATURE_CORE_FEATURES,	  KBASE_HW_FEATURE_END
+};
+
+__maybe_unused static const enum base_hw_feature base_hw_features_tTIx[] = {
+	KBASE_HW_FEATURE_FLUSH_REDUCTION,
+	KBASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
+	KBASE_HW_FEATURE_L2_CONFIG,
+	KBASE_HW_FEATURE_CLEAN_ONLY_SAFE,
+	KBASE_HW_FEATURE_L2_SLICE_HASH,
+	KBASE_HW_FEATURE_GPU_SLEEP,
+	KBASE_HW_FEATURE_CORE_FEATURES,
+	KBASE_HW_FEATURE_PBHA_HWU,
+	KBASE_HW_FEATURE_END
+};
+
+__maybe_unused static const enum base_hw_feature base_hw_features_tKRx[] = {
+	KBASE_HW_FEATURE_FLUSH_REDUCTION,  KBASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
+	KBASE_HW_FEATURE_L2_CONFIG,	   KBASE_HW_FEATURE_CLEAN_ONLY_SAFE,
+	KBASE_HW_FEATURE_L2_SLICE_HASH,	   KBASE_HW_FEATURE_GPU_SLEEP,
+	KBASE_HW_FEATURE_CORE_FEATURES,	   KBASE_HW_FEATURE_PBHA_HWU,
+	KBASE_HW_FEATURE_LARGE_PAGE_ALLOC, KBASE_HW_FEATURE_END
+};
+
+
+#endif /* _KBASE_HWCONFIG_FEATURES_H_ */
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_hwconfig_issues.h b/drivers/gpu/arm/bifrost/mali_kbase_hwconfig_issues.h
new file mode 100644
index 000000000000..b1a3a41b232b
--- /dev/null
+++ b/drivers/gpu/arm/bifrost/mali_kbase_hwconfig_issues.h
@@ -0,0 +1,609 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ *
+ * (C) COPYRIGHT 2014-2024 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+#ifndef _KBASE_HWCONFIG_ISSUES_H_
+#define _KBASE_HWCONFIG_ISSUES_H_
+
+#include <linux/version_compat_defs.h>
+
+enum base_hw_issue { /* hardware issue identifiers; values are implicit (0, 1, 2, ...) */
+	KBASE_HW_ISSUE_5736,
+	KBASE_HW_ISSUE_9435,
+	KBASE_HW_ISSUE_10682,
+	KBASE_HW_ISSUE_11054,
+	KBASE_HW_ISSUE_T76X_3953,
+	KBASE_HW_ISSUE_TMIX_7891,
+	KBASE_HW_ISSUE_TMIX_7940,
+	KBASE_HW_ISSUE_TMIX_8042,
+	KBASE_HW_ISSUE_TMIX_8133,
+	KBASE_HW_ISSUE_TMIX_8138,
+	KBASE_HW_ISSUE_TMIX_8206,
+	KBASE_HW_ISSUE_TMIX_8343,
+	KBASE_HW_ISSUE_TMIX_8463,
+	KBASE_HW_ISSUE_TMIX_8456,
+	KBASE_HW_ISSUE_TSIX_1116,
+	KBASE_HW_ISSUE_TSIX_2033,
+	KBASE_HW_ISSUE_TMIX_8438,
+	KBASE_HW_ISSUE_TNOX_1194,
+	KBASE_HW_ISSUE_TGOX_R1_1234,
+	KBASE_HW_ISSUE_TTRX_1337,
+	KBASE_HW_ISSUE_TSIX_1792,
+	KBASE_HW_ISSUE_TTRX_2968_TTRX_3162,
+	KBASE_HW_ISSUE_TTRX_3076,
+	KBASE_HW_ISSUE_TTRX_921,
+	KBASE_HW_ISSUE_TTRX_3414,
+	KBASE_HW_ISSUE_GPU2017_1336,
+	KBASE_HW_ISSUE_TTRX_3083,
+	KBASE_HW_ISSUE_TTRX_3470,
+	KBASE_HW_ISSUE_TTRX_3464,
+	KBASE_HW_ISSUE_TTRX_3485,
+	KBASE_HW_ISSUE_GPU2019_3212,
+	KBASE_HW_ISSUE_TURSEHW_1997,
+	KBASE_HW_ISSUE_GPU2019_3878,
+	KBASE_HW_ISSUE_TURSEHW_2716,
+	KBASE_HW_ISSUE_GPU2019_3901,
+	KBASE_HW_ISSUE_GPU2021PRO_290,
+	KBASE_HW_ISSUE_TITANHW_2710,
+	KBASE_HW_ISSUE_TITANHW_2679,
+	KBASE_HW_ISSUE_GPU2022PRO_148,
+	KBASE_HW_ISSUE_TITANHW_2922,
+	KBASE_HW_ISSUE_TITANHW_2952,
+	KBASE_HW_ISSUE_KRAKEHW_2151,
+	KBASE_HW_ISSUE_TITANHW_2938,
+	KBASE_HW_ISSUE_KRAKEHW_2269,
+	KBASE_HW_ISSUE_TURSEHW_2934,
+	KBASE_HW_ISSUE_KRAKEHW_2321,
+	KBASE_HW_ISSUE_END /* last enumerator; also the final entry of every issue array in this file */
+};
+
+__maybe_unused static const enum base_hw_issue base_hw_issues_generic[] = { KBASE_HW_ISSUE_END }; /* no issues listed */
+
+__maybe_unused static const enum base_hw_issue base_hw_issues_tMIx_r0p0_05dev0[] = {
+	KBASE_HW_ISSUE_9435,	     KBASE_HW_ISSUE_10682,	  KBASE_HW_ISSUE_11054,
+	KBASE_HW_ISSUE_T76X_3953,    KBASE_HW_ISSUE_TMIX_7891,	  KBASE_HW_ISSUE_TMIX_8042,
+	KBASE_HW_ISSUE_TMIX_8133,    KBASE_HW_ISSUE_TMIX_8138,	  KBASE_HW_ISSUE_TMIX_8206,
+	KBASE_HW_ISSUE_TMIX_8343,    KBASE_HW_ISSUE_TMIX_8463,	  KBASE_HW_ISSUE_TMIX_8456,
+	KBASE_HW_ISSUE_TMIX_8438,    KBASE_HW_ISSUE_TSIX_2033,	  KBASE_HW_ISSUE_TTRX_921,
+	KBASE_HW_ISSUE_GPU2017_1336, KBASE_HW_ISSUE_TITANHW_2710, KBASE_HW_ISSUE_GPU2022PRO_148,
+	KBASE_HW_ISSUE_TITANHW_2938, KBASE_HW_ISSUE_END
+}; /* tMIx r0p0_05dev0 issue list; KBASE_HW_ISSUE_END is the final entry */
+
+__maybe_unused static const enum base_hw_issue base_hw_issues_tMIx_r0p0[] = {
+	KBASE_HW_ISSUE_9435,	     KBASE_HW_ISSUE_10682,	  KBASE_HW_ISSUE_11054,
+	KBASE_HW_ISSUE_TMIX_7891,    KBASE_HW_ISSUE_TMIX_7940,	  KBASE_HW_ISSUE_TMIX_8042,
+	KBASE_HW_ISSUE_TMIX_8133,    KBASE_HW_ISSUE_TMIX_8138,	  KBASE_HW_ISSUE_TMIX_8206,
+	KBASE_HW_ISSUE_TMIX_8343,    KBASE_HW_ISSUE_TMIX_8463,	  KBASE_HW_ISSUE_TMIX_8456,
+	KBASE_HW_ISSUE_TMIX_8438,    KBASE_HW_ISSUE_TSIX_2033,	  KBASE_HW_ISSUE_TTRX_921,
+	KBASE_HW_ISSUE_GPU2017_1336, KBASE_HW_ISSUE_TITANHW_2710, KBASE_HW_ISSUE_GPU2022PRO_148,
+	KBASE_HW_ISSUE_TITANHW_2938, KBASE_HW_ISSUE_END
+}; /* tMIx r0p0; vs. r0p0_05dev0: has TMIX_7940, lacks T76X_3953 */
+
+__maybe_unused static const enum base_hw_issue base_hw_issues_tMIx_r0p1[] = {
+	KBASE_HW_ISSUE_9435,	     KBASE_HW_ISSUE_10682,	  KBASE_HW_ISSUE_11054,
+	KBASE_HW_ISSUE_TMIX_7891,    KBASE_HW_ISSUE_TMIX_7940,	  KBASE_HW_ISSUE_TMIX_8042,
+	KBASE_HW_ISSUE_TMIX_8133,    KBASE_HW_ISSUE_TMIX_8138,	  KBASE_HW_ISSUE_TMIX_8206,
+	KBASE_HW_ISSUE_TMIX_8343,    KBASE_HW_ISSUE_TMIX_8463,	  KBASE_HW_ISSUE_TMIX_8456,
+	KBASE_HW_ISSUE_TMIX_8438,    KBASE_HW_ISSUE_TSIX_2033,	  KBASE_HW_ISSUE_TTRX_921,
+	KBASE_HW_ISSUE_GPU2017_1336, KBASE_HW_ISSUE_TITANHW_2710, KBASE_HW_ISSUE_GPU2022PRO_148,
+	KBASE_HW_ISSUE_TITANHW_2938, KBASE_HW_ISSUE_END
+}; /* tMIx r0p1; same entries as base_hw_issues_tMIx_r0p0 */
+
+__maybe_unused static const enum base_hw_issue base_hw_issues_model_tMIx[] = {
+	KBASE_HW_ISSUE_5736,	       KBASE_HW_ISSUE_9435,	    KBASE_HW_ISSUE_TMIX_7891,
+	KBASE_HW_ISSUE_TMIX_7940,      KBASE_HW_ISSUE_TMIX_8042,    KBASE_HW_ISSUE_TMIX_8133,
+	KBASE_HW_ISSUE_TMIX_8138,      KBASE_HW_ISSUE_TMIX_8206,    KBASE_HW_ISSUE_TMIX_8343,
+	KBASE_HW_ISSUE_TMIX_8456,      KBASE_HW_ISSUE_TSIX_2033,    KBASE_HW_ISSUE_TITANHW_2710,
+	KBASE_HW_ISSUE_GPU2022PRO_148, KBASE_HW_ISSUE_TITANHW_2938, KBASE_HW_ISSUE_END
+}; /* tMIx "model" issue list (presumably for a simulated GPU - TODO confirm) */
+
+__maybe_unused static const enum base_hw_issue base_hw_issues_tHEx_r0p0[] = {
+	KBASE_HW_ISSUE_9435,	     KBASE_HW_ISSUE_10682,	    KBASE_HW_ISSUE_11054,
+	KBASE_HW_ISSUE_TMIX_7891,    KBASE_HW_ISSUE_TMIX_8042,	    KBASE_HW_ISSUE_TMIX_8133,
+	KBASE_HW_ISSUE_TSIX_2033,    KBASE_HW_ISSUE_TTRX_921,	    KBASE_HW_ISSUE_GPU2017_1336,
+	KBASE_HW_ISSUE_TITANHW_2710, KBASE_HW_ISSUE_GPU2022PRO_148, KBASE_HW_ISSUE_TITANHW_2938,
+	KBASE_HW_ISSUE_END
+}; /* tHEx r0p0 issue list; KBASE_HW_ISSUE_END is the final entry */
+
+__maybe_unused static const enum base_hw_issue base_hw_issues_tHEx_r0p1[] = {
+	KBASE_HW_ISSUE_9435,	     KBASE_HW_ISSUE_10682,	    KBASE_HW_ISSUE_11054,
+	KBASE_HW_ISSUE_TMIX_7891,    KBASE_HW_ISSUE_TMIX_8042,	    KBASE_HW_ISSUE_TMIX_8133,
+	KBASE_HW_ISSUE_TSIX_2033,    KBASE_HW_ISSUE_TTRX_921,	    KBASE_HW_ISSUE_GPU2017_1336,
+	KBASE_HW_ISSUE_TITANHW_2710, KBASE_HW_ISSUE_GPU2022PRO_148, KBASE_HW_ISSUE_TITANHW_2938,
+	KBASE_HW_ISSUE_END
+}; /* tHEx r0p1; same entries as base_hw_issues_tHEx_r0p0 */
+
+__maybe_unused static const enum base_hw_issue base_hw_issues_tHEx_r0p2[] = {
+	KBASE_HW_ISSUE_9435,	     KBASE_HW_ISSUE_10682,	    KBASE_HW_ISSUE_11054,
+	KBASE_HW_ISSUE_TMIX_7891,    KBASE_HW_ISSUE_TMIX_8042,	    KBASE_HW_ISSUE_TMIX_8133,
+	KBASE_HW_ISSUE_TSIX_2033,    KBASE_HW_ISSUE_TTRX_921,	    KBASE_HW_ISSUE_GPU2017_1336,
+	KBASE_HW_ISSUE_TITANHW_2710, KBASE_HW_ISSUE_GPU2022PRO_148, KBASE_HW_ISSUE_TITANHW_2938,
+	KBASE_HW_ISSUE_END
+}; /* tHEx r0p2; same entries as base_hw_issues_tHEx_r0p0 */
+
+__maybe_unused static const enum base_hw_issue base_hw_issues_tHEx_r0p3[] = {
+	KBASE_HW_ISSUE_9435,	       KBASE_HW_ISSUE_10682,	    KBASE_HW_ISSUE_TMIX_7891,
+	KBASE_HW_ISSUE_TMIX_8042,      KBASE_HW_ISSUE_TMIX_8133,    KBASE_HW_ISSUE_TSIX_2033,
+	KBASE_HW_ISSUE_TTRX_921,       KBASE_HW_ISSUE_GPU2017_1336, KBASE_HW_ISSUE_TITANHW_2710,
+	KBASE_HW_ISSUE_GPU2022PRO_148, KBASE_HW_ISSUE_TITANHW_2938, KBASE_HW_ISSUE_END
+}; /* tHEx r0p3: the tHEx r0p2 entries without KBASE_HW_ISSUE_11054 */
+
+__maybe_unused static const enum base_hw_issue base_hw_issues_model_tHEx[] = {
+	KBASE_HW_ISSUE_5736,	     KBASE_HW_ISSUE_9435,	    KBASE_HW_ISSUE_TMIX_7891,
+	KBASE_HW_ISSUE_TMIX_8042,    KBASE_HW_ISSUE_TMIX_8133,	    KBASE_HW_ISSUE_TSIX_2033,
+	KBASE_HW_ISSUE_TITANHW_2710, KBASE_HW_ISSUE_GPU2022PRO_148, KBASE_HW_ISSUE_TITANHW_2938,
+	KBASE_HW_ISSUE_END
+}; /* tHEx "model" issue list; KBASE_HW_ISSUE_END is the final entry */
+
+__maybe_unused static const enum base_hw_issue base_hw_issues_tSIx_r0p0[] = {
+	KBASE_HW_ISSUE_9435,	     KBASE_HW_ISSUE_11054,	    KBASE_HW_ISSUE_TMIX_8133,
+	KBASE_HW_ISSUE_TSIX_1116,    KBASE_HW_ISSUE_TSIX_2033,	    KBASE_HW_ISSUE_TSIX_1792,
+	KBASE_HW_ISSUE_TTRX_921,     KBASE_HW_ISSUE_GPU2017_1336,   KBASE_HW_ISSUE_TTRX_3464,
+	KBASE_HW_ISSUE_TITANHW_2710, KBASE_HW_ISSUE_GPU2022PRO_148, KBASE_HW_ISSUE_TITANHW_2938,
+	KBASE_HW_ISSUE_END
+}; /* tSIx r0p0 issue list; KBASE_HW_ISSUE_END is the final entry */
+
+__maybe_unused static const enum base_hw_issue base_hw_issues_tSIx_r0p1[] = {
+	KBASE_HW_ISSUE_9435,	     KBASE_HW_ISSUE_11054,	    KBASE_HW_ISSUE_TMIX_8133,
+	KBASE_HW_ISSUE_TSIX_1116,    KBASE_HW_ISSUE_TSIX_2033,	    KBASE_HW_ISSUE_TSIX_1792,
+	KBASE_HW_ISSUE_TTRX_921,     KBASE_HW_ISSUE_GPU2017_1336,   KBASE_HW_ISSUE_TTRX_3464,
+	KBASE_HW_ISSUE_TITANHW_2710, KBASE_HW_ISSUE_GPU2022PRO_148, KBASE_HW_ISSUE_TITANHW_2938,
+	KBASE_HW_ISSUE_END
+}; /* tSIx r0p1; same entries as base_hw_issues_tSIx_r0p0 */
+
+__maybe_unused static const enum base_hw_issue base_hw_issues_tSIx_r1p0[] = {
+	KBASE_HW_ISSUE_9435,	       KBASE_HW_ISSUE_11054,	    KBASE_HW_ISSUE_TMIX_8133,
+	KBASE_HW_ISSUE_TSIX_1116,      KBASE_HW_ISSUE_TSIX_2033,    KBASE_HW_ISSUE_TTRX_921,
+	KBASE_HW_ISSUE_GPU2017_1336,   KBASE_HW_ISSUE_TTRX_3464,    KBASE_HW_ISSUE_TITANHW_2710,
+	KBASE_HW_ISSUE_GPU2022PRO_148, KBASE_HW_ISSUE_TITANHW_2938, KBASE_HW_ISSUE_END
+}; /* tSIx r1p0: the tSIx r0p1 entries without KBASE_HW_ISSUE_TSIX_1792 */
+
+__maybe_unused static const enum base_hw_issue base_hw_issues_tSIx_r1p1[] = {
+	KBASE_HW_ISSUE_9435,	     KBASE_HW_ISSUE_TMIX_8133,	  KBASE_HW_ISSUE_TSIX_1116,
+	KBASE_HW_ISSUE_TSIX_2033,    KBASE_HW_ISSUE_TTRX_921,	  KBASE_HW_ISSUE_GPU2017_1336,
+	KBASE_HW_ISSUE_TTRX_3464,    KBASE_HW_ISSUE_TITANHW_2710, KBASE_HW_ISSUE_GPU2022PRO_148,
+	KBASE_HW_ISSUE_TITANHW_2938, KBASE_HW_ISSUE_END
+}; /* tSIx r1p1: the tSIx r1p0 entries without KBASE_HW_ISSUE_11054 */
+
+__maybe_unused static const enum base_hw_issue base_hw_issues_model_tSIx[] = {
+	KBASE_HW_ISSUE_5736,	     KBASE_HW_ISSUE_9435,	    KBASE_HW_ISSUE_TMIX_8133,
+	KBASE_HW_ISSUE_TSIX_1116,    KBASE_HW_ISSUE_TSIX_2033,	    KBASE_HW_ISSUE_TTRX_3464,
+	KBASE_HW_ISSUE_TITANHW_2710, KBASE_HW_ISSUE_GPU2022PRO_148, KBASE_HW_ISSUE_TITANHW_2938,
+	KBASE_HW_ISSUE_END
+}; /* tSIx "model" issue list; KBASE_HW_ISSUE_END is the final entry */
+
+__maybe_unused static const enum base_hw_issue base_hw_issues_tDVx_r0p0[] = {
+	KBASE_HW_ISSUE_9435,	     KBASE_HW_ISSUE_TMIX_8133,	  KBASE_HW_ISSUE_TSIX_1116,
+	KBASE_HW_ISSUE_TSIX_2033,    KBASE_HW_ISSUE_TTRX_921,	  KBASE_HW_ISSUE_GPU2017_1336,
+	KBASE_HW_ISSUE_TTRX_3464,    KBASE_HW_ISSUE_TITANHW_2710, KBASE_HW_ISSUE_GPU2022PRO_148,
+	KBASE_HW_ISSUE_TITANHW_2938, KBASE_HW_ISSUE_END
+}; /* tDVx r0p0; same entries as base_hw_issues_tSIx_r1p1 */
+
+__maybe_unused static const enum base_hw_issue base_hw_issues_model_tDVx[] = {
+	KBASE_HW_ISSUE_5736,	     KBASE_HW_ISSUE_9435,	    KBASE_HW_ISSUE_TMIX_8133,
+	KBASE_HW_ISSUE_TSIX_1116,    KBASE_HW_ISSUE_TSIX_2033,	    KBASE_HW_ISSUE_TTRX_3464,
+	KBASE_HW_ISSUE_TITANHW_2710, KBASE_HW_ISSUE_GPU2022PRO_148, KBASE_HW_ISSUE_TITANHW_2938,
+	KBASE_HW_ISSUE_END
+}; /* tDVx "model" list; same entries as base_hw_issues_model_tSIx */
+
+__maybe_unused static const enum base_hw_issue base_hw_issues_tNOx_r0p0[] = {
+	KBASE_HW_ISSUE_9435,	     KBASE_HW_ISSUE_TMIX_8133,
+	KBASE_HW_ISSUE_TSIX_1116,    KBASE_HW_ISSUE_TSIX_2033,
+	KBASE_HW_ISSUE_TNOX_1194,    KBASE_HW_ISSUE_TTRX_921,
+	KBASE_HW_ISSUE_GPU2017_1336, KBASE_HW_ISSUE_TTRX_3464,
+	KBASE_HW_ISSUE_TITANHW_2710, KBASE_HW_ISSUE_GPU2022PRO_148,
+	KBASE_HW_ISSUE_TITANHW_2938, KBASE_HW_ISSUE_KRAKEHW_2321,
+	KBASE_HW_ISSUE_END
+}; /* tNOx r0p0 issue list; KBASE_HW_ISSUE_END is the final entry */
+
+__maybe_unused static const enum base_hw_issue base_hw_issues_model_tNOx[] = {
+	KBASE_HW_ISSUE_5736,	     KBASE_HW_ISSUE_9435,
+	KBASE_HW_ISSUE_TMIX_8133,    KBASE_HW_ISSUE_TSIX_1116,
+	KBASE_HW_ISSUE_TSIX_2033,    KBASE_HW_ISSUE_TTRX_3464,
+	KBASE_HW_ISSUE_TITANHW_2710, KBASE_HW_ISSUE_GPU2022PRO_148,
+	KBASE_HW_ISSUE_TITANHW_2938, KBASE_HW_ISSUE_KRAKEHW_2321,
+	KBASE_HW_ISSUE_END
+}; /* tNOx "model" list: the model_tSIx entries plus KBASE_HW_ISSUE_KRAKEHW_2321 */
+
+__maybe_unused static const enum base_hw_issue base_hw_issues_tGOx_r0p0[] = {
+	KBASE_HW_ISSUE_9435,	     KBASE_HW_ISSUE_TMIX_8133,
+	KBASE_HW_ISSUE_TSIX_1116,    KBASE_HW_ISSUE_TSIX_2033,
+	KBASE_HW_ISSUE_TNOX_1194,    KBASE_HW_ISSUE_TTRX_921,
+	KBASE_HW_ISSUE_GPU2017_1336, KBASE_HW_ISSUE_TTRX_3464,
+	KBASE_HW_ISSUE_TITANHW_2710, KBASE_HW_ISSUE_GPU2022PRO_148,
+	KBASE_HW_ISSUE_TITANHW_2938, KBASE_HW_ISSUE_KRAKEHW_2321,
+	KBASE_HW_ISSUE_END
+}; /* tGOx r0p0; same entries as base_hw_issues_tNOx_r0p0 */
+
+__maybe_unused static const enum base_hw_issue base_hw_issues_tGOx_r1p0[] = {
+	KBASE_HW_ISSUE_9435,	     KBASE_HW_ISSUE_TMIX_8133,
+	KBASE_HW_ISSUE_TSIX_1116,    KBASE_HW_ISSUE_TSIX_2033,
+	KBASE_HW_ISSUE_TGOX_R1_1234, KBASE_HW_ISSUE_TTRX_921,
+	KBASE_HW_ISSUE_GPU2017_1336, KBASE_HW_ISSUE_TTRX_3464,
+	KBASE_HW_ISSUE_TITANHW_2710, KBASE_HW_ISSUE_GPU2022PRO_148,
+	KBASE_HW_ISSUE_TITANHW_2938, KBASE_HW_ISSUE_KRAKEHW_2321,
+	KBASE_HW_ISSUE_END
+}; /* tGOx r1p0: as tGOx r0p0 but with TGOX_R1_1234 in place of TNOX_1194 */
+
+__maybe_unused static const enum base_hw_issue base_hw_issues_model_tGOx[] = {
+	KBASE_HW_ISSUE_5736,	     KBASE_HW_ISSUE_9435,
+	KBASE_HW_ISSUE_TMIX_8133,    KBASE_HW_ISSUE_TSIX_1116,
+	KBASE_HW_ISSUE_TSIX_2033,    KBASE_HW_ISSUE_TTRX_3464,
+	KBASE_HW_ISSUE_TITANHW_2710, KBASE_HW_ISSUE_GPU2022PRO_148,
+	KBASE_HW_ISSUE_TITANHW_2938, KBASE_HW_ISSUE_KRAKEHW_2321,
+	KBASE_HW_ISSUE_END
+}; /* tGOx "model" list; same entries as base_hw_issues_model_tNOx */
+
+__maybe_unused static const enum base_hw_issue base_hw_issues_tTRx_r0p0[] = {
+	KBASE_HW_ISSUE_9435,	     KBASE_HW_ISSUE_TSIX_2033,
+	KBASE_HW_ISSUE_TTRX_1337,    KBASE_HW_ISSUE_TTRX_2968_TTRX_3162,
+	KBASE_HW_ISSUE_TTRX_3076,    KBASE_HW_ISSUE_TTRX_921,
+	KBASE_HW_ISSUE_TTRX_3414,    KBASE_HW_ISSUE_GPU2017_1336,
+	KBASE_HW_ISSUE_TTRX_3083,    KBASE_HW_ISSUE_TTRX_3470,
+	KBASE_HW_ISSUE_TTRX_3464,    KBASE_HW_ISSUE_TTRX_3485,
+	KBASE_HW_ISSUE_TITANHW_2710, KBASE_HW_ISSUE_GPU2022PRO_148,
+	KBASE_HW_ISSUE_TITANHW_2938, KBASE_HW_ISSUE_END
+}; /* tTRx r0p0 issue list; KBASE_HW_ISSUE_END is the final entry */
+
+__maybe_unused static const enum base_hw_issue base_hw_issues_tTRx_r0p1[] = {
+	KBASE_HW_ISSUE_9435,	     KBASE_HW_ISSUE_TSIX_2033,
+	KBASE_HW_ISSUE_TTRX_1337,    KBASE_HW_ISSUE_TTRX_2968_TTRX_3162,
+	KBASE_HW_ISSUE_TTRX_3076,    KBASE_HW_ISSUE_TTRX_921,
+	KBASE_HW_ISSUE_TTRX_3414,    KBASE_HW_ISSUE_GPU2017_1336,
+	KBASE_HW_ISSUE_TTRX_3083,    KBASE_HW_ISSUE_TTRX_3470,
+	KBASE_HW_ISSUE_TTRX_3464,    KBASE_HW_ISSUE_TTRX_3485,
+	KBASE_HW_ISSUE_TITANHW_2710, KBASE_HW_ISSUE_GPU2022PRO_148,
+	KBASE_HW_ISSUE_TITANHW_2938, KBASE_HW_ISSUE_END
+}; /* tTRx r0p1; same entries as base_hw_issues_tTRx_r0p0 */
+
+__maybe_unused static const enum base_hw_issue base_hw_issues_tTRx_r0p2[] = {
+	KBASE_HW_ISSUE_9435,
+	KBASE_HW_ISSUE_TSIX_2033,
+	KBASE_HW_ISSUE_TTRX_1337,
+	KBASE_HW_ISSUE_TTRX_2968_TTRX_3162,
+	KBASE_HW_ISSUE_TTRX_3076,
+	KBASE_HW_ISSUE_TTRX_921,
+	KBASE_HW_ISSUE_TTRX_3414,
+	KBASE_HW_ISSUE_GPU2017_1336,
+	KBASE_HW_ISSUE_TTRX_3083,
+	KBASE_HW_ISSUE_TTRX_3470,
+	KBASE_HW_ISSUE_TTRX_3464,
+	KBASE_HW_ISSUE_TITANHW_2710,
+	KBASE_HW_ISSUE_GPU2022PRO_148,
+	KBASE_HW_ISSUE_TITANHW_2938,
+	KBASE_HW_ISSUE_END
+}; /* tTRx r0p2: the tTRx r0p1 entries without KBASE_HW_ISSUE_TTRX_3485 */
+
+__maybe_unused static const enum base_hw_issue base_hw_issues_model_tTRx[] = {
+	KBASE_HW_ISSUE_5736,	       KBASE_HW_ISSUE_9435,	    KBASE_HW_ISSUE_TSIX_2033,
+	KBASE_HW_ISSUE_TTRX_1337,      KBASE_HW_ISSUE_TTRX_3414,    KBASE_HW_ISSUE_TTRX_3083,
+	KBASE_HW_ISSUE_TTRX_3470,      KBASE_HW_ISSUE_TTRX_3464,    KBASE_HW_ISSUE_TITANHW_2710,
+	KBASE_HW_ISSUE_GPU2022PRO_148, KBASE_HW_ISSUE_TITANHW_2938, KBASE_HW_ISSUE_END
+}; /* tTRx "model" issue list; KBASE_HW_ISSUE_END is the final entry */
+
+__maybe_unused static const enum base_hw_issue base_hw_issues_tNAx_r0p0[] = {
+	KBASE_HW_ISSUE_9435,	     KBASE_HW_ISSUE_TSIX_2033,
+	KBASE_HW_ISSUE_TTRX_1337,    KBASE_HW_ISSUE_TTRX_2968_TTRX_3162,
+	KBASE_HW_ISSUE_TTRX_3076,    KBASE_HW_ISSUE_TTRX_921,
+	KBASE_HW_ISSUE_TTRX_3414,    KBASE_HW_ISSUE_GPU2017_1336,
+	KBASE_HW_ISSUE_TTRX_3083,    KBASE_HW_ISSUE_TTRX_3470,
+	KBASE_HW_ISSUE_TTRX_3464,    KBASE_HW_ISSUE_TTRX_3485,
+	KBASE_HW_ISSUE_TITANHW_2710, KBASE_HW_ISSUE_GPU2022PRO_148,
+	KBASE_HW_ISSUE_TITANHW_2938, KBASE_HW_ISSUE_END
+}; /* tNAx r0p0; same entries as base_hw_issues_tTRx_r0p0 */
+
+__maybe_unused static const enum base_hw_issue base_hw_issues_tNAx_r0p1[] = {
+	KBASE_HW_ISSUE_9435,
+	KBASE_HW_ISSUE_TSIX_2033,
+	KBASE_HW_ISSUE_TTRX_1337,
+	KBASE_HW_ISSUE_TTRX_2968_TTRX_3162,
+	KBASE_HW_ISSUE_TTRX_3076,
+	KBASE_HW_ISSUE_TTRX_921,
+	KBASE_HW_ISSUE_TTRX_3414,
+	KBASE_HW_ISSUE_GPU2017_1336,
+	KBASE_HW_ISSUE_TTRX_3083,
+	KBASE_HW_ISSUE_TTRX_3470,
+	KBASE_HW_ISSUE_TTRX_3464,
+	KBASE_HW_ISSUE_TITANHW_2710,
+	KBASE_HW_ISSUE_GPU2022PRO_148,
+	KBASE_HW_ISSUE_TITANHW_2938,
+	KBASE_HW_ISSUE_END
+}; /* tNAx r0p1: the tNAx r0p0 entries without KBASE_HW_ISSUE_TTRX_3485 */
+
+__maybe_unused static const enum base_hw_issue base_hw_issues_model_tNAx[] = {
+	KBASE_HW_ISSUE_5736,	       KBASE_HW_ISSUE_9435,	    KBASE_HW_ISSUE_TSIX_2033,
+	KBASE_HW_ISSUE_TTRX_1337,      KBASE_HW_ISSUE_TTRX_3414,    KBASE_HW_ISSUE_TTRX_3083,
+	KBASE_HW_ISSUE_TTRX_3470,      KBASE_HW_ISSUE_TTRX_3464,    KBASE_HW_ISSUE_TITANHW_2710,
+	KBASE_HW_ISSUE_GPU2022PRO_148, KBASE_HW_ISSUE_TITANHW_2938, KBASE_HW_ISSUE_END
+}; /* tNAx "model" list; same entries as base_hw_issues_model_tTRx */
+
+__maybe_unused static const enum base_hw_issue base_hw_issues_tBEx_r0p0[] = {
+	KBASE_HW_ISSUE_9435,	     KBASE_HW_ISSUE_TSIX_2033,
+	KBASE_HW_ISSUE_TTRX_1337,    KBASE_HW_ISSUE_TTRX_2968_TTRX_3162,
+	KBASE_HW_ISSUE_TTRX_921,     KBASE_HW_ISSUE_TTRX_3414,
+	KBASE_HW_ISSUE_TTRX_3083,    KBASE_HW_ISSUE_TTRX_3470,
+	KBASE_HW_ISSUE_TTRX_3464,    KBASE_HW_ISSUE_TTRX_3485,
+	KBASE_HW_ISSUE_TITANHW_2710, KBASE_HW_ISSUE_GPU2022PRO_148,
+	KBASE_HW_ISSUE_TITANHW_2938, KBASE_HW_ISSUE_KRAKEHW_2321,
+	KBASE_HW_ISSUE_END
+}; /* tBEx r0p0 issue list; KBASE_HW_ISSUE_END is the final entry */
+
+__maybe_unused static const enum base_hw_issue base_hw_issues_tBEx_r0p1[] = {
+	KBASE_HW_ISSUE_9435,	       KBASE_HW_ISSUE_TSIX_2033,
+	KBASE_HW_ISSUE_TTRX_1337,      KBASE_HW_ISSUE_TTRX_2968_TTRX_3162,
+	KBASE_HW_ISSUE_TTRX_921,       KBASE_HW_ISSUE_TTRX_3414,
+	KBASE_HW_ISSUE_TTRX_3083,      KBASE_HW_ISSUE_TTRX_3470,
+	KBASE_HW_ISSUE_TTRX_3464,      KBASE_HW_ISSUE_TITANHW_2710,
+	KBASE_HW_ISSUE_GPU2022PRO_148, KBASE_HW_ISSUE_TITANHW_2938,
+	KBASE_HW_ISSUE_KRAKEHW_2321,   KBASE_HW_ISSUE_END
+}; /* tBEx r0p1: the tBEx r0p0 entries without KBASE_HW_ISSUE_TTRX_3485 */
+
+__maybe_unused static const enum base_hw_issue base_hw_issues_tBEx_r1p0[] = {
+	KBASE_HW_ISSUE_9435,	       KBASE_HW_ISSUE_TSIX_2033,
+	KBASE_HW_ISSUE_TTRX_1337,      KBASE_HW_ISSUE_TTRX_2968_TTRX_3162,
+	KBASE_HW_ISSUE_TTRX_921,       KBASE_HW_ISSUE_TTRX_3414,
+	KBASE_HW_ISSUE_TTRX_3083,      KBASE_HW_ISSUE_TTRX_3470,
+	KBASE_HW_ISSUE_TTRX_3464,      KBASE_HW_ISSUE_TITANHW_2710,
+	KBASE_HW_ISSUE_GPU2022PRO_148, KBASE_HW_ISSUE_TITANHW_2938,
+	KBASE_HW_ISSUE_KRAKEHW_2321,   KBASE_HW_ISSUE_END
+}; /* tBEx r1p0; same entries as base_hw_issues_tBEx_r0p1 */
+
+__maybe_unused static const enum base_hw_issue base_hw_issues_tBEx_r1p1[] = {
+	KBASE_HW_ISSUE_9435,	       KBASE_HW_ISSUE_TSIX_2033,
+	KBASE_HW_ISSUE_TTRX_1337,      KBASE_HW_ISSUE_TTRX_2968_TTRX_3162,
+	KBASE_HW_ISSUE_TTRX_921,       KBASE_HW_ISSUE_TTRX_3414,
+	KBASE_HW_ISSUE_TTRX_3083,      KBASE_HW_ISSUE_TTRX_3470,
+	KBASE_HW_ISSUE_TTRX_3464,      KBASE_HW_ISSUE_TITANHW_2710,
+	KBASE_HW_ISSUE_GPU2022PRO_148, KBASE_HW_ISSUE_TITANHW_2938,
+	KBASE_HW_ISSUE_KRAKEHW_2321,   KBASE_HW_ISSUE_END
+}; /* tBEx r1p1; same entries as base_hw_issues_tBEx_r0p1 */
+
+__maybe_unused static const enum base_hw_issue base_hw_issues_model_tBEx[] = {
+	KBASE_HW_ISSUE_5736,	     KBASE_HW_ISSUE_9435,
+	KBASE_HW_ISSUE_TSIX_2033,    KBASE_HW_ISSUE_TTRX_1337,
+	KBASE_HW_ISSUE_TTRX_3414,    KBASE_HW_ISSUE_TTRX_3083,
+	KBASE_HW_ISSUE_TTRX_3470,    KBASE_HW_ISSUE_TTRX_3464,
+	KBASE_HW_ISSUE_TITANHW_2710, KBASE_HW_ISSUE_GPU2022PRO_148,
+	KBASE_HW_ISSUE_TITANHW_2938, KBASE_HW_ISSUE_KRAKEHW_2321,
+	KBASE_HW_ISSUE_END
+}; /* tBEx "model" list: the model_tTRx entries plus KBASE_HW_ISSUE_KRAKEHW_2321 */
+
+__maybe_unused static const enum base_hw_issue base_hw_issues_lBEx_r1p0[] = {
+	KBASE_HW_ISSUE_9435,	     KBASE_HW_ISSUE_TSIX_2033,
+	KBASE_HW_ISSUE_TTRX_1337,    KBASE_HW_ISSUE_TTRX_2968_TTRX_3162,
+	KBASE_HW_ISSUE_TTRX_921,     KBASE_HW_ISSUE_TTRX_3414,
+	KBASE_HW_ISSUE_TTRX_3083,    KBASE_HW_ISSUE_TTRX_3470,
+	KBASE_HW_ISSUE_TTRX_3464,    KBASE_HW_ISSUE_TTRX_3485,
+	KBASE_HW_ISSUE_TITANHW_2710, KBASE_HW_ISSUE_GPU2022PRO_148,
+	KBASE_HW_ISSUE_TITANHW_2938, KBASE_HW_ISSUE_KRAKEHW_2321,
+	KBASE_HW_ISSUE_END
+}; /* lBEx r1p0; same entries as base_hw_issues_tBEx_r0p0 */
+
+__maybe_unused static const enum base_hw_issue base_hw_issues_lBEx_r1p1[] = {
+	KBASE_HW_ISSUE_9435,	       KBASE_HW_ISSUE_TSIX_2033,
+	KBASE_HW_ISSUE_TTRX_1337,      KBASE_HW_ISSUE_TTRX_2968_TTRX_3162,
+	KBASE_HW_ISSUE_TTRX_921,       KBASE_HW_ISSUE_TTRX_3414,
+	KBASE_HW_ISSUE_TTRX_3083,      KBASE_HW_ISSUE_TTRX_3470,
+	KBASE_HW_ISSUE_TTRX_3464,      KBASE_HW_ISSUE_TITANHW_2710,
+	KBASE_HW_ISSUE_GPU2022PRO_148, KBASE_HW_ISSUE_TITANHW_2938,
+	KBASE_HW_ISSUE_KRAKEHW_2321,   KBASE_HW_ISSUE_END
+}; /* lBEx r1p1: the lBEx r1p0 entries without KBASE_HW_ISSUE_TTRX_3485 */
+
+__maybe_unused static const enum base_hw_issue base_hw_issues_tBAx_r0p0[] = {
+	KBASE_HW_ISSUE_9435,	       KBASE_HW_ISSUE_TSIX_2033,
+	KBASE_HW_ISSUE_TTRX_1337,      KBASE_HW_ISSUE_TTRX_2968_TTRX_3162,
+	KBASE_HW_ISSUE_TTRX_921,       KBASE_HW_ISSUE_TTRX_3414,
+	KBASE_HW_ISSUE_TTRX_3083,      KBASE_HW_ISSUE_TTRX_3470,
+	KBASE_HW_ISSUE_TTRX_3464,      KBASE_HW_ISSUE_TITANHW_2710,
+	KBASE_HW_ISSUE_GPU2022PRO_148, KBASE_HW_ISSUE_TITANHW_2938,
+	KBASE_HW_ISSUE_KRAKEHW_2321,   KBASE_HW_ISSUE_END
+}; /* tBAx r0p0; same entries as base_hw_issues_tBEx_r0p1 */
+
+__maybe_unused static const enum base_hw_issue base_hw_issues_tBAx_r0p1[] = {
+	KBASE_HW_ISSUE_9435,	       KBASE_HW_ISSUE_TSIX_2033,
+	KBASE_HW_ISSUE_TTRX_1337,      KBASE_HW_ISSUE_TTRX_2968_TTRX_3162,
+	KBASE_HW_ISSUE_TTRX_921,       KBASE_HW_ISSUE_TTRX_3414,
+	KBASE_HW_ISSUE_TTRX_3083,      KBASE_HW_ISSUE_TTRX_3470,
+	KBASE_HW_ISSUE_TTRX_3464,      KBASE_HW_ISSUE_TITANHW_2710,
+	KBASE_HW_ISSUE_GPU2022PRO_148, KBASE_HW_ISSUE_TITANHW_2938,
+	KBASE_HW_ISSUE_KRAKEHW_2321,   KBASE_HW_ISSUE_END
+}; /* tBAx r0p1; same entries as base_hw_issues_tBAx_r0p0 */
+
+__maybe_unused static const enum base_hw_issue base_hw_issues_tBAx_r0p2[] = {
+	KBASE_HW_ISSUE_9435,	       KBASE_HW_ISSUE_TSIX_2033,
+	KBASE_HW_ISSUE_TTRX_1337,      KBASE_HW_ISSUE_TTRX_2968_TTRX_3162,
+	KBASE_HW_ISSUE_TTRX_921,       KBASE_HW_ISSUE_TTRX_3414,
+	KBASE_HW_ISSUE_TTRX_3083,      KBASE_HW_ISSUE_TTRX_3470,
+	KBASE_HW_ISSUE_TTRX_3464,      KBASE_HW_ISSUE_TITANHW_2710,
+	KBASE_HW_ISSUE_GPU2022PRO_148, KBASE_HW_ISSUE_TITANHW_2938,
+	KBASE_HW_ISSUE_KRAKEHW_2321,   KBASE_HW_ISSUE_END
+}; /* tBAx r0p2; same entries as base_hw_issues_tBAx_r0p0 */
+
+__maybe_unused static const enum base_hw_issue base_hw_issues_model_tBAx[] = {
+	KBASE_HW_ISSUE_5736,	     KBASE_HW_ISSUE_9435,
+	KBASE_HW_ISSUE_TSIX_2033,    KBASE_HW_ISSUE_TTRX_1337,
+	KBASE_HW_ISSUE_TTRX_3414,    KBASE_HW_ISSUE_TTRX_3083,
+	KBASE_HW_ISSUE_TTRX_3470,    KBASE_HW_ISSUE_TTRX_3464,
+	KBASE_HW_ISSUE_TITANHW_2710, KBASE_HW_ISSUE_GPU2022PRO_148,
+	KBASE_HW_ISSUE_TITANHW_2938, KBASE_HW_ISSUE_KRAKEHW_2321,
+	KBASE_HW_ISSUE_END
+}; /* tBAx "model" list; same entries as base_hw_issues_model_tBEx */
+
+__maybe_unused static const enum base_hw_issue base_hw_issues_tODx_r0p0[] = {
+	KBASE_HW_ISSUE_TSIX_2033,      KBASE_HW_ISSUE_TTRX_1337,
+	KBASE_HW_ISSUE_GPU2019_3212,   KBASE_HW_ISSUE_GPU2019_3878,
+	KBASE_HW_ISSUE_GPU2019_3901,   KBASE_HW_ISSUE_TITANHW_2710,
+	KBASE_HW_ISSUE_GPU2022PRO_148, KBASE_HW_ISSUE_TITANHW_2938,
+	KBASE_HW_ISSUE_KRAKEHW_2321,   KBASE_HW_ISSUE_END
+}; /* tODx r0p0 issue list; KBASE_HW_ISSUE_END is the final entry */
+
+__maybe_unused static const enum base_hw_issue base_hw_issues_model_tODx[] = {
+	KBASE_HW_ISSUE_TSIX_2033,      KBASE_HW_ISSUE_TTRX_1337,
+	KBASE_HW_ISSUE_GPU2019_3212,   KBASE_HW_ISSUE_GPU2019_3878,
+	KBASE_HW_ISSUE_GPU2019_3901,   KBASE_HW_ISSUE_TITANHW_2710,
+	KBASE_HW_ISSUE_GPU2022PRO_148, KBASE_HW_ISSUE_TITANHW_2938,
+	KBASE_HW_ISSUE_KRAKEHW_2321,   KBASE_HW_ISSUE_END
+}; /* tODx "model" list; same entries as base_hw_issues_tODx_r0p0 */
+
+__maybe_unused static const enum base_hw_issue base_hw_issues_tGRx_r0p0[] = {
+	KBASE_HW_ISSUE_TSIX_2033,    KBASE_HW_ISSUE_TTRX_1337,	  KBASE_HW_ISSUE_GPU2019_3878,
+	KBASE_HW_ISSUE_GPU2019_3901, KBASE_HW_ISSUE_TITANHW_2710, KBASE_HW_ISSUE_GPU2022PRO_148,
+	KBASE_HW_ISSUE_TITANHW_2938, KBASE_HW_ISSUE_KRAKEHW_2321, KBASE_HW_ISSUE_END
+}; /* tGRx r0p0: the tODx r0p0 entries without KBASE_HW_ISSUE_GPU2019_3212 */
+
+__maybe_unused static const enum base_hw_issue base_hw_issues_model_tGRx[] = {
+	KBASE_HW_ISSUE_TSIX_2033,    KBASE_HW_ISSUE_TTRX_1337,	  KBASE_HW_ISSUE_GPU2019_3878,
+	KBASE_HW_ISSUE_GPU2019_3901, KBASE_HW_ISSUE_TITANHW_2710, KBASE_HW_ISSUE_GPU2022PRO_148,
+	KBASE_HW_ISSUE_TITANHW_2938, KBASE_HW_ISSUE_KRAKEHW_2321, KBASE_HW_ISSUE_END
+}; /* tGRx "model" list; same entries as base_hw_issues_tGRx_r0p0 */
+
+__maybe_unused static const enum base_hw_issue base_hw_issues_tVAx_r0p0[] = {
+	KBASE_HW_ISSUE_TSIX_2033,    KBASE_HW_ISSUE_TTRX_1337,	  KBASE_HW_ISSUE_GPU2019_3878,
+	KBASE_HW_ISSUE_GPU2019_3901, KBASE_HW_ISSUE_TITANHW_2710, KBASE_HW_ISSUE_GPU2022PRO_148,
+	KBASE_HW_ISSUE_TITANHW_2938, KBASE_HW_ISSUE_KRAKEHW_2321, KBASE_HW_ISSUE_END
+}; /* tVAx r0p0; same entries as base_hw_issues_tGRx_r0p0 */
+
+__maybe_unused static const enum base_hw_issue base_hw_issues_tVAx_r0p1[] = {
+	KBASE_HW_ISSUE_TSIX_2033,    KBASE_HW_ISSUE_TTRX_1337,	  KBASE_HW_ISSUE_GPU2019_3878,
+	KBASE_HW_ISSUE_GPU2019_3901, KBASE_HW_ISSUE_TITANHW_2710, KBASE_HW_ISSUE_GPU2022PRO_148,
+	KBASE_HW_ISSUE_TITANHW_2938, KBASE_HW_ISSUE_KRAKEHW_2321, KBASE_HW_ISSUE_END
+}; /* tVAx r0p1; same entries as base_hw_issues_tVAx_r0p0 */
+
+__maybe_unused static const enum base_hw_issue base_hw_issues_model_tVAx[] = {
+	KBASE_HW_ISSUE_TSIX_2033,    KBASE_HW_ISSUE_TTRX_1337,	  KBASE_HW_ISSUE_GPU2019_3878,
+	KBASE_HW_ISSUE_GPU2019_3901, KBASE_HW_ISSUE_TITANHW_2710, KBASE_HW_ISSUE_GPU2022PRO_148,
+	KBASE_HW_ISSUE_TITANHW_2938, KBASE_HW_ISSUE_KRAKEHW_2321, KBASE_HW_ISSUE_END
+}; /* tVAx "model" list; same entries as base_hw_issues_tVAx_r0p0 */
+
+__maybe_unused static const enum base_hw_issue base_hw_issues_tTUx_r0p0[] = {
+	KBASE_HW_ISSUE_TSIX_2033,      KBASE_HW_ISSUE_TTRX_1337,    KBASE_HW_ISSUE_TURSEHW_1997,
+	KBASE_HW_ISSUE_GPU2019_3878,   KBASE_HW_ISSUE_TURSEHW_2716, KBASE_HW_ISSUE_GPU2019_3901,
+	KBASE_HW_ISSUE_GPU2021PRO_290, KBASE_HW_ISSUE_TITANHW_2710, KBASE_HW_ISSUE_TITANHW_2679,
+	KBASE_HW_ISSUE_GPU2022PRO_148, KBASE_HW_ISSUE_TITANHW_2938, KBASE_HW_ISSUE_TURSEHW_2934,
+	KBASE_HW_ISSUE_KRAKEHW_2321,   KBASE_HW_ISSUE_END
+}; /* tTUx r0p0 issue list; unlike the other tTUx lists it has no TITANHW_2922 entry */
+
+__maybe_unused static const enum base_hw_issue base_hw_issues_tTUx_r0p1[] = {
+	KBASE_HW_ISSUE_TSIX_2033,      KBASE_HW_ISSUE_TTRX_1337,    KBASE_HW_ISSUE_TURSEHW_1997,
+	KBASE_HW_ISSUE_GPU2019_3878,   KBASE_HW_ISSUE_TURSEHW_2716, KBASE_HW_ISSUE_GPU2019_3901,
+	KBASE_HW_ISSUE_GPU2021PRO_290, KBASE_HW_ISSUE_TITANHW_2710, KBASE_HW_ISSUE_TITANHW_2679,
+	KBASE_HW_ISSUE_GPU2022PRO_148, KBASE_HW_ISSUE_TITANHW_2922, KBASE_HW_ISSUE_TITANHW_2938,
+	KBASE_HW_ISSUE_TURSEHW_2934,   KBASE_HW_ISSUE_KRAKEHW_2321, KBASE_HW_ISSUE_END
+}; /* tTUx r0p1: the tTUx r0p0 entries plus KBASE_HW_ISSUE_TITANHW_2922 */
+
+__maybe_unused static const enum base_hw_issue base_hw_issues_model_tTUx[] = {
+	KBASE_HW_ISSUE_TSIX_2033,    KBASE_HW_ISSUE_TTRX_1337,	  KBASE_HW_ISSUE_GPU2019_3878,
+	KBASE_HW_ISSUE_TURSEHW_2716, KBASE_HW_ISSUE_GPU2019_3901, KBASE_HW_ISSUE_GPU2021PRO_290,
+	KBASE_HW_ISSUE_TITANHW_2710, KBASE_HW_ISSUE_TITANHW_2679, KBASE_HW_ISSUE_GPU2022PRO_148,
+	KBASE_HW_ISSUE_TITANHW_2922, KBASE_HW_ISSUE_TITANHW_2938, KBASE_HW_ISSUE_TURSEHW_2934,
+	KBASE_HW_ISSUE_KRAKEHW_2321, KBASE_HW_ISSUE_END
+}; /* tTUx "model" list: the tTUx r0p1 entries without KBASE_HW_ISSUE_TURSEHW_1997 */
+
+__maybe_unused static const enum base_hw_issue base_hw_issues_tTUx_r1p0[] = {
+	KBASE_HW_ISSUE_TSIX_2033,    KBASE_HW_ISSUE_TTRX_1337,	  KBASE_HW_ISSUE_GPU2019_3878,
+	KBASE_HW_ISSUE_TURSEHW_2716, KBASE_HW_ISSUE_GPU2019_3901, KBASE_HW_ISSUE_GPU2021PRO_290,
+	KBASE_HW_ISSUE_TITANHW_2710, KBASE_HW_ISSUE_TITANHW_2679, KBASE_HW_ISSUE_GPU2022PRO_148,
+	KBASE_HW_ISSUE_TITANHW_2922, KBASE_HW_ISSUE_TITANHW_2938, KBASE_HW_ISSUE_TURSEHW_2934,
+	KBASE_HW_ISSUE_KRAKEHW_2321, KBASE_HW_ISSUE_END
+}; /* tTUx r1p0: the tTUx r0p1 entries without KBASE_HW_ISSUE_TURSEHW_1997 */
+
+__maybe_unused static const enum base_hw_issue base_hw_issues_tTUx_r1p1[] = {
+	KBASE_HW_ISSUE_TSIX_2033,    KBASE_HW_ISSUE_TTRX_1337,	  KBASE_HW_ISSUE_GPU2019_3878,
+	KBASE_HW_ISSUE_TURSEHW_2716, KBASE_HW_ISSUE_GPU2019_3901, KBASE_HW_ISSUE_GPU2021PRO_290,
+	KBASE_HW_ISSUE_TITANHW_2710, KBASE_HW_ISSUE_TITANHW_2679, KBASE_HW_ISSUE_GPU2022PRO_148,
+	KBASE_HW_ISSUE_TITANHW_2922, KBASE_HW_ISSUE_TITANHW_2938, KBASE_HW_ISSUE_TURSEHW_2934,
+	KBASE_HW_ISSUE_KRAKEHW_2321, KBASE_HW_ISSUE_END
+}; /* tTUx r1p1; same entries as base_hw_issues_tTUx_r1p0 */
+
+__maybe_unused static const enum base_hw_issue base_hw_issues_tTUx_r1p2[] = {
+	KBASE_HW_ISSUE_TSIX_2033,    KBASE_HW_ISSUE_TTRX_1337,	  KBASE_HW_ISSUE_GPU2019_3878,
+	KBASE_HW_ISSUE_TURSEHW_2716, KBASE_HW_ISSUE_GPU2019_3901, KBASE_HW_ISSUE_GPU2021PRO_290,
+	KBASE_HW_ISSUE_TITANHW_2710, KBASE_HW_ISSUE_TITANHW_2679, KBASE_HW_ISSUE_GPU2022PRO_148,
+	KBASE_HW_ISSUE_TITANHW_2922, KBASE_HW_ISSUE_TITANHW_2938, KBASE_HW_ISSUE_TURSEHW_2934,
+	KBASE_HW_ISSUE_KRAKEHW_2321, KBASE_HW_ISSUE_END
+}; /* tTUx r1p2; same entries as base_hw_issues_tTUx_r1p0 */
+
+__maybe_unused static const enum base_hw_issue base_hw_issues_tTUx_r1p3[] = {
+	KBASE_HW_ISSUE_TSIX_2033,    KBASE_HW_ISSUE_TTRX_1337,	  KBASE_HW_ISSUE_GPU2019_3878,
+	KBASE_HW_ISSUE_TURSEHW_2716, KBASE_HW_ISSUE_GPU2019_3901, KBASE_HW_ISSUE_GPU2021PRO_290,
+	KBASE_HW_ISSUE_TITANHW_2710, KBASE_HW_ISSUE_TITANHW_2679, KBASE_HW_ISSUE_GPU2022PRO_148,
+	KBASE_HW_ISSUE_TITANHW_2922, KBASE_HW_ISSUE_TITANHW_2938, KBASE_HW_ISSUE_TURSEHW_2934,
+	KBASE_HW_ISSUE_KRAKEHW_2321, KBASE_HW_ISSUE_END
+}; /* tTUx r1p3; same entries as base_hw_issues_tTUx_r1p0 */
+
+__maybe_unused static const enum base_hw_issue base_hw_issues_model_tTIx[] = {
+	KBASE_HW_ISSUE_TSIX_2033,
+	KBASE_HW_ISSUE_TTRX_1337,
+	KBASE_HW_ISSUE_TURSEHW_2716,
+	KBASE_HW_ISSUE_GPU2021PRO_290,
+	KBASE_HW_ISSUE_TITANHW_2710,
+	KBASE_HW_ISSUE_TITANHW_2679,
+	KBASE_HW_ISSUE_GPU2022PRO_148,
+	KBASE_HW_ISSUE_TITANHW_2922,
+	KBASE_HW_ISSUE_TITANHW_2952,
+	KBASE_HW_ISSUE_TITANHW_2938,
+	KBASE_HW_ISSUE_TURSEHW_2934,
+	KBASE_HW_ISSUE_KRAKEHW_2321,
+	KBASE_HW_ISSUE_END
+}; /* tTIx "model" issue list; KBASE_HW_ISSUE_END is the final entry */
+
+__maybe_unused static const enum base_hw_issue base_hw_issues_tTIx_r0p0[] = {
+	KBASE_HW_ISSUE_TSIX_2033,
+	KBASE_HW_ISSUE_TTRX_1337,
+	KBASE_HW_ISSUE_TURSEHW_2716,
+	KBASE_HW_ISSUE_GPU2021PRO_290,
+	KBASE_HW_ISSUE_TITANHW_2710,
+	KBASE_HW_ISSUE_TITANHW_2679,
+	KBASE_HW_ISSUE_GPU2022PRO_148,
+	KBASE_HW_ISSUE_TITANHW_2922,
+	KBASE_HW_ISSUE_TITANHW_2952,
+	KBASE_HW_ISSUE_TITANHW_2938,
+	KBASE_HW_ISSUE_TURSEHW_2934,
+	KBASE_HW_ISSUE_KRAKEHW_2321,
+	KBASE_HW_ISSUE_END
+}; /* tTIx r0p0; same entries as base_hw_issues_model_tTIx */
+
+__maybe_unused static const enum base_hw_issue base_hw_issues_tTIx_r0p1[] = {
+	KBASE_HW_ISSUE_TSIX_2033,      KBASE_HW_ISSUE_TTRX_1337,    KBASE_HW_ISSUE_TURSEHW_2716,
+	KBASE_HW_ISSUE_GPU2021PRO_290, KBASE_HW_ISSUE_TITANHW_2710, KBASE_HW_ISSUE_TITANHW_2679,
+	KBASE_HW_ISSUE_GPU2022PRO_148, KBASE_HW_ISSUE_TITANHW_2938, KBASE_HW_ISSUE_TURSEHW_2934,
+	KBASE_HW_ISSUE_KRAKEHW_2321,   KBASE_HW_ISSUE_END
+}; /* tTIx r0p1: the tTIx r0p0 entries without TITANHW_2922 and TITANHW_2952 */
+
+__maybe_unused static const enum base_hw_issue base_hw_issues_tKRx_r0p0[] = {
+	KBASE_HW_ISSUE_TTRX_1337,    KBASE_HW_ISSUE_TURSEHW_2716, KBASE_HW_ISSUE_GPU2022PRO_148,
+	KBASE_HW_ISSUE_KRAKEHW_2151, KBASE_HW_ISSUE_KRAKEHW_2269, KBASE_HW_ISSUE_TITANHW_2922,
+	KBASE_HW_ISSUE_TURSEHW_2934, KBASE_HW_ISSUE_KRAKEHW_2321, KBASE_HW_ISSUE_END
+}; /* tKRx r0p0 issue list; KBASE_HW_ISSUE_END is the final entry */
+
+__maybe_unused static const enum base_hw_issue base_hw_issues_tKRx_r0p1[] = {
+	KBASE_HW_ISSUE_TTRX_1337,    KBASE_HW_ISSUE_TURSEHW_2716, KBASE_HW_ISSUE_GPU2022PRO_148,
+	KBASE_HW_ISSUE_KRAKEHW_2269, KBASE_HW_ISSUE_TURSEHW_2934, KBASE_HW_ISSUE_KRAKEHW_2321,
+	KBASE_HW_ISSUE_END
+}; /* tKRx r0p1: the tKRx r0p0 entries without KRAKEHW_2151 and TITANHW_2922 */
+
+__maybe_unused static const enum base_hw_issue base_hw_issues_model_tKRx[] = {
+	KBASE_HW_ISSUE_TTRX_1337,    KBASE_HW_ISSUE_TURSEHW_2716, KBASE_HW_ISSUE_GPU2022PRO_148,
+	KBASE_HW_ISSUE_KRAKEHW_2151, KBASE_HW_ISSUE_KRAKEHW_2269, KBASE_HW_ISSUE_TURSEHW_2934,
+	KBASE_HW_ISSUE_KRAKEHW_2321, KBASE_HW_ISSUE_END
+}; /* tKRx "model" list: the tKRx r0p0 entries without KBASE_HW_ISSUE_TITANHW_2922 */
+
+
+#endif /* _KBASE_HWCONFIG_ISSUES_H_ */
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_ioctl_helpers.h b/drivers/gpu/arm/bifrost/mali_kbase_ioctl_helpers.h
new file mode 100644
index 000000000000..e87925bab9b0
--- /dev/null
+++ b/drivers/gpu/arm/bifrost/mali_kbase_ioctl_helpers.h
@@ -0,0 +1,542 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ *
+ * (C) COPYRIGHT 2024 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+#ifndef _KBASE_IOCTL_HELPERS_H_
+#define _KBASE_IOCTL_HELPERS_H_
+
+#include <uapi/gpu/arm/bifrost/mali_kbase_ioctl.h>
+
+/* Macro for IOCTLs that don't have an IOCTL struct; returns from the enclosing function */
+#define KBASE_HANDLE_IOCTL(cmd, function, arg)                                         \
+	do {                                                                           \
+		int ret;                                                               \
+		BUILD_BUG_ON(_IOC_DIR(cmd) != _IOC_NONE);                              \
+		dev_dbg(arg->kbdev->dev, "Enter ioctl %s\n", #function);               \
+		ret = function(arg);                                                   \
+		dev_dbg(arg->kbdev->dev, "Return %d from ioctl %s\n", ret, #function); \
+		return ret;                                                            \
+	} while (0)
+
+/* Macro for IOCTLs that have an input IOCTL struct; expects 'uarg' in the enclosing scope */
+#define KBASE_HANDLE_IOCTL_IN(cmd, function, type, arg)                                \
+	do {                                                                           \
+		type param;                                                            \
+		int ret, err;                                                          \
+		dev_dbg(arg->kbdev->dev, "Enter ioctl %s\n", #function);               \
+		BUILD_BUG_ON(_IOC_DIR(cmd) != _IOC_WRITE);                             \
+		BUILD_BUG_ON(sizeof(param) != _IOC_SIZE(cmd));                         \
+		err = copy_from_user(&param, uarg, sizeof(param));                     \
+		if (unlikely(err))                                                     \
+			return -EFAULT;                                                \
+		err = check_padding_##cmd(&param);                                     \
+		if (unlikely(err))                                                     \
+			return -EINVAL;                                                \
+		ret = function(arg, &param);                                           \
+		dev_dbg(arg->kbdev->dev, "Return %d from ioctl %s\n", ret, #function); \
+		return ret;                                                            \
+	} while (0)
+
+/* Macro for IOCTLs that have an output IOCTL struct; expects 'uarg' in the enclosing scope */
+#define KBASE_HANDLE_IOCTL_OUT(cmd, function, type, arg)                               \
+	do {                                                                           \
+		type param;                                                            \
+		int ret, err;                                                          \
+		dev_dbg(arg->kbdev->dev, "Enter ioctl %s\n", #function);               \
+		BUILD_BUG_ON(_IOC_DIR(cmd) != _IOC_READ);                              \
+		BUILD_BUG_ON(sizeof(param) != _IOC_SIZE(cmd));                         \
+		memset(&param, 0, sizeof(param));                                      \
+		ret = function(arg, &param);                                           \
+		err = copy_to_user(uarg, &param, sizeof(param));                       \
+		if (unlikely(err))                                                     \
+			return -EFAULT;                                                \
+		dev_dbg(arg->kbdev->dev, "Return %d from ioctl %s\n", ret, #function); \
+		return ret;                                                            \
+	} while (0)
+
+/* Macro for IOCTLs that have an input and output IOCTL struct; expects 'uarg' in scope */
+#define KBASE_HANDLE_IOCTL_INOUT(cmd, function, type, arg)                             \
+	do {                                                                           \
+		type param;                                                            \
+		int ret, err;                                                          \
+		dev_dbg(arg->kbdev->dev, "Enter ioctl %s\n", #function);               \
+		BUILD_BUG_ON(_IOC_DIR(cmd) != (_IOC_WRITE | _IOC_READ));               \
+		BUILD_BUG_ON(sizeof(param) != _IOC_SIZE(cmd));                         \
+		err = copy_from_user(&param, uarg, sizeof(param));                     \
+		if (unlikely(err))                                                     \
+			return -EFAULT;                                                \
+		err = check_padding_##cmd(&param);                                     \
+		if (unlikely(err))                                                     \
+			return -EINVAL;                                                \
+		ret = function(arg, &param);                                           \
+		err = copy_to_user(uarg, &param, sizeof(param));                       \
+		if (unlikely(err))                                                     \
+			return -EFAULT;                                                \
+		dev_dbg(arg->kbdev->dev, "Return %d from ioctl %s\n", ret, #function); \
+		return ret;                                                            \
+	} while (0)
+
+/* Inline functions to check padding bytes in the input IOCTL struct. Return 0 if all padding
+ * bytes are zero (or were cleared here for backward-compatibility), non-zero otherwise.
+ */
+static inline int check_padding_KBASE_IOCTL_VERSION_CHECK(struct kbase_ioctl_version_check *p)
+{
+	return 0;
+}
+
+static inline int
+check_padding_KBASE_IOCTL_VERSION_CHECK_RESERVED(struct kbase_ioctl_version_check *p)
+{
+	return 0;
+}
+
+static inline int check_padding_KBASE_IOCTL_SET_FLAGS(struct kbase_ioctl_set_flags *p)
+{
+	return 0;
+}
+
+static inline int check_padding_KBASE_IOCTL_GET_GPUPROPS(struct kbase_ioctl_get_gpuprops *p)
+{
+	return 0;
+}
+
+static inline int check_padding_KBASE_IOCTL_MEM_ALLOC(union kbase_ioctl_mem_alloc *p)
+{
+	return 0;
+}
+
+static inline int check_padding_KBASE_IOCTL_MEM_QUERY(union kbase_ioctl_mem_query *p)
+{
+	return 0;
+}
+
+static inline int check_padding_KBASE_IOCTL_MEM_FREE(struct kbase_ioctl_mem_free *p)
+{
+	return 0;
+}
+
+static inline int
+check_padding_KBASE_IOCTL_HWCNT_READER_SETUP(struct kbase_ioctl_hwcnt_reader_setup *p)
+{
+	return 0;
+}
+
+static inline int check_padding_KBASE_IOCTL_HWCNT_SET(struct kbase_ioctl_hwcnt_values *p)
+{
+	return p->padding ? -1 : 0; /* -1 on non-zero padding, matching the array checkers */
+}
+
+static inline int check_padding_KBASE_IOCTL_GET_DDK_VERSION(struct kbase_ioctl_get_ddk_version *p)
+{
+	return p->padding ? -1 : 0; /* -1 on non-zero padding, matching the array checkers */
+}
+
+static inline int check_padding_KBASE_IOCTL_MEM_JIT_INIT(struct kbase_ioctl_mem_jit_init *p)
+{
+	size_t remaining = ARRAY_SIZE(p->padding);
+
+	/* Scan from the last padding element down; any non-zero element fails the check. */
+	while (remaining--) {
+		if (p->padding[remaining] != 0)
+			return -1;
+	}
+	return 0;
+}
+
+static inline int check_padding_KBASE_IOCTL_MEM_SYNC(struct kbase_ioctl_mem_sync *p)
+{
+	size_t i;
+
+	/*
+	 * Checking p->padding is deferred till the support window for backward-compatibility ends.
+	 * GPUCORE-42000 will add the checking.
+	 *
+	 * Older versions of base might not set the padding bytes to 0, so instead of rejecting
+	 * the request the padding is cleared here on behalf of user space.
+	 */
+	for (i = 0; i < ARRAY_SIZE(p->padding); i++)
+		p->padding[i] = 0;
+
+	return 0;
+}
+
+static inline int
+check_padding_KBASE_IOCTL_MEM_FIND_CPU_OFFSET(union kbase_ioctl_mem_find_cpu_offset *p)
+{
+	return 0;
+}
+
+static inline int check_padding_KBASE_IOCTL_TLSTREAM_ACQUIRE(struct kbase_ioctl_tlstream_acquire *p)
+{
+	return 0;
+}
+
+static inline int check_padding_KBASE_IOCTL_MEM_COMMIT(struct kbase_ioctl_mem_commit *p)
+{
+	return 0;
+}
+
+static inline int check_padding_KBASE_IOCTL_MEM_ALIAS(union kbase_ioctl_mem_alias *p)
+{
+	return 0;
+}
+
+static inline int check_padding_KBASE_IOCTL_MEM_IMPORT(union kbase_ioctl_mem_import *p)
+{
+	return 0;
+}
+
+static inline int check_padding_KBASE_IOCTL_MEM_FLAGS_CHANGE(struct kbase_ioctl_mem_flags_change *p)
+{
+	return 0;
+}
+
+static inline int check_padding_KBASE_IOCTL_STREAM_CREATE(struct kbase_ioctl_stream_create *p)
+{
+	return 0;
+}
+
+static inline int check_padding_KBASE_IOCTL_FENCE_VALIDATE(struct kbase_ioctl_fence_validate *p)
+{
+	return 0;
+}
+
+static inline int check_padding_KBASE_IOCTL_MEM_PROFILE_ADD(struct kbase_ioctl_mem_profile_add *p)
+{
+	return p->padding ? -1 : 0; /* -1 on non-zero padding, matching the array checkers */
+}
+
+static inline int
+check_padding_KBASE_IOCTL_STICKY_RESOURCE_MAP(struct kbase_ioctl_sticky_resource_map *p)
+{
+	return 0;
+}
+
+static inline int
+check_padding_KBASE_IOCTL_STICKY_RESOURCE_UNMAP(struct kbase_ioctl_sticky_resource_unmap *p)
+{
+	return 0;
+}
+
+static inline int check_padding_KBASE_IOCTL_MEM_FIND_GPU_START_AND_OFFSET(
+	union kbase_ioctl_mem_find_gpu_start_and_offset *p)
+{
+	return 0;
+}
+
+static inline int check_padding_KBASE_IOCTL_CINSTR_GWT_DUMP(union kbase_ioctl_cinstr_gwt_dump *p)
+{
+	/*
+	 * Checking p->padding is deferred till the support window for backward-compatibility ends.
+	 * GPUCORE-42000 will add the checking.
+	 *
+	 * Older versions of base might not set the padding bytes to 0, so instead of rejecting
+	 * the request the padding is cleared here on behalf of user space.
+	 */
+	p->in.padding = 0;
+	return 0;
+}
+
+static inline int check_padding_KBASE_IOCTL_MEM_EXEC_INIT(struct kbase_ioctl_mem_exec_init *p)
+{
+	return 0;
+}
+
+static inline int
+check_padding_KBASE_IOCTL_GET_CPU_GPU_TIMEINFO(union kbase_ioctl_get_cpu_gpu_timeinfo *p)
+{
+	size_t remaining = ARRAY_SIZE(p->in.paddings);
+
+	/* Scan from the last padding element down; any non-zero element fails the check. */
+	while (remaining--) {
+		if (p->in.paddings[remaining] != 0)
+			return -1;
+	}
+	return 0;
+}
+
+static inline int
+check_padding_KBASE_IOCTL_CONTEXT_PRIORITY_CHECK(struct kbase_ioctl_context_priority_check *p)
+{
+	return 0;
+}
+
+static inline int
+check_padding_KBASE_IOCTL_SET_LIMITED_CORE_COUNT(struct kbase_ioctl_set_limited_core_count *p)
+{
+	return 0;
+}
+
+static inline int
+check_padding_KBASE_IOCTL_KINSTR_PRFCNT_ENUM_INFO(struct kbase_ioctl_kinstr_prfcnt_enum_info *p)
+{
+	return 0;
+}
+
+static inline int
+check_padding_KBASE_IOCTL_KINSTR_PRFCNT_SETUP(union kbase_ioctl_kinstr_prfcnt_setup *p)
+{
+	return 0;
+}
+
+#if MALI_UNIT_TEST
+#endif /* MALI_UNIT_TEST */
+
+#if MALI_USE_CSF
+
+static inline int
+check_padding_KBASE_IOCTL_CS_QUEUE_REGISTER(struct kbase_ioctl_cs_queue_register *p)
+{
+	size_t remaining = ARRAY_SIZE(p->padding);
+
+	/* Scan from the last padding element down; any non-zero element fails the check. */
+	while (remaining--) {
+		if (p->padding[remaining] != 0)
+			return -1;
+	}
+	return 0;
+}
+
+static inline int check_padding_KBASE_IOCTL_CS_QUEUE_KICK(struct kbase_ioctl_cs_queue_kick *p)
+{
+	return 0;
+}
+
+static inline int check_padding_KBASE_IOCTL_CS_QUEUE_BIND(union kbase_ioctl_cs_queue_bind *p)
+{
+	size_t i;
+
+	/*
+	 * Checking p->padding is deferred till the support window for backward-compatibility ends.
+	 * GPUCORE-42000 will add the checking.
+	 *
+	 * Older versions of base might not set the padding bytes to 0, so instead of rejecting
+	 * the request the padding is cleared here on behalf of user space.
+	 */
+	for (i = 0; i < ARRAY_SIZE(p->in.padding); i++)
+		p->in.padding[i] = 0;
+
+	return 0;
+}
+
+static inline int
+check_padding_KBASE_IOCTL_CS_QUEUE_REGISTER_EX(struct kbase_ioctl_cs_queue_register_ex *p)
+{
+	size_t remaining = ARRAY_SIZE(p->padding);
+
+	/* Both padding arrays must be entirely zero; scan each from its last element down. */
+	while (remaining--) {
+		if (p->padding[remaining] != 0)
+			return -1;
+	}
+	remaining = ARRAY_SIZE(p->ex_padding);
+	while (remaining--) {
+		if (p->ex_padding[remaining] != 0)
+			return -1;
+	}
+	return 0;
+}
+
+static inline int
+check_padding_KBASE_IOCTL_CS_QUEUE_TERMINATE(struct kbase_ioctl_cs_queue_terminate *p)
+{
+	return 0;
+}
+
+static inline int
+check_padding_KBASE_IOCTL_CS_QUEUE_GROUP_CREATE_1_6(union kbase_ioctl_cs_queue_group_create_1_6 *p)
+{
+	size_t remaining = ARRAY_SIZE(p->in.padding);
+
+	/* Scan from the last padding element down; any non-zero element fails the check. */
+	while (remaining--) {
+		if (p->in.padding[remaining] != 0)
+			return -1;
+	}
+	return 0;
+}
+
+static inline int check_padding_KBASE_IOCTL_CS_QUEUE_GROUP_CREATE_1_18(
+	union kbase_ioctl_cs_queue_group_create_1_18 *p)
+{
+	size_t remaining = ARRAY_SIZE(p->in.padding);
+
+	/* Scan from the last padding element down; any non-zero element fails the check. */
+	while (remaining--) {
+		if (p->in.padding[remaining] != 0)
+			return -1;
+	}
+	return 0;
+}
+
+static inline int
+check_padding_KBASE_IOCTL_CS_QUEUE_GROUP_CREATE(union kbase_ioctl_cs_queue_group_create *p)
+{
+	size_t remaining = ARRAY_SIZE(p->in.padding);
+
+	/* Scan from the last padding element down; any non-zero element fails the check. */
+	while (remaining--) {
+		if (p->in.padding[remaining] != 0)
+			return -1;
+	}
+	return 0;
+}
+
+static inline int
+check_padding_KBASE_IOCTL_CS_QUEUE_GROUP_TERMINATE(struct kbase_ioctl_cs_queue_group_term *p)
+{
+	size_t remaining = ARRAY_SIZE(p->padding);
+
+	/* Scan from the last padding element down; any non-zero element fails the check. */
+	while (remaining--) {
+		if (p->padding[remaining] != 0)
+			return -1;
+	}
+	return 0;
+}
+
+static inline int
+check_padding_KBASE_IOCTL_KCPU_QUEUE_DELETE(struct kbase_ioctl_kcpu_queue_delete *p)
+{
+	size_t i;
+
+	/*
+	 * Checking p->padding is deferred till the support window for backward-compatibility ends.
+	 * GPUCORE-42000 will add the checking.
+	 *
+	 * Older versions of base might not set the padding bytes to 0, so instead of rejecting
+	 * the request the padding is cleared here on behalf of user space.
+	 */
+	for (i = 0; i < ARRAY_SIZE(p->padding); i++)
+		p->padding[i] = 0;
+
+	return 0;
+}
+
+static inline int
+check_padding_KBASE_IOCTL_KCPU_QUEUE_ENQUEUE(struct kbase_ioctl_kcpu_queue_enqueue *p)
+{
+	size_t i;
+
+	/*
+	 * Checking p->padding is deferred till the support window for backward-compatibility ends.
+	 * GPUCORE-42000 will add the checking.
+	 *
+	 * Older versions of base might not set the padding bytes to 0, so instead of rejecting
+	 * the request the padding is cleared here on behalf of user space.
+	 */
+	for (i = 0; i < ARRAY_SIZE(p->padding); i++)
+		p->padding[i] = 0;
+
+	return 0;
+}
+
+static inline int
+check_padding_KBASE_IOCTL_CS_TILER_HEAP_INIT(union kbase_ioctl_cs_tiler_heap_init *p)
+{
+	return p->in.padding ? -1 : 0; /* -1 on non-zero padding, matching the array checkers */
+}
+
+static inline int
+check_padding_KBASE_IOCTL_CS_TILER_HEAP_INIT_1_13(union kbase_ioctl_cs_tiler_heap_init_1_13 *p)
+{
+	return p->in.padding ? -1 : 0; /* -1 on non-zero padding, matching the array checkers */
+}
+
+static inline int
+check_padding_KBASE_IOCTL_CS_TILER_HEAP_TERM(struct kbase_ioctl_cs_tiler_heap_term *p)
+{
+	return 0;
+}
+
+static inline int check_padding_KBASE_IOCTL_CS_GET_GLB_IFACE(union kbase_ioctl_cs_get_glb_iface *p)
+{
+	return 0;
+}
+
+static inline int
+check_padding_KBASE_IOCTL_CS_CPU_QUEUE_DUMP(struct kbase_ioctl_cs_cpu_queue_info *p)
+{
+	return 0;
+}
+
+static inline int check_padding_KBASE_IOCTL_MEM_ALLOC_EX(union kbase_ioctl_mem_alloc_ex *p)
+{
+	size_t remaining = ARRAY_SIZE(p->in.extra);
+
+	/* Scan p->in.extra from its last element down; any non-zero element fails the check. */
+	while (remaining--) {
+		if (p->in.extra[remaining] != 0)
+			return -1;
+	}
+	return 0;
+}
+
+static inline int check_padding_KBASE_IOCTL_READ_USER_PAGE(union kbase_ioctl_read_user_page *p)
+{
+	return p->in.padding ? -1 : 0; /* -1 on non-zero padding, matching the array checkers */
+}
+
+static inline int
+check_padding_KBASE_IOCTL_QUEUE_GROUP_CLEAR_FAULTS(struct kbase_ioctl_queue_group_clear_faults *p)
+{
+	size_t i;
+
+	/*
+	 * Checking p->padding is deferred till the support window for backward-compatibility ends.
+	 * GPUCORE-42000 will add the checking.
+	 *
+	 * Older versions of base might not set the padding bytes to 0, so instead of rejecting
+	 * the request the padding is cleared here on behalf of user space.
+	 */
+	for (i = 0; i < ARRAY_SIZE(p->padding); i++)
+		p->padding[i] = 0;
+
+	return 0;
+}
+
+#else /* MALI_USE_CSF */
+
+static inline int check_padding_KBASE_IOCTL_JOB_SUBMIT(struct kbase_ioctl_job_submit *p)
+{
+	return 0;
+}
+
+static inline int
+check_padding_KBASE_IOCTL_SOFT_EVENT_UPDATE(struct kbase_ioctl_soft_event_update *p)
+{
+	return 0;
+}
+
+static inline int check_padding_KBASE_IOCTL_KINSTR_JM_FD(union kbase_kinstr_jm_fd *p)
+{
+	size_t remaining = ARRAY_SIZE(p->in.padding);
+
+	/* Scan from the last padding element down; any non-zero element fails the check. */
+	while (remaining--) {
+		if (p->in.padding[remaining] != 0)
+			return -1;
+	}
+	return 0;
+}
+
+#endif /* !MALI_USE_CSF */
+
+#endif /* _KBASE_IOCTL_HELPERS_H_ */
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_jd.c b/drivers/gpu/arm/bifrost/mali_kbase_jd.c
index 418a1913b241..4da7fa377bd7 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_jd.c
+++ b/drivers/gpu/arm/bifrost/mali_kbase_jd.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
 /*
  *
- * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2024 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -39,9 +39,6 @@
 #include <mali_kbase_hwaccess_jm.h>
 #include <tl/mali_kbase_tracepoints.h>
 #include <mali_linux_trace.h>
-
-#include <mali_kbase_cs_experimental.h>
-
 #include <mali_kbase_caps.h>
 
 /* Return whether katom will run on the GPU or not. Currently only soft jobs and
@@ -209,7 +206,7 @@ static int kbase_jd_pre_external_resources(struct kbase_jd_atom *katom,
 	}
 
 	if (copy_from_user(input_extres, get_compat_pointer(katom->kctx, user_atom->extres_list),
-			   sizeof(*input_extres) * katom->nr_extres) != 0) {
+			   size_mul(sizeof(*input_extres), katom->nr_extres)) != 0) {
 		err = -EINVAL;
 		goto failed_input_copy;
 	}
@@ -697,7 +694,6 @@ static void jd_trace_atom_submit(struct kbase_context *const kctx,
 
 static bool jd_submit_atom(struct kbase_context *const kctx,
 			   const struct base_jd_atom *const user_atom,
-			   const struct base_jd_fragment *const user_jc_incr,
 			   struct kbase_jd_atom *const katom)
 {
 	struct kbase_device *kbdev = kctx->kbdev;
@@ -755,8 +751,6 @@ static bool jd_submit_atom(struct kbase_context *const kctx,
 	}
 #endif /* MALI_JIT_PRESSURE_LIMIT_BASE */
 
-	katom->renderpass_id = user_atom->renderpass_id;
-
 	/* Implicitly sets katom->protected_state.enter as well. */
 	katom->protected_state.exit = KBASE_ATOM_EXIT_PROTECTED_CHECK;
 
@@ -875,20 +869,7 @@ static bool jd_submit_atom(struct kbase_context *const kctx,
 	/* Create a new atom. */
 	jd_trace_atom_submit(kctx, katom, &katom->sched_priority);
 
-#if !MALI_INCREMENTAL_RENDERING_JM
-	/* Reject atoms for incremental rendering if not supported */
-	if (katom->core_req & (BASE_JD_REQ_START_RENDERPASS | BASE_JD_REQ_END_RENDERPASS)) {
-		dev_err(kctx->kbdev->dev, "Rejecting atom with unsupported core_req 0x%x\n",
-			katom->core_req);
-		katom->event_code = BASE_JD_EVENT_JOB_INVALID;
-		return kbase_jd_done_nolock(katom, true);
-	}
-#endif /* !MALI_INCREMENTAL_RENDERING_JM */
-
-	if (katom->core_req & BASE_JD_REQ_END_RENDERPASS) {
-		WARN_ON(katom->jc != 0);
-		katom->jc_fragment = *user_jc_incr;
-	} else if (!katom->jc && (katom->core_req & BASE_JD_REQ_ATOM_TYPE) != BASE_JD_REQ_DEP) {
+	if (!katom->jc && (katom->core_req & BASE_JD_REQ_ATOM_TYPE) != BASE_JD_REQ_DEP) {
 		/* Reject atoms with job chain = NULL, as these cause issues
 		 * with soft-stop
 		 */
@@ -1018,8 +999,7 @@ int kbase_jd_submit(struct kbase_context *kctx, void __user *user_addr, u32 nr_a
 	struct kbase_device *kbdev;
 	u32 latest_flush;
 
-	bool jd_atom_is_v2 = (stride == sizeof(struct base_jd_atom_v2) ||
-			      stride == offsetof(struct base_jd_atom_v2, renderpass_id));
+	bool jd_atom_is_v2 = (stride == sizeof(struct base_jd_atom_v2));
 
 	CSTD_UNUSED(uk6_atom);
 
@@ -1035,10 +1015,7 @@ int kbase_jd_submit(struct kbase_context *kctx, void __user *user_addr, u32 nr_a
 		return -EINVAL;
 	}
 
-	if (stride != offsetof(struct base_jd_atom_v2, renderpass_id) &&
-	    stride != sizeof(struct base_jd_atom_v2) &&
-	    stride != offsetof(struct base_jd_atom, renderpass_id) &&
-	    stride != sizeof(struct base_jd_atom)) {
+	if (stride != sizeof(struct base_jd_atom_v2) && stride != sizeof(struct base_jd_atom)) {
 		dev_err(kbdev->dev,
 			"Stride %u passed to job_submit isn't supported by the kernel\n", stride);
 		return -EINVAL;
@@ -1057,7 +1034,6 @@ int kbase_jd_submit(struct kbase_context *kctx, void __user *user_addr, u32 nr_a
 		struct base_jd_atom user_atom = {
 			.seq_nr = 0,
 		};
-		struct base_jd_fragment user_jc_incr;
 		struct kbase_jd_atom *katom;
 
 		if (unlikely(jd_atom_is_v2)) {
@@ -1082,44 +1058,6 @@ int kbase_jd_submit(struct kbase_context *kctx, void __user *user_addr, u32 nr_a
 			}
 		}
 
-		if (stride == offsetof(struct base_jd_atom_v2, renderpass_id)) {
-			dev_dbg(kbdev->dev, "No renderpass ID: use 0\n");
-			user_atom.renderpass_id = 0;
-		} else {
-			/* Ensure all padding bytes are 0 for potential future
-			 * extension
-			 */
-			size_t j;
-
-			dev_dbg(kbdev->dev, "Renderpass ID is %d\n", user_atom.renderpass_id);
-			for (j = 0; j < sizeof(user_atom.padding); j++) {
-				if (user_atom.padding[j]) {
-					dev_err(kbdev->dev, "Bad padding byte %zu: %d\n", j,
-						user_atom.padding[j]);
-					err = -EINVAL;
-					break;
-				}
-			}
-			if (err)
-				break;
-		}
-
-		/* In this case 'jc' is the CPU address of a struct
-		 * instead of a GPU address of a job chain.
-		 */
-		if (user_atom.core_req & BASE_JD_REQ_END_RENDERPASS) {
-			if (copy_from_user(&user_jc_incr, u64_to_user_ptr(user_atom.jc),
-					   sizeof(user_jc_incr))) {
-				dev_err(kbdev->dev,
-					"Invalid jc address 0x%llx passed to job_submit\n",
-					user_atom.jc);
-				err = -EFAULT;
-				break;
-			}
-			dev_dbg(kbdev->dev, "Copied IR jobchain addresses\n");
-			user_atom.jc = 0;
-		}
-
 		user_addr = (void __user *)((uintptr_t)user_addr + stride);
 
 		mutex_lock(&jctx->lock);
@@ -1172,8 +1110,7 @@ int kbase_jd_submit(struct kbase_context *kctx, void __user *user_addr, u32 nr_a
 			mutex_lock(&jctx->lock);
 		}
 		KBASE_TLSTREAM_TL_JD_SUBMIT_ATOM_START(kbdev, katom);
-		need_to_try_schedule_context |=
-			jd_submit_atom(kctx, &user_atom, &user_jc_incr, katom);
+		need_to_try_schedule_context |= jd_submit_atom(kctx, &user_atom, katom);
 		KBASE_TLSTREAM_TL_JD_SUBMIT_ATOM_END(kbdev, katom);
 		/* Register a completed job as a disjoint event when the GPU is in a disjoint state
 		 * (ie. being reset).
@@ -1579,9 +1516,6 @@ int kbase_jd_init(struct kbase_context *kctx)
 #endif
 	}
 
-	for (i = 0; i < BASE_JD_RP_COUNT; i++)
-		kctx->jctx.renderpasses[i].state = KBASE_JD_RP_COMPLETE;
-
 	mutex_init(&kctx->jctx.lock);
 
 	init_waitqueue_head(&kctx->jctx.zero_jobs_wait);
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_js.c b/drivers/gpu/arm/bifrost/mali_kbase_js.c
index 55c1f4be25d5..d42fde37db2a 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_js.c
+++ b/drivers/gpu/arm/bifrost/mali_kbase_js.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
 /*
  *
- * (C) COPYRIGHT 2011-2023 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2011-2024 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -161,7 +161,7 @@ static inline int gpu_metrics_ctx_init(struct kbase_context *kctx)
 	put_cred(cred);
 
 	/* Return early if this is not a Userspace created context */
-	if (unlikely(!kctx->kfile))
+	if (unlikely(!kctx->filp))
 		return 0;
 
 	/* Serialize against the other threads trying to create/destroy Kbase contexts. */
@@ -200,7 +200,7 @@ static inline void gpu_metrics_ctx_term(struct kbase_context *kctx)
 	unsigned long flags;
 
 	/* Return early if this is not a Userspace created context */
-	if (unlikely(!kctx->kfile))
+	if (unlikely(!kctx->filp))
 		return;
 
 	/* Serialize against the other threads trying to create/destroy Kbase contexts. */
@@ -333,19 +333,6 @@ static void jsctx_queue_foreach_prio(struct kbase_context *kctx, unsigned int js
 
 		rb_erase(node, &queue->runnable_tree);
 		callback(kctx->kbdev, entry);
-
-		/* Runnable end-of-renderpass atoms can also be in the linked
-		 * list of atoms blocked on cross-slot dependencies. Remove them
-		 * to avoid calling the callback twice.
-		 */
-		if (entry->atom_flags & KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST) {
-			WARN_ON(!(entry->core_req & BASE_JD_REQ_END_RENDERPASS));
-			dev_dbg(kctx->kbdev->dev, "Del runnable atom %pK from X_DEP list\n",
-				(void *)entry);
-
-			list_del(&entry->queue);
-			entry->atom_flags &= ~KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST;
-		}
 	}
 
 	while (!list_empty(&queue->x_dep_head)) {
@@ -1230,7 +1217,7 @@ static bool kbase_js_ctx_pullable(struct kbase_context *kctx, unsigned int js, b
 		dev_dbg(kbdev->dev, "JS: Atom %pK is blocked in js_ctx_pullable\n", (void *)katom);
 		return false; /* next atom blocked */
 	}
-	if (kbase_js_atom_blocked_on_x_dep(katom)) {
+	if (katom->atom_flags & KBASE_KATOM_FLAG_X_DEP_BLOCKED) {
 		if (katom->x_pre_dep->gpu_rb_state == KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB ||
 		    katom->x_pre_dep->will_fail_event_code) {
 			dev_dbg(kbdev->dev,
@@ -1371,9 +1358,6 @@ static bool kbase_js_dep_validate(struct kbase_context *kctx, struct kbase_jd_at
 				    (dep_atom->status != KBASE_JD_ATOM_STATE_UNUSED)) {
 					katom->atom_flags |= KBASE_KATOM_FLAG_X_DEP_BLOCKED;
 
-					dev_dbg(kbdev->dev, "Set X_DEP flag on atom %pK\n",
-						(void *)katom);
-
 					katom->x_pre_dep = dep_atom;
 					dep_atom->x_post_dep = katom;
 					if (kbase_jd_katom_dep_type(&katom->dep[i]) ==
@@ -1447,110 +1431,12 @@ void kbase_js_update_ctx_priority(struct kbase_context *kctx)
 }
 KBASE_EXPORT_TEST_API(kbase_js_update_ctx_priority);
 
-/**
- * js_add_start_rp() - Add an atom that starts a renderpass to the job scheduler
- * @start_katom: Pointer to the atom to be added.
- * Return: 0 if successful or a negative value on failure.
- */
-static int js_add_start_rp(struct kbase_jd_atom *const start_katom)
-{
-	struct kbase_context *const kctx = start_katom->kctx;
-	struct kbase_jd_renderpass *rp;
-	struct kbase_device *const kbdev = kctx->kbdev;
-	unsigned long flags;
-
-	lockdep_assert_held(&kctx->jctx.lock);
-
-	if (WARN_ON(!(start_katom->core_req & BASE_JD_REQ_START_RENDERPASS)))
-		return -EINVAL;
-
-	if (start_katom->core_req & BASE_JD_REQ_END_RENDERPASS)
-		return -EINVAL;
-
-	compiletime_assert((1ull << (sizeof(start_katom->renderpass_id) * 8)) <=
-				   ARRAY_SIZE(kctx->jctx.renderpasses),
-			   "Should check invalid access to renderpasses");
-
-	rp = &kctx->jctx.renderpasses[start_katom->renderpass_id];
-
-	if (rp->state != KBASE_JD_RP_COMPLETE)
-		return -EINVAL;
-
-	dev_dbg(kctx->kbdev->dev, "JS add start atom %pK of RP %d\n", (void *)start_katom,
-		start_katom->renderpass_id);
-
-	/* The following members are read when updating the job slot
-	 * ringbuffer/fifo therefore they require additional locking.
-	 */
-	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
-
-	rp->state = KBASE_JD_RP_START;
-	rp->start_katom = start_katom;
-	rp->end_katom = NULL;
-	INIT_LIST_HEAD(&rp->oom_reg_list);
-
-	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
-
-	return 0;
-}
-
-/**
- * js_add_end_rp() - Add an atom that ends a renderpass to the job scheduler
- * @end_katom: Pointer to the atom to be added.
- * Return: 0 if successful or a negative value on failure.
- */
-static int js_add_end_rp(struct kbase_jd_atom *const end_katom)
-{
-	struct kbase_context *const kctx = end_katom->kctx;
-	struct kbase_jd_renderpass *rp;
-	struct kbase_device *const kbdev = kctx->kbdev;
-
-	lockdep_assert_held(&kctx->jctx.lock);
-
-	if (WARN_ON(!(end_katom->core_req & BASE_JD_REQ_END_RENDERPASS)))
-		return -EINVAL;
-
-	if (end_katom->core_req & BASE_JD_REQ_START_RENDERPASS)
-		return -EINVAL;
-
-	compiletime_assert((1ull << (sizeof(end_katom->renderpass_id) * 8)) <=
-				   ARRAY_SIZE(kctx->jctx.renderpasses),
-			   "Should check invalid access to renderpasses");
-
-	rp = &kctx->jctx.renderpasses[end_katom->renderpass_id];
-
-	dev_dbg(kbdev->dev, "JS add end atom %pK in state %d of RP %d\n", (void *)end_katom,
-		(int)rp->state, end_katom->renderpass_id);
-
-	if (rp->state == KBASE_JD_RP_COMPLETE)
-		return -EINVAL;
-
-	if (rp->end_katom == NULL) {
-		/* We can't be in a retry state until the fragment job chain
-		 * has completed.
-		 */
-		unsigned long flags;
-
-		WARN_ON(rp->state == KBASE_JD_RP_RETRY);
-		WARN_ON(rp->state == KBASE_JD_RP_RETRY_PEND_OOM);
-		WARN_ON(rp->state == KBASE_JD_RP_RETRY_OOM);
-
-		spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
-		rp->end_katom = end_katom;
-		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
-	} else
-		WARN_ON(rp->end_katom != end_katom);
-
-	return 0;
-}
-
 bool kbasep_js_add_job(struct kbase_context *kctx, struct kbase_jd_atom *atom)
 {
 	unsigned long flags;
 	struct kbasep_js_kctx_info *js_kctx_info;
 	struct kbase_device *kbdev;
 	struct kbasep_js_device_data *js_devdata;
-	int err = 0;
 
 	bool enqueue_required = false;
 	bool timer_sync = false;
@@ -1566,17 +1452,6 @@ bool kbasep_js_add_job(struct kbase_context *kctx, struct kbase_jd_atom *atom)
 	mutex_lock(&js_devdata->queue_mutex);
 	mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
 
-	if (atom->core_req & BASE_JD_REQ_START_RENDERPASS)
-		err = js_add_start_rp(atom);
-	else if (atom->core_req & BASE_JD_REQ_END_RENDERPASS)
-		err = js_add_end_rp(atom);
-
-	if (err < 0) {
-		atom->event_code = BASE_JD_EVENT_JOB_INVALID;
-		atom->status = KBASE_JD_ATOM_STATE_COMPLETED;
-		goto out_unlock;
-	}
-
 	/*
 	 * Begin Runpool transaction
 	 */
@@ -1860,10 +1735,7 @@ kbasep_js_runpool_release_ctx_internal(struct kbase_device *kbdev, struct kbase_
 			kbasep_js_ctx_attr_ctx_release_atom(kbdev, kctx, katom_retained_state);
 
 	if (new_ref_count == 2 && kbase_ctx_flag(kctx, KCTX_PRIVILEGED) &&
-#ifdef CONFIG_MALI_ARBITER_SUPPORT
-	    !kbase_pm_is_gpu_lost(kbdev) &&
-#endif
-	    !kbase_pm_is_suspending(kbdev)) {
+	    !kbase_pm_is_gpu_lost(kbdev) && !kbase_pm_is_suspending(kbdev)) {
 		/* Context is kept scheduled into an address space even when
 		 * there are no jobs, in this case we have to handle the
 		 * situation where all jobs have been evicted from the GPU and
@@ -1880,10 +1752,7 @@ kbasep_js_runpool_release_ctx_internal(struct kbase_device *kbdev, struct kbase_
 	 * which was previously acquired by kbasep_js_schedule_ctx().
 	 */
 	if (new_ref_count == 1 && (!kbasep_js_is_submit_allowed(js_devdata, kctx) ||
-#ifdef CONFIG_MALI_ARBITER_SUPPORT
-				   kbase_pm_is_gpu_lost(kbdev) ||
-#endif
-				   kbase_pm_is_suspending(kbdev))) {
+				   kbase_pm_is_gpu_lost(kbdev) || kbase_pm_is_suspending(kbdev))) {
 		int num_slots = kbdev->gpu_props.num_job_slots;
 		unsigned int slot;
 
@@ -2189,11 +2058,7 @@ static bool kbasep_js_schedule_ctx(struct kbase_device *kbdev, struct kbase_cont
 	 * of it being called strictly after the suspend flag is set, and will
 	 * wait for this lock to drop)
 	 */
-#ifdef CONFIG_MALI_ARBITER_SUPPORT
 	if (kbase_pm_is_suspending(kbdev) || kbase_pm_is_gpu_lost(kbdev)) {
-#else
-	if (kbase_pm_is_suspending(kbdev)) {
-#endif
 		/* Cause it to leave at some later point */
 		bool retained;
 		CSTD_UNUSED(retained);
@@ -2267,7 +2132,6 @@ void kbasep_js_schedule_privileged_ctx(struct kbase_device *kbdev, struct kbase_
 	js_devdata = &kbdev->js_data;
 	js_kctx_info = &kctx->jctx.sched_info;
 
-#ifdef CONFIG_MALI_ARBITER_SUPPORT
 	/* This should only happen in response to a system call
 	 * from a user-space thread.
 	 * In a non-arbitrated environment this can never happen
@@ -2279,18 +2143,10 @@ void kbasep_js_schedule_privileged_ctx(struct kbase_device *kbdev, struct kbase_
 	 * the wait event for KCTX_SCHEDULED, since no context
 	 * can be scheduled until we have the GPU again.
 	 */
-	if (kbdev->arb.arb_if == NULL)
+	if (!kbase_has_arbiter(kbdev)) {
 		if (WARN_ON(kbase_pm_is_suspending(kbdev)))
 			return;
-#else
-	/* This should only happen in response to a system call
-	 * from a user-space thread.
-	 * In a non-arbitrated environment this can never happen
-	 * whilst suspending.
-	 */
-	if (WARN_ON(kbase_pm_is_suspending(kbdev)))
-		return;
-#endif
+	}
 
 	mutex_lock(&js_devdata->queue_mutex);
 	mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
@@ -2416,63 +2272,63 @@ void kbasep_js_resume(struct kbase_device *kbdev)
 			struct kbase_context *kctx, *n;
 			unsigned long flags;
 
-#ifndef CONFIG_MALI_ARBITER_SUPPORT
-			spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+			if (!kbase_has_arbiter(kbdev)) {
+				spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
 
-			list_for_each_entry_safe(kctx, n,
-						 &kbdev->js_data.ctx_list_unpullable[js][prio],
-						 jctx.sched_info.ctx.ctx_list_entry[js]) {
-				struct kbasep_js_kctx_info *js_kctx_info;
+				list_for_each_entry_safe(
+					kctx, n, &kbdev->js_data.ctx_list_unpullable[js][prio],
+					jctx.sched_info.ctx.ctx_list_entry[js]) {
+					struct kbasep_js_kctx_info *js_kctx_info;
+					bool timer_sync = false;
+
+					/* Drop lock so we can take kctx mutexes */
+					spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+					js_kctx_info = &kctx->jctx.sched_info;
+
+					mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
+					mutex_lock(&js_devdata->runpool_mutex);
+					spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+					if (!kbase_ctx_flag(kctx, KCTX_SCHEDULED) &&
+					    kbase_js_ctx_pullable(kctx, js, false))
+						timer_sync = kbase_js_ctx_list_add_pullable_nolock(
+							kbdev, kctx, js);
+
+					spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+					if (timer_sync)
+						kbase_backend_ctx_count_changed(kbdev);
+
+					mutex_unlock(&js_devdata->runpool_mutex);
+					mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+
+					/* Take lock before accessing list again */
+					spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+				}
+				spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+			} else {
 				bool timer_sync = false;
 
-				/* Drop lock so we can take kctx mutexes */
-				spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
-
-				js_kctx_info = &kctx->jctx.sched_info;
-
-				mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
-				mutex_lock(&js_devdata->runpool_mutex);
 				spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
 
-				if (!kbase_ctx_flag(kctx, KCTX_SCHEDULED) &&
-				    kbase_js_ctx_pullable(kctx, js, false))
-					timer_sync = kbase_js_ctx_list_add_pullable_nolock(
-						kbdev, kctx, js);
+				list_for_each_entry_safe(
+					kctx, n, &kbdev->js_data.ctx_list_unpullable[js][prio],
+					jctx.sched_info.ctx.ctx_list_entry[js]) {
+					if (!kbase_ctx_flag(kctx, KCTX_SCHEDULED) &&
+					    kbase_js_ctx_pullable(kctx, js, false))
+						timer_sync |= kbase_js_ctx_list_add_pullable_nolock(
+							kbdev, kctx, js);
+				}
 
 				spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
 
-				if (timer_sync)
+				if (timer_sync) {
+					mutex_lock(&js_devdata->runpool_mutex);
 					kbase_backend_ctx_count_changed(kbdev);
-
-				mutex_unlock(&js_devdata->runpool_mutex);
-				mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
-
-				/* Take lock before accessing list again */
-				spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+					mutex_unlock(&js_devdata->runpool_mutex);
+				}
 			}
-			spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
-#else
-			bool timer_sync = false;
-
-			spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
-
-			list_for_each_entry_safe(kctx, n,
-						 &kbdev->js_data.ctx_list_unpullable[js][prio],
-						 jctx.sched_info.ctx.ctx_list_entry[js]) {
-				if (!kbase_ctx_flag(kctx, KCTX_SCHEDULED) &&
-				    kbase_js_ctx_pullable(kctx, js, false))
-					timer_sync |= kbase_js_ctx_list_add_pullable_nolock(
-						kbdev, kctx, js);
-			}
-
-			spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
-
-			if (timer_sync) {
-				mutex_lock(&js_devdata->runpool_mutex);
-				kbase_backend_ctx_count_changed(kbdev);
-				mutex_unlock(&js_devdata->runpool_mutex);
-			}
-#endif
 		}
 	}
 	mutex_unlock(&js_devdata->queue_mutex);
@@ -2515,7 +2371,7 @@ static unsigned int kbase_js_get_slot(struct kbase_device *kbdev, struct kbase_j
 
 bool kbase_js_dep_resolved_submit(struct kbase_context *kctx, struct kbase_jd_atom *katom)
 {
-	bool enqueue_required, add_required = true;
+	bool enqueue_required;
 
 	katom->slot_nr = kbase_js_get_slot(kctx->kbdev, katom);
 
@@ -2525,10 +2381,7 @@ bool kbase_js_dep_resolved_submit(struct kbase_context *kctx, struct kbase_jd_at
 	/* If slot will transition from unpullable to pullable then add to
 	 * pullable list
 	 */
-	if (jsctx_rb_none_to_pull(kctx, katom->slot_nr))
-		enqueue_required = true;
-	else
-		enqueue_required = false;
+	enqueue_required = jsctx_rb_none_to_pull(kctx, katom->slot_nr);
 
 	if ((katom->atom_flags & KBASE_KATOM_FLAG_X_DEP_BLOCKED) ||
 	    (katom->pre_dep &&
@@ -2541,15 +2394,9 @@ bool kbase_js_dep_resolved_submit(struct kbase_context *kctx, struct kbase_jd_at
 
 		list_add_tail(&katom->queue, &queue->x_dep_head);
 		katom->atom_flags |= KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST;
-		if (kbase_js_atom_blocked_on_x_dep(katom)) {
-			enqueue_required = false;
-			add_required = false;
-		}
+		enqueue_required = false;
 	} else {
 		dev_dbg(kctx->kbdev->dev, "Atom %pK not added to X_DEP list\n", (void *)katom);
-	}
-
-	if (add_required) {
 		/* Check if there are lower priority jobs to soft stop */
 		kbase_job_slot_ctx_priority_check_locked(kctx, katom);
 
@@ -2575,30 +2422,22 @@ bool kbase_js_dep_resolved_submit(struct kbase_context *kctx, struct kbase_jd_at
  */
 static void kbase_js_move_to_tree(struct kbase_jd_atom *katom)
 {
-	struct kbase_context *const kctx = katom->kctx;
-
-	lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
+	lockdep_assert_held(&katom->kctx->kbdev->hwaccess_lock);
 
 	while (katom) {
 		WARN_ON(!(katom->atom_flags & KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST));
 
-		if (!kbase_js_atom_blocked_on_x_dep(katom)) {
-			dev_dbg(kctx->kbdev->dev,
+		if (!(katom->atom_flags & KBASE_KATOM_FLAG_X_DEP_BLOCKED)) {
+			dev_dbg(katom->kctx->kbdev->dev,
 				"Del atom %pK from X_DEP list in js_move_to_tree\n", (void *)katom);
 
 			list_del(&katom->queue);
 			katom->atom_flags &= ~KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST;
-			/* For incremental rendering, an end-of-renderpass atom
-			 * may have had its dependency on start-of-renderpass
-			 * ignored and may therefore already be in the tree.
-			 */
-			if (!(katom->atom_flags & KBASE_KATOM_FLAG_JSCTX_IN_TREE)) {
-				jsctx_tree_add(kctx, katom);
-				katom->atom_flags |= KBASE_KATOM_FLAG_JSCTX_IN_TREE;
-			}
+			jsctx_tree_add(katom->kctx, katom);
+			katom->atom_flags |= KBASE_KATOM_FLAG_JSCTX_IN_TREE;
 		} else {
-			dev_dbg(kctx->kbdev->dev, "Atom %pK blocked on x-dep in js_move_to_tree\n",
-				(void *)katom);
+			dev_dbg(katom->kctx->kbdev->dev,
+				"Atom %pK blocked on x-dep in js_move_to_tree\n", (void *)katom);
 			break;
 		}
 
@@ -2615,7 +2454,7 @@ static void kbase_js_move_to_tree(struct kbase_jd_atom *katom)
  *
  * Remove all post dependencies of an atom from the context ringbuffers.
  *
- * The original atom's event_code will be propogated to all dependent atoms.
+ * The original atom's event_code will be propagated to all dependent atoms.
  *
  * Context: Caller must hold the HW access lock
  */
@@ -2671,11 +2510,7 @@ struct kbase_jd_atom *kbase_js_pull(struct kbase_context *kctx, unsigned int js)
 		dev_dbg(kbdev->dev, "JS: No submit allowed for kctx %pK\n", (void *)kctx);
 		return NULL;
 	}
-#ifdef CONFIG_MALI_ARBITER_SUPPORT
 	if (kbase_pm_is_suspending(kbdev) || kbase_pm_is_gpu_lost(kbdev))
-#else
-	if (kbase_pm_is_suspending(kbdev))
-#endif
 		return NULL;
 
 	katom = jsctx_rb_peek(kctx, js);
@@ -2705,7 +2540,7 @@ struct kbase_jd_atom *kbase_js_pull(struct kbase_context *kctx, unsigned int js)
 			return NULL;
 	}
 
-	if (kbase_js_atom_blocked_on_x_dep(katom)) {
+	if (katom->atom_flags & KBASE_KATOM_FLAG_X_DEP_BLOCKED) {
 		if (katom->x_pre_dep->gpu_rb_state == KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB ||
 		    katom->x_pre_dep->will_fail_event_code) {
 			dev_dbg(kbdev->dev,
@@ -2745,190 +2580,6 @@ struct kbase_jd_atom *kbase_js_pull(struct kbase_context *kctx, unsigned int js)
 	return katom;
 }
 
-/**
- * js_return_of_start_rp() - Handle soft-stop of an atom that starts a
- *                           renderpass
- * @start_katom: Pointer to the start-of-renderpass atom that was soft-stopped
- *
- * This function is called to switch to incremental rendering if the tiler job
- * chain at the start of a renderpass has used too much memory. It prevents the
- * tiler job being pulled for execution in the job scheduler again until the
- * next phase of incremental rendering is complete.
- *
- * If the end-of-renderpass atom is already in the job scheduler (because a
- * previous attempt at tiling used too much memory during the same renderpass)
- * then it is unblocked; otherwise, it is run by handing it to the scheduler.
- */
-static void js_return_of_start_rp(struct kbase_jd_atom *const start_katom)
-{
-	struct kbase_context *const kctx = start_katom->kctx;
-	struct kbase_device *const kbdev = kctx->kbdev;
-	struct kbase_jd_renderpass *rp;
-	struct kbase_jd_atom *end_katom;
-	unsigned long flags;
-
-	lockdep_assert_held(&kctx->jctx.lock);
-
-	if (WARN_ON(!(start_katom->core_req & BASE_JD_REQ_START_RENDERPASS)))
-		return;
-
-	compiletime_assert((1ull << (sizeof(start_katom->renderpass_id) * 8)) <=
-				   ARRAY_SIZE(kctx->jctx.renderpasses),
-			   "Should check invalid access to renderpasses");
-
-	rp = &kctx->jctx.renderpasses[start_katom->renderpass_id];
-
-	if (WARN_ON(rp->start_katom != start_katom))
-		return;
-
-	dev_dbg(kctx->kbdev->dev, "JS return start atom %pK in state %d of RP %d\n",
-		(void *)start_katom, (int)rp->state, start_katom->renderpass_id);
-
-	if (WARN_ON(rp->state == KBASE_JD_RP_COMPLETE))
-		return;
-
-	/* The tiler job might have been soft-stopped for some reason other
-	 * than running out of memory.
-	 */
-	if (rp->state == KBASE_JD_RP_START || rp->state == KBASE_JD_RP_RETRY) {
-		dev_dbg(kctx->kbdev->dev, "JS return isn't OOM in state %d of RP %d\n",
-			(int)rp->state, start_katom->renderpass_id);
-		return;
-	}
-
-	dev_dbg(kctx->kbdev->dev, "JS return confirm OOM in state %d of RP %d\n", (int)rp->state,
-		start_katom->renderpass_id);
-
-	if (WARN_ON(rp->state != KBASE_JD_RP_PEND_OOM && rp->state != KBASE_JD_RP_RETRY_PEND_OOM))
-		return;
-
-	/* Prevent the tiler job being pulled for execution in the
-	 * job scheduler again.
-	 */
-	dev_dbg(kbdev->dev, "Blocking start atom %pK\n", (void *)start_katom);
-	atomic_inc(&start_katom->blocked);
-
-	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
-
-	rp->state = (rp->state == KBASE_JD_RP_PEND_OOM) ? KBASE_JD_RP_OOM : KBASE_JD_RP_RETRY_OOM;
-
-	/* Was the fragment job chain submitted to kbase yet? */
-	end_katom = rp->end_katom;
-	if (end_katom) {
-		dev_dbg(kctx->kbdev->dev, "JS return add end atom %pK\n", (void *)end_katom);
-
-		if (rp->state == KBASE_JD_RP_RETRY_OOM) {
-			/* Allow the end of the renderpass to be pulled for
-			 * execution again to continue incremental rendering.
-			 */
-			dev_dbg(kbdev->dev, "Unblocking end atom %pK\n", (void *)end_katom);
-			atomic_dec(&end_katom->blocked);
-			WARN_ON(!(end_katom->atom_flags & KBASE_KATOM_FLAG_JSCTX_IN_TREE));
-			WARN_ON(end_katom->status != KBASE_JD_ATOM_STATE_IN_JS);
-
-			kbase_js_ctx_list_add_pullable_nolock(kbdev, kctx, end_katom->slot_nr);
-
-			/* Expect the fragment job chain to be scheduled without
-			 * further action because this function is called when
-			 * returning an atom to the job scheduler ringbuffer.
-			 */
-			end_katom = NULL;
-		} else {
-			WARN_ON(end_katom->status != KBASE_JD_ATOM_STATE_QUEUED &&
-				end_katom->status != KBASE_JD_ATOM_STATE_IN_JS);
-		}
-	}
-
-	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
-
-	if (end_katom)
-		kbase_jd_dep_clear_locked(end_katom);
-}
-
-/**
- * js_return_of_end_rp() - Handle completion of an atom that ends a renderpass
- * @end_katom: Pointer to the end-of-renderpass atom that was completed
- *
- * This function is called to continue incremental rendering if the tiler job
- * chain at the start of a renderpass used too much memory. It resets the
- * mechanism for detecting excessive memory usage then allows the soft-stopped
- * tiler job chain to be pulled for execution again.
- *
- * The start-of-renderpass atom must already been submitted to kbase.
- */
-static void js_return_of_end_rp(struct kbase_jd_atom *const end_katom)
-{
-	struct kbase_context *const kctx = end_katom->kctx;
-	struct kbase_device *const kbdev = kctx->kbdev;
-	struct kbase_jd_renderpass *rp;
-	struct kbase_jd_atom *start_katom;
-	unsigned long flags;
-
-	lockdep_assert_held(&kctx->jctx.lock);
-
-	if (WARN_ON(!(end_katom->core_req & BASE_JD_REQ_END_RENDERPASS)))
-		return;
-
-	compiletime_assert((1ull << (sizeof(end_katom->renderpass_id) * 8)) <=
-				   ARRAY_SIZE(kctx->jctx.renderpasses),
-			   "Should check invalid access to renderpasses");
-
-	rp = &kctx->jctx.renderpasses[end_katom->renderpass_id];
-
-	if (WARN_ON(rp->end_katom != end_katom))
-		return;
-
-	dev_dbg(kctx->kbdev->dev, "JS return end atom %pK in state %d of RP %d\n",
-		(void *)end_katom, (int)rp->state, end_katom->renderpass_id);
-
-	if (WARN_ON(rp->state != KBASE_JD_RP_OOM && rp->state != KBASE_JD_RP_RETRY_OOM))
-		return;
-
-	/* Reduce the number of mapped pages in the memory regions that
-	 * triggered out-of-memory last time so that we can detect excessive
-	 * memory usage again.
-	 */
-	kbase_gpu_vm_lock(kctx);
-	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
-
-	while (!list_empty(&rp->oom_reg_list)) {
-		struct kbase_va_region *reg =
-			list_first_entry(&rp->oom_reg_list, struct kbase_va_region, link);
-
-		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
-
-		dev_dbg(kbdev->dev, "Reset backing to %zu pages for region %pK\n",
-			reg->threshold_pages, (void *)reg);
-
-		if (!WARN_ON(reg->flags & KBASE_REG_VA_FREED))
-			kbase_mem_shrink(kctx, reg, reg->threshold_pages);
-
-		spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
-		dev_dbg(kbdev->dev, "Deleting region %pK from list\n", (void *)reg);
-		list_del_init(&reg->link);
-		kbase_va_region_alloc_put(kctx, reg);
-	}
-
-	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
-	kbase_gpu_vm_unlock(kctx);
-
-	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
-	rp->state = KBASE_JD_RP_RETRY;
-	dev_dbg(kbdev->dev, "Changed state to %d for retry\n", rp->state);
-
-	/* Allow the start of the renderpass to be pulled for execution again
-	 * to begin/continue incremental rendering.
-	 */
-	start_katom = rp->start_katom;
-	if (!WARN_ON(!start_katom)) {
-		dev_dbg(kbdev->dev, "Unblocking start atom %pK\n", (void *)start_katom);
-		atomic_dec(&start_katom->blocked);
-		(void)kbase_js_ctx_list_add_pullable_head_nolock(kbdev, kctx, start_katom->slot_nr);
-	}
-
-	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
-}
-
 static void js_return_worker(struct work_struct *data)
 {
 	struct kbase_jd_atom *katom = container_of(data, struct kbase_jd_atom, work);
@@ -2949,9 +2600,7 @@ static void js_return_worker(struct work_struct *data)
 		katom->event_code);
 
 	KBASE_KTRACE_ADD_JM(kbdev, JS_RETURN_WORKER, kctx, katom, katom->jc, 0);
-
-	if (katom->event_code != BASE_JD_EVENT_END_RP_DONE)
-		KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTSTOP_EX(kbdev, katom);
+	KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTSTOP_EX(kbdev, katom);
 
 	kbase_backend_complete_wq(kbdev, katom);
 
@@ -2960,8 +2609,7 @@ static void js_return_worker(struct work_struct *data)
 	mutex_lock(&js_devdata->queue_mutex);
 	mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
 
-	if (katom->event_code != BASE_JD_EVENT_END_RP_DONE)
-		atomic_dec(&katom->blocked);
+	atomic_dec(&katom->blocked);
 
 	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
 
@@ -3026,16 +2674,6 @@ static void js_return_worker(struct work_struct *data)
 	mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
 	mutex_unlock(&js_devdata->queue_mutex);
 
-	if (katom->core_req & BASE_JD_REQ_START_RENDERPASS) {
-		mutex_lock(&kctx->jctx.lock);
-		js_return_of_start_rp(katom);
-		mutex_unlock(&kctx->jctx.lock);
-	} else if (katom->event_code == BASE_JD_EVENT_END_RP_DONE) {
-		mutex_lock(&kctx->jctx.lock);
-		js_return_of_end_rp(katom);
-		mutex_unlock(&kctx->jctx.lock);
-	}
-
 	dev_dbg(kbdev->dev, "JS: retained state %s finished",
 		kbasep_js_has_atom_finished(&retained_state) ? "has" : "hasn't");
 
@@ -3071,144 +2709,6 @@ void kbase_js_unpull(struct kbase_context *kctx, struct kbase_jd_atom *katom)
 	queue_work(kctx->jctx.job_done_wq, &katom->work);
 }
 
-/**
- * js_complete_start_rp() - Handle completion of atom that starts a renderpass
- * @kctx:        Context pointer
- * @start_katom: Pointer to the atom that completed
- *
- * Put any references to virtual memory regions that might have been added by
- * kbase_job_slot_softstop_start_rp() because the tiler job chain completed
- * despite any pending soft-stop request.
- *
- * If the atom that just completed was soft-stopped during a previous attempt to
- * run it then there should be a blocked end-of-renderpass atom waiting for it,
- * which we must unblock to process the output of the tiler job chain.
- *
- * Return: true if caller should call kbase_backend_ctx_count_changed()
- */
-static bool js_complete_start_rp(struct kbase_context *kctx,
-				 struct kbase_jd_atom *const start_katom)
-{
-	struct kbase_device *const kbdev = kctx->kbdev;
-	struct kbase_jd_renderpass *rp;
-	bool timer_sync = false;
-
-	lockdep_assert_held(&kctx->jctx.lock);
-
-	if (WARN_ON(!(start_katom->core_req & BASE_JD_REQ_START_RENDERPASS)))
-		return false;
-
-	compiletime_assert((1ull << (sizeof(start_katom->renderpass_id) * 8)) <=
-				   ARRAY_SIZE(kctx->jctx.renderpasses),
-			   "Should check invalid access to renderpasses");
-
-	rp = &kctx->jctx.renderpasses[start_katom->renderpass_id];
-
-	if (WARN_ON(rp->start_katom != start_katom))
-		return false;
-
-	dev_dbg(kctx->kbdev->dev, "Start atom %pK is done in state %d of RP %d\n",
-		(void *)start_katom, (int)rp->state, start_katom->renderpass_id);
-
-	if (WARN_ON(rp->state == KBASE_JD_RP_COMPLETE))
-		return false;
-
-	if (rp->state == KBASE_JD_RP_PEND_OOM || rp->state == KBASE_JD_RP_RETRY_PEND_OOM) {
-		unsigned long flags;
-
-		dev_dbg(kctx->kbdev->dev, "Start atom %pK completed before soft-stop\n",
-			(void *)start_katom);
-
-		kbase_gpu_vm_lock(kctx);
-		spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
-
-		while (!list_empty(&rp->oom_reg_list)) {
-			struct kbase_va_region *reg =
-				list_first_entry(&rp->oom_reg_list, struct kbase_va_region, link);
-
-			WARN_ON(reg->flags & KBASE_REG_VA_FREED);
-			dev_dbg(kctx->kbdev->dev, "Deleting region %pK from list\n", (void *)reg);
-			list_del_init(&reg->link);
-			kbase_va_region_alloc_put(kctx, reg);
-		}
-
-		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
-		kbase_gpu_vm_unlock(kctx);
-	} else {
-		dev_dbg(kctx->kbdev->dev, "Start atom %pK did not exceed memory threshold\n",
-			(void *)start_katom);
-
-		WARN_ON(rp->state != KBASE_JD_RP_START && rp->state != KBASE_JD_RP_RETRY);
-	}
-
-	if (rp->state == KBASE_JD_RP_RETRY || rp->state == KBASE_JD_RP_RETRY_PEND_OOM) {
-		struct kbase_jd_atom *const end_katom = rp->end_katom;
-
-		if (!WARN_ON(!end_katom)) {
-			unsigned long flags;
-
-			/* Allow the end of the renderpass to be pulled for
-			 * execution again to continue incremental rendering.
-			 */
-			dev_dbg(kbdev->dev, "Unblocking end atom %pK!\n", (void *)end_katom);
-			atomic_dec(&end_katom->blocked);
-
-			spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
-			timer_sync = kbase_js_ctx_list_add_pullable_nolock(kbdev, kctx,
-									   end_katom->slot_nr);
-			spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
-		}
-	}
-
-	return timer_sync;
-}
-
-/**
- * js_complete_end_rp() - Handle final completion of atom that ends a renderpass
- * @kctx:      Context pointer
- * @end_katom: Pointer to the atom that completed for the last time
- *
- * This function must only be called if the renderpass actually completed
- * without the tiler job chain at the start using too much memory; otherwise
- * completion of the end-of-renderpass atom is handled similarly to a soft-stop.
- */
-static void js_complete_end_rp(struct kbase_context *kctx, struct kbase_jd_atom *const end_katom)
-{
-	struct kbase_device *const kbdev = kctx->kbdev;
-	unsigned long flags;
-	struct kbase_jd_renderpass *rp;
-
-	lockdep_assert_held(&kctx->jctx.lock);
-
-	if (WARN_ON(!(end_katom->core_req & BASE_JD_REQ_END_RENDERPASS)))
-		return;
-
-	compiletime_assert((1ull << (sizeof(end_katom->renderpass_id) * 8)) <=
-				   ARRAY_SIZE(kctx->jctx.renderpasses),
-			   "Should check invalid access to renderpasses");
-
-	rp = &kctx->jctx.renderpasses[end_katom->renderpass_id];
-
-	if (WARN_ON(rp->end_katom != end_katom))
-		return;
-
-	dev_dbg(kbdev->dev, "End atom %pK is done in state %d of RP %d\n", (void *)end_katom,
-		(int)rp->state, end_katom->renderpass_id);
-
-	if (WARN_ON(rp->state == KBASE_JD_RP_COMPLETE) || WARN_ON(rp->state == KBASE_JD_RP_OOM) ||
-	    WARN_ON(rp->state == KBASE_JD_RP_RETRY_OOM))
-		return;
-
-	/* Rendering completed without running out of memory.
-	 */
-	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
-	WARN_ON(!list_empty(&rp->oom_reg_list));
-	rp->state = KBASE_JD_RP_COMPLETE;
-	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
-
-	dev_dbg(kbdev->dev, "Renderpass %d is complete\n", end_katom->renderpass_id);
-}
-
 bool kbase_js_complete_atom_wq(struct kbase_context *kctx, struct kbase_jd_atom *katom)
 {
 	struct kbasep_js_kctx_info *js_kctx_info;
@@ -3225,13 +2725,6 @@ bool kbase_js_complete_atom_wq(struct kbase_context *kctx, struct kbase_jd_atom
 
 	dev_dbg(kbdev->dev, "%s for atom %pK (s:%u)\n", __func__, (void *)katom, atom_slot);
 
-	/* Update the incremental rendering state machine.
-	 */
-	if (katom->core_req & BASE_JD_REQ_START_RENDERPASS)
-		timer_sync |= js_complete_start_rp(kctx, katom);
-	else if (katom->core_req & BASE_JD_REQ_END_RENDERPASS)
-		js_complete_end_rp(kctx, katom);
-
 	js_kctx_info = &kctx->jctx.sched_info;
 	js_devdata = &kbdev->js_data;
 
@@ -3320,61 +2813,6 @@ bool kbase_js_complete_atom_wq(struct kbase_context *kctx, struct kbase_jd_atom
 	return context_idle;
 }
 
-/**
- * js_end_rp_is_complete() - Check whether an atom that ends a renderpass has
- *                           completed for the last time.
- *
- * @end_katom: Pointer to the atom that completed on the hardware.
- *
- * An atom that ends a renderpass may be run on the hardware several times
- * before notifying userspace or allowing dependent atoms to be executed.
- *
- * This function is used to decide whether or not to allow end-of-renderpass
- * atom completion. It only returns false if the atom at the start of the
- * renderpass was soft-stopped because it used too much memory during the most
- * recent attempt at tiling.
- *
- * Return: True if the atom completed for the last time.
- */
-static bool js_end_rp_is_complete(struct kbase_jd_atom *const end_katom)
-{
-	struct kbase_context *const kctx = end_katom->kctx;
-	struct kbase_device *const kbdev = kctx->kbdev;
-	struct kbase_jd_renderpass *rp;
-
-	lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
-
-	if (WARN_ON(!(end_katom->core_req & BASE_JD_REQ_END_RENDERPASS)))
-		return true;
-
-	compiletime_assert((1ull << (sizeof(end_katom->renderpass_id) * 8)) <=
-				   ARRAY_SIZE(kctx->jctx.renderpasses),
-			   "Should check invalid access to renderpasses");
-
-	rp = &kctx->jctx.renderpasses[end_katom->renderpass_id];
-
-	if (WARN_ON(rp->end_katom != end_katom))
-		return true;
-
-	dev_dbg(kbdev->dev, "JS complete end atom %pK in state %d of RP %d\n", (void *)end_katom,
-		(int)rp->state, end_katom->renderpass_id);
-
-	if (WARN_ON(rp->state == KBASE_JD_RP_COMPLETE))
-		return true;
-
-	/* Failure of end-of-renderpass atoms must not return to the
-	 * start of the renderpass.
-	 */
-	if (end_katom->event_code != BASE_JD_EVENT_DONE)
-		return true;
-
-	if (rp->state != KBASE_JD_RP_OOM && rp->state != KBASE_JD_RP_RETRY_OOM)
-		return true;
-
-	dev_dbg(kbdev->dev, "Suppressing end atom completion\n");
-	return false;
-}
-
 struct kbase_jd_atom *kbase_js_complete_atom(struct kbase_jd_atom *katom, ktime_t *end_timestamp)
 {
 	struct kbase_device *kbdev;
@@ -3387,12 +2825,6 @@ struct kbase_jd_atom *kbase_js_complete_atom(struct kbase_jd_atom *katom, ktime_
 
 	lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
 
-	if ((katom->core_req & BASE_JD_REQ_END_RENDERPASS) && !js_end_rp_is_complete(katom)) {
-		katom->event_code = BASE_JD_EVENT_END_RP_DONE;
-		kbase_js_unpull(kctx, katom);
-		return NULL;
-	}
-
 	if (katom->will_fail_event_code)
 		katom->event_code = katom->will_fail_event_code;
 
@@ -3442,70 +2874,6 @@ struct kbase_jd_atom *kbase_js_complete_atom(struct kbase_jd_atom *katom, ktime_
 	return NULL;
 }
 
-/**
- * kbase_js_atom_blocked_on_x_dep - Decide whether to ignore a cross-slot
- *                                  dependency
- * @katom:	Pointer to an atom in the slot ringbuffer
- *
- * A cross-slot dependency is ignored if necessary to unblock incremental
- * rendering. If the atom at the start of a renderpass used too much memory
- * and was soft-stopped then the atom at the end of a renderpass is submitted
- * to hardware regardless of its dependency on the start-of-renderpass atom.
- * This can happen multiple times for the same pair of atoms.
- *
- * Return: true to block the atom or false to allow it to be submitted to
- *         hardware
- */
-bool kbase_js_atom_blocked_on_x_dep(struct kbase_jd_atom *const katom)
-{
-	struct kbase_context *const kctx = katom->kctx;
-	struct kbase_device *kbdev = kctx->kbdev;
-	struct kbase_jd_renderpass *rp;
-
-	lockdep_assert_held(&kbdev->hwaccess_lock);
-
-	if (!(katom->atom_flags & KBASE_KATOM_FLAG_X_DEP_BLOCKED)) {
-		dev_dbg(kbdev->dev, "Atom %pK is not blocked on a cross-slot dependency",
-			(void *)katom);
-		return false;
-	}
-
-	if (!(katom->core_req & BASE_JD_REQ_END_RENDERPASS)) {
-		dev_dbg(kbdev->dev, "Atom %pK is blocked on a cross-slot dependency",
-			(void *)katom);
-		return true;
-	}
-
-	compiletime_assert((1ull << (sizeof(katom->renderpass_id) * 8)) <=
-				   ARRAY_SIZE(kctx->jctx.renderpasses),
-			   "Should check invalid access to renderpasses");
-
-	rp = &kctx->jctx.renderpasses[katom->renderpass_id];
-	/* We can read a subset of renderpass state without holding
-	 * higher-level locks (but not end_katom, for example).
-	 */
-
-	WARN_ON(rp->state == KBASE_JD_RP_COMPLETE);
-
-	dev_dbg(kbdev->dev, "End atom has cross-slot dep in state %d\n", (int)rp->state);
-
-	if (rp->state != KBASE_JD_RP_OOM && rp->state != KBASE_JD_RP_RETRY_OOM)
-		return true;
-
-	/* Tiler ran out of memory so allow the fragment job chain to run
-	 * if it only depends on the tiler job chain.
-	 */
-	if (katom->x_pre_dep != rp->start_katom) {
-		dev_dbg(kbdev->dev, "Dependency is on %pK not start atom %pK\n",
-			(void *)katom->x_pre_dep, (void *)rp->start_katom);
-		return true;
-	}
-
-	dev_dbg(kbdev->dev, "Ignoring cross-slot dep on atom %pK\n", (void *)katom->x_pre_dep);
-
-	return false;
-}
-
 void kbase_js_sched(struct kbase_device *kbdev, unsigned int js_mask)
 {
 	struct kbasep_js_device_data *js_devdata;
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_kinstr_jm.c b/drivers/gpu/arm/bifrost/mali_kbase_kinstr_jm.c
index aae4df83e98d..87085912bd6c 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_kinstr_jm.c
+++ b/drivers/gpu/arm/bifrost/mali_kbase_kinstr_jm.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
 /*
  *
- * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2024 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -516,7 +516,8 @@ static ssize_t reader_changes_copy_to_user(struct reader_changes *const changes,
 	do {
 		changes_tail = changes->tail;
 		changes_count = reader_changes_count_locked(changes);
-		read_size = min(changes_count * entry_size, buffer_size & ~(entry_size - 1));
+		read_size =
+			min(size_mul(changes_count, entry_size), buffer_size & ~(entry_size - 1));
 
 		if (!read_size)
 			break;
@@ -743,7 +744,6 @@ int kbase_kinstr_jm_get_fd(struct kbase_kinstr_jm *const ctx, union kbase_kinstr
 	size_t const change_size = sizeof(struct kbase_kinstr_jm_atom_state_change);
 	int status;
 	int fd;
-	size_t i;
 
 	if (!ctx || !jm_fd_arg)
 		return -EINVAL;
@@ -753,10 +753,6 @@ int kbase_kinstr_jm_get_fd(struct kbase_kinstr_jm *const ctx, union kbase_kinstr
 	if (!is_power_of_2(in->count))
 		return -EINVAL;
 
-	for (i = 0; i < sizeof(in->padding); ++i)
-		if (in->padding[i])
-			return -EINVAL;
-
 	status = reader_init(&reader, ctx, in->count);
 	if (status < 0)
 		return status;
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_linux.h b/drivers/gpu/arm/bifrost/mali_kbase_linux.h
index 9195be347e2b..cb55d4b417c4 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_linux.h
+++ b/drivers/gpu/arm/bifrost/mali_kbase_linux.h
@@ -1,7 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
 /*
  *
- * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2024 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -35,8 +35,13 @@
 
 #if IS_ENABLED(MALI_KERNEL_TEST_API)
 #define KBASE_EXPORT_TEST_API(func) EXPORT_SYMBOL(func)
+/* Note: due to the 2-layer macro translation, using the NULL _etype does not
+ * compile, and one workaround is to use ERRNO_NULL instead.
+ */
+#define KBASE_ALLOW_ERROR_INJECTION_TEST_API(func, etype) ALLOW_ERROR_INJECTION(func, etype)
 #else
 #define KBASE_EXPORT_TEST_API(func)
+#define KBASE_ALLOW_ERROR_INJECTION_TEST_API(func, etype)
 #endif
 
 #define KBASE_EXPORT_SYMBOL(func) EXPORT_SYMBOL(func)
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_mem.c b/drivers/gpu/arm/bifrost/mali_kbase_mem.c
index ddf6ea352e72..1436d8290ebc 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_mem.c
+++ b/drivers/gpu/arm/bifrost/mali_kbase_mem.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
 /*
  *
- * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2024 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -35,6 +35,7 @@
 #include <mali_kbase_config.h>
 #include <mali_kbase.h>
 #include <mali_kbase_reg_track.h>
+#include <mali_kbase_caps.h>
 #include <hw_access/mali_kbase_hw_access_regmap.h>
 #include <mali_kbase_cache_policy.h>
 #include <mali_kbase_hw.h>
@@ -42,13 +43,20 @@
 #include <mali_kbase_native_mgm.h>
 #include <mali_kbase_mem_pool_group.h>
 #include <mmu/mali_kbase_mmu.h>
-#include <mali_kbase_config_defaults.h>
 #include <mali_kbase_trace_gpu_mem.h>
 #include <linux/version_compat_defs.h>
 
+/* Static key used to determine if large pages are enabled or not */
+static DEFINE_STATIC_KEY_FALSE(large_pages_static_key);
+
 #define VA_REGION_SLAB_NAME_PREFIX "va-region-slab-"
 #define VA_REGION_SLAB_NAME_SIZE (DEVNAME_SIZE + sizeof(VA_REGION_SLAB_NAME_PREFIX) + 1)
 
+#if GPU_PAGES_PER_CPU_PAGE > 1
+#define PAGE_METADATA_SLAB_NAME_PREFIX "page-metadata-slab-"
+#define PAGE_METADATA_SLAB_NAME_SIZE (DEVNAME_SIZE + sizeof(PAGE_METADATA_SLAB_NAME_PREFIX) + 1)
+#endif
+
 #if MALI_JIT_PRESSURE_LIMIT_BASE
 
 /*
@@ -143,21 +151,21 @@ MODULE_PARM_DESC(large_page_conf, "User override for large page usage on support
 static void kbasep_mem_page_size_init(struct kbase_device *kbdev)
 {
 	if (!IS_ENABLED(CONFIG_LARGE_PAGE_SUPPORT)) {
-		kbdev->pagesize_2mb = false;
 		dev_info(kbdev->dev, "Large page support was disabled at compile-time!");
 		return;
 	}
 
 	switch (large_page_conf) {
 	case LARGE_PAGE_AUTO: {
-		kbdev->pagesize_2mb = kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_LARGE_PAGE_ALLOC);
+		if (kbase_hw_has_feature(kbdev, KBASE_HW_FEATURE_LARGE_PAGE_ALLOC))
+			static_branch_inc(&large_pages_static_key);
 		dev_info(kbdev->dev, "Large page allocation set to %s after hardware feature check",
-			 kbdev->pagesize_2mb ? "true" : "false");
+			 static_branch_unlikely(&large_pages_static_key) ? "true" : "false");
 		break;
 	}
 	case LARGE_PAGE_ON: {
-		kbdev->pagesize_2mb = true;
-		if (!kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_LARGE_PAGE_ALLOC))
+		static_branch_inc(&large_pages_static_key);
+		if (!kbase_hw_has_feature(kbdev, KBASE_HW_FEATURE_LARGE_PAGE_ALLOC))
 			dev_warn(kbdev->dev,
 				 "Enabling large page allocations on unsupporting GPU!");
 		else
@@ -165,12 +173,10 @@ static void kbasep_mem_page_size_init(struct kbase_device *kbdev)
 		break;
 	}
 	case LARGE_PAGE_OFF: {
-		kbdev->pagesize_2mb = false;
 		dev_info(kbdev->dev, "Large page allocation override: turned off\n");
 		break;
 	}
 	default: {
-		kbdev->pagesize_2mb = false;
 		dev_info(kbdev->dev, "Invalid large page override, turning off large pages\n");
 		break;
 	}
@@ -180,25 +186,31 @@ static void kbasep_mem_page_size_init(struct kbase_device *kbdev)
 	 * so that userspace could read it to figure out the state of the configuration
 	 * if necessary.
 	 */
-	if (kbdev->pagesize_2mb)
+	if (static_branch_unlikely(&large_pages_static_key))
 		large_page_conf = LARGE_PAGE_ON;
 	else
 		large_page_conf = LARGE_PAGE_OFF;
 }
 
+inline bool kbase_is_large_pages_enabled(void)
+{
+	return static_branch_unlikely(&large_pages_static_key);
+}
+KBASE_EXPORT_TEST_API(kbase_is_large_pages_enabled);
+
 int kbase_mem_init(struct kbase_device *kbdev)
 {
 	int err = 0;
-	struct kbasep_mem_device *memdev;
 	char va_region_slab_name[VA_REGION_SLAB_NAME_SIZE];
+#if GPU_PAGES_PER_CPU_PAGE > 1
+	char page_metadata_slab_name[PAGE_METADATA_SLAB_NAME_SIZE];
+#endif
 #if IS_ENABLED(CONFIG_OF)
 	struct device_node *mgm_node = NULL;
 #endif
 
 	KBASE_DEBUG_ASSERT(kbdev);
 
-	memdev = &kbdev->memdev;
-
 	kbasep_mem_page_size_init(kbdev);
 
 	scnprintf(va_region_slab_name, VA_REGION_SLAB_NAME_SIZE, VA_REGION_SLAB_NAME_PREFIX "%s",
@@ -212,6 +224,17 @@ int kbase_mem_init(struct kbase_device *kbdev)
 		return -ENOMEM;
 	}
 
+#if GPU_PAGES_PER_CPU_PAGE > 1
+	scnprintf(page_metadata_slab_name, PAGE_METADATA_SLAB_NAME_SIZE,
+		  PAGE_METADATA_SLAB_NAME_PREFIX "%s", kbdev->devname);
+	kbdev->page_metadata_slab = kmem_cache_create(
+		page_metadata_slab_name, sizeof(struct kbase_page_metadata), 0, 0, NULL);
+	if (kbdev->page_metadata_slab == NULL) {
+		dev_err(kbdev->dev, "Failed to create page_metadata_slab");
+		return -ENOMEM;
+	}
+#endif
+
 	kbase_mem_migrate_init(kbdev);
 	kbase_mem_pool_group_config_set_max_size(&kbdev->mem_pool_defaults,
 						 KBASE_MEM_POOL_MAX_SIZE_KCTX);
@@ -221,12 +244,6 @@ int kbase_mem_init(struct kbase_device *kbdev)
 	kbdev->dma_buf_root = RB_ROOT;
 	mutex_init(&kbdev->dma_buf_lock);
 
-#ifdef IR_THRESHOLD
-	atomic_set(&memdev->ir_threshold, IR_THRESHOLD);
-#else
-	atomic_set(&memdev->ir_threshold, DEFAULT_IR_THRESHOLD);
-#endif
-
 	kbdev->mgm_dev = &kbase_native_mgm_dev;
 
 #if IS_ENABLED(CONFIG_OF)
@@ -292,6 +309,10 @@ void kbase_mem_term(struct kbase_device *kbdev)
 
 	kbase_mem_migrate_term(kbdev);
 
+#if GPU_PAGES_PER_CPU_PAGE > 1
+	kmem_cache_destroy(kbdev->page_metadata_slab);
+	kbdev->page_metadata_slab = NULL;
+#endif
 	kmem_cache_destroy(kbdev->va_region_slab);
 	kbdev->va_region_slab = NULL;
 
@@ -524,15 +545,20 @@ int kbase_gpu_munmap(struct kbase_context *kctx, struct kbase_va_region *reg)
 		switch (alloc->imported.user_buf.state) {
 		case KBASE_USER_BUF_STATE_GPU_MAPPED: {
 			alloc->imported.user_buf.current_mapping_usage_count = 0;
-			kbase_user_buf_from_gpu_mapped_to_empty(kctx, reg);
+			kbase_mem_phy_alloc_ref_read(alloc) ?
+				      kbase_user_buf_from_gpu_mapped_to_pinned(kctx, reg) :
+				      kbase_user_buf_from_gpu_mapped_to_empty(kctx, reg);
 			break;
 		}
 		case KBASE_USER_BUF_STATE_DMA_MAPPED: {
-			kbase_user_buf_from_dma_mapped_to_empty(kctx, reg);
+			kbase_mem_phy_alloc_ref_read(alloc) ?
+				      kbase_user_buf_from_dma_mapped_to_pinned(kctx, reg) :
+				      kbase_user_buf_from_dma_mapped_to_empty(kctx, reg);
 			break;
 		}
 		case KBASE_USER_BUF_STATE_PINNED: {
-			kbase_user_buf_from_pinned_to_empty(kctx, reg);
+			if (!kbase_mem_phy_alloc_ref_read(alloc))
+				kbase_user_buf_from_pinned_to_empty(kctx, reg);
 			break;
 		}
 		case KBASE_USER_BUF_STATE_EMPTY: {
@@ -672,7 +698,9 @@ void kbase_sync_single(struct kbase_context *kctx, struct tagged_addr t_cpu_pa,
 		dma_addr_t dma_addr;
 
 		WARN_ON(!cpu_page);
-		WARN_ON((size_t)offset + size > PAGE_SIZE);
+
+		if ((size_t)offset + size > PAGE_SIZE)
+			dev_warn(kctx->kbdev->dev, "Size and offset exceed page size");
 
 		dma_addr = kbase_dma_addr_from_tagged(t_cpu_pa) + (dma_addr_t)offset;
 
@@ -713,19 +741,105 @@ void kbase_sync_single(struct kbase_context *kctx, struct tagged_addr t_cpu_pa,
 	}
 }
 
+static int kbase_get_sync_scope_params(struct kbase_context *kctx, unsigned long start, size_t size,
+				       u64 *page_off, u64 *page_cnt, u64 *offset)
+{
+	u64 tmp_off;
+	struct kbase_cpu_mapping *map =
+		kbasep_find_enclosing_cpu_mapping(kctx, start, size, &tmp_off);
+
+	if (!map) {
+		dev_dbg(kctx->kbdev->dev, "%s: Can't find CPU mapping 0x%016lX", __func__, start);
+		return -EINVAL;
+	}
+
+	*page_off = tmp_off >> PAGE_SHIFT;
+	tmp_off &= ~PAGE_MASK;
+	*page_cnt = (size + tmp_off + (PAGE_SIZE - 1)) >> PAGE_SHIFT;
+	*offset = tmp_off;
+
+	return 0;
+}
+
+static int kbase_sync_imported_user_buf(struct kbase_context *kctx, struct kbase_va_region *reg,
+					struct basep_syncset *sset, enum kbase_sync_type sync_fn)
+{
+	unsigned long start = (uintptr_t)sset->user_addr;
+	size_t size = (size_t)sset->size;
+	dma_addr_t *dma_addr = reg->gpu_alloc->imported.user_buf.dma_addrs;
+	u64 page_off = 0, page_count = 0, offset = 0;
+	u64 i;
+	size_t sz;
+	int err;
+
+	lockdep_assert_held(&kctx->reg_lock);
+
+	if (sync_fn != KBASE_SYNC_TO_CPU && sync_fn != KBASE_SYNC_TO_DEVICE) {
+		dev_dbg(kctx->kbdev->dev, "%s: Unknown kbase sync_fn type!", __func__);
+		return -EINVAL;
+	}
+
+	/* Early return if the imported user_buffer is not yet mapped to GPU */
+	if (reg->gpu_alloc->imported.user_buf.state != KBASE_USER_BUF_STATE_GPU_MAPPED)
+		return -EINVAL;
+
+	err = kbase_get_sync_scope_params(kctx, start, size, &page_off, &page_count, &offset);
+	if (err)
+		return err;
+
+	/* Check the sync is inside the imported range */
+	if ((page_off >= reg->gpu_alloc->nents) ||
+	    ((page_off + page_count) > reg->gpu_alloc->nents))
+		return -EINVAL;
+
+	dma_addr = reg->gpu_alloc->imported.user_buf.dma_addrs;
+	/* Sync first page */
+	sz = MIN(((size_t)PAGE_SIZE - offset), size);
+	if (sync_fn == KBASE_SYNC_TO_CPU)
+		dma_sync_single_for_cpu(kctx->kbdev->dev, dma_addr[page_off] + offset, sz,
+					DMA_BIDIRECTIONAL);
+	else
+		dma_sync_single_for_device(kctx->kbdev->dev, dma_addr[page_off] + offset, sz,
+					   DMA_BIDIRECTIONAL);
+
+	/* Calculate the size for last page */
+	sz = ((start + size - 1) & ~PAGE_MASK) + 1;
+
+	/* Sync middle pages (if any) */
+	for (i = 1; page_count > 2 && i < page_count - 1; i++) {
+		if (sync_fn == KBASE_SYNC_TO_CPU)
+			dma_sync_single_for_cpu(kctx->kbdev->dev, dma_addr[page_off + i], PAGE_SIZE,
+						DMA_BIDIRECTIONAL);
+		else
+			dma_sync_single_for_device(kctx->kbdev->dev, dma_addr[page_off + i],
+						   PAGE_SIZE, DMA_BIDIRECTIONAL);
+	}
+
+	/* Sync last page (if any) */
+	if (page_count > 1) {
+		i = page_off + page_count - 1;
+		if (sync_fn == KBASE_SYNC_TO_CPU)
+			dma_sync_single_for_cpu(kctx->kbdev->dev, dma_addr[i], sz,
+						DMA_BIDIRECTIONAL);
+		else
+			dma_sync_single_for_device(kctx->kbdev->dev, dma_addr[i], sz,
+						   DMA_BIDIRECTIONAL);
+	}
+
+	return 0;
+}
+
 static int kbase_do_syncset(struct kbase_context *kctx, struct basep_syncset *sset,
 			    enum kbase_sync_type sync_fn)
 {
 	int err = 0;
 	struct kbase_va_region *reg;
-	struct kbase_cpu_mapping *map;
 	unsigned long start;
 	size_t size;
 	struct tagged_addr *cpu_pa;
 	struct tagged_addr *gpu_pa;
-	u64 page_off, page_count;
+	u64 page_off = 0, page_count = 0, offset = 0;
 	u64 i;
-	u64 offset;
 	size_t sz;
 
 	kbase_os_mem_map_lock(kctx);
@@ -748,7 +862,10 @@ static int kbase_do_syncset(struct kbase_context *kctx, struct basep_syncset *ss
 	 * memory may be cached.
 	 */
 	if (kbase_mem_is_imported(reg->gpu_alloc->type)) {
-		err = kbase_mem_do_sync_imported(kctx, reg, sync_fn);
+		if (reg->gpu_alloc->type == KBASE_MEM_TYPE_IMPORTED_USER_BUF)
+			err = kbase_sync_imported_user_buf(kctx, reg, sset, sync_fn);
+		else
+			err = kbase_sync_imported_umm(kctx, reg, sync_fn);
 		goto out_unlock;
 	}
 
@@ -758,17 +875,10 @@ static int kbase_do_syncset(struct kbase_context *kctx, struct basep_syncset *ss
 	start = (uintptr_t)sset->user_addr;
 	size = (size_t)sset->size;
 
-	map = kbasep_find_enclosing_cpu_mapping(kctx, start, size, &offset);
-	if (!map) {
-		dev_warn(kctx->kbdev->dev, "Can't find CPU mapping 0x%016lX for VA 0x%016llX",
-			 start, sset->mem_handle.basep.handle);
-		err = -EINVAL;
+	err = kbase_get_sync_scope_params(kctx, start, size, &page_off, &page_count, &offset);
+	if (err)
 		goto out_unlock;
-	}
 
-	page_off = offset >> PAGE_SHIFT;
-	offset &= ~PAGE_MASK;
-	page_count = (size + offset + (PAGE_SIZE - 1)) >> PAGE_SHIFT;
 	cpu_pa = kbase_get_cpu_phy_pages(reg);
 	gpu_pa = kbase_get_gpu_phy_pages(reg);
 
@@ -777,7 +887,6 @@ static int kbase_do_syncset(struct kbase_context *kctx, struct basep_syncset *ss
 		err = -EINVAL;
 		goto out_unlock;
 	}
-
 	if (page_off >= reg->gpu_alloc->nents) {
 		/* Start of sync range is outside the physically backed region
 		 * so nothing to do
@@ -942,7 +1051,7 @@ int kbase_mem_free(struct kbase_context *kctx, u64 gpu_addr)
 			__func__);
 		return -EINVAL;
 	}
-	kbase_gpu_vm_lock(kctx);
+	kbase_gpu_vm_lock_with_pmode_sync(kctx);
 
 	if (gpu_addr >= BASE_MEM_COOKIE_BASE && gpu_addr < BASE_MEM_FIRST_FREE_ADDRESS) {
 		unsigned int cookie = PFN_DOWN(gpu_addr - BASE_MEM_COOKIE_BASE);
@@ -981,7 +1090,7 @@ int kbase_mem_free(struct kbase_context *kctx, u64 gpu_addr)
 	}
 
 out_unlock:
-	kbase_gpu_vm_unlock(kctx);
+	kbase_gpu_vm_unlock_with_pmode_sync(kctx);
 	return err;
 }
 
@@ -1126,6 +1235,7 @@ int kbase_alloc_phy_pages_helper(struct kbase_mem_phy_alloc *alloc, size_t nr_pa
 	 * to satisfy the memory allocation request.
 	 */
 	size_t nr_pages_to_account = 0;
+	size_t nr_pages_from_partials = 0;
 
 	if (WARN_ON(alloc->type != KBASE_MEM_TYPE_NATIVE) ||
 	    WARN_ON(alloc->imported.native.kctx == NULL) ||
@@ -1156,7 +1266,7 @@ int kbase_alloc_phy_pages_helper(struct kbase_mem_phy_alloc *alloc, size_t nr_pa
 	/* Check if we have enough pages requested so we can allocate a large
 	 * page (512 * 4KB = 2MB )
 	 */
-	if (kbdev->pagesize_2mb && nr_left >= NUM_PAGES_IN_2MB_LARGE_PAGE) {
+	if (kbase_is_large_pages_enabled() && nr_left >= NUM_PAGES_IN_2MB_LARGE_PAGE) {
 		size_t nr_lp = nr_left / NUM_PAGES_IN_2MB_LARGE_PAGE;
 
 		res = kbase_mem_pool_alloc_pages(&kctx->mem_pools.large[alloc->group_id],
@@ -1184,6 +1294,7 @@ int kbase_alloc_phy_pages_helper(struct kbase_mem_phy_alloc *alloc, size_t nr_pa
 					*tp++ = as_tagged_tag(page_to_phys(sa->page + pidx),
 							      FROM_PARTIAL);
 					nr_left--;
+					nr_pages_from_partials++;
 
 					if (bitmap_full(sa->sub_pages,
 							NUM_PAGES_IN_2MB_LARGE_PAGE)) {
@@ -1291,6 +1402,13 @@ alloc_failed:
 
 		alloc->nents += nr_pages_to_free;
 		kbase_free_phy_pages_helper(alloc, nr_pages_to_free);
+
+		/* Notice that the sub-pages from "partials" are not subtracted
+		 * from the counter by the free pages helper, because they just go
+		 * back to the "partials" they belong to. They are added to nr_left
+		 * here so that the accounting undo below subtracts them as well.
+		 */
+		nr_left += nr_pages_from_partials;
 	}
 
 	/* Undo the preliminary memory accounting that was done early on
@@ -1307,6 +1425,7 @@ alloc_failed:
 invalid_request:
 	return -ENOMEM;
 }
+KBASE_EXPORT_TEST_API(kbase_alloc_phy_pages_helper);
 
 static size_t free_partial_locked(struct kbase_context *kctx, struct kbase_mem_pool *pool,
 				  struct tagged_addr tp)
@@ -1363,7 +1482,7 @@ struct tagged_addr *kbase_alloc_phy_pages_helper_locked(struct kbase_mem_phy_all
 	kctx = alloc->imported.native.kctx;
 	kbdev = kctx->kbdev;
 
-	if (!kbdev->pagesize_2mb)
+	if (!kbase_is_large_pages_enabled())
 		WARN_ON(pool->order);
 
 	if (alloc->reg) {
@@ -1386,7 +1505,7 @@ struct tagged_addr *kbase_alloc_phy_pages_helper_locked(struct kbase_mem_phy_all
 	tp = alloc->pages + alloc->nents;
 	new_pages = tp;
 
-	if (kbdev->pagesize_2mb && pool->order) {
+	if (kbase_is_large_pages_enabled() && pool->order) {
 		size_t nr_lp = nr_left / NUM_PAGES_IN_2MB_LARGE_PAGE;
 
 		res = kbase_mem_pool_alloc_pages_locked(pool, nr_lp * NUM_PAGES_IN_2MB_LARGE_PAGE,
@@ -1503,7 +1622,7 @@ alloc_failed:
 
 		struct tagged_addr *start_free = alloc->pages + alloc->nents;
 
-		if (kbdev->pagesize_2mb && pool->order) {
+		if (kbase_is_large_pages_enabled() && pool->order) {
 			while (nr_pages_to_free) {
 				if (is_huge_head(*start_free)) {
 					kbase_mem_pool_free_pages_locked(
@@ -1659,6 +1778,7 @@ int kbase_free_phy_pages_helper(struct kbase_mem_phy_alloc *alloc, size_t nr_pag
 
 	return 0;
 }
+KBASE_EXPORT_TEST_API(kbase_free_phy_pages_helper);
 
 void kbase_free_phy_pages_helper_locked(struct kbase_mem_phy_alloc *alloc,
 					struct kbase_mem_pool *pool, struct tagged_addr *pages,
@@ -1897,11 +2017,13 @@ out_term:
 }
 KBASE_EXPORT_TEST_API(kbase_alloc_phy_pages);
 
-void kbase_set_phy_alloc_page_status(struct kbase_mem_phy_alloc *alloc,
+void kbase_set_phy_alloc_page_status(struct kbase_context *kctx, struct kbase_mem_phy_alloc *alloc,
 				     enum kbase_page_status status)
 {
 	u32 i = 0;
 
+	lockdep_assert_held(&kctx->reg_lock);
+
 	for (; i < alloc->nents; i++) {
 		struct tagged_addr phys = alloc->pages[i];
 		struct kbase_page_metadata *page_md = kbase_page_private(as_page(phys));
@@ -1921,7 +2043,7 @@ void kbase_set_phy_alloc_page_status(struct kbase_mem_phy_alloc *alloc,
 	}
 }
 
-bool kbase_check_alloc_flags(unsigned long flags)
+bool kbase_check_alloc_flags(struct kbase_context *kctx, unsigned long flags)
 {
 	/* Only known input flags should be set. */
 	if (flags & ~BASE_MEM_FLAGS_INPUT_MASK)
@@ -1997,6 +2119,36 @@ bool kbase_check_alloc_flags(unsigned long flags)
 		return false;
 #endif
 
+	/* Cannot be set on allocation, only with base_mem_set */
+	if ((flags & BASE_MEM_DONT_NEED) &&
+	    (mali_kbase_supports_reject_alloc_mem_dont_need(kctx->api_version)))
+		return false;
+
+	/* Cannot directly allocate protected memory, it is imported instead */
+	if ((flags & BASE_MEM_PROTECTED) &&
+	    (mali_kbase_supports_reject_alloc_mem_protected_in_unprotected_allocs(
+		    kctx->api_version)))
+		return false;
+
+/* No unused bits are valid for allocations */
+#if MALI_USE_CSF
+	if ((flags & BASE_MEM_UNUSED_BIT_20) &&
+	    (mali_kbase_supports_reject_alloc_mem_unused_bit_20(kctx->api_version)))
+		return false;
+
+	if ((flags & BASE_MEM_UNUSED_BIT_27) &&
+	    (mali_kbase_supports_reject_alloc_mem_unused_bit_27(kctx->api_version)))
+		return false;
+#else /* MALI_USE_CSF */
+	if ((flags & BASE_MEM_UNUSED_BIT_8) &&
+	    (mali_kbase_supports_reject_alloc_mem_unused_bit_8(kctx->api_version)))
+		return false;
+
+	if ((flags & BASE_MEM_UNUSED_BIT_19) &&
+	    (mali_kbase_supports_reject_alloc_mem_unused_bit_19(kctx->api_version)))
+		return false;
+#endif /* MALI_USE_CSF */
+
 	return true;
 }
 
@@ -2156,17 +2308,31 @@ void kbase_gpu_vm_lock(struct kbase_context *kctx)
 	KBASE_DEBUG_ASSERT(kctx != NULL);
 	mutex_lock(&kctx->reg_lock);
 }
-
 KBASE_EXPORT_TEST_API(kbase_gpu_vm_lock);
 
+void kbase_gpu_vm_lock_with_pmode_sync(struct kbase_context *kctx)
+{
+#if MALI_USE_CSF
+	down_read(&kctx->kbdev->csf.mmu_sync_sem);
+#endif
+	kbase_gpu_vm_lock(kctx);
+}
+
 void kbase_gpu_vm_unlock(struct kbase_context *kctx)
 {
 	KBASE_DEBUG_ASSERT(kctx != NULL);
 	mutex_unlock(&kctx->reg_lock);
 }
-
 KBASE_EXPORT_TEST_API(kbase_gpu_vm_unlock);
 
+void kbase_gpu_vm_unlock_with_pmode_sync(struct kbase_context *kctx)
+{
+	kbase_gpu_vm_unlock(kctx);
+#if MALI_USE_CSF
+	up_read(&kctx->kbdev->csf.mmu_sync_sem);
+#endif
+}
+
 #if IS_ENABLED(CONFIG_DEBUG_FS)
 struct kbase_jit_debugfs_data {
 	int (*func)(struct kbase_jit_debugfs_data *data);
@@ -2708,7 +2874,7 @@ static int kbase_jit_grow(struct kbase_context *kctx, const struct base_jit_allo
 	delta = info->commit_pages - reg->gpu_alloc->nents;
 	pages_required = delta;
 
-	if (kctx->kbdev->pagesize_2mb && pages_required >= NUM_PAGES_IN_2MB_LARGE_PAGE) {
+	if (kbase_is_large_pages_enabled() && pages_required >= NUM_PAGES_IN_2MB_LARGE_PAGE) {
 		pool = &kctx->mem_pools.large[kctx->jit_group_id];
 		/* Round up to number of 2 MB pages required */
 		pages_required += (NUM_PAGES_IN_2MB_LARGE_PAGE - 1);
@@ -2746,10 +2912,10 @@ static int kbase_jit_grow(struct kbase_context *kctx, const struct base_jit_allo
 		kbase_mem_pool_lock(pool);
 	}
 
-	if (reg->gpu_alloc->nents > info->commit_pages) {
+	if (reg->gpu_alloc->nents >= info->commit_pages) {
 		kbase_mem_pool_unlock(pool);
 		spin_unlock(&kctx->mem_partials_lock);
-		dev_warn(
+		dev_info(
 			kctx->kbdev->dev,
 			"JIT alloc grown beyond the required number of initially required pages, this grow no longer needed.");
 		goto done;
@@ -2999,7 +3165,7 @@ struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx,
 	if (!jit_allow_allocate(kctx, info, ignore_pressure_limit))
 		return NULL;
 
-	if (kctx->kbdev->pagesize_2mb) {
+	if (kbase_is_large_pages_enabled()) {
 		/* Preallocate memory for the sub-allocation structs */
 		for (i = 0; i != ARRAY_SIZE(prealloc_sas); ++i) {
 			prealloc_sas[i] = kmalloc(sizeof(*prealloc_sas[i]), GFP_KERNEL);
@@ -3008,7 +3174,7 @@ struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx,
 		}
 	}
 
-	kbase_gpu_vm_lock(kctx);
+	kbase_gpu_vm_lock_with_pmode_sync(kctx);
 	mutex_lock(&kctx->jit_evict_lock);
 
 	/*
@@ -3086,7 +3252,7 @@ struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx,
 			kbase_jit_done_phys_increase(kctx, needed_pages);
 #endif /* MALI_JIT_PRESSURE_LIMIT_BASE */
 
-		kbase_gpu_vm_unlock(kctx);
+		kbase_gpu_vm_unlock_with_pmode_sync(kctx);
 
 		if (ret) {
 			/*
@@ -3119,15 +3285,17 @@ struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx,
 			if (kbase_is_page_migration_enabled()) {
 				kbase_gpu_vm_lock(kctx);
 				mutex_lock(&kctx->jit_evict_lock);
-				kbase_set_phy_alloc_page_status(reg->gpu_alloc, ALLOCATED_MAPPED);
+				kbase_set_phy_alloc_page_status(kctx, reg->gpu_alloc,
+								ALLOCATED_MAPPED);
 				mutex_unlock(&kctx->jit_evict_lock);
 				kbase_gpu_vm_unlock(kctx);
 			}
 		}
 	} else {
 		/* No suitable JIT allocation was found so create a new one */
-		u64 flags = BASE_MEM_PROT_CPU_RD | BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR |
-			    BASE_MEM_GROW_ON_GPF | BASE_MEM_COHERENT_LOCAL | BASEP_MEM_NO_USER_FREE;
+		base_mem_alloc_flags flags = BASE_MEM_PROT_CPU_RD | BASE_MEM_PROT_GPU_RD |
+					     BASE_MEM_PROT_GPU_WR | BASE_MEM_GROW_ON_GPF |
+					     BASE_MEM_COHERENT_LOCAL | BASEP_MEM_NO_USER_FREE;
 		u64 gpu_addr;
 
 #if !MALI_USE_CSF
@@ -3147,7 +3315,7 @@ struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx,
 #endif /* MALI_JIT_PRESSURE_LIMIT_BASE */
 
 		mutex_unlock(&kctx->jit_evict_lock);
-		kbase_gpu_vm_unlock(kctx);
+		kbase_gpu_vm_unlock_with_pmode_sync(kctx);
 
 		reg = kbase_mem_alloc(kctx, info->va_pages, info->commit_pages, info->extension,
 				      &flags, &gpu_addr, mmu_sync_info);
@@ -3224,6 +3392,7 @@ end:
 
 	return reg;
 }
+KBASE_EXPORT_TEST_API(kbase_jit_allocate);
 
 void kbase_jit_free(struct kbase_context *kctx, struct kbase_va_region *reg)
 {
@@ -3249,9 +3418,9 @@ void kbase_jit_free(struct kbase_context *kctx, struct kbase_va_region *reg)
 		u64 delta = old_pages - new_size;
 
 		if (delta) {
-			mutex_lock(&kctx->reg_lock);
+			kbase_gpu_vm_lock_with_pmode_sync(kctx);
 			kbase_mem_shrink(kctx, reg, old_pages - delta);
-			mutex_unlock(&kctx->reg_lock);
+			kbase_gpu_vm_unlock_with_pmode_sync(kctx);
 		}
 	}
 
@@ -3265,13 +3434,30 @@ void kbase_jit_free(struct kbase_context *kctx, struct kbase_va_region *reg)
 
 	trace_jit_stats(kctx, reg->jit_bin_id, UINT_MAX);
 
+	kbase_gpu_vm_lock_with_pmode_sync(kctx);
+	if (unlikely(atomic_read(&reg->cpu_alloc->kernel_mappings))) {
+		WARN_ON(atomic64_read(&reg->no_user_free_count) > 1);
+		kbase_va_region_no_user_free_dec(reg);
+		mutex_lock(&kctx->jit_evict_lock);
+		list_del(&reg->jit_node);
+		mutex_unlock(&kctx->jit_evict_lock);
+		kbase_mem_free_region(kctx, reg);
+		kbase_gpu_vm_unlock_with_pmode_sync(kctx);
+		return;
+	}
 	kbase_mem_evictable_mark_reclaim(reg->gpu_alloc);
-
-	kbase_gpu_vm_lock(kctx);
 	reg->flags |= KBASE_REG_DONT_NEED;
 	reg->flags &= ~KBASE_REG_ACTIVE_JIT_ALLOC;
 	kbase_mem_shrink_cpu_mapping(kctx, reg, 0, reg->gpu_alloc->nents);
-	kbase_gpu_vm_unlock(kctx);
+
+	/* Inactive JIT regions should be freed by the shrinker and not impacted
+	 * by page migration. Once freed, they will enter into the page migration
+	 * state machine via the mempools.
+	 */
+	if (kbase_is_page_migration_enabled())
+		kbase_set_phy_alloc_page_status(kctx, reg->gpu_alloc, NOT_MOVABLE);
+
+	kbase_gpu_vm_unlock_with_pmode_sync(kctx);
 
 	/*
 	 * Add the allocation to the eviction list and the jit pool, after this
@@ -3286,14 +3472,9 @@ void kbase_jit_free(struct kbase_context *kctx, struct kbase_va_region *reg)
 
 	list_move(&reg->jit_node, &kctx->jit_pool_head);
 
-	/* Inactive JIT regions should be freed by the shrinker and not impacted
-	 * by page migration. Once freed, they will enter into the page migration
-	 * state machine via the mempools.
-	 */
-	if (kbase_is_page_migration_enabled())
-		kbase_set_phy_alloc_page_status(reg->gpu_alloc, NOT_MOVABLE);
 	mutex_unlock(&kctx->jit_evict_lock);
 }
+KBASE_EXPORT_TEST_API(kbase_jit_free);
 
 void kbase_jit_backing_lost(struct kbase_va_region *reg)
 {
@@ -3356,8 +3537,7 @@ void kbase_jit_term(struct kbase_context *kctx)
 	struct kbase_va_region *walker;
 
 	/* Free all allocations for this context */
-
-	kbase_gpu_vm_lock(kctx);
+	kbase_gpu_vm_lock_with_pmode_sync(kctx);
 	mutex_lock(&kctx->jit_evict_lock);
 	/* Free all allocations from the pool */
 	while (!list_empty(&kctx->jit_pool_head)) {
@@ -3398,7 +3578,7 @@ void kbase_jit_term(struct kbase_context *kctx)
 	WARN_ON(kctx->jit_phys_pages_to_be_allocated);
 #endif
 	mutex_unlock(&kctx->jit_evict_lock);
-	kbase_gpu_vm_unlock(kctx);
+	kbase_gpu_vm_unlock_with_pmode_sync(kctx);
 
 	/*
 	 * Flush the freeing of allocations whose backing has been freed
@@ -3916,9 +4096,6 @@ int kbase_map_external_resource(struct kbase_context *kctx, struct kbase_va_regi
 	case KBASE_MEM_TYPE_IMPORTED_USER_BUF: {
 		user_buf_original_state = reg->gpu_alloc->imported.user_buf.state;
 
-		if ((reg->gpu_alloc->imported.user_buf.mm != locked_mm) && (!reg->gpu_alloc->nents))
-			return -EINVAL;
-
 		/* This function is reachable through many code paths, and the imported
 		 * memory handle could be in any of the possible states: consider all
 		 * of them as a valid starting point, and progress through all stages
@@ -3928,19 +4105,31 @@ int kbase_map_external_resource(struct kbase_context *kctx, struct kbase_va_regi
 		 * Error recovery restores the original state and goes no further.
 		 */
 		switch (user_buf_original_state) {
-		case KBASE_USER_BUF_STATE_EMPTY:
-		case KBASE_USER_BUF_STATE_PINNED:
+		case KBASE_USER_BUF_STATE_EMPTY: {
+			if (reg->gpu_alloc->imported.user_buf.mm != locked_mm)
+				return -EINVAL;
+			err = kbase_user_buf_from_empty_to_gpu_mapped(kctx, reg);
+			break;
+		}
+		case KBASE_USER_BUF_STATE_PINNED: {
+			if (!reg->gpu_alloc->nents)
+				return -EINVAL;
+			err = kbase_user_buf_from_pinned_to_gpu_mapped(kctx, reg);
+			break;
+		}
 		case KBASE_USER_BUF_STATE_DMA_MAPPED: {
-			if (user_buf_original_state == KBASE_USER_BUF_STATE_EMPTY)
-				err = kbase_user_buf_from_empty_to_gpu_mapped(kctx, reg);
-			else if (user_buf_original_state == KBASE_USER_BUF_STATE_PINNED)
-				err = kbase_user_buf_from_pinned_to_gpu_mapped(kctx, reg);
-			else
-				err = kbase_user_buf_from_dma_mapped_to_gpu_mapped(kctx, reg);
-
-			if (err)
-				return err;
-
+			/* If the imported handle has not pinned any physical pages yet:
+			 * this function can only be called within the context of a user
+			 * process, which must be the same process as the one that
+			 * originally created the memory handle.
+			 *
+			 * In all other transitions: make sure that the imported handle
+			 * has already pinned physical pages before proceeding to mapping
+			 * operations.
+			 */
+			if (!reg->gpu_alloc->nents)
+				return -EINVAL;
+			err = kbase_user_buf_from_dma_mapped_to_gpu_mapped(kctx, reg);
 			break;
 		}
 		case KBASE_USER_BUF_STATE_GPU_MAPPED: {
@@ -3954,6 +4143,8 @@ int kbase_map_external_resource(struct kbase_context *kctx, struct kbase_va_regi
 				reg->gpu_alloc->imported.user_buf.state);
 			return -EINVAL;
 		}
+		if (err)
+			return err;
 
 		/* If the state was valid and the transition is happening, then the handle
 		 * must be in GPU_MAPPED state now and the reference counter of GPU mappings
@@ -4021,13 +4212,8 @@ void kbase_unmap_external_resource(struct kbase_context *kctx, struct kbase_va_r
 	kbase_va_region_alloc_put(kctx, reg);
 }
 
-static inline u64 kbasep_get_va_gpu_addr(struct kbase_va_region *reg)
-{
-	return reg->start_pfn << PAGE_SHIFT;
-}
-
-struct kbase_ctx_ext_res_meta *kbase_sticky_resource_acquire(struct kbase_context *kctx,
-							     u64 gpu_addr)
+struct kbase_ctx_ext_res_meta *
+kbase_sticky_resource_acquire(struct kbase_context *kctx, u64 gpu_addr, struct mm_struct *locked_mm)
 {
 	struct kbase_ctx_ext_res_meta *meta = NULL;
 	struct kbase_ctx_ext_res_meta *walker;
@@ -4066,7 +4252,7 @@ struct kbase_ctx_ext_res_meta *kbase_sticky_resource_acquire(struct kbase_contex
 		/* Map the external resource to the GPU allocation of the region
 		 * and acquire the reference to the VA region
 		 */
-		if (kbase_map_external_resource(kctx, meta->reg, NULL))
+		if (kbase_map_external_resource(kctx, meta->reg, locked_mm))
 			goto fail_map;
 		meta->ref = 1;
 
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_mem.h b/drivers/gpu/arm/bifrost/mali_kbase_mem.h
index e4a7d6bd0a30..880b8525ae37 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_mem.h
+++ b/drivers/gpu/arm/bifrost/mali_kbase_mem.h
@@ -1,7 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
 /*
  *
- * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2024 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -104,8 +104,8 @@ static inline void kbase_process_page_usage_inc(struct kbase_context *kctx, int
 
 /* Index of chosen MEMATTR for this region (0..7) */
 #define KBASE_REG_MEMATTR_MASK (7ul << 16)
-#define KBASE_REG_MEMATTR_INDEX(x) (((x)&7) << 16)
-#define KBASE_REG_MEMATTR_VALUE(x) (((x)&KBASE_REG_MEMATTR_MASK) >> 16)
+#define KBASE_REG_MEMATTR_INDEX(x) (((x) & 7) << 16)
+#define KBASE_REG_MEMATTR_VALUE(x) (((x) & KBASE_REG_MEMATTR_MASK) >> 16)
 
 /* AS<n>_MEMATTR values from MMU_MEMATTR_STAGE1: */
 /* Use GPU implementation-defined caching policy. */
@@ -482,6 +482,26 @@ struct kbase_page_metadata {
 			struct kbase_mmu_table *mmut;
 			/* GPU virtual page frame number info is in GPU_PAGE_SIZE units */
 			u64 pgd_vpfn_level;
+#if GPU_PAGES_PER_CPU_PAGE > 1
+			/**
+			 * @pgd_link: Link to the &kbase_mmu_table.pgd_pages_list
+			 */
+			struct list_head pgd_link;
+			/**
+			 * @pgd_page: Back pointer to the PGD page that the metadata is
+			 *            associated with
+			 */
+			struct page *pgd_page;
+			/**
+			 * @allocated_sub_pages: Bitmap representing the allocation status
+			 *                       of sub pages in the @pgd_page
+			 */
+			DECLARE_BITMAP(allocated_sub_pages, GPU_PAGES_PER_CPU_PAGE);
+			/**
+			 * @num_allocated_sub_pages: The number of allocated sub pages in @pgd_page
+			 */
+			s8 num_allocated_sub_pages;
+#endif
 		} pt_mapped;
 		struct {
 			struct kbase_device *kbdev;
@@ -510,6 +530,7 @@ enum kbase_jit_report_flags { KBASE_JIT_REPORT_ON_ALLOC_OR_FREE = (1u << 0) };
 /**
  * kbase_set_phy_alloc_page_status - Set the page migration status of the underlying
  *                                   physical allocation.
+ * @kctx:   Pointer to Kbase context.
  * @alloc:  the physical allocation containing the pages whose metadata is going
  *          to be modified
  * @status: the status the pages should end up in
@@ -518,7 +539,7 @@ enum kbase_jit_report_flags { KBASE_JIT_REPORT_ON_ALLOC_OR_FREE = (1u << 0) };
  * proper states are set. Instead, it is only used when we change the allocation
  * to NOT_MOVABLE or from NOT_MOVABLE to ALLOCATED_MAPPED
  */
-void kbase_set_phy_alloc_page_status(struct kbase_mem_phy_alloc *alloc,
+void kbase_set_phy_alloc_page_status(struct kbase_context *kctx, struct kbase_mem_phy_alloc *alloc,
 				     enum kbase_page_status status);
 
 static inline void kbase_mem_phy_alloc_gpu_mapped(struct kbase_mem_phy_alloc *alloc)
@@ -589,6 +610,11 @@ int kbase_mem_init(struct kbase_device *kbdev);
 void kbase_mem_halt(struct kbase_device *kbdev);
 void kbase_mem_term(struct kbase_device *kbdev);
 
+static inline unsigned int kbase_mem_phy_alloc_ref_read(struct kbase_mem_phy_alloc *alloc)
+{
+	return kref_read(&alloc->kref);
+}
+
 static inline struct kbase_mem_phy_alloc *kbase_mem_phy_alloc_get(struct kbase_mem_phy_alloc *alloc)
 {
 	kref_get(&alloc->kref);
@@ -615,9 +641,6 @@ static inline struct kbase_mem_phy_alloc *kbase_mem_phy_alloc_put(struct kbase_m
  * @nr_pages:        The size of the region in pages.
  * @initial_commit:  Initial commit, for aligning the start address and
  *                   correctly growing KBASE_REG_TILER_ALIGN_TOP regions.
- * @threshold_pages: If non-zero and the amount of memory committed to a region
- *                   that can grow on page fault exceeds this number of pages
- *                   then the driver switches to incremental rendering.
  * @flags:           Flags
  * @extension:    Number of pages allocated on page fault.
  * @cpu_alloc: The physical memory we mmap to the CPU when mapping this region.
@@ -654,8 +677,7 @@ struct kbase_va_region {
 	void *user_data;
 	size_t nr_pages;
 	size_t initial_commit;
-	size_t threshold_pages;
-	unsigned long flags;
+	base_mem_alloc_flags flags;
 	size_t extension;
 	struct kbase_mem_phy_alloc *cpu_alloc;
 	struct kbase_mem_phy_alloc *gpu_alloc;
@@ -909,10 +931,12 @@ static inline struct kbase_mem_phy_alloc *kbase_alloc_create(struct kbase_contex
 	atomic_set(&alloc->gpu_mappings, 0);
 	atomic_set(&alloc->kernel_mappings, 0);
 	alloc->nents = 0;
-	alloc->pages = (void *)(alloc + 1);
-	/* fill pages with invalid address value */
-	for (i = 0; i < nr_pages; i++)
-		alloc->pages[i] = as_tagged(KBASE_INVALID_PHYSICAL_ADDRESS);
+	if (type != KBASE_MEM_TYPE_ALIAS) {
+		alloc->pages = (void *)(alloc + 1);
+		/* fill pages with invalid address value */
+		for (i = 0; i < nr_pages; i++)
+			alloc->pages[i] = as_tagged(KBASE_INVALID_PHYSICAL_ADDRESS);
+	}
 	INIT_LIST_HEAD(&alloc->mappings);
 	alloc->type = type;
 	alloc->group_id = group_id;
@@ -1302,7 +1326,7 @@ struct page *kbase_mem_alloc_page(struct kbase_mem_pool *pool, const bool alloc_
  */
 void kbase_mem_pool_free_page(struct kbase_mem_pool *pool, struct page *p);
 
-bool kbase_check_alloc_flags(unsigned long flags);
+bool kbase_check_alloc_flags(struct kbase_context *kctx, unsigned long flags);
 bool kbase_check_import_flags(unsigned long flags);
 
 static inline bool kbase_import_size_is_valid(struct kbase_device *kbdev, u64 va_pages)
@@ -1408,12 +1432,30 @@ int kbase_update_region_flags(struct kbase_context *kctx, struct kbase_va_region
  */
 void kbase_gpu_vm_lock(struct kbase_context *kctx);
 
+/**
+ * kbase_gpu_vm_lock_with_pmode_sync() - Wrapper of kbase_gpu_vm_lock.
+ * @kctx:  KBase context
+ *
+ * Same as kbase_gpu_vm_lock for JM GPU.
+ * Additionally acquires the P.mode read-write semaphore as a reader for CSF GPU.
+ */
+void kbase_gpu_vm_lock_with_pmode_sync(struct kbase_context *kctx);
+
 /**
  * kbase_gpu_vm_unlock() - Release the per-context region list lock
  * @kctx:  KBase context
  */
 void kbase_gpu_vm_unlock(struct kbase_context *kctx);
 
+/**
+ * kbase_gpu_vm_unlock_with_pmode_sync() - Wrapper of kbase_gpu_vm_unlock.
+ * @kctx:  KBase context
+ *
+ * Same as kbase_gpu_vm_unlock for JM GPU.
+ * Additionally releases the reader side of the P.mode read-write semaphore for CSF GPU.
+ */
+void kbase_gpu_vm_unlock_with_pmode_sync(struct kbase_context *kctx);
+
 int kbase_alloc_phy_pages(struct kbase_va_region *reg, size_t vsize, size_t size);
 
 /**
@@ -1651,7 +1693,7 @@ int kbase_alloc_phy_pages_helper(struct kbase_mem_phy_alloc *alloc, size_t nr_pa
  *
  * @prealloc_sa:        Information about the partial allocation if the amount of memory requested
  *                      is not a multiple of 2MB. One instance of struct kbase_sub_alloc must be
- *                      allocated by the caller if kbdev->pagesize_2mb is enabled.
+ *                      allocated by the caller if large pages are enabled.
  *
  * Allocates @nr_pages_requested and updates the alloc object. This function does not allocate new
  * pages from the kernel, and therefore will never trigger the OoM killer. Therefore, it can be
@@ -1679,9 +1721,9 @@ int kbase_alloc_phy_pages_helper(struct kbase_mem_phy_alloc *alloc, size_t nr_pa
  * This ensures that the pool can be grown to the required size and that the allocation can
  * complete without another thread using the newly grown pages.
  *
- * If kbdev->pagesize_2mb is enabled and the allocation is >= 2MB, then @pool must be one of the
- * pools from alloc->imported.native.kctx->mem_pools.large[]. Otherwise it must be one of the
- * mempools from alloc->imported.native.kctx->mem_pools.small[].
+ * If large (2MiB) pages are enabled and the allocation is >= 2MiB, then @pool
+ * must be one of the pools from alloc->imported.native.kctx->mem_pools.large[]. Otherwise it
+ * must be one of the mempools from alloc->imported.native.kctx->mem_pools.small[].
  *
  * @prealloc_sa is used to manage the non-2MB sub-allocation. It has to be pre-allocated because we
  * must not sleep (due to the usage of kmalloc()) whilst holding pool->pool_lock.  @prealloc_sa
@@ -1776,8 +1818,8 @@ static inline dma_addr_t kbase_dma_addr_from_tagged(struct tagged_addr tagged_pa
 	phys_addr_t pa = as_phys_addr_t(tagged_pa);
 	struct page *page = pfn_to_page(PFN_DOWN(pa));
 	dma_addr_t dma_addr = (is_huge(tagged_pa) || is_partial(tagged_pa)) ?
-					    kbase_dma_addr_as_priv(page) :
-					    kbase_dma_addr(page);
+				      kbase_dma_addr_as_priv(page) :
+				      kbase_dma_addr(page);
 
 	return dma_addr;
 }
@@ -2070,7 +2112,8 @@ bool kbase_has_exec_va_zone(struct kbase_context *kctx);
  * kbase_map_external_resource - Map an external resource to the GPU.
  * @kctx:              kbase context.
  * @reg:               External resource to map.
- * @locked_mm:         The mm_struct which has been locked for this operation.
+ * @locked_mm:         The mm_struct which has been locked for this operation,
+ *                     or NULL if none is available.
  *
  * On successful mapping, the VA region and the gpu_alloc refcounts will be
  * increased, making it safe to use and store both values directly.
@@ -2335,12 +2378,15 @@ int kbase_sticky_resource_init(struct kbase_context *kctx);
  * kbase_sticky_resource_acquire - Acquire a reference on a sticky resource.
  * @kctx:     kbase context.
  * @gpu_addr: The GPU address of the external resource.
+ * @locked_mm: The mm_struct which has been locked for this operation,
+ *             or NULL if none is available.
  *
  * Return: The metadata object which represents the binding between the
  * external resource and the kbase context on success or NULL on failure.
  */
 struct kbase_ctx_ext_res_meta *kbase_sticky_resource_acquire(struct kbase_context *kctx,
-							     u64 gpu_addr);
+							     u64 gpu_addr,
+							     struct mm_struct *locked_mm);
 
 /**
  * kbase_sticky_resource_release - Release a reference on a sticky resource.
@@ -2494,19 +2540,19 @@ void kbase_mem_umm_unmap(struct kbase_context *kctx, struct kbase_va_region *reg
 			 struct kbase_mem_phy_alloc *alloc);
 
 /**
- * kbase_mem_do_sync_imported - Sync caches for imported memory
+ * kbase_sync_imported_umm - Sync caches for imported UMM memory
  * @kctx: Pointer to the kbase context
  * @reg: Pointer to the region with imported memory to sync
  * @sync_fn: The type of sync operation to perform
  *
- * Sync CPU caches for supported (currently only dma-buf (UMM)) memory.
+ * Sync CPU caches for supported dma-buf (UMM) memory.
  * Attempting to sync unsupported imported memory types will result in an error
  * code, -EINVAL.
  *
  * Return: 0 on success, or a negative error code.
  */
-int kbase_mem_do_sync_imported(struct kbase_context *kctx, struct kbase_va_region *reg,
-			       enum kbase_sync_type sync_fn);
+int kbase_sync_imported_umm(struct kbase_context *kctx, struct kbase_va_region *reg,
+			    enum kbase_sync_type sync_fn);
 
 /**
  * kbase_mem_copy_to_pinned_user_pages - Memcpy from source input page to
@@ -2595,4 +2641,7 @@ static inline base_mem_alloc_flags kbase_mem_group_id_set(int id)
 {
 	return BASE_MEM_GROUP_ID_SET(id);
 }
+
+bool kbase_is_large_pages_enabled(void);
+
 #endif /* _KBASE_MEM_H_ */
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_mem_linux.c b/drivers/gpu/arm/bifrost/mali_kbase_mem_linux.c
index 34d8f990f65c..a32da2645077 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_mem_linux.c
+++ b/drivers/gpu/arm/bifrost/mali_kbase_mem_linux.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
 /*
  *
- * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2024 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -46,6 +46,7 @@
 #include <mali_kbase_caps.h>
 #include <mali_kbase_trace_gpu_mem.h>
 #include <mali_kbase_reset_gpu.h>
+#include <linux/version_compat_defs.h>
 
 #if ((KERNEL_VERSION(5, 3, 0) <= LINUX_VERSION_CODE) || \
      (KERNEL_VERSION(5, 0, 0) > LINUX_VERSION_CODE))
@@ -82,8 +83,6 @@
 #define KBASE_MEM_ION_SYNC_WORKAROUND
 #endif
 
-#define IR_THRESHOLD_STEPS (256u)
-
 /*
  * fully_backed_gpf_memory - enable full physical backing of all grow-on-GPU-page-fault
  * allocations in the kernel.
@@ -294,7 +293,7 @@ void kbase_phy_alloc_mapping_put(struct kbase_context *kctx, struct kbase_vmap_s
 }
 
 struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx, u64 va_pages, u64 commit_pages,
-					u64 extension, u64 *flags, u64 *gpu_va,
+					u64 extension, base_mem_alloc_flags *flags, u64 *gpu_va,
 					enum kbase_caller_mmu_sync_info mmu_sync_info)
 {
 	struct kbase_va_region *reg;
@@ -319,9 +318,8 @@ struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx, u64 va_pages
 	else
 		dev_dbg(dev, "Keeping requested GPU VA of 0x%llx\n", (unsigned long long)*gpu_va);
 
-	if (!kbase_check_alloc_flags(*flags)) {
-		dev_warn(dev, "%s called with bad flags (%llx)", __func__,
-			 (unsigned long long)*flags);
+	if (!kbase_check_alloc_flags(kctx, *flags)) {
+		dev_warn(dev, "%s called with bad flags (%llx)", __func__, *flags);
 		goto bad_flags;
 	}
 
@@ -334,6 +332,12 @@ struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx, u64 va_pages
 	}
 #endif
 
+	/* Ensure GPU cached if CPU cached; only warn when the flag is actually being cleared */
+	if ((*flags & BASE_MEM_CACHED_CPU) != 0 && (*flags & BASE_MEM_UNCACHED_GPU) != 0) {
+		dev_warn_once(dev, "Clearing BASE_MEM_UNCACHED_GPU flag to avoid MMA violation\n");
+		*flags &= ~BASE_MEM_UNCACHED_GPU;
+	}
+
 	if ((*flags & BASE_MEM_UNCACHED_GPU) != 0 &&
 	    (*flags & BASE_MEM_COHERENT_SYSTEM_REQUIRED) != 0) {
 		/* Remove COHERENT_SYSTEM_REQUIRED flag if uncached GPU mapping is requested */
@@ -405,17 +409,8 @@ struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx, u64 va_pages
 		*flags &= ~BASE_MEM_CACHED_CPU;
 
 	if (*flags & BASE_MEM_GROW_ON_GPF) {
-		unsigned int const ir_threshold =
-			(unsigned int)atomic_read(&kctx->kbdev->memdev.ir_threshold);
-
-		reg->threshold_pages =
-			((va_pages * ir_threshold) + (IR_THRESHOLD_STEPS / 2)) / IR_THRESHOLD_STEPS;
-	} else
-		reg->threshold_pages = 0;
-
-	if (*flags & BASE_MEM_GROW_ON_GPF) {
-		/* kbase_check_alloc_sizes() already checks extension is valid for
-		 * assigning to reg->extension
+		/* kbase_check_alloc_sizes() already checks extension is valid for assigning to
+		 * reg->extension.
 		 */
 		reg->extension = extension;
 #if !MALI_USE_CSF
@@ -433,7 +428,7 @@ struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx, u64 va_pages
 	}
 	reg->initial_commit = commit_pages;
 
-	kbase_gpu_vm_lock(kctx);
+	kbase_gpu_vm_lock_with_pmode_sync(kctx);
 
 	if (reg->flags & KBASE_REG_PERMANENT_KERNEL_MAPPING) {
 		/* Permanent kernel mappings must happen as soon as
@@ -443,7 +438,7 @@ struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx, u64 va_pages
 		 */
 		int err = kbase_phy_alloc_mapping_init(kctx, reg, va_pages, commit_pages);
 		if (err < 0) {
-			kbase_gpu_vm_unlock(kctx);
+			kbase_gpu_vm_unlock_with_pmode_sync(kctx);
 			goto no_kern_mapping;
 		}
 	}
@@ -455,7 +450,7 @@ struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx, u64 va_pages
 		/* Bind to a cookie */
 		if (bitmap_empty(kctx->cookies, BITS_PER_LONG)) {
 			dev_err(dev, "No cookies available for allocation!");
-			kbase_gpu_vm_unlock(kctx);
+			kbase_gpu_vm_unlock_with_pmode_sync(kctx);
 			goto no_cookie;
 		}
 		/* return a cookie */
@@ -472,7 +467,7 @@ struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx, u64 va_pages
 	} else /* we control the VA */ {
 		size_t align = 1;
 
-		if (kctx->kbdev->pagesize_2mb) {
+		if (kbase_is_large_pages_enabled()) {
 			/* If there's enough (> 33 bits) of GPU VA space, align to 2MB
 			* boundaries. The similar condition is used for mapping from
 			* the SAME_VA zone inside kbase_context_get_unmapped_area().
@@ -490,7 +485,7 @@ struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx, u64 va_pages
 		}
 		if (kbase_gpu_mmap(kctx, reg, *gpu_va, va_pages, align, mmu_sync_info) != 0) {
 			dev_warn(dev, "Failed to map memory on GPU");
-			kbase_gpu_vm_unlock(kctx);
+			kbase_gpu_vm_unlock_with_pmode_sync(kctx);
 			goto no_mmap;
 		}
 		/* return real GPU VA */
@@ -508,7 +503,7 @@ struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx, u64 va_pages
 	}
 #endif /* MALI_JIT_PRESSURE_LIMIT_BASE */
 
-	kbase_gpu_vm_unlock(kctx);
+	kbase_gpu_vm_unlock_with_pmode_sync(kctx);
 
 #if MALI_USE_CSF
 	if (*flags & BASE_MEM_FIXABLE)
@@ -596,9 +591,11 @@ int kbase_mem_query(struct kbase_context *kctx, u64 gpu_addr, u64 query, u64 *co
 			*out |= BASE_MEM_COHERENT_SYSTEM;
 		if (KBASE_REG_SHARE_IN & reg->flags)
 			*out |= BASE_MEM_COHERENT_LOCAL;
-		if (KBASE_REG_DONT_NEED & reg->flags)
-			*out |= BASE_MEM_DONT_NEED;
-		if (mali_kbase_supports_mem_grow_on_gpf(kctx->api_version)) {
+		if (mali_kbase_supports_query_mem_dont_need(kctx->api_version)) {
+			if (KBASE_REG_DONT_NEED & reg->flags)
+				*out |= BASE_MEM_DONT_NEED;
+		}
+		if (mali_kbase_supports_query_mem_grow_on_gpf(kctx->api_version)) {
 			/* Prior to this version, this was known about by
 			 * user-side but we did not return them. Returning
 			 * it caused certain clients that were not expecting
@@ -608,7 +605,7 @@ int kbase_mem_query(struct kbase_context *kctx, u64 gpu_addr, u64 query, u64 *co
 			if (KBASE_REG_PF_GROW & reg->flags)
 				*out |= BASE_MEM_GROW_ON_GPF;
 		}
-		if (mali_kbase_supports_mem_protected(kctx->api_version)) {
+		if (mali_kbase_supports_query_mem_protected(kctx->api_version)) {
 			/* Prior to this version, this was known about by
 			 * user-side but we did not return them. Returning
 			 * it caused certain clients that were not expecting
@@ -634,9 +631,30 @@ int kbase_mem_query(struct kbase_context *kctx, u64 gpu_addr, u64 query, u64 *co
 			else
 				*out |= BASE_MEM_FIXABLE;
 		}
-#endif
+#endif /* MALI_USE_CSF */
 		if (KBASE_REG_GPU_VA_SAME_4GB_PAGE & reg->flags)
 			*out |= BASE_MEM_GPU_VA_SAME_4GB_PAGE;
+		if (mali_kbase_supports_query_mem_import_sync_on_map_unmap(kctx->api_version)) {
+			if (reg->gpu_alloc->type == KBASE_MEM_TYPE_IMPORTED_UMM) {
+				if (reg->gpu_alloc->imported.umm.need_sync)
+					*out |= BASE_MEM_IMPORT_SYNC_ON_MAP_UNMAP;
+			}
+		}
+		if (mali_kbase_supports_query_mem_kernel_sync(kctx->api_version)) {
+			if (unlikely(reg->cpu_alloc != reg->gpu_alloc))
+				*out |= BASE_MEM_KERNEL_SYNC;
+		}
+		if (mali_kbase_supports_query_mem_same_va(kctx->api_version)) {
+			if (kbase_bits_to_zone(reg->flags) == SAME_VA_ZONE) {
+				/* Imported memory is an edge case, where declaring it SAME_VA
+				 * would be ambiguous.
+				 */
+				if (reg->gpu_alloc->type != KBASE_MEM_TYPE_IMPORTED_UMM &&
+				    reg->gpu_alloc->type != KBASE_MEM_TYPE_IMPORTED_USER_BUF) {
+					*out |= BASE_MEM_SAME_VA;
+				}
+			}
+		}
 
 		*out |= kbase_mem_group_id_set(reg->cpu_alloc->group_id);
 
@@ -667,7 +685,9 @@ out_unlock:
 static unsigned long kbase_mem_evictable_reclaim_count_objects(struct shrinker *s,
 							       struct shrink_control *sc)
 {
-	struct kbase_context *kctx = container_of(s, struct kbase_context, reclaim);
+	struct kbase_context *kctx =
+		KBASE_GET_KBASE_DATA_FROM_SHRINKER(s, struct kbase_context, reclaim);
+
 	int evict_nents = atomic_read(&kctx->evict_nents);
 	unsigned long nr_freeable_items;
 
@@ -717,8 +737,15 @@ static unsigned long kbase_mem_evictable_reclaim_scan_objects(struct shrinker *s
 	struct kbase_mem_phy_alloc *tmp;
 	unsigned long freed = 0;
 
-	kctx = container_of(s, struct kbase_context, reclaim);
+	kctx = KBASE_GET_KBASE_DATA_FROM_SHRINKER(s, struct kbase_context, reclaim);
 
+#if MALI_USE_CSF
+	if (!down_read_trylock(&kctx->kbdev->csf.mmu_sync_sem)) {
+		dev_warn(kctx->kbdev->dev,
+			 "Can't shrink GPU memory when P.Mode entrance is in progress");
+		return SHRINK_STOP; /* no progress possible now; end this reclaim pass */
+	}
+#endif
 	mutex_lock(&kctx->jit_evict_lock);
 
 	list_for_each_entry_safe(alloc, tmp, &kctx->evict_list, evict_node) {
@@ -757,32 +784,36 @@ static unsigned long kbase_mem_evictable_reclaim_scan_objects(struct shrinker *s
 	}
 
 	mutex_unlock(&kctx->jit_evict_lock);
-
+#if MALI_USE_CSF
+	up_read(&kctx->kbdev->csf.mmu_sync_sem);
+#endif
 	return freed;
 }
 
 int kbase_mem_evictable_init(struct kbase_context *kctx)
 {
+	struct shrinker *reclaim;
+
 	INIT_LIST_HEAD(&kctx->evict_list);
 	mutex_init(&kctx->jit_evict_lock);
 
-	kctx->reclaim.count_objects = kbase_mem_evictable_reclaim_count_objects;
-	kctx->reclaim.scan_objects = kbase_mem_evictable_reclaim_scan_objects;
-	kctx->reclaim.seeks = DEFAULT_SEEKS;
-	/* Kernel versions prior to 3.1 :
-	 * struct shrinker does not define batch
-	 */
-#if KERNEL_VERSION(6, 0, 0) > LINUX_VERSION_CODE
-	register_shrinker(&kctx->reclaim);
-#else
-	register_shrinker(&kctx->reclaim, "mali-mem");
-#endif
+	reclaim = KBASE_INIT_RECLAIM(kctx, reclaim, "mali-mem");
+	if (!reclaim)
+		return -ENOMEM;
+	KBASE_SET_RECLAIM(kctx, reclaim, reclaim);
+
+	reclaim->count_objects = kbase_mem_evictable_reclaim_count_objects;
+	reclaim->scan_objects = kbase_mem_evictable_reclaim_scan_objects;
+	reclaim->seeks = DEFAULT_SEEKS;
+
+	KBASE_REGISTER_SHRINKER(reclaim, "mali-mem", kctx);
+
 	return 0;
 }
 
 void kbase_mem_evictable_deinit(struct kbase_context *kctx)
 {
-	unregister_shrinker(&kctx->reclaim);
+	KBASE_UNREGISTER_SHRINKER(kctx->reclaim);
 }
 
 /**
@@ -849,7 +880,7 @@ void kbase_mem_evictable_make(struct kbase_mem_phy_alloc *gpu_alloc)
 	/* Indicate to page migration that the memory can be reclaimed by the shrinker.
 	 */
 	if (kbase_is_page_migration_enabled())
-		kbase_set_phy_alloc_page_status(gpu_alloc, NOT_MOVABLE);
+		kbase_set_phy_alloc_page_status(kctx, gpu_alloc, NOT_MOVABLE);
 
 	mutex_unlock(&kctx->jit_evict_lock);
 	kbase_mem_evictable_mark_reclaim(gpu_alloc);
@@ -907,7 +938,7 @@ bool kbase_mem_evictable_unmake(struct kbase_mem_phy_alloc *gpu_alloc)
 			 * from.
 			 */
 			if (kbase_is_page_migration_enabled())
-				kbase_set_phy_alloc_page_status(gpu_alloc, ALLOCATED_MAPPED);
+				kbase_set_phy_alloc_page_status(kctx, gpu_alloc, ALLOCATED_MAPPED);
 		}
 	}
 
@@ -925,7 +956,8 @@ bool kbase_mem_evictable_unmake(struct kbase_mem_phy_alloc *gpu_alloc)
  *
  * Return: 0 on success, error code otherwise.
  */
-static int kbase_mem_flags_change_imported_umm(struct kbase_context *kctx, unsigned int flags,
+static int kbase_mem_flags_change_imported_umm(struct kbase_context *kctx,
+					       base_mem_alloc_flags flags,
 					       struct kbase_va_region *reg)
 {
 	unsigned int real_flags = 0;
@@ -1008,7 +1040,7 @@ static int kbase_mem_flags_change_imported_umm(struct kbase_context *kctx, unsig
  *
  * Return: 0 on success, error code otherwise.
  */
-static int kbase_mem_flags_change_native(struct kbase_context *kctx, unsigned int flags,
+static int kbase_mem_flags_change_native(struct kbase_context *kctx, base_mem_alloc_flags flags,
 					 struct kbase_va_region *reg)
 {
 	bool kbase_reg_dont_need_flag = (KBASE_REG_DONT_NEED & reg->flags);
@@ -1040,8 +1072,8 @@ static int kbase_mem_flags_change_native(struct kbase_context *kctx, unsigned in
 	return ret;
 }
 
-int kbase_mem_flags_change(struct kbase_context *kctx, u64 gpu_addr, unsigned int flags,
-			   unsigned int mask)
+int kbase_mem_flags_change(struct kbase_context *kctx, u64 gpu_addr, base_mem_alloc_flags flags,
+			   base_mem_alloc_flags mask)
 {
 	struct kbase_va_region *reg;
 	int ret = -EINVAL;
@@ -1058,7 +1090,7 @@ int kbase_mem_flags_change(struct kbase_context *kctx, u64 gpu_addr, unsigned in
 
 	/* Lock down the context, and find the region */
 	down_write(kbase_mem_get_process_mmap_lock());
-	kbase_gpu_vm_lock(kctx);
+	kbase_gpu_vm_lock_with_pmode_sync(kctx);
 
 	/* Validate the region */
 	reg = kbase_region_tracker_find_region_base_address(kctx, gpu_addr);
@@ -1110,7 +1142,7 @@ int kbase_mem_flags_change(struct kbase_context *kctx, u64 gpu_addr, unsigned in
 	}
 
 out_unlock:
-	kbase_gpu_vm_unlock(kctx);
+	kbase_gpu_vm_unlock_with_pmode_sync(kctx);
 	up_write(kbase_mem_get_process_mmap_lock());
 
 	return ret;
@@ -1118,8 +1150,8 @@ out_unlock:
 
 #define KBASE_MEM_IMPORT_HAVE_PAGES (1UL << BASE_MEM_FLAGS_NR_BITS)
 
-int kbase_mem_do_sync_imported(struct kbase_context *kctx, struct kbase_va_region *reg,
-			       enum kbase_sync_type sync_fn)
+int kbase_sync_imported_umm(struct kbase_context *kctx, struct kbase_va_region *reg,
+			    enum kbase_sync_type sync_fn)
 {
 	int ret = -EINVAL;
 	struct dma_buf __maybe_unused *dma_buf;
@@ -1317,7 +1349,7 @@ int kbase_mem_umm_map(struct kbase_context *kctx, struct kbase_va_region *reg)
 		if (IS_ENABLED(CONFIG_MALI_DMA_BUF_LEGACY_COMPAT) ||
 		    alloc->imported.umm.need_sync) {
 			if (!kbase_is_region_invalid_or_free(reg)) {
-				err = kbase_mem_do_sync_imported(kctx, reg, KBASE_SYNC_TO_DEVICE);
+				err = kbase_sync_imported_umm(kctx, reg, KBASE_SYNC_TO_DEVICE);
 				WARN_ON_ONCE(err);
 			}
 		}
@@ -1379,7 +1411,7 @@ void kbase_mem_umm_unmap(struct kbase_context *kctx, struct kbase_va_region *reg
 		if (IS_ENABLED(CONFIG_MALI_DMA_BUF_LEGACY_COMPAT) ||
 		    alloc->imported.umm.need_sync) {
 			if (!kbase_is_region_invalid_or_free(reg)) {
-				int err = kbase_mem_do_sync_imported(kctx, reg, KBASE_SYNC_TO_CPU);
+				int err = kbase_sync_imported_umm(kctx, reg, KBASE_SYNC_TO_CPU);
 				WARN_ON_ONCE(err);
 			}
 		}
@@ -1431,7 +1463,7 @@ static int get_umm_memory_group_id(struct kbase_context *kctx, struct dma_buf *d
  * object that wraps the dma-buf.
  */
 static struct kbase_va_region *kbase_mem_from_umm(struct kbase_context *kctx, int fd, u64 *va_pages,
-						  u64 *flags, u32 padding)
+						  base_mem_alloc_flags *flags, u32 padding)
 {
 	struct kbase_va_region *reg;
 	struct dma_buf *dma_buf;
@@ -1577,7 +1609,8 @@ u32 kbase_get_cache_line_alignment(struct kbase_device *kbdev)
 
 static struct kbase_va_region *kbase_mem_from_user_buffer(struct kbase_context *kctx,
 							  unsigned long address, unsigned long size,
-							  u64 *va_pages, u64 *flags)
+							  u64 *va_pages,
+							  base_mem_alloc_flags *flags)
 {
 	struct kbase_va_region *reg;
 	enum kbase_memory_zone zone = CUSTOM_VA_ZONE;
@@ -1709,7 +1742,7 @@ bad_size:
 	return NULL;
 }
 
-u64 kbase_mem_alias(struct kbase_context *kctx, u64 *flags, u64 stride, u64 nents,
+u64 kbase_mem_alias(struct kbase_context *kctx, base_mem_alloc_flags *flags, u64 stride, u64 nents,
 		    struct base_mem_aliasing_info *ai, u64 *num_pages)
 {
 	struct kbase_va_region *reg;
@@ -1794,7 +1827,7 @@ u64 kbase_mem_alias(struct kbase_context *kctx, u64 *flags, u64 stride, u64 nent
 	if (!reg->gpu_alloc->imported.alias.aliased)
 		goto no_aliased_array;
 
-	kbase_gpu_vm_lock(kctx);
+	kbase_gpu_vm_lock_with_pmode_sync(kctx);
 
 	/* validate and add src handles */
 	for (i = 0; i < nents; i++) {
@@ -1904,7 +1937,7 @@ u64 kbase_mem_alias(struct kbase_context *kctx, u64 *flags, u64 stride, u64 nent
 	reg->flags &= ~KBASE_REG_FREE;
 	reg->flags &= ~KBASE_REG_GROWABLE;
 
-	kbase_gpu_vm_unlock(kctx);
+	kbase_gpu_vm_unlock_with_pmode_sync(kctx);
 
 	return gpu_va;
 
@@ -1915,7 +1948,7 @@ bad_handle:
 	 * them is handled by putting reg's allocs, so no rollback of those
 	 * actions is done here.
 	 */
-	kbase_gpu_vm_unlock(kctx);
+	kbase_gpu_vm_unlock_with_pmode_sync(kctx);
 no_aliased_array:
 invalid_flags:
 	kbase_mem_phy_alloc_put(reg->cpu_alloc);
@@ -1931,7 +1964,8 @@ bad_flags:
 }
 
 int kbase_mem_import(struct kbase_context *kctx, enum base_mem_import_type type,
-		     void __user *phandle, u32 padding, u64 *gpu_va, u64 *va_pages, u64 *flags)
+		     void __user *phandle, u32 padding, u64 *gpu_va, u64 *va_pages,
+		     base_mem_alloc_flags *flags)
 {
 	struct kbase_va_region *reg;
 
@@ -2016,7 +2050,7 @@ int kbase_mem_import(struct kbase_context *kctx, enum base_mem_import_type type,
 	if (!reg)
 		goto no_reg;
 
-	kbase_gpu_vm_lock(kctx);
+	kbase_gpu_vm_lock_with_pmode_sync(kctx);
 
 	/* mmap needed to setup VA? */
 	if (*flags & (BASE_MEM_SAME_VA | BASE_MEM_NEED_MMAP)) {
@@ -2050,13 +2084,13 @@ int kbase_mem_import(struct kbase_context *kctx, enum base_mem_import_type type,
 	/* clear out private flags */
 	*flags &= ((1UL << BASE_MEM_FLAGS_NR_BITS) - 1);
 
-	kbase_gpu_vm_unlock(kctx);
+	kbase_gpu_vm_unlock_with_pmode_sync(kctx);
 
 	return 0;
 
 no_gpu_va:
 no_cookie:
-	kbase_gpu_vm_unlock(kctx);
+	kbase_gpu_vm_unlock_with_pmode_sync(kctx);
 	kbase_mem_phy_alloc_put(reg->cpu_alloc);
 	kbase_mem_phy_alloc_put(reg->gpu_alloc);
 	kfree(reg);
@@ -2096,7 +2130,7 @@ void kbase_mem_shrink_cpu_mapping(struct kbase_context *kctx, struct kbase_va_re
 		/* Nothing to do */
 		return;
 
-	unmap_mapping_range(kctx->kfile->filp->f_inode->i_mapping,
+	unmap_mapping_range(kctx->filp->f_inode->i_mapping,
 			    (loff_t)(gpu_va_start + new_pages) << PAGE_SHIFT,
 			    (loff_t)(old_pages - new_pages) << PAGE_SHIFT, 1);
 }
@@ -2142,7 +2176,7 @@ int kbase_mem_commit(struct kbase_context *kctx, u64 gpu_addr, u64 new_pages)
 	}
 
 	down_write(kbase_mem_get_process_mmap_lock());
-	kbase_gpu_vm_lock(kctx);
+	kbase_gpu_vm_lock_with_pmode_sync(kctx);
 
 	/* Validate the region */
 	reg = kbase_region_tracker_find_region_base_address(kctx, gpu_addr);
@@ -2250,7 +2284,7 @@ int kbase_mem_commit(struct kbase_context *kctx, u64 gpu_addr, u64 new_pages)
 	}
 
 out_unlock:
-	kbase_gpu_vm_unlock(kctx);
+	kbase_gpu_vm_unlock_with_pmode_sync(kctx);
 	if (read_locked)
 		up_read(kbase_mem_get_process_mmap_lock());
 	else
@@ -2274,11 +2308,16 @@ int kbase_mem_shrink(struct kbase_context *const kctx, struct kbase_va_region *c
 		return -EINVAL;
 
 	old_pages = kbase_reg_current_backed_size(reg);
-	if (WARN_ON(old_pages < new_pages))
+	if (old_pages < new_pages) {
+		dev_warn(
+			kctx->kbdev->dev,
+			"Requested number of pages (%llu) is larger than the current number of pages (%llu)",
+			new_pages, old_pages);
 		return -EINVAL;
+	}
 
 	delta = old_pages - new_pages;
-	if (kctx->kbdev->pagesize_2mb) {
+	if (kbase_is_large_pages_enabled()) {
 		struct tagged_addr *start_free = reg->gpu_alloc->pages + new_pages;
 
 		/* Move the end of new commited range to a valid location.
@@ -2332,7 +2371,7 @@ static void kbase_cpu_vm_close(struct vm_area_struct *vma)
 	KBASE_DEBUG_ASSERT(map->kctx);
 	KBASE_DEBUG_ASSERT(map->alloc);
 
-	kbase_gpu_vm_lock(map->kctx);
+	kbase_gpu_vm_lock_with_pmode_sync(map->kctx);
 
 	if (map->free_on_close) {
 		KBASE_DEBUG_ASSERT(kbase_bits_to_zone(map->region->flags) == SAME_VA_ZONE);
@@ -2346,10 +2385,9 @@ static void kbase_cpu_vm_close(struct vm_area_struct *vma)
 	list_del(&map->mappings_list);
 
 	kbase_va_region_alloc_put(map->kctx, map->region);
-	kbase_gpu_vm_unlock(map->kctx);
+	kbase_gpu_vm_unlock_with_pmode_sync(map->kctx);
 
 	kbase_mem_phy_alloc_put(map->alloc);
-	kbase_file_dec_cpu_mapping_count(map->kctx->kfile);
 	kfree(map);
 }
 
@@ -2549,7 +2587,6 @@ static int kbase_cpu_mmap(struct kbase_context *kctx, struct kbase_va_region *re
 		map->alloc->properties |= KBASE_MEM_PHY_ALLOC_ACCESSED_CACHED;
 
 	list_add(&map->mappings_list, &map->alloc->mappings);
-	kbase_file_inc_cpu_mapping_count(kctx->kfile);
 
 out:
 	return err;
@@ -2749,7 +2786,7 @@ int kbase_context_mmap(struct kbase_context *const kctx, struct vm_area_struct *
 		goto out;
 	}
 
-	kbase_gpu_vm_lock(kctx);
+	kbase_gpu_vm_lock_with_pmode_sync(kctx);
 
 	if (vma->vm_pgoff == PFN_DOWN(BASE_MEM_MAP_TRACKING_HANDLE)) {
 		/* The non-mapped tracking helper page */
@@ -2784,11 +2821,11 @@ int kbase_context_mmap(struct kbase_context *const kctx, struct vm_area_struct *
 #endif /* defined(CONFIG_MALI_VECTOR_DUMP) */
 #if MALI_USE_CSF
 	case PFN_DOWN(BASEP_MEM_CSF_USER_REG_PAGE_HANDLE):
-		kbase_gpu_vm_unlock(kctx);
+		kbase_gpu_vm_unlock_with_pmode_sync(kctx);
 		err = kbase_csf_cpu_mmap_user_reg_page(kctx, vma);
 		goto out;
 	case PFN_DOWN(BASEP_MEM_CSF_USER_IO_PAGES_HANDLE)... PFN_DOWN(BASE_MEM_COOKIE_BASE) - 1: {
-		kbase_gpu_vm_unlock(kctx);
+		kbase_gpu_vm_unlock_with_pmode_sync(kctx);
 		mutex_lock(&kctx->csf.lock);
 		err = kbase_csf_cpu_mmap_user_io_pages(kctx, vma);
 		mutex_unlock(&kctx->csf.lock);
@@ -2882,7 +2919,7 @@ int kbase_context_mmap(struct kbase_context *const kctx, struct vm_area_struct *
 	}
 #endif /* defined(CONFIG_MALI_VECTOR_DUMP) */
 out_unlock:
-	kbase_gpu_vm_unlock(kctx);
+	kbase_gpu_vm_unlock_with_pmode_sync(kctx);
 out:
 	if (err)
 		dev_err(dev, "mmap failed %d\n", err);
@@ -3067,7 +3104,7 @@ static int kbase_vmap_phy_pages(struct kbase_context *kctx, struct kbase_va_regi
 		return -ENOMEM;
 	}
 
-	if (reg->flags & KBASE_REG_DONT_NEED)
+	if (kbase_is_region_shrinkable(reg))
 		return -EINVAL;
 
 	prot = PAGE_KERNEL;
@@ -3276,25 +3313,6 @@ void kbasep_os_process_page_usage_update(struct kbase_context *kctx, int pages)
 #endif
 }
 
-static void kbase_special_vm_open(struct vm_area_struct *vma)
-{
-	struct kbase_context *kctx = vma->vm_private_data;
-
-	kbase_file_inc_cpu_mapping_count(kctx->kfile);
-}
-
-static void kbase_special_vm_close(struct vm_area_struct *vma)
-{
-	struct kbase_context *kctx = vma->vm_private_data;
-
-	kbase_file_dec_cpu_mapping_count(kctx->kfile);
-}
-
-static const struct vm_operations_struct kbase_vm_special_ops = {
-	.open = kbase_special_vm_open,
-	.close = kbase_special_vm_close,
-};
-
 static int kbase_tracking_page_setup(struct kbase_context *kctx, struct vm_area_struct *vma)
 {
 	if (vma_pages(vma) != 1)
@@ -3303,10 +3321,7 @@ static int kbase_tracking_page_setup(struct kbase_context *kctx, struct vm_area_
 	/* no real access */
 	vm_flags_clear(vma, VM_READ | VM_MAYREAD | VM_WRITE | VM_MAYWRITE | VM_EXEC | VM_MAYEXEC);
 	vm_flags_set(vma, VM_DONTCOPY | VM_DONTEXPAND | VM_DONTDUMP | VM_IO);
-	vma->vm_ops = &kbase_vm_special_ops;
-	vma->vm_private_data = kctx;
 
-	kbase_file_inc_cpu_mapping_count(kctx->kfile);
 	return 0;
 }
 
@@ -3367,7 +3382,6 @@ static void kbase_csf_user_io_pages_vm_close(struct vm_area_struct *vma)
 	struct kbase_device *kbdev;
 	int err;
 	bool reset_prevented = false;
-	struct kbase_file *kfile;
 
 	if (!queue) {
 		pr_debug("Close method called for the new User IO pages mapping vma\n");
@@ -3376,7 +3390,6 @@ static void kbase_csf_user_io_pages_vm_close(struct vm_area_struct *vma)
 
 	kctx = queue->kctx;
 	kbdev = kctx->kbdev;
-	kfile = kctx->kfile;
 
 	err = kbase_reset_gpu_prevent_and_wait(kbdev);
 	if (err)
@@ -3394,9 +3407,8 @@ static void kbase_csf_user_io_pages_vm_close(struct vm_area_struct *vma)
 	if (reset_prevented)
 		kbase_reset_gpu_allow(kbdev);
 
-	kbase_file_dec_cpu_mapping_count(kfile);
 	/* Now as the vma is closed, drop the reference on mali device file */
-	fput(kfile->filp);
+	fput(kctx->filp);
 }
 
 #if (KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE)
@@ -3546,7 +3558,6 @@ static int kbase_csf_cpu_mmap_user_io_pages(struct kbase_context *kctx, struct v
 	/* Also adjust the vm_pgoff */
 	vma->vm_pgoff = queue->db_file_offset;
 
-	kbase_file_inc_cpu_mapping_count(kctx->kfile);
 	return 0;
 
 map_failed:
@@ -3586,7 +3597,6 @@ static void kbase_csf_user_reg_vm_close(struct vm_area_struct *vma)
 {
 	struct kbase_context *kctx = vma->vm_private_data;
 	struct kbase_device *kbdev;
-	struct kbase_file *kfile;
 
 	if (unlikely(!kctx)) {
 		pr_debug("Close function called for the unexpected mapping");
@@ -3594,7 +3604,6 @@ static void kbase_csf_user_reg_vm_close(struct vm_area_struct *vma)
 	}
 
 	kbdev = kctx->kbdev;
-	kfile = kctx->kfile;
 
 	if (unlikely(!kctx->csf.user_reg.vma))
 		dev_warn(kbdev->dev, "user_reg VMA pointer unexpectedly NULL for ctx %d_%d",
@@ -3606,9 +3615,8 @@ static void kbase_csf_user_reg_vm_close(struct vm_area_struct *vma)
 
 	kctx->csf.user_reg.vma = NULL;
 
-	kbase_file_dec_cpu_mapping_count(kfile);
 	/* Now as the VMA is closed, drop the reference on mali device file */
-	fput(kfile->filp);
+	fput(kctx->filp);
 }
 
 /**
@@ -3738,7 +3746,6 @@ static int kbase_csf_cpu_mmap_user_reg_page(struct kbase_context *kctx, struct v
 	vma->vm_ops = &kbase_csf_user_reg_vm_ops;
 	vma->vm_private_data = kctx;
 
-	kbase_file_inc_cpu_mapping_count(kctx->kfile);
 	return 0;
 }
 
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_mem_linux.h b/drivers/gpu/arm/bifrost/mali_kbase_mem_linux.h
index 28666037d8c6..a4b3db7fdf89 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_mem_linux.h
+++ b/drivers/gpu/arm/bifrost/mali_kbase_mem_linux.h
@@ -1,7 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
 /*
  *
- * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2024 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -51,7 +51,7 @@ struct kbase_hwc_dma_mapping {
  * Return: 0 on success or error code
  */
 struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx, u64 va_pages, u64 commit_pages,
-					u64 extension, u64 *flags, u64 *gpu_va,
+					u64 extension, base_mem_alloc_flags *flags, u64 *gpu_va,
 					enum kbase_caller_mmu_sync_info mmu_sync_info);
 
 /**
@@ -84,7 +84,8 @@ int kbase_mem_query(struct kbase_context *kctx, u64 gpu_addr, u64 query, u64 *co
  * Return: 0 on success or error code
  */
 int kbase_mem_import(struct kbase_context *kctx, enum base_mem_import_type type,
-		     void __user *phandle, u32 padding, u64 *gpu_va, u64 *va_pages, u64 *flags);
+		     void __user *phandle, u32 padding, u64 *gpu_va, u64 *va_pages,
+		     base_mem_alloc_flags *flags);
 
 /**
  * kbase_mem_alias - Create a new allocation for GPU, aliasing one or more
@@ -99,7 +100,7 @@ int kbase_mem_import(struct kbase_context *kctx, enum base_mem_import_type type,
  *
  * Return: 0 on failure or otherwise the GPU VA for the alias
  */
-u64 kbase_mem_alias(struct kbase_context *kctx, u64 *flags, u64 stride, u64 nents,
+u64 kbase_mem_alias(struct kbase_context *kctx, base_mem_alloc_flags *flags, u64 stride, u64 nents,
 		    struct base_mem_aliasing_info *ai, u64 *num_pages);
 
 /**
@@ -112,8 +113,8 @@ u64 kbase_mem_alias(struct kbase_context *kctx, u64 *flags, u64 stride, u64 nent
  *
  * Return: 0 on success or error code
  */
-int kbase_mem_flags_change(struct kbase_context *kctx, u64 gpu_addr, unsigned int flags,
-			   unsigned int mask);
+int kbase_mem_flags_change(struct kbase_context *kctx, u64 gpu_addr, base_mem_alloc_flags flags,
+			   base_mem_alloc_flags mask);
 
 /**
  * kbase_mem_commit - Change the physical backing size of a region
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_mem_migrate.c b/drivers/gpu/arm/bifrost/mali_kbase_mem_migrate.c
index 93a07e7db4fa..eecab323f59f 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_mem_migrate.c
+++ b/drivers/gpu/arm/bifrost/mali_kbase_mem_migrate.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
 /*
  *
- * (C) COPYRIGHT 2022-2023 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2022-2024 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -28,6 +28,9 @@
 #include <mali_kbase_mem_migrate.h>
 #include <mmu/mali_kbase_mmu.h>
 
+/* Static key used to determine if page migration is enabled or not */
+static DEFINE_STATIC_KEY_FALSE(page_migration_static_key);
+
 /* Global integer used to determine if module parameter value has been
  * provided and if page migration feature is enabled.
  * Feature is disabled on all platforms by default.
@@ -50,15 +53,6 @@ MODULE_PARM_DESC(kbase_page_migration_enabled,
 
 KBASE_EXPORT_TEST_API(kbase_page_migration_enabled);
 
-bool kbase_is_page_migration_enabled(void)
-{
-	/* Handle uninitialised int case */
-	if (kbase_page_migration_enabled < 0)
-		return false;
-	return IS_ENABLED(CONFIG_PAGE_MIGRATION_SUPPORT) && kbase_page_migration_enabled;
-}
-KBASE_EXPORT_SYMBOL(kbase_is_page_migration_enabled);
-
 #if (KERNEL_VERSION(6, 0, 0) <= LINUX_VERSION_CODE)
 static const struct movable_operations movable_ops;
 #endif
@@ -74,6 +68,12 @@ bool kbase_alloc_page_metadata(struct kbase_device *kbdev, struct page *p, dma_a
 	if (!IS_ENABLED(CONFIG_PAGE_MIGRATION_SUPPORT))
 		return false;
 
+	/* Composite large pages are excluded from migration; warn if a code change
+	 * wrongly attempts to allocate migration metadata for one.
+	 */
+	if (is_huge_head(as_tagged(page_to_phys(p))) || is_partial(as_tagged(page_to_phys(p))))
+		dev_WARN(kbdev->dev, "%s: migration-metadata attempted on large-page.", __func__);
+
 	page_md = kzalloc(sizeof(struct kbase_page_metadata), GFP_KERNEL);
 	if (!page_md)
 		return false;
@@ -225,7 +225,7 @@ static int kbasep_migrate_page_pt_mapped(struct page *old_page, struct page *new
 	 * This blocks the CPU page fault handler from remapping pages.
 	 * Only MCU's mmut is device wide, i.e. no corresponding kctx.
 	 */
-	kbase_gpu_vm_lock(kctx);
+	kbase_gpu_vm_lock_with_pmode_sync(kctx);
 
 	ret = kbase_mmu_migrate_page(
 		as_tagged(page_to_phys(old_page)), as_tagged(page_to_phys(new_page)), old_dma_addr,
@@ -254,7 +254,7 @@ static int kbasep_migrate_page_pt_mapped(struct page *old_page, struct page *new
 		dma_unmap_page(kbdev->dev, new_dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
 
 	/* Page fault handler for CPU mapping unblocked. */
-	kbase_gpu_vm_unlock(kctx);
+	kbase_gpu_vm_unlock_with_pmode_sync(kctx);
 
 	return ret;
 }
@@ -293,10 +293,10 @@ static int kbasep_migrate_page_allocated_mapped(struct page *old_page, struct pa
 	/* Lock context to protect access to array of pages in physical allocation.
 	 * This blocks the CPU page fault handler from remapping pages.
 	 */
-	kbase_gpu_vm_lock(kctx);
+	kbase_gpu_vm_lock_with_pmode_sync(kctx);
 
 	/* Unmap the old physical range. */
-	unmap_mapping_range(kctx->kfile->filp->f_inode->i_mapping,
+	unmap_mapping_range(kctx->filp->f_inode->i_mapping,
 			    (loff_t)(page_md->data.mapped.vpfn / GPU_PAGES_PER_CPU_PAGE)
 				    << PAGE_SHIFT,
 			    PAGE_SIZE, 1);
@@ -332,7 +332,7 @@ static int kbasep_migrate_page_allocated_mapped(struct page *old_page, struct pa
 		dma_unmap_page(kctx->kbdev->dev, new_dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
 
 	/* Page fault handler for CPU mapping unblocked. */
-	kbase_gpu_vm_unlock(kctx);
+	kbase_gpu_vm_unlock_with_pmode_sync(kctx);
 
 	return ret;
 }
@@ -685,11 +685,15 @@ void kbase_mem_migrate_init(struct kbase_device *kbdev)
 	 * integer for a negative value to see if insmod parameter was
 	 * passed in at all (it will override the default negative value).
 	 */
-	if (kbase_page_migration_enabled < 0)
-		kbase_page_migration_enabled = kbdev->pagesize_2mb ? 1 : 0;
-	else
+	if (kbase_page_migration_enabled < 0) {
+		if (kbase_is_large_pages_enabled())
+			static_branch_inc(&page_migration_static_key);
+	} else {
 		dev_info(kbdev->dev, "Page migration support explicitly %s at insmod.",
 			 kbase_page_migration_enabled ? "enabled" : "disabled");
+		if (kbase_page_migration_enabled)
+			static_branch_inc(&page_migration_static_key);
+	}
 
 	spin_lock_init(&mem_migrate->free_pages_lock);
 	INIT_LIST_HEAD(&mem_migrate->free_pages_list);
@@ -714,3 +718,9 @@ void kbase_mem_migrate_term(struct kbase_device *kbdev)
 	iput(mem_migrate->inode);
 #endif
 }
+
+bool kbase_is_page_migration_enabled(void)
+{
+	return static_branch_unlikely(&page_migration_static_key);
+}
+KBASE_EXPORT_TEST_API(kbase_is_page_migration_enabled);
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_mem_migrate.h b/drivers/gpu/arm/bifrost/mali_kbase_mem_migrate.h
index ece8734de792..70c3135a7829 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_mem_migrate.h
+++ b/drivers/gpu/arm/bifrost/mali_kbase_mem_migrate.h
@@ -1,7 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
 /*
  *
- * (C) COPYRIGHT 2022-2023 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2022-2024 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_mem_pool.c b/drivers/gpu/arm/bifrost/mali_kbase_mem_pool.c
index cb862d5b029c..5984730c337c 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_mem_pool.c
+++ b/drivers/gpu/arm/bifrost/mali_kbase_mem_pool.c
@@ -480,7 +480,7 @@ static unsigned long kbase_mem_pool_reclaim_count_objects(struct shrinker *s,
 
 	CSTD_UNUSED(sc);
 
-	pool = container_of(s, struct kbase_mem_pool, reclaim);
+	pool = KBASE_GET_KBASE_DATA_FROM_SHRINKER(s, struct kbase_mem_pool, reclaim);
 
 	kbase_mem_pool_lock(pool);
 	if (pool->dont_reclaim && !pool->dying) {
@@ -502,7 +502,7 @@ static unsigned long kbase_mem_pool_reclaim_scan_objects(struct shrinker *s,
 	struct kbase_mem_pool *pool;
 	unsigned long freed;
 
-	pool = container_of(s, struct kbase_mem_pool, reclaim);
+	pool = KBASE_GET_KBASE_DATA_FROM_SHRINKER(s, struct kbase_mem_pool, reclaim);
 
 	kbase_mem_pool_lock(pool);
 	if (pool->dont_reclaim && !pool->dying) {
@@ -528,6 +528,8 @@ int kbase_mem_pool_init(struct kbase_mem_pool *pool, const struct kbase_mem_pool
 			unsigned int order, int group_id, struct kbase_device *kbdev,
 			struct kbase_mem_pool *next_pool)
 {
+	struct shrinker *reclaim;
+
 	if (WARN_ON(group_id < 0) || WARN_ON(group_id >= MEMORY_GROUP_MANAGER_NR_GROUPS)) {
 		return -EINVAL;
 	}
@@ -544,18 +546,17 @@ int kbase_mem_pool_init(struct kbase_mem_pool *pool, const struct kbase_mem_pool
 	spin_lock_init(&pool->pool_lock);
 	INIT_LIST_HEAD(&pool->page_list);
 
-	pool->reclaim.count_objects = kbase_mem_pool_reclaim_count_objects;
-	pool->reclaim.scan_objects = kbase_mem_pool_reclaim_scan_objects;
-	pool->reclaim.seeks = DEFAULT_SEEKS;
-	/* Kernel versions prior to 3.1 :
-	 * struct shrinker does not define batch
-	 */
-	pool->reclaim.batch = 0;
-#if KERNEL_VERSION(6, 0, 0) > LINUX_VERSION_CODE
-	register_shrinker(&pool->reclaim);
-#else
-	register_shrinker(&pool->reclaim, "mali-mem-pool");
-#endif
+	reclaim = KBASE_INIT_RECLAIM(pool, reclaim, "mali-mem-pool");
+	if (!reclaim)
+		return -ENOMEM;
+	KBASE_SET_RECLAIM(pool, reclaim, reclaim);
+
+	reclaim->count_objects = kbase_mem_pool_reclaim_count_objects;
+	reclaim->scan_objects = kbase_mem_pool_reclaim_scan_objects;
+	reclaim->seeks = DEFAULT_SEEKS;
+	reclaim->batch = 0;
+
+	KBASE_REGISTER_SHRINKER(reclaim, "mali-mem-pool", pool);
 
 	pool_dbg(pool, "initialized\n");
 
@@ -581,7 +582,7 @@ void kbase_mem_pool_term(struct kbase_mem_pool *pool)
 
 	pool_dbg(pool, "terminate()\n");
 
-	unregister_shrinker(&pool->reclaim);
+	KBASE_UNREGISTER_SHRINKER(pool->reclaim);
 
 	kbase_mem_pool_lock(pool);
 	pool->max_size = 0;
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_native_mgm.c b/drivers/gpu/arm/bifrost/mali_kbase_native_mgm.c
index 5e3d1eeb6d28..f9a3788a2ecf 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_native_mgm.c
+++ b/drivers/gpu/arm/bifrost/mali_kbase_native_mgm.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
 /*
  *
- * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2024 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -121,44 +121,24 @@ static vm_fault_t kbase_native_mgm_vmf_insert_pfn_prot(struct memory_group_manag
 	return vmf_insert_pfn_prot(vma, addr, pfn, pgprot);
 }
 
-/**
- * kbase_native_mgm_update_gpu_pte - Native method to modify a GPU page table
- *                                   entry
- *
- * @mgm_dev:   The memory group manager the request is being made through.
- * @group_id:  A physical memory group ID, which must be valid but is not used.
- *             Its valid range is 0 .. MEMORY_GROUP_MANAGER_NR_GROUPS-1.
- * @mmu_level: The level of the MMU page table where the page is getting mapped.
- * @pte:       The prepared page table entry.
- *
- * This function simply returns the @pte without modification.
- *
- * Return: A GPU page table entry to be stored in a page table.
- */
 static u64 kbase_native_mgm_update_gpu_pte(struct memory_group_manager_device *mgm_dev,
-					   unsigned int group_id, int mmu_level, u64 pte)
+					   unsigned int group_id, unsigned int pbha_id,
+					   unsigned int pte_flags, int mmu_level, u64 pte)
 {
-	CSTD_UNUSED(mgm_dev);
-	CSTD_UNUSED(group_id);
-	CSTD_UNUSED(mmu_level);
+	if (WARN_ON(group_id >= MEMORY_GROUP_MANAGER_NR_GROUPS))
+		return pte;
+
+	if ((pte_flags & BIT(MMA_VIOLATION)) && pbha_id) {
+		pr_warn_once("MMA violation! Applying PBHA override workaround to PTE\n");
+		pte |= ((u64)pbha_id << PTE_PBHA_SHIFT) & PTE_PBHA_MASK;
+	}
+
+	/* Address could be translated into a different bus address here */
+	pte |= ((u64)1 << PTE_RES_BIT_MULTI_AS_SHIFT);
 
 	return pte;
 }
 
-/**
- * kbase_native_mgm_pte_to_original_pte - Native method to undo changes done in
- *                                        kbase_native_mgm_update_gpu_pte()
- *
- * @mgm_dev:   The memory group manager the request is being made through.
- * @group_id:  A physical memory group ID, which must be valid but is not used.
- *             Its valid range is 0 .. MEMORY_GROUP_MANAGER_NR_GROUPS-1.
- * @mmu_level: The level of the MMU page table where the page is getting mapped.
- * @pte:       The prepared page table entry.
- *
- * This function simply returns the @pte without modification.
- *
- * Return: A GPU page table entry to be stored in a page table.
- */
 static u64 kbase_native_mgm_pte_to_original_pte(struct memory_group_manager_device *mgm_dev,
 						unsigned int group_id, int mmu_level, u64 pte)
 {
@@ -166,17 +146,32 @@ static u64 kbase_native_mgm_pte_to_original_pte(struct memory_group_manager_devi
 	CSTD_UNUSED(group_id);
 	CSTD_UNUSED(mmu_level);
 
+	/* Undo the PBHA ID modification */
+	pte &= ~PTE_PBHA_MASK;
+	/* Undo the bit set */
+	pte &= ~((u64)1 << PTE_RES_BIT_MULTI_AS_SHIFT);
+
 	return pte;
 }
 
+static bool kbase_native_mgm_get_import_memory_cached_access_permitted(
+	struct memory_group_manager_device *mgm_dev,
+	struct memory_group_manager_import_data *import_data)
+{
+	CSTD_UNUSED(mgm_dev);
+	CSTD_UNUSED(import_data);
+
+	return true;
+}
+
 struct memory_group_manager_device kbase_native_mgm_dev = {
-	.ops = {
-		.mgm_alloc_page = kbase_native_mgm_alloc,
-		.mgm_free_page = kbase_native_mgm_free,
-		.mgm_get_import_memory_id = NULL,
-		.mgm_vmf_insert_pfn_prot = kbase_native_mgm_vmf_insert_pfn_prot,
-		.mgm_update_gpu_pte = kbase_native_mgm_update_gpu_pte,
-		.mgm_pte_to_original_pte = kbase_native_mgm_pte_to_original_pte,
-	},
+	.ops = { .mgm_alloc_page = kbase_native_mgm_alloc,
+		 .mgm_free_page = kbase_native_mgm_free,
+		 .mgm_get_import_memory_id = NULL,
+		 .mgm_vmf_insert_pfn_prot = kbase_native_mgm_vmf_insert_pfn_prot,
+		 .mgm_update_gpu_pte = kbase_native_mgm_update_gpu_pte,
+		 .mgm_pte_to_original_pte = kbase_native_mgm_pte_to_original_pte,
+		 .mgm_get_import_memory_cached_access_permitted =
+			 kbase_native_mgm_get_import_memory_cached_access_permitted },
 	.data = NULL
 };
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_pbha.c b/drivers/gpu/arm/bifrost/mali_kbase_pbha.c
index 341ea901e2e1..ea79811ea293 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_pbha.c
+++ b/drivers/gpu/arm/bifrost/mali_kbase_pbha.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
 /*
  *
- * (C) COPYRIGHT 2021-2023 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2021-2024 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -220,6 +220,24 @@ void kbase_pbha_write_settings(struct kbase_device *kbdev)
 		for (i = 0; i < GPU_SYSC_ALLOC_COUNT; ++i)
 			kbase_reg_write32(kbdev, GPU_SYSC_ALLOC_OFFSET(i), kbdev->sysc_alloc[i]);
 	}
+
+	if (kbdev->mma_wa_id) {
+		/* PBHA OVERRIDE register index (0-3) */
+		uint reg_index = kbdev->mma_wa_id >> 2;
+		/* PBHA index within a PBHA OVERRIDE register (0-3) */
+		uint pbha_index = kbdev->mma_wa_id & 0x3;
+		/* 4 bits of read attributes + 4 bits of write attributes for each PBHA */
+		uint pbha_shift = pbha_index * 8;
+		/* Noncacheable read = noncacheable write = b0001 */
+		uint pbha_override_rw_noncacheable = 0x01 | 0x10;
+
+		u32 pbha_override_val =
+			kbase_reg_read32(kbdev, GPU_SYSC_PBHA_OVERRIDE_OFFSET(reg_index));
+		pbha_override_val &= ~((u32)0xFF << pbha_shift);
+		pbha_override_val |= ((u32)pbha_override_rw_noncacheable << pbha_shift);
+		kbase_reg_write32(kbdev, GPU_SYSC_PBHA_OVERRIDE_OFFSET(reg_index),
+				  pbha_override_val);
+	}
 #else
 	CSTD_UNUSED(kbdev);
 #endif /* MALI_USE_CSF */
@@ -277,16 +295,16 @@ static int kbase_pbha_read_int_id_override_property(struct kbase_device *kbdev,
 static int kbase_pbha_read_propagate_bits_property(struct kbase_device *kbdev,
 						   const struct device_node *pbha_node)
 {
-	u32 bits = 0;
+	u8 bits = 0;
 	int err;
 
-	if (!kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PBHA_HWU))
+	if (!kbase_hw_has_feature(kbdev, KBASE_HW_FEATURE_PBHA_HWU))
 		return 0;
 
-	err = of_property_read_u32(pbha_node, "propagate-bits", &bits);
+	err = of_property_read_u8(pbha_node, "propagate-bits", &bits);
 
 	if (err == -EINVAL) {
-		err = of_property_read_u32(pbha_node, "propagate_bits", &bits);
+		err = of_property_read_u8(pbha_node, "propagate_bits", &bits);
 	}
 
 	if (err < 0) {
@@ -310,6 +328,43 @@ static int kbase_pbha_read_propagate_bits_property(struct kbase_device *kbdev,
 	kbdev->pbha_propagate_bits = bits;
 	return 0;
 }
+
+static int kbase_pbha_read_mma_wa_id_property(struct kbase_device *kbdev,
+					      const struct device_node *pbha_node)
+{
+	u32 mma_wa_id = 0;
+	int err;
+
+	/* Skip on GPU arch < 14.8.0, or if mma_wa_id was already set via the module parameter */
+	if ((kbdev->gpu_props.gpu_id.arch_id < GPU_ID_ARCH_MAKE(14, 8, 0)) || kbdev->mma_wa_id != 0)
+		return 0;
+
+	err = of_property_read_u32(pbha_node, "mma-wa-id", &mma_wa_id);
+
+	/* Property does not exist. This is not a mandatory property, ignore this error */
+	if (err == -EINVAL)
+		return 0;
+
+	if (err == -ENODATA) {
+		dev_err(kbdev->dev, "DTB property mma-wa-id has no value\n");
+		return err;
+	}
+
+	if (err == -EOVERFLOW) {
+		dev_err(kbdev->dev, "DTB value for mma-wa-id is out of range\n");
+		return err;
+	}
+
+	if (mma_wa_id == 0 || mma_wa_id > 15) {
+		dev_err(kbdev->dev,
+			"Invalid DTB value for mma-wa-id: %u. Valid range is between 1 and 15.\n",
+			mma_wa_id);
+		return -EINVAL;
+	}
+
+	kbdev->mma_wa_id = mma_wa_id;
+	return 0;
+}
 #endif /* MALI_USE_CSF */
 
 int kbase_pbha_read_dtb(struct kbase_device *kbdev)
@@ -331,6 +386,12 @@ int kbase_pbha_read_dtb(struct kbase_device *kbdev)
 		return err;
 
 	err = kbase_pbha_read_propagate_bits_property(kbdev, pbha_node);
+
+	if (err < 0)
+		return err;
+
+	err = kbase_pbha_read_mma_wa_id_property(kbdev, pbha_node);
+
 	return err;
 #else
 	return 0;
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_pbha_debugfs.c b/drivers/gpu/arm/bifrost/mali_kbase_pbha_debugfs.c
index f1d2794dd86a..81f2df5ea977 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_pbha_debugfs.c
+++ b/drivers/gpu/arm/bifrost/mali_kbase_pbha_debugfs.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
 /*
  *
- * (C) COPYRIGHT 2021-2023 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2021-2024 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -50,8 +50,8 @@ static int int_id_overrides_show(struct seq_file *sfile, void *data)
 #endif /* MALI_USE_CSF */
 
 		for (j = 0; j < sizeof(u32); ++j) {
-			u8 r_val;
-			u8 w_val;
+			u8 r_val = 0;
+			u8 w_val = 0;
 
 			switch (j) {
 			case 0:
@@ -234,7 +234,7 @@ void kbase_pbha_debugfs_init(struct kbase_device *kbdev)
 		debugfs_create_file("int_id_overrides", mode, debugfs_pbha_dir, kbdev,
 				    &pbha_int_id_overrides_fops);
 #if MALI_USE_CSF
-		if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PBHA_HWU))
+		if (kbase_hw_has_feature(kbdev, KBASE_HW_FEATURE_PBHA_HWU))
 			debugfs_create_file("propagate_bits", mode, debugfs_pbha_dir, kbdev,
 					    &pbha_propagate_bits_fops);
 #endif /* MALI_USE_CSF */
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_pm.c b/drivers/gpu/arm/bifrost/mali_kbase_pm.c
index ff71524eeaaa..6719a120c1f3 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_pm.c
+++ b/drivers/gpu/arm/bifrost/mali_kbase_pm.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
 /*
  *
- * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2024 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -31,9 +31,7 @@
 #include <mali_kbase_pm.h>
 #include <backend/gpu/mali_kbase_pm_internal.h>
 
-#ifdef CONFIG_MALI_ARBITER_SUPPORT
 #include <arbiter/mali_kbase_arbiter_pm.h>
-#endif /* CONFIG_MALI_ARBITER_SUPPORT */
 
 #include <backend/gpu/mali_kbase_clk_rate_trace_mgr.h>
 
@@ -52,22 +50,21 @@ void kbase_pm_context_active(struct kbase_device *kbdev)
 	(void)kbase_pm_context_active_handle_suspend(kbdev, KBASE_PM_SUSPEND_HANDLER_NOT_POSSIBLE);
 }
 
-int kbase_pm_context_active_handle_suspend(struct kbase_device *kbdev,
-					   enum kbase_pm_suspend_handler suspend_handler)
+int kbase_pm_context_active_handle_suspend_locked(struct kbase_device *kbdev,
+						  enum kbase_pm_suspend_handler suspend_handler)
 {
 	int c;
 
 	KBASE_DEBUG_ASSERT(kbdev != NULL);
 	dev_dbg(kbdev->dev, "%s - reason = %d, pid = %d\n", __func__, suspend_handler,
 		current->pid);
-	kbase_pm_lock(kbdev);
+	lockdep_assert_held(&kbdev->pm.lock);
 
-#ifdef CONFIG_MALI_ARBITER_SUPPORT
-	if (kbase_arbiter_pm_ctx_active_handle_suspend(kbdev, suspend_handler)) {
-		kbase_pm_unlock(kbdev);
+	/* If there is an Arbiter, wait for Arbiter to grant GPU back to KBase
+	 * so suspend request can be handled.
+	 */
+	if (kbase_arbiter_pm_ctx_active_handle_suspend(kbdev, suspend_handler))
 		return 1;
-	}
-#endif /* CONFIG_MALI_ARBITER_SUPPORT */
 
 	if (kbase_pm_is_suspending(kbdev)) {
 		switch (suspend_handler) {
@@ -76,7 +73,6 @@ int kbase_pm_context_active_handle_suspend(struct kbase_device *kbdev,
 				break;
 			fallthrough;
 		case KBASE_PM_SUSPEND_HANDLER_DONT_INCREASE:
-			kbase_pm_unlock(kbdev);
 			return 1;
 
 		case KBASE_PM_SUSPEND_HANDLER_NOT_POSSIBLE:
@@ -94,27 +90,35 @@ int kbase_pm_context_active_handle_suspend(struct kbase_device *kbdev,
 		 * any cores requested by the policy
 		 */
 		kbase_hwaccess_pm_gpu_active(kbdev);
-#ifdef CONFIG_MALI_ARBITER_SUPPORT
 		kbase_arbiter_pm_vm_event(kbdev, KBASE_VM_REF_EVENT);
-#endif /* CONFIG_MALI_ARBITER_SUPPORT */
 		kbase_clk_rate_trace_manager_gpu_active(kbdev);
 	}
 
-	kbase_pm_unlock(kbdev);
 	dev_dbg(kbdev->dev, "%s %d\n", __func__, kbdev->pm.active_count);
 
 	return 0;
 }
 
+int kbase_pm_context_active_handle_suspend(struct kbase_device *kbdev,
+					   enum kbase_pm_suspend_handler suspend_handler)
+{
+	int ret;
+
+	kbase_pm_lock(kbdev);
+	ret = kbase_pm_context_active_handle_suspend_locked(kbdev, suspend_handler);
+	kbase_pm_unlock(kbdev);
+
+	return ret;
+}
+
 KBASE_EXPORT_TEST_API(kbase_pm_context_active);
 
-void kbase_pm_context_idle(struct kbase_device *kbdev)
+void kbase_pm_context_idle_locked(struct kbase_device *kbdev)
 {
 	int c;
 
 	KBASE_DEBUG_ASSERT(kbdev != NULL);
-
-	kbase_pm_lock(kbdev);
+	lockdep_assert_held(&kbdev->pm.lock);
 
 	c = --kbdev->pm.active_count;
 	KBASE_KTRACE_ADD(kbdev, PM_CONTEXT_IDLE, NULL, (u64)c);
@@ -133,10 +137,16 @@ void kbase_pm_context_idle(struct kbase_device *kbdev)
 		wake_up(&kbdev->pm.zero_active_count_wait);
 	}
 
-	kbase_pm_unlock(kbdev);
 	dev_dbg(kbdev->dev, "%s %d (pid = %d)\n", __func__, kbdev->pm.active_count, current->pid);
 }
 
+void kbase_pm_context_idle(struct kbase_device *kbdev)
+{
+	kbase_pm_lock(kbdev);
+	kbase_pm_context_idle_locked(kbdev);
+	kbase_pm_unlock(kbdev);
+}
+
 KBASE_EXPORT_TEST_API(kbase_pm_context_idle);
 
 static void reenable_hwcnt_on_resume(struct kbase_device *kbdev)
@@ -155,7 +165,12 @@ static void reenable_hwcnt_on_resume(struct kbase_device *kbdev)
 #endif
 
 	/* Resume HW counters intermediaries. */
-	kbase_kinstr_prfcnt_resume(kbdev->kinstr_prfcnt_ctx);
+#if MALI_USE_CSF
+	if (kbdev->csf.firmware_inited)
+#endif
+	{
+		kbase_kinstr_prfcnt_resume(kbdev->kinstr_prfcnt_ctx);
+	}
 }
 
 static void resume_job_scheduling(struct kbase_device *kbdev)
@@ -183,7 +198,12 @@ int kbase_pm_driver_suspend(struct kbase_device *kbdev)
 	/* Suspend HW counter intermediaries. This blocks until workers and timers
 	 * are no longer running.
 	 */
-	kbase_kinstr_prfcnt_suspend(kbdev->kinstr_prfcnt_ctx);
+#if MALI_USE_CSF
+	if (kbdev->csf.firmware_inited)
+#endif
+	{
+		kbase_kinstr_prfcnt_suspend(kbdev->kinstr_prfcnt_ctx);
+	}
 
 	/* Disable GPU hardware counters.
 	 * This call will block until counters are disabled.
@@ -199,21 +219,24 @@ int kbase_pm_driver_suspend(struct kbase_device *kbdev)
 	kbdev->pm.suspending = true;
 	mutex_unlock(&kbdev->pm.lock);
 
-#ifdef CONFIG_MALI_ARBITER_SUPPORT
-#if !MALI_USE_CSF
-	if (kbdev->arb.arb_if) {
-		unsigned int i;
+	if (kbase_has_arbiter(kbdev)) {
 		unsigned long flags;
 
+#if MALI_USE_CSF
+		spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+		kbase_disjoint_state_up(kbdev);
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+#else
+		unsigned int i;
+
 		spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
 		kbdev->js_data.runpool_irq.submit_allowed = 0;
 		kbase_disjoint_state_up(kbdev);
 		for (i = 0; i < kbdev->gpu_props.num_job_slots; i++)
 			kbase_job_slot_softstop(kbdev, i, NULL);
 		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+#endif
 	}
-#endif /* !MALI_USE_CSF */
-#endif /* CONFIG_MALI_ARBITER_SUPPORT */
 
 	/* From now on, the active count will drop towards zero. Sometimes,
 	 * it'll go up briefly before going down again. However, once
@@ -259,19 +282,21 @@ int kbase_pm_driver_suspend(struct kbase_device *kbdev)
 	 */
 	if (kbase_hwaccess_pm_suspend(kbdev)) {
 		/* No early return yet */
-		if (IS_ENABLED(CONFIG_MALI_ARBITER_SUPPORT))
+		if (kbase_has_arbiter(kbdev))
 			WARN_ON_ONCE(1);
 		else
 			goto exit;
 	}
 
-#ifdef CONFIG_MALI_ARBITER_SUPPORT
-	if (kbdev->arb.arb_if) {
+	if (kbase_has_arbiter(kbdev)) {
 		mutex_lock(&kbdev->pm.arb_vm_state->vm_state_lock);
 		kbase_arbiter_pm_vm_stopped(kbdev);
 		mutex_unlock(&kbdev->pm.arb_vm_state->vm_state_lock);
 	}
-#endif /* CONFIG_MALI_ARBITER_SUPPORT */
+
+#if MALI_USE_CSF
+	kbase_backend_invalidate_gpu_timestamp_offset(kbdev);
+#endif
 
 	return 0;
 
@@ -307,14 +332,13 @@ void kbase_pm_driver_resume(struct kbase_device *kbdev, bool arb_gpu_start)
 	kbase_hwaccess_pm_resume(kbdev);
 
 	/* Initial active call, to power on the GPU/cores if needed */
-#ifdef CONFIG_MALI_ARBITER_SUPPORT
-	if (kbase_pm_context_active_handle_suspend(
-		    kbdev, (arb_gpu_start ? KBASE_PM_SUSPEND_HANDLER_VM_GPU_GRANTED :
-						  KBASE_PM_SUSPEND_HANDLER_NOT_POSSIBLE)))
-		return;
-#else
-	kbase_pm_context_active(kbdev);
-#endif
+	if (kbase_has_arbiter(kbdev)) {
+		if (kbase_pm_context_active_handle_suspend(
+			    kbdev, (arb_gpu_start ? KBASE_PM_SUSPEND_HANDLER_VM_GPU_GRANTED :
+							  KBASE_PM_SUSPEND_HANDLER_NOT_POSSIBLE)))
+			return;
+	} else
+		kbase_pm_context_active(kbdev);
 
 	resume_job_scheduling(kbdev);
 
@@ -338,26 +362,19 @@ void kbase_pm_driver_resume(struct kbase_device *kbdev, bool arb_gpu_start)
 int kbase_pm_suspend(struct kbase_device *kbdev)
 {
 	int result = 0;
-#ifdef CONFIG_MALI_ARBITER_SUPPORT
-	if (kbdev->arb.arb_if)
+
+	if (kbase_has_arbiter(kbdev))
 		kbase_arbiter_pm_vm_event(kbdev, KBASE_VM_OS_SUSPEND_EVENT);
 	else
 		result = kbase_pm_driver_suspend(kbdev);
-#else
-	result = kbase_pm_driver_suspend(kbdev);
-#endif /* CONFIG_MALI_ARBITER_SUPPORT */
 
 	return result;
 }
 
 void kbase_pm_resume(struct kbase_device *kbdev)
 {
-#ifdef CONFIG_MALI_ARBITER_SUPPORT
-	if (kbdev->arb.arb_if)
+	if (kbase_has_arbiter(kbdev))
 		kbase_arbiter_pm_vm_event(kbdev, KBASE_VM_OS_RESUME_EVENT);
 	else
 		kbase_pm_driver_resume(kbdev, false);
-#else
-	kbase_pm_driver_resume(kbdev, false);
-#endif /* CONFIG_MALI_ARBITER_SUPPORT */
 }
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_pm.h b/drivers/gpu/arm/bifrost/mali_kbase_pm.h
index 46db4db5ffe0..25e4732a8d08 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_pm.h
+++ b/drivers/gpu/arm/bifrost/mali_kbase_pm.h
@@ -1,7 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
 /*
  *
- * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2024 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -33,13 +33,12 @@ struct kbase_device;
 #define PM_ENABLE_IRQS 0x01
 #define PM_HW_ISSUES_DETECT 0x02
 
-#ifdef CONFIG_MALI_ARBITER_SUPPORT
-/* In the case that the GPU was granted by the Arbiter, it will have
+/* Case 1: the GPU was granted by the Arbiter, it will have
  * already been reset. The following flag ensures it is not reset
  * twice.
+ * Case 2: GPU already in reset state after power on, then no soft-reset is needed.
  */
 #define PM_NO_RESET 0x04
-#endif
 
 /**
  * kbase_pm_init - Initialize the power management framework.
@@ -121,12 +120,10 @@ enum kbase_pm_suspend_handler {
 	 * (e.g. guarantee it's going to be idled very soon after)
 	 */
 	KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE,
-#ifdef CONFIG_MALI_ARBITER_SUPPORT
 	/** Special case when Arbiter has notified we can use GPU.
 	 * Active count should always start at 0 in this case.
 	 */
 	KBASE_PM_SUSPEND_HANDLER_VM_GPU_GRANTED,
-#endif /* CONFIG_MALI_ARBITER_SUPPORT */
 };
 
 /**
@@ -148,6 +145,18 @@ enum kbase_pm_suspend_handler {
 int kbase_pm_context_active_handle_suspend(struct kbase_device *kbdev,
 					   enum kbase_pm_suspend_handler suspend_handler);
 
+/**
+ * kbase_pm_context_active_handle_suspend_locked - Same as kbase_pm_context_active_handle_suspend(),
+ *                                                 except that pm.lock is held by the caller.
+ *
+ * @kbdev:     The kbase device structure for the device (must be a valid pointer)
+ * @suspend_handler: The handler code for how to handle a suspend that might occur
+ *
+ * Return: 0 on success, non-zero otherwise.
+ */
+int kbase_pm_context_active_handle_suspend_locked(struct kbase_device *kbdev,
+						  enum kbase_pm_suspend_handler suspend_handler);
+
 /**
  * kbase_pm_context_idle - Decrement the reference count of active contexts.
  *
@@ -159,6 +168,14 @@ int kbase_pm_context_active_handle_suspend(struct kbase_device *kbdev,
  */
 void kbase_pm_context_idle(struct kbase_device *kbdev);
 
+/**
+ * kbase_pm_context_idle_locked - Same as kbase_pm_context_idle(), except that
+ *                                pm.lock is held by the caller.
+ *
+ * @kbdev:     The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_context_idle_locked(struct kbase_device *kbdev);
+
 /* NOTE: kbase_pm_is_active() is in mali_kbase.h, because it is an inline
  * function
  */
@@ -215,7 +232,7 @@ void kbase_pm_vsync_callback(int buffer_updated, void *data);
  * kbase components to complete the suspend.
  *
  * Despite kbase_pm_suspend(), it will ignore to update Arbiter
- * status if MALI_ARBITER_SUPPORT is enabled.
+ * status if there is one.
  *
  * @note the mechanisms used here rely on all user-space threads being frozen
  * by the OS before we suspend. Otherwise, an IOCTL could occur that powers up
@@ -239,11 +256,10 @@ int kbase_pm_driver_suspend(struct kbase_device *kbdev);
  * Also called when using VM arbiter, when GPU access has been granted.
  *
  * Despite kbase_pm_resume(), it will ignore to update Arbiter
- * status if MALI_ARBITER_SUPPORT is enabled.
+ * status if there is one.
  */
 void kbase_pm_driver_resume(struct kbase_device *kbdev, bool arb_gpu_start);
 
-#ifdef CONFIG_MALI_ARBITER_SUPPORT
 /**
  * kbase_pm_handle_gpu_lost() - Handle GPU Lost for the VM
  * @kbdev: Device pointer
@@ -254,6 +270,5 @@ void kbase_pm_driver_resume(struct kbase_device *kbdev, bool arb_gpu_start);
  * Kill any running tasks and put the driver into a GPU powered-off state.
  */
 void kbase_pm_handle_gpu_lost(struct kbase_device *kbdev);
-#endif /* CONFIG_MALI_ARBITER_SUPPORT */
 
 #endif /* _KBASE_PM_H_ */
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_reg_track.c b/drivers/gpu/arm/bifrost/mali_kbase_reg_track.c
index 3128292a9a30..e490a2a3d179 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_reg_track.c
+++ b/drivers/gpu/arm/bifrost/mali_kbase_reg_track.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
 /*
  *
- * (C) COPYRIGHT 2023 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2023-2024 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -648,7 +648,7 @@ int kbase_add_va_region_rbtree(struct kbase_device *kbdev, struct kbase_va_regio
 		} else if (!kbase_is_region_free(tmp)) {
 			dev_warn(
 				dev,
-				"!(tmp->flags & KBASE_REG_FREE): tmp->start_pfn=0x%llx tmp->flags=0x%lx tmp->nr_pages=0x%zx gpu_pfn=0x%llx nr_pages=0x%zx\n",
+				"!(tmp->flags & KBASE_REG_FREE): tmp->start_pfn=0x%llx tmp->flags=0x%llx tmp->nr_pages=0x%zx gpu_pfn=0x%llx nr_pages=0x%zx\n",
 				tmp->start_pfn, tmp->flags, tmp->nr_pages, gpu_pfn, nr_pages);
 			err = -ENOMEM;
 			goto exit;
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_softjobs.c b/drivers/gpu/arm/bifrost/mali_kbase_softjobs.c
index 0cee2f0e6fd5..bae1630c94a9 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_softjobs.c
+++ b/drivers/gpu/arm/bifrost/mali_kbase_softjobs.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
 /*
  *
- * (C) COPYRIGHT 2011-2023 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2011-2024 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -143,9 +143,8 @@ static int kbase_dump_cpu_gpu_time(struct kbase_jd_atom *katom)
 	 * delay suspend until we process the atom (which may be at the end of a
 	 * long chain of dependencies
 	 */
-#ifdef CONFIG_MALI_ARBITER_SUPPORT
-	atomic_inc(&kctx->kbdev->pm.gpu_users_waiting);
-#endif /* CONFIG_MALI_ARBITER_SUPPORT */
+	if (kbase_has_arbiter(kctx->kbdev))
+		atomic_inc(&kctx->kbdev->pm.gpu_users_waiting);
 	pm_active_err = kbase_pm_context_active_handle_suspend(
 		kctx->kbdev, KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE);
 	if (pm_active_err) {
@@ -163,11 +162,8 @@ static int kbase_dump_cpu_gpu_time(struct kbase_jd_atom *katom)
 		kbasep_add_waiting_soft_job(katom);
 
 		return pm_active_err;
-	}
-#ifdef CONFIG_MALI_ARBITER_SUPPORT
-	else
+	} else if (kbase_has_arbiter(kctx->kbdev))
 		atomic_dec(&kctx->kbdev->pm.gpu_users_waiting);
-#endif /* CONFIG_MALI_ARBITER_SUPPORT */
 
 	kbase_backend_get_gpu_time(kctx->kbdev, &cycle_counter, &system_time, &ts);
 
@@ -553,7 +549,7 @@ static int kbase_debug_copy_prepare(struct kbase_jd_atom *katom)
 		goto out_cleanup;
 	}
 
-	ret = copy_from_user(user_buffers, user_structs, sizeof(*user_buffers) * nr);
+	ret = copy_from_user(user_buffers, user_structs, size_mul(sizeof(*user_buffers), nr));
 	if (ret) {
 		ret = -EFAULT;
 		goto out_cleanup;
@@ -1235,7 +1231,7 @@ static int kbase_jit_free_prepare(struct kbase_jd_atom *katom)
 			goto free_info;
 		}
 
-		if (copy_from_user(ids, data, sizeof(*ids) * count) != 0) {
+		if (copy_from_user(ids, data, size_mul(sizeof(*ids), count)) != 0) {
 			ret = -EINVAL;
 			goto free_info;
 		}
@@ -1408,7 +1404,7 @@ static void kbase_ext_res_process(struct kbase_jd_atom *katom, bool map)
 
 		gpu_addr = ext_res->ext_res[i].ext_resource & ~(__u64)BASE_EXT_RES_ACCESS_EXCLUSIVE;
 		if (map) {
-			if (!kbase_sticky_resource_acquire(katom->kctx, gpu_addr))
+			if (!kbase_sticky_resource_acquire(katom->kctx, gpu_addr, NULL))
 				goto failed_loop;
 		} else {
 			if (!kbase_sticky_resource_release_force(katom->kctx, NULL, gpu_addr))
@@ -1688,9 +1684,8 @@ void kbase_resume_suspended_soft_jobs(struct kbase_device *kbdev)
 		if (kbase_process_soft_job(katom_iter) == 0) {
 			kbase_finish_soft_job(katom_iter);
 			resched |= kbase_jd_done_nolock(katom_iter, true);
-#ifdef CONFIG_MALI_ARBITER_SUPPORT
-			atomic_dec(&kbdev->pm.gpu_users_waiting);
-#endif /* CONFIG_MALI_ARBITER_SUPPORT */
+			if (kbase_has_arbiter(kctx->kbdev))
+				atomic_dec(&kbdev->pm.gpu_users_waiting);
 		}
 		mutex_unlock(&kctx->jctx.lock);
 	}
diff --git a/drivers/gpu/arm/bifrost/mmu/Kbuild b/drivers/gpu/arm/bifrost/mmu/Kbuild
index 416432397b5c..3c3defdb88e9 100644
--- a/drivers/gpu/arm/bifrost/mmu/Kbuild
+++ b/drivers/gpu/arm/bifrost/mmu/Kbuild
@@ -1,6 +1,6 @@
 # SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
 #
-# (C) COPYRIGHT 2021 ARM Limited. All rights reserved.
+# (C) COPYRIGHT 2021-2024 ARM Limited. All rights reserved.
 #
 # This program is free software and is provided to you under the terms of the
 # GNU General Public License version 2 as published by the Free Software
@@ -21,10 +21,15 @@
 bifrost_kbase-y += \
     mmu/mali_kbase_mmu.o \
     mmu/mali_kbase_mmu_hw_direct.o \
+    mmu/mali_kbase_mmu_faults_decoder_luts.o \
+    mmu/mali_kbase_mmu_faults_decoder.o \
     mmu/mali_kbase_mmu_mode_aarch64.o
 
 ifeq ($(CONFIG_MALI_CSF_SUPPORT),y)
-    bifrost_kbase-y += mmu/backend/mali_kbase_mmu_csf.o
+    bifrost_kbase-y += mmu/backend/mali_kbase_mmu_csf.o \
+    mmu/backend/mali_kbase_mmu_faults_decoder_luts_csf.o
 else
-    bifrost_kbase-y += mmu/backend/mali_kbase_mmu_jm.o
+    bifrost_kbase-y += mmu/backend/mali_kbase_mmu_jm.o \
+    mmu/backend/mali_kbase_mmu_faults_decoder_luts_jm.o
+
 endif
diff --git a/drivers/gpu/arm/bifrost/mmu/backend/mali_kbase_mmu_csf.c b/drivers/gpu/arm/bifrost/mmu/backend/mali_kbase_mmu_csf.c
index df027c727a2c..196d481d6827 100644
--- a/drivers/gpu/arm/bifrost/mmu/backend/mali_kbase_mmu_csf.c
+++ b/drivers/gpu/arm/bifrost/mmu/backend/mali_kbase_mmu_csf.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
 /*
  *
- * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2024 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -29,6 +29,7 @@
 #include <mali_kbase_reset_gpu.h>
 #include <mali_kbase_as_fault_debugfs.h>
 #include <mmu/mali_kbase_mmu_internal.h>
+#include <mmu/mali_kbase_mmu_faults_decoder.h>
 
 void kbase_mmu_get_as_setup(struct kbase_mmu_table *mmut, struct kbase_mmu_setup *const setup)
 {
@@ -99,15 +100,22 @@ void kbase_mmu_report_mcu_as_fault_and_reset(struct kbase_device *kbdev, struct
 	u32 as_no;
 
 	/* terminal fault, print info about the fault */
-	dev_err(kbdev->dev,
-		"Unexpected Page fault in firmware address space at VA 0x%016llX\n"
-		"raw fault status: 0x%X\n"
-		"exception type 0x%X: %s\n"
-		"access type 0x%X: %s\n"
-		"source id 0x%X\n",
-		fault->addr, fault->status, exception_type,
-		kbase_gpu_exception_name(exception_type), access_type,
-		kbase_gpu_access_type_name(fault->status), source_id);
+	if (kbdev->gpu_props.gpu_id.product_model < GPU_ID_MODEL_MAKE(14, 0)) {
+		dev_err(kbdev->dev,
+			"Unexpected Page fault in firmware address space at VA 0x%016llX\n"
+			"raw fault status: 0x%X\n"
+			"exception type 0x%X: %s\n"
+			"access type 0x%X: %s\n"
+			"source id 0x%X (core_id:utlb:IR 0x%X:0x%X:0x%X): %s, %s\n",
+			fault->addr, fault->status, exception_type,
+			kbase_gpu_exception_name(exception_type), access_type,
+			kbase_gpu_access_type_name(fault->status), source_id,
+			FAULT_SOURCE_ID_CORE_ID_GET(source_id),
+			FAULT_SOURCE_ID_UTLB_ID_GET(source_id),
+			fault_source_id_internal_requester_get(kbdev, source_id),
+			fault_source_id_core_type_description_get(kbdev, source_id),
+			fault_source_id_internal_requester_get_str(kbdev, source_id, access_type));
+	}
 
 	kbase_debug_csf_fault_notify(kbdev, NULL, DF_GPU_PAGE_FAULT);
 
@@ -139,17 +147,25 @@ void kbase_gpu_report_bus_fault_and_kill(struct kbase_context *kctx, struct kbas
 	const uintptr_t fault_addr = fault->addr;
 
 	/* terminal fault, print info about the fault */
-	dev_err(kbdev->dev,
-		"GPU bus fault in AS%u at PA %pK\n"
-		"PA_VALID: %s\n"
-		"raw fault status: 0x%X\n"
-		"exception type 0x%X: %s\n"
-		"access type 0x%X: %s\n"
-		"source id 0x%X\n"
-		"pid: %d\n",
-		as_no, (void *)fault_addr, addr_valid, status, exception_type,
-		kbase_gpu_exception_name(exception_type), access_type,
-		kbase_gpu_access_type_name(access_type), source_id, kctx->pid);
+	if (kbdev->gpu_props.gpu_id.product_model < GPU_ID_MODEL_MAKE(14, 0)) {
+		dev_err(kbdev->dev,
+			"GPU bus fault in AS%u at PA %pK\n"
+			"PA_VALID: %s\n"
+			"raw fault status: 0x%X\n"
+			"exception type 0x%X: %s\n"
+			"access type 0x%X: %s\n"
+			"source id 0x%X (core_id:utlb:IR 0x%X:0x%X:0x%X): %s, %s\n"
+			"pid: %d\n",
+			as_no, (void *)fault_addr, addr_valid, status, exception_type,
+			kbase_gpu_exception_name(exception_type), access_type,
+			kbase_gpu_access_type_name(access_type), source_id,
+			FAULT_SOURCE_ID_CORE_ID_GET(source_id),
+			FAULT_SOURCE_ID_UTLB_ID_GET(source_id),
+			fault_source_id_internal_requester_get(kbdev, source_id),
+			fault_source_id_core_type_description_get(kbdev, source_id),
+			fault_source_id_internal_requester_get_str(kbdev, source_id, access_type),
+			kctx->pid);
+	}
 
 	/* AS transaction begin */
 	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
@@ -195,17 +211,26 @@ void kbase_mmu_report_fault_and_kill(struct kbase_context *kctx, struct kbase_as
 		unsigned int as_no = as->number;
 
 		/* terminal fault, print info about the fault */
-		dev_err(kbdev->dev,
-			"Unhandled Page fault in AS%u at VA 0x%016llX\n"
-			"Reason: %s\n"
-			"raw fault status: 0x%X\n"
-			"exception type 0x%X: %s\n"
-			"access type 0x%X: %s\n"
-			"source id 0x%X\n"
-			"pid: %d\n",
-			as_no, fault->addr, reason_str, status, exception_type,
-			kbase_gpu_exception_name(exception_type), access_type,
-			kbase_gpu_access_type_name(status), source_id, kctx->pid);
+		if (kbdev->gpu_props.gpu_id.product_model < GPU_ID_MODEL_MAKE(14, 0)) {
+			dev_err(kbdev->dev,
+				"Unhandled Page fault in AS%u at VA 0x%016llX\n"
+				"Reason: %s\n"
+				"raw fault status: 0x%X\n"
+				"exception type 0x%X: %s\n"
+				"access type 0x%X: %s\n"
+				"source id 0x%X (core_id:utlb:IR 0x%X:0x%X:0x%X): %s, %s\n"
+				"pid: %d\n",
+				as_no, fault->addr, reason_str, status, exception_type,
+				kbase_gpu_exception_name(exception_type), access_type,
+				kbase_gpu_access_type_name(status), source_id,
+				FAULT_SOURCE_ID_CORE_ID_GET(source_id),
+				FAULT_SOURCE_ID_UTLB_ID_GET(source_id),
+				fault_source_id_internal_requester_get(kbdev, source_id),
+				fault_source_id_core_type_description_get(kbdev, source_id),
+				fault_source_id_internal_requester_get_str(kbdev, source_id,
+									   access_type),
+				kctx->pid);
+		}
 	}
 
 	/* AS transaction begin */
@@ -214,6 +239,14 @@ void kbase_mmu_report_fault_and_kill(struct kbase_context *kctx, struct kbase_as
 	 * will abort all jobs and stop any hw counter dumping
 	 */
 	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	/* Update the page fault counter value in firmware visible memory, just before disabling
+	 * the MMU which would in turn unblock the MCU firmware.
+	 */
+	if (kbdev->csf.page_fault_cnt_ptr) {
+		spin_lock(&kbdev->mmu_mask_change);
+		*kbdev->csf.page_fault_cnt_ptr = ++kbdev->csf.page_fault_cnt;
+		spin_unlock(&kbdev->mmu_mask_change);
+	}
 	kbase_mmu_disable(kctx);
 	kbase_ctx_flag_set(kctx, KCTX_AS_DISABLED_ON_FAULT);
 	kbase_debug_csf_fault_notify(kbdev, kctx, DF_GPU_PAGE_FAULT);
@@ -407,15 +440,6 @@ void kbase_mmu_interrupt(struct kbase_device *kbdev, u32 irq_stat)
 	spin_unlock_irqrestore(&kbdev->mmu_mask_change, flags);
 }
 
-int kbase_mmu_switch_to_ir(struct kbase_context *const kctx, struct kbase_va_region *const reg)
-{
-	CSTD_UNUSED(kctx);
-	CSTD_UNUSED(reg);
-
-	/* Can't soft-stop the provoking job */
-	return -EPERM;
-}
-
 /**
  * kbase_mmu_gpu_fault_worker() - Process a GPU fault for the device.
  *
diff --git a/drivers/gpu/arm/bifrost/mmu/backend/mali_kbase_mmu_faults_decoder_luts_csf.c b/drivers/gpu/arm/bifrost/mmu/backend/mali_kbase_mmu_faults_decoder_luts_csf.c
new file mode 100644
index 000000000000..d8eec91ba887
--- /dev/null
+++ b/drivers/gpu/arm/bifrost/mmu/backend/mali_kbase_mmu_faults_decoder_luts_csf.c
@@ -0,0 +1,139 @@
+// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
+/*
+ *
+ * (C) COPYRIGHT 2019-2024 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+/**
+ * DOC: Base kernel MMU faults decoder for CSF GPUs.
+ */
+
+#include <mmu/backend/mali_kbase_mmu_faults_decoder_luts_csf.h>
+
+#define GPU_ID_ARCH_ID_MAJOR_GET(gpu_id) (((gpu_id) >> 16) & 0xFF) /* bits 23:16 */
+#define GPU_ID_ARCH_ID_MINOR_AND_REV_GET(gpu_id) ((gpu_id) & 0xFFFF) /* bits 15:0 */
+#define NELEMS(s) (sizeof(s) / sizeof((s)[0])) /* element count; @s must be a true array */
+
+struct decode_lut_element {
+	u16 arch_minor_rev;
+	u16 key;
+	const char *text;
+};
+
+/* Return the text of the first LUT entry whose key equals @key and whose
+ * arch_minor_rev is the 0xffff wildcard or equals @arch_minor_rev, else "unknown".
+ */
+static const char *decode_lut_element_lookup(u16 arch_minor_rev, u16 key,
+					     const struct decode_lut_element *decode_element_lut,
+					     unsigned int lut_len)
+{
+	const struct decode_lut_element *p;
+
+	for (p = decode_element_lut; p < decode_element_lut + lut_len; p++) {
+		if (p->key == key &&
+		    (p->arch_minor_rev == 0xffff || p->arch_minor_rev == arch_minor_rev))
+			return p->text;
+	}
+	return "unknown";
+}
+
+/* Auto-generated code: DO NOT MODIFY! */
+
+static struct decode_lut_element lut_fault_source_csf_r_t_major_10[] = {
+	{ 0xFFFF, 0, "pref0" },
+	{ 0xFFFF, 4, "iter0" },
+	{ 0xFFFF, 12, "lsu" },
+	{ 0xFFFF, 13, "mcu" },
+};
+
+static struct decode_lut_element lut_fault_source_csf_r_t_major_11[] = {
+	{ 0xFFFF, 0, "pref0" },
+	{ 0xFFFF, 4, "iter0" },
+	{ 0xFFFF, 12, "lsu" },
+	{ 0xFFFF, 13, "mcu" },
+};
+
+static struct decode_lut_element lut_fault_source_csf_r_t_major_12[] = {
+	{ 0xFFFF, 0, "pref0" },
+	{ 0xFFFF, 4, "iter0" },
+	{ 0xFFFF, 12, "lsu" },
+	{ 0xFFFF, 13, "mcu" },
+};
+
+static struct decode_lut_element lut_fault_source_csf_w_t_major_10[] = {
+	{ 0xFFFF, 8, "pcb0" },
+	{ 0xFFFF, 12, "lsu" },
+	{ 0xFFFF, 13, "mcu" },
+};
+
+static struct decode_lut_element lut_fault_source_csf_w_t_major_11[] = {
+	{ 0xFFFF, 8, "pcb0" },
+	{ 0xFFFF, 12, "lsu" },
+	{ 0xFFFF, 13, "mcu" },
+};
+
+static struct decode_lut_element lut_fault_source_csf_w_t_major_12[] = {
+	{ 0xFFFF, 8, "pcb0" },
+	{ 0xFFFF, 12, "lsu" },
+	{ 0xFFFF, 13, "mcu" },
+};
+
+
+const char *decode_fault_source_csf_r_t(u16 idx, u32 gpu_id)
+{
+	u16 min_rev = GPU_ID_ARCH_ID_MINOR_AND_REV_GET(gpu_id);
+	const char *ret = "unknown";
+
+	switch (GPU_ID_ARCH_ID_MAJOR_GET(gpu_id)) {
+	case 10:
+		ret = decode_lut_element_lookup(min_rev, idx, lut_fault_source_csf_r_t_major_10,
+						NELEMS(lut_fault_source_csf_r_t_major_10));
+		break;
+	case 11:
+		ret = decode_lut_element_lookup(min_rev, idx, lut_fault_source_csf_r_t_major_11,
+						NELEMS(lut_fault_source_csf_r_t_major_11));
+		break;
+	case 12:
+		ret = decode_lut_element_lookup(min_rev, idx, lut_fault_source_csf_r_t_major_12,
+						NELEMS(lut_fault_source_csf_r_t_major_12));
+		break;
+	}
+	return ret;
+}
+
+const char *decode_fault_source_csf_w_t(u16 idx, u32 gpu_id)
+{
+	u16 min_rev = GPU_ID_ARCH_ID_MINOR_AND_REV_GET(gpu_id);
+	const char *ret = "unknown";
+
+	switch (GPU_ID_ARCH_ID_MAJOR_GET(gpu_id)) {
+	case 10:
+		ret = decode_lut_element_lookup(min_rev, idx, lut_fault_source_csf_w_t_major_10,
+						NELEMS(lut_fault_source_csf_w_t_major_10));
+		break;
+	case 11:
+		ret = decode_lut_element_lookup(min_rev, idx, lut_fault_source_csf_w_t_major_11,
+						NELEMS(lut_fault_source_csf_w_t_major_11));
+		break;
+	case 12:
+		ret = decode_lut_element_lookup(min_rev, idx, lut_fault_source_csf_w_t_major_12,
+						NELEMS(lut_fault_source_csf_w_t_major_12));
+		break;
+	}
+	return ret;
+}
diff --git a/drivers/gpu/arm/bifrost/mmu/backend/mali_kbase_mmu_faults_decoder_luts_csf.h b/drivers/gpu/arm/bifrost/mmu/backend/mali_kbase_mmu_faults_decoder_luts_csf.h
new file mode 100644
index 000000000000..04f5c02ccc3d
--- /dev/null
+++ b/drivers/gpu/arm/bifrost/mmu/backend/mali_kbase_mmu_faults_decoder_luts_csf.h
@@ -0,0 +1,50 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ *
+ * (C) COPYRIGHT 2024 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+#ifndef _MALI_KBASE_MMU_FAULTS_DECODER_LUTS_CSF_H_
+#define _MALI_KBASE_MMU_FAULTS_DECODER_LUTS_CSF_H_
+#include <linux/types.h>
+
+/**
+ * decode_fault_source_csf_r_t() - Get internal requester of a
+ * fault in a human readable format.
+ *
+ * @idx: Internal requester part of SOURCE_ID field of the fault.
+ * @gpu_id: GPU id composed of arch_major << 16 | arch_minor << 8 | arch_rev.
+ *
+ * Return: Internal requester of a fault in a human readable format for a read
+ * operation on a CSF core.
+ */
+const char *decode_fault_source_csf_r_t(u16 idx, u32 gpu_id);
+
+/**
+ * decode_fault_source_csf_w_t() - Get internal requester of a
+ * fault in a human readable format.
+ *
+ * @idx: Internal requester part of SOURCE_ID field of the fault.
+ * @gpu_id: GPU id composed of arch_major << 16 | arch_minor << 8 | arch_rev.
+ *
+ * Return: Internal requester of a fault in a human readable format for a write
+ * operation on a CSF core.
+ */
+const char *decode_fault_source_csf_w_t(u16 idx, u32 gpu_id);
+
+#endif /* _MALI_KBASE_MMU_FAULTS_DECODER_LUTS_CSF_H_ */
diff --git a/drivers/gpu/arm/bifrost/mmu/backend/mali_kbase_mmu_faults_decoder_luts_jm.c b/drivers/gpu/arm/bifrost/mmu/backend/mali_kbase_mmu_faults_decoder_luts_jm.c
new file mode 100644
index 000000000000..a053a93978b5
--- /dev/null
+++ b/drivers/gpu/arm/bifrost/mmu/backend/mali_kbase_mmu_faults_decoder_luts_jm.c
@@ -0,0 +1,74 @@
+// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
+/*
+ *
+ * (C) COPYRIGHT 2019-2024 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+/**
+ * DOC: Base kernel MMU faults decoder for Job Manager GPUs.
+ */
+
+#include <mmu/backend/mali_kbase_mmu_faults_decoder_luts_jm.h>
+
+#define GPU_ID_ARCH_ID_MAJOR_GET(gpu_id) (((gpu_id) >> 16) & 0xFF) /* bits 23:16 */
+#define GPU_ID_ARCH_ID_MINOR_AND_REV_GET(gpu_id) ((gpu_id) & 0xFFFF) /* bits 15:0 */
+#define NELEMS(s) (sizeof(s) / sizeof((s)[0])) /* element count; @s must be a true array */
+
+struct decode_lut_element {
+	u16 arch_minor_rev;
+	u16 key;
+	const char *text;
+};
+
+/* Return the text of the first LUT entry whose key equals @key and whose
+ * arch_minor_rev is the 0xffff wildcard or equals @arch_minor_rev, else "unknown".
+ */
+static const char *decode_lut_element_lookup(u16 arch_minor_rev, u16 key,
+					     const struct decode_lut_element *decode_element_lut,
+					     unsigned int lut_len)
+{
+	const struct decode_lut_element *p;
+
+	for (p = decode_element_lut; p < decode_element_lut + lut_len; p++) {
+		if (p->key == key &&
+		    (p->arch_minor_rev == 0xffff || p->arch_minor_rev == arch_minor_rev))
+			return p->text;
+	}
+	return "unknown";
+}
+
+/* Auto-generated code: DO NOT MODIFY! */
+
+static struct decode_lut_element lut_fault_source_jm_t_major_9[] = {
+	{ 0xFFFF, 0, "js" },
+	{ 0xFFFF, 1, "pcm" },
+};
+
+const char *decode_fault_source_jm_t(u16 idx, u32 gpu_id)
+{
+	u16 min_rev = GPU_ID_ARCH_ID_MINOR_AND_REV_GET(gpu_id);
+	const char *ret = "unknown";
+
+	switch (GPU_ID_ARCH_ID_MAJOR_GET(gpu_id)) {
+	case 9:
+		ret = decode_lut_element_lookup(min_rev, idx, lut_fault_source_jm_t_major_9,
+						NELEMS(lut_fault_source_jm_t_major_9));
+		break;
+	}
+	return ret;
+}
diff --git a/drivers/gpu/arm/bifrost/mmu/backend/mali_kbase_mmu_faults_decoder_luts_jm.h b/drivers/gpu/arm/bifrost/mmu/backend/mali_kbase_mmu_faults_decoder_luts_jm.h
new file mode 100644
index 000000000000..f686e555d86a
--- /dev/null
+++ b/drivers/gpu/arm/bifrost/mmu/backend/mali_kbase_mmu_faults_decoder_luts_jm.h
@@ -0,0 +1,37 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ *
+ * (C) COPYRIGHT 2024 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+#ifndef _MALI_KBASE_MMU_FAULTS_DECODER_LUTS_JM_H_
+#define _MALI_KBASE_MMU_FAULTS_DECODER_LUTS_JM_H_
+#include <linux/types.h>
+
+/**
+ * decode_fault_source_jm_t() - Get internal requester of a
+ * fault in a human readable format.
+ *
+ * @idx: Internal requester part of SOURCE_ID field of the fault.
+ * @gpu_id: GPU id composed of arch_major << 16 | arch_minor << 8 | arch_rev.
+ *
+ * Return: Internal requester of a fault in a human readable format for a JM core.
+ */
+const char *decode_fault_source_jm_t(u16 idx, u32 gpu_id);
+
+#endif /* _MALI_KBASE_MMU_FAULTS_DECODER_LUTS_JM_H_ */
diff --git a/drivers/gpu/arm/bifrost/mmu/backend/mali_kbase_mmu_jm.c b/drivers/gpu/arm/bifrost/mmu/backend/mali_kbase_mmu_jm.c
index 1b2df11f3c3c..a7f3f40ef325 100644
--- a/drivers/gpu/arm/bifrost/mmu/backend/mali_kbase_mmu_jm.c
+++ b/drivers/gpu/arm/bifrost/mmu/backend/mali_kbase_mmu_jm.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
 /*
  *
- * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2024 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -29,6 +29,7 @@
 #include <device/mali_kbase_device.h>
 #include <mali_kbase_as_fault_debugfs.h>
 #include <mmu/mali_kbase_mmu_internal.h>
+#include <mmu/mali_kbase_mmu_faults_decoder.h>
 
 void kbase_mmu_get_as_setup(struct kbase_mmu_table *mmut, struct kbase_mmu_setup *const setup)
 {
@@ -52,9 +53,10 @@ void kbase_gpu_report_bus_fault_and_kill(struct kbase_context *kctx, struct kbas
 					 struct kbase_fault *fault)
 {
 	struct kbase_device *const kbdev = kctx->kbdev;
-	u32 const status = fault->status;
-	u32 const exception_type = (status & 0xFF);
-	u32 const exception_data = (status >> 8) & 0xFFFFFF;
+	const u32 status = fault->status;
+	const u32 exception_type = AS_FAULTSTATUS_EXCEPTION_TYPE_GET(status);
+	const u32 access_type = AS_FAULTSTATUS_ACCESS_TYPE_GET(status);
+	const u32 source_id = AS_FAULTSTATUS_SOURCE_ID_GET(status);
 	unsigned int const as_no = as->number;
 	unsigned long flags;
 	const uintptr_t fault_addr = fault->addr;
@@ -64,10 +66,17 @@ void kbase_gpu_report_bus_fault_and_kill(struct kbase_context *kctx, struct kbas
 		"GPU bus fault in AS%u at PA %pK\n"
 		"raw fault status: 0x%X\n"
 		"exception type 0x%X: %s\n"
-		"exception data 0x%X\n"
+		"access type 0x%X: %s\n"
+		"source id 0x%X (core_id:utlb:IR 0x%X:0x%X:0x%X): %s, %s\n"
 		"pid: %d\n",
 		as_no, (void *)fault_addr, status, exception_type,
-		kbase_gpu_exception_name(exception_type), exception_data, kctx->pid);
+		kbase_gpu_exception_name(exception_type), access_type,
+		kbase_gpu_access_type_name(access_type), source_id,
+		FAULT_SOURCE_ID_CORE_ID_GET(source_id), FAULT_SOURCE_ID_UTLB_ID_GET(source_id),
+		fault_source_id_internal_requester_get(kbdev, source_id),
+		fault_source_id_core_type_description_get(kbdev, source_id),
+		fault_source_id_internal_requester_get_str(kbdev, source_id, access_type),
+		kctx->pid);
 
 	/* switch to UNMAPPED mode, will abort all jobs and stop any hw counter
 	 * dumping AS transaction begin
@@ -105,22 +114,42 @@ void kbase_mmu_report_fault_and_kill(struct kbase_context *kctx, struct kbase_as
 
 	if (!kbase_ctx_flag(kctx, KCTX_PAGE_FAULT_REPORT_SKIP)) {
 		/* decode the fault status */
-		u32 exception_type = fault->status & 0xFF;
-		u32 access_type = (fault->status >> 8) & 0x3;
-		u32 source_id = (fault->status >> 16);
-
+		const u32 status = fault->status;
+		const u32 exception_type = AS_FAULTSTATUS_EXCEPTION_TYPE_GET(status);
+		const u32 access_type = AS_FAULTSTATUS_ACCESS_TYPE_GET(status);
+		const u32 source_id = AS_FAULTSTATUS_SOURCE_ID_GET(status);
 		/* terminal fault, print info about the fault */
-		dev_err(kbdev->dev,
-			"Unhandled Page fault in AS%u at VA 0x%016llX\n"
-			"Reason: %s\n"
-			"raw fault status: 0x%X\n"
-			"exception type 0x%X: %s\n"
-			"access type 0x%X: %s\n"
-			"source id 0x%X\n"
-			"pid: %d\n",
-			as_no, fault->addr, reason_str, fault->status, exception_type,
-			kbase_gpu_exception_name(exception_type), access_type,
-			kbase_gpu_access_type_name(fault->status), source_id, kctx->pid);
+		if (kbdev->gpu_props.gpu_id.product_model < GPU_ID_MODEL_MAKE(9, 0)) {
+			dev_err(kbdev->dev,
+				"Unhandled Page fault in AS%u at VA 0x%016llX\n"
+				"Reason: %s\n"
+				"raw fault status: 0x%X\n"
+				"exception type 0x%X: %s\n"
+				"access type 0x%X: %s\n"
+				"pid: %d\n",
+				as_no, fault->addr, reason_str, status, exception_type,
+				kbase_gpu_exception_name(exception_type), access_type,
+				kbase_gpu_access_type_name(status), kctx->pid);
+		} else {
+			dev_err(kbdev->dev,
+				"Unhandled Page fault in AS%u at VA 0x%016llX\n"
+				"Reason: %s\n"
+				"raw fault status: 0x%X\n"
+				"exception type 0x%X: %s\n"
+				"access type 0x%X: %s\n"
+				"source id 0x%X (core_id:utlb:IR 0x%X:0x%X:0x%X): %s, %s\n"
+				"pid: %d\n",
+				as_no, fault->addr, reason_str, status, exception_type,
+				kbase_gpu_exception_name(exception_type), access_type,
+				kbase_gpu_access_type_name(status), source_id,
+				FAULT_SOURCE_ID_CORE_ID_GET(source_id),
+				FAULT_SOURCE_ID_UTLB_ID_GET(source_id),
+				fault_source_id_internal_requester_get(kbdev, source_id),
+				fault_source_id_core_type_description_get(kbdev, source_id),
+				fault_source_id_internal_requester_get_str(kbdev, source_id,
+									   access_type),
+				kctx->pid);
+		}
 	}
 
 	/* hardware counters dump fault handling */
@@ -256,7 +285,7 @@ static void validate_protected_page_fault(struct kbase_device *kbdev)
 	 */
 	u32 protected_debug_mode = 0;
 
-	if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PROTECTED_DEBUG_MODE)) {
+	if (kbase_hw_has_feature(kbdev, KBASE_HW_FEATURE_PROTECTED_DEBUG_MODE)) {
 		protected_debug_mode = kbase_reg_read32(kbdev, GPU_CONTROL_ENUM(GPU_STATUS)) &
 				       GPU_STATUS_GPU_DBG_ENABLED;
 	}
@@ -372,13 +401,6 @@ void kbase_mmu_interrupt(struct kbase_device *kbdev, u32 irq_stat)
 	dev_dbg(kbdev->dev, "Leaving %s irq_stat %u\n", __func__, irq_stat);
 }
 
-int kbase_mmu_switch_to_ir(struct kbase_context *const kctx, struct kbase_va_region *const reg)
-{
-	dev_dbg(kctx->kbdev->dev, "Switching to incremental rendering for region %pK\n",
-		(void *)reg);
-	return kbase_job_slot_softstop_start_rp(kctx, reg);
-}
-
 int kbase_mmu_as_init(struct kbase_device *kbdev, unsigned int i)
 {
 	kbdev->as[i].number = i;
diff --git a/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu.c b/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu.c
index becbb02aa15a..4963d990054f 100644
--- a/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu.c
+++ b/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
 /*
  *
- * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2024 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -40,7 +40,6 @@
 #include <mali_kbase_reset_gpu.h>
 #include <mmu/mali_kbase_mmu.h>
 #include <mmu/mali_kbase_mmu_internal.h>
-#include <mali_kbase_cs_experimental.h>
 #include <device/mali_kbase_device.h>
 #include <uapi/gpu/arm/bifrost/gpu/mali_kbase_gpu_id.h>
 #if !MALI_USE_CSF
@@ -58,6 +57,257 @@
 /* Macro to convert updated PDGs to flags indicating levels skip in flush */
 #define pgd_level_to_skip_flush(dirty_pgds) (~(dirty_pgds)&0xF)
 
+/**
+ * kmap_pgd() - Map a PGD page and return the address of it
+ *
+ * @p:           Pointer to the PGD page to be mapped.
+ * @pgd:         The physical address of the PGD. May not be PAGE_SIZE aligned but shall be
+ *               GPU_PAGE_SIZE aligned.
+ *
+ * Return: The mapped address of the @pgd, adjusted by the offset of @pgd from the start of page.
+ */
+static inline void *kmap_pgd(struct page *p, phys_addr_t pgd)
+{
+#if GPU_PAGES_PER_CPU_PAGE > 1
+	return kbase_kmap(p) + (pgd & ~PAGE_MASK);
+#else
+	CSTD_UNUSED(pgd);
+	return kbase_kmap(p);
+#endif
+}
+
+/**
+ * kmap_atomic_pgd() - Variant of kmap_pgd for atomic mapping
+ *
+ * @p:           Pointer to the PGD page to be mapped.
+ * @pgd:         The physical address of the PGD. May not be PAGE_SIZE aligned but shall be
+ *               GPU_PAGE_SIZE aligned.
+ *
+ * Return: The mapped address of the @pgd, adjusted by the offset of @pgd from the start of page.
+ */
+static inline void *kmap_atomic_pgd(struct page *p, phys_addr_t pgd)
+{
+#if GPU_PAGES_PER_CPU_PAGE > 1
+	return kbase_kmap_atomic(p) + (pgd & ~PAGE_MASK);
+#else
+	CSTD_UNUSED(pgd);
+	return kbase_kmap_atomic(p);
+#endif
+}
+
+/**
+ * kunmap_pgd() - Unmap a PGD page
+ *
+ * @p:           Pointer to the PGD page to be unmapped.
+ * @pgd_address: The address of the PGD. May not be PAGE_SIZE aligned but shall be
+ *               GPU_PAGE_SIZE aligned.
+ */
+static inline void kunmap_pgd(struct page *p, void *pgd_address)
+{
+	/* It is okay to not align pgd_address to PAGE_SIZE boundary */
+	kbase_kunmap(p, pgd_address);
+}
+
+/**
+ * kunmap_atomic_pgd() - Variant of kunmap_pgd for atomic unmapping
+ *
+ * @pgd_address: The address of the PGD. May not be PAGE_SIZE aligned but shall be
+ *               GPU_PAGE_SIZE aligned.
+ */
+static inline void kunmap_atomic_pgd(void *pgd_address)
+{
+	/* It is okay to not align pgd_address to PAGE_SIZE boundary */
+	kbase_kunmap_atomic(pgd_address);
+}
+
+/**
+ * pgd_dma_addr() - Return dma addr of a PGD
+ *
+ * @p:       Pointer to the PGD page.
+ * @pgd:     The physical address of the PGD.
+ *
+ * Return:   DMA address of the PGD
+ */
+static inline dma_addr_t pgd_dma_addr(struct page *p, phys_addr_t pgd)
+{
+#if GPU_PAGES_PER_CPU_PAGE > 1
+	return kbase_page_private(p)->dma_addr + (pgd & ~PAGE_MASK);
+#else
+	CSTD_UNUSED(pgd);
+	return kbase_dma_addr(p);
+#endif
+}
+
+/**
+ * get_pgd_sub_page_index() - Return the index of a sub PGD page in the PGD page.
+ *
+ * @pgd:         The physical address of the PGD.
+ *
+ * Return:       The index value ranging from 0 to (GPU_PAGES_PER_CPU_PAGE - 1)
+ */
+static inline u32 get_pgd_sub_page_index(phys_addr_t pgd)
+{
+	return (pgd & ~PAGE_MASK) / GPU_PAGE_SIZE;
+}
+
+#if GPU_PAGES_PER_CPU_PAGE > 1
+/**
+ * alloc_pgd_page_metadata() - Allocate page metadata for a PGD.
+ *
+ * @kbdev:      Pointer to the instance of a kbase device.
+ * @mmut:       Structure holding details of the MMU table for a kcontext.
+ * @p:          PGD page.
+ *
+ * The PGD page, @p is linked to &kbase_mmu_table.pgd_pages_list for allocating
+ * sub PGD pages from the list.
+ *
+ * Return:      True on success, false if the page metadata allocation fails.
+ */
+static bool alloc_pgd_page_metadata(struct kbase_device *kbdev, struct kbase_mmu_table *mmut,
+				    struct page *p)
+{
+	struct kbase_page_metadata *page_md;
+
+	if (!kbase_is_page_migration_enabled()) {
+		page_md = kmem_cache_zalloc(kbdev->page_metadata_slab, GFP_KERNEL);
+		if (!page_md)
+			return false;
+
+		page_md->dma_addr = kbase_dma_addr_as_priv(p);
+		set_page_private(p, (unsigned long)page_md);
+	} else {
+		page_md = kbase_page_private(p);
+	}
+	/* Mark sub page 0 as the only allocated one and add the page to the PGD pages list. */
+	page_md->data.pt_mapped.num_allocated_sub_pages = 1;
+	set_bit(0, page_md->data.pt_mapped.allocated_sub_pages);
+	page_md->data.pt_mapped.pgd_page = p;
+	list_add(&page_md->data.pt_mapped.pgd_link, &mmut->pgd_pages_list);
+
+	return true;
+}
+
+/**
+ * free_pgd_page_metadata() - Free page metadata for a PGD.
+ *
+ * @kbdev:      Pointer to the instance of a kbase device.
+ * @p:          PGD page where the metadata belongs to.
+ *
+ * The PGD page, @p is removed from &kbase_mmu_table.pgd_pages_list.
+ */
+static void free_pgd_page_metadata(struct kbase_device *kbdev, struct page *p)
+{
+	struct kbase_page_metadata *page_md = kbase_page_private(p);
+
+	WARN_ON_ONCE(page_md->data.pt_mapped.num_allocated_sub_pages);
+	page_md->data.pt_mapped.pgd_page = NULL;
+	list_del_init(&page_md->data.pt_mapped.pgd_link);
+
+	if (kbase_is_page_migration_enabled())
+		return;
+
+	set_page_private(p, (unsigned long)page_md->dma_addr);
+	kmem_cache_free(kbdev->page_metadata_slab, page_md);
+}
+
+/**
+ * allocate_pgd_sub_page() - Allocate a PGD sub page
+ *
+ * @page_md:  Page metadata of a PGD page where a sub page is allocated from.
+ *
+ * Return:    Physical address of allocated PGD sub page on success.
+ *            KBASE_INVALID_PHYSICAL_ADDRESS on failure.
+ */
+static inline phys_addr_t allocate_pgd_sub_page(struct kbase_page_metadata *page_md)
+{
+	unsigned long sub_page_index;
+
+	if (page_md->data.pt_mapped.num_allocated_sub_pages == GPU_PAGES_PER_CPU_PAGE)
+		return KBASE_INVALID_PHYSICAL_ADDRESS;
+	sub_page_index = find_first_zero_bit(page_md->data.pt_mapped.allocated_sub_pages,
+					     GPU_PAGES_PER_CPU_PAGE);
+
+#ifdef CONFIG_MALI_BIFROST_DEBUG
+	if (WARN_ON_ONCE(sub_page_index >= GPU_PAGES_PER_CPU_PAGE))
+		return KBASE_INVALID_PHYSICAL_ADDRESS;
+	if (WARN_ON_ONCE(page_md->data.pt_mapped.num_allocated_sub_pages > GPU_PAGES_PER_CPU_PAGE))
+		return KBASE_INVALID_PHYSICAL_ADDRESS;
+#endif
+	set_bit(sub_page_index, page_md->data.pt_mapped.allocated_sub_pages);
+	page_md->data.pt_mapped.num_allocated_sub_pages++;
+
+	return (page_to_phys(page_md->data.pt_mapped.pgd_page) + (sub_page_index * GPU_PAGE_SIZE));
+}
+
+/**
+ * free_pgd_sub_page() - Free a PGD sub page
+ *
+ * @pgd:      Sub PGD to be freed.
+ *
+ * Return:    The number of remaining allocated sub pages in the PGD.
+ */
+static int free_pgd_sub_page(phys_addr_t pgd)
+{
+	struct page *p = pfn_to_page(PFN_DOWN(pgd));
+	struct kbase_page_metadata *page_md = kbase_page_private(p);
+	const u32 sub_page_index = get_pgd_sub_page_index(pgd);
+
+#ifdef CONFIG_MALI_BIFROST_DEBUG
+	if (WARN_ON_ONCE(!test_bit(sub_page_index, page_md->data.pt_mapped.allocated_sub_pages)))
+		return page_md->data.pt_mapped.num_allocated_sub_pages;
+#endif
+	clear_bit(sub_page_index, page_md->data.pt_mapped.allocated_sub_pages);
+	if (!WARN_ON_ONCE(page_md->data.pt_mapped.num_allocated_sub_pages <= 0))
+		page_md->data.pt_mapped.num_allocated_sub_pages--;
+
+	return page_md->data.pt_mapped.num_allocated_sub_pages;
+}
+
+/**
+ * allocate_from_pgd_pages_list() - Allocate a PGD from the PGD pages list
+ *
+ * @mmut:     Structure holding details of the MMU table for a kcontext.
+ *
+ * Return:    Physical address of the allocated PGD. KBASE_INVALID_PHYSICAL_ADDRESS on failure.
+ */
+static inline phys_addr_t allocate_from_pgd_pages_list(struct kbase_mmu_table *mmut)
+{
+	struct list_head *entry;
+	phys_addr_t pgd;
+
+	lockdep_assert_held(&mmut->mmu_lock);
+
+	if (unlikely(!mmut->num_free_pgd_sub_pages))
+		return KBASE_INVALID_PHYSICAL_ADDRESS;
+	/* Try the last allocated page, then the last freed page, before scanning the list. */
+	if (mmut->last_allocated_pgd_page) {
+		pgd = allocate_pgd_sub_page(kbase_page_private(mmut->last_allocated_pgd_page));
+		if (pgd != KBASE_INVALID_PHYSICAL_ADDRESS)
+			goto success;
+	}
+
+	if (mmut->last_freed_pgd_page) {
+		pgd = allocate_pgd_sub_page(kbase_page_private(mmut->last_freed_pgd_page));
+		if (pgd != KBASE_INVALID_PHYSICAL_ADDRESS)
+			goto success;
+	}
+	/* Fall back to trying every page on the PGD pages list in order. */
+	list_for_each(entry, &mmut->pgd_pages_list) {
+		struct kbase_page_metadata *page_md =
+			list_entry(entry, struct kbase_page_metadata, data.pt_mapped.pgd_link);
+
+		pgd = allocate_pgd_sub_page(page_md);
+		if (pgd != KBASE_INVALID_PHYSICAL_ADDRESS)
+			goto success;
+	}
+
+	return KBASE_INVALID_PHYSICAL_ADDRESS;
+success:
+	mmut->num_free_pgd_sub_pages--;
+	return pgd;
+}
+#endif
+
 static int mmu_insert_pages_no_flush(struct kbase_device *kbdev, struct kbase_mmu_table *mmut,
 				     const u64 start_vpfn, struct tagged_addr *phys, size_t nr,
 				     unsigned long flags, int const group_id, u64 *dirty_pgds,
@@ -151,6 +401,44 @@ static void mmu_invalidate(struct kbase_device *kbdev, struct kbase_context *kct
 	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
 }
 
+/**
+ * mmu_invalidate_on_teardown() - Perform an invalidate operation on MMU caches on page
+ *                                table teardown.
+ * @kbdev:      The Kbase device.
+ * @kctx:       The Kbase context; may be NULL, in which case kctx_id 0xFFFFFFFF is used.
+ * @vpfn:       The virtual page frame number at which teardown is done.
+ * @num_pages:  The number of entries that were invalidated in top most level PGD, that
+ *              was affected by the teardown operation.
+ * @level:      The top most PGD level that was touched on teardown.
+ * @as_nr:      GPU address space number for which invalidate is required.
+ *
+ * Perform an MMU invalidate operation after the teardown of top most level PGD on a
+ * particular address space by issuing an UNLOCK command.
+ */
+static inline void mmu_invalidate_on_teardown(struct kbase_device *kbdev,
+					      struct kbase_context *kctx, u64 vpfn,
+					      size_t num_pages, int level, int as_nr)
+{
+	u32 invalidate_range_num_pages = num_pages;
+	u64 invalidate_range_start_vpfn = vpfn;
+	struct kbase_mmu_hw_op_param op_param;
+
+	if (level != MIDGARD_MMU_BOTTOMLEVEL) {
+		invalidate_range_num_pages = 1 << ((3 - level) * 9);
+		invalidate_range_start_vpfn = vpfn - (vpfn & (invalidate_range_num_pages - 1));
+	}
+
+	op_param = (struct kbase_mmu_hw_op_param){
+		.vpfn = invalidate_range_start_vpfn,
+		.nr = invalidate_range_num_pages,
+		.mmu_sync_info = CALLER_MMU_ASYNC,
+		.kctx_id = kctx ? kctx->id : 0xFFFFFFFF,
+		.flush_skip_levels = (1ULL << level) - 1,
+	};
+
+	mmu_invalidate(kbdev, kctx, as_nr, &op_param);
+}
+
 /* Perform a flush/invalidate on a particular address space
  */
 static void mmu_flush_invalidate_as(struct kbase_device *kbdev, struct kbase_as *as,
@@ -318,14 +606,16 @@ static int kbase_mmu_update_pages_no_flush(struct kbase_device *kbdev, struct kb
  * @mmut:     GPU MMU page table.
  * @pgds:     Physical addresses of page directories to be freed.
  * @vpfn:     The virtual page frame number.
- * @level:    The level of MMU page table.
+ * @level:    The level of MMU page table that needs to be updated.
  * @flush_op: The type of MMU flush operation to perform.
  * @dirty_pgds: Flags to track every level where a PGD has been updated.
+ * @as_nr:     GPU address space number for which invalidate is required.
  */
 static void kbase_mmu_update_and_free_parent_pgds(struct kbase_device *kbdev,
 						  struct kbase_mmu_table *mmut, phys_addr_t *pgds,
 						  u64 vpfn, int level,
-						  enum kbase_mmu_op_type flush_op, u64 *dirty_pgds);
+						  enum kbase_mmu_op_type flush_op, u64 *dirty_pgds,
+						  int as_nr);
 
 static void kbase_mmu_account_freed_pgd(struct kbase_device *kbdev, struct kbase_mmu_table *mmut)
 {
@@ -377,7 +667,7 @@ static bool kbase_mmu_handle_isolated_pgd_page(struct kbase_device *kbdev,
 		 * PGD page, which is done inside kbase_mmu_free_pgd() for the
 		 * PGD page that did not get isolated.
 		 */
-		dma_sync_single_for_device(kbdev->dev, kbase_dma_addr(p), PAGE_SIZE,
+		dma_sync_single_for_device(kbdev->dev, pgd_dma_addr(p, page_to_phys(p)), PAGE_SIZE,
 					   DMA_BIDIRECTIONAL);
 		kbase_mmu_account_freed_pgd(kbdev, mmut);
 	}
@@ -404,6 +694,20 @@ static void kbase_mmu_free_pgd(struct kbase_device *kbdev, struct kbase_mmu_tabl
 	lockdep_assert_held(&mmut->mmu_lock);
 
 	p = pfn_to_page(PFN_DOWN(pgd));
+#if GPU_PAGES_PER_CPU_PAGE > 1
+	if (free_pgd_sub_page(pgd)) {
+		mmut->num_free_pgd_sub_pages++;
+		mmut->last_freed_pgd_page = p;
+		return;
+	}
+
+	mmut->num_free_pgd_sub_pages -= (GPU_PAGES_PER_CPU_PAGE - 1);
+	if (p == mmut->last_freed_pgd_page)
+		mmut->last_freed_pgd_page = NULL;
+	if (p == mmut->last_allocated_pgd_page)
+		mmut->last_allocated_pgd_page = NULL;
+	free_pgd_page_metadata(kbdev, p);
+#endif
 	page_is_isolated = kbase_mmu_handle_isolated_pgd_page(kbdev, mmut, p);
 
 	if (likely(!page_is_isolated)) {
@@ -433,19 +737,19 @@ static void kbase_mmu_free_pgds_list(struct kbase_device *kbdev, struct kbase_mm
 	lockdep_assert_held(&mmut->mmu_lock);
 
 	for (i = 0; i < mmut->scratch_mem.free_pgds.head_index; i++)
-		kbase_mmu_free_pgd(kbdev, mmut, page_to_phys(mmut->scratch_mem.free_pgds.pgds[i]));
+		kbase_mmu_free_pgd(kbdev, mmut, mmut->scratch_mem.free_pgds.pgds[i]);
 
 	mmut->scratch_mem.free_pgds.head_index = 0;
 }
 
-static void kbase_mmu_add_to_free_pgds_list(struct kbase_mmu_table *mmut, struct page *p)
+static void kbase_mmu_add_to_free_pgds_list(struct kbase_mmu_table *mmut, phys_addr_t pgd)
 {
 	lockdep_assert_held(&mmut->mmu_lock);
 
 	if (WARN_ON_ONCE(mmut->scratch_mem.free_pgds.head_index > (MAX_FREE_PGDS - 1)))
 		return;
 
-	mmut->scratch_mem.free_pgds.pgds[mmut->scratch_mem.free_pgds.head_index++] = p;
+	mmut->scratch_mem.free_pgds.pgds[mmut->scratch_mem.free_pgds.head_index++] = pgd;
 }
 
 static inline void kbase_mmu_reset_free_pgds_list(struct kbase_mmu_table *mmut)
@@ -626,6 +930,14 @@ static void kbase_gpu_mmu_handle_write_fault(struct kbase_context *kctx,
 		return;
 	}
 
+	if (unlikely(region->gpu_alloc->type == KBASE_MEM_TYPE_ALIAS)) {
+		kbase_gpu_vm_unlock(kctx);
+		kbase_mmu_report_fault_and_kill(
+			kctx, faulting_as, "Unexpected write permission fault on an alias region",
+			&faulting_as->pf_data);
+		return;
+	}
+
 	pfn_offset = fault_pfn - region->start_pfn;
 	fault_phys_addr = &kbase_get_gpu_phy_pages(region)[pfn_offset];
 
@@ -741,6 +1053,7 @@ static size_t estimate_pool_space_required(struct kbase_mem_pool *pool, const si
  *                 either small or 2 MiB pages, depending on the number of pages requested.
  * @grow_2mb_pool: Pointer to variable to store which pool needs to grow - true for 2 MiB, false for
  *                 pool of small pages.
+ * @fallback_to_small:  Whether to fall back to small pages or not
  * @prealloc_sas:  Pointer to kbase_sub_alloc structures
  *
  * This function will try to allocate as many pages as possible from the context pool, then if
@@ -758,7 +1071,7 @@ static size_t estimate_pool_space_required(struct kbase_mem_pool *pool, const si
  */
 static bool page_fault_try_alloc(struct kbase_context *kctx, struct kbase_va_region *region,
 				 size_t new_pages, size_t *pages_to_grow, bool *grow_2mb_pool,
-				 struct kbase_sub_alloc **prealloc_sas)
+				 bool fallback_to_small, struct kbase_sub_alloc **prealloc_sas)
 {
 	size_t total_gpu_pages_alloced = 0;
 	size_t total_cpu_pages_alloced = 0;
@@ -776,7 +1089,8 @@ static bool page_fault_try_alloc(struct kbase_context *kctx, struct kbase_va_reg
 		return false;
 	}
 
-	if (kctx->kbdev->pagesize_2mb && new_pages >= NUM_PAGES_IN_2MB_LARGE_PAGE) {
+	if (kbase_is_large_pages_enabled() && new_pages >= NUM_PAGES_IN_2MB_LARGE_PAGE &&
+	    !fallback_to_small) {
 		root_pool = &kctx->mem_pools.large[region->gpu_alloc->group_id];
 		*grow_2mb_pool = true;
 	} else {
@@ -923,7 +1237,8 @@ void kbase_mmu_page_fault_worker(struct work_struct *data)
 	int err;
 	bool grown = false;
 	size_t pages_to_grow;
-	bool grow_2mb_pool;
+	bool grow_2mb_pool = false;
+	bool fallback_to_small = false;
 	struct kbase_sub_alloc *prealloc_sas[2] = { NULL, NULL };
 	int i;
 	size_t current_backed_size;
@@ -964,13 +1279,11 @@ void kbase_mmu_page_fault_worker(struct work_struct *data)
 #endif
 #endif
 
-#ifdef CONFIG_MALI_ARBITER_SUPPORT
 	/* check if we still have GPU */
 	if (unlikely(kbase_is_gpu_removed(kbdev))) {
 		dev_dbg(kbdev->dev, "%s: GPU has been removed", __func__);
 		goto fault_done;
 	}
-#endif
 
 	if (unlikely(fault->protected_mode)) {
 		kbase_mmu_report_fault_and_kill(kctx, faulting_as, "Protected mode fault", fault);
@@ -1093,7 +1406,7 @@ void kbase_mmu_page_fault_worker(struct work_struct *data)
 	}
 
 page_fault_retry:
-	if (kbdev->pagesize_2mb) {
+	if (kbase_is_large_pages_enabled() && !fallback_to_small) {
 		/* Preallocate (or re-allocate) memory for the sub-allocation structs if necessary */
 		for (i = 0; i != ARRAY_SIZE(prealloc_sas); ++i) {
 			if (!prealloc_sas[i]) {
@@ -1130,6 +1443,14 @@ page_fault_retry:
 		goto fault_done;
 	}
 
+	if (unlikely(region->gpu_alloc->type == KBASE_MEM_TYPE_ALIAS)) {
+		kbase_gpu_vm_unlock(kctx);
+		kbase_mmu_report_fault_and_kill(kctx, faulting_as,
+						"Unexpected page fault on an alias region",
+						&faulting_as->pf_data);
+		goto fault_done;
+	}
+
 	if (region->gpu_alloc->group_id >= MEMORY_GROUP_MANAGER_NR_GROUPS) {
 		kbase_gpu_vm_unlock(kctx);
 		kbase_mmu_report_fault_and_kill(kctx, faulting_as, "Bad physical memory group ID",
@@ -1180,10 +1501,14 @@ page_fault_retry:
 		 */
 		op_param.mmu_sync_info = mmu_sync_info;
 		op_param.kctx_id = kctx->id;
-		/* Can safely skip the invalidate for all levels in case
-		 * of duplicate page faults.
+		/* Usually it is safe to skip the MMU cache invalidate for all levels
+		 * in case of duplicate page faults. But for the pathological scenario
+		 * where the faulty VA gets mapped by the time page fault worker runs it
+		 * becomes imperative to invalidate MMU cache for all levels, otherwise
+		 * there is a possibility of repeated page faults on GPUs which support
+		 * fine-grained MMU cache invalidation.
 		 */
-		op_param.flush_skip_levels = 0xF;
+		op_param.flush_skip_levels = 0x0;
 		op_param.vpfn = fault_pfn;
 		op_param.nr = 1;
 		spin_lock_irqsave(&kbdev->hwaccess_lock, hwaccess_flags);
@@ -1217,10 +1542,14 @@ page_fault_retry:
 		/* See comment [1] about UNLOCK usage */
 		op_param.mmu_sync_info = mmu_sync_info;
 		op_param.kctx_id = kctx->id;
-		/* Can safely skip the invalidate for all levels in case
-		 * of duplicate page faults.
+		/* Usually it is safe to skip the MMU cache invalidate for all levels
+		 * in case of duplicate page faults. But for the pathological scenario
+		 * where the faulty VA gets mapped by the time page fault worker runs it
+		 * becomes imperative to invalidate MMU cache for all levels, otherwise
+		 * there is a possibility of repeated page faults on GPUs which support
+		 * fine-grained MMU cache invalidation.
 		 */
-		op_param.flush_skip_levels = 0xF;
+		op_param.flush_skip_levels = 0x0;
 		op_param.vpfn = fault_pfn;
 		op_param.nr = 1;
 		spin_lock_irqsave(&kbdev->hwaccess_lock, hwaccess_flags);
@@ -1249,7 +1578,7 @@ page_fault_retry:
 
 	spin_lock(&kctx->mem_partials_lock);
 	grown = page_fault_try_alloc(kctx, region, new_pages, &pages_to_grow, &grow_2mb_pool,
-				     prealloc_sas);
+				     fallback_to_small, prealloc_sas);
 	spin_unlock(&kctx->mem_partials_lock);
 
 	if (grown) {
@@ -1293,22 +1622,6 @@ page_fault_retry:
 		else
 			trace_mali_mmu_page_fault_grow(region, fault, new_pages);
 
-#if MALI_INCREMENTAL_RENDERING_JM
-		/* Switch to incremental rendering if we have nearly run out of
-		 * memory in a JIT memory allocation.
-		 */
-		if (region->threshold_pages &&
-		    kbase_reg_current_backed_size(region) > region->threshold_pages) {
-			dev_dbg(kctx->kbdev->dev, "%zu pages exceeded IR threshold %zu",
-				new_pages + current_backed_size, region->threshold_pages);
-
-			if (kbase_mmu_switch_to_ir(kctx, region) >= 0) {
-				dev_dbg(kctx->kbdev->dev, "Get region %pK for IR", (void *)region);
-				kbase_va_region_alloc_get(kctx, region);
-			}
-		}
-#endif
-
 		/* AS transaction begin */
 
 		/* clear MMU interrupt - this needs to be done after updating
@@ -1382,7 +1695,7 @@ page_fault_retry:
 		 * Otherwise fail the allocation.
 		 */
 		if (pages_to_grow > 0) {
-			if (kbdev->pagesize_2mb && grow_2mb_pool) {
+			if (kbase_is_large_pages_enabled() && grow_2mb_pool) {
 				/* Round page requirement up to nearest 2 MB */
 				struct kbase_mem_pool *const lp_mem_pool =
 					&kctx->mem_pools.large[group_id];
@@ -1392,6 +1705,15 @@ page_fault_retry:
 					lp_mem_pool->order;
 
 				ret = kbase_mem_pool_grow(lp_mem_pool, pages_to_grow, kctx->task);
+				/* Retry handling the fault with small pages if the required
+				 * number of 2MB pages couldn't be allocated.
+				 */
+				if (ret < 0) {
+					fallback_to_small = true;
+					dev_dbg(kbdev->dev,
+						"No room for 2MB pages, fallback to small pages");
+					goto page_fault_retry;
+				}
 			} else {
 				struct kbase_mem_pool *const mem_pool =
 					&kctx->mem_pools.small[group_id];
@@ -1436,12 +1758,32 @@ fault_done:
 	dev_dbg(kbdev->dev, "Leaving page_fault_worker %pK", (void *)data);
 }
 
+/**
+ * kbase_mmu_alloc_pgd() - Allocate a PGD
+ *
+ * @kbdev:    Pointer to the instance of a kbase device.
+ * @mmut:     Structure holding details of the MMU table for a kcontext.
+ *
+ * A 4KB sized PGD page is allocated for the PGD from the memory pool if PAGE_SIZE is 4KB.
+ * Otherwise PGD is sub-allocated from a page that is allocated from the memory pool or
+ * from one of the pages earlier allocated for the PGD of @mmut.
+ *
+ * Return:    Physical address of the allocated PGD.
+ */
 static phys_addr_t kbase_mmu_alloc_pgd(struct kbase_device *kbdev, struct kbase_mmu_table *mmut)
 {
 	u64 *page;
 	struct page *p;
 	phys_addr_t pgd;
 
+	lockdep_assert_held(&mmut->mmu_lock);
+
+#if GPU_PAGES_PER_CPU_PAGE > 1
+	pgd = allocate_from_pgd_pages_list(mmut);
+	if (pgd != KBASE_INVALID_PHYSICAL_ADDRESS)
+		return pgd;
+#endif
+
 	p = kbase_mem_pool_alloc(&kbdev->mem_pools.small[mmut->group_id]);
 	if (!p)
 		return KBASE_INVALID_PHYSICAL_ADDRESS;
@@ -1451,6 +1793,15 @@ static phys_addr_t kbase_mmu_alloc_pgd(struct kbase_device *kbdev, struct kbase_
 	if (page == NULL)
 		goto alloc_free;
 
+#if GPU_PAGES_PER_CPU_PAGE > 1
+	if (!alloc_pgd_page_metadata(kbdev, mmut, p)) {
+		kbase_kunmap(p, page);
+		goto alloc_free;
+	}
+	mmut->num_free_pgd_sub_pages += (GPU_PAGES_PER_CPU_PAGE - 1);
+	mmut->last_allocated_pgd_page = p;
+#endif
+
 	pgd = page_to_phys(p);
 
 	/* If the MMU tables belong to a context then account the memory usage
@@ -1469,12 +1820,12 @@ static phys_addr_t kbase_mmu_alloc_pgd(struct kbase_device *kbdev, struct kbase_
 
 	kbase_trace_gpu_mem_usage_inc(kbdev, mmut->kctx, 1);
 
-	kbdev->mmu_mode->entries_invalidate(page, KBASE_MMU_PAGE_ENTRIES);
+	kbdev->mmu_mode->entries_invalidate(page, KBASE_MMU_PAGE_ENTRIES * GPU_PAGES_PER_CPU_PAGE);
 
 	/* As this page is newly created, therefore there is no content to
 	 * clean or invalidate in the GPU caches.
 	 */
-	kbase_mmu_sync_pgd_cpu(kbdev, kbase_dma_addr(p), PAGE_SIZE);
+	kbase_mmu_sync_pgd_cpu(kbdev, pgd_dma_addr(p, pgd), PAGE_SIZE);
 
 	kbase_kunmap(p, page);
 	return pgd;
@@ -1516,7 +1867,7 @@ static int mmu_get_next_pgd(struct kbase_device *kbdev, struct kbase_mmu_table *
 	vpfn &= 0x1FF;
 
 	p = pfn_to_page(PFN_DOWN(*pgd));
-	page = kbase_kmap(p);
+	page = kmap_pgd(p, *pgd);
 	if (page == NULL) {
 		dev_err(kbdev->dev, "%s: kmap failure", __func__);
 		return -EINVAL;
@@ -1525,7 +1876,7 @@ static int mmu_get_next_pgd(struct kbase_device *kbdev, struct kbase_mmu_table *
 	if (!kbdev->mmu_mode->pte_is_valid(page[vpfn], level)) {
 		dev_dbg(kbdev->dev, "%s: invalid PTE at level %d vpfn 0x%llx", __func__, level,
 			vpfn);
-		kbase_kunmap(p, page);
+		kunmap_pgd(p, page);
 		return -EFAULT;
 	} else {
 		target_pgd = kbdev->mmu_mode->pte_to_phy_addr(
@@ -1533,7 +1884,7 @@ static int mmu_get_next_pgd(struct kbase_device *kbdev, struct kbase_mmu_table *
 				kbdev->mgm_dev, MGM_DEFAULT_PTE_GROUP, level, page[vpfn]));
 	}
 
-	kbase_kunmap(p, page);
+	kunmap_pgd(p, page);
 	*pgd = target_pgd;
 
 	return 0;
@@ -1595,6 +1946,7 @@ static int mmu_get_lowest_valid_pgd(struct kbase_device *kbdev, struct kbase_mmu
 
 	return err;
 }
+KBASE_ALLOW_ERROR_INJECTION_TEST_API(mmu_get_lowest_valid_pgd, ERRNO);
 
 /*
  * On success, sets out_pgd to the PGD for the specified level of translation
@@ -1664,10 +2016,10 @@ static void mmu_insert_pages_failure_recovery(struct kbase_device *kbdev,
 		for (level = MIDGARD_MMU_TOPLEVEL; level <= MIDGARD_MMU_BOTTOMLEVEL; level++) {
 			idx = (vpfn >> ((3 - level) * 9)) & 0x1FF;
 			pgds[level] = pgd;
-			page = kbase_kmap(p);
+			page = kmap_pgd(p, pgd);
 			if (mmu_mode->ate_is_valid(page[idx], level))
 				break; /* keep the mapping */
-			kbase_kunmap(p, page);
+			kunmap_pgd(p, page);
 			pgd = mmu_mode->pte_to_phy_addr(kbdev->mgm_dev->ops.mgm_pte_to_original_pte(
 				kbdev->mgm_dev, MGM_DEFAULT_PTE_GROUP, level, page[idx]));
 			p = phys_to_page(pgd);
@@ -1700,12 +2052,21 @@ static void mmu_insert_pages_failure_recovery(struct kbase_device *kbdev,
 		mmu_mode->entries_invalidate(&page[idx], pcount);
 
 		if (!num_of_valid_entries) {
-			kbase_kunmap(p, page);
+			mmu_mode->set_num_valid_entries(page, 0);
 
-			kbase_mmu_add_to_free_pgds_list(mmut, p);
+			kunmap_pgd(p, page);
+
+			kbase_mmu_update_and_free_parent_pgds(kbdev, mmut, pgds, vpfn, level - 1,
+							      KBASE_MMU_OP_NONE, dirty_pgds, 0);
+
+			/* No CPU and GPU cache maintenance is done here as caller would do the
+			 * complete flush of GPU cache and invalidation of TLB before the PGD
+			 * page is freed. CPU cache flush would be done when the PGD page is
+			 * returned to the memory pool.
+			 */
+
+			kbase_mmu_add_to_free_pgds_list(mmut, pgd);
 
-			kbase_mmu_update_and_free_parent_pgds(kbdev, mmut, pgds, vpfn, level,
-							      KBASE_MMU_OP_NONE, dirty_pgds);
 			vpfn += count;
 			continue;
 		}
@@ -1716,9 +2077,9 @@ static void mmu_insert_pages_failure_recovery(struct kbase_device *kbdev,
 		 * going to be done by the caller
 		 */
 		kbase_mmu_sync_pgd(kbdev, mmut->kctx, pgd + (idx * sizeof(u64)),
-				   kbase_dma_addr(p) + sizeof(u64) * idx, sizeof(u64) * pcount,
+				   pgd_dma_addr(p, pgd) + sizeof(u64) * idx, sizeof(u64) * pcount,
 				   KBASE_MMU_OP_NONE);
-		kbase_kunmap(p, page);
+		kunmap_pgd(p, page);
 next:
 		vpfn += count;
 	}
@@ -1728,7 +2089,8 @@ next:
 	 * going to happen to these pages at this stage. They might return
 	 * movable once they are returned to a memory pool.
 	 */
-	if (kbase_is_page_migration_enabled() && !ignore_page_migration && phys) {
+	if (kbase_is_page_migration_enabled() && !ignore_page_migration && phys &&
+	    !is_huge(*phys) && !is_partial(*phys)) {
 		const u64 num_pages = (to_vpfn - from_vpfn) / GPU_PAGES_PER_CPU_PAGE;
 		u64 i;
 
@@ -1836,7 +2198,7 @@ static int update_parent_pgds(struct kbase_device *kbdev, struct kbase_mmu_table
 			goto failure_recovery;
 		}
 
-		parent_page_va = kbase_kmap(parent_page);
+		parent_page_va = kmap_pgd(parent_page, parent_pgd);
 
 		if (unlikely(parent_page_va == NULL)) {
 			dev_err(kbdev->dev, "%s: kmap failure", __func__);
@@ -1848,15 +2210,17 @@ static int update_parent_pgds(struct kbase_device *kbdev, struct kbase_mmu_table
 
 		kbdev->mmu_mode->entry_set_pte(&pte, target_pgd);
 		parent_page_va[parent_vpfn] = kbdev->mgm_dev->ops.mgm_update_gpu_pte(
-			kbdev->mgm_dev, MGM_DEFAULT_PTE_GROUP, parent_index, pte);
+			kbdev->mgm_dev, MGM_DEFAULT_PTE_GROUP, PBHA_ID_DEFAULT, PTE_FLAGS_NONE,
+			parent_index, pte);
 		kbdev->mmu_mode->set_num_valid_entries(parent_page_va, current_valid_entries + 1);
-		kbase_kunmap(parent_page, parent_page_va);
+		kunmap_pgd(parent_page, parent_page_va);
 
 		if (parent_index != insert_level) {
 			/* Newly allocated PGDs */
-			kbase_mmu_sync_pgd_cpu(
-				kbdev, kbase_dma_addr(parent_page) + (parent_vpfn * sizeof(u64)),
-				sizeof(u64));
+			kbase_mmu_sync_pgd_cpu(kbdev,
+					       pgd_dma_addr(parent_page, parent_pgd) +
+						       (parent_vpfn * sizeof(u64)),
+					       sizeof(u64));
 		} else {
 			/* A new valid entry is added to an existing PGD. Perform the
 			 * invalidate operation for GPU cache as it could be having a
@@ -1864,7 +2228,7 @@ static int update_parent_pgds(struct kbase_device *kbdev, struct kbase_mmu_table
 			 */
 			kbase_mmu_sync_pgd(
 				kbdev, mmut->kctx, parent_pgd + (parent_vpfn * sizeof(u64)),
-				kbase_dma_addr(parent_page) + (parent_vpfn * sizeof(u64)),
+				pgd_dma_addr(parent_page, parent_pgd) + (parent_vpfn * sizeof(u64)),
 				sizeof(u64), KBASE_MMU_OP_FLUSH_PT);
 		}
 
@@ -1875,6 +2239,9 @@ static int update_parent_pgds(struct kbase_device *kbdev, struct kbase_mmu_table
 
 			spin_lock(&page_md->migrate_lock);
 
+#if GPU_PAGES_PER_CPU_PAGE > 1
+			page_md->status = PAGE_STATUS_SET(page_md->status, NOT_MOVABLE);
+#else
 			WARN_ON_ONCE(PAGE_STATUS_GET(page_md->status) != ALLOCATE_IN_PROGRESS ||
 				     IS_PAGE_ISOLATED(page_md->status));
 
@@ -1886,6 +2253,7 @@ static int update_parent_pgds(struct kbase_device *kbdev, struct kbase_mmu_table
 			} else {
 				page_md->status = PAGE_STATUS_SET(page_md->status, NOT_MOVABLE);
 			}
+#endif
 
 			spin_unlock(&page_md->migrate_lock);
 		}
@@ -1898,11 +2266,11 @@ failure_recovery:
 	for (; pgd_index < cur_level; pgd_index++) {
 		phys_addr_t pgd = pgds_to_insert[pgd_index];
 		struct page *pgd_page = pfn_to_page(PFN_DOWN(pgd));
-		u64 *pgd_page_va = kbase_kmap(pgd_page);
+		u64 *pgd_page_va = kmap_pgd(pgd_page, pgd);
 		u64 vpfn = (insert_vpfn >> ((3 - pgd_index) * 9)) & 0x1FF;
 
 		kbdev->mmu_mode->entries_invalidate(&pgd_page_va[vpfn], 1);
-		kbase_kunmap(pgd_page, pgd_page_va);
+		kunmap_pgd(pgd_page, pgd_page_va);
 	}
 
 	return err;
@@ -1918,6 +2286,8 @@ failure_recovery:
  * @level_high: The higher bound for the levels for which the PGD allocs are required
  * @new_pgds:   Ptr to an array (size MIDGARD_MMU_BOTTOMLEVEL+1) to write the
  *              newly allocated PGD addresses to.
+ * @pool_grown: True if new PGDs required the memory pool to grow to allocate more pages,
+ *              or false otherwise
  *
  * Numerically, level_low < level_high, not to be confused with top level and
  * bottom level concepts for MMU PGDs. They are only used as low and high bounds
@@ -1928,19 +2298,22 @@ failure_recovery:
  * * -ENOMEM - allocation failed for a PGD.
  */
 static int mmu_insert_alloc_pgds(struct kbase_device *kbdev, struct kbase_mmu_table *mmut,
-				 phys_addr_t *new_pgds, int level_low, int level_high)
+				 phys_addr_t *new_pgds, int level_low, int level_high,
+				 bool *pool_grown)
 {
 	int err = 0;
 	int i;
 
 	lockdep_assert_held(&mmut->mmu_lock);
 
+	*pool_grown = false;
 	for (i = level_low; i <= level_high; i++) {
+		if (new_pgds[i] != KBASE_INVALID_PHYSICAL_ADDRESS)
+			continue;
 		do {
 			new_pgds[i] = kbase_mmu_alloc_pgd(kbdev, mmut);
 			if (new_pgds[i] != KBASE_INVALID_PHYSICAL_ADDRESS)
 				break;
-
 			mutex_unlock(&mmut->mmu_lock);
 			err = kbase_mem_pool_grow(&kbdev->mem_pools.small[mmut->group_id],
 						  (size_t)level_high, NULL);
@@ -1948,17 +2321,9 @@ static int mmu_insert_alloc_pgds(struct kbase_device *kbdev, struct kbase_mmu_ta
 			if (err) {
 				dev_err(kbdev->dev, "%s: kbase_mem_pool_grow() returned error %d",
 					__func__, err);
-
-				/* Free all PGDs allocated in previous successful iterations
-				 * from (i-1) to level_low
-				 */
-				for (i = (i - 1); i >= level_low; i--) {
-					if (new_pgds[i] != KBASE_INVALID_PHYSICAL_ADDRESS)
-						kbase_mmu_free_pgd(kbdev, mmut, new_pgds[i]);
-				}
-
 				return err;
 			}
+			*pool_grown = true;
 		} while (1);
 	}
 
@@ -1988,6 +2353,8 @@ static int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 start_vp
 	if (WARN_ON(kctx == NULL))
 		return -EINVAL;
 
+	lockdep_assert_held(&kctx->reg_lock);
+
 	/* 64-bit address range is the max */
 	KBASE_DEBUG_ASSERT(start_vpfn <= (U64_MAX / PAGE_SIZE));
 
@@ -2023,6 +2390,7 @@ static int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 start_vp
 		struct page *p;
 		register unsigned int num_of_valid_entries;
 		bool newly_created_pgd = false;
+		bool pool_grown;
 
 		if (count > remain)
 			count = remain;
@@ -2030,6 +2398,10 @@ static int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 start_vp
 		cur_level = MIDGARD_MMU_BOTTOMLEVEL;
 		insert_level = cur_level;
 
+		for (l = MIDGARD_MMU_TOPLEVEL + 1; l <= cur_level; l++)
+			new_pgds[l] = KBASE_INVALID_PHYSICAL_ADDRESS;
+
+repeat_page_table_walk:
 		/*
 		 * Repeatedly calling mmu_get_lowest_valid_pgd() is clearly
 		 * suboptimal. We don't have to re-parse the whole tree
@@ -2044,7 +2416,7 @@ static int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 start_vp
 		if (err) {
 			dev_err(kbdev->dev, "%s: mmu_get_lowest_valid_pgd() returned error %d",
 				__func__, err);
-			goto fail_unlock;
+			goto fail_unlock_free_pgds;
 		}
 
 		/* No valid pgd at cur_level */
@@ -2053,9 +2425,12 @@ static int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 start_vp
 			 * down to the lowest valid pgd at insert_level
 			 */
 			err = mmu_insert_alloc_pgds(kbdev, mmut, new_pgds, (insert_level + 1),
-						    cur_level);
+						    cur_level, &pool_grown);
 			if (err)
-				goto fail_unlock;
+				goto fail_unlock_free_pgds;
+
+			if (pool_grown)
+				goto repeat_page_table_walk;
 
 			newly_created_pgd = true;
 
@@ -2070,7 +2445,7 @@ static int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 start_vp
 
 		p = pfn_to_page(PFN_DOWN(pgd));
 
-		pgd_page = kbase_kmap(p);
+		pgd_page = kmap_pgd(p, pgd);
 		if (!pgd_page) {
 			dev_err(kbdev->dev, "%s: kmap failure", __func__);
 			err = -ENOMEM;
@@ -2109,8 +2484,8 @@ static int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 start_vp
 		flush_op = newly_created_pgd ? KBASE_MMU_OP_NONE : KBASE_MMU_OP_FLUSH_PT;
 
 		kbase_mmu_sync_pgd(kbdev, kctx, pgd + (vindex * sizeof(u64)),
-				   kbase_dma_addr(p) + (vindex * sizeof(u64)), count * sizeof(u64),
-				   flush_op);
+				   pgd_dma_addr(p, pgd) + (vindex * sizeof(u64)),
+				   count * sizeof(u64), flush_op);
 
 		if (newly_created_pgd) {
 			err = update_parent_pgds(kbdev, mmut, cur_level, insert_level, insert_vpfn,
@@ -2121,14 +2496,14 @@ static int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 start_vp
 
 				kbdev->mmu_mode->entries_invalidate(&pgd_page[vindex], count);
 
-				kbase_kunmap(p, pgd_page);
+				kunmap_pgd(p, pgd_page);
 				goto fail_unlock_free_pgds;
 			}
 		}
 
 		insert_vpfn += count;
 		remain -= count;
-		kbase_kunmap(p, pgd_page);
+		kunmap_pgd(p, pgd_page);
 	}
 
 	mutex_unlock(&mmut->mmu_lock);
@@ -2141,9 +2516,9 @@ static int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 start_vp
 fail_unlock_free_pgds:
 	/* Free the pgds allocated by us from insert_level+1 to bottom level */
 	for (l = cur_level; l > insert_level; l--)
-		kbase_mmu_free_pgd(kbdev, mmut, new_pgds[l]);
+		if (new_pgds[l] != KBASE_INVALID_PHYSICAL_ADDRESS)
+			kbase_mmu_free_pgd(kbdev, mmut, new_pgds[l]);
 
-fail_unlock:
 	if (insert_vpfn != (start_vpfn * GPU_PAGES_PER_CPU_PAGE)) {
 		/* Invalidate the pages we have partially completed */
 		mmu_insert_pages_failure_recovery(kbdev, mmut, start_vpfn * GPU_PAGES_PER_CPU_PAGE,
@@ -2267,10 +2642,15 @@ u64 kbase_mmu_create_ate(struct kbase_device *const kbdev, struct tagged_addr co
 			 unsigned long const flags, int const level, int const group_id)
 {
 	u64 entry;
+	unsigned int pte_flags = 0;
 
 	kbdev->mmu_mode->entry_set_ate(&entry, phy, flags, level);
-	return kbdev->mgm_dev->ops.mgm_update_gpu_pte(kbdev->mgm_dev, (unsigned int)group_id, level,
-						      entry);
+
+	if ((flags & KBASE_REG_GPU_CACHED) && !(flags & KBASE_REG_CPU_CACHED))
+		pte_flags |= BIT(MMA_VIOLATION);
+
+	return kbdev->mgm_dev->ops.mgm_update_gpu_pte(kbdev->mgm_dev, (unsigned int)group_id,
+						      kbdev->mma_wa_id, pte_flags, level, entry);
 }
 
 static int mmu_insert_pages_no_flush(struct kbase_device *kbdev, struct kbase_mmu_table *mmut,
@@ -2289,6 +2669,9 @@ static int mmu_insert_pages_no_flush(struct kbase_device *kbdev, struct kbase_mm
 	int l, cur_level, insert_level;
 	struct tagged_addr *start_phys = phys;
 
+	if (mmut->kctx)
+		lockdep_assert_held(&mmut->kctx->reg_lock);
+
 	/* Note that 0 is a valid start_vpfn */
 	/* 64-bit address range is the max */
 	KBASE_DEBUG_ASSERT(start_vpfn <= (U64_MAX / PAGE_SIZE));
@@ -2311,17 +2694,30 @@ static int mmu_insert_pages_no_flush(struct kbase_device *kbdev, struct kbase_mm
 		register unsigned int num_of_valid_entries;
 		bool newly_created_pgd = false;
 		enum kbase_mmu_op_type flush_op;
+		bool pool_grown;
 
 		if (count > remain)
 			count = remain;
 
-		if (!vindex && is_huge_head(*phys))
+		/* There are 3 conditions to satisfy in order to create a level 2 ATE:
+		 *
+		 * - The GPU VA is aligned to 2 MB.
+		 * - The physical address is tagged as the head of a 2 MB region,
+		 *   which guarantees a contiguous physical address range.
+		 * - There are actually 2 MB of virtual and physical pages to map,
+		 *   i.e. 512 entries for the MMU page table.
+		 */
+		if (!vindex && is_huge_head(*phys) && (count == KBASE_MMU_PAGE_ENTRIES))
 			cur_level = MIDGARD_MMU_LEVEL(2);
 		else
 			cur_level = MIDGARD_MMU_BOTTOMLEVEL;
 
 		insert_level = cur_level;
 
+		for (l = MIDGARD_MMU_TOPLEVEL + 1; l <= cur_level; l++)
+			new_pgds[l] = KBASE_INVALID_PHYSICAL_ADDRESS;
+
+repeat_page_table_walk:
 		/*
 		 * Repeatedly calling mmu_get_lowest_valid_pgd() is clearly
 		 * suboptimal. We don't have to re-parse the whole tree
@@ -2336,7 +2732,7 @@ static int mmu_insert_pages_no_flush(struct kbase_device *kbdev, struct kbase_mm
 		if (err) {
 			dev_err(kbdev->dev, "%s: mmu_get_lowest_valid_pgd() returned error %d",
 				__func__, err);
-			goto fail_unlock;
+			goto fail_unlock_free_pgds;
 		}
 
 		/* No valid pgd at cur_level */
@@ -2345,9 +2741,12 @@ static int mmu_insert_pages_no_flush(struct kbase_device *kbdev, struct kbase_mm
 			 * down to the lowest valid pgd at insert_level
 			 */
 			err = mmu_insert_alloc_pgds(kbdev, mmut, new_pgds, (insert_level + 1),
-						    cur_level);
+						    cur_level, &pool_grown);
 			if (err)
-				goto fail_unlock;
+				goto fail_unlock_free_pgds;
+
+			if (pool_grown)
+				goto repeat_page_table_walk;
 
 			newly_created_pgd = true;
 
@@ -2361,7 +2760,7 @@ static int mmu_insert_pages_no_flush(struct kbase_device *kbdev, struct kbase_mm
 		}
 
 		p = pfn_to_page(PFN_DOWN(pgd));
-		pgd_page = kbase_kmap(p);
+		pgd_page = kmap_pgd(p, pgd);
 
 		if (!pgd_page) {
 			dev_err(kbdev->dev, "%s: kmap failure", __func__);
@@ -2431,8 +2830,8 @@ static int mmu_insert_pages_no_flush(struct kbase_device *kbdev, struct kbase_mm
 		flush_op = newly_created_pgd ? KBASE_MMU_OP_NONE : KBASE_MMU_OP_FLUSH_PT;
 
 		kbase_mmu_sync_pgd(kbdev, mmut->kctx, pgd + (vindex * sizeof(u64)),
-				   kbase_dma_addr(p) + (vindex * sizeof(u64)), count * sizeof(u64),
-				   flush_op);
+				   pgd_dma_addr(p, pgd) + (vindex * sizeof(u64)),
+				   count * sizeof(u64), flush_op);
 
 		if (newly_created_pgd) {
 			err = update_parent_pgds(kbdev, mmut, cur_level, insert_level, insert_vpfn,
@@ -2443,7 +2842,7 @@ static int mmu_insert_pages_no_flush(struct kbase_device *kbdev, struct kbase_mm
 
 				kbdev->mmu_mode->entries_invalidate(&pgd_page[vindex], count);
 
-				kbase_kunmap(p, pgd_page);
+				kunmap_pgd(p, pgd_page);
 				goto fail_unlock_free_pgds;
 			}
 		}
@@ -2451,7 +2850,7 @@ static int mmu_insert_pages_no_flush(struct kbase_device *kbdev, struct kbase_mm
 		phys += (count / GPU_PAGES_PER_CPU_PAGE);
 		insert_vpfn += count;
 		remain -= count;
-		kbase_kunmap(p, pgd_page);
+		kunmap_pgd(p, pgd_page);
 	}
 
 	mutex_unlock(&mmut->mmu_lock);
@@ -2461,9 +2860,9 @@ static int mmu_insert_pages_no_flush(struct kbase_device *kbdev, struct kbase_mm
 fail_unlock_free_pgds:
 	/* Free the pgds allocated by us from insert_level+1 to bottom level */
 	for (l = cur_level; l > insert_level; l--)
-		kbase_mmu_free_pgd(kbdev, mmut, new_pgds[l]);
+		if (new_pgds[l] != KBASE_INVALID_PHYSICAL_ADDRESS)
+			kbase_mmu_free_pgd(kbdev, mmut, new_pgds[l]);
 
-fail_unlock:
 	if (insert_vpfn != (start_vpfn * GPU_PAGES_PER_CPU_PAGE)) {
 		/* Invalidate the pages we have partially completed */
 		mmu_insert_pages_failure_recovery(kbdev, mmut, start_vpfn * GPU_PAGES_PER_CPU_PAGE,
@@ -2525,6 +2924,7 @@ int kbase_mmu_insert_pages(struct kbase_device *kbdev, struct kbase_mmu_table *m
 }
 
 KBASE_EXPORT_TEST_API(kbase_mmu_insert_pages);
+KBASE_ALLOW_ERROR_INJECTION_TEST_API(kbase_mmu_insert_pages, ERRNO);
 
 int kbase_mmu_insert_pages_skip_status_update(struct kbase_device *kbdev,
 					      struct kbase_mmu_table *mmut, u64 vpfn,
@@ -2582,6 +2982,7 @@ int kbase_mmu_insert_aliased_pages(struct kbase_device *kbdev, struct kbase_mmu_
 
 	return 0;
 }
+KBASE_ALLOW_ERROR_INJECTION_TEST_API(kbase_mmu_insert_aliased_pages, ERRNO);
 
 void kbase_mmu_update(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, int as_nr)
 {
@@ -2699,50 +3100,66 @@ KBASE_EXPORT_TEST_API(kbase_mmu_disable);
 static void kbase_mmu_update_and_free_parent_pgds(struct kbase_device *kbdev,
 						  struct kbase_mmu_table *mmut, phys_addr_t *pgds,
 						  u64 vpfn, int level,
-						  enum kbase_mmu_op_type flush_op, u64 *dirty_pgds)
+						  enum kbase_mmu_op_type flush_op, u64 *dirty_pgds,
+						  int as_nr)
 {
-	int current_level;
+	phys_addr_t current_pgd = pgds[level];
+	struct page *p = phys_to_page(current_pgd);
+	u64 *current_page = kmap_pgd(p, current_pgd);
+	unsigned int current_valid_entries = kbdev->mmu_mode->get_num_valid_entries(current_page);
+	unsigned int index = (vpfn >> ((3 - level) * 9)) & 0x1FFU;
 
 	lockdep_assert_held(&mmut->mmu_lock);
 
-	for (current_level = level - 1; current_level >= MIDGARD_MMU_LEVEL(0); current_level--) {
-		phys_addr_t current_pgd = pgds[current_level];
-		struct page *p = phys_to_page(current_pgd);
+	/* We need to track every level that needs updating */
+	if (dirty_pgds)
+		*dirty_pgds |= 1ULL << level;
 
-		u64 *current_page = kbase_kmap(p);
-		unsigned int current_valid_entries =
-			kbdev->mmu_mode->get_num_valid_entries(current_page);
-		unsigned int index = (vpfn >> ((3 - current_level) * 9)) & 0x1FFU;
+	kbdev->mmu_mode->entries_invalidate(&current_page[index], 1);
+	if (current_valid_entries == 1 && level != MIDGARD_MMU_LEVEL(0)) {
+		kbdev->mmu_mode->set_num_valid_entries(current_page, 0);
 
-		/* We need to track every level that needs updating */
-		if (dirty_pgds)
-			*dirty_pgds |= 1ULL << current_level;
+		kunmap_pgd(p, current_page);
 
-		kbdev->mmu_mode->entries_invalidate(&current_page[index], 1);
-		if (current_valid_entries == 1 && current_level != MIDGARD_MMU_LEVEL(0)) {
-			kbase_kunmap(p, current_page);
+		kbase_mmu_update_and_free_parent_pgds(kbdev, mmut, pgds, vpfn, level - 1, flush_op,
+						      dirty_pgds, as_nr);
 
-			/* Ensure the cacheline containing the last valid entry
-			 * of PGD is invalidated from the GPU cache, before the
-			 * PGD page is freed.
+		/* Check if fine grained GPU cache maintenance is being used */
+		if (flush_op == KBASE_MMU_OP_FLUSH_PT) {
+			/* Ensure the invalidated PTE is visible in memory right away */
+			kbase_mmu_sync_pgd_cpu(kbdev,
+					       pgd_dma_addr(p, current_pgd) + (index * sizeof(u64)),
+					       sizeof(u64));
+			/* Invalidate the GPU cache for the whole PGD page and not just for
+			 * the cacheline containing the invalidated PTE, as the PGD page is
+			 * going to be freed. There is an extremely remote possibility that
+			 * other cachelines (containing all invalid PTEs) of PGD page are
+			 * also present in the GPU cache.
 			 */
-			kbase_mmu_sync_pgd_gpu(kbdev, mmut->kctx,
-					       current_pgd + (index * sizeof(u64)), sizeof(u64),
-					       flush_op);
-
-			kbase_mmu_add_to_free_pgds_list(mmut, p);
-		} else {
-			current_valid_entries--;
-
-			kbdev->mmu_mode->set_num_valid_entries(current_page, current_valid_entries);
-
-			kbase_kunmap(p, current_page);
-
-			kbase_mmu_sync_pgd(kbdev, mmut->kctx, current_pgd + (index * sizeof(u64)),
-					   kbase_dma_addr(p) + (index * sizeof(u64)), sizeof(u64),
-					   flush_op);
-			break;
+			kbase_mmu_sync_pgd_gpu(kbdev, mmut->kctx, current_pgd, 512 * sizeof(u64),
+					       KBASE_MMU_OP_FLUSH_PT);
 		}
+
+		kbase_mmu_add_to_free_pgds_list(mmut, current_pgd);
+	} else {
+		current_valid_entries--;
+
+		kbdev->mmu_mode->set_num_valid_entries(current_page, current_valid_entries);
+
+		kunmap_pgd(p, current_page);
+
+		kbase_mmu_sync_pgd(kbdev, mmut->kctx, current_pgd + (index * sizeof(u64)),
+				   pgd_dma_addr(p, current_pgd) + (index * sizeof(u64)),
+				   sizeof(u64), flush_op);
+
+		/* When fine grained GPU cache maintenance is used then invalidate the MMU caches
+		 * now as the top most level PGD entry, affected by the teardown operation, has
+		 * been invalidated (both in memory as well as in GPU L2 cache). This is to avoid
+		 * the possibility of invalid ATEs being reloaded into the GPU L2 cache whilst the
+		 * teardown is happening.
+		 */
+		if (flush_op == KBASE_MMU_OP_FLUSH_PT)
+			mmu_invalidate_on_teardown(kbdev, mmut->kctx, vpfn, 1, level, as_nr);
 	}
 }
 
@@ -2783,13 +3200,11 @@ static void mmu_flush_invalidate_teardown_pages(struct kbase_device *kbdev,
 	}
 #if MALI_USE_CSF
 	else {
-		/* Partial GPU cache flush with MMU cache invalidation */
+		/* Partial GPU cache flush of the pages that were unmapped */
 		unsigned long irq_flags;
 		unsigned int i;
 		bool flush_done = false;
 
-		mmu_invalidate(kbdev, kctx, as_nr, op_param);
-
 		for (i = 0; !flush_done && i < phys_page_nr; i++) {
 			spin_lock_irqsave(&kbdev->hwaccess_lock, irq_flags);
 			if (kbdev->pm.backend.gpu_ready && (!kctx || kctx->as_nr >= 0))
@@ -2809,7 +3224,7 @@ static void mmu_flush_invalidate_teardown_pages(struct kbase_device *kbdev,
 static int kbase_mmu_teardown_pgd_pages(struct kbase_device *kbdev, struct kbase_mmu_table *mmut,
 					u64 vpfn, size_t nr, u64 *dirty_pgds,
 					struct list_head *free_pgds_list,
-					enum kbase_mmu_op_type flush_op)
+					enum kbase_mmu_op_type flush_op, int as_nr)
 {
 	struct kbase_mmu_mode const *mmu_mode = kbdev->mmu_mode;
 
@@ -2832,41 +3247,29 @@ static int kbase_mmu_teardown_pgd_pages(struct kbase_device *kbdev, struct kbase
 		phys_addr_t pgd = mmut->pgd;
 		struct page *p = phys_to_page(pgd);
 
-		if (count > nr)
-			count = nr;
+		count = MIN(nr, count);
 
 		/* need to check if this is a 2MB page or a small page */
 		for (level = MIDGARD_MMU_TOPLEVEL; level <= MIDGARD_MMU_BOTTOMLEVEL; level++) {
 			phys_addr_t next_pgd;
 
 			index = (vpfn >> ((3 - level) * 9)) & 0x1FF;
-			page = kbase_kmap(p);
+			page = kmap_pgd(p, pgd);
 			if (mmu_mode->ate_is_valid(page[index], level))
 				break; /* keep the mapping */
 			else if (!mmu_mode->pte_is_valid(page[index], level)) {
-				/* nothing here, advance */
-				switch (level) {
-				case MIDGARD_MMU_LEVEL(0):
-					count = 134217728;
-					break;
-				case MIDGARD_MMU_LEVEL(1):
-					count = 262144;
-					break;
-				case MIDGARD_MMU_LEVEL(2):
-					count = 512;
-					break;
-				case MIDGARD_MMU_LEVEL(3):
-					count = 1;
-					break;
-				}
-				if (count > nr)
-					count = nr;
+				dev_warn(kbdev->dev, "Invalid PTE found @ level %d for VA %llx",
+					 level, vpfn << PAGE_SHIFT);
+				/* nothing here, advance to the next PTE of the current level */
+				count = (1 << ((3 - level) * 9));
+				count -= (vpfn & (count - 1));
+				count = MIN(nr, count);
 				goto next;
 			}
 			next_pgd = mmu_mode->pte_to_phy_addr(
 				kbdev->mgm_dev->ops.mgm_pte_to_original_pte(
 					kbdev->mgm_dev, MGM_DEFAULT_PTE_GROUP, level, page[index]));
-			kbase_kunmap(p, page);
+			kunmap_pgd(p, page);
 			pgds[level] = pgd;
 			pgd = next_pgd;
 			p = phys_to_page(pgd);
@@ -2877,7 +3280,7 @@ static int kbase_mmu_teardown_pgd_pages(struct kbase_device *kbdev, struct kbase
 		case MIDGARD_MMU_LEVEL(1):
 			dev_warn(kbdev->dev, "%s: No support for ATEs at level %d", __func__,
 				 level);
-			kbase_kunmap(p, page);
+			kunmap_pgd(p, page);
 			goto out;
 		case MIDGARD_MMU_LEVEL(2):
 			/* can only teardown if count >= 512 */
@@ -2915,19 +3318,36 @@ static int kbase_mmu_teardown_pgd_pages(struct kbase_device *kbdev, struct kbase
 		mmu_mode->entries_invalidate(&page[index], pcount);
 
 		if (!num_of_valid_entries) {
-			kbase_kunmap(p, page);
+			mmu_mode->set_num_valid_entries(page, 0);
 
-			/* Ensure the cacheline(s) containing the last valid entries
-			 * of PGD is invalidated from the GPU cache, before the
-			 * PGD page is freed.
+			kunmap_pgd(p, page);
+
+			/* To avoid the invalid ATEs from the PGD page (that is going to be freed)
+			 * from getting reloaded into the GPU L2 cache whilst the teardown is
+			 * happening, the fine grained GPU L2 cache maintenance is done in the top
+			 * to bottom level PGD order. MMU cache invalidation is done after
+			 * invalidating the entry of top most level PGD, affected by the teardown.
 			 */
-			kbase_mmu_sync_pgd_gpu(kbdev, mmut->kctx, pgd + (index * sizeof(u64)),
-					       pcount * sizeof(u64), flush_op);
+			kbase_mmu_update_and_free_parent_pgds(kbdev, mmut, pgds, vpfn, level - 1,
+							      flush_op, dirty_pgds, as_nr);
 
-			kbase_mmu_add_to_free_pgds_list(mmut, p);
+			/* Check if fine grained GPU cache maintenance is being used */
+			if (flush_op == KBASE_MMU_OP_FLUSH_PT) {
+				/* Ensure the invalidated ATEs are visible in memory right away */
+				kbase_mmu_sync_pgd_cpu(kbdev,
+						       pgd_dma_addr(p, pgd) + (index * sizeof(u64)),
+						       pcount * sizeof(u64));
+				/* Invalidate the GPU cache for the whole PGD page and not just for
+				 * the cachelines containing the invalidated ATEs, as the PGD page
+				 * is going to be freed. There is an extremely remote possibility
+				 * that other cachelines (containing all invalid ATEs) of PGD page
+				 * are also present in the GPU cache.
+				 */
+				kbase_mmu_sync_pgd_gpu(kbdev, mmut->kctx, pgd, 512 * sizeof(u64),
+						       KBASE_MMU_OP_FLUSH_PT);
+			}
 
-			kbase_mmu_update_and_free_parent_pgds(kbdev, mmut, pgds, vpfn, level,
-							      flush_op, dirty_pgds);
+			kbase_mmu_add_to_free_pgds_list(mmut, pgd);
 
 			vpfn += count;
 			nr -= count;
@@ -2937,10 +3357,16 @@ static int kbase_mmu_teardown_pgd_pages(struct kbase_device *kbdev, struct kbase
 		mmu_mode->set_num_valid_entries(page, num_of_valid_entries);
 
 		kbase_mmu_sync_pgd(kbdev, mmut->kctx, pgd + (index * sizeof(u64)),
-				   kbase_dma_addr(p) + (index * sizeof(u64)), pcount * sizeof(u64),
-				   flush_op);
+				   pgd_dma_addr(p, pgd) + (index * sizeof(u64)),
+				   pcount * sizeof(u64), flush_op);
+
+		/* When fine grained GPU cache maintenance is used then invalidation of MMU cache
+		 * is done inline for every bottom level PGD touched in the teardown.
+		 */
+		if (flush_op == KBASE_MMU_OP_FLUSH_PT)
+			mmu_invalidate_on_teardown(kbdev, mmut->kctx, vpfn, pcount, level, as_nr);
 next:
-		kbase_kunmap(p, page);
+		kunmap_pgd(p, page);
 		vpfn += count;
 		nr -= count;
 	}
@@ -3032,7 +3458,7 @@ static int mmu_teardown_pages(struct kbase_device *kbdev, struct kbase_mmu_table
 	mutex_lock(&mmut->mmu_lock);
 
 	err = kbase_mmu_teardown_pgd_pages(kbdev, mmut, vpfn, nr_virt_pages, &dirty_pgds,
-					   &free_pgds_list, flush_op);
+					   &free_pgds_list, flush_op, as_nr);
 
 	/* Set up MMU operation parameters. See above about MMU cache flush strategy. */
 	op_param = (struct kbase_mmu_hw_op_param){
@@ -3069,6 +3495,7 @@ int kbase_mmu_teardown_pages(struct kbase_device *kbdev, struct kbase_mmu_table
 	return mmu_teardown_pages(kbdev, mmut, vpfn, phys, nr_phys_pages, nr_virt_pages, as_nr,
 				  false);
 }
+KBASE_EXPORT_TEST_API(kbase_mmu_teardown_pages);
 
 int kbase_mmu_teardown_imported_pages(struct kbase_device *kbdev, struct kbase_mmu_table *mmut,
 				      u64 vpfn, struct tagged_addr *phys, size_t nr_phys_pages,
@@ -3144,7 +3571,7 @@ static int kbase_mmu_update_pages_no_flush(struct kbase_device *kbdev, struct kb
 			goto fail_unlock;
 
 		p = pfn_to_page(PFN_DOWN(pgd));
-		pgd_page = kbase_kmap(p);
+		pgd_page = kmap_pgd(p, pgd);
 		if (!pgd_page) {
 			dev_warn(kbdev->dev, "kmap failure on update_pages");
 			err = -ENOMEM;
@@ -3164,7 +3591,7 @@ static int kbase_mmu_update_pages_no_flush(struct kbase_device *kbdev, struct kb
 			pgd_page[level_index] = kbase_mmu_create_ate(
 				kbdev, *target_phys, flags, MIDGARD_MMU_LEVEL(2), group_id);
 			kbase_mmu_sync_pgd(kbdev, mmut->kctx, pgd + (level_index * sizeof(u64)),
-					   kbase_dma_addr(p) + (level_index * sizeof(u64)),
+					   pgd_dma_addr(p, pgd) + (level_index * sizeof(u64)),
 					   sizeof(u64), KBASE_MMU_OP_NONE);
 		} else {
 			for (i = 0; i < count; i += GPU_PAGES_PER_CPU_PAGE) {
@@ -3189,7 +3616,7 @@ static int kbase_mmu_update_pages_no_flush(struct kbase_device *kbdev, struct kb
 			 * will be done by the caller.
 			 */
 			kbase_mmu_sync_pgd(kbdev, mmut->kctx, pgd + (index * sizeof(u64)),
-					   kbase_dma_addr(p) + (index * sizeof(u64)),
+					   pgd_dma_addr(p, pgd) + (index * sizeof(u64)),
 					   count * sizeof(u64), KBASE_MMU_OP_NONE);
 		}
 
@@ -3202,7 +3629,7 @@ static int kbase_mmu_update_pages_no_flush(struct kbase_device *kbdev, struct kb
 		vpfn += count;
 		nr -= count;
 
-		kbase_kunmap(p, pgd_page);
+		kunmap_pgd(p, pgd_page);
 	}
 
 	mutex_unlock(&mmut->mmu_lock);
@@ -3438,7 +3865,7 @@ int kbase_mmu_migrate_page(struct tagged_addr old_phys, struct tagged_addr new_p
 		goto get_pgd_at_level_error;
 	}
 
-	pgd_page = kbase_kmap(phys_to_page(pgd));
+	pgd_page = kmap_pgd(phys_to_page(pgd), pgd);
 	if (!pgd_page) {
 		dev_warn(kbdev->dev, "%s: kmap failure for PGD page.", __func__);
 		ret = -EINVAL;
@@ -3547,8 +3974,10 @@ int kbase_mmu_migrate_page(struct tagged_addr old_phys, struct tagged_addr new_p
 				kbdev->mgm_dev, MGM_DEFAULT_PTE_GROUP, level, pgd_page[index])));
 #endif
 		kbdev->mmu_mode->entry_set_pte(&managed_pte, as_phys_addr_t(new_phys));
-		*target = kbdev->mgm_dev->ops.mgm_update_gpu_pte(
-			kbdev->mgm_dev, MGM_DEFAULT_PTE_GROUP, level, managed_pte);
+		*target = kbdev->mgm_dev->ops.mgm_update_gpu_pte(kbdev->mgm_dev,
+								 MGM_DEFAULT_PTE_GROUP,
+								 PBHA_ID_DEFAULT, PTE_FLAGS_NONE,
+								 level, managed_pte);
 	}
 
 	kbdev->mmu_mode->set_num_valid_entries(pgd_page, num_of_valid_entries);
@@ -3559,55 +3988,64 @@ int kbase_mmu_migrate_page(struct tagged_addr old_phys, struct tagged_addr new_p
 	 * maintenance is necessary.
 	 */
 	kbase_mmu_sync_pgd(kbdev, mmut->kctx, pgd + (index * sizeof(u64)),
-			   kbase_dma_addr(phys_to_page(pgd)) + (index * sizeof(u64)),
+			   pgd_dma_addr(phys_to_page(pgd), pgd) + (index * sizeof(u64)),
 			   pgd_entries_to_sync * sizeof(u64), KBASE_MMU_OP_FLUSH_PT);
 
 	/* Unlock MMU region.
 	 *
-	 * Notice that GPUs which don't issue flush commands via GPU control
-	 * still need an additional GPU cache flush here, this time only
-	 * for the page table, because the function call above to sync PGDs
-	 * won't have any effect on them.
+	 * For GPUs without FLUSH_PA_RANGE support, the GPU caches were completely
+	 * cleaned and invalidated after locking the virtual address range affected
+	 * by the migration. As long as the lock is in place, GPU access to the
+	 * locked range would remain blocked. So there is no need to clean and
+	 * invalidate the GPU caches again after copying the page contents
+	 * of old page and updating the page table entry to point to new page.
+	 *
+	 * For GPUs with FLUSH_PA_RANGE support, the contents of old page would
+	 * have been evicted from the GPU caches after locking the virtual address
+	 * range. The page table entry contents also would have been invalidated
+	 * from the GPU's L2 cache by kbase_mmu_sync_pgd() after the page table
+	 * update.
+	 *
+	 * If kbase_mmu_hw_do_unlock_no_addr() fails, GPU reset will be triggered which
+	 * would remove the MMU lock and so there is no need to rollback page migration
+	 * and the failure can be ignored.
 	 */
 	spin_lock_irqsave(&kbdev->hwaccess_lock, hwaccess_flags);
 	if (kbdev->pm.backend.gpu_ready && mmut->kctx->as_nr >= 0) {
 		int as_nr = mmut->kctx->as_nr;
 		struct kbase_as *as = &kbdev->as[as_nr];
+		int local_ret = kbase_mmu_hw_do_unlock_no_addr(kbdev, as, &op_param);
 
-		if (mmu_flush_cache_on_gpu_ctrl(kbdev)) {
-			ret = kbase_mmu_hw_do_unlock(kbdev, as, &op_param);
-		} else {
-			ret = kbase_gpu_cache_flush_and_busy_wait(kbdev,
-								  GPU_COMMAND_CACHE_CLN_INV_L2);
-			if (!ret)
-				ret = kbase_mmu_hw_do_unlock_no_addr(kbdev, as, &op_param);
-		}
+		CSTD_UNUSED(local_ret);
 	}
+
+	/* Release the transition prevention in L2 by ending the transaction */
+	mmu_page_migration_transaction_end(kbdev);
 	spin_unlock_irqrestore(&kbdev->hwaccess_lock, hwaccess_flags);
 	/* Releasing locks before checking the migration transaction error state */
 	mutex_unlock(&kbdev->mmu_hw_mutex);
 
-	spin_lock_irqsave(&kbdev->hwaccess_lock, hwaccess_flags);
-	/* Release the transition prevention in L2 by ending the transaction */
-	mmu_page_migration_transaction_end(kbdev);
-	spin_unlock_irqrestore(&kbdev->hwaccess_lock, hwaccess_flags);
-
-	/* Checking the final migration transaction error state */
-	if (ret < 0) {
-		dev_err(kbdev->dev, "%s: failed to unlock MMU region.", __func__);
-		goto undo_mappings;
-	}
-
 	/* Undertaking metadata transfer, while we are holding the mmu_lock */
 	spin_lock(&page_md->migrate_lock);
 	if (level == MIDGARD_MMU_BOTTOMLEVEL) {
-		size_t page_array_index = (page_md->data.mapped.vpfn / GPU_PAGES_PER_CPU_PAGE) -
-					  page_md->data.mapped.reg->start_pfn;
+		enum kbase_page_status page_status = PAGE_STATUS_GET(page_md->status);
 
-		WARN_ON(PAGE_STATUS_GET(page_md->status) != ALLOCATED_MAPPED);
+		if (page_status == ALLOCATED_MAPPED) {
+			/* Replace page in array of pages of the physical allocation. */
+			size_t page_array_index =
+				div_u64(page_md->data.mapped.vpfn, GPU_PAGES_PER_CPU_PAGE) -
+				page_md->data.mapped.reg->start_pfn;
 
-		/* Replace page in array of pages of the physical allocation. */
-		page_md->data.mapped.reg->gpu_alloc->pages[page_array_index] = new_phys;
+			page_md->data.mapped.reg->gpu_alloc->pages[page_array_index] = new_phys;
+		} else if (page_status == NOT_MOVABLE) {
+			dev_dbg(kbdev->dev,
+				"%s: migration completed and page has become NOT_MOVABLE.",
+				__func__);
+		} else {
+			dev_WARN(kbdev->dev,
+				 "%s: migration completed but page has moved to status %d.",
+				 __func__, page_status);
+		}
 	}
 	/* Update the new page dma_addr with the transferred metadata from the old_page */
 	page_md->dma_addr = new_dma_addr;
@@ -3618,7 +4056,7 @@ int kbase_mmu_migrate_page(struct tagged_addr old_phys, struct tagged_addr new_p
 	set_page_private(as_page(old_phys), 0);
 
 l2_state_defer_out:
-	kbase_kunmap(phys_to_page(pgd), pgd_page);
+	kunmap_pgd(phys_to_page(pgd), pgd_page);
 pgd_page_map_error:
 get_pgd_at_level_error:
 page_state_change_out:
@@ -3633,7 +4071,7 @@ old_page_map_error:
 undo_mappings:
 	/* Unlock the MMU table and undo mappings. */
 	mutex_unlock(&mmut->mmu_lock);
-	kbase_kunmap(phys_to_page(pgd), pgd_page);
+	kunmap_pgd(phys_to_page(pgd), pgd_page);
 	kbase_kunmap(as_page(new_phys), new_page);
 	kbase_kunmap(as_page(old_phys), old_page);
 
@@ -3652,7 +4090,7 @@ static void mmu_teardown_level(struct kbase_device *kbdev, struct kbase_mmu_tabl
 
 	lockdep_assert_held(&mmut->mmu_lock);
 
-	pgd_page = kbase_kmap_atomic(p);
+	pgd_page = kmap_atomic_pgd(p, pgd);
 	/* kmap_atomic should NEVER fail. */
 	if (WARN_ON_ONCE(pgd_page == NULL))
 		return;
@@ -3661,7 +4099,7 @@ static void mmu_teardown_level(struct kbase_device *kbdev, struct kbase_mmu_tabl
 		 * kmap_atomic usage
 		 */
 		pgd_page_buffer = mmut->scratch_mem.teardown_pages.levels[level];
-		memcpy(pgd_page_buffer, pgd_page, PAGE_SIZE);
+		memcpy(pgd_page_buffer, pgd_page, GPU_PAGE_SIZE);
 	}
 
 	/* When page migration is enabled, kbase_region_tracker_term() would ensure
@@ -3672,7 +4110,7 @@ static void mmu_teardown_level(struct kbase_device *kbdev, struct kbase_mmu_tabl
 		WARN_ON_ONCE(kbdev->mmu_mode->get_num_valid_entries(pgd_page));
 	/* Invalidate page after copying */
 	mmu_mode->entries_invalidate(pgd_page, KBASE_MMU_PAGE_ENTRIES);
-	kbase_kunmap_atomic(pgd_page);
+	kunmap_atomic_pgd(pgd_page);
 	pgd_page = pgd_page_buffer;
 
 	if (level < MIDGARD_MMU_BOTTOMLEVEL) {
@@ -3691,13 +4129,20 @@ static void mmu_teardown_level(struct kbase_device *kbdev, struct kbase_mmu_tabl
 	kbase_mmu_free_pgd(kbdev, mmut, pgd);
 }
 
-static void kbase_mmu_mark_non_movable(struct page *page)
+static void kbase_mmu_mark_non_movable(struct kbase_device *const kbdev, struct page *page)
 {
 	struct kbase_page_metadata *page_md;
 
 	if (!kbase_is_page_migration_enabled())
 		return;
 
+	/* Composite large-page is excluded from migration, trigger a warn if a development
+	 * wrongly leads to it.
+	 */
+	if (is_huge_head(as_tagged(page_to_phys(page))) ||
+	    is_partial(as_tagged(page_to_phys(page))))
+		dev_WARN(kbdev->dev, "%s: migration on large-page attempted.", __func__);
+
 	page_md = kbase_page_private(page);
 
 	spin_lock(&page_md->migrate_lock);
@@ -3725,6 +4170,10 @@ int kbase_mmu_init(struct kbase_device *const kbdev, struct kbase_mmu_table *con
 	mmut->kctx = kctx;
 	mmut->pgd = KBASE_INVALID_PHYSICAL_ADDRESS;
 
+#if GPU_PAGES_PER_CPU_PAGE > 1
+	INIT_LIST_HEAD(&mmut->pgd_pages_list);
+#endif
+
 	/* We allocate pages into the kbdev memory pool, then
 	 * kbase_mmu_alloc_pgd will allocate out of that pool. This is done to
 	 * avoid allocations from the kernel happening with the lock held.
@@ -3739,10 +4188,12 @@ int kbase_mmu_init(struct kbase_device *const kbdev, struct kbase_mmu_table *con
 			return -ENOMEM;
 		}
 
+		mutex_lock(&mmut->mmu_lock);
 		mmut->pgd = kbase_mmu_alloc_pgd(kbdev, mmut);
+		mutex_unlock(&mmut->mmu_lock);
 	}
 
-	kbase_mmu_mark_non_movable(pfn_to_page(PFN_DOWN(mmut->pgd)));
+	kbase_mmu_mark_non_movable(kbdev, pfn_to_page(PFN_DOWN(mmut->pgd)));
 	return 0;
 }
 
@@ -3800,6 +4251,7 @@ static size_t kbasep_mmu_dump_level(struct kbase_context *kctx, phys_addr_t pgd,
 	size_t dump_size;
 	struct kbase_device *kbdev;
 	struct kbase_mmu_mode const *mmu_mode;
+	struct page *p;
 
 	if (WARN_ON(kctx == NULL))
 		return 0;
@@ -3808,7 +4260,8 @@ static size_t kbasep_mmu_dump_level(struct kbase_context *kctx, phys_addr_t pgd,
 	kbdev = kctx->kbdev;
 	mmu_mode = kbdev->mmu_mode;
 
-	pgd_page = kbase_kmap(pfn_to_page(PFN_DOWN(pgd)));
+	p = pfn_to_page(PFN_DOWN(pgd));
+	pgd_page = kmap_pgd(p, pgd);
 	if (!pgd_page) {
 		dev_warn(kbdev->dev, "%s: kmap failure", __func__);
 		return 0;
@@ -3842,7 +4295,7 @@ static size_t kbasep_mmu_dump_level(struct kbase_context *kctx, phys_addr_t pgd,
 				dump_size = kbasep_mmu_dump_level(kctx, target_pgd, level + 1,
 								  buffer, size_left);
 				if (!dump_size) {
-					kbase_kunmap(pfn_to_page(PFN_DOWN(pgd)), pgd_page);
+					kunmap_pgd(p, pgd_page);
 					return 0;
 				}
 				size += dump_size;
@@ -3850,7 +4303,7 @@ static size_t kbasep_mmu_dump_level(struct kbase_context *kctx, phys_addr_t pgd,
 		}
 	}
 
-	kbase_kunmap(pfn_to_page(PFN_DOWN(pgd)), pgd_page);
+	kunmap_pgd(p, pgd_page);
 
 	return size;
 }
@@ -3956,7 +4409,6 @@ void kbase_mmu_bus_fault_worker(struct work_struct *data)
 		return;
 	}
 
-#ifdef CONFIG_MALI_ARBITER_SUPPORT
 	/* check if we still have GPU */
 	if (unlikely(kbase_is_gpu_removed(kbdev))) {
 		dev_dbg(kbdev->dev, "%s: GPU has been removed", __func__);
@@ -3964,7 +4416,6 @@ void kbase_mmu_bus_fault_worker(struct work_struct *data)
 		atomic_dec(&kbdev->faults_pending);
 		return;
 	}
-#endif
 
 	if (unlikely(fault->protected_mode)) {
 		kbase_mmu_report_fault_and_kill(kctx, faulting_as, "Permission failure", fault);
diff --git a/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu_faults_decoder.c b/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu_faults_decoder.c
new file mode 100644
index 000000000000..548d88cf216e
--- /dev/null
+++ b/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu_faults_decoder.c
@@ -0,0 +1,124 @@
+// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
+/*
+ *
+ * (C) COPYRIGHT 2024 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+/**
+ * DOC: Base kernel MMU faults decoder.
+ */
+
+#include <mmu/mali_kbase_mmu_faults_decoder.h>
+#include <mmu/mali_kbase_mmu_faults_decoder_luts.h>
+#if MALI_USE_CSF
+#include <mmu/backend/mali_kbase_mmu_faults_decoder_luts_csf.h>
+#else
+#include <mmu/backend/mali_kbase_mmu_faults_decoder_luts_jm.h>
+#endif
+
+#include <hw_access/mali_kbase_hw_access_regmap.h>
+#include <mali_kbase.h>
+
+/* Extract the internal requester bits of @source_id; their position depends on the model. */
+unsigned int fault_source_id_internal_requester_get(struct kbase_device *kbdev,
+						    unsigned int source_id)
+{
+	if (kbdev->gpu_props.gpu_id.product_model >= GPU_ID_MODEL_MAKE(14, 0))
+		return (source_id & 0x3F);
+	return ((source_id >> 4) & 0xF);
+}
+
+/* Name of the core type encoded in @source_id, using the layout of this GPU's model. */
+static inline const char *source_id_enc_core_type_get_str(struct kbase_device *kbdev,
+							  unsigned int source_id)
+{
+	if (kbdev->gpu_props.gpu_id.product_model >= GPU_ID_MODEL_MAKE(14, 0))
+		return decode_fault_source_core_type_t_name(
+			FAULT_SOURCE_ID_CORE_TYPE_GET(source_id), kbdev->gpu_props.gpu_id.arch_id);
+	return decode_fault_source_core_id_t_core_type(FAULT_SOURCE_ID_CORE_ID_GET(source_id),
+						       kbdev->gpu_props.gpu_id.arch_id);
+}
+/* Decode the internal requester of a fault's SOURCE_ID into a human readable string. */
+const char *fault_source_id_internal_requester_get_str(struct kbase_device *kbdev,
+						       unsigned int source_id,
+						       unsigned int access_type)
+{
+	unsigned int ir = fault_source_id_internal_requester_get(kbdev, source_id);
+	const char *core_type = source_id_enc_core_type_get_str(kbdev, source_id);
+	unsigned int utlb_id = 0;
+
+	if (kbdev->gpu_props.gpu_id.product_model < GPU_ID_MODEL_MAKE(14, 0))
+		utlb_id = FAULT_SOURCE_ID_UTLB_ID_GET(source_id);
+
+	if (!strcmp(core_type, "shader")) {
+		if (utlb_id == 0) {
+			if (access_type == AS_FAULTSTATUS_ACCESS_TYPE_READ)
+				return decode_fault_source_shader_r_t(
+					ir, kbdev->gpu_props.gpu_id.arch_id);
+			else
+				return decode_fault_source_shader_w_t(
+					ir, kbdev->gpu_props.gpu_id.arch_id);
+		} else
+			return "Load/store cache";
+	} else if (!strcmp(core_type, "tiler")) {
+#if MALI_USE_CSF
+		if (utlb_id == 0) {
+			if (access_type == AS_FAULTSTATUS_ACCESS_TYPE_READ)
+				return decode_fault_source_tiler_r_t(
+					ir, kbdev->gpu_props.gpu_id.arch_id);
+			else
+				return decode_fault_source_tiler_w_t(
+					ir, kbdev->gpu_props.gpu_id.arch_id);
+		} else
+			return "The polygon list writer. No further details.";
+#else
+		return (utlb_id == 0) ? "Anything other than the polygon list writer" :
+					      "The polygon list writer";
+#endif
+	}
+#if MALI_USE_CSF
+	else if (!strcmp(core_type, "csf")) {
+		if (access_type == AS_FAULTSTATUS_ACCESS_TYPE_READ)
+			return decode_fault_source_csf_r_t(ir, kbdev->gpu_props.gpu_id.arch_id);
+		else
+			return decode_fault_source_csf_w_t(ir, kbdev->gpu_props.gpu_id.arch_id);
+	}
+#else
+	else if (!strcmp(core_type, "jm"))
+		return decode_fault_source_jm_t(ir, kbdev->gpu_props.gpu_id.arch_id);
+#endif
+	else if (!strcmp(core_type, "l2c") || !strcmp(core_type, "memsys") ||
+		 !strcmp(core_type, "mmu")) {
+		/* "l2c" (lower-case L) is the name the decoder LUTs use for the L2 cache */
+		return "Not used";
+	}
+
+	return "unknown";
+}
+
+/* Human readable description of the core type encoded in @source_id. */
+const char *fault_source_id_core_type_description_get(struct kbase_device *kbdev,
+						      unsigned int source_id)
+{
+	/* Models from GPU_ID_MODEL_MAKE(14, 0) on decode the core type field, older the core ID */
+	if (kbdev->gpu_props.gpu_id.product_model >= GPU_ID_MODEL_MAKE(14, 0))
+		return decode_fault_source_core_type_t_desc(
+			FAULT_SOURCE_ID_CORE_TYPE_GET(source_id), kbdev->gpu_props.gpu_id.arch_id);
+	return decode_fault_source_core_id_t_desc(FAULT_SOURCE_ID_CORE_ID_GET(source_id),
+						  kbdev->gpu_props.gpu_id.arch_id);
+}
diff --git a/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu_faults_decoder.h b/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu_faults_decoder.h
new file mode 100644
index 000000000000..da5610ec94b0
--- /dev/null
+++ b/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu_faults_decoder.h
@@ -0,0 +1,143 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ *
+ * (C) COPYRIGHT 2024 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+#ifndef _MALI_KBASE_MMU_FAULTS_DECODER_H_
+#define _MALI_KBASE_MMU_FAULTS_DECODER_H_
+
+#include <linux/types.h>
+#include <mali_kbase.h>
+
+/* FAULTSTATUS.SOURCE_ID encoding */
+#define SOURCE_ID_CORE_ID_SHIFT (9)
+#define SOURCE_ID_CORE_ID_MASK (0x7F << SOURCE_ID_CORE_ID_SHIFT)
+#define SOURCE_ID_UTLB_ID_SHIFT (8)
+#define SOURCE_ID_UTLB_ID_MASK (0x01 << SOURCE_ID_UTLB_ID_SHIFT)
+#define SOURCE_ID_CORE_TYPE_SHIFT (12)
+#define SOURCE_ID_CORE_TYPE_MASK (0x0F << SOURCE_ID_CORE_TYPE_SHIFT)
+#define SOURCE_ID_CORE_INDEX_SHIFT (6)
+#define SOURCE_ID_CORE_INDEX_MASK (0x3F << SOURCE_ID_CORE_INDEX_SHIFT)
+
+/**
+ * FAULT_SOURCE_ID_CORE_ID_GET() - Get core ID of a fault.
+ *
+ * @source_id: SOURCE_ID field of FAULTSTATUS (MMU) or GPU_FAULTSTATUS (GPU)
+ *			   registers.
+ *
+ * Get core ID part of SOURCE_ID field of FAULTSTATUS (MMU) or
+ * GPU_FAULTSTATUS (GPU) registers.
+ *
+ * Return: core ID of the fault.
+ */
+#define FAULT_SOURCE_ID_CORE_ID_GET(source_id) \
+	(((source_id) & SOURCE_ID_CORE_ID_MASK) >> SOURCE_ID_CORE_ID_SHIFT)
+
+/**
+ * FAULT_SOURCE_ID_UTLB_ID_GET() - Get UTLB ID of a fault.
+ *
+ * @source_id: SOURCE_ID field of FAULTSTATUS (MMU) or GPU_FAULTSTATUS (GPU)
+ *			   registers.
+ *
+ * Get UTLB(micro-TLB) ID part of SOURCE_ID field of FAULTSTATUS (MMU) or
+ * GPU_FAULTSTATUS (GPU) registers.
+ *
+ * Return: UTLB ID of the fault.
+ */
+#define FAULT_SOURCE_ID_UTLB_ID_GET(source_id) \
+	(((source_id) & SOURCE_ID_UTLB_ID_MASK) >> SOURCE_ID_UTLB_ID_SHIFT)
+
+/**
+ * FAULT_SOURCE_ID_CORE_TYPE_GET() - Get core type of a fault.
+ *
+ * @source_id: SOURCE_ID field of FAULTSTATUS (MMU) or GPU_FAULTSTATUS (GPU)
+ *			   registers.
+ *
+ * Get core type part of SOURCE_ID field of FAULTSTATUS (MMU) or
+ * GPU_FAULTSTATUS (GPU) registers.
+ *
+ * Return: core type code of the fault.
+ */
+#define FAULT_SOURCE_ID_CORE_TYPE_GET(source_id) \
+	(((source_id) & SOURCE_ID_CORE_TYPE_MASK) >> SOURCE_ID_CORE_TYPE_SHIFT)
+
+/**
+ * FAULT_SOURCE_ID_CORE_INDEX_GET() - Get core index of a fault.
+ *
+ * @source_id: SOURCE_ID field of FAULTSTATUS (MMU) or GPU_FAULTSTATUS (GPU)
+ *			   registers.
+ *
+ * Get core index part of SOURCE_ID field of FAULTSTATUS (MMU) or
+ * GPU_FAULTSTATUS (GPU) registers.
+ *
+ * Return: core index of the fault.
+ */
+#define FAULT_SOURCE_ID_CORE_INDEX_GET(source_id) \
+	(((source_id) & SOURCE_ID_CORE_INDEX_MASK) >> SOURCE_ID_CORE_INDEX_SHIFT)
+
+/**
+ * fault_source_id_internal_requester_get() - Get internal_requester of a fault.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer).
+ * @source_id: SOURCE_ID field of FAULTSTATUS (MMU) or GPU_FAULTSTATUS (GPU)
+ *			   registers.
+ *
+ * Get internal_requester part of SOURCE_ID field of FAULTSTATUS (MMU) or
+ * GPU_FAULTSTATUS (GPU) registers.
+ *
+ * Return: Internal requester code of the fault.
+ */
+unsigned int fault_source_id_internal_requester_get(struct kbase_device *kbdev,
+						    unsigned int source_id);
+
+/**
+ * fault_source_id_internal_requester_get_str() - Get internal_requester of a
+ * fault in a human readable format.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer).
+ * @source_id: SOURCE_ID field of FAULTSTATUS (MMU) or GPU_FAULTSTATUS (GPU)
+ *			   registers.
+ * @access_type: the direction of data transfer that caused the fault (atomic,
+ *				 execute, read, write)
+ *
+ * Get the human readable decoding of internal_requester part of SOURCE_ID field
+ * of FAULTSTATUS (MMU) or GPU_FAULTSTATUS (GPU) registers.
+ *
+ * Return: Internal requester of the fault in human readable format.
+ */
+const char *fault_source_id_internal_requester_get_str(struct kbase_device *kbdev,
+						       unsigned int source_id,
+						       unsigned int access_type);
+
+/**
+ * fault_source_id_core_type_description_get() - Get the core type of
+ * a fault in a human readable format.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer).
+ * @source_id: SOURCE_ID field of FAULTSTATUS (MMU) or GPU_FAULTSTATUS (GPU)
+ *			   registers.
+ *
+ * Get the human readable decoding of core type part of SOURCE_ID field
+ * of FAULTSTATUS (MMU) or GPU_FAULTSTATUS (GPU) registers.
+ *
+ * Return: core type of the fault in human readable format.
+ */
+const char *fault_source_id_core_type_description_get(struct kbase_device *kbdev,
+						      unsigned int source_id);
+
+#endif /* _MALI_KBASE_MMU_FAULTS_DECODER_H_ */
diff --git a/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu_faults_decoder_luts.c b/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu_faults_decoder_luts.c
new file mode 100644
index 000000000000..8e90cacb4efa
--- /dev/null
+++ b/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu_faults_decoder_luts.c
@@ -0,0 +1,660 @@
+// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
+/*
+ *
+ * (C) COPYRIGHT 2019-2024 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+/**
+ * DOC: Base kernel MMU faults decoder.
+ */
+
+#include <mmu/mali_kbase_mmu_faults_decoder_luts.h>
+
+#define GPU_ID_ARCH_ID_MAJOR_GET(gpu_id) (((gpu_id) >> 16) & 0xFF) /* bits 23:16 */
+#define GPU_ID_ARCH_ID_MINOR_AND_REV_GET(gpu_id) ((gpu_id) & 0xFFFF) /* bits 15:0 */
+#define NELEMS(s) (sizeof(s) / sizeof((s)[0])) /* element count; s must be an array, not a pointer */
+
+struct decode_lut_element {
+	u16 arch_minor_rev; /* arch minor+rev this entry applies to; 0xffff matches any */
+	u16 key; /* field value that this entry decodes */
+	const char *text; /* string returned when the entry matches */
+};
+
+/* First entry matching key and minor/rev (0xffff matches any) wins; "unknown" if none match. */
+static const char *decode_lut_element_lookup(u16 arch_minor_rev, u16 key,
+					     const struct decode_lut_element *decode_element_lut,
+					     unsigned int lut_len)
+{
+	unsigned int i;
+
+	for (i = 0; i < lut_len; i++) {
+		const struct decode_lut_element *entry = &decode_element_lut[i];
+
+		if (entry->key == key &&
+		    (entry->arch_minor_rev == 0xffff || entry->arch_minor_rev == arch_minor_rev))
+			return entry->text;
+	}
+	return "unknown";
+}
+
+/* Auto-generated code: DO NOT MODIFY! */
+
+static struct decode_lut_element lut_fault_source_core_type_t_name_major_9[] = {
+	{ 0xFFFF, 0, "shader" }, { 0xFFFF, 1, "l2c" }, { 0xFFFF, 2, "tiler" },
+	{ 0xFFFF, 3, "mmu" },	 { 0xFFFF, 4, "jm" },  { 0xFFFF, 5, "pmb" },
+};
+
+static struct decode_lut_element lut_fault_source_core_type_t_desc_major_9[] = {
+	{ 0xFFFF, 0, "Shader core" }, { 0xFFFF, 1, "Level 2 cache" },
+	{ 0xFFFF, 2, "Tiler" },	      { 0xFFFF, 3, "MMU" },
+	{ 0xFFFF, 4, "Job Manager" }, { 0xFFFF, 5, "Performance Monitor Block" },
+};
+
+static struct decode_lut_element lut_fault_source_core_type_t_name_major_10[] = {
+	{ 0xFFFF, 0, "shader" }, { 0xFFFF, 1, "l2c" }, { 0xFFFF, 2, "tiler" },
+	{ 0xFFFF, 3, "mmu" },	 { 0xFFFF, 4, "csf" }, { 0xFFFF, 5, "memsys" },
+};
+
+static struct decode_lut_element lut_fault_source_core_type_t_desc_major_10[] = {
+	{ 0xFFFF, 0, "Shader core" }, { 0xFFFF, 1, "Level 2 cache" },
+	{ 0xFFFF, 2, "Tiler" },	      { 0xFFFF, 3, "MMU" },
+	{ 0xFFFF, 4, "CSF" },	      { 0xFFFF, 5, "Memory system" },
+};
+
+static struct decode_lut_element lut_fault_source_core_type_t_name_major_11[] = {
+	{ 0xFFFF, 0, "shader" }, { 0xFFFF, 1, "l2c" }, { 0xFFFF, 2, "tiler" },
+	{ 0xFFFF, 3, "mmu" },	 { 0xFFFF, 4, "csf" }, { 0xFFFF, 5, "memsys" },
+};
+
+static struct decode_lut_element lut_fault_source_core_type_t_desc_major_11[] = {
+	{ 0xFFFF, 0, "Shader core" }, { 0xFFFF, 1, "Level 2 cache" },
+	{ 0xFFFF, 2, "Tiler" },	      { 0xFFFF, 3, "MMU" },
+	{ 0xFFFF, 4, "CSF" },	      { 0xFFFF, 5, "Memory system" },
+};
+
+static struct decode_lut_element lut_fault_source_core_type_t_name_major_12[] = {
+	{ 0xFFFF, 0, "shader" }, { 0xFFFF, 1, "l2c" }, { 0xFFFF, 2, "tiler" },
+	{ 0xFFFF, 3, "mmu" },	 { 0xFFFF, 4, "csf" }, { 0xFFFF, 5, "memsys" },
+};
+
+static struct decode_lut_element lut_fault_source_core_type_t_desc_major_12[] = {
+	{ 0xFFFF, 0, "Shader core" }, { 0xFFFF, 1, "Level 2 cache" },
+	{ 0xFFFF, 2, "Tiler" },	      { 0xFFFF, 3, "MMU" },
+	{ 0xFFFF, 4, "CSF" },	      { 0xFFFF, 5, "Memory system" },
+};
+
+static struct decode_lut_element lut_fault_source_core_id_t_desc_major_9[] = {
+	{ 0xFFFF, 0, "Shader core 0" },
+	{ 0xFFFF, 1, "Shader core 1" },
+	{ 0xFFFF, 2, "Shader core 2" },
+	{ 0xFFFF, 3, "Shader core 3" },
+	{ 0xFFFF, 4, "Shader core 4" },
+	{ 0xFFFF, 5, "Shader core 5" },
+	{ 0xFFFF, 6, "Shader core 6" },
+	{ 0xFFFF, 7, "Shader core 7" },
+	{ 0xFFFF, 8, "Shader core 8" },
+	{ 0xFFFF, 9, "Shader core 9" },
+	{ 0xFFFF, 10, "Shader core 10" },
+	{ 0xFFFF, 11, "Shader core 11" },
+	{ 0xFFFF, 12, "Shader core 12" },
+	{ 0xFFFF, 13, "Shader core 13" },
+	{ 0xFFFF, 14, "Shader core 14" },
+	{ 0xFFFF, 15, "Shader core 15" },
+	{ 0xFFFF, 16, "Shader core 16" },
+	{ 0xFFFF, 17, "Shader core 17" },
+	{ 0xFFFF, 18, "Shader core 18" },
+	{ 0xFFFF, 19, "Shader core 19" },
+	{ 0xFFFF, 20, "Shader core 20" },
+	{ 0xFFFF, 21, "Shader core 21" },
+	{ 0xFFFF, 22, "Shader core 22" },
+	{ 0xFFFF, 23, "Shader core 23" },
+	{ 0xFFFF, 24, "Shader core 24" },
+	{ 0xFFFF, 25, "Shader core 25" },
+	{ 0xFFFF, 26, "Shader core 26" },
+	{ 0xFFFF, 27, "Shader core 27" },
+	{ 0xFFFF, 28, "Shader core 28" },
+	{ 0xFFFF, 29, "Shader core 29" },
+	{ 0xFFFF, 30, "Shader core 30" },
+	{ 0xFFFF, 31, "Shader core 31" },
+	{ 0xFFFF, 41, "L2 Slice 3" },
+	{ 0xFFFF, 43, "L2 Slice 2" },
+	{ 0xFFFF, 45, "L2 Slice 1" },
+	{ 0xFFFF, 46, "PMB" },
+	{ 0xFFFF, 47, "L2 Slice 0" },
+	{ 0xFFFF, 51, "Tiler" },
+	{ 0xFFFF, 55, "MMU" },
+	{ 0xFFFF, 62, "Job Manager" },
+};
+
+static struct decode_lut_element lut_fault_source_core_id_t_core_type_major_9[] = {
+	{ 0xFFFF, 0, "shader" },  { 0xFFFF, 1, "shader" },  { 0xFFFF, 2, "shader" },
+	{ 0xFFFF, 3, "shader" },  { 0xFFFF, 4, "shader" },  { 0xFFFF, 5, "shader" },
+	{ 0xFFFF, 6, "shader" },  { 0xFFFF, 7, "shader" },  { 0xFFFF, 8, "shader" },
+	{ 0xFFFF, 9, "shader" },  { 0xFFFF, 10, "shader" }, { 0xFFFF, 11, "shader" },
+	{ 0xFFFF, 12, "shader" }, { 0xFFFF, 13, "shader" }, { 0xFFFF, 14, "shader" },
+	{ 0xFFFF, 15, "shader" }, { 0xFFFF, 16, "shader" }, { 0xFFFF, 17, "shader" },
+	{ 0xFFFF, 18, "shader" }, { 0xFFFF, 19, "shader" }, { 0xFFFF, 20, "shader" },
+	{ 0xFFFF, 21, "shader" }, { 0xFFFF, 22, "shader" }, { 0xFFFF, 23, "shader" },
+	{ 0xFFFF, 24, "shader" }, { 0xFFFF, 25, "shader" }, { 0xFFFF, 26, "shader" },
+	{ 0xFFFF, 27, "shader" }, { 0xFFFF, 28, "shader" }, { 0xFFFF, 29, "shader" },
+	{ 0xFFFF, 30, "shader" }, { 0xFFFF, 31, "shader" }, { 0xFFFF, 41, "l2c" },
+	{ 0xFFFF, 43, "l2c" },	  { 0xFFFF, 45, "l2c" },    { 0xFFFF, 46, "pmb" },
+	{ 0xFFFF, 47, "l2c" },	  { 0xFFFF, 51, "tiler" },  { 0xFFFF, 55, "mmu" },
+	{ 0xFFFF, 62, "jm" },
+};
+
+static struct decode_lut_element lut_fault_source_core_id_t_desc_major_10[] = {
+	{ 0xFFFF, 0, "Shader core 0" },
+	{ 0xFFFF, 1, "Shader core 1" },
+	{ 0xFFFF, 2, "Shader core 2" },
+	{ 0xFFFF, 3, "Shader core 3" },
+	{ 0xFFFF, 4, "Shader core 4" },
+	{ 0xFFFF, 5, "Shader core 5" },
+	{ 0xFFFF, 6, "Shader core 6" },
+	{ 0xFFFF, 7, "Shader core 7" },
+	{ 0xFFFF, 8, "Shader core 8" },
+	{ 0xFFFF, 9, "Shader core 9" },
+	{ 0xFFFF, 10, "Shader core 10" },
+	{ 0xFFFF, 11, "Shader core 11" },
+	{ 0xFFFF, 12, "Shader core 12" },
+	{ 0xFFFF, 13, "Shader core 13" },
+	{ 0xFFFF, 14, "Shader core 14" },
+	{ 0xFFFF, 15, "Shader core 15" },
+	{ 0xFFFF, 16, "Shader core 16" },
+	{ 0xFFFF, 17, "Shader core 17" },
+	{ 0xFFFF, 18, "Shader core 18" },
+	{ 0xFFFF, 19, "Shader core 19" },
+	{ 0xFFFF, 20, "Shader core 20" },
+	{ 0xFFFF, 21, "Shader core 21" },
+	{ 0xFFFF, 22, "Shader core 22" },
+	{ 0xFFFF, 23, "Shader core 23" },
+	{ 0xFFFF, 24, "Shader core 24" },
+	{ 0xFFFF, 25, "Shader core 25" },
+	{ 0xFFFF, 26, "Shader core 26" },
+	{ 0xFFFF, 27, "Shader core 27" },
+	{ 0xFFFF, 28, "Shader core 28" },
+	{ 0xFFFF, 29, "Shader core 29" },
+	{ 0xFFFF, 30, "Shader core 30" },
+	{ 0xFFFF, 31, "Shader core 31" },
+	{ 0xFFFF, 41, "L2 Slice 3" },
+	{ 0xFFFF, 43, "L2 Slice 2" },
+	{ 0xFFFF, 45, "L2 Slice 1" },
+	{ 0xFFFF, 47, "L2 Slice 0" },
+	{ 0xFFFF, 51, "Tiler" },
+	{ 0xFFFF, 55, "MMU" },
+	{ 0xFFFF, 33, "L2 Slice 7" },
+	{ 0xFFFF, 35, "L2 Slice 6" },
+	{ 0xFFFF, 37, "L2 Slice 5" },
+	{ 0xFFFF, 39, "L2 Slice 4" },
+	{ 0xFFFF, 48, "Memory system, undefined" },
+	{ 0xFFFF, 62, "Command Stream Frontend" },
+};
+
+static struct decode_lut_element lut_fault_source_core_id_t_core_type_major_10[] = {
+	{ 0xFFFF, 0, "shader" },  { 0xFFFF, 1, "shader" },  { 0xFFFF, 2, "shader" },
+	{ 0xFFFF, 3, "shader" },  { 0xFFFF, 4, "shader" },  { 0xFFFF, 5, "shader" },
+	{ 0xFFFF, 6, "shader" },  { 0xFFFF, 7, "shader" },  { 0xFFFF, 8, "shader" },
+	{ 0xFFFF, 9, "shader" },  { 0xFFFF, 10, "shader" }, { 0xFFFF, 11, "shader" },
+	{ 0xFFFF, 12, "shader" }, { 0xFFFF, 13, "shader" }, { 0xFFFF, 14, "shader" },
+	{ 0xFFFF, 15, "shader" }, { 0xFFFF, 16, "shader" }, { 0xFFFF, 17, "shader" },
+	{ 0xFFFF, 18, "shader" }, { 0xFFFF, 19, "shader" }, { 0xFFFF, 20, "shader" },
+	{ 0xFFFF, 21, "shader" }, { 0xFFFF, 22, "shader" }, { 0xFFFF, 23, "shader" },
+	{ 0xFFFF, 24, "shader" }, { 0xFFFF, 25, "shader" }, { 0xFFFF, 26, "shader" },
+	{ 0xFFFF, 27, "shader" }, { 0xFFFF, 28, "shader" }, { 0xFFFF, 29, "shader" },
+	{ 0xFFFF, 30, "shader" }, { 0xFFFF, 31, "shader" }, { 0xFFFF, 41, "l2c" },
+	{ 0xFFFF, 43, "l2c" },	  { 0xFFFF, 45, "l2c" },    { 0xFFFF, 47, "l2c" },
+	{ 0xFFFF, 51, "tiler" },  { 0xFFFF, 55, "mmu" },    { 0xFFFF, 33, "l2c" },
+	{ 0xFFFF, 35, "l2c" },	  { 0xFFFF, 37, "l2c" },    { 0xFFFF, 39, "l2c" },
+	{ 0xFFFF, 48, "memsys" }, { 0xFFFF, 62, "csf" },
+};
+
+static struct decode_lut_element lut_fault_source_core_id_t_desc_major_11[] = {
+	{ 0xFFFF, 0, "Shader core 0" },
+	{ 0xFFFF, 1, "Shader core 1" },
+	{ 0xFFFF, 2, "Shader core 2" },
+	{ 0xFFFF, 3, "Shader core 3" },
+	{ 0xFFFF, 4, "Shader core 4" },
+	{ 0xFFFF, 5, "Shader core 5" },
+	{ 0xFFFF, 6, "Shader core 6" },
+	{ 0xFFFF, 7, "Shader core 7" },
+	{ 0xFFFF, 8, "Shader core 8" },
+	{ 0xFFFF, 9, "Shader core 9" },
+	{ 0xFFFF, 10, "Shader core 10" },
+	{ 0xFFFF, 11, "Shader core 11" },
+	{ 0xFFFF, 12, "Shader core 12" },
+	{ 0xFFFF, 13, "Shader core 13" },
+	{ 0xFFFF, 14, "Shader core 14" },
+	{ 0xFFFF, 15, "Shader core 15" },
+	{ 0xFFFF, 16, "Shader core 16" },
+	{ 0xFFFF, 17, "Shader core 17" },
+	{ 0xFFFF, 18, "Shader core 18" },
+	{ 0xFFFF, 19, "Shader core 19" },
+	{ 0xFFFF, 20, "Shader core 20" },
+	{ 0xFFFF, 21, "Shader core 21" },
+	{ 0xFFFF, 22, "Shader core 22" },
+	{ 0xFFFF, 23, "Shader core 23" },
+	{ 0xFFFF, 24, "Shader core 24" },
+	{ 0xFFFF, 25, "Shader core 25" },
+	{ 0xFFFF, 26, "Shader core 26" },
+	{ 0xFFFF, 27, "Shader core 27" },
+	{ 0xFFFF, 28, "Shader core 28" },
+	{ 0xFFFF, 29, "Shader core 29" },
+	{ 0xFFFF, 30, "Shader core 30" },
+	{ 0xFFFF, 31, "Shader core 31" },
+	{ 0xFFFF, 41, "L2 Slice 3" },
+	{ 0xFFFF, 43, "L2 Slice 2" },
+	{ 0xFFFF, 45, "L2 Slice 1" },
+	{ 0xFFFF, 47, "L2 Slice 0" },
+	{ 0xFFFF, 51, "Tiler" },
+	{ 0xFFFF, 55, "MMU" },
+	{ 0xFFFF, 33, "L2 Slice 7" },
+	{ 0xFFFF, 35, "L2 Slice 6" },
+	{ 0xFFFF, 37, "L2 Slice 5" },
+	{ 0xFFFF, 39, "L2 Slice 4" },
+	{ 0xFFFF, 48, "Memory system, undefined" },
+	{ 0xFFFF, 62, "Command Stream Frontend" },
+};
+
+static struct decode_lut_element lut_fault_source_core_id_t_core_type_major_11[] = {
+	{ 0xFFFF, 0, "shader" },  { 0xFFFF, 1, "shader" },  { 0xFFFF, 2, "shader" },
+	{ 0xFFFF, 3, "shader" },  { 0xFFFF, 4, "shader" },  { 0xFFFF, 5, "shader" },
+	{ 0xFFFF, 6, "shader" },  { 0xFFFF, 7, "shader" },  { 0xFFFF, 8, "shader" },
+	{ 0xFFFF, 9, "shader" },  { 0xFFFF, 10, "shader" }, { 0xFFFF, 11, "shader" },
+	{ 0xFFFF, 12, "shader" }, { 0xFFFF, 13, "shader" }, { 0xFFFF, 14, "shader" },
+	{ 0xFFFF, 15, "shader" }, { 0xFFFF, 16, "shader" }, { 0xFFFF, 17, "shader" },
+	{ 0xFFFF, 18, "shader" }, { 0xFFFF, 19, "shader" }, { 0xFFFF, 20, "shader" },
+	{ 0xFFFF, 21, "shader" }, { 0xFFFF, 22, "shader" }, { 0xFFFF, 23, "shader" },
+	{ 0xFFFF, 24, "shader" }, { 0xFFFF, 25, "shader" }, { 0xFFFF, 26, "shader" },
+	{ 0xFFFF, 27, "shader" }, { 0xFFFF, 28, "shader" }, { 0xFFFF, 29, "shader" },
+	{ 0xFFFF, 30, "shader" }, { 0xFFFF, 31, "shader" }, { 0xFFFF, 41, "l2c" },
+	{ 0xFFFF, 43, "l2c" },	  { 0xFFFF, 45, "l2c" },    { 0xFFFF, 47, "l2c" },
+	{ 0xFFFF, 51, "tiler" },  { 0xFFFF, 55, "mmu" },    { 0xFFFF, 33, "l2c" },
+	{ 0xFFFF, 35, "l2c" },	  { 0xFFFF, 37, "l2c" },    { 0xFFFF, 39, "l2c" },
+	{ 0xFFFF, 48, "memsys" }, { 0xFFFF, 62, "csf" },
+};
+
+static struct decode_lut_element lut_fault_source_core_id_t_desc_major_12[] = {
+	{ 0xFFFF, 0, "Shader core 0" },
+	{ 0xFFFF, 1, "Shader core 1" },
+	{ 0xFFFF, 2, "Shader core 2" },
+	{ 0xFFFF, 3, "Shader core 3" },
+	{ 0xFFFF, 4, "Shader core 4" },
+	{ 0xFFFF, 5, "Shader core 5" },
+	{ 0xFFFF, 6, "Shader core 6" },
+	{ 0xFFFF, 7, "Shader core 7" },
+	{ 0xFFFF, 8, "Shader core 8" },
+	{ 0xFFFF, 9, "Shader core 9" },
+	{ 0xFFFF, 10, "Shader core 10" },
+	{ 0xFFFF, 11, "Shader core 11" },
+	{ 0xFFFF, 12, "Shader core 12" },
+	{ 0xFFFF, 13, "Shader core 13" },
+	{ 0xFFFF, 14, "Shader core 14" },
+	{ 0xFFFF, 15, "Shader core 15" },
+	{ 0xFFFF, 16, "Shader core 16" },
+	{ 0xFFFF, 17, "Shader core 17" },
+	{ 0xFFFF, 18, "Shader core 18" },
+	{ 0xFFFF, 19, "Shader core 19" },
+	{ 0xFFFF, 20, "Shader core 20" },
+	{ 0xFFFF, 21, "Shader core 21" },
+	{ 0xFFFF, 22, "Shader core 22" },
+	{ 0xFFFF, 23, "Shader core 23" },
+	{ 0xFFFF, 24, "Shader core 24" },
+	{ 0xFFFF, 25, "Shader core 25" },
+	{ 0xFFFF, 26, "Shader core 26" },
+	{ 0xFFFF, 27, "Shader core 27" },
+	{ 0xFFFF, 28, "Shader core 28" },
+	{ 0xFFFF, 29, "Shader core 29" },
+	{ 0xFFFF, 30, "Shader core 30" },
+	{ 0xFFFF, 31, "Shader core 31" },
+	{ 0xFFFF, 41, "L2 Slice 3" },
+	{ 0xFFFF, 43, "L2 Slice 2" },
+	{ 0xFFFF, 45, "L2 Slice 1" },
+	{ 0xFFFF, 47, "L2 Slice 0" },
+	{ 0xFFFF, 51, "Tiler" },
+	{ 0xFFFF, 55, "MMU" },
+	{ 0xFFFF, 33, "L2 Slice 7" },
+	{ 0xFFFF, 35, "L2 Slice 6" },
+	{ 0xFFFF, 37, "L2 Slice 5" },
+	{ 0xFFFF, 39, "L2 Slice 4" },
+	{ 0xFFFF, 48, "Memory system, undefined" },
+	{ 0xFFFF, 62, "Command Stream Frontend" },
+};
+
+static struct decode_lut_element lut_fault_source_core_id_t_core_type_major_12[] = {
+	{ 0xFFFF, 0, "shader" },  { 0xFFFF, 1, "shader" },  { 0xFFFF, 2, "shader" },
+	{ 0xFFFF, 3, "shader" },  { 0xFFFF, 4, "shader" },  { 0xFFFF, 5, "shader" },
+	{ 0xFFFF, 6, "shader" },  { 0xFFFF, 7, "shader" },  { 0xFFFF, 8, "shader" },
+	{ 0xFFFF, 9, "shader" },  { 0xFFFF, 10, "shader" }, { 0xFFFF, 11, "shader" },
+	{ 0xFFFF, 12, "shader" }, { 0xFFFF, 13, "shader" }, { 0xFFFF, 14, "shader" },
+	{ 0xFFFF, 15, "shader" }, { 0xFFFF, 16, "shader" }, { 0xFFFF, 17, "shader" },
+	{ 0xFFFF, 18, "shader" }, { 0xFFFF, 19, "shader" }, { 0xFFFF, 20, "shader" },
+	{ 0xFFFF, 21, "shader" }, { 0xFFFF, 22, "shader" }, { 0xFFFF, 23, "shader" },
+	{ 0xFFFF, 24, "shader" }, { 0xFFFF, 25, "shader" }, { 0xFFFF, 26, "shader" },
+	{ 0xFFFF, 27, "shader" }, { 0xFFFF, 28, "shader" }, { 0xFFFF, 29, "shader" },
+	{ 0xFFFF, 30, "shader" }, { 0xFFFF, 31, "shader" }, { 0xFFFF, 41, "l2c" },
+	{ 0xFFFF, 43, "l2c" },	  { 0xFFFF, 45, "l2c" },    { 0xFFFF, 47, "l2c" },
+	{ 0xFFFF, 51, "tiler" },  { 0xFFFF, 55, "mmu" },    { 0xFFFF, 33, "l2c" },
+	{ 0xFFFF, 35, "l2c" },	  { 0xFFFF, 37, "l2c" },    { 0xFFFF, 39, "l2c" },
+	{ 0xFFFF, 48, "memsys" }, { 0xFFFF, 62, "csf" },
+};
+
+static struct decode_lut_element lut_fault_source_shader_r_t_major_9[] = {
+	{ 0xFFFF, 0, "ic" },	{ 0xFFFF, 1, "adc" },	{ 0xFFFF, 4, "scm" },
+	{ 0xFFFF, 5, "vl" },	{ 0xFFFF, 6, "plr" },	{ 0xFFFF, 7, "fsdc" },
+	{ 0xFFFF, 8, "lsc" },	{ 0xFFFF, 9, "cse" },	{ 0xFFFF, 10, "tb" },
+	{ 0xFFFF, 11, "tmdi" }, { 0xFFFF, 12, "tmu0" }, { 0xFFFF, 13, "tmu1" },
+	{ 0xFFFF, 14, "tma0" }, { 0xFFFF, 15, "tma1" },
+};
+
+static struct decode_lut_element lut_fault_source_shader_r_t_major_10[] = {
+	{ 0xFFFF, 4, "scm" },	{ 0xFFFF, 5, "vl" },	{ 0xFFFF, 6, "plr" },
+	{ 0xFFFF, 7, "fsdc" },	{ 0xFFFF, 8, "lsc" },	{ 0xFFFF, 9, "cse" },
+	{ 0xFFFF, 10, "tb" },	{ 0xFFFF, 11, "tmdi" }, { 0xFFFF, 12, "tmu0" },
+	{ 0xFFFF, 13, "tmu1" }, { 0xFFFF, 14, "tma0" }, { 0xFFFF, 15, "tma1" },
+	{ 0xFFFF, 0, "ic0" },	{ 0xFFFF, 1, "ic1" },	{ 0xFFFF, 2, "adc" },
+};
+
+static struct decode_lut_element lut_fault_source_shader_r_t_major_11[] = {
+	{ 0xFFFF, 4, "scm" },	{ 0xFFFF, 5, "vl" },	{ 0xFFFF, 6, "plr" },
+	{ 0xFFFF, 7, "fsdc" },	{ 0xFFFF, 8, "lsc" },	{ 0xFFFF, 9, "cse" },
+	{ 0xFFFF, 10, "tb" },	{ 0xFFFF, 11, "tmdi" }, { 0xFFFF, 12, "tmu0" },
+	{ 0xFFFF, 13, "tmu1" }, { 0xFFFF, 14, "tma0" }, { 0xFFFF, 15, "tma1" },
+	{ 0xFFFF, 0, "ic0" },	{ 0xFFFF, 1, "ic1" },	{ 0xFFFF, 2, "adc" },
+};
+
+static struct decode_lut_element lut_fault_source_shader_r_t_major_12[] = {
+	{ 0xFFFF, 4, "scm" },	{ 0xFFFF, 6, "plr" },	{ 0xFFFF, 7, "fsdc" },
+	{ 0xFFFF, 8, "lsc" },	{ 0xFFFF, 9, "cse" },	{ 0xFFFF, 10, "tb" },
+	{ 0xFFFF, 11, "tmdi" }, { 0xFFFF, 12, "tmu0" }, { 0xFFFF, 13, "tmu1" },
+	{ 0xFFFF, 14, "tma0" }, { 0xFFFF, 15, "tma1" }, { 0xFFFF, 0, "ic0" },
+	{ 0xFFFF, 1, "ic1" },	{ 0xFFFF, 2, "adc" },	{ 0xFFFF, 3, "rtas" },
+};
+
+static struct decode_lut_element lut_fault_source_shader_w_t_major_9[] = {
+	{ 0xFFFF, 0, "pcb" },
+	{ 0xFFFF, 8, "lsc" },
+	{ 0xFFFF, 10, "tb" },
+};
+
+static struct decode_lut_element lut_fault_source_shader_w_t_major_10[] = {
+	{ 0xFFFF, 0, "pcb" },  { 0xFFFF, 8, "lsc" },  { 0xFFFF, 12, "tb0" },
+	{ 0xFFFF, 13, "tb1" }, { 0xFFFF, 14, "tb2" }, { 0xFFFF, 15, "tb3" },
+};
+
+static struct decode_lut_element lut_fault_source_shader_w_t_major_11[] = {
+	{ 0xFFFF, 0, "pcb" },  { 0xFFFF, 8, "lsc" },  { 0xFFFF, 12, "tb0" },
+	{ 0xFFFF, 13, "tb1" }, { 0xFFFF, 14, "tb2" }, { 0xFFFF, 15, "tb3" },
+};
+
+static struct decode_lut_element lut_fault_source_shader_w_t_major_12[] = {
+	{ 0xFFFF, 0, "pcb" },  { 0xFFFF, 8, "lsc" },  { 0xFFFF, 12, "tb0" },
+	{ 0xFFFF, 13, "tb1" }, { 0xFFFF, 14, "tb2" }, { 0xFFFF, 15, "tb3" },
+};
+
+static struct decode_lut_element lut_fault_source_tiler_r_t_major_10[] = {
+	{ 0xFFFF, 0, "pf" },
+	{ 0xFFFF, 1, "pcache" },
+	{ 0xFFFF, 2, "tcu" },
+	{ 0xFFFF, 3, "idx" },
+};
+
+static struct decode_lut_element lut_fault_source_tiler_r_t_major_11[] = {
+	{ 0xFFFF, 0, "pf" },
+	{ 0xFFFF, 1, "pcache" },
+	{ 0xFFFF, 2, "tcu" },
+	{ 0xFFFF, 3, "idx" },
+};
+
+static struct decode_lut_element lut_fault_source_tiler_r_t_major_12[] = {
+	{ 0xFFFF, 0, "pf" },
+	{ 0xFFFF, 1, "pcache" },
+	{ 0xFFFF, 2, "tcu" },
+	{ 0xFFFF, 3, "idx" },
+};
+
+static struct decode_lut_element lut_fault_source_tiler_w_t_major_10[] = {
+	{ 0xFFFF, 1, "pcache_wb" },
+	{ 0xFFFF, 2, "tcu_pcb" },
+};
+
+static struct decode_lut_element lut_fault_source_tiler_w_t_major_11[] = {
+	{ 0xFFFF, 1, "pcache_wb" },
+	{ 0xFFFF, 2, "tcu_pcb" },
+};
+
+static struct decode_lut_element lut_fault_source_tiler_w_t_major_12[] = {
+	{ 0xFFFF, 1, "pcache_wb" },
+	{ 0xFFFF, 2, "tcu_pcb" },
+};
+
+
+const char *decode_fault_source_core_type_t_name(u16 idx, u32 gpu_id)
+{
+	u16 min_rev = GPU_ID_ARCH_ID_MINOR_AND_REV_GET(gpu_id);
+	const char *ret = "unknown";
+
+	switch (GPU_ID_ARCH_ID_MAJOR_GET(gpu_id)) {
+	case 9:
+		ret = decode_lut_element_lookup(min_rev, idx,
+						lut_fault_source_core_type_t_name_major_9,
+						NELEMS(lut_fault_source_core_type_t_name_major_9));
+		break;
+	case 10:
+		ret = decode_lut_element_lookup(min_rev, idx,
+						lut_fault_source_core_type_t_name_major_10,
+						NELEMS(lut_fault_source_core_type_t_name_major_10));
+		break;
+	case 11:
+		ret = decode_lut_element_lookup(min_rev, idx,
+						lut_fault_source_core_type_t_name_major_11,
+						NELEMS(lut_fault_source_core_type_t_name_major_11));
+		break;
+	case 12:
+		ret = decode_lut_element_lookup(min_rev, idx,
+						lut_fault_source_core_type_t_name_major_12,
+						NELEMS(lut_fault_source_core_type_t_name_major_12));
+		break;
+	}
+	return ret;
+}
+
+const char *decode_fault_source_core_type_t_desc(u16 idx, u32 gpu_id)
+{
+	u16 min_rev = GPU_ID_ARCH_ID_MINOR_AND_REV_GET(gpu_id);
+	const char *ret = "unknown";
+
+	switch (GPU_ID_ARCH_ID_MAJOR_GET(gpu_id)) {
+	case 9:
+		ret = decode_lut_element_lookup(min_rev, idx,
+						lut_fault_source_core_type_t_desc_major_9,
+						NELEMS(lut_fault_source_core_type_t_desc_major_9));
+		break;
+	case 10:
+		ret = decode_lut_element_lookup(min_rev, idx,
+						lut_fault_source_core_type_t_desc_major_10,
+						NELEMS(lut_fault_source_core_type_t_desc_major_10));
+		break;
+	case 11:
+		ret = decode_lut_element_lookup(min_rev, idx,
+						lut_fault_source_core_type_t_desc_major_11,
+						NELEMS(lut_fault_source_core_type_t_desc_major_11));
+		break;
+	case 12:
+		ret = decode_lut_element_lookup(min_rev, idx,
+						lut_fault_source_core_type_t_desc_major_12,
+						NELEMS(lut_fault_source_core_type_t_desc_major_12));
+		break;
+	}
+	return ret;
+}
+
+const char *decode_fault_source_core_id_t_desc(u16 idx, u32 gpu_id)
+{
+	u16 min_rev = GPU_ID_ARCH_ID_MINOR_AND_REV_GET(gpu_id);
+	const char *ret = "unknown";
+
+	switch (GPU_ID_ARCH_ID_MAJOR_GET(gpu_id)) {
+	case 9:
+		ret = decode_lut_element_lookup(min_rev, idx,
+						lut_fault_source_core_id_t_desc_major_9,
+						NELEMS(lut_fault_source_core_id_t_desc_major_9));
+		break;
+	case 10:
+		ret = decode_lut_element_lookup(min_rev, idx,
+						lut_fault_source_core_id_t_desc_major_10,
+						NELEMS(lut_fault_source_core_id_t_desc_major_10));
+		break;
+	case 11:
+		ret = decode_lut_element_lookup(min_rev, idx,
+						lut_fault_source_core_id_t_desc_major_11,
+						NELEMS(lut_fault_source_core_id_t_desc_major_11));
+		break;
+	case 12:
+		ret = decode_lut_element_lookup(min_rev, idx,
+						lut_fault_source_core_id_t_desc_major_12,
+						NELEMS(lut_fault_source_core_id_t_desc_major_12));
+		break;
+	}
+	return ret;
+}
+
+const char *decode_fault_source_core_id_t_core_type(u16 idx, u32 gpu_id)
+{
+	u16 min_rev = GPU_ID_ARCH_ID_MINOR_AND_REV_GET(gpu_id);
+	const char *ret = "unknown";
+
+	switch (GPU_ID_ARCH_ID_MAJOR_GET(gpu_id)) {
+	case 9:
+		ret = decode_lut_element_lookup(
+			min_rev, idx, lut_fault_source_core_id_t_core_type_major_9,
+			NELEMS(lut_fault_source_core_id_t_core_type_major_9));
+		break;
+	case 10:
+		ret = decode_lut_element_lookup(
+			min_rev, idx, lut_fault_source_core_id_t_core_type_major_10,
+			NELEMS(lut_fault_source_core_id_t_core_type_major_10));
+		break;
+	case 11:
+		ret = decode_lut_element_lookup(
+			min_rev, idx, lut_fault_source_core_id_t_core_type_major_11,
+			NELEMS(lut_fault_source_core_id_t_core_type_major_11));
+		break;
+	case 12:
+		ret = decode_lut_element_lookup(
+			min_rev, idx, lut_fault_source_core_id_t_core_type_major_12,
+			NELEMS(lut_fault_source_core_id_t_core_type_major_12));
+		break;
+	}
+	return ret;
+}
+
+const char *decode_fault_source_shader_r_t(u16 idx, u32 gpu_id)
+{
+	u16 min_rev = GPU_ID_ARCH_ID_MINOR_AND_REV_GET(gpu_id);
+	const char *ret = "unknown";
+
+	switch (GPU_ID_ARCH_ID_MAJOR_GET(gpu_id)) {
+	case 9:
+		ret = decode_lut_element_lookup(min_rev, idx, lut_fault_source_shader_r_t_major_9,
+						NELEMS(lut_fault_source_shader_r_t_major_9));
+		break;
+	case 10:
+		ret = decode_lut_element_lookup(min_rev, idx, lut_fault_source_shader_r_t_major_10,
+						NELEMS(lut_fault_source_shader_r_t_major_10));
+		break;
+	case 11:
+		ret = decode_lut_element_lookup(min_rev, idx, lut_fault_source_shader_r_t_major_11,
+						NELEMS(lut_fault_source_shader_r_t_major_11));
+		break;
+	case 12:
+		ret = decode_lut_element_lookup(min_rev, idx, lut_fault_source_shader_r_t_major_12,
+						NELEMS(lut_fault_source_shader_r_t_major_12));
+		break;
+	}
+	return ret;
+}
+
+const char *decode_fault_source_shader_w_t(u16 idx, u32 gpu_id)
+{
+	u16 min_rev = GPU_ID_ARCH_ID_MINOR_AND_REV_GET(gpu_id);
+	const char *ret = "unknown";
+
+	switch (GPU_ID_ARCH_ID_MAJOR_GET(gpu_id)) {
+	case 9:
+		ret = decode_lut_element_lookup(min_rev, idx, lut_fault_source_shader_w_t_major_9,
+						NELEMS(lut_fault_source_shader_w_t_major_9));
+		break;
+	case 10:
+		ret = decode_lut_element_lookup(min_rev, idx, lut_fault_source_shader_w_t_major_10,
+						NELEMS(lut_fault_source_shader_w_t_major_10));
+		break;
+	case 11:
+		ret = decode_lut_element_lookup(min_rev, idx, lut_fault_source_shader_w_t_major_11,
+						NELEMS(lut_fault_source_shader_w_t_major_11));
+		break;
+	case 12:
+		ret = decode_lut_element_lookup(min_rev, idx, lut_fault_source_shader_w_t_major_12,
+						NELEMS(lut_fault_source_shader_w_t_major_12));
+		break;
+	}
+	return ret;
+}
+
+const char *decode_fault_source_tiler_r_t(u16 idx, u32 gpu_id)
+{
+	u16 min_rev = GPU_ID_ARCH_ID_MINOR_AND_REV_GET(gpu_id);
+	const char *ret = "unknown";
+
+	switch (GPU_ID_ARCH_ID_MAJOR_GET(gpu_id)) {
+	case 10:
+		ret = decode_lut_element_lookup(min_rev, idx, lut_fault_source_tiler_r_t_major_10,
+						NELEMS(lut_fault_source_tiler_r_t_major_10));
+		break;
+	case 11:
+		ret = decode_lut_element_lookup(min_rev, idx, lut_fault_source_tiler_r_t_major_11,
+						NELEMS(lut_fault_source_tiler_r_t_major_11));
+		break;
+	case 12:
+		ret = decode_lut_element_lookup(min_rev, idx, lut_fault_source_tiler_r_t_major_12,
+						NELEMS(lut_fault_source_tiler_r_t_major_12));
+		break;
+	}
+	return ret;
+}
+
+const char *decode_fault_source_tiler_w_t(u16 idx, u32 gpu_id)
+{
+	u16 min_rev = GPU_ID_ARCH_ID_MINOR_AND_REV_GET(gpu_id);
+	const char *ret = "unknown";
+
+	switch (GPU_ID_ARCH_ID_MAJOR_GET(gpu_id)) {
+	case 10:
+		ret = decode_lut_element_lookup(min_rev, idx, lut_fault_source_tiler_w_t_major_10,
+						NELEMS(lut_fault_source_tiler_w_t_major_10));
+		break;
+	case 11:
+		ret = decode_lut_element_lookup(min_rev, idx, lut_fault_source_tiler_w_t_major_11,
+						NELEMS(lut_fault_source_tiler_w_t_major_11));
+		break;
+	case 12:
+		ret = decode_lut_element_lookup(min_rev, idx, lut_fault_source_tiler_w_t_major_12,
+						NELEMS(lut_fault_source_tiler_w_t_major_12));
+		break;
+	}
+	return ret;
+}
diff --git a/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu_faults_decoder_luts.h b/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu_faults_decoder_luts.h
new file mode 100644
index 000000000000..2b0ca5659a6c
--- /dev/null
+++ b/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu_faults_decoder_luts.h
@@ -0,0 +1,119 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ *
+ * (C) COPYRIGHT 2024 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+#ifndef _MALI_KBASE_MMU_FAULTS_DECODER_LUTS_H_
+#define _MALI_KBASE_MMU_FAULTS_DECODER_LUTS_H_
+
+#include <linux/types.h>
+
+/**
+ * decode_fault_source_core_id_t_desc() - Get core description of a
+ * fault in a human readable format.
+ *
+ * @idx: Core ID part of SOURCE_ID field of the fault.
+ * @gpu_id: GPU id composed of arch_major << 16 | arch_minor << 8 | arch_rev.
+ *
+ * Return: description of the faulting core, or "unknown" if it cannot be decoded.
+ */
+const char *decode_fault_source_core_id_t_desc(u16 idx, u32 gpu_id);
+
+/**
+ * decode_fault_source_core_id_t_core_type() - Get core type of a
+ * fault in a human readable format.
+ *
+ * @idx: Core ID part of SOURCE_ID field of the fault.
+ * @gpu_id: GPU id composed of arch_major << 16 | arch_minor << 8 | arch_rev.
+ *
+ * Return: short core type name (e.g. "shader", "l2c"), or "unknown" if it cannot be decoded.
+ */
+const char *decode_fault_source_core_id_t_core_type(u16 idx, u32 gpu_id);
+
+/**
+ * decode_fault_source_core_type_t_name() - Get core type name of a
+ * fault.
+ *
+ * @idx: Core type part of SOURCE_ID field of the fault.
+ * @gpu_id: GPU id composed of arch_major << 16 | arch_minor << 8 | arch_rev.
+ *
+ * Return: core type short name (e.g. "shader"), or "unknown" if it cannot be decoded.
+ */
+const char *decode_fault_source_core_type_t_name(u16 idx, u32 gpu_id);
+
+/**
+ * decode_fault_source_core_type_t_desc() - Get core type description of a
+ * fault.
+ *
+ * @idx: Core type part of SOURCE_ID field of the fault.
+ * @gpu_id: GPU id composed of arch_major << 16 | arch_minor << 8 | arch_rev.
+ *
+ * Return: core type description (e.g. "Shader core"), or "unknown" if it cannot be decoded.
+ */
+const char *decode_fault_source_core_type_t_desc(u16 idx, u32 gpu_id);
+
+/**
+ * decode_fault_source_shader_r_t() - Get internal requester of a
+ * shader core read fault in a human readable format.
+ *
+ * @idx: Internal requester part of SOURCE_ID field of the fault.
+ * @gpu_id: GPU id composed of arch_major << 16 | arch_minor << 8 | arch_rev.
+ *
+ * Return: Internal requester of a fault in a human readable format for read
+ * operations on a shader core, or "unknown" if it cannot be decoded.
+ */
+const char *decode_fault_source_shader_r_t(u16 idx, u32 gpu_id);
+
+/**
+ * decode_fault_source_shader_w_t() - Get internal requester of a
+ * shader core write fault in a human readable format.
+ *
+ * @idx: Internal requester part of SOURCE_ID field of the fault.
+ * @gpu_id: GPU id composed of arch_major << 16 | arch_minor << 8 | arch_rev.
+ *
+ * Return: Internal requester of a fault in a human readable format for write
+ * operations on a shader core, or "unknown" if it cannot be decoded.
+ */
+const char *decode_fault_source_shader_w_t(u16 idx, u32 gpu_id);
+
+/**
+ * decode_fault_source_tiler_r_t() - Get internal requester of a
+ * tiler read fault in a human readable format.
+ *
+ * @idx: Internal requester part of SOURCE_ID field of the fault.
+ * @gpu_id: GPU id composed of arch_major << 16 | arch_minor << 8 | arch_rev.
+ *
+ * Return: Internal requester of a fault in a human readable format for read
+ * operations on a tiler core, or "unknown" if it cannot be decoded.
+ */
+const char *decode_fault_source_tiler_r_t(u16 idx, u32 gpu_id);
+
+/**
+ * decode_fault_source_tiler_w_t() - Get internal requester of a
+ * tiler write fault in a human readable format.
+ *
+ * @idx: Internal requester part of SOURCE_ID field of the fault.
+ * @gpu_id: GPU id composed of arch_major << 16 | arch_minor << 8 | arch_rev.
+ *
+ * Return: Internal requester of a fault in a human readable format for write
+ * operations on a tiler core, or "unknown" if it cannot be decoded.
+ */
+const char *decode_fault_source_tiler_w_t(u16 idx, u32 gpu_id);
+
+#endif /* _MALI_KBASE_MMU_FAULTS_DECODER_LUTS_H_ */
diff --git a/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu_hw.h b/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu_hw.h
index c2b377de54a9..560baceafe8a 100644
--- a/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu_hw.h
+++ b/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu_hw.h
@@ -1,7 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
 /*
  *
- * (C) COPYRIGHT 2014-2023 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2024 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -42,7 +42,7 @@ struct kbase_context;
  * enum kbase_mmu_fault_type - MMU fault type descriptor.
  * @KBASE_MMU_FAULT_TYPE_UNKNOWN:         unknown fault
  * @KBASE_MMU_FAULT_TYPE_PAGE:            page fault
- * @KBASE_MMU_FAULT_TYPE_BUS:             nus fault
+ * @KBASE_MMU_FAULT_TYPE_BUS:             bus fault
  * @KBASE_MMU_FAULT_TYPE_PAGE_UNEXPECTED: page_unexpected fault
  * @KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED:  bus_unexpected fault
  */
diff --git a/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu_hw_direct.c b/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu_hw_direct.c
index ba67ae0e01e9..46c04f2b1fc1 100644
--- a/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu_hw_direct.c
+++ b/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu_hw_direct.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
 /*
  *
- * (C) COPYRIGHT 2014-2023 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2024 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -529,8 +529,8 @@ int kbase_mmu_hw_do_flush(struct kbase_device *kbdev, struct kbase_as *as,
 		return ret;
 
 #if MALI_USE_CSF && !IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI)
-	/* WA for the BASE_HW_ISSUE_GPU2019_3901. */
-	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_GPU2019_3901) &&
+	/* WA for the KBASE_HW_ISSUE_GPU2019_3901. */
+	if (kbase_hw_has_issue(kbdev, KBASE_HW_ISSUE_GPU2019_3901) &&
 	    mmu_cmd == AS_COMMAND_COMMAND_FLUSH_MEM) {
 		ret = apply_hw_issue_GPU2019_3901_wa(kbdev, &mmu_cmd, as->number);
 		if (ret) {
@@ -635,6 +635,15 @@ void kbase_mmu_hw_clear_fault(struct kbase_device *kbdev, struct kbase_as *as,
 #endif
 	kbase_reg_write32(kbdev, MMU_CONTROL_ENUM(IRQ_CLEAR), pf_bf_mask);
 
+#if MALI_USE_CSF
+	/* For valid page faults, this function is called just before unblocking the MMU (which
+	 * would in turn unblock the MCU firmware) and so this is an opportune location to
+	 * update the page fault counter value in firmware visible memory.
+	 */
+	if (likely(type == KBASE_MMU_FAULT_TYPE_PAGE) && kbdev->csf.page_fault_cnt_ptr)
+		*kbdev->csf.page_fault_cnt_ptr = ++kbdev->csf.page_fault_cnt;
+#endif
+
 unlock:
 	spin_unlock_irqrestore(&kbdev->mmu_mask_change, flags);
 }
diff --git a/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu_internal.h b/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu_internal.h
index 4c2c1a64ca41..8b68791e4c77 100644
--- a/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu_internal.h
+++ b/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu_internal.h
@@ -1,7 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
 /*
  *
- * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2024 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -38,19 +38,6 @@ void kbase_gpu_report_bus_fault_and_kill(struct kbase_context *kctx, struct kbas
 void kbase_mmu_report_fault_and_kill(struct kbase_context *kctx, struct kbase_as *as,
 				     const char *reason_str, struct kbase_fault *fault);
 
-/**
- * kbase_mmu_switch_to_ir() - Switch to incremental rendering if possible
- * @kctx:	kbase_context for the faulting address space.
- * @reg:	of a growable GPU memory region in the same context.
- *		Takes ownership of the reference if successful.
- *
- * Used to switch to incremental rendering if we have nearly run out of
- * virtual address space in a growable memory region.
- *
- * Return: 0 if successful, otherwise a negative error code.
- */
-int kbase_mmu_switch_to_ir(struct kbase_context *kctx, struct kbase_va_region *reg);
-
 /**
  * kbase_mmu_page_fault_worker() - Process a page fault.
  *
diff --git a/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu_mode_aarch64.c b/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu_mode_aarch64.c
index d19579da2f5d..7aace473011f 100644
--- a/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu_mode_aarch64.c
+++ b/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu_mode_aarch64.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
 /*
  *
- * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2024 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -32,7 +32,7 @@
  */
 #define ENTRY_IS_ATE_L3 3ULL
 #define ENTRY_IS_ATE_L02 1ULL
-#define ENTRY_IS_INVAL 2ULL
+#define ENTRY_IS_INVAL 0ULL
 #define ENTRY_IS_PTE 3ULL
 
 #define ENTRY_ACCESS_RW (1ULL << 6) /* bits 6:7 */
@@ -179,7 +179,7 @@ static void set_num_valid_entries(u64 *pgd, unsigned int num_of_valid_entries)
 
 static void entry_set_pte(u64 *entry, phys_addr_t phy)
 {
-	page_table_entry_set(entry, (phy & PAGE_MASK) | ENTRY_ACCESS_BIT | ENTRY_IS_PTE);
+	page_table_entry_set(entry, (phy & GPU_PAGE_MASK) | ENTRY_ACCESS_BIT | ENTRY_IS_PTE);
 }
 
 static void entries_invalidate(u64 *entry, u32 count)
diff --git a/drivers/gpu/arm/bifrost/platform/devicetree/mali_kbase_runtime_pm.c b/drivers/gpu/arm/bifrost/platform/devicetree/mali_kbase_runtime_pm.c
index 2a5030745586..d0342af60fb3 100644
--- a/drivers/gpu/arm/bifrost/platform/devicetree/mali_kbase_runtime_pm.c
+++ b/drivers/gpu/arm/bifrost/platform/devicetree/mali_kbase_runtime_pm.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
 /*
  *
- * (C) COPYRIGHT 2015-2023 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2015-2024 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
diff --git a/drivers/gpu/arm/bifrost/tests/Kbuild b/drivers/gpu/arm/bifrost/tests/Kbuild
index 72ca70ac8779..479b91532ed7 100644
--- a/drivers/gpu/arm/bifrost/tests/Kbuild
+++ b/drivers/gpu/arm/bifrost/tests/Kbuild
@@ -1,6 +1,6 @@
 # SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
 #
-# (C) COPYRIGHT 2017-2023 ARM Limited. All rights reserved.
+# (C) COPYRIGHT 2017-2024 ARM Limited. All rights reserved.
 #
 # This program is free software and is provided to you under the terms of the
 # GNU General Public License version 2 as published by the Free Software
diff --git a/drivers/gpu/arm/bifrost/tests/Kconfig b/drivers/gpu/arm/bifrost/tests/Kconfig
index aa011bac8990..88a4194c5cd7 100644
--- a/drivers/gpu/arm/bifrost/tests/Kconfig
+++ b/drivers/gpu/arm/bifrost/tests/Kconfig
@@ -1,6 +1,6 @@
 # SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
 #
-# (C) COPYRIGHT 2017, 2020-2023 ARM Limited. All rights reserved.
+# (C) COPYRIGHT 2017, 2020-2024 ARM Limited. All rights reserved.
 #
 # This program is free software and is provided to you under the terms of the
 # GNU General Public License version 2 as published by the Free Software
diff --git a/drivers/gpu/arm/bifrost/tests/include/kutf/kutf_kprobe.h b/drivers/gpu/arm/bifrost/tests/include/kutf/kutf_kprobe.h
index f75cd776c60e..d8c3ca88166b 100644
--- a/drivers/gpu/arm/bifrost/tests/include/kutf/kutf_kprobe.h
+++ b/drivers/gpu/arm/bifrost/tests/include/kutf/kutf_kprobe.h
@@ -1,7 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
 /*
  *
- * (C) COPYRIGHT 2023 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2023-2024 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -22,6 +22,8 @@
 #ifndef _KUTF_KPROBE_H_
 #define _KUTF_KPROBE_H_
 
+struct dentry;
+
 int kutf_kprobe_init(struct dentry *base_dir);
 void kutf_kprobe_exit(void);
 
@@ -30,4 +32,6 @@ typedef void (*kutf_kp_handler)(int argc, char **argv);
 void kutf_kp_sample_handler(int argc, char **argv);
 void kutf_kp_sample_kernel_function(void);
 
+void kutf_kp_delay_handler(int argc, char **argv);
+
 #endif /* _KUTF_KPROBE_H_ */
diff --git a/drivers/gpu/arm/bifrost/tests/kutf/kutf_kprobe.c b/drivers/gpu/arm/bifrost/tests/kutf/kutf_kprobe.c
index f118692c43a1..232809e1ed58 100644
--- a/drivers/gpu/arm/bifrost/tests/kutf/kutf_kprobe.c
+++ b/drivers/gpu/arm/bifrost/tests/kutf/kutf_kprobe.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
 /*
  *
- * (C) COPYRIGHT 2023 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2023-2024 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -29,6 +29,7 @@
 #include <linux/debugfs.h>
 #include <linux/kprobes.h>
 #include <linux/version.h>
+#include <linux/delay.h>
 #include <kutf/kutf_kprobe.h>
 
 #define KUTF_KP_REG_MIN_ARGS 3
@@ -86,6 +87,19 @@ const struct file_operations kutf_kp_unreg_debugfs_fops = {
 
 struct kprobe kutf_kallsym_kp = { .symbol_name = "kallsyms_lookup_name" };
 
+void kutf_kp_delay_handler(int argc, char **argv)
+{
+	unsigned long delay; /* busy-wait length in milliseconds, taken from argv[0] */
+
+	if ((!argv) || (!argv[0]))
+		return;
+
+	/* Unsigned parse: a negative argument is rejected instead of reaching mdelay() */
+	if (kstrtoul(argv[0], 0, &delay))
+		return;
+	mdelay(delay);
+}
+
 void kutf_kp_sample_kernel_function(void)
 {
 	pr_debug("%s called\n", __func__);
@@ -150,11 +164,9 @@ static ssize_t kutf_kp_reg_debugfs_write(struct file *file, const char __user *u
 	if (count >= KUTF_KP_WRITE_BUFSIZE)
 		return -EINVAL;
 
-	kbuf = memdup_user(user_buf, count);
-	if (IS_ERR(kbuf)) {
+	kbuf = memdup_user_nul(user_buf, count);
+	if (IS_ERR(kbuf))
 		return -ENOMEM;
-	}
-	kbuf[count - 1] = '\0';
 
 	argv = argv_split(GFP_KERNEL, kbuf, &argc);
 	if (!argv) {
@@ -245,11 +257,9 @@ static ssize_t kutf_kp_unreg_debugfs_write(struct file *file, const char __user
 	if (count >= KUTF_KP_WRITE_BUFSIZE)
 		return -EINVAL;
 
-	kbuf = memdup_user(user_buf, count);
-	if (IS_ERR(kbuf)) {
+	kbuf = memdup_user_nul(user_buf, count);
+	if (IS_ERR(kbuf))
 		return -ENOMEM;
-	}
-	kbuf[count - 1] = '\0';
 
 	argv = argv_split(GFP_KERNEL, kbuf, &argc);
 	if (!argv) {
diff --git a/drivers/gpu/arm/bifrost/tests/mali_kutf_clk_rate_trace/kernel/mali_kutf_clk_rate_trace_test.c b/drivers/gpu/arm/bifrost/tests/mali_kutf_clk_rate_trace/kernel/mali_kutf_clk_rate_trace_test.c
index 6c343cf9f73b..0598d4397e2a 100644
--- a/drivers/gpu/arm/bifrost/tests/mali_kutf_clk_rate_trace/kernel/mali_kutf_clk_rate_trace_test.c
+++ b/drivers/gpu/arm/bifrost/tests/mali_kutf_clk_rate_trace/kernel/mali_kutf_clk_rate_trace_test.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
 /*
  *
- * (C) COPYRIGHT 2020-2023 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2020-2024 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -425,7 +425,7 @@ static const char *kutf_clk_trace_do_get_platform(struct kutf_context *context,
 	const void *arbiter_if_node = NULL;
 	const void *power_node = NULL;
 	const char *platform = "GPU";
-#if defined(CONFIG_MALI_ARBITER_SUPPORT) && defined(CONFIG_OF)
+#if defined(CONFIG_OF)
 	struct kutf_clk_rate_trace_fixture_data *data = context->fixture;
 
 	arbiter_if_node = of_get_property(data->kbdev->dev->of_node, "arbiter-if", NULL);
diff --git a/drivers/gpu/arm/bifrost/tests/mali_kutf_mgm_integration_test/mali_kutf_mgm_integration_test_main.c b/drivers/gpu/arm/bifrost/tests/mali_kutf_mgm_integration_test/mali_kutf_mgm_integration_test_main.c
index 8937d69f182f..f341a411324e 100644
--- a/drivers/gpu/arm/bifrost/tests/mali_kutf_mgm_integration_test/mali_kutf_mgm_integration_test_main.c
+++ b/drivers/gpu/arm/bifrost/tests/mali_kutf_mgm_integration_test/mali_kutf_mgm_integration_test_main.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
 /*
  *
- * (C) COPYRIGHT 2022-2023 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2022-2024 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -95,7 +95,9 @@ static void mali_kutf_mgm_pte_translation_test(struct kutf_context *context)
 				data->group_id, mmu_level, original_pte);
 
 			translated_pte = mgm_dev->ops.mgm_update_gpu_pte(mgm_dev, data->group_id,
-									 mmu_level, original_pte);
+									 PBHA_ID_DEFAULT,
+									 PTE_FLAGS_NONE, mmu_level,
+									 original_pte);
 			if (translated_pte == original_pte) {
 				snprintf(
 					msg_buf, sizeof(msg_buf),
diff --git a/drivers/gpu/arm/bifrost/thirdparty/mali_kbase_mmap.c b/drivers/gpu/arm/bifrost/thirdparty/mali_kbase_mmap.c
index 1592eab806ac..cfb347affa2e 100644
--- a/drivers/gpu/arm/bifrost/thirdparty/mali_kbase_mmap.c
+++ b/drivers/gpu/arm/bifrost/thirdparty/mali_kbase_mmap.c
@@ -20,18 +20,169 @@
  * kbase_context_get_unmapped_area() interface.
  */
 
+#if (KERNEL_VERSION(6, 1, 0) <= LINUX_VERSION_CODE)
+/**
+ * move_mt_gap() -  Search the maple tree for an existing gap of a particular size
+ *                  immediately before another pre-identified gap.
+ * @gap_start:      Pre-identified gap starting address; replaced if a preceding gap is used.
+ * @gap_end:        Pre-identified gap ending address; lowered by @size on success.
+ * @size:           Size of the new gap needed before gap_start.
+ *
+ * This function will search the calling process' maple tree
+ * for another gap, one that is immediately preceding the pre-identified
+ * gap, for a specific size, and upon success it will decrement gap_end
+ * by the specified size, and replace gap_start with the new gap_start of
+ * the newly identified gap.
+ *
+ * Return: true if gap_end was lowered by @size (in place or via a preceding gap), else false.
+ */
+static bool move_mt_gap(unsigned long *gap_start, unsigned long *gap_end, unsigned long size)
+{
+	unsigned long new_gap_start, new_gap_end;
+
+	MA_STATE(mas, &current->mm->mm_mt, 0, 0);
+
+	if (*gap_end < size)
+		return false; /* *gap_end - size below would wrap around */
+
+	/* Calculate the gap end for the new, resultant gap */
+	new_gap_end = *gap_end - size;
+
+	/* If the new gap_end (i.e. new VA start address) is not below gap_start, then the
+	 * pre-identified gap already has space to shrink to accommodate the decrease in
+	 * gap_end.
+	 */
+	if (new_gap_end >= *gap_start) {
+		/* Pre-identified gap already has space - just patch gap_end to new
+		 * lower value and exit.
+		 */
+		*gap_end = new_gap_end;
+		return true;
+	}
+
+	/* Since the new VA start address (new_gap_end) is below the start of the pre-identified
+	 * gap in the maple tree, see if there is a free gap directly before the existing gap, of
+	 * the same size as the alignment shift, such that the effective gap found is "extended".
+	 * This may be larger than needed but leaves the same distance between gap_end and gap_start
+	 * that currently exists.
+	 */
+	new_gap_start = *gap_start - size;
+	if (mas_empty_area_rev(&mas, new_gap_start, *gap_start - 1, size)) {
+		/* There's no gap between the new start address needed and the
+		 * current start address - so return false to find a new
+		 * gap from the maple tree.
+		 */
+		return false;
+	}
+	/* Suitable gap found - replace gap_start and gap_end with new values. gap_start takes the
+	 * value of the start of new gap found, which now correctly precedes gap_end, and gap_end
+	 * takes on the new aligned value that has now been decremented by the requested size.
+	 */
+	*gap_start = mas.index;
+	*gap_end = new_gap_end;
+	return true;
+}
+
 /**
  * align_and_check() - Align the specified pointer to the provided alignment and
- *                     check that it is still in range.
- * @gap_end:        Highest possible start address for allocation (end of gap in
- *                  address space)
- * @gap_start:      Start address of current memory area / gap in address space
- * @info:           vm_unmapped_area_info structure passed to caller, containing
- *                  alignment, length and limits for the allocation
- * @is_shader_code: True if the allocation is for shader code (which has
- *                  additional alignment requirements)
- * @is_same_4gb_page: True if the allocation needs to reside completely within
- *                    a 4GB chunk
+ *                     check that it is still in range. On kernel 6.1 onwards
+ *                     this function does not require that the initial requested
+ *                     gap is extended with the maximum size needed to guarantee
+ *                     an alignment.
+ * @gap_end:           Highest possible start address for allocation (end of gap in
+ *                     address space)
+ * @gap_start:         Start address of current memory area / gap in address space
+ * @info:              vm_unmapped_area_info structure passed to caller, containing
+ *                     alignment, length and limits for the allocation
+ * @is_shader_code:    True if the allocation is for shader code (which has
+ *                     additional alignment requirements)
+ * @is_same_4gb_page:  True if the allocation needs to reside completely within
+ *                     a 4GB chunk
+ *
+ * Return: true if gap_end is now aligned correctly and is still in range,
+ *         false otherwise
+ */
+static bool align_and_check(unsigned long *gap_end, unsigned long gap_start,
+			    struct vm_unmapped_area_info *info, bool is_shader_code,
+			    bool is_same_4gb_page)
+{
+	unsigned long alignment_shift;
+
+	/* Highest start address fitting in the gap, and its distance from the alignment */
+	*gap_end -= info->length;
+	alignment_shift = (*gap_end - info->align_offset) & info->align_mask;
+
+	/* Align desired start VA (gap_end) by calculated alignment shift amount */
+	if (!move_mt_gap(&gap_start, gap_end, alignment_shift))
+		return false;
+	/* Alignment is done so far - check for further alignment requirements */
+
+	if (is_shader_code) {
+		/* Shader code allocations must not start or end on a 4GB boundary */
+		alignment_shift = info->align_offset ? info->align_offset : info->length;
+		if (0 == (*gap_end & BASE_MEM_MASK_4GB)) {
+			if (!move_mt_gap(&gap_start, gap_end, alignment_shift))
+				return false;
+		}
+		if (0 == ((*gap_end + info->length) & BASE_MEM_MASK_4GB)) {
+			if (!move_mt_gap(&gap_start, gap_end, alignment_shift))
+				return false;
+		}
+
+		if (!(*gap_end & BASE_MEM_MASK_4GB) ||
+		    !((*gap_end + info->length) & BASE_MEM_MASK_4GB))
+			return false;
+	} else if (is_same_4gb_page) {
+		unsigned long start = *gap_end;
+		unsigned long end = *gap_end + info->length;
+		unsigned long mask = ~((unsigned long)U32_MAX); /* clears the low 32 bits */
+
+		/* Check if 4GB boundary is straddled */
+		if ((start & mask) != ((end - 1) & mask)) {
+			unsigned long offset = end - (end & mask);
+			/* This is to ensure that alignment doesn't get
+			 * disturbed in an attempt to prevent straddling at
+			 * 4GB boundary. The GPU VA is aligned to 2MB when the
+			 * allocation size is > 2MB and there is enough CPU &
+			 * GPU virtual space.
+			 */
+			unsigned long rounded_offset = ALIGN(offset, info->align_mask + 1);
+
+			if (!move_mt_gap(&gap_start, gap_end, rounded_offset))
+				return false;
+			/* Re-calculate start and end values */
+			start = *gap_end;
+			end = *gap_end + info->length;
+
+			/* The preceding 4GB boundary shall not get straddled,
+			 * even after accounting for the alignment, as the
+			 * size of allocation is limited to 4GB and the initial
+			 * start location was already aligned.
+			 */
+			WARN_ON((start & mask) != ((end - 1) & mask));
+		}
+	}
+
+	if ((*gap_end < info->low_limit) || (*gap_end < gap_start))
+		return false;
+
+	return true;
+}
+#else
+/**
+ * align_and_check() - Align the specified pointer to the provided alignment and
+ *                     check that it is still in range. For Kernel versions below
+ *                     6.1, it requires that the length of the alignment is already
+ *                     extended by a worst-case alignment mask.
+ * @gap_end:           Highest possible start address for allocation (end of gap in
+ *                     address space)
+ * @gap_start:         Start address of current memory area / gap in address space
+ * @info:              vm_unmapped_area_info structure passed to caller, containing
+ *                     alignment, length and limits for the allocation
+ * @is_shader_code:    True if the allocation is for shader code (which has
+ *                     additional alignment requirements)
+ * @is_same_4gb_page:  True if the allocation needs to reside completely within
+ *                     a 4GB chunk
  *
  * Return: true if gap_end is now aligned correctly and is still in range,
  *         false otherwise
@@ -41,8 +192,8 @@ static bool align_and_check(unsigned long *gap_end, unsigned long gap_start,
 			    bool is_same_4gb_page)
 {
 	/* Compute highest gap address at the desired alignment */
-	(*gap_end) -= info->length;
-	(*gap_end) -= (*gap_end - info->align_offset) & info->align_mask;
+	*gap_end -= info->length;
+	*gap_end -= (*gap_end - info->align_offset) & info->align_mask;
 
 	if (is_shader_code) {
 		/* Check for 4GB boundary */
@@ -73,6 +224,7 @@ static bool align_and_check(unsigned long *gap_end, unsigned long gap_start,
 			start -= rounded_offset;
 			end -= rounded_offset;
 
+			/* Patch gap_end to use new starting address for VA region */
 			*gap_end = start;
 
 			/* The preceding 4GB boundary shall not get straddled,
@@ -89,6 +241,7 @@ static bool align_and_check(unsigned long *gap_end, unsigned long gap_start,
 
 	return true;
 }
+#endif
 
 /**
  * kbase_unmapped_area_topdown() - allocates new areas top-down from
@@ -218,31 +371,27 @@ check_current:
 		}
 	}
 #else
-	unsigned long length, high_limit, gap_start, gap_end;
+	unsigned long high_limit, gap_start, gap_end;
 
 	MA_STATE(mas, &current->mm->mm_mt, 0, 0);
-	/* Adjust search length to account for worst case alignment overhead */
-	length = info->length + info->align_mask;
-	if (length < info->length)
-		return -ENOMEM;
 
 	/*
 	 * Adjust search limits by the desired length.
 	 * See implementation comment at top of unmapped_area().
 	 */
 	gap_end = info->high_limit;
-	if (gap_end < length)
+	if (gap_end < info->length)
 		return -ENOMEM;
-	high_limit = gap_end - length;
+	high_limit = gap_end - info->length;
 
 	if (info->low_limit > high_limit)
 		return -ENOMEM;
 
 	while (true) {
-		if (mas_empty_area_rev(&mas, info->low_limit, info->high_limit - 1, length))
+		if (mas_empty_area_rev(&mas, info->low_limit, info->high_limit - 1, info->length))
 			return -ENOMEM;
 		gap_end = mas.last + 1;
-		gap_start = mas.min;
+		gap_start = mas.index;
 
 		if (align_and_check(&gap_end, gap_start, info, is_shader_code, is_same_4gb_page))
 			return gap_end;
@@ -368,7 +517,7 @@ unsigned long kbase_context_get_unmapped_area(struct kbase_context *const kctx,
 		kbase_gpu_vm_unlock(kctx);
 #ifndef CONFIG_64BIT
 	} else {
-		return current->mm->get_unmapped_area(kctx->kfile->filp, addr, len, pgoff, flags);
+		return current->mm->get_unmapped_area(kctx->filp, addr, len, pgoff, flags);
 #endif
 	}
 
diff --git a/drivers/gpu/arm/bifrost/tl/backend/mali_kbase_timeline_csf.c b/drivers/gpu/arm/bifrost/tl/backend/mali_kbase_timeline_csf.c
index a91278dd3bef..f254aa84dc20 100644
--- a/drivers/gpu/arm/bifrost/tl/backend/mali_kbase_timeline_csf.c
+++ b/drivers/gpu/arm/bifrost/tl/backend/mali_kbase_timeline_csf.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
 /*
  *
- * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2024 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -35,7 +35,7 @@ void kbase_create_timeline_objects(struct kbase_device *kbdev)
 	u32 const num_sb_entries = kbdev->gpu_props.gpu_id.arch_major >= 11 ? 16 : 8;
 	u32 const supports_gpu_sleep =
 #ifdef KBASE_PM_RUNTIME
-		kbdev->pm.backend.gpu_sleep_supported;
+		test_bit(KBASE_GPU_SUPPORTS_GPU_SLEEP, &kbdev->pm.backend.gpu_sleep_allowed);
 #else
 		false;
 #endif /* KBASE_PM_RUNTIME */
diff --git a/drivers/gpu/arm/bifrost/tl/mali_kbase_timeline_io.c b/drivers/gpu/arm/bifrost/tl/mali_kbase_timeline_io.c
index d98e22880419..719e26124409 100644
--- a/drivers/gpu/arm/bifrost/tl/mali_kbase_timeline_io.c
+++ b/drivers/gpu/arm/bifrost/tl/mali_kbase_timeline_io.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
 /*
  *
- * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2024 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -29,6 +29,7 @@
 #include <linux/poll.h>
 #include <linux/version_compat_defs.h>
 #include <linux/anon_inodes.h>
+#include <linux/overflow.h>
 
 /* Explicitly include epoll header for old kernels. Not required from 4.16. */
 #if KERNEL_VERSION(4, 16, 0) > LINUX_VERSION_CODE
@@ -169,7 +170,7 @@ static inline int copy_stream_header(char __user *buffer, size_t size, ssize_t *
 				     const char *hdr, size_t hdr_size, size_t *hdr_btc)
 {
 	const size_t offset = hdr_size - *hdr_btc;
-	const size_t copy_size = MIN((size_t)((ssize_t)size - *copy_len), *hdr_btc);
+	const size_t copy_size = MIN(size_sub((ssize_t)size, *copy_len), *hdr_btc);
 
 	if (!*hdr_btc)
 		return 0;
diff --git a/drivers/gpu/arm/bifrost/tl/mali_kbase_tracepoints.c b/drivers/gpu/arm/bifrost/tl/mali_kbase_tracepoints.c
index 742735846d49..d4465c44addb 100644
--- a/drivers/gpu/arm/bifrost/tl/mali_kbase_tracepoints.c
+++ b/drivers/gpu/arm/bifrost/tl/mali_kbase_tracepoints.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
 /*
  *
- * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2024 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -62,6 +62,7 @@ enum tl_msg_id_obj {
 	KBASE_TL_EVENT_ATOM_SOFTJOB_START,
 	KBASE_TL_EVENT_ATOM_SOFTJOB_END,
 	KBASE_TL_ARBITER_GRANTED,
+	KBASE_TL_ARBITER_LOST,
 	KBASE_TL_ARBITER_STARTED,
 	KBASE_TL_ARBITER_STOP_REQUESTED,
 	KBASE_TL_ARBITER_STOPPED,
@@ -272,6 +273,10 @@ enum tl_msg_id_obj {
 		"Arbiter has granted gpu access", \
 		"@p", \
 		"gpu") \
+	TRACEPOINT_DESC(KBASE_TL_ARBITER_LOST, \
+		"Received a gpu lost event from the arbiter", \
+		"@p", \
+		"gpu") \
 	TRACEPOINT_DESC(KBASE_TL_ARBITER_STARTED, \
 		"Driver is running again and able to process jobs", \
 		"@p", \
@@ -1546,6 +1551,29 @@ void __kbase_tlstream_tl_arbiter_granted(
 	kbase_tlstream_msgbuf_release(stream, acq_flags);
 }
 
+void __kbase_tlstream_tl_arbiter_lost(
+	struct kbase_tlstream *stream,
+	const void *gpu
+)
+{
+	const u32 msg_id = KBASE_TL_ARBITER_LOST;
+	const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+		+ sizeof(gpu)
+		; /* message id + u64 (presumably the timestamp) + gpu pointer value */
+	char *buffer;
+	unsigned long acq_flags;
+	size_t pos = 0;
+
+	buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+	pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_serialize_timestamp(buffer, pos);
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &gpu, sizeof(gpu)); /* copies the pointer value itself, not the pointee */
+
+	kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
 void __kbase_tlstream_tl_arbiter_started(
 	struct kbase_tlstream *stream,
 	const void *gpu
diff --git a/drivers/gpu/arm/bifrost/tl/mali_kbase_tracepoints.h b/drivers/gpu/arm/bifrost/tl/mali_kbase_tracepoints.h
index b2cbfe6e528d..6dd4b44ea6b2 100644
--- a/drivers/gpu/arm/bifrost/tl/mali_kbase_tracepoints.h
+++ b/drivers/gpu/arm/bifrost/tl/mali_kbase_tracepoints.h
@@ -1,7 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
 /*
  *
- * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2024 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -271,6 +271,11 @@ void __kbase_tlstream_tl_arbiter_granted(
 	const void *gpu
 );
 
+void __kbase_tlstream_tl_arbiter_lost(
+	struct kbase_tlstream *stream,
+	const void *gpu
+);
+
 void __kbase_tlstream_tl_arbiter_started(
 	struct kbase_tlstream *stream,
 	const void *gpu
@@ -1550,6 +1555,25 @@ struct kbase_tlstream;
 				);	\
 	} while (0)
 
+/**
+ * KBASE_TLSTREAM_TL_ARBITER_LOST - Received a gpu lost event from the arbiter
+ *
+ * @kbdev: Kbase device; nothing is emitted unless TLSTREAM_ENABLED is set in its timeline_flags
+ * @gpu: Name of the GPU object
+ */
+#define KBASE_TLSTREAM_TL_ARBITER_LOST(	\
+	kbdev,	\
+	gpu	\
+	)	\
+	do {	\
+		u32 enabled = (u32)atomic_read(&kbdev->timeline_flags);	\
+		if (enabled & TLSTREAM_ENABLED)	\
+			__kbase_tlstream_tl_arbiter_lost(	\
+				__TL_DISPATCH_STREAM(kbdev, obj),	\
+				gpu	\
+				);	\
+	} while (0)
+
 /**
  * KBASE_TLSTREAM_TL_ARBITER_STARTED - Driver is running again and able to process jobs
  *
diff --git a/drivers/hwtracing/coresight/mali/Makefile b/drivers/hwtracing/coresight/mali/Makefile
index 923cb0c910d9..d8186bee6e64 100644
--- a/drivers/hwtracing/coresight/mali/Makefile
+++ b/drivers/hwtracing/coresight/mali/Makefile
@@ -79,9 +79,9 @@ ifeq ($(MALI_KCONFIG_EXT_PREFIX),)
 endif
 
 EXTRA_SYMBOLS += \
-    $(M)/../../../base/arm/Module.symvers \
     $(GPU_SYMBOLS)
 
+
 # The following were added to align with W=1 in scripts/Makefile.extrawarn
 # from the Linux source tree
 CFLAGS_MODULE += -Wall -Werror
@@ -99,6 +99,8 @@ CFLAGS_MODULE += $(call cc-option, -Wstringop-truncation)
 CFLAGS_MODULE += -Wno-missing-field-initializers
 CFLAGS_MODULE += -Wno-sign-compare
 CFLAGS_MODULE += -Wno-type-limits
+# The following ensures the stack frame does not get larger than a page
+CFLAGS_MODULE += -Wframe-larger-than=4096
 
 KBUILD_CPPFLAGS += -DKBUILD_EXTRA_WARN1
 
diff --git a/drivers/hwtracing/coresight/mali/build.bp b/drivers/hwtracing/coresight/mali/build.bp
index 33dcd22fa364..d69148c8cb70 100644
--- a/drivers/hwtracing/coresight/mali/build.bp
+++ b/drivers/hwtracing/coresight/mali/build.bp
@@ -1,7 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
 /*
  *
- * (C) COPYRIGHT 2022-2023 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2022-2024 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
diff --git a/drivers/hwtracing/coresight/mali/sources/coresight_mali_sources.c b/drivers/hwtracing/coresight/mali/sources/coresight_mali_sources.c
index e6d2dc71096b..247a8b47f05b 100644
--- a/drivers/hwtracing/coresight/mali/sources/coresight_mali_sources.c
+++ b/drivers/hwtracing/coresight/mali/sources/coresight_mali_sources.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
 /*
  *
- * (C) COPYRIGHT 2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2022-2023 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -50,7 +50,11 @@ static void coresight_mali_disable_source(struct coresight_device *csdev, struct
 }
 
 static const struct coresight_ops_source coresight_mali_source_ops = {
+#if KERNEL_VERSION(6, 3, 0) <= LINUX_VERSION_CODE
+	.cpu_id = coresight_mali_source_trace_id,
+#else
 	.trace_id = coresight_mali_source_trace_id,
+#endif
 	.enable = coresight_mali_enable_source,
 	.disable = coresight_mali_disable_source
 };
diff --git a/drivers/hwtracing/coresight/mali/sources/itm/coresight_mali_source_itm_core.c b/drivers/hwtracing/coresight/mali/sources/itm/coresight_mali_source_itm_core.c
index 59d5cd314c2f..727e5c7a552a 100644
--- a/drivers/hwtracing/coresight/mali/sources/itm/coresight_mali_source_itm_core.c
+++ b/drivers/hwtracing/coresight/mali/sources/itm/coresight_mali_source_itm_core.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
 /*
  *
- * (C) COPYRIGHT 2022-2023 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2022-2024 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -85,14 +85,14 @@ static struct kbase_debug_coresight_csf_op dwt_itm_enable_ops[] = {
 };
 
 static struct kbase_debug_coresight_csf_op dwt_itm_disable_ops[] = {
-	// Disable ITM/DWT functionality via DEMCR register
-	WRITE_IMM_OP(CS_SCS_BASE_ADDR + SCS_DEMCR, 0x00000000),
 	// Unlock ITM configuration
 	WRITE_IMM_OP(CS_ITM_BASE_ADDR + CORESIGHT_LAR, CS_MALI_UNLOCK_COMPONENT),
 	// Check ITM is disabled
 	POLL_OP(CS_ITM_BASE_ADDR + ITM_TCR, ITM_TCR_BUSY_BIT, 0x0),
 	// Lock
 	WRITE_IMM_OP(CS_ITM_BASE_ADDR + CORESIGHT_LAR, 0x00000000),
+	// Disable ITM/DWT functionality via DEMCR register
+	WRITE_IMM_OP(CS_SCS_BASE_ADDR + SCS_DEMCR, 0x00000000),
 	// Set enabled bit off at the end of sequence
 	BIT_AND_OP(&itm_state.enabled, 0x0),
 };
diff --git a/drivers/xen/arm/Makefile b/drivers/xen/arm/Makefile
index b2ee53723428..27bee59ac787 100644
--- a/drivers/xen/arm/Makefile
+++ b/drivers/xen/arm/Makefile
@@ -78,6 +78,8 @@ CFLAGS_MODULE += $(call cc-option, -Wstringop-truncation)
 CFLAGS_MODULE += -Wno-missing-field-initializers
 CFLAGS_MODULE += -Wno-sign-compare
 CFLAGS_MODULE += -Wno-type-limits
+# The following ensures the stack frame does not get larger than a page
+CFLAGS_MODULE += -Wframe-larger-than=4096
 
 KBUILD_CPPFLAGS += -DKBUILD_EXTRA_WARN1
 
diff --git a/include/linux/mali_arbiter_interface.h b/include/linux/mali_arbiter_interface.h
index b4162f86ebb4..ae44e82ae6dd 100644
--- a/include/linux/mali_arbiter_interface.h
+++ b/include/linux/mali_arbiter_interface.h
@@ -1,7 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
 /*
  *
- * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2024 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -26,6 +26,8 @@
 #ifndef _MALI_KBASE_ARBITER_INTERFACE_H_
 #define _MALI_KBASE_ARBITER_INTERFACE_H_
 
+#include <linux/device.h>
+
 /**
  * DOC: Mali arbiter interface version
  *
diff --git a/include/linux/mali_hw_access.h b/include/linux/mali_hw_access.h
new file mode 100644
index 000000000000..106393fc3372
--- /dev/null
+++ b/include/linux/mali_hw_access.h
@@ -0,0 +1,62 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ *
+ * (C) COPYRIGHT 2023-2024 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+#ifndef _MALI_HW_ACCESS_H_
+#define _MALI_HW_ACCESS_H_
+
+#include <asm/arch_timer.h>
+#include <linux/io.h>
+
+
+#define mali_readl(addr) readl(addr)
+
+#define mali_writel(val, addr) writel(val, addr)
+
+#define mali_ioremap(addr, size) ioremap(addr, size)
+
+#define mali_iounmap(addr) iounmap(addr)
+
+#define mali_arch_timer_get_cntfrq() arch_timer_get_cntfrq()
+
+/* Non-atomic 64-bit read built from two 32-bit reads; @addr is evaluated twice. */
+#define mali_readq(addr) ((u64)mali_readl(addr) | ((u64)mali_readl((addr) + 4) << 32))
+/* Read a 64-bit value as two 32-bit accesses, re-reading the high word until it is stable. */
+static inline u64 mali_readq_coherent(const void __iomem *addr)
+{
+	u32 hi1, hi2, lo;
+
+	do {
+		hi1 = mali_readl(addr + 4);
+		lo = mali_readl(addr);
+		hi2 = mali_readl(addr + 4);
+	} while (hi1 != hi2);
+
+	return lo | (((u64)hi1) << 32);
+}
+/* Write a 64-bit value as two 32-bit writes, low word first; @addr is evaluated twice. */
+#define mali_writeq(val, addr)                                \
+	do {                                                  \
+		u64 __val = (u64)(val);                       \
+		mali_writel((u32)(__val & 0xFFFFFFFF), addr); \
+		mali_writel((u32)(__val >> 32), (addr) + 4);  \
+	} while (0)
+
+#endif /* _MALI_HW_ACCESS_H_ */
diff --git a/include/linux/memory_group_manager.h b/include/linux/memory_group_manager.h
index 3820f1bff86b..ec55d74f56ad 100644
--- a/include/linux/memory_group_manager.h
+++ b/include/linux/memory_group_manager.h
@@ -1,7 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
 /*
  *
- * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2024 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -32,9 +32,19 @@ typedef int vm_fault_t;
 
 #define MEMORY_GROUP_MANAGER_NR_GROUPS (16)
 
+#define PTE_PBHA_SHIFT (59)
+#define PTE_PBHA_MASK ((uint64_t)0xf << PTE_PBHA_SHIFT)
+#define PTE_RES_BIT_MULTI_AS_SHIFT (63)
+#define PTE_FLAGS_NONE (0)
+#define PBHA_ID_DEFAULT (0)
+
 struct memory_group_manager_device;
 struct memory_group_manager_import_data;
 
+enum mgm_pte_flags {
+	MMA_VIOLATION = 0,
+};
+
 /**
  * struct memory_group_manager_ops - Callbacks for memory group manager
  *                                   operations
@@ -46,6 +56,8 @@ struct memory_group_manager_import_data;
  * @mgm_pte_to_original_pte:  Callback to get the original PTE entry as given
  *                            to mgm_update_gpu_pte
  * @mgm_vmf_insert_pfn_prot:  Callback to map a physical memory page for the CPU
+ * @mgm_get_import_memory_cached_access_permitted: Callback to query if a given imported
+ *                            memory is allowed to be accessed as cached or not by the GPU
  */
 struct memory_group_manager_ops {
 	/*
@@ -115,6 +127,11 @@ struct memory_group_manager_ops {
 	 * @group_id:  A physical memory group ID. The meaning of this is
 	 *             defined by the systems integrator. Its valid range is
 	 *             0 .. MEMORY_GROUP_MANAGER_NR_GROUPS-1.
+	 *
+	 * @pbha_id:   PBHA Override ID to encode into the PTE
+	 * @pte_flags: PTE related flags, defined in enum mgm_pte_flags
+	 *
+	 *
 	 * @mmu_level: The level of the page table entry in @ate.
 	 * @pte:       The page table entry to modify, in LPAE or AArch64 format
 	 *             (depending on the driver's configuration). This should be
@@ -124,13 +141,14 @@ struct memory_group_manager_ops {
 	 * This function allows the memory group manager to modify a GPU page
 	 * table entry before it is stored by the kbase module (controller
 	 * driver). It may set certain bits in the page table entry attributes
-	 * or modify the physical address, based on the physical memory group ID
-	 * and/or additional data in struct memory_group_manager_device.
+	 * or modify the physical address, based on the physical memory group ID,
+	 * PBHA ID, PTE flags and/or additional data in struct memory_group_manager_device.
 	 *
 	 * Return: A modified GPU page table entry to be stored in a page table.
 	 */
 	u64 (*mgm_update_gpu_pte)(struct memory_group_manager_device *mgm_dev,
-				  unsigned int group_id, int mmu_level, u64 pte);
+				  unsigned int group_id, unsigned int pbha_id,
+				  unsigned int pte_flags, int mmu_level, u64 pte);
 
 	/*
 	 * mgm_pte_to_original_pte - Undo any modification done during mgm_update_gpu_pte()
@@ -178,6 +196,20 @@ struct memory_group_manager_ops {
 					      unsigned int group_id, struct vm_area_struct *vma,
 					      unsigned long addr, unsigned long pfn,
 					      pgprot_t pgprot);
+
+	/*
+	 * mgm_get_import_memory_cached_access_permitted - Check if a given imported memory
+	 *                            is allowed to be accessed as cached or not by the GPU
+	 *
+	 * @mgm_dev:     The memory group manager through which the request
+	 *               is being made.
+	 * @import_data: Pointer to the data which describes imported memory.
+	 *
+	 * Return: true if cached access is permitted, false otherwise
+	 */
+	bool (*mgm_get_import_memory_cached_access_permitted)(
+		struct memory_group_manager_device *mgm_dev,
+		struct memory_group_manager_import_data *import_data);
 };
 
 /**
diff --git a/include/linux/version_compat_defs.h b/include/linux/version_compat_defs.h
index 9144b719b08d..3a5b5fe405ee 100644
--- a/include/linux/version_compat_defs.h
+++ b/include/linux/version_compat_defs.h
@@ -1,7 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
 /*
  *
- * (C) COPYRIGHT 2022-2023 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2022-2024 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -26,6 +26,9 @@
 #include <linux/highmem.h>
 #include <linux/timer.h>
 #include <linux/iopoll.h>
+#include <linux/bitmap.h>
+#include <linux/math64.h>
+#include <linux/moduleparam.h>
 
 #if (KERNEL_VERSION(4, 4, 267) < LINUX_VERSION_CODE)
 #include <linux/overflow.h>
@@ -176,6 +179,7 @@ static inline void kbase_kunmap_atomic(void *address)
  */
 #define check_mul_overflow(a, b, d) __builtin_mul_overflow(a, b, d)
 #define check_add_overflow(a, b, d) __builtin_add_overflow(a, b, d)
+#define check_sub_overflow(a, b, d) __builtin_sub_overflow(a, b, d)
 #endif
 
 /*
@@ -337,4 +341,173 @@ static inline long kbase_pin_user_pages_remote(struct task_struct *tsk, struct m
 
 #endif /* (KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE) */
 
+#if (KERNEL_VERSION(4, 16, 0) > LINUX_VERSION_CODE)
+/* Null definition */
+#define ALLOW_ERROR_INJECTION(fname, err_type)
+#endif /* (KERNEL_VERSION(4, 16, 0) > LINUX_VERSION_CODE) */
+
+#if KERNEL_VERSION(6, 0, 0) > LINUX_VERSION_CODE
+#define KBASE_REGISTER_SHRINKER(reclaim, name, priv_data) register_shrinker(reclaim)
+
+/* clang-format off */
+#elif ((KERNEL_VERSION(6, 7, 0) > LINUX_VERSION_CODE) && \
+	!(defined(__ANDROID_COMMON_KERNEL__) && (KERNEL_VERSION(6, 6, 0) == LINUX_VERSION_CODE)))
+/* clang-format on */
+#define KBASE_REGISTER_SHRINKER(reclaim, name, priv_data) register_shrinker(reclaim, name)
+
+#else
+/* Store priv_data in the shrinker, then register it; arguments may be expressions. */
+#define KBASE_REGISTER_SHRINKER(reclaim, name, priv_data) \
+	do {                                              \
+		(reclaim)->private_data = (priv_data);    \
+		shrinker_register(reclaim);               \
+	} while (0)
+#endif /* KERNEL_VERSION(6, 0, 0) > LINUX_VERSION_CODE */
+/* clang-format off */
+#if ((KERNEL_VERSION(6, 7, 0) > LINUX_VERSION_CODE) && \
+	!(defined(__ANDROID_COMMON_KERNEL__) && (KERNEL_VERSION(6, 6, 0) == LINUX_VERSION_CODE)))
+/* clang-format on */
+#define KBASE_UNREGISTER_SHRINKER(reclaim) unregister_shrinker(&(reclaim))
+#define KBASE_GET_KBASE_DATA_FROM_SHRINKER(s, type, var) container_of(s, type, var)
+#define DEFINE_KBASE_SHRINKER struct shrinker
+#define KBASE_INIT_RECLAIM(var, attr, name) (&((var)->attr))
+#define KBASE_SET_RECLAIM(var, attr, reclaim) ((var)->attr = (*(reclaim)))
+
+#else /* shrinker object is obtained from shrinker_alloc() and handled by pointer */
+#define KBASE_UNREGISTER_SHRINKER(reclaim) shrinker_free(reclaim)
+#define KBASE_GET_KBASE_DATA_FROM_SHRINKER(s, type, var) ((s)->private_data)
+#define DEFINE_KBASE_SHRINKER struct shrinker *
+#define KBASE_SHRINKER_ALLOC(name) shrinker_alloc(0, name)
+#define KBASE_INIT_RECLAIM(var, attr, name) (KBASE_SHRINKER_ALLOC(name))
+#define KBASE_SET_RECLAIM(var, attr, reclaim) ((var)->attr = (reclaim))
+
+#endif
+
+static inline int kbase_param_set_uint_minmax(const char *val, const struct kernel_param *kp,
+					      unsigned int min, unsigned int max)
+{
+#if (KERNEL_VERSION(5, 15, 0) > LINUX_VERSION_CODE)
+	/* Open-coded path for kernels older than 5.15: parse, range-check, then store. */
+	uint parsed;
+	int err;
+
+	if (!val)
+		return -EINVAL;
+
+	err = kstrtouint(val, 0, &parsed);
+	if (err != 0)
+		return err;
+
+	if (parsed < min || parsed > max)
+		return -EINVAL;
+
+	*((uint *)kp->arg) = parsed;
+	return 0;
+#else
+	return param_set_uint_minmax(val, kp, min, max);
+#endif
+}
+
+#if (KERNEL_VERSION(4, 20, 0) <= LINUX_VERSION_CODE)
+#include <linux/compiler_attributes.h>
+#endif
+#ifndef __maybe_unused
+#define __maybe_unused __attribute__((unused))
+#endif
+/* ##__VA_ARGS__ drops the trailing comma so a format with no arguments still compiles. */
+#if KERNEL_VERSION(5, 4, 103) <= LINUX_VERSION_CODE
+#define mali_sysfs_emit(buf, fmt, ...) sysfs_emit(buf, fmt, ##__VA_ARGS__)
+#else
+#define mali_sysfs_emit(buf, fmt, ...) scnprintf(buf, PAGE_SIZE, fmt, ##__VA_ARGS__)
+#endif
+
+#if KERNEL_VERSION(5, 10, 0) > LINUX_VERSION_CODE
+#include <linux/devfreq.h>
+#include <linux/of_platform.h>
+
+static inline struct devfreq *devfreq_get_devfreq_by_node(struct device_node *node)
+{
+	/* Reject a NULL @node up front so it is never handed to of_find_device_by_node(). */
+	struct platform_device *pdev = node ? of_find_device_by_node(node) : NULL;
+
+	if (!pdev)
+		return NULL;
+	return devfreq_get_devfreq_by_phandle(&pdev->dev, 0);
+}
+#endif
+
+#if (KERNEL_VERSION(5, 16, 0) <= LINUX_VERSION_CODE &&       \
+	KERNEL_VERSION(5, 18, 0) > LINUX_VERSION_CODE) ||       \
+	(KERNEL_VERSION(5, 11, 0) <= LINUX_VERSION_CODE &&   \
+	KERNEL_VERSION(5, 15, 85) >= LINUX_VERSION_CODE) || \
+	(KERNEL_VERSION(5, 10, 200) >= LINUX_VERSION_CODE)
+/*
+ * Kernel revisions
+ *  - up to 5.10.200
+ *  - between 5.11.0 and 5.15.85 inclusive
+ *  - between 5.16.0 and 5.17.15 inclusive
+ * do not provide an implementation of
+ * size_add, size_sub and size_mul.
+ * The definitions below provide
+ * backward-compatible implementations of these functions.
+ */
+
+static inline size_t __must_check size_mul(size_t factor1, size_t factor2)
+{
+	size_t product;
+
+	/* Saturating multiply: SIZE_MAX is returned when the product overflows. */
+	return check_mul_overflow(factor1, factor2, &product) ? SIZE_MAX : product;
+}
+
+static inline size_t __must_check size_add(size_t addend1, size_t addend2)
+{
+	size_t sum;
+
+	/* Saturating add: SIZE_MAX is returned when the sum overflows. */
+	return check_add_overflow(addend1, addend2, &sum) ? SIZE_MAX : sum;
+}
+
+static inline size_t __must_check size_sub(size_t minuend, size_t subtrahend)
+{
+	size_t difference;
+
+	/* A SIZE_MAX operand yields SIZE_MAX again, and so does an underflowing result. */
+	if (minuend == SIZE_MAX || subtrahend == SIZE_MAX)
+		return SIZE_MAX;
+	if (check_sub_overflow(minuend, subtrahend, &difference))
+		return SIZE_MAX;
+	return difference;
+}
+#endif
+
+#if KERNEL_VERSION(5, 5, 0) > LINUX_VERSION_CODE
+static inline unsigned long bitmap_get_value8(const unsigned long *map, unsigned long start)
+{
+	/* Take the word indexed by BIT_WORD(start), shift by the in-word offset, keep 8 bits. */
+	const unsigned long word = map[BIT_WORD(start)];
+
+	return (word >> (start % BITS_PER_LONG)) & 0xFF;
+}
+
+static inline unsigned long find_next_clump8(unsigned long *clump, const unsigned long *addr,
+					     unsigned long size, unsigned long offset)
+{
+	unsigned long bit = find_next_bit(addr, size, offset);
+
+	/* No set bit found: return @size; otherwise yield the 8-bit clump holding that bit. */
+	if (bit == size)
+		return size;
+	bit = round_down(bit, 8);
+	*clump = bitmap_get_value8(addr, bit);
+	return bit;
+}
+
+#define find_first_clump8(clump, bits, size) find_next_clump8((clump), (bits), (size), 0)
+
+#define for_each_set_clump8(start, clump, bits, size)                                 \
+	for ((start) = find_first_clump8(&(clump), (bits), (size)); (start) < (size); \
+	     (start) = find_next_clump8(&(clump), (bits), (size), (start) + 8))
+#endif
+
 #endif /* _VERSION_COMPAT_DEFS_H_ */
diff --git a/include/uapi/gpu/arm/bifrost/backend/gpu/mali_kbase_model_dummy.h b/include/uapi/gpu/arm/bifrost/backend/gpu/mali_kbase_model_dummy.h
index 564f477e57d1..b80817f04255 100644
--- a/include/uapi/gpu/arm/bifrost/backend/gpu/mali_kbase_model_dummy.h
+++ b/include/uapi/gpu/arm/bifrost/backend/gpu/mali_kbase_model_dummy.h
@@ -1,7 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
 /*
  *
- * (C) COPYRIGHT 2021-2023 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2021-2024 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
diff --git a/include/uapi/gpu/arm/bifrost/csf/mali_base_csf_kernel.h b/include/uapi/gpu/arm/bifrost/csf/mali_base_csf_kernel.h
index 0fb824267184..2b2fd1dd7bc8 100644
--- a/include/uapi/gpu/arm/bifrost/csf/mali_base_csf_kernel.h
+++ b/include/uapi/gpu/arm/bifrost/csf/mali_base_csf_kernel.h
@@ -1,7 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
 /*
  *
- * (C) COPYRIGHT 2020-2023 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2020-2024 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -46,7 +46,11 @@
  */
 #define BASE_MEM_CSF_EVENT ((base_mem_alloc_flags)1 << 19)
 
-#define BASE_MEM_RESERVED_BIT_20 ((base_mem_alloc_flags)1 << 20)
+/* Unused bit for CSF, only used in JM for BASE_MEM_TILER_ALIGN_TOP */
+#define BASE_MEM_UNUSED_BIT_20 ((base_mem_alloc_flags)1 << 20)
+
+/* Unused bit for CSF, only used in JM for BASE_MEM_FLAG_MAP_FIXED */
+#define BASE_MEM_UNUSED_BIT_27 ((base_mem_alloc_flags)1 << 27)
 
 /* Must be FIXABLE memory: its GPU VA will be determined at a later point,
  * at which time it will be at a fixed GPU VA.
@@ -57,14 +61,21 @@
  * must be less than BASE_MEM_FLAGS_NR_BITS !!!
  */
 
-/* A mask of all the flags which are only valid for allocations within kbase,
- * and may not be passed from user space.
+/* A mask of all the flags which are only valid within kbase,
+ * and may not be passed to/from user space.
  */
 #define BASEP_MEM_FLAGS_KERNEL_ONLY (BASEP_MEM_PERMANENT_KERNEL_MAPPING | BASEP_MEM_NO_USER_FREE)
 
-/* A mask of all currently reserved flags
+/* A mask of flags that, when provided, cause other flags to be
+ * enabled but are not enabled themselves
  */
-#define BASE_MEM_FLAGS_RESERVED BASE_MEM_RESERVED_BIT_20
+#define BASE_MEM_FLAGS_ACTION_MODIFIERS (BASE_MEM_COHERENT_SYSTEM_REQUIRED | BASE_MEM_IMPORT_SHARED)
+
+/* A mask of all currently reserved flags */
+#define BASE_MEM_FLAGS_RESERVED ((base_mem_alloc_flags)0)
+
+/* A mask of all bits that are not used by a flag on CSF */
+#define BASE_MEM_FLAGS_UNUSED (BASE_MEM_UNUSED_BIT_20 | BASE_MEM_UNUSED_BIT_27)
 
 /* Special base mem handles specific to CSF.
  */
@@ -474,7 +485,26 @@ struct base_gpu_queue_error_fatal_payload {
 };
 
 /**
- * enum base_gpu_queue_group_error_type - GPU Fatal error type.
+ * struct base_gpu_queue_error_fault_payload - Recoverable fault
+ *        error information related to GPU command queue.
+ *
+ * @sideband:     Additional information about this recoverable fault.
+ * @status:       Recoverable fault information.
+ *                This consists of exception type (least significant byte) and
+ *                data (remaining bytes). One example of exception type is
+ *                INSTR_INVALID_PC (0x50).
+ * @csi_index:    Index of the CSF interface the queue is bound to.
+ * @padding:      Padding to make multiple of 64bits
+ */
+struct base_gpu_queue_error_fault_payload {
+	__u64 sideband;
+	__u32 status;
+	__u8 csi_index;
+	__u8 padding[3];
+};
+
+/**
+ * enum base_gpu_queue_group_error_type - GPU error type.
  *
  * @BASE_GPU_QUEUE_GROUP_ERROR_FATAL:       Fatal error associated with GPU
  *                                          command queue group.
@@ -484,7 +514,9 @@ struct base_gpu_queue_error_fatal_payload {
  *                                          progress timeout.
  * @BASE_GPU_QUEUE_GROUP_ERROR_TILER_HEAP_OOM: Fatal error due to running out
  *                                             of tiler heap memory.
- * @BASE_GPU_QUEUE_GROUP_ERROR_FATAL_COUNT: The number of fatal error types
+ * @BASE_GPU_QUEUE_GROUP_QUEUE_ERROR_FAULT: Fault error associated with GPU
+ *                                          command queue.
+ * @BASE_GPU_QUEUE_GROUP_ERROR_FATAL_COUNT: The number of GPU error types
  *
  * This type is used for &struct_base_gpu_queue_group_error.error_type.
  */
@@ -493,6 +525,7 @@ enum base_gpu_queue_group_error_type {
 	BASE_GPU_QUEUE_GROUP_QUEUE_ERROR_FATAL,
 	BASE_GPU_QUEUE_GROUP_ERROR_TIMEOUT,
 	BASE_GPU_QUEUE_GROUP_ERROR_TILER_HEAP_OOM,
+	BASE_GPU_QUEUE_GROUP_QUEUE_ERROR_FAULT,
 	BASE_GPU_QUEUE_GROUP_ERROR_FATAL_COUNT
 };
 
@@ -512,6 +545,7 @@ struct base_gpu_queue_group_error {
 	union {
 		struct base_gpu_queue_group_error_fatal_payload fatal_group;
 		struct base_gpu_queue_error_fatal_payload fatal_queue;
+		struct base_gpu_queue_error_fault_payload fault_queue;
 	} payload;
 };
 
@@ -519,8 +553,7 @@ struct base_gpu_queue_group_error {
  * enum base_csf_notification_type - Notification type
  *
  * @BASE_CSF_NOTIFICATION_EVENT:                 Notification with kernel event
- * @BASE_CSF_NOTIFICATION_GPU_QUEUE_GROUP_ERROR: Notification with GPU fatal
- *                                               error
+ * @BASE_CSF_NOTIFICATION_GPU_QUEUE_GROUP_ERROR: Notification with GPU error
  * @BASE_CSF_NOTIFICATION_CPU_QUEUE_DUMP:        Notification with dumping cpu
  *                                               queue
  * @BASE_CSF_NOTIFICATION_COUNT:                 The number of notification type
diff --git a/include/uapi/gpu/arm/bifrost/csf/mali_kbase_csf_ioctl.h b/include/uapi/gpu/arm/bifrost/csf/mali_kbase_csf_ioctl.h
index 537c90d6efa5..2b5b8b25fc2c 100644
--- a/include/uapi/gpu/arm/bifrost/csf/mali_kbase_csf_ioctl.h
+++ b/include/uapi/gpu/arm/bifrost/csf/mali_kbase_csf_ioctl.h
@@ -1,7 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
 /*
  *
- * (C) COPYRIGHT 2020-2023 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2020-2024 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -95,15 +95,35 @@
  * 1.22:
  * - Add comp_pri_threshold and comp_pri_ratio attributes to
  *   kbase_ioctl_cs_queue_group_create.
+ * - Made the BASE_MEM_DONT_NEED memory flag queryable.
  * 1.23:
  * - Disallows changing the sharability on the GPU of imported dma-bufs to
  *   BASE_MEM_COHERENT_SYSTEM using KBASE_IOCTL_MEM_FLAGS_CHANGE.
  * 1.24:
  * - Implement full block state support for hardware counters.
+ * 1.25:
+ * - Add support for CS_FAULT reporting to userspace
+ * 1.26:
+ * - Made the BASE_MEM_IMPORT_SYNC_ON_MAP_UNMAP and BASE_MEM_KERNEL_SYNC memory
+ *   flags queryable.
+ * 1.27:
+ * - Implement support for HWC block state availability.
+ * 1.28:
+ * - Made the SAME_VA memory flag queryable.
+ * 1.29:
+ * - Re-allow child process to do supported file operations (like mmap, ioctl,
+ *   read, poll) on the file descriptor of mali device that was inherited
+ *   from the parent process.
+ * 1.30:
+ * - Implement support for setting GPU Timestamp Offset register.
+ * 1.31:
+ * - Reject non-protected allocations containing the BASE_MEM_PROTECTED memory flag.
+ * - Reject allocations containing the BASE_MEM_DONT_NEED memory flag (it is only settable).
+ * - Reject allocations containing the BASE_MEM_UNUSED_BIT_xx memory flags.
  */
 
 #define BASE_UK_VERSION_MAJOR 1
-#define BASE_UK_VERSION_MINOR 24
+#define BASE_UK_VERSION_MINOR 31
 
 /**
  * struct kbase_ioctl_version_check - Check version compatibility between
@@ -340,6 +360,8 @@ union kbase_ioctl_cs_queue_group_create_1_18 {
  * @in.csi_handlers:  Flags to signal that the application intends to use CSI
  *                    exception handlers in some linear buffers to deal with
  *                    the given exception types.
+ * @in.cs_fault_report_enable:  Flag to indicate reporting of CS_FAULTs
+ *                    to userspace.
  * @in.padding:       Currently unused, must be zero
  * @out:              Output parameters
  * @out.group_handle: Handle of a newly created queue group.
@@ -360,7 +382,8 @@ union kbase_ioctl_cs_queue_group_create {
 		/**
 		 * @in.reserved:   Reserved, currently unused, must be zero.
 		 */
-		__u16 reserved;
+		__u8 reserved;
+		__u8 cs_fault_report_enable;
 		/**
 		 * @in.dvs_buf: buffer for deferred vertex shader
 		 */
@@ -480,7 +503,7 @@ union kbase_ioctl_cs_tiler_heap_init {
 
 /**
  * union kbase_ioctl_cs_tiler_heap_init_1_13 - Initialize chunked tiler memory heap,
- *                                             earlier version upto 1.13
+ *                                             earlier version up to 1.13
  * @in:                Input parameters
  * @in.chunk_size:     Size of each chunk.
  * @in.initial_chunks: Initial number of chunks that heap will be created with.
@@ -637,6 +660,22 @@ union kbase_ioctl_read_user_page {
 
 #define KBASE_IOCTL_READ_USER_PAGE _IOWR(KBASE_IOCTL_TYPE, 60, union kbase_ioctl_read_user_page)
 
+/**
+ * struct kbase_ioctl_queue_group_clear_faults - Re-enable CS FAULT reporting for the GPU queues
+ *
+ * @addr: CPU VA to an array of GPU VAs of the buffers backing the queues
+ * @nr_queues: Number of queues in the array
+ * @padding: Padding to round up to a multiple of 8 bytes, must be zero
+ */
+struct kbase_ioctl_queue_group_clear_faults {
+	__u64 addr;
+	__u32 nr_queues;
+	__u8 padding[4];
+};
+
+#define KBASE_IOCTL_QUEUE_GROUP_CLEAR_FAULTS \
+	_IOW(KBASE_IOCTL_TYPE, 61, struct kbase_ioctl_queue_group_clear_faults)
+
 /***************
  * test ioctls *
  ***************/
diff --git a/include/uapi/gpu/arm/bifrost/gpu/mali_kbase_gpu_id.h b/include/uapi/gpu/arm/bifrost/gpu/mali_kbase_gpu_id.h
index d3478546e244..d4d12aed780d 100644
--- a/include/uapi/gpu/arm/bifrost/gpu/mali_kbase_gpu_id.h
+++ b/include/uapi/gpu/arm/bifrost/gpu/mali_kbase_gpu_id.h
@@ -1,7 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
 /*
  *
- * (C) COPYRIGHT 2015-2023 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2015-2024 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
diff --git a/include/uapi/gpu/arm/bifrost/jm/mali_base_jm_kernel.h b/include/uapi/gpu/arm/bifrost/jm/mali_base_jm_kernel.h
index 9478334ce667..fad61299b1c1 100644
--- a/include/uapi/gpu/arm/bifrost/jm/mali_base_jm_kernel.h
+++ b/include/uapi/gpu/arm/bifrost/jm/mali_base_jm_kernel.h
@@ -1,7 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
 /*
  *
- * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2024 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -30,15 +30,11 @@
  * See base_mem_alloc_flags.
  */
 
-/* Used as BASE_MEM_FIXED in other backends */
-#define BASE_MEM_RESERVED_BIT_8 ((base_mem_alloc_flags)1 << 8)
+/* Unused bit for JM, only used in CSF for BASE_MEM_FIXED */
+#define BASE_MEM_UNUSED_BIT_8 ((base_mem_alloc_flags)1 << 8)
 
-/**
- * BASE_MEM_RESERVED_BIT_19 - Bit 19 is reserved.
- *
- * Do not remove, use the next unreserved bit for new flags
- */
-#define BASE_MEM_RESERVED_BIT_19 ((base_mem_alloc_flags)1 << 19)
+/* Unused bit for JM, only used in CSF for BASE_MEM_CSF_EVENT */
+#define BASE_MEM_UNUSED_BIT_19 ((base_mem_alloc_flags)1 << 19)
 
 /**
  * BASE_MEM_TILER_ALIGN_TOP - Memory starting from the end of the initial commit is aligned
@@ -57,16 +53,23 @@
  * must be less than BASE_MEM_FLAGS_NR_BITS !!!
  */
 
-/* A mask of all the flags which are only valid for allocations within kbase,
- * and may not be passed from user space.
+/* A mask of all the flags which are only valid within kbase,
+ * and may not be passed to/from user space.
  */
 #define BASEP_MEM_FLAGS_KERNEL_ONLY                                                              \
 	(BASEP_MEM_PERMANENT_KERNEL_MAPPING | BASEP_MEM_NO_USER_FREE | BASE_MEM_FLAG_MAP_FIXED | \
 	 BASEP_MEM_PERFORM_JIT_TRIM)
 
-/* A mask of all currently reserved flags
+/* A mask of flags that, when provided, cause other flags to be
+ * enabled but are not enabled themselves
  */
-#define BASE_MEM_FLAGS_RESERVED (BASE_MEM_RESERVED_BIT_8 | BASE_MEM_RESERVED_BIT_19)
+#define BASE_MEM_FLAGS_ACTION_MODIFIERS (BASE_MEM_COHERENT_SYSTEM_REQUIRED | BASE_MEM_IMPORT_SHARED)
+
+/* A mask of all currently reserved flags */
+#define BASE_MEM_FLAGS_RESERVED ((base_mem_alloc_flags)0)
+
+/* A mask of all bits that are not used by a flag on JM */
+#define BASE_MEM_FLAGS_UNUSED (BASE_MEM_UNUSED_BIT_8 | BASE_MEM_UNUSED_BIT_19)
 
 /* Similar to BASE_MEM_TILER_ALIGN_TOP, memory starting from the end of the
  * initial commit is aligned to 'extension' pages, where 'extension' must be a power
@@ -119,10 +122,6 @@
  */
 #define BASE_JD_ATOM_COUNT 256
 
-/* Maximum number of concurrent render passes.
- */
-#define BASE_JD_RP_COUNT (256)
-
 /* Set/reset values for a software event */
 #define BASE_JD_SOFT_EVENT_SET ((unsigned char)1)
 #define BASE_JD_SOFT_EVENT_RESET ((unsigned char)0)
@@ -362,40 +361,6 @@ typedef __u32 base_jd_core_req;
  */
 #define BASE_JD_REQ_JOB_SLOT ((base_jd_core_req)1 << 17)
 
-/* SW-only requirement: The atom is the start of a renderpass.
- *
- * If this bit is set then the job chain will be soft-stopped if it causes the
- * GPU to write beyond the end of the physical pages backing the tiler heap, and
- * committing more memory to the heap would exceed an internal threshold. It may
- * be resumed after running one of the job chains attached to an atom with
- * BASE_JD_REQ_END_RENDERPASS set and the same renderpass ID. It may be
- * resumed multiple times until it completes without memory usage exceeding the
- * threshold.
- *
- * Usually used with BASE_JD_REQ_T.
- */
-#define BASE_JD_REQ_START_RENDERPASS ((base_jd_core_req)1 << 18)
-
-/* SW-only requirement: The atom is the end of a renderpass.
- *
- * If this bit is set then the atom incorporates the CPU address of a
- * base_jd_fragment object instead of the GPU address of a job chain.
- *
- * Which job chain is run depends upon whether the atom with the same renderpass
- * ID and the BASE_JD_REQ_START_RENDERPASS bit set completed normally or
- * was soft-stopped when it exceeded an upper threshold for tiler heap memory
- * usage.
- *
- * It also depends upon whether one of the job chains attached to the atom has
- * already been run as part of the same renderpass (in which case it would have
- * written unresolved multisampled and otherwise-discarded output to temporary
- * buffers that need to be read back). The job chain for doing a forced read and
- * forced write (from/to temporary buffers) is run as many times as necessary.
- *
- * Usually used with BASE_JD_REQ_FS.
- */
-#define BASE_JD_REQ_END_RENDERPASS ((base_jd_core_req)1 << 19)
-
 /* SW-only requirement: The atom needs to run on a limited core mask affinity.
  *
  * If this bit is set then the kbase_context.limited_core_mask will be applied
@@ -411,7 +376,6 @@ typedef __u32 base_jd_core_req;
 	   BASE_JD_REQ_EVENT_COALESCE | BASE_JD_REQ_COHERENT_GROUP |                          \
 	   BASE_JD_REQ_SPECIFIC_COHERENT_GROUP | BASE_JD_REQ_FS_AFBC | BASE_JD_REQ_PERMON |   \
 	   BASE_JD_REQ_SKIP_CACHE_START | BASE_JD_REQ_SKIP_CACHE_END | BASE_JD_REQ_JOB_SLOT | \
-	   BASE_JD_REQ_START_RENDERPASS | BASE_JD_REQ_END_RENDERPASS |                        \
 	   BASE_JD_REQ_LIMITED_CORE_MASK))
 
 /* Mask of all bits in base_jd_core_req that control the type of the atom.
@@ -470,62 +434,6 @@ struct base_dependency {
 	base_jd_dep_type dependency_type;
 };
 
-/**
- * struct base_jd_fragment - Set of GPU fragment job chains used for rendering.
- *
- * @norm_read_norm_write: Job chain for full rendering.
- *                        GPU address of a fragment job chain to render in the
- *                        circumstance where the tiler job chain did not exceed
- *                        its memory usage threshold and no fragment job chain
- *                        was previously run for the same renderpass.
- *                        It is used no more than once per renderpass.
- * @norm_read_forced_write: Job chain for starting incremental
- *                          rendering.
- *                          GPU address of a fragment job chain to render in
- *                          the circumstance where the tiler job chain exceeded
- *                          its memory usage threshold for the first time and
- *                          no fragment job chain was previously run for the
- *                          same renderpass.
- *                          Writes unresolved multisampled and normally-
- *                          discarded output to temporary buffers that must be
- *                          read back by a subsequent forced_read job chain
- *                          before the renderpass is complete.
- *                          It is used no more than once per renderpass.
- * @forced_read_forced_write: Job chain for continuing incremental
- *                            rendering.
- *                            GPU address of a fragment job chain to render in
- *                            the circumstance where the tiler job chain
- *                            exceeded its memory usage threshold again
- *                            and a fragment job chain was previously run for
- *                            the same renderpass.
- *                            Reads unresolved multisampled and
- *                            normally-discarded output from temporary buffers
- *                            written by a previous forced_write job chain and
- *                            writes the same to temporary buffers again.
- *                            It is used as many times as required until
- *                            rendering completes.
- * @forced_read_norm_write: Job chain for ending incremental rendering.
- *                          GPU address of a fragment job chain to render in the
- *                          circumstance where the tiler job chain did not
- *                          exceed its memory usage threshold this time and a
- *                          fragment job chain was previously run for the same
- *                          renderpass.
- *                          Reads unresolved multisampled and normally-discarded
- *                          output from temporary buffers written by a previous
- *                          forced_write job chain in order to complete a
- *                          renderpass.
- *                          It is used no more than once per renderpass.
- *
- * This structure is referenced by the main atom structure if
- * BASE_JD_REQ_END_RENDERPASS is set in the base_jd_core_req.
- */
-struct base_jd_fragment {
-	__u64 norm_read_norm_write;
-	__u64 norm_read_forced_write;
-	__u64 forced_read_forced_write;
-	__u64 forced_read_norm_write;
-};
-
 /**
  * typedef base_jd_prio - Base Atom priority.
  *
@@ -590,9 +498,7 @@ typedef __u8 base_jd_prio;
  * struct base_jd_atom_v2 - Node of a dependency graph used to submit a
  *                          GPU job chain or soft-job to the kernel driver.
  *
- * @jc:            GPU address of a job chain or (if BASE_JD_REQ_END_RENDERPASS
- *                 is set in the base_jd_core_req) the CPU address of a
- *                 base_jd_fragment object.
+ * @jc:            GPU address of a job chain.
  * @udata:         User data.
  * @extres_list:   List of external resources.
  * @nr_extres:     Number of external resources or JIT allocations.
@@ -611,9 +517,6 @@ typedef __u8 base_jd_prio;
  *                 specified.
  * @jobslot:       Job slot to use when BASE_JD_REQ_JOB_SLOT is specified.
  * @core_req:      Core requirements.
- * @renderpass_id: Renderpass identifier used to associate an atom that has
- *                 BASE_JD_REQ_START_RENDERPASS set in its core requirements
- *                 with an atom that has BASE_JD_REQ_END_RENDERPASS set.
  * @padding:       Unused. Must be zero.
  *
  * This structure has changed since UK 10.2 for which base_jd_core_req was a
@@ -641,8 +544,7 @@ struct base_jd_atom_v2 {
 	__u8 device_nr;
 	__u8 jobslot;
 	base_jd_core_req core_req;
-	__u8 renderpass_id;
-	__u8 padding[7];
+	__u8 padding[8];
 };
 
 /**
@@ -650,9 +552,7 @@ struct base_jd_atom_v2 {
  *                          at the beginning.
  *
  * @seq_nr:        Sequence number of logical grouping of atoms.
- * @jc:            GPU address of a job chain or (if BASE_JD_REQ_END_RENDERPASS
- *                 is set in the base_jd_core_req) the CPU address of a
- *                 base_jd_fragment object.
+ * @jc:            GPU address of a job chain.
  * @udata:         User data.
  * @extres_list:   List of external resources.
  * @nr_extres:     Number of external resources or JIT allocations.
@@ -834,11 +734,6 @@ enum {
  * @BASE_JD_EVENT_REMOVED_FROM_NEXT: raised when an atom that was configured in
  *                                   the GPU has to be retried (but it has not
  *                                   started) due to e.g., GPU reset
- * @BASE_JD_EVENT_END_RP_DONE: this is used for incremental rendering to signal
- *                             the completion of a renderpass. This value
- *                             shouldn't be returned to userspace but I haven't
- *                             seen where it is reset back to JD_EVENT_DONE.
- *
  * HW and low-level SW events are represented by event codes.
  * The status of jobs which succeeded are also represented by
  * an event code (see @BASE_JD_EVENT_DONE).
@@ -937,8 +832,6 @@ enum base_jd_event_code {
 	BASE_JD_EVENT_RANGE_KERNEL_ONLY_START = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_KERNEL | 0x000,
 	BASE_JD_EVENT_REMOVED_FROM_NEXT = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_KERNEL |
 					  BASE_JD_SW_EVENT_JOB | 0x000,
-	BASE_JD_EVENT_END_RP_DONE = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_KERNEL |
-				    BASE_JD_SW_EVENT_JOB | 0x001,
 
 	BASE_JD_EVENT_RANGE_KERNEL_ONLY_END = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_KERNEL |
 					      BASE_JD_SW_EVENT_RESERVED | 0x3FF
diff --git a/include/uapi/gpu/arm/bifrost/jm/mali_kbase_jm_ioctl.h b/include/uapi/gpu/arm/bifrost/jm/mali_kbase_jm_ioctl.h
index 2a7a06a995be..34da87860ee2 100644
--- a/include/uapi/gpu/arm/bifrost/jm/mali_kbase_jm_ioctl.h
+++ b/include/uapi/gpu/arm/bifrost/jm/mali_kbase_jm_ioctl.h
@@ -1,7 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
 /*
  *
- * (C) COPYRIGHT 2020-2023 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2020-2024 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -149,15 +149,31 @@
  *   from the parent process.
  * 11.40:
  * - Remove KBASE_IOCTL_HWCNT_READER_SETUP and KBASE_HWCNT_READER_* ioctls.
+ * - Made the BASE_MEM_DONT_NEED memory flag queryable.
  * 11.41:
  * - Disallows changing the sharability on the GPU of imported dma-bufs to
  *   BASE_MEM_COHERENT_SYSTEM using KBASE_IOCTL_MEM_FLAGS_CHANGE.
  * 11.42:
  * - Implement full block state support for hardware counters.
- */
+ * 11.43:
+ * - Made the BASE_MEM_IMPORT_SYNC_ON_MAP_UNMAP and BASE_MEM_KERNEL_SYNC memory
+ *   flags queryable.
+ * 11.44:
+ * - Made the SAME_VA memory flag queryable.
+ * 11.45:
+ * - Re-allow child process to do supported file operations (like mmap, ioctl,
+ *   read, poll) on the file descriptor of mali device that was inherited
+ *   from the parent process.
+ * 11.46:
+ * - Remove renderpass_id from base_jd_atom_v2 to deprecate support for JM Incremental Rendering.
+ * 11.47:
+ * - Reject non-protected allocations containing the BASE_MEM_PROTECTED memory flag.
+ * - Reject allocations containing the BASE_MEM_DONT_NEED memory flag (it is only settable).
+ * - Reject allocations containing the BASE_MEM_UNUSED_BIT_xx memory flags.
+ */
 
 #define BASE_UK_VERSION_MAJOR 11
-#define BASE_UK_VERSION_MINOR 42
+#define BASE_UK_VERSION_MINOR 47
 
 /**
  * struct kbase_ioctl_version_check - Check version compatibility between
diff --git a/include/uapi/gpu/arm/bifrost/mali_base_common_kernel.h b/include/uapi/gpu/arm/bifrost/mali_base_common_kernel.h
index c009d5ddd494..bbbee900415e 100644
--- a/include/uapi/gpu/arm/bifrost/mali_base_common_kernel.h
+++ b/include/uapi/gpu/arm/bifrost/mali_base_common_kernel.h
@@ -1,7 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
 /*
  *
- * (C) COPYRIGHT 2022-2023 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2022-2024 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -146,7 +146,6 @@ struct base_mem_handle {
  */
 #define BASE_MEM_IMPORT_SYNC_ON_MAP_UNMAP ((base_mem_alloc_flags)1 << 26)
 
-/* OUT */
 /* Kernel side cache sync ops required */
 #define BASE_MEM_KERNEL_SYNC ((base_mem_alloc_flags)1 << 28)
 
@@ -156,12 +155,10 @@ struct base_mem_handle {
  */
 #define BASE_MEM_FLAGS_NR_BITS 30
 
-/* A mask for all output bits, excluding IN/OUT bits.
- */
+/* A mask for all bits that are output from kbase, but never input. */
 #define BASE_MEM_FLAGS_OUTPUT_MASK BASE_MEM_NEED_MMAP
 
-/* A mask for all input bits, including IN/OUT bits.
- */
+/* A mask for all bits that can be input to kbase. */
 #define BASE_MEM_FLAGS_INPUT_MASK \
 	(((1 << BASE_MEM_FLAGS_NR_BITS) - 1) & ~BASE_MEM_FLAGS_OUTPUT_MASK)
 
diff --git a/include/uapi/gpu/arm/bifrost/mali_base_kernel.h b/include/uapi/gpu/arm/bifrost/mali_base_kernel.h
index cb1a1e8dd550..9e7294970efb 100644
--- a/include/uapi/gpu/arm/bifrost/mali_base_kernel.h
+++ b/include/uapi/gpu/arm/bifrost/mali_base_kernel.h
@@ -1,7 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
 /*
  *
- * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2024 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -74,7 +74,7 @@
  * More flags can be added to this list, as long as they don't clash
  * (see BASE_MEM_FLAGS_NR_BITS for the number of the first free bit).
  */
-typedef __u32 base_mem_alloc_flags;
+typedef __u64 base_mem_alloc_flags;
 
 #define BASE_MEM_FLAGS_MODIFIABLE_NATIVE (BASE_MEM_DONT_NEED)
 
@@ -89,10 +89,10 @@ typedef __u32 base_mem_alloc_flags;
 /* A mask of all the flags that can be returned via the base_mem_get_flags()
  * interface.
  */
-#define BASE_MEM_FLAGS_QUERYABLE                                                           \
-	(BASE_MEM_FLAGS_INPUT_MASK &                                                       \
-	 ~(BASE_MEM_SAME_VA | BASE_MEM_COHERENT_SYSTEM_REQUIRED | BASE_MEM_IMPORT_SHARED | \
-	   BASE_MEM_FLAGS_RESERVED | BASEP_MEM_FLAGS_KERNEL_ONLY))
+#define BASE_MEM_FLAGS_QUERYABLE                                                               \
+	(BASE_MEM_FLAGS_INPUT_MASK &                                                           \
+	 ~(BASE_MEM_FLAGS_RESERVED | BASE_MEM_FLAGS_UNUSED | BASE_MEM_FLAGS_ACTION_MODIFIERS | \
+	   BASEP_MEM_FLAGS_KERNEL_ONLY))
 
 /**
  * enum base_mem_import_type - Memory types supported by @a base_mem_import
@@ -619,15 +619,15 @@ struct base_gpu_props {
 #define BASE_TIMEINFO_TIMESTAMP_FLAG (1U << 1)
 /* For GPU cycle counter */
 #define BASE_TIMEINFO_CYCLE_COUNTER_FLAG (1U << 2)
-/* Specify kernel GPU register timestamp */
-#define BASE_TIMEINFO_KERNEL_SOURCE_FLAG (1U << 30)
-/* Specify userspace cntvct_el0 timestamp source */
-#define BASE_TIMEINFO_USER_SOURCE_FLAG (1U << 31)
 
-#define BASE_TIMEREQUEST_ALLOWED_FLAGS                                         \
-	(BASE_TIMEINFO_MONOTONIC_FLAG | BASE_TIMEINFO_TIMESTAMP_FLAG |         \
-	 BASE_TIMEINFO_CYCLE_COUNTER_FLAG | BASE_TIMEINFO_KERNEL_SOURCE_FLAG | \
-	 BASE_TIMEINFO_USER_SOURCE_FLAG)
+/* Specify TimeRequest flags allowed if time source is cpu/gpu register */
+#define BASE_TIMEREQUEST_CPU_GPU_SRC_ALLOWED_FLAGS                     \
+	(BASE_TIMEINFO_MONOTONIC_FLAG | BASE_TIMEINFO_TIMESTAMP_FLAG | \
+	 BASE_TIMEINFO_CYCLE_COUNTER_FLAG)
+
+/* Specify TimeRequest flags allowed if time source is system (user) space */
+#define BASE_TIMEREQUEST_SYSTEM_SRC_ALLOWED_FLAGS \
+	(BASE_TIMEINFO_MONOTONIC_FLAG | BASE_TIMEINFO_TIMESTAMP_FLAG)
 
 /* Maximum number of source allocations allowed to create an alias allocation.
  * This needs to be 4096 * 6 to allow cube map arrays with up to 4096 array
diff --git a/include/uapi/gpu/arm/bifrost/mali_kbase_ioctl.h b/include/uapi/gpu/arm/bifrost/mali_kbase_ioctl.h
index d60745f564b0..163637c62297 100644
--- a/include/uapi/gpu/arm/bifrost/mali_kbase_ioctl.h
+++ b/include/uapi/gpu/arm/bifrost/mali_kbase_ioctl.h
@@ -1,7 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
 /*
  *
- * (C) COPYRIGHT 2017-2023 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2017-2024 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -169,7 +169,7 @@ struct kbase_ioctl_hwcnt_reader_setup {
  * struct kbase_ioctl_hwcnt_values - Values to set dummy the dummy counters to.
  * @data:    Counter samples for the dummy model.
  * @size:    Size of the counter sample data.
- * @padding: Padding.
+ * @padding: Currently unused, must be zero
  */
 struct kbase_ioctl_hwcnt_values {
 	__u64 data;
@@ -193,7 +193,7 @@ struct kbase_ioctl_disjoint_query {
  * struct kbase_ioctl_get_ddk_version - Query the kernel version
  * @version_buffer: Buffer to receive the kernel version string
  * @size: Size of the buffer
- * @padding: Padding
+ * @padding: Currently unused, must be zero
  *
  * The ioctl will return the number of bytes written into version_buffer
  * (which includes a NULL byte) or a negative error code
diff --git a/include/uapi/gpu/arm/bifrost/mali_kbase_mem_profile_debugfs_buf_size.h b/include/uapi/gpu/arm/bifrost/mali_kbase_mem_profile_debugfs_buf_size.h
index 11c51d9c2993..648c166b1e3d 100644
--- a/include/uapi/gpu/arm/bifrost/mali_kbase_mem_profile_debugfs_buf_size.h
+++ b/include/uapi/gpu/arm/bifrost/mali_kbase_mem_profile_debugfs_buf_size.h
@@ -1,7 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
 /*
  *
- * (C) COPYRIGHT 2014-2023 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2024 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -30,6 +30,6 @@
  * KBASE_MEM_PROFILE_MAX_BUF_SIZE - The size of the buffer to accumulate the histogram report text
  *                                  in @see @ref CCTXP_HIST_BUF_SIZE_MAX_LENGTH_REPORT
  */
-#define KBASE_MEM_PROFILE_MAX_BUF_SIZE ((size_t)(64 + ((80 + (56 * 64)) * 57) + 56))
+#define KBASE_MEM_PROFILE_MAX_BUF_SIZE ((size_t)(64 + ((80 + (56 * 64)) * 69) + 56))
 
 #endif /*_UAPI_KBASE_MEM_PROFILE_DEBUGFS_BUF_SIZE_H_*/