diff --git a/Documentation/ABI/testing/sysfs-device-mali b/Documentation/ABI/testing/sysfs-device-mali
new file mode 100644
index 000000000000..1cd1819d42bc
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-device-mali
@@ -0,0 +1,284 @@
+/*
+ *
+ * (C) COPYRIGHT 2020 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+What:		/sys/class/misc/mali%u/device/core_mask
+Description:
+		This attribute is used to restrict the number of shader cores
+		available, which is useful for debugging purposes. Reading
+		this attribute provides the mask of cores available.
+		Writing to it will set the current core mask.
+
+What:		/sys/class/misc/mali%u/device/debug_command
+Description:
+		This attribute is used to issue debug commands that are
+		supported by the driver. Reading it provides the list of debug
+		commands that are supported, and writing back one of those
+		commands will enable that debug option.
+
+What:		/sys/class/misc/mali%u/device/dvfs_period
+Description:
+		This is used to set the DVFS sampling period to be used by the
+		driver. Reading it provides the current DVFS sampling period;
+		writing a value sets the DVFS sampling period.
+
+What:		/sys/class/misc/mali%u/device/dummy_job_wa_info
+Description:
+		This attribute is available only with platform device that
+		supports a Job Manager based GPU that requires a GPU workaround
+		to execute the dummy fragment job on all shader cores to
+		work around a hang issue.
+
+		It is a read-only attribute and on reading gives details on the
+		options used with the dummy workaround.
+
+What:		/sys/class/misc/mali%u/device/gpuinfo
+Description:
+		This attribute provides a description of the present Mali GPU.
+		It is a read-only attribute giving details like GPU family, the
+		number of cores, the hardware version and the raw product id.
+
+What:		/sys/class/misc/mali%u/device/idle_hysteresis_time
+Description:
+		This attribute is available only with mali platform
+		device-driver that supports a CSF GPU. This attribute is
+		used to set the duration value, in milliseconds, for
+		configuring the hysteresis field used for GPU idle detection.
+
+What:		/sys/class/misc/mali%u/device/js_ctx_scheduling_mode
+Description:
+		This attribute is available only with platform device that
+		supports a Job Manager based GPU. This attribute is used to set
+		context scheduling priority for a job slot.
+
+		On reading it provides the currently set job slot context
+		priority.
+
+		Writing 0 to this attribute sets it to the mode where
+		higher priority atoms will be scheduled first, regardless of
+		the context they belong to. Newly-runnable higher priority atoms
+		can preempt lower priority atoms currently running on the GPU,
+		even if they belong to a different context.
+
+		Writing 1 to this attribute sets it to the mode where the
+		highest-priority atom will be chosen from each context in turn
+		using a round-robin algorithm, so priority only has an effect
+		within the context an atom belongs to. Newly-runnable higher
+		priority atoms can preempt the lower priority atoms currently
+		running on the GPU, but only if they belong to the same context.
+
+What:		/sys/class/misc/mali%u/device/js_scheduling_period
+Description:
+		This attribute is available only with platform device that
+		supports a Job Manager based GPU. Used to set the job scheduler
+		tick period in nanoseconds. The Job Scheduler determines the
+		jobs that are run on the GPU, and for how long. The Job
+		Scheduler makes decisions at a regular time interval determined
+		by the value in js_scheduling_period.
+
+What:		/sys/class/misc/mali%u/device/js_softstop_always
+Description:
+		This attribute is available only with platform device that
+		supports a Job Manager based GPU. Soft-stops are disabled when
+		only a single context is present; this attribute is used to
+		enable soft-stop when only a single context is present, and can
+		be used for debug and unit-testing purposes.
+
+What:		/sys/class/misc/mali%u/device/js_timeouts
+Description:
+		This attribute is available only with platform device that
+		supports a Job Manager based GPU. It is used to set the soft
+		stop and hard stop times for the job scheduler.
+
+		Writing a value of 0 causes no change, and writing -1 restores
+		the default timeout.
+
+		The format used to set js_timeouts is
+		"<soft_stop_ms> <soft_stop_ms_cl> <hard_stop_ms_ss>
+		<hard_stop_ms_cl> <hard_stop_ms_dumping> <reset_ms_ss>
+		<reset_ms_cl> <reset_ms_dumping>"
+
+
+What:		/sys/class/misc/mali%u/device/lp_mem_pool_max_size
+Description:
+		This attribute is used to set the maximum number of large pages
+		for memory pools that the driver can contain. Large pages are of
+		size 2MB. On read it displays the max size of all memory pools;
+		it can also be used to modify each individual pool.
+
+What:		/sys/class/misc/mali%u/device/lp_mem_pool_size
+Description:
+		This attribute is used to set the number of large memory pages
+		which should be populated. Changing this value may cause
+		existing pages to be removed from the pool, or new pages to be
+		created and then added to the pool. On read it will provide the
+		pool size for all available pools, and individual pools can be
+		modified.
+
+What:		/sys/class/misc/mali%u/device/mem_pool_max_size
+Description:
+		This attribute is used to set the maximum number of small pages
+		for memory pools that the driver can contain. Here small pages
+		are of size 4KB. On read it will display the max size for all
+		available pools and allows us to set max size of
+		individual pools.
+
+What:		/sys/class/misc/mali%u/device/mem_pool_size
+Description:
+		This attribute is used to set the number of small memory pages
+		which should be populated. Changing this value may cause
+		existing pages to be removed from the pool, or new pages to
+		be created and then added to the pool. On read it will provide
+		the pool size for all available pools, and individual pools can
+		be modified.
+
+What:		/sys/class/misc/mali%u/device/device/mempool/ctx_default_max_size
+Description:
+		This attribute is used to set the maximum memory pool size for
+		all the memory pools so that the maximum amount of free memory
+		that each pool can hold is identical.
+
+What:		/sys/class/misc/mali%u/device/device/mempool/lp_max_size
+Description:
+		This attribute is used to set the maximum number of large pages
+		for all memory pools that the driver can contain.
+		Large pages are of size 2MB.
+
+What:		/sys/class/misc/mali%u/device/device/mempool/max_size
+Description:
+		This attribute is used to set the maximum number of small pages
+		for all the memory pools that the driver can contain.
+		Here small pages are of size 4KB.
+
+What:		/sys/class/misc/mali%u/device/pm_poweroff
+Description:
+		This attribute contains the current values, represented as the
+		following space-separated integers:
+		• PM_GPU_POWEROFF_TICK_NS.
+		• PM_POWEROFF_TICK_SHADER.
+		• PM_POWEROFF_TICK_GPU.
+
+		Example:
+		echo 100000 4 4 > /sys/class/misc/mali0/device/pm_poweroff
+
+		Sets the following new values: 100,000ns tick, four ticks
+		for shader power down, and four ticks for GPU power down.
+
+What:		/sys/class/misc/mali%u/device/power_policy
+Description:
+		This attribute is used to find the current power policy being
+		used. Reading will list the power policies available, with the
+		currently selected one enclosed in square brackets.
+
+		Example:
+		cat /sys/class/misc/mali0/device/power_policy
+		[demand] coarse_demand always_on
+
+		To switch to a different policy at runtime write the valid entry
+		name back to the attribute.
+
+		Example:
+		echo "coarse_demand" > /sys/class/misc/mali0/device/power_policy
+
+What:		/sys/class/misc/mali%u/device/progress_timeout
+Description:
+		This attribute is available only with mali platform
+		device-driver that supports a CSF GPU. This attribute
+		is used to set the progress timeout value and read the current
+		progress timeout value.
+
+		Progress timeout value is the maximum number of GPU cycles
+		without forward progress to allow to elapse before terminating a
+		GPU command queue group.
+
+What:		/sys/class/misc/mali%u/device/reset_timeout
+Description:
+		This attribute is used to set the number of milliseconds to
+		wait for the soft stop to complete for the GPU jobs before
+		proceeding with the GPU reset.
+
+What:		/sys/class/misc/mali%u/device/soft_job_timeout
+Description:
+		This attribute is available only with platform device that
+		supports a Job Manager based GPU. It is used to set the timeout
+		value for waiting for any soft event to complete.
+
+What:		/sys/class/misc/mali%u/device/scheduling/serialize_jobs
+Description:
+		This attribute is available only with platform device that
+                supports a Job Manager based GPU.
+
+		Various options available under this are:
+		• none - for disabling serialization.
+		• intra-slot - Serialize atoms within a slot, only one
+				atom per job slot.
+		• inter-slot - Serialize atoms between slots, only one
+				job slot running at any time.
+		• full - it is a combination of both inter and intra slot,
+				so only one atom and one job slot running
+				at any time.
+		• full-reset - full serialization and reset the GPU after
+				each atom completion.
+
+		These options are useful for debugging and investigating
+		failures and gpu hangs to narrow down atoms that could cause
+		troubles.
+
+What:		/sys/class/misc/mali%u/device/firmware_config/Compute iterator count/*
+Description:
+		This attribute is available only with mali platform
+		device-driver that supports a CSF GPU. It is a read-only
+		attribute which indicates the maximum number of Compute
+		iterators supported by the GPU.
+
+What:		/sys/class/misc/mali%u/device/firmware_config/CSHWIF count/*
+Description:
+		This attribute is available only with mali platform
+		device-driver that supports a CSF GPU. It is a read-only
+		attribute which indicates the maximum number of CSHWIFs
+		supported by the GPU.
+
+What:		/sys/class/misc/mali%u/device/firmware_config/Fragment iterator count/*
+Description:
+		This attribute is available only with mali platform
+		device-driver that supports a CSF GPU. It is a read-only
+		attribute which indicates the maximum number of
+		Fragment iterators supported by the GPU.
+
+What:		/sys/class/misc/mali%u/device/firmware_config/Scoreboard set count/*
+Description:
+		This attribute is available only with mali platform
+		device-driver that supports a CSF GPU. It is a read-only
+		attribute which indicates the maximum number of
+		Scoreboard sets supported by the GPU.
+
+What:		/sys/class/misc/mali%u/device/firmware_config/Tiler iterator count/*
+Description:
+		This attribute is available only with mali platform
+		device-driver that supports a CSF GPU. It is a read-only
+		attribute which indicates the maximum number of Tiler iterators
+		supported by the GPU.
+
+What:		/sys/class/misc/mali%u/device/firmware_config/Log verbosity/*
+Description:
+		This attribute is available only with mali platform
+                device-driver that supports a CSF GPU.
+
+		Used to enable firmware logs. The valid range of logging
+		levels is indicated by the 'min' and 'max' attribute
+		values, which are read-only.
+
+		The log level can be set using the read-write 'cur' attribute:
+		write any valid log level value within the min and max range
+		to set the desired log level for firmware logs.
diff --git a/Documentation/devicetree/bindings/arm/mali-bifrost.txt b/Documentation/devicetree/bindings/arm/mali-bifrost.txt
index 8aaca67d3c9a..93493e833c9b 100644
--- a/Documentation/devicetree/bindings/arm/mali-bifrost.txt
+++ b/Documentation/devicetree/bindings/arm/mali-bifrost.txt
@@ -1,10 +1,11 @@
+# SPDX-License-Identifier: GPL-2.0
 #
-# (C) COPYRIGHT 2013-2020 ARM Limited. All rights reserved.
+# (C) COPYRIGHT 2013-2021 ARM Limited. All rights reserved.
 #
 # This program is free software and is provided to you under the terms of the
 # GNU General Public License version 2 as published by the Free Software
 # Foundation, and any use by you of this program is subject to the terms
-# of such GNU licence.
+# of such GNU license.
 #
 # This program is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -15,8 +16,6 @@
 # along with this program; if not, you can access it online at
 # http://www.gnu.org/licenses/gpl-2.0.html.
 #
-# SPDX-License-Identifier: GPL-2.0
-#
 #
 
 * ARM Mali Midgard / Bifrost devices
@@ -46,12 +45,12 @@ Documentation/devicetree/bindings/regulator/regulator.txt for details.
                        This is optional.
 - operating-points-v2 : Refer to Documentation/devicetree/bindings/power/mali-opp.txt
 for details.
-- quirks_jm : Used to write to the JM_CONFIG register or equivalent.
+- quirks_gpu : Used to write to the JM_CONFIG or CSF_CONFIG register.
 	  Should be used with care. Options passed here are used to override
 	  certain default behavior. Note: This will override 'idvs-group-size'
 	  field in devicetree and module param 'corestack_driver_control',
-	  therefore if 'quirks_jm' is used then 'idvs-group-size' and
-	  'corestack_driver_control' value should be incorporated into 'quirks_jm'.
+	  therefore if 'quirks_gpu' is used then 'idvs-group-size' and
+	  'corestack_driver_control' value should be incorporated into 'quirks_gpu'.
 - quirks_sc : Used to write to the SHADER_CONFIG register.
 	  Should be used with care. Options passed here are used to override
 	  certain default behavior.
diff --git a/Documentation/devicetree/bindings/arm/memory_group_manager.txt b/Documentation/devicetree/bindings/arm/memory_group_manager.txt
index fda8f001dafb..77a94743a97b 100644
--- a/Documentation/devicetree/bindings/arm/memory_group_manager.txt
+++ b/Documentation/devicetree/bindings/arm/memory_group_manager.txt
@@ -1,10 +1,11 @@
+# SPDX-License-Identifier: GPL-2.0
 #
-# (C) COPYRIGHT 2019 ARM Limited. All rights reserved.
+# (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved.
 #
 # This program is free software and is provided to you under the terms of the
 # GNU General Public License version 2 as published by the Free Software
 # Foundation, and any use by you of this program is subject to the terms
-# of such GNU licence.
+# of such GNU license.
 #
 # This program is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -15,8 +16,6 @@
 # along with this program; if not, you can access it online at
 # http://www.gnu.org/licenses/gpl-2.0.html.
 #
-# SPDX-License-Identifier: GPL-2.0
-#
 #
 
 * Arm memory group manager for Mali GPU device drivers
diff --git a/Documentation/devicetree/bindings/arm/priority_control_manager.txt b/Documentation/devicetree/bindings/arm/priority_control_manager.txt
new file mode 100644
index 000000000000..0a34896dd8ee
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/priority_control_manager.txt
@@ -0,0 +1,48 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# (C) COPYRIGHT 2020 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU license.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+#
+
+* Arm priority control manager for Mali GPU device drivers
+
+Required properties:
+
+- compatible: Must be "arm,priority-control-manager"
+
+An example node:
+
+        gpu_priority_control_manager: priority-control-manager {
+                compatible = "arm,priority-control-manager";
+        };
+
+It must be referenced by the GPU as well, see priority-control-manager:
+
+	gpu: gpu@0x6e000000 {
+		compatible = "arm,mali-midgard";
+		reg = <0x0 0x6e000000 0x0 0x200000>;
+		interrupts = <0 168 4>, <0 168 4>, <0 168 4>;
+		interrupt-names = "JOB", "MMU", "GPU";
+		clocks = <&scpi_dvfs 2>;
+		clock-names = "clk_mali";
+		system-coherency = <31>;
+		priority-control-manager = <&gpu_priority_control_manager>;
+		operating-points = <
+			/* KHz uV */
+			50000 820000
+		>;
+	};
diff --git a/Documentation/devicetree/bindings/arm/protected_memory_allocator.txt b/Documentation/devicetree/bindings/arm/protected_memory_allocator.txt
new file mode 100644
index 000000000000..8ca5f08c6b44
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/protected_memory_allocator.txt
@@ -0,0 +1,68 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU license.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+#
+
+* Arm protected memory allocator for Mali GPU device drivers
+
+Required properties:
+
+- compatible: Must be "arm,protected-memory-allocator"
+
+The protected memory allocator manages allocation of physical pages of a
+reserved memory region of protected memory, therefore its device node shall
+reference a reserved memory region.
+
+In addition to that, the protected memory allocator shall be referenced
+by the GPU.
+
+A complete example configuration for the device tree:
+
+	reserved-memory {
+		#address-cells = <2>;
+		#size-cells = <2>;
+		ranges;
+
+		mali_protected: mali_protected@c0000000 {
+			compatible = "mali-reserved";
+			reg = <0x0 0xc0000000 0x0 0x1000000>;
+		};
+	};
+
+	gpu_protected_memory_allocator: protected-memory-allocator {
+		compatible = "arm,protected-memory-allocator";
+		memory-region = <&mali_protected>;
+	};
+
+	gpu_fpga: gpu@0x6e000000 {
+		compatible = "arm,mali-midgard";
+		reg = <0x0 0x6e000000 0x0 0x200000>;
+		interrupts = <0 168 4>, <0 168 4>, <0 168 4>;
+		interrupt-names = "JOB", "MMU", "GPU";
+		clocks = <&scpi_dvfs 2>;
+		clock-names = "clk_mali";
+		protected-memory-allocator = <&gpu_protected_memory_allocator>;
+		operating-points = <
+			/* KHz uV */
+			50000 820000
+		>;
+	};
+
+The protected memory allocator is gpu_protected_memory_allocator.
+It references the mali_protected reserved memory region and, in turn,
+it is referenced by the GPU as protected-memory-allocator.
diff --git a/Documentation/devicetree/bindings/power/mali-opp.txt b/Documentation/devicetree/bindings/power/mali-opp.txt
index 88999ff0a172..411c99229e95 100644
--- a/Documentation/devicetree/bindings/power/mali-opp.txt
+++ b/Documentation/devicetree/bindings/power/mali-opp.txt
@@ -1,14 +1,20 @@
+# SPDX-License-Identifier: GPL-2.0
 #
-# (C) COPYRIGHT 2017, 2019 ARM Limited. All rights reserved.
+# (C) COPYRIGHT 2017, 2019-2021 ARM Limited. All rights reserved.
 #
 # This program is free software and is provided to you under the terms of the
 # GNU General Public License version 2 as published by the Free Software
 # Foundation, and any use by you of this program is subject to the terms
-# of such GNU licence.
+# of such GNU license.
 #
-# A copy of the licence is included with the program, and can also be obtained
-# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
-# Boston, MA  02110-1301, USA.
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
 #
 #
 
@@ -48,7 +54,7 @@ Optional properties:
 
 - opp-core-count: Number of cores to use for this OPP. If this is present then
   the driver will build a core mask using the available core mask provided by
-  the GPU hardware.
+  the GPU hardware. An opp-core-count value of 0 is not permitted.
 
   If neither this nor opp-core-mask are present then all shader cores will be
   used for this OPP.
diff --git a/drivers/base/Kconfig b/drivers/base/Kconfig
index 8d7001712062..edcaf6d35f23 100644
--- a/drivers/base/Kconfig
+++ b/drivers/base/Kconfig
@@ -183,6 +183,9 @@ config SOC_BUS
 
 source "drivers/base/regmap/Kconfig"
 
+source "drivers/base/arm/memory_group_manager/Kconfig"
+source "drivers/base/arm/protected_memory_allocator/Kconfig"
+
 config DMA_SHARED_BUFFER
 	bool
 	default n
diff --git a/drivers/base/Makefile b/drivers/base/Makefile
index 41369fc7004f..d17dcf488d39 100644
--- a/drivers/base/Makefile
+++ b/drivers/base/Makefile
@@ -18,6 +18,10 @@ obj-$(CONFIG_MODULES)	+= module.o
 endif
 obj-$(CONFIG_SYS_HYPERVISOR) += hypervisor.o
 obj-$(CONFIG_REGMAP)	+= regmap/
+
+obj-$(CONFIG_MALI_MEMORY_GROUP_MANAGER)	+= arm/memory_group_manager/
+obj-$(CONFIG_MALI_PROTECTED_MEMORY_ALLOCATOR)	+= arm/protected_memory_allocator/
+
 obj-$(CONFIG_SOC_BUS) += soc.o
 obj-$(CONFIG_PINCTRL) += pinctrl.o
 obj-$(CONFIG_DEV_COREDUMP) += devcoredump.o
diff --git a/drivers/base/memory_group_manager/Kbuild b/drivers/base/arm/memory_group_manager/Kbuild
similarity index 89%
rename from drivers/base/memory_group_manager/Kbuild
rename to drivers/base/arm/memory_group_manager/Kbuild
index a049bed07431..1b0f8b07b399 100644
--- a/drivers/base/memory_group_manager/Kbuild
+++ b/drivers/base/arm/memory_group_manager/Kbuild
@@ -1,10 +1,11 @@
+# SPDX-License-Identifier: GPL-2.0
 #
-# (C) COPYRIGHT 2019 ARM Limited. All rights reserved.
+# (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
 #
 # This program is free software and is provided to you under the terms of the
 # GNU General Public License version 2 as published by the Free Software
 # Foundation, and any use by you of this program is subject to the terms
-# of such GNU licence.
+# of such GNU license.
 #
 # This program is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -15,8 +16,6 @@
 # along with this program; if not, you can access it online at
 # http://www.gnu.org/licenses/gpl-2.0.html.
 #
-# SPDX-License-Identifier: GPL-2.0
-#
 #
 
-obj-$(CONFIG_MALI_MEMORY_GROUP_MANAGER) := memory_group_manager.o
\ No newline at end of file
+obj-$(CONFIG_MALI_MEMORY_GROUP_MANAGER) := memory_group_manager.o
diff --git a/drivers/base/memory_group_manager/Kconfig b/drivers/base/arm/memory_group_manager/Kconfig
similarity index 91%
rename from drivers/base/memory_group_manager/Kconfig
rename to drivers/base/arm/memory_group_manager/Kconfig
index da464ec68abe..de698fa1eb4c 100644
--- a/drivers/base/memory_group_manager/Kconfig
+++ b/drivers/base/arm/memory_group_manager/Kconfig
@@ -1,10 +1,11 @@
+# SPDX-License-Identifier: GPL-2.0
 #
-# (C) COPYRIGHT 2019 ARM Limited. All rights reserved.
+# (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
 #
 # This program is free software and is provided to you under the terms of the
 # GNU General Public License version 2 as published by the Free Software
 # Foundation, and any use by you of this program is subject to the terms
-# of such GNU licence.
+# of such GNU license.
 #
 # This program is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -15,8 +16,6 @@
 # along with this program; if not, you can access it online at
 # http://www.gnu.org/licenses/gpl-2.0.html.
 #
-# SPDX-License-Identifier: GPL-2.0
-#
 #
 
 
diff --git a/drivers/base/memory_group_manager/Makefile b/drivers/base/arm/memory_group_manager/Makefile
similarity index 72%
rename from drivers/base/memory_group_manager/Makefile
rename to drivers/base/arm/memory_group_manager/Makefile
index a5bceae12384..4f1ef727cb23 100644
--- a/drivers/base/memory_group_manager/Makefile
+++ b/drivers/base/arm/memory_group_manager/Makefile
@@ -1,10 +1,11 @@
+# SPDX-License-Identifier: GPL-2.0
 #
-# (C) COPYRIGHT 2019 ARM Limited. All rights reserved.
+# (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
 #
 # This program is free software and is provided to you under the terms of the
 # GNU General Public License version 2 as published by the Free Software
 # Foundation, and any use by you of this program is subject to the terms
-# of such GNU licence.
+# of such GNU license.
 #
 # This program is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -15,8 +16,6 @@
 # along with this program; if not, you can access it online at
 # http://www.gnu.org/licenses/gpl-2.0.html.
 #
-# SPDX-License-Identifier: GPL-2.0
-#
 #
 
 # linux build system bootstrap for out-of-tree module
@@ -24,12 +23,15 @@
 # default to building for the host
 ARCH ?= $(shell uname -m)
 
-ifeq ($(KDIR),)
-$(error Must specify KDIR to point to the kernel to target))
-endif
+# Handle Android Common Kernel source naming
+KERNEL_SRC ?= /lib/modules/$(shell uname -r)/build
+KDIR ?= $(KERNEL_SRC)
 
 all:
-	$(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) EXTRA_CFLAGS="-I$(CURDIR)/../../../include" modules CONFIG_MALI_MEMORY_GROUP_MANAGER=m
+	$(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) EXTRA_CFLAGS="-I$(CURDIR)/../../../../include" modules CONFIG_MALI_MEMORY_GROUP_MANAGER=m
 
 clean:
 	$(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) clean
+
+modules_install:
+	$(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) modules_install
diff --git a/drivers/gpu/arm/bifrost/gpu/mali_kbase_gpu.h b/drivers/base/arm/memory_group_manager/build.bp
similarity index 66%
rename from drivers/gpu/arm/bifrost/gpu/mali_kbase_gpu.h
rename to drivers/base/arm/memory_group_manager/build.bp
index 9516e56eda01..4eb447cc1257 100644
--- a/drivers/gpu/arm/bifrost/gpu/mali_kbase_gpu.h
+++ b/drivers/base/arm/memory_group_manager/build.bp
@@ -1,11 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  *
- * (C) COPYRIGHT 2019 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,16 +17,14 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
-#ifndef _KBASE_GPU_H_
-#define _KBASE_GPU_H_
-
-#include "mali_kbase_gpu_regmap.h"
-#include "mali_kbase_gpu_fault.h"
-#include "mali_kbase_gpu_coherency.h"
-#include "mali_kbase_gpu_id.h"
-
-#endif /* _KBASE_GPU_H_ */
+bob_kernel_module {
+    name: "memory_group_manager",
+    srcs: [
+        "Kbuild",
+        "memory_group_manager.c",
+    ],
+    kbuild_options: ["CONFIG_MALI_MEMORY_GROUP_MANAGER=m"],
+    defaults: ["kernel_defaults"],
+}
diff --git a/drivers/base/memory_group_manager/memory_group_manager.c b/drivers/base/arm/memory_group_manager/memory_group_manager.c
similarity index 98%
rename from drivers/base/memory_group_manager/memory_group_manager.c
rename to drivers/base/arm/memory_group_manager/memory_group_manager.c
index cbe7f0a775a1..2ce3b940ed72 100644
--- a/drivers/base/memory_group_manager/memory_group_manager.c
+++ b/drivers/base/arm/memory_group_manager/memory_group_manager.c
@@ -1,11 +1,12 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  *
- * (C) COPYRIGHT 2019 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,8 +17,6 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 #include <linux/fs.h>
@@ -481,7 +480,6 @@ static struct platform_driver memory_group_manager_driver = {
 	.remove = memory_group_manager_remove,
 	.driver = {
 		.name = "physical-memory-group-manager",
-		.owner = THIS_MODULE,
 		.of_match_table = of_match_ptr(memory_group_manager_dt_ids),
 		/*
 		 * Prevent the mgm_dev from being unbound and freed, as other's
diff --git a/drivers/base/arm/protected_memory_allocator/Kbuild b/drivers/base/arm/protected_memory_allocator/Kbuild
new file mode 100644
index 000000000000..205c5f1ee95a
--- /dev/null
+++ b/drivers/base/arm/protected_memory_allocator/Kbuild
@@ -0,0 +1,21 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU license.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+#
+
+obj-$(CONFIG_MALI_PROTECTED_MEMORY_ALLOCATOR) := protected_memory_allocator.o
diff --git a/drivers/base/arm/protected_memory_allocator/Kconfig b/drivers/base/arm/protected_memory_allocator/Kconfig
new file mode 100644
index 000000000000..5ee5dcce625d
--- /dev/null
+++ b/drivers/base/arm/protected_memory_allocator/Kconfig
@@ -0,0 +1,27 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU license.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+#
+
+
+config MALI_PROTECTED_MEMORY_ALLOCATOR
+	tristate "MALI_PROTECTED_MEMORY_ALLOCATOR"
+	help
+	  This option enables an example implementation of a protected memory allocator
+	  for allocation and release of pages of secure memory intended to be used
+	  by the firmware of Mali GPU device drivers.
diff --git a/drivers/base/arm/protected_memory_allocator/Makefile b/drivers/base/arm/protected_memory_allocator/Makefile
new file mode 100644
index 000000000000..93f49b5efdf2
--- /dev/null
+++ b/drivers/base/arm/protected_memory_allocator/Makefile
@@ -0,0 +1,37 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU license.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+#
+
+# linux build system bootstrap for out-of-tree module
+
+# default to building for the host; translate `uname -m` into a kernel ARCH name
+ARCH ?= $(shell uname -m | sed -e s/i.86/x86/ -e s/x86_64/x86/ -e s/arm.*/arm/ -e s/aarch64.*/arm64/)
+
+# Handle Android Common Kernel source naming
+KERNEL_SRC ?= /lib/modules/$(shell uname -r)/build
+KDIR ?= $(KERNEL_SRC)
+
+all:
+	$(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) EXTRA_CFLAGS="-I$(CURDIR)/../../../../include" modules CONFIG_MALI_PROTECTED_MEMORY_ALLOCATOR=m
+
+clean:
+	$(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) clean
+
+modules_install:
+	$(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) modules_install
diff --git a/drivers/base/arm/protected_memory_allocator/build.bp b/drivers/base/arm/protected_memory_allocator/build.bp
new file mode 100644
index 000000000000..ddb0510cacd6
--- /dev/null
+++ b/drivers/base/arm/protected_memory_allocator/build.bp
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ *
+ * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+bob_kernel_module {
+    name: "protected_memory_allocator",
+    srcs: [
+        "Kbuild",
+        "protected_memory_allocator.c",
+    ],
+    kbuild_options: ["CONFIG_MALI_PROTECTED_MEMORY_ALLOCATOR=m"],
+    defaults: ["kernel_defaults"],
+    enabled: false,
+    build_csf_only_module: {
+        enabled: true,
+    },
+}
diff --git a/drivers/base/arm/protected_memory_allocator/protected_memory_allocator.c b/drivers/base/arm/protected_memory_allocator/protected_memory_allocator.c
new file mode 100644
index 000000000000..a34b3a44fb79
--- /dev/null
+++ b/drivers/base/arm/protected_memory_allocator/protected_memory_allocator.c
@@ -0,0 +1,551 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ *
+ * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+#include <linux/version.h>
+#include <linux/of.h>
+#include <linux/of_reserved_mem.h>
+#include <linux/platform_device.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/mm.h>
+#include <linux/io.h>
+#include <linux/protected_memory_allocator.h>
+
+/* Size of a bitfield element in bytes */
+#define BITFIELD_ELEM_SIZE sizeof(u64)
+
+/* We can track whether or not 64 pages are currently allocated in a u64 */
+#define PAGES_PER_BITFIELD_ELEM (BITFIELD_ELEM_SIZE * BITS_PER_BYTE)
+
+/* Order 6 (ie, 2^6 = 64 pages) is the number of pages tracked by one bitfield element */
+#define ORDER_OF_PAGES_PER_BITFIELD_ELEM 6
+
+/**
+ * struct simple_pma_device -	Simple implementation of a protected memory
+ *				allocator device
+ *
+ * @pma_dev:			Embedded protected memory allocator device
+ * @dev:			Device pointer
+ * @allocated_pages_bitfield_arr: Status of all the physical memory pages within
+ *				the protected memory region, one bit per page
+ * @rmem_base:			Base address of the reserved memory region
+ * @rmem_size:			Size of the reserved memory region, in pages
+ * @num_free_pages:		Number of free pages in the memory region
+ * @rmem_lock:			Lock to serialize the allocation and freeing of
+ *				physical pages from the protected memory region
+ */
+struct simple_pma_device {
+	struct protected_memory_allocator_device pma_dev;
+	struct device *dev;
+	u64 *allocated_pages_bitfield_arr;
+	phys_addr_t rmem_base;
+	size_t rmem_size;
+	size_t num_free_pages;
+	spinlock_t rmem_lock;
+};
+
+/*
+ * Number of elements in array 'allocated_pages_bitfield_arr'. If the number of
+ * pages required does not divide exactly by PAGES_PER_BITFIELD_ELEM, adds an
+ * extra element for the remainder. Note: 'num_pages' is evaluated twice.
+ */
+#define ALLOC_PAGES_BITFIELD_ARR_SIZE(num_pages) \
+	((PAGES_PER_BITFIELD_ELEM * (0 != ((num_pages) % PAGES_PER_BITFIELD_ELEM)) + \
+	(num_pages)) / PAGES_PER_BITFIELD_ELEM)
+
+/*
+ * Allocate 2^order pages, where
+ * 0 <= order <= ORDER_OF_PAGES_PER_BITFIELD_ELEM - 1, ie up to 32 pages. The
+ * routine fills in a pma structure and sets the appropriate bits in the
+ * allocated-pages bitfield array, but assumes the caller has already determined
+ * that these bits are clear (any bit found set only triggers a warning).
+ *
+ * This routine always works within only a single allocated-pages bitfield element.
+ * It can be thought of as the 'small-granularity' allocator.
+ */
+static void small_granularity_alloc(struct simple_pma_device *const epma_dev,
+				    size_t alloc_bitfield_idx, size_t start_bit,
+				    size_t order,
+				    struct protected_memory_allocation *pma)
+{
+	size_t i;
+	size_t page_idx;
+	u64 *bitfield;
+	size_t alloc_pages_bitfield_size;
+
+	if (WARN_ON(!epma_dev) ||
+	    WARN_ON(!pma))
+		return;
+
+	WARN(epma_dev->rmem_size == 0, "%s: rmem_size is 0", __func__);
+	alloc_pages_bitfield_size = ALLOC_PAGES_BITFIELD_ARR_SIZE(epma_dev->rmem_size);
+
+	WARN(alloc_bitfield_idx >= alloc_pages_bitfield_size,
+	     "%s: idx>bf_size: %zu %zu", __func__,
+	     alloc_bitfield_idx, alloc_pages_bitfield_size);
+
+	WARN((start_bit + ((size_t)1 << order)) > PAGES_PER_BITFIELD_ELEM,
+	     "%s: start=%zu order=%zu ppbe=%zu",
+	     __func__, start_bit, order, PAGES_PER_BITFIELD_ELEM);
+
+	bitfield = &epma_dev->allocated_pages_bitfield_arr[alloc_bitfield_idx];
+
+	for (i = 0; i < ((size_t)1 << order); i++) {
+		/* Check the pages represented by this bit are actually free */
+		WARN(*bitfield & (1ULL << (start_bit + i)),
+		     "in %s: page not free: %zu %zu %.16llx %zu\n",
+		     __func__, i, order, *bitfield, alloc_pages_bitfield_size);
+
+		/* Mark the pages as now allocated */
+		*bitfield |= (1ULL << (start_bit + i));
+	}
+
+	/* Compute the page index */
+	page_idx = (alloc_bitfield_idx * PAGES_PER_BITFIELD_ELEM) + start_bit;
+
+	/* Shift in phys_addr_t so the byte offset is not truncated to size_t */
+	pma->pa = epma_dev->rmem_base + ((phys_addr_t)page_idx << PAGE_SHIFT);
+	pma->order = order;
+}
+
+/*
+ * Allocate 2^order pages, where
+ * order >= ORDER_OF_PAGES_PER_BITFIELD_ELEM, ie 64 pages or more. The routine fills in
+ * a pma structure and sets the appropriate bits in the allocated-pages bitfield array
+ * but assumes the caller has already determined that these bits are clear.
+ *
+ * Unlike small_granularity_alloc, this routine can work with multiple 64-page groups,
+ * ie multiple elements from the allocated-pages bitfield array. However, it always
+ * works with complete sets of these 64-page groups. It can therefore be thought of
+ * as the 'large-granularity' allocator.
+ */
+static void large_granularity_alloc(struct simple_pma_device *const epma_dev,
+				    size_t start_alloc_bitfield_idx,
+				    size_t order,
+				    struct protected_memory_allocation *pma)
+{
+	size_t i;
+	size_t num_pages_to_alloc = (size_t)1 << order;
+	size_t num_bitfield_elements_needed = num_pages_to_alloc / PAGES_PER_BITFIELD_ELEM;
+	size_t start_page_idx = start_alloc_bitfield_idx * PAGES_PER_BITFIELD_ELEM;
+
+	if (WARN_ON(!epma_dev) ||
+	    WARN_ON(!pma))
+		return;
+
+	/*
+	 * Are there enough bitfield array elements (groups of 64 pages) between
+	 * the start element and the end of the bitfield array for the request?
+	 */
+	WARN((start_alloc_bitfield_idx + num_bitfield_elements_needed) >
+	     ALLOC_PAGES_BITFIELD_ARR_SIZE(epma_dev->rmem_size),
+	     "%s: start=%zu order=%zu ms=%zu",
+	     __func__, start_alloc_bitfield_idx, order, epma_dev->rmem_size);
+
+	for (i = 0; i < num_bitfield_elements_needed; i++) {
+		u64 *bitfield = &epma_dev->allocated_pages_bitfield_arr[start_alloc_bitfield_idx + i];
+
+		/* We expect all pages that relate to this bitfield element to be free */
+		WARN((*bitfield != 0),
+		     "in %s: pages not free: i=%zu o=%zu bf=%.16llx\n",
+		     __func__, i, order, *bitfield);
+
+		/* Mark all the pages for this element as not free */
+		*bitfield = ~0ULL;
+	}
+
+	/* Shift in phys_addr_t so the byte offset is not truncated to size_t */
+	pma->pa = epma_dev->rmem_base + ((phys_addr_t)start_page_idx << PAGE_SHIFT);
+	pma->order = order;
+}
+
+/*
+ * Allocate 2^order physically contiguous pages from the reserved region.
+ * Returns the new allocation record, or NULL if the request cannot be met.
+ */
+static struct protected_memory_allocation *simple_pma_alloc_page(
+	struct protected_memory_allocator_device *pma_dev, unsigned int order)
+{
+	struct simple_pma_device *const epma_dev =
+		container_of(pma_dev, struct simple_pma_device, pma_dev);
+	struct protected_memory_allocation *pma;
+	size_t num_pages_to_alloc;
+
+	u64 *bitfields = epma_dev->allocated_pages_bitfield_arr;
+	size_t i;
+	size_t bit;
+	size_t count;
+
+	dev_dbg(epma_dev->dev, "%s(pma_dev=%px, order=%u)\n",
+		__func__, (void *)pma_dev, order);
+
+	/* This is an example function that follows an extremely simple logic
+	 * and is very likely to fail to allocate memory if put under stress.
+	 *
+	 * The simple_pma_device maintains an array of u64s, with one bit used
+	 * to track the status of each page.
+	 *
+	 * In order to create a memory allocation, the allocator looks for an
+	 * adjacent group of cleared bits. This does leave the algorithm open
+	 * to fragmentation issues, but is deemed sufficient for now.
+	 * If successful, the allocator shall mark all the pages as allocated
+	 * and reduce the count of free pages accordingly.
+	 *
+	 * Allocations of 64 pages or more (order 6) can be allocated only with
+	 * 64-page alignment, in order to keep the algorithm as simple as
+	 * possible. ie, starting from bit 0 of any 64-bit page-allocation
+	 * bitfield. For this, the large-granularity allocator is utilised.
+	 *
+	 * Allocations of lower-order can only be allocated entirely within the
+	 * same group of 64 pages, with the small-granularity allocator (ie
+	 * always from the same 64-bit page-allocation bitfield) - again, to
+	 * keep things as simple as possible, but flexible to meet
+	 * current needs.
+	 */
+
+	num_pages_to_alloc = (size_t)1 << order;
+
+	pma = devm_kzalloc(epma_dev->dev, sizeof(*pma), GFP_KERNEL);
+	if (!pma) {
+		dev_err(epma_dev->dev, "Failed to alloc pma struct\n");
+		return NULL;
+	}
+
+	spin_lock(&epma_dev->rmem_lock);
+
+	if (epma_dev->num_free_pages < num_pages_to_alloc) {
+		spin_unlock(&epma_dev->rmem_lock);
+		dev_err(epma_dev->dev, "not enough free pages\n");
+		devm_kfree(epma_dev->dev, pma);
+		return NULL;
+	}
+
+	/*
+	 * For order 0-5 (ie, 1 to 32 pages) we always allocate within the same set of 64 pages
+	 * Currently, most allocations will be very small (1 page), so the more likely path
+	 * here is order < ORDER_OF_PAGES_PER_BITFIELD_ELEM.
+	 */
+	if (likely(order < ORDER_OF_PAGES_PER_BITFIELD_ELEM)) {
+		size_t alloc_pages_bitmap_size = ALLOC_PAGES_BITFIELD_ARR_SIZE(epma_dev->rmem_size);
+
+		for (i = 0; i < alloc_pages_bitmap_size; i++) {
+			count = 0;
+
+			for (bit = 0; bit < PAGES_PER_BITFIELD_ELEM; bit++) {
+				if (0 == (bitfields[i] & (1ULL << bit))) {
+					if ((count + 1) >= num_pages_to_alloc) {
+						/*
+						 * We've found enough free, consecutive pages with which to
+						 * make an allocation
+						 */
+						small_granularity_alloc(
+							epma_dev, i,
+							bit - count, order,
+							pma);
+
+						epma_dev->num_free_pages -= num_pages_to_alloc;
+
+						spin_unlock(&epma_dev->rmem_lock);
+						return pma;
+					}
+
+					/* So far so good, but we need more clear bits yet */
+					count++;
+				} else {
+					/*
+					 * We found an allocated page, so nothing we've seen so far can be used.
+					 * Keep looking.
+					 */
+					count = 0;
+				}
+			}
+		}
+	} else {
+		/*
+		 * For allocations of order ORDER_OF_PAGES_PER_BITFIELD_ELEM and above (>= 64 pages), we know
+		 * we'll only get allocations for whole groups of 64 pages, which hugely simplifies the task.
+		 */
+		size_t alloc_pages_bitmap_size = ALLOC_PAGES_BITFIELD_ARR_SIZE(epma_dev->rmem_size);
+
+		/* How many 64-bit bitfield elements will be needed for the allocation? */
+		size_t num_bitfield_elements_needed = num_pages_to_alloc / PAGES_PER_BITFIELD_ELEM;
+
+		count = 0;
+
+		for (i = 0; i < alloc_pages_bitmap_size; i++) {
+			/* Are all the pages free for the i'th u64 bitfield element? */
+			if (bitfields[i] == 0) {
+				count += PAGES_PER_BITFIELD_ELEM;
+
+				if (count >= num_pages_to_alloc) {
+					size_t start_idx = (i + 1) - num_bitfield_elements_needed;
+
+					large_granularity_alloc(epma_dev,
+								start_idx,
+								order, pma);
+
+					epma_dev->num_free_pages -= num_pages_to_alloc;
+					spin_unlock(&epma_dev->rmem_lock);
+					return pma;
+				}
+			} else {
+				count = 0;
+			}
+		}
+	}
+
+	spin_unlock(&epma_dev->rmem_lock);
+	devm_kfree(epma_dev->dev, pma);
+
+	dev_err(epma_dev->dev, "not enough contiguous pages (need %zu), total free pages left %zu\n",
+		num_pages_to_alloc, epma_dev->num_free_pages);
+	return NULL;
+}
+
+/* Report the physical address stored in the allocation record @pma. */
+static phys_addr_t simple_pma_get_phys_addr(
+	struct protected_memory_allocator_device *pma_dev,
+	struct protected_memory_allocation *pma)
+{
+	struct simple_pma_device *const simple_dev =
+		container_of(pma_dev, struct simple_pma_device, pma_dev);
+
+	dev_dbg(simple_dev->dev, "%s(pma_dev=%px, pma=%px, pa=%llx\n", __func__,
+		(void *)pma_dev, (void *)pma, (unsigned long long)pma->pa);
+
+	return pma->pa;
+}
+
+/*
+ * Return the pages of @pma to the reserved region and free the record itself.
+ */
+static void simple_pma_free_page(
+	struct protected_memory_allocator_device *pma_dev,
+	struct protected_memory_allocation *pma)
+{
+	struct simple_pma_device *const epma_dev =
+		container_of(pma_dev, struct simple_pma_device, pma_dev);
+	size_t num_pages_in_allocation;
+	size_t offset;
+	size_t i;
+	size_t bitfield_idx;
+	size_t bitfield_start_bit;
+	size_t page_num;
+	u64 *bitfield;
+	size_t num_bitfield_elems_used_by_alloc;
+
+	if (WARN_ON(pma == NULL))
+		return;
+
+	dev_dbg(epma_dev->dev, "%s(pma_dev=%px, pma=%px, pa=%llx)\n",
+		__func__, (void *)pma_dev, (void *)pma,
+		(unsigned long long)pma->pa);
+
+	WARN_ON(pma->pa < epma_dev->rmem_base);
+
+	/* This is an example function that follows an extremely simple logic
+	 * and is vulnerable to abuse.
+	 */
+	offset = (pma->pa - epma_dev->rmem_base);
+	num_pages_in_allocation = (size_t)1 << pma->order;
+
+	/* The number of whole bitfield elements used by the allocation (0 if order < 6) */
+	num_bitfield_elems_used_by_alloc = num_pages_in_allocation / PAGES_PER_BITFIELD_ELEM;
+
+	/* The page number of the first page of the allocation, relative to rmem_base */
+	page_num = offset >> PAGE_SHIFT;
+
+	/* Which u64 bitfield refers to this page? */
+	bitfield_idx = page_num / PAGES_PER_BITFIELD_ELEM;
+
+	/* Does the whole allocation lie inside the reserved region (rmem_size is in pages)? */
+	WARN_ON((page_num + num_pages_in_allocation) > epma_dev->rmem_size);
+
+	spin_lock(&epma_dev->rmem_lock);
+
+	if (pma->order < ORDER_OF_PAGES_PER_BITFIELD_ELEM) {
+		bitfield = &epma_dev->allocated_pages_bitfield_arr[bitfield_idx];
+
+		/* Which bit within that u64 bitfield is the lsb covering this allocation?  */
+		bitfield_start_bit = page_num % PAGES_PER_BITFIELD_ELEM;
+
+		/* Clear the bits for the pages we're now freeing */
+		*bitfield &= ~(((1ULL << num_pages_in_allocation) - 1) << bitfield_start_bit);
+	} else {
+		WARN(page_num % PAGES_PER_BITFIELD_ELEM,
+		     "%s: Expecting allocs of order >= %d to be %zu-page aligned\n",
+		     __func__, ORDER_OF_PAGES_PER_BITFIELD_ELEM, PAGES_PER_BITFIELD_ELEM);
+
+		for (i = 0; i < num_bitfield_elems_used_by_alloc; i++) {
+			bitfield = &epma_dev->allocated_pages_bitfield_arr[bitfield_idx + i];
+
+			/* We expect all bits to be set (all pages allocated) */
+			WARN((*bitfield != ~0ULL),
+			     "%s: alloc being freed is not fully allocated: of=%zu np=%zu bf=%.16llx\n",
+			     __func__, offset, num_pages_in_allocation, *bitfield);
+
+			/*
+			 * Now clear all the bits in the bitfield element to mark all the pages
+			 * it refers to as free.
+			 */
+			*bitfield = 0ULL;
+		}
+	}
+
+	epma_dev->num_free_pages += num_pages_in_allocation;
+	spin_unlock(&epma_dev->rmem_lock);
+	devm_kfree(epma_dev->dev, pma);
+}
+
+/* Probe: look up the "memory-region" reserved memory and set up page tracking. */
+static int protected_memory_allocator_probe(struct platform_device *pdev)
+{
+	struct simple_pma_device *epma_dev;
+	struct device_node *np;
+	phys_addr_t rmem_base;
+	size_t rmem_size;
+	size_t alloc_bitmap_pages_arr_size;
+#if (KERNEL_VERSION(4, 15, 0) <= LINUX_VERSION_CODE)
+	struct reserved_mem *rmem;
+#endif
+
+	np = pdev->dev.of_node;
+
+	if (!np) {
+		dev_err(&pdev->dev, "device node pointer not set\n");
+		return -ENODEV;
+	}
+
+	np = of_parse_phandle(np, "memory-region", 0);
+	if (!np) {
+		dev_err(&pdev->dev, "memory-region node not set\n");
+		return -ENODEV;
+	}
+
+#if (KERNEL_VERSION(4, 15, 0) <= LINUX_VERSION_CODE)
+	rmem = of_reserved_mem_lookup(np);
+	if (rmem) {
+		rmem_base = rmem->base;
+		rmem_size = rmem->size >> PAGE_SHIFT;
+	} else
+#endif
+	{
+		of_node_put(np);
+		dev_err(&pdev->dev, "could not read reserved memory-region\n");
+		return -ENODEV;
+	}
+
+	of_node_put(np);
+	epma_dev = devm_kzalloc(&pdev->dev, sizeof(*epma_dev), GFP_KERNEL);
+	if (!epma_dev)
+		return -ENOMEM;
+
+	epma_dev->pma_dev.ops.pma_alloc_page = simple_pma_alloc_page;
+	epma_dev->pma_dev.ops.pma_get_phys_addr = simple_pma_get_phys_addr;
+	epma_dev->pma_dev.ops.pma_free_page = simple_pma_free_page;
+	epma_dev->pma_dev.owner = THIS_MODULE;
+	epma_dev->dev = &pdev->dev;
+	epma_dev->rmem_base = rmem_base;
+	epma_dev->rmem_size = rmem_size;
+	epma_dev->num_free_pages = rmem_size;
+	spin_lock_init(&epma_dev->rmem_lock);
+
+	alloc_bitmap_pages_arr_size = ALLOC_PAGES_BITFIELD_ARR_SIZE(epma_dev->rmem_size);
+
+	/* One status bit per page; devm_kcalloc() zeroes it and checks n * size for overflow */
+	epma_dev->allocated_pages_bitfield_arr = devm_kcalloc(&pdev->dev,
+		alloc_bitmap_pages_arr_size, BITFIELD_ELEM_SIZE, GFP_KERNEL);
+	if (!epma_dev->allocated_pages_bitfield_arr) {
+		dev_err(&pdev->dev, "failed to allocate resources\n");
+		devm_kfree(&pdev->dev, epma_dev);
+		return -ENOMEM;
+	}
+
+	if (epma_dev->rmem_size % PAGES_PER_BITFIELD_ELEM) {
+		size_t extra_pages =
+			alloc_bitmap_pages_arr_size * PAGES_PER_BITFIELD_ELEM -
+			epma_dev->rmem_size;
+		size_t last_bitfield_index = alloc_bitmap_pages_arr_size - 1;
+
+		/* Mark the extra pages (that lie outside the reserved range) as
+		 * always in use.
+		 */
+		epma_dev->allocated_pages_bitfield_arr[last_bitfield_index] =
+			((1ULL << extra_pages) - 1) <<
+			(PAGES_PER_BITFIELD_ELEM - extra_pages);
+	}
+
+	platform_set_drvdata(pdev, &epma_dev->pma_dev);
+	dev_info(&pdev->dev, "Protected memory allocator probed successfully\n");
+	dev_info(&pdev->dev, "Protected memory region: base=%llx num pages=%zu\n",
+		(unsigned long long)rmem_base, rmem_size);
+
+	return 0;
+}
+
+/* Tear down the allocator bound to @pdev, reporting pages that were never freed. */
+static int protected_memory_allocator_remove(struct platform_device *pdev)
+{
+	struct protected_memory_allocator_device *const drvdata =
+		platform_get_drvdata(pdev);
+	struct simple_pma_device *simple_dev;
+	struct device *owner;
+	size_t pages_in_use;
+
+	if (drvdata == NULL)
+		return -EINVAL;
+
+	simple_dev = container_of(drvdata, struct simple_pma_device, pma_dev);
+	owner = simple_dev->dev;
+
+	/* Fewer free pages than the region holds means some were not released */
+	if (simple_dev->rmem_size > simple_dev->num_free_pages) {
+		pages_in_use = simple_dev->rmem_size - simple_dev->num_free_pages;
+		dev_warn(&pdev->dev, "Leaking %zu pages of protected memory\n", pages_in_use);
+	}
+
+	platform_set_drvdata(pdev, NULL);
+	devm_kfree(owner, simple_dev->allocated_pages_bitfield_arr);
+	devm_kfree(owner, simple_dev);
+	dev_info(&pdev->dev, "Protected memory allocator removed successfully\n");
+	return 0;
+}
+
+/* Device-tree match table: the compatible string this driver binds to. */
+static const struct of_device_id protected_memory_allocator_dt_ids[] = {
+	{ .compatible = "arm,protected-memory-allocator" },
+	{ /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(of, protected_memory_allocator_dt_ids);
+
+static struct platform_driver protected_memory_allocator_driver = {
+	.driver = {
+		.of_match_table = of_match_ptr(protected_memory_allocator_dt_ids),
+		.name = "simple_protected_memory_allocator",
+	},
+	.remove = protected_memory_allocator_remove,
+	.probe = protected_memory_allocator_probe,
+};
+module_platform_driver(protected_memory_allocator_driver);
+
+MODULE_VERSION("1.0");
+MODULE_AUTHOR("ARM Ltd.");
+MODULE_LICENSE("GPL");
diff --git a/drivers/base/memory_group_manager/build.bp b/drivers/base/memory_group_manager/build.bp
deleted file mode 100644
index 04dbfd3a51de..000000000000
--- a/drivers/base/memory_group_manager/build.bp
+++ /dev/null
@@ -1,22 +0,0 @@
-/*
- * (C) COPYRIGHT 2019 ARM Limited. All rights reserved.
- *
- * This program is free software and is provided to you under the terms of the
- * GNU General Public License version 2 as published by the Free Software
- * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
- *
- * A copy of the licence is included with the program, and can also be obtained
- * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
- * Boston, MA  02110-1301, USA.
- */
-
-bob_kernel_module {
-    name: "memory_group_manager",
-    srcs: [
-        "Kbuild",
-        "memory_group_manager.c",
-    ],
-    kbuild_options: ["CONFIG_MALI_MEMORY_GROUP_MANAGER=m"],
-    defaults: ["kernel_defaults"],
-}
diff --git a/drivers/gpu/arm/Kbuild b/drivers/gpu/arm/Kbuild
index 62b4706f7326..f747fc889b5b 100644
--- a/drivers/gpu/arm/Kbuild
+++ b/drivers/gpu/arm/Kbuild
@@ -1,10 +1,11 @@
+# SPDX-License-Identifier: GPL-2.0
 #
-# (C) COPYRIGHT 2012 ARM Limited. All rights reserved.
+# (C) COPYRIGHT 2012, 2020 ARM Limited. All rights reserved.
 #
 # This program is free software and is provided to you under the terms of the
 # GNU General Public License version 2 as published by the Free Software
 # Foundation, and any use by you of this program is subject to the terms
-# of such GNU licence.
+# of such GNU license.
 #
 # This program is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -15,8 +16,6 @@
 # along with this program; if not, you can access it online at
 # http://www.gnu.org/licenses/gpl-2.0.html.
 #
-# SPDX-License-Identifier: GPL-2.0
-#
 #
 
 obj-$(CONFIG_MALI_MIDGARD) += midgard/
diff --git a/drivers/gpu/arm/Kconfig b/drivers/gpu/arm/Kconfig
index 64ede484d1dc..398a8e50a4cd 100644
--- a/drivers/gpu/arm/Kconfig
+++ b/drivers/gpu/arm/Kconfig
@@ -1,10 +1,11 @@
+# SPDX-License-Identifier: GPL-2.0
 #
 # (C) COPYRIGHT 2012 ARM Limited. All rights reserved.
 #
 # This program is free software and is provided to you under the terms of the
 # GNU General Public License version 2 as published by the Free Software
 # Foundation, and any use by you of this program is subject to the terms
-# of such GNU licence.
+# of such GNU license.
 #
 # This program is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -15,8 +16,6 @@
 # along with this program; if not, you can access it online at
 # http://www.gnu.org/licenses/gpl-2.0.html.
 #
-# SPDX-License-Identifier: GPL-2.0
-#
 #
 #
 source "drivers/gpu/arm/mali400/mali/Kconfig"
diff --git a/drivers/gpu/arm/bifrost/Kbuild b/drivers/gpu/arm/bifrost/Kbuild
index c05dc8399027..8b4ece17cfe5 100644
--- a/drivers/gpu/arm/bifrost/Kbuild
+++ b/drivers/gpu/arm/bifrost/Kbuild
@@ -1,10 +1,11 @@
+# SPDX-License-Identifier: GPL-2.0
 #
-# (C) COPYRIGHT 2012-2020 ARM Limited. All rights reserved.
+# (C) COPYRIGHT 2012-2021 ARM Limited. All rights reserved.
 #
 # This program is free software and is provided to you under the terms of the
 # GNU General Public License version 2 as published by the Free Software
 # Foundation, and any use by you of this program is subject to the terms
-# of such GNU licence.
+# of such GNU license.
 #
 # This program is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -15,16 +16,14 @@
 # along with this program; if not, you can access it online at
 # http://www.gnu.org/licenses/gpl-2.0.html.
 #
-# SPDX-License-Identifier: GPL-2.0
-#
 #
 
 # Driver version string which is returned to userspace via an ioctl
-MALI_RELEASE_NAME ?= "g2p0-01eac0"
+MALI_RELEASE_NAME ?= '"g6p0-01eac0"'
 
 # Paths required for build
 
-# make $(src) as absolute path if it isn't already, by prefixing $(srctree)
+# make $(src) as absolute path if it is not already, by prefixing $(srctree)
 src:=$(if $(patsubst /%,,$(src)),$(srctree)/$(src),$(src))
 KBASE_PATH = $(src)
 KBASE_PLATFORM_PATH = $(KBASE_PATH)/platform_dummy
@@ -32,11 +31,8 @@ UMP_PATH = $(src)/../../../base
 
 # Set up defaults if not defined by build system
 MALI_CUSTOMER_RELEASE ?= 1
-MALI_USE_CSF ?= 0
 MALI_UNIT_TEST ?= 0
-MALI_KERNEL_TEST_API ?= 0
 MALI_COVERAGE ?= 0
-MALI_JIT_PRESSURE_LIMIT_BASE ?= 1
 CONFIG_MALI_PLATFORM_NAME ?= "devicetree"
 # Experimental features (corresponding -D definition should be appended to
 # DEFINES below, e.g. for MALI_EXPERIMENTAL_FEATURE,
@@ -46,6 +42,20 @@ CONFIG_MALI_PLATFORM_NAME ?= "devicetree"
 # MALI_EXPERIMENTAL_FEATURE ?= 0
 MALI_INCREMENTAL_RENDERING ?= 0
 
+ifeq ($(CONFIG_MALI_CSF_SUPPORT),y)
+MALI_JIT_PRESSURE_LIMIT_BASE = 0
+MALI_USE_CSF = 1
+else
+MALI_JIT_PRESSURE_LIMIT_BASE ?= 1
+MALI_USE_CSF ?= 0
+endif
+
+ifneq ($(CONFIG_MALI_KUTF), n)
+MALI_KERNEL_TEST_API ?= 1
+else
+MALI_KERNEL_TEST_API ?= 0
+endif
+
 # Set up our defines, which will be passed to gcc
 DEFINES = \
 	-DMALI_CUSTOMER_RELEASE=$(MALI_CUSTOMER_RELEASE) \
@@ -53,7 +63,7 @@ DEFINES = \
 	-DMALI_KERNEL_TEST_API=$(MALI_KERNEL_TEST_API) \
 	-DMALI_UNIT_TEST=$(MALI_UNIT_TEST) \
 	-DMALI_COVERAGE=$(MALI_COVERAGE) \
-	-DMALI_RELEASE_NAME=\"$(MALI_RELEASE_NAME)\" \
+	-DMALI_RELEASE_NAME=$(MALI_RELEASE_NAME) \
 	-DMALI_JIT_PRESSURE_LIMIT_BASE=$(MALI_JIT_PRESSURE_LIMIT_BASE) \
 	-DMALI_INCREMENTAL_RENDERING=$(MALI_INCREMENTAL_RENDERING)
 
@@ -90,7 +100,6 @@ SRC := \
 	mali_kbase_config.c \
 	mali_kbase_vinstr.c \
 	mali_kbase_hwcnt.c \
-	mali_kbase_hwcnt_backend_jm.c \
 	mali_kbase_hwcnt_gpu.c \
 	mali_kbase_hwcnt_legacy.c \
 	mali_kbase_hwcnt_types.c \
@@ -104,7 +113,6 @@ SRC := \
 	mali_kbase_mem_profile_debugfs.c \
 	mmu/mali_kbase_mmu.c \
 	mmu/mali_kbase_mmu_hw_direct.c \
-	mmu/mali_kbase_mmu_mode_lpae.c \
 	mmu/mali_kbase_mmu_mode_aarch64.c \
 	mali_kbase_disjoint_events.c \
 	mali_kbase_debug_mem_view.c \
@@ -115,6 +123,7 @@ SRC := \
 	mali_kbase_strings.c \
 	mali_kbase_as_fault_debugfs.c \
 	mali_kbase_regs_history_debugfs.c \
+	mali_kbase_dvfs_debugfs.c \
 	mali_power_gpu_frequency_trace.c \
 	mali_kbase_trace_gpu_mem.c \
 	thirdparty/mali_kbase_mmap.c \
@@ -126,6 +135,8 @@ SRC := \
 
 ifeq ($(MALI_USE_CSF),1)
 	SRC += \
+		mali_kbase_hwcnt_backend_csf.c \
+		mali_kbase_hwcnt_backend_csf_if_fw.c \
 		debug/backend/mali_kbase_debug_ktrace_csf.c \
 		device/backend/mali_kbase_device_csf.c \
 		device/backend/mali_kbase_device_hw_csf.c \
@@ -135,6 +146,7 @@ ifeq ($(MALI_USE_CSF),1)
 		context/backend/mali_kbase_context_csf.c
 else
 	SRC += \
+		mali_kbase_hwcnt_backend_jm.c \
 		mali_kbase_dummy_job_wa.c \
 		mali_kbase_debug_job_fault.c \
 		mali_kbase_event.c \
@@ -156,9 +168,6 @@ ifeq ($(CONFIG_MALI_CINSTR_GWT),y)
 	SRC += mali_kbase_gwt.c
 endif
 
-ifeq ($(MALI_UNIT_TEST),1)
-	SRC += tl/mali_kbase_timeline_test.c
-endif
 
 ifeq ($(MALI_CUSTOMER_RELEASE),0)
 	SRC += mali_kbase_regs_dump_debugfs.c
diff --git a/drivers/gpu/arm/bifrost/Kconfig b/drivers/gpu/arm/bifrost/Kconfig
index 868b3d319727..fdbeafc010fb 100644
--- a/drivers/gpu/arm/bifrost/Kconfig
+++ b/drivers/gpu/arm/bifrost/Kconfig
@@ -1,10 +1,11 @@
+# SPDX-License-Identifier: GPL-2.0
 #
-# (C) COPYRIGHT 2012-2020 ARM Limited. All rights reserved.
+# (C) COPYRIGHT 2012-2021 ARM Limited. All rights reserved.
 #
 # This program is free software and is provided to you under the terms of the
 # GNU General Public License version 2 as published by the Free Software
 # Foundation, and any use by you of this program is subject to the terms
-# of such GNU licence.
+# of such GNU license.
 #
 # This program is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -15,8 +16,6 @@
 # along with this program; if not, you can access it online at
 # http://www.gnu.org/licenses/gpl-2.0.html.
 #
-# SPDX-License-Identifier: GPL-2.0
-#
 #
 
 
@@ -31,6 +30,13 @@ menuconfig MALI_BIFROST
 	  To compile this driver as a module, choose M here:
 	  this will generate a single module, called mali_kbase.
 
+config MALI_CSF_SUPPORT
+	bool "Mali CSF based GPU support"
+	depends on MALI_BIFROST=m
+	default n
+	help
+	  Enables support for CSF based GPUs.
+
 config MALI_BIFROST_GATOR_SUPPORT
 	bool "Enable Streamline tracing support"
 	depends on MALI_BIFROST
@@ -277,10 +283,20 @@ config MALI_JOB_DUMP
 	  minimal overhead when not in use. Enable only if you know what
 	  you are doing.
 
-config MALI_BIFROST_PRFCNT_SET_SECONDARY
-	bool "Use secondary set of performance counters"
+choice
+	prompt "Performance counters set"
+	default MALI_PRFCNT_SET_PRIMARY
+	depends on MALI_BIFROST && MALI_BIFROST_EXPERT
+
+config MALI_PRFCNT_SET_PRIMARY
+	bool "Primary"
+	depends on MALI_BIFROST && MALI_BIFROST_EXPERT
+	help
+	  Select this option to use primary set of performance counters.
+
+config MALI_BIFROST_PRFCNT_SET_SECONDARY
+	bool "Secondary"
 	depends on MALI_BIFROST && MALI_BIFROST_EXPERT
-	default n
 	help
 	  Select this option to use secondary set of performance counters. Kernel
 	  features that depend on an access to the primary set of counters may
@@ -288,21 +304,43 @@ config MALI_BIFROST_PRFCNT_SET_SECONDARY
 	  from working optimally and may cause instrumentation tools to return
 	  bogus results.
 
-	  If unsure, say N.
+	  If unsure, use MALI_PRFCNT_SET_PRIMARY.
 
-config MALI_PRFCNT_SET_SECONDARY_VIA_DEBUG_FS
-	bool "Use secondary set of performance counters"
-	depends on MALI_BIFROST && MALI_BIFROST_EXPERT && !MALI_BIFROST_PRFCNT_SET_SECONDARY && DEBUG_FS
+config MALI_PRFCNT_SET_TERTIARY
+	bool "Tertiary"
+	depends on MALI_BIFROST && MALI_BIFROST_EXPERT
+	help
+	  Select this option to use tertiary set of performance counters. Kernel
+	  features that depend on an access to the primary set of counters may
+	  become unavailable. Enabling this option will prevent power management
+	  from working optimally and may cause instrumentation tools to return
+	  bogus results.
+
+	  If unsure, use MALI_PRFCNT_SET_PRIMARY.
+
+endchoice
+
+config MALI_PRFCNT_SET_SELECT_VIA_DEBUG_FS
+	bool "Allow runtime selection of performance counters set via debugfs"
+	depends on MALI_BIFROST && MALI_BIFROST_EXPERT && DEBUG_FS
 	default n
 	help
 	  Select this option to make the secondary set of performance counters
 	  available at runtime via debugfs. Kernel features that depend on an
 	  access to the primary set of counters may become unavailable.
 
+	  If no runtime debugfs option is set, the build time counter set
+	  choice will be used.
+
 	  This feature is unsupported and unstable, and may break at any time.
 	  Enabling this option will prevent power management from working
 	  optimally and may cause instrumentation tools to return bogus results.
 
+	  No validation is done on the debugfs input. Invalid input could cause
+	  performance counter errors. Valid inputs are the values accepted by
+	  the SET_SELECT bits of the PRFCNT_CONFIG register as defined in the
+	  architecture specification.
+
 	  If unsure, say N.
 
 source "drivers/gpu/arm/midgard/platform/Kconfig"
diff --git a/drivers/gpu/arm/bifrost/Makefile b/drivers/gpu/arm/bifrost/Makefile
index 53a12094ec14..89760d9f850a 100644
--- a/drivers/gpu/arm/bifrost/Makefile
+++ b/drivers/gpu/arm/bifrost/Makefile
@@ -1,10 +1,11 @@
+# SPDX-License-Identifier: GPL-2.0
 #
-# (C) COPYRIGHT 2010-2019 ARM Limited. All rights reserved.
+# (C) COPYRIGHT 2010-2021 ARM Limited. All rights reserved.
 #
 # This program is free software and is provided to you under the terms of the
 # GNU General Public License version 2 as published by the Free Software
 # Foundation, and any use by you of this program is subject to the terms
-# of such GNU licence.
+# of such GNU license.
 #
 # This program is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -15,24 +16,49 @@
 # along with this program; if not, you can access it online at
 # http://www.gnu.org/licenses/gpl-2.0.html.
 #
-# SPDX-License-Identifier: GPL-2.0
-#
 #
 
+# Handle Android Common Kernel source naming
+KERNEL_SRC ?= /lib/modules/$(shell uname -r)/build
+KDIR ?= $(KERNEL_SRC)
 
-KDIR ?= /lib/modules/$(shell uname -r)/build
+# out-of-tree
+ifeq ($(KBUILD_EXTMOD),)
+export CONFIG_MALI_MIDGARD?=m
+
+ifneq ($(CONFIG_MALI_MIDGARD),n)
+export CONFIG_MALI_CSF_SUPPORT?=n
+export CONFIG_MALI_KUTF?=m
+export CONFIG_MALI_REAL_HW?=y
+
+# Handle default y/m in Kconfig; '?=' keeps the first value, so try y before n
+export CONFIG_MALI_BIFROST_GATOR_SUPPORT?=y
+ifneq ($(CONFIG_PM_DEVFREQ),n)
+export CONFIG_MALI_BIFROST_DEVFREQ?=y
+endif
+export CONFIG_MALI_BIFROST_DEVFREQ?=n
+
+DEFINES += -DCONFIG_MALI_MIDGARD=$(CONFIG_MALI_MIDGARD) \
+	-DCONFIG_MALI_CSF_SUPPORT=$(CONFIG_MALI_CSF_SUPPORT) \
+	-DCONFIG_MALI_KUTF=$(CONFIG_MALI_KUTF) \
+	-DCONFIG_MALI_REAL_HW=$(CONFIG_MALI_REAL_HW) \
+	-DCONFIG_MALI_GATOR_SUPPORT=$(CONFIG_MALI_BIFROST_GATOR_SUPPORT) \
+	-DCONFIG_MALI_DEVFREQ=$(CONFIG_MALI_BIFROST_DEVFREQ)
+
+export DEFINES
+
+endif
+endif
 
-BUSLOG_PATH_RELATIVE = $(CURDIR)/../../../..
 KBASE_PATH_RELATIVE = $(CURDIR)
 
-ifeq ($(CONFIG_MALI_BUSLOG),y)
-#Add bus logger symbols
-EXTRA_SYMBOLS += $(BUSLOG_PATH_RELATIVE)/drivers/base/bus_logger/Module.symvers
-endif
 
 # we get the symbols from modules using KBUILD_EXTRA_SYMBOLS to prevent warnings about unknown functions
 all:
 	$(MAKE) -C $(KDIR) M=$(CURDIR) EXTRA_CFLAGS="-I$(CURDIR)/../../../../include -I$(CURDIR)/../../../../tests/include $(SCONS_CFLAGS)" $(SCONS_CONFIGS) KBUILD_EXTRA_SYMBOLS="$(EXTRA_SYMBOLS)" modules
 
+modules_install:
+	$(MAKE) -C $(KDIR) M=$(CURDIR) modules_install
+
 clean:
 	$(MAKE) -C $(KDIR) M=$(CURDIR) clean
diff --git a/drivers/gpu/arm/bifrost/Makefile.kbase b/drivers/gpu/arm/bifrost/Makefile.kbase
index 6b0f81ee76e8..6d97f197670b 100644
--- a/drivers/gpu/arm/bifrost/Makefile.kbase
+++ b/drivers/gpu/arm/bifrost/Makefile.kbase
@@ -1,10 +1,11 @@
+# SPDX-License-Identifier: GPL-2.0
 #
-# (C) COPYRIGHT 2010, 2013, 2018 ARM Limited. All rights reserved.
+# (C) COPYRIGHT 2010, 2013, 2018-2020 ARM Limited. All rights reserved.
 #
 # This program is free software and is provided to you under the terms of the
 # GNU General Public License version 2 as published by the Free Software
 # Foundation, and any use by you of this program is subject to the terms
-# of such GNU licence.
+# of such GNU license.
 #
 # This program is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -15,9 +16,6 @@
 # along with this program; if not, you can access it online at
 # http://www.gnu.org/licenses/gpl-2.0.html.
 #
-# SPDX-License-Identifier: GPL-2.0
-#
 #
 
 EXTRA_CFLAGS += -I$(ROOT) -I$(KBASE_PATH) -I$(KBASE_PATH)/platform_$(PLATFORM)
-
diff --git a/drivers/gpu/arm/bifrost/Mconfig b/drivers/gpu/arm/bifrost/Mconfig
index 99ababfc2d16..7d6695d5eae7 100644
--- a/drivers/gpu/arm/bifrost/Mconfig
+++ b/drivers/gpu/arm/bifrost/Mconfig
@@ -1,18 +1,23 @@
+# SPDX-License-Identifier: GPL-2.0
 #
-# (C) COPYRIGHT 2012-2020 ARM Limited. All rights reserved.
+# (C) COPYRIGHT 2012-2021 ARM Limited. All rights reserved.
 #
 # This program is free software and is provided to you under the terms of the
 # GNU General Public License version 2 as published by the Free Software
 # Foundation, and any use by you of this program is subject to the terms
-# of such GNU licence.
+# of such GNU license.
 #
-# A copy of the licence is included with the program, and can also be obtained
-# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
-# Boston, MA  02110-1301, USA.
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
 #
 #
 
-
 menuconfig MALI_BIFROST
 	bool "Mali Midgard series support"
 	default y
@@ -22,6 +27,13 @@ menuconfig MALI_BIFROST
 	  To compile this driver as a module, choose M here:
 	  this will generate a single module, called mali_kbase.
 
+config MALI_CSF_SUPPORT
+	bool "Mali CSF based GPU support"
+	depends on MALI_BIFROST
+	default n
+	help
+	  Enables support for CSF based GPUs.
+
 config MALI_BIFROST_GATOR_SUPPORT
 	bool "Enable Streamline tracing support"
 	depends on MALI_BIFROST && !BACKEND_USER
@@ -272,6 +284,9 @@ config MALI_GEM5_BUILD
 # Instrumentation options.
 
 # config MALI_JOB_DUMP exists in the Kernel Kconfig but is configured using CINSTR_JOB_DUMP in Mconfig.
+# config MALI_PRFCNT_SET_PRIMARY exists in the Kernel Kconfig but is configured using CINSTR_PRIMARY_HWC in Mconfig.
 # config MALI_BIFROST_PRFCNT_SET_SECONDARY exists in the Kernel Kconfig but is configured using CINSTR_SECONDARY_HWC in Mconfig.
+# config MALI_PRFCNT_SET_TERTIARY exists in the Kernel Kconfig but is configured using CINSTR_TERTIARY_HWC in Mconfig.
+# config MALI_PRFCNT_SET_SELECT_VIA_DEBUG_FS exists in the Kernel Kconfig but is configured using CINSTR_HWC_SET_SELECT_VIA_DEBUG_FS in Mconfig.
 
 source "kernel/drivers/gpu/arm/midgard/tests/Mconfig"
diff --git a/drivers/gpu/arm/bifrost/arbiter/Kbuild b/drivers/gpu/arm/bifrost/arbiter/Kbuild
index 98e47bed223a..4c04cabce93c 100644
--- a/drivers/gpu/arm/bifrost/arbiter/Kbuild
+++ b/drivers/gpu/arm/bifrost/arbiter/Kbuild
@@ -1,10 +1,11 @@
+# SPDX-License-Identifier: GPL-2.0
 #
 # (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved.
 #
 # This program is free software and is provided to you under the terms of the
 # GNU General Public License version 2 as published by the Free Software
 # Foundation, and any use by you of this program is subject to the terms
-# of such GNU licence.
+# of such GNU license.
 #
 # This program is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -15,10 +16,8 @@
 # along with this program; if not, you can access it online at
 # http://www.gnu.org/licenses/gpl-2.0.html.
 #
-# SPDX-License-Identifier: GPL-2.0
-#
 #
 
-mali_kbase-y += \
+bifrost_kbase-y += \
 	arbiter/mali_kbase_arbif.o \
 	arbiter/mali_kbase_arbiter_pm.o
diff --git a/drivers/gpu/arm/bifrost/arbiter/mali_kbase_arbif.c b/drivers/gpu/arm/bifrost/arbiter/mali_kbase_arbif.c
index ddf1a0ce0b05..7d6ab0cd95c7 100644
--- a/drivers/gpu/arm/bifrost/arbiter/mali_kbase_arbif.c
+++ b/drivers/gpu/arm/bifrost/arbiter/mali_kbase_arbif.c
@@ -1,13 +1,12 @@
 // SPDX-License-Identifier: GPL-2.0
-
 /*
  *
- * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -18,13 +17,10 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 /**
- * @file mali_kbase_arbif.c
- * Mali arbiter interface APIs to share GPU between Virtual Machines
+ * DOC: Mali arbiter interface APIs to share GPU between Virtual Machines
  */
 
 #include <mali_kbase.h>
@@ -34,29 +30,148 @@
 #include <linux/of_platform.h>
 #include "mali_kbase_arbiter_interface.h"
 
+/* Arbiter interface version against which this module was implemented */
+#define MALI_REQUIRED_KBASE_ARBITER_INTERFACE_VERSION 5
+#if MALI_REQUIRED_KBASE_ARBITER_INTERFACE_VERSION != \
+			MALI_KBASE_ARBITER_INTERFACE_VERSION
+#error "Unsupported Mali Arbiter interface version."
+#endif
+
+static void on_max_config(struct device *dev, uint32_t max_l2_slices,
+			  uint32_t max_core_mask)
+{
+	struct kbase_device *kbdev;
+
+	if (!dev) {
+		pr_err("%s(): dev is NULL", __func__);
+		return;
+	}
+
+	kbdev = dev_get_drvdata(dev);
+	if (!kbdev) {
+		dev_err(dev, "%s(): kbdev is NULL", __func__);
+		return;
+	}
+
+	if (!max_l2_slices || !max_core_mask) {
+		dev_dbg(dev,
+			"%s(): max_config ignored as one of the fields is zero",
+			__func__);
+		return;
+	}
+
+	/* Both values are non-zero: record the max config for this device */
+	kbase_arbiter_set_max_config(kbdev, max_l2_slices, max_core_mask);
+}
+
+/**
+ * on_update_freq() - Record a GPU clock frequency reported by the arbiter
+ * @dev: arbiter interface device handle; its drvdata is the kbase device
+ * @freq: GPU clock frequency value reported from arbiter
+ *
+ * Callback that passes @freq to kbase_arbiter_pm_update_gpu_freq() for
+ * kbdev->arb.arb_freq. Logs an error and returns if @dev or kbdev is NULL.
+ */
+static void on_update_freq(struct device *dev, uint32_t freq)
+{
+	struct kbase_device *kbdev;
+
+	if (!dev) {
+		pr_err("%s(): dev is NULL", __func__);
+		return;
+	}
+
+	kbdev = dev_get_drvdata(dev);
+	if (!kbdev) {
+		dev_err(dev, "%s(): kbdev is NULL", __func__);
+		return;
+	}
+
+	kbase_arbiter_pm_update_gpu_freq(&kbdev->arb.arb_freq, freq);
+}
+
+/**
+ * on_gpu_stop() - Send KBASE_VM_GPU_STOP_EVT to the VM state machine
+ * @dev: arbiter interface device handle
+ *
+ * Callback registered with the arbiter interface to signal a GPU STOP event.
+ */
 static void on_gpu_stop(struct device *dev)
 {
-	struct kbase_device *kbdev = dev_get_drvdata(dev);
+	struct kbase_device *kbdev;
+
+	if (!dev) {
+		pr_err("%s(): dev is NULL", __func__);
+		return;
+	}
+
+	kbdev = dev_get_drvdata(dev);
+	if (!kbdev) {
+		dev_err(dev, "%s(): kbdev is NULL", __func__);
+		return;
+	}
 
 	KBASE_TLSTREAM_TL_ARBITER_STOP_REQUESTED(kbdev, kbdev);
 	kbase_arbiter_pm_vm_event(kbdev, KBASE_VM_GPU_STOP_EVT);
 }
 
+/**
+ * on_gpu_granted() - Send KBASE_VM_GPU_GRANTED_EVT to the VM state machine
+ * @dev: arbiter interface device handle
+ *
+ * Callback registered with the arbiter interface to signal a GPU GRANT event.
+ */
 static void on_gpu_granted(struct device *dev)
 {
-	struct kbase_device *kbdev = dev_get_drvdata(dev);
+	struct kbase_device *kbdev;
+
+	if (!dev) {
+		pr_err("%s(): dev is NULL", __func__);
+		return;
+	}
+
+	kbdev = dev_get_drvdata(dev);
+	if (!kbdev) {
+		dev_err(dev, "%s(): kbdev is NULL", __func__);
+		return;
+	}
 
 	KBASE_TLSTREAM_TL_ARBITER_GRANTED(kbdev, kbdev);
 	kbase_arbiter_pm_vm_event(kbdev, KBASE_VM_GPU_GRANTED_EVT);
 }
 
+/**
+ * on_gpu_lost() - Send KBASE_VM_GPU_LOST_EVT to the VM state machine
+ * @dev: arbiter interface device handle
+ *
+ * Callback registered with the arbiter interface to signal a GPU LOST event.
+ */
 static void on_gpu_lost(struct device *dev)
 {
-	struct kbase_device *kbdev = dev_get_drvdata(dev);
+	struct kbase_device *kbdev;
+
+	if (!dev) {
+		pr_err("%s(): dev is NULL", __func__);
+		return;
+	}
+
+	kbdev = dev_get_drvdata(dev);
+	if (!kbdev) {
+		dev_err(dev, "%s(): kbdev is NULL", __func__);
+		return;
+	}
 
 	kbase_arbiter_pm_vm_event(kbdev, KBASE_VM_GPU_LOST_EVT);
 }
 
+/**
+ * kbase_arbif_init() - Kbase Arbiter interface initialisation.
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Initialise Kbase Arbiter interface and assign callback functions.
+ *
+ * Return: 0 on success else a Linux error code
+ */
 int kbase_arbif_init(struct kbase_device *kbdev)
 {
 #ifdef CONFIG_OF
@@ -100,6 +215,12 @@ int kbase_arbif_init(struct kbase_device *kbdev)
 	ops.arb_vm_gpu_stop = on_gpu_stop;
 	ops.arb_vm_gpu_granted = on_gpu_granted;
 	ops.arb_vm_gpu_lost = on_gpu_lost;
+	ops.arb_vm_max_config = on_max_config;
+	ops.arb_vm_update_freq = on_update_freq;
+
+
+	kbdev->arb.arb_freq.arb_freq = 0;
+	mutex_init(&kbdev->arb.arb_freq.arb_freq_lock);
 
 	/* register kbase arbiter_if callbacks */
 	if (arb_if->vm_ops.vm_arb_register_dev) {
@@ -111,6 +232,7 @@ int kbase_arbif_init(struct kbase_device *kbdev)
 			return err;
 		}
 	}
+
 #else /* CONFIG_OF */
 	dev_dbg(kbdev->dev, "No arbiter without Device Tree support\n");
 	kbdev->arb.arb_dev = NULL;
@@ -119,6 +241,12 @@ int kbase_arbif_init(struct kbase_device *kbdev)
 	return 0;
 }
 
+/**
+ * kbase_arbif_destroy() - De-init Kbase arbiter interface
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * De-initialise Kbase arbiter interface
+ */
 void kbase_arbif_destroy(struct kbase_device *kbdev)
 {
 	struct arbiter_if_dev *arb_if = kbdev->arb.arb_if;
@@ -133,16 +261,45 @@ void kbase_arbif_destroy(struct kbase_device *kbdev)
 	kbdev->arb.arb_dev = NULL;
 }
 
+/**
+ * kbase_arbif_get_max_config() - Request max config info from the arbiter
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Calls the vm_arb_get_max_config op if set; no-op when there is no arb_if.
+ */
+void kbase_arbif_get_max_config(struct kbase_device *kbdev)
+{
+	struct arbiter_if_dev *arb_if = kbdev->arb.arb_if;
+
+	if (arb_if && arb_if->vm_ops.vm_arb_get_max_config) {
+		dev_dbg(kbdev->dev, "%s\n", __func__);
+		arb_if->vm_ops.vm_arb_get_max_config(arb_if);
+	}
+}
+
+/**
+ * kbase_arbif_gpu_request() - Request GPU from the arbiter
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Emits the arbiter-requested tracepoint, then calls vm_arb_gpu_request if set.
+ */
 void kbase_arbif_gpu_request(struct kbase_device *kbdev)
 {
 	struct arbiter_if_dev *arb_if = kbdev->arb.arb_if;
 
 	if (arb_if && arb_if->vm_ops.vm_arb_gpu_request) {
 		dev_dbg(kbdev->dev, "%s\n", __func__);
+		KBASE_TLSTREAM_TL_ARBITER_REQUESTED(kbdev, kbdev);
 		arb_if->vm_ops.vm_arb_gpu_request(arb_if);
 	}
 }
 
+/**
+ * kbase_arbif_gpu_stopped() - send GPU stopped message to the arbiter
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ * @gpu_required: GPU request flag
+ *
+ */
 void kbase_arbif_gpu_stopped(struct kbase_device *kbdev, u8 gpu_required)
 {
 	struct arbiter_if_dev *arb_if = kbdev->arb.arb_if;
@@ -154,6 +311,12 @@ void kbase_arbif_gpu_stopped(struct kbase_device *kbdev, u8 gpu_required)
 	}
 }
 
+/**
+ * kbase_arbif_gpu_active() - Sends a GPU_ACTIVE message to the Arbiter
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Informs the arbiter VM is active
+ */
 void kbase_arbif_gpu_active(struct kbase_device *kbdev)
 {
 	struct arbiter_if_dev *arb_if = kbdev->arb.arb_if;
@@ -164,6 +327,12 @@ void kbase_arbif_gpu_active(struct kbase_device *kbdev)
 	}
 }
 
+/**
+ * kbase_arbif_gpu_idle() - Inform the arbiter that the VM has gone idle
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Informs the arbiter VM is idle
+ */
 void kbase_arbif_gpu_idle(struct kbase_device *kbdev)
 {
 	struct arbiter_if_dev *arb_if = kbdev->arb.arb_if;
diff --git a/drivers/gpu/arm/bifrost/arbiter/mali_kbase_arbif.h b/drivers/gpu/arm/bifrost/arbiter/mali_kbase_arbif.h
index e7e9de76c94c..710559c4fe02 100644
--- a/drivers/gpu/arm/bifrost/arbiter/mali_kbase_arbif.h
+++ b/drivers/gpu/arm/bifrost/arbiter/mali_kbase_arbif.h
@@ -1,28 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  *
- * (C) COPYRIGHT ARM Limited. All rights reserved.
- *
- * This program is free software and is provided to you under the terms of the
- * GNU General Public License version 2 as published by the Free Software
- * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, you can access it online at
- * http://www.gnu.org/licenses/gpl-2.0.html.
- *
- * SPDX-License-Identifier: GPL-2.0
- *
- *//* SPDX-License-Identifier: GPL-2.0 */
-
-/*
- *
- * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -38,12 +17,10 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- *
  */
 
 /**
- * @file
- * Mali arbiter interface APIs to share GPU between Virtual Machines
+ * DOC: Mali arbiter interface APIs to share GPU between Virtual Machines
  */
 
 #ifndef _MALI_KBASE_ARBIF_H_
@@ -94,6 +71,14 @@ int kbase_arbif_init(struct kbase_device *kbdev);
  */
 void kbase_arbif_destroy(struct kbase_device *kbdev);
 
+/**
+ * kbase_arbif_get_max_config() - Request max config info
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * call back function from arb interface to arbiter requesting max config info
+ */
+void kbase_arbif_get_max_config(struct kbase_device *kbdev);
+
 /**
  * kbase_arbif_gpu_request() - Send GPU request message to the arbiter
  * @kbdev: The kbase device structure for the device (must be a valid pointer)
diff --git a/drivers/gpu/arm/bifrost/arbiter/mali_kbase_arbiter_defs.h b/drivers/gpu/arm/bifrost/arbiter/mali_kbase_arbiter_defs.h
index 1f53cbf1a286..586c5d4f69db 100644
--- a/drivers/gpu/arm/bifrost/arbiter/mali_kbase_arbiter_defs.h
+++ b/drivers/gpu/arm/bifrost/arbiter/mali_kbase_arbiter_defs.h
@@ -1,28 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  *
- * (C) COPYRIGHT ARM Limited. All rights reserved.
- *
- * This program is free software and is provided to you under the terms of the
- * GNU General Public License version 2 as published by the Free Software
- * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, you can access it online at
- * http://www.gnu.org/licenses/gpl-2.0.html.
- *
- * SPDX-License-Identifier: GPL-2.0
- *
- *//* SPDX-License-Identifier: GPL-2.0 */
-
-/*
- *
- * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -38,7 +17,6 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- *
  */
 
 /**
@@ -66,7 +44,8 @@
  * @vm_resume_work:  Work item for vm_arb_wq to resume current work on GPU
  * @vm_arb_starting: Work queue resume in progress
  * @vm_arb_stopping: Work queue suspend in progress
- * @vm_arb_users_waiting: Count of users waiting for GPU
+ * @interrupts_installed: Flag set when interrupts are installed
+ * @vm_request_timer: Timer to monitor GPU request
  */
 struct kbase_arbiter_vm_state {
 	struct kbase_device *kbdev;
@@ -78,7 +57,8 @@ struct kbase_arbiter_vm_state {
 	struct work_struct vm_resume_work;
 	bool vm_arb_starting;
 	bool vm_arb_stopping;
-	int vm_arb_users_waiting;
+	bool interrupts_installed;
+	struct hrtimer vm_request_timer;
 };
 
 /**
@@ -86,10 +66,12 @@ struct kbase_arbiter_vm_state {
  *                               allocated from the probe method of Mali driver
  * @arb_if:                 Pointer to the arbiter interface device
  * @arb_dev:                Pointer to the arbiter device
+ * @arb_freq:               GPU clock frequency retrieved from arbiter.
  */
 struct kbase_arbiter_device {
 	struct arbiter_if_dev *arb_if;
 	struct device *arb_dev;
+	struct kbase_arbiter_freq arb_freq;
 };
 
 #endif /* _MALI_KBASE_ARBITER_DEFS_H_ */
diff --git a/drivers/gpu/arm/bifrost/arbiter/mali_kbase_arbiter_interface.h b/drivers/gpu/arm/bifrost/arbiter/mali_kbase_arbiter_interface.h
index 5d5d8a7d2cff..84389e828f4d 100644
--- a/drivers/gpu/arm/bifrost/arbiter/mali_kbase_arbiter_interface.h
+++ b/drivers/gpu/arm/bifrost/arbiter/mali_kbase_arbiter_interface.h
@@ -1,28 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  *
- * (C) COPYRIGHT ARM Limited. All rights reserved.
- *
- * This program is free software and is provided to you under the terms of the
- * GNU General Public License version 2 as published by the Free Software
- * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, you can access it online at
- * http://www.gnu.org/licenses/gpl-2.0.html.
- *
- * SPDX-License-Identifier: GPL-2.0
- *
- *//* SPDX-License-Identifier: GPL-2.0 */
-
-/*
- *
- * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -38,7 +17,6 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- *
  */
 
 /**
@@ -50,7 +28,7 @@
 #define _MALI_KBASE_ARBITER_INTERFACE_H_
 
 /**
- * @brief Mali arbiter interface version
+ *  Mali arbiter interface version
  *
  * This specifies the current version of the configuration interface. Whenever
  * the arbiter interface changes, so that integration effort is required, the
@@ -61,8 +39,15 @@
  * 1 - Added the Mali arbiter configuration interface.
  * 2 - Strip out reference code from header
  * 3 - Removed DVFS utilization interface (DVFS moved to arbiter side)
+ * 4 - Added max_config support
+ * 5 - Added GPU clock frequency reporting support from arbiter
  */
-#define MALI_KBASE_ARBITER_INTERFACE_VERSION 3
+#define MALI_KBASE_ARBITER_INTERFACE_VERSION 5
+
+/**
+ * NO_FREQ is used in case platform doesn't support reporting frequency
+ */
+#define NO_FREQ 0
 
 struct arbiter_if_dev;
 
@@ -108,6 +93,27 @@ struct arbiter_if_arb_vm_ops {
 	 * If successful, will respond with a vm_arb_gpu_stopped message.
 	 */
 	void (*arb_vm_gpu_lost)(struct device *dev);
+
+	/**
+	 * arb_vm_max_config() - Send max config info to the VM
+	 * @dev: The arbif kernel module device.
+	 * @max_l2_slices: The maximum number of L2 slices.
+	 * @max_core_mask: The largest core mask.
+	 *
+	 * Informs KBase the maximum resources that can be allocated to the
+	 * partition in use.
+	 */
+	void (*arb_vm_max_config)(struct device *dev, uint32_t max_l2_slices,
+				  uint32_t max_core_mask);
+
+	/**
+	 * arb_vm_update_freq() - GPU clock frequency has been updated
+	 * @dev: The arbif kernel module device.
+	 * @freq: GPU clock frequency value reported from arbiter
+	 *
+	 * Informs KBase that the GPU clock frequency has been updated.
+	 */
+	void (*arb_vm_update_freq)(struct device *dev, uint32_t freq);
 };
 
 /**
@@ -136,6 +142,13 @@ struct arbiter_if_vm_arb_ops {
 	 */
 	void (*vm_arb_unregister_dev)(struct arbiter_if_dev *arbif_dev);
 
+	/**
+	 * vm_arb_get_max_config() - Request the max config from the
+	 * Arbiter.
+	 * @arbif_dev: The arbiter interface we want to issue the request.
+	 */
+	void (*vm_arb_get_max_config)(struct arbiter_if_dev *arbif_dev);
+
 	/**
 	 * vm_arb_gpu_request() - Ask the arbiter interface for GPU access.
 	 * @arbif_dev: The arbiter interface we want to issue the request.
diff --git a/drivers/gpu/arm/bifrost/arbiter/mali_kbase_arbiter_pm.c b/drivers/gpu/arm/bifrost/arbiter/mali_kbase_arbiter_pm.c
index 02b5de2436ea..456cc70753cd 100644
--- a/drivers/gpu/arm/bifrost/arbiter/mali_kbase_arbiter_pm.c
+++ b/drivers/gpu/arm/bifrost/arbiter/mali_kbase_arbiter_pm.c
@@ -1,13 +1,12 @@
 // SPDX-License-Identifier: GPL-2.0
-
 /*
  *
- * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -18,12 +17,10 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 /**
- * @file mali_kbase_arbiter_pm.c
+ * @file
  * Mali arbiter power manager state machine and APIs
  */
 
@@ -34,11 +31,34 @@
 #include <mali_kbase_hwcnt_context.h>
 #include <mali_kbase_pm_internal.h>
 #include <tl/mali_kbase_tracepoints.h>
+#include <mali_kbase_gpuprops.h>
+
+/* A dmesg warning will occur if the GPU is not granted
+ * after the following time (in milliseconds) has elapsed.
+ */
+#define GPU_REQUEST_TIMEOUT 1000
+
+#define MAX_L2_SLICES_MASK		0xFF
+
+/* Maximum time in ms, before deferring probe in case
+ * GPU_GRANTED message is not received
+ */
+static int gpu_req_timeout = 1;
+module_param(gpu_req_timeout, int, 0644);
+MODULE_PARM_DESC(gpu_req_timeout,
+	"On a virtualized platform, if the GPU is not granted within this time(ms) kbase will defer the probe");
 
 static void kbase_arbiter_pm_vm_wait_gpu_assignment(struct kbase_device *kbdev);
 static inline bool kbase_arbiter_pm_vm_gpu_assigned_lockheld(
 	struct kbase_device *kbdev);
 
+/**
+ * kbase_arbiter_pm_vm_state_str() - Helper function to get string
+ *                                   for kbase VM state.(debug)
+ * @state: kbase VM state
+ *
+ * Return: string representation of enum kbase_vm_state
+ */
 static inline const char *kbase_arbiter_pm_vm_state_str(
 	enum kbase_vm_state state)
 {
@@ -73,6 +93,13 @@ static inline const char *kbase_arbiter_pm_vm_state_str(
 	}
 }
 
+/**
+ * kbase_arbiter_pm_vm_event_str() - Helper function to get string
+ *                                   for kbase VM event.(debug)
+ * @evt: kbase VM event
+ *
+ * Return: String representation of enum kbase_arbif_evt
+ */
 static inline const char *kbase_arbiter_pm_vm_event_str(
 	enum kbase_arbif_evt evt)
 {
@@ -99,6 +126,13 @@ static inline const char *kbase_arbiter_pm_vm_event_str(
 	}
 }
 
+/**
+ * kbase_arbiter_pm_vm_set_state() - Sets new kbase_arbiter_vm_state
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ * @new_state: kbase VM new state
+ *
+ * This function sets the new state for the VM
+ */
 static void kbase_arbiter_pm_vm_set_state(struct kbase_device *kbdev,
 	enum kbase_vm_state new_state)
 {
@@ -107,11 +141,22 @@ static void kbase_arbiter_pm_vm_set_state(struct kbase_device *kbdev,
 	dev_dbg(kbdev->dev, "VM set_state %s -> %s",
 	kbase_arbiter_pm_vm_state_str(arb_vm_state->vm_state),
 	kbase_arbiter_pm_vm_state_str(new_state));
+
 	lockdep_assert_held(&arb_vm_state->vm_state_lock);
 	arb_vm_state->vm_state = new_state;
+	if (new_state != KBASE_VM_STATE_INITIALIZING_WITH_GPU &&
+		new_state != KBASE_VM_STATE_INITIALIZING)
+		KBASE_KTRACE_ADD(kbdev, ARB_VM_STATE, NULL, new_state);
 	wake_up(&arb_vm_state->vm_state_wait);
 }
 
+/**
+ * kbase_arbiter_pm_suspend_wq() - suspend work queue of the driver.
+ * @data: work queue
+ *
+ * Suspends work queue of the driver, when VM is in SUSPEND_PENDING or
+ * STOPPING_IDLE or STOPPING_ACTIVE state
+ */
 static void kbase_arbiter_pm_suspend_wq(struct work_struct *data)
 {
 	struct kbase_arbiter_vm_state *arb_vm_state = container_of(data,
@@ -136,6 +181,13 @@ static void kbase_arbiter_pm_suspend_wq(struct work_struct *data)
 	dev_dbg(kbdev->dev, "<%s\n", __func__);
 }
 
+/**
+ * kbase_arbiter_pm_resume_wq() - Kbase resume work queue.
+ * @data: work item
+ *
+ * Resume work queue of the driver when VM is in STARTING state,
+ * else if it is in STOPPING_ACTIVE will request a stop event.
+ */
 static void kbase_arbiter_pm_resume_wq(struct work_struct *data)
 {
 	struct kbase_arbiter_vm_state *arb_vm_state = container_of(data,
@@ -157,9 +209,74 @@ static void kbase_arbiter_pm_resume_wq(struct work_struct *data)
 	}
 	arb_vm_state->vm_arb_starting = false;
 	mutex_unlock(&arb_vm_state->vm_state_lock);
+	KBASE_TLSTREAM_TL_ARBITER_STARTED(kbdev, kbdev);
 	dev_dbg(kbdev->dev, "<%s\n", __func__);
 }
 
+/**
+ * request_timer_callback() - Issue warning on request timer expiration
+ * @timer: Request hr timer data
+ *
+ * Called when the Arbiter takes too long to grant the GPU after a
+ * request has been made.  Issues a warning in dmesg.
+ *
+ * Return: Always returns HRTIMER_NORESTART
+ */
+static enum hrtimer_restart request_timer_callback(struct hrtimer *timer)
+{
+	struct kbase_arbiter_vm_state *arb_vm_state = container_of(timer,
+			struct kbase_arbiter_vm_state, vm_request_timer);
+
+	KBASE_DEBUG_ASSERT(arb_vm_state);
+	KBASE_DEBUG_ASSERT(arb_vm_state->kbdev);
+
+	dev_warn(arb_vm_state->kbdev->dev,
+		"Still waiting for GPU to be granted from Arbiter after %d ms\n",
+		GPU_REQUEST_TIMEOUT);
+	return HRTIMER_NORESTART;
+}
+
+/**
+ * start_request_timer() - Start a timer after requesting GPU
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Start a timer to track when kbase is waiting for the GPU from the
+ * Arbiter.  If the timer expires before GPU is granted, a warning in
+ * dmesg will be issued.
+ */
+static void start_request_timer(struct kbase_device *kbdev)
+{
+	struct kbase_arbiter_vm_state *arb_vm_state = kbdev->pm.arb_vm_state;
+
+	hrtimer_start(&arb_vm_state->vm_request_timer,
+			HR_TIMER_DELAY_MSEC(GPU_REQUEST_TIMEOUT),
+			HRTIMER_MODE_REL);
+}
+
+/**
+ * cancel_request_timer() - Stop the request timer
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Stops the request timer once GPU has been granted.  Safe to call
+ * even if timer is no longer running.
+ */
+static void cancel_request_timer(struct kbase_device *kbdev)
+{
+	struct kbase_arbiter_vm_state *arb_vm_state = kbdev->pm.arb_vm_state;
+
+	hrtimer_cancel(&arb_vm_state->vm_request_timer);
+}
+
+/**
+ * kbase_arbiter_pm_early_init() - Initialize arbiter for VM
+ *                                 Paravirtualized use.
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Initialize the arbiter and other required resources during the runtime
+ * and request the GPU for the VM for the first time.
+ *
+ * Return: 0 if success, or a Linux error code
+ */
 int kbase_arbiter_pm_early_init(struct kbase_device *kbdev)
 {
 	int err;
@@ -179,12 +296,17 @@ int kbase_arbiter_pm_early_init(struct kbase_device *kbdev)
 		WQ_HIGHPRI);
 	if (!arb_vm_state->vm_arb_wq) {
 		dev_err(kbdev->dev, "Failed to allocate vm_arb workqueue\n");
+		kfree(arb_vm_state);
 		return -ENOMEM;
 	}
 	INIT_WORK(&arb_vm_state->vm_suspend_work, kbase_arbiter_pm_suspend_wq);
 	INIT_WORK(&arb_vm_state->vm_resume_work, kbase_arbiter_pm_resume_wq);
 	arb_vm_state->vm_arb_starting = false;
-	arb_vm_state->vm_arb_users_waiting = 0;
+	atomic_set(&kbdev->pm.gpu_users_waiting, 0);
+	hrtimer_init(&arb_vm_state->vm_request_timer, CLOCK_MONOTONIC,
+							HRTIMER_MODE_REL);
+	arb_vm_state->vm_request_timer.function =
+						request_timer_callback;
 	kbdev->pm.arb_vm_state = arb_vm_state;
 
 	err = kbase_arbif_init(kbdev);
@@ -192,17 +314,31 @@ int kbase_arbiter_pm_early_init(struct kbase_device *kbdev)
 		dev_err(kbdev->dev, "Failed to initialise arbif module\n");
 		goto arbif_init_fail;
 	}
+
 	if (kbdev->arb.arb_if) {
 		kbase_arbif_gpu_request(kbdev);
 		dev_dbg(kbdev->dev, "Waiting for initial GPU assignment...\n");
-		wait_event(arb_vm_state->vm_state_wait,
+		err = wait_event_timeout(arb_vm_state->vm_state_wait,
 			arb_vm_state->vm_state ==
-					KBASE_VM_STATE_INITIALIZING_WITH_GPU);
+					KBASE_VM_STATE_INITIALIZING_WITH_GPU,
+			msecs_to_jiffies(gpu_req_timeout));
+
+		if (!err) {
+			dev_dbg(kbdev->dev,
+			"Kbase probe Deferred after waiting %d ms to receive GPU_GRANT\n",
+			gpu_req_timeout);
+			err = -EPROBE_DEFER;
+			goto arbif_eprobe_defer;
+		}
+
 		dev_dbg(kbdev->dev,
 			"Waiting for initial GPU assignment - done\n");
 	}
 	return 0;
 
+arbif_eprobe_defer:
+	kbase_arbiter_pm_early_term(kbdev);
+	return err;
 arbif_init_fail:
 	destroy_workqueue(arb_vm_state->vm_arb_wq);
 	kfree(arb_vm_state);
@@ -210,36 +346,72 @@ arbif_init_fail:
 	return err;
 }
 
+/**
+ * kbase_arbiter_pm_early_term() - Shutdown arbiter and free resources
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Clean up all the resources
+ */
 void kbase_arbiter_pm_early_term(struct kbase_device *kbdev)
 {
 	struct kbase_arbiter_vm_state *arb_vm_state = kbdev->pm.arb_vm_state;
 
+	cancel_request_timer(kbdev);
 	mutex_lock(&arb_vm_state->vm_state_lock);
 	if (arb_vm_state->vm_state > KBASE_VM_STATE_STOPPED_GPU_REQUESTED) {
 		kbase_pm_set_gpu_lost(kbdev, false);
 		kbase_arbif_gpu_stopped(kbdev, false);
 	}
 	mutex_unlock(&arb_vm_state->vm_state_lock);
-	kbase_arbif_destroy(kbdev);
 	destroy_workqueue(arb_vm_state->vm_arb_wq);
+	kbase_arbif_destroy(kbdev);
 	arb_vm_state->vm_arb_wq = NULL;
 	kfree(kbdev->pm.arb_vm_state);
 	kbdev->pm.arb_vm_state = NULL;
 }
 
+/**
+ * kbase_arbiter_pm_release_interrupts() - Release the GPU interrupts
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Releases interrupts if installed and sets interrupts_installed to false
+ */
 void kbase_arbiter_pm_release_interrupts(struct kbase_device *kbdev)
 {
 	struct kbase_arbiter_vm_state *arb_vm_state = kbdev->pm.arb_vm_state;
 
 	mutex_lock(&arb_vm_state->vm_state_lock);
-	if (!kbdev->arb.arb_if ||
-			arb_vm_state->vm_state >
-					KBASE_VM_STATE_STOPPED_GPU_REQUESTED)
+	if (arb_vm_state->interrupts_installed == true) {
+		arb_vm_state->interrupts_installed = false;
 		kbase_release_interrupts(kbdev);
-
+	}
 	mutex_unlock(&arb_vm_state->vm_state_lock);
 }
 
+/**
+ * kbase_arbiter_pm_install_interrupts() - Install the GPU interrupts
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Installs interrupts and sets the interrupts_installed flag to true.
+ */
+int kbase_arbiter_pm_install_interrupts(struct kbase_device *kbdev)
+{
+	struct kbase_arbiter_vm_state *arb_vm_state = kbdev->pm.arb_vm_state;
+	int err;
+
+	mutex_lock(&arb_vm_state->vm_state_lock);
+	arb_vm_state->interrupts_installed = true;
+	err = kbase_install_interrupts(kbdev);
+	mutex_unlock(&arb_vm_state->vm_state_lock);
+	return err;
+}
+
+/**
+ * kbase_arbiter_pm_vm_stopped() - Handle stop state for the VM
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Handles a stop state for the VM
+ */
 void kbase_arbiter_pm_vm_stopped(struct kbase_device *kbdev)
 {
 	bool request_gpu = false;
@@ -247,14 +419,19 @@ void kbase_arbiter_pm_vm_stopped(struct kbase_device *kbdev)
 
 	lockdep_assert_held(&arb_vm_state->vm_state_lock);
 
-	if (arb_vm_state->vm_arb_users_waiting > 0 &&
+	if (atomic_read(&kbdev->pm.gpu_users_waiting) > 0 &&
 			arb_vm_state->vm_state == KBASE_VM_STATE_STOPPING_IDLE)
 		kbase_arbiter_pm_vm_set_state(kbdev,
 			 KBASE_VM_STATE_STOPPING_ACTIVE);
 
 	dev_dbg(kbdev->dev, "%s %s\n", __func__,
 		kbase_arbiter_pm_vm_state_str(arb_vm_state->vm_state));
-	kbase_release_interrupts(kbdev);
+
+	if (arb_vm_state->interrupts_installed) {
+		arb_vm_state->interrupts_installed = false;
+		kbase_release_interrupts(kbdev);
+	}
+
 	switch (arb_vm_state->vm_state) {
 	case KBASE_VM_STATE_STOPPING_ACTIVE:
 		request_gpu = true;
@@ -275,13 +452,85 @@ void kbase_arbiter_pm_vm_stopped(struct kbase_device *kbdev)
 
 	kbase_pm_set_gpu_lost(kbdev, false);
 	kbase_arbif_gpu_stopped(kbdev, request_gpu);
+	if (request_gpu)
+		start_request_timer(kbdev);
 }
 
+void kbase_arbiter_set_max_config(struct kbase_device *kbdev,
+				  uint32_t max_l2_slices,
+				  uint32_t max_core_mask)
+{
+	struct kbase_arbiter_vm_state *arb_vm_state;
+	struct max_config_props max_config;
+
+	if (!kbdev)
+		return;
+
+	/* Mask the max_l2_slices as it is stored as 8 bits into kbase */
+	max_config.l2_slices = max_l2_slices & MAX_L2_SLICES_MASK;
+	max_config.core_mask = max_core_mask;
+	arb_vm_state = kbdev->pm.arb_vm_state;
+
+	mutex_lock(&arb_vm_state->vm_state_lock);
+	/* Just set the max_props in kbase during initialization. */
+	if (arb_vm_state->vm_state == KBASE_VM_STATE_INITIALIZING)
+		kbase_gpuprops_set_max_config(kbdev, &max_config);
+	else
+		dev_dbg(kbdev->dev, "Unexpected max_config on VM state %s",
+			kbase_arbiter_pm_vm_state_str(arb_vm_state->vm_state));
+
+	mutex_unlock(&arb_vm_state->vm_state_lock);
+}
+
+int kbase_arbiter_pm_gpu_assigned(struct kbase_device *kbdev)
+{
+	struct kbase_arbiter_vm_state *arb_vm_state;
+	int result = -EINVAL;
+
+	if (!kbdev)
+		return result;
+
+	/* First check the GPU_LOST state */
+	kbase_pm_lock(kbdev);
+	if (kbase_pm_is_gpu_lost(kbdev)) {
+		kbase_pm_unlock(kbdev);
+		return 0;
+	}
+	kbase_pm_unlock(kbdev);
+
+	/* Then the arbitration state machine */
+	arb_vm_state = kbdev->pm.arb_vm_state;
+
+	mutex_lock(&arb_vm_state->vm_state_lock);
+	switch (arb_vm_state->vm_state) {
+	case KBASE_VM_STATE_INITIALIZING:
+	case KBASE_VM_STATE_SUSPENDED:
+	case KBASE_VM_STATE_STOPPED:
+	case KBASE_VM_STATE_STOPPED_GPU_REQUESTED:
+	case KBASE_VM_STATE_SUSPEND_WAIT_FOR_GRANT:
+		result = 0;
+		break;
+	default:
+		result = 1;
+		break;
+	}
+	mutex_unlock(&arb_vm_state->vm_state_lock);
+
+	return result;
+}
+
+/**
+ * kbase_arbiter_pm_vm_gpu_start() - Handles the start state of the VM
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Handles the start state of the VM
+ */
 static void kbase_arbiter_pm_vm_gpu_start(struct kbase_device *kbdev)
 {
 	struct kbase_arbiter_vm_state *arb_vm_state = kbdev->pm.arb_vm_state;
 
 	lockdep_assert_held(&arb_vm_state->vm_state_lock);
+	cancel_request_timer(kbdev);
 	switch (arb_vm_state->vm_state) {
 	case KBASE_VM_STATE_INITIALIZING:
 		kbase_arbiter_pm_vm_set_state(kbdev,
@@ -289,7 +538,14 @@ static void kbase_arbiter_pm_vm_gpu_start(struct kbase_device *kbdev)
 		break;
 	case KBASE_VM_STATE_STOPPED_GPU_REQUESTED:
 		kbase_arbiter_pm_vm_set_state(kbdev, KBASE_VM_STATE_STARTING);
+		arb_vm_state->interrupts_installed = true;
 		kbase_install_interrupts(kbdev);
+		/*
+		 * GPU GRANTED received while in stop can be a result of a
+		 * repartitioning.
+		 */
+		kbase_gpuprops_req_curr_config_update(kbdev);
+		/* curr_config will be updated while resuming the PM. */
 		queue_work(arb_vm_state->vm_arb_wq,
 			&arb_vm_state->vm_resume_work);
 		break;
@@ -306,6 +562,12 @@ static void kbase_arbiter_pm_vm_gpu_start(struct kbase_device *kbdev)
 	}
 }
 
+/**
+ * kbase_arbiter_pm_vm_gpu_stop() - Handles the stop state of the VM
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Handles the stop state of the VM
+ */
 static void kbase_arbiter_pm_vm_gpu_stop(struct kbase_device *kbdev)
 {
 	struct kbase_arbiter_vm_state *arb_vm_state = kbdev->pm.arb_vm_state;
@@ -348,6 +610,12 @@ static void kbase_arbiter_pm_vm_gpu_stop(struct kbase_device *kbdev)
 	}
 }
 
+/**
+ * kbase_gpu_lost() - Kbase signals GPU is lost on a lost event signal
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * On a GPU lost event, signals GPU_LOST to the arbiter
+ */
 static void kbase_gpu_lost(struct kbase_device *kbdev)
 {
 	struct kbase_arbiter_vm_state *arb_vm_state = kbdev->pm.arb_vm_state;
@@ -396,6 +664,13 @@ static void kbase_gpu_lost(struct kbase_device *kbdev)
 	}
 }
 
+/**
+ * kbase_arbiter_pm_vm_os_suspend_ready_state() - checks if VM is ready
+ *			to be moved to suspended state.
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Return: True if it is ready to be suspended, else False.
+ */
 static inline bool kbase_arbiter_pm_vm_os_suspend_ready_state(
 	struct kbase_device *kbdev)
 {
@@ -410,6 +685,14 @@ static inline bool kbase_arbiter_pm_vm_os_suspend_ready_state(
 	}
 }
 
+/**
+ * kbase_arbiter_pm_vm_os_prepare_suspend() - Prepare OS to be in suspend state
+ *                             until it receives the grant message from arbiter
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Prepares OS to be in suspend state until it receives GRANT message
+ * from Arbiter asynchronously.
+ */
 static void kbase_arbiter_pm_vm_os_prepare_suspend(struct kbase_device *kbdev)
 {
 	struct kbase_arbiter_vm_state *arb_vm_state = kbdev->pm.arb_vm_state;
@@ -475,6 +758,14 @@ static void kbase_arbiter_pm_vm_os_prepare_suspend(struct kbase_device *kbdev)
 	}
 }
 
+/**
+ * kbase_arbiter_pm_vm_os_resume() - Resume OS function once it receives
+ *                                   a grant message from arbiter
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Resume OS function once it receives GRANT message
+ * from Arbiter asynchronously.
+ */
 static void kbase_arbiter_pm_vm_os_resume(struct kbase_device *kbdev)
 {
 	struct kbase_arbiter_vm_state *arb_vm_state = kbdev->pm.arb_vm_state;
@@ -487,6 +778,7 @@ static void kbase_arbiter_pm_vm_os_resume(struct kbase_device *kbdev)
 	kbase_arbiter_pm_vm_set_state(kbdev,
 		KBASE_VM_STATE_STOPPED_GPU_REQUESTED);
 	kbase_arbif_gpu_request(kbdev);
+	start_request_timer(kbdev);
 
 	/* Release lock and block resume OS function until we have
 	 * asynchronously received the GRANT message from the Arbiter and
@@ -498,6 +790,14 @@ static void kbase_arbiter_pm_vm_os_resume(struct kbase_device *kbdev)
 	mutex_lock(&arb_vm_state->vm_state_lock);
 }
 
+/**
+ * kbase_arbiter_pm_vm_event() - Dispatch VM event to the state machine.
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ * @evt: VM event
+ *
+ * The state machine function. Receives events and transitions states
+ * according to the event received and the current state
+ */
 void kbase_arbiter_pm_vm_event(struct kbase_device *kbdev,
 	enum kbase_arbif_evt evt)
 {
@@ -509,7 +809,9 @@ void kbase_arbiter_pm_vm_event(struct kbase_device *kbdev,
 	mutex_lock(&arb_vm_state->vm_state_lock);
 	dev_dbg(kbdev->dev, "%s %s\n", __func__,
 		kbase_arbiter_pm_vm_event_str(evt));
-
+	if (arb_vm_state->vm_state != KBASE_VM_STATE_INITIALIZING_WITH_GPU &&
+		arb_vm_state->vm_state != KBASE_VM_STATE_INITIALIZING)
+		KBASE_KTRACE_ADD(kbdev, ARB_VM_EVT, NULL, evt);
 	switch (evt) {
 	case KBASE_VM_GPU_GRANTED_EVT:
 		kbase_arbiter_pm_vm_gpu_start(kbdev);
@@ -542,8 +844,6 @@ void kbase_arbiter_pm_vm_event(struct kbase_device *kbdev,
 	case KBASE_VM_REF_EVENT:
 		switch (arb_vm_state->vm_state) {
 		case KBASE_VM_STATE_STARTING:
-			KBASE_TLSTREAM_TL_ARBITER_STARTED(kbdev, kbdev);
-			/* FALL THROUGH */
 		case KBASE_VM_STATE_IDLE:
 			kbase_arbiter_pm_vm_set_state(kbdev,
 			KBASE_VM_STATE_ACTIVE);
@@ -586,6 +886,12 @@ void kbase_arbiter_pm_vm_event(struct kbase_device *kbdev,
 
 KBASE_EXPORT_TEST_API(kbase_arbiter_pm_vm_event);
 
+/**
+ * kbase_arbiter_pm_vm_wait_gpu_assignment() - VM wait for a GPU assignment.
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * VM waits for a GPU assignment.
+ */
 static void kbase_arbiter_pm_vm_wait_gpu_assignment(struct kbase_device *kbdev)
 {
 	struct kbase_arbiter_vm_state *arb_vm_state = kbdev->pm.arb_vm_state;
@@ -597,6 +903,12 @@ static void kbase_arbiter_pm_vm_wait_gpu_assignment(struct kbase_device *kbdev)
 	dev_dbg(kbdev->dev, "Waiting for GPU assignment - done\n");
 }
 
+/**
+ * kbase_arbiter_pm_vm_gpu_assigned_lockheld() - Check if VM holds VM state lock
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Checks if the virtual machine holds VM state lock.
+ */
 static inline bool kbase_arbiter_pm_vm_gpu_assigned_lockheld(
 	struct kbase_device *kbdev)
 {
@@ -607,6 +919,19 @@ static inline bool kbase_arbiter_pm_vm_gpu_assigned_lockheld(
 		arb_vm_state->vm_state == KBASE_VM_STATE_ACTIVE);
 }
 
+/**
+ * kbase_arbiter_pm_ctx_active_handle_suspend() - Handle suspend operation for
+ *                                                arbitration mode
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ * @suspend_handler: The handler code for how to handle a suspend
+ *                   that might occur
+ *
+ * This function handles a suspend event from the driver,
+ * communicating with the arbiter and waiting synchronously for the GPU
+ * to be granted again depending on the VM state.
+ *
+ * Return: 0 on success, else 1 if the suspend handler is not possible.
+ */
 int kbase_arbiter_pm_ctx_active_handle_suspend(struct kbase_device *kbdev,
 	enum kbase_pm_suspend_handler suspend_handler)
 {
@@ -627,6 +952,7 @@ int kbase_arbiter_pm_ctx_active_handle_suspend(struct kbase_device *kbdev,
 				kbase_arbiter_pm_vm_set_state(kbdev,
 					KBASE_VM_STATE_STOPPED_GPU_REQUESTED);
 				kbase_arbif_gpu_request(kbdev);
+				start_request_timer(kbdev);
 			} else if (arb_vm_state->vm_state ==
 					KBASE_VM_STATE_INITIALIZING_WITH_GPU)
 				break;
@@ -660,7 +986,7 @@ int kbase_arbiter_pm_ctx_active_handle_suspend(struct kbase_device *kbdev,
 			}
 
 			/* Need to synchronously wait for GPU assignment */
-			arb_vm_state->vm_arb_users_waiting++;
+			atomic_inc(&kbdev->pm.gpu_users_waiting);
 			mutex_unlock(&arb_vm_state->vm_state_lock);
 			mutex_unlock(&kbdev->pm.lock);
 			mutex_unlock(&js_devdata->runpool_mutex);
@@ -668,9 +994,66 @@ int kbase_arbiter_pm_ctx_active_handle_suspend(struct kbase_device *kbdev,
 			mutex_lock(&js_devdata->runpool_mutex);
 			mutex_lock(&kbdev->pm.lock);
 			mutex_lock(&arb_vm_state->vm_state_lock);
-			arb_vm_state->vm_arb_users_waiting--;
+			atomic_dec(&kbdev->pm.gpu_users_waiting);
 		}
 		mutex_unlock(&arb_vm_state->vm_state_lock);
 	}
 	return res;
 }
+
+/**
+ * kbase_arbiter_pm_update_gpu_freq() - Updates GPU clock frequency received
+ * from arbiter.
+ * @arb_freq: Pointer to structure holding GPU clock frequency data
+ * @freq: New frequency value
+ */
+void kbase_arbiter_pm_update_gpu_freq(struct kbase_arbiter_freq *arb_freq,
+		uint32_t freq)
+{
+	mutex_lock(&arb_freq->arb_freq_lock);
+	arb_freq->arb_freq = freq;
+	mutex_unlock(&arb_freq->arb_freq_lock);
+}
+
+/**
+ * enumerate_arb_gpu_clk() - Enumerate a GPU clock on the given index
+ * @kbdev: kbase_device pointer
+ * @index: GPU clock index
+ *
+ * Return: Pointer to structure holding GPU clock frequency data reported from
+ * arbiter; only index 0 is valid, NULL is returned for any other index.
+ */
+static void *enumerate_arb_gpu_clk(struct kbase_device *kbdev,
+		unsigned int index)
+{
+	if (index == 0)
+		return &kbdev->arb.arb_freq;
+	return NULL;
+}
+
+/**
+ * get_arb_gpu_clk_rate() - Get the current rate of GPU clock frequency value
+ * @kbdev: kbase_device pointer
+ * @gpu_clk_handle: Pointer to the struct kbase_arbiter_freq of the GPU clock
+ *
+ * Return: The GPU clock frequency value saved when gpu is granted from arbiter
+ */
+static unsigned long get_arb_gpu_clk_rate(struct kbase_device *kbdev,
+		void *gpu_clk_handle)
+{
+	uint32_t freq;
+	struct kbase_arbiter_freq *arb_dev_freq =
+			(struct kbase_arbiter_freq *) gpu_clk_handle;
+
+	mutex_lock(&arb_dev_freq->arb_freq_lock);
+	freq = arb_dev_freq->arb_freq;
+	mutex_unlock(&arb_dev_freq->arb_freq_lock);
+	return freq;
+}
+
+struct kbase_clk_rate_trace_op_conf arb_clk_rate_trace_ops = {
+	.get_gpu_clk_rate = get_arb_gpu_clk_rate,
+	.enumerate_gpu_clk = enumerate_arb_gpu_clk,
+	.gpu_clk_notifier_register = NULL,
+	.gpu_clk_notifier_unregister = NULL
+};
diff --git a/drivers/gpu/arm/bifrost/arbiter/mali_kbase_arbiter_pm.h b/drivers/gpu/arm/bifrost/arbiter/mali_kbase_arbiter_pm.h
index 3c49eb1948c5..0f74b631c76e 100644
--- a/drivers/gpu/arm/bifrost/arbiter/mali_kbase_arbiter_pm.h
+++ b/drivers/gpu/arm/bifrost/arbiter/mali_kbase_arbiter_pm.h
@@ -1,28 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  *
- * (C) COPYRIGHT ARM Limited. All rights reserved.
- *
- * This program is free software and is provided to you under the terms of the
- * GNU General Public License version 2 as published by the Free Software
- * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, you can access it online at
- * http://www.gnu.org/licenses/gpl-2.0.html.
- *
- * SPDX-License-Identifier: GPL-2.0
- *
- *//* SPDX-License-Identifier: GPL-2.0 */
-
-/*
- *
- * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -38,8 +17,6 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 /**
@@ -116,10 +93,18 @@ void kbase_arbiter_pm_early_term(struct kbase_device *kbdev);
  * kbase_arbiter_pm_release_interrupts() - Release the GPU interrupts
  * @kbdev: The kbase device structure for the device (must be a valid pointer)
  *
- * Releases interrupts if needed (GPU is available) otherwise does nothing
+ * Releases interrupts if installed and sets interrupts_installed to false
  */
 void kbase_arbiter_pm_release_interrupts(struct kbase_device *kbdev);
 
+/**
+ * kbase_arbiter_pm_install_interrupts() - Install the GPU interrupts
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Installs interrupts and sets the interrupts_installed flag to true.
+ */
+int kbase_arbiter_pm_install_interrupts(struct kbase_device *kbdev);
+
 /**
  * kbase_arbiter_pm_vm_event() - Dispatch VM event to the state machine
  * @kbdev: The kbase device structure for the device (must be a valid pointer)
@@ -156,4 +141,42 @@ int kbase_arbiter_pm_ctx_active_handle_suspend(struct kbase_device *kbdev,
  */
 void kbase_arbiter_pm_vm_stopped(struct kbase_device *kbdev);
 
+/**
+ * kbase_arbiter_set_max_config() - Set the max config data in kbase device.
+ * @kbdev: The kbase device structure for the device (must be a valid pointer).
+ * @max_l2_slices: The maximum number of L2 slices.
+ * @max_core_mask: The largest core mask.
+ *
+ * This function stores the maximum L2 slices and core mask in kbase. The
+ * values are only applied while the VM state is KBASE_VM_STATE_INITIALIZING.
+ */
+void kbase_arbiter_set_max_config(struct kbase_device *kbdev,
+				  uint32_t max_l2_slices,
+				  uint32_t max_core_mask);
+
+/**
+ * kbase_arbiter_pm_gpu_assigned() - Determine if this VM has access to the GPU
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Return: 0 if the VM does not have access, 1 if it does, and a negative number
+ * if an error occurred
+ */
+int kbase_arbiter_pm_gpu_assigned(struct kbase_device *kbdev);
+
+extern struct kbase_clk_rate_trace_op_conf arb_clk_rate_trace_ops;
+
+/**
+ * struct kbase_arbiter_freq - Holding the GPU clock frequency data retrieved
+ * from arbiter
+ * @arb_freq:                 GPU clock frequency value
+ * @arb_freq_lock:            Mutex protecting access to arb_freq value
+ */
+struct kbase_arbiter_freq {
+	uint32_t arb_freq;
+	struct mutex arb_freq_lock;
+};
+
+void kbase_arbiter_pm_update_gpu_freq(struct kbase_arbiter_freq *arb_freq,
+		uint32_t freq);
+
 #endif /*_MALI_KBASE_ARBITER_PM_H_ */
diff --git a/drivers/gpu/arm/bifrost/backend/gpu/Kbuild b/drivers/gpu/arm/bifrost/backend/gpu/Kbuild
index b48ab4c51875..928766d65632 100644
--- a/drivers/gpu/arm/bifrost/backend/gpu/Kbuild
+++ b/drivers/gpu/arm/bifrost/backend/gpu/Kbuild
@@ -1,10 +1,11 @@
+# SPDX-License-Identifier: GPL-2.0
 #
-# (C) COPYRIGHT 2014-2020 ARM Limited. All rights reserved.
+# (C) COPYRIGHT 2014-2021 ARM Limited. All rights reserved.
 #
 # This program is free software and is provided to you under the terms of the
 # GNU General Public License version 2 as published by the Free Software
 # Foundation, and any use by you of this program is subject to the terms
-# of such GNU licence.
+# of such GNU license.
 #
 # This program is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -15,15 +16,12 @@
 # along with this program; if not, you can access it online at
 # http://www.gnu.org/licenses/gpl-2.0.html.
 #
-# SPDX-License-Identifier: GPL-2.0
-#
 #
 
 BACKEND += \
 	backend/gpu/mali_kbase_cache_policy_backend.c \
 	backend/gpu/mali_kbase_gpuprops_backend.c \
 	backend/gpu/mali_kbase_irq_linux.c \
-	backend/gpu/mali_kbase_instr_backend.c \
 	backend/gpu/mali_kbase_js_backend.c \
 	backend/gpu/mali_kbase_pm_backend.c \
 	backend/gpu/mali_kbase_pm_driver.c \
@@ -40,6 +38,7 @@ ifeq ($(MALI_USE_CSF),1)
 # empty
 else
 	BACKEND += \
+		backend/gpu/mali_kbase_instr_backend.c \
 		backend/gpu/mali_kbase_jm_as.c \
 		backend/gpu/mali_kbase_debug_job_fault_backend.c \
 		backend/gpu/mali_kbase_jm_hw.c \
diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_backend_config.h b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_backend_config.h
index 4a61f96c8c7d..a6bbdbbc150c 100644
--- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_backend_config.h
+++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_backend_config.h
@@ -1,11 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  *
- * (C) COPYRIGHT 2014-2018 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2018, 2020 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,8 +17,6 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 /*
diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_cache_policy_backend.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_cache_policy_backend.c
index 4e07a3f9d83f..fee946478667 100644
--- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_cache_policy_backend.c
+++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_cache_policy_backend.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  *
  * (C) COPYRIGHT 2014-2016, 2018, 2020 ARM Limited. All rights reserved.
@@ -5,7 +6,7 @@
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,8 +17,6 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 #include "backend/gpu/mali_kbase_cache_policy_backend.h"
diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_cache_policy_backend.h b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_cache_policy_backend.h
index f78ada74f605..53b9667cc669 100644
--- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_cache_policy_backend.h
+++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_cache_policy_backend.h
@@ -1,11 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  *
- * (C) COPYRIGHT 2015-2016 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2016, 2020-2021 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,16 +17,13 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
-
 #ifndef _KBASE_CACHE_POLICY_BACKEND_H_
 #define _KBASE_CACHE_POLICY_BACKEND_H_
 
 #include "mali_kbase.h"
-#include "mali_base_kernel.h"
+#include <uapi/gpu/arm/bifrost/mali_base_kernel.h>
 
 /**
   * kbase_cache_set_coherency_mode() - Sets the system coherency mode
diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_clk_rate_trace_mgr.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_clk_rate_trace_mgr.c
index 187d7d6f6926..7076ab4f73ee 100644
--- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_clk_rate_trace_mgr.c
+++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_clk_rate_trace_mgr.c
@@ -1,11 +1,12 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  *
- * (C) COPYRIGHT 2020 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,8 +17,6 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 /*
@@ -40,6 +39,38 @@
 #define CLK_RATE_TRACE_OPS (NULL)
 #endif
 
+/**
+ * get_clk_rate_trace_callbacks() - Returns pointer to clk trace ops.
+ * @kbdev: Pointer to kbase device, used to check if arbitration is enabled
+ *         when compiled with arbiter support.
+ * Return: Pointer to clk trace ops if supported or NULL.
+ */
+static struct kbase_clk_rate_trace_op_conf *
+get_clk_rate_trace_callbacks(struct kbase_device *kbdev __maybe_unused)
+{
+	/* base case */
+	struct kbase_clk_rate_trace_op_conf *callbacks =
+		(struct kbase_clk_rate_trace_op_conf *)CLK_RATE_TRACE_OPS;
+#if defined(CONFIG_MALI_ARBITER_SUPPORT) && defined(CONFIG_OF)
+	const void *arbiter_if_node;
+
+	if (WARN_ON(!kbdev) || WARN_ON(!kbdev->dev))
+		return callbacks;
+
+	arbiter_if_node =
+		of_get_property(kbdev->dev->of_node, "arbiter_if", NULL);
+	/* Arbitration enabled, override the callback pointer.*/
+	if (arbiter_if_node)
+		callbacks = &arb_clk_rate_trace_ops;
+	else
+		dev_dbg(kbdev->dev,
+			"Arbitration supported but disabled by platform. Leaving clk rate callbacks as default.\n");
+
+#endif
+
+	return callbacks;
+}
+
 static int gpu_clk_rate_change_notifier(struct notifier_block *nb,
 			unsigned long event, void *data)
 {
@@ -70,12 +101,13 @@ static int gpu_clk_rate_change_notifier(struct notifier_block *nb,
 static int gpu_clk_data_init(struct kbase_device *kbdev,
 		void *gpu_clk_handle, unsigned int index)
 {
-	struct kbase_clk_rate_trace_op_conf *callbacks =
-		(struct kbase_clk_rate_trace_op_conf *)CLK_RATE_TRACE_OPS;
+	struct kbase_clk_rate_trace_op_conf *callbacks;
 	struct kbase_clk_data *clk_data;
 	struct kbase_clk_rate_trace_manager *clk_rtm = &kbdev->pm.clk_rtm;
 	int ret = 0;
 
+	callbacks = get_clk_rate_trace_callbacks(kbdev);
+
 	if (WARN_ON(!callbacks) ||
 	    WARN_ON(!gpu_clk_handle) ||
 	    WARN_ON(index >= BASE_MAX_NR_CLOCKS_REGULATORS))
@@ -109,8 +141,9 @@ static int gpu_clk_data_init(struct kbase_device *kbdev,
 	clk_data->clk_rate_change_nb.notifier_call =
 			gpu_clk_rate_change_notifier;
 
-	ret = callbacks->gpu_clk_notifier_register(kbdev, gpu_clk_handle,
-			&clk_data->clk_rate_change_nb);
+	if (callbacks->gpu_clk_notifier_register)
+		ret = callbacks->gpu_clk_notifier_register(kbdev,
+				gpu_clk_handle, &clk_data->clk_rate_change_nb);
 	if (ret) {
 		dev_err(kbdev->dev, "Failed to register notifier for clock enumerated at index %u", index);
 		kfree(clk_data);
@@ -121,19 +154,22 @@ static int gpu_clk_data_init(struct kbase_device *kbdev,
 
 int kbase_clk_rate_trace_manager_init(struct kbase_device *kbdev)
 {
-	struct kbase_clk_rate_trace_op_conf *callbacks =
-		(struct kbase_clk_rate_trace_op_conf *)CLK_RATE_TRACE_OPS;
+	struct kbase_clk_rate_trace_op_conf *callbacks;
 	struct kbase_clk_rate_trace_manager *clk_rtm = &kbdev->pm.clk_rtm;
 	unsigned int i;
 	int ret = 0;
 
-	/* Return early if no callbacks provided for clock rate tracing */
-	if (!callbacks)
-		return 0;
+	callbacks = get_clk_rate_trace_callbacks(kbdev);
 
 	spin_lock_init(&clk_rtm->lock);
 	INIT_LIST_HEAD(&clk_rtm->listeners);
 
+	/* Return early if no callbacks provided for clock rate tracing */
+	if (!callbacks) {
+		WRITE_ONCE(clk_rtm->clk_rate_trace_ops, NULL);
+		return 0;
+	}
+
 	clk_rtm->gpu_idle = true;
 
 	for (i = 0; i < BASE_MAX_NR_CLOCKS_REGULATORS; i++) {
@@ -151,10 +187,12 @@ int kbase_clk_rate_trace_manager_init(struct kbase_device *kbdev)
 	/* Activate clock rate trace manager if at least one GPU clock was
 	 * enumerated.
 	 */
-	if (i)
+	if (i) {
 		WRITE_ONCE(clk_rtm->clk_rate_trace_ops, callbacks);
-	else
+	} else {
 		dev_info(kbdev->dev, "No clock(s) available for rate tracing");
+		WRITE_ONCE(clk_rtm->clk_rate_trace_ops, NULL);
+	}
 
 	return 0;
 
@@ -183,9 +221,10 @@ void kbase_clk_rate_trace_manager_term(struct kbase_device *kbdev)
 		if (!clk_rtm->clks[i])
 			break;
 
-		clk_rtm->clk_rate_trace_ops->gpu_clk_notifier_unregister(
-				kbdev, clk_rtm->clks[i]->gpu_clk_handle,
-				&clk_rtm->clks[i]->clk_rate_change_nb);
+		if (clk_rtm->clk_rate_trace_ops->gpu_clk_notifier_unregister)
+			clk_rtm->clk_rate_trace_ops->gpu_clk_notifier_unregister
+			(kbdev, clk_rtm->clks[i]->gpu_clk_handle,
+			&clk_rtm->clks[i]->clk_rate_change_nb);
 		kfree(clk_rtm->clks[i]);
 	}
 
@@ -284,4 +323,3 @@ void kbase_clk_rate_trace_manager_notify_all(
 	}
 }
 KBASE_EXPORT_TEST_API(kbase_clk_rate_trace_manager_notify_all);
-
diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_clk_rate_trace_mgr.h b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_clk_rate_trace_mgr.h
index dcafb26ea4c0..ba250bec4491 100644
--- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_clk_rate_trace_mgr.h
+++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_clk_rate_trace_mgr.h
@@ -1,11 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  *
- * (C) COPYRIGHT 2020 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,17 +17,15 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 #ifndef _KBASE_CLK_RATE_TRACE_MGR_
 #define _KBASE_CLK_RATE_TRACE_MGR_
 
-/** The index of top clock domain in kbase_clk_rate_trace_manager:clks. */
+/* The index of top clock domain in kbase_clk_rate_trace_manager:clks. */
 #define KBASE_CLOCK_DOMAIN_TOP (0)
 
-/** The index of shader-cores clock domain in
+/* The index of shader-cores clock domain in
  * kbase_clk_rate_trace_manager:clks.
  */
 #define KBASE_CLOCK_DOMAIN_SHADER_CORES (1)
@@ -139,7 +138,7 @@ static inline void kbase_clk_rate_trace_manager_unsubscribe(
  *                                             rate listeners.
  *
  * @clk_rtm:     Clock rate manager instance.
- * @clk_index:   Clock index.
+ * @clock_index: Clock index.
  * @new_rate:    New clock frequency(Hz)
  *
  * kbase_clk_rate_trace_manager:lock must be locked.
diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_debug_job_fault_backend.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_debug_job_fault_backend.c
index 3aadcb04160c..b5ca79db5fad 100644
--- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_debug_job_fault_backend.c
+++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_debug_job_fault_backend.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  *
  * (C) COPYRIGHT 2012-2015, 2018-2020 ARM Limited. All rights reserved.
@@ -5,7 +6,7 @@
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,8 +17,6 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 #include <mali_kbase.h>
diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_devfreq.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_devfreq.c
index ff561d180247..1381f992364b 100644
--- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_devfreq.c
+++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_devfreq.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  *
  * (C) COPYRIGHT 2014-2020 ARM Limited. All rights reserved.
@@ -32,20 +33,8 @@
 #endif
 
 #include <linux/version.h>
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 13, 0)
 #include <linux/pm_opp.h>
-#else /* Linux >= 3.13 */
-/* In 3.13 the OPP include header file, types, and functions were all
- * renamed. Use the old filename for the include, and define the new names to
- * the old, when an old kernel is detected.
- */
-#include <linux/opp.h>
-#define dev_pm_opp opp
-#define dev_pm_opp_get_voltage opp_get_voltage
-#define dev_pm_opp_get_opp_count opp_get_opp_count
-#define dev_pm_opp_find_freq_ceil opp_find_freq_ceil
-#define dev_pm_opp_find_freq_floor opp_find_freq_floor
-#endif /* Linux >= 3.13 */
+
 #include <soc/rockchip/rockchip_ipa.h>
 #include <soc/rockchip/rockchip_opp_select.h>
 #include <soc/rockchip/rockchip_system_monitor.h>
@@ -59,22 +48,46 @@ static struct monitor_dev_profile mali_mdevp = {
 };
 
 /**
- * opp_translate - Translate nominal OPP frequency from devicetree into real
- *                 frequency and core mask
- * @kbdev:     Device pointer
- * @freq:      Nominal frequency
- * @volt:      Nominal voltage
- * @core_mask: Pointer to u64 to store core mask to
- * @freqs:     Pointer to array of frequencies
- * @volts:     Pointer to array of voltages
+ * get_voltage() - Get the voltage value corresponding to the nominal frequency
+ *                 used by devfreq.
+ * @kbdev:    Device pointer
+ * @freq:     Nominal frequency in Hz passed by devfreq.
  *
- * This function will only perform translation if an operating-points-v2-mali
- * table is present in devicetree. If one is not present then it will return an
- * untranslated frequency and all cores enabled.
+ * This function is called only when the devicetree has no OPP table compatible
+ * with "operating-points-v2-mali" for the GPU device.
+ *
+ * Return: Voltage value in millivolts, 0 in case of error.
  */
-static void opp_translate(struct kbase_device *kbdev, unsigned long freq,
-			  unsigned long volt, u64 *core_mask,
-			  unsigned long *freqs, unsigned long *volts)
+static unsigned long get_voltage(struct kbase_device *kbdev, unsigned long freq)
+{
+	struct dev_pm_opp *opp;
+	unsigned long voltage = 0;
+
+#if KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE
+	rcu_read_lock();
+#endif /* KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE */
+
+	opp = dev_pm_opp_find_freq_exact(kbdev->dev, freq, true);
+
+	if (IS_ERR_OR_NULL(opp))
+		dev_err(kbdev->dev, "Failed to get opp (%ld)\n", PTR_ERR(opp));
+	else {
+		voltage = dev_pm_opp_get_voltage(opp);
+#if KERNEL_VERSION(4, 11, 0) <= LINUX_VERSION_CODE
+		dev_pm_opp_put(opp);
+#endif /* KERNEL_VERSION(4, 11, 0) <= LINUX_VERSION_CODE */
+	}
+
+#if KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE
+	rcu_read_unlock();
+#endif /* KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE */
+
+	/* Convert to millivolts; voltage is still 0 if the OPP lookup failed */
+	return voltage / 1000;
+}
+
+void kbase_devfreq_opp_translate(struct kbase_device *kbdev, unsigned long freq,
+	u64 *core_mask, unsigned long *freqs, unsigned long *volts)
 {
 	unsigned int i;
 
@@ -95,13 +108,16 @@ static void opp_translate(struct kbase_device *kbdev, unsigned long freq,
 	}
 
 	/* If failed to find OPP, return all cores enabled
-	 * and nominal frequency
+	 * and nominal frequency and the corresponding voltage.
 	 */
 	if (i == kbdev->num_opps) {
+		unsigned long voltage = get_voltage(kbdev, freq);
+
 		*core_mask = kbdev->gpu_props.props.raw_props.shader_present;
+
 		for (i = 0; i < kbdev->nr_clocks; i++) {
 			freqs[i] = freq;
-			volts[i] = volt;
+			volts[i] = voltage;
 		}
 	}
 }
@@ -120,12 +136,12 @@ kbase_devfreq_target(struct device *dev, unsigned long *target_freq, u32 flags)
 
 	nominal_freq = *target_freq;
 
-#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 11, 0)
+#if KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE
 	rcu_read_lock();
 #endif
 	opp = devfreq_recommended_opp(dev, &nominal_freq, flags);
 	if (IS_ERR_OR_NULL(opp)) {
-#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 11, 0)
+#if KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE
 		rcu_read_unlock();
 #endif
 		dev_err(dev, "Failed to get opp (%ld)\n", PTR_ERR(opp));
@@ -135,12 +151,15 @@ kbase_devfreq_target(struct device *dev, unsigned long *target_freq, u32 flags)
 #if LINUX_VERSION_CODE < KERNEL_VERSION(4, 11, 0)
 	rcu_read_unlock();
 #endif
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 11, 0)
+#if KERNEL_VERSION(4, 11, 0) <= LINUX_VERSION_CODE
 	dev_pm_opp_put(opp);
 #endif
 
-	opp_translate(kbdev, nominal_freq, nominal_volt, &core_mask, freqs,
-		      volts);
+	kbase_devfreq_opp_translate(kbdev,
+				    nominal_freq,
+				    &core_mask,
+				    freqs,
+				    volts);
 
 	/*
 	 * Only update if there is a change of frequency
@@ -168,6 +187,7 @@ kbase_devfreq_target(struct device *dev, unsigned long *target_freq, u32 flags)
 #endif
 		return 0;
 	}
+
 	dev_dbg(dev, "%lu-->%lu\n", kbdev->current_nominal_freq, nominal_freq);
 
 #ifdef CONFIG_REGULATOR
@@ -285,6 +305,10 @@ kbase_devfreq_status(struct device *dev, struct devfreq_dev_status *stat)
 	stat->current_frequency = kbdev->current_nominal_freq;
 	stat->private_data = NULL;
 
+#if MALI_USE_CSF && defined CONFIG_DEVFREQ_THERMAL
+	kbase_ipa_reset_data(kbdev);
+#endif
+
 	return 0;
 }
 
@@ -296,11 +320,11 @@ static int kbase_devfreq_init_freq_table(struct kbase_device *kbdev,
 	unsigned long freq;
 	struct dev_pm_opp *opp;
 
-#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 11, 0)
+#if KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE
 	rcu_read_lock();
 #endif
 	count = dev_pm_opp_get_opp_count(kbdev->dev);
-#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 11, 0)
+#if KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE
 	rcu_read_unlock();
 #endif
 	if (count < 0)
@@ -311,20 +335,20 @@ static int kbase_devfreq_init_freq_table(struct kbase_device *kbdev,
 	if (!dp->freq_table)
 		return -ENOMEM;
 
-#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 11, 0)
+#if KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE
 	rcu_read_lock();
 #endif
 	for (i = 0, freq = ULONG_MAX; i < count; i++, freq--) {
 		opp = dev_pm_opp_find_freq_floor(kbdev->dev, &freq);
 		if (IS_ERR(opp))
 			break;
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 11, 0)
+#if KERNEL_VERSION(4, 11, 0) <= LINUX_VERSION_CODE
 		dev_pm_opp_put(opp);
-#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(4, 11, 0) */
+#endif /* KERNEL_VERSION(4, 11, 0) <= LINUX_VERSION_CODE */
 
 		dp->freq_table[i] = freq;
 	}
-#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 11, 0)
+#if KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE
 	rcu_read_unlock();
 #endif
 
@@ -407,7 +431,7 @@ static void kbasep_devfreq_read_suspend_clock(struct kbase_device *kbdev,
 
 static int kbase_devfreq_init_core_mask_table(struct kbase_device *kbdev)
 {
-#if KERNEL_VERSION(3, 18, 0) > LINUX_VERSION_CODE || !defined(CONFIG_OF)
+#ifndef CONFIG_OF
 	/* OPP table initialization requires at least the capability to get
 	 * regulators and clocks from the device tree, as well as parsing
 	 * arrays of unsigned integer values.
@@ -541,11 +565,9 @@ static int kbase_devfreq_init_core_mask_table(struct kbase_device *kbdev)
 	kbdev->num_opps = i;
 
 	return 0;
-#endif /* KERNEL_VERSION(3, 18, 0) > LINUX_VERSION_CODE */
+#endif /* CONFIG_OF */
 }
 
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0)
-
 static const char *kbase_devfreq_req_type_name(enum kbase_devfreq_work_type type)
 {
 	const char *p;
@@ -602,12 +624,9 @@ static void kbase_devfreq_suspend_resume_worker(struct work_struct *work)
 	}
 }
 
-#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0) */
-
 void kbase_devfreq_enqueue_work(struct kbase_device *kbdev,
 				       enum kbase_devfreq_work_type work_type)
 {
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0)
 	unsigned long flags;
 
 	WARN_ON(work_type == DEVFREQ_WORK_NONE);
@@ -617,12 +636,10 @@ void kbase_devfreq_enqueue_work(struct kbase_device *kbdev,
 	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
 	dev_dbg(kbdev->dev, "Enqueuing devfreq req: %s\n",
 		kbase_devfreq_req_type_name(work_type));
-#endif
 }
 
 static int kbase_devfreq_work_init(struct kbase_device *kbdev)
 {
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0)
 	kbdev->devfreq_queue.req_type = DEVFREQ_WORK_NONE;
 	kbdev->devfreq_queue.acted_type = DEVFREQ_WORK_RESUME;
 
@@ -632,15 +649,12 @@ static int kbase_devfreq_work_init(struct kbase_device *kbdev)
 
 	INIT_WORK(&kbdev->devfreq_queue.work,
 			kbase_devfreq_suspend_resume_worker);
-#endif
 	return 0;
 }
 
 static void kbase_devfreq_work_term(struct kbase_device *kbdev)
 {
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0)
 	destroy_workqueue(kbdev->devfreq_queue.workq);
-#endif
 }
 
 static unsigned long kbase_devfreq_get_static_power(struct devfreq *devfreq,
@@ -661,9 +675,9 @@ int kbase_devfreq_init(struct kbase_device *kbdev)
 	struct devfreq_cooling_power *kbase_dcp = &kbase_cooling_power;
 	struct device_node *np = kbdev->dev->of_node;
 	struct devfreq_dev_profile *dp;
+	int err;
 	struct dev_pm_opp *opp;
 	unsigned long opp_rate;
-	int err;
 	unsigned int i;
 
 	if (kbdev->nr_clocks == 0) {
@@ -726,7 +740,8 @@ int kbase_devfreq_init(struct kbase_device *kbdev)
 	}
 
 	/* devfreq_add_device only copies a few of kbdev->dev's fields, so
-	 * set drvdata explicitly so IPA models can access kbdev. */
+	 * set drvdata explicitly so IPA models can access kbdev.
+	 */
 	dev_set_drvdata(&kbdev->devfreq->dev, kbdev);
 
 	err = devfreq_register_opp_notifier(kbdev->dev, kbdev->devfreq);
diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_devfreq.h b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_devfreq.h
index 7bcd47c70ef0..d1305d382990 100644
--- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_devfreq.h
+++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_devfreq.h
@@ -1,11 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  *
- * (C) COPYRIGHT 2014, 2019 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014, 2019-2020 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,8 +17,6 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 #ifndef _BASE_DEVFREQ_H_
@@ -44,4 +43,20 @@ void kbase_devfreq_force_freq(struct kbase_device *kbdev, unsigned long freq);
 void kbase_devfreq_enqueue_work(struct kbase_device *kbdev,
 				enum kbase_devfreq_work_type work_type);
 
+/**
+ * kbase_devfreq_opp_translate() - Translate nominal OPP frequency from
+ *                                 devicetree into real frequency & voltage
+ *                                 pair, along with core mask
+ * @kbdev:     Device pointer
+ * @freq:      Nominal frequency
+ * @core_mask: Pointer to u64 to store core mask to
+ * @freqs:     Pointer to array to store the frequencies to
+ * @volts:     Pointer to array to store the voltages to
+ *
+ * This function will only perform translation if an operating-points-v2-mali
+ * table is present in devicetree. If one is not present then it will return an
+ * untranslated frequency (and corresponding voltage) and all cores enabled.
+ */
+void kbase_devfreq_opp_translate(struct kbase_device *kbdev, unsigned long freq,
+	u64 *core_mask, unsigned long *freqs, unsigned long *volts);
 #endif /* _BASE_DEVFREQ_H_ */
diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_gpuprops_backend.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_gpuprops_backend.c
index 60ae0206d6a8..7542209ca247 100644
--- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_gpuprops_backend.c
+++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_gpuprops_backend.c
@@ -1,12 +1,12 @@
 // SPDX-License-Identifier: GPL-2.0
 /*
  *
- * (C) COPYRIGHT 2014-2020 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2021 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -17,8 +17,6 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 /*
@@ -41,12 +39,13 @@ int kbase_backend_gpuprops_get(struct kbase_device *kbdev,
 
 	registers.l2_features = kbase_reg_read(kbdev,
 				GPU_CONTROL_REG(L2_FEATURES));
+	registers.core_features = 0;
 #if !MALI_USE_CSF
+	/* TGOx */
 	registers.core_features = kbase_reg_read(kbdev,
 				GPU_CONTROL_REG(CORE_FEATURES));
 #else /* !MALI_USE_CSF */
-	registers.core_features = 0;
-#endif /* !MALI_USE_CSF */
+#endif /* MALI_USE_CSF */
 	registers.tiler_features = kbase_reg_read(kbdev,
 				GPU_CONTROL_REG(TILER_FEATURES));
 	registers.mem_features = kbase_reg_read(kbdev,
@@ -105,6 +104,16 @@ int kbase_backend_gpuprops_get(struct kbase_device *kbdev,
 	registers.stack_present_hi = kbase_reg_read(kbdev,
 				GPU_CONTROL_REG(STACK_PRESENT_HI));
 
+	if (registers.gpu_id >= GPU_ID2_PRODUCT_MAKE(11, 8, 5, 2)) {
+		registers.gpu_features_lo = kbase_reg_read(kbdev,
+					GPU_CONTROL_REG(GPU_FEATURES_LO));
+		registers.gpu_features_hi = kbase_reg_read(kbdev,
+					GPU_CONTROL_REG(GPU_FEATURES_HI));
+	} else {
+		registers.gpu_features_lo = 0;
+		registers.gpu_features_hi = 0;
+	}
+
 	if (!kbase_is_gpu_removed(kbdev)) {
 		*regdump = registers;
 		return 0;
@@ -112,6 +121,32 @@ int kbase_backend_gpuprops_get(struct kbase_device *kbdev,
 		return -EIO;
 }
 
+int kbase_backend_gpuprops_get_curr_config(struct kbase_device *kbdev,
+		struct kbase_current_config_regdump *curr_config_regdump)
+{
+	if (WARN_ON(!kbdev) || WARN_ON(!curr_config_regdump))
+		return -EINVAL;
+	/* Read the registers describing the current GPU configuration. */
+	curr_config_regdump->mem_features = kbase_reg_read(kbdev,
+					GPU_CONTROL_REG(MEM_FEATURES));
+
+	curr_config_regdump->shader_present_lo = kbase_reg_read(kbdev,
+					GPU_CONTROL_REG(SHADER_PRESENT_LO));
+	curr_config_regdump->shader_present_hi = kbase_reg_read(kbdev,
+					GPU_CONTROL_REG(SHADER_PRESENT_HI));
+
+	curr_config_regdump->l2_present_lo = kbase_reg_read(kbdev,
+					GPU_CONTROL_REG(L2_PRESENT_LO));
+	curr_config_regdump->l2_present_hi = kbase_reg_read(kbdev,
+					GPU_CONTROL_REG(L2_PRESENT_HI));
+	/* Fail the dump with -EIO if the GPU is reported as removed. */
+	if (WARN_ON(kbase_is_gpu_removed(kbdev)))
+		return -EIO;
+
+	return 0;
+
+}
+
 int kbase_backend_gpuprops_get_features(struct kbase_device *kbdev,
 					struct kbase_gpuprops_regdump *regdump)
 {
@@ -147,11 +182,15 @@ int kbase_backend_gpuprops_get_l2_features(struct kbase_device *kbdev,
 	if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_L2_CONFIG)) {
 		u32 l2_features = kbase_reg_read(kbdev,
 				GPU_CONTROL_REG(L2_FEATURES));
+		u32 l2_config =
+			kbase_reg_read(kbdev, GPU_CONTROL_REG(L2_CONFIG));
+
 
 		if (kbase_is_gpu_removed(kbdev))
 			return -EIO;
 
 		regdump->l2_features = l2_features;
+		regdump->l2_config = l2_config;
 	}
 
 	return 0;
diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_instr_backend.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_instr_backend.c
index 54b07483dee6..edc2f69e47f7 100644
--- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_instr_backend.c
+++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_instr_backend.c
@@ -1,11 +1,12 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  *
- * (C) COPYRIGHT 2014-2020 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2021 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,12 +17,8 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
-
-
 /*
  * GPU backend instrumentation APIs.
  */
@@ -39,9 +36,7 @@ int kbase_instr_hwcnt_enable_internal(struct kbase_device *kbdev,
 {
 	unsigned long flags;
 	int err = -EINVAL;
-#if !MALI_USE_CSF
 	u32 irq_mask;
-#endif
 	u32 prfcnt_config;
 
 	lockdep_assert_held(&kbdev->hwaccess_lock);
@@ -58,12 +53,10 @@ int kbase_instr_hwcnt_enable_internal(struct kbase_device *kbdev,
 		goto out_err;
 	}
 
-#if !MALI_USE_CSF
 	/* Enable interrupt */
 	irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK));
 	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), irq_mask |
 						PRFCNT_SAMPLE_COMPLETED);
-#endif
 
 	/* In use, this context is the owner */
 	kbdev->hwcnt.kctx = kctx;
@@ -75,36 +68,13 @@ int kbase_instr_hwcnt_enable_internal(struct kbase_device *kbdev,
 
 	/* Configure */
 	prfcnt_config = kctx->as_nr << PRFCNT_CONFIG_AS_SHIFT;
-#ifdef CONFIG_MALI_PRFCNT_SET_SECONDARY_VIA_DEBUG_FS
-	if (kbdev->hwcnt.backend.use_secondary_override)
+#ifdef CONFIG_MALI_PRFCNT_SET_SELECT_VIA_DEBUG_FS
+	prfcnt_config |= kbdev->hwcnt.backend.override_counter_set
+			 << PRFCNT_CONFIG_SETSELECT_SHIFT;
 #else
-	if (enable->use_secondary)
+	prfcnt_config |= enable->counter_set << PRFCNT_CONFIG_SETSELECT_SHIFT;
 #endif
-		prfcnt_config |= 1 << PRFCNT_CONFIG_SETSELECT_SHIFT;
 
-#if MALI_USE_CSF
-	kbase_reg_write(kbdev, GPU_CONTROL_MCU_REG(PRFCNT_CONFIG),
-			prfcnt_config | PRFCNT_CONFIG_MODE_OFF);
-
-	kbase_reg_write(kbdev, GPU_CONTROL_MCU_REG(PRFCNT_BASE_LO),
-					enable->dump_buffer & 0xFFFFFFFF);
-	kbase_reg_write(kbdev, GPU_CONTROL_MCU_REG(PRFCNT_BASE_HI),
-					enable->dump_buffer >> 32);
-
-	kbase_reg_write(kbdev, GPU_CONTROL_MCU_REG(PRFCNT_CSHW_EN),
-					enable->fe_bm);
-
-	kbase_reg_write(kbdev, GPU_CONTROL_MCU_REG(PRFCNT_SHADER_EN),
-					enable->shader_bm);
-	kbase_reg_write(kbdev, GPU_CONTROL_MCU_REG(PRFCNT_MMU_L2_EN),
-					enable->mmu_l2_bm);
-
-	kbase_reg_write(kbdev, GPU_CONTROL_MCU_REG(PRFCNT_TILER_EN),
-					enable->tiler_bm);
-
-	kbase_reg_write(kbdev, GPU_CONTROL_MCU_REG(PRFCNT_CONFIG),
-			prfcnt_config | PRFCNT_CONFIG_MODE_MANUAL);
-#else
 	kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG),
 			prfcnt_config | PRFCNT_CONFIG_MODE_OFF);
 
@@ -126,7 +96,6 @@ int kbase_instr_hwcnt_enable_internal(struct kbase_device *kbdev,
 
 	kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG),
 			prfcnt_config | PRFCNT_CONFIG_MODE_MANUAL);
-#endif
 
 	spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
 
@@ -138,7 +107,7 @@ int kbase_instr_hwcnt_enable_internal(struct kbase_device *kbdev,
 
 	err = 0;
 
-	dev_dbg(kbdev->dev, "HW counters dumping set-up for context %p", kctx);
+	dev_dbg(kbdev->dev, "HW counters dumping set-up for context %pK", kctx);
 	return err;
  out_err:
 	return err;
@@ -148,9 +117,7 @@ int kbase_instr_hwcnt_disable_internal(struct kbase_context *kctx)
 {
 	unsigned long flags, pm_flags;
 	int err = -EINVAL;
-#if !MALI_USE_CSF
 	u32 irq_mask;
-#endif
 	struct kbase_device *kbdev = kctx->kbdev;
 
 	while (1) {
@@ -185,10 +152,6 @@ int kbase_instr_hwcnt_disable_internal(struct kbase_context *kctx)
 	kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_DISABLED;
 	kbdev->hwcnt.backend.triggered = 0;
 
-#if MALI_USE_CSF
-	/* Disable the counters */
-	kbase_reg_write(kbdev, GPU_CONTROL_MCU_REG(PRFCNT_CONFIG), 0);
-#else
 	/* Disable interrupt */
 	irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK));
 	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK),
@@ -196,7 +159,6 @@ int kbase_instr_hwcnt_disable_internal(struct kbase_context *kctx)
 
 	/* Disable the counters */
 	kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG), 0);
-#endif
 
 	kbdev->hwcnt.kctx = NULL;
 	kbdev->hwcnt.addr = 0ULL;
@@ -205,11 +167,10 @@ int kbase_instr_hwcnt_disable_internal(struct kbase_context *kctx)
 	spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
 	spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags);
 
-	dev_dbg(kbdev->dev, "HW counters dumping disabled for context %p",
+	dev_dbg(kbdev->dev, "HW counters dumping disabled for context %pK",
 									kctx);
 
 	err = 0;
-
  out:
 	return err;
 }
@@ -229,7 +190,8 @@ int kbase_instr_hwcnt_request_dump(struct kbase_context *kctx)
 
 	if (kbdev->hwcnt.backend.state != KBASE_INSTR_STATE_IDLE) {
 		/* HW counters are disabled or another dump is ongoing, or we're
-		 * resetting */
+		 * resetting
+		 */
 		goto unlock;
 	}
 
@@ -239,44 +201,26 @@ int kbase_instr_hwcnt_request_dump(struct kbase_context *kctx)
 	 */
 	kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_DUMPING;
 
-
-#if MALI_USE_CSF
-	/* Reconfigure the dump address */
-	kbase_reg_write(kbdev, GPU_CONTROL_MCU_REG(PRFCNT_BASE_LO),
-					kbdev->hwcnt.addr & 0xFFFFFFFF);
-	kbase_reg_write(kbdev, GPU_CONTROL_MCU_REG(PRFCNT_BASE_HI),
-					kbdev->hwcnt.addr >> 32);
-#else
 	/* Reconfigure the dump address */
 	kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_LO),
 					kbdev->hwcnt.addr & 0xFFFFFFFF);
 	kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_HI),
 					kbdev->hwcnt.addr >> 32);
-#endif
 
 	/* Start dumping */
 	KBASE_KTRACE_ADD(kbdev, CORE_GPU_PRFCNT_SAMPLE, NULL,
 			kbdev->hwcnt.addr);
 
-#if MALI_USE_CSF
-	kbase_reg_write(kbdev, GPU_CONTROL_MCU_REG(GPU_COMMAND),
-					GPU_COMMAND_PRFCNT_SAMPLE);
-#else
 	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
 					GPU_COMMAND_PRFCNT_SAMPLE);
-#endif
 
-	dev_dbg(kbdev->dev, "HW counters dumping done for context %p", kctx);
+	dev_dbg(kbdev->dev, "HW counters dumping done for context %pK", kctx);
 
 	err = 0;
 
  unlock:
 	spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
 
-#if MALI_USE_CSF
-	tasklet_schedule(&kbdev->hwcnt.backend.csf_hwc_irq_poll_tasklet);
-#endif
-
 	return err;
 }
 KBASE_EXPORT_SYMBOL(kbase_instr_hwcnt_request_dump);
@@ -305,86 +249,6 @@ bool kbase_instr_hwcnt_dump_complete(struct kbase_context *kctx,
 }
 KBASE_EXPORT_SYMBOL(kbase_instr_hwcnt_dump_complete);
 
-void kbasep_cache_clean_worker(struct work_struct *data)
-{
-	struct kbase_device *kbdev;
-	unsigned long flags, pm_flags;
-
-	kbdev = container_of(data, struct kbase_device,
-						hwcnt.backend.cache_clean_work);
-
-	spin_lock_irqsave(&kbdev->hwaccess_lock, pm_flags);
-	spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
-
-	/* Clean and invalidate the caches so we're sure the mmu tables for the
-	 * dump buffer is valid.
-	 */
-	KBASE_DEBUG_ASSERT(kbdev->hwcnt.backend.state ==
-					KBASE_INSTR_STATE_REQUEST_CLEAN);
-	kbase_gpu_start_cache_clean_nolock(kbdev);
-	spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
-	spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags);
-
-	kbase_gpu_wait_cache_clean(kbdev);
-
-	spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
-	KBASE_DEBUG_ASSERT(kbdev->hwcnt.backend.state ==
-					KBASE_INSTR_STATE_REQUEST_CLEAN);
-	/* All finished and idle */
-	kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_IDLE;
-	kbdev->hwcnt.backend.triggered = 1;
-	wake_up(&kbdev->hwcnt.backend.wait);
-
-	spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
-}
-
-#if MALI_USE_CSF
-/**
- * kbasep_hwcnt_irq_poll_tasklet - tasklet to poll MCU IRQ status register
- *
- * @data: tasklet parameter which pointer to kbdev
- *
- * This tasklet poll GPU_IRQ_STATUS register in GPU_CONTROL_MCU page to check
- * PRFCNT_SAMPLE_COMPLETED bit.
- *
- * Tasklet is needed here since work_queue is too slow and cuased some test
- * cases timeout, the poll_count variable is introduced to avoid infinite
- * loop in unexpected cases, the poll_count is 1 or 2 in normal case, 128
- * should be big enough to exit the tasklet in abnormal cases.
- *
- * Return: void
- */
-static void kbasep_hwcnt_irq_poll_tasklet(unsigned long int data)
-{
-	struct kbase_device *kbdev = (struct kbase_device *)data;
-	unsigned long flags, pm_flags;
-	u32 mcu_gpu_irq_raw_status = 0;
-	u32 poll_count = 0;
-
-	while (1) {
-		spin_lock_irqsave(&kbdev->hwaccess_lock, pm_flags);
-		spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
-		mcu_gpu_irq_raw_status = kbase_reg_read(kbdev,
-			GPU_CONTROL_MCU_REG(GPU_IRQ_RAWSTAT));
-		spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
-		spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags);
-		if (mcu_gpu_irq_raw_status & PRFCNT_SAMPLE_COMPLETED) {
-			kbase_reg_write(kbdev,
-				GPU_CONTROL_MCU_REG(GPU_IRQ_CLEAR),
-				PRFCNT_SAMPLE_COMPLETED);
-			kbase_instr_hwcnt_sample_done(kbdev);
-			break;
-		} else if (poll_count++ > 128) {
-			dev_err(kbdev->dev,
-				"Err: HWC dump timeout, count: %u", poll_count);
-			/* Still call sample_done to unblock waiting thread */
-			kbase_instr_hwcnt_sample_done(kbdev);
-			break;
-		}
-	}
-}
-#endif
-
 void kbase_instr_hwcnt_sample_done(struct kbase_device *kbdev)
 {
 	unsigned long flags;
@@ -395,20 +259,10 @@ void kbase_instr_hwcnt_sample_done(struct kbase_device *kbdev)
 		kbdev->hwcnt.backend.triggered = 1;
 		wake_up(&kbdev->hwcnt.backend.wait);
 	} else if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_DUMPING) {
-		if (kbdev->mmu_mode->flags & KBASE_MMU_MODE_HAS_NON_CACHEABLE) {
-			/* All finished and idle */
-			kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_IDLE;
-			kbdev->hwcnt.backend.triggered = 1;
-			wake_up(&kbdev->hwcnt.backend.wait);
-		} else {
-			int ret;
-			/* Always clean and invalidate the cache after a successful dump
-			 */
-			kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_REQUEST_CLEAN;
-			ret = queue_work(kbdev->hwcnt.backend.cache_clean_wq,
-						&kbdev->hwcnt.backend.cache_clean_work);
-			KBASE_DEBUG_ASSERT(ret);
-		}
+		/* All finished and idle */
+		kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_IDLE;
+		kbdev->hwcnt.backend.triggered = 1;
+		wake_up(&kbdev->hwcnt.backend.wait);
 	}
 
 	spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
@@ -450,20 +304,16 @@ int kbase_instr_hwcnt_clear(struct kbase_context *kctx)
 	spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
 
 	/* Check it's the context previously set up and we're not already
-	 * dumping */
+	 * dumping
+	 */
 	if (kbdev->hwcnt.kctx != kctx || kbdev->hwcnt.backend.state !=
 							KBASE_INSTR_STATE_IDLE)
 		goto out;
 
 	/* Clear the counters */
 	KBASE_KTRACE_ADD(kbdev, CORE_GPU_PRFCNT_CLEAR, NULL, 0);
-#if MALI_USE_CSF
-	kbase_reg_write(kbdev, GPU_CONTROL_MCU_REG(GPU_COMMAND),
-					GPU_COMMAND_PRFCNT_CLEAR);
-#else
 	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
 						GPU_COMMAND_PRFCNT_CLEAR);
-#endif
 
 	err = 0;
 
@@ -475,46 +325,45 @@ KBASE_EXPORT_SYMBOL(kbase_instr_hwcnt_clear);
 
 int kbase_instr_backend_init(struct kbase_device *kbdev)
 {
-	int ret = 0;
+	spin_lock_init(&kbdev->hwcnt.lock);
 
 	kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_DISABLED;
 
 	init_waitqueue_head(&kbdev->hwcnt.backend.wait);
-	INIT_WORK(&kbdev->hwcnt.backend.cache_clean_work,
-						kbasep_cache_clean_worker);
-
-#if MALI_USE_CSF
-	tasklet_init(&kbdev->hwcnt.backend.csf_hwc_irq_poll_tasklet,
-		     kbasep_hwcnt_irq_poll_tasklet, (unsigned long int)kbdev);
-#endif
 
 	kbdev->hwcnt.backend.triggered = 0;
 
-#ifdef CONFIG_MALI_PRFCNT_SET_SECONDARY_VIA_DEBUG_FS
-	kbdev->hwcnt.backend.use_secondary_override = false;
+#ifdef CONFIG_MALI_PRFCNT_SET_SELECT_VIA_DEBUG_FS
+/* Use the build time option for the override default. */
+#if defined(CONFIG_MALI_BIFROST_PRFCNT_SET_SECONDARY)
+	kbdev->hwcnt.backend.override_counter_set = KBASE_HWCNT_SET_SECONDARY;
+#elif defined(CONFIG_MALI_PRFCNT_SET_TERTIARY)
+	kbdev->hwcnt.backend.override_counter_set = KBASE_HWCNT_SET_TERTIARY;
+#else
+	/* Default to primary */
+	kbdev->hwcnt.backend.override_counter_set = KBASE_HWCNT_SET_PRIMARY;
 #endif
-
-	kbdev->hwcnt.backend.cache_clean_wq =
-			alloc_workqueue("Mali cache cleaning workqueue", 0, 1);
-	if (NULL == kbdev->hwcnt.backend.cache_clean_wq)
-		ret = -EINVAL;
-
-	return ret;
+#endif
+	return 0;
 }
 
 void kbase_instr_backend_term(struct kbase_device *kbdev)
 {
-#if MALI_USE_CSF
-	tasklet_kill(&kbdev->hwcnt.backend.csf_hwc_irq_poll_tasklet);
-#endif
-	destroy_workqueue(kbdev->hwcnt.backend.cache_clean_wq);
+	CSTD_UNUSED(kbdev);
 }
 
-#ifdef CONFIG_MALI_PRFCNT_SET_SECONDARY_VIA_DEBUG_FS
+#ifdef CONFIG_MALI_PRFCNT_SET_SELECT_VIA_DEBUG_FS
 void kbase_instr_backend_debugfs_init(struct kbase_device *kbdev)
 {
-	debugfs_create_bool("hwcnt_use_secondary", S_IRUGO | S_IWUSR,
-		kbdev->mali_debugfs_directory,
-		&kbdev->hwcnt.backend.use_secondary_override);
+	/* No validation is done on the debugfs input. Invalid input could cause
+	 * performance counter errors. This is acceptable since this is a debug
+	 * only feature and users should know what they are doing.
+	 *
+	 * Valid inputs are the values accepted by the SET_SELECT bits of the
+	 * PRFCNT_CONFIG register as defined in the architecture specification.
+	 */
+	debugfs_create_u8("hwcnt_set_select", S_IRUGO | S_IWUSR,
+			  kbdev->mali_debugfs_directory,
+			  (u8 *)&kbdev->hwcnt.backend.override_counter_set);
 }
 #endif
diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_instr_defs.h b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_instr_defs.h
index 9f785ce16e17..05d5193b9b1c 100644
--- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_instr_defs.h
+++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_instr_defs.h
@@ -1,11 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  *
- * (C) COPYRIGHT 2014, 2016, 2018, 2019-2020 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014, 2016, 2018-2021 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,8 +17,6 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 /*
@@ -27,6 +26,8 @@
 #ifndef _KBASE_INSTR_DEFS_H_
 #define _KBASE_INSTR_DEFS_H_
 
+#include "../../mali_kbase_hwcnt_gpu.h"
+
 /*
  * Instrumentation State Machine States
  */
@@ -37,8 +38,6 @@ enum kbase_instr_state {
 	KBASE_INSTR_STATE_IDLE,
 	/* Hardware is currently dumping a frame. */
 	KBASE_INSTR_STATE_DUMPING,
-	/* We've requested a clean to occur on a workqueue */
-	KBASE_INSTR_STATE_REQUEST_CLEAN,
 	/* An error has occured during DUMPING (page fault). */
 	KBASE_INSTR_STATE_FAULT
 };
@@ -47,17 +46,11 @@ enum kbase_instr_state {
 struct kbase_instr_backend {
 	wait_queue_head_t wait;
 	int triggered;
-#ifdef CONFIG_MALI_PRFCNT_SET_SECONDARY_VIA_DEBUG_FS
-	bool use_secondary_override;
+#ifdef CONFIG_MALI_PRFCNT_SET_SELECT_VIA_DEBUG_FS
+	enum kbase_hwcnt_physical_set override_counter_set;
 #endif
 
 	enum kbase_instr_state state;
-	struct workqueue_struct *cache_clean_wq;
-	struct work_struct  cache_clean_work;
-#if MALI_USE_CSF
-	struct tasklet_struct csf_hwc_irq_poll_tasklet;
-#endif
 };
 
 #endif /* _KBASE_INSTR_DEFS_H_ */
-
diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_instr_internal.h b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_instr_internal.h
index 2254b9f30d02..c0472fadb687 100644
--- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_instr_internal.h
+++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_instr_internal.h
@@ -1,11 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  *
- * (C) COPYRIGHT 2014, 2018 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014, 2018, 2020 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,12 +17,8 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
-
-
 /*
  * Backend-specific HW access instrumentation APIs
  */
diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_irq_internal.h b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_irq_internal.h
index ca3c048b637a..bad249c9ee03 100644
--- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_irq_internal.h
+++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_irq_internal.h
@@ -1,11 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  *
- * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2015, 2020 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,8 +17,6 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 /*
diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_irq_linux.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_irq_linux.c
index b09db552e639..92aa4e481660 100644
--- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_irq_linux.c
+++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_irq_linux.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  *
  * (C) COPYRIGHT 2014-2016,2018-2020 ARM Limited. All rights reserved.
@@ -5,7 +6,7 @@
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,8 +17,6 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 #include <mali_kbase.h>
@@ -215,20 +214,21 @@ int kbase_set_custom_irq_handler(struct kbase_device *kbdev,
 	int result = 0;
 	irq_handler_t requested_irq_handler = NULL;
 
-	KBASE_DEBUG_ASSERT((JOB_IRQ_HANDLER <= irq_type) &&
-						(GPU_IRQ_HANDLER >= irq_type));
+	KBASE_DEBUG_ASSERT((irq_type >= JOB_IRQ_HANDLER) &&
+			   (irq_type <= GPU_IRQ_HANDLER));
 
 	/* Release previous handler */
 	if (kbdev->irqs[irq_type].irq)
 		free_irq(kbdev->irqs[irq_type].irq, kbase_tag(kbdev, irq_type));
 
-	requested_irq_handler = (NULL != custom_handler) ? custom_handler :
-						kbase_handler_table[irq_type];
+	requested_irq_handler = (custom_handler != NULL) ?
+					custom_handler :
+					kbase_handler_table[irq_type];
 
-	if (0 != request_irq(kbdev->irqs[irq_type].irq,
-			requested_irq_handler,
+	if (request_irq(kbdev->irqs[irq_type].irq, requested_irq_handler,
 			kbdev->irqs[irq_type].flags | IRQF_SHARED,
-			dev_name(kbdev->dev), kbase_tag(kbdev, irq_type))) {
+			dev_name(kbdev->dev),
+			kbase_tag(kbdev, irq_type)) != 0) {
 		result = -EINVAL;
 		dev_err(kbdev->dev, "Can't request interrupt %d (index %d)\n",
 					kbdev->irqs[irq_type].irq, irq_type);
diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_as.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_as.c
index 9b775898dac2..57ff67b75cab 100644
--- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_as.c
+++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_as.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  *
  * (C) COPYRIGHT 2014-2020 ARM Limited. All rights reserved.
@@ -5,7 +6,7 @@
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,11 +17,8 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
-
 /*
  * Register backend context / address space management
  */
@@ -190,8 +188,8 @@ int kbase_backend_find_and_release_free_address_space(
 			}
 
 			/* Context was retained while locks were dropped,
-			 * continue looking for free AS */
-
+			 * continue looking for free AS
+			 */
 			mutex_unlock(&js_devdata->runpool_mutex);
 			mutex_unlock(&as_js_kctx_info->ctx.jsctx_mutex);
 
diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_defs.h b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_defs.h
index 9cccf224999e..97663c7d00eb 100644
--- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_defs.h
+++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_defs.h
@@ -1,11 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  *
- * (C) COPYRIGHT 2014-2016, 2018-2019 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2020 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,11 +17,8 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
-
 /*
  * Register-based HW access backend specific definitions
  */
@@ -78,9 +76,8 @@ struct slot_rb {
  * The hwaccess_lock (a spinlock) must be held when accessing this structure
  */
 struct kbase_backend_data {
-	struct slot_rb slot_rb[BASE_JM_MAX_NR_SLOTS];
-
 #if !MALI_USE_CSF
+	struct slot_rb slot_rb[BASE_JM_MAX_NR_SLOTS];
 	struct hrtimer scheduling_timer;
 
 	bool timer_running;
@@ -94,13 +91,16 @@ struct kbase_backend_data {
 /* kbase_prepare_to_reset_gpu has been called */
 #define KBASE_RESET_GPU_PREPARED        1
 /* kbase_reset_gpu has been called - the reset will now definitely happen
- * within the timeout period */
+ * within the timeout period
+ */
 #define KBASE_RESET_GPU_COMMITTED       2
 /* The GPU reset process is currently occuring (timeout has expired or
- * kbasep_try_reset_gpu_early was called) */
+ * kbasep_try_reset_gpu_early was called)
+ */
 #define KBASE_RESET_GPU_HAPPENING       3
 /* Reset the GPU silently, used when resetting the GPU as part of normal
- * behavior (e.g. when exiting protected mode). */
+ * behavior (e.g. when exiting protected mode).
+ */
 #define KBASE_RESET_GPU_SILENT          4
 	struct workqueue_struct *reset_workq;
 	struct work_struct reset_work;
diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_hw.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_hw.c
index 19661c9766c6..6acacf1eeed4 100644
--- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_hw.c
+++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_hw.c
@@ -1,11 +1,12 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  *
- * (C) COPYRIGHT 2010-2020 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2021 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,8 +17,6 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 /*
@@ -41,10 +40,12 @@
 #include <mali_kbase_regs_history_debugfs.h>
 
 static void kbasep_try_reset_gpu_early_locked(struct kbase_device *kbdev);
+static u64 kbasep_apply_limited_core_mask(const struct kbase_device *kbdev,
+				const u64 affinity, const u64 limited_core_mask);
 
 static u64 kbase_job_write_affinity(struct kbase_device *kbdev,
 				base_jd_core_req core_req,
-				int js)
+				int js, const u64 limited_core_mask)
 {
 	u64 affinity;
 
@@ -73,14 +74,21 @@ static u64 kbase_job_write_affinity(struct kbase_device *kbdev,
 		 */
 		if (js == 2 && num_core_groups > 1)
 			affinity &= coherency_info->group[1].core_mask;
-		else
+		else if (num_core_groups > 1)
 			affinity &= coherency_info->group[0].core_mask;
+		else
+			affinity &= kbdev->gpu_props.curr_config.shader_present;
 	} else {
 		/* Use all cores */
 		affinity = kbdev->pm.backend.shaders_avail &
 				kbdev->pm.debug_core_mask[js];
 	}
 
+	if (core_req & BASE_JD_REQ_LIMITED_CORE_MASK) {
+		/* Limiting affinity due to BASE_JD_REQ_LIMITED_CORE_MASK by applying the limited core mask. */
+		affinity = kbasep_apply_limited_core_mask(kbdev, affinity, limited_core_mask);
+	}
+
 	if (unlikely(!affinity)) {
 #ifdef CONFIG_MALI_BIFROST_DEBUG
 		u64 shaders_ready =
@@ -90,6 +98,16 @@ static u64 kbase_job_write_affinity(struct kbase_device *kbdev,
 #endif
 
 		affinity = kbdev->pm.backend.shaders_avail;
+
+		if (core_req & BASE_JD_REQ_LIMITED_CORE_MASK) {
+			/* Limiting affinity again to make sure it only enables shader cores with backed TLS memory. */
+			affinity = kbasep_apply_limited_core_mask(kbdev, affinity, limited_core_mask);
+
+#ifdef CONFIG_MALI_BIFROST_DEBUG
+			/* affinity should never be 0 */
+			WARN_ON(!affinity);
+#endif
+		}
 	}
 
 	kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_AFFINITY_NEXT_LO),
@@ -170,7 +188,7 @@ static u64 select_job_chain(struct kbase_jd_atom *katom)
 	}
 
 	dev_dbg(kctx->kbdev->dev,
-		"Selected job chain 0x%llx for end atom %p in state %d\n",
+		"Selected job chain 0x%llx for end atom %pK in state %d\n",
 		jc, (void *)katom, (int)rp->state);
 
 	katom->jc = jc;
@@ -194,7 +212,7 @@ void kbase_job_hw_submit(struct kbase_device *kbdev,
 	/* Command register must be available */
 	KBASE_DEBUG_ASSERT(kbasep_jm_is_js_free(kbdev, js, kctx));
 
-	dev_dbg(kctx->kbdev->dev, "Write JS_HEAD_NEXT 0x%llx for atom %p\n",
+	dev_dbg(kctx->kbdev->dev, "Write JS_HEAD_NEXT 0x%llx for atom %pK\n",
 		jc_head, (void *)katom);
 
 	kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_HEAD_NEXT_LO),
@@ -202,10 +220,12 @@ void kbase_job_hw_submit(struct kbase_device *kbdev,
 	kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_HEAD_NEXT_HI),
 						jc_head >> 32);
 
-	affinity = kbase_job_write_affinity(kbdev, katom->core_req, js);
+	affinity = kbase_job_write_affinity(kbdev, katom->core_req, js,
+						kctx->limited_core_mask);
 
 	/* start MMU, medium priority, cache clean/flush on end, clean/flush on
-	 * start */
+	 * start
+	 */
 	cfg = kctx->as_nr;
 
 	if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_FLUSH_REDUCTION) &&
@@ -257,7 +277,7 @@ void kbase_job_hw_submit(struct kbase_device *kbdev,
 	katom->start_timestamp = ktime_get();
 
 	/* GO ! */
-	dev_dbg(kbdev->dev, "JS: Submitting atom %p from ctx %p to js[%d] with head=0x%llx",
+	dev_dbg(kbdev->dev, "JS: Submitting atom %pK from ctx %pK to js[%d] with head=0x%llx",
 				katom, kctx, js, jc_head);
 
 	KBASE_KTRACE_ADD_JM_SLOT_INFO(kbdev, JM_SUBMIT, kctx, katom, jc_head, js,
@@ -331,7 +351,8 @@ static void kbasep_job_slot_update_head_start_timestamp(
 			/* Only update the timestamp if it's a better estimate
 			 * than what's currently stored. This is because our
 			 * estimate that accounts for the throttle time may be
-			 * too much of an overestimate */
+			 * too much of an overestimate
+			 */
 			katom->start_timestamp = end_timestamp;
 		}
 	}
@@ -374,9 +395,9 @@ void kbase_job_done(struct kbase_device *kbdev, u32 done)
 		/* treat failed slots as finished slots */
 		u32 finished = (done & 0xFFFF) | failed;
 
-		/* Note: This is inherently unfair, as we always check
-		 * for lower numbered interrupts before the higher
-		 * numbered ones.*/
+		/* Note: This is inherently unfair, as we always check for lower
+		 * numbered interrupts before the higher numbered ones.
+		 */
 		i = ffs(finished) - 1;
 		KBASE_DEBUG_ASSERT(i >= 0);
 
@@ -388,7 +409,8 @@ void kbase_job_done(struct kbase_device *kbdev, u32 done)
 
 			if (failed & (1u << i)) {
 				/* read out the job slot status code if the job
-				 * slot reported failure */
+				 * slot reported failure
+				 */
 				completion_code = kbase_reg_read(kbdev,
 					JOB_SLOT_REG(i, JS_STATUS));
 
@@ -402,7 +424,8 @@ void kbase_job_done(struct kbase_device *kbdev, u32 done)
 
 					/* Soft-stopped job - read the value of
 					 * JS<n>_TAIL so that the job chain can
-					 * be resumed */
+					 * be resumed
+					 */
 					job_tail = (u64)kbase_reg_read(kbdev,
 						JOB_SLOT_REG(i, JS_TAIL_LO)) |
 						((u64)kbase_reg_read(kbdev,
@@ -411,21 +434,26 @@ void kbase_job_done(struct kbase_device *kbdev, u32 done)
 				} else if (completion_code ==
 						BASE_JD_EVENT_NOT_STARTED) {
 					/* PRLAM-10673 can cause a TERMINATED
-					 * job to come back as NOT_STARTED, but
-					 * the error interrupt helps us detect
-					 * it */
+					 * job to come back as NOT_STARTED,
+					 * but the error interrupt helps us
+					 * detect it
+					 */
 					completion_code =
 						BASE_JD_EVENT_TERMINATED;
 				}
 
 				kbase_gpu_irq_evict(kbdev, i, completion_code);
 
-				/* Some jobs that encounter a BUS FAULT may result in corrupted
-				 * state causing future jobs to hang. Reset GPU before
-				 * allowing any other jobs on the slot to continue. */
+				/* Some jobs that encounter a BUS FAULT may
+				 * result in corrupted state causing future
+				 * jobs to hang. Reset GPU before allowing
+				 * any other jobs on the slot to continue.
+				 */
 				if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TTRX_3076)) {
 					if (completion_code == BASE_JD_EVENT_JOB_BUS_FAULT) {
-						if (kbase_prepare_to_reset_gpu_locked(kbdev))
+						if (kbase_prepare_to_reset_gpu_locked(
+							    kbdev,
+							    RESET_FLAGS_NONE))
 							kbase_reset_gpu_locked(kbdev);
 					}
 				}
@@ -483,7 +511,8 @@ void kbase_job_done(struct kbase_device *kbdev, u32 done)
 
 				if ((rawstat >> (i + 16)) & 1) {
 					/* There is a failed job that we've
-					 * missed - add it back to active */
+					 * missed - add it back to active
+					 */
 					active |= (1u << i);
 				}
 			}
@@ -585,7 +614,8 @@ void kbasep_job_slot_soft_or_hard_stop_do_action(struct kbase_device *kbdev,
 		}
 
 		/* We are about to issue a soft stop, so mark the atom as having
-		 * been soft stopped */
+		 * been soft stopped
+		 */
 		target_katom->atom_flags |= KBASE_KATOM_FLAG_BEEN_SOFT_STOPPED;
 
 		/* Mark the point where we issue the soft-stop command */
@@ -781,7 +811,7 @@ static int softstop_start_rp_nolock(
 
 	if (!(katom->core_req & BASE_JD_REQ_START_RENDERPASS)) {
 		dev_dbg(kctx->kbdev->dev,
-			"Atom %p on job slot is not start RP\n", (void *)katom);
+			"Atom %pK on job slot is not start RP\n", (void *)katom);
 		return -EPERM;
 	}
 
@@ -794,13 +824,13 @@ static int softstop_start_rp_nolock(
 		rp->state != KBASE_JD_RP_RETRY))
 		return -EINVAL;
 
-	dev_dbg(kctx->kbdev->dev, "OOM in state %d with region %p\n",
+	dev_dbg(kctx->kbdev->dev, "OOM in state %d with region %pK\n",
 		(int)rp->state, (void *)reg);
 
 	if (WARN_ON(katom != rp->start_katom))
 		return -EINVAL;
 
-	dev_dbg(kctx->kbdev->dev, "Adding region %p to list %p\n",
+	dev_dbg(kctx->kbdev->dev, "Adding region %pK to list %pK\n",
 		(void *)reg, (void *)&rp->oom_reg_list);
 	list_move_tail(&reg->link, &rp->oom_reg_list);
 	dev_dbg(kctx->kbdev->dev, "Added region to list\n");
@@ -845,9 +875,9 @@ void kbase_jm_wait_for_zero_jobs(struct kbase_context *kctx)
 	if (timeout != 0)
 		goto exit;
 
-	if (kbase_prepare_to_reset_gpu(kbdev)) {
+	if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_NONE)) {
 		dev_err(kbdev->dev,
-			"Issueing GPU soft-reset because jobs failed to be killed (within %d ms) as part of context termination (e.g. process exit)\n",
+			"Issuing GPU soft-reset because jobs failed to be killed (within %d ms) as part of context termination (e.g. process exit)\n",
 			ZAP_TIMEOUT);
 		kbase_reset_gpu(kbdev);
 	}
@@ -855,7 +885,7 @@ void kbase_jm_wait_for_zero_jobs(struct kbase_context *kctx)
 	/* Wait for the reset to complete */
 	kbase_reset_gpu_wait(kbdev);
 exit:
-	dev_dbg(kbdev->dev, "Zap: Finished Context %p", kctx);
+	dev_dbg(kbdev->dev, "Zap: Finished Context %pK", kctx);
 
 	/* Ensure that the signallers of the waitqs have finished */
 	mutex_lock(&kctx->jctx.lock);
@@ -916,7 +946,7 @@ KBASE_EXPORT_TEST_API(kbase_job_slot_term);
 void kbase_job_slot_softstop_swflags(struct kbase_device *kbdev, int js,
 			struct kbase_jd_atom *target_katom, u32 sw_flags)
 {
-	dev_dbg(kbdev->dev, "Soft-stop atom %p with flags 0x%x (s:%d)\n",
+	dev_dbg(kbdev->dev, "Soft-stop atom %pK with flags 0x%x (s:%d)\n",
 		target_katom, sw_flags, js);
 
 	KBASE_DEBUG_ASSERT(!(sw_flags & JS_COMMAND_MASK));
@@ -1020,6 +1050,33 @@ void kbase_job_check_leave_disjoint(struct kbase_device *kbdev,
 	}
 }
 
+int kbase_reset_gpu_prevent_and_wait(struct kbase_device *kbdev)
+{
+	WARN(true, "%s Not implemented for JM GPUs", __func__);
+	return -EINVAL;
+}
+
+int kbase_reset_gpu_try_prevent(struct kbase_device *kbdev)
+{
+	WARN(true, "%s Not implemented for JM GPUs", __func__);
+	return -EINVAL;
+}
+
+void kbase_reset_gpu_allow(struct kbase_device *kbdev)
+{
+	WARN(true, "%s Not implemented for JM GPUs", __func__);
+}
+
+void kbase_reset_gpu_assert_prevented(struct kbase_device *kbdev)
+{
+	WARN(true, "%s Not implemented for JM GPUs", __func__);
+}
+
+void kbase_reset_gpu_assert_failed_or_prevented(struct kbase_device *kbdev)
+{
+	WARN(true, "%s Not implemented for JM GPUs", __func__);
+}
+
 static void kbase_debug_dump_registers(struct kbase_device *kbdev)
 {
 	int i;
@@ -1086,13 +1143,15 @@ static void kbasep_reset_timeout_worker(struct work_struct *data)
 
 	/* Make sure the timer has completed - this cannot be done from
 	 * interrupt context, so this cannot be done within
-	 * kbasep_try_reset_gpu_early. */
+	 * kbasep_try_reset_gpu_early.
+	 */
 	hrtimer_cancel(&kbdev->hwaccess.backend.reset_timer);
 
 	if (kbase_pm_context_active_handle_suspend(kbdev,
 				KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE)) {
 		/* This would re-activate the GPU. Since it's already idle,
-		 * there's no need to reset it */
+		 * there's no need to reset it
+		 */
 		atomic_set(&kbdev->hwaccess.backend.reset_gpu,
 						KBASE_RESET_GPU_NOT_PENDING);
 		kbase_disjoint_state_down(kbdev);
@@ -1113,14 +1172,16 @@ static void kbasep_reset_timeout_worker(struct work_struct *data)
 	kbdev->irq_reset_flush = true;
 
 	/* Disable IRQ to avoid IRQ handlers to kick in after releasing the
-	 * spinlock; this also clears any outstanding interrupts */
+	 * spinlock; this also clears any outstanding interrupts
+	 */
 	kbase_pm_disable_interrupts_nolock(kbdev);
 
 	spin_unlock(&kbdev->mmu_mask_change);
 	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
 
 	/* Ensure that any IRQ handlers have finished
-	 * Must be done without any locks IRQ handlers will take */
+	 * Must be done without any locks IRQ handlers will take
+	 */
 	kbase_synchronize_irqs(kbdev);
 
 	/* Flush out any in-flight work items */
@@ -1131,7 +1192,8 @@ static void kbasep_reset_timeout_worker(struct work_struct *data)
 
 	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TMIX_8463)) {
 		/* Ensure that L2 is not transitioning when we send the reset
-		 * command */
+		 * command
+		 */
 		while (--max_loops && kbase_pm_get_trans_cores(kbdev,
 				KBASE_PM_CORE_L2))
 			;
@@ -1146,14 +1208,16 @@ static void kbasep_reset_timeout_worker(struct work_struct *data)
 	/* All slot have been soft-stopped and we've waited
 	 * SOFT_STOP_RESET_TIMEOUT for the slots to clear, at this point we
 	 * assume that anything that is still left on the GPU is stuck there and
-	 * we'll kill it when we reset the GPU */
+	 * we'll kill it when we reset the GPU
+	 */
 
 	if (!silent)
 		dev_err(kbdev->dev, "Resetting GPU (allowing up to %d ms)",
 								RESET_TIMEOUT);
 
 	/* Output the state of some interesting registers to help in the
-	 * debugging of GPU resets */
+	 * debugging of GPU resets
+	 */
 	if (!silent)
 		kbase_debug_dump_registers(kbdev);
 
@@ -1192,7 +1256,8 @@ static void kbasep_reset_timeout_worker(struct work_struct *data)
 	kbase_pm_update_cores_state(kbdev);
 
 	/* Synchronously request and wait for those cores, because if
-	 * instrumentation is enabled it would need them immediately. */
+	 * instrumentation is enabled it would need them immediately.
+	 */
 	kbase_pm_wait_for_desired_state(kbdev);
 
 	mutex_unlock(&kbdev->pm.lock);
@@ -1269,7 +1334,8 @@ static void kbasep_try_reset_gpu_early_locked(struct kbase_device *kbdev)
 
 	/* Check that the reset has been committed to (i.e. kbase_reset_gpu has
 	 * been called), and that no other thread beat this thread to starting
-	 * the reset */
+	 * the reset
+	 */
 	if (atomic_cmpxchg(&kbdev->hwaccess.backend.reset_gpu,
 			KBASE_RESET_GPU_COMMITTED, KBASE_RESET_GPU_HAPPENING) !=
 						KBASE_RESET_GPU_COMMITTED) {
@@ -1293,6 +1359,7 @@ static void kbasep_try_reset_gpu_early(struct kbase_device *kbdev)
 /**
  * kbase_prepare_to_reset_gpu_locked - Prepare for resetting the GPU
  * @kbdev: kbase device
+ * @flags: Bitfield indicating impact of reset (see flag defines)
  *
  * This function just soft-stops all the slots to ensure that as many jobs as
  * possible are saved.
@@ -1303,10 +1370,12 @@ static void kbasep_try_reset_gpu_early(struct kbase_device *kbdev)
  *   false - Another thread is performing a reset, kbase_reset_gpu should
  *   not be called.
  */
-bool kbase_prepare_to_reset_gpu_locked(struct kbase_device *kbdev)
+bool kbase_prepare_to_reset_gpu_locked(struct kbase_device *kbdev,
+				       unsigned int flags)
 {
 	int i;
 
+	CSTD_UNUSED(flags);
 	KBASE_DEBUG_ASSERT(kbdev);
 
 #ifdef CONFIG_MALI_ARBITER_SUPPORT
@@ -1334,14 +1403,14 @@ bool kbase_prepare_to_reset_gpu_locked(struct kbase_device *kbdev)
 	return true;
 }
 
-bool kbase_prepare_to_reset_gpu(struct kbase_device *kbdev)
+bool kbase_prepare_to_reset_gpu(struct kbase_device *kbdev, unsigned int flags)
 {
-	unsigned long flags;
+	unsigned long lock_flags;
 	bool ret;
 
-	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
-	ret = kbase_prepare_to_reset_gpu_locked(kbdev);
-	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+	spin_lock_irqsave(&kbdev->hwaccess_lock, lock_flags);
+	ret = kbase_prepare_to_reset_gpu_locked(kbdev, flags);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, lock_flags);
 
 	return ret;
 }
@@ -1362,7 +1431,8 @@ void kbase_reset_gpu(struct kbase_device *kbdev)
 	KBASE_DEBUG_ASSERT(kbdev);
 
 	/* Note this is an assert/atomic_set because it is a software issue for
-	 * a race to be occuring here */
+	 * a race to be occurring here
+	 */
 	KBASE_DEBUG_ASSERT(atomic_read(&kbdev->hwaccess.backend.reset_gpu) ==
 						KBASE_RESET_GPU_PREPARED);
 	atomic_set(&kbdev->hwaccess.backend.reset_gpu,
@@ -1385,7 +1455,8 @@ void kbase_reset_gpu_locked(struct kbase_device *kbdev)
 	KBASE_DEBUG_ASSERT(kbdev);
 
 	/* Note this is an assert/atomic_set because it is a software issue for
-	 * a race to be occuring here */
+	 * a race to be occurring here
+	 */
 	KBASE_DEBUG_ASSERT(atomic_read(&kbdev->hwaccess.backend.reset_gpu) ==
 						KBASE_RESET_GPU_PREPARED);
 	atomic_set(&kbdev->hwaccess.backend.reset_gpu,
@@ -1460,3 +1531,21 @@ void kbase_reset_gpu_term(struct kbase_device *kbdev)
 {
 	destroy_workqueue(kbdev->hwaccess.backend.reset_workq);
 }
+
+/* Restrict @affinity to the cores set in @limited_core_mask and return it. */
+static u64 kbasep_apply_limited_core_mask(const struct kbase_device *kbdev,
+				const u64 affinity, const u64 limited_core_mask)
+{
+	const u64 result = affinity & limited_core_mask;
+
+#ifdef CONFIG_MALI_BIFROST_DEBUG
+	/* Print as unsigned long long: unsigned long would truncate u64 on 32-bit */
+	dev_dbg(kbdev->dev, "Limiting affinity due to BASE_JD_REQ_LIMITED_CORE_MASK from 0x%llx to 0x%llx (mask is 0x%llx)\n",
+		(unsigned long long)affinity, (unsigned long long)result,
+		(unsigned long long)limited_core_mask);
+#else
+	CSTD_UNUSED(kbdev);
+#endif
+
+	return result;
+}
diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_internal.h b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_internal.h
index cd1f9794fdc4..6761cbd9a32a 100644
--- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_internal.h
+++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_internal.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  *
  * (C) COPYRIGHT 2011-2016, 2018-2020 ARM Limited. All rights reserved.
@@ -5,7 +6,7 @@
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,12 +17,8 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
-
-
 /*
  * Job Manager backend-specific low-level APIs.
  */
diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_rb.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_rb.c
index afaaef27883d..5fdf9b63d263 100644
--- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_rb.c
+++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_rb.c
@@ -1,11 +1,12 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  *
- * (C) COPYRIGHT 2014-2020 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2021 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,11 +17,8 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
-
 /*
  * Register-based HW access backend specific APIs
  */
@@ -40,10 +38,12 @@
 #include <backend/gpu/mali_kbase_pm_internal.h>
 
 /* Return whether the specified ringbuffer is empty. HW access lock must be
- * held */
+ * held
+ */
 #define SLOT_RB_EMPTY(rb)   (rb->write_idx == rb->read_idx)
 /* Return number of atoms currently in the specified ringbuffer. HW access lock
- * must be held */
+ * must be held
+ */
 #define SLOT_RB_ENTRIES(rb) (int)(s8)(rb->write_idx - rb->read_idx)
 
 static void kbase_gpu_release_atom(struct kbase_device *kbdev,
@@ -284,7 +284,8 @@ static void kbase_gpu_release_atom(struct kbase_device *kbdev,
 		kbase_kinstr_jm_atom_hw_release(katom);
 		/* Inform power management at start/finish of atom so it can
 		 * update its GPU utilisation metrics. Mark atom as not
-		 * submitted beforehand. */
+		 * submitted beforehand.
+		 */
 		katom->gpu_rb_state = KBASE_ATOM_GPU_RB_READY;
 		kbase_pm_metrics_update(kbdev, end_timestamp);
 
@@ -544,7 +545,8 @@ static int kbase_jm_enter_protected_mode(struct kbase_device *kbdev,
 		KBASE_TLSTREAM_AUX_PROTECTED_ENTER_START(kbdev, kbdev);
 		/* The checks in KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_PREV
 		 * should ensure that we are not already transitiong, and that
-		 * there are no atoms currently on the GPU. */
+		 * there are no atoms currently on the GPU.
+		 */
 		WARN_ON(kbdev->protected_mode_transition);
 		WARN_ON(kbase_gpu_atoms_submitted_any(kbdev));
 		/* If hwcnt is disabled, it means we didn't clean up correctly
@@ -570,19 +572,15 @@ static int kbase_jm_enter_protected_mode(struct kbase_device *kbdev,
 
 		/* We couldn't disable atomically, so kick off a worker */
 		if (!kbdev->protected_mode_hwcnt_disabled) {
-#if KERNEL_VERSION(3, 16, 0) > LINUX_VERSION_CODE
-			queue_work(system_wq,
+			kbase_hwcnt_context_queue_work(
+				kbdev->hwcnt_gpu_ctx,
 				&kbdev->protected_mode_hwcnt_disable_work);
-#else
-			queue_work(system_highpri_wq,
-				&kbdev->protected_mode_hwcnt_disable_work);
-#endif
 			return -EAGAIN;
 		}
 
-		/* Once reaching this point GPU must be
-		 * switched to protected mode or hwcnt
-		 * re-enabled. */
+		/* Once reaching this point GPU must be switched to protected
+		 * mode or hwcnt re-enabled.
+		 */
 
 		if (kbase_pm_protected_entry_override_enable(kbdev))
 			return -EAGAIN;
@@ -722,7 +720,8 @@ static int kbase_jm_exit_protected_mode(struct kbase_device *kbdev,
 		KBASE_TLSTREAM_AUX_PROTECTED_LEAVE_START(kbdev, kbdev);
 		/* The checks in KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_PREV
 		 * should ensure that we are not already transitiong, and that
-		 * there are no atoms currently on the GPU. */
+		 * there are no atoms currently on the GPU.
+		 */
 		WARN_ON(kbdev->protected_mode_transition);
 		WARN_ON(kbase_gpu_atoms_submitted_any(kbdev));
 
@@ -768,8 +767,8 @@ static int kbase_jm_exit_protected_mode(struct kbase_device *kbdev,
 			katom[idx]->event_code = BASE_JD_EVENT_JOB_INVALID;
 			kbase_gpu_mark_atom_for_return(kbdev, katom[idx]);
 			/* Only return if head atom or previous atom
-			 * already removed - as atoms must be returned
-			 * in order */
+			 * already removed - as atoms must be returned in order
+			 */
 			if (idx == 0 || katom[0]->gpu_rb_state ==
 					KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB) {
 				kbase_gpu_dequeue_atom(kbdev, js, NULL);
@@ -912,12 +911,14 @@ void kbase_backend_slot_update(struct kbase_device *kbdev)
 					kbase_gpu_mark_atom_for_return(kbdev,
 							katom[idx]);
 					/* Set EVENT_DONE so this atom will be
-					   completed, not unpulled. */
+					 * completed, not unpulled.
+					 */
 					katom[idx]->event_code =
 						BASE_JD_EVENT_DONE;
 					/* Only return if head atom or previous
 					 * atom already removed - as atoms must
-					 * be returned in order. */
+					 * be returned in order.
+					 */
 					if (idx == 0 ||	katom[0]->gpu_rb_state ==
 							KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB) {
 						kbase_gpu_dequeue_atom(kbdev, js, NULL);
@@ -948,7 +949,8 @@ void kbase_backend_slot_update(struct kbase_device *kbdev)
 
 				if (idx == 1) {
 					/* Only submit if head atom or previous
-					 * atom already submitted */
+					 * atom already submitted
+					 */
 					if ((katom[0]->gpu_rb_state !=
 						KBASE_ATOM_GPU_RB_SUBMITTED &&
 						katom[0]->gpu_rb_state !=
@@ -964,7 +966,8 @@ void kbase_backend_slot_update(struct kbase_device *kbdev)
 				}
 
 				/* If inter-slot serialization in use then don't
-				 * submit atom if any other slots are in use */
+				 * submit atom if any other slots are in use
+				 */
 				if ((kbdev->serialize_jobs &
 						KBASE_SERIALIZE_INTER_SLOT) &&
 						other_slots_busy(kbdev, js))
@@ -976,7 +979,8 @@ void kbase_backend_slot_update(struct kbase_device *kbdev)
 					break;
 #endif
 				/* Check if this job needs the cycle counter
-				 * enabled before submission */
+				 * enabled before submission
+				 */
 				if (katom[idx]->core_req & BASE_JD_REQ_PERMON)
 					kbase_pm_request_gpu_cycle_counter_l2_is_on(
 									kbdev);
@@ -987,7 +991,8 @@ void kbase_backend_slot_update(struct kbase_device *kbdev)
 
 				/* Inform power management at start/finish of
 				 * atom so it can update its GPU utilisation
-				 * metrics. */
+				 * metrics.
+				 */
 				kbase_pm_metrics_update(kbdev,
 						&katom[idx]->start_timestamp);
 
@@ -1000,7 +1005,8 @@ void kbase_backend_slot_update(struct kbase_device *kbdev)
 			case KBASE_ATOM_GPU_RB_RETURN_TO_JS:
 				/* Only return if head atom or previous atom
 				 * already removed - as atoms must be returned
-				 * in order */
+				 * in order
+				 */
 				if (idx == 0 || katom[0]->gpu_rb_state ==
 					KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB) {
 					kbase_gpu_dequeue_atom(kbdev, js, NULL);
@@ -1018,7 +1024,7 @@ void kbase_backend_run_atom(struct kbase_device *kbdev,
 				struct kbase_jd_atom *katom)
 {
 	lockdep_assert_held(&kbdev->hwaccess_lock);
-	dev_dbg(kbdev->dev, "Backend running atom %p\n", (void *)katom);
+	dev_dbg(kbdev->dev, "Backend running atom %pK\n", (void *)katom);
 
 	kbase_gpu_enqueue_atom(kbdev, katom);
 	kbase_backend_slot_update(kbdev);
@@ -1079,7 +1085,7 @@ void kbase_gpu_complete_hw(struct kbase_device *kbdev, int js,
 	struct kbase_context *kctx = katom->kctx;
 
 	dev_dbg(kbdev->dev,
-		"Atom %p completed on hw with code 0x%x and job_tail 0x%llx (s:%d)\n",
+		"Atom %pK completed on hw with code 0x%x and job_tail 0x%llx (s:%d)\n",
 		(void *)katom, completion_code, job_tail, js);
 
 	lockdep_assert_held(&kbdev->hwaccess_lock);
@@ -1103,7 +1109,8 @@ void kbase_gpu_complete_hw(struct kbase_device *kbdev, int js,
 		 * BASE_JD_REQ_SKIP_CACHE_END is set, the GPU cache is not
 		 * flushed. To prevent future evictions causing possible memory
 		 * corruption we need to flush the cache manually before any
-		 * affected memory gets reused. */
+		 * affected memory gets reused.
+		 */
 		katom->need_cache_flush_cores_retained = true;
 	}
 
@@ -1184,7 +1191,8 @@ void kbase_gpu_complete_hw(struct kbase_device *kbdev, int js,
 					katom_idx1->gpu_rb_state !=
 					KBASE_ATOM_GPU_RB_SUBMITTED) {
 				/* Can not dequeue this atom yet - will be
-				 * dequeued when atom at idx0 completes */
+				 * dequeued when atom at idx0 completes
+				 */
 				katom_idx1->event_code = BASE_JD_EVENT_STOPPED;
 				kbase_gpu_mark_atom_for_return(kbdev,
 								katom_idx1);
@@ -1197,7 +1205,7 @@ void kbase_gpu_complete_hw(struct kbase_device *kbdev, int js,
 	if (job_tail != 0 && job_tail != katom->jc) {
 		/* Some of the job has been executed */
 		dev_dbg(kbdev->dev,
-			"Update job chain address of atom %p to resume from 0x%llx\n",
+			"Update job chain address of atom %pK to resume from 0x%llx\n",
 			(void *)katom, job_tail);
 
 		katom->jc = job_tail;
@@ -1258,7 +1266,7 @@ void kbase_gpu_complete_hw(struct kbase_device *kbdev, int js,
 
 	if (katom) {
 		dev_dbg(kbdev->dev,
-			"Cross-slot dependency %p has become runnable.\n",
+			"Cross-slot dependency %pK has become runnable.\n",
 			(void *)katom);
 
 		/* Check if there are lower priority jobs to soft stop */
@@ -1271,7 +1279,8 @@ void kbase_gpu_complete_hw(struct kbase_device *kbdev, int js,
 	kbase_pm_update_state(kbdev);
 
 	/* Job completion may have unblocked other atoms. Try to update all job
-	 * slots */
+	 * slots
+	 */
 	kbase_backend_slot_update(kbdev);
 }
 
@@ -1322,7 +1331,8 @@ void kbase_backend_reset(struct kbase_device *kbdev, ktime_t *end_timestamp)
 				katom->protected_state.exit = KBASE_ATOM_EXIT_PROTECTED_CHECK;
 				/* As the atom was not removed, increment the
 				 * index so that we read the correct atom in the
-				 * next iteration. */
+				 * next iteration.
+				 */
 				atom_idx++;
 				continue;
 			}
@@ -1425,7 +1435,8 @@ bool kbase_backend_soft_hard_stop_slot(struct kbase_device *kbdev,
 		katom_idx0_valid = (katom_idx0 == katom);
 		/* If idx0 is to be removed and idx1 is on the same context,
 		 * then idx1 must also be removed otherwise the atoms might be
-		 * returned out of order */
+		 * returned out of order
+		 */
 		if (katom_idx1)
 			katom_idx1_valid = (katom_idx1 == katom) ||
 						(katom_idx0_valid &&
@@ -1472,7 +1483,8 @@ bool kbase_backend_soft_hard_stop_slot(struct kbase_device *kbdev,
 				if (kbase_reg_read(kbdev, JOB_SLOT_REG(js,
 						JS_COMMAND_NEXT)) == 0) {
 					/* idx0 has already completed - stop
-					 * idx1 if needed*/
+					 * idx1 if needed
+					 */
 					if (katom_idx1_valid) {
 						kbase_gpu_stop_atom(kbdev, js,
 								katom_idx1,
@@ -1481,7 +1493,8 @@ bool kbase_backend_soft_hard_stop_slot(struct kbase_device *kbdev,
 					}
 				} else {
 					/* idx1 is in NEXT registers - attempt
-					 * to remove */
+					 * to remove
+					 */
 					kbase_reg_write(kbdev,
 							JOB_SLOT_REG(js,
 							JS_COMMAND_NEXT),
@@ -1496,7 +1509,8 @@ bool kbase_backend_soft_hard_stop_slot(struct kbase_device *kbdev,
 							JS_HEAD_NEXT_HI))
 									!= 0) {
 						/* idx1 removed successfully,
-						 * will be handled in IRQ */
+						 * will be handled in IRQ
+						 */
 						kbase_gpu_remove_atom(kbdev,
 								katom_idx1,
 								action, true);
@@ -1510,7 +1524,8 @@ bool kbase_backend_soft_hard_stop_slot(struct kbase_device *kbdev,
 						ret = true;
 					} else if (katom_idx1_valid) {
 						/* idx0 has already completed,
-						 * stop idx1 if needed */
+						 * stop idx1 if needed
+						 */
 						kbase_gpu_stop_atom(kbdev, js,
 								katom_idx1,
 								action);
@@ -1529,7 +1544,8 @@ bool kbase_backend_soft_hard_stop_slot(struct kbase_device *kbdev,
 				 * flow was also interrupted, and this function
 				 * might not enter disjoint state e.g. if we
 				 * don't actually do a hard stop on the head
-				 * atom */
+				 * atom
+				 */
 				kbase_gpu_stop_atom(kbdev, js, katom_idx0,
 									action);
 				ret = true;
@@ -1557,7 +1573,8 @@ bool kbase_backend_soft_hard_stop_slot(struct kbase_device *kbdev,
 				ret = true;
 			} else {
 				/* idx1 is in NEXT registers - attempt to
-				 * remove */
+				 * remove
+				 */
 				kbase_reg_write(kbdev, JOB_SLOT_REG(js,
 							JS_COMMAND_NEXT),
 							JS_COMMAND_NOP);
@@ -1567,13 +1584,15 @@ bool kbase_backend_soft_hard_stop_slot(struct kbase_device *kbdev,
 				    kbase_reg_read(kbdev, JOB_SLOT_REG(js,
 						JS_HEAD_NEXT_HI)) != 0) {
 					/* idx1 removed successfully, will be
-					 * handled in IRQ once idx0 completes */
+					 * handled in IRQ once idx0 completes
+					 */
 					kbase_gpu_remove_atom(kbdev, katom_idx1,
 									action,
 									false);
 				} else {
 					/* idx0 has already completed - stop
-					 * idx1 */
+					 * idx1
+					 */
 					kbase_gpu_stop_atom(kbdev, js,
 								katom_idx1,
 								action);
@@ -1647,7 +1666,7 @@ void kbase_gpu_dump_slots(struct kbase_device *kbdev)
 
 			if (katom)
 				dev_info(kbdev->dev,
-				"  js%d idx%d : katom=%p gpu_rb_state=%d\n",
+				"  js%d idx%d : katom=%pK gpu_rb_state=%d\n",
 				js, idx, katom, katom->gpu_rb_state);
 			else
 				dev_info(kbdev->dev, "  js%d idx%d : empty\n",
diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_rb.h b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_rb.h
index c3b9f2d85536..14da98143f74 100644
--- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_rb.h
+++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_rb.h
@@ -1,11 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  *
- * (C) COPYRIGHT 2014-2018 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2018, 2020 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,11 +17,8 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
-
 /*
  * Register-based HW access backend specific APIs
  */
diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_js_backend.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_js_backend.c
index 8187e73767be..0940bccccddd 100644
--- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_js_backend.c
+++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_js_backend.c
@@ -1,11 +1,12 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  *
- * (C) COPYRIGHT 2014-2020 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2021 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,11 +17,8 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
-
 /*
  * Register-based HW access backend specific job scheduler APIs
  */
@@ -48,7 +46,8 @@ static inline bool timer_callback_should_run(struct kbase_device *kbdev)
 
 	/* nr_contexts_pullable is updated with the runpool_mutex. However, the
 	 * locking in the caller gives us a barrier that ensures
-	 * nr_contexts_pullable is up-to-date for reading */
+	 * nr_contexts_pullable is up-to-date for reading
+	 */
 	nr_running_ctxs = atomic_read(&kbdev->js_data.nr_contexts_runnable);
 
 #ifdef CONFIG_MALI_BIFROST_DEBUG
@@ -114,7 +113,8 @@ static enum hrtimer_restart timer_callback(struct hrtimer *timer)
 
 		if (atom != NULL) {
 			/* The current version of the model doesn't support
-			 * Soft-Stop */
+			 * Soft-Stop
+			 */
 			if (!kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_5736)) {
 				u32 ticks = atom->ticks++;
 
@@ -142,7 +142,8 @@ static enum hrtimer_restart timer_callback(struct hrtimer *timer)
 				 * new soft_stop timeout. This ensures that
 				 * atoms do not miss any of the timeouts due to
 				 * races between this worker and the thread
-				 * changing the timeouts. */
+				 * changing the timeouts.
+				 */
 				if (backend->timeouts_updated &&
 						ticks > soft_stop_ticks)
 					ticks = atom->ticks = soft_stop_ticks;
@@ -172,10 +173,11 @@ static enum hrtimer_restart timer_callback(struct hrtimer *timer)
 					 *
 					 * Similarly, if it's about to be
 					 * decreased, the last job from another
-					 * context has already finished, so it's
-					 * not too bad that we observe the older
-					 * value and register a disjoint event
-					 * when we try soft-stopping */
+					 * context has already finished, so
+					 * it's not too bad that we observe the
+					 * older value and register a disjoint
+					 * event when we try soft-stopping
+					 */
 					if (js_devdata->nr_user_contexts_running
 							>= disjoint_threshold)
 						softstop_flags |=
@@ -253,9 +255,9 @@ static enum hrtimer_restart timer_callback(struct hrtimer *timer)
 		}
 	}
 	if (reset_needed) {
-		dev_err(kbdev->dev, "JS: Job has been on the GPU for too long (JS_RESET_TICKS_SS/DUMPING timeout hit). Issueing GPU soft-reset to resolve.");
+		dev_err(kbdev->dev, "JS: Job has been on the GPU for too long (JS_RESET_TICKS_SS/DUMPING timeout hit). Issuing GPU soft-reset to resolve.");
 
-		if (kbase_prepare_to_reset_gpu_locked(kbdev))
+		if (kbase_prepare_to_reset_gpu_locked(kbdev, RESET_FLAGS_NONE))
 			kbase_reset_gpu_locked(kbdev);
 	}
 	/* the timer is re-issued if there is contexts in the run-pool */
@@ -287,11 +289,12 @@ void kbase_backend_ctx_count_changed(struct kbase_device *kbdev)
 		spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
 		backend->timer_running = false;
 		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
-		/* From now on, return value of timer_callback_should_run() will
-		 * also cause the timer to not requeue itself. Its return value
-		 * cannot change, because it depends on variables updated with
-		 * the runpool_mutex held, which the caller of this must also
-		 * hold */
+		/* From now on, return value of timer_callback_should_run()
+		 * will also cause the timer to not requeue itself. Its return
+		 * value cannot change, because it depends on variables updated
+		 * with the runpool_mutex held, which the caller of this must
+		 * also hold
+		 */
 		hrtimer_cancel(&backend->scheduling_timer);
 	}
 
diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_js_internal.h b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_js_internal.h
index 6576e55d2e39..e15528d069ac 100644
--- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_js_internal.h
+++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_js_internal.h
@@ -1,11 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  *
- * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2015, 2020 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,11 +17,8 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
-
 /*
  * Register-based HW access backend specific job scheduler APIs
  */
diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_l2_mmu_config.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_l2_mmu_config.c
index d5526caa5899..884cbd6326f1 100644
--- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_l2_mmu_config.c
+++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_l2_mmu_config.c
@@ -6,7 +6,7 @@
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -17,8 +17,6 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 #include <mali_kbase.h>
diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_l2_mmu_config.h b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_l2_mmu_config.h
index 0c779ac80d27..2dfeadb178c2 100644
--- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_l2_mmu_config.h
+++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_l2_mmu_config.h
@@ -1,31 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  *
- * (C) COPYRIGHT ARM Limited. All rights reserved.
- *
- * This program is free software and is provided to you under the terms of the
- * GNU General Public License version 2 as published by the Free Software
- * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, you can access it online at
- * http://www.gnu.org/licenses/gpl-2.0.html.
- *
- * SPDX-License-Identifier: GPL-2.0
- *
- *//* SPDX-License-Identifier: GPL-2.0 */
-/*
  * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_always_on.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_always_on.c
index e33fe0b8e415..66fb24d27b86 100644
--- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_always_on.c
+++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_always_on.c
@@ -1,11 +1,12 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  *
- * (C) COPYRIGHT 2010-2015, 2018-2019 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2015, 2018-2020 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,12 +17,8 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
-
-
 /*
  * "Always on" power management policy
  */
@@ -62,6 +59,9 @@ const struct kbase_pm_policy kbase_pm_always_on_policy_ops = {
 	always_on_shaders_needed,	/* shaders_needed */
 	always_on_get_core_active,	/* get_core_active */
 	KBASE_PM_POLICY_ID_ALWAYS_ON,	/* id */
+#if MALI_USE_CSF
+	ALWAYS_ON_PM_SCHED_FLAGS,	/* pm_sched_flags */
+#endif
 };
 
 KBASE_EXPORT_TEST_API(kbase_pm_always_on_policy_ops);
diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_always_on.h b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_always_on.h
index e7927cf82e5a..e500d46dc82d 100644
--- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_always_on.h
+++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_always_on.h
@@ -1,11 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  *
- * (C) COPYRIGHT 2011-2015,2018 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2011-2015, 2018, 2020 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,12 +17,8 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
-
-
 /*
  * "Always on" power management policy
  */
diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_backend.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_backend.c
index 7b10d06c5fdb..a03078b33063 100644
--- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_backend.c
+++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_backend.c
@@ -1,11 +1,12 @@
- /*
+// SPDX-License-Identifier: GPL-2.0
+/*
  *
- * (C) COPYRIGHT 2010-2020 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2021 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,11 +17,8 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
-
 /*
  * GPU backend implementation of base kernel power management APIs
  */
@@ -156,15 +154,25 @@ int kbase_hwaccess_pm_init(struct kbase_device *kbdev)
 #endif /* CONFIG_MALI_BIFROST_DEBUG */
 	init_waitqueue_head(&kbdev->pm.backend.gpu_in_desired_state_wait);
 
+#if !MALI_USE_CSF
 	/* Initialise the metrics subsystem */
 	ret = kbasep_pm_metrics_init(kbdev);
 	if (ret)
 		return ret;
+#else
+	mutex_init(&kbdev->pm.backend.policy_change_lock);
+	kbdev->pm.backend.policy_change_clamp_state_to_off = false;
+	/* Due to dependency on kbase_ipa_control, the metrics subsystem can't
+	 * be initialized here.
+	 */
+	CSTD_UNUSED(ret);
+#endif
 
 	init_waitqueue_head(&kbdev->pm.backend.reset_done_wait);
 	kbdev->pm.backend.reset_done = false;
 
 	init_waitqueue_head(&kbdev->pm.zero_active_count_wait);
+	init_waitqueue_head(&kbdev->pm.resume_wait);
 	kbdev->pm.active_count = 0;
 
 	spin_lock_init(&kbdev->pm.backend.gpu_cycle_counter_requests_lock);
@@ -221,7 +229,9 @@ pm_state_machine_fail:
 	kbase_pm_policy_term(kbdev);
 	kbase_pm_ca_term(kbdev);
 workq_fail:
+#if !MALI_USE_CSF
 	kbasep_pm_metrics_term(kbdev);
+#endif
 	return -EINVAL;
 }
 
@@ -230,7 +240,8 @@ void kbase_pm_do_poweron(struct kbase_device *kbdev, bool is_resume)
 	lockdep_assert_held(&kbdev->pm.lock);
 
 	/* Turn clocks and interrupts on - no-op if we haven't done a previous
-	 * kbase_pm_clock_off() */
+	 * kbase_pm_clock_off()
+	 */
 	kbase_pm_clock_on(kbdev, is_resume);
 
 	if (!is_resume) {
@@ -248,7 +259,8 @@ void kbase_pm_do_poweron(struct kbase_device *kbdev, bool is_resume)
 	kbase_pm_update_cores_state(kbdev);
 
 	/* NOTE: We don't wait to reach the desired state, since running atoms
-	 * will wait for that state to be reached anyway */
+	 * will wait for that state to be reached anyway
+	 */
 }
 
 static void kbase_pm_gpu_poweroff_wait_wq(struct work_struct *data)
@@ -486,7 +498,15 @@ static void kbase_pm_hwcnt_disable_worker(struct work_struct *data)
 		/* PM state was updated while we were doing the disable,
 		 * so we need to undo the disable we just performed.
 		 */
+#if MALI_USE_CSF
+		unsigned long lock_flags;
+
+		kbase_csf_scheduler_spin_lock(kbdev, &lock_flags);
+#endif
 		kbase_hwcnt_context_enable(kbdev->hwcnt_gpu_ctx);
+#if MALI_USE_CSF
+		kbase_csf_scheduler_spin_unlock(kbdev, lock_flags);
+#endif
 	}
 
 	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
@@ -562,20 +582,35 @@ int kbase_hwaccess_pm_powerup(struct kbase_device *kbdev,
 	KBASE_DEBUG_ASSERT(!kbase_pm_is_suspending(kbdev));
 
 	/* Power up the GPU, don't enable IRQs as we are not ready to receive
-	 * them. */
+	 * them
+	 */
 	ret = kbase_pm_init_hw(kbdev, flags);
 	if (ret) {
 		kbase_pm_unlock(kbdev);
 		return ret;
 	}
-
+#if MALI_USE_CSF
+	kbdev->pm.debug_core_mask =
+		kbdev->gpu_props.props.raw_props.shader_present;
+	spin_lock_irqsave(&kbdev->hwaccess_lock, irq_flags);
+	/* Set the initial value for 'shaders_avail'. After this point it is
+	 * modified only from the MCU state machine, once the shader core
+	 * allocation enable mask request has completed. Its value therefore
+	 * indicates the mask of cores currently being used by FW for the
+	 * allocation of endpoints requested by CSGs.
+	 */
+	kbdev->pm.backend.shaders_avail = kbase_pm_ca_get_core_mask(kbdev);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, irq_flags);
+#else
 	kbdev->pm.debug_core_mask_all = kbdev->pm.debug_core_mask[0] =
 			kbdev->pm.debug_core_mask[1] =
 			kbdev->pm.debug_core_mask[2] =
 			kbdev->gpu_props.props.raw_props.shader_present;
+#endif
 
 	/* Pretend the GPU is active to prevent a power policy turning the GPU
-	 * cores off */
+	 * cores off
+	 */
 	kbdev->pm.active_count = 1;
 
 	spin_lock_irqsave(&kbdev->pm.backend.gpu_cycle_counter_requests_lock,
@@ -587,7 +622,8 @@ int kbase_hwaccess_pm_powerup(struct kbase_device *kbdev,
 								irq_flags);
 
 	/* We are ready to receive IRQ's now as power policy is set up, so
-	 * enable them now. */
+	 * enable them now.
+	 */
 #ifdef CONFIG_MALI_BIFROST_DEBUG
 	kbdev->pm.backend.driver_ready_for_irqs = true;
 #endif
@@ -620,6 +656,8 @@ void kbase_hwaccess_pm_halt(struct kbase_device *kbdev)
 	mutex_lock(&kbdev->pm.lock);
 	kbase_pm_do_poweroff(kbdev);
 	mutex_unlock(&kbdev->pm.lock);
+
+	kbase_pm_wait_for_poweroff_complete(kbdev);
 }
 
 KBASE_EXPORT_TEST_API(kbase_hwaccess_pm_halt);
@@ -634,10 +672,15 @@ void kbase_hwaccess_pm_term(struct kbase_device *kbdev)
 
 	if (kbdev->pm.backend.hwcnt_disabled) {
 		unsigned long flags;
-
+#if MALI_USE_CSF
+		kbase_csf_scheduler_spin_lock(kbdev, &flags);
+		kbase_hwcnt_context_enable(kbdev->hwcnt_gpu_ctx);
+		kbase_csf_scheduler_spin_unlock(kbdev, flags);
+#else
 		spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
 		kbase_hwcnt_context_enable(kbdev->hwcnt_gpu_ctx);
 		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+#endif
 	}
 
 	/* Free any resources the policy allocated */
@@ -645,8 +688,16 @@ void kbase_hwaccess_pm_term(struct kbase_device *kbdev)
 	kbase_pm_policy_term(kbdev);
 	kbase_pm_ca_term(kbdev);
 
+#if !MALI_USE_CSF
 	/* Shut down the metrics subsystem */
 	kbasep_pm_metrics_term(kbdev);
+#else
+	if (WARN_ON(mutex_is_locked(&kbdev->pm.backend.policy_change_lock))) {
+		mutex_lock(&kbdev->pm.backend.policy_change_lock);
+		mutex_unlock(&kbdev->pm.backend.policy_change_lock);
+	}
+	mutex_destroy(&kbdev->pm.backend.policy_change_lock);
+#endif
 
 	destroy_workqueue(kbdev->pm.backend.gpu_poweroff_wait_wq);
 }
@@ -665,6 +716,17 @@ void kbase_pm_power_changed(struct kbase_device *kbdev)
 	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
 }
 
+#if MALI_USE_CSF
+void kbase_pm_set_debug_core_mask(struct kbase_device *kbdev, u64 new_core_mask)
+{
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+	lockdep_assert_held(&kbdev->pm.lock);
+
+	kbdev->pm.debug_core_mask = new_core_mask;
+	kbase_pm_update_dynamic_cores_onoff(kbdev);
+}
+KBASE_EXPORT_TEST_API(kbase_pm_set_debug_core_mask);
+#else
 void kbase_pm_set_debug_core_mask(struct kbase_device *kbdev,
 		u64 new_core_mask_js0, u64 new_core_mask_js1,
 		u64 new_core_mask_js2)
@@ -685,6 +747,7 @@ void kbase_pm_set_debug_core_mask(struct kbase_device *kbdev,
 
 	kbase_pm_update_dynamic_cores_onoff(kbdev);
 }
+#endif /* MALI_USE_CSF */
 
 void kbase_hwaccess_pm_gpu_active(struct kbase_device *kbdev)
 {
@@ -700,7 +763,8 @@ void kbase_hwaccess_pm_suspend(struct kbase_device *kbdev)
 {
 	/* Force power off the GPU and all cores (regardless of policy), only
 	 * after the PM active count reaches zero (otherwise, we risk turning it
-	 * off prematurely) */
+	 * off prematurely)
+	 */
 	kbase_pm_lock(kbdev);
 
 	kbase_pm_do_poweroff(kbdev);
@@ -735,6 +799,7 @@ void kbase_hwaccess_pm_resume(struct kbase_device *kbdev)
 	kbase_backend_timer_resume(kbdev);
 #endif /* !MALI_USE_CSF */
 
+	wake_up_all(&kbdev->pm.resume_wait);
 	kbase_pm_unlock(kbdev);
 }
 
@@ -745,6 +810,9 @@ void kbase_pm_handle_gpu_lost(struct kbase_device *kbdev)
 	ktime_t end_timestamp = ktime_get();
 	struct kbase_arbiter_vm_state *arb_vm_state = kbdev->pm.arb_vm_state;
 
+	if (!kbdev->arb.arb_if)
+		return;
+
 	mutex_lock(&kbdev->pm.lock);
 	mutex_lock(&arb_vm_state->vm_state_lock);
 	if (kbdev->pm.backend.gpu_powered &&
diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_ca.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_ca.c
index 984e12503009..368f89dd0c61 100644
--- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_ca.c
+++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_ca.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  *
  * (C) COPYRIGHT 2013-2018, 2020 ARM Limited. All rights reserved.
@@ -5,7 +6,7 @@
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,8 +17,6 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 /*
@@ -59,6 +58,14 @@ void kbase_devfreq_set_core_mask(struct kbase_device *kbdev, u64 core_mask)
 
 	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
 
+#if MALI_USE_CSF
+	if (!(core_mask & kbdev->pm.debug_core_mask)) {
+		dev_err(kbdev->dev,
+			"OPP core mask 0x%llX does not intersect with debug mask 0x%llX\n",
+			core_mask, kbdev->pm.debug_core_mask);
+		goto unlock;
+	}
+#else
 	if (!(core_mask & kbdev->pm.debug_core_mask_all)) {
 		dev_err(kbdev->dev, "OPP core mask 0x%llX does not intersect with debug mask 0x%llX\n",
 				core_mask, kbdev->pm.debug_core_mask_all);
@@ -69,6 +76,7 @@ void kbase_devfreq_set_core_mask(struct kbase_device *kbdev, u64 core_mask)
 		dev_err(kbdev->dev, "Dynamic core scaling not supported as dummy job WA is enabled");
 		goto unlock;
 	}
+#endif /* MALI_USE_CSF */
 
 	pm_backend->ca_cores_enabled = core_mask;
 
@@ -80,21 +88,32 @@ unlock:
 	dev_dbg(kbdev->dev, "Devfreq policy : new core mask=%llX\n",
 			pm_backend->ca_cores_enabled);
 }
+KBASE_EXPORT_TEST_API(kbase_devfreq_set_core_mask);
 #endif
 
 u64 kbase_pm_ca_get_core_mask(struct kbase_device *kbdev)
 {
-#ifdef CONFIG_MALI_BIFROST_DEVFREQ
-	struct kbase_pm_backend_data *pm_backend = &kbdev->pm.backend;
+#if MALI_USE_CSF
+	u64 debug_core_mask = kbdev->pm.debug_core_mask;
+#else
+	u64 debug_core_mask = kbdev->pm.debug_core_mask_all;
 #endif
 
 	lockdep_assert_held(&kbdev->hwaccess_lock);
 
 #ifdef CONFIG_MALI_BIFROST_DEVFREQ
-	return pm_backend->ca_cores_enabled & kbdev->pm.debug_core_mask_all;
+	/*
+	 * Although init sets pm_backend->ca_cores_enabled to the max config
+	 * (it uses the base_gpu_props), in this function we need to limit it
+	 * to a subset of the curr config, otherwise the shaders state machine
+	 * in the PM does not make progress.
+	 */
+	return kbdev->gpu_props.curr_config.shader_present &
+			kbdev->pm.backend.ca_cores_enabled &
+			debug_core_mask;
 #else
-	return kbdev->gpu_props.props.raw_props.shader_present &
-			kbdev->pm.debug_core_mask_all;
+	return kbdev->gpu_props.curr_config.shader_present &
+		debug_core_mask;
 #endif
 }
 
diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_ca.h b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_ca.h
index 5423e96725b9..c20bf6f965ee 100644
--- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_ca.h
+++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_ca.h
@@ -1,11 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  *
- * (C) COPYRIGHT 2011-2018 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2011-2018, 2020 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,8 +17,6 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 /*
diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_ca_devfreq.h b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_ca_devfreq.h
index f67ec650c981..d24bccd9e1a0 100644
--- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_ca_devfreq.h
+++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_ca_devfreq.h
@@ -1,11 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  *
- * (C) COPYRIGHT 2017 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2017, 2020 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,8 +17,6 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 /*
diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_coarse_demand.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_coarse_demand.c
index 9eef44ad877f..dd8fad4f2384 100644
--- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_coarse_demand.c
+++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_coarse_demand.c
@@ -1,11 +1,12 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  *
- * (C) COPYRIGHT 2012-2016, 2018-2019 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2012-2016, 2018-2020 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,12 +17,8 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
-
-
 /*
  * "Coarse Demand" power management policy
  */
@@ -61,6 +58,9 @@ const struct kbase_pm_policy kbase_pm_coarse_demand_policy_ops = {
 	coarse_demand_shaders_needed,		/* shaders_needed */
 	coarse_demand_get_core_active,		/* get_core_active */
 	KBASE_PM_POLICY_ID_COARSE_DEMAND,	/* id */
+#if MALI_USE_CSF
+	COARSE_ON_DEMAND_PM_SCHED_FLAGS,	/* pm_sched_flags */
+#endif
 };
 
 KBASE_EXPORT_TEST_API(kbase_pm_coarse_demand_policy_ops);
diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_coarse_demand.h b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_coarse_demand.h
index 304e5d7fa32d..f1a0e3aab24e 100644
--- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_coarse_demand.h
+++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_coarse_demand.h
@@ -1,11 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  *
- * (C) COPYRIGHT 2012-2015,2018 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2012-2015, 2018, 2020 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,12 +17,8 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
-
-
 /*
  * "Coarse Demand" power management policy
  */
diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_defs.h b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_defs.h
index 7322c093c7b6..e7017ad14792 100644
--- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_defs.h
+++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_defs.h
@@ -1,11 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  *
- * (C) COPYRIGHT 2014-2020 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2021 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,8 +17,6 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 /*
@@ -62,24 +61,9 @@ enum kbase_pm_core_type {
 	KBASE_PM_CORE_STACK = STACK_PRESENT_LO
 };
 
-/**
+/*
  * enum kbase_l2_core_state - The states used for the L2 cache & tiler power
  *                            state machine.
- *
- * @KBASE_L2_OFF: The L2 cache and tiler are off
- * @KBASE_L2_PEND_ON: The L2 cache and tiler are powering on
- * @KBASE_L2_RESTORE_CLOCKS: The GPU clock is restored. Conditionally used.
- * @KBASE_L2_ON_HWCNT_ENABLE: The L2 cache and tiler are on, and hwcnt is being
- *                            enabled
- * @KBASE_L2_ON: The L2 cache and tiler are on, and hwcnt is enabled
- * @KBASE_L2_ON_HWCNT_DISABLE: The L2 cache and tiler are on, and hwcnt is being
- *                             disabled
- * @KBASE_L2_SLOW_DOWN_CLOCKS: The GPU clock is set to appropriate or lowest
- *                             clock. Conditionally used.
- * @KBASE_L2_POWER_DOWN: The L2 cache and tiler are about to be powered off
- * @KBASE_L2_PEND_OFF: The L2 cache and tiler are powering off
- * @KBASE_L2_RESET_WAIT: The GPU is resetting, L2 cache and tiler power state
- *                       are unknown
  */
 enum kbase_l2_core_state {
 #define KBASEP_L2_STATE(n) KBASE_L2_ ## n,
@@ -88,24 +72,8 @@ enum kbase_l2_core_state {
 };
 
 #if MALI_USE_CSF
-/**
+/*
  * enum kbase_mcu_state - The states used for the MCU state machine.
- *
- * @KBASE_MCU_OFF:            The MCU is powered off.
- * @KBASE_MCU_PEND_ON_RELOAD: The warm boot of MCU or cold boot of MCU (with
- *                            firmware reloading) is in progress.
- * @KBASE_MCU_ON_GLB_REINIT_PEND: The MCU is enabled and Global configuration
- *                                requests have been sent to the firmware.
- * @KBASE_MCU_ON_HWCNT_ENABLE: The Global requests have completed and MCU is
- *                             now ready for use and hwcnt is being enabled.
- * @KBASE_MCU_ON:             The MCU is active and hwcnt has been enabled.
- * @KBASE_MCU_ON_HWCNT_DISABLE: The MCU is on and hwcnt is being disabled.
- * @KBASE_MCU_ON_HALT:        The MCU is on and hwcnt has been disabled,
- *                            MCU halt would be triggered.
- * @KBASE_MCU_ON_PEND_HALT:   MCU halt in progress, confirmation pending.
- * @KBASE_MCU_POWER_DOWN:     MCU halted operations, pending being disabled.
- * @KBASE_MCU_PEND_OFF:       MCU is being disabled, pending on powering off.
- * @KBASE_MCU_RESET_WAIT:     The GPU is resetting, MCU state is unknown.
  */
 enum kbase_mcu_state {
 #define KBASEP_MCU_STATE(n) KBASE_MCU_ ## n,
@@ -114,45 +82,8 @@ enum kbase_mcu_state {
 };
 #endif
 
-/**
+/*
  * enum kbase_shader_core_state - The states used for the shaders' state machine.
- *
- * @KBASE_SHADERS_OFF_CORESTACK_OFF: The shaders and core stacks are off
- * @KBASE_SHADERS_OFF_CORESTACK_PEND_ON: The shaders are off, core stacks have
- *                                       been requested to power on and hwcnt
- *                                       is being disabled
- * @KBASE_SHADERS_PEND_ON_CORESTACK_ON: Core stacks are on, shaders have been
- *                                      requested to power on. Or after doing
- *                                      partial shader on/off, checking whether
- *                                      it's the desired state.
- * @KBASE_SHADERS_ON_CORESTACK_ON: The shaders and core stacks are on, and hwcnt
- *					already enabled.
- * @KBASE_SHADERS_ON_CORESTACK_ON_RECHECK: The shaders and core stacks
- *                                      are on, hwcnt disabled, and checks
- *                                      to powering down or re-enabling
- *                                      hwcnt.
- * @KBASE_SHADERS_WAIT_OFF_CORESTACK_ON: The shaders have been requested to
- *                                       power off, but they remain on for the
- *                                       duration of the hysteresis timer
- * @KBASE_SHADERS_WAIT_GPU_IDLE: The shaders partial poweroff needs to reach
- *                               a state where jobs on the GPU are finished
- *                               including jobs currently running and in the
- *                               GPU queue because of GPU2017-861
- * @KBASE_SHADERS_WAIT_FINISHED_CORESTACK_ON: The hysteresis timer has expired
- * @KBASE_SHADERS_L2_FLUSHING_CORESTACK_ON: The core stacks are on and the
- *                                          level 2 cache is being flushed.
- * @KBASE_SHADERS_READY_OFF_CORESTACK_ON: The core stacks are on and the shaders
- *                                        are ready to be powered off.
- * @KBASE_SHADERS_PEND_OFF_CORESTACK_ON: The core stacks are on, and the shaders
- *                                       have been requested to power off
- * @KBASE_SHADERS_OFF_CORESTACK_PEND_OFF: The shaders are off, and the core stacks
- *                                        have been requested to power off
- * @KBASE_SHADERS_OFF_CORESTACK_OFF_TIMER_PEND_OFF: Shaders and corestacks are
- *                                                  off, but the tick timer
- *                                                  cancellation is still
- *                                                  pending.
- * @KBASE_SHADERS_RESET_WAIT: The GPU is resetting, shader and core stack power
- *                            states are unknown
  */
 enum kbase_shader_core_state {
 #define KBASEP_SHADER_STATE(n) KBASE_SHADERS_ ## n,
@@ -164,28 +95,40 @@ enum kbase_shader_core_state {
  * struct kbasep_pm_metrics - Metrics data collected for use by the power
  *                            management framework.
  *
- *  @time_busy: number of ns the GPU was busy executing jobs since the
- *          @time_period_start timestamp.
- *  @time_idle: number of ns since time_period_start the GPU was not executing
- *          jobs since the @time_period_start timestamp.
- *  @busy_cl: number of ns the GPU was busy executing CL jobs. Note that
- *           if two CL jobs were active for 400ns, this value would be updated
- *           with 800.
- *  @busy_gl: number of ns the GPU was busy executing GL jobs. Note that
- *           if two GL jobs were active for 400ns, this value would be updated
- *           with 800.
+ *  @time_busy: the amount of time the GPU was busy executing jobs since the
+ *          @time_period_start timestamp, in units of 256ns. This also includes
+ *          time_in_protm, the time spent in protected mode, since it's assumed
+ *          the GPU was busy 100% during this period.
+ *  @time_idle: the amount of time the GPU was not executing jobs since the
+ *              time_period_start timestamp, measured in units of 256ns.
+ *  @time_in_protm: The amount of time the GPU has spent in protected mode since
+ *                  the time_period_start timestamp, measured in units of 256ns.
+ *  @busy_cl: the amount of time the GPU was busy executing CL jobs. Note that
+ *           if two CL jobs were active for 256ns, this value would be updated
+ *           with 2 (2x256ns).
+ *  @busy_gl: the amount of time the GPU was busy executing GL jobs. Note that
+ *           if two GL jobs were active for 256ns, this value would be updated
+ *           with 2 (2x256ns).
  */
 struct kbasep_pm_metrics {
 	u32 time_busy;
 	u32 time_idle;
+#if MALI_USE_CSF
+	u32 time_in_protm;
+#else
 	u32 busy_cl[2];
 	u32 busy_gl;
+#endif
 };
 
 /**
  * struct kbasep_pm_metrics_state - State required to collect the metrics in
  *                                  struct kbasep_pm_metrics
  *  @time_period_start: time at which busy/idle measurements started
+ *  @ipa_control_client: Handle returned on registering DVFS as a
+ *                       kbase_ipa_control client
+ *  @skip_gpu_active_sanity_check: Decide whether to skip GPU_ACTIVE sanity
+ *                                 check in DVFS utilisation calculation
  *  @gpu_active: true when the GPU is executing jobs. false when
  *           not. Updated when the job scheduler informs us a job in submitted
  *           or removed from a GPU slot.
@@ -197,6 +140,7 @@ struct kbasep_pm_metrics {
  *  @values: The current values of the power management metrics. The
  *           kbase_pm_get_dvfs_metrics() function is used to compare these
  *           current values with the saved values from a previous invocation.
+ *  @initialized: tracks whether metrics_state has been initialized or not.
  *  @timer: timer to regularly make DVFS decisions based on the power
  *           management metrics.
  *  @timer_active: boolean indicating @timer is running
@@ -205,9 +149,14 @@ struct kbasep_pm_metrics {
  */
 struct kbasep_pm_metrics_state {
 	ktime_t time_period_start;
+#if MALI_USE_CSF
+	void *ipa_control_client;
+	bool skip_gpu_active_sanity_check;
+#else
 	bool gpu_active;
 	u32 active_cl_ctx[2];
 	u32 active_gl_ctx[3];
+#endif
 	spinlock_t lock;
 
 	void *platform_data;
@@ -216,6 +165,7 @@ struct kbasep_pm_metrics_state {
 	struct kbasep_pm_metrics values;
 
 #ifdef CONFIG_MALI_BIFROST_DVFS
+	bool initialized;
 	struct hrtimer timer;
 	bool timer_active;
 	struct kbasep_pm_metrics dvfs_last;
@@ -326,6 +276,8 @@ union kbase_pm_policy_data {
  * @callback_soft_reset: Optional callback to software reset the GPU. See
  *                       &struct kbase_pm_callback_conf
  * @ca_cores_enabled: Cores that are currently available
+ * @mcu_state: The current state of the micro-control unit, only applicable
+ *             to GPUs that have such a component
  * @l2_state:     The current state of the L2 cache state machine. See
  *                &enum kbase_l2_core_state
  * @l2_desired:   True if the L2 cache should be powered on by the L2 cache state
@@ -335,10 +287,10 @@ union kbase_pm_policy_data {
  * @shaders_avail: This is updated by the state machine when it is in a state
  *                 where it can write to the SHADER_PWRON or PWROFF registers
  *                 to have the same set of available cores as specified by
- *                 @shaders_desired_mask. So it would eventually have the same
- *                 value as @shaders_desired_mask and would precisely indicate
- *                 the cores that are currently available. This is internal to
- *                 shader state machine and should *not* be modified elsewhere.
+ *                 @shaders_desired_mask. So it would precisely indicate the
+ *                 cores that are currently available. This is internal to the
+ *                 shader state machine of JM GPUs and should *not* be modified
+ *                 elsewhere.
  * @shaders_desired_mask: This is updated by the state machine when it is in
  *                        a state where it can handle changes to the core
  *                        availability (either by DVFS or sysfs). This is
@@ -350,6 +302,16 @@ union kbase_pm_policy_data {
  *                   cores may be different, but there should be transitions in
  *                   progress that will eventually achieve this state (assuming
  *                   that the policy doesn't change its mind in the mean time).
+ * @mcu_desired: True if the micro-control unit should be powered on
+ * @policy_change_clamp_state_to_off: Signaling the backend is in PM policy
+ *                change transition, needs the mcu/L2 to be brought back to the
+ *                off state and remain in that state until the flag is cleared.
+ * @csf_pm_sched_flags: CSF Dynamic PM control flags in accordance with the
+ *                currently active PM policy. This field is updated whenever a
+ *                new policy is activated.
+ * @policy_change_lock: Used to serialize the policy change calls. In CSF case,
+ *                      the change of policy may involve the scheduler to
+ *                      suspend running CSGs and then reconfigure the MCU.
  * @in_reset: True if a GPU is resetting and normal power manager operation is
  *            suspended
  * @partial_shaderoff: True if we want to partial power off shader cores,
@@ -440,9 +402,6 @@ struct kbase_pm_backend_data {
 	u64 ca_cores_enabled;
 
 #if MALI_USE_CSF
-	/* The current state of the micro-control unit, only applicable
-	 * to GPUs that has such a component
-	 */
 	enum kbase_mcu_state mcu_state;
 #endif
 	enum kbase_l2_core_state l2_state;
@@ -450,8 +409,10 @@ struct kbase_pm_backend_data {
 	u64 shaders_avail;
 	u64 shaders_desired_mask;
 #if MALI_USE_CSF
-	/* True if the micro-control unit should be powered on */
 	bool mcu_desired;
+	bool policy_change_clamp_state_to_off;
+	unsigned int csf_pm_sched_flags;
+	struct mutex policy_change_lock;
 #endif
 	bool l2_desired;
 	bool l2_always_on;
@@ -476,6 +437,23 @@ struct kbase_pm_backend_data {
 	struct work_struct gpu_clock_control_work;
 };
 
+#if MALI_USE_CSF
+/* CSF PM flag, signaling that the MCU CORE should be kept on */
+#define  CSF_DYNAMIC_PM_CORE_KEEP_ON (1 << 0)
+/* CSF PM flag, signaling no scheduler suspension on idle groups */
+#define CSF_DYNAMIC_PM_SCHED_IGNORE_IDLE (1 << 1)
+/* CSF PM flag, signaling no scheduler suspension on no runnable groups */
+#define CSF_DYNAMIC_PM_SCHED_NO_SUSPEND (1 << 2)
+
+/* The following flags correspond to the existing defined PM policies */
+#define ALWAYS_ON_PM_SCHED_FLAGS (CSF_DYNAMIC_PM_CORE_KEEP_ON | \
+				  CSF_DYNAMIC_PM_SCHED_IGNORE_IDLE | \
+				  CSF_DYNAMIC_PM_SCHED_NO_SUSPEND)
+#define COARSE_ON_DEMAND_PM_SCHED_FLAGS (0)
+#if !MALI_CUSTOMER_RELEASE
+#define ALWAYS_ON_DEMAND_PM_SCHED_FLAGS (CSF_DYNAMIC_PM_SCHED_IGNORE_IDLE)
+#endif
+#endif
 
 /* List of policy IDs */
 enum kbase_pm_policy_id {
@@ -502,11 +480,15 @@ enum kbase_pm_policy_id {
  *                      necessarily the same as its index in the list returned
  *                      by kbase_pm_list_policies().
  *                      It is used purely for debugging.
+ * @pm_sched_flags: Policy associated with CSF PM scheduling operational flags.
+ *                  Pre-defined required flags exist for each of the
+ *                  ARM released policies, such as 'always_on',
+ *                  'coarse_demand', etc.
  */
 struct kbase_pm_policy {
 	char *name;
 
-	/**
+	/*
 	 * Function called when the policy is selected
 	 *
 	 * This should initialize the kbdev->pm.pm_policy_data structure. It
@@ -520,7 +502,7 @@ struct kbase_pm_policy {
 	 */
 	void (*init)(struct kbase_device *kbdev);
 
-	/**
+	/*
 	 * Function called when the policy is unselected.
 	 *
 	 * @kbdev: The kbase device structure for the device (must be a
@@ -528,7 +510,7 @@ struct kbase_pm_policy {
 	 */
 	void (*term)(struct kbase_device *kbdev);
 
-	/**
+	/*
 	 * Function called to find out if shader cores are needed
 	 *
 	 * This needs to at least satisfy kbdev->pm.backend.shaders_desired,
@@ -541,7 +523,7 @@ struct kbase_pm_policy {
 	 */
 	bool (*shaders_needed)(struct kbase_device *kbdev);
 
-	/**
+	/*
 	 * Function called to get the current overall GPU power state
 	 *
 	 * This function must meet or exceed the requirements for power
@@ -555,6 +537,15 @@ struct kbase_pm_policy {
 	bool (*get_core_active)(struct kbase_device *kbdev);
 
 	enum kbase_pm_policy_id id;
+
+#if MALI_USE_CSF
+	/* Policy associated with CSF PM scheduling operational flags.
+	 * Pre-defined required flags exist for each of the
+	 * ARM released policies, such as 'always_on', 'coarse_demand',
+	 * etc.
+	 */
+	unsigned int pm_sched_flags;
+#endif
 };
 
 #endif /* _KBASE_PM_HWACCESS_DEFS_H_ */
diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_driver.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_driver.c
index e9e30ebadc2d..6e742fb1137e 100644
--- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_driver.c
+++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_driver.c
@@ -1,12 +1,12 @@
 // SPDX-License-Identifier: GPL-2.0
 /*
  *
- * (C) COPYRIGHT 2010-2020 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2021 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -17,8 +17,6 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 /*
@@ -32,9 +30,13 @@
 #include <mali_kbase_pm.h>
 #include <mali_kbase_config_defaults.h>
 #include <mali_kbase_smc.h>
-#if !MALI_USE_CSF
+
+#if MALI_USE_CSF
+#include <csf/ipa_control/mali_kbase_csf_ipa_control.h>
+#else
 #include <mali_kbase_hwaccess_jm.h>
 #endif /* !MALI_USE_CSF */
+
 #include <mali_kbase_reset_gpu.h>
 #include <mali_kbase_ctx_sched.h>
 #include <mali_kbase_hwcnt_context.h>
@@ -47,6 +49,9 @@
 #ifdef CONFIG_MALI_ARBITER_SUPPORT
 #include <arbiter/mali_kbase_arbiter_pm.h>
 #endif /* CONFIG_MALI_ARBITER_SUPPORT */
+#if MALI_USE_CSF
+#include <csf/ipa_control/mali_kbase_csf_ipa_control.h>
+#endif
 
 #include <linux/of.h>
 
@@ -103,13 +108,13 @@ bool kbase_pm_is_mcu_desired(struct kbase_device *kbdev)
 		return true;
 
 	/* MCU is supposed to be ON, only when scheduler.pm_active_count is
-	 * non zero. But for always_on policy also MCU needs to be ON.
-	 * GPUCORE-24926 will add the proper handling for always_on
-	 * power policy.
+	 * non zero. But for the always_on policy, the MCU needs to be kept on,
+	 * unless a policy change transition needs it off.
 	 */
+
 	return (kbdev->pm.backend.mcu_desired &&
-		(kbdev->pm.backend.pm_current_policy ==
-		 &kbase_pm_always_on_policy_ops));
+		kbase_pm_no_mcu_core_pwroff(kbdev) &&
+		!kbdev->pm.backend.policy_change_clamp_state_to_off);
 }
 #endif
 
@@ -126,6 +131,11 @@ bool kbase_pm_is_l2_desired(struct kbase_device *kbdev)
 			!kbdev->pm.backend.shaders_desired)
 		return false;
 
+#if MALI_USE_CSF
+	if (kbdev->pm.backend.policy_change_clamp_state_to_off)
+		return false;
+#endif
+
 	return kbdev->pm.backend.l2_desired;
 }
 
@@ -257,7 +267,8 @@ static void mali_cci_flush_l2(struct kbase_device *kbdev)
 		GPU_CONTROL_REG(GPU_IRQ_RAWSTAT));
 
 	/* Wait for cache flush to complete before continuing, exit on
-	 * gpu resets or loop expiry. */
+	 * gpu resets or loop expiry.
+	 */
 	while (((raw & mask) == 0) && --loops) {
 		raw = kbase_reg_read(kbdev,
 					GPU_CONTROL_REG(GPU_IRQ_RAWSTAT));
@@ -396,9 +407,9 @@ u64 kbase_pm_get_present_cores(struct kbase_device *kbdev,
 
 	switch (type) {
 	case KBASE_PM_CORE_L2:
-		return kbdev->gpu_props.props.raw_props.l2_present;
+		return kbdev->gpu_props.curr_config.l2_present;
 	case KBASE_PM_CORE_SHADER:
-		return kbdev->gpu_props.props.raw_props.shader_present;
+		return kbdev->gpu_props.curr_config.shader_present;
 	case KBASE_PM_CORE_TILER:
 		return kbdev->gpu_props.props.raw_props.tiler_present;
 	case KBASE_PM_CORE_STACK:
@@ -492,14 +503,10 @@ static void kbase_pm_trigger_hwcnt_disable(struct kbase_device *kbdev)
 	 */
 	if (kbase_hwcnt_context_disable_atomic(kbdev->hwcnt_gpu_ctx)) {
 		backend->hwcnt_disabled = true;
+
 	} else {
-#if KERNEL_VERSION(3, 16, 0) > LINUX_VERSION_CODE
-		queue_work(system_wq,
-			&backend->hwcnt_disable_work);
-#else
-		queue_work(system_highpri_wq,
-			&backend->hwcnt_disable_work);
-#endif
+		kbase_hwcnt_context_queue_work(kbdev->hwcnt_gpu_ctx,
+					       &backend->hwcnt_disable_work);
 	}
 }
 
@@ -517,7 +524,8 @@ static void kbase_pm_l2_config_override(struct kbase_device *kbdev)
 	 * Skip if size and hash are not given explicitly,
 	 * which means default values are used.
 	 */
-	if ((kbdev->l2_size_override == 0) && (kbdev->l2_hash_override == 0))
+	if ((kbdev->l2_size_override == 0) && (kbdev->l2_hash_override == 0) &&
+	    (!kbdev->l2_hash_values_override))
 		return;
 
 	val = kbase_reg_read(kbdev, GPU_CONTROL_REG(L2_CONFIG));
@@ -528,13 +536,25 @@ static void kbase_pm_l2_config_override(struct kbase_device *kbdev)
 	}
 
 	if (kbdev->l2_hash_override) {
+		WARN_ON(kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_ASN_HASH));
 		val &= ~L2_CONFIG_HASH_MASK;
 		val |= (kbdev->l2_hash_override << L2_CONFIG_HASH_SHIFT);
+	} else if (kbdev->l2_hash_values_override) {
+		int i;
+
+		WARN_ON(!kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_ASN_HASH));
+		val &= ~L2_CONFIG_ASN_HASH_ENABLE_MASK;
+		val |= (0x1 << L2_CONFIG_ASN_HASH_ENABLE_SHIFT);
+
+		for (i = 0; i < ASN_HASH_COUNT; i++) {
+			dev_dbg(kbdev->dev, "Program 0x%x to ASN_HASH[%d]\n",
+				kbdev->l2_hash_values[i], i);
+			kbase_reg_write(kbdev, GPU_CONTROL_REG(ASN_HASH(i)),
+					kbdev->l2_hash_values[i]);
+		}
 	}
 
 	dev_dbg(kbdev->dev, "Program 0x%x to L2_CONFIG\n", val);
-
-	/* Write L2_CONFIG to override */
 	kbase_reg_write(kbdev, GPU_CONTROL_REG(L2_CONFIG), val);
 }
 
@@ -561,6 +581,35 @@ static const char *kbase_mcu_state_to_string(enum kbase_mcu_state state)
 		return strings[state];
 }
 
+static inline bool kbase_pm_handle_mcu_core_attr_update(struct kbase_device *kbdev)
+{
+	struct kbase_pm_backend_data *backend = &kbdev->pm.backend;
+	bool timer_update;
+	bool core_mask_update;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	WARN_ON(backend->mcu_state != KBASE_MCU_ON);
+
+	/* This function only applies when the MCU manages the cores; if the
+	 * firmware is in host-control mode for core power, do nothing here.
+	 */
+	if (unlikely(kbdev->csf.firmware_hctl_core_pwr))
+		return false;
+
+	core_mask_update =
+		backend->shaders_avail != backend->shaders_desired_mask;
+
+	timer_update = kbdev->csf.mcu_core_pwroff_dur_count !=
+			kbdev->csf.mcu_core_pwroff_reg_shadow;
+
+	if (core_mask_update || timer_update)
+		kbase_csf_firmware_update_core_attr(kbdev, timer_update,
+			core_mask_update, backend->shaders_desired_mask);
+
+	return (core_mask_update || timer_update);
+}
+
 static int kbase_pm_mcu_update_state(struct kbase_device *kbdev)
 {
 	struct kbase_pm_backend_data *backend = &kbdev->pm.backend;
@@ -578,11 +627,20 @@ static int kbase_pm_mcu_update_state(struct kbase_device *kbdev)
 	}
 
 	do {
+		u64 shaders_trans = kbase_pm_get_trans_cores(kbdev, KBASE_PM_CORE_SHADER);
+		u64 shaders_ready = kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_SHADER);
+
+		/* mask off ready from trans in case transitions finished
+		 * between the register reads
+		 */
+		shaders_trans &= ~shaders_ready;
+
 		prev_state = backend->mcu_state;
 
 		switch (backend->mcu_state) {
 		case KBASE_MCU_OFF:
 			if (kbase_pm_is_mcu_desired(kbdev) &&
+			    !backend->policy_change_clamp_state_to_off &&
 			    backend->l2_state == KBASE_L2_ON) {
 				kbase_csf_firmware_trigger_reload(kbdev);
 				backend->mcu_state = KBASE_MCU_PEND_ON_RELOAD;
@@ -591,35 +649,116 @@ static int kbase_pm_mcu_update_state(struct kbase_device *kbdev)
 
 		case KBASE_MCU_PEND_ON_RELOAD:
 			if (kbdev->csf.firmware_reloaded) {
-				kbase_csf_firmware_global_reinit(kbdev);
+				backend->shaders_desired_mask =
+					kbase_pm_ca_get_core_mask(kbdev);
+				kbase_csf_firmware_global_reinit(kbdev,
+					backend->shaders_desired_mask);
 				backend->mcu_state =
 					KBASE_MCU_ON_GLB_REINIT_PEND;
 			}
 			break;
 
 		case KBASE_MCU_ON_GLB_REINIT_PEND:
-			if (kbase_csf_firmware_global_reinit_complete(kbdev))
+			if (kbase_csf_firmware_global_reinit_complete(kbdev)) {
+				backend->shaders_avail =
+						backend->shaders_desired_mask;
+				backend->pm_shaders_core_mask = 0;
+				if (kbdev->csf.firmware_hctl_core_pwr) {
+					kbase_pm_invoke(kbdev, KBASE_PM_CORE_SHADER,
+						backend->shaders_avail, ACTION_PWRON);
+					backend->mcu_state =
+						KBASE_MCU_HCTL_SHADERS_PEND_ON;
+				} else
+					backend->mcu_state = KBASE_MCU_ON_HWCNT_ENABLE;
+			}
+			break;
+
+		case KBASE_MCU_HCTL_SHADERS_PEND_ON:
+			if (!shaders_trans &&
+			    shaders_ready == backend->shaders_avail) {
+				/* Cores now stable, notify MCU the stable mask */
+				kbase_csf_firmware_update_core_attr(kbdev,
+						false, true, shaders_ready);
+
+				backend->pm_shaders_core_mask = shaders_ready;
+				backend->mcu_state =
+					KBASE_MCU_HCTL_CORES_NOTIFY_PEND;
+			}
+			break;
+
+		case KBASE_MCU_HCTL_CORES_NOTIFY_PEND:
+			/* Wait for the acknowledgement */
+			if (kbase_csf_firmware_core_attr_updated(kbdev))
 				backend->mcu_state = KBASE_MCU_ON_HWCNT_ENABLE;
 			break;
 
 		case KBASE_MCU_ON_HWCNT_ENABLE:
 			backend->hwcnt_desired = true;
 			if (backend->hwcnt_disabled) {
+				unsigned long flags;
+
+				kbase_csf_scheduler_spin_lock(kbdev, &flags);
 				kbase_hwcnt_context_enable(
 					kbdev->hwcnt_gpu_ctx);
+				kbase_csf_scheduler_spin_unlock(kbdev, flags);
 				backend->hwcnt_disabled = false;
 			}
 			backend->mcu_state = KBASE_MCU_ON;
 			break;
 
 		case KBASE_MCU_ON:
+			backend->shaders_desired_mask = kbase_pm_ca_get_core_mask(kbdev);
+
 			if (!kbase_pm_is_mcu_desired(kbdev))
 				backend->mcu_state = KBASE_MCU_ON_HWCNT_DISABLE;
+			else if (kbdev->csf.firmware_hctl_core_pwr) {
+				/* Host control: extra cores must be powered on */
+				if (backend->shaders_desired_mask & ~shaders_ready) {
+					backend->hwcnt_desired = false;
+					if (!backend->hwcnt_disabled)
+						kbase_pm_trigger_hwcnt_disable(kbdev);
+					backend->mcu_state =
+						KBASE_MCU_HCTL_MCU_ON_RECHECK;
+				}
+			} else if (kbase_pm_handle_mcu_core_attr_update(kbdev))
+				kbdev->pm.backend.mcu_state =
+					KBASE_MCU_ON_CORE_ATTR_UPDATE_PEND;
 			break;
 
-		/* ToDo. Add new state(s) if shader cores mask change for DVFS
-		 * has to be accommodated in the MCU state machine.
-		 */
+		case KBASE_MCU_HCTL_MCU_ON_RECHECK:
+			backend->shaders_desired_mask = kbase_pm_ca_get_core_mask(kbdev);
+
+			if (!backend->hwcnt_disabled) {
+				/* Wait for being disabled */
+				;
+			} else if (!kbase_pm_is_mcu_desired(kbdev)) {
+				/* Converging to MCU powering down flow */
+				backend->mcu_state = KBASE_MCU_ON_HWCNT_DISABLE;
+			} else if (backend->shaders_desired_mask & ~shaders_ready) {
+				/* Include cores that are ready but not desired,
+				 * so that the SHADERS_PEND_ON check can pass
+				 */
+				backend->shaders_avail =
+					(backend->shaders_desired_mask | shaders_ready);
+
+				kbase_pm_invoke(kbdev, KBASE_PM_CORE_SHADER,
+						backend->shaders_avail & ~shaders_ready,
+						ACTION_PWRON);
+				backend->mcu_state =
+					KBASE_MCU_HCTL_SHADERS_PEND_ON;
+			} else {
+				backend->mcu_state =
+					KBASE_MCU_HCTL_SHADERS_PEND_ON;
+			}
+			break;
+
+		case KBASE_MCU_ON_CORE_ATTR_UPDATE_PEND:
+			if (kbase_csf_firmware_core_attr_updated(kbdev)) {
+				backend->shaders_avail =
+					backend->shaders_desired_mask;
+				backend->mcu_state = KBASE_MCU_ON;
+			}
+			break;
 
 		case KBASE_MCU_ON_HWCNT_DISABLE:
 			if (kbase_pm_is_mcu_desired(kbdev)) {
@@ -639,14 +778,32 @@ static int kbase_pm_mcu_update_state(struct kbase_device *kbdev)
 			if (!kbase_pm_is_mcu_desired(kbdev)) {
 				kbase_csf_firmware_trigger_mcu_halt(kbdev);
 				backend->mcu_state = KBASE_MCU_ON_PEND_HALT;
-			} else if (kbase_pm_is_mcu_desired(kbdev)) {
+			} else
 				backend->mcu_state = KBASE_MCU_ON_HWCNT_ENABLE;
-			}
 			break;
 
 		case KBASE_MCU_ON_PEND_HALT:
-			if (kbase_csf_firmware_mcu_halted(kbdev))
+			if (kbase_csf_firmware_mcu_halted(kbdev)) {
+				if (kbdev->csf.firmware_hctl_core_pwr)
+					backend->mcu_state =
+						KBASE_MCU_HCTL_SHADERS_READY_OFF;
+				else
+					backend->mcu_state = KBASE_MCU_POWER_DOWN;
+			}
+			break;
+
+		case KBASE_MCU_HCTL_SHADERS_READY_OFF:
+			kbase_pm_invoke(kbdev, KBASE_PM_CORE_SHADER,
+					shaders_ready, ACTION_PWROFF);
+			backend->mcu_state =
+				KBASE_MCU_HCTL_SHADERS_PEND_OFF;
+			break;
+
+		case KBASE_MCU_HCTL_SHADERS_PEND_OFF:
+			if (!shaders_trans && !shaders_ready) {
+				backend->pm_shaders_core_mask = 0;
 				backend->mcu_state = KBASE_MCU_POWER_DOWN;
+			}
 			break;
 
 		case KBASE_MCU_POWER_DOWN:
@@ -698,8 +855,10 @@ static const char *kbase_l2_core_state_to_string(enum kbase_l2_core_state state)
 static int kbase_pm_l2_update_state(struct kbase_device *kbdev)
 {
 	struct kbase_pm_backend_data *backend = &kbdev->pm.backend;
-	u64 l2_present = kbdev->gpu_props.props.raw_props.l2_present;
+	u64 l2_present = kbdev->gpu_props.curr_config.l2_present;
+#if !MALI_USE_CSF
 	u64 tiler_present = kbdev->gpu_props.props.raw_props.tiler_present;
+#endif
 	enum kbase_l2_core_state prev_state;
 
 	lockdep_assert_held(&kbdev->hwaccess_lock);
@@ -710,10 +869,13 @@ static int kbase_pm_l2_update_state(struct kbase_device *kbdev)
 				KBASE_PM_CORE_L2);
 		u64 l2_ready = kbase_pm_get_ready_cores(kbdev,
 				KBASE_PM_CORE_L2);
+
+#if !MALI_USE_CSF
 		u64 tiler_trans = kbase_pm_get_trans_cores(kbdev,
 				KBASE_PM_CORE_TILER);
 		u64 tiler_ready = kbase_pm_get_ready_cores(kbdev,
 				KBASE_PM_CORE_TILER);
+#endif
 
 		/*
 		 * kbase_pm_get_ready_cores and kbase_pm_get_trans_cores
@@ -736,8 +898,9 @@ static int kbase_pm_l2_update_state(struct kbase_device *kbdev)
 		 * between the register reads
 		 */
 		l2_trans &= ~l2_ready;
+#if !MALI_USE_CSF
 		tiler_trans &= ~tiler_ready;
-
+#endif
 		prev_state = backend->l2_state;
 
 		switch (backend->l2_state) {
@@ -748,7 +911,7 @@ static int kbase_pm_l2_update_state(struct kbase_device *kbdev)
 				 * powering it on
 				 */
 				kbase_pm_l2_config_override(kbdev);
-
+#if !MALI_USE_CSF
 				/* L2 is required, power on.  Powering on the
 				 * tiler will also power the first L2 cache.
 				 */
@@ -762,14 +925,30 @@ static int kbase_pm_l2_update_state(struct kbase_device *kbdev)
 					kbase_pm_invoke(kbdev, KBASE_PM_CORE_L2,
 							l2_present & ~1,
 							ACTION_PWRON);
+#else
+				/* With CSF firmware, Host driver doesn't need to
+				 * handle power management with both shader and tiler cores.
+				 * The CSF firmware will power up the cores appropriately.
+				 * So only power the l2 cache explicitly.
+				 */
+				kbase_pm_invoke(kbdev, KBASE_PM_CORE_L2,
+						l2_present, ACTION_PWRON);
+#endif
 				backend->l2_state = KBASE_L2_PEND_ON;
 			}
 			break;
 
 		case KBASE_L2_PEND_ON:
+#if !MALI_USE_CSF
 			if (!l2_trans && l2_ready == l2_present && !tiler_trans
 					&& tiler_ready == tiler_present) {
-				KBASE_KTRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE_TILER, NULL, tiler_ready);
+				KBASE_KTRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE_TILER, NULL,
+						tiler_ready);
+#else
+			if (!l2_trans && l2_ready == l2_present) {
+				KBASE_KTRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE_L2, NULL,
+						l2_ready);
+#endif
 				/*
 				 * Ensure snoops are enabled after L2 is powered
 				 * up. Note that kbase keeps track of the snoop
@@ -948,9 +1127,11 @@ static int kbase_pm_l2_update_state(struct kbase_device *kbdev)
 				 */
 				kbase_gpu_start_cache_clean_nolock(
 						kbdev);
-
+#if !MALI_USE_CSF
 			KBASE_KTRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE_TILER, NULL, 0u);
-
+#else
+			KBASE_KTRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE_L2, NULL, 0u);
+#endif
 			backend->l2_state = KBASE_L2_PEND_OFF;
 			break;
 
@@ -1078,7 +1259,6 @@ static int kbase_pm_shaders_update_state(struct kbase_device *kbdev)
 			&kbdev->pm.backend.shader_tick_timer;
 	enum kbase_shader_core_state prev_state;
 	u64 stacks_avail = 0;
-	int err = 0;
 
 	lockdep_assert_held(&kbdev->hwaccess_lock);
 
@@ -1173,8 +1353,18 @@ static int kbase_pm_shaders_update_state(struct kbase_device *kbdev)
 				backend->pm_shaders_core_mask = shaders_ready;
 				backend->hwcnt_desired = true;
 				if (backend->hwcnt_disabled) {
+#if MALI_USE_CSF
+					unsigned long flags;
+
+					kbase_csf_scheduler_spin_lock(kbdev,
+								      &flags);
+#endif
 					kbase_hwcnt_context_enable(
 						kbdev->hwcnt_gpu_ctx);
+#if MALI_USE_CSF
+					kbase_csf_scheduler_spin_unlock(kbdev,
+									flags);
+#endif
 					backend->hwcnt_disabled = false;
 				}
 
@@ -1354,8 +1544,18 @@ static int kbase_pm_shaders_update_state(struct kbase_device *kbdev)
 				backend->pm_shaders_core_mask = 0;
 				backend->hwcnt_desired = true;
 				if (backend->hwcnt_disabled) {
+#if MALI_USE_CSF
+					unsigned long flags;
+
+					kbase_csf_scheduler_spin_lock(kbdev,
+								      &flags);
+#endif
 					kbase_hwcnt_context_enable(
 						kbdev->hwcnt_gpu_ctx);
+#if MALI_USE_CSF
+					kbase_csf_scheduler_spin_unlock(kbdev,
+									flags);
+#endif
 					backend->hwcnt_disabled = false;
 				}
 				backend->shaders_state = KBASE_SHADERS_OFF_CORESTACK_OFF_TIMER_PEND_OFF;
@@ -1382,7 +1582,7 @@ static int kbase_pm_shaders_update_state(struct kbase_device *kbdev)
 
 	} while (backend->shaders_state != prev_state);
 
-	return err;
+	return 0;
 }
 #endif
 
@@ -1647,7 +1847,8 @@ void kbase_pm_reset_complete(struct kbase_device *kbdev)
 
 /* Timeout for kbase_pm_wait_for_desired_state when wait_event_killable has
  * aborted due to a fatal signal. If the time spent waiting has exceeded this
- * threshold then there is most likely a hardware issue. */
+ * threshold then there is most likely a hardware issue.
+ */
 #define PM_TIMEOUT_MS (5000) /* 5s */
 
 static void kbase_pm_timed_out(struct kbase_device *kbdev)
@@ -1706,7 +1907,8 @@ static void kbase_pm_timed_out(struct kbase_device *kbdev)
 					L2_PWRTRANS_LO)));
 
 	dev_err(kbdev->dev, "Sending reset to GPU - all running jobs will be lost\n");
-	if (kbase_prepare_to_reset_gpu(kbdev))
+	if (kbase_prepare_to_reset_gpu(kbdev,
+				       RESET_FLAGS_HWC_UNRECOVERABLE_ERROR))
 		kbase_reset_gpu(kbdev);
 }
 
@@ -1774,7 +1976,7 @@ void kbase_pm_enable_interrupts(struct kbase_device *kbdev)
 {
 	unsigned long flags;
 
-	KBASE_DEBUG_ASSERT(NULL != kbdev);
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
 	/*
 	 * Clear all interrupts,
 	 * and unmask them all.
@@ -1800,7 +2002,7 @@ KBASE_EXPORT_TEST_API(kbase_pm_enable_interrupts);
 
 void kbase_pm_disable_interrupts_nolock(struct kbase_device *kbdev)
 {
-	KBASE_DEBUG_ASSERT(NULL != kbdev);
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
 	/*
 	 * Mask all interrupts,
 	 * and clear them all.
@@ -1827,6 +2029,22 @@ void kbase_pm_disable_interrupts(struct kbase_device *kbdev)
 
 KBASE_EXPORT_TEST_API(kbase_pm_disable_interrupts);
 
+#if MALI_USE_CSF
+static void update_user_reg_page_mapping(struct kbase_device *kbdev)
+{
+	lockdep_assert_held(&kbdev->pm.lock);
+
+	if (kbdev->csf.mali_file_inode) {
+		/* Zap the PTEs that map the User register page in all Kbase
+		 * contexts (even_cows = 1: private COWed pages are zapped too).
+		 */
+		unmap_mapping_range(kbdev->csf.mali_file_inode->i_mapping,
+				    BASEP_MEM_CSF_USER_REG_PAGE_HANDLE,
+				    PAGE_SIZE, 1);
+	}
+}
+#endif
+
 /*
  * pmu layout:
  * 0x0000: PMU TAG (RO) (0xCAFECAFE)
@@ -1838,7 +2056,7 @@ void kbase_pm_clock_on(struct kbase_device *kbdev, bool is_resume)
 	bool reset_required = is_resume;
 	unsigned long flags;
 
-	KBASE_DEBUG_ASSERT(NULL != kbdev);
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
 #if !MALI_USE_CSF
 	lockdep_assert_held(&kbdev->js_data.runpool_mutex);
 #endif /* !MALI_USE_CSF */
@@ -1876,24 +2094,39 @@ void kbase_pm_clock_on(struct kbase_device *kbdev, bool is_resume)
 	kbdev->pm.backend.gpu_powered = true;
 	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
 
+#if MALI_USE_CSF
+	/* GPU has been turned on, can switch to actual register page */
+	update_user_reg_page_mapping(kbdev);
+#endif
+
 	if (reset_required) {
 		/* GPU state was lost, reset GPU to ensure it is in a
-		 * consistent state */
+		 * consistent state
+		 */
 		kbase_pm_init_hw(kbdev, PM_ENABLE_IRQS);
 	}
 #ifdef CONFIG_MALI_ARBITER_SUPPORT
 	else {
-		struct kbase_arbiter_vm_state *arb_vm_state =
+		if (kbdev->arb.arb_if) {
+			struct kbase_arbiter_vm_state *arb_vm_state =
 				kbdev->pm.arb_vm_state;
 
-		/* In the case that the GPU has just been granted by
-		 * the Arbiter, a reset will have already been done.
-		 * However, it is still necessary to initialize the GPU.
-		 */
-		if (arb_vm_state->vm_arb_starting)
-			kbase_pm_init_hw(kbdev, PM_ENABLE_IRQS |
-					PM_NO_RESET);
+			/* In the case that the GPU has just been granted by
+			 * the Arbiter, a reset will have already been done.
+			 * However, it is still necessary to initialize the GPU.
+			 */
+			if (arb_vm_state->vm_arb_starting)
+				kbase_pm_init_hw(kbdev, PM_ENABLE_IRQS |
+						PM_NO_RESET);
+		}
 	}
+	/*
+	 * This point means that the GPU transitioned to ON. So there is a chance
+	 * that a repartitioning occurred. In this case the current config
+	 * should be read again.
+	 */
+	kbase_gpuprops_get_curr_config_props(kbdev,
+		&kbdev->gpu_props.curr_config);
 #endif /* CONFIG_MALI_ARBITER_SUPPORT */
 
 	mutex_lock(&kbdev->mmu_hw_mutex);
@@ -1918,6 +2151,17 @@ void kbase_pm_clock_on(struct kbase_device *kbdev, bool is_resume)
 	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
 	kbdev->pm.backend.gpu_ready = true;
 	kbdev->pm.backend.l2_desired = true;
+#if MALI_USE_CSF
+	if (reset_required) {
+		/* GPU reset was done after the power on, so send the post
+		 * reset event instead. This is okay as GPU power off event
+		 * is same as pre GPU reset event.
+		 */
+		kbase_ipa_control_handle_gpu_reset_post(kbdev);
+	} else {
+		kbase_ipa_control_handle_gpu_power_on(kbdev);
+	}
+#endif
 	kbase_pm_update_state(kbdev);
 	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
 }
@@ -1928,7 +2172,7 @@ bool kbase_pm_clock_off(struct kbase_device *kbdev)
 {
 	unsigned long flags;
 
-	KBASE_DEBUG_ASSERT(NULL != kbdev);
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
 	lockdep_assert_held(&kbdev->pm.lock);
 
 	/* ASSERT that the cores should now be unavailable. No lock needed. */
@@ -1952,12 +2196,16 @@ bool kbase_pm_clock_off(struct kbase_device *kbdev)
 
 	if (atomic_read(&kbdev->faults_pending)) {
 		/* Page/bus faults are still being processed. The GPU can not
-		 * be powered off until they have completed */
+		 * be powered off until they have completed
+		 */
 		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
 		return false;
 	}
 
 	kbase_pm_cache_snoop_disable(kbdev);
+#if MALI_USE_CSF
+	kbase_ipa_control_handle_gpu_power_off(kbdev);
+#endif
 
 	kbdev->pm.backend.gpu_ready = false;
 
@@ -1974,6 +2222,12 @@ bool kbase_pm_clock_off(struct kbase_device *kbdev)
 #endif
 
 	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+#if MALI_USE_CSF
+	/* GPU is about to be turned off, switch to dummy page */
+	update_user_reg_page_mapping(kbdev);
+#endif
+
 #ifdef CONFIG_MALI_ARBITER_SUPPORT
 	kbase_arbiter_pm_vm_event(kbdev, KBASE_VM_GPU_IDLE_EVENT);
 #endif /* CONFIG_MALI_ARBITER_SUPPORT */
@@ -2021,23 +2275,23 @@ static enum hrtimer_restart kbasep_reset_timeout(struct hrtimer *timer)
 	struct kbasep_reset_timeout_data *rtdata =
 		container_of(timer, struct kbasep_reset_timeout_data, timer);
 
-	rtdata->timed_out = 1;
+	rtdata->timed_out = true;
 
 	/* Set the wait queue to wake up kbase_pm_init_hw even though the reset
-	 * hasn't completed */
+	 * hasn't completed
+	 */
 	kbase_pm_reset_done(rtdata->kbdev);
 
 	return HRTIMER_NORESTART;
 }
 
-static int kbase_set_jm_quirks(struct kbase_device *kbdev, const u32 prod_id)
+static int kbase_set_gpu_quirks(struct kbase_device *kbdev, const u32 prod_id)
 {
 #if MALI_USE_CSF
-	kbdev->hw_quirks_jm = kbase_reg_read(kbdev,
-				GPU_CONTROL_REG(CSF_CONFIG));
+	kbdev->hw_quirks_gpu =
+		kbase_reg_read(kbdev, GPU_CONTROL_REG(CSF_CONFIG));
 #else
-	u32 hw_quirks_jm = kbase_reg_read(kbdev,
-				GPU_CONTROL_REG(JM_CONFIG));
+	u32 hw_quirks_gpu = kbase_reg_read(kbdev, GPU_CONTROL_REG(JM_CONFIG));
 
 	if (GPU_ID2_MODEL_MATCH_VALUE(prod_id) == GPU_ID2_PRODUCT_TMIX) {
 		/* Only for tMIx */
@@ -2051,39 +2305,38 @@ static int kbase_set_jm_quirks(struct kbase_device *kbdev, const u32 prod_id)
 		 */
 		if (coherency_features ==
 				COHERENCY_FEATURE_BIT(COHERENCY_ACE)) {
-			hw_quirks_jm |= (COHERENCY_ACE_LITE |
-					COHERENCY_ACE) <<
-					JM_FORCE_COHERENCY_FEATURES_SHIFT;
+			hw_quirks_gpu |= (COHERENCY_ACE_LITE | COHERENCY_ACE)
+					 << JM_FORCE_COHERENCY_FEATURES_SHIFT;
 		}
 	}
 
 	if (kbase_is_gpu_removed(kbdev))
 		return -EIO;
 
-	kbdev->hw_quirks_jm = hw_quirks_jm;
+	kbdev->hw_quirks_gpu = hw_quirks_gpu;
 
 #endif /* !MALI_USE_CSF */
 	if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_IDVS_GROUP_SIZE)) {
 		int default_idvs_group_size = 0xF;
-		u32 tmp;
+		u32 group_size = 0;
 
-		if (of_property_read_u32(kbdev->dev->of_node,
-					"idvs-group-size", &tmp))
-			tmp = default_idvs_group_size;
+		if (of_property_read_u32(kbdev->dev->of_node, "idvs-group-size",
+					 &group_size))
+			group_size = default_idvs_group_size;
 
-		if (tmp > IDVS_GROUP_MAX_SIZE) {
+		if (group_size > IDVS_GROUP_MAX_SIZE) {
 			dev_err(kbdev->dev,
 				"idvs-group-size of %d is too large. Maximum value is %d",
-				tmp, IDVS_GROUP_MAX_SIZE);
-			tmp = default_idvs_group_size;
+				group_size, IDVS_GROUP_MAX_SIZE);
+			group_size = default_idvs_group_size;
 		}
 
-		kbdev->hw_quirks_jm |= tmp << IDVS_GROUP_SIZE_SHIFT;
+		kbdev->hw_quirks_gpu |= group_size << IDVS_GROUP_SIZE_SHIFT;
 	}
 
 #define MANUAL_POWER_CONTROL ((u32)(1 << 8))
 	if (corestack_driver_control)
-		kbdev->hw_quirks_jm |= MANUAL_POWER_CONTROL;
+		kbdev->hw_quirks_gpu |= MANUAL_POWER_CONTROL;
 
 	return 0;
 }
@@ -2137,18 +2390,17 @@ static int kbase_pm_hw_issues_detect(struct kbase_device *kbdev)
 				GPU_ID_VERSION_PRODUCT_ID_SHIFT;
 	int error = 0;
 
-	kbdev->hw_quirks_jm = 0;
+	kbdev->hw_quirks_gpu = 0;
 	kbdev->hw_quirks_sc = 0;
 	kbdev->hw_quirks_tiler = 0;
 	kbdev->hw_quirks_mmu = 0;
 
-	if (!of_property_read_u32(np, "quirks_jm",
-				&kbdev->hw_quirks_jm)) {
+	if (!of_property_read_u32(np, "quirks_gpu", &kbdev->hw_quirks_gpu)) {
 		dev_info(kbdev->dev,
-			"Found quirks_jm = [0x%x] in Devicetree\n",
-			kbdev->hw_quirks_jm);
+			 "Found quirks_gpu = [0x%x] in Devicetree\n",
+			 kbdev->hw_quirks_gpu);
 	} else {
-		error = kbase_set_jm_quirks(kbdev, prod_id);
+		error = kbase_set_gpu_quirks(kbdev, prod_id);
 		if (error)
 			return error;
 	}
@@ -2199,10 +2451,10 @@ static void kbase_pm_hw_issues_apply(struct kbase_device *kbdev)
 			kbdev->hw_quirks_mmu);
 #if MALI_USE_CSF
 	kbase_reg_write(kbdev, GPU_CONTROL_REG(CSF_CONFIG),
-			kbdev->hw_quirks_jm);
+			kbdev->hw_quirks_gpu);
 #else
 	kbase_reg_write(kbdev, GPU_CONTROL_REG(JM_CONFIG),
-			kbdev->hw_quirks_jm);
+			kbdev->hw_quirks_gpu);
 #endif
 }
 
@@ -2233,6 +2485,7 @@ void kbase_pm_cache_snoop_disable(struct kbase_device *kbdev)
 	}
 }
 
+#if !MALI_USE_CSF
 static void reenable_protected_mode_hwcnt(struct kbase_device *kbdev)
 {
 	unsigned long irq_flags;
@@ -2245,6 +2498,7 @@ static void reenable_protected_mode_hwcnt(struct kbase_device *kbdev)
 	}
 	spin_unlock_irqrestore(&kbdev->hwaccess_lock, irq_flags);
 }
+#endif
 
 static int kbase_pm_do_reset(struct kbase_device *kbdev)
 {
@@ -2271,7 +2525,7 @@ static int kbase_pm_do_reset(struct kbase_device *kbdev)
 
 	/* Initialize a structure for tracking the status of the reset */
 	rtdata.kbdev = kbdev;
-	rtdata.timed_out = 0;
+	rtdata.timed_out = false;
 
 	/* Create a timer to use as a timeout on the reset */
 	hrtimer_init_on_stack(&rtdata.timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
@@ -2283,7 +2537,7 @@ static int kbase_pm_do_reset(struct kbase_device *kbdev)
 	/* Wait for the RESET_COMPLETED interrupt to be raised */
 	kbase_pm_wait_for_reset(kbdev);
 
-	if (rtdata.timed_out == 0) {
+	if (!rtdata.timed_out) {
 		/* GPU has been reset */
 		hrtimer_cancel(&rtdata.timer);
 		destroy_hrtimer_on_stack(&rtdata.timer);
@@ -2291,11 +2545,13 @@ static int kbase_pm_do_reset(struct kbase_device *kbdev)
 	}
 
 	/* No interrupt has been received - check if the RAWSTAT register says
-	 * the reset has completed */
+	 * the reset has completed
+	 */
 	if ((kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_RAWSTAT)) &
 							RESET_COMPLETED)) {
 		/* The interrupt is set in the RAWSTAT; this suggests that the
-		 * interrupts are not getting to the CPU */
+		 * interrupts are not getting to the CPU
+		 */
 		dev_err(kbdev->dev, "Reset interrupt didn't reach CPU. Check interrupt assignments.\n");
 		/* If interrupts aren't working we can't continue. */
 		destroy_hrtimer_on_stack(&rtdata.timer);
@@ -2309,33 +2565,40 @@ static int kbase_pm_do_reset(struct kbase_device *kbdev)
 	}
 
 	/* The GPU doesn't seem to be responding to the reset so try a hard
-	 * reset */
-	dev_err(kbdev->dev, "Failed to soft-reset GPU (timed out after %d ms), now attempting a hard reset\n",
-								RESET_TIMEOUT);
-	KBASE_KTRACE_ADD(kbdev, CORE_GPU_HARD_RESET, NULL, 0);
-	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
-						GPU_COMMAND_HARD_RESET);
+	 * reset, but only when NOT in arbitration mode.
+	 */
+#ifdef CONFIG_MALI_ARBITER_SUPPORT
+	if (!kbdev->arb.arb_if) {
+#endif /* CONFIG_MALI_ARBITER_SUPPORT */
+		dev_err(kbdev->dev, "Failed to soft-reset GPU (timed out after %d ms), now attempting a hard reset\n",
+					RESET_TIMEOUT);
+		KBASE_KTRACE_ADD(kbdev, CORE_GPU_HARD_RESET, NULL, 0);
+		kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
+					GPU_COMMAND_HARD_RESET);
 
-	/* Restart the timer to wait for the hard reset to complete */
-	rtdata.timed_out = 0;
+		/* Restart the timer to wait for the hard reset to complete */
+		rtdata.timed_out = false;
 
-	hrtimer_start(&rtdata.timer, HR_TIMER_DELAY_MSEC(RESET_TIMEOUT),
-							HRTIMER_MODE_REL);
+		hrtimer_start(&rtdata.timer, HR_TIMER_DELAY_MSEC(RESET_TIMEOUT),
+					HRTIMER_MODE_REL);
 
-	/* Wait for the RESET_COMPLETED interrupt to be raised */
-	kbase_pm_wait_for_reset(kbdev);
+		/* Wait for the RESET_COMPLETED interrupt to be raised */
+		kbase_pm_wait_for_reset(kbdev);
+
+		if (!rtdata.timed_out) {
+			/* GPU has been reset */
+			hrtimer_cancel(&rtdata.timer);
+			destroy_hrtimer_on_stack(&rtdata.timer);
+			return 0;
+		}
 
-	if (rtdata.timed_out == 0) {
-		/* GPU has been reset */
-		hrtimer_cancel(&rtdata.timer);
 		destroy_hrtimer_on_stack(&rtdata.timer);
-		return 0;
+
+		dev_err(kbdev->dev, "Failed to hard-reset the GPU (timed out after %d ms)\n",
+					RESET_TIMEOUT);
+#ifdef CONFIG_MALI_ARBITER_SUPPORT
 	}
-
-	destroy_hrtimer_on_stack(&rtdata.timer);
-
-	dev_err(kbdev->dev, "Failed to hard-reset the GPU (timed out after %d ms)\n",
-								RESET_TIMEOUT);
+#endif /* CONFIG_MALI_ARBITER_SUPPORT */
 
 	return -EINVAL;
 }
@@ -2359,7 +2622,7 @@ int kbase_pm_init_hw(struct kbase_device *kbdev, unsigned int flags)
 	unsigned long irq_flags;
 	int err = 0;
 
-	KBASE_DEBUG_ASSERT(NULL != kbdev);
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
 	lockdep_assert_held(&kbdev->pm.lock);
 
 	/* Ensure the clock is on before attempting to access the hardware */
@@ -2371,7 +2634,8 @@ int kbase_pm_init_hw(struct kbase_device *kbdev, unsigned int flags)
 	}
 
 	/* Ensure interrupts are off to begin with, this also clears any
-	 * outstanding interrupts */
+	 * outstanding interrupts
+	 */
 	kbase_pm_disable_interrupts(kbdev);
 	/* Ensure cache snoops are disabled before reset. */
 	kbase_pm_cache_snoop_disable(kbdev);
@@ -2392,6 +2656,17 @@ int kbase_pm_init_hw(struct kbase_device *kbdev, unsigned int flags)
 				kbdev->protected_dev);
 
 	spin_lock_irqsave(&kbdev->hwaccess_lock, irq_flags);
+#if MALI_USE_CSF
+	if (kbdev->protected_mode) {
+		unsigned long flags;
+
+		kbase_ipa_control_protm_exited(kbdev);
+
+		kbase_csf_scheduler_spin_lock(kbdev, &flags);
+		kbase_hwcnt_backend_csf_protm_exited(&kbdev->hwcnt_gpu_iface);
+		kbase_csf_scheduler_spin_unlock(kbdev, flags);
+	}
+#endif
 	kbdev->protected_mode = false;
 	spin_unlock_irqrestore(&kbdev->hwaccess_lock, irq_flags);
 
@@ -2412,7 +2687,8 @@ int kbase_pm_init_hw(struct kbase_device *kbdev, unsigned int flags)
 			GPU_STATUS_PROTECTED_MODE_ACTIVE);
 
 	/* If cycle counter was in use re-enable it, enable_irqs will only be
-	 * false when called from kbase_pm_powerup */
+	 * false when called from kbase_pm_powerup
+	 */
 	if (kbdev->pm.backend.gpu_cycle_counter_requests &&
 						(flags & PM_ENABLE_IRQS)) {
 		kbase_pm_enable_interrupts(kbdev);
@@ -2435,12 +2711,14 @@ int kbase_pm_init_hw(struct kbase_device *kbdev, unsigned int flags)
 		kbase_pm_enable_interrupts(kbdev);
 
 exit:
+#if !MALI_USE_CSF
 	if (!kbdev->pm.backend.protected_entry_transition_override) {
 		/* Re-enable GPU hardware counters if we're resetting from
 		 * protected mode.
 		 */
 		reenable_protected_mode_hwcnt(kbdev);
 	}
+#endif
 
 	return err;
 }
@@ -2467,12 +2745,22 @@ kbase_pm_request_gpu_cycle_counter_do_request(struct kbase_device *kbdev)
 
 	spin_lock_irqsave(&kbdev->pm.backend.gpu_cycle_counter_requests_lock,
 									flags);
-
 	++kbdev->pm.backend.gpu_cycle_counter_requests;
 
-	if (1 == kbdev->pm.backend.gpu_cycle_counter_requests)
+	if (kbdev->pm.backend.gpu_cycle_counter_requests == 1)
 		kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
 					GPU_COMMAND_CYCLE_COUNT_START);
+	else {
+		/* This might happen after GPU reset.
+		 * Then counter needs to be kicked.
+		 */
+		if (!IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) &&
+		    (!(kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_STATUS)) &
+		       GPU_STATUS_CYCLE_COUNT_ACTIVE))) {
+			kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
+					GPU_COMMAND_CYCLE_COUNT_START);
+		}
+	}
 
 	spin_unlock_irqrestore(
 			&kbdev->pm.backend.gpu_cycle_counter_requests_lock,
@@ -2488,6 +2776,8 @@ void kbase_pm_request_gpu_cycle_counter(struct kbase_device *kbdev)
 	KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_cycle_counter_requests <
 								INT_MAX);
 
+	kbase_pm_wait_for_l2_powered(kbdev);
+
 	kbase_pm_request_gpu_cycle_counter_do_request(kbdev);
 }
 
@@ -2522,7 +2812,7 @@ void kbase_pm_release_gpu_cycle_counter_nolock(struct kbase_device *kbdev)
 
 	--kbdev->pm.backend.gpu_cycle_counter_requests;
 
-	if (0 == kbdev->pm.backend.gpu_cycle_counter_requests)
+	if (kbdev->pm.backend.gpu_cycle_counter_requests == 0)
 		kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
 					GPU_COMMAND_CYCLE_COUNT_STOP);
 
diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_internal.h b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_internal.h
index 50ca016bbd6d..d27eb58ebcda 100644
--- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_internal.h
+++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_internal.h
@@ -1,11 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  *
- * (C) COPYRIGHT 2010-2020 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2021 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,12 +17,8 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
-
-
 /*
  * Power management API definitions used internally by GPU backend
  */
@@ -227,6 +224,7 @@ void kbase_pm_reset_done(struct kbase_device *kbdev);
  *
  * Return: 0 on success, error code on error
  */
+int kbase_pm_wait_for_desired_state(struct kbase_device *kbdev);
 #else
 /**
  * kbase_pm_wait_for_desired_state - Wait for the desired power state to be
@@ -250,8 +248,8 @@ void kbase_pm_reset_done(struct kbase_device *kbdev);
  *
  * Return: 0 on success, error code on error
  */
-#endif
 int kbase_pm_wait_for_desired_state(struct kbase_device *kbdev);
+#endif
 
 /**
  * kbase_pm_wait_for_l2_powered - Wait for the L2 cache to be powered on
@@ -492,7 +490,8 @@ void kbase_pm_register_access_enable(struct kbase_device *kbdev);
 void kbase_pm_register_access_disable(struct kbase_device *kbdev);
 
 /* NOTE: kbase_pm_is_suspending is in mali_kbase.h, because it is an inline
- * function */
+ * function
+ */
 
 /**
  * kbase_pm_metrics_is_active - Check if the power management metrics
@@ -536,8 +535,22 @@ void kbase_pm_get_dvfs_metrics(struct kbase_device *kbdev,
 
 #ifdef CONFIG_MALI_BIFROST_DVFS
 
+#if MALI_USE_CSF
 /**
- * kbase_platform_dvfs_event - Report utilisation to DVFS code
+ * kbase_platform_dvfs_event - Report utilisation to DVFS code for CSF GPU
+ *
+ * Function provided by platform specific code when DVFS is enabled to allow
+ * the power management metrics system to report utilisation.
+ *
+ * @kbdev:         The kbase device structure for the device (must be a
+ *                 valid pointer)
+ * @utilisation:   The current calculated utilisation by the metrics system.
+ * Return:         Returns 0 on failure and non zero on success.
+ */
+int kbase_platform_dvfs_event(struct kbase_device *kbdev, u32 utilisation);
+#else
+/**
+ * kbase_platform_dvfs_event - Report utilisation to DVFS code for JM GPU
  *
  * Function provided by platform specific code when DVFS is enabled to allow
  * the power management metrics system to report utilisation.
@@ -550,11 +563,12 @@ void kbase_pm_get_dvfs_metrics(struct kbase_device *kbdev,
  *                 group.
  * Return:         Returns 0 on failure and non zero on success.
  */
-
 int kbase_platform_dvfs_event(struct kbase_device *kbdev, u32 utilisation,
-	u32 util_gl_share, u32 util_cl_share[2]);
+			      u32 util_gl_share, u32 util_cl_share[2]);
 #endif
 
+#endif /* CONFIG_MALI_BIFROST_DVFS */
+
 void kbase_pm_power_changed(struct kbase_device *kbdev);
 
 /**
@@ -708,6 +722,72 @@ extern bool corestack_driver_control;
  */
 bool kbase_pm_is_l2_desired(struct kbase_device *kbdev);
 
+#if MALI_USE_CSF
+/**
+ * kbase_pm_is_mcu_desired - Check whether MCU is desired
+ *
+ * @kbdev: Device pointer
+ *
+ * This shall be called to check whether MCU needs to be enabled.
+ *
+ * Return: true if MCU needs to be enabled.
+ */
+bool kbase_pm_is_mcu_desired(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_idle_groups_sched_suspendable - Check whether the scheduler can be
+ *             suspended to a low power state when all the CSGs are idle
+ *
+ * @kbdev: Device pointer. The caller must hold kbdev->hwaccess_lock.
+ *
+ * Return: true if allowed to enter the suspended state, i.e. the flag
+ *         CSF_DYNAMIC_PM_SCHED_IGNORE_IDLE is clear in csf_pm_sched_flags.
+ */
+static inline
+bool kbase_pm_idle_groups_sched_suspendable(struct kbase_device *kbdev)
+{
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	return !(kbdev->pm.backend.csf_pm_sched_flags &
+		 CSF_DYNAMIC_PM_SCHED_IGNORE_IDLE);
+}
+
+/**
+ * kbase_pm_no_runnables_sched_suspendable - Check whether the scheduler can be
+ *             suspended to a low power state when there are no runnable CSGs
+ *
+ * @kbdev: Device pointer. The caller must hold kbdev->hwaccess_lock.
+ *
+ * Return: true if allowed to enter the suspended state, i.e. the flag
+ *         CSF_DYNAMIC_PM_SCHED_NO_SUSPEND is clear in csf_pm_sched_flags.
+ */
+static inline
+bool kbase_pm_no_runnables_sched_suspendable(struct kbase_device *kbdev)
+{
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	return !(kbdev->pm.backend.csf_pm_sched_flags &
+		 CSF_DYNAMIC_PM_SCHED_NO_SUSPEND);
+}
+
+/**
+ * kbase_pm_no_mcu_core_pwroff - Check whether the PM is required to keep the
+ *                               MCU core powered in accordance with the
+ *                               active power management policy
+ *
+ * @kbdev: Device pointer. The caller must hold kbdev->hwaccess_lock.
+ *
+ * Return: true if CSF_DYNAMIC_PM_CORE_KEEP_ON is set, i.e. MCU stays powered.
+ */
+static inline bool kbase_pm_no_mcu_core_pwroff(struct kbase_device *kbdev)
+{
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	return kbdev->pm.backend.csf_pm_sched_flags &
+		CSF_DYNAMIC_PM_CORE_KEEP_ON;
+}
+#endif
+
 /**
  * kbase_pm_lock - Lock all necessary mutexes to perform PM actions
  *
diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_l2_states.h b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_l2_states.h
index 12cb051db42a..d66b92841290 100644
--- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_l2_states.h
+++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_l2_states.h
@@ -1,11 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  *
- * (C) COPYRIGHT 2019 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2018-2021 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,8 +17,6 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 /*
@@ -25,6 +24,19 @@
  * The function-like macro KBASEP_L2_STATE() must be defined before including
  * this header file. This header file can be included multiple times in the
  * same compilation unit with different definitions of KBASEP_L2_STATE().
+ *
+ * @OFF:              The L2 cache and tiler are off
+ * @PEND_ON:          The L2 cache and tiler are powering on
+ * @RESTORE_CLOCKS:   The GPU clock is restored. Conditionally used.
+ * @ON_HWCNT_ENABLE:  The L2 cache and tiler are on, and hwcnt is being enabled
+ * @ON:               The L2 cache and tiler are on, and hwcnt is enabled
+ * @ON_HWCNT_DISABLE: The L2 cache and tiler are on, and hwcnt is being disabled
+ * @SLOW_DOWN_CLOCKS: The GPU clock is set to appropriate or lowest clock.
+ *                    Conditionally used.
+ * @POWER_DOWN:       The L2 cache and tiler are about to be powered off
+ * @PEND_OFF:         The L2 cache and tiler are powering off
+ * @RESET_WAIT:       The GPU is resetting, L2 cache and tiler power state are
+ *                    unknown
  */
 KBASEP_L2_STATE(OFF)
 KBASEP_L2_STATE(PEND_ON)
diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_mcu_states.h b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_mcu_states.h
index e163bd4f4094..eab30eb32c56 100644
--- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_mcu_states.h
+++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_mcu_states.h
@@ -1,11 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  *
- * (C) COPYRIGHT 2020 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,8 +17,6 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 /*
@@ -25,15 +24,40 @@
  * The function-like macro KBASEP_MCU_STATE() must be defined before including
  * this header file. This header file can be included multiple times in the
  * same compilation unit with different definitions of KBASEP_MCU_STATE().
+ *
+ * @OFF:                      The MCU is powered off.
+ * @PEND_ON_RELOAD:           The warm boot of MCU or cold boot of MCU (with
+ *                            firmware reloading) is in progress.
+ * @ON_GLB_REINIT_PEND:       The MCU is enabled and Global configuration
+ *                            requests have been sent to the firmware.
+ * @ON_HWCNT_ENABLE:          The Global requests have completed and MCU is now
+ *                            ready for use and hwcnt is being enabled.
+ * @ON:                       The MCU is active and hwcnt has been enabled.
+ * @ON_CORE_ATTR_UPDATE_PEND: The MCU is active and mask of enabled shader cores
+ *                            is being updated.
+ * @ON_HWCNT_DISABLE:         The MCU is on and hwcnt is being disabled.
+ * @ON_HALT:                  The MCU is on and hwcnt has been disabled, MCU
+ *                            halt would be triggered.
+ * @ON_PEND_HALT:             MCU halt in progress, confirmation pending.
+ * @POWER_DOWN:               MCU halted operations, pending being disabled.
+ * @PEND_OFF:                 MCU is being disabled, pending on powering off.
+ * @RESET_WAIT:               The GPU is resetting, MCU state is unknown.
  */
 KBASEP_MCU_STATE(OFF)
 KBASEP_MCU_STATE(PEND_ON_RELOAD)
 KBASEP_MCU_STATE(ON_GLB_REINIT_PEND)
 KBASEP_MCU_STATE(ON_HWCNT_ENABLE)
 KBASEP_MCU_STATE(ON)
+KBASEP_MCU_STATE(ON_CORE_ATTR_UPDATE_PEND)
 KBASEP_MCU_STATE(ON_HWCNT_DISABLE)
 KBASEP_MCU_STATE(ON_HALT)
 KBASEP_MCU_STATE(ON_PEND_HALT)
 KBASEP_MCU_STATE(POWER_DOWN)
 KBASEP_MCU_STATE(PEND_OFF)
 KBASEP_MCU_STATE(RESET_WAIT)
+/* Additional MCU states with HOST_CONTROL_SHADERS */
+KBASEP_MCU_STATE(HCTL_SHADERS_PEND_ON)
+KBASEP_MCU_STATE(HCTL_CORES_NOTIFY_PEND)
+KBASEP_MCU_STATE(HCTL_MCU_ON_RECHECK)
+KBASEP_MCU_STATE(HCTL_SHADERS_READY_OFF)
+KBASEP_MCU_STATE(HCTL_SHADERS_PEND_OFF)
diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_metrics.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_metrics.c
index b714971ba17c..319a60bc62e8 100644
--- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_metrics.c
+++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_metrics.c
@@ -1,11 +1,12 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  *
- * (C) COPYRIGHT 2011-2020 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2011-2021 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,12 +17,8 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
-
-
 /*
  * Metrics for power management
  */
@@ -29,24 +26,28 @@
 #include <mali_kbase.h>
 #include <mali_kbase_pm.h>
 #include <backend/gpu/mali_kbase_pm_internal.h>
-#if !MALI_USE_CSF
+
+#if MALI_USE_CSF
+#include "mali_kbase_clk_rate_trace_mgr.h"
+#include <csf/ipa_control/mali_kbase_csf_ipa_control.h>
+#else
 #include <backend/gpu/mali_kbase_jm_rb.h>
 #endif /* !MALI_USE_CSF */
+
 #include <backend/gpu/mali_kbase_pm_defs.h>
 #include <mali_linux_trace.h>
 
-/* When VSync is being hit aim for utilisation between 70-90% */
-#define KBASE_PM_VSYNC_MIN_UTILISATION          70
-#define KBASE_PM_VSYNC_MAX_UTILISATION          90
-/* Otherwise aim for 10-40% */
-#define KBASE_PM_NO_VSYNC_MIN_UTILISATION       10
-#define KBASE_PM_NO_VSYNC_MAX_UTILISATION       40
-
 /* Shift used for kbasep_pm_metrics_data.time_busy/idle - units of (1 << 8) ns
  * This gives a maximum period between samples of 2^(32+8)/100 ns = slightly
- * under 11s. Exceeding this will cause overflow */
+ * under 11s. Exceeding this will cause overflow
+ */
 #define KBASE_PM_TIME_SHIFT			8
 
+#if MALI_USE_CSF
+/* Scaling factor to get the GPU_ACTIVE value in units of nanoseconds */
+#define GPU_ACTIVE_SCALING_FACTOR ((u64)1E9)
+#endif
+
 #ifdef CONFIG_MALI_BIFROST_DVFS
 static enum hrtimer_restart dvfs_callback(struct hrtimer *timer)
 {
@@ -73,11 +74,45 @@ static enum hrtimer_restart dvfs_callback(struct hrtimer *timer)
 
 int kbasep_pm_metrics_init(struct kbase_device *kbdev)
 {
+#if MALI_USE_CSF
+	struct kbase_ipa_control_perf_counter perf_counter;
+	int err;
+
+	/* One counter group */
+	const size_t NUM_PERF_COUNTERS = 1;
+
 	KBASE_DEBUG_ASSERT(kbdev != NULL);
-
 	kbdev->pm.backend.metrics.kbdev = kbdev;
-
 	kbdev->pm.backend.metrics.time_period_start = ktime_get();
+	kbdev->pm.backend.metrics.values.time_busy = 0;
+	kbdev->pm.backend.metrics.values.time_idle = 0;
+	kbdev->pm.backend.metrics.values.time_in_protm = 0;
+
+	perf_counter.scaling_factor = GPU_ACTIVE_SCALING_FACTOR;
+
+	/* Normalize values by GPU frequency */
+	perf_counter.gpu_norm = true;
+
+	/* We need the GPU_ACTIVE counter, which is in the CSHW group */
+	perf_counter.type = KBASE_IPA_CORE_TYPE_CSHW;
+
+	/* We need the GPU_ACTIVE counter */
+	perf_counter.idx = GPU_ACTIVE_CNT_IDX;
+
+	err = kbase_ipa_control_register(
+		kbdev, &perf_counter, NUM_PERF_COUNTERS,
+		&kbdev->pm.backend.metrics.ipa_control_client);
+	if (err) {
+		dev_err(kbdev->dev,
+			"Failed to register IPA with kbase_ipa_control: err=%d",
+			err);
+		return -1;
+	}
+#else
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	kbdev->pm.backend.metrics.kbdev = kbdev;
+	kbdev->pm.backend.metrics.time_period_start = ktime_get();
+
 	kbdev->pm.backend.metrics.gpu_active = false;
 	kbdev->pm.backend.metrics.active_cl_ctx[0] = 0;
 	kbdev->pm.backend.metrics.active_cl_ctx[1] = 0;
@@ -91,16 +126,25 @@ int kbasep_pm_metrics_init(struct kbase_device *kbdev)
 	kbdev->pm.backend.metrics.values.busy_cl[1] = 0;
 	kbdev->pm.backend.metrics.values.busy_gl = 0;
 
+#endif
 	spin_lock_init(&kbdev->pm.backend.metrics.lock);
 
 #ifdef CONFIG_MALI_BIFROST_DVFS
 	hrtimer_init(&kbdev->pm.backend.metrics.timer, CLOCK_MONOTONIC,
 							HRTIMER_MODE_REL);
 	kbdev->pm.backend.metrics.timer.function = dvfs_callback;
-
+	kbdev->pm.backend.metrics.initialized = true;
 	kbase_pm_metrics_start(kbdev);
 #endif /* CONFIG_MALI_BIFROST_DVFS */
 
+#if MALI_USE_CSF
+	/* The sanity check on the GPU_ACTIVE performance counter
+	 * is skipped for Juno platforms that have timing problems.
+	 */
+	kbdev->pm.backend.metrics.skip_gpu_active_sanity_check =
+		of_machine_is_compatible("arm,juno");
+#endif
+
 	return 0;
 }
 KBASE_EXPORT_TEST_API(kbasep_pm_metrics_init);
@@ -117,7 +161,13 @@ void kbasep_pm_metrics_term(struct kbase_device *kbdev)
 	spin_unlock_irqrestore(&kbdev->pm.backend.metrics.lock, flags);
 
 	hrtimer_cancel(&kbdev->pm.backend.metrics.timer);
+	kbdev->pm.backend.metrics.initialized = false;
 #endif /* CONFIG_MALI_BIFROST_DVFS */
+
+#if MALI_USE_CSF
+	kbase_ipa_control_unregister(
+		kbdev, kbdev->pm.backend.metrics.ipa_control_client);
+#endif
 }
 
 KBASE_EXPORT_TEST_API(kbasep_pm_metrics_term);
@@ -125,8 +175,121 @@ KBASE_EXPORT_TEST_API(kbasep_pm_metrics_term);
 /* caller needs to hold kbdev->pm.backend.metrics.lock before calling this
  * function
  */
+#if MALI_USE_CSF
+#if defined(CONFIG_MALI_BIFROST_DEVFREQ) || defined(CONFIG_MALI_BIFROST_DVFS)
+static void kbase_pm_get_dvfs_utilisation_calc(struct kbase_device *kbdev)
+{
+	int err;
+	u64 gpu_active_counter;
+	u64 protected_time;
+	ktime_t now;
+
+	lockdep_assert_held(&kbdev->pm.backend.metrics.lock);
+
+	/* Query IPA_CONTROL for the latest GPU-active and protected-time
+	 * info.
+	 */
+	err = kbase_ipa_control_query(
+		kbdev, kbdev->pm.backend.metrics.ipa_control_client,
+		&gpu_active_counter, 1, &protected_time);
+
+	/* Read the timestamp after reading the GPU_ACTIVE counter value.
+	 * This ensures the time gap between the 2 reads is consistent for
+	 * a meaningful comparison between the increment of GPU_ACTIVE and
+	 * elapsed time. The lock taken inside the kbase_ipa_control_query()
+	 * function can cause a lot of variation.
+	 */
+	now = ktime_get();
+
+	if (err) {
+		dev_err(kbdev->dev,
+			"Failed to query the increment of GPU_ACTIVE counter: err=%d",
+			err);
+	} else {
+		u64 diff_ns, margin_ns;
+		s64 diff_ns_signed;
+		u32 ns_time;
+		ktime_t diff = ktime_sub(
+			now, kbdev->pm.backend.metrics.time_period_start);
+
+		diff_ns_signed = ktime_to_ns(diff);
+		/* Negative interval: skip sample, keep the old period start */
+		if (diff_ns_signed < 0)
+			return;
+
+		diff_ns = (u64)diff_ns_signed;
+
+		/* Use a margin value that is 1/64th (approximately 1.6%) of
+		 * the time difference.
+		 */
+		margin_ns = diff_ns >> 6;
+
+		/* Calculate time difference in units of 256ns */
+		ns_time = (u32)(diff_ns >> KBASE_PM_TIME_SHIFT);
+
+#ifndef CONFIG_MALI_BIFROST_NO_MALI
+		/* The GPU_ACTIVE counter shouldn't clock-up more time than has
+		 * actually elapsed - but still some margin needs to be given
+		 * when doing the comparison. There could be some drift between
+		 * the CPU and GPU clock.
+		 *
+		 * Can do the check only in a real driver build, as an arbitrary
+		 * value for GPU_ACTIVE can be fed into dummy model in no_mali
+		 * configuration which may not correspond to the real elapsed
+		 * time.
+		 */
+		if (!kbdev->pm.backend.metrics.skip_gpu_active_sanity_check) {
+			if (gpu_active_counter > (diff_ns + margin_ns)) {
+				dev_info(
+					kbdev->dev,
+					"GPU activity takes longer than time interval: %llu ns > %llu ns",
+					(unsigned long long)gpu_active_counter,
+					(unsigned long long)diff_ns);
+			}
+		}
+#else
+		CSTD_UNUSED(margin_ns);
+#endif
+
+		/* Add protected_time to gpu_active_counter so that time in
+		 * protected mode is included in the apparent GPU active time,
+		 * then convert it from units of 1ns to units of 256ns, to
+		 * match what JM GPUs use. The assumption is made here that the
+		 * GPU is 100% busy while in protected mode, so we should add
+		 * this since the GPU can't (and thus won't) update these
+		 * counters while it's actually in protected mode.
+		 *
+		 * Perform the add after dividing each value down, to reduce
+		 * the chances of overflows.
+		 */
+		protected_time >>= KBASE_PM_TIME_SHIFT;
+		gpu_active_counter >>= KBASE_PM_TIME_SHIFT;
+		gpu_active_counter += protected_time;
+
+		/* Ensure the following equations don't go wrong if
+		 * gpu_active_counter is slightly larger than ns_time somehow
+		 */
+		gpu_active_counter = MIN(gpu_active_counter, ns_time);
+
+		kbdev->pm.backend.metrics.values.time_busy +=
+			gpu_active_counter;
+
+		kbdev->pm.backend.metrics.values.time_idle +=
+			ns_time - gpu_active_counter;
+
+		/* Also make time in protected mode available explicitly,
+		 * so users of this data have this info, too.
+		 */
+		kbdev->pm.backend.metrics.values.time_in_protm +=
+			protected_time;
+	}
+
+	kbdev->pm.backend.metrics.time_period_start = now;
+}
+#endif /* defined(CONFIG_MALI_BIFROST_DEVFREQ) || defined(CONFIG_MALI_BIFROST_DVFS) */
+#else
 static void kbase_pm_get_dvfs_utilisation_calc(struct kbase_device *kbdev,
-								ktime_t now)
+					       ktime_t now)
 {
 	ktime_t diff;
 
@@ -151,12 +314,13 @@ static void kbase_pm_get_dvfs_utilisation_calc(struct kbase_device *kbdev,
 		if (kbdev->pm.backend.metrics.active_gl_ctx[2])
 			kbdev->pm.backend.metrics.values.busy_gl += ns_time;
 	} else {
-		kbdev->pm.backend.metrics.values.time_idle += (u32) (ktime_to_ns(diff)
-							>> KBASE_PM_TIME_SHIFT);
+		kbdev->pm.backend.metrics.values.time_idle +=
+			(u32)(ktime_to_ns(diff) >> KBASE_PM_TIME_SHIFT);
 	}
 
 	kbdev->pm.backend.metrics.time_period_start = now;
 }
+#endif  /* MALI_USE_CSF */
 
 #if defined(CONFIG_MALI_BIFROST_DEVFREQ) || defined(CONFIG_MALI_BIFROST_DVFS)
 void kbase_pm_get_dvfs_metrics(struct kbase_device *kbdev,
@@ -167,14 +331,23 @@ void kbase_pm_get_dvfs_metrics(struct kbase_device *kbdev,
 	unsigned long flags;
 
 	spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, flags);
+#if MALI_USE_CSF
+	kbase_pm_get_dvfs_utilisation_calc(kbdev);
+#else
 	kbase_pm_get_dvfs_utilisation_calc(kbdev, ktime_get());
+#endif
 
 	memset(diff, 0, sizeof(*diff));
 	diff->time_busy = cur->time_busy - last->time_busy;
 	diff->time_idle = cur->time_idle - last->time_idle;
+
+#if MALI_USE_CSF
+	diff->time_in_protm = cur->time_in_protm - last->time_in_protm;
+#else
 	diff->busy_cl[0] = cur->busy_cl[0] - last->busy_cl[0];
 	diff->busy_cl[1] = cur->busy_cl[1] - last->busy_cl[1];
 	diff->busy_gl = cur->busy_gl - last->busy_gl;
+#endif
 
 	*last = *cur;
 
@@ -186,26 +359,42 @@ KBASE_EXPORT_TEST_API(kbase_pm_get_dvfs_metrics);
 #ifdef CONFIG_MALI_BIFROST_DVFS
 void kbase_pm_get_dvfs_action(struct kbase_device *kbdev)
 {
-	int utilisation, util_gl_share;
-	int util_cl_share[2];
-	int busy;
+	int utilisation;
 	struct kbasep_pm_metrics *diff;
+#if !MALI_USE_CSF
+	int busy;
+	int util_gl_share;
+	int util_cl_share[2];
+#endif
 
 	KBASE_DEBUG_ASSERT(kbdev != NULL);
 
 	diff = &kbdev->pm.backend.metrics.dvfs_diff;
 
-	kbase_pm_get_dvfs_metrics(kbdev, &kbdev->pm.backend.metrics.dvfs_last, diff);
+	kbase_pm_get_dvfs_metrics(kbdev, &kbdev->pm.backend.metrics.dvfs_last,
+				  diff);
 
 	utilisation = (100 * diff->time_busy) /
 			max(diff->time_busy + diff->time_idle, 1u);
 
+#if !MALI_USE_CSF
 	busy = max(diff->busy_gl + diff->busy_cl[0] + diff->busy_cl[1], 1u);
+
 	util_gl_share = (100 * diff->busy_gl) / busy;
 	util_cl_share[0] = (100 * diff->busy_cl[0]) / busy;
 	util_cl_share[1] = (100 * diff->busy_cl[1]) / busy;
 
-	kbase_platform_dvfs_event(kbdev, utilisation, util_gl_share, util_cl_share);
+	kbase_platform_dvfs_event(kbdev, utilisation, util_gl_share,
+				  util_cl_share);
+#else
+	/* Note that, at present, we don't pass protected-mode time to the
+	 * platform here. It's unlikely to be useful, however, as the platform
+	 * probably just cares whether the GPU is busy or not; time in
+	 * protected mode is already added to busy-time at this point, though,
+	 * so we should be good.
+	 */
+	kbase_platform_dvfs_event(kbdev, utilisation);
+#endif
 }
 
 bool kbase_pm_metrics_is_active(struct kbase_device *kbdev)
@@ -226,11 +415,20 @@ KBASE_EXPORT_TEST_API(kbase_pm_metrics_is_active);
 void kbase_pm_metrics_start(struct kbase_device *kbdev)
 {
 	unsigned long flags;
+	bool update = true;
+
+	if (unlikely(!kbdev->pm.backend.metrics.initialized))
+		return;
 
 	spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, flags);
-	kbdev->pm.backend.metrics.timer_active = true;
+	if (!kbdev->pm.backend.metrics.timer_active)
+		kbdev->pm.backend.metrics.timer_active = true;
+	else
+		update = false;
 	spin_unlock_irqrestore(&kbdev->pm.backend.metrics.lock, flags);
-	hrtimer_start(&kbdev->pm.backend.metrics.timer,
+
+	if (update)
+		hrtimer_start(&kbdev->pm.backend.metrics.timer,
 			HR_TIMER_DELAY_MSEC(kbdev->pm.dvfs_period),
 			HRTIMER_MODE_REL);
 }
@@ -238,11 +436,20 @@ void kbase_pm_metrics_start(struct kbase_device *kbdev)
 void kbase_pm_metrics_stop(struct kbase_device *kbdev)
 {
 	unsigned long flags;
+	bool update = true;
+
+	if (unlikely(!kbdev->pm.backend.metrics.initialized))
+		return;
 
 	spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, flags);
-	kbdev->pm.backend.metrics.timer_active = false;
+	if (kbdev->pm.backend.metrics.timer_active)
+		kbdev->pm.backend.metrics.timer_active = false;
+	else
+		update = false;
 	spin_unlock_irqrestore(&kbdev->pm.backend.metrics.lock, flags);
-	hrtimer_cancel(&kbdev->pm.backend.metrics.timer);
+
+	if (update)
+		hrtimer_cancel(&kbdev->pm.backend.metrics.timer);
 }
 
 
@@ -273,7 +480,8 @@ static void kbase_pm_metrics_active_calc(struct kbase_device *kbdev)
 		struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, 0);
 
 		/* Head atom may have just completed, so if it isn't running
-		 * then try the next atom */
+		 * then try the next atom
+		 */
 		if (katom && katom->gpu_rb_state != KBASE_ATOM_GPU_RB_SUBMITTED)
 			katom = kbase_gpu_inspect(kbdev, js, 1);
 
@@ -296,7 +504,6 @@ static void kbase_pm_metrics_active_calc(struct kbase_device *kbdev)
 		}
 	}
 }
-#endif /* !MALI_USE_CSF */
 
 /* called when job is submitted to or removed from a GPU slot */
 void kbase_pm_metrics_update(struct kbase_device *kbdev, ktime_t *timestamp)
@@ -313,12 +520,12 @@ void kbase_pm_metrics_update(struct kbase_device *kbdev, ktime_t *timestamp)
 		timestamp = &now;
 	}
 
-	/* Track how long CL and/or GL jobs have been busy for */
+	/* Track how much time has been spent busy or idle. For JM GPUs,
+	 * this also evaluates how long CL and/or GL jobs have been busy for.
+	 */
 	kbase_pm_get_dvfs_utilisation_calc(kbdev, *timestamp);
 
-#if !MALI_USE_CSF
 	kbase_pm_metrics_active_calc(kbdev);
-#endif /* !MALI_USE_CSF */
-
 	spin_unlock_irqrestore(&kbdev->pm.backend.metrics.lock, flags);
 }
+#endif /* !MALI_USE_CSF */
diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_policy.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_policy.c
index 48b24b1c866e..1f56ae867177 100644
--- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_policy.c
+++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_policy.c
@@ -1,11 +1,12 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  *
- * (C) COPYRIGHT 2010-2020 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2021 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,8 +17,6 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 /*
@@ -28,6 +27,11 @@
 #include <gpu/mali_kbase_gpu_regmap.h>
 #include <mali_kbase_pm.h>
 #include <backend/gpu/mali_kbase_pm_internal.h>
+#include <mali_kbase_reset_gpu.h>
+
+#if MALI_USE_CSF && defined CONFIG_MALI_BIFROST_DEBUG
+#include <csf/mali_kbase_csf_firmware.h>
+#endif
 
 static const struct kbase_pm_policy *const all_policy_list[] = {
 #ifdef CONFIG_MALI_BIFROST_NO_MALI
@@ -45,11 +49,45 @@ static const struct kbase_pm_policy *const all_policy_list[] = {
 #endif /* CONFIG_MALI_BIFROST_NO_MALI */
 };
 
+#if MALI_USE_CSF
+void kbase_pm_policy_init(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+	const struct kbase_pm_policy *default_policy = all_policy_list[0];
+
+#if defined CONFIG_MALI_BIFROST_DEBUG
+	/* Use the always_on policy if the module param fw_debug=1 is
+	 * passed, to aid firmware debugging.
+	 */
+	if (fw_debug)
+		default_policy = &kbase_pm_always_on_policy_ops;
+#endif
+
+	/* Run the policy's init hook before publishing it under the lock */
+	default_policy->init(kbdev);
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	kbdev->pm.backend.pm_current_policy = default_policy;
+	kbdev->pm.backend.csf_pm_sched_flags =
+				default_policy->pm_sched_flags;
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+}
+#else /* MALI_USE_CSF */
 void kbase_pm_policy_init(struct kbase_device *kbdev)
 {
 	kbdev->pm.backend.pm_current_policy = all_policy_list[0];
+
+#if MALI_USE_CSF && defined CONFIG_MALI_BIFROST_DEBUG
+	/* Use always_on policy if module param fw_debug=1 is
+	 * passed, to aid firmware debugging.
+	 */
+	if (fw_debug)
+		kbdev->pm.backend.pm_current_policy =
+			&kbase_pm_always_on_policy_ops;
+#endif
 	kbdev->pm.backend.pm_current_policy->init(kbdev);
 }
+#endif /* MALI_USE_CSF */
 
 void kbase_pm_policy_term(struct kbase_device *kbdev)
 {
@@ -102,7 +140,8 @@ void kbase_pm_update_active(struct kbase_device *kbdev)
 		}
 	} else {
 		/* It is an error for the power policy to power off the GPU
-		 * when there are contexts active */
+		 * when there are contexts active
+		 */
 		KBASE_DEBUG_ASSERT(pm->active_count == 0);
 
 		pm->backend.poweron_required = false;
@@ -126,18 +165,20 @@ void kbase_pm_update_dynamic_cores_onoff(struct kbase_device *kbdev)
 	lockdep_assert_held(&kbdev->hwaccess_lock);
 	lockdep_assert_held(&kbdev->pm.lock);
 
-#if MALI_USE_CSF
-	/* On CSF GPUs, Host driver isn't supposed to do the power management
-	 * for shader cores. CSF firmware will power up the cores appropriately
-	 * and so from Driver's standpoint 'shaders_desired' flag shall always
-	 * remain 0.
-	 */
-	return;
-#endif
 	if (kbdev->pm.backend.pm_current_policy == NULL)
 		return;
 	if (kbdev->pm.backend.poweroff_wait_in_progress)
 		return;
+
+#if MALI_USE_CSF
+	CSTD_UNUSED(shaders_desired);
+	/* Invoke the MCU state machine to send a request to FW for updating
+	 * the mask of shader cores that can be used for allocation of
+	 * endpoints requested by CSGs.
+	 */
+	if (kbase_pm_is_mcu_desired(kbdev))
+		kbase_pm_update_state(kbdev);
+#else
 	/* In protected transition, don't allow outside shader core request
 	 * affect transition, return directly
 	 */
@@ -149,6 +190,7 @@ void kbase_pm_update_dynamic_cores_onoff(struct kbase_device *kbdev)
 	if (shaders_desired && kbase_pm_is_l2_desired(kbdev)) {
 		kbase_pm_update_state(kbdev);
 	}
+#endif
 }
 
 void kbase_pm_update_cores_state_nolock(struct kbase_device *kbdev)
@@ -164,7 +206,8 @@ void kbase_pm_update_cores_state_nolock(struct kbase_device *kbdev)
 
 	if (kbdev->pm.backend.protected_transition_override)
 		/* We are trying to change in/out of protected mode - force all
-		 * cores off so that the L2 powers down */
+		 * cores off so that the L2 powers down
+		 */
 		shaders_desired = false;
 	else
 		shaders_desired = kbdev->pm.backend.pm_current_policy->shaders_needed(kbdev);
@@ -216,20 +259,106 @@ const struct kbase_pm_policy *kbase_pm_get_policy(struct kbase_device *kbdev)
 
 KBASE_EXPORT_TEST_API(kbase_pm_get_policy);
 
+#if MALI_USE_CSF
+static int policy_change_wait_for_L2_off(struct kbase_device *kbdev)
+{
+#define WAIT_DURATION_MS (3000)
+	long remaining;
+	long timeout = kbase_csf_timeout_in_jiffies(WAIT_DURATION_MS);
+	int err = 0;
+
+	/* Wait for the L2 to become off, by which point the MCU is implicitly
+	 * off too, since the L2 state machine only starts its power-down
+	 * sequence when the MCU is in the off state. The L2 must be off as
+	 * the tiler may need to be power cycled for MCU reconfiguration
+	 * for host control of shader cores.
+	 */
+#if KERNEL_VERSION(4, 13, 1) <= LINUX_VERSION_CODE
+	remaining = wait_event_killable_timeout(
+		kbdev->pm.backend.gpu_in_desired_state_wait,
+		kbdev->pm.backend.l2_state == KBASE_L2_OFF, timeout);
+#else
+	remaining = wait_event_timeout(
+		kbdev->pm.backend.gpu_in_desired_state_wait,
+		kbdev->pm.backend.l2_state == KBASE_L2_OFF, timeout);
+#endif
+	/* Zero means timeout; negative is an error from the killable wait */
+	if (!remaining) {
+		err = -ETIMEDOUT;
+	} else if (remaining < 0) {
+		dev_info(kbdev->dev,
+			 "Wait for L2_off got interrupted");
+		err = (int)remaining;
+	}
+
+	dev_dbg(kbdev->dev, "%s: err=%d mcu_state=%d, L2_state=%d\n", __func__,
+		err, kbdev->pm.backend.mcu_state, kbdev->pm.backend.l2_state);
+
+	return err;
+}
+#endif
+
 void kbase_pm_set_policy(struct kbase_device *kbdev,
 				const struct kbase_pm_policy *new_policy)
 {
 	const struct kbase_pm_policy *old_policy;
 	unsigned long flags;
+#if MALI_USE_CSF
+	unsigned int new_policy_csf_pm_sched_flags;
+	bool sched_suspend;
+	bool reset_gpu = false;
+#endif
 
 	KBASE_DEBUG_ASSERT(kbdev != NULL);
 	KBASE_DEBUG_ASSERT(new_policy != NULL);
 
 	KBASE_KTRACE_ADD(kbdev, PM_SET_POLICY, NULL, new_policy->id);
 
+#if MALI_USE_CSF
+	/* Serialize calls on kbase_pm_set_policy() */
+	mutex_lock(&kbdev->pm.backend.policy_change_lock);
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	/* policy_change_clamp_state_to_off, when needed, is set/cleared in
+	 * this function, a very limited temporal scope for covering the
+	 * change transition.
+	 */
+	WARN_ON(kbdev->pm.backend.policy_change_clamp_state_to_off);
+	new_policy_csf_pm_sched_flags = new_policy->pm_sched_flags;
+
+	/* The scheduler PM suspend operation is required when the change
+	 * involves the always_on policy, as reflected by the
+	 * CSF_DYNAMIC_PM_CORE_KEEP_ON flag bit.
+	 */
+	sched_suspend = kbdev->csf.firmware_inited &&
+			(CSF_DYNAMIC_PM_CORE_KEEP_ON &
+			 (new_policy_csf_pm_sched_flags |
+			  kbdev->pm.backend.csf_pm_sched_flags));
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	if (sched_suspend)
+		kbase_csf_scheduler_pm_suspend(kbdev);
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	/* If the currently active policy is always_on, the MCU/L2 state must
+	 * be clamped so that it can reach the off state
+	 */
+	if (sched_suspend)
+		kbdev->pm.backend.policy_change_clamp_state_to_off =
+			CSF_DYNAMIC_PM_CORE_KEEP_ON & kbdev->pm.backend.csf_pm_sched_flags;
+
+	kbase_pm_update_state(kbdev);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	if (sched_suspend)
+		reset_gpu = policy_change_wait_for_L2_off(kbdev);
+#endif
+
 	/* During a policy change we pretend the GPU is active */
 	/* A suspend won't happen here, because we're in a syscall from a
-	 * userspace thread */
+	 * userspace thread
+	 */
 	kbase_pm_context_active(kbdev);
 
 	kbase_pm_lock(kbdev);
@@ -250,19 +379,40 @@ void kbase_pm_set_policy(struct kbase_device *kbdev,
 
 	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
 	kbdev->pm.backend.pm_current_policy = new_policy;
+#if MALI_USE_CSF
+	kbdev->pm.backend.csf_pm_sched_flags = new_policy_csf_pm_sched_flags;
+	/* New policy in place, release the clamping on mcu/L2 off state */
+	kbdev->pm.backend.policy_change_clamp_state_to_off = false;
+	kbase_pm_update_state(kbdev);
+#endif
 	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
 
 	/* If any core power state changes were previously attempted, but
 	 * couldn't be made because the policy was changing (current_policy was
-	 * NULL), then re-try them here. */
+	 * NULL), then re-try them here.
+	 */
 	kbase_pm_update_active(kbdev);
 	kbase_pm_update_cores_state(kbdev);
 
 	kbase_pm_unlock(kbdev);
 
 	/* Now the policy change is finished, we release our fake context active
-	 * reference */
+	 * reference
+	 */
 	kbase_pm_context_idle(kbdev);
+
+#if MALI_USE_CSF
+	/* Reverse the suspension done */
+	if (reset_gpu) {
+		dev_warn(kbdev->dev, "Resorting to GPU reset for policy change\n");
+		if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_NONE))
+			kbase_reset_gpu(kbdev);
+		kbase_reset_gpu_wait(kbdev);
+	} else if (sched_suspend)
+		kbase_csf_scheduler_pm_resume(kbdev);
+
+	mutex_unlock(&kbdev->pm.backend.policy_change_lock);
+#endif
 }
 
 KBASE_EXPORT_TEST_API(kbase_pm_set_policy);
diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_policy.h b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_policy.h
index f103ef0c01e4..a513a26ac92c 100644
--- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_policy.h
+++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_policy.h
@@ -1,11 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  *
- * (C) COPYRIGHT 2010-2015, 2018-2019 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2015, 2018-2020 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,8 +17,6 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 /*
diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_shader_states.h b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_shader_states.h
index 6cafaa171962..2276713d9987 100644
--- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_shader_states.h
+++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_shader_states.h
@@ -1,11 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  *
- * (C) COPYRIGHT 2018-2019 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2018-2021 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,8 +17,6 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 /*
@@ -26,6 +25,41 @@
  * including this header file. This header file can be included multiple
  * times in the same compilation unit with different definitions of
  * KBASEP_SHADER_STATE().
+ *
+ * @OFF_CORESTACK_OFF:                The shaders and core stacks are off
+ * @OFF_CORESTACK_PEND_ON:            The shaders are off, core stacks have been
+ *                                    requested to power on and hwcnt is being
+ *                                    disabled
+ * @PEND_ON_CORESTACK_ON:             Core stacks are on, shaders have been
+ *                                    requested to power on. Or after doing
+ *                                    partial shader on/off, checking whether
+ *                                    it's the desired state.
+ * @ON_CORESTACK_ON:                  The shaders and core stacks are on, and
+ *                                    hwcnt already enabled.
+ * @ON_CORESTACK_ON_RECHECK:          The shaders and core stacks are on, hwcnt
+ *                                    is disabled, and a check is made whether
+ *                                    to power down or re-enable hwcnt.
+ * @WAIT_OFF_CORESTACK_ON:            The shaders have been requested to power
+ *                                    off, but they remain on for the duration
+ *                                    of the hysteresis timer
+ * @WAIT_GPU_IDLE:                    Because of GPU2017-861, a partial shader
+ *                                    power-off must wait until all jobs on the
+ *                                    GPU have finished, including both the
+ *                                    jobs currently running and those still in
+ *                                    the GPU queue
+ * @WAIT_FINISHED_CORESTACK_ON:       The hysteresis timer has expired
+ * @L2_FLUSHING_CORESTACK_ON:         The core stacks are on and the level 2
+ *                                    cache is being flushed.
+ * @READY_OFF_CORESTACK_ON:           The core stacks are on and the shaders are
+ *                                    ready to be powered off.
+ * @PEND_OFF_CORESTACK_ON:            The core stacks are on, and the shaders
+ *                                    have been requested to power off
+ * @OFF_CORESTACK_PEND_OFF:           The shaders are off, and the core stacks
+ *                                    have been requested to power off
+ * @OFF_CORESTACK_OFF_TIMER_PEND_OFF: Shaders and corestacks are off, but the
+ *                                    tick timer cancellation is still pending.
+ * @RESET_WAIT:                       The GPU is resetting, shader and core
+ *                                    stack power states are unknown
  */
 KBASEP_SHADER_STATE(OFF_CORESTACK_OFF)
 KBASEP_SHADER_STATE(OFF_CORESTACK_PEND_ON)
diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_time.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_time.c
index e19f53b2cbe8..7abb8e2c7b06 100644
--- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_time.c
+++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_time.c
@@ -1,11 +1,12 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  *
- * (C) COPYRIGHT 2014-2016,2018-2020 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2016, 2018-2021 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,8 +17,6 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 #include <mali_kbase.h>
@@ -67,14 +66,47 @@ void kbase_backend_get_gpu_time_norequest(struct kbase_device *kbdev,
 #endif
 }
 
+#if !MALI_USE_CSF
+/**
+ * timedwait_cycle_count_active() - Busy-poll until CYCLE_COUNT_ACTIVE is set
+ *
+ * @kbdev: Kbase device whose GPU_STATUS register is polled
+ *
+ * Return: true if CYCLE_COUNT_ACTIVE was set before the timeout, else false.
+ */
+static bool timedwait_cycle_count_active(struct kbase_device *kbdev)
+{
+#ifdef CONFIG_MALI_BIFROST_NO_MALI
+	return true; /* NO_MALI build: report active without polling */
+#else /* !CONFIG_MALI_BIFROST_NO_MALI */
+	bool success = false;
+	const unsigned int timeout = 100; /* in milliseconds */
+	const unsigned long remaining = jiffies + msecs_to_jiffies(timeout);
+
+	while (time_is_after_jiffies(remaining)) {
+		if ((kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_STATUS)) &
+		     GPU_STATUS_CYCLE_COUNT_ACTIVE)) {
+			success = true;
+			break;
+		}
+	}
+	return success;
+#endif /* CONFIG_MALI_BIFROST_NO_MALI */
+}
+#endif
+
 void kbase_backend_get_gpu_time(struct kbase_device *kbdev, u64 *cycle_counter,
 				u64 *system_time, struct timespec64 *ts)
 {
 #if !MALI_USE_CSF
 	kbase_pm_request_gpu_cycle_counter(kbdev);
+	WARN_ONCE(kbdev->pm.backend.l2_state != KBASE_L2_ON,
+		  "L2 not powered up");
+	WARN_ONCE((!timedwait_cycle_count_active(kbdev)),
+		  "Timed out on CYCLE_COUNT_ACTIVE");
 #endif
-	kbase_backend_get_gpu_time_norequest(
-		kbdev, cycle_counter, system_time, ts);
+	kbase_backend_get_gpu_time_norequest(kbdev, cycle_counter, system_time,
+					     ts);
 #if !MALI_USE_CSF
 	kbase_pm_release_gpu_cycle_counter(kbdev);
 #endif
diff --git a/drivers/gpu/arm/bifrost/build.bp b/drivers/gpu/arm/bifrost/build.bp
index b9b86184f3be..b1b52ffe9a1c 100644
--- a/drivers/gpu/arm/bifrost/build.bp
+++ b/drivers/gpu/arm/bifrost/build.bp
@@ -1,15 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  *
- * (C) COPYRIGHT 2017-2020 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2017-2021 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
- * A copy of the licence is included with the program, and can also be obtained
- * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
- * Boston, MA 02110-1301, USA.
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
  *
  */
 
@@ -135,8 +141,11 @@ bob_kernel_module {
     cinstr_secondary_hwc: {
         kbuild_options: ["CONFIG_MALI_BIFROST_PRFCNT_SET_SECONDARY=y"],
     },
-    cinstr_secondary_hwc_via_debug_fs: {
-        kbuild_options: ["CONFIG_MALI_PRFCNT_SET_SECONDARY_VIA_DEBUG_FS=y"],
+    cinstr_tertiary_hwc: {
+        kbuild_options: ["CONFIG_MALI_PRFCNT_SET_TERTIARY=y"],
+    },
+    cinstr_hwc_set_select_via_debug_fs: {
+        kbuild_options: ["CONFIG_MALI_PRFCNT_SET_SELECT_VIA_DEBUG_FS=y"],
     },
     mali_2mb_alloc: {
         kbuild_options: ["CONFIG_MALI_2MB_ALLOC=y"],
@@ -158,14 +167,20 @@ bob_kernel_module {
             "jm/*.h",
             "tl/backend/*_jm.c",
             "mmu/backend/*_jm.c",
+            "ipa/backend/*_jm.c",
+            "ipa/backend/*_jm.h",
         ],
     },
     gpu_has_csf: {
+        kbuild_options: ["CONFIG_MALI_CSF_SUPPORT=y"],
         srcs: [
             "context/backend/*_csf.c",
             "csf/*.c",
             "csf/*.h",
             "csf/Kbuild",
+            "csf/ipa_control/*.c",
+            "csf/ipa_control/*.h",
+            "csf/ipa_control/Kbuild",
             "debug/backend/*_csf.c",
             "debug/backend/*_csf.h",
             "device/backend/*_csf.c",
@@ -173,6 +188,8 @@ bob_kernel_module {
             "gpu/backend/*_csf.h",
             "tl/backend/*_csf.c",
             "mmu/backend/*_csf.c",
+            "ipa/backend/*_csf.c",
+            "ipa/backend/*_csf.h",
         ],
     },
     mali_arbiter_support: {
diff --git a/drivers/gpu/arm/bifrost/context/backend/mali_kbase_context_csf.c b/drivers/gpu/arm/bifrost/context/backend/mali_kbase_context_csf.c
index 7c68eb2f860a..c6602476284f 100644
--- a/drivers/gpu/arm/bifrost/context/backend/mali_kbase_context_csf.c
+++ b/drivers/gpu/arm/bifrost/context/backend/mali_kbase_context_csf.c
@@ -6,7 +6,7 @@
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -17,8 +17,6 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 /*
@@ -28,18 +26,17 @@
 #include <context/mali_kbase_context_internal.h>
 #include <gpu/mali_kbase_gpu_regmap.h>
 #include <mali_kbase.h>
-#include <mali_kbase_ctx_sched.h>
 #include <mali_kbase_dma_fence.h>
 #include <mali_kbase_mem_linux.h>
 #include <mali_kbase_mem_pool_group.h>
 #include <mmu/mali_kbase_mmu.h>
 #include <tl/mali_kbase_timeline.h>
-#include <tl/mali_kbase_tracepoints.h>
 
 #ifdef CONFIG_DEBUG_FS
 #include <csf/mali_kbase_csf_csg_debugfs.h>
 #include <csf/mali_kbase_csf_kcpu_debugfs.h>
 #include <csf/mali_kbase_csf_tiler_heap_debugfs.h>
+#include <csf/mali_kbase_csf_cpu_queue_debugfs.h>
 #include <mali_kbase_debug_mem_view.h>
 #include <mali_kbase_mem_pool_debugfs.h>
 
@@ -51,6 +48,7 @@ void kbase_context_debugfs_init(struct kbase_context *const kctx)
 	kbase_csf_queue_group_debugfs_init(kctx);
 	kbase_csf_kcpu_debugfs_init(kctx);
 	kbase_csf_tiler_heap_debugfs_init(kctx);
+	kbase_csf_cpu_queue_debugfs_init(kctx);
 }
 KBASE_EXPORT_SYMBOL(kbase_context_debugfs_init);
 
@@ -73,24 +71,34 @@ void kbase_context_debugfs_term(struct kbase_context *const kctx)
 KBASE_EXPORT_SYMBOL(kbase_context_debugfs_term);
 #endif /* CONFIG_DEBUG_FS */
 
+static void kbase_context_free(struct kbase_context *kctx)
+{
+	kbase_timeline_post_kbase_context_destroy(kctx);
+
+	vfree(kctx);
+}
+
 static const struct kbase_context_init context_init[] = {
-	{kbase_context_common_init, kbase_context_common_term, NULL},
-	{kbase_context_mem_pool_group_init, kbase_context_mem_pool_group_term,
-			"Memory pool goup initialization failed"},
-	{kbase_mem_evictable_init, kbase_mem_evictable_deinit,
-			"Memory evictable initialization failed"},
-	{kbase_context_mmu_init, kbase_context_mmu_term,
-			"MMU initialization failed"},
-	{kbase_context_mem_alloc_page, kbase_context_mem_pool_free,
-			"Memory alloc page failed"},
-	{kbase_region_tracker_init, kbase_region_tracker_term,
-			"Region tracker initialization failed"},
-	{kbase_sticky_resource_init, kbase_context_sticky_resource_term,
-			"Sticky resource initialization failed"},
-	{kbase_jit_init, kbase_jit_term,
-			"JIT initialization failed"},
-	{kbase_csf_ctx_init, kbase_csf_ctx_term,
-			"CSF context initialization failed"},
+	{ NULL, kbase_context_free, NULL },
+	{ kbase_context_common_init, kbase_context_common_term,
+	  "Common context initialization failed" },
+	{ kbase_context_mem_pool_group_init, kbase_context_mem_pool_group_term,
+	  "Memory pool group initialization failed" },
+	{ kbase_mem_evictable_init, kbase_mem_evictable_deinit,
+	  "Memory evictable initialization failed" },
+	{ kbase_context_mmu_init, kbase_context_mmu_term,
+	  "MMU initialization failed" },
+	{ kbase_context_mem_alloc_page, kbase_context_mem_pool_free,
+	  "Memory alloc page failed" },
+	{ kbase_region_tracker_init, kbase_region_tracker_term,
+	  "Region tracker initialization failed" },
+	{ kbase_sticky_resource_init, kbase_context_sticky_resource_term,
+	  "Sticky resource initialization failed" },
+	{ kbase_jit_init, kbase_jit_term, "JIT initialization failed" },
+	{ kbase_csf_ctx_init, kbase_csf_ctx_term,
+	  "CSF context initialization failed" },
+	{ kbase_context_add_to_dev_list, kbase_context_remove_from_dev_list,
+	  "Adding kctx to device failed" },
 };
 
 static void kbase_context_term_partial(
@@ -134,14 +142,23 @@ struct kbase_context *kbase_create_context(struct kbase_device *kbdev,
 #if defined(CONFIG_64BIT)
 	else
 		kbase_ctx_flag_set(kctx, KCTX_FORCE_SAME_VA);
-#endif /* !defined(CONFIG_64BIT) */
+#endif /* defined(CONFIG_64BIT) */
 
 	for (i = 0; i < ARRAY_SIZE(context_init); i++) {
-		int err = context_init[i].init(kctx);
+		int err = 0;
+
+		if (context_init[i].init)
+			err = context_init[i].init(kctx);
 
 		if (err) {
 			dev_err(kbdev->dev, "%s error = %d\n",
 						context_init[i].err_mes, err);
+
+			/* kctx should be freed by kbase_context_free().
+			 * Otherwise it will result in memory leak.
+			 */
+			WARN_ON(i == 0);
+
 			kbase_context_term_partial(kctx, i);
 			return NULL;
 		}
@@ -162,11 +179,18 @@ void kbase_destroy_context(struct kbase_context *kctx)
 	if (WARN_ON(!kbdev))
 		return;
 
-	/* Ensure the core is powered up for the destroy process
-	 * A suspend won't happen here, because we're in a syscall
-	 * from a userspace thread.
+	/* Context termination could happen whilst the system suspend of
+	 * the GPU device is ongoing or has completed. A hang has been
+	 * observed on customer systems when context termination is not
+	 * blocked until the GPU device has resumed.
 	 */
-	kbase_pm_context_active(kbdev);
+	while (kbase_pm_context_active_handle_suspend(
+		kbdev, KBASE_PM_SUSPEND_HANDLER_DONT_INCREASE)) {
+		dev_info(kbdev->dev,
+			 "Suspend in progress when destroying context");
+		wait_event(kbdev->pm.resume_wait,
+			   !kbase_pm_is_suspending(kbdev));
+	}
 
 	kbase_mem_pool_group_mark_dying(&kctx->mem_pools);
 
diff --git a/drivers/gpu/arm/bifrost/context/backend/mali_kbase_context_jm.c b/drivers/gpu/arm/bifrost/context/backend/mali_kbase_context_jm.c
index 0eb42589fe46..d74e31fe8ca4 100644
--- a/drivers/gpu/arm/bifrost/context/backend/mali_kbase_context_jm.c
+++ b/drivers/gpu/arm/bifrost/context/backend/mali_kbase_context_jm.c
@@ -1,12 +1,12 @@
 // SPDX-License-Identifier: GPL-2.0
 /*
  *
- * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -17,8 +17,6 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 /*
@@ -35,7 +33,6 @@
 #include <mali_kbase_mem_pool_group.h>
 #include <mmu/mali_kbase_mmu.h>
 #include <tl/mali_kbase_timeline.h>
-#include <tl/mali_kbase_tracepoints.h>
 
 #ifdef CONFIG_DEBUG_FS
 #include <mali_kbase_debug_mem_view.h>
@@ -47,14 +44,12 @@ void kbase_context_debugfs_init(struct kbase_context *const kctx)
 	kbase_mem_pool_debugfs_init(kctx->kctx_dentry, kctx);
 	kbase_jit_debugfs_init(kctx);
 	kbasep_jd_debugfs_ctx_init(kctx);
-	kbase_debug_job_fault_context_init(kctx);
 }
 KBASE_EXPORT_SYMBOL(kbase_context_debugfs_init);
 
 void kbase_context_debugfs_term(struct kbase_context *const kctx)
 {
 	debugfs_remove_recursive(kctx->kctx_dentry);
-	kbase_debug_job_fault_context_term(kctx);
 }
 KBASE_EXPORT_SYMBOL(kbase_context_debugfs_term);
 #else
@@ -73,12 +68,7 @@ KBASE_EXPORT_SYMBOL(kbase_context_debugfs_term);
 
 static int kbase_context_kbase_kinstr_jm_init(struct kbase_context *kctx)
 {
-	int ret = kbase_kinstr_jm_init(&kctx->kinstr_jm);
-
-	if (!ret)
-		return ret;
-
-	return 0;
+	return kbase_kinstr_jm_init(&kctx->kinstr_jm);
 }
 
 static void kbase_context_kbase_kinstr_jm_term(struct kbase_context *kctx)
@@ -114,12 +104,27 @@ static int kbase_context_submit_check(struct kbase_context *kctx)
 	return 0;
 }
 
+static void kbase_context_flush_jobs(struct kbase_context *kctx)
+{
+	kbase_jd_zap_context(kctx);
+	flush_workqueue(kctx->jctx.job_done_wq);
+}
+
+static void kbase_context_free(struct kbase_context *kctx)
+{
+	kbase_timeline_post_kbase_context_destroy(kctx);
+
+	vfree(kctx);
+}
+
 static const struct kbase_context_init context_init[] = {
-	{ kbase_context_common_init, kbase_context_common_term, NULL },
+	{ NULL, kbase_context_free, NULL },
+	{ kbase_context_common_init, kbase_context_common_term,
+	  "Common context initialization failed" },
 	{ kbase_dma_fence_init, kbase_dma_fence_term,
 	  "DMA fence initialization failed" },
 	{ kbase_context_mem_pool_group_init, kbase_context_mem_pool_group_term,
-	  "Memory pool goup initialization failed" },
+	  "Memory pool group initialization failed" },
 	{ kbase_mem_evictable_init, kbase_mem_evictable_deinit,
 	  "Memory evictable initialization failed" },
 	{ kbase_context_mmu_init, kbase_context_mmu_term,
@@ -134,13 +139,22 @@ static const struct kbase_context_init context_init[] = {
 	{ kbase_context_kbase_kinstr_jm_init,
 	  kbase_context_kbase_kinstr_jm_term,
 	  "JM instrumentation initialization failed" },
-	{ kbase_context_kbase_timer_setup, NULL, NULL },
+	{ kbase_context_kbase_timer_setup, NULL,
+	  "Timers initialization failed" },
 	{ kbase_event_init, kbase_event_cleanup,
 	  "Event initialization failed" },
 	{ kbasep_js_kctx_init, kbasep_js_kctx_term,
 	  "JS kctx initialization failed" },
 	{ kbase_jd_init, kbase_jd_exit, "JD initialization failed" },
-	{ kbase_context_submit_check, NULL, NULL },
+	{ kbase_context_submit_check, NULL, "Enabling job submission failed" },
+#ifdef CONFIG_DEBUG_FS
+	{ kbase_debug_job_fault_context_init,
+	  kbase_debug_job_fault_context_term,
+	  "Job fault context initialization failed" },
+#endif
+	{ NULL, kbase_context_flush_jobs, NULL },
+	{ kbase_context_add_to_dev_list, kbase_context_remove_from_dev_list,
+	  "Adding kctx to device failed" },
 };
 
 static void kbase_context_term_partial(
@@ -184,14 +198,23 @@ struct kbase_context *kbase_create_context(struct kbase_device *kbdev,
 #if defined(CONFIG_64BIT)
 	else
 		kbase_ctx_flag_set(kctx, KCTX_FORCE_SAME_VA);
-#endif /* !defined(CONFIG_64BIT) */
+#endif /* defined(CONFIG_64BIT) */
 
 	for (i = 0; i < ARRAY_SIZE(context_init); i++) {
-		int err = context_init[i].init(kctx);
+		int err = 0;
+
+		if (context_init[i].init)
+			err = context_init[i].init(kctx);
 
 		if (err) {
 			dev_err(kbdev->dev, "%s error = %d\n",
 						context_init[i].err_mes, err);
+
+			/* kctx should be freed by kbase_context_free().
+			 * Otherwise it will result in memory leak.
+			 */
+			WARN_ON(i == 0);
+
 			kbase_context_term_partial(kctx, i);
 			return NULL;
 		}
@@ -212,17 +235,27 @@ void kbase_destroy_context(struct kbase_context *kctx)
 	if (WARN_ON(!kbdev))
 		return;
 
-	/* Ensure the core is powered up for the destroy process
-	 * A suspend won't happen here, because we're in a syscall
-	 * from a userspace thread.
+	/* Context termination could happen whilst the system suspend of
+	 * the GPU device is ongoing or has completed. A hang has been
+	 * observed on customer systems when context termination is not
+	 * blocked until the GPU device has resumed.
 	 */
-	kbase_pm_context_active(kbdev);
+#ifdef CONFIG_MALI_ARBITER_SUPPORT
+	atomic_inc(&kbdev->pm.gpu_users_waiting);
+#endif /* CONFIG_MALI_ARBITER_SUPPORT */
+	while (kbase_pm_context_active_handle_suspend(
+		kbdev, KBASE_PM_SUSPEND_HANDLER_DONT_INCREASE)) {
+		dev_dbg(kbdev->dev,
+			 "Suspend in progress when destroying context");
+		wait_event(kbdev->pm.resume_wait,
+			   !kbase_pm_is_suspending(kbdev));
+	}
+#ifdef CONFIG_MALI_ARBITER_SUPPORT
+	atomic_dec(&kbdev->pm.gpu_users_waiting);
+#endif /* CONFIG_MALI_ARBITER_SUPPORT */
 
 	kbase_mem_pool_group_mark_dying(&kctx->mem_pools);
 
-	kbase_jd_zap_context(kctx);
-	flush_workqueue(kctx->jctx.job_done_wq);
-
 	kbase_context_term_partial(kctx, ARRAY_SIZE(context_init));
 
 	kbase_pm_context_idle(kbdev);
diff --git a/drivers/gpu/arm/bifrost/context/mali_kbase_context.c b/drivers/gpu/arm/bifrost/context/mali_kbase_context.c
index 83182f983467..fa4970be05ce 100644
--- a/drivers/gpu/arm/bifrost/context/mali_kbase_context.c
+++ b/drivers/gpu/arm/bifrost/context/mali_kbase_context.c
@@ -6,7 +6,7 @@
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -17,8 +17,6 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 /*
@@ -28,10 +26,8 @@
 #include <mali_kbase.h>
 #include <gpu/mali_kbase_gpu_regmap.h>
 #include <mali_kbase_mem_linux.h>
-#include <mali_kbase_dma_fence.h>
 #include <mali_kbase_ctx_sched.h>
 #include <mali_kbase_mem_pool_group.h>
-#include <tl/mali_kbase_tracepoints.h>
 #include <tl/mali_kbase_timeline.h>
 #include <mmu/mali_kbase_mmu.h>
 #include <context/mali_kbase_context_internal.h>
@@ -170,22 +166,49 @@ int kbase_context_common_init(struct kbase_context *kctx)
 	mutex_init(&kctx->legacy_hwcnt_lock);
 
 	mutex_lock(&kctx->kbdev->kctx_list_lock);
-	list_add(&kctx->kctx_list_link, &kctx->kbdev->kctx_list);
 
 	err = kbase_insert_kctx_to_process(kctx);
 	if (err)
 		dev_err(kctx->kbdev->dev,
 		"(err:%d) failed to insert kctx to kbase_process\n", err);
 
-	KBASE_TLSTREAM_TL_KBASE_NEW_CTX(kctx->kbdev, kctx->id,
-		kctx->kbdev->gpu_props.props.raw_props.gpu_id);
-	KBASE_TLSTREAM_TL_NEW_CTX(kctx->kbdev, kctx, kctx->id,
-			(u32)(kctx->tgid));
 	mutex_unlock(&kctx->kbdev->kctx_list_lock);
 
 	return err;
 }
 
+int kbase_context_add_to_dev_list(struct kbase_context *kctx)
+{
+	if (WARN_ON(!kctx))
+		return -EINVAL;
+
+	if (WARN_ON(!kctx->kbdev))
+		return -EINVAL;
+
+	mutex_lock(&kctx->kbdev->kctx_list_lock);
+	list_add(&kctx->kctx_list_link, &kctx->kbdev->kctx_list);
+	mutex_unlock(&kctx->kbdev->kctx_list_lock);
+
+	kbase_timeline_post_kbase_context_create(kctx);
+
+	return 0;
+}
+
+void kbase_context_remove_from_dev_list(struct kbase_context *kctx)
+{
+	if (WARN_ON(!kctx))
+		return;
+
+	if (WARN_ON(!kctx->kbdev))
+		return;
+
+	kbase_timeline_pre_kbase_context_destroy(kctx);
+
+	mutex_lock(&kctx->kbdev->kctx_list_lock);
+	list_del_init(&kctx->kctx_list_link);
+	mutex_unlock(&kctx->kbdev->kctx_list_lock);
+}
+
 /**
  * kbase_remove_kctx_from_process - remove a terminating context from
  *                                    the process list.
@@ -238,24 +261,9 @@ void kbase_context_common_term(struct kbase_context *kctx)
 
 	mutex_lock(&kctx->kbdev->kctx_list_lock);
 	kbase_remove_kctx_from_process(kctx);
-
-	KBASE_TLSTREAM_TL_KBASE_DEL_CTX(kctx->kbdev, kctx->id);
-
-	KBASE_TLSTREAM_TL_DEL_CTX(kctx->kbdev, kctx);
-	list_del(&kctx->kctx_list_link);
 	mutex_unlock(&kctx->kbdev->kctx_list_lock);
 
 	KBASE_KTRACE_ADD(kctx->kbdev, CORE_CTX_DESTROY, kctx, 0u);
-
-	/* Flush the timeline stream, so the user can see the termination
-	 * tracepoints being fired.
-	 * The "if" statement below is for optimization. It is safe to call
-	 * kbase_timeline_streams_flush when timeline is disabled.
-	 */
-	if (atomic_read(&kctx->kbdev->timeline_flags) != 0)
-		kbase_timeline_streams_flush(kctx->kbdev->timeline);
-
-	vfree(kctx);
 }
 
 int kbase_context_mem_pool_group_init(struct kbase_context *kctx)
@@ -273,11 +281,9 @@ void kbase_context_mem_pool_group_term(struct kbase_context *kctx)
 
 int kbase_context_mmu_init(struct kbase_context *kctx)
 {
-	kbase_mmu_init(kctx->kbdev,
-		&kctx->mmu, kctx,
+	return kbase_mmu_init(
+		kctx->kbdev, &kctx->mmu, kctx,
 		base_context_mmu_group_id_get(kctx->create_flags));
-
-	return 0;
 }
 
 void kbase_context_mmu_term(struct kbase_context *kctx)
diff --git a/drivers/gpu/arm/bifrost/context/mali_kbase_context.h b/drivers/gpu/arm/bifrost/context/mali_kbase_context.h
index e4ed8944bdd2..f18f64b172be 100644
--- a/drivers/gpu/arm/bifrost/context/mali_kbase_context.h
+++ b/drivers/gpu/arm/bifrost/context/mali_kbase_context.h
@@ -1,11 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  *
- * (C) COPYRIGHT ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2011-2017, 2019-2020 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,18 +17,6 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
- *//* SPDX-License-Identifier: GPL-2.0 */
-/*
- *
- * (C) COPYRIGHT 2011-2017, 2019 ARM Limited. All rights reserved.
- *
- * This program is free software and is provided to you under the terms of the
- * GNU General Public License version 2 as published by the Free Software
- * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
- *
  */
 
 #ifndef _KBASE_CONTEXT_H_
@@ -117,25 +106,7 @@ static inline bool kbase_ctx_flag(struct kbase_context *kctx,
 static inline void kbase_ctx_flag_clear(struct kbase_context *kctx,
 					enum kbase_context_flags flag)
 {
-#if KERNEL_VERSION(4, 3, 0) > LINUX_VERSION_CODE
-	/*
-	 * Earlier kernel versions doesn't have atomic_andnot() or
-	 * atomic_and(). atomic_clear_mask() was only available on some
-	 * architectures and removed on arm in v3.13 on arm and arm64.
-	 *
-	 * Use a compare-exchange loop to clear the flag on pre 4.3 kernels,
-	 * when atomic_andnot() becomes available.
-	 */
-	int old, new;
-
-	do {
-		old = atomic_read(&kctx->flags);
-		new = old & ~flag;
-
-	} while (atomic_cmpxchg(&kctx->flags, old, new) != old);
-#else
 	atomic_andnot(flag, &kctx->flags);
-#endif
 }
 
 /**
diff --git a/drivers/gpu/arm/bifrost/context/mali_kbase_context_internal.h b/drivers/gpu/arm/bifrost/context/mali_kbase_context_internal.h
index 818cdbea960d..fd009b8ebc89 100644
--- a/drivers/gpu/arm/bifrost/context/mali_kbase_context_internal.h
+++ b/drivers/gpu/arm/bifrost/context/mali_kbase_context_internal.h
@@ -1,11 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  *
- * (C) COPYRIGHT ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,16 +17,6 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
- *//* SPDX-License-Identifier: GPL-2.0 */
-/*
- * (C) COPYRIGHT 2019 ARM Limited. All rights reserved.
- *
- * This program is free software and is provided to you under the terms of the
- * GNU General Public License version 2 as published by the Free Software
- * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
  */
 
 #include <mali_kbase.h>
@@ -58,3 +49,6 @@ int kbase_context_mem_alloc_page(struct kbase_context *kctx);
 void kbase_context_mem_pool_free(struct kbase_context *kctx);
 
 void kbase_context_sticky_resource_term(struct kbase_context *kctx);
+
+int kbase_context_add_to_dev_list(struct kbase_context *kctx);
+void kbase_context_remove_from_dev_list(struct kbase_context *kctx);
diff --git a/drivers/gpu/arm/bifrost/csf/Kbuild b/drivers/gpu/arm/bifrost/csf/Kbuild
index bb61811e6c85..d65b75f32525 100644
--- a/drivers/gpu/arm/bifrost/csf/Kbuild
+++ b/drivers/gpu/arm/bifrost/csf/Kbuild
@@ -1,10 +1,11 @@
+# SPDX-License-Identifier: GPL-2.0
 #
 # (C) COPYRIGHT 2018-2020 ARM Limited. All rights reserved.
 #
 # This program is free software and is provided to you under the terms of the
 # GNU General Public License version 2 as published by the Free Software
 # Foundation, and any use by you of this program is subject to the terms
-# of such GNU licence.
+# of such GNU license.
 #
 # This program is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -15,11 +16,9 @@
 # along with this program; if not, you can access it online at
 # http://www.gnu.org/licenses/gpl-2.0.html.
 #
-# SPDX-License-Identifier: GPL-2.0
-#
 #
 
-mali_kbase-y += \
+bifrost_kbase-y += \
 	csf/mali_kbase_csf_firmware_cfg.o \
 	csf/mali_kbase_csf_trace_buffer.o \
 	csf/mali_kbase_csf.o \
@@ -33,8 +32,11 @@ mali_kbase-y += \
 	csf/mali_kbase_csf_csg_debugfs.o \
 	csf/mali_kbase_csf_kcpu_debugfs.o \
 	csf/mali_kbase_csf_protected_memory.o \
-	csf/mali_kbase_csf_tiler_heap_debugfs.o
+	csf/mali_kbase_csf_tiler_heap_debugfs.o \
+	csf/mali_kbase_csf_cpu_queue_debugfs.o
 
-mali_kbase-$(CONFIG_MALI_REAL_HW) += csf/mali_kbase_csf_firmware.o
+bifrost_kbase-$(CONFIG_MALI_REAL_HW) += csf/mali_kbase_csf_firmware.o
 
-mali_kbase-$(CONFIG_MALI_BIFROST_NO_MALI) += csf/mali_kbase_csf_firmware_no_mali.o
+bifrost_kbase-$(CONFIG_MALI_BIFROST_NO_MALI) += csf/mali_kbase_csf_firmware_no_mali.o
+
+include $(src)/csf/ipa_control/Kbuild
diff --git a/drivers/gpu/arm/bifrost/csf/ipa_control/Kbuild b/drivers/gpu/arm/bifrost/csf/ipa_control/Kbuild
new file mode 100644
index 000000000000..222e0c803572
--- /dev/null
+++ b/drivers/gpu/arm/bifrost/csf/ipa_control/Kbuild
@@ -0,0 +1,22 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# (C) COPYRIGHT 2020 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU license.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+#
+
+bifrost_kbase-y += \
+	csf/ipa_control/mali_kbase_csf_ipa_control.o
diff --git a/drivers/gpu/arm/bifrost/csf/ipa_control/mali_kbase_csf_ipa_control.c b/drivers/gpu/arm/bifrost/csf/ipa_control/mali_kbase_csf_ipa_control.c
new file mode 100644
index 000000000000..7b0845c0d78c
--- /dev/null
+++ b/drivers/gpu/arm/bifrost/csf/ipa_control/mali_kbase_csf_ipa_control.c
@@ -0,0 +1,925 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ *
+ * (C) COPYRIGHT 2020 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+#include <mali_kbase.h>
+#include "mali_kbase_clk_rate_trace_mgr.h"
+#include "mali_kbase_csf_ipa_control.h"
+
+/*
+ * Status flags from the STATUS register of the IPA Control interface.
+ */
+#define STATUS_COMMAND_ACTIVE ((u32)1 << 0)
+#define STATUS_TIMER_ACTIVE ((u32)1 << 1)
+#define STATUS_AUTO_ACTIVE ((u32)1 << 2)
+#define STATUS_PROTECTED_MODE ((u32)1 << 8)
+#define STATUS_RESET ((u32)1 << 9)
+#define STATUS_TIMER_ENABLED ((u32)1 << 31)
+
+/*
+ * Commands for the COMMAND register of the IPA Control interface.
+ */
+#define COMMAND_NOP ((u32)0)
+#define COMMAND_APPLY ((u32)1)
+#define COMMAND_CLEAR ((u32)2)
+#define COMMAND_SAMPLE ((u32)3)
+#define COMMAND_PROTECTED_ACK ((u32)4)
+#define COMMAND_RESET_ACK ((u32)5)
+
+/*
+ * Default value for the TIMER register of the IPA Control interface,
+ * expressed in milliseconds.
+ *
+ * The chosen value is a trade-off between two requirements: the IPA Control
+ * interface should sample counters with a resolution in the order of
+ * milliseconds, while keeping GPU overhead as limited as possible.
+ */
+#define TIMER_DEFAULT_VALUE_MS ((u32)10) /* 10 milliseconds */
+
+/*
+ * Number of timer events per second.
+ */
+#define TIMER_EVENTS_PER_SECOND ((u32)1000 / TIMER_DEFAULT_VALUE_MS)
+
+/*
+ * Maximum number of loops polling the GPU before we assume the GPU has hung.
+ */
+#define IPA_INACTIVE_MAX_LOOPS ((unsigned int)8000000)
+
+/*
+ * Number of bits used to configure a performance counter in SELECT registers.
+ */
+#define IPA_CONTROL_SELECT_BITS_PER_CNT ((u64)8)
+
+/*
+ * Maximum value of a performance counter.
+ */
+#define MAX_PRFCNT_VALUE (((u64)1 << 48) - 1)
+
+/**
+ * struct kbase_ipa_control_listener_data - Data for the GPU clock frequency
+ *                                          listener
+ *
+ * @listener: Embedded listener; the callback applies container_of() to it.
+ * @kbdev:    Kbase device whose IPA Control state the callback updates.
+ */
+struct kbase_ipa_control_listener_data {
+	struct kbase_clk_rate_listener listener;
+	struct kbase_device *kbdev;
+};
+
+static u32 timer_value(u32 gpu_rate)
+{
+	return gpu_rate / TIMER_EVENTS_PER_SECOND; /* cycles per timer event */
+}
+
+/* Poll STATUS until all @flags clear; -EBUSY if still set after the budget. */
+static int wait_status(struct kbase_device *kbdev, u32 flags)
+{
+	unsigned int max_loops = IPA_INACTIVE_MAX_LOOPS;
+	u32 status = kbase_reg_read(kbdev, IPA_CONTROL_REG(STATUS));
+
+	/* Judge by the last value read, so that a transition completing on
+	 * the final poll is not misreported as a hang.
+	 */
+	while ((status & flags) && --max_loops)
+		status = kbase_reg_read(kbdev, IPA_CONTROL_REG(STATUS));
+	if (status & flags) {
+		dev_err(kbdev->dev, "IPA_CONTROL STATUS register stuck");
+		return -EBUSY;
+	}
+
+	return 0;
+}
+
+/* Program the SELECT registers from @select, then issue COMMAND_APPLY. */
+static int apply_select_config(struct kbase_device *kbdev, u64 *select)
+{
+	const u64 cshw = select[KBASE_IPA_CORE_TYPE_CSHW];
+	const u64 memsys = select[KBASE_IPA_CORE_TYPE_MEMSYS];
+	const u64 tiler = select[KBASE_IPA_CORE_TYPE_TILER];
+	const u64 shader = select[KBASE_IPA_CORE_TYPE_SHADER];
+	int err;
+
+	/* Each 64-bit word is written as a LO/HI pair of 32-bit registers */
+	kbase_reg_write(kbdev, IPA_CONTROL_REG(SELECT_CSHW_LO),
+			(u32)(cshw & U32_MAX));
+	kbase_reg_write(kbdev, IPA_CONTROL_REG(SELECT_CSHW_HI),
+			(u32)((cshw >> 32) & U32_MAX));
+
+	kbase_reg_write(kbdev, IPA_CONTROL_REG(SELECT_MEMSYS_LO),
+			(u32)(memsys & U32_MAX));
+	kbase_reg_write(kbdev, IPA_CONTROL_REG(SELECT_MEMSYS_HI),
+			(u32)((memsys >> 32) & U32_MAX));
+
+	kbase_reg_write(kbdev, IPA_CONTROL_REG(SELECT_TILER_LO),
+			(u32)(tiler & U32_MAX));
+	kbase_reg_write(kbdev, IPA_CONTROL_REG(SELECT_TILER_HI),
+			(u32)((tiler >> 32) & U32_MAX));
+
+	kbase_reg_write(kbdev, IPA_CONTROL_REG(SELECT_SHADER_LO),
+			(u32)(shader & U32_MAX));
+	kbase_reg_write(kbdev, IPA_CONTROL_REG(SELECT_SHADER_HI),
+			(u32)((shader >> 32) & U32_MAX));
+
+	/*
+	 * APPLY is issued only after STATUS_COMMAND_ACTIVE has cleared; if it
+	 * does not clear, the wait_status() error is returned instead.
+	 */
+	err = wait_status(kbdev, STATUS_COMMAND_ACTIVE);
+	if (err)
+		return err;
+
+	kbase_reg_write(kbdev, IPA_CONTROL_REG(COMMAND), COMMAND_APPLY);
+
+	return 0;
+}
+
+/* Read the 64-bit VALUE register pair of counter @select_idx for @type. */
+static u64 read_value_cnt(struct kbase_device *kbdev, u8 type, int select_idx)
+{
+	u32 offset_lo, offset_hi;
+	u32 lo, hi;
+
+	/* Work out which LO/HI register pair backs the requested counter */
+	switch (type) {
+	case KBASE_IPA_CORE_TYPE_CSHW:
+		offset_lo = IPA_CONTROL_REG(VALUE_CSHW_REG_LO(select_idx));
+		offset_hi = IPA_CONTROL_REG(VALUE_CSHW_REG_HI(select_idx));
+		break;
+
+	case KBASE_IPA_CORE_TYPE_MEMSYS:
+		offset_lo = IPA_CONTROL_REG(VALUE_MEMSYS_REG_LO(select_idx));
+		offset_hi = IPA_CONTROL_REG(VALUE_MEMSYS_REG_HI(select_idx));
+		break;
+
+	case KBASE_IPA_CORE_TYPE_TILER:
+		offset_lo = IPA_CONTROL_REG(VALUE_TILER_REG_LO(select_idx));
+		offset_hi = IPA_CONTROL_REG(VALUE_TILER_REG_HI(select_idx));
+		break;
+
+	case KBASE_IPA_CORE_TYPE_SHADER:
+		offset_lo = IPA_CONTROL_REG(VALUE_SHADER_REG_LO(select_idx));
+		offset_hi = IPA_CONTROL_REG(VALUE_SHADER_REG_HI(select_idx));
+		break;
+
+	default:
+		/* No register is read for an unknown block: report zero */
+		WARN(1, "Unknown core type: %u\n", type);
+		return 0;
+	}
+
+	/* The low word is read before the high word */
+	lo = kbase_reg_read(kbdev, offset_lo);
+	hi = kbase_reg_read(kbdev, offset_hi);
+
+	/* Stitch the two 32-bit halves into one 64-bit counter value */
+	return ((u64)hi << 32) | lo;
+}
+
+/* Pack the counter indices selected in each block into SELECT words. */
+static void build_select_config(struct kbase_ipa_control *ipa_ctrl,
+				u64 *select_config)
+{
+	size_t type, slot;
+
+	for (type = 0; type < KBASE_IPA_CORE_TYPE_NUM; type++) {
+		u64 word = 0ULL;
+
+		/* Slot N is shifted up by N * IPA_CONTROL_SELECT_BITS_PER_CNT */
+		for (slot = 0; slot < KBASE_IPA_CONTROL_NUM_BLOCK_COUNTERS;
+		     slot++) {
+			u64 idx = ipa_ctrl->blocks[type].select[slot].idx;
+
+			word |= idx << (IPA_CONTROL_SELECT_BITS_PER_CNT * slot);
+		}
+
+		select_config[type] = word;
+	}
+}
+
+/* Fold the counter's progress since the last sample into accumulated_diff. */
+static inline void calc_prfcnt_delta(struct kbase_device *kbdev,
+				     struct kbase_ipa_control_prfcnt *prfcnt,
+				     bool gpu_ready)
+{
+	u64 delta_value, raw_value;
+
+	/* Without a ready GPU the registers are not read: the delta is zero */
+	if (gpu_ready)
+		raw_value = read_value_cnt(kbdev, (u8)prfcnt->type,
+					   prfcnt->select_idx);
+	else
+		raw_value = prfcnt->latest_raw_value;
+
+	/* On wrap, count the step from MAX_PRFCNT_VALUE back to zero too */
+	if (raw_value < prfcnt->latest_raw_value)
+		delta_value = (MAX_PRFCNT_VALUE - prfcnt->latest_raw_value) +
+			      raw_value + 1;
+	else
+		delta_value = raw_value - prfcnt->latest_raw_value;
+
+	delta_value *= prfcnt->scaling_factor;
+
+	if (!WARN_ON_ONCE(kbdev->csf.ipa_control.cur_gpu_rate == 0))
+		if (prfcnt->gpu_norm)
+			delta_value /= kbdev->csf.ipa_control.cur_gpu_rate;
+
+	prfcnt->latest_raw_value = raw_value;
+	prfcnt->accumulated_diff += delta_value;
+}
+
+/**
+ * kbase_ipa_control_rate_change_notify - GPU frequency change callback
+ *
+ * @listener:     Clock frequency change listener.
+ * @clk_index:    Index of the clock for which the change has occurred.
+ * @clk_rate_hz:  Clock frequency(Hz).
+ *
+ * This callback notifies kbase_ipa_control about GPU frequency changes.
+ * Only top-level clock changes are meaningful. GPU frequency updates
+ * affect all performance counters which require GPU normalization
+ * in every session.
+ */
+static void
+kbase_ipa_control_rate_change_notify(struct kbase_clk_rate_listener *listener,
+				     u32 clk_index, u32 clk_rate_hz)
+{
+	struct kbase_ipa_control_listener_data *listener_data;
+	struct kbase_ipa_control *ipa_ctrl;
+	struct kbase_device *kbdev;
+	unsigned long flags;
+	size_t i, j;
+
+	/* Only a non-zero rate reported for the top-level clock matters */
+	if ((clk_index != KBASE_CLOCK_DOMAIN_TOP) || (clk_rate_hz == 0))
+		return;
+
+	listener_data = container_of(
+		listener, struct kbase_ipa_control_listener_data, listener);
+	kbdev = listener_data->kbdev;
+	ipa_ctrl = &kbdev->csf.ipa_control;
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	if (!kbdev->pm.backend.gpu_ready) {
+		dev_err(kbdev->dev,
+			"%s: GPU frequency cannot change while GPU is off",
+			__func__);
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+		return;
+	}
+
+	/* Interrupts are already disabled and interrupt state is also saved */
+	spin_lock(&ipa_ctrl->lock);
+
+	/* Account GPU-normalized counters at the old rate before it changes.
+	 * Active sessions may sit in any slot, so scan the whole array.
+	 */
+	for (i = 0; i < KBASE_IPA_CONTROL_MAX_SESSIONS; i++) {
+		struct kbase_ipa_control_session *session =
+			&ipa_ctrl->sessions[i];
+
+		for (j = 0; session->active && j < session->num_prfcnts; j++)
+			if (session->prfcnts[j].gpu_norm)
+				calc_prfcnt_delta(kbdev, &session->prfcnts[j],
+						  true);
+	}
+
+	ipa_ctrl->cur_gpu_rate = clk_rate_hz;
+
+	/* Counters are up to date: retime automatic sampling if in use */
+	if (ipa_ctrl->num_active_sessions > 0)
+		kbase_reg_write(kbdev, IPA_CONTROL_REG(TIMER),
+				timer_value(ipa_ctrl->cur_gpu_rate));
+
+	spin_unlock(&ipa_ctrl->lock);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+}
+
+void kbase_ipa_control_init(struct kbase_device *kbdev)
+{
+	struct kbase_ipa_control *ipa_ctrl = &kbdev->csf.ipa_control;
+	struct kbase_clk_rate_trace_manager *clk_rtm = &kbdev->pm.clk_rtm;
+	struct kbase_ipa_control_listener_data *listener_data;
+	size_t i, j;
+
+	for (i = 0; i < KBASE_IPA_CORE_TYPE_NUM; i++) {
+		for (j = 0; j < KBASE_IPA_CONTROL_NUM_BLOCK_COUNTERS; j++) {
+			ipa_ctrl->blocks[i].select[j].idx = 0;
+			ipa_ctrl->blocks[i].select[j].refcount = 0;
+		}
+		ipa_ctrl->blocks[i].num_available_counters =
+			KBASE_IPA_CONTROL_NUM_BLOCK_COUNTERS;
+	}
+
+	spin_lock_init(&ipa_ctrl->lock);
+	ipa_ctrl->num_active_sessions = 0;
+	ipa_ctrl->cur_gpu_rate = 0; /* until a top-level clock is found */
+	for (i = 0; i < KBASE_IPA_CONTROL_MAX_SESSIONS; i++)
+		ipa_ctrl->sessions[i].active = false;
+
+	/* Store the pointer even when NULL, so term never frees a stale one */
+	listener_data = kzalloc(sizeof(*listener_data), GFP_KERNEL);
+	ipa_ctrl->rtm_listener_data = listener_data;
+	if (listener_data) {
+		listener_data->listener.notify =
+			kbase_ipa_control_rate_change_notify;
+		listener_data->kbdev = kbdev;
+	}
+
+	spin_lock(&clk_rtm->lock);
+	if (clk_rtm->clks[KBASE_CLOCK_DOMAIN_TOP])
+		ipa_ctrl->cur_gpu_rate =
+			clk_rtm->clks[KBASE_CLOCK_DOMAIN_TOP]->clock_val;
+	if (listener_data)
+		kbase_clk_rate_trace_manager_subscribe_no_lock(
+			clk_rtm, &listener_data->listener);
+	spin_unlock(&clk_rtm->lock);
+}
+KBASE_EXPORT_TEST_API(kbase_ipa_control_init);
+
+void kbase_ipa_control_term(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+	struct kbase_clk_rate_trace_manager *clk_rtm = &kbdev->pm.clk_rtm;
+	struct kbase_ipa_control *ipa_ctrl = &kbdev->csf.ipa_control;
+	struct kbase_ipa_control_listener_data *listener_data =
+		ipa_ctrl->rtm_listener_data;
+
+	WARN_ON(ipa_ctrl->num_active_sessions);
+	if (listener_data)
+		kbase_clk_rate_trace_manager_unsubscribe(clk_rtm, &listener_data->listener);
+	kfree(listener_data);
+	ipa_ctrl->rtm_listener_data = NULL; /* must not dangle once freed */
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	if (kbdev->pm.backend.gpu_powered)
+		kbase_reg_write(kbdev, IPA_CONTROL_REG(TIMER), 0);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+}
+KBASE_EXPORT_TEST_API(kbase_ipa_control_term);
+
+int kbase_ipa_control_register(
+	struct kbase_device *kbdev,
+	const struct kbase_ipa_control_perf_counter *perf_counters,
+	size_t num_counters, void **client)
+{
+	int ret = 0;
+	size_t i, session_idx, req_counters[KBASE_IPA_CORE_TYPE_NUM] = { 0 };
+	size_t num_claimed = 0;
+	bool already_configured[KBASE_IPA_CONTROL_MAX_COUNTERS];
+	bool new_config = false;
+	struct kbase_ipa_control *ipa_ctrl;
+	struct kbase_ipa_control_session *session = NULL;
+	unsigned long flags;
+
+	/* kbdev is needed even to log an error, so it is checked on its own */
+	if (WARN_ON(kbdev == NULL))
+		return -EINVAL;
+
+	if (WARN_ON(perf_counters == NULL) || WARN_ON(client == NULL) ||
+	    WARN_ON(num_counters > KBASE_IPA_CONTROL_MAX_COUNTERS)) {
+		dev_err(kbdev->dev, "%s: wrong input arguments", __func__);
+		return -EINVAL;
+	}
+
+	kbase_pm_context_active(kbdev);
+
+	ipa_ctrl = &kbdev->csf.ipa_control;
+	spin_lock_irqsave(&ipa_ctrl->lock, flags);
+
+	if (ipa_ctrl->num_active_sessions == KBASE_IPA_CONTROL_MAX_SESSIONS) {
+		dev_err(kbdev->dev, "%s: too many sessions", __func__);
+		ret = -EBUSY;
+		goto exit;
+	}
+
+	/*
+	 * Count how many counters would need to be configured in order to
+	 * satisfy the request. Requested counters which happen to be already
+	 * configured can be skipped.
+	 */
+	for (i = 0; i < num_counters; i++) {
+		size_t j;
+		enum kbase_ipa_core_type type = perf_counters[i].type;
+		u8 idx = perf_counters[i].idx;
+
+		if ((type >= KBASE_IPA_CORE_TYPE_NUM) ||
+		    (idx >= KBASE_IPA_CONTROL_CNT_MAX_IDX)) {
+			dev_err(kbdev->dev,
+				"%s: invalid requested type %u and/or index %u",
+				__func__, type, idx);
+			ret = -EINVAL;
+			goto exit;
+		}
+
+		for (j = 0; j < KBASE_IPA_CONTROL_NUM_BLOCK_COUNTERS; j++) {
+			struct kbase_ipa_control_prfcnt_config *prfcnt_config =
+				&ipa_ctrl->blocks[type].select[j];
+
+			if ((prfcnt_config->refcount > 0) &&
+			    (prfcnt_config->idx == idx)) {
+				already_configured[i] = true;
+				break;
+			}
+		}
+
+		if (j == KBASE_IPA_CONTROL_NUM_BLOCK_COUNTERS) {
+			already_configured[i] = false;
+			req_counters[type]++;
+			new_config = true;
+		}
+	}
+
+	for (i = 0; i < KBASE_IPA_CORE_TYPE_NUM; i++)
+		if (req_counters[i] >
+		    ipa_ctrl->blocks[i].num_available_counters) {
+			dev_err(kbdev->dev,
+				"%s: more counters (%zu) than available (%zu) have been requested for type %zu",
+				__func__, req_counters[i],
+				ipa_ctrl->blocks[i].num_available_counters, i);
+			ret = -EINVAL;
+			goto exit;
+		}
+
+	/*
+	 * The request has been validated. Find a free session slot, then set
+	 * up the session and record the counters it uses in the internal
+	 * state of kbase_ipa_control.
+	 */
+	for (session_idx = 0; session_idx < KBASE_IPA_CONTROL_MAX_SESSIONS;
+	     session_idx++) {
+		session = &ipa_ctrl->sessions[session_idx];
+		if (!session->active)
+			break;
+	}
+
+	/* Running off the end of the array means that no slot was free */
+	if (session_idx == KBASE_IPA_CONTROL_MAX_SESSIONS) {
+		dev_err(kbdev->dev, "%s: wrong or corrupt session state",
+			__func__);
+		ret = -EBUSY;
+		goto exit;
+	}
+
+	for (i = 0; i < num_counters; i++) {
+		struct kbase_ipa_control_prfcnt_config *prfcnt_config;
+		size_t j;
+		u8 type = perf_counters[i].type;
+		u8 idx = perf_counters[i].idx;
+
+		for (j = 0; j < KBASE_IPA_CONTROL_NUM_BLOCK_COUNTERS; j++) {
+			prfcnt_config = &ipa_ctrl->blocks[type].select[j];
+
+			if (already_configured[i]) {
+				if ((prfcnt_config->refcount > 0) &&
+				    (prfcnt_config->idx == idx))
+					break;
+			} else if (prfcnt_config->refcount == 0) {
+				break;
+			}
+		}
+
+		if (WARN_ON((prfcnt_config->refcount > 0 &&
+			     prfcnt_config->idx != idx) ||
+			    (j == KBASE_IPA_CONTROL_NUM_BLOCK_COUNTERS))) {
+			dev_err(kbdev->dev,
+				"%s: invalid internal state: counter already configured or no counter available to configure",
+				__func__);
+			ret = -EBUSY;
+			goto exit;
+		}
+
+		if (prfcnt_config->refcount == 0) {
+			prfcnt_config->idx = idx;
+			ipa_ctrl->blocks[type].num_available_counters--;
+		}
+
+		session->prfcnts[i].accumulated_diff = 0;
+		session->prfcnts[i].type = type;
+		session->prfcnts[i].select_idx = j;
+		session->prfcnts[i].scaling_factor =
+			perf_counters[i].scaling_factor;
+		session->prfcnts[i].gpu_norm = perf_counters[i].gpu_norm;
+
+		/* Counters [0, num_claimed) are released again on failure */
+		prfcnt_config->refcount++;
+		num_claimed++;
+	}
+
+	/*
+	 * Apply new configuration, if necessary. As a temporary solution, make
+	 * sure that the GPU is on before applying the new configuration.
+	 */
+	if (new_config) {
+		u64 select_config[KBASE_IPA_CORE_TYPE_NUM];
+
+		build_select_config(ipa_ctrl, select_config);
+		ret = apply_select_config(kbdev, select_config);
+		if (ret)
+			dev_err(kbdev->dev,
+				"%s: failed to apply SELECT configuration",
+				__func__);
+	}
+
+	if (!ret) {
+		/* Accumulator registers don't contain any sample if the timer
+		 * has not been enabled first. Take a sample manually before
+		 * enabling the timer.
+		 */
+		if (ipa_ctrl->num_active_sessions == 0) {
+			kbase_reg_write(kbdev, IPA_CONTROL_REG(COMMAND),
+					COMMAND_SAMPLE);
+			ret = wait_status(kbdev, STATUS_COMMAND_ACTIVE);
+			if (!ret) {
+				kbase_reg_write(
+					kbdev, IPA_CONTROL_REG(TIMER),
+					timer_value(ipa_ctrl->cur_gpu_rate));
+			} else {
+				dev_err(kbdev->dev,
+					"%s: failed to sample new counters",
+					__func__);
+			}
+		}
+	}
+
+	if (!ret) {
+		/* Protected-mode time is reported from registration onwards */
+		session->last_query_time = ktime_get_ns();
+		session->protm_time = 0;
+		session->num_prfcnts = num_counters;
+		session->active = true;
+		ipa_ctrl->num_active_sessions++;
+		*client = session;
+
+		/*
+		 * Seed each counter with its current raw value, so that the
+		 * first query reports the difference from this point on.
+		 */
+		for (i = 0; i < session->num_prfcnts; i++) {
+			struct kbase_ipa_control_prfcnt *prfcnt =
+				&session->prfcnts[i];
+			u64 raw_value = read_value_cnt(kbdev, (u8)prfcnt->type,
+						       prfcnt->select_idx);
+			prfcnt->latest_raw_value = raw_value;
+		}
+	}
+
+exit:
+	/* Give back the counters claimed for a session that did not come up */
+	while (ret && num_claimed-- > 0) {
+		u8 type = session->prfcnts[num_claimed].type;
+		u8 slot = session->prfcnts[num_claimed].select_idx;
+
+		if (--ipa_ctrl->blocks[type].select[slot].refcount == 0)
+			ipa_ctrl->blocks[type].num_available_counters++;
+	}
+	spin_unlock_irqrestore(&ipa_ctrl->lock, flags);
+	kbase_pm_context_idle(kbdev);
+	return ret;
+}
+KBASE_EXPORT_TEST_API(kbase_ipa_control_register);
+
+int kbase_ipa_control_unregister(struct kbase_device *kbdev, const void *client)
+{
+	struct kbase_ipa_control *ipa_ctrl;
+	struct kbase_ipa_control_session *session;
+	int ret = 0;
+	size_t i;
+	unsigned long flags;
+	bool new_config = false;
+
+	if (WARN_ON(kbdev == NULL))
+		return -EINVAL; /* no device to log against */
+
+	if (WARN_ON(client == NULL)) {
+		dev_err(kbdev->dev, "%s: wrong input arguments", __func__);
+		return -EINVAL;
+	}
+
+	kbase_pm_context_active(kbdev);
+
+	ipa_ctrl = &kbdev->csf.ipa_control;
+	session = (struct kbase_ipa_control_session *)client;
+
+	spin_lock_irqsave(&ipa_ctrl->lock, flags);
+
+	/* The handle must be the address of one of the session slots */
+	for (i = 0; i < KBASE_IPA_CONTROL_MAX_SESSIONS; i++)
+		if (session == &ipa_ctrl->sessions[i])
+			break;
+
+	if (i == KBASE_IPA_CONTROL_MAX_SESSIONS) {
+		dev_err(kbdev->dev, "%s: invalid session handle", __func__);
+		ret = -EINVAL;
+		goto exit;
+	}
+
+	if (ipa_ctrl->num_active_sessions == 0) {
+		dev_err(kbdev->dev, "%s: no active sessions found", __func__);
+		ret = -EINVAL;
+		goto exit;
+	}
+
+	if (!session->active) {
+		dev_err(kbdev->dev, "%s: session is already inactive",
+			__func__);
+		ret = -EINVAL;
+		goto exit;
+	}
+
+	for (i = 0; i < session->num_prfcnts; i++) {
+		u8 type = session->prfcnts[i].type;
+		u8 idx = session->prfcnts[i].select_idx;
+		struct kbase_ipa_control_prfcnt_config *prfcnt_config =
+			&ipa_ctrl->blocks[type].select[idx];
+
+		if (!WARN_ON(prfcnt_config->refcount == 0)) {
+			prfcnt_config->refcount--;
+			if (prfcnt_config->refcount == 0) {
+				new_config = true;
+				ipa_ctrl->blocks[type].num_available_counters++;
+			}
+		}
+	}
+
+	if (new_config) {
+		u64 select_config[KBASE_IPA_CORE_TYPE_NUM];
+
+		build_select_config(ipa_ctrl, select_config);
+		ret = apply_select_config(kbdev, select_config);
+		if (ret)
+			dev_err(kbdev->dev,
+				"%s: failed to apply SELECT configuration",
+				__func__);
+	}
+
+	session->num_prfcnts = 0;
+	session->active = false;
+	ipa_ctrl->num_active_sessions--;
+
+exit:
+	spin_unlock_irqrestore(&ipa_ctrl->lock, flags);
+	kbase_pm_context_idle(kbdev);
+	return ret;
+}
+KBASE_EXPORT_TEST_API(kbase_ipa_control_unregister);
+
+int kbase_ipa_control_query(struct kbase_device *kbdev, const void *client,
+			    u64 *values, size_t num_values, u64 *protected_time)
+{
+	struct kbase_ipa_control *ipa_ctrl;
+	struct kbase_ipa_control_session *session;
+	size_t i;
+	unsigned long flags;
+	bool gpu_ready;
+
+	if (WARN_ON(kbdev == NULL))
+		return -EINVAL; /* no device to log against */
+
+	if (WARN_ON(client == NULL) || WARN_ON(values == NULL)) {
+		dev_err(kbdev->dev, "%s: wrong input arguments", __func__);
+		return -EINVAL;
+	}
+
+	ipa_ctrl = &kbdev->csf.ipa_control;
+	session = (struct kbase_ipa_control_session *)client;
+
+	if (WARN_ON(num_values < session->num_prfcnts)) {
+		dev_err(kbdev->dev,
+			"%s: not enough space (%zu) to return all counter values (%zu)",
+			__func__, num_values, session->num_prfcnts);
+		return -EINVAL;
+	}
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	gpu_ready = kbdev->pm.backend.gpu_ready;
+
+	for (i = 0; i < session->num_prfcnts; i++) {
+		struct kbase_ipa_control_prfcnt *prfcnt = &session->prfcnts[i];
+
+		calc_prfcnt_delta(kbdev, prfcnt, gpu_ready);
+		/* Return all the accumulated difference */
+		values[i] = prfcnt->accumulated_diff;
+		prfcnt->accumulated_diff = 0;
+	}
+
+	if (protected_time) {
+		u64 time_now = ktime_get_ns();
+
+		/* Time from protm periods that ended since the last query */
+		*protected_time = session->protm_time;
+
+		/* Plus the part of a still-running period not yet reported */
+		if (kbdev->protected_mode)
+			*protected_time +=
+				time_now - MAX(session->last_query_time,
+					       ipa_ctrl->protm_start);
+		session->last_query_time = time_now;
+		session->protm_time = 0;
+	}
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	for (i = session->num_prfcnts; i < num_values; i++)
+		values[i] = 0;
+
+	return 0;
+}
+KBASE_EXPORT_TEST_API(kbase_ipa_control_query);
+
+void kbase_ipa_control_handle_gpu_power_off(struct kbase_device *kbdev)
+{
+	struct kbase_ipa_control *ipa_ctrl = &kbdev->csf.ipa_control;
+	size_t session_idx;
+	int ret;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	/* GPU should still be ready for use when this function gets called */
+	WARN_ON(!kbdev->pm.backend.gpu_ready);
+
+	/* Interrupts are already disabled and interrupt state is also saved */
+	spin_lock(&ipa_ctrl->lock);
+
+	/* First disable the automatic sampling through TIMER  */
+	kbase_reg_write(kbdev, IPA_CONTROL_REG(TIMER), 0);
+	ret = wait_status(kbdev, STATUS_TIMER_ENABLED);
+	if (ret) {
+		dev_err(kbdev->dev,
+			"Wait for disabling of IPA control timer failed: %d",
+			ret);
+	}
+
+	/* Now issue the manual SAMPLE command */
+	kbase_reg_write(kbdev, IPA_CONTROL_REG(COMMAND), COMMAND_SAMPLE);
+	ret = wait_status(kbdev, STATUS_COMMAND_ACTIVE);
+	if (ret) {
+		dev_err(kbdev->dev,
+			"Wait for the completion of manual sample failed: %d",
+			ret);
+	}
+
+	/* Active sessions may sit in any slot, so scan the whole array */
+	for (session_idx = 0; session_idx < KBASE_IPA_CONTROL_MAX_SESSIONS;
+	     session_idx++) {
+		struct kbase_ipa_control_session *session =
+			&ipa_ctrl->sessions[session_idx];
+		size_t i;
+
+		if (!session->active)
+			continue;
+
+		for (i = 0; i < session->num_prfcnts; i++)
+			calc_prfcnt_delta(kbdev, &session->prfcnts[i], true);
+	}
+
+	spin_unlock(&ipa_ctrl->lock);
+}
+
+void kbase_ipa_control_handle_gpu_power_on(struct kbase_device *kbdev)
+{
+	struct kbase_ipa_control *const ctrl = &kbdev->csf.ipa_control;
+	int err;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	/* By the time this hook runs the GPU is expected to be ready */
+	WARN_ON(!kbdev->pm.backend.gpu_ready);
+
+	/* Caller holds hwaccess_lock with interrupts already disabled */
+	spin_lock(&ctrl->lock);
+
+	/* Re-apply the SELECT configuration (needed only for CSHW) */
+	kbase_reg_write(kbdev, IPA_CONTROL_REG(COMMAND), COMMAND_APPLY);
+
+	err = wait_status(kbdev, STATUS_COMMAND_ACTIVE);
+	if (err)
+		dev_err(kbdev->dev,
+			"Wait for the completion of apply command failed: %d",
+			err);
+
+	/* Restart periodic sampling with the period for the current rate */
+	kbase_reg_write(kbdev, IPA_CONTROL_REG(TIMER),
+			timer_value(ctrl->cur_gpu_rate));
+
+	spin_unlock(&ctrl->lock);
+}
+
+void kbase_ipa_control_handle_gpu_reset_pre(struct kbase_device *kbdev)
+{
+	/* A soft reset is treated as a power down: stop TIMER, sample once */
+	kbase_ipa_control_handle_gpu_power_off(kbdev);
+}
+KBASE_EXPORT_TEST_API(kbase_ipa_control_handle_gpu_reset_pre);
+
+void kbase_ipa_control_handle_gpu_reset_post(struct kbase_device *kbdev)
+{
+	struct kbase_ipa_control *const ctrl = &kbdev->csf.ipa_control;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	/* By the time this hook runs the GPU is expected to be ready */
+	WARN_ON(!kbdev->pm.backend.gpu_ready);
+
+	/* Caller holds hwaccess_lock with interrupts already disabled */
+	spin_lock(&ctrl->lock);
+
+	/* RESET_ACK is only sent when STATUS reports the reset bit as set */
+	if (kbase_reg_read(kbdev, IPA_CONTROL_REG(STATUS)) & STATUS_RESET) {
+		int err;
+
+		kbase_reg_write(kbdev, IPA_CONTROL_REG(COMMAND),
+				COMMAND_RESET_ACK);
+		/* Wait for STATUS_RESET to read back clear */
+		err = wait_status(kbdev, STATUS_RESET);
+		if (err)
+			dev_err(kbdev->dev,
+				"Wait for the reset ack command failed: %d",
+				err);
+	}
+
+	spin_unlock(&ctrl->lock);
+
+	/* The rest of the bring-up is shared with the power-on path */
+	kbase_ipa_control_handle_gpu_power_on(kbdev);
+}
+KBASE_EXPORT_TEST_API(kbase_ipa_control_handle_gpu_reset_post);
+
+#if MALI_UNIT_TEST
+/* Test hook: feed a clock rate change straight into the listener callback. */
+void kbase_ipa_control_rate_change_notify_test(struct kbase_device *kbdev,
+					       u32 clk_index, u32 clk_rate_hz)
+{
+	struct kbase_ipa_control_listener_data *data =
+		kbdev->csf.ipa_control.rtm_listener_data;
+
+	kbase_ipa_control_rate_change_notify(&data->listener, clk_index,
+					     clk_rate_hz);
+}
+KBASE_EXPORT_TEST_API(kbase_ipa_control_rate_change_notify_test);
+#endif
+
+void kbase_ipa_control_protm_entered(struct kbase_device *kbdev)
+{
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	/* Start time that query and protm_exited measure protm time from */
+	kbdev->csf.ipa_control.protm_start = ktime_get_ns();
+}
+
+void kbase_ipa_control_protm_exited(struct kbase_device *kbdev)
+{
+	struct kbase_ipa_control *ipa_ctrl = &kbdev->csf.ipa_control;
+	size_t i;
+	u64 time_now = ktime_get_ns();
+	u32 status;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	/* Active sessions may sit in any slot, so scan the whole array */
+	for (i = 0; i < KBASE_IPA_CONTROL_MAX_SESSIONS; i++) {
+		struct kbase_ipa_control_session *session =
+			&ipa_ctrl->sessions[i];
+
+		if (session->active)
+			session->protm_time +=
+				time_now - MAX(session->last_query_time,
+					       ipa_ctrl->protm_start);
+	}
+
+	/* Acknowledge the protected_mode bit, if set, in the STATUS register */
+	status = kbase_reg_read(kbdev, IPA_CONTROL_REG(STATUS));
+	if (status & STATUS_PROTECTED_MODE) {
+		int ret;
+
+		/* Acknowledge the protm command */
+		kbase_reg_write(kbdev, IPA_CONTROL_REG(COMMAND),
+				COMMAND_PROTECTED_ACK);
+		ret = wait_status(kbdev, STATUS_PROTECTED_MODE);
+		if (ret) {
+			dev_err(kbdev->dev,
+				"Wait for the protm ack command failed: %d",
+				ret);
+		}
+	}
+}
+
diff --git a/drivers/gpu/arm/bifrost/csf/ipa_control/mali_kbase_csf_ipa_control.h b/drivers/gpu/arm/bifrost/csf/ipa_control/mali_kbase_csf_ipa_control.h
new file mode 100644
index 000000000000..a828e01f9d4d
--- /dev/null
+++ b/drivers/gpu/arm/bifrost/csf/ipa_control/mali_kbase_csf_ipa_control.h
@@ -0,0 +1,244 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ *
+ * (C) COPYRIGHT 2020 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+#ifndef _KBASE_CSF_IPA_CONTROL_H_
+#define _KBASE_CSF_IPA_CONTROL_H_
+
+#include <mali_kbase.h>
+
+/*
+ * Maximum index accepted to configure an IPA Control performance counter.
+ */
+#define KBASE_IPA_CONTROL_CNT_MAX_IDX ((u8)64 * 3)
+
+/**
+ * struct kbase_ipa_control_perf_counter - Performance counter description
+ *
+ * @scaling_factor: Scaling factor by which the counter's value shall be
+ *                  multiplied. A scaling factor of 1 corresponds to units
+ *                  of 1 second if values are normalized by GPU frequency.
+ * @gpu_norm:       Indicating whether counter values shall be normalized by
+ *                  GPU frequency. If true, returned values represent
+ *                  an interval of time expressed in seconds (when the scaling
+ *                  factor is set to 1).
+ * @type:           Type of counter block for performance counter.
+ * @idx:            Index of the performance counter inside the block.
+ *                  It may be dependent on GPU architecture.
+ *                  It cannot be greater than KBASE_IPA_CONTROL_CNT_MAX_IDX.
+ *
+ * This structure is used by clients of the IPA Control component to describe
+ * a performance counter that they intend to read. The counter is identified
+ * by block and index. In addition to that, the client also specifies how
+ * values shall be represented. Raw values are a number of GPU cycles;
+ * if normalized, they are divided by GPU frequency and become an interval
+ * of time expressed in seconds, since the GPU frequency is given in Hz.
+ * The client may specify a scaling factor to multiply counter values before
+ * they are divided by frequency, in case the unit of time of 1 second is
+ * too low in resolution. For instance: a scaling factor of 1000 implies
+ * that the returned value is a time expressed in milliseconds; a scaling
+ * factor of 1000 * 1000 implies that the returned value is a time expressed
+ * in microseconds.
+ */
+struct kbase_ipa_control_perf_counter {
+	u64 scaling_factor;
+	bool gpu_norm;
+	enum kbase_ipa_core_type type;
+	u8 idx;
+};
+
+/**
+ * kbase_ipa_control_init - Initialize the IPA Control component
+ *
+ * @kbdev: Pointer to Kbase device.
+ */
+void kbase_ipa_control_init(struct kbase_device *kbdev);
+
+/**
+ * kbase_ipa_control_term - Terminate the IPA Control component
+ *
+ * @kbdev: Pointer to Kbase device.
+ */
+void kbase_ipa_control_term(struct kbase_device *kbdev);
+
+/**
+ * kbase_ipa_control_register - Register a client to the IPA Control component
+ *
+ * @kbdev:         Pointer to Kbase device.
+ * @perf_counters: Array of performance counters the client intends to read.
+ *                 For each counter the client specifies block, index,
+ *                 scaling factor and whether it must be normalized by GPU
+ *                 frequency.
+ * @num_counters:  Number of performance counters. It cannot exceed the total
+ *                 number of counters that exist on the IPA Control interface.
+ * @client:        Handle to an opaque structure set by IPA Control if
+ *                 the registration is successful. This handle identifies
+ *                 a client's session and shall be provided in its future
+ *                 queries.
+ *
+ * A client needs to subscribe to the IPA Control component by declaring which
+ * performance counters it intends to read, and specifying a scaling factor
+ * and whether normalization is requested for each performance counter.
+ * The function shall configure the IPA Control interface accordingly and start
+ * a session for the client that made the request. A unique handle is returned
+ * if registration is successful in order to identify the client's session
+ * and be used for future queries.
+ *
+ * Return: 0 on success, negative -errno on error
+ */
+int kbase_ipa_control_register(
+	struct kbase_device *kbdev,
+	const struct kbase_ipa_control_perf_counter *perf_counters,
+	size_t num_counters, void **client);
+
+/**
+ * kbase_ipa_control_unregister - Unregister a client from IPA Control
+ *
+ * @kbdev:  Pointer to kbase device.
+ * @client: Handle to an opaque structure that identifies the client session
+ *          to terminate, as returned by kbase_ipa_control_register.
+ *
+ * Return: 0 on success, negative -errno on error
+ */
+int kbase_ipa_control_unregister(struct kbase_device *kbdev,
+				 const void *client);
+
+/**
+ * kbase_ipa_control_query - Query performance counters
+ *
+ * @kbdev:          Pointer to kbase device.
+ * @client:         Handle to an opaque structure that identifies the client
+ *                  session, as returned by kbase_ipa_control_register.
+ * @values:         Array of values queried from performance counters, whose
+ *                  length depends on the number of counters requested at
+ *                  the time of registration. Values are scaled and normalized
+ *                  and represent the difference since the last query.
+ * @num_values:     Number of entries in the array of values that has been
+ *                  passed by the caller. It must be at least equal to the
+ *                  number of performance counters the client registered itself
+ *                  to read.
+ * @protected_time: Time spent in protected mode since last query,
+ *                  expressed in nanoseconds. This pointer may be NULL if the
+ *                  client doesn't want to know about this.
+ *
+ * A client that has already opened a session by registering itself to read
+ * some performance counters may use this function to query the values of
+ * those counters. The values returned are normalized by GPU frequency if
+ * requested and then multiplied by the scaling factor provided at the time
+ * of registration. Values always represent a difference since the last query.
+ *
+ * Performance counters are not updated while the GPU operates in protected
+ * mode. For this reason, returned values may be unreliable if the GPU has
+ * been in protected mode since the last query. The function returns success
+ * in that case, but it also gives a measure of how much time has been spent
+ * in protected mode.
+ *
+ * Return: 0 on success, negative -errno on error
+ */
+int kbase_ipa_control_query(struct kbase_device *kbdev, const void *client,
+			    u64 *values, size_t num_values,
+			    u64 *protected_time);
+
+/**
+ * kbase_ipa_control_handle_gpu_power_on - Handle the GPU power on event
+ *
+ * @kbdev:          Pointer to kbase device.
+ *
+ * This function is called after the GPU has been powered on and is ready for
+ * use. After the GPU powers on, the IPA Control component needs to ensure
+ * that the counters start incrementing again.
+ */
+void kbase_ipa_control_handle_gpu_power_on(struct kbase_device *kbdev);
+
+/**
+ * kbase_ipa_control_handle_gpu_power_off - Handle the GPU power off event
+ *
+ * @kbdev:          Pointer to kbase device.
+ *
+ * This function is called just before the GPU is powered off when it is still
+ * ready for use.
+ * The IPA Control component needs to be aware of the GPU power off so that it
+ * can handle queries from clients appropriately and return meaningful values
+ * to them.
+ */
+void kbase_ipa_control_handle_gpu_power_off(struct kbase_device *kbdev);
+
+/**
+ * kbase_ipa_control_handle_gpu_reset_pre - Handle the pre GPU reset event
+ *
+ * @kbdev:          Pointer to kbase device.
+ *
+ * This function is called when the GPU is about to be reset.
+ */
+void kbase_ipa_control_handle_gpu_reset_pre(struct kbase_device *kbdev);
+
+/**
+ * kbase_ipa_control_handle_gpu_reset_post - Handle the post GPU reset event
+ *
+ * @kbdev:          Pointer to kbase device.
+ *
+ * This function is called after the GPU has been reset.
+ */
+void kbase_ipa_control_handle_gpu_reset_post(struct kbase_device *kbdev);
+
+#if MALI_UNIT_TEST
+/**
+ * kbase_ipa_control_rate_change_notify_test - Notify GPU rate change
+ *                                             (only for testing)
+ *
+ * @kbdev:       Pointer to kbase device.
+ * @clk_index:   Index of the clock for which the change has occurred.
+ * @clk_rate_hz: Clock frequency (Hz).
+ *
+ * Notify the IPA Control component about a GPU rate change.
+ */
+void kbase_ipa_control_rate_change_notify_test(struct kbase_device *kbdev,
+					       u32 clk_index, u32 clk_rate_hz);
+#endif /* MALI_UNIT_TEST */
+
+/**
+ * kbase_ipa_control_protm_entered - Tell IPA_CONTROL that protected mode
+ * has been entered.
+ *
+ * @kbdev:		Pointer to kbase device.
+ *
+ * This function provides a means through which IPA_CONTROL can be informed
+ * that the GPU has entered protected mode. Since the GPU cannot access
+ * performance counters while in this mode, this information is useful as
+ * it implies (a) the values of these registers cannot change, so there's no
+ * point trying to read them, and (b) IPA_CONTROL has a means through which
+ * to record the duration of time the GPU is in protected mode, which can
+ * then be forwarded on to clients, who may wish, for example, to assume
+ * that the GPU was busy 100% of the time while in this mode.
+ */
+void kbase_ipa_control_protm_entered(struct kbase_device *kbdev);
+
+/**
+ * kbase_ipa_control_protm_exited - Tell IPA_CONTROL that protected mode
+ * has been exited.
+ *
+ * @kbdev:		Pointer to kbase device
+ *
+ * This function provides a means through which IPA_CONTROL can be informed
+ * that the GPU has exited from protected mode.
+ */
+void kbase_ipa_control_protm_exited(struct kbase_device *kbdev);
+
+#endif /* _KBASE_CSF_IPA_CONTROL_H_ */
diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf.c b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf.c
index 83d7513e78d9..00f89088c7ba 100644
--- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf.c
+++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf.c
@@ -1,11 +1,12 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  *
- * (C) COPYRIGHT 2018-2020 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2018-2021 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,25 +17,25 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 #include <mali_kbase.h>
 #include <gpu/mali_kbase_gpu_fault.h>
-#include <mali_kbase_ctx_sched.h>
 #include <mali_kbase_reset_gpu.h>
 #include "mali_kbase_csf.h"
 #include "backend/gpu/mali_kbase_pm_internal.h"
 #include <linux/export.h>
+#include <linux/priority_control_manager.h>
 #include <linux/shmem_fs.h>
-#include "mali_gpu_csf_registers.h"
+#include <uapi/gpu/arm/bifrost/csf/mali_gpu_csf_registers.h>
 #include "mali_kbase_csf_tiler_heap.h"
 #include <mmu/mali_kbase_mmu.h>
-#include <mali_kbase_ctx_sched.h>
+#include "mali_kbase_csf_timeout.h"
+#include <csf/ipa_control/mali_kbase_csf_ipa_control.h>
 
 #define CS_REQ_EXCEPTION_MASK (CS_REQ_FAULT_MASK | CS_REQ_FATAL_MASK)
 #define CS_ACK_EXCEPTION_MASK (CS_ACK_FAULT_MASK | CS_ACK_FATAL_MASK)
+#define POWER_DOWN_LATEST_FLUSH_VALUE ((u32)1)
 
 /**
  * struct kbase_csf_event - CSF event callback.
@@ -55,6 +56,19 @@ struct kbase_csf_event {
 	void *param;
 };
 
+const u8 kbasep_csf_queue_group_priority_to_relative[BASE_QUEUE_GROUP_PRIORITY_COUNT] = {
+	KBASE_QUEUE_GROUP_PRIORITY_HIGH,
+	KBASE_QUEUE_GROUP_PRIORITY_MEDIUM,
+	KBASE_QUEUE_GROUP_PRIORITY_LOW,
+	KBASE_QUEUE_GROUP_PRIORITY_REALTIME
+};
+const u8 kbasep_csf_relative_to_queue_group_priority[KBASE_QUEUE_GROUP_PRIORITY_COUNT] = {
+	BASE_QUEUE_GROUP_PRIORITY_REALTIME,
+	BASE_QUEUE_GROUP_PRIORITY_HIGH,
+	BASE_QUEUE_GROUP_PRIORITY_MEDIUM,
+	BASE_QUEUE_GROUP_PRIORITY_LOW
+};
+
 static void put_user_pages_mmap_handle(struct kbase_context *kctx,
 			struct kbase_queue *queue)
 {
@@ -263,6 +277,7 @@ unlock:
 	return ret;
 }
 
+static void term_queue_group(struct kbase_queue_group *group);
 static void get_queue(struct kbase_queue *queue);
 static void release_queue(struct kbase_queue *queue);
 
@@ -438,7 +453,7 @@ static void release_queue(struct kbase_queue *queue)
 }
 
 static void oom_event_worker(struct work_struct *data);
-static void fault_event_worker(struct work_struct *data);
+static void fatal_event_worker(struct work_struct *data);
 
 int kbase_csf_queue_register(struct kbase_context *kctx,
 			     struct kbase_ioctl_cs_queue_register *reg)
@@ -503,10 +518,13 @@ int kbase_csf_queue_register(struct kbase_context *kctx,
 	queue->sync_ptr = 0;
 	queue->sync_value = 0;
 
+	queue->sb_status = 0;
+	queue->blocked_reason = CS_STATUS_BLOCKED_REASON_REASON_UNBLOCKED;
+
 	INIT_LIST_HEAD(&queue->link);
 	INIT_LIST_HEAD(&queue->error.link);
 	INIT_WORK(&queue->oom_event_work, oom_event_worker);
-	INIT_WORK(&queue->fault_event_work, fault_event_worker);
+	INIT_WORK(&queue->fatal_event_work, fatal_event_worker);
 	list_add(&queue->link, &kctx->csf.queue_list);
 
 	region->flags |= KBASE_REG_NO_USER_FREE;
@@ -525,13 +543,26 @@ static void unbind_queue(struct kbase_context *kctx,
 void kbase_csf_queue_terminate(struct kbase_context *kctx,
 			      struct kbase_ioctl_cs_queue_terminate *term)
 {
+	struct kbase_device *kbdev = kctx->kbdev;
 	struct kbase_queue *queue;
+	int err;
+	bool reset_prevented = false;
+
+	err = kbase_reset_gpu_prevent_and_wait(kbdev);
+	if (err)
+		dev_warn(
+			kbdev->dev,
+			"Unsuccessful GPU reset detected when terminating queue (buffer_addr=0x%.16llx), attempting to terminate regardless",
+			term->buffer_gpu_addr);
+	else
+		reset_prevented = true;
 
 	mutex_lock(&kctx->csf.lock);
-
 	queue = find_queue(kctx, term->buffer_gpu_addr);
 
 	if (queue) {
+		unsigned long flags;
+
 		/* As the GPU queue has been terminated by the
 		 * user space, undo the actions that were performed when the
 		 * queue was registered i.e. remove the queue from the per
@@ -555,15 +586,19 @@ void kbase_csf_queue_terminate(struct kbase_context *kctx,
 		}
 		kbase_gpu_vm_unlock(kctx);
 
-		/* Remove any pending command queue fatal from
-		 * the per-context list.
-		 */
+		spin_lock_irqsave(&kctx->csf.event_lock, flags);
+		dev_dbg(kctx->kbdev->dev,
+			"Remove any pending command queue fatal from context %pK\n",
+			(void *)kctx);
 		list_del_init(&queue->error.link);
+		spin_unlock_irqrestore(&kctx->csf.event_lock, flags);
 
 		release_queue(queue);
 	}
 
 	mutex_unlock(&kctx->csf.lock);
+	if (reset_prevented)
+		kbase_reset_gpu_allow(kbdev);
 }
 
 int kbase_csf_queue_bind(struct kbase_context *kctx, union kbase_ioctl_cs_queue_bind *bind)
@@ -678,41 +713,49 @@ void kbase_csf_ring_cs_user_doorbell(struct kbase_device *kbdev,
 }
 
 void kbase_csf_ring_cs_kernel_doorbell(struct kbase_device *kbdev,
-			struct kbase_queue *queue)
+				       int csi_index, int csg_nr,
+				       bool ring_csg_doorbell)
 {
-	struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface;
-	struct kbase_queue_group *group = get_bound_queue_group(queue);
 	struct kbase_csf_cmd_stream_group_info *ginfo;
 	u32 value;
-	int slot;
 
-	if (WARN_ON(!group))
+	if (WARN_ON(csg_nr < 0) ||
+	    WARN_ON(csg_nr >= kbdev->csf.global_iface.group_num))
 		return;
 
-	slot = kbase_csf_scheduler_group_get_slot(group);
+	ginfo = &kbdev->csf.global_iface.groups[csg_nr];
 
-	if (WARN_ON(slot < 0))
+	if (WARN_ON(csi_index < 0) ||
+	    WARN_ON(csi_index >= ginfo->stream_num))
 		return;
 
-	ginfo = &global_iface->groups[slot];
-
 	value = kbase_csf_firmware_csg_output(ginfo, CSG_DB_ACK);
-	value ^= (1 << queue->csi_index);
+	value ^= (1 << csi_index);
 	kbase_csf_firmware_csg_input_mask(ginfo, CSG_DB_REQ, value,
-					  1 << queue->csi_index);
+					  1 << csi_index);
 
-	kbase_csf_ring_csg_doorbell(kbdev, slot);
+	if (likely(ring_csg_doorbell))
+		kbase_csf_ring_csg_doorbell(kbdev, csg_nr);
 }
 
 int kbase_csf_queue_kick(struct kbase_context *kctx,
 			 struct kbase_ioctl_cs_queue_kick *kick)
 {
+	struct kbase_device *kbdev = kctx->kbdev;
 	struct kbase_queue_group *group;
 	struct kbase_queue *queue;
 	int err = 0;
 
-	mutex_lock(&kctx->csf.lock);
+	err = kbase_reset_gpu_prevent_and_wait(kbdev);
+	if (err) {
+		dev_warn(
+			kbdev->dev,
+			"Unsuccessful GPU reset detected when kicking queue (buffer_addr=0x%.16llx)",
+			kick->buffer_gpu_addr);
+		return err;
+	}
 
+	mutex_lock(&kctx->csf.lock);
 	queue = find_queue(kctx, kick->buffer_gpu_addr);
 	if (!queue)
 		err = -EINVAL;
@@ -727,8 +770,9 @@ int kbase_csf_queue_kick(struct kbase_context *kctx,
 
 	if (!err)
 		err = kbase_csf_scheduler_queue_start(queue);
-
 	mutex_unlock(&kctx->csf.lock);
+	kbase_reset_gpu_allow(kbdev);
+
 	return err;
 }
 
@@ -758,8 +802,8 @@ static void unbind_stopped_queue(struct kbase_context *kctx,
  * @kctx:	Address of the kbase context within which the queue was created.
  * @queue:	Pointer to the queue to be unlinked.
  *
- * This function will also send the stop request to firmware for the command
- * stream if the group to which the GPU command queue was bound is scheduled.
+ * This function will also send the stop request to firmware for the CS
+ * if the group to which the GPU command queue was bound is scheduled.
  *
  * This function would be called when :-
  * - queue is being unbound. This would happen when the IO mapping
@@ -778,6 +822,7 @@ static void unbind_stopped_queue(struct kbase_context *kctx,
 
 static void unbind_queue(struct kbase_context *kctx, struct kbase_queue *queue)
 {
+	kbase_reset_gpu_assert_failed_or_prevented(kctx->kbdev);
 	lockdep_assert_held(&kctx->csf.lock);
 
 	if (queue->bind_state != KBASE_CSF_QUEUE_UNBOUND) {
@@ -794,7 +839,38 @@ void kbase_csf_queue_unbind(struct kbase_queue *queue)
 
 	lockdep_assert_held(&kctx->csf.lock);
 
-	unbind_queue(kctx, queue);
+	/* As the process itself is exiting, the queue group can be terminated
+	 * instead, which is much faster than stopping the individual queues.
+	 * This ensures a faster exit for the process, especially in the case
+	 * where a CSI gets stuck.
+	 * The CSI STOP request will wait for the in-flight work to drain,
+	 * whereas the CSG TERM request results in an immediate abort or
+	 * cancellation of the pending work.
+	 */
+	if (current->flags & PF_EXITING) {
+		struct kbase_queue_group *group = get_bound_queue_group(queue);
+
+		if (group)
+			term_queue_group(group);
+
+		WARN_ON(queue->bind_state != KBASE_CSF_QUEUE_UNBOUND);
+	} else {
+		unbind_queue(kctx, queue);
+	}
+
+	/* Free the resources, if allocated for this queue. */
+	if (queue->reg)
+		kbase_csf_free_command_stream_user_pages(kctx, queue);
+}
+
+void kbase_csf_queue_unbind_stopped(struct kbase_queue *queue)
+{
+	struct kbase_context *kctx = queue->kctx;
+
+	lockdep_assert_held(&kctx->csf.lock);
+
+	WARN_ON(queue->bind_state == KBASE_CSF_QUEUE_BOUND);
+	unbind_stopped_queue(kctx, queue);
 
 	/* Free the resources, if allocated for this queue. */
 	if (queue->reg)
@@ -827,15 +903,14 @@ static int find_free_group_handle(struct kbase_context *const kctx)
 }
 
 /**
- * iface_has_enough_streams() - Check that at least one command stream
- *				group supports a given number of streams
+ * iface_has_enough_streams() - Check that at least one CSG supports
+ *                              a given number of CS
  *
- * @kbdev:	Instance of a GPU platform device that implements a command
- *		stream front-end interface.
- * @cs_min:	Minimum number of command streams required.
+ * @kbdev:  Instance of a GPU platform device that implements a CSF interface.
+ * @cs_min: Minimum number of CSs required.
  *
- * Return: true if at least one command stream group supports the given number
- *         of command streams (or more); otherwise false.
+ * Return: true if at least one CSG supports the given number
+ *         of CSs (or more); otherwise false.
  */
 static bool iface_has_enough_streams(struct kbase_device *const kbdev,
 	u32 const cs_min)
@@ -942,9 +1017,8 @@ phy_alloc_failed:
  * create_protected_suspend_buffer() - Create protected-mode suspend buffer
  *					per queue group
  *
- * @kbdev:	Instance of a GPU platform device that implements a command
- *		stream front-end interface.
- * @s_buf:	Pointer to suspend buffer that is attached to queue group
+ * @kbdev: Instance of a GPU platform device that implements a CSF interface.
+ * @s_buf: Pointer to suspend buffer that is attached to queue group
  *
  * Return: 0 if suspend buffer is successfully allocated and reflected to GPU
  *         MMU page table. Otherwise -ENOMEM.
@@ -1057,6 +1131,26 @@ static int create_suspend_buffers(struct kbase_context *const kctx,
 	return err;
 }
 
+/**
+ * generate_group_uid() - Makes an ID unique to all kernel base devices
+ *                        and contexts, for a queue group and CSG.
+ *
+ * Return: A unique 32-bit ID, or 1 (after a WARN) if no kbase device is found.
+ */
+static u32 generate_group_uid(void)
+{
+	/* use first KBase device to store max UID */
+	struct kbase_device *kbdev = kbase_find_device(-1);
+	u32 uid = 1;
+
+	if (kbdev)
+		uid = (u32) atomic_inc_return(&kbdev->group_max_uid_in_devices);
+	else
+		WARN(1, "NULL kbase device pointer in group UID generation");
+
+	return uid;
+}
+
 /**
  * create_queue_group() - Create a queue group
  *
@@ -1068,7 +1162,7 @@ static int create_suspend_buffers(struct kbase_context *const kctx,
  * Return: a queue group handle on success, or a negative error code on failure.
  */
 static int create_queue_group(struct kbase_context *const kctx,
-	const union kbase_ioctl_cs_queue_group_create *const create)
+	union kbase_ioctl_cs_queue_group_create *const create)
 {
 	int group_handle = find_free_group_handle(kctx);
 
@@ -1099,10 +1193,14 @@ static int create_queue_group(struct kbase_context *const kctx,
 			group->tiler_max = create->in.tiler_max;
 			group->fragment_max = create->in.fragment_max;
 			group->compute_max = create->in.compute_max;
-			group->priority = create->in.priority;
+			group->priority = kbase_csf_priority_queue_group_priority_to_relative(
+				kbase_csf_priority_check(kctx->kbdev, create->in.priority));
 			group->doorbell_nr = KBASEP_USER_DB_NR_INVALID;
 			group->faulted = false;
 
+			group->group_uid = generate_group_uid();
+			create->out.group_uid = group->group_uid;
+
 			INIT_LIST_HEAD(&group->link);
 			INIT_LIST_HEAD(&group->link_to_schedule);
 			INIT_LIST_HEAD(&group->error_fatal.link);
@@ -1155,7 +1253,7 @@ int kbase_csf_queue_group_create(struct kbase_context *const kctx,
 		err = -EINVAL;
 	} else if (!iface_has_enough_streams(kctx->kbdev, create->in.cs_min)) {
 		dev_err(kctx->kbdev->dev,
-			"No CSG has at least %d streams\n",
+			"No CSG has at least %d CSs\n",
 			create->in.cs_min);
 		err = -EINVAL;
 	} else {
@@ -1217,9 +1315,8 @@ static void term_normal_suspend_buffer(struct kbase_context *const kctx,
  * term_protected_suspend_buffer() - Free normal-mode suspend buffer of
  *					queue group
  *
- * @kbdev:	Instance of a GPU platform device that implements a command
- *		stream front-end interface.
- * @s_buf:	Pointer to queue group suspend buffer to be freed
+ * @kbdev: Instance of a GPU platform device that implements a CSF interface.
+ * @s_buf: Pointer to queue group suspend buffer to be freed
  */
 static void term_protected_suspend_buffer(struct kbase_device *const kbdev,
 		struct kbase_protected_suspend_buffer *s_buf)
@@ -1247,7 +1344,7 @@ void kbase_csf_term_descheduled_queue_group(struct kbase_queue_group *group)
 {
 	struct kbase_context *kctx = group->kctx;
 
-	/* Currently each group supports the same number of streams */
+	/* Currently each group supports the same number of CS */
 	u32 max_streams =
 		kctx->kbdev->csf.global_iface.groups[0].stream_num;
 	u32 i;
@@ -1288,6 +1385,7 @@ static void term_queue_group(struct kbase_queue_group *group)
 {
 	struct kbase_context *kctx = group->kctx;
 
+	kbase_reset_gpu_assert_failed_or_prevented(kctx->kbdev);
 	lockdep_assert_held(&kctx->csf.lock);
 
 	/* Stop the group and evict it from the scheduler */
@@ -1311,22 +1409,44 @@ void kbase_csf_queue_group_terminate(struct kbase_context *kctx,
 				     u8 group_handle)
 {
 	struct kbase_queue_group *group;
+	int err;
+	bool reset_prevented = false;
+	struct kbase_device *const kbdev = kctx->kbdev;
+
+	err = kbase_reset_gpu_prevent_and_wait(kbdev);
+	if (err)
+		dev_warn(
+			kbdev->dev,
+			"Unsuccessful GPU reset detected when terminating group %d, attempting to terminate regardless",
+			group_handle);
+	else
+		reset_prevented = true;
 
 	mutex_lock(&kctx->csf.lock);
 
 	group = find_queue_group(kctx, group_handle);
 
 	if (group) {
-		/* Remove any pending group fatal error from the per-context list. */
+		unsigned long flags;
+
+		spin_lock_irqsave(&kctx->csf.event_lock, flags);
+
+		dev_dbg(kbdev->dev,
+			"Remove any pending group fatal error from context %pK\n",
+			(void *)group->kctx);
+
 		list_del_init(&group->error_tiler_oom.link);
 		list_del_init(&group->error_timeout.link);
 		list_del_init(&group->error_fatal.link);
+		spin_unlock_irqrestore(&kctx->csf.event_lock, flags);
 
 		term_queue_group(group);
 		kctx->csf.queue_groups[group_handle] = NULL;
 	}
 
 	mutex_unlock(&kctx->csf.lock);
+	if (reset_prevented)
+		kbase_reset_gpu_allow(kbdev);
 
 	if (!group)
 		return;
@@ -1344,29 +1464,78 @@ int kbase_csf_queue_group_suspend(struct kbase_context *kctx,
 				  struct kbase_suspend_copy_buffer *sus_buf,
 				  u8 group_handle)
 {
-	int err = -EINVAL;
+	struct kbase_device *const kbdev = kctx->kbdev;
+	int err;
 	struct kbase_queue_group *group;
 
+	err = kbase_reset_gpu_prevent_and_wait(kbdev);
+	if (err) {
+		dev_warn(
+			kbdev->dev,
+			"Unsuccessful GPU reset detected when suspending group %d",
+			group_handle);
+		return err;
+	}
 	mutex_lock(&kctx->csf.lock);
 
 	group = find_queue_group(kctx, group_handle);
 	if (group)
 		err = kbase_csf_scheduler_group_copy_suspend_buf(group,
 								 sus_buf);
+	else
+		err = -EINVAL;
 
 	mutex_unlock(&kctx->csf.lock);
+	kbase_reset_gpu_allow(kbdev);
+
 	return err;
 }
 
 /**
- * kbase_csf_add_fatal_error_to_kctx - Add a fatal error to per-ctx error list.
+ * add_error() - Add an error to the list of errors to report to user space
  *
- * @group:       GPU command queue group.
- * @err_payload: Error payload to report.
+ * @kctx:  Address of a base context associated with a GPU address space.
+ * @error: Address of the item to be added to the context's pending error list.
+ * @data:  Error data to be returned to userspace.
+ *
+ * Does not wake up the event queue blocking a user thread in kbase_poll. This
+ * is to make it more efficient to add multiple errors.
+ *
+ * The added error must not already be on the context's list of errors waiting
+ * to be reported (e.g. because a previous error concerning the same object has
+ * not yet been reported).
  */
-static void kbase_csf_add_fatal_error_to_kctx(
-		struct kbase_queue_group *const group,
-		const struct base_gpu_queue_group_error *const err_payload)
+static void add_error(struct kbase_context *const kctx,
+		      struct kbase_csf_notification *const error,
+		      struct base_csf_notification const *const data)
+{
+	unsigned long flags;
+
+	if (WARN_ON(!kctx))
+		return;
+
+	if (WARN_ON(!error))
+		return;
+
+	if (WARN_ON(!data))
+		return;
+
+	spin_lock_irqsave(&kctx->csf.event_lock, flags);
+
+	if (!WARN_ON(!list_empty(&error->link))) {
+		error->data = *data;
+		list_add_tail(&error->link, &kctx->csf.error_list);
+		dev_dbg(kctx->kbdev->dev,
+			"Added error %pK of type %d in context %pK\n",
+			(void *)error, data->type, (void *)kctx);
+	}
+
+	spin_unlock_irqrestore(&kctx->csf.event_lock, flags);
+}
+
+void kbase_csf_add_group_fatal_error(
+	struct kbase_queue_group *const group,
+	struct base_gpu_queue_group_error const *const err_payload)
 {
 	struct base_csf_notification error;
 
@@ -1386,16 +1555,7 @@ static void kbase_csf_add_fatal_error_to_kctx(
 		}
 	};
 
-	lockdep_assert_held(&group->kctx->csf.lock);
-
-	/* If this group has already been in fatal error status,
-	 * subsequent fatal error on this group should never take place.
-	 */
-	if (!WARN_ON(!list_empty(&group->error_fatal.link))) {
-		group->error_fatal.data = error;
-		list_add_tail(&group->error_fatal.link,
-				&group->kctx->csf.error_list);
-	}
+	add_error(group->kctx, &group->error_fatal, &error);
 }
 
 void kbase_csf_active_queue_groups_reset(struct kbase_device *kbdev,
@@ -1404,7 +1564,6 @@ void kbase_csf_active_queue_groups_reset(struct kbase_device *kbdev,
 	struct list_head evicted_groups;
 	struct kbase_queue_group *group;
 	int i;
-	bool fatal_error_built = false;
 
 	INIT_LIST_HEAD(&evicted_groups);
 
@@ -1412,10 +1571,6 @@ void kbase_csf_active_queue_groups_reset(struct kbase_device *kbdev,
 
 	kbase_csf_scheduler_evict_ctx_slots(kbdev, kctx, &evicted_groups);
 	while (!list_empty(&evicted_groups)) {
-		struct kbase_csf_scheduler *scheduler =
-						&kbdev->csf.scheduler;
-		unsigned long flags;
-
 		group = list_first_entry(&evicted_groups,
 				struct kbase_queue_group, link);
 
@@ -1423,28 +1578,8 @@ void kbase_csf_active_queue_groups_reset(struct kbase_device *kbdev,
 			    kctx->tgid, kctx->id, group->handle);
 		kbase_csf_term_descheduled_queue_group(group);
 		list_del_init(&group->link);
-
-		kbase_csf_scheduler_spin_lock(kbdev, &flags);
-		if ((group == scheduler->active_protm_grp) &&
-		    group->faulted) {
-			const struct base_gpu_queue_group_error err_payload = {
-				.error_type = BASE_GPU_QUEUE_GROUP_ERROR_FATAL,
-				.payload = {
-					.fatal_group = {
-					.status = GPU_EXCEPTION_TYPE_SW_FAULT_0,
-					}
-				}
-			};
-
-			kbase_csf_add_fatal_error_to_kctx(group, &err_payload);
-			fatal_error_built = true;
-		}
-		kbase_csf_scheduler_spin_unlock(kbdev, flags);
 	}
 
-	if (fatal_error_built)
-		kbase_event_wakeup(kctx);
-
 	/* Acting on the queue groups that are pending to be terminated. */
 	for (i = 0; i < MAX_QUEUE_GROUP_NUM; i++) {
 		group = kctx->csf.queue_groups[i];
@@ -1458,6 +1593,7 @@ void kbase_csf_active_queue_groups_reset(struct kbase_device *kbdev,
 
 int kbase_csf_ctx_init(struct kbase_context *kctx)
 {
+	struct kbase_device *kbdev = kctx->kbdev;
 	int err = -ENOMEM;
 
 	INIT_LIST_HEAD(&kctx->csf.event_callback_list);
@@ -1467,6 +1603,19 @@ int kbase_csf_ctx_init(struct kbase_context *kctx)
 
 	spin_lock_init(&kctx->csf.event_lock);
 	kctx->csf.user_reg_vma = NULL;
+	mutex_lock(&kbdev->pm.lock);
+	/* The inode information for /dev/malixx file is not available at the
+	 * time of device probe as the inode is created when the device node
+	 * is created by udevd (through mknod).
+	 */
+	if (kctx->filp) {
+		if (!kbdev->csf.mali_file_inode)
+			kbdev->csf.mali_file_inode = kctx->filp->f_inode;
+
+		/* inode is unique for a file */
+		WARN_ON(kbdev->csf.mali_file_inode != kctx->filp->f_inode);
+	}
+	mutex_unlock(&kbdev->pm.lock);
 
 	/* Mark all the cookies as 'free' */
 	bitmap_fill(kctx->csf.cookies, KBASE_CSF_NUM_USER_IO_PAGES_HANDLE);
@@ -1506,6 +1655,8 @@ void kbase_csf_ctx_handle_fault(struct kbase_context *kctx,
 	int gr;
 	bool reported = false;
 	struct base_gpu_queue_group_error err_payload;
+	int err;
+	struct kbase_device *kbdev;
 
 	if (WARN_ON(!kctx))
 		return;
@@ -1513,6 +1664,14 @@ void kbase_csf_ctx_handle_fault(struct kbase_context *kctx,
 	if (WARN_ON(!fault))
 		return;
 
+	kbdev = kctx->kbdev;
+	err = kbase_reset_gpu_try_prevent(kbdev);
+	/* Regardless of whether reset failed or is currently happening, exit
+	 * early
+	 */
+	if (err)
+		return;
+
 	err_payload = (struct base_gpu_queue_group_error) {
 		.error_type = BASE_GPU_QUEUE_GROUP_ERROR_FATAL,
 		.payload = {
@@ -1531,7 +1690,7 @@ void kbase_csf_ctx_handle_fault(struct kbase_context *kctx,
 
 		if (group && group->run_state != KBASE_CSF_GROUP_TERMINATED) {
 			term_queue_group(group);
-			kbase_csf_add_fatal_error_to_kctx(group, &err_payload);
+			kbase_csf_add_group_fatal_error(group, &err_payload);
 			reported = true;
 		}
 	}
@@ -1540,11 +1699,18 @@ void kbase_csf_ctx_handle_fault(struct kbase_context *kctx,
 
 	if (reported)
 		kbase_event_wakeup(kctx);
+
+	kbase_reset_gpu_allow(kbdev);
 }
 
 void kbase_csf_ctx_term(struct kbase_context *kctx)
 {
+	struct kbase_device *kbdev = kctx->kbdev;
+	struct kbase_as *as = NULL;
+	unsigned long flags;
 	u32 i;
+	int err;
+	bool reset_prevented = false;
 
 	/* As the kbase context is terminating, its debugfs sub-directory would
 	 * have been removed already and so would be the debugfs file created
@@ -1553,8 +1719,17 @@ void kbase_csf_ctx_term(struct kbase_context *kctx)
 	 */
 	kbase_csf_event_wait_remove_all(kctx);
 
-	mutex_lock(&kctx->csf.lock);
+	/* Wait for a GPU reset if it is happening, prevent it if not happening */
+	err = kbase_reset_gpu_prevent_and_wait(kbdev);
+	if (err)
+		dev_warn(
+			kbdev->dev,
+			"Unsuccessful GPU reset detected when terminating csf context (%d_%d), attempting to terminate regardless",
+			kctx->tgid, kctx->id);
+	else
+		reset_prevented = true;
 
+	mutex_lock(&kctx->csf.lock);
 	/* Iterate through the queue groups that were not terminated by
 	 * userspace and issue the term request to firmware for them.
 	 */
@@ -1562,19 +1737,39 @@ void kbase_csf_ctx_term(struct kbase_context *kctx)
 		if (kctx->csf.queue_groups[i])
 			term_queue_group(kctx->csf.queue_groups[i]);
 	}
-
 	mutex_unlock(&kctx->csf.lock);
 
+	if (reset_prevented)
+		kbase_reset_gpu_allow(kbdev);
+
 	/* Now that all queue groups have been terminated, there can be no
 	 * more OoM or timer event interrupts but there can be inflight work
 	 * items. Destroying the wq will implicitly flush those work items.
 	 */
 	destroy_workqueue(kctx->csf.wq);
 
+	/* Also wait for the firmware error work item to finish, as it could
+	 * be affecting this outgoing context.
+	 */
+	flush_work(&kctx->kbdev->csf.fw_error_work);
+
+	/* A work item to handle page_fault/bus_fault/gpu_fault could be
+	 * pending for the outgoing context. Flush the workqueue that will
+	 * execute that work item.
+	 */
+	spin_lock_irqsave(&kctx->kbdev->hwaccess_lock, flags);
+	if (kctx->as_nr != KBASEP_AS_NR_INVALID)
+		as = &kctx->kbdev->as[kctx->as_nr];
+	spin_unlock_irqrestore(&kctx->kbdev->hwaccess_lock, flags);
+	if (as)
+		flush_workqueue(as->pf_wq);
+
 	mutex_lock(&kctx->csf.lock);
 
-	for (i = 0; i < MAX_QUEUE_GROUP_NUM; i++)
+	for (i = 0; i < MAX_QUEUE_GROUP_NUM; i++) {
 		kfree(kctx->csf.queue_groups[i]);
+		kctx->csf.queue_groups[i] = NULL;
+	}
 
 	/* Iterate through the queues that were not terminated by
 	 * userspace and do the required cleanup for them.
@@ -1623,6 +1818,9 @@ int kbase_csf_event_wait_add(struct kbase_context *kctx,
 
 		spin_lock_irqsave(&kctx->csf.event_lock, flags);
 		list_add_tail(&event->link, &kctx->csf.event_callback_list);
+		dev_dbg(kctx->kbdev->dev,
+			"Added event handler %pK with param %pK\n", event,
+			event->param);
 		spin_unlock_irqrestore(&kctx->csf.event_lock, flags);
 
 		err = 0;
@@ -1642,6 +1840,9 @@ void kbase_csf_event_wait_remove(struct kbase_context *kctx,
 	list_for_each_entry(event, &kctx->csf.event_callback_list, link) {
 		if ((event->callback == callback) && (event->param == param)) {
 			list_del(&event->link);
+			dev_dbg(kctx->kbdev->dev,
+				"Removed event handler %pK with param %pK\n",
+				event, event->param);
 			kfree(event);
 			break;
 		}
@@ -1654,19 +1855,22 @@ bool kbase_csf_read_error(struct kbase_context *kctx,
 {
 	bool got_event = true;
 	struct kbase_csf_notification *error_data = NULL;
+	unsigned long flags;
 
-	mutex_lock(&kctx->csf.lock);
+	spin_lock_irqsave(&kctx->csf.event_lock, flags);
 
 	if (likely(!list_empty(&kctx->csf.error_list))) {
 		error_data = list_first_entry(&kctx->csf.error_list,
 			struct kbase_csf_notification, link);
 		list_del_init(&error_data->link);
 		*event_data = error_data->data;
+		dev_dbg(kctx->kbdev->dev, "Dequeued error %pK in context %pK\n",
+			(void *)error_data, (void *)kctx);
 	} else {
 		got_event = false;
 	}
 
-	mutex_unlock(&kctx->csf.lock);
+	spin_unlock_irqrestore(&kctx->csf.event_lock, flags);
 
 	return got_event;
 }
@@ -1674,10 +1878,13 @@ bool kbase_csf_read_error(struct kbase_context *kctx,
 bool kbase_csf_error_pending(struct kbase_context *kctx)
 {
 	bool event_pended = false;
+	unsigned long flags;
 
-	mutex_lock(&kctx->csf.lock);
+	spin_lock_irqsave(&kctx->csf.event_lock, flags);
 	event_pended = !list_empty(&kctx->csf.error_list);
-	mutex_unlock(&kctx->csf.lock);
+	dev_dbg(kctx->kbdev->dev, "%s error is pending in context %pK\n",
+		event_pended ? "An" : "No", (void *)kctx);
+	spin_unlock_irqrestore(&kctx->csf.event_lock, flags);
 
 	return event_pended;
 }
@@ -1687,6 +1894,10 @@ void kbase_csf_event_signal(struct kbase_context *kctx, bool notify_gpu)
 	struct kbase_csf_event *event, *next_event;
 	unsigned long flags;
 
+	dev_dbg(kctx->kbdev->dev,
+		"Signal event (%s GPU notify) for context %pK\n",
+		notify_gpu ? "with" : "without", (void *)kctx);
+
 	/* First increment the signal count and wake up event thread.
 	 */
 	atomic_set(&kctx->event_count, 1);
@@ -1712,9 +1923,12 @@ void kbase_csf_event_signal(struct kbase_context *kctx, bool notify_gpu)
 
 	list_for_each_entry_safe(
 		event, next_event, &kctx->csf.event_callback_list, link) {
-		enum kbase_csf_event_callback_action action =
-			event->callback(event->param);
+		enum kbase_csf_event_callback_action action;
 
+		dev_dbg(kctx->kbdev->dev,
+			"Calling event handler %pK with param %pK\n",
+			(void *)event, event->param);
+		action = event->callback(event->param);
 		if (action == KBASE_CSF_EVENT_CALLBACK_REMOVE) {
 			list_del(&event->link);
 			kfree(event);
@@ -1734,6 +1948,9 @@ void kbase_csf_event_wait_remove_all(struct kbase_context *kctx)
 	list_for_each_entry_safe(
 		event, next_event, &kctx->csf.event_callback_list, link) {
 		list_del(&event->link);
+		dev_dbg(kctx->kbdev->dev,
+			"Removed event handler %pK with param %pK\n",
+			(void *)event, event->param);
 		kfree(event);
 	}
 
@@ -1742,19 +1959,19 @@ void kbase_csf_event_wait_remove_all(struct kbase_context *kctx)
 
 /**
  * handle_oom_event - Handle the OoM event generated by the firmware for the
- *                    command stream interface.
+ *                    CSI.
  *
  * This function will handle the OoM event request from the firmware for the
- * command stream. It will retrieve the address of heap context and heap's
- * statistics (like number of render passes in-flight) from the command
- * stream's kernel output page and pass them to the tiler heap function
- * to allocate a new chunk.
- * It will also update the command stream's kernel input page with the address
+ * CS. It will retrieve the address of heap context and heap's
+ * statistics (like number of render passes in-flight) from the CS's kernel
+ * output page and pass them to the tiler heap function to allocate a
+ * new chunk.
+ * It will also update the CS's kernel input page with the address
  * of a new chunk that was allocated.
  *
  * @kctx: Pointer to the kbase context in which the tiler heap was initialized.
  * @stream: Pointer to the structure containing info provided by the firmware
- *          about the command stream interface.
+ *          about the CSI.
  *
  * Return: 0 if successfully handled the request, otherwise a negative error
  *         code on failure.
@@ -1772,6 +1989,7 @@ static int handle_oom_event(struct kbase_context *const kctx,
 	const u32 frag_end =
 		kbase_csf_firmware_cs_output(stream, CS_HEAP_FRAG_END);
 	u32 renderpasses_in_flight;
+	u32 pending_frag_count;
 	u64 new_chunk_ptr;
 	int err;
 
@@ -1782,9 +2000,10 @@ static int handle_oom_event(struct kbase_context *const kctx,
 	}
 
 	renderpasses_in_flight = vt_start - frag_end;
+	pending_frag_count = vt_end - frag_end;
 
 	err = kbase_csf_tiler_heap_alloc_new_chunk(kctx,
-		gpu_heap_va, renderpasses_in_flight, &new_chunk_ptr);
+		gpu_heap_va, renderpasses_in_flight, pending_frag_count, &new_chunk_ptr);
 
 	/* It is okay to acknowledge with a NULL chunk (firmware will then wait
 	 * for the fragment jobs to complete and release chunks)
@@ -1823,17 +2042,9 @@ static void report_tiler_oom_error(struct kbase_queue_group *group)
 						  .error_type =
 							  BASE_GPU_QUEUE_GROUP_ERROR_TILER_HEAP_OOM,
 					  } } } };
-	struct kbase_context *kctx = group->kctx;
 
-	lockdep_assert_held(&kctx->csf.lock);
-
-	/* Ignore this error if the previous one hasn't been reported */
-	if (!WARN_ON(!list_empty(&group->error_tiler_oom.link))) {
-		group->error_tiler_oom.data = error;
-		list_add_tail(&group->error_tiler_oom.link,
-			      &kctx->csf.error_list);
-		kbase_event_wakeup(kctx);
-	}
+	add_error(group->kctx, &group->error_tiler_oom, &error);
+	kbase_event_wakeup(group->kctx);
 }
 
 /**
@@ -1841,8 +2052,8 @@ static void report_tiler_oom_error(struct kbase_queue_group *group)
  *
  * @queue: Pointer to queue for which out-of-memory event was received.
  *
- * Called with the command-stream front-end locked for the affected GPU
- * virtual address space. Do not call in interrupt context.
+ * Called with the CSF locked for the affected GPU virtual address space.
+ * Do not call in interrupt context.
  *
  * Handles tiler out-of-memory for a GPU command queue and then clears the
  * notification to allow the firmware to report out-of-memory again in future.
@@ -1859,6 +2070,7 @@ static void kbase_queue_oom_event(struct kbase_queue *const queue)
 	int slot_num, err;
 	struct kbase_csf_cmd_stream_group_info const *ginfo;
 	struct kbase_csf_cmd_stream_info const *stream;
+	int csi_index = queue->csi_index;
 	u32 cs_oom_ack, cs_oom_req;
 
 	lockdep_assert_held(&kctx->csf.lock);
@@ -1887,7 +2099,7 @@ static void kbase_queue_oom_event(struct kbase_queue *const queue)
 		goto unlock;
 
 	ginfo = &kbdev->csf.global_iface.groups[slot_num];
-	stream = &ginfo->streams[queue->csi_index];
+	stream = &ginfo->streams[csi_index];
 	cs_oom_ack = kbase_csf_firmware_cs_output(stream, CS_ACK) &
 		     CS_ACK_TILER_OOM_MASK;
 	cs_oom_req = kbase_csf_firmware_cs_input_read(stream, CS_REQ) &
@@ -1918,7 +2130,7 @@ static void kbase_queue_oom_event(struct kbase_queue *const queue)
 		return;
 	}
 
-	kbase_csf_ring_cs_kernel_doorbell(kbdev, queue);
+	kbase_csf_ring_cs_kernel_doorbell(kbdev, csi_index, slot_num, true);
 unlock:
 	kbase_csf_scheduler_unlock(kbdev);
 }
@@ -1937,6 +2149,14 @@ static void oom_event_worker(struct work_struct *data)
 	struct kbase_queue *queue =
 		container_of(data, struct kbase_queue, oom_event_work);
 	struct kbase_context *kctx = queue->kctx;
+	struct kbase_device *const kbdev = kctx->kbdev;
+
+	int err = kbase_reset_gpu_try_prevent(kbdev);
+	/* Regardless of whether reset failed or is currently happening, exit
+	 * early
+	 */
+	if (err)
+		return;
 
 	mutex_lock(&kctx->csf.lock);
 
@@ -1944,20 +2164,16 @@ static void oom_event_worker(struct work_struct *data)
 	release_queue(queue);
 
 	mutex_unlock(&kctx->csf.lock);
+	kbase_reset_gpu_allow(kbdev);
 }
 
 /**
- * timer_event_worker - Timer event handler called from a workqueue.
+ * report_group_timeout_error - Report the timeout error for the group to userspace.
  *
- * @data: Pointer to a work_struct embedded in GPU command queue group data.
- *
- * Notify the event notification thread of progress timeout fault
- * for the GPU command queue group.
+ * @group: Pointer to the group for which timeout error occurred
  */
-static void timer_event_worker(struct work_struct *data)
+static void report_group_timeout_error(struct kbase_queue_group *const group)
 {
-	struct kbase_queue_group *const group =
-		container_of(data, struct kbase_queue_group, timer_event_work);
 	struct base_csf_notification const
 		error = { .type = BASE_CSF_NOTIFICATION_GPU_QUEUE_GROUP_ERROR,
 			  .payload = {
@@ -1967,19 +2183,59 @@ static void timer_event_worker(struct work_struct *data)
 						  .error_type =
 							  BASE_GPU_QUEUE_GROUP_ERROR_TIMEOUT,
 					  } } } };
+
+	dev_warn(group->kctx->kbdev->dev,
+		 "Notify the event notification thread, forward progress timeout (%llu cycles)\n",
+		 kbase_csf_timeout_get(group->kctx->kbdev));
+
+	add_error(group->kctx, &group->error_timeout, &error);
+	kbase_event_wakeup(group->kctx);
+}
+
+/**
+ * timer_event_worker - Handle the progress timeout error for the group
+ *
+ * @data: Pointer to a work_struct embedded in GPU command queue group data.
+ *
+ * Terminate the CSG and report the error to userspace
+ */
+static void timer_event_worker(struct work_struct *data)
+{
+	struct kbase_queue_group *const group =
+		container_of(data, struct kbase_queue_group, timer_event_work);
 	struct kbase_context *const kctx = group->kctx;
+	bool reset_prevented = false;
+	int err = kbase_reset_gpu_prevent_and_wait(kctx->kbdev);
+
+	if (err)
+		dev_warn(
+			kctx->kbdev->dev,
+			"Unsuccessful GPU reset detected when terminating group %d on progress timeout, attempting to terminate regardless",
+			group->handle);
+	else
+		reset_prevented = true;
 
 	mutex_lock(&kctx->csf.lock);
 
-	/* Ignore this error if the previous one hasn't been reported */
-	if (!WARN_ON(!list_empty(&group->error_timeout.link))) {
-		group->error_timeout.data = error;
-		list_add_tail(&group->error_timeout.link,
-			      &kctx->csf.error_list);
-		kbase_event_wakeup(kctx);
-	}
+	term_queue_group(group);
+	report_group_timeout_error(group);
 
 	mutex_unlock(&kctx->csf.lock);
+	if (reset_prevented)
+		kbase_reset_gpu_allow(kctx->kbdev);
+}
+
+/**
+ * handle_progress_timer_event - Progress timer timeout event handler.
+ *
+ * @group: Pointer to GPU queue group for which the timeout event is received.
+ *
+ * Enqueue a work item to terminate the group and notify the event notification
+ * thread of progress timeout fault for the GPU command queue group.
+ */
+static void handle_progress_timer_event(struct kbase_queue_group *const group)
+{
+	queue_work(group->kctx->csf.wq, &group->timer_event_work);
 }
 
 /**
@@ -1998,18 +2254,43 @@ static void protm_event_worker(struct work_struct *data)
 	kbase_csf_scheduler_group_protm_enter(group);
 }
 
+static void report_queue_fatal_error(struct kbase_queue *const queue,
+				     u32 cs_fatal, u64 cs_fatal_info,
+				     u8 group_handle)
+{
+	struct base_csf_notification error =
+		{ .type = BASE_CSF_NOTIFICATION_GPU_QUEUE_GROUP_ERROR,
+		  .payload = {
+			  .csg_error = {
+				  .handle = group_handle,
+				  .error = {
+					  .error_type =
+						  BASE_GPU_QUEUE_GROUP_QUEUE_ERROR_FATAL,
+					  .payload = {
+						  .fatal_queue = {
+							  .sideband =
+								  cs_fatal_info,
+							  .status = cs_fatal,
+							  .csi_index =
+								  queue->csi_index,
+						  } } } } } };
+
+	add_error(queue->kctx, &queue->error, &error);
+	kbase_event_wakeup(queue->kctx);
+}
+
 /**
  * handle_fault_event - Handler for CS fault.
  *
  * @queue:  Pointer to queue for which fault event was received.
  * @stream: Pointer to the structure containing info provided by the
- *          firmware about the command stream interface.
+ *          firmware about the CSI.
  *
  * Prints meaningful CS fault information.
  *
- * Return: 0 on success, otherwise a negative system code.
  */
-static int handle_fault_event(struct kbase_queue const *const queue,
+static void
+handle_fault_event(struct kbase_queue *const queue,
 		   struct kbase_csf_cmd_stream_info const *const stream)
 {
 	const u32 cs_fault = kbase_csf_firmware_cs_output(stream, CS_FAULT);
@@ -2025,6 +2306,8 @@ static int handle_fault_event(struct kbase_queue const *const queue,
 		CS_FAULT_INFO_EXCEPTION_DATA_GET(cs_fault_info);
 	struct kbase_device *const kbdev = queue->kctx->kbdev;
 
+	kbase_csf_scheduler_spin_lock_assert_held(kbdev);
+
 	dev_warn(kbdev->dev, "CSI: %d\n"
 			"CS_FAULT.EXCEPTION_TYPE: 0x%x (%s)\n"
 			"CS_FAULT.EXCEPTION_DATA: 0x%x\n"
@@ -2033,52 +2316,55 @@ static int handle_fault_event(struct kbase_queue const *const queue,
 			kbase_gpu_exception_name(cs_fault_exception_type),
 			cs_fault_exception_data, cs_fault_info_exception_data);
 
-	return -EFAULT;
+	if (cs_fault_exception_type ==
+	    CS_FAULT_EXCEPTION_TYPE_RESOURCE_EVICTION_TIMEOUT)
+		report_queue_fatal_error(queue, GPU_EXCEPTION_TYPE_SW_FAULT_2,
+					 0, queue->group->handle);
 }
 
 /**
- * report_queue_fatal_error - Report queue fatal error to user space
+ * fatal_event_worker - Handle the fatal error for the GPU queue
  *
- * @queue:         Pointer to queue for which fatal event was received.
- * @cs_fatal:      Fault information
- * @cs_fatal_info: Additional fault information
+ * @data: Pointer to a work_struct embedded in GPU command queue.
  *
- * If a queue has already been in fatal error status,
- * subsequent fatal error on the queue should never take place.
+ * Terminate the CSG and report the error to userspace.
  */
-static void report_queue_fatal_error(struct kbase_queue *const queue,
-		u32 cs_fatal, u64 cs_fatal_info)
+static void fatal_event_worker(struct work_struct *const data)
 {
-	struct base_csf_notification error = {
-		.type = BASE_CSF_NOTIFICATION_GPU_QUEUE_GROUP_ERROR,
-		.payload = {
-			.csg_error = {
-				.handle = queue->group->handle,
-				.error = {
-					.error_type =
-					BASE_GPU_QUEUE_GROUP_QUEUE_ERROR_FATAL,
-					.payload = {
-						.fatal_queue = {
-						.sideband = cs_fatal_info,
-						.status = cs_fatal,
-						.csi_index = queue->csi_index,
-						}
-					}
-				}
-			}
-		}
-	};
+	struct kbase_queue *const queue =
+		container_of(data, struct kbase_queue, fatal_event_work);
+	struct kbase_context *const kctx = queue->kctx;
+	struct kbase_device *const kbdev = kctx->kbdev;
+	struct kbase_queue_group *group;
+	u8 group_handle;
+	bool reset_prevented = false;
+	int err = kbase_reset_gpu_prevent_and_wait(kbdev);
 
-	lockdep_assert_held(&queue->kctx->csf.lock);
+	if (err)
+		dev_warn(
+			kbdev->dev,
+			"Unsuccessful GPU reset detected when terminating group to handle fatal event, attempting to terminate regardless");
+	else
+		reset_prevented = true;
 
-	/* If a queue has already been in fatal error status,
-	 * subsequent fatal error on the queue should never take place.
-	 */
-	if (!WARN_ON(!list_empty(&queue->error.link))) {
-		queue->error.data = error;
-		list_add_tail(&queue->error.link, &queue->kctx->csf.error_list);
-		kbase_event_wakeup(queue->kctx);
+	mutex_lock(&kctx->csf.lock);
+
+	group = get_bound_queue_group(queue);
+	if (!group) {
+		dev_warn(kbdev->dev, "queue not bound when handling fatal event");
+		goto unlock;
 	}
+
+	group_handle = group->handle;
+	term_queue_group(group);
+	report_queue_fatal_error(queue, queue->cs_fatal, queue->cs_fatal_info,
+				 group_handle);
+
+unlock:
+	release_queue(queue);
+	mutex_unlock(&kctx->csf.lock);
+	if (reset_prevented)
+		kbase_reset_gpu_allow(kbdev);
 }
 
 /**
@@ -2086,17 +2372,15 @@ static void report_queue_fatal_error(struct kbase_queue *const queue,
  *
  * @queue:    Pointer to queue for which fatal event was received.
  * @stream:   Pointer to the structure containing info provided by the
- *            firmware about the command stream interface.
- * @fw_error: Return true if internal firmware fatal is handled
+ *            firmware about the CSI.
  *
  * Prints meaningful CS fatal information.
- * Report queue fatal error to user space.
- *
- * Return: 0 on success otherwise a negative system error.
+ * Enqueue a work item to terminate the group and report the fatal error
+ * to user space.
  */
-static int handle_fatal_event(struct kbase_queue *const queue,
-	struct kbase_csf_cmd_stream_info const *const stream,
-	bool *fw_error)
+static void
+handle_fatal_event(struct kbase_queue *const queue,
+		   struct kbase_csf_cmd_stream_info const *const stream)
 {
 	const u32 cs_fatal = kbase_csf_firmware_cs_output(stream, CS_FATAL);
 	const u64 cs_fatal_info =
@@ -2111,7 +2395,7 @@ static int handle_fatal_event(struct kbase_queue *const queue,
 		CS_FATAL_INFO_EXCEPTION_DATA_GET(cs_fatal_info);
 	struct kbase_device *const kbdev = queue->kctx->kbdev;
 
-	lockdep_assert_held(&queue->kctx->csf.lock);
+	kbase_csf_scheduler_spin_lock_assert_held(kbdev);
 
 	dev_warn(kbdev->dev,
 		 "CSG: %d, CSI: %d\n"
@@ -2124,156 +2408,67 @@ static int handle_fatal_event(struct kbase_queue *const queue,
 		 cs_fatal_exception_data, cs_fatal_info_exception_data);
 
 	if (cs_fatal_exception_type ==
-			CS_FATAL_EXCEPTION_TYPE_FIRMWARE_INTERNAL_ERROR)
-		*fw_error = true;
-	else
-		report_queue_fatal_error(queue, cs_fatal, cs_fatal_info);
-
-	return -EFAULT;
-}
-
-/**
- * handle_internal_firmware_fatal - Handler for CS internal firmware fault.
- *
- * @kbdev:  Pointer to kbase device
- *
- * Report group fatal error to user space for all GPU command queue groups
- * in the device, terminate them and reset GPU.
- */
-static void handle_internal_firmware_fatal(struct kbase_device *const kbdev)
-{
-	int as;
-
-	for (as = 0; as < kbdev->nr_hw_address_spaces; as++) {
-		struct kbase_context *kctx;
-		struct kbase_fault fault = {
-			.status = GPU_EXCEPTION_TYPE_SW_FAULT_1,
-		};
-
-		if (as == MCU_AS_NR)
-			continue;
-
-		kctx = kbase_ctx_sched_as_to_ctx_refcount(kbdev, as);
-		if (!kctx)
-			continue;
-
-		kbase_csf_ctx_handle_fault(kctx, &fault);
-		kbase_ctx_sched_release_ctx_lock(kctx);
+			CS_FATAL_EXCEPTION_TYPE_FIRMWARE_INTERNAL_ERROR) {
+		queue_work(system_wq, &kbdev->csf.fw_error_work);
+	} else {
+		get_queue(queue);
+		queue->cs_fatal = cs_fatal;
+		queue->cs_fatal_info = cs_fatal_info;
+		if (!queue_work(queue->kctx->csf.wq, &queue->fatal_event_work))
+			release_queue(queue);
 	}
-
-	if (kbase_prepare_to_reset_gpu(kbdev))
-		kbase_reset_gpu(kbdev);
 }
 
 /**
- * fault_event_worker - Worker function for CS fault/fatal.
+ * handle_queue_exception_event - Handler for CS fatal/fault exception events.
  *
- * @data: Pointer to a work_struct embedded in GPU command queue data.
- *
- * Handle the fault and fatal exception for a GPU command queue and then
- * releases a reference that was added to prevent the queue being destroyed
- * while this work item was pending on a workqueue.
- * 
- * Report the fault and fatal exception for a GPU command queue and then
- * clears the corresponding notification fields to allow the firmware to
- * report other faults in future.
- * 
- * It may also terminate the GPU command queue group(s) and reset GPU
- * in case internal firmware CS fatal exception occurred.
+ * @queue:  Pointer to queue for which fatal/fault event was received.
+ * @cs_req: Value of the CS_REQ register from the CS's input page.
+ * @cs_ack: Value of the CS_ACK register from the CS's output page.
  */
-static void fault_event_worker(struct work_struct *const data)
+static void handle_queue_exception_event(struct kbase_queue *const queue,
+					 const u32 cs_req, const u32 cs_ack)
 {
-	struct kbase_queue *const queue =
-		container_of(data, struct kbase_queue, fault_event_work);
-
-	struct kbase_context *const kctx = queue->kctx;
-	struct kbase_device *const kbdev = kctx->kbdev;
-	struct kbase_queue_group *group;
-	int slot_num;
 	struct kbase_csf_cmd_stream_group_info const *ginfo;
 	struct kbase_csf_cmd_stream_info const *stream;
-	u32 cs_ack, cs_req;
-	int err = 0;
-	bool internal_fw_error = false;
+	struct kbase_context *const kctx = queue->kctx;
+	struct kbase_device *const kbdev = kctx->kbdev;
+	struct kbase_queue_group *group = queue->group;
+	int csi_index = queue->csi_index;
+	int slot_num = group->csg_nr;
 
-	mutex_lock(&kctx->csf.lock);
-	kbase_csf_scheduler_lock(kbdev);
-
-	group = get_bound_queue_group(queue);
-	if (!group) {
-		dev_warn(kbdev->dev, "queue not bound\n");
-		goto unlock;
-	}
-
-	slot_num = kbase_csf_scheduler_group_get_slot(group);
-
-	/* The group could have gone off slot before this work item got
-	 * a chance to execute.
-	 */
-	if (slot_num < 0) {
-		dev_warn(kbdev->dev, "invalid slot_num\n");
-		goto unlock;
-	}
-
-	/* If the bound group is on slot yet the kctx is marked with disabled
-	 * on address-space fault, the group is pending to be killed. So skip
-	 * the inflight queue exception event operation.
-	 */
-	if (kbase_ctx_flag(kctx, KCTX_AS_DISABLED_ON_FAULT)) {
-		dev_warn(kbdev->dev, "kctx is already disabled on fault\n");
-		goto unlock;
-	}
+	kbase_csf_scheduler_spin_lock_assert_held(kbdev);
 
 	ginfo = &kbdev->csf.global_iface.groups[slot_num];
-	stream = &ginfo->streams[queue->csi_index];
-	cs_ack = kbase_csf_firmware_cs_output(stream, CS_ACK);
-	cs_req = kbase_csf_firmware_cs_input_read(stream, CS_REQ);
+	stream = &ginfo->streams[csi_index];
 
 	if ((cs_ack & CS_ACK_FATAL_MASK) != (cs_req & CS_REQ_FATAL_MASK)) {
-		err = handle_fatal_event(queue, stream, &internal_fw_error);
+		handle_fatal_event(queue, stream);
 		kbase_csf_firmware_cs_input_mask(stream, CS_REQ, cs_ack,
 						 CS_REQ_FATAL_MASK);
 	}
 
 	if ((cs_ack & CS_ACK_FAULT_MASK) != (cs_req & CS_REQ_FAULT_MASK)) {
-		err |= handle_fault_event(queue, stream);
+		handle_fault_event(queue, stream);
 		kbase_csf_firmware_cs_input_mask(stream, CS_REQ, cs_ack,
 						 CS_REQ_FAULT_MASK);
-		kbase_csf_ring_cs_kernel_doorbell(kbdev, queue);
+		kbase_csf_ring_cs_kernel_doorbell(kbdev, csi_index, slot_num, true);
 	}
-
-	if (err) {
-		/* From 10.x.5, CS_REQ_ERROR_MODE is removed but TI2 bitfile
-		 * upload not finished. Need to remove on GPUCORE-23972
-		 */
-		kbase_csf_firmware_cs_input_mask(stream, CS_REQ, ~cs_ack,
-						CS_REQ_ERROR_MODE_MASK);
-		dev_dbg(kbdev->dev, "Slot-%d CSI-%d entering error mode\n",
-			slot_num, queue->csi_index);
-	}
-
-unlock:
-	release_queue(queue);
-	kbase_csf_scheduler_unlock(kbdev);
-	mutex_unlock(&kctx->csf.lock);
-
-	if (internal_fw_error)
-		handle_internal_firmware_fatal(kbdev);
 }
 
 /**
- * process_cs_interrupts - Process interrupts for a command stream.
+ * process_cs_interrupts - Process interrupts for a CS.
  *
  * @group:  Pointer to GPU command queue group data.
- * @ginfo:  The command stream group interface provided by the firmware.
- * @irqreq: CSG's IRQ request bitmask (one bit per stream).
- * @irqack: CSG's IRQ acknowledge bitmask (one bit per stream).
+ * @ginfo:  The CSG interface provided by the firmware.
+ * @irqreq: CSG's IRQ request bitmask (one bit per CS).
+ * @irqack: CSG's IRQ acknowledge bitmask (one bit per CS).
  *
  * If the interrupt request bitmask differs from the acknowledge bitmask
  * then the firmware is notifying the host of an event concerning those
- * streams indicated by bits whose value differs. The actions required
+ * CSs indicated by bits whose value differs. The actions required
  * are then determined by examining which notification flags differ between
- * the request and acknowledge registers for the individual stream(s).
+ * the request and acknowledge registers for the individual CS(s).
  */
 static void process_cs_interrupts(struct kbase_queue_group *const group,
 		      struct kbase_csf_cmd_stream_group_info const *const ginfo,
@@ -2282,6 +2477,8 @@ static void process_cs_interrupts(struct kbase_queue_group *const group,
 	struct kbase_device *const kbdev = group->kctx->kbdev;
 	u32 remaining = irqreq ^ irqack;
 	bool protm_pend = false;
+	const bool group_suspending =
+		!kbase_csf_scheduler_group_events_enabled(kbdev, group);
 
 	kbase_csf_scheduler_spin_lock_assert_held(kbdev);
 
@@ -2289,6 +2486,8 @@ static void process_cs_interrupts(struct kbase_queue_group *const group,
 		int const i = ffs(remaining) - 1;
 		struct kbase_queue *const queue = group->bound_queues[i];
 
+		remaining &= ~(1 << i);
+
 		/* The queue pointer can be NULL, but if it isn't NULL then it
 		 * cannot disappear since scheduler spinlock is held and before
 		 * freeing a bound queue it has to be first unbound which
@@ -2305,12 +2504,18 @@ static void process_cs_interrupts(struct kbase_queue_group *const group,
 
 			if ((cs_req & CS_REQ_EXCEPTION_MASK) ^
 			    (cs_ack & CS_ACK_EXCEPTION_MASK)) {
-				get_queue(queue);
 				KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, CSI_FAULT_INTERRUPT, group, queue, cs_req ^ cs_ack);
-				if (!queue_work(wq, &queue->fault_event_work))
-					release_queue(queue);
+				handle_queue_exception_event(queue, cs_req,
+							     cs_ack);
 			}
 
+			/* PROTM_PEND and TILER_OOM can be safely ignored
+			 * because they will be raised again if the group
+			 * is assigned a CSG slot in future.
+			 */
+			if (group_suspending)
+				continue;
+
 			if (((cs_req & CS_REQ_TILER_OOM_MASK) ^
 			     (cs_ack & CS_ACK_TILER_OOM_MASK))) {
 				get_queue(queue);
@@ -2337,8 +2542,6 @@ static void process_cs_interrupts(struct kbase_queue_group *const group,
 				protm_pend = true;
 			}
 		}
-
-		remaining &= ~(1 << i);
 	}
 
 	if (protm_pend)
@@ -2346,13 +2549,12 @@ static void process_cs_interrupts(struct kbase_queue_group *const group,
 }
 
 /**
- * process_csg_interrupts - Process interrupts for a command stream group.
+ * process_csg_interrupts - Process interrupts for a CSG.
  *
- * @kbdev: Instance of a GPU platform device that implements a command stream
- *         front-end interface.
- * @csg_nr: Command stream group number.
+ * @kbdev: Instance of a GPU platform device that implements a CSF interface.
+ * @csg_nr: CSG number.
  *
- * Handles interrupts for a command stream group and for streams within it.
+ * Handles interrupts for a CSG and for CSs within it.
  *
  * If the CSG's request register value differs from its acknowledge register
  * then the firmware is notifying the host of an event concerning the whole
@@ -2407,45 +2609,131 @@ static void process_csg_interrupts(struct kbase_device *const kbdev,
 	if (WARN_ON(kbase_csf_scheduler_group_get_slot_locked(group) != csg_nr))
 		return;
 
-	if ((req ^ ack) & CSG_REQ_SYNC_UPDATE) {
+	if ((req ^ ack) & CSG_REQ_SYNC_UPDATE_MASK) {
 		kbase_csf_firmware_csg_input_mask(ginfo,
-			CSG_REQ, ack, CSG_REQ_SYNC_UPDATE);
+			CSG_REQ, ack, CSG_REQ_SYNC_UPDATE_MASK);
 
 		KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SYNC_UPDATE_INTERRUPT, group, req ^ ack);
 		kbase_csf_event_signal_cpu_only(group->kctx);
 	}
 
-	/* IDLE and TILER_OOM can be safely ignored because they will be
-	 * raised again if the group is assigned a CSG slot in future.
-	 * TILER_OOM and PROGRESS_TIMER_EVENT may terminate the group.
-	 */
-	if (!kbase_csf_scheduler_group_events_enabled(kbdev, group))
-		return;
-
 	if ((req ^ ack) & CSG_REQ_IDLE_MASK) {
+		struct kbase_csf_scheduler *scheduler =	&kbdev->csf.scheduler;
+
 		kbase_csf_firmware_csg_input_mask(ginfo, CSG_REQ, ack,
 			CSG_REQ_IDLE_MASK);
 
-		set_bit(csg_nr, kbdev->csf.scheduler.csg_slots_idle_mask);
+		set_bit(csg_nr, scheduler->csg_slots_idle_mask);
 
 		KBASE_KTRACE_ADD_CSF_GRP(kbdev,  CSG_IDLE_INTERRUPT, group, req ^ ack);
 		dev_dbg(kbdev->dev, "Idle notification received for Group %u on slot %d\n",
 			 group->handle, csg_nr);
+
+		/* Check if the scheduling tick can be advanced */
+		if (kbase_csf_scheduler_all_csgs_idle(kbdev) &&
+		    !scheduler->gpu_idle_fw_timer_enabled) {
+			kbase_csf_scheduler_advance_tick_nolock(kbdev);
+		}
 	}
 
 	if ((req ^ ack) & CSG_REQ_PROGRESS_TIMER_EVENT_MASK) {
 		kbase_csf_firmware_csg_input_mask(ginfo, CSG_REQ, ack,
 			CSG_REQ_PROGRESS_TIMER_EVENT_MASK);
 
-		dev_dbg(kbdev->dev, "Timeout notification received for Group %u on slot %d\n",
-			group->handle, csg_nr);
+		dev_info(kbdev->dev,
+			"Timeout notification received for group %u of ctx %d_%d on slot %d\n",
+			group->handle, group->kctx->tgid, group->kctx->id, csg_nr);
 
-		queue_work(group->kctx->csf.wq, &group->timer_event_work);
+		handle_progress_timer_event(group);
 	}
 
 	process_cs_interrupts(group, ginfo, irqreq, irqack);
 }
 
+/**
+ * process_prfcnt_interrupts - Process performance counter interrupts.
+ *
+ * @kbdev:   Instance of a GPU platform device that implements a CSF interface.
+ * @glb_req: Global request register value.
+ * @glb_ack: Global acknowledge register value.
+ *
+ * Handles interrupts issued by the firmware that relate to the performance
+ * counters. For example, on completion of a performance counter sample. It is
+ * expected that the scheduler spinlock is already held on calling this
+ * function.
+ */
+static void process_prfcnt_interrupts(struct kbase_device *kbdev, u32 glb_req,
+				      u32 glb_ack)
+{
+	const struct kbase_csf_global_iface *const global_iface =
+		&kbdev->csf.global_iface;
+
+	lockdep_assert_held(&kbdev->csf.scheduler.interrupt_lock);
+
+	/* Process PRFCNT_SAMPLE interrupt. */
+	if (kbdev->csf.hwcnt.request_pending &&
+	    ((glb_req & GLB_REQ_PRFCNT_SAMPLE_MASK) ==
+	     (glb_ack & GLB_REQ_PRFCNT_SAMPLE_MASK))) {
+		kbdev->csf.hwcnt.request_pending = false;
+
+		dev_dbg(kbdev->dev, "PRFCNT_SAMPLE done interrupt received.");
+
+		kbase_hwcnt_backend_csf_on_prfcnt_sample(
+			&kbdev->hwcnt_gpu_iface);
+	}
+
+	/* Process PRFCNT_ENABLE interrupt. */
+	if (kbdev->csf.hwcnt.enable_pending &&
+	    ((glb_req & GLB_REQ_PRFCNT_ENABLE_MASK) ==
+	     (glb_ack & GLB_REQ_PRFCNT_ENABLE_MASK))) {
+		kbdev->csf.hwcnt.enable_pending = false;
+
+		dev_dbg(kbdev->dev,
+			"PRFCNT_ENABLE status changed interrupt received.");
+
+		if (glb_ack & GLB_REQ_PRFCNT_ENABLE_MASK)
+			kbase_hwcnt_backend_csf_on_prfcnt_enable(
+				&kbdev->hwcnt_gpu_iface);
+		else
+			kbase_hwcnt_backend_csf_on_prfcnt_disable(
+				&kbdev->hwcnt_gpu_iface);
+	}
+
+	/* Process PRFCNT_THRESHOLD interrupt. */
+	if ((glb_req ^ glb_ack) & GLB_REQ_PRFCNT_THRESHOLD_MASK) {
+		dev_dbg(kbdev->dev, "PRFCNT_THRESHOLD interrupt received.");
+
+		kbase_hwcnt_backend_csf_on_prfcnt_threshold(
+			&kbdev->hwcnt_gpu_iface);
+
+		/* Set the GLB_REQ.PRFCNT_THRESHOLD flag back to
+		 * the same value as GLB_ACK.PRFCNT_THRESHOLD
+		 * flag in order to enable reporting of another
+		 * PRFCNT_THRESHOLD event.
+		 */
+		kbase_csf_firmware_global_input_mask(
+			global_iface, GLB_REQ, glb_ack,
+			GLB_REQ_PRFCNT_THRESHOLD_MASK);
+	}
+
+	/* Process PRFCNT_OVERFLOW interrupt. */
+	if ((glb_req ^ glb_ack) & GLB_REQ_PRFCNT_OVERFLOW_MASK) {
+		dev_dbg(kbdev->dev, "PRFCNT_OVERFLOW interrupt received.");
+
+		kbase_hwcnt_backend_csf_on_prfcnt_overflow(
+			&kbdev->hwcnt_gpu_iface);
+
+		/* Set the GLB_REQ.PRFCNT_OVERFLOW flag back to
+		 * the same value as GLB_ACK.PRFCNT_OVERFLOW
+		 * flag in order to enable reporting of another
+		 * PRFCNT_OVERFLOW event.
+		 */
+		kbase_csf_firmware_global_input_mask(
+			global_iface, GLB_REQ, glb_ack,
+			GLB_REQ_PRFCNT_OVERFLOW_MASK);
+	}
+}
+
 void kbase_csf_interrupt(struct kbase_device *kbdev, u32 val)
 {
 	unsigned long flags;
@@ -2465,10 +2753,7 @@ void kbase_csf_interrupt(struct kbase_device *kbdev, u32 val)
 
 		if (!kbdev->csf.firmware_reloaded)
 			kbase_csf_firmware_reload_completed(kbdev);
-		else if (kbdev->csf.glb_init_request_pending)
-			kbase_pm_update_state(kbdev);
-
-		if (global_iface->output) {
+		else if (global_iface->output) {
 			u32 glb_req, glb_ack;
 
 			kbase_csf_scheduler_spin_lock(kbdev, &flags);
@@ -2485,8 +2770,41 @@ void kbase_csf_interrupt(struct kbase_device *kbdev, u32 val)
 				WARN_ON(!kbase_csf_scheduler_protected_mode_in_use(kbdev));
 				scheduler->active_protm_grp = NULL;
 				KBASE_KTRACE_ADD(kbdev, SCHEDULER_EXIT_PROTM, NULL, 0u);
+				kbdev->protected_mode = false;
+				kbase_ipa_control_protm_exited(kbdev);
+				kbase_hwcnt_backend_csf_protm_exited(
+					&kbdev->hwcnt_gpu_iface);
 			}
+
+			/* Handle IDLE Hysteresis notification event */
+			if ((glb_req ^ glb_ack) & GLB_REQ_IDLE_EVENT_MASK) {
+				dev_dbg(kbdev->dev, "Idle-hysteresis event flagged");
+				kbase_csf_firmware_global_input_mask(
+						global_iface, GLB_REQ, glb_ack,
+						GLB_REQ_IDLE_EVENT_MASK);
+
+				if (!atomic_read(&scheduler->non_idle_offslot_grps)) {
+					if (kbase_pm_idle_groups_sched_suspendable(kbdev))
+						queue_work(system_highpri_wq,
+							   &scheduler->gpu_idle_work);
+				} else {
+					/* Advance the scheduling tick to get
+					 * the non-idle suspended groups loaded
+					 * soon.
+					 */
+					kbase_csf_scheduler_advance_tick_nolock(
+						kbdev);
+				}
+			}
+
+			process_prfcnt_interrupts(kbdev, glb_req, glb_ack);
+
 			kbase_csf_scheduler_spin_unlock(kbdev, flags);
+
+			/* Invoke the MCU state machine as a state transition
+			 * might have completed.
+			 */
+			kbase_pm_update_state(kbdev);
 		}
 
 		if (!remaining) {
@@ -2545,3 +2863,62 @@ int kbase_csf_doorbell_mapping_init(struct kbase_device *kbdev)
 
 	return 0;
 }
+
+void kbase_csf_free_dummy_user_reg_page(struct kbase_device *kbdev)
+{
+	if (as_phys_addr_t(kbdev->csf.dummy_user_reg_page)) {
+		struct page *page = as_page(kbdev->csf.dummy_user_reg_page);
+
+		kbase_mem_pool_free(
+			&kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], page,
+			false);
+	}
+}
+
+int kbase_csf_setup_dummy_user_reg_page(struct kbase_device *kbdev)
+{
+	struct tagged_addr phys;
+	struct page *page;
+	u32 *addr;
+	int ret;
+
+	kbdev->csf.dummy_user_reg_page = as_tagged(0);
+
+	ret = kbase_mem_pool_alloc_pages(
+		&kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], 1, &phys,
+		false);
+
+	if (ret <= 0)
+		return ret;
+
+	page = as_page(phys);
+	addr = kmap_atomic(page);
+
+	/* Write a special value for the latest flush register inside the
+	 * dummy page
+	 */
+	addr[LATEST_FLUSH / sizeof(u32)] = POWER_DOWN_LATEST_FLUSH_VALUE;
+
+	kbase_sync_single_for_device(kbdev, kbase_dma_addr(page), sizeof(u32),
+				     DMA_BIDIRECTIONAL);
+	kunmap_atomic(addr);
+
+	kbdev->csf.dummy_user_reg_page = phys;
+
+	return 0;
+}
+
+u8 kbase_csf_priority_check(struct kbase_device *kbdev, u8 req_priority)
+{
+	struct priority_control_manager_device *pcm_device = kbdev->pcm_dev;
+	u8 out_priority = req_priority;
+
+	if (pcm_device) {
+		req_priority = kbase_csf_priority_queue_group_priority_to_relative(req_priority);
+		out_priority = pcm_device->ops.pcm_scheduler_priority_check(pcm_device, current, req_priority);
+		out_priority = kbase_csf_priority_relative_to_queue_group_priority(out_priority);
+	}
+
+	return out_priority;
+}
+
diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf.h b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf.h
index c183d0a32302..effd4686a444 100644
--- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf.h
+++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf.h
@@ -1,11 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  *
- * (C) COPYRIGHT 2018-2020 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2018-2021 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,8 +17,6 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 #ifndef _KBASE_CSF_H_
@@ -28,11 +27,11 @@
 #include "mali_kbase_csf_firmware.h"
 #include "mali_kbase_csf_protected_memory.h"
 
-/* Indicate invalid command stream h/w interface
+/* Indicate invalid CS h/w interface
  */
 #define KBASEP_IF_NR_INVALID ((s8)-1)
 
-/* Indicate invalid command stream group number for a GPU command queue group
+/* Indicate invalid CSG number for a GPU command queue group
  */
 #define KBASEP_CSG_NR_INVALID ((s8)-1)
 
@@ -40,13 +39,10 @@
  */
 #define KBASEP_USER_DB_NR_INVALID ((s8)-1)
 
-/* Waiting timeout for global request completion acknowledgment */
-#define GLB_REQ_WAIT_TIMEOUT_MS (300) /* 300 milliseconds */
-
-#define CSG_REQ_EP_CFG (0x1 << CSG_REQ_EP_CFG_SHIFT)
-#define CSG_REQ_SYNC_UPDATE (0x1 << CSG_REQ_SYNC_UPDATE_SHIFT)
 #define FIRMWARE_PING_INTERVAL_MS (2000) /* 2 seconds */
 
+#define FIRMWARE_IDLE_HYSTERESIS_TIME_MS (10) /* Default 10 milliseconds */
+
 /**
  * enum kbase_csf_event_callback_action - return type for CSF event callbacks.
  *
@@ -124,9 +120,9 @@ void kbase_csf_event_wait_remove(struct kbase_context *kctx,
 void kbase_csf_event_wait_remove_all(struct kbase_context *kctx);
 
 /**
- * kbase_csf_read_error - Read command stream fatal error
+ * kbase_csf_read_error - Read CS fatal error
  *
- * This function takes the command stream fatal error from context's ordered
+ * This function takes the CS fatal error from context's ordered
  * error_list, copies its contents to @event_data.
  *
  * @kctx:       The kbase context to read fatal error from
@@ -150,8 +146,8 @@ bool kbase_csf_error_pending(struct kbase_context *kctx);
  * kbase_csf_event_signal - Signal a CSF event
  *
  * This function triggers all the CSF event callbacks that are registered to
- * a given Kbase context, and also signals the thread of userspace driver
- * (front-end), waiting for the CSF event.
+ * a given Kbase context, and also signals the event handling thread of
+ * userspace driver waiting for the CSF event.
  *
  * @kctx:  The kbase context whose CSF event callbacks shall be triggered.
  * @notify_gpu: Flag to indicate if CSF firmware should be notified of the
@@ -171,8 +167,7 @@ static inline void kbase_csf_event_signal_cpu_only(struct kbase_context *kctx)
 }
 
 /**
- * kbase_csf_ctx_init - Initialize the command-stream front-end for a GPU
- *                      address space.
+ * kbase_csf_ctx_init - Initialize the CSF interface for a GPU address space.
  *
  * @kctx:	Pointer to the kbase context which is being initialized.
  *
@@ -194,8 +189,7 @@ void kbase_csf_ctx_handle_fault(struct kbase_context *kctx,
 		struct kbase_fault *fault);
 
 /**
- * kbase_csf_ctx_term - Terminate the command-stream front-end for a GPU
- *                      address space.
+ * kbase_csf_ctx_term - Terminate the CSF interface for a GPU address space.
  *
  * This function terminates any remaining CSGs and CSs which weren't destroyed
  * before context termination.
@@ -268,6 +262,16 @@ int kbase_csf_queue_bind(struct kbase_context *kctx,
  */
 void kbase_csf_queue_unbind(struct kbase_queue *queue);
 
+/**
+ * kbase_csf_queue_unbind_stopped - Unbind a GPU command queue in the case
+ *                                  where it was never started.
+ * @queue:      Pointer to queue to be unbound.
+ *
+ * Variant of kbase_csf_queue_unbind() for use on error paths for cleaning up
+ * queues that failed to fully bind.
+ */
+void kbase_csf_queue_unbind_stopped(struct kbase_queue *queue);
+
 /**
  * kbase_csf_queue_kick - Schedule a GPU command queue on the firmware
  *
@@ -280,7 +284,9 @@ void kbase_csf_queue_unbind(struct kbase_queue *queue);
 int kbase_csf_queue_kick(struct kbase_context *kctx,
 			 struct kbase_ioctl_cs_queue_kick *kick);
 
-/** Find if given the queue group handle is valid.
+/**
+ * kbase_csf_queue_group_handle_is_valid - Find if the given queue group handle
+ *                                         is valid.
  *
  * This function is used to determine if the queue group handle is valid.
  *
@@ -340,7 +346,6 @@ void kbase_csf_term_descheduled_queue_group(struct kbase_queue_group *group);
  *			suspended.
  * @sus_buf:		Pointer to the structure which contains details of the
  *			user buffer and its kernel pinned pages.
- * @size:		The size in bytes for the user provided buffer.
  * @group_handle:	Handle for the group which uniquely identifies it within
  *			the context within which it was created.
  *
@@ -350,6 +355,16 @@ void kbase_csf_term_descheduled_queue_group(struct kbase_queue_group *group);
 int kbase_csf_queue_group_suspend(struct kbase_context *kctx,
 	struct kbase_suspend_copy_buffer *sus_buf, u8 group_handle);
 
+/**
+ * kbase_csf_add_group_fatal_error - Report a fatal group error to userspace
+ *
+ * @group:       GPU command queue group.
+ * @err_payload: Error payload to report.
+ */
+void kbase_csf_add_group_fatal_error(
+	struct kbase_queue_group *const group,
+	struct base_gpu_queue_group_error const *const err_payload);
+
 /**
  * kbase_csf_interrupt - Handle interrupts issued by CSF firmware.
  *
@@ -359,55 +374,96 @@ int kbase_csf_queue_group_suspend(struct kbase_context *kctx,
 void kbase_csf_interrupt(struct kbase_device *kbdev, u32 val);
 
 /**
- * kbase_csf_doorbell_mapping_init - Initialize the bitmap of Hw doorbell pages
- *                           used to track their availability.
+ * kbase_csf_doorbell_mapping_init - Initialize the fields that facilitate
+ *                                   the update of userspace mapping of HW
+ *                                   doorbell page.
  *
- * @kbdev: Instance of a GPU platform device that implements a command
- *         stream front-end interface.
+ * The function creates a file and allocates a dummy page to facilitate the
+ * update of userspace mapping to point to the dummy page instead of the real
+ * HW doorbell page after the suspend of queue group.
+ *
+ * @kbdev: Instance of a GPU platform device that implements a CSF interface.
+ *
+ * Return: 0 on success, or negative on failure.
  */
 int kbase_csf_doorbell_mapping_init(struct kbase_device *kbdev);
 
+/**
+ * kbase_csf_doorbell_mapping_term - Free the dummy page & close the file used
+ *                         to update the userspace mapping of HW doorbell page
+ *
+ * @kbdev: Instance of a GPU platform device that implements a CSF interface.
+ */
 void kbase_csf_doorbell_mapping_term(struct kbase_device *kbdev);
 
 /**
- * kbase_csf_ring_csg_doorbell - ring the doorbell for a command stream group
- *                               interface.
+ * kbase_csf_setup_dummy_user_reg_page - Setup the dummy page that is accessed
+ *                                       instead of the User register page after
+ *                                       the GPU power down.
  *
- * The function kicks a notification on the command stream group interface to
- * firmware.
+ * The function allocates a dummy page which is used to replace the User
+ * register page in the userspace mapping after the power down of GPU.
+ * On the power up of GPU, the mapping is updated to point to the real
+ * User register page. The mapping is used to allow access to LATEST_FLUSH
+ * register from userspace.
  *
- * @kbdev: Instance of a GPU platform device that implements a command
- *         stream front-end interface.
- * @slot: Index of command stream group interface for ringing the door-bell.
+ * @kbdev: Instance of a GPU platform device that implements a CSF interface.
+ *
+ * Return: 0 on success, or negative on failure.
+ */
+int kbase_csf_setup_dummy_user_reg_page(struct kbase_device *kbdev);
+
+/**
+ * kbase_csf_free_dummy_user_reg_page - Free the dummy page that was used
+ *                                 to replace the User register page
+ *
+ * @kbdev: Instance of a GPU platform device that implements a CSF interface.
+ */
+void kbase_csf_free_dummy_user_reg_page(struct kbase_device *kbdev);
+
+/**
+ * kbase_csf_ring_csg_doorbell - ring the doorbell for a CSG interface.
+ *
+ * The function kicks a notification on the CSG interface to firmware.
+ *
+ * @kbdev: Instance of a GPU platform device that implements a CSF interface.
+ * @slot: Index of CSG interface for ringing the door-bell.
  */
 void kbase_csf_ring_csg_doorbell(struct kbase_device *kbdev, int slot);
 
 /**
- * kbase_csf_ring_csg_slots_doorbell - ring the doorbell for a set of command
- *                                     stream group interfaces.
+ * kbase_csf_ring_csg_slots_doorbell - ring the doorbell for a set of CSG
+ *                                     interfaces.
  *
- * The function kicks a notification on a set of command stream group
- * interfaces to firmware.
+ * The function kicks a notification on a set of CSG interfaces to firmware.
  *
- * @kbdev: Instance of a GPU platform device that implements a command
- *         stream front-end interface.
+ * @kbdev: Instance of a GPU platform device that implements a CSF interface.
  * @slot_bitmap: bitmap for the given slots, slot-0 on bit-0, etc.
  */
 void kbase_csf_ring_csg_slots_doorbell(struct kbase_device *kbdev,
 				       u32 slot_bitmap);
 
 /**
- * kbase_csf_ring_cs_kernel_doorbell - ring the kernel doorbell for a queue
+ * kbase_csf_ring_cs_kernel_doorbell - ring the kernel doorbell for a CSI
+ *                                     assigned to a GPU queue
  *
- * The function kicks a notification to the firmware for the command stream
- * interface to which the queue is bound.
+ * The function sends a doorbell interrupt notification to the firmware for
+ * a CSI assigned to a GPU queue.
  *
- * @kbdev: Instance of a GPU platform device that implements a command
- *         stream front-end interface.
- * @queue: Pointer to the queue for ringing the door-bell.
+ * @kbdev: Instance of a GPU platform device that implements a CSF interface.
+ * @csi_index: ID of the CSI assigned to the GPU queue.
+ * @csg_nr:    Index of the CSG slot assigned to the queue
+ *             group to which the GPU queue is bound.
+ * @ring_csg_doorbell: Flag to indicate if the CSG doorbell needs to be rung
+ *                     after updating the CSG_DB_REQ. So if this flag is false
+ *                     the doorbell interrupt will not be sent to FW.
+ *                     The flag is supposed to be false only when the input page
+ *                     for bound GPU queues is programmed at the time of
+ *                     starting/resuming the group on a CSG slot.
  */
 void kbase_csf_ring_cs_kernel_doorbell(struct kbase_device *kbdev,
-			struct kbase_queue *queue);
+				       int csi_index, int csg_nr,
+				       bool ring_csg_doorbell);
 
 /**
  * kbase_csf_ring_cs_user_doorbell - ring the user doorbell allocated for a
@@ -416,8 +472,7 @@ void kbase_csf_ring_cs_kernel_doorbell(struct kbase_device *kbdev,
  * The function kicks a notification to the firmware on the doorbell assigned
  * to the queue.
  *
- * @kbdev: Instance of a GPU platform device that implements a command
- *         stream front-end interface.
+ * @kbdev: Instance of a GPU platform device that implements a CSF interface.
  * @queue: Pointer to the queue for ringing the door-bell.
  */
 void kbase_csf_ring_cs_user_doorbell(struct kbase_device *kbdev,
@@ -427,9 +482,8 @@ void kbase_csf_ring_cs_user_doorbell(struct kbase_device *kbdev,
  * kbase_csf_active_queue_groups_reset - Reset the state of all active GPU
  *                            command queue groups associated with the context.
  *
- * @kbdev:     Instance of a GPU platform device that implements a command
- *             stream front-end interface.
- * @kctx:      The kbase context.
+ * @kbdev: Instance of a GPU platform device that implements a CSF interface.
+ * @kctx:  The kbase context.
  *
  * This function will iterate through all the active/scheduled GPU command
  * queue groups associated with the context, deschedule and mark them as
@@ -441,4 +495,54 @@ void kbase_csf_ring_cs_user_doorbell(struct kbase_device *kbdev,
 void kbase_csf_active_queue_groups_reset(struct kbase_device *kbdev,
 			struct kbase_context *kctx);
 
+/**
+ * kbase_csf_priority_check - Check the priority requested
+ *
+ * @kbdev:        Device pointer
+ * @req_priority: Requested priority
+ *
+ * This will determine whether the requested priority can be satisfied.
+ *
+ * Return: The same or lower priority than requested.
+ */
+u8 kbase_csf_priority_check(struct kbase_device *kbdev, u8 req_priority);
+
+extern const u8 kbasep_csf_queue_group_priority_to_relative[BASE_QUEUE_GROUP_PRIORITY_COUNT];
+extern const u8 kbasep_csf_relative_to_queue_group_priority[KBASE_QUEUE_GROUP_PRIORITY_COUNT];
+
+/**
+ * kbase_csf_priority_relative_to_queue_group_priority - Convert relative to base priority
+ *
+ * @priority: kbase relative priority
+ *
+ * This will convert the monotonically increasing relative priority to the
+ * fixed base priority list.
+ *
+ * Return: base_queue_group_priority priority.
+ */
+static inline u8 kbase_csf_priority_relative_to_queue_group_priority(u8 priority)
+{
+	if (priority >= KBASE_QUEUE_GROUP_PRIORITY_COUNT)
+		priority = KBASE_QUEUE_GROUP_PRIORITY_LOW;
+	return kbasep_csf_relative_to_queue_group_priority[priority];
+}
+
+/**
+ * kbase_csf_priority_queue_group_priority_to_relative - Convert base priority to relative
+ *
+ * @priority: base_queue_group_priority priority
+ *
+ * This will convert the fixed base priority list to monotonically increasing relative priority.
+ *
+ * Return: kbase relative priority.
+ */
+static inline u8 kbase_csf_priority_queue_group_priority_to_relative(u8 priority)
+{
+	/* Apply low priority in case of invalid priority */
+	if (priority >= BASE_QUEUE_GROUP_PRIORITY_COUNT)
+		priority = BASE_QUEUE_GROUP_PRIORITY_LOW;
+	return kbasep_csf_queue_group_priority_to_relative[priority];
+}
+
+
 #endif /* _KBASE_CSF_H_ */
diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_cpu_queue_debugfs.c b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_cpu_queue_debugfs.c
new file mode 100644
index 000000000000..b54b2fc31939
--- /dev/null
+++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_cpu_queue_debugfs.c
@@ -0,0 +1,191 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ *
+ * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+#include "mali_kbase_csf_cpu_queue_debugfs.h"
+#include <mali_kbase.h>
+#include <linux/seq_file.h>
+
+#ifdef CONFIG_DEBUG_FS
+
+bool kbase_csf_cpu_queue_read_dump_req(struct kbase_context *kctx,
+					struct base_csf_notification *req)
+{
+	if (atomic_cmpxchg(&kctx->csf.cpu_queue.dump_req_status,
+			   BASE_CSF_CPU_QUEUE_DUMP_ISSUED,
+			   BASE_CSF_CPU_QUEUE_DUMP_PENDING) !=
+		BASE_CSF_CPU_QUEUE_DUMP_ISSUED) {
+		return false;
+	}
+
+	req->type = BASE_CSF_NOTIFICATION_CPU_QUEUE_DUMP;
+	return true;
+}
+
+/**
+ * kbasep_csf_cpu_queue_debugfs_show() - Print per-context cpu queue information
+ *
+ * @file: The seq_file for printing to
+ * @data: The debugfs dentry private data, a pointer to kbase_context
+ *
+ * Return: Negative error code or 0 on success.
+ */
+static int kbasep_csf_cpu_queue_debugfs_show(struct seq_file *file, void *data)
+{
+	struct kbase_context *kctx = file->private;
+
+	mutex_lock(&kctx->csf.lock);
+	if (atomic_read(&kctx->csf.cpu_queue.dump_req_status) !=
+				BASE_CSF_CPU_QUEUE_DUMP_COMPLETE) {
+		seq_printf(file, "Dump request already started! (try again)\n");
+		mutex_unlock(&kctx->csf.lock);
+		return -EBUSY;
+	}
+
+	atomic_set(&kctx->csf.cpu_queue.dump_req_status, BASE_CSF_CPU_QUEUE_DUMP_ISSUED);
+	init_completion(&kctx->csf.cpu_queue.dump_cmp);
+	kbase_event_wakeup(kctx);
+	mutex_unlock(&kctx->csf.lock);
+
+	seq_printf(file, "CPU Queues table (version:v%u):\n", MALI_CSF_CPU_QUEUE_DEBUGFS_VERSION);
+
+	wait_for_completion_timeout(&kctx->csf.cpu_queue.dump_cmp,
+			msecs_to_jiffies(3000));
+
+	mutex_lock(&kctx->csf.lock);
+	if (kctx->csf.cpu_queue.buffer) {
+		WARN_ON(atomic_read(&kctx->csf.cpu_queue.dump_req_status) !=
+				    BASE_CSF_CPU_QUEUE_DUMP_PENDING);
+
+		seq_printf(file, "%s\n", kctx->csf.cpu_queue.buffer);
+
+		kfree(kctx->csf.cpu_queue.buffer);
+		kctx->csf.cpu_queue.buffer = NULL;
+		kctx->csf.cpu_queue.buffer_size = 0;
+	}
+	else
+		seq_printf(file, "Dump error! (time out)\n");
+
+	atomic_set(&kctx->csf.cpu_queue.dump_req_status,
+			BASE_CSF_CPU_QUEUE_DUMP_COMPLETE);
+
+	mutex_unlock(&kctx->csf.lock);
+	return 0;
+}
+
+static int kbasep_csf_cpu_queue_debugfs_open(struct inode *in, struct file *file)
+{
+	return single_open(file, kbasep_csf_cpu_queue_debugfs_show, in->i_private);
+}
+
+static const struct file_operations kbasep_csf_cpu_queue_debugfs_fops = {
+	.open = kbasep_csf_cpu_queue_debugfs_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = single_release,
+};
+
+void kbase_csf_cpu_queue_debugfs_init(struct kbase_context *kctx)
+{
+	struct dentry *file;
+
+	if (WARN_ON(!kctx || IS_ERR_OR_NULL(kctx->kctx_dentry)))
+		return;
+
+	file = debugfs_create_file("cpu_queue", 0444, kctx->kctx_dentry,
+			kctx, &kbasep_csf_cpu_queue_debugfs_fops);
+
+	if (IS_ERR_OR_NULL(file)) {
+		dev_warn(kctx->kbdev->dev,
+				"Unable to create cpu queue debugfs entry");
+	}
+
+	kctx->csf.cpu_queue.buffer = NULL;
+	kctx->csf.cpu_queue.buffer_size = 0;
+	atomic_set(&kctx->csf.cpu_queue.dump_req_status,
+		   BASE_CSF_CPU_QUEUE_DUMP_COMPLETE);
+}
+
+int kbase_csf_cpu_queue_dump(struct kbase_context *kctx,
+		u64 buffer, size_t buf_size)
+{
+	int err = 0;
+
+	size_t alloc_size = buf_size;
+	char *dump_buffer;
+
+	if (!buffer || !alloc_size)
+		goto done;
+
+	alloc_size = (alloc_size + PAGE_SIZE) & ~(PAGE_SIZE - 1);
+	dump_buffer = kzalloc(alloc_size, GFP_KERNEL);
+	if (ZERO_OR_NULL_PTR(dump_buffer)) {
+		err = -ENOMEM;
+		goto done;
+	}
+
+	WARN_ON(kctx->csf.cpu_queue.buffer != NULL);
+
+	err = copy_from_user(dump_buffer,
+			u64_to_user_ptr(buffer),
+			buf_size);
+	if (err) {
+		kfree(dump_buffer);
+		err = -EFAULT;
+		goto done;
+	}
+
+	mutex_lock(&kctx->csf.lock);
+
+	kfree(kctx->csf.cpu_queue.buffer);
+
+	if (atomic_read(&kctx->csf.cpu_queue.dump_req_status) ==
+			BASE_CSF_CPU_QUEUE_DUMP_PENDING) {
+		kctx->csf.cpu_queue.buffer = dump_buffer;
+		kctx->csf.cpu_queue.buffer_size = buf_size;
+		complete_all(&kctx->csf.cpu_queue.dump_cmp);
+	} else {
+		kfree(dump_buffer);
+	}
+
+	mutex_unlock(&kctx->csf.lock);
+done:
+	return err;
+}
+#else
+/*
+ * Stub functions for when debugfs is disabled
+ */
+void kbase_csf_cpu_queue_debugfs_init(struct kbase_context *kctx)
+{
+}
+
+bool kbase_csf_cpu_queue_read_dump_req(struct kbase_context *kctx,
+					struct base_csf_notification *req)
+{
+	return false;
+}
+
+int kbase_csf_cpu_queue_dump(struct kbase_context *kctx,
+			u64 buffer, size_t buf_size)
+{
+	return 0;
+}
+#endif /* CONFIG_DEBUG_FS */
diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_cpu_queue_debugfs.h b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_cpu_queue_debugfs.h
new file mode 100644
index 000000000000..36336497209e
--- /dev/null
+++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_cpu_queue_debugfs.h
@@ -0,0 +1,90 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ *
+ * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+#ifndef _KBASE_CSF_CPU_QUEUE_DEBUGFS_H_
+#define _KBASE_CSF_CPU_QUEUE_DEBUGFS_H_
+
+#include <asm/atomic.h>
+#include <linux/types.h>
+
+#include "mali_kbase.h"
+
+/* Forward declaration */
+struct base_csf_notification;
+
+#define MALI_CSF_CPU_QUEUE_DEBUGFS_VERSION 0
+
+/* CPU queue dump status */
+/* Dumping is done or no dumping is in progress. */
+#define BASE_CSF_CPU_QUEUE_DUMP_COMPLETE	0
+/* Dumping request is pending. */
+#define BASE_CSF_CPU_QUEUE_DUMP_PENDING		1
+/* Dumping request is issued to Userspace */
+#define BASE_CSF_CPU_QUEUE_DUMP_ISSUED		2
+
+
+/**
+ * kbase_csf_cpu_queue_debugfs_init() - Create a debugfs entry for per context cpu queue(s)
+ *
+ * @kctx: The kbase_context for which to create the debugfs entry
+ */
+void kbase_csf_cpu_queue_debugfs_init(struct kbase_context *kctx);
+
+/**
+ * kbase_csf_cpu_queue_read_dump_req - Read cpu queue dump request event
+ *
+ * @kctx: The kbase_context to which the dumped cpu queue belongs
+ * @req:  Notification with cpu queue dump request.
+ *
+ * Return: true if needs CPU queue dump, or false otherwise.
+ */
+bool kbase_csf_cpu_queue_read_dump_req(struct kbase_context *kctx,
+					struct base_csf_notification *req);
+
+/**
+ * kbase_csf_cpu_queue_dump_needed - Check the requirement for cpu queue dump
+ *
+ * @kctx: The kbase_context to which the dumped cpu queue belongs
+ *
+ * Return: true if it needs cpu queue dump, or false otherwise.
+ */
+static inline bool kbase_csf_cpu_queue_dump_needed(struct kbase_context *kctx)
+{
+#ifdef CONFIG_DEBUG_FS
+	return (atomic_read(&kctx->csf.cpu_queue.dump_req_status) ==
+		BASE_CSF_CPU_QUEUE_DUMP_ISSUED);
+#else
+	return false;
+#endif
+}
+
+/**
+ * kbase_csf_cpu_queue_dump - dump buffer containing cpu queue information to debugfs
+ *
+ * @kctx: The kbase_context to which the dumped cpu queue belongs
+ * @buffer: Buffer containing the cpu queue information.
+ * @buf_size: Buffer size.
+ *
+ * Return: 0 if the dump succeeded, or a negative error code.
+ */
+int kbase_csf_cpu_queue_dump(struct kbase_context *kctx,
+		u64 buffer, size_t buf_size);
+#endif /* _KBASE_CSF_CPU_QUEUE_DEBUGFS_H_ */
diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_csg_debugfs.c b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_csg_debugfs.c
index fd8329ba9422..389468307e5f 100644
--- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_csg_debugfs.c
+++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_csg_debugfs.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  *
  * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved.
@@ -5,7 +6,7 @@
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,8 +17,6 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 #include "mali_kbase_csf_csg_debugfs.h"
@@ -29,12 +28,37 @@
 #ifdef CONFIG_DEBUG_FS
 #include "mali_kbase_csf_tl_reader.h"
 
+/**
+ * blocked_reason_to_string() - Look up the name of a CS blocked reason
+ *
+ * @reason_id: Blocked reason id, used as an index into the name table
+ *
+ * Return: Reason name, or "UNKNOWN_BLOCKED_REASON_ID" if out of range
+ */
+static const char *blocked_reason_to_string(u32 reason_id)
+{
+	/* Names of the CS blocked reasons, indexed by reason id */
+	static const char *const reason_names[] = {
+		[CS_STATUS_BLOCKED_REASON_REASON_UNBLOCKED] = "UNBLOCKED",
+		[CS_STATUS_BLOCKED_REASON_REASON_WAIT] = "WAIT",
+		[CS_STATUS_BLOCKED_REASON_REASON_PROGRESS_WAIT] =
+			"PROGRESS_WAIT",
+		[CS_STATUS_BLOCKED_REASON_REASON_SYNC_WAIT] = "SYNC_WAIT",
+		[CS_STATUS_BLOCKED_REASON_REASON_DEFERRED] = "DEFERRED",
+		[CS_STATUS_BLOCKED_REASON_REASON_RESOURCE] = "RESOURCE",
+		[CS_STATUS_BLOCKED_REASON_REASON_FLUSH] = "FLUSH"
+	};
+
+	if (!WARN_ON(reason_id >= ARRAY_SIZE(reason_names)))
+		return reason_names[reason_id];
+
+	return "UNKNOWN_BLOCKED_REASON_ID";
+}
+
 static void kbasep_csf_scheduler_dump_active_queue_cs_status_wait(
-		struct seq_file *file,
-		u32 wait_status,
-		u32 wait_sync_value,
-		u64 wait_sync_live_value,
-		u64 wait_sync_pointer)
+	struct seq_file *file, u32 wait_status, u32 wait_sync_value,
+	u64 wait_sync_live_value, u64 wait_sync_pointer, u32 sb_status,
+	u32 blocked_reason)
 {
 #define WAITING "Waiting"
 #define NOT_WAITING "Not waiting"
@@ -56,6 +80,11 @@ static void kbasep_csf_scheduler_dump_active_queue_cs_status_wait(
 	seq_printf(file, "SYNC_POINTER: 0x%llx\n", wait_sync_pointer);
 	seq_printf(file, "SYNC_VALUE: %d\n", wait_sync_value);
 	seq_printf(file, "SYNC_LIVE_VALUE: 0x%016llx\n", wait_sync_live_value);
+	seq_printf(file, "SB_STATUS: %u\n",
+		   CS_STATUS_SCOREBOARDS_NONZERO_GET(sb_status));
+	seq_printf(file, "BLOCKED_REASON: %s\n",
+		   blocked_reason_to_string(CS_STATUS_BLOCKED_REASON_REASON_GET(
+			   blocked_reason)));
 }
 
 /**
@@ -74,6 +103,8 @@ static void kbasep_csf_scheduler_dump_active_queue(struct seq_file *file,
 	u32 cs_active;
 	u64 wait_sync_pointer;
 	u32 wait_status, wait_sync_value;
+	u32 sb_status;
+	u32 blocked_reason;
 	struct kbase_vmap_struct *mapping;
 	u64 *evt;
 	u64 wait_sync_live_value;
@@ -109,6 +140,8 @@ static void kbasep_csf_scheduler_dump_active_queue(struct seq_file *file,
 			wait_status = queue->status_wait;
 			wait_sync_value = queue->sync_value;
 			wait_sync_pointer = queue->sync_ptr;
+			sb_status = queue->sb_status;
+			blocked_reason = queue->blocked_reason;
 
 			evt = (u64 *)kbase_phy_alloc_mapping_get(queue->kctx, wait_sync_pointer, &mapping);
 			if (evt) {
@@ -120,7 +153,8 @@ static void kbasep_csf_scheduler_dump_active_queue(struct seq_file *file,
 
 			kbasep_csf_scheduler_dump_active_queue_cs_status_wait(
 				file, wait_status, wait_sync_value,
-				wait_sync_live_value, wait_sync_pointer);
+				wait_sync_live_value, wait_sync_pointer,
+				sb_status, blocked_reason);
 		}
 	} else {
 		struct kbase_device const *const kbdev =
@@ -161,6 +195,11 @@ static void kbasep_csf_scheduler_dump_active_queue(struct seq_file *file,
 		wait_sync_pointer |= (u64)kbase_csf_firmware_cs_output(stream,
 					CS_STATUS_WAIT_SYNC_POINTER_HI) << 32;
 
+		sb_status = kbase_csf_firmware_cs_output(stream,
+							 CS_STATUS_SCOREBOARDS);
+		blocked_reason = kbase_csf_firmware_cs_output(
+			stream, CS_STATUS_BLOCKED_REASON);
+
 		evt = (u64 *)kbase_phy_alloc_mapping_get(queue->kctx, wait_sync_pointer, &mapping);
 		if (evt) {
 			wait_sync_live_value = evt[0];
@@ -171,7 +210,8 @@ static void kbasep_csf_scheduler_dump_active_queue(struct seq_file *file,
 
 		kbasep_csf_scheduler_dump_active_queue_cs_status_wait(
 			file, wait_status, wait_sync_value,
-			wait_sync_live_value, wait_sync_pointer);
+			wait_sync_live_value, wait_sync_pointer, sb_status,
+			blocked_reason);
 	}
 
 	seq_puts(file, "\n");
@@ -428,6 +468,61 @@ DEFINE_SIMPLE_ATTRIBUTE(kbasep_csf_debugfs_scheduling_timer_kick_fops,
 		&kbasep_csf_debugfs_scheduling_timer_kick_set,
 		"%llu\n");
 
+/**
+ * kbase_csf_debugfs_scheduler_suspend_get() - Report if scheduler is suspended.
+ *
+ * @data: The debugfs dentry private data, a pointer to kbase_device
+ * @val: The debugfs output value, boolean: 1 suspended, 0 otherwise
+ *
+ * Return: 0
+ */
+static int kbase_csf_debugfs_scheduler_suspend_get(void *data, u64 *val)
+{
+	struct kbase_device *const kbdev = data;
+	bool suspended;
+
+	kbase_csf_scheduler_lock(kbdev);
+	suspended = (kbdev->csf.scheduler.state == SCHED_SUSPENDED);
+	kbase_csf_scheduler_unlock(kbdev);
+	*val = suspended;
+
+	return 0;
+}
+
+/**
+ * kbase_csf_debugfs_scheduler_suspend_set() - Suspend or resume the scheduler.
+ *
+ * @data: The debugfs dentry private data, a pointer to kbase_device
+ * @val: The debugfs input value: non-zero to suspend, 0 to resume
+ *
+ * Return: -EALREADY if already in the requested state, 0 otherwise.
+ */
+static int kbase_csf_debugfs_scheduler_suspend_set(void *data, u64 val)
+{
+	struct kbase_device *kbdev = data;
+	enum kbase_csf_scheduler_state state;
+
+	/* The state is only sampled under the lock; it is acted on unlocked */
+	kbase_csf_scheduler_lock(kbdev);
+	state = kbdev->csf.scheduler.state;
+	kbase_csf_scheduler_unlock(kbdev);
+
+	/* Only act on a real state change; a redundant request is an error */
+	if (val && (state != SCHED_SUSPENDED))
+		kbase_csf_scheduler_pm_suspend(kbdev);
+	else if (!val && (state == SCHED_SUSPENDED))
+		kbase_csf_scheduler_pm_resume(kbdev);
+	else
+		return -EALREADY;
+
+	return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(kbasep_csf_debugfs_scheduler_suspend_fops,
+		&kbase_csf_debugfs_scheduler_suspend_get,
+		&kbase_csf_debugfs_scheduler_suspend_set,
+		"%llu\n");
+
 void kbase_csf_debugfs_init(struct kbase_device *kbdev)
 {
 	debugfs_create_file("active_groups", 0444,
@@ -440,6 +535,9 @@ void kbase_csf_debugfs_init(struct kbase_device *kbdev)
 	debugfs_create_file("scheduling_timer_kick", 0200,
 			kbdev->mali_debugfs_directory, kbdev,
 			&kbasep_csf_debugfs_scheduling_timer_kick_fops);
+	debugfs_create_file("scheduler_suspend", 0644,
+			kbdev->mali_debugfs_directory, kbdev,
+			&kbasep_csf_debugfs_scheduler_suspend_fops);
 
 	kbase_csf_tl_reader_debugfs_init(kbdev);
 	kbase_csf_firmware_trace_buffer_debugfs_init(kbdev);
diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_csg_debugfs.h b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_csg_debugfs.h
index c2e99d386f8c..c6a86b6d814b 100644
--- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_csg_debugfs.h
+++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_csg_debugfs.h
@@ -1,11 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  *
- * (C) COPYRIGHT 2019 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,8 +17,6 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 #ifndef _KBASE_CSF_CSG_DEBUGFS_H_
diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_defs.h b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_defs.h
index 3829572a1aeb..05173990c6d8 100644
--- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_defs.h
+++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_defs.h
@@ -1,11 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  *
- * (C) COPYRIGHT 2018-2020 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2018-2021 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,11 +17,9 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
-/* Definitions (types, defines, etcs) common to the command stream frontend.
+/* Definitions (types, defines, etc.) common to the CSF.
  * They are placed here to allow the hierarchy of header files to work.
  */
 
@@ -46,6 +45,14 @@
  */
 #define MAX_TILER_HEAPS (128)
 
+#define CSF_FIRMWARE_ENTRY_READ       (1ul << 0)
+#define CSF_FIRMWARE_ENTRY_WRITE      (1ul << 1)
+#define CSF_FIRMWARE_ENTRY_EXECUTE    (1ul << 2)
+#define CSF_FIRMWARE_ENTRY_CACHE_MODE (3ul << 3)
+#define CSF_FIRMWARE_ENTRY_PROTECTED  (1ul << 5)
+#define CSF_FIRMWARE_ENTRY_SHARED     (1ul << 30)
+#define CSF_FIRMWARE_ENTRY_ZERO       (1ul << 31)
+
 /**
  * enum kbase_csf_bind_state - bind state of the queue
  *
@@ -66,18 +73,36 @@ enum kbase_csf_queue_bind_state {
  * enum kbase_csf_reset_gpu_state - state of the gpu reset
  *
  * @KBASE_CSF_RESET_GPU_NOT_PENDING: Set when the GPU reset isn't pending
+ *
+ * @KBASE_CSF_RESET_GPU_PREPARED: Set when kbase_prepare_to_reset_gpu() has
+ * been called. This is just for debugging checks to encourage callers to call
+ * kbase_prepare_to_reset_gpu() before kbase_reset_gpu().
+ *
+ * @KBASE_CSF_RESET_GPU_COMMITTED: Set when the GPU reset process has been
+ * committed and so will definitely happen, but the procedure to reset the GPU
+ * has not yet begun. Other threads must finish accessing the HW before we
+ * reach %KBASE_CSF_RESET_GPU_HAPPENING.
+ *
  * @KBASE_CSF_RESET_GPU_HAPPENING: Set when the GPU reset process is occurring
- * @KBASE_CSF_RESET_GPU_SILENT: Set when the GPU reset process is occurring,
- * used when resetting the GPU as part of normal behavior (e.g. when exiting
- * protected mode).
+ * (silent or otherwise), and is actively accessing the HW. Any changes to the
+ * HW in other threads might get lost, overridden, or corrupted.
+ *
+ * @KBASE_CSF_RESET_GPU_COMMITTED_SILENT: Set when the GPU reset process has
+ * been committed but has not started happening. This is used when resetting
+ * the GPU as part of normal behavior (e.g. when exiting protected mode).
+ * Other threads must finish accessing the HW before we reach
+ * %KBASE_CSF_RESET_GPU_HAPPENING.
+ *
  * @KBASE_CSF_RESET_GPU_FAILED: Set when an error is encountered during the
  * GPU reset process. No more work could then be executed on GPU, unloading
  * the Driver module is the only option.
  */
 enum kbase_csf_reset_gpu_state {
 	KBASE_CSF_RESET_GPU_NOT_PENDING,
+	KBASE_CSF_RESET_GPU_PREPARED,
+	KBASE_CSF_RESET_GPU_COMMITTED,
 	KBASE_CSF_RESET_GPU_HAPPENING,
-	KBASE_CSF_RESET_GPU_SILENT,
+	KBASE_CSF_RESET_GPU_COMMITTED_SILENT,
 	KBASE_CSF_RESET_GPU_FAILED,
 };
 
@@ -86,17 +111,17 @@ enum kbase_csf_reset_gpu_state {
  *
  * @KBASE_CSF_GROUP_INACTIVE:          Group is inactive and won't be
  *                                     considered by scheduler for running on
- *                                     command stream group slot.
+ *                                     CSG slot.
  * @KBASE_CSF_GROUP_RUNNABLE:          Group is in the list of runnable groups
  *                                     and is subjected to time-slice based
  *                                     scheduling. A start request would be
  *                                     sent (or already has been sent) if the
- *                                     group is assigned the command stream
+ *                                     group is assigned the CS
  *                                     group slot for the fist time.
- * @KBASE_CSF_GROUP_IDLE:              Group is currently on a command stream
- *                                     group slot but all the command streams
- *                                     bound to the group have become either
- *                                     idle or waiting on sync object.
+ * @KBASE_CSF_GROUP_IDLE:              Group is currently on a CSG slot
+ *                                     but all the CSs bound to the group have
+ *                                     become either idle or waiting on sync
+ *                                     object.
  *                                     Group could be evicted from the slot on
  *                                     the next tick if there are no spare
  *                                     slots left after scheduling non-idle
@@ -110,12 +135,11 @@ enum kbase_csf_reset_gpu_state {
  *                                     KBASE_CSF_GROUP_SUSPENDED_ON_IDLE or
  *                                     KBASE_CSF_GROUP_SUSPENDED_ON_WAIT_SYNC
  *                                     state.
- * @KBASE_CSF_GROUP_SUSPENDED:         Group was evicted from the command
- *                                     stream group slot and is not running but
- *                                     is still in the list of runnable groups
- *                                     and subjected to time-slice based
- *                                     scheduling. A resume request would be
- *                                     sent when a command stream group slot is
+ * @KBASE_CSF_GROUP_SUSPENDED:         Group was evicted from the CSG slot
+ *                                     and is not running but is still in the
+ *                                     list of runnable groups and subjected
+ *                                     to time-slice based scheduling. A resume
+ *                                     request would be sent when a CSG slot is
  *                                     re-assigned to the group and once the
  *                                     resume is complete group would be moved
  *                                     back to the RUNNABLE state.
@@ -128,8 +152,8 @@ enum kbase_csf_reset_gpu_state {
  *                                     bound to the group is kicked it would be
  *                                     moved to the SUSPENDED state.
  * @KBASE_CSF_GROUP_SUSPENDED_ON_WAIT_SYNC: Same as GROUP_SUSPENDED_ON_IDLE
- *                                          except that at least one command
- *                                          stream bound to this group was
+ *                                          except that at least one CS
+ *                                          bound to this group was
  *                                          waiting for synchronization object
  *                                          before the suspension.
  * @KBASE_CSF_GROUP_FAULT_EVICTED:     Group is evicted from the scheduler due
@@ -185,10 +209,10 @@ enum kbase_csf_csg_slot_state {
  * enum kbase_csf_scheduler_state - state of the scheduler operational phases.
  *
  * @SCHED_BUSY:         The scheduler is busy performing on tick schedule
- *                      operations, the state of command stream group slots
+ *                      operations, the state of CSG slots
  *                      can't be changed.
  * @SCHED_INACTIVE:     The scheduler is inactive, it is allowed to modify the
- *                      state of command stream group slots by in-cycle
+ *                      state of CSG slots by in-cycle
  *                      priority scheduling.
  * @SCHED_SUSPENDED:    The scheduler is in low-power mode with scheduling
  *                      operations suspended and is not holding the power
@@ -202,6 +226,24 @@ enum kbase_csf_scheduler_state {
 	SCHED_SUSPENDED,
 };
 
+/**
+ * enum kbase_queue_group_priority - Kbase internal relative priority list.
+ *
+ * @KBASE_QUEUE_GROUP_PRIORITY_REALTIME:  The realtime queue group priority (0).
+ * @KBASE_QUEUE_GROUP_PRIORITY_HIGH:      The high queue group priority.
+ * @KBASE_QUEUE_GROUP_PRIORITY_MEDIUM:    The medium queue group priority.
+ * @KBASE_QUEUE_GROUP_PRIORITY_LOW:       The low queue group priority.
+ * @KBASE_QUEUE_GROUP_PRIORITY_COUNT:     Number of priority levels; keep last.
+ */
+enum kbase_queue_group_priority {
+	KBASE_QUEUE_GROUP_PRIORITY_REALTIME = 0,
+	KBASE_QUEUE_GROUP_PRIORITY_HIGH,
+	KBASE_QUEUE_GROUP_PRIORITY_MEDIUM,
+	KBASE_QUEUE_GROUP_PRIORITY_LOW,
+	KBASE_QUEUE_GROUP_PRIORITY_COUNT
+};
+
+
 /**
  * struct kbase_csf_notification - Event or error generated as part of command
  *                                 queue execution
@@ -240,37 +282,43 @@ struct kbase_csf_notification {
  * @refcount:    Reference count, stands for the number of times the queue
  *               has been referenced. The reference is taken when it is
  *               created, when it is bound to the group and also when the
- *               @oom_event_work or @fault_event_work work item is queued
+ *               @oom_event_work work item is queued
  *               for it.
  * @group:       Pointer to the group to which this queue is bound.
- * @queue_reg:   Pointer to the VA region allocated for command
- *               stream buffer.
+ * @queue_reg:   Pointer to the VA region allocated for CS buffer.
  * @oom_event_work: Work item corresponding to the out of memory event for
  *                  chunked tiler heap being used for this queue.
- * @fault_event_work: Work item corresponding to the firmware fault event.
- * @base_addr:      Base address of the command stream buffer.
- * @size:           Size of the command stream buffer.
+ * @base_addr:      Base address of the CS buffer.
+ * @size:           Size of the CS buffer.
  * @priority:       Priority of this queue within the group.
- * @bind_state:     Bind state of the queue.
- * @csi_index:      The ID of the assigned command stream hardware interface.
- * @enabled:        Indicating whether the command stream is running, or not.
- * @status_wait:    Value of CS_STATUS_WAIT register of the command stream will
- *                  be kept when the command stream gets blocked by sync wait.
+ * @bind_state:     Bind state of the queue as enum @kbase_csf_queue_bind_state
+ * @csi_index:      The ID of the assigned CS hardware interface.
+ * @enabled:        Indicating whether the CS is running, or not.
+ * @status_wait:    Value of CS_STATUS_WAIT register of the CS will
+ *                  be kept when the CS gets blocked by sync wait.
  *                  CS_STATUS_WAIT provides information on conditions queue is
  *                  blocking on. This is set when the group, to which queue is
  *                  bound, is suspended after getting blocked, i.e. in
  *                  KBASE_CSF_GROUP_SUSPENDED_ON_WAIT_SYNC state.
- * @sync_ptr:       Value of CS_STATUS_WAIT_SYNC_POINTER register of the command
- *                  stream will be kept when the command stream gets blocked by
+ * @sync_ptr:       Value of CS_STATUS_WAIT_SYNC_POINTER register of the CS
+ *                  will be kept when the CS gets blocked by
  *                  sync wait. CS_STATUS_WAIT_SYNC_POINTER contains the address
  *                  of synchronization object being waited on.
  *                  Valid only when @status_wait is set.
- * @sync_value:     Value of CS_STATUS_WAIT_SYNC_VALUE register of the command
- *                  stream will be kept when the command stream gets blocked by
+ * @sync_value:     Value of CS_STATUS_WAIT_SYNC_VALUE register of the CS
+ *                  will be kept when the CS gets blocked by
  *                  sync wait. CS_STATUS_WAIT_SYNC_VALUE contains the value
  *                  tested against the synchronization object.
  *                  Valid only when @status_wait is set.
+ * @sb_status:      Value indicates which of the scoreboard entries in the queue
+ *                  are non-zero
+ * @blocked_reason: Value shows if the queue is blocked, and if so,
+ *                  the reason why it is blocked
  * @error:          GPU command queue fatal information to pass to user space.
+ * @fatal_event_work: Work item to handle the CS fatal event reported for this
+ *                    queue.
+ * @cs_fatal_info:    Records additional information about the CS fatal event.
+ * @cs_fatal:         Records information about the CS fatal event.
  */
 struct kbase_queue {
 	struct kbase_context *kctx;
@@ -285,17 +333,21 @@ struct kbase_queue {
 	struct kbase_queue_group *group;
 	struct kbase_va_region *queue_reg;
 	struct work_struct oom_event_work;
-	struct work_struct fault_event_work;
 	u64 base_addr;
 	u32 size;
 	u8 priority;
-	u8 bind_state;
 	s8 csi_index;
+	enum kbase_csf_queue_bind_state bind_state;
 	bool enabled;
 	u32 status_wait;
 	u64 sync_ptr;
 	u32 sync_value;
+	u32 sb_status;
+	u32 blocked_reason;
 	struct kbase_csf_notification error;
+	struct work_struct fatal_event_work;
+	u64 cs_fatal_info;
+	u32 cs_fatal;
 };
 
 /**
@@ -335,9 +387,9 @@ struct kbase_protected_suspend_buffer {
  *				buffer. Protected-mode suspend buffer that is
  *				used for group context switch.
  * @handle:         Handle which identifies this queue group.
- * @csg_nr:         Number/index of the command stream group to
- *                  which this queue group is mapped; KBASEP_CSG_NR_INVALID
- *                  indicates that the queue group is not scheduled.
+ * @csg_nr:         Number/index of the CSG to which this queue group is
+ *                  mapped; KBASEP_CSG_NR_INVALID indicates that the queue
+ *                  group is not scheduled.
  * @priority:       Priority of the queue group, 0 being the highest,
  *                  BASE_QUEUE_GROUP_PRIORITY_COUNT - 1 being the lowest.
  * @tiler_max:      Maximum number of tiler endpoints the group is allowed
@@ -349,18 +401,21 @@ struct kbase_protected_suspend_buffer {
  * @tiler_mask:     Mask of tiler endpoints the group is allowed to use.
  * @fragment_mask:  Mask of fragment endpoints the group is allowed to use.
  * @compute_mask:   Mask of compute endpoints the group is allowed to use.
+ * @group_uid:      32-bit wide unsigned identifier for the group, unique
+ *                  across all kbase devices and contexts.
  * @link:           Link to this queue group in the 'runnable_groups' list of
  *                  the corresponding kctx.
  * @link_to_schedule: Link to this queue group in the list of prepared groups
  *                    to be scheduled, if the group is runnable/suspended.
  *                    If the group is idle or waiting for CQS, it would be a
  *                    link to the list of idle/blocked groups list.
- * @timer_event_work: Work item corresponding to the event generated when a task
- *                    started by a queue in this group takes too long to execute
- *                    on an endpoint.
  * @run_state:      Current state of the queue group.
  * @prepared_seq_num: Indicates the position of queue group in the list of
  *                    prepared groups to be scheduled.
+ * @scan_seq_num:     Scan out sequence number before adjusting for dynamic
+ *                    idle conditions. It is used for setting a group's
+ *                    onslot priority. It could differ from @prepared_seq_num
+ *                    when there are idle groups.
  * @faulted:          Indicates that a GPU fault occurred for the queue group.
  *                    This flag persists until the fault has been queued to be
  *                    reported to userspace.
@@ -369,7 +424,7 @@ struct kbase_protected_suspend_buffer {
  *                  group.
  * @protm_event_work:   Work item corresponding to the protected mode entry
  *                      event for this queue.
- * @protm_pending_bitmap:  Bit array to keep a track of command streams that
+ * @protm_pending_bitmap:  Bit array to keep a track of CSs that
  *                         have pending protected mode entry requests.
  * @error_fatal: An error of type BASE_GPU_QUEUE_GROUP_ERROR_FATAL to be
  *               returned to userspace if such an error has occurred.
@@ -377,6 +432,8 @@ struct kbase_protected_suspend_buffer {
  *                 to be returned to userspace if such an error has occurred.
  * @error_tiler_oom: An error of type BASE_GPU_QUEUE_GROUP_ERROR_TILER_HEAP_OOM
  *                   to be returned to userspace if such an error has occurred.
+ * @timer_event_work: Work item to handle the progress timeout fatal event
+ *                    for the group.
  */
 struct kbase_queue_group {
 	struct kbase_context *kctx;
@@ -394,11 +451,13 @@ struct kbase_queue_group {
 	u64 fragment_mask;
 	u64 compute_mask;
 
+	u32 group_uid;
+
 	struct list_head link;
 	struct list_head link_to_schedule;
-	struct work_struct timer_event_work;
 	enum kbase_csf_group_state run_state;
 	u32 prepared_seq_num;
+	u32 scan_seq_num;
 	bool faulted;
 
 	struct kbase_queue *bound_queues[MAX_SUPPORTED_STREAMS_PER_GROUP];
@@ -410,6 +469,8 @@ struct kbase_queue_group {
 	struct kbase_csf_notification error_fatal;
 	struct kbase_csf_notification error_timeout;
 	struct kbase_csf_notification error_tiler_oom;
+
+	struct work_struct timer_event_work;
 };
 
 /**
@@ -442,6 +503,22 @@ struct kbase_csf_kcpu_queue_context {
 	struct list_head jit_blocked_queues;
 };
 
+/**
+ * struct kbase_csf_cpu_queue_context - CPU queue dump state of a context.
+ *
+ * @buffer:          Buffer containing CPU queue information from Userspace.
+ * @buffer_size:     The size of @buffer.
+ * @dump_req_status: Current status of the CPU queues dump request, read by
+ *                   kbase_csf_cpu_queue_dump_needed().
+ * @dump_cmp:        Completion event for the cpu queue dump.
+ */
+struct kbase_csf_cpu_queue_context {
+	char *buffer;
+	size_t buffer_size;
+	atomic_t dump_req_status;
+	struct completion dump_cmp;
+};
+
 /**
  * struct kbase_csf_heap_context_allocator - Allocator of heap contexts
  *
@@ -472,18 +549,21 @@ struct kbase_csf_heap_context_allocator {
  * struct kbase_csf_tiler_heap_context - Object representing the tiler heaps
  *                                       context for a GPU address space.
  *
- * This contains all of the command-stream front-end state relating to chunked
- * tiler heaps for one @kbase_context. It is not the same as a heap context
- * structure allocated by the kernel for use by the firmware.
+ * This contains all of the CSF state relating to chunked tiler heaps for one
+ * @kbase_context. It is not the same as a heap context structure allocated by
+ * the kernel for use by the firmware.
  *
- * @lock:      Lock preventing concurrent access to the tiler heaps.
- * @list:      List of tiler heaps.
- * @ctx_alloc: Allocator for heap context structures.
+ * @lock:        Lock preventing concurrent access to the tiler heaps.
+ * @list:        List of tiler heaps.
+ * @ctx_alloc:   Allocator for heap context structures.
+ * @nr_of_heaps: Total number of tiler heaps that were added during the
+ *               life time of the context.
  */
 struct kbase_csf_tiler_heap_context {
 	struct mutex lock;
 	struct list_head list;
 	struct kbase_csf_heap_context_allocator ctx_alloc;
+	u64 nr_of_heaps;
 };
 
 /**
@@ -491,7 +571,7 @@ struct kbase_csf_tiler_heap_context {
  *                                      context for a GPU address space.
  *
  * @runnable_groups:    Lists of runnable GPU command queue groups in the kctx,
- *                      one per queue group priority level.
+ *                      one per queue group relative-priority level.
  * @num_runnable_grps:  Total number of runnable groups across all priority
  *                      levels in @runnable_groups.
  * @idle_wait_groups:   A list of GPU command queue groups in which all enabled
@@ -500,7 +580,7 @@ struct kbase_csf_tiler_heap_context {
  * @num_idle_wait_grps: Length of the @idle_wait_groups list.
  * @sync_update_wq:     Dedicated workqueue to process work items corresponding
  *                      to the sync_update events by sync_set/sync_add
- *                      instruction execution on command streams bound to groups
+ *                      instruction execution on CSs bound to groups
  *                      of @idle_wait_groups list.
  * @sync_update_work:   work item to process the sync_update events by
  *                      sync_set / sync_add instruction execution on command
@@ -509,7 +589,7 @@ struct kbase_csf_tiler_heap_context {
  *                      'groups_to_schedule' list of scheduler instance.
  */
 struct kbase_csf_scheduler_context {
-	struct list_head runnable_groups[BASE_QUEUE_GROUP_PRIORITY_COUNT];
+	struct list_head runnable_groups[KBASE_QUEUE_GROUP_PRIORITY_COUNT];
 	u32 num_runnable_grps;
 	struct list_head idle_wait_groups;
 	u32 num_idle_wait_grps;
@@ -519,8 +599,7 @@ struct kbase_csf_scheduler_context {
 };
 
 /**
- * struct kbase_csf_context - Object representing command-stream front-end
- *                            for a GPU address space.
+ * struct kbase_csf_context - Object representing CSF for a GPU address space.
  *
  * @event_pages_head: A list of pages allocated for the event memory used by
  *                    the synchronization objects. A separate list would help
@@ -534,7 +613,7 @@ struct kbase_csf_scheduler_context {
  *                    deferred manner of a pair of User mode input/output pages
  *                    & a hardware doorbell page.
  *                    The pages are allocated when a GPU command queue is
- *                    bound to a command stream group in kbase_csf_queue_bind.
+ *                    bound to a CSG in kbase_csf_queue_bind.
  *                    This helps returning unique handles to Userspace from
  *                    kbase_csf_queue_bind and later retrieving the pointer to
  *                    queue in the mmap handler.
@@ -550,7 +629,8 @@ struct kbase_csf_scheduler_context {
  *                    userspace mapping created for them on bind operation
  *                    hasn't been removed.
  * @kcpu_queues:      Kernel CPU command queues.
- * @event_lock:       Lock protecting access to @event_callback_list
+ * @event_lock:       Lock protecting access to @event_callback_list and
+ *                    @error_list.
  * @event_callback_list: List of callbacks which are registered to serve CSF
  *                       events.
  * @tiler_heaps:      Chunked tiler memory heaps.
@@ -563,10 +643,12 @@ struct kbase_csf_scheduler_context {
  *                    of the USER register page. Currently used only for sanity
  *                    checking.
  * @sched:            Object representing the scheduler's context
- * @error_list:       List for command stream fatal errors in this context.
+ * @error_list:       List for CS fatal errors in this context.
  *                    Link of fatal error is
  *                    &struct_kbase_csf_notification.link.
- *                    @lock needs to be held to access to this list.
+ *                    @event_lock needs to be held to access this list.
+ * @cpu_queue:        CPU queue information. Only available when
+ *                    CONFIG_DEBUG_FS is enabled.
  */
 struct kbase_csf_context {
 	struct list_head event_pages_head;
@@ -585,6 +667,9 @@ struct kbase_csf_context {
 	struct vm_area_struct *user_reg_vma;
 	struct kbase_csf_scheduler_context sched;
 	struct list_head error_list;
+#ifdef CONFIG_DEBUG_FS
+	struct kbase_csf_cpu_queue_context cpu_queue;
+#endif
 };
 
 /**
@@ -593,23 +678,28 @@ struct kbase_csf_context {
  * @workq:         Workqueue to execute the GPU reset work item @work.
  * @work:          Work item for performing the GPU reset.
  * @wait:          Wait queue used to wait for the GPU reset completion.
+ * @sem:           RW Semaphore to ensure no other thread attempts to use the
+ *                 GPU whilst a reset is in process. Unlike traditional
+ *                 semaphores and wait queues, this allows Linux's lockdep
+ *                 mechanism to check for deadlocks involving reset waits.
  * @state:         Tracks if the GPU reset is in progress or not.
+ *                 The state is represented by enum @kbase_csf_reset_gpu_state.
  */
 struct kbase_csf_reset_gpu {
 	struct workqueue_struct *workq;
 	struct work_struct work;
 	wait_queue_head_t wait;
+	struct rw_semaphore sem;
 	atomic_t state;
 };
 
 /**
  * struct kbase_csf_csg_slot - Object containing members for tracking the state
- *                             of command stream group slots.
- * @resident_group:   pointer to the queue group that is resident on the
- *                    command stream group slot.
- * @state:            state of the slot as per enum kbase_csf_csg_slot_state.
+ *                             of CSG slots.
+ * @resident_group:   pointer to the queue group that is resident on the CSG slot.
+ * @state:            state of the slot as per enum @kbase_csf_csg_slot_state.
  * @trigger_jiffies:  value of jiffies when change in slot state is recorded.
- * @priority:         dynamic priority assigned to command stream group slot.
+ * @priority:         dynamic priority assigned to CSG slot.
  */
 struct kbase_csf_csg_slot {
 	struct kbase_queue_group *resident_group;
@@ -620,8 +710,7 @@ struct kbase_csf_csg_slot {
 
 /**
  * struct kbase_csf_scheduler - Object representing the scheduler used for
- *                              command-stream front-end for an instance of
- *                              GPU platform device.
+ *                              CSF for an instance of GPU platform device.
  * @lock:                  Lock to serialize the scheduler operations and
  *                         access to the data members.
  * @interrupt_lock:        Lock to protect members accessed by interrupt
@@ -632,26 +721,29 @@ struct kbase_csf_csg_slot {
  * @doorbell_inuse_bitmap: Bitmap of hardware doorbell pages keeping track of
  *                         which pages are currently available for assignment
  *                         to clients.
- * @csg_inuse_bitmap:      Bitmap to keep a track of command stream group slots
+ * @csg_inuse_bitmap:      Bitmap to keep a track of CSG slots
  *                         that are currently in use.
- * @csg_slots:             The array for tracking the state of command stream
+ * @csg_slots:             The array for tracking the state of CS
  *                         group slots.
  * @runnable_kctxs:        List of Kbase contexts that have runnable command
  *                         queue groups.
  * @groups_to_schedule:    List of runnable queue groups prepared on every
- *                         scheduler tick. The dynamic priority of the command
- *                         stream group slot assigned to a group will depend
- *                         upon the position of group in the list.
+ *                         scheduler tick. The dynamic priority of the CSG
+ *                         slot assigned to a group will depend upon the
+ *                         position of group in the list.
  * @ngrp_to_schedule:      Number of groups in the @groups_to_schedule list,
  *                         incremented when a group is added to the list, used
  *                         to record the position of group in the list.
  * @num_active_address_spaces: Number of GPU address space slots that would get
  *                             used to program the groups in @groups_to_schedule
- *                             list on all the available command stream group
+ *                             list on all the available CSG
  *                             slots.
- * @num_csg_slots_for_tick:  Number of command stream group slots that can be
+ * @num_csg_slots_for_tick:  Number of CSG slots that can be
  *                           active in the given tick/tock. This depends on the
  *                           value of @num_active_address_spaces.
+ * @remaining_tick_slots:    Tracking the number of remaining available slots
+ *                           for @num_csg_slots_for_tick during the scheduling
+ *                           operation in a tick/tock.
  * @idle_groups_to_schedule: List of runnable queue groups, in which all GPU
  *                           command queues became idle or are waiting for
  *                           synchronization object, prepared on every
@@ -659,11 +751,14 @@ struct kbase_csf_csg_slot {
  *                           appended to the tail of @groups_to_schedule list
  *                           after the scan out so that the idle groups aren't
  *                           preferred for scheduling over the non-idle ones.
+ * @csg_scan_count_for_tick: CSG scanout count, used to assign scan_seq_num to
+ *                           each scanned out group during scheduling operation
+ *                           in a tick/tock.
  * @total_runnable_grps:     Total number of runnable groups across all KCTXs.
  * @csgs_events_enable_mask: Use for temporary masking off asynchronous events
  *                           from firmware (such as OoM events) before a group
  *                           is suspended.
- * @csg_slots_idle_mask:     Bit array for storing the mask of command stream
+ * @csg_slots_idle_mask:     Bit array for storing the mask of CS
  *                           group slots for which idle notification was
  *                           received.
  * @csg_slots_prio_update:  Bit array for tracking slots that have an on-slot
@@ -677,39 +772,53 @@ struct kbase_csf_csg_slot {
  *                          then it will only perform scheduling under the
  *                          influence of external factors e.g., IRQs, IOCTLs.
  * @wq:                     Dedicated workqueue to execute the @tick_work.
- * @tick_work:              Work item that would perform the schedule on tick
- *                          operation to implement the time slice based
- *                          scheduling.
+ * @tick_timer:             High-resolution timer employed to schedule tick
+ *                          workqueue items (kernel-provided delayed_work
+ *                          items do not use hrtimer and for some reason do
+ *                          not provide sufficiently reliable periodicity).
+ * @tick_work:              Work item that performs the "schedule on tick"
+ *                          operation to implement timeslice-based scheduling.
  * @tock_work:              Work item that would perform the schedule on tock
  *                          operation to implement the asynchronous scheduling.
  * @ping_work:              Work item that would ping the firmware at regular
- *                          intervals, only if there is a single active command
- *                          stream group slot, to check if firmware is alive
- *                          and would initiate a reset if the ping request
- *                          isn't acknowledged.
+ *                          intervals, only if there is a single active CSG
+ *                          slot, to check if firmware is alive and would
+ *                          initiate a reset if the ping request isn't
+ *                          acknowledged.
  * @top_ctx:                Pointer to the Kbase context corresponding to the
  *                          @top_grp.
  * @top_grp:                Pointer to queue group inside @groups_to_schedule
  *                          list that was assigned the highest slot priority.
- * @head_slot_priority:     The dynamic slot priority to be used for the
- *                          queue group at the head of @groups_to_schedule
- *                          list. Once the queue group is assigned a command
- *                          stream group slot, it is removed from the list and
- *                          priority is decremented.
  * @tock_pending_request:   A "tock" request is pending: a group that is not
  *                          currently on the GPU demands to be scheduled.
  * @active_protm_grp:       Indicates if firmware has been permitted to let GPU
  *                          enter protected mode with the given group. On exit
  *                          from protected mode the pointer is reset to NULL.
+ * @gpu_idle_fw_timer_enabled: Whether the CSF scheduler has activated the
+ *                            firmware idle hysteresis timer for preparing a
+ *                            GPU suspend on idle.
  * @gpu_idle_work:          Work item for facilitating the scheduler to bring
  *                          the GPU to a low-power mode on becoming idle.
- * @non_idle_suspended_grps: Count of suspended queue groups not idle.
+ * @non_idle_offslot_grps:  Count of off-slot non-idle groups. Reset during
+ *                          the scheduler active phase in a tick. It then
+ *                          tracks the count of non-idle groups across all the
+ *                          other phases.
+ * @non_idle_scanout_grps:  Count of the non-idle groups in the scan-out
+ *                          list at the scheduling prepare stage.
  * @pm_active_count:        Count indicating if the scheduler is owning a power
  *                          management reference count. Reference is taken when
  *                          the count becomes 1 and is dropped when the count
  *                          becomes 0. It is used to enable the power up of MCU
  *                          after GPU and L2 cache have been powered up. So when
  *                          this count is zero, MCU will not be powered up.
+ * @csg_scheduling_period_ms: Duration of Scheduling tick in milliseconds.
+ * @tick_timer_active:      Indicates whether the @tick_timer is effectively
+ *                          active or not, as the callback function of
+ *                          @tick_timer will enqueue @tick_work only if this
+ *                          flag is true. This is mainly useful for the case
+ *                          when scheduling tick needs to be advanced from
+ *                          interrupt context, without actually deactivating
+ *                          the @tick_timer first and then enqueuing @tick_work.
  */
 struct kbase_csf_scheduler {
 	struct mutex lock;
@@ -723,7 +832,9 @@ struct kbase_csf_scheduler {
 	u32 ngrp_to_schedule;
 	u32 num_active_address_spaces;
 	u32 num_csg_slots_for_tick;
+	u32 remaining_tick_slots;
 	struct list_head idle_groups_to_schedule;
+	u32 csg_scan_count_for_tick;
 	u32 total_runnable_grps;
 	DECLARE_BITMAP(csgs_events_enable_mask, MAX_SUPPORTED_CSGS);
 	DECLARE_BITMAP(csg_slots_idle_mask, MAX_SUPPORTED_CSGS);
@@ -731,17 +842,21 @@ struct kbase_csf_scheduler {
 	unsigned long last_schedule;
 	bool timer_enabled;
 	struct workqueue_struct *wq;
-	struct delayed_work tick_work;
+	struct hrtimer tick_timer;
+	struct work_struct tick_work;
 	struct delayed_work tock_work;
 	struct delayed_work ping_work;
 	struct kbase_context *top_ctx;
 	struct kbase_queue_group *top_grp;
-	u8 head_slot_priority;
 	bool tock_pending_request;
 	struct kbase_queue_group *active_protm_grp;
-	struct delayed_work gpu_idle_work;
-	atomic_t non_idle_suspended_grps;
+	bool gpu_idle_fw_timer_enabled;
+	struct work_struct gpu_idle_work;
+	atomic_t non_idle_offslot_grps;
+	u32 non_idle_scanout_grps;
 	u32 pm_active_count;
+	unsigned int csg_scheduling_period_ms;
+	bool tick_timer_active;
 };
 
 /**
@@ -758,8 +873,205 @@ struct kbase_csf_scheduler {
 	GLB_PROGRESS_TIMER_TIMEOUT_SCALE)
 
 /**
- * struct kbase_csf      -  Object representing command-stream front-end for an
- *                          instance of GPU platform device.
+ * Default GLB_PWROFF_TIMER_TIMEOUT value in unit of micro-seconds.
+ */
+#define DEFAULT_GLB_PWROFF_TIMEOUT_US (800)
+
+/**
+ * In typical operations, the management of the shader core power transitions
+ * is delegated to the MCU/firmware. However, if the host driver is configured
+ * to take direct control, one needs to disable the MCU firmware GLB_PWROFF
+ * timer.
+ */
+#define DISABLE_GLB_PWROFF_TIMER (0)
+
+/* Index of the GPU_ACTIVE counter within the CSHW counter block */
+#define GPU_ACTIVE_CNT_IDX (4)
+
+/**
+ * Maximum number of sessions that can be managed by the IPA Control component.
+ */
+#if MALI_UNIT_TEST
+#define KBASE_IPA_CONTROL_MAX_SESSIONS ((size_t)8)
+#else
+#define KBASE_IPA_CONTROL_MAX_SESSIONS ((size_t)2)
+#endif
+
+/**
+ * enum kbase_ipa_core_type - Type of counter block for performance counters
+ *
+ * @KBASE_IPA_CORE_TYPE_CSHW:   CS Hardware counters.
+ * @KBASE_IPA_CORE_TYPE_MEMSYS: Memory System counters.
+ * @KBASE_IPA_CORE_TYPE_TILER:  Tiler counters.
+ * @KBASE_IPA_CORE_TYPE_SHADER: Shader Core counters.
+ * @KBASE_IPA_CORE_TYPE_NUM:    Number of core types.
+ */
+enum kbase_ipa_core_type {
+	KBASE_IPA_CORE_TYPE_CSHW = 0,
+	KBASE_IPA_CORE_TYPE_MEMSYS,
+	KBASE_IPA_CORE_TYPE_TILER,
+	KBASE_IPA_CORE_TYPE_SHADER,
+	KBASE_IPA_CORE_TYPE_NUM
+};
+
+/**
+ * Number of configurable counters per type of block on the IPA Control
+ * interface.
+ */
+#define KBASE_IPA_CONTROL_NUM_BLOCK_COUNTERS ((size_t)8)
+
+/**
+ * Total number of configurable counters existing on the IPA Control interface.
+ */
+#define KBASE_IPA_CONTROL_MAX_COUNTERS                                         \
+	((size_t)KBASE_IPA_CORE_TYPE_NUM * KBASE_IPA_CONTROL_NUM_BLOCK_COUNTERS)
+
+/**
+ * struct kbase_ipa_control_prfcnt - Session for a single performance counter
+ *
+ * @latest_raw_value: Latest raw value read from the counter.
+ * @scaling_factor:   Factor raw value shall be multiplied by.
+ * @accumulated_diff: Partial sum of scaled and normalized values from
+ *                    previous samples. This represents all the values
+ *                    that were read before the latest raw value.
+ * @type:             Type of counter block for performance counter.
+ * @select_idx:       Index of the performance counter as configured on
+ *                    the IPA Control interface.
+ * @gpu_norm:         Indicating whether values shall be normalized by
+ *                    GPU frequency. If true, returned values represent
+ *                    an interval of time expressed in seconds (when the
+ *                    scaling factor is set to 1).
+ */
+struct kbase_ipa_control_prfcnt {
+	u64 latest_raw_value;
+	u64 scaling_factor;
+	u64 accumulated_diff;
+	enum kbase_ipa_core_type type;
+	u8 select_idx;
+	bool gpu_norm;
+};
+
+/**
+ * struct kbase_ipa_control_session - Session for an IPA Control client
+ *
+ * @prfcnts:        Sessions for individual performance counters.
+ * @num_prfcnts:    Number of performance counters.
+ * @active:         Indicates whether this slot is in use or not
+ * @last_query_time:     Time of last query, in ns
+ * @protm_time:     Amount of time (in ns) that GPU has been in protected mode
+ */
+struct kbase_ipa_control_session {
+	struct kbase_ipa_control_prfcnt prfcnts[KBASE_IPA_CONTROL_MAX_COUNTERS];
+	size_t num_prfcnts;
+	bool active;
+	u64 last_query_time;
+	u64 protm_time;
+};
+
+/**
+ * struct kbase_ipa_control_prfcnt_config - Performance counter configuration
+ *
+ * @idx:      Index of the performance counter inside the block, as specified
+ *            in the GPU architecture.
+ * @refcount: Number of client sessions bound to this counter.
+ *
+ * This structure represents one configurable performance counter of
+ * the IPA Control interface. The entry may be mapped to a specific counter
+ * by one or more client sessions. The counter is considered to be unused
+ * if it isn't part of any client session.
+ */
+struct kbase_ipa_control_prfcnt_config {
+	u8 idx;
+	u8 refcount;
+};
+
+/**
+ * struct kbase_ipa_control_prfcnt_block - Block of performance counters
+ *
+ * @select:                 Current performance counter configuration.
+ * @num_available_counters: Number of counters that are not already configured.
+ *
+ */
+struct kbase_ipa_control_prfcnt_block {
+	struct kbase_ipa_control_prfcnt_config
+		select[KBASE_IPA_CONTROL_NUM_BLOCK_COUNTERS];
+	size_t num_available_counters;
+};
+
+/**
+ * struct kbase_ipa_control - Manager of the IPA Control interface.
+ *
+ * @blocks:              Current configuration of performance counters
+ *                       for the IPA Control interface.
+ * @sessions:            State of client sessions, storing information
+ *                       like performance counters the client subscribed to
+ *                       and latest value read from each counter.
+ * @lock:                Spinlock to serialize access by concurrent clients.
+ * @rtm_listener_data:   Private data for allocating a GPU frequency change
+ *                       listener.
+ * @num_active_sessions: Number of sessions opened by clients.
+ * @cur_gpu_rate:        Current GPU top-level operating frequency, in Hz.
+ * @rtm_listener_data:   Private data for allocating a GPU frequency change
+ *                       listener.
+ * @protm_start:         Time (in ns) at which the GPU entered protected mode
+ */
+struct kbase_ipa_control {
+	struct kbase_ipa_control_prfcnt_block blocks[KBASE_IPA_CORE_TYPE_NUM];
+	struct kbase_ipa_control_session
+		sessions[KBASE_IPA_CONTROL_MAX_SESSIONS];
+	spinlock_t lock;
+	void *rtm_listener_data;
+	size_t num_active_sessions;
+	u32 cur_gpu_rate;
+	u64 protm_start;
+};
+
+/**
+ * struct kbase_csf_firmware_interface - Interface in the MCU firmware
+ *
+ * @node:  Interface objects are on the kbase_device:csf.firmware_interfaces
+ *         list using this list_head to link them
+ * @phys:  Array of the physical (tagged) addresses making up this interface
+ * @name:  NUL-terminated string naming the interface
+ * @num_pages: Number of entries in @phys and @pma (and length of the interface)
+ * @virtual: Starting GPU virtual address this interface is mapped at
+ * @flags: bitmask of CSF_FIRMWARE_ENTRY_* conveying the interface attributes
+ * @data_start: Offset into firmware image at which the interface data starts
+ * @data_end: Offset into firmware image at which the interface data ends
+ * @kernel_map: A kernel mapping of the memory or NULL if not required to be
+ *              mapped in the kernel
+ * @pma: Array of pointers to protected memory allocations.
+ */
+struct kbase_csf_firmware_interface {
+	struct list_head node;
+	struct tagged_addr *phys;
+	char *name;
+	u32 num_pages;
+	u32 virtual;
+	u32 flags;
+	u32 data_start;
+	u32 data_end;
+	void *kernel_map;
+	struct protected_memory_allocation **pma;
+};
+
+/**
+ * struct kbase_csf_hwcnt - Object containing members for handling the dump of
+ *                          HW counters.
+ *
+ * @request_pending:        Flag set when HWC requested and used for HWC sample
+ *                          done interrupt.
+ * @enable_pending:         Flag set when HWC enable status change and used for
+ *                          enable done interrupt.
+ */
+struct kbase_csf_hwcnt {
+	bool request_pending;
+	bool enable_pending;
+};
+
+/**
+ * struct kbase_csf_device - Object representing CSF for an instance of GPU
+ *                           platform device.
  *
  * @mcu_mmu:                MMU page tables for the MCU firmware
  * @firmware_interfaces:    List of interfaces defined in the firmware image
@@ -794,6 +1106,17 @@ struct kbase_csf_scheduler {
  *                          of the real Hw doorbell page for the active GPU
  *                          command queues after they are stopped or after the
  *                          GPU is powered down.
+ * @dummy_user_reg_page:    Address of the dummy page that is mapped in place
+ *                          of the real User register page just before the GPU
+ *                          is powered down. The User register page is mapped
+ *                          in the address space of every process, that created
+ *                          a Base context, to enable the access to LATEST_FLUSH
+ *                          register from userspace.
+ * @mali_file_inode:        Pointer to the inode corresponding to mali device
+ *                          file. This is needed in order to switch to the
+ *                          @dummy_user_reg_page on GPU power down.
+ *                          All instances of the mali device file will point to
+ *                          the same inode.
  * @reg_lock:               Lock to serialize the MCU firmware related actions
  *                          that affect all contexts such as allocation of
  *                          regions from shared interface area, assignment of
@@ -806,7 +1129,7 @@ struct kbase_csf_scheduler {
  * @global_iface:           The result of parsing the global interface
  *                          structure set up by the firmware, including the
  *                          CSGs, CSs, and their properties
- * @scheduler:              The command stream scheduler instance.
+ * @scheduler:              The CS scheduler instance.
  * @reset:                  Contain members required for GPU reset handling.
  * @progress_timeout:       Maximum number of GPU clock cycles without forward
  *                          progress to allow, for all tasks running on
@@ -820,11 +1143,39 @@ struct kbase_csf_scheduler {
  *                          in GPU reset has completed.
  * @firmware_reload_needed: Flag for indicating that the firmware needs to be
  *                          reloaded as part of the GPU reset action.
+ * @firmware_hctl_core_pwr: Flag for indicating that the host driver is in
+ *                          charge of the shader core's power transitions, and
+ *                          the mcu_core_pwroff timeout feature is disabled
+ *                          (i.e. configured 0 in the register field). If
+ *                          false, the control is delegated to the MCU.
  * @firmware_reload_work:   Work item for facilitating the procedural actions
  *                          on reloading the firmware.
  * @glb_init_request_pending: Flag to indicate that Global requests have been
  *                            sent to the FW after MCU was re-enabled and their
  *                            acknowledgement is pending.
+ * @fw_error_work:          Work item for handling the firmware internal error
+ *                          fatal event.
+ * @ipa_control:            IPA Control component manager.
+ * @mcu_core_pwroff_dur_us: Sysfs attribute for the glb_pwroff timeout input
+ *                          in unit of micro-seconds. The firmware does not use
+ *                          it directly.
+ * @mcu_core_pwroff_dur_count: The counterpart of the glb_pwroff timeout input
+ *                             in interface required format, ready to be used
+ *                             directly in the firmware.
+ * @mcu_core_pwroff_reg_shadow: The actual value that has been programmed into
+ *                              the glb_pwroff register. This is separated from
+ *                              the @p mcu_core_pwroff_dur_count as an update
+ *                              to the latter is asynchronous.
+ * @gpu_idle_hysteresis_ms: Sysfs attribute for the idle hysteresis time
+ *                          window in unit of ms. The firmware does not use it
+ *                          directly.
+ * @gpu_idle_dur_count:     The counterpart of the hysteresis time window in
+ *                          interface required format, ready to be used
+ *                          directly in the firmware.
+ * @fw_timeout_ms:          Timeout value (in milliseconds) used when waiting
+ *                          for any request sent to the firmware.
+ * @hwcnt:                  Contain members required for handling the dump of
+ *                          HW counters.
  */
 struct kbase_csf_device {
 	struct kbase_mmu_table mcu_mmu;
@@ -838,6 +1189,8 @@ struct kbase_csf_device {
 	struct file *db_filp;
 	u32 db_file_offsets;
 	struct tagged_addr dummy_db_page;
+	struct tagged_addr dummy_user_reg_page;
+	struct inode *mali_file_inode;
 	struct mutex reg_lock;
 	wait_queue_head_t event_wait;
 	bool interrupt_received;
@@ -849,8 +1202,18 @@ struct kbase_csf_device {
 	bool firmware_inited;
 	bool firmware_reloaded;
 	bool firmware_reload_needed;
+	bool firmware_hctl_core_pwr;
 	struct work_struct firmware_reload_work;
 	bool glb_init_request_pending;
+	struct work_struct fw_error_work;
+	struct kbase_ipa_control ipa_control;
+	u32 mcu_core_pwroff_dur_us;
+	u32 mcu_core_pwroff_dur_count;
+	u32 mcu_core_pwroff_reg_shadow;
+	u32 gpu_idle_hysteresis_ms;
+	u32 gpu_idle_dur_count;
+	unsigned int fw_timeout_ms;
+	struct kbase_csf_hwcnt hwcnt;
 };
 
 /**
diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware.c b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware.c
index 4a924f346685..89585bfc9716 100644
--- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware.c
+++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware.c
@@ -1,11 +1,12 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  *
- * (C) COPYRIGHT 2018-2020 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2018-2021 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,8 +17,6 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 #include "mali_kbase.h"
@@ -25,29 +24,41 @@
 #include "mali_kbase_csf_trace_buffer.h"
 #include "mali_kbase_csf_timeout.h"
 #include "mali_kbase_mem.h"
-#include <mali_kbase_reset_gpu.h>
+#include "mali_kbase_reset_gpu.h"
+#include "mali_kbase_ctx_sched.h"
 #include "mali_kbase_csf_scheduler.h"
 #include "device/mali_kbase_device.h"
 #include "backend/gpu/mali_kbase_pm_internal.h"
 #include "tl/mali_kbase_timeline_priv.h"
 #include "mali_kbase_csf_tl_reader.h"
+#include "backend/gpu/mali_kbase_clk_rate_trace_mgr.h"
+#include <csf/ipa_control/mali_kbase_csf_ipa_control.h>
 
 #include <linux/list.h>
 #include <linux/slab.h>
 #include <linux/firmware.h>
 #include <linux/mman.h>
 #include <linux/string.h>
+#include <linux/mutex.h>
 #if (KERNEL_VERSION(4, 13, 0) <= LINUX_VERSION_CODE)
 #include <linux/set_memory.h>
 #endif
 #include <mmu/mali_kbase_mmu.h>
+#include <asm/arch_timer.h>
 
 #define MALI_MAX_FIRMWARE_NAME_LEN ((size_t)20)
 
+
 static char fw_name[MALI_MAX_FIRMWARE_NAME_LEN] = "mali_csffw.bin";
 module_param_string(fw_name, fw_name, sizeof(fw_name), 0644);
 MODULE_PARM_DESC(fw_name, "firmware image");
 
+/* The waiting time for firmware to boot */
+static unsigned int csf_firmware_boot_timeout_ms = 500;
+module_param(csf_firmware_boot_timeout_ms, uint, 0444);
+MODULE_PARM_DESC(csf_firmware_boot_timeout_ms,
+		 "Maximum time to wait for firmware to boot.");
+
 #ifdef CONFIG_MALI_BIFROST_DEBUG
 /* Makes Driver wait indefinitely for an acknowledgment for the different
  * requests it sends to firmware. Otherwise the timeouts interfere with the
@@ -65,14 +76,6 @@ MODULE_PARM_DESC(fw_debug,
 #define FIRMWARE_HEADER_VERSION  (0ul)
 #define FIRMWARE_HEADER_LENGTH   (0x14ul)
 
-#define CSF_FIRMWARE_ENTRY_READ       (1ul << 0)
-#define CSF_FIRMWARE_ENTRY_WRITE      (1ul << 1)
-#define CSF_FIRMWARE_ENTRY_EXECUTE    (1ul << 2)
-#define CSF_FIRMWARE_ENTRY_CACHE_MODE (3ul << 3)
-#define CSF_FIRMWARE_ENTRY_PROTECTED  (1ul << 5)
-#define CSF_FIRMWARE_ENTRY_SHARED     (1ul << 30)
-#define CSF_FIRMWARE_ENTRY_ZERO       (1ul << 31)
-
 #define CSF_FIRMWARE_ENTRY_SUPPORTED_FLAGS \
 	(CSF_FIRMWARE_ENTRY_READ | \
 	 CSF_FIRMWARE_ENTRY_WRITE | \
@@ -97,11 +100,11 @@ MODULE_PARM_DESC(fw_debug,
 
 #define TL_METADATA_ENTRY_NAME_OFFSET (0x8)
 
-#define CSF_FIRMWARE_BOOT_TIMEOUT_MS     (500)
 #define CSF_MAX_FW_STOP_LOOPS            (100000)
 
-#define CSF_GLB_REQ_CFG_MASK \
-	(GLB_REQ_CFG_ALLOC_EN_MASK | GLB_REQ_CFG_PROGRESS_TIMER_MASK)
+#define CSF_GLB_REQ_CFG_MASK                                                   \
+	(GLB_REQ_CFG_ALLOC_EN_MASK | GLB_REQ_CFG_PROGRESS_TIMER_MASK |         \
+	 GLB_REQ_CFG_PWROFF_TIMER_MASK)
 
 static inline u32 input_page_read(const u32 *const input, const u32 offset)
 {
@@ -142,41 +145,17 @@ static unsigned int entry_size(u32 header)
 {
 	return (header >> 8) & 0xFF;
 }
+static bool entry_update(u32 header)
+{
+	return (header >> 30) & 0x1;
+}
 static bool entry_optional(u32 header)
 {
 	return (header >> 31) & 0x1;
 }
 
 /**
- * struct firmware_interface - Represents an interface in the MCU firmware
- *
- * @node:  Interface objects are on the kbase_device:csf.firmware_interfaces
- *         list using this list_head to link them
- * @phys:  Array of the physical (tagged) addresses making up this interface
- * @name:  NUL-terminated string naming the interface
- * @num_pages: Number of entries in @phys (and length of the interface)
- * @virtual: Virtual address that this interface is mapped at for the GPU
- * @flags: bitmask of CSF_FIRMWARE_ENTRY_* conveying the interface attributes
- * @data_start: Offset into firmware image at which the interface data starts
- * @data_end: Offset into firmware image at which the interface data ends
- * @kernel_map: A kernel mapping of the memory or NULL if not required to be
- *              mapped in the kernel
- * @pma: Array of pointers to protected memory allocations.
- */
-struct firmware_interface {
-	struct list_head node;
-	struct tagged_addr *phys;
-	char *name;
-	u32 num_pages;
-	u32 virtual;
-	u32 flags;
-	u32 data_start;
-	u32 data_end;
-	void *kernel_map;
-	struct protected_memory_allocation **pma;
-};
-
-/**
+ * struct firmware_timeline_metadata -
  * Timeline metadata item within the MCU firmware
  *
  * @node: List head linking all timeline metadata to
@@ -200,7 +179,8 @@ struct firmware_timeline_metadata {
  */
 static int setup_shared_iface_static_region(struct kbase_device *kbdev)
 {
-	struct firmware_interface *interface = kbdev->csf.shared_interface;
+	struct kbase_csf_firmware_interface *interface =
+		kbdev->csf.shared_interface;
 	struct kbase_va_region *reg;
 	int ret = -ENOMEM;
 
@@ -258,7 +238,7 @@ static void stop_csf_firmware(struct kbase_device *kbdev)
 static void wait_for_firmware_boot(struct kbase_device *kbdev)
 {
 	const long wait_timeout =
-		kbase_csf_timeout_in_jiffies(CSF_FIRMWARE_BOOT_TIMEOUT_MS);
+		kbase_csf_timeout_in_jiffies(csf_firmware_boot_timeout_ms);
 	long remaining;
 
 	/* Firmware will generate a global interface interrupt once booting
@@ -434,7 +414,7 @@ static void load_fw_image_section(struct kbase_device *kbdev, const u8 *data,
 static int reload_fw_data_sections(struct kbase_device *kbdev)
 {
 	const u32 magic = FIRMWARE_HEADER_MAGIC;
-	struct firmware_interface *interface;
+	struct kbase_csf_firmware_interface *interface;
 	const struct firmware *firmware;
 	int ret = 0;
 
@@ -503,7 +483,7 @@ static int parse_memory_setup_entry(struct kbase_device *kbdev,
 	u32 num_pages;
 	char *name;
 	struct tagged_addr *phys = NULL;
-	struct firmware_interface *interface = NULL;
+	struct kbase_csf_firmware_interface *interface = NULL;
 	bool allocated_pages = false, protected_mode = false;
 	unsigned long mem_flags = 0;
 	u32 cache_mode = 0;
@@ -573,7 +553,7 @@ static int parse_memory_setup_entry(struct kbase_device *kbdev,
 	load_fw_image_section(kbdev, fw->data, phys, num_pages, flags,
 			data_start, data_end);
 
-	/* Allocate enough memory for the struct firmware_interface and
+	/* Allocate enough memory for the struct kbase_csf_firmware_interface and
 	 * the name of the interface. An extra byte is allocated to place a
 	 * NUL-terminator in. This should already be included according to the
 	 * specification but here we add it anyway to be robust against a
@@ -671,10 +651,7 @@ static int parse_memory_setup_entry(struct kbase_device *kbdev,
 out:
 	if (allocated_pages) {
 		if (protected_mode) {
-			if (interface) {
-				kbase_csf_protected_memory_free(kbdev,
-						interface->pma, num_pages);
-			}
+			kbase_csf_protected_memory_free(kbdev, pma, num_pages);
 		} else {
 			kbase_mem_pool_free_pages(
 				&kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW],
@@ -763,6 +740,11 @@ static int load_firmware_entry(struct kbase_device *kbdev,
 	const unsigned int type = entry_type(header);
 	unsigned int size = entry_size(header);
 	const bool optional = entry_optional(header);
+	/* Update is used with configuration and tracebuffer entries to
+	 * initiate a FIRMWARE_CONFIG_UPDATE, instead of triggering a
+	 * silent reset.
+	 */
+	const bool updatable = entry_update(header);
 	const u32 *entry = (void *)(fw->data + offset);
 
 	if ((offset % sizeof(*entry)) || (size % sizeof(*entry))) {
@@ -797,7 +779,7 @@ static int load_firmware_entry(struct kbase_device *kbdev,
 			return -EINVAL;
 		}
 		return kbase_csf_firmware_cfg_option_entry_parse(
-			kbdev, fw, entry, size);
+			kbdev, fw, entry, size, updatable);
 	case CSF_FIRMWARE_ENTRY_TYPE_FUTF_TEST:
 #ifndef MALI_KBASE_BUILD
 		/* FW UTF option */
@@ -818,7 +800,7 @@ static int load_firmware_entry(struct kbase_device *kbdev,
 			return -EINVAL;
 		}
 		return kbase_csf_firmware_parse_trace_buffer_entry(
-				kbdev, entry, size);
+			kbdev, entry, size, updatable);
 	case CSF_FIRMWARE_ENTRY_TYPE_TIMELINE_METADATA:
 		/* Meta data section */
 		if (size < TL_METADATA_ENTRY_NAME_OFFSET + sizeof(*entry)) {
@@ -865,7 +847,8 @@ static void free_global_iface(struct kbase_device *kbdev)
  */
 static inline void *iface_gpu_va_to_cpu(struct kbase_device *kbdev, u32 gpu_va)
 {
-	struct firmware_interface *interface = kbdev->csf.shared_interface;
+	struct kbase_csf_firmware_interface *interface =
+		kbdev->csf.shared_interface;
 	u8 *kernel_base = interface->kernel_map;
 
 	if (gpu_va < interface->virtual ||
@@ -919,7 +902,7 @@ static int parse_cmd_stream_group_info(struct kbase_device *kbdev,
 
 	if (ginfo->stream_num < MIN_SUPPORTED_STREAMS_PER_GROUP ||
 			ginfo->stream_num > MAX_SUPPORTED_STREAMS_PER_GROUP) {
-		dev_err(kbdev->dev, "CSG with %u streams out of range %u-%u",
+		dev_err(kbdev->dev, "CSG with %u CSs out of range %u-%u",
 				ginfo->stream_num,
 				MIN_SUPPORTED_STREAMS_PER_GROUP,
 				MAX_SUPPORTED_STREAMS_PER_GROUP);
@@ -930,7 +913,7 @@ static int parse_cmd_stream_group_info(struct kbase_device *kbdev,
 
 	if (ginfo->stream_num * ginfo->stream_stride > group_stride) {
 		dev_err(kbdev->dev,
-				"group stride of 0x%x exceeded by %u streams with stride 0x%x",
+				"group stride of 0x%x exceeded by %u CSs with stride 0x%x",
 				group_stride, ginfo->stream_num,
 				ginfo->stream_stride);
 		return -EINVAL;
@@ -950,7 +933,7 @@ static int parse_cmd_stream_group_info(struct kbase_device *kbdev,
 		err = parse_cmd_stream_info(kbdev, &ginfo->streams[sid],
 				stream_base);
 		if (err < 0) {
-			/* caller will free the memory for streams array */
+			/* caller will free the memory for CSs array */
 			return err;
 		}
 	}
@@ -960,7 +943,8 @@ static int parse_cmd_stream_group_info(struct kbase_device *kbdev,
 
 static u32 get_firmware_version(struct kbase_device *kbdev)
 {
-	struct firmware_interface *interface = kbdev->csf.shared_interface;
+	struct kbase_csf_firmware_interface *interface =
+		kbdev->csf.shared_interface;
 	u32 *shared_info = interface->kernel_map;
 
 	return shared_info[GLB_VERSION/4];
@@ -968,7 +952,8 @@ static u32 get_firmware_version(struct kbase_device *kbdev)
 
 static int parse_capabilities(struct kbase_device *kbdev)
 {
-	struct firmware_interface *interface = kbdev->csf.shared_interface;
+	struct kbase_csf_firmware_interface *interface =
+		kbdev->csf.shared_interface;
 	u32 *shared_info = interface->kernel_map;
 	struct kbase_csf_global_iface *iface = &kbdev->csf.global_iface;
 	unsigned int gid;
@@ -1008,6 +993,7 @@ static int parse_capabilities(struct kbase_device *kbdev)
 
 	iface->group_stride = shared_info[GLB_GROUP_STRIDE/4];
 	iface->prfcnt_size = shared_info[GLB_PRFCNT_SIZE/4];
+	iface->instr_features = shared_info[GLB_INSTR_FEATURES / 4];
 
 	if ((GROUP_CONTROL_0 +
 		(unsigned long)iface->group_num * iface->group_stride) >
@@ -1045,7 +1031,7 @@ static int parse_capabilities(struct kbase_device *kbdev)
 static inline void access_firmware_memory(struct kbase_device *kbdev,
 	u32 gpu_addr, u32 *value, const bool read)
 {
-	struct firmware_interface *interface;
+	struct kbase_csf_firmware_interface *interface;
 
 	list_for_each_entry(interface, &kbdev->csf.firmware_interfaces, node) {
 		if ((gpu_addr >= interface->virtual) &&
@@ -1217,6 +1203,69 @@ u32 kbase_csf_firmware_global_output(
 	return val;
 }
 
+/**
+ * handle_internal_firmware_fatal - Handler for CS internal firmware fault.
+ *
+ * @kbdev:  Pointer to kbase device
+ *
+ * Report group fatal error to user space for all GPU command queue groups
+ * in the device, terminate them and reset GPU. The MCU address space
+ * (MCU_AS_NR) and address spaces with no context assigned are skipped.
+ */
+static void handle_internal_firmware_fatal(struct kbase_device *const kbdev)
+{
+	int as;
+
+	for (as = 0; as < kbdev->nr_hw_address_spaces; as++) {
+		unsigned long flags;
+		struct kbase_context *kctx;
+		struct kbase_fault fault;
+
+		if (as == MCU_AS_NR)
+			continue;
+
+		/* Only handle the fault for an active address space. The lock
+		 * makes the context lookup and its refcount retain atomic.
+		 */
+		spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+		kctx = kbase_ctx_sched_as_to_ctx_nolock(kbdev, as);
+
+		if (kctx) {
+			kbase_ctx_sched_retain_ctx_refcount(kctx);
+			spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+		} else {
+			spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+			continue;
+		}
+
+		fault = (struct kbase_fault) {
+			.status = GPU_EXCEPTION_TYPE_SW_FAULT_1,
+		};
+
+		kbase_csf_ctx_handle_fault(kctx, &fault);
+		kbase_ctx_sched_release_ctx_lock(kctx);
+	}
+
+	if (kbase_prepare_to_reset_gpu(kbdev,
+				       RESET_FLAGS_HWC_UNRECOVERABLE_ERROR))
+		kbase_reset_gpu(kbdev);
+}
+
+/**
+ * firmware_error_worker - Worker function for handling firmware internal error
+ *
+ * @data: Pointer to the csf.fw_error_work work_struct embedded in kbase device.
+ *
+ * Recover the owning kbase device and call handle_internal_firmware_fatal().
+ */
+static void firmware_error_worker(struct work_struct *const data)
+{
+	struct kbase_device *const kbdev =
+		container_of(data, struct kbase_device, csf.fw_error_work);
+
+	handle_internal_firmware_fatal(kbdev);
+}
+
 static bool global_request_complete(struct kbase_device *const kbdev,
 				    u32 const req_mask)
 {
@@ -1242,7 +1291,7 @@ static int wait_for_global_request(struct kbase_device *const kbdev,
 				   u32 const req_mask)
 {
 	const long wait_timeout =
-		kbase_csf_timeout_in_jiffies(GLB_REQ_WAIT_TIMEOUT_MS);
+		kbase_csf_timeout_in_jiffies(kbdev->csf.fw_timeout_ms);
 	long remaining;
 	int err = 0;
 
@@ -1285,6 +1334,28 @@ static void enable_endpoints_global(
 	set_global_request(global_iface, GLB_REQ_CFG_ALLOC_EN_MASK);
 }
 
+static void enable_shader_poweroff_timer(struct kbase_device *const kbdev,
+	const struct kbase_csf_global_iface *const global_iface)
+{
+	u32 pwroff_reg;
+
+	if (kbdev->csf.firmware_hctl_core_pwr)
+		pwroff_reg =
+		    GLB_PWROFF_TIMER_TIMER_SOURCE_SET(DISABLE_GLB_PWROFF_TIMER,
+			       GLB_PWROFF_TIMER_TIMER_SOURCE_SYSTEM_TIMESTAMP);
+	else
+		pwroff_reg = kbdev->csf.mcu_core_pwroff_dur_count;
+
+	kbase_csf_firmware_global_input(global_iface, GLB_PWROFF_TIMER,
+					pwroff_reg);
+	set_global_request(global_iface, GLB_REQ_CFG_PWROFF_TIMER_MASK);
+
+	/* Save the programmed GLB_PWROFF_TIMER value in its shadow field */
+	kbdev->csf.mcu_core_pwroff_reg_shadow = pwroff_reg;
+
+	dev_dbg(kbdev->dev, "GLB_PWROFF_TIMER set to 0x%.8x\n", pwroff_reg);
+}
+
 static void set_timeout_global(
 	const struct kbase_csf_global_iface *const global_iface,
 	u64 const timeout)
@@ -1295,29 +1366,16 @@ static void set_timeout_global(
 	set_global_request(global_iface, GLB_REQ_CFG_PROGRESS_TIMER_MASK);
 }
 
-static void set_coherency_mode(struct kbase_device *const kbdev)
+static void global_init(struct kbase_device *const kbdev, u64 core_mask)
 {
-	const struct kbase_csf_global_iface *const global_iface =
-		&kbdev->csf.global_iface;
-	u32 protected_mode_coherency = kbdev->system_coherency;
-
-	/* GPU is supposed to use ACE-Lite coherency mode on a fully coherent
-	 * system during protected mode execution.
-	 */
-	if (kbdev->system_coherency == COHERENCY_ACE)
-		protected_mode_coherency = COHERENCY_ACE_LITE;
-
-	kbase_csf_firmware_global_input(global_iface, GLB_PROTM_COHERENCY,
-					protected_mode_coherency);
-}
-
-static void global_init(struct kbase_device *const kbdev, u32 req_mask)
-{
-	u32 const ack_irq_mask = GLB_ACK_IRQ_MASK_CFG_ALLOC_EN_MASK  |
-			GLB_ACK_IRQ_MASK_PING_MASK |
-			GLB_ACK_IRQ_MASK_CFG_PROGRESS_TIMER_MASK |
-			GLB_ACK_IRQ_MASK_PROTM_ENTER_MASK |
-			GLB_ACK_IRQ_MASK_PROTM_EXIT_MASK;
+	u32 const ack_irq_mask = GLB_ACK_IRQ_MASK_CFG_ALLOC_EN_MASK |
+				 GLB_ACK_IRQ_MASK_PING_MASK |
+				 GLB_ACK_IRQ_MASK_CFG_PROGRESS_TIMER_MASK |
+				 GLB_ACK_IRQ_MASK_PROTM_ENTER_MASK |
+				 GLB_ACK_IRQ_MASK_PROTM_EXIT_MASK |
+				 GLB_ACK_IRQ_MASK_FIRMWARE_CONFIG_UPDATE_MASK |
+				 GLB_ACK_IRQ_MASK_CFG_PWROFF_TIMER_MASK |
+				 GLB_ACK_IRQ_MASK_IDLE_EVENT_MASK;
 
 	const struct kbase_csf_global_iface *const global_iface =
 		&kbdev->csf.global_iface;
@@ -1325,12 +1383,14 @@ static void global_init(struct kbase_device *const kbdev, u32 req_mask)
 
 	kbase_csf_scheduler_spin_lock(kbdev, &flags);
 
-	/* Set the cohereny mode for protected mode execution */
-	set_coherency_mode(kbdev);
+	/* Set the coherency mode for protected mode execution */
+	WARN_ON(kbdev->system_coherency == COHERENCY_ACE);
+	kbase_csf_firmware_global_input(global_iface, GLB_PROTM_COHERENCY,
+					kbdev->system_coherency);
 
-	/* Enable endpoints on all present shader cores */
-	enable_endpoints_global(global_iface,
-		kbase_pm_get_present_cores(kbdev, KBASE_PM_CORE_SHADER));
+	/* Update shader core allocation enable mask */
+	enable_endpoints_global(global_iface, core_mask);
+	enable_shader_poweroff_timer(kbdev, global_iface);
 
 	set_timeout_global(global_iface, kbase_csf_timeout_get(kbdev));
 
@@ -1346,8 +1406,7 @@ static void global_init(struct kbase_device *const kbdev, u32 req_mask)
 /**
  * global_init_on_boot - Sends a global request to control various features.
  *
- * @kbdev: Instance of a GPU platform device that implements a command
- *         stream front-end interface.
+ * @kbdev: Instance of a GPU platform device that implements a CSF interface
  *
  * Currently only the request to enable endpoints and timeout for GPU progress
  * timer is sent.
@@ -1356,19 +1415,29 @@ static void global_init(struct kbase_device *const kbdev, u32 req_mask)
  */
 static int global_init_on_boot(struct kbase_device *const kbdev)
 {
-	u32 const req_mask = CSF_GLB_REQ_CFG_MASK;
+	unsigned long flags;
+	u64 core_mask;
 
-	global_init(kbdev, req_mask);
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	core_mask = kbase_pm_ca_get_core_mask(kbdev);
+	kbdev->csf.firmware_hctl_core_pwr =
+				kbase_pm_no_mcu_core_pwroff(kbdev);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
 
-	return wait_for_global_request(kbdev, req_mask);
+	global_init(kbdev, core_mask);
+
+	return wait_for_global_request(kbdev, CSF_GLB_REQ_CFG_MASK);
 }
 
-void kbase_csf_firmware_global_reinit(struct kbase_device *kbdev)
+void kbase_csf_firmware_global_reinit(struct kbase_device *kbdev,
+				      u64 core_mask)
 {
 	lockdep_assert_held(&kbdev->hwaccess_lock);
 
 	kbdev->csf.glb_init_request_pending = true;
-	global_init(kbdev, CSF_GLB_REQ_CFG_MASK);
+	kbdev->csf.firmware_hctl_core_pwr =
+				kbase_pm_no_mcu_core_pwroff(kbdev);
+	global_init(kbdev, core_mask);
 }
 
 bool kbase_csf_firmware_global_reinit_complete(struct kbase_device *kbdev)
@@ -1382,7 +1451,36 @@ bool kbase_csf_firmware_global_reinit_complete(struct kbase_device *kbdev)
 	return !kbdev->csf.glb_init_request_pending;
 }
 
+void kbase_csf_firmware_update_core_attr(struct kbase_device *kbdev,
+		bool update_core_pwroff_timer, bool update_core_mask, u64 core_mask)
+{
+	unsigned long flags;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	kbase_csf_scheduler_spin_lock(kbdev, &flags);
+	if (update_core_mask)
+		enable_endpoints_global(&kbdev->csf.global_iface, core_mask);
+	if (update_core_pwroff_timer)
+		enable_shader_poweroff_timer(kbdev, &kbdev->csf.global_iface);
+
+	kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR);
+	kbase_csf_scheduler_spin_unlock(kbdev, flags);
+}
+
+bool kbase_csf_firmware_core_attr_updated(struct kbase_device *kbdev)
+{
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	return global_request_complete(kbdev, GLB_REQ_CFG_ALLOC_EN_MASK |
+					      GLB_REQ_CFG_PWROFF_TIMER_MASK);
+}
+
 /**
+ * kbase_csf_firmware_reload_worker() -
+ * reload the fw image and re-enable the MCU
+ * @work: CSF Work item for reloading the firmware.
+ *
  * This helper function will reload the firmware image and re-enable the MCU.
  * It is supposed to be called after MCU(GPU) has been reset.
  * Unlike the initial boot the firmware binary image is not parsed completely.
@@ -1449,6 +1547,130 @@ void kbase_csf_firmware_reload_completed(struct kbase_device *kbdev)
 	kbase_pm_update_state(kbdev);
 }
 
+static u32 convert_dur_to_idle_count(struct kbase_device *kbdev, const u32 dur_ms)
+{
+#define HYSTERESIS_VAL_UNIT_SHIFT (10)
+	/* Build a GLB_IDLE_TIMER value from dur_ms; cntfrq_el0 drives SYSTEM_TIMESTAMP */
+	u64 freq = arch_timer_get_cntfrq();
+	u64 dur_val = dur_ms;
+	u32 cnt_val_u32, reg_val_u32;
+	bool src_system_timestamp = freq > 0;
+
+	if (!src_system_timestamp) {
+		/* No timestamp frequency: use clks[0] as the cycle counter source */
+		spin_lock(&kbdev->pm.clk_rtm.lock);
+		if (kbdev->pm.clk_rtm.clks[0])
+			freq = kbdev->pm.clk_rtm.clks[0]->clock_val;
+		else
+			dev_warn(kbdev->dev, "No GPU clock, unexpected integration issue!");
+		spin_unlock(&kbdev->pm.clk_rtm.lock);
+
+		dev_info(kbdev->dev, "Can't get the timestamp frequency, "
+			 "use cycle counter format with firmware idle hysteresis!");
+	}
+
+	/* dur_val = ((dur_ms / 1000) * freq_Hz) >> 10, multiplying before dividing */
+	dur_val = (dur_val * freq) >> HYSTERESIS_VAL_UNIT_SHIFT;
+	dur_val = div_u64(dur_val, 1000);
+
+	/* Interface limits the value field to S32_MAX */
+	cnt_val_u32 = (dur_val > S32_MAX) ? S32_MAX : (u32)dur_val;
+
+	reg_val_u32 = GLB_IDLE_TIMER_TIMEOUT_SET(0, cnt_val_u32);
+	/* Tag the value with the counter source the count was computed for */
+	if (src_system_timestamp)
+		reg_val_u32 = GLB_IDLE_TIMER_TIMER_SOURCE_SET(reg_val_u32,
+				GLB_IDLE_TIMER_TIMER_SOURCE_SYSTEM_TIMESTAMP);
+	else
+		reg_val_u32 = GLB_IDLE_TIMER_TIMER_SOURCE_SET(reg_val_u32,
+				GLB_IDLE_TIMER_TIMER_SOURCE_GPU_COUNTER);
+
+	return reg_val_u32;
+}
+
+u32 kbase_csf_firmware_get_gpu_idle_hysteresis_time(struct kbase_device *kbdev)
+{
+	return kbdev->csf.gpu_idle_hysteresis_ms;
+}
+
+u32 kbase_csf_firmware_set_gpu_idle_hysteresis_time(struct kbase_device *kbdev, u32 dur)
+{
+	unsigned long flags;
+	const u32 hysteresis_val = convert_dur_to_idle_count(kbdev, dur);
+
+	kbase_csf_scheduler_spin_lock(kbdev, &flags);
+	kbdev->csf.gpu_idle_hysteresis_ms = dur;
+	kbdev->csf.gpu_idle_dur_count = hysteresis_val;
+	kbase_csf_scheduler_spin_unlock(kbdev, flags);
+
+	dev_dbg(kbdev->dev, "CSF set firmware idle hysteresis count-value: 0x%.8x",
+		hysteresis_val);
+
+	return hysteresis_val;
+}
+
+static u32 convert_dur_to_core_pwroff_count(struct kbase_device *kbdev, const u32 dur_us)
+{
+#define PWROFF_VAL_UNIT_SHIFT (10)
+	/* Build a GLB_PWROFF_TIMER value from dur_us; cntfrq_el0 drives SYSTEM_TIMESTAMP */
+	u64 freq = arch_timer_get_cntfrq();
+	u64 dur_val = dur_us;
+	u32 cnt_val_u32, reg_val_u32;
+	bool src_system_timestamp = freq > 0;
+
+	if (!src_system_timestamp) {
+		/* No timestamp frequency: use clks[0] as the cycle counter source */
+		spin_lock(&kbdev->pm.clk_rtm.lock);
+		if (kbdev->pm.clk_rtm.clks[0])
+			freq = kbdev->pm.clk_rtm.clks[0]->clock_val;
+		else
+			dev_warn(kbdev->dev, "No GPU clock, unexpected integration issue!");
+		spin_unlock(&kbdev->pm.clk_rtm.lock);
+
+		dev_info(kbdev->dev, "Can't get the timestamp frequency, "
+			 "use cycle counter with MCU Core Poweroff timer!");
+	}
+
+	/* dur_val = ((dur_us / 1e6) * freq_Hz) >> 10, multiplying before dividing */
+	dur_val = (dur_val * freq) >> PWROFF_VAL_UNIT_SHIFT;
+	dur_val = div_u64(dur_val, 1000000);
+
+	/* Interface limits the value field to S32_MAX */
+	cnt_val_u32 = (dur_val > S32_MAX) ? S32_MAX : (u32)dur_val;
+
+	reg_val_u32 = GLB_PWROFF_TIMER_TIMEOUT_SET(0, cnt_val_u32);
+	/* Tag the value with the counter source the count was computed for */
+	if (src_system_timestamp)
+		reg_val_u32 = GLB_PWROFF_TIMER_TIMER_SOURCE_SET(reg_val_u32,
+				GLB_PWROFF_TIMER_TIMER_SOURCE_SYSTEM_TIMESTAMP);
+	else
+		reg_val_u32 = GLB_PWROFF_TIMER_TIMER_SOURCE_SET(reg_val_u32,
+				GLB_PWROFF_TIMER_TIMER_SOURCE_GPU_COUNTER);
+
+	return reg_val_u32;
+}
+
+u32 kbase_csf_firmware_get_mcu_core_pwroff_time(struct kbase_device *kbdev)
+{
+	return kbdev->csf.mcu_core_pwroff_dur_us;
+}
+
+u32 kbase_csf_firmware_set_mcu_core_pwroff_time(struct kbase_device *kbdev, u32 dur)
+{
+	unsigned long flags;
+	const u32 pwroff = convert_dur_to_core_pwroff_count(kbdev, dur);
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	kbdev->csf.mcu_core_pwroff_dur_us = dur;
+	kbdev->csf.mcu_core_pwroff_dur_count = pwroff;
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	dev_dbg(kbdev->dev, "MCU Core Poweroff input update: 0x%.8x", pwroff);
+
+	return pwroff;
+}
+
+
 int kbase_csf_firmware_init(struct kbase_device *kbdev)
 {
 	const struct firmware *firmware;
@@ -1474,6 +1696,7 @@ int kbase_csf_firmware_init(struct kbase_device *kbdev)
 
 	init_waitqueue_head(&kbdev->csf.event_wait);
 	kbdev->csf.interrupt_received = false;
+	kbdev->csf.fw_timeout_ms = CSF_FIRMWARE_TIMEOUT_MS;
 
 	INIT_LIST_HEAD(&kbdev->csf.firmware_interfaces);
 	INIT_LIST_HEAD(&kbdev->csf.firmware_config);
@@ -1481,9 +1704,18 @@ int kbase_csf_firmware_init(struct kbase_device *kbdev)
 	INIT_LIST_HEAD(&kbdev->csf.firmware_trace_buffers.list);
 	INIT_WORK(&kbdev->csf.firmware_reload_work,
 		  kbase_csf_firmware_reload_worker);
+	INIT_WORK(&kbdev->csf.fw_error_work, firmware_error_worker);
 
 	mutex_init(&kbdev->csf.reg_lock);
 
+	kbdev->csf.gpu_idle_hysteresis_ms = FIRMWARE_IDLE_HYSTERESIS_TIME_MS;
+	kbdev->csf.gpu_idle_dur_count = convert_dur_to_idle_count(kbdev,
+						FIRMWARE_IDLE_HYSTERESIS_TIME_MS);
+
+	kbdev->csf.mcu_core_pwroff_dur_us = DEFAULT_GLB_PWROFF_TIMEOUT_US;
+	kbdev->csf.mcu_core_pwroff_dur_count =
+		convert_dur_to_core_pwroff_count(kbdev, DEFAULT_GLB_PWROFF_TIMEOUT_US);
+
 	ret = kbase_mcu_shared_interface_region_tracker_init(kbdev);
 	if (ret != 0) {
 		dev_err(kbdev->dev, "Failed to setup the rb tree for managing shared interface segment\n");
@@ -1510,8 +1742,8 @@ int kbase_csf_firmware_init(struct kbase_device *kbdev)
 		goto error;
 	}
 
-	version_major = firmware->data[4];
-	version_minor = firmware->data[5];
+	version_minor = firmware->data[4];
+	version_major = firmware->data[5];
 
 	if (version_major != FIRMWARE_HEADER_VERSION) {
 		dev_err(kbdev->dev,
@@ -1590,6 +1822,10 @@ int kbase_csf_firmware_init(struct kbase_device *kbdev)
 	if (ret != 0)
 		goto error;
 
+	ret = kbase_csf_setup_dummy_user_reg_page(kbdev);
+	if (ret != 0)
+		goto error;
+
 	ret = kbase_csf_timeout_init(kbdev);
 	if (ret != 0)
 		goto error;
@@ -1602,6 +1838,7 @@ int kbase_csf_firmware_init(struct kbase_device *kbdev)
 	if (ret != 0)
 		goto error;
 
+
 	/* Firmware loaded successfully */
 	release_firmware(firmware);
 	KBASE_KTRACE_ADD(kbdev, FIRMWARE_BOOT, NULL,
@@ -1620,21 +1857,34 @@ void kbase_csf_firmware_term(struct kbase_device *kbdev)
 	unsigned long flags;
 	int ret = 0;
 
-	while (kbase_reset_gpu_is_active(kbdev) && !ret)
-		ret = kbase_reset_gpu_wait(kbdev);
+	cancel_work_sync(&kbdev->csf.fw_error_work);
+
+	ret = kbase_reset_gpu_wait(kbdev);
 
 	WARN(ret, "failed to wait for GPU reset");
 
-	/* Make sure ongoing transitions have completed */
-	kbase_pm_wait_for_desired_state(kbdev);
-
 	kbase_csf_firmware_cfg_term(kbdev);
 
 	kbase_csf_timeout_term(kbdev);
 
+	kbase_csf_free_dummy_user_reg_page(kbdev);
+
+	kbase_csf_scheduler_term(kbdev);
+
+	kbase_csf_doorbell_mapping_term(kbdev);
+
+	/* Explicitly trigger the disabling of MCU through the state machine and
+	 * wait for its completion. It may not have been disabled yet due to the
+	 * power policy.
+	 */
+	kbdev->pm.backend.mcu_desired = false;
+	kbase_pm_wait_for_desired_state(kbdev);
+
+	free_global_iface(kbdev);
+
 	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
 	kbdev->csf.firmware_inited = false;
-	if (kbdev->pm.backend.mcu_state != KBASE_MCU_OFF) {
+	if (WARN_ON(kbdev->pm.backend.mcu_state != KBASE_MCU_OFF)) {
 		kbdev->pm.backend.mcu_state = KBASE_MCU_OFF;
 		stop_csf_firmware(kbdev);
 	}
@@ -1642,22 +1892,15 @@ void kbase_csf_firmware_term(struct kbase_device *kbdev)
 
 	unload_mmu_tables(kbdev);
 
-	kbase_mmu_term(kbdev, &kbdev->csf.mcu_mmu);
-
-	kbase_csf_scheduler_term(kbdev);
-
-	kbase_csf_doorbell_mapping_term(kbdev);
-
-	free_global_iface(kbdev);
-
-	/* Release the address space */
-	kbdev->as_free |= MCU_AS_BITMASK;
+	kbase_csf_firmware_trace_buffers_term(kbdev);
 
 	while (!list_empty(&kbdev->csf.firmware_interfaces)) {
-		struct firmware_interface *interface;
+		struct kbase_csf_firmware_interface *interface;
 
-		interface = list_first_entry(&kbdev->csf.firmware_interfaces,
-				struct firmware_interface, node);
+		interface =
+			list_first_entry(&kbdev->csf.firmware_interfaces,
+					 struct kbase_csf_firmware_interface,
+					 node);
 		list_del(&interface->node);
 
 		vunmap(interface->kernel_map);
@@ -1687,21 +1930,67 @@ void kbase_csf_firmware_term(struct kbase_device *kbdev)
 		kfree(metadata);
 	}
 
-	kbase_csf_firmware_trace_buffers_term(kbdev);
-
 #ifndef MALI_KBASE_BUILD
 	mali_kutf_fw_utf_entry_cleanup(kbdev);
 #endif
 
-	mutex_destroy(&kbdev->csf.reg_lock);
-
 	/* This will also free up the region allocated for the shared interface
 	 * entry parsed from the firmware image.
 	 */
 	kbase_mcu_shared_interface_region_tracker_term(kbdev);
+
+	mutex_destroy(&kbdev->csf.reg_lock);
+
+	kbase_mmu_term(kbdev, &kbdev->csf.mcu_mmu);
+
+	/* Release the address space */
+	kbdev->as_free |= MCU_AS_BITMASK;
 }
 
-int kbase_csf_firmware_ping(struct kbase_device *const kbdev)
+void kbase_csf_firmware_enable_gpu_idle_timer(struct kbase_device *kbdev)
+{
+	struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface;
+	const u32 glb_req =
+		kbase_csf_firmware_global_input_read(global_iface, GLB_REQ);
+
+	kbase_csf_scheduler_spin_lock_assert_held(kbdev);
+
+	/* The scheduler is assumed to only call the enable when its internal
+	 * state indicates that the idle timer has previously been disabled. So
+	 * on entry the expected field values are:
+	 *   1. GLOBAL_INPUT_BLOCK.GLB_REQ.IDLE_ENABLE: 0
+	 *   2. GLOBAL_OUTPUT_BLOCK.GLB_ACK.IDLE_ENABLE: 0, or, on 1 -> 0
+	 */
+
+	if (glb_req & GLB_REQ_IDLE_ENABLE_MASK)
+		dev_err(kbdev->dev, "Incoherent scheduler state on REQ_IDLE_ENABLE!");
+
+	kbase_csf_firmware_global_input(global_iface, GLB_IDLE_TIMER,
+					kbdev->csf.gpu_idle_dur_count);
+
+	kbase_csf_firmware_global_input_mask(global_iface, GLB_REQ,
+				GLB_REQ_REQ_IDLE_ENABLE, GLB_REQ_IDLE_ENABLE_MASK);
+
+	dev_dbg(kbdev->dev, "Enabling GPU idle timer with count-value: 0x%.8x",
+		kbdev->csf.gpu_idle_dur_count);
+	kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR);
+}
+
+void kbase_csf_firmware_disable_gpu_idle_timer(struct kbase_device *kbdev)
+{
+	struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface;
+
+	kbase_csf_scheduler_spin_lock_assert_held(kbdev);
+
+	kbase_csf_firmware_global_input_mask(global_iface, GLB_REQ,
+					GLB_REQ_REQ_IDLE_DISABLE,
+					GLB_REQ_IDLE_DISABLE_MASK);
+	dev_dbg(kbdev->dev, "Sending request to disable gpu idle timer");
+
+	kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR);
+}
+
+void kbase_csf_firmware_ping(struct kbase_device *const kbdev)
 {
 	const struct kbase_csf_global_iface *const global_iface =
 		&kbdev->csf.global_iface;
@@ -1711,7 +2000,11 @@ int kbase_csf_firmware_ping(struct kbase_device *const kbdev)
 	set_global_request(global_iface, GLB_REQ_PING_MASK);
 	kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR);
 	kbase_csf_scheduler_spin_unlock(kbdev, flags);
+}
 
+int kbase_csf_firmware_ping_wait(struct kbase_device *const kbdev)
+{
+	kbase_csf_firmware_ping(kbdev);
 	return wait_for_global_request(kbdev, GLB_REQ_PING_MASK);
 }
 
@@ -1743,51 +2036,86 @@ void kbase_csf_enter_protected_mode(struct kbase_device *kbdev)
 {
 	struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface;
 	unsigned long flags;
-	unsigned int value;
+	int err;
 
 	kbase_csf_scheduler_spin_lock(kbdev, &flags);
-	value = kbase_csf_firmware_global_output(global_iface, GLB_ACK);
-	value ^= GLB_REQ_PROTM_ENTER_MASK;
-	kbase_csf_firmware_global_input_mask(global_iface, GLB_REQ, value,
-					     GLB_REQ_PROTM_ENTER_MASK);
+	set_global_request(global_iface, GLB_REQ_PROTM_ENTER_MASK);
 	dev_dbg(kbdev->dev, "Sending request to enter protected mode");
 	kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR);
 	kbase_csf_scheduler_spin_unlock(kbdev, flags);
 
-	wait_for_global_request(kbdev, GLB_REQ_PROTM_ENTER_MASK);
+	err = wait_for_global_request(kbdev, GLB_REQ_PROTM_ENTER_MASK);
+
+	if (!err) {
+		unsigned long irq_flags;
+
+		spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+		kbdev->protected_mode = true;
+		kbase_ipa_protection_mode_switch_event(kbdev);
+		kbase_ipa_control_protm_entered(kbdev);
+
+		kbase_csf_scheduler_spin_lock(kbdev, &irq_flags);
+		kbase_hwcnt_backend_csf_protm_entered(&kbdev->hwcnt_gpu_iface);
+		kbase_csf_scheduler_spin_unlock(kbdev, irq_flags);
+
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+	}
 }
 
 void kbase_csf_firmware_trigger_mcu_halt(struct kbase_device *kbdev)
 {
 	struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface;
 	unsigned long flags;
-	unsigned int value;
 
 	kbase_csf_scheduler_spin_lock(kbdev, &flags);
-	value = kbase_csf_firmware_global_output(global_iface, GLB_ACK);
-	value ^= GLB_REQ_HALT_MASK;
-	kbase_csf_firmware_global_input_mask(global_iface, GLB_REQ, value,
-					     GLB_REQ_HALT_MASK);
+	set_global_request(global_iface, GLB_REQ_HALT_MASK);
 	dev_dbg(kbdev->dev, "Sending request to HALT MCU");
 	kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR);
 	kbase_csf_scheduler_spin_unlock(kbdev, flags);
 }
 
+int kbase_csf_trigger_firmware_config_update(struct kbase_device *kbdev)
+{
+	struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface;
+	unsigned long flags;
+	int err = 0;
+
+	/* Keep the GPU powered up until the config update has completed. */
+	kbase_pm_context_active(kbdev);
+
+	/* The 'reg_lock' is also taken and is held until the wait below
+	 * returns, to ensure the config update gets serialized.
+	 */
+	mutex_lock(&kbdev->csf.reg_lock);
+	kbase_csf_scheduler_spin_lock(kbdev, &flags);
+
+	set_global_request(global_iface, GLB_REQ_FIRMWARE_CONFIG_UPDATE_MASK);
+	dev_dbg(kbdev->dev, "Sending request for FIRMWARE_CONFIG_UPDATE");
+	kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR);
+	kbase_csf_scheduler_spin_unlock(kbdev, flags);
+
+	err = wait_for_global_request(kbdev,
+				      GLB_REQ_FIRMWARE_CONFIG_UPDATE_MASK);
+	mutex_unlock(&kbdev->csf.reg_lock);
+
+	kbase_pm_context_idle(kbdev);
+	return err;
+}
+
 /**
- * copy_grp_and_stm - Copy command stream and/or group data
+ * copy_grp_and_stm - Copy CS and/or group data
  *
- * @iface:                Global command stream front-end interface provided by
- *                        the firmware.
+ * @iface:                Global CSF interface provided by the firmware.
  * @group_data:           Pointer where to store all the group data
  *                        (sequentially).
  * @max_group_num:        The maximum number of groups to be read. Can be 0, in
  *                        which case group_data is unused.
- * @stream_data:          Pointer where to store all the stream data
+ * @stream_data:          Pointer where to store all the CS data
  *                        (sequentially).
- * @max_total_stream_num: The maximum number of streams to be read.
+ * @max_total_stream_num: The maximum number of CSs to be read.
  *                        Can be 0, in which case stream_data is unused.
  *
- * Return: Total number of command streams, summed across all groups.
+ * Return: Total number of CSs, summed across all groups.
  */
 static u32 copy_grp_and_stm(
 	const struct kbase_csf_global_iface * const iface,
@@ -1824,26 +2152,28 @@ static u32 copy_grp_and_stm(
 	return total_stream_num;
 }
 
-u32 kbase_csf_firmware_get_glb_iface(struct kbase_device *kbdev,
+u32 kbase_csf_firmware_get_glb_iface(
+	struct kbase_device *kbdev,
 	struct basep_cs_group_control *const group_data,
 	u32 const max_group_num,
 	struct basep_cs_stream_control *const stream_data,
 	u32 const max_total_stream_num, u32 *const glb_version,
-	u32 *const features, u32 *const group_num, u32 *const prfcnt_size)
+	u32 *const features, u32 *const group_num, u32 *const prfcnt_size,
+	u32 *instr_features)
 {
 	const struct kbase_csf_global_iface * const iface =
 		&kbdev->csf.global_iface;
 
-	if (WARN_ON(!glb_version) ||
-		WARN_ON(!features) ||
-		WARN_ON(!group_num) ||
-		WARN_ON(!prfcnt_size))
+	if (WARN_ON(!glb_version) || WARN_ON(!features) ||
+	    WARN_ON(!group_num) || WARN_ON(!prfcnt_size) ||
+	    WARN_ON(!instr_features))
 		return 0;
 
 	*glb_version = iface->version;
 	*features = iface->features;
 	*group_num = iface->group_num;
 	*prfcnt_size = iface->prfcnt_size;
+	*instr_features = iface->instr_features;
 
 	return copy_grp_and_stm(iface, group_data, max_group_num,
 		stream_data, max_total_stream_num);
@@ -1922,9 +2252,9 @@ int kbase_csf_firmware_mcu_shared_mapping_init(
 	mutex_lock(&kbdev->csf.reg_lock);
 	ret = kbase_add_va_region_rbtree(kbdev, va_reg, 0, num_pages, 1);
 	va_reg->flags &= ~KBASE_REG_FREE;
-	mutex_unlock(&kbdev->csf.reg_lock);
 	if (ret)
 		goto va_region_add_error;
+	mutex_unlock(&kbdev->csf.reg_lock);
 
 	gpu_map_properties &= (KBASE_REG_GPU_RD | KBASE_REG_GPU_WR);
 	gpu_map_properties |= gpu_map_prot;
@@ -1946,9 +2276,9 @@ int kbase_csf_firmware_mcu_shared_mapping_init(
 mmu_insert_pages_error:
 	mutex_lock(&kbdev->csf.reg_lock);
 	kbase_remove_va_region(va_reg);
-	mutex_unlock(&kbdev->csf.reg_lock);
 va_region_add_error:
 	kbase_free_alloced_region(va_reg);
+	mutex_unlock(&kbdev->csf.reg_lock);
 va_region_alloc_error:
 	vunmap(cpu_addr);
 vmap_error:
@@ -1962,7 +2292,8 @@ page_list_alloc_error:
 	kfree(phys);
 out:
 	/* Zero-initialize the mapping to make sure that the termination
-	 * function doesn't try to unmap or free random addresses. */
+	 * function doesn't try to unmap or free random addresses.
+	 */
 	csf_mapping->phys = NULL;
 	csf_mapping->cpu_addr = NULL;
 	csf_mapping->va_reg = NULL;
@@ -1977,8 +2308,8 @@ void kbase_csf_firmware_mcu_shared_mapping_term(
 	if (csf_mapping->va_reg) {
 		mutex_lock(&kbdev->csf.reg_lock);
 		kbase_remove_va_region(csf_mapping->va_reg);
-		mutex_unlock(&kbdev->csf.reg_lock);
 		kbase_free_alloced_region(csf_mapping->va_reg);
+		mutex_unlock(&kbdev->csf.reg_lock);
 	}
 
 	if (csf_mapping->phys) {
diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware.h b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware.h
index 03a5217cffb0..768d42339caf 100644
--- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware.h
+++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware.h
@@ -1,11 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  *
- * (C) COPYRIGHT 2018-2020 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2018-2021 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,15 +17,13 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 #ifndef _KBASE_CSF_FIRMWARE_H_
 #define _KBASE_CSF_FIRMWARE_H_
 
 #include "device/mali_kbase_device.h"
-#include "mali_gpu_csf_registers.h"
+#include <uapi/gpu/arm/bifrost/csf/mali_gpu_csf_registers.h>
 
 /*
  * PAGE_KERNEL_RO was only defined on 32bit ARM in 4.19 in:
@@ -71,14 +70,17 @@
 /* All implementations of the host interface with major version 0 must comply
  * with these restrictions:
  */
-/* GLB_GROUP_NUM: At least 3 command stream groups, but no more than 31 */
+/* GLB_GROUP_NUM: At least 3 CSGs, but no more than 31 */
 #define MIN_SUPPORTED_CSGS 3
 #define MAX_SUPPORTED_CSGS 31
-/* GROUP_STREAM_NUM: At least 8 command streams per CSG, but no more than 32 */
+/* GROUP_STREAM_NUM: At least 8 CSs per CSG, but no more than 32 */
 #define MIN_SUPPORTED_STREAMS_PER_GROUP 8
-/* Maximum command streams per csg. */
+/* Maximum CSs per CSG. */
 #define MAX_SUPPORTED_STREAMS_PER_GROUP 32
 
+/* Waiting timeout for status change acknowledgment, in milliseconds */
+#define CSF_FIRMWARE_TIMEOUT_MS (800) /* Relaxed to 800ms from 100ms */
+
 struct kbase_device;
 
 
@@ -111,16 +113,15 @@ struct kbase_csf_trace_buffers {
 };
 
 /**
- * struct kbase_csf_cmd_stream_info - Command stream interface provided by the
- *                                    firmware.
+ * struct kbase_csf_cmd_stream_info - CSI provided by the firmware.
  *
  * @kbdev: Address of the instance of a GPU platform device that implements
  *         this interface.
- * @features: Bit field of command stream features (e.g. which types of jobs
+ * @features: Bit field of CS features (e.g. which types of jobs
  *            are supported). Bits 7:0 specify the number of work registers(-1).
  *            Bits 11:8 specify the number of scoreboard entries(-1).
- * @input: Address of command stream interface input page.
- * @output: Address of command stream interface output page.
+ * @input: Address of CSI input page.
+ * @output: Address of CSI output page.
  */
 struct kbase_csf_cmd_stream_info {
 	struct kbase_device *kbdev;
@@ -130,9 +131,9 @@ struct kbase_csf_cmd_stream_info {
 };
 
 /**
- * kbase_csf_firmware_cs_input() - Set a word in a command stream's input page
+ * kbase_csf_firmware_cs_input() - Set a word in a CS's input page
  *
- * @info: Command stream interface provided by the firmware.
+ * @info: CSI provided by the firmware.
  * @offset: Offset of the word to be written, in bytes.
  * @value: Value to be written.
  */
@@ -140,22 +141,20 @@ void kbase_csf_firmware_cs_input(
 	const struct kbase_csf_cmd_stream_info *info, u32 offset, u32 value);
 
 /**
- * kbase_csf_firmware_cs_input_read() - Read a word in a command stream's input
- *                                      page
+ * kbase_csf_firmware_cs_input_read() - Read a word in a CS's input page
  *
- * Return: Value of the word read from the command stream's input page.
+ * Return: Value of the word read from the CS's input page.
  *
- * @info: Command stream interface provided by the firmware.
+ * @info: CSI provided by the firmware.
  * @offset: Offset of the word to be read, in bytes.
  */
 u32 kbase_csf_firmware_cs_input_read(
 	const struct kbase_csf_cmd_stream_info *const info, const u32 offset);
 
 /**
- * kbase_csf_firmware_cs_input_mask() - Set part of a word in a command stream's
- *                                      input page
+ * kbase_csf_firmware_cs_input_mask() - Set part of a word in a CS's input page
  *
- * @info: Command stream interface provided by the firmware.
+ * @info: CSI provided by the firmware.
  * @offset: Offset of the word to be modified, in bytes.
  * @value: Value to be written.
  * @mask: Bitmask with the bits to be modified set.
@@ -165,19 +164,18 @@ void kbase_csf_firmware_cs_input_mask(
 	u32 value, u32 mask);
 
 /**
- * kbase_csf_firmware_cs_output() - Read a word in a command stream's output
- *                                  page
+ * kbase_csf_firmware_cs_output() - Read a word in a CS's output page
  *
- * Return: Value of the word read from the command stream's output page.
+ * Return: Value of the word read from the CS's output page.
  *
- * @info: Command stream interface provided by the firmware.
+ * @info: CSI provided by the firmware.
  * @offset: Offset of the word to be read, in bytes.
  */
 u32 kbase_csf_firmware_cs_output(
 	const struct kbase_csf_cmd_stream_info *info, u32 offset);
 /**
- * struct kbase_csf_cmd_stream_group_info - Command stream group interface
- *                                          provided by the firmware.
+ * struct kbase_csf_cmd_stream_group_info - CSG interface provided by the
+ *                                          firmware.
  *
  * @kbdev: Address of the instance of a GPU platform device that implements
  *         this interface.
@@ -185,14 +183,13 @@ u32 kbase_csf_firmware_cs_output(
  *            be ignored.
  * @input: Address of global interface input page.
  * @output: Address of global interface output page.
- * @suspend_size: Size in bytes for normal suspend buffer for the command
- *                stream group.
+ * @suspend_size: Size in bytes for normal suspend buffer for the CSG
  * @protm_suspend_size: Size in bytes for protected mode suspend buffer
- *                      for the command stream group.
- * @stream_num: Number of command streams in the command stream group.
+ *                      for the CSG.
+ * @stream_num: Number of CSs in the CSG.
  * @stream_stride: Stride in bytes in JASID0 virtual address between
- *                 command stream capability structures.
- * @streams: Address of an array of command stream capability structures.
+ *                 CS capability structures.
+ * @streams: Address of an array of CS capability structures.
  */
 struct kbase_csf_cmd_stream_group_info {
 	struct kbase_device *kbdev;
@@ -207,10 +204,9 @@ struct kbase_csf_cmd_stream_group_info {
 };
 
 /**
- * kbase_csf_firmware_csg_input() - Set a word in a command stream group's
- *                                  input page
+ * kbase_csf_firmware_csg_input() - Set a word in a CSG's input page
  *
- * @info: Command stream group interface provided by the firmware.
+ * @info: CSG interface provided by the firmware.
  * @offset: Offset of the word to be written, in bytes.
  * @value: Value to be written.
  */
@@ -219,22 +215,21 @@ void kbase_csf_firmware_csg_input(
 	u32 value);
 
 /**
- * kbase_csf_firmware_csg_input_read() - Read a word in a command stream group's
- *                                       input page
+ * kbase_csf_firmware_csg_input_read() - Read a word in a CSG's input page
  *
- * Return: Value of the word read from the command stream group's input page.
+ * Return: Value of the word read from the CSG's input page.
  *
- * @info: Command stream group interface provided by the firmware.
+ * @info: CSG interface provided by the firmware.
  * @offset: Offset of the word to be read, in bytes.
  */
 u32 kbase_csf_firmware_csg_input_read(
 	const struct kbase_csf_cmd_stream_group_info *info, u32 offset);
 
 /**
- * kbase_csf_firmware_csg_input_mask() - Set part of a word in a command stream
- *                                       group's input page
+ * kbase_csf_firmware_csg_input_mask() - Set part of a word in a CSG's
+ *                                       input page
  *
- * @info: Command stream group interface provided by the firmware.
+ * @info: CSG interface provided by the firmware.
  * @offset: Offset of the word to be modified, in bytes.
  * @value: Value to be written.
  * @mask: Bitmask with the bits to be modified set.
@@ -244,19 +239,18 @@ void kbase_csf_firmware_csg_input_mask(
 	u32 value, u32 mask);
 
 /**
- * kbase_csf_firmware_csg_output()- Read a word in a command stream group's
- *                                  output page
+ * kbase_csf_firmware_csg_output()- Read a word in a CSG's output page
  *
- * Return: Value of the word read from the command stream group's output page.
+ * Return: Value of the word read from the CSG's output page.
  *
- * @info: Command stream group interface provided by the firmware.
+ * @info: CSG interface provided by the firmware.
  * @offset: Offset of the word to be read, in bytes.
  */
 u32 kbase_csf_firmware_csg_output(
 	const struct kbase_csf_cmd_stream_group_info *info, u32 offset);
 
 /**
- * struct kbase_csf_global_iface - Global command stream front-end interface
+ * struct kbase_csf_global_iface - Global CSF interface
  *                                 provided by the firmware.
  *
  * @kbdev: Address of the instance of a GPU platform device that implements
@@ -268,11 +262,12 @@ u32 kbase_csf_firmware_csg_output(
  *            be suspended). Reserved bits should be 0, and should be ignored.
  * @input: Address of global interface input page.
  * @output: Address of global interface output page.
- * @group_num: Number of command stream groups supported.
+ * @group_num: Number of CSGs supported.
  * @group_stride: Stride in bytes in JASID0 virtual address between
- *                command stream group capability structures.
+ *                CSG capability structures.
  * @prfcnt_size: Performance counters size.
- * @groups: Address of an array of command stream group capability structures.
+ * @instr_features: Instrumentation features.
+ * @groups: Address of an array of CSG capability structures.
  */
 struct kbase_csf_global_iface {
 	struct kbase_device *kbdev;
@@ -283,13 +278,14 @@ struct kbase_csf_global_iface {
 	u32 group_num;
 	u32 group_stride;
 	u32 prfcnt_size;
+	u32 instr_features;
 	struct kbase_csf_cmd_stream_group_info *groups;
 };
 
 /**
  * kbase_csf_firmware_global_input() - Set a word in the global input page
  *
- * @iface: Command stream front-end interface provided by the firmware.
+ * @iface: CSF interface provided by the firmware.
  * @offset: Offset of the word to be written, in bytes.
  * @value: Value to be written.
  */
@@ -300,7 +296,7 @@ void kbase_csf_firmware_global_input(
  * kbase_csf_firmware_global_input_mask() - Set part of a word in the global
  *                                          input page
  *
- * @iface: Command stream front-end interface provided by the firmware.
+ * @iface: CSF interface provided by the firmware.
  * @offset: Offset of the word to be modified, in bytes.
  * @value: Value to be written.
  * @mask: Bitmask with the bits to be modified set.
@@ -314,7 +310,7 @@ void kbase_csf_firmware_global_input_mask(
  *
  * Return: Value of the word read from the global input page.
  *
+ * @info: CSF interface provided by the firmware.
+ * @info: CSG interface provided by the firmware.
  * @offset: Offset of the word to be read, in bytes.
  */
 u32 kbase_csf_firmware_global_input_read(
@@ -325,7 +321,7 @@ u32 kbase_csf_firmware_global_input_read(
  *
  * Return: Value of the word read from the global output page.
  *
- * @iface: Command stream front-end interface provided by the firmware.
+ * @iface: CSF interface provided by the firmware.
  * @offset: Offset of the word to be read, in bytes.
  */
 u32 kbase_csf_firmware_global_output(
@@ -403,20 +399,28 @@ void kbase_csf_firmware_term(struct kbase_device *kbdev);
 /**
  * kbase_csf_firmware_ping - Send the ping request to firmware.
  *
- * The function sends the ping request to firmware to confirm it is alive.
+ * The function sends the ping request to firmware.
  *
- * @kbdev: Instance of a GPU platform device that implements a command
- *         stream front-end interface.
+ * @kbdev: Instance of a GPU platform device that implements a CSF interface.
+ */
+void kbase_csf_firmware_ping(struct kbase_device *kbdev);
+
+/**
+ * kbase_csf_firmware_ping_wait - Send the ping request to firmware and wait.
+ *
+ * The function sends the ping request to firmware and waits to confirm it is
+ * alive.
+ *
+ * @kbdev: Instance of a GPU platform device that implements a CSF interface.
  *
  * Return: 0 on success, or negative on failure.
  */
-int kbase_csf_firmware_ping(struct kbase_device *kbdev);
+int kbase_csf_firmware_ping_wait(struct kbase_device *kbdev);
 
 /**
  * kbase_csf_firmware_set_timeout - Set a hardware endpoint progress timeout.
  *
- * @kbdev:   Instance of a GPU platform device that implements a command
- *           stream front-end interface.
+ * @kbdev:   Instance of a GPU platform device that implements a CSF interface.
  * @timeout: The maximum number of GPU cycles that is allowed to elapse
  *           without forward progress before the driver terminates a GPU
  *           command queue group.
@@ -433,8 +437,7 @@ int kbase_csf_firmware_set_timeout(struct kbase_device *kbdev, u64 timeout);
  *                                  enter protected mode and wait for its
  *                                  completion.
  *
- * @kbdev: Instance of a GPU platform device that implements a command
- *         stream front-end interface.
+ * @kbdev: Instance of a GPU platform device that implements a CSF interface.
  */
 void kbase_csf_enter_protected_mode(struct kbase_device *kbdev);
 
@@ -454,16 +457,14 @@ static inline bool kbase_csf_firmware_mcu_halted(struct kbase_device *kbdev)
  *                                       into a known internal state for warm
  *                                       boot later.
  *
- * @kbdev: Instance of a GPU platform device that implements a command
- *         stream front-end interface.
+ * @kbdev: Instance of a GPU platform device that implements a CSF interface.
  */
 void kbase_csf_firmware_trigger_mcu_halt(struct kbase_device *kbdev);
 
 /**
  * kbase_csf_firmware_enable_mcu - Send the command to enable MCU
  *
- * @kbdev: Instance of a GPU platform device that implements a command
- *         stream front-end interface.
+ * @kbdev: Instance of a GPU platform device that implements a CSF interface.
  */
 static inline void kbase_csf_firmware_enable_mcu(struct kbase_device *kbdev)
 {
@@ -477,8 +478,7 @@ static inline void kbase_csf_firmware_enable_mcu(struct kbase_device *kbdev)
 /**
  * kbase_csf_firmware_disable_mcu - Send the command to disable MCU
  *
- * @kbdev: Instance of a GPU platform device that implements a command
- *         stream front-end interface.
+ * @kbdev: Instance of a GPU platform device that implements a CSF interface.
  */
 static inline void kbase_csf_firmware_disable_mcu(struct kbase_device *kbdev)
 {
@@ -489,8 +489,7 @@ static inline void kbase_csf_firmware_disable_mcu(struct kbase_device *kbdev)
  * kbase_csf_firmware_disable_mcu_wait - Wait for the MCU to reach disabled
  *                                       status.
  *
- * @kbdev: Instance of a GPU platform device that implements a command
- *         stream front-end interface.
+ * @kbdev: Instance of a GPU platform device that implements a CSF interface.
  */
 void kbase_csf_firmware_disable_mcu_wait(struct kbase_device *kbdev);
 
@@ -499,8 +498,7 @@ void kbase_csf_firmware_disable_mcu_wait(struct kbase_device *kbdev);
  *                                 cold boot case firmware image would be
  *                                 reloaded from filesystem into memory.
  *
- * @kbdev: Instance of a GPU platform device that implements a command
- *         stream front-end interface.
+ * @kbdev: Instance of a GPU platform device that implements a CSF interface.
  */
 void kbase_csf_firmware_trigger_reload(struct kbase_device *kbdev);
 
@@ -508,8 +506,7 @@ void kbase_csf_firmware_trigger_reload(struct kbase_device *kbdev);
  * kbase_csf_firmware_reload_completed - The reboot of MCU firmware has
  *                                       completed.
  *
- * @kbdev: Instance of a GPU platform device that implements a command
- *         stream front-end interface.
+ * @kbdev: Instance of a GPU platform device that implements a CSF interface.
  */
 void kbase_csf_firmware_reload_completed(struct kbase_device *kbdev);
 
@@ -517,10 +514,11 @@ void kbase_csf_firmware_reload_completed(struct kbase_device *kbdev);
  * kbase_csf_firmware_global_reinit - Send the Global configuration requests
  *                                    after the reboot of MCU firmware.
  *
- * @kbdev: Instance of a GPU platform device that implements a command
- *         stream front-end interface.
+ * @kbdev: Instance of a GPU platform device that implements a CSF interface.
+ * @core_mask: Mask of the enabled shader cores.
  */
-void kbase_csf_firmware_global_reinit(struct kbase_device *kbdev);
+void kbase_csf_firmware_global_reinit(struct kbase_device *kbdev,
+				      u64 core_mask);
 
 /**
  * kbase_csf_firmware_global_reinit_complete - Check the Global configuration
@@ -529,45 +527,69 @@ void kbase_csf_firmware_global_reinit(struct kbase_device *kbdev);
  *
  * Return: true if the Global configuration requests completed otherwise false.
  *
- * @kbdev: Instance of a GPU platform device that implements a command
- *         stream front-end interface.
+ * @kbdev: Instance of a GPU platform device that implements a CSF interface.
  */
 bool kbase_csf_firmware_global_reinit_complete(struct kbase_device *kbdev);
 
+/**
+ * kbase_csf_firmware_update_core_attr - Send the Global configuration request
+ *                                       to update the requested core attribute
+ *                                       changes.
+ *
+ * @kbdev: Instance of a GPU platform device that implements a CSF interface.
+ * @update_core_pwroff_timer: If true, signal that the firmware needs to update
+ *                            the MCU power-off timer value.
+ * @update_core_mask:         If true, need to do the core_mask update with
+ *                            the supplied core_mask value.
+ * @core_mask:                New core mask value if update_core_mask is true,
+ *                            otherwise unused.
+ */
+void kbase_csf_firmware_update_core_attr(struct kbase_device *kbdev,
+		bool update_core_pwroff_timer, bool update_core_mask, u64 core_mask);
+
+/**
+ * kbase_csf_firmware_core_attr_updated - Check the Global configuration
+ *                  request has completed or not, that was sent to update
+ *                  the core attributes.
+ *
+ * Return: true if the Global configuration request to update the core
+ *         attributes has completed, otherwise false.
+ *
+ * @kbdev: Instance of a GPU platform device that implements a CSF interface.
+ */
+bool kbase_csf_firmware_core_attr_updated(struct kbase_device *kbdev);
+
 /**
  * Request the global control block of CSF interface capabilities
  *
- * Return: Total number of command streams, summed across all groups.
+ * Return: Total number of CSs, summed across all groups.
  *
  * @kbdev:                 Kbase device.
  * @group_data:            Pointer where to store all the group data
  *                         (sequentially).
  * @max_group_num:         The maximum number of groups to be read.
  *                         Can be 0, in which case group_data is unused.
- * @stream_data:           Pointer where to store all the stream data
+ * @stream_data:           Pointer where to store all the CS data
  *                         (sequentially).
- * @max_total_stream_num:  The maximum number of streams to be read.
+ * @max_total_stream_num:  The maximum number of CSs to be read.
  *                         Can be 0, in which case stream_data is unused.
  * @glb_version:           Where to store the global interface version.
- *                         Bits 31:16 hold the major version number and
- *                         15:0 hold the minor version number.
- *                         A higher minor version is backwards-compatible
- *                         with a lower minor version for the same major
- *                         version.
  * @features:              Where to store a bit mask of features (e.g.
  *                         whether certain types of job can be suspended).
- * @group_num:             Where to store the number of command stream groups
+ * @group_num:             Where to store the number of CSGs
  *                         supported.
  * @prfcnt_size:           Where to store the size of CSF performance counters,
  *                         in bytes. Bits 31:16 hold the size of firmware
  *                         performance counter data and 15:0 hold the size of
  *                         hardware performance counter data.
+ * @instr_features:        Instrumentation features. Bits 7:4 hold the max size
+ *                         of events. Bits 3:0 hold the offset update rate.
  */
-u32 kbase_csf_firmware_get_glb_iface(struct kbase_device *kbdev,
-	struct basep_cs_group_control *group_data, u32 max_group_num,
-	struct basep_cs_stream_control *stream_data, u32 max_total_stream_num,
-	u32 *glb_version, u32 *features, u32 *group_num, u32 *prfcnt_size);
-
+u32 kbase_csf_firmware_get_glb_iface(
+	struct kbase_device *kbdev, struct basep_cs_group_control *group_data,
+	u32 max_group_num, struct basep_cs_stream_control *stream_data,
+	u32 max_total_stream_num, u32 *glb_version, u32 *features,
+	u32 *group_num, u32 *prfcnt_size, u32 *instr_features);
 
 /**
  * Get CSF firmware header timeline metadata content
@@ -660,4 +682,125 @@ static inline long kbase_csf_timeout_in_jiffies(const unsigned int msecs)
 #endif
 }
 
+/**
+ * kbase_csf_firmware_enable_gpu_idle_timer() - Activate the idle hysteresis
+ *                                              monitoring operation
+ *
+ * Program the firmware interface with its configured hysteresis count value
+ * and enable the firmware to act on it. The Caller is
+ * assumed to hold the kbdev->csf.scheduler.interrupt_lock.
+ *
+ * @kbdev: Kbase device structure
+ */
+void kbase_csf_firmware_enable_gpu_idle_timer(struct kbase_device *kbdev);
+
+/**
+ * kbase_csf_firmware_disable_gpu_idle_timer() - Disable the idle time
+ *                                             hysteresis monitoring operation
+ *
+ * Program the firmware interface to disable the idle hysteresis timer. The
+ * Caller is assumed to hold the kbdev->csf.scheduler.interrupt_lock.
+ *
+ * @kbdev: Kbase device structure
+ */
+void kbase_csf_firmware_disable_gpu_idle_timer(struct kbase_device *kbdev);
+
+/**
+ * kbase_csf_firmware_get_gpu_idle_hysteresis_time - Get the firmware GPU idle
+ *                                               detection hysteresis duration
+ *
+ * @kbdev: Instance of a GPU platform device that implements a CSF interface.
+ *
+ * Return: the internally recorded hysteresis (nominal) value.
+ */
+u32 kbase_csf_firmware_get_gpu_idle_hysteresis_time(struct kbase_device *kbdev);
+
+/**
+ * kbase_csf_firmware_set_gpu_idle_hysteresis_time - Set the firmware GPU idle
+ *                                               detection hysteresis duration
+ *
+ * @kbdev: Instance of a GPU platform device that implements a CSF interface.
+ * @dur:     The duration value (unit: milliseconds) for configuring the
+ *           hysteresis field for GPU idle detection
+ *
+ * The supplied value will be recorded internally without any change. But the
+ * actual field value will be subject to hysteresis source frequency scaling
+ * and maximum value limiting. The default source will be SYSTEM_TIMESTAMP
+ * counter. But in case the platform is not able to supply it, the GPU
+ * CYCLE_COUNTER source will be used as an alternative. Bit-31 on the
+ * returned value is the source configuration flag, and it is set to '1'
+ * when CYCLE_COUNTER alternative source is used.
+ *
+ * Return: the actual internally configured hysteresis field value.
+ */
+u32 kbase_csf_firmware_set_gpu_idle_hysteresis_time(struct kbase_device *kbdev, u32 dur);
+
+/**
+ * kbase_csf_firmware_get_mcu_core_pwroff_time - Get the MCU core power-off
+ *                                               time value
+ *
+ * @kbdev:   Instance of a GPU platform device that implements a CSF interface.
+ *
+ * Return: the internally recorded MCU core power-off (nominal) value. The unit
+ *         of the value is in micro-seconds.
+ */
+u32 kbase_csf_firmware_get_mcu_core_pwroff_time(struct kbase_device *kbdev);
+
+/**
+ * kbase_csf_firmware_set_mcu_core_pwroff_time - Set the MCU core power-off
+ *                                               time value
+ *
+ * @kbdev:   Instance of a GPU platform device that implements a CSF interface.
+ * @dur:     The duration value (unit: micro-seconds) for configuring MCU
+ *           core power-off timer, when the shader cores' power
+ *           transitions are delegated to the MCU (normal operational
+ *           mode)
+ *
+ * The supplied value will be recorded internally without any change. But the
+ * actual field value will be subject to core power-off timer source frequency
+ * scaling and maximum value limiting. The default source will be
+ * SYSTEM_TIMESTAMP counter. But in case the platform is not able to supply it,
+ * the GPU CYCLE_COUNTER source will be used as an alternative. Bit-31 on the
+ * returned value is the source configuration flag, and it is set to '1'
+ * when CYCLE_COUNTER alternative source is used.
+ *
+ * The configured MCU core power-off timer will only have effect when the host
+ * driver has delegated the shader cores' power management to MCU.
+ *
+ * Return: the actual internal core power-off timer value in register defined
+ *         format.
+ */
+u32 kbase_csf_firmware_set_mcu_core_pwroff_time(struct kbase_device *kbdev, u32 dur);
+
+/**
+ * kbase_csf_interface_version - Helper function to build the full firmware
+ *                               interface version in a format compatible with
+ *                               the GLB_VERSION register
+ *
+ * @major:     major version of csf interface
+ * @minor:     minor version of csf interface
+ * @patch:     patch version of csf interface
+ *
+ * Return: firmware interface version
+ */
+static inline u32 kbase_csf_interface_version(u32 major, u32 minor, u32 patch)
+{
+	return ((major << GLB_VERSION_MAJOR_SHIFT) |
+		(minor << GLB_VERSION_MINOR_SHIFT) |
+		(patch << GLB_VERSION_PATCH_SHIFT));
+}
+
+/**
+ * kbase_csf_trigger_firmware_config_update - Send a firmware config update.
+ *
+ * @kbdev: Instance of a GPU platform device that implements a CSF interface.
+ *
+ * Any change made to a firmware configuration entry or tracebuffer entry
+ * requires a silent GPU reset for the requested change to take effect, but if
+ * Firmware.header.entry.bit(30) is set then a FIRMWARE_CONFIG_UPDATE can be
+ * requested instead of doing a silent reset.
+ *
+ * Return: 0 if success, or negative error code on failure.
+ */
+int kbase_csf_trigger_firmware_config_update(struct kbase_device *kbdev);
 #endif
diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware_cfg.c b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware_cfg.c
index d282d5ca7fc2..10bc4197f83a 100644
--- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware_cfg.c
+++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware_cfg.c
@@ -1,11 +1,12 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  *
- * (C) COPYRIGHT 2020 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,8 +17,6 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 #include <mali_kbase.h>
@@ -41,6 +40,8 @@
  *               inside CSF_FIRMWARE_CFG_SYSFS_DIR_NAME directory,
  *               representing the configuration option @name.
  * @kobj_inited: kobject initialization state
+ * @updatable:   Indicates whether config items can be updated with
+ *               FIRMWARE_CONFIG_UPDATE
  * @name:        NUL-terminated string naming the option
  * @address:     The address in the firmware image of the configuration option
  * @min:         The lowest legal value of the configuration option
@@ -52,6 +53,7 @@ struct firmware_config {
 	struct kbase_device *kbdev;
 	struct kobject kobj;
 	bool kobj_inited;
+	bool updatable;
 	char *name;
 	u32 address;
 	u32 min;
@@ -142,14 +144,20 @@ static ssize_t store_fw_cfg(struct kobject *kobj,
 			return count;
 		}
 
-		/*
-		 * If there is already a GPU reset pending then inform
-		 * the User to retry the write.
+		/* If configuration update cannot be performed with
+		 * FIRMWARE_CONFIG_UPDATE then we need to do a
+		 * silent reset before we update the memory.
 		 */
-		if (kbase_reset_gpu_silent(kbdev)) {
-			spin_unlock_irqrestore(
-				&kbdev->hwaccess_lock, flags);
-			return -EAGAIN;
+		if (!config->updatable) {
+			/*
+			 * If there is already a GPU reset pending then inform
+			 * the User to retry the write.
+			 */
+			if (kbase_reset_gpu_silent(kbdev)) {
+				spin_unlock_irqrestore(&kbdev->hwaccess_lock,
+						       flags);
+				return -EAGAIN;
+			}
 		}
 
 		/*
@@ -165,10 +173,21 @@ static ssize_t store_fw_cfg(struct kobject *kobj,
 			kbdev, config->address, val);
 
 		config->cur_val = val;
+
 		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
 
+		/* If we can update the config without firmware reset then
+		 * we need to just trigger FIRMWARE_CONFIG_UPDATE.
+		 */
+		if (config->updatable) {
+			ret = kbase_csf_trigger_firmware_config_update(kbdev);
+			if (ret)
+				return ret;
+		}
+
 		/* Wait for the config update to take effect */
-		kbase_reset_gpu_wait(kbdev);
+		if (!config->updatable)
+			kbase_reset_gpu_wait(kbdev);
 	} else {
 		dev_warn(kbdev->dev,
 			"Unexpected write to entry %s/%s",
@@ -254,8 +273,9 @@ void kbase_csf_firmware_cfg_term(struct kbase_device *kbdev)
 }
 
 int kbase_csf_firmware_cfg_option_entry_parse(struct kbase_device *kbdev,
-		const struct firmware *fw,
-		const u32 *entry, unsigned int size)
+					      const struct firmware *fw,
+					      const u32 *entry,
+					      unsigned int size, bool updatable)
 {
 	const char *name = (char *)&entry[3];
 	struct firmware_config *config;
@@ -270,6 +290,7 @@ int kbase_csf_firmware_cfg_option_entry_parse(struct kbase_device *kbdev,
 		return -ENOMEM;
 
 	config->kbdev = kbdev;
+	config->updatable = updatable;
 	config->name = (char *)(config+1);
 	config->address = entry[0];
 	config->min = entry[1];
diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware_cfg.h b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware_cfg.h
index ab4b6ebc5296..36883abad2eb 100644
--- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware_cfg.h
+++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware_cfg.h
@@ -1,11 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  *
- * (C) COPYRIGHT 2020 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,8 +17,6 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 #ifndef _KBASE_CSF_FIRMWARE_CFG_H_
@@ -61,12 +60,15 @@ void kbase_csf_firmware_cfg_term(struct kbase_device *kbdev);
  *
  * Return: 0 if successful, negative error code on failure
  *
- * @kbdev: Kbase device structure
- * @fw:    Firmware image containing the section
- * @entry: Pointer to the section
- * @size:  Size (in bytes) of the section
+ * @kbdev:     Kbase device structure
+ * @fw:        Firmware image containing the section
+ * @entry:     Pointer to the section
+ * @size:      Size (in bytes) of the section
+ * @updatable: Indicates if entry can be updated with FIRMWARE_CONFIG_UPDATE
  */
 int kbase_csf_firmware_cfg_option_entry_parse(struct kbase_device *kbdev,
-		const struct firmware *fw,
-		const u32 *entry, unsigned int size);
+					      const struct firmware *fw,
+					      const u32 *entry,
+					      unsigned int size,
+					      bool updatable);
 #endif /* _KBASE_CSF_FIRMWARE_CFG_H_ */
diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware_no_mali.c b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware_no_mali.c
index 7401113c5d6a..311e3bba6f43 100644
--- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware_no_mali.c
+++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware_no_mali.c
@@ -1,11 +1,12 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  *
- * (C) COPYRIGHT 2018-2020 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2018-2021 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,8 +17,6 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 #include "mali_kbase.h"
@@ -26,19 +25,23 @@
 #include "mali_kbase_csf_timeout.h"
 #include "mali_kbase_mem.h"
 #include "mali_kbase_reset_gpu.h"
+#include "mali_kbase_ctx_sched.h"
 #include "device/mali_kbase_device.h"
 #include "backend/gpu/mali_kbase_pm_internal.h"
 #include "mali_kbase_csf_scheduler.h"
 #include "mmu/mali_kbase_mmu.h"
+#include "backend/gpu/mali_kbase_clk_rate_trace_mgr.h"
 
 #include <linux/list.h>
 #include <linux/slab.h>
 #include <linux/firmware.h>
 #include <linux/mman.h>
 #include <linux/string.h>
+#include <linux/mutex.h>
 #if (KERNEL_VERSION(4, 13, 0) <= LINUX_VERSION_CODE)
 #include <linux/set_memory.h>
 #endif
+#include <asm/arch_timer.h>
 
 #ifdef CONFIG_MALI_BIFROST_DEBUG
 /* Makes Driver wait indefinitely for an acknowledgment for the different
@@ -56,7 +59,7 @@ MODULE_PARM_DESC(fw_debug,
 #define DUMMY_FW_PAGE_SIZE SZ_4K
 
 /**
- * struct dummy_firmware_csi - Represents a dummy interface for MCU firmware streams
+ * struct dummy_firmware_csi - Represents a dummy interface for MCU firmware CSs
  *
  * @cs_kernel_input:  CS kernel input memory region
  * @cs_kernel_output: CS kernel output memory region
@@ -67,7 +70,7 @@ struct dummy_firmware_csi {
 };
 
 /**
- * struct dummy_firmware_csg - Represents a dummy interface for MCU firmware stream groups
+ * struct dummy_firmware_csg - Represents a dummy interface for MCU firmware CSGs
  *
  * @csg_input:  CSG kernel input memory region
  * @csg_output: CSG kernel output memory region
@@ -95,8 +98,9 @@ struct dummy_firmware_interface {
 	struct list_head node;
 } dummy_firmware_interface;
 
-#define CSF_GLB_REQ_CFG_MASK \
-	(GLB_REQ_CFG_ALLOC_EN_MASK | GLB_REQ_CFG_PROGRESS_TIMER_MASK)
+#define CSF_GLB_REQ_CFG_MASK                                                   \
+	(GLB_REQ_CFG_ALLOC_EN_MASK | GLB_REQ_CFG_PROGRESS_TIMER_MASK |         \
+	 GLB_REQ_CFG_PWROFF_TIMER_MASK)
 
 static inline u32 input_page_read(const u32 *const input, const u32 offset)
 {
@@ -233,6 +237,9 @@ static int invent_capabilities(struct kbase_device *kbdev)
 	iface->kbdev = kbdev;
 	iface->features = 0;
 	iface->prfcnt_size = 64;
+	iface->instr_features =
+		0x81; /* update rate=1, max event size = 1<<8 = 256 */
+
 	iface->group_num = ARRAY_SIZE(interface->csg);
 	iface->group_stride = 0;
 
@@ -416,6 +423,69 @@ u32 kbase_csf_firmware_global_output(
 	return val;
 }
 
+/**
+ * handle_internal_firmware_fatal - Handler for CS internal firmware fault.
+ *
+ * @kbdev:  Pointer to kbase device
+ *
+ * Report group fatal error to user space for all GPU command queue groups
+ * in the device, terminate them and reset GPU.
+ */
+static void handle_internal_firmware_fatal(struct kbase_device *const kbdev)
+{
+	int as;
+
+	for (as = 0; as < kbdev->nr_hw_address_spaces; as++) {
+		unsigned long flags;
+		struct kbase_context *kctx;
+		struct kbase_fault fault;
+
+		if (as == MCU_AS_NR)
+			continue; /* the MCU address space is never reported */
+
+		/* Only handle the fault for an active address space. The lock
+		 * is taken so that looking up the context of an address space
+		 * and retaining its refcount happen atomically.
+		 */
+		spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+		kctx = kbase_ctx_sched_as_to_ctx_nolock(kbdev, as);
+
+		if (kctx) {
+			kbase_ctx_sched_retain_ctx_refcount(kctx);
+			spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+		} else {
+			spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+			continue; /* no context on this address space */
+		}
+
+		fault = (struct kbase_fault) {
+			.status = GPU_EXCEPTION_TYPE_SW_FAULT_1,
+		};
+
+		kbase_csf_ctx_handle_fault(kctx, &fault);
+		kbase_ctx_sched_release_ctx_lock(kctx);
+	}
+
+	if (kbase_prepare_to_reset_gpu(kbdev,
+				       RESET_FLAGS_HWC_UNRECOVERABLE_ERROR))
+		kbase_reset_gpu(kbdev);
+}
+
+/**
+ * firmware_error_worker - Worker function for handling firmware internal error
+ *
+ * @data: Pointer to the csf.fw_error_work work_struct embedded in kbase device.
+ *
+ * Looks up the owning device and calls handle_internal_firmware_fatal() on it.
+ */
+static void firmware_error_worker(struct work_struct *const data)
+{
+	struct kbase_device *const kbdev =
+		container_of(data, struct kbase_device, csf.fw_error_work);
+
+	handle_internal_firmware_fatal(kbdev);
+}
+
 static bool global_request_complete(struct kbase_device *const kbdev,
 				    u32 const req_mask)
 {
@@ -441,7 +511,7 @@ static int wait_for_global_request(struct kbase_device *const kbdev,
 				   u32 const req_mask)
 {
 	const long wait_timeout =
-		kbase_csf_timeout_in_jiffies(GLB_REQ_WAIT_TIMEOUT_MS);
+		kbase_csf_timeout_in_jiffies(kbdev->csf.fw_timeout_ms);
 	long remaining;
 	int err = 0;
 
@@ -464,7 +534,7 @@ static void set_global_request(
 {
 	u32 glb_req;
 
-	lockdep_assert_held(&global_iface->kbdev->csf.reg_lock);
+	kbase_csf_scheduler_spin_lock_assert_held(global_iface->kbdev);
 
 	glb_req = kbase_csf_firmware_global_output(global_iface, GLB_ACK);
 	glb_req ^= req_mask;
@@ -484,6 +554,26 @@ static void enable_endpoints_global(
 	set_global_request(global_iface, GLB_REQ_CFG_ALLOC_EN_MASK);
 }
 
+static void enable_shader_poweroff_timer(struct kbase_device *const kbdev,
+		const struct kbase_csf_global_iface *const global_iface)
+{
+	u32 pwroff_reg; /* value written to GLB_PWROFF_TIMER */
+
+	if (kbdev->csf.firmware_hctl_core_pwr)
+		pwroff_reg =
+		    GLB_PWROFF_TIMER_TIMER_SOURCE_SET(DISABLE_GLB_PWROFF_TIMER,
+			GLB_PWROFF_TIMER_TIMER_SOURCE_SYSTEM_TIMESTAMP);
+	else
+		pwroff_reg = kbdev->csf.mcu_core_pwroff_dur_count;
+
+	kbase_csf_firmware_global_input(global_iface, GLB_PWROFF_TIMER,
+					pwroff_reg);
+	set_global_request(global_iface, GLB_REQ_CFG_PWROFF_TIMER_MASK);
+
+	/* Save the programmed register value in its shadow field */
+	kbdev->csf.mcu_core_pwroff_reg_shadow = pwroff_reg;
+}
+
 static void set_timeout_global(
 	const struct kbase_csf_global_iface *const global_iface,
 	u64 const timeout)
@@ -494,13 +584,16 @@ static void set_timeout_global(
 	set_global_request(global_iface, GLB_REQ_CFG_PROGRESS_TIMER_MASK);
 }
 
-static void global_init(struct kbase_device *const kbdev, u32 req_mask)
+static void global_init(struct kbase_device *const kbdev, u64 core_mask)
 {
-	u32 const ack_irq_mask = GLB_ACK_IRQ_MASK_CFG_ALLOC_EN_MASK  |
-			GLB_ACK_IRQ_MASK_PING_MASK |
-			GLB_ACK_IRQ_MASK_CFG_PROGRESS_TIMER_MASK |
-			GLB_ACK_IRQ_MASK_PROTM_ENTER_MASK |
-			GLB_ACK_IRQ_MASK_PROTM_EXIT_MASK;
+	u32 const ack_irq_mask = GLB_ACK_IRQ_MASK_CFG_ALLOC_EN_MASK |
+				 GLB_ACK_IRQ_MASK_PING_MASK |
+				 GLB_ACK_IRQ_MASK_CFG_PROGRESS_TIMER_MASK |
+				 GLB_ACK_IRQ_MASK_PROTM_ENTER_MASK |
+				 GLB_ACK_IRQ_MASK_FIRMWARE_CONFIG_UPDATE_MASK |
+				 GLB_ACK_IRQ_MASK_PROTM_EXIT_MASK |
+				 GLB_ACK_IRQ_MASK_CFG_PWROFF_TIMER_MASK |
+				 GLB_ACK_IRQ_MASK_IDLE_EVENT_MASK;
 
 	const struct kbase_csf_global_iface *const global_iface =
 		&kbdev->csf.global_iface;
@@ -508,9 +601,9 @@ static void global_init(struct kbase_device *const kbdev, u32 req_mask)
 
 	kbase_csf_scheduler_spin_lock(kbdev, &flags);
 
-	/* Enable endpoints on all present shader cores */
-	enable_endpoints_global(global_iface,
-		kbase_pm_get_present_cores(kbdev, KBASE_PM_CORE_SHADER));
+	/* Update shader core allocation enable mask */
+	enable_endpoints_global(global_iface, core_mask);
+	enable_shader_poweroff_timer(kbdev, global_iface);
 
 	set_timeout_global(global_iface, kbase_csf_timeout_get(kbdev));
 
@@ -526,8 +619,7 @@ static void global_init(struct kbase_device *const kbdev, u32 req_mask)
 /**
  * global_init_on_boot - Sends a global request to control various features.
  *
- * @kbdev: Instance of a GPU platform device that implements a command
- *         stream front-end interface.
+ * @kbdev: Instance of a GPU platform device that implements a CSF interface.
  *
  * Currently only the request to enable endpoints and cycle counter is sent.
  *
@@ -535,19 +627,29 @@ static void global_init(struct kbase_device *const kbdev, u32 req_mask)
  */
 static int global_init_on_boot(struct kbase_device *const kbdev)
 {
-	u32 const req_mask = CSF_GLB_REQ_CFG_MASK;
+	unsigned long flags;
+	u64 core_mask;
 
-	global_init(kbdev, req_mask);
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	core_mask = kbase_pm_ca_get_core_mask(kbdev);
+	kbdev->csf.firmware_hctl_core_pwr =
+				kbase_pm_no_mcu_core_pwroff(kbdev);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
 
-	return wait_for_global_request(kbdev, req_mask);
+	global_init(kbdev, core_mask);
+
+	return wait_for_global_request(kbdev, CSF_GLB_REQ_CFG_MASK);
 }
 
-void kbase_csf_firmware_global_reinit(struct kbase_device *kbdev)
+void kbase_csf_firmware_global_reinit(struct kbase_device *kbdev,
+				      u64 core_mask)
 {
 	lockdep_assert_held(&kbdev->hwaccess_lock);
 
 	kbdev->csf.glb_init_request_pending = true;
-	global_init(kbdev, CSF_GLB_REQ_CFG_MASK);
+	kbdev->csf.firmware_hctl_core_pwr =
+				kbase_pm_no_mcu_core_pwroff(kbdev);
+	global_init(kbdev, core_mask);
 }
 
 bool kbase_csf_firmware_global_reinit_complete(struct kbase_device *kbdev)
@@ -561,6 +663,31 @@ bool kbase_csf_firmware_global_reinit_complete(struct kbase_device *kbdev)
 	return !kbdev->csf.glb_init_request_pending;
 }
 
+void kbase_csf_firmware_update_core_attr(struct kbase_device *kbdev,
+		bool update_core_pwroff_timer, bool update_core_mask, u64 core_mask)
+{
+	unsigned long flags;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	kbase_csf_scheduler_spin_lock(kbdev, &flags);
+	if (update_core_mask)
+		enable_endpoints_global(&kbdev->csf.global_iface, core_mask);
+	if (update_core_pwroff_timer)
+		enable_shader_poweroff_timer(kbdev, &kbdev->csf.global_iface);
+	/* The doorbell is rung once, even if neither update was requested */
+	kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR);
+	kbase_csf_scheduler_spin_unlock(kbdev, flags);
+}
+
+bool kbase_csf_firmware_core_attr_updated(struct kbase_device *kbdev)
+{
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+	/* Checks the ALLOC_EN and PWROFF_TIMER config requests together */
+	return global_request_complete(kbdev, GLB_REQ_CFG_ALLOC_EN_MASK |
+					      GLB_REQ_CFG_PWROFF_TIMER_MASK);
+}
+
 static void kbase_csf_firmware_reload_worker(struct work_struct *work)
 {
 	struct kbase_device *kbdev = container_of(work, struct kbase_device,
@@ -604,6 +731,129 @@ void kbase_csf_firmware_reload_completed(struct kbase_device *kbdev)
 	kbase_pm_update_state(kbdev);
 }
 
+static u32 convert_dur_to_idle_count(struct kbase_device *kbdev, const u32 dur_ms)
+{
+#define HYSTERESIS_VAL_UNIT_SHIFT (10)
+	/* CNTFRQ_EL0 drives SYSTEM_TIMESTAMP; 0 is treated as unavailable */
+	u64 freq = arch_timer_get_cntfrq();
+	u64 dur_val = dur_ms;
+	u32 cnt_val_u32, reg_val_u32;
+	bool src_system_timestamp = freq > 0;
+
+	if (!src_system_timestamp) {
+		/* Fall back to the first GPU clock as the cycle counter source */
+		spin_lock(&kbdev->pm.clk_rtm.lock);
+		if (kbdev->pm.clk_rtm.clks[0])
+			freq = kbdev->pm.clk_rtm.clks[0]->clock_val;
+		else
+			dev_warn(kbdev->dev, "No GPU clock, unexpected integration issue!");
+		spin_unlock(&kbdev->pm.clk_rtm.lock);
+
+		dev_info(kbdev->dev, "Can't get the timestamp frequency, "
+			 "use cycle counter format with firmware idle hysteresis!");
+	}
+
+	/* dur_val = ((dur_ms * freq_Hz) >> 10) / 1000; stays 0 if freq is 0 */
+	dur_val = (dur_val * freq) >> HYSTERESIS_VAL_UNIT_SHIFT;
+	dur_val = div_u64(dur_val, 1000);
+
+	/* Interface limits the value field to S32_MAX */
+	cnt_val_u32 = (dur_val > S32_MAX) ? S32_MAX : (u32)dur_val;
+
+	reg_val_u32 = GLB_IDLE_TIMER_TIMEOUT_SET(0, cnt_val_u32);
+	/* Tag the value with the counter source the count was derived from */
+	if (src_system_timestamp)
+		reg_val_u32 = GLB_IDLE_TIMER_TIMER_SOURCE_SET(reg_val_u32,
+				GLB_IDLE_TIMER_TIMER_SOURCE_SYSTEM_TIMESTAMP);
+	else
+		reg_val_u32 = GLB_IDLE_TIMER_TIMER_SOURCE_SET(reg_val_u32,
+				GLB_IDLE_TIMER_TIMER_SOURCE_GPU_COUNTER);
+
+	return reg_val_u32;
+}
+
+u32 kbase_csf_firmware_get_gpu_idle_hysteresis_time(struct kbase_device *kbdev)
+{
+	return kbdev->csf.gpu_idle_hysteresis_ms; /* read without locking */
+}
+
+u32 kbase_csf_firmware_set_gpu_idle_hysteresis_time(struct kbase_device *kbdev, u32 dur)
+{
+	unsigned long flags;
+	const u32 hysteresis_val = convert_dur_to_idle_count(kbdev, dur);
+
+	kbase_csf_scheduler_spin_lock(kbdev, &flags);
+	kbdev->csf.gpu_idle_hysteresis_ms = dur; /* raw duration, ms */
+	kbdev->csf.gpu_idle_dur_count = hysteresis_val; /* encoded value */
+	kbase_csf_scheduler_spin_unlock(kbdev, flags);
+
+	dev_dbg(kbdev->dev, "CSF set firmware idle hysteresis count-value: 0x%.8x",
+		hysteresis_val);
+
+	return hysteresis_val; /* the encoded value, not dur */
+}
+
+static u32 convert_dur_to_core_pwroff_count(struct kbase_device *kbdev, const u32 dur_us)
+{
+#define PWROFF_VAL_UNIT_SHIFT (10)
+	/* CNTFRQ_EL0 drives SYSTEM_TIMESTAMP; 0 is treated as unavailable */
+	u64 freq = arch_timer_get_cntfrq();
+	u64 dur_val = dur_us;
+	u32 cnt_val_u32, reg_val_u32;
+	bool src_system_timestamp = freq > 0;
+
+	if (!src_system_timestamp) {
+		/* Fall back to the first GPU clock as the cycle counter source */
+		spin_lock(&kbdev->pm.clk_rtm.lock);
+		if (kbdev->pm.clk_rtm.clks[0])
+			freq = kbdev->pm.clk_rtm.clks[0]->clock_val;
+		else
+			dev_warn(kbdev->dev, "No GPU clock, unexpected integration issue!");
+		spin_unlock(&kbdev->pm.clk_rtm.lock);
+
+		dev_info(kbdev->dev, "Can't get the timestamp frequency, "
+			 "use cycle counter with MCU Core Poweroff timer!");
+	}
+
+	/* dur_val = ((dur_us * freq_Hz) >> 10) / 1e6; stays 0 if freq is 0 */
+	dur_val = (dur_val * freq) >> PWROFF_VAL_UNIT_SHIFT;
+	dur_val = div_u64(dur_val, 1000000);
+
+	/* Interface limits the value field to S32_MAX */
+	cnt_val_u32 = (dur_val > S32_MAX) ? S32_MAX : (u32)dur_val;
+
+	reg_val_u32 = GLB_PWROFF_TIMER_TIMEOUT_SET(0, cnt_val_u32);
+	/* Tag the value with the counter source the count was derived from */
+	if (src_system_timestamp)
+		reg_val_u32 = GLB_PWROFF_TIMER_TIMER_SOURCE_SET(reg_val_u32,
+				GLB_PWROFF_TIMER_TIMER_SOURCE_SYSTEM_TIMESTAMP);
+	else
+		reg_val_u32 = GLB_PWROFF_TIMER_TIMER_SOURCE_SET(reg_val_u32,
+				GLB_PWROFF_TIMER_TIMER_SOURCE_GPU_COUNTER);
+
+	return reg_val_u32;
+}
+
+u32 kbase_csf_firmware_get_mcu_core_pwroff_time(struct kbase_device *kbdev)
+{
+	return kbdev->csf.mcu_core_pwroff_dur_us; /* read without locking */
+}
+
+u32 kbase_csf_firmware_set_mcu_core_pwroff_time(struct kbase_device *kbdev, u32 dur)
+{
+	unsigned long flags;
+	const u32 pwroff = convert_dur_to_core_pwroff_count(kbdev, dur);
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	kbdev->csf.mcu_core_pwroff_dur_us = dur; /* raw duration, us */
+	kbdev->csf.mcu_core_pwroff_dur_count = pwroff; /* encoded value */
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	dev_dbg(kbdev->dev, "MCU Core Poweroff input update: 0x%.8x", pwroff);
+
+	return pwroff; /* the encoded value, not dur */
+}
+
 int kbase_csf_firmware_init(struct kbase_device *kbdev)
 {
 	int ret;
@@ -623,15 +873,21 @@ int kbase_csf_firmware_init(struct kbase_device *kbdev)
 
 	init_waitqueue_head(&kbdev->csf.event_wait);
 	kbdev->csf.interrupt_received = false;
+	kbdev->csf.fw_timeout_ms = CSF_FIRMWARE_TIMEOUT_MS;
 
 	INIT_LIST_HEAD(&kbdev->csf.firmware_interfaces);
 	INIT_LIST_HEAD(&kbdev->csf.firmware_config);
 	INIT_LIST_HEAD(&kbdev->csf.firmware_trace_buffers.list);
 	INIT_WORK(&kbdev->csf.firmware_reload_work,
 		  kbase_csf_firmware_reload_worker);
+	INIT_WORK(&kbdev->csf.fw_error_work, firmware_error_worker);
 
 	mutex_init(&kbdev->csf.reg_lock);
 
+	kbdev->csf.gpu_idle_hysteresis_ms = FIRMWARE_IDLE_HYSTERESIS_TIME_MS;
+	kbdev->csf.gpu_idle_dur_count = convert_dur_to_idle_count(kbdev,
+						FIRMWARE_IDLE_HYSTERESIS_TIME_MS);
+
 	ret = kbase_mcu_shared_interface_region_tracker_init(kbdev);
 	if (ret != 0) {
 		dev_err(kbdev->dev, "Failed to setup the rb tree for managing shared interface segment\n");
@@ -659,6 +915,10 @@ int kbase_csf_firmware_init(struct kbase_device *kbdev)
 	if (ret != 0)
 		goto error;
 
+	ret = kbase_csf_setup_dummy_user_reg_page(kbdev);
+	if (ret != 0)
+		goto error;
+
 	ret = kbase_csf_scheduler_init(kbdev);
 	if (ret != 0)
 		goto error;
@@ -680,6 +940,8 @@ error:
 
 void kbase_csf_firmware_term(struct kbase_device *kbdev)
 {
+	cancel_work_sync(&kbdev->csf.fw_error_work);
+
 	kbase_csf_timeout_term(kbdev);
 
 	/* NO_MALI: Don't stop firmware or unload MMU tables */
@@ -688,6 +950,8 @@ void kbase_csf_firmware_term(struct kbase_device *kbdev)
 
 	kbase_csf_scheduler_term(kbdev);
 
+	kbase_csf_free_dummy_user_reg_page(kbdev);
+
 	kbase_csf_doorbell_mapping_term(kbdev);
 
 	free_global_iface(kbdev);
@@ -721,7 +985,51 @@ void kbase_csf_firmware_term(struct kbase_device *kbdev)
 	kbase_mcu_shared_interface_region_tracker_term(kbdev);
 }
 
-int kbase_csf_firmware_ping(struct kbase_device *const kbdev)
+void kbase_csf_firmware_enable_gpu_idle_timer(struct kbase_device *kbdev)
+{
+	struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface;
+	u32 glb_req;
+
+	kbase_csf_scheduler_spin_lock_assert_held(kbdev);
+
+	/* The scheduler is assumed to only call the enable when its internal
+	 * state indicates that the idle timer has previously been disabled, so
+	 * the expected field values on entry are listed below. A violation of
+	 * (1) is logged as an error but the enable still goes ahead.
+	 *   1. GLOBAL_INPUT_BLOCK.GLB_REQ.IDLE_ENABLE: 0
+	 *   2. GLOBAL_OUTPUT_BLOCK.GLB_ACK.IDLE_ENABLE: 0, or, on 1 -> 0
+	 */
+	glb_req = kbase_csf_firmware_global_input_read(global_iface, GLB_REQ);
+	if (glb_req & GLB_REQ_IDLE_ENABLE_MASK)
+		dev_err(kbdev->dev, "Incoherent scheduler state on REQ_IDLE_ENABLE!");
+
+	kbase_csf_firmware_global_input(global_iface, GLB_IDLE_TIMER,
+					kbdev->csf.gpu_idle_dur_count);
+
+	kbase_csf_firmware_global_input_mask(global_iface, GLB_REQ,
+				GLB_REQ_REQ_IDLE_ENABLE, GLB_REQ_IDLE_ENABLE_MASK);
+
+	dev_dbg(kbdev->dev, "Enabling GPU idle timer with count-value: 0x%.8x",
+		kbdev->csf.gpu_idle_dur_count);
+	kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR);
+}
+
+void kbase_csf_firmware_disable_gpu_idle_timer(struct kbase_device *kbdev)
+{
+	struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface;
+
+	kbase_csf_scheduler_spin_lock_assert_held(kbdev);
+	/* Only the GLB_REQ idle-disable field is written; no timer value is set */
+	kbase_csf_firmware_global_input_mask(global_iface, GLB_REQ,
+					GLB_REQ_REQ_IDLE_DISABLE,
+					GLB_REQ_IDLE_DISABLE_MASK);
+
+	dev_dbg(kbdev->dev, "Sending request to disable gpu idle timer");
+
+	kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR);
+}
+
+void kbase_csf_firmware_ping(struct kbase_device *const kbdev)
 {
 	const struct kbase_csf_global_iface *const global_iface =
 		&kbdev->csf.global_iface;
@@ -731,7 +1039,11 @@ int kbase_csf_firmware_ping(struct kbase_device *const kbdev)
 	set_global_request(global_iface, GLB_REQ_PING_MASK);
 	kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR);
 	kbase_csf_scheduler_spin_unlock(kbdev, flags);
+}
 
+int kbase_csf_firmware_ping_wait(struct kbase_device *const kbdev)
+{
+	kbase_csf_firmware_ping(kbdev);
 	return wait_for_global_request(kbdev, GLB_REQ_PING_MASK);
 }
 
@@ -763,13 +1075,9 @@ void kbase_csf_enter_protected_mode(struct kbase_device *kbdev)
 {
 	struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface;
 	unsigned long flags;
-	unsigned int value;
 
 	kbase_csf_scheduler_spin_lock(kbdev, &flags);
-	value = kbase_csf_firmware_global_output(global_iface, GLB_ACK);
-	value ^= GLB_REQ_PROTM_ENTER_MASK;
-	kbase_csf_firmware_global_input_mask(global_iface, GLB_REQ, value,
-					     GLB_REQ_PROTM_ENTER_MASK);
+	set_global_request(global_iface, GLB_REQ_PROTM_ENTER_MASK);
 	dev_dbg(kbdev->dev, "Sending request to enter protected mode");
 	kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR);
 	kbase_csf_scheduler_spin_unlock(kbdev, flags);
@@ -781,22 +1089,41 @@ void kbase_csf_firmware_trigger_mcu_halt(struct kbase_device *kbdev)
 {
 	struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface;
 	unsigned long flags;
-	unsigned int value;
 
 	kbase_csf_scheduler_spin_lock(kbdev, &flags);
-	value = kbase_csf_firmware_global_output(global_iface, GLB_ACK);
-	value ^= GLB_REQ_HALT_MASK;
-	kbase_csf_firmware_global_input_mask(global_iface, GLB_REQ, value,
-					     GLB_REQ_HALT_MASK);
+	set_global_request(global_iface, GLB_REQ_HALT_MASK);
 	dev_dbg(kbdev->dev, "Sending request to HALT MCU");
 	kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR);
 	kbase_csf_scheduler_spin_unlock(kbdev, flags);
 }
 
+int kbase_csf_trigger_firmware_config_update(struct kbase_device *kbdev)
+{
+	struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface;
+	unsigned long flags;
+	int err = 0;
+
+	/* The scheduler spinlock only covers raising the request; 'reg_lock'
+	 * is held across the wait as well, so config updates are serialized.
+	 */
+	mutex_lock(&kbdev->csf.reg_lock);
+	kbase_csf_scheduler_spin_lock(kbdev, &flags);
+
+	set_global_request(global_iface, GLB_REQ_FIRMWARE_CONFIG_UPDATE_MASK);
+	dev_dbg(kbdev->dev, "Sending request for FIRMWARE_CONFIG_UPDATE");
+	kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR);
+	kbase_csf_scheduler_spin_unlock(kbdev, flags);
+
+	err = wait_for_global_request(kbdev,
+				      GLB_REQ_FIRMWARE_CONFIG_UPDATE_MASK);
+	mutex_unlock(&kbdev->csf.reg_lock);
+	return err; /* result of wait_for_global_request() */
+}
+
 /**
- * copy_grp_and_stm - Copy command stream and/or group data
+ * copy_grp_and_stm - Copy CS and/or group data
  *
- * @iface:                Global command stream front-end interface provided by
+ * @iface:                Global CSF interface provided by
  *                        the firmware.
  * @group_data:           Pointer where to store all the group data
  *                        (sequentially).
@@ -807,7 +1134,7 @@ void kbase_csf_firmware_trigger_mcu_halt(struct kbase_device *kbdev)
  * @max_total_stream_num: The maximum number of streams to be read.
  *                        Can be 0, in which case stream_data is unused.
  *
- * Return: Total number of command streams, summed across all groups.
+ * Return: Total number of CSs, summed across all groups.
  */
 static u32 copy_grp_and_stm(
 	const struct kbase_csf_global_iface * const iface,
@@ -830,6 +1157,8 @@ static u32 copy_grp_and_stm(
 		if (i < max_group_num) {
 			group_data[i].features = iface->groups[i].features;
 			group_data[i].stream_num = iface->groups[i].stream_num;
+			group_data[i].suspend_size =
+				iface->groups[i].suspend_size;
 		}
 		for (j = 0; j < iface->groups[i].stream_num; j++) {
 			if (total_stream_num < max_total_stream_num)
@@ -842,26 +1171,28 @@ static u32 copy_grp_and_stm(
 	return total_stream_num;
 }
 
-u32 kbase_csf_firmware_get_glb_iface(struct kbase_device *kbdev,
+u32 kbase_csf_firmware_get_glb_iface(
+	struct kbase_device *kbdev,
 	struct basep_cs_group_control *const group_data,
 	u32 const max_group_num,
 	struct basep_cs_stream_control *const stream_data,
 	u32 const max_total_stream_num, u32 *const glb_version,
-	u32 *const features, u32 *const group_num, u32 *const prfcnt_size)
+	u32 *const features, u32 *const group_num, u32 *const prfcnt_size,
+	u32 *const instr_features)
 {
 	const struct kbase_csf_global_iface * const iface =
 		&kbdev->csf.global_iface;
 
-	if (WARN_ON(!glb_version) ||
-		WARN_ON(!features) ||
-		WARN_ON(!group_num) ||
-		WARN_ON(!prfcnt_size))
+	if (WARN_ON(!glb_version) || WARN_ON(!features) ||
+	    WARN_ON(!group_num) || WARN_ON(!prfcnt_size) ||
+	    WARN_ON(!instr_features))
 		return 0;
 
 	*glb_version = iface->version;
 	*features = iface->features;
 	*group_num = iface->group_num;
 	*prfcnt_size = iface->prfcnt_size;
+	*instr_features = iface->instr_features;
 
 	return copy_grp_and_stm(iface, group_data, max_group_num,
 		stream_data, max_total_stream_num);
@@ -941,9 +1272,9 @@ int kbase_csf_firmware_mcu_shared_mapping_init(
 	mutex_lock(&kbdev->csf.reg_lock);
 	ret = kbase_add_va_region_rbtree(kbdev, va_reg, 0, num_pages, 1);
 	va_reg->flags &= ~KBASE_REG_FREE;
-	mutex_unlock(&kbdev->csf.reg_lock);
 	if (ret)
 		goto va_region_add_error;
+	mutex_unlock(&kbdev->csf.reg_lock);
 
 	gpu_map_properties &= (KBASE_REG_GPU_RD | KBASE_REG_GPU_WR);
 	gpu_map_properties |= gpu_map_prot;
@@ -965,9 +1296,9 @@ int kbase_csf_firmware_mcu_shared_mapping_init(
 mmu_insert_pages_error:
 	mutex_lock(&kbdev->csf.reg_lock);
 	kbase_remove_va_region(va_reg);
-	mutex_unlock(&kbdev->csf.reg_lock);
 va_region_add_error:
 	kbase_free_alloced_region(va_reg);
+	mutex_unlock(&kbdev->csf.reg_lock);
 va_region_alloc_error:
 	vunmap(cpu_addr);
 vmap_error:
@@ -981,7 +1312,8 @@ page_list_alloc_error:
 	kfree(phys);
 out:
 	/* Zero-initialize the mapping to make sure that the termination
-	 * function doesn't try to unmap or free random addresses. */
+	 * function doesn't try to unmap or free random addresses.
+	 */
 	csf_mapping->phys = NULL;
 	csf_mapping->cpu_addr = NULL;
 	csf_mapping->va_reg = NULL;
@@ -996,8 +1328,8 @@ void kbase_csf_firmware_mcu_shared_mapping_term(
 	if (csf_mapping->va_reg) {
 		mutex_lock(&kbdev->csf.reg_lock);
 		kbase_remove_va_region(csf_mapping->va_reg);
-		mutex_unlock(&kbdev->csf.reg_lock);
 		kbase_free_alloced_region(csf_mapping->va_reg);
+		mutex_unlock(&kbdev->csf.reg_lock);
 	}
 
 	if (csf_mapping->phys) {
diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_heap_context_alloc.c b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_heap_context_alloc.c
index 087cc858c2b8..6c6d181d27e1 100644
--- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_heap_context_alloc.c
+++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_heap_context_alloc.c
@@ -1,11 +1,12 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  *
- * (C) COPYRIGHT 2019 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,8 +17,6 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 #include <mali_kbase.h>
diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_heap_context_alloc.h b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_heap_context_alloc.h
index f71ea01ed8c0..a39ee92cad24 100644
--- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_heap_context_alloc.h
+++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_heap_context_alloc.h
@@ -1,11 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  *
- * (C) COPYRIGHT 2019 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,8 +17,6 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 #include <mali_kbase.h>
diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_kcpu.c b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_kcpu.c
index e1263d535918..1203d2c40807 100644
--- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_kcpu.c
+++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_kcpu.c
@@ -1,11 +1,12 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  *
- * (C) COPYRIGHT 2018-2020 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2018-2021 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,8 +17,6 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 #include <mali_kbase.h>
@@ -228,15 +227,6 @@ static int kbase_kcpu_jit_allocate_process(
 
 	/* Now start the allocation loop */
 	for (i = 0, info = alloc_info->info; i < count; i++, info++) {
-		if (kctx->jit_alloc[info->id]) {
-			/* The JIT ID is duplicated in this command. Roll back
-			 * previous allocations and fail.
-			 */
-			dev_warn(kctx->kbdev->dev, "JIT ID is duplicated\n");
-			ret = -EINVAL;
-			goto fail;
-		}
-
 		/* Create a JIT allocation */
 		reg = kbase_jit_allocate(kctx, info, true);
 		if (!reg) {
@@ -251,7 +241,7 @@ static int kbase_kcpu_jit_allocate_process(
 					u8 const*const free_ids = jit_cmd->info.jit_free.ids;
 
 					if (free_ids && *free_ids && kctx->jit_alloc[*free_ids]) {
-						/**
+						/*
 						 * A JIT free which is active
 						 * and submitted before this
 						 * command.
@@ -263,11 +253,11 @@ static int kbase_kcpu_jit_allocate_process(
 			}
 
 			if (!can_block) {
-				/**
+				/*
 				 * No prior JIT_FREE command is active. Roll
 				 * back previous allocations and fail.
 				 */
-				dev_warn_ratelimited(kctx->kbdev->dev, "JIT alloc command failed: %p\n", cmd);
+				dev_warn_ratelimited(kctx->kbdev->dev, "JIT alloc command failed: %pK\n", cmd);
 				ret = -ENOMEM;
 				goto fail;
 			}
@@ -368,6 +358,18 @@ static int kbase_kcpu_jit_allocate_prepare(
 			goto out_free;
 	}
 
+	/* Search for duplicate JIT ids; 'i + 1 < count' cannot wrap if count is 0 */
+	for (i = 0; (i + 1) < count; i++) {
+		u32 j;
+
+		for (j = (i + 1); j < count; j++) {
+			if (info[i].id == info[j].id) {
+				ret = -EINVAL;
+				goto out_free;
+			}
+		}
+	}
+
 	current_command->type = BASE_KCPU_COMMAND_TYPE_JIT_ALLOC;
 	list_add_tail(&current_command->info.jit_alloc.node,
 			&kctx->csf.kcpu_queues.jit_cmds_head);
@@ -397,7 +399,7 @@ static void kbase_kcpu_jit_allocate_finish(
 	/* Remove this command from the jit_cmds_head list */
 	list_del(&cmd->info.jit_alloc.node);
 
-	/**
+	/*
 	 * If we get to this point we must have already cleared the blocked
 	 * flag, otherwise it'd be a bug.
 	 */
@@ -420,7 +422,7 @@ static void kbase_kcpu_jit_retry_pending_allocs(struct kbase_context *kctx)
 
 	lockdep_assert_held(&kctx->csf.kcpu_queues.lock);
 
-	/**
+	/*
 	 * Reschedule all queues blocked by JIT_ALLOC commands.
 	 * NOTE: This code traverses the list of blocked queues directly. It
 	 * only works as long as the queued works are not executed at the same
@@ -432,47 +434,63 @@ static void kbase_kcpu_jit_retry_pending_allocs(struct kbase_context *kctx)
 		queue_work(kctx->csf.kcpu_queues.wq, &blocked_queue->work);
 }
 
-static int kbase_kcpu_jit_free_process(struct kbase_context *kctx,
-		struct kbase_kcpu_command *const cmd)
+static int kbase_kcpu_jit_free_process(struct kbase_kcpu_command_queue *queue,
+				       struct kbase_kcpu_command *const cmd)
 {
-	struct kbase_kcpu_command_jit_free_info *const free_info =
-			&cmd->info.jit_free;
-	u8 *ids = free_info->ids;
-	u32 count = free_info->count;
+	struct kbase_kcpu_command_jit_free_info const *const free_info =
+		&cmd->info.jit_free;
+	u8 const *const ids = free_info->ids;
+	u32 const count = free_info->count;
 	u32 i;
+	int rc = 0; /* becomes -EINVAL if any ID in the list is invalid; the loop still continues */
+	struct kbase_context *kctx = queue->kctx;
 
 	if (WARN_ON(!ids))
 		return -EINVAL;
 
 	lockdep_assert_held(&kctx->csf.kcpu_queues.lock);
 
-	for (i = 0; i < count; i++, ids++) {
-		if ((*ids == 0) || (kctx->jit_alloc[*ids] == NULL)) {
-			dev_warn(kctx->kbdev->dev, "invalid JIT free ID\n");
-		} else {
-			/* If the ID is valid but the allocation request
-			 * failed, still succeed this command but don't
-			 * try and free the allocation.
-			 */
-			if (kctx->jit_alloc[*ids] !=
-					KBASE_RESERVED_REG_JIT_ALLOC)
-				kbase_jit_free(kctx, kctx->jit_alloc[*ids]);
+	KBASE_TLSTREAM_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_EXECUTE_JIT_FREE_END(
+		queue->kctx->kbdev, queue);
 
-			kctx->jit_alloc[*ids] = NULL;
+	for (i = 0; i < count; i++) {
+		u64 pages_used = 0; /* reported as 0 unless a real region is freed below */
+		int item_err = 0; /* per-ID status passed to the timeline item tracepoint */
+
+		if (!kctx->jit_alloc[ids[i]]) {
+			dev_warn(kctx->kbdev->dev, "invalid JIT free ID\n");
+			rc = -EINVAL;
+			item_err = rc;
+		} else {
+			struct kbase_va_region *const reg = kctx->jit_alloc[ids[i]];
+
+			/*
+			 * If the ID is valid but the allocation request failed, still
+			 * succeed this command but do not try to free the allocation.
+			 */
+			if (reg != KBASE_RESERVED_REG_JIT_ALLOC) {
+				pages_used = reg->gpu_alloc->nents; /* sampled before the region is freed */
+				kbase_jit_free(kctx, reg);
+			}
+
+			kctx->jit_alloc[ids[i]] = NULL;
 		}
+
+		KBASE_TLSTREAM_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_EXECUTE_JIT_FREE_END(
+			queue->kctx->kbdev, queue, item_err, pages_used);
 	}
 
 	/* Free the list of ids */
-	kfree(free_info->ids);
+	kfree(ids); /* ids aliases free_info->ids, which is not cleared here */
 
-	/**
+	/*
 	 * Remove this command from the jit_cmds_head list and retry pending
 	 * allocations.
 	 */
 	list_del(&cmd->info.jit_free.node);
 	kbase_kcpu_jit_retry_pending_allocs(kctx);
 
-	return 0;
+	return rc; /* 0, or -EINVAL if at least one ID was invalid */
 }
 
 static int kbase_kcpu_jit_free_prepare(
@@ -520,6 +538,18 @@ static int kbase_kcpu_jit_free_prepare(
 		}
 	}
 
+	/* Search for duplicate JIT ids */
+	for (i = 0; i < (count - 1); i++) {
+		u32 j;
+
+		for (j = (i + 1); j < count; j++) {
+			if (ids[i] == ids[j]) {
+				ret = -EINVAL;
+				goto out_free;
+			}
+		}
+	}
+
 	current_command->type = BASE_KCPU_COMMAND_TYPE_JIT_FREE;
 	list_add_tail(&current_command->info.jit_free.node,
 			&kctx->csf.kcpu_queues.jit_cmds_head);
@@ -545,8 +575,8 @@ static int kbase_csf_queue_group_suspend_prepare(
 	u64 end_addr = addr + suspend_buf->size - 1;
 	u64 last_page_addr = end_addr & PAGE_MASK;
 	int nr_pages = (last_page_addr - page_addr) / PAGE_SIZE + 1;
-	int pinned_pages;
-	int ret = 0;
+	int pinned_pages = 0, ret = 0;
+	struct kbase_va_region *reg;
 
 	lockdep_assert_held(&kctx->csf.kcpu_queues.lock);
 
@@ -573,17 +603,58 @@ static int kbase_csf_queue_group_suspend_prepare(
 		goto out_clean_sus_buf;
 	}
 
-	pinned_pages = get_user_pages_fast(page_addr, nr_pages, 1,
-			sus_buf->pages);
-	if (pinned_pages < 0) {
-		ret = pinned_pages;
-		goto out_clean_pages;
-	}
-	if (pinned_pages != nr_pages) {
-		ret = -EINVAL;
-		goto out_clean_pages;
+	/* Check whether page_addr is a valid GPU VA from the SAME_VA zone;
+	 * otherwise treat it as a CPU VA corresponding to host memory
+	 * allocated by userspace.
+	 */
+	kbase_gpu_vm_lock(kctx);
+	reg = kbase_region_tracker_find_region_enclosing_address(kctx,
+							page_addr);
+
+	if (kbase_is_region_invalid_or_free(reg)) {
+		kbase_gpu_vm_unlock(kctx);
+		pinned_pages = get_user_pages_fast(page_addr, nr_pages, 1,
+							sus_buf->pages);
+		kbase_gpu_vm_lock(kctx);
+
+		if (pinned_pages < 0) {
+			ret = pinned_pages;
+			goto out_clean_pages;
+		}
+		if (pinned_pages != nr_pages) {
+			ret = -EINVAL;
+			goto out_clean_pages;
+		}
+	} else {
+		struct tagged_addr *page_array;
+		u64 start, end, i;
+
+		if (!(reg->flags & BASE_MEM_SAME_VA) ||
+				reg->nr_pages < nr_pages ||
+				kbase_reg_current_backed_size(reg) !=
+					reg->nr_pages) {
+			ret = -EINVAL;
+			goto out_clean_pages;
+		}
+
+		start = PFN_DOWN(page_addr) - reg->start_pfn;
+		end = start + nr_pages;
+
+		if (end > reg->nr_pages) {
+			ret = -EINVAL;
+			goto out_clean_pages;
+		}
+
+		sus_buf->cpu_alloc = kbase_mem_phy_alloc_get(reg->cpu_alloc);
+		kbase_mem_phy_alloc_kernel_mapped(reg->cpu_alloc);
+		page_array = kbase_get_cpu_phy_pages(reg);
+		page_array += start;
+
+		for (i = 0; i < nr_pages; i++, page_array++)
+			sus_buf->pages[i] = as_page(*page_array);
 	}
 
+	kbase_gpu_vm_unlock(kctx);
 	current_command->type = BASE_KCPU_COMMAND_TYPE_GROUP_SUSPEND;
 	current_command->info.suspend_buf_copy.sus_buf = sus_buf;
 	current_command->info.suspend_buf_copy.group_handle =
@@ -591,9 +662,11 @@ static int kbase_csf_queue_group_suspend_prepare(
 	return ret;
 
 out_clean_pages:
+	kbase_gpu_vm_unlock(kctx);
 	kfree(sus_buf->pages);
 out_clean_sus_buf:
 	kfree(sus_buf);
+
 	return ret;
 }
 
@@ -642,13 +715,9 @@ static int kbase_kcpu_cqs_wait_process(struct kbase_device *kbdev,
 
 	lockdep_assert_held(&queue->kctx->csf.kcpu_queues.lock);
 
-	if (WARN_ON(!cqs_wait->nr_objs))
-		return -EINVAL;
-
 	if (WARN_ON(!cqs_wait->objs))
 		return -EINVAL;
 
-
 	/* Skip the CQS waits that have already been signaled when processing */
 	for (i = find_first_zero_bit(cqs_wait->signaled, cqs_wait->nr_objs); i < cqs_wait->nr_objs; i++) {
 		if (!test_bit(i, cqs_wait->signaled)) {
@@ -661,22 +730,37 @@ static int kbase_kcpu_cqs_wait_process(struct kbase_device *kbdev,
 				KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_START(
 					kbdev, queue);
 				queue->command_started = true;
+				KBASE_KTRACE_ADD_CSF_KCPU(kbdev, CQS_WAIT_START,
+						   queue, cqs_wait->nr_objs, 0);
 			}
 
-			if (WARN_ON(!evt)) {
+			if (!evt) {
+				dev_warn(kbdev->dev,
+					"Sync memory %llx already freed", cqs_wait->objs[i].addr);
 				queue->has_error = true;
 				return -EINVAL;
 			}
 
 			sig_set = evt[BASEP_EVENT_VAL_INDEX] > cqs_wait->objs[i].val;
 			if (sig_set) {
+				bool error = false;
+
 				bitmap_set(cqs_wait->signaled, i, 1);
 				if ((cqs_wait->inherit_err_flags & (1U << i)) &&
-				    evt[BASEP_EVENT_ERR_INDEX] > 0)
+				    evt[BASEP_EVENT_ERR_INDEX] > 0) {
 					queue->has_error = true;
+					error = true;
+				}
+
+				KBASE_KTRACE_ADD_CSF_KCPU(kbdev, CQS_WAIT_END,
+						queue, cqs_wait->objs[i].addr,
+						error);
 
 				KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_END(
-					kbdev, queue);
+					kbdev, queue,
+					queue->has_error ?
+						evt[BASEP_EVENT_ERR_INDEX] :
+						0);
 				queue->command_started = false;
 			}
 
@@ -697,12 +781,15 @@ static int kbase_kcpu_cqs_wait_prepare(struct kbase_kcpu_command_queue *queue,
 		struct base_kcpu_command_cqs_wait_info *cqs_wait_info,
 		struct kbase_kcpu_command *current_command)
 {
-	struct base_cqs_wait *objs;
+	struct base_cqs_wait_info *objs;
 	unsigned int nr_objs = cqs_wait_info->nr_objs;
 
 	lockdep_assert_held(&queue->kctx->csf.kcpu_queues.lock);
 
-	if (cqs_wait_info->nr_objs > BASEP_KCPU_CQS_MAX_NUM_OBJS)
+	if (nr_objs > BASEP_KCPU_CQS_MAX_NUM_OBJS)
+		return -EINVAL;
+
+	if (!nr_objs)
 		return -EINVAL;
 
 	objs = kcalloc(nr_objs, sizeof(*objs), GFP_KERNEL);
@@ -719,6 +806,7 @@ static int kbase_kcpu_cqs_wait_prepare(struct kbase_kcpu_command_queue *queue,
 		if (kbase_csf_event_wait_add(queue->kctx,
 				event_cqs_callback, queue)) {
 			kfree(objs);
+			queue->cqs_wait_count--;
 			return -ENOMEM;
 		}
 	}
@@ -731,8 +819,15 @@ static int kbase_kcpu_cqs_wait_prepare(struct kbase_kcpu_command_queue *queue,
 
 	current_command->info.cqs_wait.signaled = kcalloc(BITS_TO_LONGS(nr_objs),
 		sizeof(*current_command->info.cqs_wait.signaled), GFP_KERNEL);
-	if (!current_command->info.cqs_wait.signaled)
+	if (!current_command->info.cqs_wait.signaled) {
+		if (--queue->cqs_wait_count == 0) {
+			kbase_csf_event_wait_remove(queue->kctx,
+				event_cqs_callback, queue);
+		}
+
+		kfree(objs);
 		return -ENOMEM;
+	}
 
 	return 0;
 }
@@ -745,24 +840,32 @@ static void kbase_kcpu_cqs_set_process(struct kbase_device *kbdev,
 
 	lockdep_assert_held(&queue->kctx->csf.kcpu_queues.lock);
 
-	WARN_ON(!cqs_set->nr_objs);
-	WARN_ON(!cqs_set->objs);
+	if (WARN_ON(!cqs_set->objs))
+		return;
 
 	for (i = 0; i < cqs_set->nr_objs; i++) {
 		struct kbase_vmap_struct *mapping;
-		u32 *evt = (u32 *)kbase_phy_alloc_mapping_get(queue->kctx,
-					cqs_set->objs[i].addr, &mapping);
-		KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_SET(kbdev, queue);
-		if (WARN_ON(!evt))
+		u32 *evt;
+
+		evt = (u32 *)kbase_phy_alloc_mapping_get(
+			queue->kctx, cqs_set->objs[i].addr, &mapping);
+
+		KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_SET(kbdev, queue,
+								  evt ? 0 : 1);
+
+		if (!evt) {
+			dev_warn(kbdev->dev,
+				"Sync memory %llx already freed", cqs_set->objs[i].addr);
 			queue->has_error = true;
-		else {
-			if (cqs_set->propagate_flags & (1 << i))
-				evt[BASEP_EVENT_ERR_INDEX] = queue->has_error;
-			else
-				evt[BASEP_EVENT_ERR_INDEX] = false;
+		} else {
+			evt[BASEP_EVENT_ERR_INDEX] = queue->has_error;
 			/* Set to signaled */
 			evt[BASEP_EVENT_VAL_INDEX]++;
 			kbase_phy_alloc_mapping_put(queue->kctx, mapping);
+
+			KBASE_KTRACE_ADD_CSF_KCPU(kbdev, CQS_SET,
+					queue, cqs_set->objs[i].addr,
+					evt[BASEP_EVENT_ERR_INDEX]);
 		}
 	}
 
@@ -783,7 +886,10 @@ static int kbase_kcpu_cqs_set_prepare(
 
 	lockdep_assert_held(&kctx->csf.kcpu_queues.lock);
 
-	if (cqs_set_info->nr_objs > BASEP_KCPU_CQS_MAX_NUM_OBJS)
+	if (nr_objs > BASEP_KCPU_CQS_MAX_NUM_OBJS)
+		return -EINVAL;
+
+	if (!nr_objs)
 		return -EINVAL;
 
 	objs = kcalloc(nr_objs, sizeof(*objs), GFP_KERNEL);
@@ -799,8 +905,267 @@ static int kbase_kcpu_cqs_set_prepare(
 	current_command->type = BASE_KCPU_COMMAND_TYPE_CQS_SET;
 	current_command->info.cqs_set.nr_objs = nr_objs;
 	current_command->info.cqs_set.objs = objs;
-	current_command->info.cqs_set.propagate_flags =
-					cqs_set_info->propagate_flags;
+
+	return 0;
+}
+
+static void cleanup_cqs_wait_operation(struct kbase_kcpu_command_queue *queue,
+		struct kbase_kcpu_command_cqs_wait_operation_info *cqs_wait_operation)
+{ /* Tear down one CQS wait-operation command: drop the queue's wait count and free the command's arrays. */
+	WARN_ON(!cqs_wait_operation->nr_objs);
+	WARN_ON(!cqs_wait_operation->objs);
+	WARN_ON(!cqs_wait_operation->signaled);
+	WARN_ON(!queue->cqs_wait_count); /* NOTE(review): warns only; the decrement below still runs when the count is 0 */
+
+	if (--queue->cqs_wait_count == 0) { /* last pending CQS wait on this queue: unregister the event callback */
+		kbase_csf_event_wait_remove(queue->kctx,
+				event_cqs_callback, queue);
+	}
+
+	kfree(cqs_wait_operation->signaled);
+	kfree(cqs_wait_operation->objs);
+	cqs_wait_operation->signaled = NULL; /* clear both pointers so nothing dangles after the frees */
+	cqs_wait_operation->objs = NULL;
+}
+
+static int kbase_kcpu_cqs_wait_operation_process(struct kbase_device *kbdev,
+		struct kbase_kcpu_command_queue *queue,
+		struct kbase_kcpu_command_cqs_wait_operation_info *cqs_wait_operation)
+{ /* Checks each not-yet-signaled CQS object; returns the bitmap_full() result (non-zero once all are signaled) or -EINVAL on error. */
+	u32 i;
+
+	lockdep_assert_held(&queue->kctx->csf.kcpu_queues.lock);
+
+	if (WARN_ON(!cqs_wait_operation->objs))
+		return -EINVAL;
+
+	/* Start at the first object whose bit in the signaled bitmap is still clear */
+	for (i = find_first_zero_bit(cqs_wait_operation->signaled, cqs_wait_operation->nr_objs); i < cqs_wait_operation->nr_objs; i++) {
+		if (!test_bit(i, cqs_wait_operation->signaled)) {
+			struct kbase_vmap_struct *mapping;
+			bool sig_set;
+			u64 *evt = (u64 *)kbase_phy_alloc_mapping_get(queue->kctx,
+						cqs_wait_operation->objs[i].addr, &mapping);
+
+			/* GPUCORE-28172 RDT to review */
+			if (!queue->command_started)
+				queue->command_started = true;
+
+			if (!evt) { /* no mapping was obtained, so there is nothing to put on this path */
+				dev_warn(kbdev->dev,
+					"Sync memory %llx already freed", cqs_wait_operation->objs[i].addr);
+				queue->has_error = true;
+				return -EINVAL;
+			}
+
+			switch (cqs_wait_operation->objs[i].operation) { /* NOTE(review): *evt is compared as a u64 whatever data_type says - confirm for U32 objects */
+			case BASEP_CQS_WAIT_OPERATION_LE:
+				sig_set = *evt <= cqs_wait_operation->objs[i].val;
+				break;
+			case BASEP_CQS_WAIT_OPERATION_GT:
+				sig_set = *evt > cqs_wait_operation->objs[i].val;
+				break;
+			default:
+				dev_warn(kbdev->dev,
+					"Unsupported CQS wait operation %d", cqs_wait_operation->objs[i].operation);
+
+				kbase_phy_alloc_mapping_put(queue->kctx, mapping);
+				queue->has_error = true;
+
+				return -EINVAL;
+			}
+
+			/* Advance evt from the value to the error_state field; the byte offset depends on the CQS data type */
+			switch (cqs_wait_operation->objs[i].data_type) {
+			default:
+				dev_warn(kbdev->dev, "Unreachable data_type=%d", cqs_wait_operation->objs[i].data_type);
+			/* Fallthrough - hint to compiler that there's really only 2 options at present */
+			case BASEP_CQS_DATA_TYPE_U32:
+				evt = (u64 *)((u8 *)evt + sizeof(u32));
+				break;
+			case BASEP_CQS_DATA_TYPE_U64:
+				evt = (u64 *)((u8 *)evt + sizeof(u64));
+				break;
+			}
+
+			if (sig_set) {
+				bitmap_set(cqs_wait_operation->signaled, i, 1);
+				if ((cqs_wait_operation->inherit_err_flags & (1U << i)) &&
+				    *evt > 0) { /* NOTE(review): error_state is also read as a u64 - confirm its width for U32 objects */
+					queue->has_error = true;
+				}
+
+				/* GPUCORE-28172 RDT to review */
+
+				queue->command_started = false;
+			}
+
+			kbase_phy_alloc_mapping_put(queue->kctx, mapping);
+
+			if (!sig_set)
+				break; /* stop at the first unsignaled object; later objects are not examined in this call */
+		}
+	}
+
+	/* For the queue to progress further, all cqs objects should get
+	 * signaled.
+	 */
+	return bitmap_full(cqs_wait_operation->signaled, cqs_wait_operation->nr_objs);
+}
+
+static int kbase_kcpu_cqs_wait_operation_prepare(struct kbase_kcpu_command_queue *queue,
+		struct base_kcpu_command_cqs_wait_operation_info *cqs_wait_operation_info,
+		struct kbase_kcpu_command *current_command)
+{ /* Copies a userspace CQS wait-operation request into current_command; returns 0, -EINVAL for a bad count, or -ENOMEM. */
+	struct base_cqs_wait_operation_info *objs;
+	unsigned int nr_objs = cqs_wait_operation_info->nr_objs;
+
+	lockdep_assert_held(&queue->kctx->csf.kcpu_queues.lock);
+
+	if (nr_objs > BASEP_KCPU_CQS_MAX_NUM_OBJS)
+		return -EINVAL;
+
+	if (!nr_objs)
+		return -EINVAL;
+
+	objs = kcalloc(nr_objs, sizeof(*objs), GFP_KERNEL);
+	if (!objs)
+		return -ENOMEM;
+
+	if (copy_from_user(objs, u64_to_user_ptr(cqs_wait_operation_info->objs),
+			nr_objs * sizeof(*objs))) {
+		kfree(objs);
+		return -ENOMEM; /* NOTE(review): a copy_from_user() failure is conventionally reported as -EFAULT */
+	}
+
+	if (++queue->cqs_wait_count == 1) { /* first pending CQS wait on this queue: register the event callback */
+		if (kbase_csf_event_wait_add(queue->kctx,
+				event_cqs_callback, queue)) {
+			kfree(objs);
+			queue->cqs_wait_count--; /* undo the increment made in the condition above */
+			return -ENOMEM;
+		}
+	}
+
+	current_command->type = BASE_KCPU_COMMAND_TYPE_CQS_WAIT_OPERATION;
+	current_command->info.cqs_wait_operation.nr_objs = nr_objs;
+	current_command->info.cqs_wait_operation.objs = objs;
+	current_command->info.cqs_wait_operation.inherit_err_flags =
+					cqs_wait_operation_info->inherit_err_flags;
+
+	current_command->info.cqs_wait_operation.signaled = kcalloc(BITS_TO_LONGS(nr_objs),
+		sizeof(*current_command->info.cqs_wait_operation.signaled), GFP_KERNEL);
+	if (!current_command->info.cqs_wait_operation.signaled) {
+		if (--queue->cqs_wait_count == 0) { /* roll back the count and, if it reaches 0, the callback registration */
+			kbase_csf_event_wait_remove(queue->kctx,
+				event_cqs_callback, queue);
+		}
+
+		kfree(objs); /* NOTE(review): current_command's objs pointer and type remain set after this free */
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+static void kbase_kcpu_cqs_set_operation_process(
+		struct kbase_device *kbdev,
+		struct kbase_kcpu_command_queue *queue,
+		struct kbase_kcpu_command_cqs_set_operation_info *cqs_set_operation)
+{ /* Applies the ADD/SET operation to every CQS object of the command, writes the queue error state, then frees the objs array. */
+	unsigned int i;
+
+	lockdep_assert_held(&queue->kctx->csf.kcpu_queues.lock);
+
+	if (WARN_ON(!cqs_set_operation->objs))
+		return;
+
+	for (i = 0; i < cqs_set_operation->nr_objs; i++) {
+		struct kbase_vmap_struct *mapping;
+		u64 *evt;
+
+		evt = (u64 *)kbase_phy_alloc_mapping_get(
+			queue->kctx, cqs_set_operation->objs[i].addr, &mapping);
+
+		/* GPUCORE-28172 RDT to review */
+
+		if (!evt) { /* mapping lookup failed: flag the queue and move on to the next object */
+			dev_warn(kbdev->dev,
+				"Sync memory %llx already freed", cqs_set_operation->objs[i].addr);
+			queue->has_error = true;
+		} else {
+			switch (cqs_set_operation->objs[i].operation) { /* NOTE(review): *evt is updated as a u64 whatever data_type says - confirm for U32 objects */
+			case BASEP_CQS_SET_OPERATION_ADD:
+				*evt += cqs_set_operation->objs[i].val;
+				break;
+			case BASEP_CQS_SET_OPERATION_SET:
+				*evt = cqs_set_operation->objs[i].val;
+				break;
+			default:
+				dev_warn(kbdev->dev,
+					"Unsupported CQS set operation %d", cqs_set_operation->objs[i].operation);
+				queue->has_error = true;
+				break;
+			}
+
+			/* Advance evt from the value to the error_state field; the byte offset depends on the CQS data type */
+			switch (cqs_set_operation->objs[i].data_type) {
+			default:
+				dev_warn(kbdev->dev, "Unreachable data_type=%d", cqs_set_operation->objs[i].data_type);
+			/* Fallthrough - hint to compiler that there's really only 2 options at present */
+			case BASEP_CQS_DATA_TYPE_U32:
+				evt = (u64 *)((u8 *)evt + sizeof(u32));
+				break;
+			case BASEP_CQS_DATA_TYPE_U64:
+				evt = (u64 *)((u8 *)evt + sizeof(u64));
+				break;
+			}
+
+			/* GPUCORE-28172 RDT to review */
+
+			/* Always propagate errors */
+			*evt = queue->has_error; /* NOTE(review): 8-byte store through a u64 pointer even when data_type is U32 - confirm */
+
+			kbase_phy_alloc_mapping_put(queue->kctx, mapping);
+		}
+	}
+
+	kbase_csf_event_signal_notify_gpu(queue->kctx);
+
+	kfree(cqs_set_operation->objs);
+	cqs_set_operation->objs = NULL; /* cleared so the freed array is not left dangling */
+}
+
+static int kbase_kcpu_cqs_set_operation_prepare(
+		struct kbase_kcpu_command_queue *kcpu_queue,
+		struct base_kcpu_command_cqs_set_operation_info *cqs_set_operation_info,
+		struct kbase_kcpu_command *current_command)
+{ /* Copies a userspace CQS set-operation request into current_command; returns 0, -EINVAL for a bad count, or -ENOMEM. */
+	struct kbase_context *const kctx = kcpu_queue->kctx;
+	struct base_cqs_set_operation_info *objs;
+	unsigned int nr_objs = cqs_set_operation_info->nr_objs;
+
+	lockdep_assert_held(&kctx->csf.kcpu_queues.lock);
+
+	if (nr_objs > BASEP_KCPU_CQS_MAX_NUM_OBJS)
+		return -EINVAL;
+
+	if (!nr_objs)
+		return -EINVAL;
+
+	objs = kcalloc(nr_objs, sizeof(*objs), GFP_KERNEL);
+	if (!objs)
+		return -ENOMEM;
+
+	if (copy_from_user(objs, u64_to_user_ptr(cqs_set_operation_info->objs),
+			nr_objs * sizeof(*objs))) {
+		kfree(objs);
+		return -ENOMEM; /* NOTE(review): a copy_from_user() failure is conventionally reported as -EFAULT */
+	}
+
+	current_command->type = BASE_KCPU_COMMAND_TYPE_CQS_SET_OPERATION;
+	current_command->info.cqs_set_operation.nr_objs = nr_objs;
+	current_command->info.cqs_set_operation.objs = objs; /* the objs array is stored in the command from here on */
 
 	return 0;
 }
@@ -819,6 +1184,9 @@ static void kbase_csf_fence_wait_callback(struct dma_fence *fence,
 	struct kbase_kcpu_command_queue *kcpu_queue = fence_info->kcpu_queue;
 	struct kbase_context *const kctx = kcpu_queue->kctx;
 
+	KBASE_KTRACE_ADD_CSF_KCPU(kctx->kbdev, FENCE_WAIT_END, kcpu_queue,
+				  fence->context, fence->seqno);
+
 	/* Resume kcpu command queue processing. */
 	queue_work(kctx->csf.kcpu_queues.wq, &kcpu_queue->work);
 }
@@ -835,8 +1203,13 @@ static void kbase_kcpu_fence_wait_cancel(
 		return;
 
 	if (kcpu_queue->fence_wait_processed) {
-		dma_fence_remove_callback(fence_info->fence,
+		bool removed = dma_fence_remove_callback(fence_info->fence,
 				&fence_info->fence_cb);
+
+		if (removed)
+			KBASE_KTRACE_ADD_CSF_KCPU(kctx->kbdev, FENCE_WAIT_END,
+					kcpu_queue, fence_info->fence->context,
+					fence_info->fence->seqno);
 	}
 
 	/* Release the reference which is kept by the kcpu_queue */
@@ -880,6 +1253,9 @@ static int kbase_kcpu_fence_wait_process(
 			&fence_info->fence_cb,
 			kbase_csf_fence_wait_callback);
 
+		KBASE_KTRACE_ADD_CSF_KCPU(kcpu_queue->kctx->kbdev,
+					  FENCE_WAIT_START, kcpu_queue,
+					  fence->context, fence->seqno);
 		fence_status = cb_err;
 		if (cb_err == 0)
 			kcpu_queue->fence_wait_processed = true;
@@ -950,6 +1326,10 @@ static int kbase_kcpu_fence_signal_process(
 			"fence_signal() failed with %d\n", ret);
 	}
 
+	KBASE_KTRACE_ADD_CSF_KCPU(kctx->kbdev, FENCE_SIGNAL, kcpu_queue,
+				  fence_info->fence->context,
+				  fence_info->fence->seqno);
+
 	dma_fence_put(fence_info->fence);
 	fence_info->fence = NULL;
 
@@ -1059,6 +1439,9 @@ static int delete_queue(struct kbase_context *kctx, u32 id)
 		struct kbase_kcpu_command_queue *queue =
 					kctx->csf.kcpu_queues.array[id];
 
+		KBASE_KTRACE_ADD_CSF_KCPU(kctx->kbdev, KCPU_QUEUE_DESTROY,
+			queue, queue->num_pending_cmds, queue->cqs_wait_count);
+
 		/* Drain the remaining work for this queue first and go past
 		 * all the waits.
 		 */
@@ -1096,7 +1479,7 @@ static void KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_ALLOC_INFO(
 	struct kbase_device *kbdev,
 	const struct kbase_kcpu_command_queue *queue,
 	const struct kbase_kcpu_command_jit_alloc_info *jit_alloc,
-	bool alloc_success)
+	int alloc_status)
 {
 	u8 i;
 
@@ -1108,8 +1491,8 @@ static void KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_ALLOC_INFO(
 		u64 gpu_alloc_addr = 0;
 		u64 mmu_flags = 0;
 
-		if (alloc_success && !WARN_ON(!reg) &&
-			!WARN_ON(reg == KBASE_RESERVED_REG_JIT_ALLOC)) {
+		if ((alloc_status == 0) && !WARN_ON(!reg) &&
+		    !WARN_ON(reg == KBASE_RESERVED_REG_JIT_ALLOC)) {
 #ifdef CONFIG_MALI_VECTOR_DUMP
 			struct tagged_addr phy = {0};
 #endif /* CONFIG_MALI_VECTOR_DUMP */
@@ -1123,7 +1506,7 @@ static void KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_ALLOC_INFO(
 #endif /* CONFIG_MALI_VECTOR_DUMP */
 		}
 		KBASE_TLSTREAM_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_EXECUTE_JIT_ALLOC_END(
-			kbdev, queue, gpu_alloc_addr, mmu_flags);
+			kbdev, queue, alloc_status, gpu_alloc_addr, mmu_flags);
 	}
 }
 
@@ -1135,30 +1518,6 @@ static void KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_ALLOC_END(
 		kbdev, queue);
 }
 
-static void KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_FREE_INFO(
-	struct kbase_device *kbdev,
-	const struct kbase_kcpu_command_queue *queue,
-	const struct kbase_kcpu_command_jit_free_info *jit_free)
-{
-	u8 i;
-
-	KBASE_TLSTREAM_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_EXECUTE_JIT_FREE_END(
-		kbdev, queue);
-	for (i = 0; i < jit_free->count; i++) {
-		const u8 id = jit_free->ids[i];
-		u64 pages_used = 0;
-
-		if (id != 0) {
-			const struct kbase_va_region *reg =
-				queue->kctx->jit_alloc[id];
-			if (reg && (reg != KBASE_RESERVED_REG_JIT_ALLOC))
-				pages_used = reg->gpu_alloc->nents;
-		}
-		KBASE_TLSTREAM_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_EXECUTE_JIT_FREE_END(
-			kbdev, queue, pages_used);
-	}
-}
-
 static void KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_FREE_END(
 	struct kbase_device *kbdev,
 	const struct kbase_kcpu_command_queue *queue)
@@ -1189,10 +1548,8 @@ static void kcpu_queue_process(struct kbase_kcpu_command_queue *queue,
 				queue->command_started = true;
 			}
 
-#ifdef CONFIG_SYNC_FILE
 			status = 0;
-
-
+#ifdef CONFIG_SYNC_FILE
 			if (ignore_waits) {
 				kbase_kcpu_fence_wait_cancel(queue,
 					&cmd->info.fence);
@@ -1208,11 +1565,14 @@ static void kcpu_queue_process(struct kbase_kcpu_command_queue *queue,
 #else
 			dev_warn(kbdev->dev,
 				"unexpected fence wait command found\n");
+
+			status = -EINVAL;
+			queue->has_error = true;
 #endif
 
 			if (process_next) {
 				KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_WAIT_END(
-					kbdev, queue);
+					kbdev, queue, status < 0 ? status : 0);
 				queue->command_started = false;
 			}
 			break;
@@ -1220,16 +1580,24 @@ static void kcpu_queue_process(struct kbase_kcpu_command_queue *queue,
 			KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_SIGNAL_START(
 				kbdev, queue);
 
+			status = 0;
+
 #ifdef CONFIG_SYNC_FILE
-			kbase_kcpu_fence_signal_process(queue,
-						&cmd->info.fence);
+			status = kbase_kcpu_fence_signal_process(
+				queue, &cmd->info.fence);
+
+			if (status < 0)
+				queue->has_error = true;
 #else
 			dev_warn(kbdev->dev,
 				"unexpected fence signal command found\n");
+
+			status = -EINVAL;
+			queue->has_error = true;
 #endif
 
 			KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_SIGNAL_END(
-				kbdev, queue);
+				kbdev, queue, status);
 			break;
 		case BASE_KCPU_COMMAND_TYPE_CQS_WAIT:
 			status = kbase_kcpu_cqs_wait_process(kbdev, queue,
@@ -1252,48 +1620,99 @@ static void kcpu_queue_process(struct kbase_kcpu_command_queue *queue,
 			kbase_kcpu_cqs_set_process(kbdev, queue,
 				&cmd->info.cqs_set);
 
-			/* CQS sets are only traced before execution */
+			break;
+		case BASE_KCPU_COMMAND_TYPE_CQS_WAIT_OPERATION:
+			status = kbase_kcpu_cqs_wait_operation_process(kbdev, queue,
+						&cmd->info.cqs_wait_operation);
+
+			if (!status && !ignore_waits) {
+				process_next = false;
+			} else {
+				/* Either all CQS objects were signaled or
+				 * there was an error or the queue itself is
+				 * being deleted.
+				 * In all cases can move to the next command.
+				 * TBD: handle the error
+				 */
+				cleanup_cqs_wait_operation(queue,	&cmd->info.cqs_wait_operation);
+			}
+
+			break;
+		case BASE_KCPU_COMMAND_TYPE_CQS_SET_OPERATION:
+			kbase_kcpu_cqs_set_operation_process(kbdev, queue,
+				&cmd->info.cqs_set_operation);
+
 			break;
 		case BASE_KCPU_COMMAND_TYPE_ERROR_BARRIER:
 			/* Clear the queue's error state */
 			queue->has_error = false;
+
+			KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_ERROR_BARRIER(
+				kbdev, queue);
 			break;
-		case BASE_KCPU_COMMAND_TYPE_MAP_IMPORT:
+		case BASE_KCPU_COMMAND_TYPE_MAP_IMPORT: {
+			struct kbase_ctx_ext_res_meta *meta = NULL;
+
 			KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_MAP_IMPORT_START(
 				kbdev, queue);
 
 			kbase_gpu_vm_lock(queue->kctx);
-			kbase_sticky_resource_acquire(queue->kctx,
-						cmd->info.import.gpu_va);
+			meta = kbase_sticky_resource_acquire(
+				queue->kctx, cmd->info.import.gpu_va);
 			kbase_gpu_vm_unlock(queue->kctx);
 
+			if (meta == NULL) {
+				queue->has_error = true;
+				dev_warn(kbdev->dev,
+						"failed to map an external resource\n");
+			}
+
 			KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_MAP_IMPORT_END(
-				kbdev, queue);
+				kbdev, queue, meta ? 0 : 1);
 			break;
-		case BASE_KCPU_COMMAND_TYPE_UNMAP_IMPORT:
+		}
+		case BASE_KCPU_COMMAND_TYPE_UNMAP_IMPORT: {
+			bool ret;
+
 			KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_START(
 				kbdev, queue);
 
 			kbase_gpu_vm_lock(queue->kctx);
-			kbase_sticky_resource_release(queue->kctx, NULL,
-						cmd->info.import.gpu_va);
+			ret = kbase_sticky_resource_release(
+				queue->kctx, NULL, cmd->info.import.gpu_va);
 			kbase_gpu_vm_unlock(queue->kctx);
 
+			if (!ret) {
+				queue->has_error = true;
+				dev_warn(kbdev->dev,
+						"failed to release the reference. resource not found\n");
+			}
+
 			KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_END(
-				kbdev, queue);
+				kbdev, queue, ret ? 0 : 1);
 			break;
-		case BASE_KCPU_COMMAND_TYPE_UNMAP_IMPORT_FORCE:
+		}
+		case BASE_KCPU_COMMAND_TYPE_UNMAP_IMPORT_FORCE: {
+			bool ret;
+
 			KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_FORCE_START(
 					kbdev, queue);
 
 			kbase_gpu_vm_lock(queue->kctx);
-			kbase_sticky_resource_release_force(queue->kctx, NULL,
-						cmd->info.import.gpu_va);
+			ret = kbase_sticky_resource_release_force(
+				queue->kctx, NULL, cmd->info.import.gpu_va);
 			kbase_gpu_vm_unlock(queue->kctx);
 
+			if (!ret) {
+				queue->has_error = true;
+				dev_warn(kbdev->dev,
+						"failed to release the reference. resource not found\n");
+			}
+
 			KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_FORCE_END(
-					kbdev, queue);
+				kbdev, queue, ret ? 0 : 1);
 			break;
+		}
 		case BASE_KCPU_COMMAND_TYPE_JIT_ALLOC:
 		{
 			KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_ALLOC_START(
@@ -1307,7 +1726,8 @@ static void kcpu_queue_process(struct kbase_kcpu_command_queue *queue,
 					queue->has_error = true;
 
 				KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_ALLOC_INFO(
-						kbdev, queue, &cmd->info.jit_alloc, (status == 0));
+					kbdev, queue, &cmd->info.jit_alloc,
+					status);
 
 				kbase_kcpu_jit_allocate_finish(queue, cmd);
 				KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_ALLOC_END(
@@ -1319,27 +1739,64 @@ static void kcpu_queue_process(struct kbase_kcpu_command_queue *queue,
 			KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_FREE_START(
 				kbdev, queue);
 
-			KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_FREE_INFO(
-				kbdev, queue, &cmd->info.jit_free);
-
-			status = kbase_kcpu_jit_free_process(queue->kctx, cmd);
+			status = kbase_kcpu_jit_free_process(queue, cmd);
 			if (status)
 				queue->has_error = true;
 
 			KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_FREE_END(
 				kbdev, queue);
 			break;
-		case BASE_KCPU_COMMAND_TYPE_GROUP_SUSPEND:
+		case BASE_KCPU_COMMAND_TYPE_GROUP_SUSPEND: {
+			struct kbase_suspend_copy_buffer *sus_buf =
+					cmd->info.suspend_buf_copy.sus_buf;
+
+			KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_GROUP_SUSPEND_START(
+				kbdev, queue);
+
 			status = kbase_csf_queue_group_suspend_process(
-					queue->kctx,
-					cmd->info.suspend_buf_copy.sus_buf,
+					queue->kctx, sus_buf,
 					cmd->info.suspend_buf_copy.group_handle);
 			if (status)
 				queue->has_error = true;
 
-			kfree(cmd->info.suspend_buf_copy.sus_buf->pages);
-			kfree(cmd->info.suspend_buf_copy.sus_buf);
+			KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_GROUP_SUSPEND_END(
+				kbdev, queue, status);
+
+			if (!sus_buf->cpu_alloc) {
+				int i;
+
+				for (i = 0; i < sus_buf->nr_pages; i++)
+					put_page(sus_buf->pages[i]);
+			} else {
+				kbase_mem_phy_alloc_kernel_unmapped(
+					sus_buf->cpu_alloc);
+				kbase_mem_phy_alloc_put(sus_buf->cpu_alloc);
+			}
+
+			kfree(sus_buf->pages);
+			kfree(sus_buf);
 			break;
+		}
+#if MALI_UNIT_TEST
+		case BASE_KCPU_COMMAND_TYPE_SAMPLE_TIME: {
+			u64 time = ktime_get_raw_ns();
+			void *target_page = kmap(*cmd->info.sample_time.page);
+
+			if (target_page) {
+				memcpy(target_page +
+					       cmd->info.sample_time.page_offset,
+				       &time, sizeof(time));
+				kunmap(*cmd->info.sample_time.page);
+			} else {
+				dev_warn(kbdev->dev,
+					 "Could not kmap target page\n");
+				queue->has_error = true;
+			}
+			put_page(*cmd->info.sample_time.page);
+			kfree(cmd->info.sample_time.page);
+			break;
+		}
+#endif /* MALI_UNIT_TEST */
 		default:
 			dev_warn(kbdev->dev,
 				"Unrecognized command type\n");
@@ -1389,12 +1846,15 @@ static void KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_COMMAND(
 		break;
 	case BASE_KCPU_COMMAND_TYPE_CQS_WAIT:
 	{
-		const struct base_cqs_wait *waits = cmd->info.cqs_wait.objs;
+		const struct base_cqs_wait_info *waits =
+			cmd->info.cqs_wait.objs;
+		u32 inherit_err_flags = cmd->info.cqs_wait.inherit_err_flags;
 		unsigned int i;
 
 		for (i = 0; i < cmd->info.cqs_wait.nr_objs; i++) {
 			KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_WAIT(
-				kbdev, queue, waits[i].addr, waits[i].val);
+				kbdev, queue, waits[i].addr, waits[i].val,
+				(inherit_err_flags & ((u32)1 << i)) ? 1 : 0);
 		}
 		break;
 	}
@@ -1409,8 +1869,19 @@ static void KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_COMMAND(
 		}
 		break;
 	}
+	case BASE_KCPU_COMMAND_TYPE_CQS_WAIT_OPERATION:
+	{
+		/* GPUCORE-28172 RDT to review */
+		break;
+	}
+	case BASE_KCPU_COMMAND_TYPE_CQS_SET_OPERATION:
+	{
+		/* GPUCORE-28172 RDT to review */
+		break;
+	}
 	case BASE_KCPU_COMMAND_TYPE_ERROR_BARRIER:
-		/* No implemented tracepoint */
+		KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_ERROR_BARRIER(kbdev,
+									queue);
 		break;
 	case BASE_KCPU_COMMAND_TYPE_MAP_IMPORT:
 		KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_MAP_IMPORT(
@@ -1435,11 +1906,11 @@ static void KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_COMMAND(
 				&cmd->info.jit_alloc.info[i];
 
 			KBASE_TLSTREAM_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_JIT_ALLOC(
-				kbdev, queue,
-				info->gpu_alloc_addr, info->va_pages,
-				info->commit_pages, info->extent, info->id,
-				info->bin_id, info->max_allocations,
-				info->flags, info->usage_id);
+				kbdev, queue, info->gpu_alloc_addr,
+				info->va_pages, info->commit_pages,
+				info->extension, info->id, info->bin_id,
+				info->max_allocations, info->flags,
+				info->usage_id);
 		}
 		KBASE_TLSTREAM_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_JIT_ALLOC(
 			kbdev, queue);
@@ -1460,8 +1931,18 @@ static void KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_COMMAND(
 		break;
 	}
 	case BASE_KCPU_COMMAND_TYPE_GROUP_SUSPEND:
-		/* No implemented tracepoint */
+		KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_GROUP_SUSPEND(
+			kbdev, queue, cmd->info.suspend_buf_copy.sus_buf,
+			cmd->info.suspend_buf_copy.group_handle);
 		break;
+#if MALI_UNIT_TEST
+	case BASE_KCPU_COMMAND_TYPE_SAMPLE_TIME:
+		/*
+		 * This is a test-only KCPU command, so there is no need for a
+		 * timeline entry
+		 */
+		break;
+#endif /* MALI_UNIT_TEST */
 	}
 }
 
@@ -1565,6 +2046,14 @@ int kbase_csf_kcpu_queue_enqueue(struct kbase_context *kctx,
 			ret = kbase_kcpu_cqs_set_prepare(queue,
 					&command.info.cqs_set, kcpu_cmd);
 			break;
+		case BASE_KCPU_COMMAND_TYPE_CQS_WAIT_OPERATION:
+			ret = kbase_kcpu_cqs_wait_operation_prepare(queue,
+					&command.info.cqs_wait_operation, kcpu_cmd);
+			break;
+		case BASE_KCPU_COMMAND_TYPE_CQS_SET_OPERATION:
+			ret = kbase_kcpu_cqs_set_operation_prepare(queue,
+					&command.info.cqs_set_operation, kcpu_cmd);
+			break;
 		case BASE_KCPU_COMMAND_TYPE_ERROR_BARRIER:
 			kcpu_cmd->type = BASE_KCPU_COMMAND_TYPE_ERROR_BARRIER;
 			ret = 0;
@@ -1594,7 +2083,37 @@ int kbase_csf_kcpu_queue_enqueue(struct kbase_context *kctx,
 					&command.info.suspend_buf_copy,
 					kcpu_cmd);
 			break;
+#if MALI_UNIT_TEST
+		case BASE_KCPU_COMMAND_TYPE_SAMPLE_TIME: {
+			int const page_cnt = 1;
 
+			kcpu_cmd->type = BASE_KCPU_COMMAND_TYPE_SAMPLE_TIME;
+			kcpu_cmd->info.sample_time.page_addr =
+				command.info.sample_time.time & PAGE_MASK;
+			kcpu_cmd->info.sample_time.page_offset =
+				command.info.sample_time.time & ~PAGE_MASK;
+			kcpu_cmd->info.sample_time.page = kcalloc(
+				page_cnt, sizeof(struct page *), GFP_KERNEL);
+			if (!kcpu_cmd->info.sample_time.page) {
+				ret = -ENOMEM;
+			} else {
+				int pinned_pages = get_user_pages_fast(
+					kcpu_cmd->info.sample_time.page_addr,
+					page_cnt, 1,
+					kcpu_cmd->info.sample_time.page);
+
+				if (pinned_pages < 0) {
+					ret = pinned_pages;
+					kfree(kcpu_cmd->info.sample_time.page);
+				} else if (pinned_pages != page_cnt) {
+					ret = -EINVAL;
+					kfree(kcpu_cmd->info.sample_time.page);
+				}
+			}
+
+			break;
+		}
+#endif /* MALI_UNIT_TEST */
 		default:
 			dev_warn(queue->kctx->kbdev->dev,
 				"Unknown command type %u\n", command.type);
@@ -1722,6 +2241,7 @@ int kbase_csf_kcpu_queue_new(struct kbase_context *kctx,
 	INIT_LIST_HEAD(&queue->jit_blocked);
 	queue->has_error = false;
 	INIT_WORK(&queue->work, kcpu_queue_process_worker);
+	queue->id = idx;
 
 	newq->id = idx;
 
@@ -1730,6 +2250,9 @@ int kbase_csf_kcpu_queue_new(struct kbase_context *kctx,
 	 */
 	KBASE_TLSTREAM_TL_KBASE_NEW_KCPUQUEUE(
 		kctx->kbdev, queue, kctx->id, queue->num_pending_cmds);
+
+	KBASE_KTRACE_ADD_CSF_KCPU(kctx->kbdev, KCPU_QUEUE_NEW, queue,
+		queue->fence_context, 0);
 out:
 	mutex_unlock(&kctx->csf.kcpu_queues.lock);
 
diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_kcpu.h b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_kcpu.h
index 45c76af04c0f..86aa7dcc452e 100644
--- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_kcpu.h
+++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_kcpu.h
@@ -1,11 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  *
- * (C) COPYRIGHT 2018-2020 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2018-2021 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,8 +17,6 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 #ifndef _KBASE_CSF_KCPU_H_
@@ -48,9 +47,9 @@ struct kbase_kcpu_command_import_info {
  * struct kbase_kcpu_command_fence_info - Structure which holds information
  *		about the fence object enqueued in the kcpu command queue
  *
- * @fence_cb:
- * @fence:
- * @kcpu_queue:
+ * @fence_cb:   Fence callback
+ * @fence:      Fence
+ * @kcpu_queue: kcpu command queue
  */
 struct kbase_kcpu_command_fence_info {
 #if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE)
@@ -70,13 +69,10 @@ struct kbase_kcpu_command_fence_info {
  * @objs:	Array of structures which define CQS objects to be used by
  *		the kcpu command.
  * @nr_objs:	Number of CQS objects in the array.
- * @propagate_flags:  Bit-pattern for the CQSs in the array that are set
- *		      to propagate queue error-state to the flagged CQSs.
  */
 struct kbase_kcpu_command_cqs_set_info {
 	struct base_cqs_set *objs;
 	unsigned int nr_objs;
-	u32 propagate_flags;
 };
 
 /**
@@ -93,7 +89,37 @@ struct kbase_kcpu_command_cqs_set_info {
  *			queue's error-state.
  */
 struct kbase_kcpu_command_cqs_wait_info {
-	struct base_cqs_wait *objs;
+	struct base_cqs_wait_info *objs;
+	unsigned long *signaled;
+	unsigned int nr_objs;
+	u32 inherit_err_flags;
+};
+
+/**
+ * struct kbase_kcpu_command_cqs_set_operation_info - Structure which holds information
+ *				about CQS objects for the kcpu CQS timeline set command
+ *
+ * @objs:	Array of structures which define CQS timeline objects to be used by
+ *		the kcpu command.
+ * @nr_objs:	Number of CQS objects in the array.
+ */
+struct kbase_kcpu_command_cqs_set_operation_info {
+	struct base_cqs_set_operation_info *objs;
+	unsigned int nr_objs;
+};
+
+/**
+ * struct kbase_kcpu_command_cqs_wait_operation_info - Structure which holds information
+ *				about CQS objects for the kcpu CQS timeline wait command
+ *
+ * @objs:	Array of structures which define CQS timeline objects to be used by
+ *		the kcpu command.
+ * @signaled:	Bit array used to report the status of the CQS wait objects.
+ *              1 is signaled, 0 otherwise.
+ * @nr_objs:	Number of CQS objects in the array.
+ */
+struct kbase_kcpu_command_cqs_wait_operation_info {
+	struct base_cqs_wait_operation_info *objs;
 	unsigned long *signaled;
 	unsigned int nr_objs;
 	u32 inherit_err_flags;
@@ -103,7 +129,7 @@ struct kbase_kcpu_command_cqs_wait_info {
  * struct kbase_kcpu_command_jit_alloc_info - Structure which holds information
  *				needed for the kcpu command for jit allocations
  *
- * @node	Used to keep track of all JIT free/alloc commands in submission
+ * @node:	Used to keep track of all JIT free/alloc commands in submission
  *		order. This must be located in the front of this struct to
  *		match that of kbase_kcpu_command_jit_free_info.
  * @info:	Array of objects of the struct base_jit_alloc_info type which
@@ -145,12 +171,14 @@ struct kbase_kcpu_command_jit_free_info {
  *		the user buffer.
  * @nr_pages:	number of pages.
  * @offset:	offset into the pages
+ * @cpu_alloc:	Reference to physical pages of suspend buffer allocation.
  */
 struct kbase_suspend_copy_buffer {
 	size_t size;
 	struct page **pages;
 	int nr_pages;
 	size_t offset;
+	struct kbase_mem_phy_alloc *cpu_alloc;
 };
 
 /**
@@ -159,13 +187,21 @@ struct kbase_suspend_copy_buffer {
  *
  * @sus_buf:		Pointer to the structure which contains details of the
  *			user buffer and its kernel pinned pages.
- * @group_handle:	Handle to the mapping of command stream group.
+ * @group_handle:	Handle to the mapping of CSG.
  */
 struct kbase_kcpu_command_group_suspend_info {
 	struct kbase_suspend_copy_buffer *sus_buf;
 	u8 group_handle;
 };
 
+#if MALI_UNIT_TEST
+struct kbase_kcpu_command_sample_time_info {
+	u64 page_addr;
+	u64 page_offset;
+	struct page **page;
+};
+#endif /* MALI_UNIT_TEST */
+
 /**
  * struct kbase_cpu_command - Command which is to be part of the kernel
  *                            command queue
@@ -175,6 +211,14 @@ struct kbase_kcpu_command_group_suspend_info {
  *		indicates that it has been enqueued earlier.
  * @info:	Structure which holds information about the command
  *		dependent on the command type.
+ * @info.fence:            Fence
+ * @info.cqs_wait:         CQS wait
+ * @info.cqs_set:          CQS set
+ * @info.import:           import
+ * @info.jit_alloc:        jit allocation
+ * @info.jit_free:         jit deallocation
+ * @info.suspend_buf_copy: suspend buffer copy
+ * @info.sample_time:      sample time
  */
 struct kbase_kcpu_command {
 	enum base_kcpu_command_type type;
@@ -183,10 +227,15 @@ struct kbase_kcpu_command {
 		struct kbase_kcpu_command_fence_info fence;
 		struct kbase_kcpu_command_cqs_wait_info cqs_wait;
 		struct kbase_kcpu_command_cqs_set_info cqs_set;
+		struct kbase_kcpu_command_cqs_wait_operation_info cqs_wait_operation;
+		struct kbase_kcpu_command_cqs_set_operation_info cqs_set_operation;
 		struct kbase_kcpu_command_import_info import;
 		struct kbase_kcpu_command_jit_alloc_info jit_alloc;
 		struct kbase_kcpu_command_jit_free_info jit_free;
 		struct kbase_kcpu_command_group_suspend_info suspend_buf_copy;
+#if MALI_UNIT_TEST
+		struct kbase_kcpu_command_sample_time_info sample_time;
+#endif /* MALI_UNIT_TEST */
 	} info;
 };
 
@@ -201,6 +250,7 @@ struct kbase_kcpu_command {
  *				commands enqueued into a kcpu command queue;
  *				part of kernel API for processing workqueues
  * @start_offset:		Index of the command to be executed next
+ * @id:				KCPU command queue ID.
  * @num_pending_cmds:		The number of commands enqueued but not yet
  *				executed or pending
  * @cqs_wait_count:		Tracks the number of CQS wait commands enqueued
@@ -233,6 +283,7 @@ struct kbase_kcpu_command_queue {
 	struct kbase_kcpu_command commands[KBASEP_KCPU_QUEUE_SIZE];
 	struct work_struct work;
 	u8 start_offset;
+	u8 id;
 	u16 num_pending_cmds;
 	u32 cqs_wait_count;
 	u64 fence_context;
diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_kcpu_debugfs.c b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_kcpu_debugfs.c
index 55e3b64cbe71..d59e77c2b98e 100644
--- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_kcpu_debugfs.c
+++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_kcpu_debugfs.c
@@ -1,11 +1,12 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  *
- * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,8 +17,6 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 #include "mali_kbase_csf_kcpu_debugfs.h"
@@ -96,7 +95,7 @@ static void kbasep_csf_kcpu_debugfs_print_queue(struct seq_file *file,
 			struct kbase_sync_fence_info info;
 
 			kbase_sync_fence_info_get(cmd->info.fence.fence, &info);
-			seq_printf(file, ",  Fence      %p %s %s",
+			seq_printf(file, ",  Fence      %pK %s %s",
 				   info.fence, info.name,
 				   kbase_sync_status_string(info.status));
 			break;
@@ -196,4 +195,3 @@ void kbase_csf_kcpu_debugfs_init(struct kbase_context *kctx)
 }
 
 #endif /* CONFIG_DEBUG_FS */
-
diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_kcpu_debugfs.h b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_kcpu_debugfs.h
index 359fe2cb0168..58b8e34ead92 100644
--- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_kcpu_debugfs.h
+++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_kcpu_debugfs.h
@@ -1,11 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  *
- * (C) COPYRIGHT 2019 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,8 +17,6 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 #ifndef _KBASE_CSF_KCPU_DEBUGFS_H_
diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_protected_memory.c b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_protected_memory.c
index 987cbc2fc201..09e72711d3cb 100644
--- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_protected_memory.c
+++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_protected_memory.c
@@ -1,11 +1,12 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  *
- * (C) COPYRIGHT 2019 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,8 +17,6 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 #include "mali_kbase_csf_protected_memory.h"
diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_protected_memory.h b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_protected_memory.h
index 2b459911d834..95f507f52d78 100644
--- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_protected_memory.h
+++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_protected_memory.h
@@ -1,11 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  *
- * (C) COPYRIGHT 2019 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,8 +17,6 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 #ifndef _KBASE_CSF_PROTECTED_MEMORY_H_
diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_reset_gpu.c b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_reset_gpu.c
index f1a318d26f43..e8da0f3cccda 100644
--- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_reset_gpu.c
+++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_reset_gpu.c
@@ -1,11 +1,12 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  *
- * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,8 +17,6 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 #include <mali_kbase.h>
@@ -28,6 +27,8 @@
 #include <backend/gpu/mali_kbase_pm_internal.h>
 #include <mali_kbase_regs_history_debugfs.h>
 #include <csf/mali_kbase_csf_trace_buffer.h>
+#include <csf/ipa_control/mali_kbase_csf_ipa_control.h>
+#include <mali_kbase_reset_gpu.h>
 
 /* Waiting timeout for GPU reset to complete */
 #define GPU_RESET_TIMEOUT_MS (5000) /* 5 seconds */
@@ -37,6 +38,199 @@
 #define DUMP_HEX_CHARS_PER_LINE  \
 	(DUMP_DWORDS_PER_LINE * DUMP_HEX_CHARS_PER_DWORD)
 
+static inline bool
+kbase_csf_reset_state_is_silent(enum kbase_csf_reset_gpu_state state)
+{
+	return (state == KBASE_CSF_RESET_GPU_COMMITTED_SILENT);
+}
+
+static inline bool
+kbase_csf_reset_state_is_committed(enum kbase_csf_reset_gpu_state state)
+{
+	return (state == KBASE_CSF_RESET_GPU_COMMITTED ||
+		state == KBASE_CSF_RESET_GPU_COMMITTED_SILENT);
+}
+
+static inline bool
+kbase_csf_reset_state_is_active(enum kbase_csf_reset_gpu_state state)
+{
+	return (state == KBASE_CSF_RESET_GPU_HAPPENING);
+}
+
+/**
+ * DOC: Mechanism for coherent access to the HW with respect to GPU reset
+ *
+ * Access to the HW from non-atomic context outside of the reset thread must
+ * use kbase_reset_gpu_prevent_and_wait() / kbase_reset_gpu_try_prevent().
+ *
+ * This currently works by taking the &kbase_device's csf.reset.sem, for
+ * 'write' access by the GPU reset thread and 'read' access by every other
+ * thread. The use of this rw_semaphore means:
+ *
+ * - there will be mutual exclusion (and thus waiting) between the thread doing
+ *   reset ('writer') and threads trying to access the GPU for 'normal'
+ *   operations ('readers')
+ *
+ * - multiple threads may prevent reset from happening without serializing each
+ *   other prematurely. Note that at present the wait for reset to finish has
+ *   to be done higher up in the driver than actual GPU access, at a point
+ *   where it won't cause lock ordering issues. At such a point, some paths may
+ *   actually lead to no GPU access, but we would prefer to avoid serializing
+ *   at that level
+ *
+ * - lockdep (if enabled in the kernel) will check such uses for deadlock
+ *
+ * If instead &kbase_device's csf.reset.wait &wait_queue_head_t were used on
+ * its own, we'd also need to add a &lockdep_map and appropriate lockdep calls
+ * to make use of lockdep checking in all places where the &wait_queue_head_t
+ * is waited upon or signaled.
+ *
+ * Indeed places where we wait on &kbase_device's csf.reset.wait (such as
+ * kbase_reset_gpu_wait()) are the only places where we need extra call(s) to
+ * lockdep, and they are made on the existing rw_semaphore.
+ *
+ * For atomic access, the &kbase_device's csf.reset.state member should be
+ * checked instead, such as by using kbase_reset_gpu_is_active().
+ *
+ * Ideally the &rw_semaphore should be replaced in future with a single mutex
+ * that protects any access to the GPU, via reset or otherwise.
+ */
+
+int kbase_reset_gpu_prevent_and_wait(struct kbase_device *kbdev)
+{
+	int err = 0;
+
+	down_read(&kbdev->csf.reset.sem);
+
+	/* A failed or still-active reset means HW access cannot be granted */
+	if (atomic_read(&kbdev->csf.reset.state) == KBASE_CSF_RESET_GPU_FAILED)
+		err = -ENOMEM;
+	else if (WARN_ON(kbase_reset_gpu_is_active(kbdev)))
+		err = -EFAULT;
+
+	if (err)
+		up_read(&kbdev->csf.reset.sem);
+
+	return err;
+}
+KBASE_EXPORT_TEST_API(kbase_reset_gpu_prevent_and_wait);
+
+int kbase_reset_gpu_try_prevent(struct kbase_device *kbdev)
+{
+	int err = 0;
+
+	/* Bail out rather than wait if the semaphore cannot be taken now */
+	if (!down_read_trylock(&kbdev->csf.reset.sem))
+		return -EAGAIN;
+
+	if (atomic_read(&kbdev->csf.reset.state) == KBASE_CSF_RESET_GPU_FAILED)
+		err = -ENOMEM;
+	else if (WARN_ON(kbase_reset_gpu_is_active(kbdev)))
+		err = -EFAULT;
+
+	if (err)
+		up_read(&kbdev->csf.reset.sem);
+
+	return err;
+}
+
+void kbase_reset_gpu_allow(struct kbase_device *kbdev)
+{
+	up_read(&kbdev->csf.reset.sem);
+}
+KBASE_EXPORT_TEST_API(kbase_reset_gpu_allow);
+
+void kbase_reset_gpu_assert_prevented(struct kbase_device *kbdev)
+{
+#if KERNEL_VERSION(4, 10, 0) <= LINUX_VERSION_CODE
+	lockdep_assert_held_read(&kbdev->csf.reset.sem);
+#else
+	lockdep_assert_held(&kbdev->csf.reset.sem);
+#endif
+	WARN_ON(kbase_reset_gpu_is_active(kbdev));
+}
+
+void kbase_reset_gpu_assert_failed_or_prevented(struct kbase_device *kbdev)
+{
+	if (atomic_read(&kbdev->csf.reset.state) == KBASE_CSF_RESET_GPU_FAILED)
+		return;
+
+#if KERNEL_VERSION(4, 10, 0) <= LINUX_VERSION_CODE
+	lockdep_assert_held_read(&kbdev->csf.reset.sem);
+#else
+	lockdep_assert_held(&kbdev->csf.reset.sem);
+#endif
+	WARN_ON(kbase_reset_gpu_is_active(kbdev));
+}
+
+/* Take csf.reset.sem for write (still held on return) and mark the reset as
+ * happening, synchronizing with other threads that might be accessing the GPU
+ */
+static void kbase_csf_reset_begin_hw_access_sync(
+	struct kbase_device *kbdev,
+	enum kbase_csf_reset_gpu_state initial_reset_state)
+{
+	unsigned long hwaccess_lock_flags;
+	unsigned long scheduler_spin_lock_flags;
+
+	/* Only a committed reset is expected here: any other state is a
+	 * software bug, so WARN but still atomic_set() the new state below
+	 */
+	WARN_ON(!kbase_csf_reset_state_is_committed(initial_reset_state));
+
+	down_write(&kbdev->csf.reset.sem);
+
+	/* Threads in atomic context accessing the HW will hold one of these
+	 * locks, so synchronize with them too.
+	 */
+	spin_lock_irqsave(&kbdev->hwaccess_lock, hwaccess_lock_flags);
+	kbase_csf_scheduler_spin_lock(kbdev, &scheduler_spin_lock_flags);
+	atomic_set(&kbdev->csf.reset.state, KBASE_CSF_RESET_GPU_HAPPENING);
+	kbase_csf_scheduler_spin_unlock(kbdev, scheduler_spin_lock_flags);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, hwaccess_lock_flags);
+}
+
+/* Mark the reset as finished (or as failed if err_during_reset is non-zero)
+ * and allow other threads to access the GPU once more
+ */
+static void kbase_csf_reset_end_hw_access(struct kbase_device *kbdev,
+					  int err_during_reset,
+					  bool firmware_inited)
+{
+	unsigned long hwaccess_lock_flags;
+	unsigned long scheduler_spin_lock_flags;
+
+	WARN_ON(!kbase_csf_reset_state_is_active(
+		atomic_read(&kbdev->csf.reset.state)));
+
+	/* Once again, we synchronize with atomic context threads accessing the
+	 * HW, as otherwise any actions they defer could get lost
+	 */
+	spin_lock_irqsave(&kbdev->hwaccess_lock, hwaccess_lock_flags);
+	kbase_csf_scheduler_spin_lock(kbdev, &scheduler_spin_lock_flags);
+
+	if (!err_during_reset) {
+		atomic_set(&kbdev->csf.reset.state,
+			   KBASE_CSF_RESET_GPU_NOT_PENDING);
+	} else {
+		dev_err(kbdev->dev, "Reset failed to complete");
+		atomic_set(&kbdev->csf.reset.state, KBASE_CSF_RESET_GPU_FAILED);
+	}
+
+	kbase_csf_scheduler_spin_unlock(kbdev, scheduler_spin_lock_flags);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, hwaccess_lock_flags);
+
+	/* Invoke the scheduling tick after formally finishing the reset,
+	 * otherwise the tick might start too soon and notice that reset
+	 * is still in progress.
+	 */
+	up_write(&kbdev->csf.reset.sem);
+	wake_up(&kbdev->csf.reset.wait);
+
+	if (!err_during_reset && likely(firmware_inited))
+		kbase_csf_scheduler_enable_tick_timer(kbdev);
+}
+
 static void kbase_csf_debug_dump_registers(struct kbase_device *kbdev)
 {
 	kbase_io_history_dump(kbdev);
@@ -113,27 +307,59 @@ static void kbase_csf_dump_firmware_trace_buffer(struct kbase_device *kbdev)
 	kfree(buf);
 }
 
-static int kbase_csf_reset_gpu_now(struct kbase_device *kbdev,
-				   bool firmware_inited)
+/**
+ * kbase_csf_hwcnt_on_reset_error() - Sets HWCNT to appropriate state in the
+ *                                    event of an error during GPU reset.
+ * @kbdev: Pointer to KBase device
+ */
+static void kbase_csf_hwcnt_on_reset_error(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+
+	/* Treat this as an unrecoverable error for HWCNT */
+	kbase_hwcnt_backend_csf_on_unrecoverable_error(&kbdev->hwcnt_gpu_iface);
+
+	/* Re-enable counters to ensure matching enable/disable pair.
+	 * This might reduce the hwcnt disable count to 0, and therefore
+	 * trigger actual re-enabling of hwcnt.
+	 * However, as the backend is now in the unrecoverable error state,
+	 * re-enabling will immediately fail and put the context into the error
+	 * state, preventing the hardware from being touched (which could have
+	 * risked a hang).
+	 */
+	kbase_csf_scheduler_spin_lock(kbdev, &flags);
+	kbase_hwcnt_context_enable(kbdev->hwcnt_gpu_ctx);
+	kbase_csf_scheduler_spin_unlock(kbdev, flags);
+}
+
+static int kbase_csf_reset_gpu_now(struct kbase_device *kbdev,
+				   bool firmware_inited, bool silent)
 {
 	unsigned long flags;
-	bool silent = false;
 	int err;
 
-	if (atomic_read(&kbdev->csf.reset.state) == KBASE_CSF_RESET_GPU_SILENT)
-		silent = true;
-
 	WARN_ON(kbdev->irq_reset_flush);
+	/* The reset must now be happening otherwise other threads will not
+	 * have been synchronized with to stop their access to the HW
+	 */
+#if KERNEL_VERSION(5, 3, 0) <= LINUX_VERSION_CODE
+	lockdep_assert_held_write(&kbdev->csf.reset.sem);
+#elif KERNEL_VERSION(4, 10, 0) <= LINUX_VERSION_CODE
+	lockdep_assert_held_exclusive(&kbdev->csf.reset.sem);
+#else
+	lockdep_assert_held(&kbdev->csf.reset.sem);
+#endif
+	WARN_ON(!kbase_reset_gpu_is_active(kbdev));
 
-	/* Reset the scheduler state before disabling the interrupts as suspend of active
-	 * CSG slots would also be done as a part of reset.
+	/* Reset the scheduler state before disabling the interrupts as suspend
+	 * of active CSG slots would also be done as a part of reset.
 	 */
 	if (likely(firmware_inited))
 		kbase_csf_scheduler_reset(kbdev);
 	cancel_work_sync(&kbdev->csf.firmware_reload_work);
 
-	/* Disable GPU hardware counters.
-	 * This call will block until counters are disabled.
+	dev_dbg(kbdev->dev, "Disable GPU hardware counters.\n");
+	/* This call will block until counters are disabled.
 	 */
 	kbase_hwcnt_context_disable(kbdev->hwcnt_gpu_ctx);
 
@@ -141,7 +367,8 @@ static int kbase_csf_reset_gpu_now(struct kbase_device *kbdev,
 	spin_lock(&kbdev->mmu_mask_change);
 	kbase_pm_reset_start_locked(kbdev);
 
-	/* We're about to flush out the IRQs and their bottom halves */
+	dev_dbg(kbdev->dev,
+		"We're about to flush out the IRQs and their bottom halves\n");
 	kbdev->irq_reset_flush = true;
 
 	/* Disable IRQ to avoid IRQ handlers to kick in after releasing the
@@ -152,15 +379,16 @@ static int kbase_csf_reset_gpu_now(struct kbase_device *kbdev,
 	spin_unlock(&kbdev->mmu_mask_change);
 	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
 
-	/* Ensure that any IRQ handlers have finished
-	 * Must be done without any locks IRQ handlers will take.
+	dev_dbg(kbdev->dev, "Ensure that any IRQ handlers have finished\n");
+	/* Must be done without any locks IRQ handlers will take.
 	 */
 	kbase_synchronize_irqs(kbdev);
 
-	/* Flush out any in-flight work items */
+	dev_dbg(kbdev->dev, "Flush out any in-flight work items\n");
 	kbase_flush_mmu_wqs(kbdev);
 
-	/* The flush has completed so reset the active indicator */
+	dev_dbg(kbdev->dev,
+		"The flush has completed so reset the active indicator\n");
 	kbdev->irq_reset_flush = false;
 
 	mutex_lock(&kbdev->pm.lock);
@@ -177,17 +405,31 @@ static int kbase_csf_reset_gpu_now(struct kbase_device *kbdev,
 			kbase_csf_dump_firmware_trace_buffer(kbdev);
 	}
 
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	kbase_ipa_control_handle_gpu_reset_pre(kbdev);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	/* Tell hardware counters a reset is about to occur.
+	 * If the backend is in an unrecoverable error state (e.g. due to
+	 * firmware being unresponsive) this will transition the backend out of
+	 * it, on the assumption a reset will fix whatever problem there was.
+	 */
+	kbase_hwcnt_backend_csf_on_before_reset(&kbdev->hwcnt_gpu_iface);
+
 	/* Reset the GPU */
 	err = kbase_pm_init_hw(kbdev, 0);
 
 	mutex_unlock(&kbdev->pm.lock);
 
-	if (WARN_ON(err))
+	if (WARN_ON(err)) {
+		kbase_csf_hwcnt_on_reset_error(kbdev);
 		return err;
+	}
 
 	mutex_lock(&kbdev->mmu_hw_mutex);
 	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
 	kbase_ctx_sched_restore_all_as(kbdev);
+	kbase_ipa_control_handle_gpu_reset_post(kbdev);
 	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
 	mutex_unlock(&kbdev->mmu_hw_mutex);
 
@@ -199,13 +441,15 @@ static int kbase_csf_reset_gpu_now(struct kbase_device *kbdev,
 	err = kbase_pm_wait_for_desired_state(kbdev);
 	mutex_unlock(&kbdev->pm.lock);
 
-	if (err)
+	if (WARN_ON(err)) {
+		kbase_csf_hwcnt_on_reset_error(kbdev);
 		return err;
+	}
 
 	/* Re-enable GPU hardware counters */
-	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	kbase_csf_scheduler_spin_lock(kbdev, &flags);
 	kbase_hwcnt_context_enable(kbdev->hwcnt_gpu_ctx);
-	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+	kbase_csf_scheduler_spin_unlock(kbdev, flags);
 
 	if (!silent)
 		dev_err(kbdev->dev, "Reset complete");
@@ -220,6 +464,13 @@ static void kbase_csf_reset_gpu_worker(struct work_struct *data)
 	bool firmware_inited;
 	unsigned long flags;
 	int err = 0;
+	const enum kbase_csf_reset_gpu_state initial_reset_state =
+		atomic_read(&kbdev->csf.reset.state);
+
+	/* Ensure any threads (e.g. executing the CSF scheduler) have finished
+	 * using the HW
+	 */
+	kbase_csf_reset_begin_hw_access_sync(kbdev, initial_reset_state);
 
 	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
 	firmware_inited = kbdev->csf.firmware_inited;
@@ -227,56 +478,59 @@ static void kbase_csf_reset_gpu_worker(struct work_struct *data)
 
 	if (!kbase_pm_context_active_handle_suspend(kbdev,
 			KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE)) {
-		err = kbase_csf_reset_gpu_now(kbdev, firmware_inited);
+		bool silent =
+			kbase_csf_reset_state_is_silent(initial_reset_state);
+
+		err = kbase_csf_reset_gpu_now(kbdev, firmware_inited, silent);
 		kbase_pm_context_idle(kbdev);
 	}
 
 	kbase_disjoint_state_down(kbdev);
 
-	if (!err) {
-		atomic_set(&kbdev->csf.reset.state,
-				KBASE_CSF_RESET_GPU_NOT_PENDING);
-		if (likely(firmware_inited))
-			kbase_csf_scheduler_enable_tick_timer(kbdev);
-	} else {
-		dev_err(kbdev->dev, "Reset failed to complete");
-		atomic_set(&kbdev->csf.reset.state,
-				KBASE_CSF_RESET_GPU_FAILED);
-	}
-
-	wake_up(&kbdev->csf.reset.wait);
+	/* Allow other threads to once again use the GPU */
+	kbase_csf_reset_end_hw_access(kbdev, err, firmware_inited);
 }
 
-bool kbase_prepare_to_reset_gpu(struct kbase_device *kbdev)
+bool kbase_prepare_to_reset_gpu(struct kbase_device *kbdev, unsigned int flags)
 {
+	if (flags & RESET_FLAGS_HWC_UNRECOVERABLE_ERROR)
+		kbase_hwcnt_backend_csf_on_unrecoverable_error(
+			&kbdev->hwcnt_gpu_iface);
+
 	if (atomic_cmpxchg(&kbdev->csf.reset.state,
 			KBASE_CSF_RESET_GPU_NOT_PENDING,
-			KBASE_CSF_RESET_GPU_HAPPENING) !=
-			KBASE_CSF_RESET_GPU_NOT_PENDING) {
+			KBASE_CSF_RESET_GPU_PREPARED) !=
+			KBASE_CSF_RESET_GPU_NOT_PENDING)
 		/* Some other thread is already resetting the GPU */
 		return false;
-	}
 
 	return true;
 }
 KBASE_EXPORT_TEST_API(kbase_prepare_to_reset_gpu);
 
-bool kbase_prepare_to_reset_gpu_locked(struct kbase_device *kbdev)
+bool kbase_prepare_to_reset_gpu_locked(struct kbase_device *kbdev,
+				       unsigned int flags)
 {
 	lockdep_assert_held(&kbdev->hwaccess_lock);
 
-	return kbase_prepare_to_reset_gpu(kbdev);
+	return kbase_prepare_to_reset_gpu(kbdev, flags);
 }
 
-int kbase_reset_gpu(struct kbase_device *kbdev)
+void kbase_reset_gpu(struct kbase_device *kbdev)
 {
+	/* The state is expected to be PREPARED here (as set by
+	 * kbase_prepare_to_reset_gpu()); anything else is a software bug
+	 */
+	if (WARN_ON(atomic_read(&kbdev->csf.reset.state) !=
+		    KBASE_CSF_RESET_GPU_PREPARED))
+		return;
+
+	atomic_set(&kbdev->csf.reset.state, KBASE_CSF_RESET_GPU_COMMITTED);
 	dev_err(kbdev->dev, "Preparing to soft-reset GPU\n");
 
 	kbase_disjoint_state_up(kbdev);
 
 	queue_work(kbdev->csf.reset.workq, &kbdev->csf.reset.work);
-
-	return 0;
 }
 KBASE_EXPORT_TEST_API(kbase_reset_gpu);
 
@@ -291,7 +545,7 @@ int kbase_reset_gpu_silent(struct kbase_device *kbdev)
 {
 	if (atomic_cmpxchg(&kbdev->csf.reset.state,
 				KBASE_CSF_RESET_GPU_NOT_PENDING,
-				KBASE_CSF_RESET_GPU_SILENT) !=
+				KBASE_CSF_RESET_GPU_COMMITTED_SILENT) !=
 				KBASE_CSF_RESET_GPU_NOT_PENDING) {
 		/* Some other thread is already resetting the GPU */
 		return -EAGAIN;
@@ -306,23 +560,42 @@ int kbase_reset_gpu_silent(struct kbase_device *kbdev)
 
 bool kbase_reset_gpu_is_active(struct kbase_device *kbdev)
 {
-	if (atomic_read(&kbdev->csf.reset.state) ==
-			KBASE_CSF_RESET_GPU_NOT_PENDING)
-		return false;
+	enum kbase_csf_reset_gpu_state reset_state =
+		atomic_read(&kbdev->csf.reset.state);
 
-	return true;
+	/* For CSF, the reset is considered active only when the reset worker
+	 * is actually executing and other threads would have to wait for it to
+	 * complete
+	 */
+	return kbase_csf_reset_state_is_active(reset_state);
 }
 
 int kbase_reset_gpu_wait(struct kbase_device *kbdev)
 {
 	const long wait_timeout =
 		kbase_csf_timeout_in_jiffies(GPU_RESET_TIMEOUT_MS);
-	long remaining = wait_event_timeout(kbdev->csf.reset.wait,
-				(atomic_read(&kbdev->csf.reset.state) ==
-					KBASE_CSF_RESET_GPU_NOT_PENDING) ||
-				(atomic_read(&kbdev->csf.reset.state) ==
-					KBASE_CSF_RESET_GPU_FAILED),
-				wait_timeout);
+	long remaining;
+
+	/* Inform lockdep we might be trying to wait on a reset (as
+	 * would've been done with down_read() - which has no 'timeout'
+	 * variant), then use wait_event_timeout() to implement the timed
+	 * wait.
+	 *
+	 * In CONFIG_PROVE_LOCKING builds, this should catch potential 'time
+	 * bound' deadlocks such as:
+	 * - incorrect lock order with respect to other locks
+	 * - current thread has prevented reset
+	 * - current thread is executing the reset worker
+	 */
+	might_lock_read(&kbdev->csf.reset.sem);
+
+	remaining = wait_event_timeout(
+		kbdev->csf.reset.wait,
+		(atomic_read(&kbdev->csf.reset.state) ==
+		 KBASE_CSF_RESET_GPU_NOT_PENDING) ||
+			(atomic_read(&kbdev->csf.reset.state) ==
+			 KBASE_CSF_RESET_GPU_FAILED),
+		wait_timeout);
 
 	if (!remaining) {
 		dev_warn(kbdev->dev, "Timed out waiting for the GPU reset to complete");
@@ -345,6 +618,7 @@ int kbase_reset_gpu_init(struct kbase_device *kbdev)
 	INIT_WORK(&kbdev->csf.reset.work, kbase_csf_reset_gpu_worker);
 
 	init_waitqueue_head(&kbdev->csf.reset.wait);
+	init_rwsem(&kbdev->csf.reset.sem);
 
 	return 0;
 }
diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_scheduler.c b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_scheduler.c
index a3017a7f25ba..f7a20d5f6678 100644
--- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_scheduler.c
+++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_scheduler.c
@@ -1,11 +1,12 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  *
- * (C) COPYRIGHT 2018-2020 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2018-2021 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,8 +17,6 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 #include <mali_kbase.h>
@@ -25,20 +24,16 @@
 #include <mali_kbase_ctx_sched.h>
 #include <mali_kbase_reset_gpu.h>
 #include <mali_kbase_as_fault_debugfs.h>
-#include <mali_kbase_bits.h>
 #include "mali_kbase_csf.h"
 #include "../tl/mali_kbase_tracepoints.h"
 #include "backend/gpu/mali_kbase_pm_internal.h"
 #include <linux/export.h>
-#include "mali_gpu_csf_registers.h"
-#include <mali_base_kernel.h>
+#include <uapi/gpu/arm/bifrost/csf/mali_gpu_csf_registers.h>
+#include <uapi/gpu/arm/bifrost/mali_base_kernel.h>
 
 /* Value to indicate that a queue group is not groups_to_schedule list */
 #define KBASEP_GROUP_PREPARED_SEQ_NUM_INVALID (U32_MAX)
 
-/* Waiting timeout for status change acknowledgment, in milliseconds */
-#define CSF_STATE_WAIT_TIMEOUT_MS (800) /* Relaxed to 800ms from 100ms */
-
 /* Waiting timeout for scheduler state change for descheduling a CSG */
 #define CSG_SCHED_STOP_TIMEOUT_MS (50)
 
@@ -52,8 +47,6 @@
 
 /* CSF scheduler time slice value */
 #define CSF_SCHEDULER_TIME_TICK_MS (100) /* 100 milliseconds */
-#define CSF_SCHEDULER_TIME_TICK_JIFFIES \
-	msecs_to_jiffies(CSF_SCHEDULER_TIME_TICK_MS)
 
 /*
  * CSF scheduler time threshold for converting "tock" requests into "tick" if
@@ -76,17 +69,16 @@
  */
 #define CSF_SCHEDULER_TIME_TOCK_JIFFIES 1 /* 1 jiffies-time */
 
-/* Command stream suspended and is idle (empty ring buffer) */
+/* CS suspended and is idle (empty ring buffer) */
 #define CS_IDLE_FLAG (1 << 0)
 
-/* Command stream suspended and is wait for a CQS condition */
+/* CS suspended and is waiting for a CQS condition */
 #define CS_WAIT_SYNC_FLAG (1 << 1)
 
-/* This is to avoid the immediate power down of GPU when then are no groups
- * left for scheduling. GPUCORE-24250 would add the proper GPU idle detection
- * logic.
+/* 2 GPU address space slots are reserved for MCU and privileged context for HW
+ * counter dumping. TODO remove the slot reserved for latter in GPUCORE-26293.
  */
-#define GPU_IDLE_POWEROFF_HYSTERESIS_DELAY msecs_to_jiffies((u32)10)
+#define NUM_RESERVED_AS_SLOTS (2)
 
 static int scheduler_group_schedule(struct kbase_queue_group *group);
 static void remove_group_from_idle_wait(struct kbase_queue_group *const group);
@@ -102,9 +94,95 @@ static struct kbase_queue_group *get_tock_top_group(
 static void scheduler_enable_tick_timer_nolock(struct kbase_device *kbdev);
 static int suspend_active_queue_groups(struct kbase_device *kbdev,
 				       unsigned long *slot_mask);
+static void schedule_in_cycle(struct kbase_queue_group *group, bool force);
 
 #define kctx_as_enabled(kctx) (!kbase_ctx_flag(kctx, KCTX_AS_DISABLED_ON_FAULT))
 
+/**
+ * tick_timer_callback() - Callback function for the scheduling tick hrtimer
+ *
+ * @timer: Pointer to the scheduling tick hrtimer
+ *
+ * This function will enqueue the scheduling tick work item for immediate
+ * execution, if it has not been queued already.
+ *
+ * Return: enum value to indicate that timer should not be restarted.
+ */
+static enum hrtimer_restart tick_timer_callback(struct hrtimer *timer)
+{
+	struct kbase_device *kbdev = container_of(timer, struct kbase_device,
+						  csf.scheduler.tick_timer);
+
+	kbase_csf_scheduler_advance_tick(kbdev);
+	return HRTIMER_NORESTART;
+}
+
+/**
+ * start_tick_timer() - Start the scheduling tick hrtimer.
+ *
+ * @kbdev: Pointer to the device
+ *
+ * This function will start the scheduling tick hrtimer and is supposed to
+ * be called only from the tick work item function. The tick hrtimer
+ * should not be active already.
+ */
+static void start_tick_timer(struct kbase_device *kbdev)
+{
+	struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
+	unsigned long flags;
+
+	lockdep_assert_held(&scheduler->lock);
+
+	spin_lock_irqsave(&scheduler->interrupt_lock, flags);
+	WARN_ON(scheduler->tick_timer_active);
+	if (likely(!work_pending(&scheduler->tick_work))) {
+		scheduler->tick_timer_active = true;
+
+		hrtimer_start(&scheduler->tick_timer,
+		    HR_TIMER_DELAY_MSEC(scheduler->csg_scheduling_period_ms),
+		    HRTIMER_MODE_REL);
+	}
+	spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
+}
+
+/**
+ * cancel_tick_timer() - Cancel the scheduling tick hrtimer
+ *
+ * @kbdev: Pointer to the device
+ */
+static void cancel_tick_timer(struct kbase_device *kbdev)
+{
+	struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
+	unsigned long flags;
+
+	spin_lock_irqsave(&scheduler->interrupt_lock, flags);
+	scheduler->tick_timer_active = false;
+	spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
+	hrtimer_cancel(&scheduler->tick_timer);
+}
+
+/**
+ * enqueue_tick_work() - Enqueue the scheduling tick work item
+ *
+ * @kbdev: Pointer to the device
+ *
+ * This function will queue the scheduling tick work item for immediate
+ * execution. This shall only be called when both the tick hrtimer and tick
+ * work item are not active/pending.
+ */
+static void enqueue_tick_work(struct kbase_device *kbdev)
+{
+	struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
+	unsigned long flags;
+
+	lockdep_assert_held(&scheduler->lock);
+
+	spin_lock_irqsave(&scheduler->interrupt_lock, flags);
+	WARN_ON(scheduler->tick_timer_active);
+	queue_work(scheduler->wq, &scheduler->tick_work);
+	spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
+}
+
 static void release_doorbell(struct kbase_device *kbdev, int doorbell_nr)
 {
 	WARN_ON(doorbell_nr >= CSF_NUM_DOORBELL);
@@ -176,7 +254,7 @@ static void assign_user_doorbell_to_queue(struct kbase_device *kbdev,
 	mutex_lock(&kbdev->csf.reg_lock);
 
 	/* If bind operation for the queue hasn't completed yet, then the
-	 * the command stream interface can't be programmed for the queue
+	 * CSI can't be programmed for the queue
 	 * (even in stopped state) and so the doorbell also can't be assigned
 	 * to it.
 	 */
@@ -225,8 +303,7 @@ static u32 get_nr_active_csgs(struct kbase_device *kbdev)
 /**
  * csgs_active - returns true if any of CSG slots are in use
  *
- * @kbdev: Instance of a GPU platform device that implements a command
- *         stream front-end interface.
+ * @kbdev: Instance of a GPU platform device that implements a CSF interface.
  *
  * Return: the interface is actively engaged flag.
  */
@@ -238,7 +315,7 @@ bool csgs_active(struct kbase_device *kbdev)
 	nr_active_csgs = get_nr_active_csgs(kbdev);
 	mutex_unlock(&kbdev->csf.scheduler.lock);
 
-	/* Right now if any of the command stream group interfaces are in use
+	/* Right now if any of the CSG interfaces are in use
 	 * then we need to assume that there is some work pending.
 	 * In future when we have IDLE notifications from firmware implemented
 	 * then we would have a better idea of the pending work.
@@ -250,8 +327,7 @@ bool csgs_active(struct kbase_device *kbdev)
  * csg_slot_in_use - returns true if a queue group has been programmed on a
  *                   given CSG slot.
  *
- * @kbdev: Instance of a GPU platform device that implements a command
- *         stream front-end interface.
+ * @kbdev: Instance of a GPU platform device that implements a CSF interface.
  * @slot:  Index/number of the CSG slot in question.
  *
  * Return: the interface is actively engaged flag.
@@ -296,6 +372,45 @@ static bool queue_group_scheduled_locked(struct kbase_queue_group *group)
 	return queue_group_scheduled(group);
 }
 
+/**
+ * scheduler_wait_protm_quit() - Wait for GPU to exit protected mode.
+ *
+ * @kbdev: Pointer to the GPU device
+ *
+ * This function waits for the GPU to exit protected mode which is confirmed
+ * when active_protm_grp is set to NULL.
+ */
+static void scheduler_wait_protm_quit(struct kbase_device *kbdev)
+{
+	struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
+	long wt = kbase_csf_timeout_in_jiffies(kbdev->csf.fw_timeout_ms);
+	long remaining;
+
+	lockdep_assert_held(&scheduler->lock);
+
+	remaining = wait_event_timeout(kbdev->csf.event_wait,
+			!kbase_csf_scheduler_protected_mode_in_use(kbdev), wt);
+
+	if (!remaining)
+		dev_warn(kbdev->dev, "Timeout, protm_quit wait skipped");
+}
+
+/**
+ * scheduler_force_protm_exit() - Force GPU to exit protected mode.
+ *
+ * @kbdev: Pointer to the GPU device
+ *
+ * This function sends a ping request to the firmware and waits for the GPU
+ * to exit protected mode.
+ */
+static void scheduler_force_protm_exit(struct kbase_device *kbdev)
+{
+	lockdep_assert_held(&kbdev->csf.scheduler.lock);
+
+	kbase_csf_firmware_ping(kbdev);
+	scheduler_wait_protm_quit(kbdev);
+}
+
 /**
  * scheduler_timer_is_enabled_nolock() - Check if the scheduler wakes up
  * automatically for periodic tasks.
@@ -314,6 +429,54 @@ static bool scheduler_timer_is_enabled_nolock(struct kbase_device *kbdev)
 	return kbdev->csf.scheduler.timer_enabled;
 }
 
+static void enable_gpu_idle_fw_timer(struct kbase_device *kbdev)
+{
+	struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
+	unsigned long flags;
+
+	lockdep_assert_held(&scheduler->lock);
+
+	if (scheduler->gpu_idle_fw_timer_enabled)
+		return;
+
+	spin_lock_irqsave(&scheduler->interrupt_lock, flags);
+
+	/* Updating gpu_idle_fw_timer_enabled requires holding interrupt_lock */
+	scheduler->gpu_idle_fw_timer_enabled = true;
+	kbase_csf_firmware_enable_gpu_idle_timer(kbdev);
+
+	spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
+}
+
+static void disable_gpu_idle_fw_timer_locked(struct kbase_device *kbdev)
+{
+	struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
+
+	lockdep_assert_held(&scheduler->lock);
+	lockdep_assert_held(&scheduler->interrupt_lock);
+
+	/* Update of the timer_enabled flag requires holding interrupt_lock */
+	if (scheduler->gpu_idle_fw_timer_enabled) {
+		scheduler->gpu_idle_fw_timer_enabled = false;
+		kbase_csf_firmware_disable_gpu_idle_timer(kbdev);
+	}
+}
+
+static void disable_gpu_idle_fw_timer(struct kbase_device *kbdev)
+{
+	struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
+	unsigned long flags;
+
+	lockdep_assert_held(&scheduler->lock);
+
+	if (!scheduler->gpu_idle_fw_timer_enabled)
+		return;
+
+	spin_lock_irqsave(&scheduler->interrupt_lock, flags);
+	disable_gpu_idle_fw_timer_locked(kbdev);
+	spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
+}
+
 static void scheduler_wakeup(struct kbase_device *kbdev, bool kick)
 {
 	struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
@@ -321,7 +484,7 @@ static void scheduler_wakeup(struct kbase_device *kbdev, bool kick)
 	lockdep_assert_held(&scheduler->lock);
 
 	if (scheduler->state == SCHED_SUSPENDED) {
-		dev_info(kbdev->dev, "Re-activating the Scheduler");
+		dev_dbg(kbdev->dev, "Re-activating the Scheduler");
 		kbase_csf_scheduler_pm_active(kbdev);
 		scheduler->state = SCHED_INACTIVE;
 
@@ -367,14 +530,20 @@ static void update_idle_suspended_group_state(struct kbase_queue_group *group)
 		remove_group_from_idle_wait(group);
 		insert_group_to_runnable(scheduler, group,
 					 KBASE_CSF_GROUP_SUSPENDED);
-	} else {
-		if (group->run_state == KBASE_CSF_GROUP_SUSPENDED_ON_IDLE)
-			group->run_state = KBASE_CSF_GROUP_SUSPENDED;
-		else
-			return;
-	}
+	} else if (group->run_state == KBASE_CSF_GROUP_SUSPENDED_ON_IDLE) {
+		group->run_state = KBASE_CSF_GROUP_SUSPENDED;
 
-	atomic_inc(&scheduler->non_idle_suspended_grps);
+		/* If scheduler is not suspended and the given group's
+		 * static priority (reflected by the scan_seq_num) is inside
+		 * the current tick slot-range, schedule an async tock.
+		 */
+		if (scheduler->state != SCHED_SUSPENDED &&
+		    group->scan_seq_num < scheduler->num_csg_slots_for_tick)
+			schedule_in_cycle(group, true);
+	} else
+		return;
+
+	atomic_inc(&scheduler->non_idle_offslot_grps);
 }
 
 int kbase_csf_scheduler_group_get_slot_locked(struct kbase_queue_group *group)
@@ -456,8 +625,8 @@ static int halt_stream_sync(struct kbase_queue *queue)
 	struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface;
 	struct kbase_csf_cmd_stream_group_info *ginfo;
 	struct kbase_csf_cmd_stream_info *stream;
-	long remaining =
-		kbase_csf_timeout_in_jiffies(CSF_STATE_WAIT_TIMEOUT_MS);
+	int csi_index = queue->csi_index;
+	long remaining = kbase_csf_timeout_in_jiffies(kbdev->csf.fw_timeout_ms);
 
 	if (WARN_ON(!group) ||
 	    WARN_ON(!kbasep_csf_scheduler_group_is_on_slot_locked(group)))
@@ -465,7 +634,7 @@ static int halt_stream_sync(struct kbase_queue *queue)
 
 	lockdep_assert_held(&kbdev->csf.scheduler.lock);
 	ginfo = &global_iface->groups[group->csg_nr];
-	stream = &ginfo->streams[queue->csi_index];
+	stream = &ginfo->streams[csi_index];
 
 	if (CS_REQ_STATE_GET(kbase_csf_firmware_cs_input_read(stream, CS_REQ)) ==
 			CS_REQ_STATE_START) {
@@ -476,15 +645,15 @@ static int halt_stream_sync(struct kbase_queue *queue)
 
 		if (!remaining) {
 			dev_warn(kbdev->dev, "Timed out waiting for queue to start on csi %d bound to group %d on slot %d",
-				queue->csi_index, group->handle, group->csg_nr);
-			if (kbase_prepare_to_reset_gpu(kbdev))
+				 csi_index, group->handle, group->csg_nr);
+			if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_NONE))
 				kbase_reset_gpu(kbdev);
 
 			return -ETIMEDOUT;
 		}
 
 		remaining =
-			kbase_csf_timeout_in_jiffies(CSF_STATE_WAIT_TIMEOUT_MS);
+			kbase_csf_timeout_in_jiffies(kbdev->csf.fw_timeout_ms);
 	}
 
 	/* Set state to STOP */
@@ -492,7 +661,7 @@ static int halt_stream_sync(struct kbase_queue *queue)
 					 CS_REQ_STATE_MASK);
 
 	KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, CSI_STOP_REQUESTED, group, queue, 0u);
-	kbase_csf_ring_cs_kernel_doorbell(kbdev, queue);
+	kbase_csf_ring_cs_kernel_doorbell(kbdev, csi_index, group->csg_nr, true);
 
 	/* Timed wait */
 	remaining = wait_event_timeout(kbdev->csf.event_wait,
@@ -502,7 +671,11 @@ static int halt_stream_sync(struct kbase_queue *queue)
 	if (!remaining) {
 		dev_warn(kbdev->dev, "Timed out waiting for queue to stop on csi %d bound to group %d on slot %d",
 			 queue->csi_index, group->handle, group->csg_nr);
-		if (kbase_prepare_to_reset_gpu(kbdev))
+
+		/* TODO GPUCORE-25328: The CSG can't be terminated, the GPU
+		 * will be reset as a work-around.
+		 */
+		if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_NONE))
 			kbase_reset_gpu(kbdev);
 	}
 	return (remaining) ? 0 : -ETIMEDOUT;
@@ -536,7 +709,7 @@ static bool can_halt_stream(struct kbase_device *kbdev,
  * @queue: Pointer to the GPU queue to stop.
  *
  * This function handles stopping gpu queues for groups that are either not on
- * a command stream group slot or are on the slot but undergoing transition to
+ * a CSG slot or are on the slot but undergoing transition to
  * resume or suspend states.
  * It waits until the queue group is scheduled on a slot and starts running,
  * which is needed as groups that were suspended may need to resume all queues
@@ -576,26 +749,6 @@ static int sched_halt_stream(struct kbase_queue *queue)
 		}
 	}
 retry:
-	/* First wait for the group to reach a stable state. IDLE state is
-	 * an intermediate state that is only set by Scheduler at the start
-	 * of a tick (prior to scanout) for groups that received idle
-	 * notification, then later the idle group is moved to one of the
-	 * suspended states or the runnable state.
-	 */
-	while (group->run_state == KBASE_CSF_GROUP_IDLE) {
-		mutex_unlock(&scheduler->lock);
-		remaining = wait_event_timeout(kbdev->csf.event_wait,
-				group->run_state != KBASE_CSF_GROUP_IDLE,
-				CSF_STATE_WAIT_TIMEOUT_MS);
-		mutex_lock(&scheduler->lock);
-		if (!remaining) {
-			dev_warn(kbdev->dev,
-				 "Timed out waiting for state change of Group-%d when stopping a queue on csi %d",
-				 group->handle, queue->csi_index);
-		}
-	}
-
-	WARN_ON(group->run_state == KBASE_CSF_GROUP_IDLE);
 	/* Update the group state so that it can get scheduled soon */
 	update_idle_suspended_group_state(group);
 
@@ -618,9 +771,10 @@ retry:
 	 * CSF context is locked. Therefore, the scheduler would be
 	 * the only one to update the run_state of the group.
 	 */
-	remaining = wait_event_timeout(kbdev->csf.event_wait,
-		can_halt_stream(kbdev, group),
-		kbase_csf_timeout_in_jiffies(20 * CSF_SCHEDULER_TIME_TICK_MS));
+	remaining = wait_event_timeout(
+		kbdev->csf.event_wait, can_halt_stream(kbdev, group),
+		kbase_csf_timeout_in_jiffies(
+			20 * kbdev->csf.scheduler.csg_scheduling_period_ms));
 
 	mutex_lock(&scheduler->lock);
 
@@ -628,14 +782,14 @@ retry:
 		slot = kbase_csf_scheduler_group_get_slot(group);
 
 		/* If the group is still on slot and slot is in running state
-		 * then explicitly stop the command stream interface of the
+		 * then explicitly stop the CSI of the
 		 * queue. Otherwise there are different cases to consider
 		 *
 		 * - If the queue group was already undergoing transition to
 		 *   resume/start state when this function was entered then it
-		 *   would not have disabled the command stream interface of the
+		 *   would not have disabled the CSI of the
 		 *   queue being stopped and the previous wait would have ended
-		 *   once the slot was in a running state with command stream
+		 *   once the slot was in a running state with CS
 		 *   interface still enabled.
 		 *   Now the group is going through another transition either
 		 *   to a suspend state or to a resume state (it could have
@@ -643,17 +797,17 @@ retry:
 		 *   In both scenarios need to wait again for the group to
 		 *   come on a slot and that slot to reach the running state,
 		 *   as that would guarantee that firmware will observe the
-		 *   command stream interface as disabled.
+		 *   CSI as disabled.
 		 *
 		 * - If the queue group was either off the slot or was
 		 *   undergoing transition to suspend state on entering this
 		 *   function, then the group would have been resumed with the
-		 *   queue's command stream interface in disabled state.
+		 *   queue's CSI in disabled state.
 		 *   So now if the group is undergoing another transition
 		 *   (after the resume) then just need to wait for the state
-		 *   bits in the ACK register of command stream interface to be
+		 *   bits in the ACK register of CSI to be
 		 *   set to STOP value. It is expected that firmware will
-		 *   process the stop/disable request of the command stream
+		 *   process the stop/disable request of the CS
 		 *   interface after resuming the group before it processes
 		 *   another state change request of the group.
 		 */
@@ -678,9 +832,11 @@ retry:
 				/* Timed wait */
 				remaining = wait_event_timeout(
 					kbdev->csf.event_wait,
-					(CS_ACK_STATE_GET(kbase_csf_firmware_cs_output(stream, CS_ACK))
-					== CS_ACK_STATE_STOP),
-					CSF_STATE_WAIT_TIMEOUT_MS);
+					(CS_ACK_STATE_GET(
+						 kbase_csf_firmware_cs_output(
+							 stream, CS_ACK)) ==
+					 CS_ACK_STATE_STOP),
+					kbase_csf_timeout_in_jiffies(kbdev->csf.fw_timeout_ms));
 
 				if (!remaining) {
 					dev_warn(kbdev->dev,
 	return err;
 }
 
-static int wait_gpu_reset(struct kbase_device *kbdev)
-{
-	int ret = 0;
-
-	lockdep_assert_held(&kbdev->csf.scheduler.lock);
-
-	while (kbase_reset_gpu_is_active(kbdev) && !ret) {
-		mutex_unlock(&kbdev->csf.scheduler.lock);
-		ret = kbase_reset_gpu_wait(kbdev);
-		mutex_lock(&kbdev->csf.scheduler.lock);
-	}
-
-	return ret;
-}
-
 int kbase_csf_scheduler_queue_stop(struct kbase_queue *queue)
 {
 	struct kbase_device *kbdev = queue->kctx->kbdev;
@@ -725,14 +866,13 @@ int kbase_csf_scheduler_queue_stop(struct kbase_queue *queue)
 	if (WARN_ON(!group))
 		return -EINVAL;
 
+	kbase_reset_gpu_assert_failed_or_prevented(kbdev);
 	lockdep_assert_held(&queue->kctx->csf.lock);
 	mutex_lock(&kbdev->csf.scheduler.lock);
 
 	queue->enabled = false;
 	KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, CSI_STOP, group, queue, cs_enabled);
 
-	wait_gpu_reset(kbdev);
-
 	if (cs_enabled && queue_group_scheduled_locked(group)) {
 		struct kbase_csf_csg_slot *const csg_slot =
 			kbdev->csf.scheduler.csg_slots;
@@ -780,11 +920,12 @@ static void program_cs_extract_init(struct kbase_queue *queue)
 }
 
 static void program_cs(struct kbase_device *kbdev,
-		struct kbase_queue *queue)
+		struct kbase_queue *queue, bool ring_csg_doorbell)
 {
 	struct kbase_queue_group *group = queue->group;
 	struct kbase_csf_cmd_stream_group_info *ginfo;
 	struct kbase_csf_cmd_stream_info *stream;
+	int csi_index = queue->csi_index;
 	u64 user_input;
 	u64 user_output;
 
@@ -798,8 +939,8 @@ static void program_cs(struct kbase_device *kbdev,
 
 	ginfo = &kbdev->csf.global_iface.groups[group->csg_nr];
 
-	if (WARN_ON(queue->csi_index < 0) ||
-	    WARN_ON(queue->csi_index >= ginfo->stream_num))
+	if (WARN_ON(csi_index < 0) ||
+	    WARN_ON(csi_index >= ginfo->stream_num))
 		return;
 
 	assign_user_doorbell_to_queue(kbdev, queue);
@@ -811,7 +952,7 @@ static void program_cs(struct kbase_device *kbdev,
 	if (queue->enabled && queue_group_suspended_locked(group))
 		program_cs_extract_init(queue);
 
-	stream = &ginfo->streams[queue->csi_index];
+	stream = &ginfo->streams[csi_index];
 
 	kbase_csf_firmware_cs_input(stream, CS_BASE_LO,
 				    queue->base_addr & 0xFFFFFFFF);
@@ -839,8 +980,8 @@ static void program_cs(struct kbase_device *kbdev,
 	kbase_csf_firmware_cs_input(stream, CS_ACK_IRQ_MASK, ~((u32)0));
 
 	/*
-	 * Enable the CSG idle notification once the stream's ringbuffer
-	 * becomes empty or the stream becomes sync_idle, waiting sync update
+	 * Enable the CSG idle notification once the CS's ringbuffer
+	 * becomes empty or the CS becomes sync_idle, waiting sync update
 	 * or protected mode switch.
 	 */
 	kbase_csf_firmware_cs_input_mask(stream, CS_REQ,
@@ -854,7 +995,8 @@ static void program_cs(struct kbase_device *kbdev,
 
 	KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, CSI_START, group, queue, queue->enabled);
 
-	kbase_csf_ring_cs_kernel_doorbell(kbdev, queue);
+	kbase_csf_ring_cs_kernel_doorbell(kbdev, csi_index, group->csg_nr,
+					  ring_csg_doorbell);
 	update_hw_active(queue, true);
 }
 
@@ -866,6 +1008,7 @@ int kbase_csf_scheduler_queue_start(struct kbase_queue *queue)
 	int err = 0;
 	bool evicted = false;
 
+	kbase_reset_gpu_assert_prevented(kbdev);
 	lockdep_assert_held(&queue->kctx->csf.lock);
 
 	if (WARN_ON(!group || queue->bind_state != KBASE_CSF_QUEUE_BOUND))
@@ -874,12 +1017,8 @@ int kbase_csf_scheduler_queue_start(struct kbase_queue *queue)
 	mutex_lock(&kbdev->csf.scheduler.lock);
 
 	KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, QUEUE_START, group, queue, group->run_state);
-	err = wait_gpu_reset(kbdev);
 
-	if (err) {
-		dev_warn(kbdev->dev, "Unsuccessful GPU reset detected when kicking queue (csi_index=%d) of group %d",
-			 queue->csi_index, group->handle);
-	} else if (group->run_state == KBASE_CSF_GROUP_FAULT_EVICTED) {
+	if (group->run_state == KBASE_CSF_GROUP_FAULT_EVICTED) {
 		err = -EIO;
 		evicted = true;
 	} else if ((group->run_state == KBASE_CSF_GROUP_SUSPENDED_ON_WAIT_SYNC)
@@ -907,7 +1046,7 @@ int kbase_csf_scheduler_queue_start(struct kbase_queue *queue)
 					 */
 					kbase_csf_ring_cs_user_doorbell(kbdev, queue);
 				} else
-					program_cs(kbdev, queue);
+					program_cs(kbdev, queue, true);
 			}
 			queue_delayed_work(system_long_wq,
 				&kbdev->csf.scheduler.ping_work,
@@ -1031,7 +1170,7 @@ static void halt_csg_slot(struct kbase_queue_group *group, bool suspend)
 	/* When in transition, wait for it to complete */
 	if (atomic_read(&csg_slot[slot].state) == CSG_SLOT_READY2RUN) {
 		long remaining =
-		      kbase_csf_timeout_in_jiffies(CSF_STATE_WAIT_TIMEOUT_MS);
+			kbase_csf_timeout_in_jiffies(kbdev->csf.fw_timeout_ms);
 
 		dev_dbg(kbdev->dev, "slot %d wait for up-running\n", slot);
 		remaining = wait_event_timeout(kbdev->csf.event_wait,
@@ -1085,7 +1224,6 @@ static void suspend_csg_slot(struct kbase_queue_group *group)
  */
 static bool evaluate_sync_update(struct kbase_queue *queue)
 {
-	enum kbase_csf_group_state run_state;
 	struct kbase_vmap_struct *mapping;
 	bool updated = false;
 	u32 *sync_ptr;
@@ -1094,12 +1232,6 @@ static bool evaluate_sync_update(struct kbase_queue *queue)
 	if (WARN_ON(!queue))
 		return false;
 
-	run_state = queue->group->run_state;
-
-	if (WARN_ON((run_state != KBASE_CSF_GROUP_IDLE) &&
-		    (run_state != KBASE_CSF_GROUP_SUSPENDED_ON_WAIT_SYNC)))
-		return false;
-
 	lockdep_assert_held(&queue->kctx->kbdev->csf.scheduler.lock);
 
 	sync_ptr = kbase_phy_alloc_mapping_get(queue->kctx, queue->sync_ptr,
@@ -1138,7 +1270,7 @@ static bool evaluate_sync_update(struct kbase_queue *queue)
 /**
  * save_slot_cs() -  Save the state for blocked GPU command queue.
  *
- * @ginfo: Pointer to the command stream group interface used by the group
+ * @ginfo: Pointer to the CSG interface used by the group
  *         the queue is bound to.
  * @queue: Pointer to the GPU command queue.
  *
@@ -1158,8 +1290,6 @@ bool save_slot_cs(struct kbase_csf_cmd_stream_group_info const *const ginfo,
 	u32 status = kbase_csf_firmware_cs_output(stream, CS_STATUS_WAIT);
 	bool is_waiting = false;
 
-	WARN_ON(queue->group->run_state != KBASE_CSF_GROUP_IDLE);
-
 	if (CS_STATUS_WAIT_SYNC_WAIT_GET(status)) {
 		queue->status_wait = status;
 		queue->sync_ptr = kbase_csf_firmware_cs_output(stream,
@@ -1169,6 +1299,13 @@ bool save_slot_cs(struct kbase_csf_cmd_stream_group_info const *const ginfo,
 		queue->sync_value = kbase_csf_firmware_cs_output(stream,
 			CS_STATUS_WAIT_SYNC_VALUE);
 
+		queue->sb_status = CS_STATUS_SCOREBOARDS_NONZERO_GET(
+			kbase_csf_firmware_cs_output(stream,
+						     CS_STATUS_SCOREBOARDS));
+		queue->blocked_reason = CS_STATUS_BLOCKED_REASON_REASON_GET(
+			kbase_csf_firmware_cs_output(stream,
+						     CS_STATUS_BLOCKED_REASON));
+
 		if (!evaluate_sync_update(queue)) {
 			is_waiting = true;
 		} else {
@@ -1260,7 +1397,7 @@ void insert_group_to_runnable(struct kbase_csf_scheduler *const scheduler,
 
 	WARN_ON(group->run_state != KBASE_CSF_GROUP_INACTIVE);
 
-	if (WARN_ON(group->priority >= BASE_QUEUE_GROUP_PRIORITY_COUNT))
+	if (WARN_ON(group->priority >= KBASE_QUEUE_GROUP_PRIORITY_COUNT))
 		return;
 
 	group->run_state = run_state;
@@ -1285,8 +1422,7 @@ void insert_group_to_runnable(struct kbase_csf_scheduler *const scheduler,
 	     scheduler->state == SCHED_SUSPENDED)) {
 		dev_dbg(kbdev->dev, "Kicking scheduler on first runnable group\n");
 		/* Fire a scheduling to start the time-slice */
-		mod_delayed_work(kbdev->csf.scheduler.wq,
-				 &kbdev->csf.scheduler.tick_work, 0);
+		enqueue_tick_work(kbdev);
 	} else
 		schedule_in_cycle(group, false);
 
@@ -1343,11 +1479,12 @@ void remove_group_from_runnable(struct kbase_csf_scheduler *const scheduler,
 
 	WARN_ON(scheduler->total_runnable_grps == 0);
 	scheduler->total_runnable_grps--;
-	if (!scheduler->total_runnable_grps &&
-	    scheduler->state != SCHED_SUSPENDED) {
-		dev_dbg(kctx->kbdev->dev, "Scheduler idle as no runnable groups");
-		mod_delayed_work(system_wq, &scheduler->gpu_idle_work,
-				 GPU_IDLE_POWEROFF_HYSTERESIS_DELAY);
+	if (!scheduler->total_runnable_grps) {
+		dev_dbg(kctx->kbdev->dev, "Scheduler idle has no runnable groups");
+		cancel_tick_timer(kctx->kbdev);
+		WARN_ON(atomic_read(&scheduler->non_idle_offslot_grps));
+		if (scheduler->state != SCHED_SUSPENDED)
+			queue_work(system_wq, &scheduler->gpu_idle_work);
 	}
 	KBASE_KTRACE_ADD_CSF_GRP(kctx->kbdev, SCHEDULER_TOP_GRP, scheduler->top_grp,
 			scheduler->num_active_address_spaces |
@@ -1396,13 +1533,88 @@ static void deschedule_idle_wait_group(struct kbase_csf_scheduler *scheduler,
 	insert_group_to_idle_wait(group);
 }
 
-static bool confirm_cs_idle(struct kbase_queue *queue)
+static void update_offslot_non_idle_cnt_for_faulty_grp(struct kbase_queue_group *group)
 {
+	struct kbase_csf_scheduler *const sched = &group->kctx->kbdev->csf.scheduler;
+
+	lockdep_assert_held(&sched->lock);
+
+	/* Decrement only when the group was prepared ahead of the non-idle scanout count */
+	if (group->prepared_seq_num < sched->non_idle_scanout_grps)
+		atomic_dec(&sched->non_idle_offslot_grps);
+}
+
+static void update_offslot_non_idle_cnt_for_onslot_grp(struct kbase_queue_group *group)
+{
+	struct kbase_csf_scheduler *const sched = &group->kctx->kbdev->csf.scheduler;
+
+	lockdep_assert_held(&sched->lock);
+
+	/* A negative CSG slot number is unexpected here, hence the warning */
+	WARN_ON(group->csg_nr < 0);
+
+	if (group->prepared_seq_num < sched->non_idle_scanout_grps)
+		atomic_dec(&sched->non_idle_offslot_grps);
+}
+
+static void update_offslot_non_idle_cnt_on_grp_suspend(
+				struct kbase_queue_group *group)
+{
+	struct kbase_csf_scheduler *const sched =
+		&group->kctx->kbdev->csf.scheduler;
+	const bool suspended = (group->run_state == KBASE_CSF_GROUP_SUSPENDED);
+	bool in_non_idle_scanout;
+
+	lockdep_assert_held(&sched->lock);
+
+	/* Only while the scheduler is busy (active phase, or asynchronously
+	 * entering protected mode) is the scanout position taken into account.
+	 */
+	in_non_idle_scanout = (sched->state == SCHED_BUSY) &&
+		(group->prepared_seq_num < sched->non_idle_scanout_grps);
+
+	if (in_non_idle_scanout) {
+		/* Group did not end up in the SUSPENDED state: drop the count */
+		if (!suspended)
+			atomic_dec(&sched->non_idle_offslot_grps);
+	} else if (suspended) {
+		/* Tagged as on-slot idle at scanout, or an async phase */
+		atomic_inc(&sched->non_idle_offslot_grps);
+	}
+}
+
+static bool confirm_cmd_buf_empty(struct kbase_queue *queue)
+{
+	bool cs_empty;
+	bool cs_idle;
+	u32 sb_status = 0;
+
+	struct kbase_device const *const kbdev = queue->group->kctx->kbdev;
+	struct kbase_csf_global_iface const *const iface =
+		&kbdev->csf.global_iface;
+
+	u32 glb_version = iface->version;
+
 	u64 *input_addr = (u64 *)queue->user_io_addr;
 	u64 *output_addr = (u64 *)(queue->user_io_addr + PAGE_SIZE);
 
-	return (input_addr[CS_INSERT_LO / sizeof(u64)] ==
-		output_addr[CS_EXTRACT_LO / sizeof(u64)]);
+	if (glb_version >= kbase_csf_interface_version(1, 0, 0)) {
+		/* CS_STATUS_SCOREBOARDS is supported from CSF 1.0 */
+		struct kbase_csf_cmd_stream_group_info const *const ginfo =
+			&kbdev->csf.global_iface.groups[queue->group->csg_nr];
+		struct kbase_csf_cmd_stream_info const *const stream =
+			&ginfo->streams[queue->csi_index];
+
+		sb_status = CS_STATUS_SCOREBOARDS_NONZERO_GET(
+			kbase_csf_firmware_cs_output(stream,
+						     CS_STATUS_SCOREBOARDS));
+	}
+
+	cs_empty = (input_addr[CS_INSERT_LO / sizeof(u64)] ==
+		    output_addr[CS_EXTRACT_LO / sizeof(u64)]);
+	cs_idle = cs_empty && (!sb_status);
+
+	return cs_idle;
 }
 
 static void save_csg_slot(struct kbase_queue_group *group)
@@ -1424,48 +1636,58 @@ static void save_csg_slot(struct kbase_queue_group *group)
 
 	if (!WARN_ON((state != CSG_ACK_STATE_SUSPEND) &&
 		     (state != CSG_ACK_STATE_TERMINATE))) {
-		int i;
+		u32 max_streams = ginfo->stream_num;
+		u32 i;
+		bool sync_wait = false;
+		bool idle = kbase_csf_firmware_csg_output(ginfo, CSG_STATUS_STATE) &
+			    CSG_STATUS_STATE_IDLE_MASK;
 
 #ifdef CONFIG_MALI_BIFROST_NO_MALI
-		for (i = 0; i < MAX_SUPPORTED_STREAMS_PER_GROUP; i++)
+		for (i = 0; i < max_streams; i++)
 			update_hw_active(group->bound_queues[i], false);
 #endif
-		if (group->run_state == KBASE_CSF_GROUP_IDLE) {
-			bool sync_wait = false;
-			bool idle = true;
-
-			/* Loop through all bound CSs & save their context */
-			for (i = 0; i < MAX_SUPPORTED_STREAMS_PER_GROUP; i++) {
-				struct kbase_queue *const queue =
+		for (i = 0; idle && i < max_streams; i++) {
+			struct kbase_queue *const queue =
 					group->bound_queues[i];
 
-				if (queue && queue->enabled) {
-					if (save_slot_cs(ginfo, queue))
-						sync_wait = true;
-					else if (idle)
-						idle = confirm_cs_idle(queue);
-				}
-			}
+			if (!queue || !queue->enabled)
+				continue;
 
+			if (save_slot_cs(ginfo, queue))
+				sync_wait = true;
+			else {
+				/* Need to confirm if ringbuffer of the GPU
+				 * queue is empty or not. A race can arise
+				 * between the flush of GPU queue and suspend
+				 * of CSG. If a queue is flushed after FW has
+				 * set the IDLE bit in CSG_STATUS_STATE, then
+				 * Scheduler will incorrectly consider CSG
+				 * as idle. And there may not be any further
+				 * flush call for the GPU queue, which would
+				 * have de-idled the CSG.
+				 */
+				idle = confirm_cmd_buf_empty(queue);
+			}
+		}
+
+		if (idle) {
 			/* Take the suspended group out of the runnable_groups
 			 * list of the context and move it to the
 			 * idle_wait_groups list.
 			 */
-			if (sync_wait && idle)
+			if (sync_wait)
 				deschedule_idle_wait_group(scheduler, group);
-			else if (idle) {
+			else {
 				group->run_state =
 					KBASE_CSF_GROUP_SUSPENDED_ON_IDLE;
-				dev_dbg(kbdev->dev, "Group-%d suspended: idle\n",
+				dev_dbg(kbdev->dev, "Group-%d suspended: idle",
 					group->handle);
-			} else {
-				group->run_state = KBASE_CSF_GROUP_SUSPENDED;
-				atomic_inc(&scheduler->non_idle_suspended_grps);
 			}
 		} else {
 			group->run_state = KBASE_CSF_GROUP_SUSPENDED;
-			atomic_inc(&scheduler->non_idle_suspended_grps);
 		}
+
+		update_offslot_non_idle_cnt_on_grp_suspend(group);
 	}
 }
 
@@ -1562,10 +1784,14 @@ static void update_csg_slot_priority(struct kbase_queue_group *group, u8 prio)
 	csg_slot = &kbdev->csf.scheduler.csg_slots[slot];
 	ginfo = &kbdev->csf.global_iface.groups[slot];
 
+	/* CSGs remaining on-slot can be either idle or runnable.
+	 * This also applies in protected mode.
+	 */
 	WARN_ON(!((group->run_state == KBASE_CSF_GROUP_RUNNABLE) ||
 		(group->run_state == KBASE_CSF_GROUP_IDLE)));
 
-	group->run_state = KBASE_CSF_GROUP_RUNNABLE;
+	/* Update consumes a group from scanout */
+	update_offslot_non_idle_cnt_for_onslot_grp(group);
 
 	if (csg_slot->priority == prio)
 		return;
@@ -1578,9 +1804,9 @@ static void update_csg_slot_priority(struct kbase_queue_group *group, u8 prio)
 
 	spin_lock_irqsave(&kbdev->csf.scheduler.interrupt_lock, flags);
 	csg_req = kbase_csf_firmware_csg_output(ginfo, CSG_ACK);
-	csg_req ^= CSG_REQ_EP_CFG;
+	csg_req ^= CSG_REQ_EP_CFG_MASK;
 	kbase_csf_firmware_csg_input_mask(ginfo, CSG_REQ, csg_req,
-					  CSG_REQ_EP_CFG);
+					  CSG_REQ_EP_CFG_MASK);
 	spin_unlock_irqrestore(&kbdev->csf.scheduler.interrupt_lock, flags);
 
 	csg_slot->priority = prio;
@@ -1641,8 +1867,8 @@ static void program_csg_slot(struct kbase_queue_group *group, s8 slot,
 	mutex_unlock(&kbdev->mmu_hw_mutex);
 
 	if (kctx->as_nr == KBASEP_AS_NR_INVALID) {
-		dev_dbg(kbdev->dev, "Could not get a valid AS for group %d of context %d_%d on slot %d\n",
-			group->handle, kctx->tgid, kctx->id, slot);
+		dev_warn(kbdev->dev, "Could not get a valid AS for group %d of context %d_%d on slot %d\n",
+			 group->handle, kctx->tgid, kctx->id, slot);
 		return;
 	}
 
@@ -1659,7 +1885,7 @@ static void program_csg_slot(struct kbase_queue_group *group, s8 slot,
 		struct kbase_queue *queue = group->bound_queues[i];
 
 		if (queue)
-			program_cs(kbdev, queue);
+			program_cs(kbdev, queue, false);
 	}
 
 
@@ -1675,6 +1901,7 @@ static void program_csg_slot(struct kbase_queue_group *group, s8 slot,
 	kbase_csf_firmware_csg_input(ginfo, CSG_ALLOW_OTHER,
 				     tiler_mask & U32_MAX);
 
+
 	ep_cfg = CSG_EP_REQ_COMPUTE_EP_SET(ep_cfg, compute_max);
 	ep_cfg = CSG_EP_REQ_FRAGMENT_EP_SET(ep_cfg, fragment_max);
 	ep_cfg = CSG_EP_REQ_TILER_EP_SET(ep_cfg, tiler_max);
@@ -1704,16 +1931,13 @@ static void program_csg_slot(struct kbase_queue_group *group, s8 slot,
 
 	spin_lock_irqsave(&kbdev->csf.scheduler.interrupt_lock, flags);
 	csg_req = kbase_csf_firmware_csg_output(ginfo, CSG_ACK);
-	csg_req ^= CSG_REQ_EP_CFG;
+	csg_req ^= CSG_REQ_EP_CFG_MASK;
 	kbase_csf_firmware_csg_input_mask(ginfo, CSG_REQ, csg_req,
-					  CSG_REQ_EP_CFG);
+					  CSG_REQ_EP_CFG_MASK);
 
 	/* Set state to START/RESUME */
 	if (queue_group_suspended_locked(group)) {
 		state = CSG_REQ_STATE_RESUME;
-		if (group->run_state == KBASE_CSF_GROUP_SUSPENDED)
-			atomic_dec(
-				&kbdev->csf.scheduler.non_idle_suspended_grps);
 	} else {
 		WARN_ON(group->run_state != KBASE_CSF_GROUP_RUNNABLE);
 		state = CSG_REQ_STATE_START;
@@ -1741,6 +1965,9 @@ static void program_csg_slot(struct kbase_queue_group *group, s8 slot,
 				(state & (CSG_REQ_STATE_MASK >> CS_REQ_STATE_SHIFT)));
 
 	kbase_csf_ring_csg_doorbell(kbdev, slot);
+
+	/* Programming a slot consumes a group from scanout */
+	update_offslot_non_idle_cnt_for_onslot_grp(group);
 }
 
 static void remove_scheduled_group(struct kbase_device *kbdev,
@@ -1760,7 +1987,8 @@ static void remove_scheduled_group(struct kbase_device *kbdev,
 	group->kctx->csf.sched.ngrp_to_schedule--;
 }
 
-static void sched_evict_group(struct kbase_queue_group *group, bool fault)
+static void sched_evict_group(struct kbase_queue_group *group, bool fault,
+			      bool update_non_idle_offslot_grps_cnt)
 {
 	struct kbase_context *kctx = group->kctx;
 	struct kbase_device *kbdev = kctx->kbdev;
@@ -1771,8 +1999,10 @@ static void sched_evict_group(struct kbase_queue_group *group, bool fault)
 	if (queue_group_scheduled_locked(group)) {
 		u32 i;
 
-		if (group->run_state == KBASE_CSF_GROUP_SUSPENDED)
-			atomic_dec(&scheduler->non_idle_suspended_grps);
+		if (update_non_idle_offslot_grps_cnt &&
+		    (group->run_state == KBASE_CSF_GROUP_SUSPENDED ||
+		     group->run_state == KBASE_CSF_GROUP_RUNNABLE))
+			atomic_dec(&scheduler->non_idle_offslot_grps);
 
 		for (i = 0; i < MAX_SUPPORTED_STREAMS_PER_GROUP; i++) {
 			if (group->bound_queues[i])
@@ -1808,8 +2038,7 @@ static void sched_evict_group(struct kbase_queue_group *group, bool fault)
 static int term_group_sync(struct kbase_queue_group *group)
 {
 	struct kbase_device *kbdev = group->kctx->kbdev;
-	long remaining =
-		kbase_csf_timeout_in_jiffies(CSF_STATE_WAIT_TIMEOUT_MS);
+	long remaining = kbase_csf_timeout_in_jiffies(kbdev->csf.fw_timeout_ms);
 	int err = 0;
 
 	term_csg_slot(group);
@@ -1820,7 +2049,7 @@ static int term_group_sync(struct kbase_queue_group *group)
 	if (!remaining) {
 		dev_warn(kbdev->dev, "term request timed out for group %d on slot %d",
 			 group->handle, group->csg_nr);
-		if (kbase_prepare_to_reset_gpu(kbdev))
+		if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_NONE))
 			kbase_reset_gpu(kbdev);
 		err = -ETIMEDOUT;
 	}
@@ -1836,25 +2065,24 @@ void kbase_csf_scheduler_group_deschedule(struct kbase_queue_group *group)
 		kbase_csf_timeout_in_jiffies(CSG_SCHED_STOP_TIMEOUT_MS);
 	bool force = false;
 
+	kbase_reset_gpu_assert_failed_or_prevented(kbdev);
 	lockdep_assert_held(&group->kctx->csf.lock);
 	mutex_lock(&scheduler->lock);
 
 	KBASE_KTRACE_ADD_CSF_GRP(kbdev, GROUP_DESCHEDULE, group, group->run_state);
 	while (queue_group_scheduled_locked(group)) {
 		u32 saved_state = scheduler->state;
-		bool reset = kbase_reset_gpu_is_active(kbdev);
 
 		if (!kbasep_csf_scheduler_group_is_on_slot_locked(group)) {
-			sched_evict_group(group, false);
-		} else if (reset || saved_state == SCHED_INACTIVE || force) {
+			sched_evict_group(group, false, true);
+		} else if (saved_state == SCHED_INACTIVE || force) {
 			bool as_faulty;
 
-			if (!reset)
-				term_group_sync(group);
+			term_group_sync(group);
 			/* Treat the csg been terminated */
 			as_faulty = cleanup_csg_slot(group);
 			/* remove from the scheduler list */
-			sched_evict_group(group, as_faulty);
+			sched_evict_group(group, as_faulty, false);
 		}
 
 		/* waiting scheduler state to change */
@@ -1890,9 +2118,10 @@ static int scheduler_group_schedule(struct kbase_queue_group *group)
 {
 	struct kbase_context *kctx = group->kctx;
 	struct kbase_device *kbdev = kctx->kbdev;
+	struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
 
 	lockdep_assert_held(&kctx->csf.lock);
-	lockdep_assert_held(&kbdev->csf.scheduler.lock);
+	lockdep_assert_held(&scheduler->lock);
 
 	KBASE_KTRACE_ADD_CSF_GRP(kbdev, GROUP_SCHEDULE, group, group->run_state);
 	if (group->run_state == KBASE_CSF_GROUP_SUSPENDED_ON_WAIT_SYNC)
@@ -1903,11 +2132,44 @@ static int scheduler_group_schedule(struct kbase_queue_group *group)
 
 		if (group->run_state == KBASE_CSF_GROUP_SUSPENDED_ON_IDLE)
 			update_idle_suspended_group_state(group);
-		else
+		else {
+			struct kbase_queue_group *protm_grp;
+			unsigned long flags;
+
+			WARN_ON(!kbasep_csf_scheduler_group_is_on_slot_locked(
+				group));
+
 			group->run_state = KBASE_CSF_GROUP_RUNNABLE;
+
+			/* A normal mode CSG could be idle onslot during
+			 * protected mode. In this case clear the
+			 * appropriate bit in csg_slots_idle_mask.
+			 */
+			spin_lock_irqsave(&scheduler->interrupt_lock, flags);
+			protm_grp = scheduler->active_protm_grp;
+			if (protm_grp && protm_grp != group)
+				clear_bit((unsigned int)group->csg_nr,
+					  scheduler->csg_slots_idle_mask);
+			spin_unlock_irqrestore(&scheduler->interrupt_lock,
+					       flags);
+
+			/* If GPU is in protected mode then any doorbells rung
+			 * would have no effect. Check if GPU is in protected
+			 * mode and if this group has higher priority than the
+			 * active protected mode group. If so prompt the FW
+			 * to exit protected mode.
+			 */
+			if (protm_grp &&
+			    group->scan_seq_num < protm_grp->scan_seq_num) {
+				/* Prompt the FW to exit protected mode */
+				scheduler_force_protm_exit(kbdev);
+			}
+		}
 	} else if (!queue_group_scheduled_locked(group)) {
 		insert_group_to_runnable(&kbdev->csf.scheduler, group,
 			KBASE_CSF_GROUP_RUNNABLE);
+		/* A new group into the scheduler */
+		atomic_inc(&kbdev->csf.scheduler.non_idle_offslot_grps);
 	}
 
 	/* Since a group has become active now, check if GPU needs to be
@@ -1919,21 +2181,22 @@ static int scheduler_group_schedule(struct kbase_queue_group *group)
 }
 
 /**
- * set_max_csg_slots() - Set the number of available command stream group slots
+ * set_max_csg_slots() - Set the number of available CSG slots
  *
  * @kbdev: Pointer of the GPU device.
  *
- * This function would set/limit the number of command stream group slots that
- * can be used in the given tick/tock. It would be less than the total command
- * stream group slots supported by firmware if the number of GPU address space
- * slots required to utilize all the CSG slots is more than the available
+ * This function would set/limit the number of CSG slots that
+ * can be used in the given tick/tock. It would be less than the total CSG
+ * slots supported by firmware if the number of GPU address space slots
+ * required to utilize all the CSG slots is more than the available
  * address space slots.
  */
 static inline void set_max_csg_slots(struct kbase_device *kbdev)
 {
 	struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
 	unsigned int total_csg_slots = kbdev->csf.global_iface.group_num;
-	unsigned int max_address_space_slots = kbdev->nr_hw_address_spaces - 1;
+	unsigned int max_address_space_slots =
+			kbdev->nr_hw_address_spaces - NUM_RESERVED_AS_SLOTS;
 
 	WARN_ON(scheduler->num_active_address_spaces > total_csg_slots);
 
@@ -1949,7 +2212,7 @@ static inline void set_max_csg_slots(struct kbase_device *kbdev)
  * @kctx: Pointer of the Kbase context.
  *
  * This function would update the counter that is tracking the number of GPU
- * address space slots that would be required to program the command stream
+ * address space slots that would be required to program the CS
  * group slots from the groups at the head of groups_to_schedule list.
  */
 static inline void count_active_address_space(struct kbase_device *kbdev,
@@ -1957,7 +2220,8 @@ static inline void count_active_address_space(struct kbase_device *kbdev,
 {
 	struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
 	unsigned int total_csg_slots = kbdev->csf.global_iface.group_num;
-	unsigned int max_address_space_slots = kbdev->nr_hw_address_spaces - 1;
+	unsigned int max_address_space_slots =
+			kbdev->nr_hw_address_spaces - NUM_RESERVED_AS_SLOTS;
 
 	if (scheduler->ngrp_to_schedule <= total_csg_slots) {
 		if (kctx->csf.sched.ngrp_to_schedule == 1) {
@@ -1970,6 +2234,67 @@ static inline void count_active_address_space(struct kbase_device *kbdev,
 	}
 }
 
+/* Two schemes are used in assigning the priority to CSG slots for a given
+ * CSG from the 'groups_to_schedule' list.
+ * This is needed as an idle on-slot group is deprioritized by moving it to
+ * the tail of 'groups_to_schedule' list. As a result it can either get
+ * evicted from the CSG slot in current tick/tock dealing, or its position
+ * can be after the lower priority non-idle groups in the 'groups_to_schedule'
+ * list. The latter case can result in the on-slot subset containing both
+ * non-idle and idle CSGs, and is handled through the 2nd scheme described
+ * below.
+ *
+ * First scheme :- If all the slots are going to be occupied by the non-idle or
+ * idle groups, then a simple assignment of the priority is done as per the
+ * position of a group in the 'groups_to_schedule' list. So maximum priority
+ * gets assigned to the slot of a group which is at the head of the list.
+ * Here the 'groups_to_schedule' list would effectively be ordered as per the
+ * static priority of groups.
+ *
+ * Second scheme :- If the slots are going to be occupied by a mix of idle and
+ * non-idle groups then the priority assignment needs to ensure that the
+ * priority of a slot belonging to a higher priority idle group will always be
+ * greater than the priority of a slot belonging to a lower priority non-idle
+ * group, reflecting the original position of a group in the scan order (i.e.
+ * static priority) 'scan_seq_num', which is set during the prepare phase of a
+ * tick/tock before the group is moved to 'idle_groups_to_schedule' list if it
+ * is idle.
+ * The priority range [MAX_CSG_SLOT_PRIORITY, 0] is partitioned: the first
+ * 'slots_for_tick' groups in the original scan order are assigned a priority in
+ * the subrange [MAX_CSG_SLOT_PRIORITY, MAX_CSG_SLOT_PRIORITY - slots_for_tick),
+ * whereas the rest of the groups are assigned a priority in the subrange
+ * [MAX_CSG_SLOT_PRIORITY - slots_for_tick, 0]. This way even if an idle higher
+ * priority group ends up after the non-idle lower priority groups in the
+ * 'groups_to_schedule' list, it will get a higher slot priority. And this will
+ * enable the FW to quickly start the execution of higher priority group when it
+ * gets de-idled.
+ */
+static u8 get_slot_priority(struct kbase_queue_group *group)
+{
+	struct kbase_csf_scheduler *sched = &group->kctx->kbdev->csf.scheduler;
+	u32 slots_for_tick = sched->num_csg_slots_for_tick;
+	u32 used_slots = slots_for_tick - sched->remaining_tick_slots;
+	u32 low_band_offset = slots_for_tick + used_slots;
+
+	/* First scheme: the non-idle groups fill every slot, or there is no
+	 * non-idle group at all; priority follows the slots used so far.
+	 */
+	if (sched->non_idle_scanout_grps >= slots_for_tick ||
+	    !sched->non_idle_scanout_grps)
+		return (u8)(MAX_CSG_SLOT_PRIORITY - used_slots);
+
+	/* Second scheme: mix of idle and non-idle groups. A group within the
+	 * first 'slots_for_tick' scan positions is ranked by scan_seq_num.
+	 */
+	if (group->scan_seq_num < slots_for_tick)
+		return (u8)(MAX_CSG_SLOT_PRIORITY - group->scan_seq_num);
+
+	/* Any other group goes in the lower subrange, clamped at priority 0 */
+	if (MAX_CSG_SLOT_PRIORITY > low_band_offset)
+		return (u8)(MAX_CSG_SLOT_PRIORITY - low_band_offset);
+	return 0;
+}
+
 /**
  * update_resident_groups_priority() - Update the priority of resident groups
  *
@@ -1979,7 +2304,7 @@ static inline void count_active_address_space(struct kbase_device *kbdev,
  * that are at the head of groups_to_schedule list, preceding the first
  * non-resident group.
  *
- * This function will also adjust kbase_csf_scheduler.head_slot_priority on
+ * This function will also adjust kbase_csf_scheduler.remaining_tick_slots on
  * the priority update.
  */
 static void update_resident_groups_priority(struct kbase_device *kbdev)
@@ -2000,11 +2325,11 @@ static void update_resident_groups_priority(struct kbase_device *kbdev)
 			break;
 
 		update_csg_slot_priority(group,
-					 scheduler->head_slot_priority);
+					 get_slot_priority(group));
 
 		/* Drop the head group from the list */
 		remove_scheduled_group(kbdev, group);
-		scheduler->head_slot_priority--;
+		scheduler->remaining_tick_slots--;
 	}
 }
 
@@ -2012,14 +2337,14 @@ static void update_resident_groups_priority(struct kbase_device *kbdev)
  * program_group_on_vacant_csg_slot() - Program a non-resident group on the
  *                                      given vacant CSG slot.
  * @kbdev:    Pointer to the GPU device.
- * @slot:     Vacant command stream group slot number.
+ * @slot:     Vacant CSG slot number.
  *
  * This function will program a non-resident group at the head of
- * kbase_csf_scheduler.groups_to_schedule list on the given vacant command
- * stream group slot, provided the initial position of the non-resident
+ * kbase_csf_scheduler.groups_to_schedule list on the given vacant
+ * CSG slot, provided the initial position of the non-resident
  * group in the list is less than the number of CSG slots and there is
  * an available GPU address space slot.
- * kbase_csf_scheduler.head_slot_priority would also be adjusted after
+ * kbase_csf_scheduler.remaining_tick_slots would also be adjusted after
  * programming the slot.
  */
 static void program_group_on_vacant_csg_slot(struct kbase_device *kbdev,
@@ -2039,17 +2364,19 @@ static void program_group_on_vacant_csg_slot(struct kbase_device *kbdev,
 
 		if (!WARN_ON(ret)) {
 			if (kctx_as_enabled(group->kctx) && !group->faulted) {
-				program_csg_slot(group,
-					 slot,
-					 scheduler->head_slot_priority);
+				program_csg_slot(group, slot,
+					get_slot_priority(group));
 
 				if (likely(csg_slot_in_use(kbdev, slot))) {
 					/* Drop the head group from the list */
 					remove_scheduled_group(kbdev, group);
-					scheduler->head_slot_priority--;
+					scheduler->remaining_tick_slots--;
 				}
-			} else
+			} else {
+				update_offslot_non_idle_cnt_for_faulty_grp(
+					group);
 				remove_scheduled_group(kbdev, group);
+			}
 		}
 	}
 }
@@ -2059,15 +2386,15 @@ static void program_group_on_vacant_csg_slot(struct kbase_device *kbdev,
  *                             group and update the priority of resident groups.
  *
  * @kbdev:    Pointer to the GPU device.
- * @slot:     Vacant command stream group slot number.
+ * @slot:     Vacant CSG slot number.
  *
  * This function will first update the priority of all resident queue groups
  * that are at the head of groups_to_schedule list, preceding the first
- * non-resident group, it will then try to program the given command stream
+ * non-resident group, it will then try to program the given CS
  * group slot with the non-resident group. Finally update the priority of all
  * resident queue groups following the non-resident group.
  *
- * kbase_csf_scheduler.head_slot_priority would also be adjusted.
+ * kbase_csf_scheduler.remaining_tick_slots would also be adjusted.
  */
 static void program_vacant_csg_slot(struct kbase_device *kbdev, s8 slot)
 {
@@ -2121,12 +2448,12 @@ static bool slots_state_changed(struct kbase_device *kbdev,
  * @kbdev:    Pointer to the GPU device.
  *
  * This function will first wait for the ongoing suspension to complete on a
- * command stream group slot and will then program the vacant slot with the
+ * CSG slot and will then program the vacant slot with the
  * non-resident queue group inside the groups_to_schedule list.
  * The programming of the non-resident queue group on the vacant slot could
  * fail due to unavailability of free GPU address space slot and so the
  * programming is re-attempted after the ongoing suspension has completed
- * for all the command stream group slots.
+ * for all the CSG slots.
  * The priority of resident groups before and after the non-resident group
  * in the groups_to_schedule list would also be updated.
  * This would be repeated for all the slots undergoing suspension.
@@ -2139,11 +2466,13 @@ static void program_suspending_csg_slots(struct kbase_device *kbdev)
 	DECLARE_BITMAP(slot_mask, MAX_SUPPORTED_CSGS);
 	DECLARE_BITMAP(evicted_mask, MAX_SUPPORTED_CSGS) = {0};
 	bool suspend_wait_failed = false;
-	long remaining =
-		kbase_csf_timeout_in_jiffies(CSF_STATE_WAIT_TIMEOUT_MS);
+	long remaining = kbase_csf_timeout_in_jiffies(kbdev->csf.fw_timeout_ms);
 
 	lockdep_assert_held(&kbdev->csf.scheduler.lock);
 
+	/* In the current implementation, csgs_events_enable_mask would be used
+	 * only to indicate suspending CSGs.
+	 */
 	bitmap_complement(slot_mask, scheduler->csgs_events_enable_mask,
 		MAX_SUPPORTED_CSGS);
 
@@ -2180,7 +2509,7 @@ static void program_suspending_csg_slots(struct kbase_device *kbdev)
 					as_fault = cleanup_csg_slot(group);
 					/* If AS fault detected, evict it */
 					if (as_fault) {
-						sched_evict_group(group, true);
+						sched_evict_group(group, true, true);
 						set_bit(i, evicted_mask);
 					}
 				}
@@ -2188,13 +2517,51 @@ static void program_suspending_csg_slots(struct kbase_device *kbdev)
 				program_vacant_csg_slot(kbdev, (s8)i);
 			}
 		} else {
-			dev_warn(kbdev->dev, "Timed out waiting for CSG slots to suspend, slot_mask: 0x%*pb\n",
-				 num_groups, slot_mask);
+			u32 i;
+
+			/* Groups that have failed to suspend in time shall
+			 * raise a fatal error as they could no longer be
+			 * safely resumed.
+			 */
+			for_each_set_bit(i, slot_mask, num_groups) {
+				struct kbase_queue_group *const group =
+					scheduler->csg_slots[i].resident_group;
+
+				struct base_gpu_queue_group_error const
+					err_payload = { .error_type =
+								BASE_GPU_QUEUE_GROUP_ERROR_FATAL,
+							.payload = {
+								.fatal_group = {
+									.status =
+										GPU_EXCEPTION_TYPE_SW_FAULT_2,
+								} } };
+
+				if (unlikely(group == NULL))
+					continue;
+
+				kbase_csf_add_group_fatal_error(group,
+								&err_payload);
+				kbase_event_wakeup(group->kctx);
+
+				/* TODO GPUCORE-25328: The CSG can't be
+				 * terminated, the GPU will be reset as a
+				 * work-around.
+				 */
+				dev_warn(
+					kbdev->dev,
+					"Group %pK on slot %u failed to suspend\n",
+					(void *)group, i);
+
+				/* The group has failed suspension, stop
+				 * further examination.
+				 */
+				clear_bit(i, slot_mask);
+				set_bit(i, scheduler->csgs_events_enable_mask);
+				update_offslot_non_idle_cnt_for_onslot_grp(
+					group);
+			}
 
-			if (kbase_prepare_to_reset_gpu(kbdev))
-				kbase_reset_gpu(kbdev);
 			suspend_wait_failed = true;
-			break;
 		}
 	}
 
@@ -2202,20 +2569,24 @@ static void program_suspending_csg_slots(struct kbase_device *kbdev)
 		dev_info(kbdev->dev, "Scheduler evicting slots: 0x%*pb\n",
 			 num_groups, evicted_mask);
 
-	if (unlikely(!suspend_wait_failed)) {
+	if (likely(!suspend_wait_failed)) {
 		u32 i;
 
 		while (scheduler->ngrp_to_schedule &&
-			(scheduler->head_slot_priority > (MAX_CSG_SLOT_PRIORITY
-				- scheduler->num_csg_slots_for_tick))) {
+		       scheduler->remaining_tick_slots) {
 			i = find_first_zero_bit(scheduler->csg_inuse_bitmap,
 					num_groups);
 			if (WARN_ON(i == num_groups))
 				break;
 			program_vacant_csg_slot(kbdev, (s8)i);
-			if (WARN_ON(!csg_slot_in_use(kbdev, (int)i)))
+			if (!csg_slot_in_use(kbdev, (int)i)) {
+				dev_warn(kbdev->dev, "Couldn't use CSG slot %d despite being vacant", i);
 				break;
+			}
 		}
+	} else {
+		if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_NONE))
+			kbase_reset_gpu(kbdev);
 	}
 }
 
@@ -2226,6 +2597,11 @@ static void suspend_queue_group(struct kbase_queue_group *group)
 		&group->kctx->kbdev->csf.scheduler;
 
 	spin_lock_irqsave(&scheduler->interrupt_lock, flags);
+	/* This shall be used in program_suspending_csg_slots() where we
+	 * assume that whilst CSGs are being suspended, this bitmask is not
+	 * used by anything else i.e., it indicates only the CSGs going
+	 * through suspension.
+	 */
 	clear_bit(group->csg_nr, scheduler->csgs_events_enable_mask);
 	spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
 
@@ -2240,8 +2616,7 @@ static void wait_csg_slots_start(struct kbase_device *kbdev)
 {
 	u32 num_groups = kbdev->csf.global_iface.group_num;
 	struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
-	long remaining =
-		kbase_csf_timeout_in_jiffies(CSF_STATE_WAIT_TIMEOUT_MS);
+	long remaining = kbase_csf_timeout_in_jiffies(kbdev->csf.fw_timeout_ms);
 	DECLARE_BITMAP(slot_mask, MAX_SUPPORTED_CSGS) = {0};
 	u32 i;
 
@@ -2276,7 +2651,7 @@ static void wait_csg_slots_start(struct kbase_device *kbdev)
 			dev_warn(kbdev->dev, "Timed out waiting for CSG slots to start, slots: 0x%*pb\n",
 				 num_groups, slot_mask);
 
-			if (kbase_prepare_to_reset_gpu(kbdev))
+			if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_NONE))
 				kbase_reset_gpu(kbdev);
 			break;
 		}
@@ -2284,48 +2659,32 @@ static void wait_csg_slots_start(struct kbase_device *kbdev)
 }
 
 /**
- * group_on_slot_is_idle() - Check if the queue group resident on a command
- *                           stream group slot is idle.
+ * group_on_slot_is_idle() - Check if the given slot has a CSG-idle state
+ *                           flagged after the completion of a CSG status
+ *                           update command
  *
  * This function is called at the start of scheduling tick to check the
- * idle status of a queue group resident on a command sream group slot.
- * The group's idleness is determined by looping over all the bound command
- * queues and checking their respective CS_STATUS_WAIT register as well as
- * the insert and extract offsets.
-
- * This function would be simplified in future after the changes under
- * consideration with MIDHARC-3065 are introduced.
+ * idle status of a queue group resident on a CSG slot.
+ * The caller must make sure the corresponding status update command has
+ * been called and completed before checking this status.
  *
  * @kbdev:  Pointer to the GPU device.
- * @group:  Pointer to the resident group on the given slot.
- * @slot:   The slot that the given group is resident on.
+ * @slot:   The given slot for checking an occupying resident group's idle
+ *          state.
  *
  * Return: true if the group resident on slot is idle, otherwise false.
  */
 static bool group_on_slot_is_idle(struct kbase_device *kbdev,
-			struct kbase_queue_group *group, unsigned long slot)
+				  unsigned long slot)
 {
 	struct kbase_csf_cmd_stream_group_info *ginfo =
 					&kbdev->csf.global_iface.groups[slot];
-	u32 i;
+	bool idle = kbase_csf_firmware_csg_output(ginfo, CSG_STATUS_STATE) &
+			CSG_STATUS_STATE_IDLE_MASK;
 
 	lockdep_assert_held(&kbdev->csf.scheduler.lock);
-	for (i = 0; i < MAX_SUPPORTED_STREAMS_PER_GROUP; i++) {
-		struct kbase_queue *queue = group->bound_queues[i];
 
-		if (queue && queue->enabled) {
-			struct kbase_csf_cmd_stream_info *stream =
-					&ginfo->streams[queue->csi_index];
-			u32 status = kbase_csf_firmware_cs_output(stream,
-							CS_STATUS_WAIT);
-
-			if (!CS_STATUS_WAIT_SYNC_WAIT_GET(status) &&
-			    !confirm_cs_idle(group->bound_queues[i]))
-				return false;
-		}
-	}
-
-	return true;
+	return idle;
 }
 
 /**
@@ -2421,8 +2780,7 @@ static void wait_csg_slots_finish_prio_update(struct kbase_device *kbdev)
 {
 	unsigned long *slot_mask =
 			kbdev->csf.scheduler.csg_slots_prio_update;
-	long wait_time =
-		kbase_csf_timeout_in_jiffies(CSF_STATE_WAIT_TIMEOUT_MS);
+	long wait_time = kbase_csf_timeout_in_jiffies(kbdev->csf.fw_timeout_ms);
 	int ret = wait_csg_slots_handshake_ack(kbdev, CSG_REQ_EP_CFG_MASK,
 					       slot_mask, wait_time);
 
@@ -2433,8 +2791,10 @@ static void wait_csg_slots_finish_prio_update(struct kbase_device *kbdev)
 		 * issue, no major consequences are expected as a
 		 * result, so just warn the case.
 		 */
-		dev_warn(kbdev->dev, "Timeout, skipping the update wait: slot mask=0x%lx",
-			 slot_mask[0]);
+		dev_warn(
+			kbdev->dev,
+			"Timeout on CSG_REQ:EP_CFG, skipping the update wait: slot mask=0x%lx",
+			slot_mask[0]);
 	}
 }
 
@@ -2446,18 +2806,27 @@ void kbase_csf_scheduler_evict_ctx_slots(struct kbase_device *kbdev,
 	u32 num_groups = kbdev->csf.global_iface.group_num;
 	u32 slot;
 	DECLARE_BITMAP(slot_mask, MAX_SUPPORTED_CSGS) = {0};
-	DECLARE_BITMAP(terminated_slot_mask, MAX_SUPPORTED_CSGS);
-	long remaining =
-		kbase_csf_timeout_in_jiffies(DEFAULT_RESET_TIMEOUT_MS);
 
 	lockdep_assert_held(&kctx->csf.lock);
 	mutex_lock(&scheduler->lock);
 
+	/* This code is only called during reset, so we don't wait for the CSG
+	 * slots to be stopped
+	 */
+	WARN_ON(!kbase_reset_gpu_is_active(kbdev));
+
 	KBASE_KTRACE_ADD(kbdev, EVICT_CTX_SLOTS, kctx, 0u);
 	for (slot = 0; slot < num_groups; slot++) {
 		group = kbdev->csf.scheduler.csg_slots[slot].resident_group;
 		if (group && group->kctx == kctx) {
+			bool as_fault;
+
 			term_csg_slot(group);
+			as_fault = cleanup_csg_slot(group);
+			/* remove the group from the scheduler list */
+			sched_evict_group(group, as_fault, false);
+			/* return the evicted group to the caller */
+			list_add_tail(&group->link, evicted_groups);
 			set_bit(slot, slot_mask);
 		}
 	}
@@ -2465,48 +2834,6 @@ void kbase_csf_scheduler_evict_ctx_slots(struct kbase_device *kbdev,
 	dev_info(kbdev->dev, "Evicting context %d_%d slots: 0x%*pb\n",
 			kctx->tgid, kctx->id, num_groups, slot_mask);
 
-	bitmap_copy(terminated_slot_mask, slot_mask, MAX_SUPPORTED_CSGS);
-	/* Only check for GPU reset once - this thread has the scheduler lock,
-	 * so even if the return value of kbase_reset_gpu_is_active changes,
-	 * no reset work would be done anyway until the scheduler lock was
-	 * released.
-	 */
-	if (!kbase_reset_gpu_is_active(kbdev)) {
-		while (remaining
-			&& !bitmap_empty(slot_mask, MAX_SUPPORTED_CSGS)) {
-			DECLARE_BITMAP(changed, MAX_SUPPORTED_CSGS);
-
-			bitmap_copy(changed, slot_mask, MAX_SUPPORTED_CSGS);
-
-			remaining = wait_event_timeout(kbdev->csf.event_wait,
-				slots_state_changed(kbdev, changed,
-					csg_slot_stopped_raw),
-				remaining);
-
-			if (remaining)
-				bitmap_andnot(slot_mask, slot_mask, changed,
-					MAX_SUPPORTED_CSGS);
-		}
-	}
-
-	for_each_set_bit(slot, terminated_slot_mask, num_groups) {
-		bool as_fault;
-
-		group = scheduler->csg_slots[slot].resident_group;
-		as_fault = cleanup_csg_slot(group);
-		/* remove the group from the scheduler list */
-		sched_evict_group(group, as_fault);
-		/* return the evicted group to the caller */
-		list_add_tail(&group->link, evicted_groups);
-	}
-
-	if (!remaining) {
-		dev_warn(kbdev->dev, "Timeout on evicting ctx slots: 0x%*pb\n",
-				num_groups, slot_mask);
-		if (kbase_prepare_to_reset_gpu(kbdev))
-			kbase_reset_gpu(kbdev);
-	}
-
 	mutex_unlock(&scheduler->lock);
 }
 
@@ -2606,17 +2933,17 @@ static void scheduler_group_check_protm_enter(struct kbase_device *const kbdev,
 
 	spin_lock_irqsave(&scheduler->interrupt_lock, flags);
 
-	/* Firmware samples the PROTM_PEND ACK bit for command streams when
+	/* Firmware samples the PROTM_PEND ACK bit for CSs when
 	 * Host sends PROTM_ENTER global request. So if PROTM_PEND ACK bit
-	 * is set for a command stream after Host has sent the PROTM_ENTER
+	 * is set for a CS after Host has sent the PROTM_ENTER
 	 * Global request, then there is no guarantee that firmware will
 	 * notice that prior to switching to protected mode. And firmware
-	 * may not again raise the PROTM_PEND interrupt for that command
-	 * stream later on. To avoid that uncertainty PROTM_PEND ACK bit
-	 * is not set for a command stream if the request to enter protected
+	 * may not again raise the PROTM_PEND interrupt for that CS
+	 * later on. To avoid that uncertainty PROTM_PEND ACK bit
+	 * is not set for a CS if the request to enter protected
 	 * mode has already been sent. It will be set later (after the exit
 	 * from protected mode has taken place) when the group to which
-	 * command stream is bound becomes the top group.
+	 * CS is bound becomes the top group.
 	 *
 	 * The actual decision of entering protected mode is hinging on the
 	 * input group is the top priority group, or, in case the previous
@@ -2647,9 +2974,13 @@ static void scheduler_group_check_protm_enter(struct kbase_device *const kbdev,
 					 * GPUCORE-21394.
 					 */
 
+					/* Disable the idle timer */
+					disable_gpu_idle_fw_timer_locked(kbdev);
+
 					/* Switch to protected mode */
 					scheduler->active_protm_grp = input_grp;
 					KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_ENTER_PROTM, input_grp, 0u);
+
 					spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
 					kbase_csf_enter_protected_mode(kbdev);
 					return;
@@ -2688,6 +3019,9 @@ static void scheduler_apply(struct kbase_device *kbdev)
 		}
 	}
 
+	/* Initialize the remaining available csg slots for the tick/tock */
+	scheduler->remaining_tick_slots = available_csg_slots;
+
 	/* If there are spare slots, apply heads in the list */
 	spare = (available_csg_slots > resident_cnt) ?
 		(available_csg_slots - resident_cnt) : 0;
@@ -2700,7 +3034,7 @@ static void scheduler_apply(struct kbase_device *kbdev)
 		    group->prepared_seq_num < available_csg_slots) {
 			/* One of the resident remainders */
 			update_csg_slot_priority(group,
-						scheduler->head_slot_priority);
+					get_slot_priority(group));
 		} else if (spare != 0) {
 			s8 slot = (s8)find_first_zero_bit(
 				     kbdev->csf.scheduler.csg_inuse_bitmap,
@@ -2711,11 +3045,13 @@ static void scheduler_apply(struct kbase_device *kbdev)
 
 			if (!kctx_as_enabled(group->kctx) || group->faulted) {
 				/* Drop the head group and continue */
+				update_offslot_non_idle_cnt_for_faulty_grp(
+					group);
 				remove_scheduled_group(kbdev, group);
 				continue;
 			}
 			program_csg_slot(group, slot,
-					 scheduler->head_slot_priority);
+					 get_slot_priority(group));
 			if (unlikely(!csg_slot_in_use(kbdev, slot)))
 				break;
 
@@ -2725,8 +3061,8 @@ static void scheduler_apply(struct kbase_device *kbdev)
 
 		/* Drop the head csg from the list */
 		remove_scheduled_group(kbdev, group);
-		if (scheduler->head_slot_priority)
-			scheduler->head_slot_priority--;
+		if (!WARN_ON(!scheduler->remaining_tick_slots))
+			scheduler->remaining_tick_slots--;
 	}
 
 	/* Dealing with groups currently going through suspend */
@@ -2741,7 +3077,7 @@ static void scheduler_ctx_scan_groups(struct kbase_device *kbdev,
 
 	lockdep_assert_held(&scheduler->lock);
 	if (WARN_ON(priority < 0) ||
-	    WARN_ON(priority >= BASE_QUEUE_GROUP_PRIORITY_COUNT))
+	    WARN_ON(priority >= KBASE_QUEUE_GROUP_PRIORITY_COUNT))
 		return;
 
 	if (!kctx_as_enabled(kctx))
@@ -2756,6 +3092,9 @@ static void scheduler_ctx_scan_groups(struct kbase_device *kbdev,
 		if (unlikely(group->faulted))
 			continue;
 
+		/* Set the scanout sequence number, starting from 0 */
+		group->scan_seq_num = scheduler->csg_scan_count_for_tick++;
+
 		if (queue_group_idle_locked(group)) {
 			list_add_tail(&group->link_to_schedule,
 				      &scheduler->idle_groups_to_schedule);
@@ -2872,20 +3211,20 @@ static void scheduler_rotate_ctxs(struct kbase_device *kbdev)
 }
 
 /**
- * scheduler_update_idle_slots_status() - Get the status update for the command
- *                       stream group slots for which the IDLE notification was
- *                       received previously.
+ * scheduler_update_idle_slots_status() - Get the status update for the CSG
+ *                       slots for which the IDLE notification was received
+ *                        previously.
  *
- * This function sends a CSG status update request for all the command stream
- * group slots present in the bitmap scheduler->csg_slots_idle_mask and wait
- * for the request to complete.
+ * This function sends a CSG status update request for all the CSG slots
+ * present in the bitmap scheduler->csg_slots_idle_mask and wait for the
+ * request to complete.
  * The bits set in the scheduler->csg_slots_idle_mask bitmap are cleared by
  * this function.
  *
  * @kbdev:             Pointer to the GPU device.
- * @csg_bitmap:        Bitmap of the command stream group slots for which
+ * @csg_bitmap:        Bitmap of the CSG slots for which
  *                     the status update request completed successfully.
- * @failed_csg_bitmap: Bitmap of the command stream group slots for which
+ * @failed_csg_bitmap: Bitmap of the CSG slots for which
  *                     the status update request timedout.
  */
 static void scheduler_update_idle_slots_status(struct kbase_device *kbdev,
@@ -2924,38 +3263,40 @@ static void scheduler_update_idle_slots_status(struct kbase_device *kbdev,
 	/* The groups are aggregated into a single kernel doorbell request */
 	if (!bitmap_empty(csg_bitmap, num_groups)) {
 		long wt =
-		       kbase_csf_timeout_in_jiffies(CSF_STATE_WAIT_TIMEOUT_MS);
+			kbase_csf_timeout_in_jiffies(kbdev->csf.fw_timeout_ms);
 		u32 db_slots = (u32)csg_bitmap[0];
 
 		kbase_csf_ring_csg_slots_doorbell(kbdev, db_slots);
 
 		if (wait_csg_slots_handshake_ack(kbdev,
 				CSG_REQ_STATUS_UPDATE_MASK, csg_bitmap, wt)) {
-			dev_warn(kbdev->dev, "Timeout, treat groups as not idle: slot mask=0x%lx",
-				 csg_bitmap[0]);
+			dev_warn(
+				kbdev->dev,
+				"Timeout on CSG_REQ:STATUS_UPDATE, treat groups as not idle: slot mask=0x%lx",
+				csg_bitmap[0]);
 
 			/* Store the bitmap of timed out slots */
 			bitmap_copy(failed_csg_bitmap, csg_bitmap, num_groups);
 			csg_bitmap[0] = ~csg_bitmap[0] & db_slots;
 		} else {
-                       csg_bitmap[0] = db_slots;
+			csg_bitmap[0] = db_slots;
 		}
 	}
 }
 
 /**
  * scheduler_handle_idle_slots() - Update the idle status of queue groups
- *                    resident on command stream group slots for which the
+ *                    resident on CSG slots for which the
  *                    IDLE notification was received previously.
  *
  * This function is called at the start of scheduling tick/tock to reconfirm
- * the idle status of queue groups resident on command sream group slots for
+ * the idle status of queue groups resident on CSG slots for
  * which idle notification was received previously, i.e. all the CSG slots
  * present in the bitmap scheduler->csg_slots_idle_mask.
  * The confirmation is done by sending the CSG status update request to the
- * firmware. The idleness of a CSG is determined by looping over all the
- * bound command streams and checking their respective CS_STATUS_WAIT register
- * as well as the insert and extract offset.
+ * firmware. On completion, the firmware will mark the idleness at the
+ * slot's interface CSG_STATUS_STATE register accordingly.
+ *
  * The run state of the groups resident on still idle CSG slots is changed to
  * KBASE_CSF_GROUP_IDLE and the bitmap scheduler->csg_slots_idle_mask is
  * updated accordingly.
@@ -2986,15 +3327,17 @@ static void scheduler_handle_idle_slots(struct kbase_device *kbdev)
 			continue;
 		if (WARN_ON(!group))
 			continue;
-		if (WARN_ON(group->run_state != KBASE_CSF_GROUP_RUNNABLE))
+		if (WARN_ON(group->run_state != KBASE_CSF_GROUP_RUNNABLE &&
+					group->run_state != KBASE_CSF_GROUP_IDLE))
 			continue;
-		if (WARN_ON(group->priority >= BASE_QUEUE_GROUP_PRIORITY_COUNT))
+		if (WARN_ON(group->priority >= KBASE_QUEUE_GROUP_PRIORITY_COUNT))
 			continue;
 
-		if (group_on_slot_is_idle(kbdev, group, i)) {
+		if (group_on_slot_is_idle(kbdev, i)) {
 			group->run_state = KBASE_CSF_GROUP_IDLE;
 			set_bit(i, scheduler->csg_slots_idle_mask);
-		}
+		} else
+			group->run_state = KBASE_CSF_GROUP_RUNNABLE;
 	}
 
 	bitmap_or(scheduler->csg_slots_idle_mask,
@@ -3046,7 +3389,7 @@ static struct kbase_queue_group *get_tock_top_group(
 	int i;
 
 	lockdep_assert_held(&scheduler->lock);
-	for (i = 0; i < BASE_QUEUE_GROUP_PRIORITY_COUNT; ++i) {
+	for (i = 0; i < KBASE_QUEUE_GROUP_PRIORITY_COUNT; ++i) {
 		list_for_each_entry(kctx,
 			&scheduler->runnable_kctxs, csf.link) {
 			struct kbase_queue_group *group;
@@ -3080,7 +3423,7 @@ static int suspend_active_groups_on_powerdown(struct kbase_device *kbdev,
 		dev_warn(kbdev->dev, "Timed out waiting for CSG slots to suspend on power down, slot_mask: 0x%*pb\n",
 			 kbdev->csf.global_iface.group_num, slot_mask);
 
-		if (kbase_prepare_to_reset_gpu(kbdev))
+		if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_NONE))
 			kbase_reset_gpu(kbdev);
 
 		if (is_suspend) {
@@ -3094,30 +3437,73 @@ static int suspend_active_groups_on_powerdown(struct kbase_device *kbdev,
 	/* Check if the groups became active whilst the suspend was ongoing,
 	 * but only for the case where the system suspend is not in progress
 	 */
-	if (!is_suspend && atomic_read(&scheduler->non_idle_suspended_grps))
+	if (!is_suspend && atomic_read(&scheduler->non_idle_offslot_grps))
 		return -1;
 
 	return 0;
 }
 
+static bool scheduler_idle_suspendable(struct kbase_device *kbdev)
+{
+	bool suspend;
+	unsigned long flags;
+	struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
+
+	lockdep_assert_held(&scheduler->lock);
+
+	if  (scheduler->state == SCHED_SUSPENDED)
+		return false;
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	if (scheduler->total_runnable_grps) {
+		spin_lock(&scheduler->interrupt_lock);
+
+		/* Check both on-slots and off-slots groups idle status */
+		suspend = kbase_csf_scheduler_all_csgs_idle(kbdev) &&
+			  !atomic_read(&scheduler->non_idle_offslot_grps) &&
+			  kbase_pm_idle_groups_sched_suspendable(kbdev);
+
+		spin_unlock(&scheduler->interrupt_lock);
+	} else
+		suspend = kbase_pm_no_runnables_sched_suspendable(kbdev);
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	return suspend;
+}
+
 static void gpu_idle_worker(struct work_struct *work)
 {
 	struct kbase_device *kbdev = container_of(
-		work, struct kbase_device, csf.scheduler.gpu_idle_work.work);
+		work, struct kbase_device, csf.scheduler.gpu_idle_work);
 	struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
 
+	if (kbase_reset_gpu_try_prevent(kbdev)) {
+		dev_warn(kbdev->dev, "Quit idle for failing to prevent gpu reset.\n");
+		return;
+	}
 	mutex_lock(&scheduler->lock);
 
-	if (!scheduler->total_runnable_grps) {
-		if (scheduler->state != SCHED_SUSPENDED) {
+	/* Cycle completed, disable the firmware idle timer */
+	disable_gpu_idle_fw_timer(kbdev);
+	if (scheduler_idle_suspendable(kbdev) &&
+	    !kbase_reset_gpu_is_active(kbdev)) {
+		int ret = suspend_active_groups_on_powerdown(kbdev, false);
+
+		if (!ret) {
+			dev_dbg(kbdev->dev, "Scheduler becomes idle suspended now");
 			scheduler_suspend(kbdev);
-			dev_info(kbdev->dev, "Scheduler now suspended");
+			cancel_tick_timer(kbdev);
+		} else {
+			dev_dbg(kbdev->dev, "Aborting suspend scheduler (grps: %d)",
+				atomic_read(&scheduler->non_idle_offslot_grps));
+			/* Bring forward the next tick */
+			kbase_csf_scheduler_advance_tick(kbdev);
 		}
-	} else {
-		dev_dbg(kbdev->dev, "Scheduler couldn't be suspended");
 	}
 
 	mutex_unlock(&scheduler->lock);
+	kbase_reset_gpu_allow(kbdev);
 }
 
 static int scheduler_prepare(struct kbase_device *kbdev)
@@ -3142,21 +3528,37 @@ static int scheduler_prepare(struct kbase_device *kbdev)
 		scheduler->ngrp_to_schedule = 0;
 	scheduler->top_ctx = NULL;
 	scheduler->top_grp = NULL;
-	scheduler->head_slot_priority = MAX_CSG_SLOT_PRIORITY;
+	scheduler->csg_scan_count_for_tick = 0;
 	WARN_ON(!list_empty(&scheduler->idle_groups_to_schedule));
 	scheduler->num_active_address_spaces = 0;
 	scheduler->num_csg_slots_for_tick = 0;
 	bitmap_zero(scheduler->csg_slots_prio_update, MAX_SUPPORTED_CSGS);
 
 	/* Scan out to run groups */
-	for (i = 0; i < BASE_QUEUE_GROUP_PRIORITY_COUNT; ++i) {
+	for (i = 0; i < KBASE_QUEUE_GROUP_PRIORITY_COUNT; ++i) {
 		struct kbase_context *kctx;
 
 		list_for_each_entry(kctx, &scheduler->runnable_kctxs, csf.link)
 			scheduler_ctx_scan_groups(kbdev, kctx, i);
 	}
 
+	/* Update this tick's non-idle groups */
+	scheduler->non_idle_scanout_grps = scheduler->ngrp_to_schedule;
+
+	/* Initial number of non-idle off-slot groups, before the scheduler's
+	 * scheduler_apply() operation. This gives a sensible start point view
+	 * of the tick. It will be subject to up/downs during the scheduler
+	 * active phase.
+	 */
+	atomic_set(&scheduler->non_idle_offslot_grps,
+		   scheduler->non_idle_scanout_grps);
+
+	/* Adds those idle but runnable groups to the scanout list */
 	scheduler_scan_idle_groups(kbdev);
+
+	/* After adding the idle CSGs, the two counts should be the same */
+	WARN_ON(scheduler->csg_scan_count_for_tick != scheduler->ngrp_to_schedule);
+
 	KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_TOP_GRP, scheduler->top_grp,
 			scheduler->num_active_address_spaces |
 			(((u64)scheduler->ngrp_to_schedule) << 32));
@@ -3166,19 +3568,29 @@ static int scheduler_prepare(struct kbase_device *kbdev)
 	return 0;
 }
 
-static void scheduler_wait_protm_quit(struct kbase_device *kbdev)
+static void scheduler_handle_idle_timer_onoff(struct kbase_device *kbdev)
 {
-	struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
-	long wt = kbase_csf_timeout_in_jiffies(CSF_STATE_WAIT_TIMEOUT_MS);
-	long remaining;
+	struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
 
 	lockdep_assert_held(&scheduler->lock);
 
-	remaining = wait_event_timeout(kbdev->csf.event_wait,
-			!kbase_csf_scheduler_protected_mode_in_use(kbdev), wt);
-
-	if (!remaining)
-		dev_warn(kbdev->dev, "Timeout, protm_quit wait skipped");
+	/* After the scheduler apply operation, the internal variable
+	 * scheduler->non_idle_offslot_grps reflects the end-point view
+	 * of the count at the end of the active phase.
+	 *
+	 * Any changes that follow (after the scheduler has dropped the
+	 * scheduler->lock) reflect async operations on the scheduler,
+	 * such as a group getting killed (evicted), a new group being
+	 * inserted, a cqs wait-sync triggered state transition etc.
+	 *
+	 * The condition for enabling the idle timer is that there are no
+	 * non-idle groups off-slot. If there is any non-idle group off-slot,
+	 * the timer should be disabled.
+	 */
+	if (atomic_read(&scheduler->non_idle_offslot_grps))
+		disable_gpu_idle_fw_timer(kbdev);
+	else
+		enable_gpu_idle_fw_timer(kbdev);
 }
 
 static void schedule_actions(struct kbase_device *kbdev)
@@ -3187,7 +3599,10 @@ static void schedule_actions(struct kbase_device *kbdev)
 	unsigned long flags;
 	struct kbase_queue_group *protm_grp;
 	int ret;
+	bool skip_idle_slots_update;
+	bool new_protm_top_grp = false;
 
+	kbase_reset_gpu_assert_prevented(kbdev);
 	lockdep_assert_held(&scheduler->lock);
 
 	ret = kbase_pm_wait_for_desired_state(kbdev);
@@ -3196,7 +3611,14 @@ static void schedule_actions(struct kbase_device *kbdev)
 		return;
 	}
 
-	scheduler_handle_idle_slots(kbdev);
+	spin_lock_irqsave(&scheduler->interrupt_lock, flags);
+	skip_idle_slots_update = kbase_csf_scheduler_protected_mode_in_use(kbdev);
+	spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
+
+	/* Skip updating on-slot idle CSGs if GPU is in protected mode. */
+	if (!skip_idle_slots_update)
+		scheduler_handle_idle_slots(kbdev);
+
 	scheduler_prepare(kbdev);
 	spin_lock_irqsave(&scheduler->interrupt_lock, flags);
 	protm_grp = scheduler->active_protm_grp;
@@ -3214,6 +3636,7 @@ static void schedule_actions(struct kbase_device *kbdev)
 	if (protm_grp && scheduler->top_grp == protm_grp) {
 		dev_dbg(kbdev->dev, "Scheduler keep protm exec: group-%d",
 			protm_grp->handle);
+		atomic_dec(&scheduler->non_idle_offslot_grps);
 	} else if (scheduler->top_grp) {
 		if (protm_grp)
 			dev_dbg(kbdev->dev, "Scheduler drop protm exec: group-%d",
@@ -3226,28 +3649,34 @@ static void schedule_actions(struct kbase_device *kbdev)
 				scheduler->top_grp->kctx->tgid,
 				scheduler->top_grp->kctx->id);
 
-			/* Due to GPUCORE-24491 only the top-group is allowed
-			 * to be on slot and all other on slot groups have to
-			 * be suspended before entering protected mode.
-			 * This would change in GPUCORE-24492.
+			/* When entering protected mode all CSG slots can be occupied
+			 * but only the protected mode CSG will be running. Any event
+			 * that would trigger the execution of an on-slot idle CSG will
+			 * need to be handled by the host during protected mode.
 			 */
-			scheduler->num_csg_slots_for_tick = 1;
+			new_protm_top_grp = true;
 		}
 
 		spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
 
 		scheduler_apply(kbdev);
+
+		/* Post-apply, all the committed groups in this tick are on
+		 * slots, time to arrange the idle timer on/off decision.
+		 */
+		scheduler_handle_idle_timer_onoff(kbdev);
+
 		/* Scheduler is dropping the exec of the previous protm_grp,
 		 * Until the protm quit completes, the GPU is effectively
 		 * locked in the secure mode.
 		 */
 		if (protm_grp)
-			scheduler_wait_protm_quit(kbdev);
+			scheduler_force_protm_exit(kbdev);
 
 		wait_csg_slots_start(kbdev);
 		wait_csg_slots_finish_prio_update(kbdev);
 
-		if (scheduler->num_csg_slots_for_tick == 1) {
+		if (new_protm_top_grp) {
 			scheduler_group_check_protm_enter(kbdev,
 						scheduler->top_grp);
 		}
@@ -3265,13 +3694,16 @@ static void schedule_on_tock(struct work_struct *work)
 					csf.scheduler.tock_work.work);
 	struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
 
-	mutex_lock(&scheduler->lock);
-
-	if (kbase_reset_gpu_is_active(kbdev) ||
-	    (scheduler->state == SCHED_SUSPENDED)) {
-		mutex_unlock(&scheduler->lock);
+	int err = kbase_reset_gpu_try_prevent(kbdev);
+	/* Regardless of whether reset failed or is currently happening, exit
+	 * early
+	 */
+	if (err)
 		return;
-	}
+
+	mutex_lock(&scheduler->lock);
+	if (scheduler->state == SCHED_SUSPENDED)
+		goto exit_no_schedule_unlock;
 
 	WARN_ON(!(scheduler->state == SCHED_INACTIVE));
 	scheduler->state = SCHED_BUSY;
@@ -3288,28 +3720,38 @@ static void schedule_on_tock(struct work_struct *work)
 
 	scheduler->state = SCHED_INACTIVE;
 	mutex_unlock(&scheduler->lock);
+	kbase_reset_gpu_allow(kbdev);
 
 	dev_dbg(kbdev->dev,
 		"Waking up for event after schedule-on-tock completes.");
 	wake_up_all(&kbdev->csf.event_wait);
+	return;
+
+exit_no_schedule_unlock:
+	mutex_unlock(&scheduler->lock);
+	kbase_reset_gpu_allow(kbdev);
 }
 
 static void schedule_on_tick(struct work_struct *work)
 {
 	struct kbase_device *kbdev = container_of(work, struct kbase_device,
-					csf.scheduler.tick_work.work);
+					csf.scheduler.tick_work);
 	struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
 
+	int err = kbase_reset_gpu_try_prevent(kbdev);
+	/* Regardless of whether reset failed or is currently happening, exit
+	 * early
+	 */
+	if (err)
+		return;
+
 	mutex_lock(&scheduler->lock);
 
-	if (kbase_reset_gpu_is_active(kbdev) ||
-	    (scheduler->state == SCHED_SUSPENDED)) {
-		mutex_unlock(&scheduler->lock);
-		return;
-	}
+	WARN_ON(scheduler->tick_timer_active);
+	if (scheduler->state == SCHED_SUSPENDED)
+		goto exit_no_schedule_unlock;
 
 	scheduler->state = SCHED_BUSY;
-
 	/* Do scheduling stuff */
 	scheduler_rotate(kbdev);
 
@@ -3323,17 +3765,23 @@ static void schedule_on_tick(struct work_struct *work)
 	/* Kicking next scheduling if needed */
 	if (likely(scheduler_timer_is_enabled_nolock(kbdev)) &&
 			(scheduler->total_runnable_grps > 0)) {
-		mod_delayed_work(scheduler->wq, &scheduler->tick_work,
-				  CSF_SCHEDULER_TIME_TICK_JIFFIES);
-		dev_dbg(kbdev->dev, "scheduling for next tick, num_runnable_groups:%u\n",
+		start_tick_timer(kbdev);
+		dev_dbg(kbdev->dev,
+			"scheduling for next tick, num_runnable_groups:%u\n",
 			scheduler->total_runnable_grps);
 	}
 
 	scheduler->state = SCHED_INACTIVE;
 	mutex_unlock(&scheduler->lock);
+	kbase_reset_gpu_allow(kbdev);
 
 	dev_dbg(kbdev->dev, "Waking up for event after schedule-on-tick completes.");
 	wake_up_all(&kbdev->csf.event_wait);
+	return;
+
+exit_no_schedule_unlock:
+	mutex_unlock(&scheduler->lock);
+	kbase_reset_gpu_allow(kbdev);
 }
 
 int wait_csg_slots_suspend(struct kbase_device *kbdev,
@@ -3381,7 +3829,7 @@ int wait_csg_slots_suspend(struct kbase_device *kbdev,
 					 */
 					save_csg_slot(group);
 					if (cleanup_csg_slot(group))
-						sched_evict_group(group, true);
+						sched_evict_group(group, true, true);
 				}
 			}
 		} else {
@@ -3414,8 +3862,7 @@ static int suspend_active_queue_groups(struct kbase_device *kbdev,
 		}
 	}
 
-	ret = wait_csg_slots_suspend(kbdev, slot_mask,
-			CSG_SUSPEND_ON_RESET_WAIT_TIMEOUT_MS);
+	ret = wait_csg_slots_suspend(kbdev, slot_mask, kbdev->reset_timeout_ms);
 	return ret;
 }
 
@@ -3424,31 +3871,35 @@ static int suspend_active_queue_groups_on_reset(struct kbase_device *kbdev)
 	struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
 	DECLARE_BITMAP(slot_mask, MAX_SUPPORTED_CSGS) = { 0 };
 	int ret;
+	int ret2;
 
 	mutex_lock(&scheduler->lock);
 
 	ret = suspend_active_queue_groups(kbdev, slot_mask);
+
 	if (ret) {
 		dev_warn(kbdev->dev, "Timed out waiting for CSG slots to suspend before reset, slot_mask: 0x%*pb\n",
 			 kbdev->csf.global_iface.group_num, slot_mask);
 	}
 
-	if (!bitmap_empty(slot_mask, MAX_SUPPORTED_CSGS)) {
-		int ret2;
-
-		/* Need to flush the GPU cache to ensure suspend buffer
-		 * contents are not lost on reset of GPU.
-		 * Do this even if suspend operation had timedout for some of
-		 * the CSG slots.
-		 */
-		kbase_gpu_start_cache_clean(kbdev);
-		ret2 = kbase_gpu_wait_cache_clean_timeout(kbdev,
-				DEFAULT_RESET_TIMEOUT_MS);
-		if (ret2) {
-			dev_warn(kbdev->dev, "Timed out waiting for cache clean to complete before reset");
-			if (!ret)
-				ret = ret2;
-		}
+	/* Need to flush the GPU cache to ensure suspend buffer
+	 * contents are not lost on reset of GPU.
+	 * Do this even if suspend operation had timed out for some of
+	 * the CSG slots.
+	 * In case the scheduler is already in suspended state, the
+	 * cache clean is required as the async reset request from
+	 * the debugfs may race against the scheduler suspend operation
+	 * due to the extra context ref-count, which prevents the
+	 * L2 powering down cache clean operation in the non racing
+	 * case.
+	 */
+	kbase_gpu_start_cache_clean(kbdev);
+	ret2 = kbase_gpu_wait_cache_clean_timeout(kbdev,
+			kbdev->reset_timeout_ms);
+	if (ret2) {
+		dev_warn(kbdev->dev, "Timed out waiting for cache clean to complete before reset");
+		if (!ret)
+			ret = ret2;
 	}
 
 	mutex_unlock(&scheduler->lock);
@@ -3465,7 +3916,9 @@ static void scheduler_inner_reset(struct kbase_device *kbdev)
 	WARN_ON(csgs_active(kbdev));
 
 	/* Cancel any potential queued delayed work(s) */
-	cancel_delayed_work_sync(&scheduler->tick_work);
+	cancel_work_sync(&kbdev->csf.scheduler.gpu_idle_work);
+	cancel_tick_timer(kbdev);
+	cancel_work_sync(&scheduler->tick_work);
 	cancel_delayed_work_sync(&scheduler->tock_work);
 	cancel_delayed_work_sync(&scheduler->ping_work);
 
@@ -3496,8 +3949,7 @@ void kbase_csf_scheduler_reset(struct kbase_device *kbdev)
 	WARN_ON(!kbase_reset_gpu_is_active(kbdev));
 
 	KBASE_KTRACE_ADD(kbdev, SCHEDULER_RESET, NULL, 0u);
-	if (!kbase_csf_scheduler_protected_mode_in_use(kbdev) &&
-	    !suspend_active_queue_groups_on_reset(kbdev)) {
+	if (!suspend_active_queue_groups_on_reset(kbdev)) {
 		/* As all groups have been successfully evicted from the CSG
 		 * slots, clear out thee scheduler data fields and return
 		 */
@@ -3542,10 +3994,23 @@ static void firmware_aliveness_monitor(struct work_struct *work)
 					csf.scheduler.ping_work.work);
 	int err;
 
-	/* Get the scheduler mutex to ensure that reset will not change while
-	 * this function is being executed as otherwise calling kbase_reset_gpu
-	 * when reset is already occurring is a programming error.
+	/* Ensure that reset will not be occurring while this function is being
+	 * executed as otherwise calling kbase_reset_gpu when reset is already
+	 * occurring is a programming error.
+	 *
+	 * We must use the 'try' variant as the Reset worker can try to flush
+	 * this workqueue, which would otherwise deadlock here if we tried to
+	 * wait for the reset (and thus ourselves) to complete.
 	 */
+	err = kbase_reset_gpu_try_prevent(kbdev);
+	if (err) {
+		/* It doesn't matter whether the value was -EAGAIN or a fatal
+		 * error, just stop processing. In case of -EAGAIN, the Reset
+		 * worker will restart the scheduler later to resume ping
+		 */
+		return;
+	}
+
 	mutex_lock(&kbdev->csf.scheduler.lock);
 
 #ifdef CONFIG_MALI_BIFROST_DEBUG
@@ -3558,9 +4023,6 @@ static void firmware_aliveness_monitor(struct work_struct *work)
 	if (kbdev->csf.scheduler.state == SCHED_SUSPENDED)
 		goto exit;
 
-	if (kbase_reset_gpu_is_active(kbdev))
-		goto exit;
-
 	if (get_nr_active_csgs(kbdev) != 1)
 		goto exit;
 
@@ -3575,10 +4037,14 @@ static void firmware_aliveness_monitor(struct work_struct *work)
 
 	kbase_pm_wait_for_desired_state(kbdev);
 
-	err = kbase_csf_firmware_ping(kbdev);
+	err = kbase_csf_firmware_ping_wait(kbdev);
 
 	if (err) {
-		if (kbase_prepare_to_reset_gpu(kbdev))
+		/* It is acceptable to enqueue a reset whilst we've prevented
+		 * them, it will happen after we've allowed them again
+		 */
+		if (kbase_prepare_to_reset_gpu(
+			    kbdev, RESET_FLAGS_HWC_UNRECOVERABLE_ERROR))
 			kbase_reset_gpu(kbdev);
 	} else if (get_nr_active_csgs(kbdev) == 1) {
 		queue_delayed_work(system_long_wq,
@@ -3589,6 +4055,8 @@ static void firmware_aliveness_monitor(struct work_struct *work)
 	kbase_pm_context_idle(kbdev);
 exit:
 	mutex_unlock(&kbdev->csf.scheduler.lock);
+	kbase_reset_gpu_allow(kbdev);
+	return;
 }
 
 int kbase_csf_scheduler_group_copy_suspend_buf(struct kbase_queue_group *group,
@@ -3597,18 +4065,12 @@ int kbase_csf_scheduler_group_copy_suspend_buf(struct kbase_queue_group *group,
 	struct kbase_context *const kctx = group->kctx;
 	struct kbase_device *const kbdev = kctx->kbdev;
 	struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
-	int err;
+	int err = 0;
 
+	kbase_reset_gpu_assert_prevented(kbdev);
 	lockdep_assert_held(&kctx->csf.lock);
 	mutex_lock(&scheduler->lock);
 
-	err = wait_gpu_reset(kbdev);
-	if (err) {
-		dev_warn(kbdev->dev, "Error while waiting for the GPU reset to complete when suspending group %d on slot %d",
-			 group->handle, group->csg_nr);
-		goto exit;
-	}
-
 	if (kbasep_csf_scheduler_group_is_on_slot_locked(group)) {
 		DECLARE_BITMAP(slot_mask, MAX_SUPPORTED_CSGS) = {0};
 
@@ -3617,7 +4079,7 @@ int kbase_csf_scheduler_group_copy_suspend_buf(struct kbase_queue_group *group,
 		if (!WARN_ON(scheduler->state == SCHED_SUSPENDED))
 			suspend_queue_group(group);
 		err = wait_csg_slots_suspend(kbdev, slot_mask,
-				CSF_STATE_WAIT_TIMEOUT_MS);
+					     kbdev->csf.fw_timeout_ms);
 		if (err) {
 			dev_warn(kbdev->dev, "Timed out waiting for the group %d to suspend on slot %d",
 					group->handle, group->csg_nr);
@@ -3698,7 +4160,9 @@ static bool group_sync_updated(struct kbase_queue_group *group)
 	bool updated = false;
 	int stream;
 
-	WARN_ON(group->run_state != KBASE_CSF_GROUP_SUSPENDED_ON_WAIT_SYNC);
+	/* Groups can also be blocked on-slot during protected mode. */
+	WARN_ON(group->run_state != KBASE_CSF_GROUP_SUSPENDED_ON_WAIT_SYNC &&
+		    group->run_state != KBASE_CSF_GROUP_IDLE);
 
 	for (stream = 0; stream < MAX_SUPPORTED_STREAMS_PER_GROUP; ++stream) {
 		struct kbase_queue *const queue = group->bound_queues[stream];
@@ -3790,17 +4254,145 @@ void kbase_csf_scheduler_group_protm_enter(struct kbase_queue_group *group)
 	struct kbase_device *const kbdev = group->kctx->kbdev;
 	struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
 
+	int err = kbase_reset_gpu_try_prevent(kbdev);
+	/* Regardless of whether reset failed or is currently happening, exit
+	 * early
+	 */
+	if (err)
+		return;
+
 	mutex_lock(&scheduler->lock);
 
-	/* Check if the group is now eligible for execution in protected mode
-	 * and accordingly undertake full scheduling actions as due to
-	 * GPUCORE-24491 the on slot groups other than the top group have to
-	 * be suspended first before entering protected mode.
-	 */
+	/* Check if the group is now eligible for execution in protected mode. */
 	if (scheduler_get_protm_enter_async_group(kbdev, group))
-		schedule_actions(kbdev);
+		scheduler_group_check_protm_enter(kbdev, group);
 
 	mutex_unlock(&scheduler->lock);
+	kbase_reset_gpu_allow(kbdev);
+}
+
+/**
+ * check_sync_update_for_idle_group_protm() - Check the sync wait condition
+ *                                            for all the queues bound to
+ *                                            the given group.
+ *
+ * @group:    Pointer to the group that requires evaluation.
+ *
+ * This function is called if the GPU is in protected mode and there are on
+ * slot idle groups with higher priority than the active protected mode group.
+ * It evaluates the sync condition, if any, of the enabled queues bound to the
+ * given group and stops evaluating once one of them has been satisfied.
+ *
+ * Return: true if the sync condition of at least one queue has been satisfied.
+ */
+static bool check_sync_update_for_idle_group_protm(
+		struct kbase_queue_group *group)
+{
+	struct kbase_device *const kbdev = group->kctx->kbdev;
+	struct kbase_csf_scheduler *const scheduler =
+				&kbdev->csf.scheduler;
+	bool sync_update_done = false;
+	int i;
+
+	lockdep_assert_held(&scheduler->lock);
+
+	for (i = 0; i < MAX_SUPPORTED_STREAMS_PER_GROUP; i++) {
+		struct kbase_queue *queue = group->bound_queues[i];
+
+		if (queue && queue->enabled && !sync_update_done) {
+			struct kbase_csf_cmd_stream_group_info *const ginfo =
+				&kbdev->csf.global_iface.groups[group->csg_nr];
+			struct kbase_csf_cmd_stream_info *const stream =
+				&ginfo->streams[queue->csi_index];
+			u32 status = kbase_csf_firmware_cs_output(
+					stream, CS_STATUS_WAIT);
+			unsigned long flags;
+
+			if (!CS_STATUS_WAIT_SYNC_WAIT_GET(status))
+				continue;
+
+			/* Save the sync object details (wait status, 64-bit
+			 * pointer and value) of the command queue so that the
+			 * callback function, 'group_sync_updated', can evaluate
+			 * the sync object when it gets updated later.
+			 */
+			queue->status_wait = status;
+			queue->sync_ptr = kbase_csf_firmware_cs_output(
+				stream, CS_STATUS_WAIT_SYNC_POINTER_LO);
+			queue->sync_ptr |= (u64)kbase_csf_firmware_cs_output(
+				stream, CS_STATUS_WAIT_SYNC_POINTER_HI) << 32;
+			queue->sync_value = kbase_csf_firmware_cs_output(
+				stream, CS_STATUS_WAIT_SYNC_VALUE);
+
+			if (!evaluate_sync_update(queue))
+				continue;
+
+			/* Clear the slot's idle bit, mark the group runnable */
+			spin_lock_irqsave(&scheduler->interrupt_lock, flags);
+			clear_bit((unsigned int)group->csg_nr,
+					scheduler->csg_slots_idle_mask);
+			spin_unlock_irqrestore(&scheduler->interrupt_lock,
+					       flags);
+			group->run_state = KBASE_CSF_GROUP_RUNNABLE;
+
+			KBASE_KTRACE_ADD_CSF_GRP(kbdev, GROUP_SYNC_UPDATE_DONE, group, 0u);
+			sync_update_done = true;
+		}
+	}
+
+	return sync_update_done;
+}
+
+/**
+ * check_sync_update_for_idle_groups_protm() - Check the sync wait condition
+ *                                             for the idle groups on slot
+ *                                             during protected mode.
+ *
+ * @kbdev:    Pointer to the GPU device
+ *
+ * This function checks the GPU queues of all the on-slot idle groups that
+ * have a higher priority than the active protected mode group. It returns
+ * false straight away when no protected mode group is active.
+ *
+ * Return: true if the sync condition of at least one queue in a group has been
+ * satisfied.
+ */
+static bool check_sync_update_for_idle_groups_protm(struct kbase_device *kbdev)
+{
+	struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
+	struct kbase_queue_group *protm_grp;
+	bool exit_protm = false;
+	unsigned long flags;
+	u32 num_groups;
+	u32 i;
+
+	lockdep_assert_held(&scheduler->lock);
+
+	spin_lock_irqsave(&scheduler->interrupt_lock, flags);
+	protm_grp = scheduler->active_protm_grp;
+	spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
+
+	if (!protm_grp)
+		return exit_protm;
+
+	num_groups = kbdev->csf.global_iface.group_num;
+
+	for_each_set_bit(i, scheduler->csg_slots_idle_mask, num_groups) {
+		struct kbase_csf_csg_slot *csg_slot =
+					&scheduler->csg_slots[i];
+		struct kbase_queue_group *group = csg_slot->resident_group;
+
+		if (group->scan_seq_num < protm_grp->scan_seq_num) {
+			/* If sync update has been performed for a group that
+			 * has a higher priority than the protm group, then we
+			 * need to exit protected mode.
+			 */
+			if (check_sync_update_for_idle_group_protm(group))
+				exit_protm = true;
+		}
+	}
+
+	return exit_protm;
 }
 
 /**
@@ -3810,18 +4402,20 @@ void kbase_csf_scheduler_group_protm_enter(struct kbase_queue_group *group)
  * @work:    Pointer to the context-specific work item for evaluating the wait
  *           condition for all the queue groups in idle_wait_groups list.
  *
- * This function checks the gpu queues of all the groups present in
- * idle_wait_groups list of a context. If the sync wait condition
- * for at least one queue bound to the group has been satisfied then
- * the group is moved to the per context list of runnable groups so
- * that Scheduler can consider scheduling the group in next tick.
+ * This function checks the gpu queues of all the groups present in both
+ * idle_wait_groups list of a context and all on slot idle groups (if GPU
+ * is in protected mode).
+ * If the sync wait condition for at least one queue bound to the group has
+ * been satisfied then the group is moved to the per context list of
+ * runnable groups so that Scheduler can consider scheduling the group
+ * in next tick or exit protected mode.
  */
 static void check_group_sync_update_worker(struct work_struct *work)
 {
 	struct kbase_context *const kctx = container_of(work,
 		struct kbase_context, csf.sched.sync_update_work);
-	struct kbase_csf_scheduler *const scheduler =
-		&kctx->kbdev->csf.scheduler;
+	struct kbase_device *const kbdev = kctx->kbdev;
+	struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
 
 	mutex_lock(&scheduler->lock);
 
@@ -3835,13 +4429,16 @@ static void check_group_sync_update_worker(struct work_struct *work)
 				 * groups list of the context.
 				 */
 				update_idle_suspended_group_state(group);
-				KBASE_KTRACE_ADD_CSF_GRP(kctx->kbdev, GROUP_SYNC_UPDATE_DONE, group, 0u);
+				KBASE_KTRACE_ADD_CSF_GRP(kbdev, GROUP_SYNC_UPDATE_DONE, group, 0u);
 			}
 		}
 	} else {
 		WARN_ON(!list_empty(&kctx->csf.sched.idle_wait_groups));
 	}
 
+	if (check_sync_update_for_idle_groups_protm(kbdev))
+		scheduler_force_protm_exit(kbdev);
+
 	mutex_unlock(&scheduler->lock);
 }
 
@@ -3862,7 +4459,7 @@ int kbase_csf_scheduler_context_init(struct kbase_context *kctx)
 	int priority;
 	int err;
 
-	for (priority = 0; priority < BASE_QUEUE_GROUP_PRIORITY_COUNT;
+	for (priority = 0; priority < KBASE_QUEUE_GROUP_PRIORITY_COUNT;
 	     ++priority) {
 		INIT_LIST_HEAD(&kctx->csf.sched.runnable_groups[priority]);
 	}
@@ -3930,11 +4527,11 @@ int kbase_csf_scheduler_init(struct kbase_device *kbdev)
 		return -ENOMEM;
 	}
 
-	INIT_DEFERRABLE_WORK(&scheduler->tick_work, schedule_on_tick);
+	INIT_WORK(&scheduler->tick_work, schedule_on_tick);
 	INIT_DEFERRABLE_WORK(&scheduler->tock_work, schedule_on_tock);
 
 	INIT_DEFERRABLE_WORK(&scheduler->ping_work, firmware_aliveness_monitor);
-	BUILD_BUG_ON(GLB_REQ_WAIT_TIMEOUT_MS >= FIRMWARE_PING_INTERVAL_MS);
+	BUILD_BUG_ON(CSF_FIRMWARE_TIMEOUT_MS >= FIRMWARE_PING_INTERVAL_MS);
 
 	mutex_init(&scheduler->lock);
 	spin_lock_init(&scheduler->interrupt_lock);
@@ -3956,10 +4553,16 @@ int kbase_csf_scheduler_init(struct kbase_device *kbdev)
 	scheduler->last_schedule = 0;
 	scheduler->tock_pending_request = false;
 	scheduler->active_protm_grp = NULL;
+	scheduler->gpu_idle_fw_timer_enabled = false;
+	scheduler->csg_scheduling_period_ms = CSF_SCHEDULER_TIME_TICK_MS;
 	scheduler_doorbell_init(kbdev);
 
-	INIT_DEFERRABLE_WORK(&scheduler->gpu_idle_work, gpu_idle_worker);
-	atomic_set(&scheduler->non_idle_suspended_grps, 0);
+	INIT_WORK(&scheduler->gpu_idle_work, gpu_idle_worker);
+	atomic_set(&scheduler->non_idle_offslot_grps, 0);
+
+	hrtimer_init(&scheduler->tick_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+	scheduler->tick_timer.function = tick_timer_callback;
+	scheduler->tick_timer_active = false;
 
 	return 0;
 }
@@ -3967,9 +4570,17 @@ int kbase_csf_scheduler_init(struct kbase_device *kbdev)
 void kbase_csf_scheduler_term(struct kbase_device *kbdev)
 {
 	if (kbdev->csf.scheduler.csg_slots) {
+		WARN_ON(atomic_read(&kbdev->csf.scheduler.non_idle_offslot_grps));
 		WARN_ON(csgs_active(kbdev));
-		cancel_delayed_work_sync(&kbdev->csf.scheduler.gpu_idle_work);
+		flush_work(&kbdev->csf.scheduler.gpu_idle_work);
+		mutex_lock(&kbdev->csf.scheduler.lock);
+		if (WARN_ON(kbdev->csf.scheduler.state != SCHED_SUSPENDED))
+			scheduler_suspend(kbdev);
+		mutex_unlock(&kbdev->csf.scheduler.lock);
 		cancel_delayed_work_sync(&kbdev->csf.scheduler.ping_work);
+		cancel_tick_timer(kbdev);
+		cancel_work_sync(&kbdev->csf.scheduler.tick_work);
+		cancel_delayed_work_sync(&kbdev->csf.scheduler.tock_work);
 		destroy_workqueue(kbdev->csf.scheduler.wq);
 		mutex_destroy(&kbdev->csf.scheduler.lock);
 		kfree(kbdev->csf.scheduler.csg_slots);
@@ -3980,8 +4591,7 @@ void kbase_csf_scheduler_term(struct kbase_device *kbdev)
 /**
  * scheduler_enable_tick_timer_nolock - Enable the scheduler tick timer.
  *
- * @kbdev: Instance of a GPU platform device that implements a command
- *         stream front-end interface.
+ * @kbdev: Instance of a GPU platform device that implements a CSF interface.
  *
  * This function will restart the scheduler tick so that regular scheduling can
  * be resumed without any explicit trigger (like kicking of GPU queues). This
@@ -3999,11 +4609,12 @@ static void scheduler_enable_tick_timer_nolock(struct kbase_device *kbdev)
 
 	WARN_ON((scheduler->state != SCHED_INACTIVE) &&
 		(scheduler->state != SCHED_SUSPENDED));
-	WARN_ON(delayed_work_pending(&scheduler->tick_work));
 
 	if (scheduler->total_runnable_grps > 0) {
-		mod_delayed_work(scheduler->wq, &scheduler->tick_work, 0);
+		enqueue_tick_work(kbdev);
 		dev_dbg(kbdev->dev, "Re-enabling the scheduler timer\n");
+	} else if (scheduler->state != SCHED_SUSPENDED) {
+		queue_work(system_wq, &scheduler->gpu_idle_work);
 	}
 }
 
@@ -4037,16 +4648,19 @@ void kbase_csf_scheduler_timer_set_enabled(struct kbase_device *kbdev,
 	currently_enabled = scheduler_timer_is_enabled_nolock(kbdev);
 	if (currently_enabled && !enable) {
 		scheduler->timer_enabled = false;
-
-		cancel_delayed_work(&scheduler->tick_work);
+		cancel_tick_timer(kbdev);
 		cancel_delayed_work(&scheduler->tock_work);
+		mutex_unlock(&scheduler->lock);
+		/* Plain work has no non-sync cancel, so drop the lock first. */
+		cancel_work_sync(&scheduler->tick_work);
+		return;
 	} else if (!currently_enabled && enable) {
 		scheduler->timer_enabled = true;
 
 		scheduler_enable_tick_timer_nolock(kbdev);
 	}
 
 	mutex_unlock(&scheduler->lock);
 }
 
 void kbase_csf_scheduler_kick(struct kbase_device *kbdev)
@@ -4059,7 +4673,7 @@ void kbase_csf_scheduler_kick(struct kbase_device *kbdev)
 		goto out;
 
 	if (scheduler->total_runnable_grps > 0) {
-		mod_delayed_work(scheduler->wq, &scheduler->tick_work, 0);
+		enqueue_tick_work(kbdev);
 		dev_dbg(kbdev->dev, "Kicking the scheduler manually\n");
 	}
 
@@ -4072,20 +4686,30 @@ void kbase_csf_scheduler_pm_suspend(struct kbase_device *kbdev)
 	struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
 
 	/* Cancel any potential queued delayed work(s) */
-	cancel_delayed_work_sync(&scheduler->tick_work);
+	cancel_work_sync(&scheduler->tick_work);
 	cancel_delayed_work_sync(&scheduler->tock_work);
 
+	if (kbase_reset_gpu_prevent_and_wait(kbdev)) {
+		dev_warn(kbdev->dev,
+			 "Stop PM suspending for failing to prevent gpu reset.\n");
+		return;
+	}
+
 	mutex_lock(&scheduler->lock);
 
-	WARN_ON(!kbase_pm_is_suspending(kbdev));
+	disable_gpu_idle_fw_timer(kbdev);
 
 	if (scheduler->state != SCHED_SUSPENDED) {
 		suspend_active_groups_on_powerdown(kbdev, true);
 		dev_info(kbdev->dev, "Scheduler PM suspend");
 		scheduler_suspend(kbdev);
+		cancel_tick_timer(kbdev);
 	}
 	mutex_unlock(&scheduler->lock);
+
+	kbase_reset_gpu_allow(kbdev);
 }
+KBASE_EXPORT_TEST_API(kbase_csf_scheduler_pm_suspend);
 
 void kbase_csf_scheduler_pm_resume(struct kbase_device *kbdev)
 {
@@ -4093,8 +4717,6 @@ void kbase_csf_scheduler_pm_resume(struct kbase_device *kbdev)
 
 	mutex_lock(&scheduler->lock);
 
-	WARN_ON(kbase_pm_is_suspending(kbdev));
-
 	if (scheduler->total_runnable_grps > 0) {
 		WARN_ON(scheduler->state != SCHED_SUSPENDED);
 		dev_info(kbdev->dev, "Scheduler PM resume");
@@ -4102,6 +4724,7 @@ void kbase_csf_scheduler_pm_resume(struct kbase_device *kbdev)
 	}
 	mutex_unlock(&scheduler->lock);
 }
+KBASE_EXPORT_TEST_API(kbase_csf_scheduler_pm_resume);
 
 void kbase_csf_scheduler_pm_active(struct kbase_device *kbdev)
 {
@@ -4118,6 +4741,7 @@ void kbase_csf_scheduler_pm_active(struct kbase_device *kbdev)
 	else
 		WARN_ON(prev_count == U32_MAX);
 }
+KBASE_EXPORT_TEST_API(kbase_csf_scheduler_pm_active);
 
 void kbase_csf_scheduler_pm_idle(struct kbase_device *kbdev)
 {
@@ -4133,3 +4757,4 @@ void kbase_csf_scheduler_pm_idle(struct kbase_device *kbdev)
 	else
 		WARN_ON(prev_count == 0);
 }
+KBASE_EXPORT_TEST_API(kbase_csf_scheduler_pm_idle);
diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_scheduler.h b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_scheduler.h
index 1b1c0681f64d..1607ff637554 100644
--- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_scheduler.h
+++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_scheduler.h
@@ -1,11 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  *
- * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,8 +17,6 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 #ifndef _KBASE_CSF_SCHEDULER_H_
@@ -31,11 +30,10 @@
  *
  * @queue: Pointer to the GPU command queue to be started.
  *
- * This function would enable the start of a command stream interface, within a
- * command stream group, to which the @queue was bound.
- * If the command stream group is already scheduled and resident, the command
- * stream interface will be started right away, otherwise once the group is
- * made resident.
+ * This function would enable the start of a CSI, within a
+ * CSG, to which the @queue was bound.
+ * If the CSG is already scheduled and resident, the CSI will be started
+ * right away, otherwise once the group is made resident.
  *
  * Return: 0 on success, or negative on failure.
  */
@@ -47,8 +45,7 @@ int kbase_csf_scheduler_queue_start(struct kbase_queue *queue);
  *
  * @queue: Pointer to the GPU command queue to be stopped.
  *
- * This function would stop the command stream interface, within a command
- * stream group, to which the @queue was bound.
+ * This function would stop the CSI, within a CSG, to which @queue was bound.
  *
  * Return: 0 on success, or negative on failure.
  */
@@ -69,7 +66,7 @@ void kbase_csf_scheduler_group_protm_enter(struct kbase_queue_group *group);
 
 /**
  * kbase_csf_scheduler_group_get_slot() - Checks if a queue group is
- *                           programmed on a firmware Command Stream Group slot
+ *                           programmed on a firmware CSG slot
  *                           and returns the slot number.
  *
  * @group: The command queue group.
@@ -84,7 +81,7 @@ int kbase_csf_scheduler_group_get_slot(struct kbase_queue_group *group);
 
 /**
  * kbase_csf_scheduler_group_get_slot_locked() - Checks if a queue group is
- *                           programmed on a firmware Command Stream Group slot
+ *                           programmed on a firmware CSG slot
  *                           and returns the slot number.
  *
  * @group: The command queue group.
@@ -112,7 +109,7 @@ bool kbase_csf_scheduler_group_events_enabled(struct kbase_device *kbdev,
 
 /**
  * kbase_csf_scheduler_get_group_on_slot()- Gets the queue group that has been
- *                          programmed to a firmware Command Stream Group slot.
+ *                          programmed to a firmware CSG slot.
  *
  * @kbdev: The GPU device.
  * @slot:  The slot for which to get the queue group.
@@ -128,7 +125,7 @@ struct kbase_queue_group *kbase_csf_scheduler_get_group_on_slot(
  * kbase_csf_scheduler_group_deschedule() - Deschedule a GPU command queue
  *                                          group from the firmware.
  *
- * @group: Pointer to the queue group to be scheduled.
+ * @group: Pointer to the queue group to be descheduled.
  *
  * This function would disable the scheduling of GPU command queue group on
  * firmware.
@@ -166,10 +163,9 @@ int kbase_csf_scheduler_context_init(struct kbase_context *kctx);
 /**
  * kbase_csf_scheduler_init - Initialize the CSF scheduler
  *
- * @kbdev: Instance of a GPU platform device that implements a command
- *         stream front-end interface.
+ * @kbdev: Instance of a GPU platform device that implements a CSF interface.
  *
- * The scheduler does the arbitration for the command stream group slots
+ * The scheduler does the arbitration for the CSG slots
  * provided by the firmware between the GPU command queue groups created
  * by the Clients.
  *
@@ -178,7 +174,7 @@ int kbase_csf_scheduler_context_init(struct kbase_context *kctx);
 int kbase_csf_scheduler_init(struct kbase_device *kbdev);
 
 /**
- * kbase_csf_scheduler_context_init() - Terminate the context-specific part
+ * kbase_csf_scheduler_context_term() - Terminate the context-specific part
  *                                      for CSF scheduler.
  *
  * @kctx: Pointer to kbase context that is being terminated.
@@ -190,8 +186,7 @@ void kbase_csf_scheduler_context_term(struct kbase_context *kctx);
 /**
  * kbase_csf_scheduler_term - Terminate the CSF scheduler.
  *
- * @kbdev: Instance of a GPU platform device that implements a command
- *         stream front-end interface.
+ * @kbdev: Instance of a GPU platform device that implements a CSF interface.
  *
  * This should be called when unload of firmware is done on device
  * termination.
@@ -202,8 +197,7 @@ void kbase_csf_scheduler_term(struct kbase_device *kbdev);
  * kbase_csf_scheduler_reset - Reset the state of all active GPU command
  *                             queue groups.
  *
- * @kbdev: Instance of a GPU platform device that implements a command
- *         stream front-end interface.
+ * @kbdev: Instance of a GPU platform device that implements a CSF interface.
  *
  * This function will first iterate through all the active/scheduled GPU
  * command queue groups and suspend them (to avoid losing work for groups
@@ -223,8 +217,7 @@ void kbase_csf_scheduler_reset(struct kbase_device *kbdev);
 /**
  * kbase_csf_scheduler_enable_tick_timer - Enable the scheduler tick timer.
  *
- * @kbdev: Instance of a GPU platform device that implements a command
- *         stream front-end interface.
+ * @kbdev: Instance of a GPU platform device that implements a CSF interface.
  *
  * This function will restart the scheduler tick so that regular scheduling can
  * be resumed without any explicit trigger (like kicking of GPU queues).
@@ -251,8 +244,7 @@ int kbase_csf_scheduler_group_copy_suspend_buf(struct kbase_queue_group *group,
 /**
  * kbase_csf_scheduler_lock - Acquire the global Scheduler lock.
  *
- * @kbdev: Instance of a GPU platform device that implements a command
- *         stream front-end interface.
+ * @kbdev: Instance of a GPU platform device that implements a CSF interface.
  *
  * This function will take the global scheduler lock, in order to serialize
  * against the Scheduler actions, for access to CS IO pages.
@@ -265,8 +257,7 @@ static inline void kbase_csf_scheduler_lock(struct kbase_device *kbdev)
 /**
  * kbase_csf_scheduler_unlock - Release the global Scheduler lock.
  *
- * @kbdev: Instance of a GPU platform device that implements a command
- *         stream front-end interface.
+ * @kbdev: Instance of a GPU platform device that implements a CSF interface.
  */
 static inline void kbase_csf_scheduler_unlock(struct kbase_device *kbdev)
 {
@@ -276,8 +267,7 @@ static inline void kbase_csf_scheduler_unlock(struct kbase_device *kbdev)
 /**
  * kbase_csf_scheduler_spin_lock - Acquire Scheduler interrupt spinlock.
  *
- * @kbdev: Instance of a GPU platform device that implements a command
- *         stream front-end interface.
+ * @kbdev: Instance of a GPU platform device that implements a CSF interface.
  * @flags: Pointer to the memory location that would store the previous
  *         interrupt state.
  *
@@ -293,8 +283,7 @@ static inline void kbase_csf_scheduler_spin_lock(struct kbase_device *kbdev,
 /**
  * kbase_csf_scheduler_spin_unlock - Release Scheduler interrupt spinlock.
  *
- * @kbdev: Instance of a GPU platform device that implements a command
- *         stream front-end interface.
+ * @kbdev: Instance of a GPU platform device that implements a CSF interface.
  * @flags: Previously stored interrupt state when Scheduler interrupt
  *         spinlock was acquired.
  */
@@ -308,8 +297,7 @@ static inline void kbase_csf_scheduler_spin_unlock(struct kbase_device *kbdev,
  * kbase_csf_scheduler_spin_lock_assert_held - Assert if the Scheduler
  *                                          interrupt spinlock is held.
  *
- * @kbdev: Instance of a GPU platform device that implements a command
- *         stream front-end interface.
+ * @kbdev: Instance of a GPU platform device that implements a CSF interface.
  */
 static inline void
 kbase_csf_scheduler_spin_lock_assert_held(struct kbase_device *kbdev)
@@ -342,8 +330,7 @@ void kbase_csf_scheduler_timer_set_enabled(struct kbase_device *kbdev,
  *
  * Note: This function is only effective if the scheduling timer is disabled.
  *
- * @kbdev: Instance of a GPU platform device that implements a command
- *         stream front-end interface.
+ * @kbdev: Instance of a GPU platform device that implements a CSF interface.
  */
 void kbase_csf_scheduler_kick(struct kbase_device *kbdev);
 
@@ -367,8 +354,7 @@ static inline bool kbase_csf_scheduler_protected_mode_in_use(
  * Note: This function will increase the scheduler's internal pm_active_count
  * value, ensuring that both GPU and MCU are powered for access.
  *
- * @kbdev: Instance of a GPU platform device that implements a command
- *         stream front-end interface.
+ * @kbdev: Instance of a GPU platform device that implements a CSF interface.
  */
 void kbase_csf_scheduler_pm_active(struct kbase_device *kbdev);
 
@@ -378,16 +364,14 @@ void kbase_csf_scheduler_pm_active(struct kbase_device *kbdev);
  * Note: This function will decrease the scheduler's internal pm_active_count
  * value. On reaching 0, the MCU and GPU could be powered off.
  *
- * @kbdev: Instance of a GPU platform device that implements a command
- *         stream front-end interface.
+ * @kbdev: Instance of a GPU platform device that implements a CSF interface.
  */
 void kbase_csf_scheduler_pm_idle(struct kbase_device *kbdev);
 
 /**
  * kbase_csf_scheduler_pm_resume - Reactivate the scheduler on system resume
  *
- * @kbdev: Instance of a GPU platform device that implements a command
- *         stream front-end interface.
+ * @kbdev: Instance of a GPU platform device that implements a CSF interface.
  *
  * This function will make the scheduler resume the scheduling of queue groups
  * and take the power managemenet reference, if there are any runnable groups.
@@ -397,12 +381,69 @@ void kbase_csf_scheduler_pm_resume(struct kbase_device *kbdev);
 /**
  * kbase_csf_scheduler_pm_suspend - Idle the scheduler on system suspend
  *
- * @kbdev: Instance of a GPU platform device that implements a command
- *         stream front-end interface.
+ * @kbdev: Instance of a GPU platform device that implements a CSF interface.
  *
  * This function will make the scheduler suspend all the running queue groups
  * and drop its power managemenet reference.
  */
 void kbase_csf_scheduler_pm_suspend(struct kbase_device *kbdev);
 
+/**
+ * kbase_csf_scheduler_all_csgs_idle() - Check if the scheduler internal
+ * runtime used slots are all tagged as idle command queue groups.
+ *
+ * @kbdev: Pointer to the device; its scheduler interrupt_lock must be held.
+ *
+ * Return: true if all the used slots are tagged as idle CSGs.
+ */
+static inline bool kbase_csf_scheduler_all_csgs_idle(struct kbase_device *kbdev)
+{
+	lockdep_assert_held(&kbdev->csf.scheduler.interrupt_lock);
+	return bitmap_equal(kbdev->csf.scheduler.csg_slots_idle_mask,
+			    kbdev->csf.scheduler.csg_inuse_bitmap,
+			    kbdev->csf.global_iface.group_num);
+}
+
+/**
+ * kbase_csf_scheduler_advance_tick_nolock() - Advance the scheduling tick
+ *
+ * @kbdev: Pointer to the device
+ *
+ * This function advances the scheduling tick by enqueuing the tick work item
+ * for immediate execution, but only if the tick hrtimer is active. If the
+ * timer is inactive then the tick work item is already in flight.
+ * The caller must hold the interrupt lock.
+ */
+static inline void
+kbase_csf_scheduler_advance_tick_nolock(struct kbase_device *kbdev)
+{
+	struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
+
+	lockdep_assert_held(&scheduler->interrupt_lock);
+
+	if (scheduler->tick_timer_active) {
+		scheduler->tick_timer_active = false;
+		queue_work(scheduler->wq, &scheduler->tick_work);
+	}
+}
+
+/**
+ * kbase_csf_scheduler_advance_tick() - Advance the scheduling tick
+ *
+ * @kbdev: Pointer to the device
+ *
+ * This function takes the interrupt lock and advances the scheduling tick by
+ * enqueuing the tick work item for immediate execution, but only if the tick
+ * hrtimer is active; if inactive, the tick work item is already in flight.
+ */
+static inline void kbase_csf_scheduler_advance_tick(struct kbase_device *kbdev)
+{
+	struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
+	unsigned long flags;
+
+	spin_lock_irqsave(&scheduler->interrupt_lock, flags);
+	kbase_csf_scheduler_advance_tick_nolock(kbdev);
+	spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
+}
+
 #endif /* _KBASE_CSF_SCHEDULER_H_ */
diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tiler_heap.c b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tiler_heap.c
index 60cae15bc8ef..4b402df2f1c3 100644
--- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tiler_heap.c
+++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tiler_heap.c
@@ -1,11 +1,12 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  *
- * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,10 +17,10 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
+#include <tl/mali_kbase_tracepoints.h>
+
 #include "mali_kbase_csf_tiler_heap.h"
 #include "mali_kbase_csf_tiler_heap_def.h"
 #include "mali_kbase_csf_heap_context_alloc.h"
@@ -337,6 +338,12 @@ static void delete_heap(struct kbase_csf_tiler_heap *heap)
 		heap->gpu_va);
 
 	list_del(&heap->link);
+
+	WARN_ON(heap->chunk_count);
+	KBASE_TLSTREAM_AUX_TILER_HEAP_STATS(kctx->kbdev, kctx->id,
+		heap->heap_id, 0, 0, heap->max_chunks, heap->chunk_size, 0,
+		heap->target_in_flight, 0);
+
 	kfree(heap);
 }
 
@@ -473,11 +480,20 @@ int kbase_csf_tiler_heap_init(struct kbase_context *const kctx,
 			list_first_entry(&heap->chunks_list,
 				struct kbase_csf_tiler_heap_chunk, link);
 
+		kctx->csf.tiler_heaps.nr_of_heaps++;
+		heap->heap_id = kctx->csf.tiler_heaps.nr_of_heaps;
 		list_add(&heap->link, &kctx->csf.tiler_heaps.list);
 
 		*heap_gpu_va = heap->gpu_va;
 		*first_chunk_va = first_chunk->gpu_va;
 
+		KBASE_TLSTREAM_AUX_TILER_HEAP_STATS(
+			kctx->kbdev, kctx->id, heap->heap_id,
+			PFN_UP(heap->chunk_size * heap->max_chunks),
+			PFN_UP(heap->chunk_size * heap->chunk_count),
+			heap->max_chunks, heap->chunk_size, heap->chunk_count,
+			heap->target_in_flight, 0);
+
 		dev_dbg(kctx->kbdev->dev, "Created tiler heap 0x%llX\n",
 			heap->gpu_va);
 	}
@@ -513,49 +529,53 @@ int kbase_csf_tiler_heap_term(struct kbase_context *const kctx,
  * on the settings provided by userspace when the heap was created and the
  * heap's statistics (like number of render passes in-flight).
  *
- * @heap:         Pointer to the tiler heap.
- * @nr_in_flight: Number of render passes that are in-flight, must not be zero.
- * @new_chunk_ptr: Where to store the GPU virtual address & size of the new
- *                 chunk allocated for the heap.
+ * @heap:               Pointer to the tiler heap.
+ * @nr_in_flight:       Number of render passes that are in-flight, must not be zero.
+ * @pending_frag_count: Number of render passes in-flight with completed vertex/tiler stage.
+ *                      The minimum value is zero but it must be less or equal to
+ *                      the total number of render passes in flight
+ * @new_chunk_ptr:      Where to store the GPU virtual address & size of the new
+ *                      chunk allocated for the heap.
  *
  * Return: 0 if a new chunk was allocated otherwise an appropriate negative
  *         error code.
  */
 static int alloc_new_chunk(struct kbase_csf_tiler_heap *heap,
-		u32 nr_in_flight, u64 *new_chunk_ptr)
+		u32 nr_in_flight, u32 pending_frag_count, u64 *new_chunk_ptr)
 {
 	int err = -ENOMEM;
 
 	lockdep_assert_held(&heap->kctx->csf.tiler_heaps.lock);
 
-	if (!nr_in_flight)
+	if (WARN_ON(!nr_in_flight) ||
+		WARN_ON(pending_frag_count > nr_in_flight))
 		return -EINVAL;
 
-	if ((nr_in_flight <= heap->target_in_flight) &&
-	    (heap->chunk_count < heap->max_chunks)) {
-		/* Not exceeded the target number of render passes yet so be
-		 * generous with memory.
-		 */
-		err = create_chunk(heap, false);
+	if (nr_in_flight <= heap->target_in_flight) {
+		if (heap->chunk_count < heap->max_chunks) {
+			/* Not exceeded the target number of render passes yet so be
+			 * generous with memory.
+			 */
+			err = create_chunk(heap, false);
 
-		if (likely(!err)) {
-			struct kbase_csf_tiler_heap_chunk *new_chunk =
-							get_last_chunk(heap);
-			if (!WARN_ON(!new_chunk)) {
-				*new_chunk_ptr =
-					encode_chunk_ptr(heap->chunk_size,
-							 new_chunk->gpu_va);
-				return 0;
+			if (likely(!err)) {
+				struct kbase_csf_tiler_heap_chunk *new_chunk =
+								get_last_chunk(heap);
+				if (!WARN_ON(!new_chunk)) {
+					*new_chunk_ptr =
+						encode_chunk_ptr(heap->chunk_size,
+								 new_chunk->gpu_va);
+					return 0;
+				}
 			}
+		} else if (pending_frag_count > 0) {
+			err = -EBUSY;
+		} else {
+			err = -ENOMEM;
 		}
-	}
-
-	/* A new chunk wasn't allocated this time, check if the allocation can
-	 * be retried later.
-	 */
-	if (nr_in_flight > 1) {
-		/* Can retry as there are some ongoing fragment
-		 * jobs which are expected to free up chunks.
+	} else {
+		/* Reached target number of render passes in flight.
+		 * Wait for some of them to finish
 		 */
 		err = -EBUSY;
 	}
@@ -564,7 +584,7 @@ static int alloc_new_chunk(struct kbase_csf_tiler_heap *heap,
 }
 
 int kbase_csf_tiler_heap_alloc_new_chunk(struct kbase_context *kctx,
-	u64 gpu_heap_va, u32 nr_in_flight, u64 *new_chunk_ptr)
+	u64 gpu_heap_va, u32 nr_in_flight, u32 pending_frag_count, u64 *new_chunk_ptr)
 {
 	struct kbase_csf_tiler_heap *heap;
 	int err = -EINVAL;
@@ -574,8 +594,15 @@ int kbase_csf_tiler_heap_alloc_new_chunk(struct kbase_context *kctx,
 	heap = find_tiler_heap(kctx, gpu_heap_va);
 
 	if (likely(heap)) {
-		err = alloc_new_chunk(heap, nr_in_flight,
+		err = alloc_new_chunk(heap, nr_in_flight, pending_frag_count,
 			new_chunk_ptr);
+
+		KBASE_TLSTREAM_AUX_TILER_HEAP_STATS(
+			kctx->kbdev, kctx->id, heap->heap_id,
+			PFN_UP(heap->chunk_size * heap->max_chunks),
+			PFN_UP(heap->chunk_size * heap->chunk_count),
+			heap->max_chunks, heap->chunk_size, heap->chunk_count,
+			heap->target_in_flight, nr_in_flight);
 	}
 
 	mutex_unlock(&kctx->csf.tiler_heaps.lock);
diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tiler_heap.h b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tiler_heap.h
index 1a4729df6ca3..683aeca38bc3 100644
--- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tiler_heap.h
+++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tiler_heap.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  *
  * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved.
@@ -5,7 +6,7 @@
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,8 +17,6 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 #ifndef _KBASE_CSF_TILER_HEAP_H_
@@ -97,11 +96,14 @@ int kbase_csf_tiler_heap_term(struct kbase_context *kctx, u64 gpu_heap_va);
  * It would return an appropriate error code if a new chunk couldn't be
  * allocated.
  *
- * @kctx: Pointer to the kbase context in which the tiler heap was initialized.
- * @gpu_heap_va:  GPU virtual address of the heap context.
- * @nr_in_flight: Number of render passes that are in-flight, must not be zero.
- * @new_chunk_ptr: Where to store the GPU virtual address & size of the new
- *                 chunk allocated for the heap.
+ * @kctx:               Pointer to the kbase context in which the tiler heap was initialized.
+ * @gpu_heap_va:        GPU virtual address of the heap context.
+ * @nr_in_flight:       Number of render passes that are in-flight, must not be zero.
+ * @pending_frag_count: Number of render passes in-flight with completed vertex/tiler stage.
+ *                      The minimum value is zero but it must be less than or equal
+ *                      to the total number of render passes in flight.
+ * @new_chunk_ptr:      Where to store the GPU virtual address & size of the new
+ *                      chunk allocated for the heap.
  *
  * Return: 0 if a new chunk was allocated otherwise an appropriate negative
  *         error code (like -EBUSY when a free chunk is expected to be
@@ -109,5 +111,5 @@ int kbase_csf_tiler_heap_term(struct kbase_context *kctx, u64 gpu_heap_va);
  *         invalid value was passed for one of the argument).
  */
 int kbase_csf_tiler_heap_alloc_new_chunk(struct kbase_context *kctx,
-	u64 gpu_heap_va, u32 nr_in_flight, u64 *new_chunk_ptr);
+	u64 gpu_heap_va, u32 nr_in_flight, u32 pending_frag_count, u64 *new_chunk_ptr);
 #endif
diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tiler_heap_debugfs.c b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tiler_heap_debugfs.c
index 5d744b81fe4a..0f69500f01ca 100644
--- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tiler_heap_debugfs.c
+++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tiler_heap_debugfs.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  *
  * (C) COPYRIGHT 2020 ARM Limited. All rights reserved.
@@ -5,7 +6,7 @@
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,8 +17,6 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 #include "mali_kbase_csf_tiler_heap_debugfs.h"
diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tiler_heap_debugfs.h b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tiler_heap_debugfs.h
index 44c580d82068..65dfaf701845 100644
--- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tiler_heap_debugfs.h
+++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tiler_heap_debugfs.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  *
  * (C) COPYRIGHT 2020 ARM Limited. All rights reserved.
@@ -5,7 +6,7 @@
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,8 +17,6 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 #ifndef _KBASE_CSF_TILER_HEAP_DEBUGFS_H_
diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tiler_heap_def.h b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tiler_heap_def.h
index 1f9e208904a9..0bf655178fae 100644
--- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tiler_heap_def.h
+++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tiler_heap_def.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  *
  * (C) COPYRIGHT 2020 ARM Limited. All rights reserved.
@@ -5,7 +6,7 @@
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,8 +17,6 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 #ifndef _KBASE_CSF_TILER_HEAP_DEF_H_
@@ -97,6 +96,8 @@ struct kbase_csf_tiler_heap_chunk {
  * @gpu_va:          The GPU virtual address of the heap context structure that
  *                   was allocated for the firmware. This is also used to
  *                   uniquely identify the heap.
+ * @heap_id:         Unique id representing the heap, assigned during heap
+ *                   initialization.
  * @chunks_list:     Linked list of allocated chunks.
  */
 struct kbase_csf_tiler_heap {
@@ -107,6 +108,7 @@ struct kbase_csf_tiler_heap {
 	u32 max_chunks;
 	u16 target_in_flight;
 	u64 gpu_va;
+	u64 heap_id;
 	struct list_head chunks_list;
 };
 #endif /* !_KBASE_CSF_TILER_HEAP_DEF_H_ */
diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_timeout.c b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_timeout.c
index 495ff2850500..854aad581318 100644
--- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_timeout.c
+++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_timeout.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  *
  * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved.
@@ -5,7 +6,7 @@
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,8 +17,6 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 #include <linux/kernel.h>
@@ -30,13 +29,13 @@
 #include "mali_kbase_config_defaults.h"
 #include "mali_kbase_csf_firmware.h"
 #include "mali_kbase_csf_timeout.h"
+#include "mali_kbase_reset_gpu.h"
 #include "backend/gpu/mali_kbase_pm_internal.h"
 
 /**
  * set_timeout - set a new global progress timeout.
  *
- * @kbdev:   Instance of a GPU platform device that implements a command
- *           stream front-end interface.
+ * @kbdev:   Instance of a GPU platform device that implements a CSF interface.
  * @timeout: the maximum number of GPU cycles without forward progress to allow
  *           to elapse before terminating a GPU command queue group.
  *
@@ -66,6 +65,9 @@ static int set_timeout(struct kbase_device *const kbdev, u64 const timeout)
  *
  * This function is called when the progress_timeout sysfs file is written to.
  * It checks the data written, and if valid updates the progress timeout value.
+ * The function also checks the GPU reset status: if a GPU reset is in
+ * progress, it returns an error code (-EBUSY) and leaves the timeout value
+ * unchanged.
  *
  * Return: @count if the function succeeded. An error code on failure.
  */
@@ -80,15 +82,21 @@ static ssize_t progress_timeout_store(struct device * const dev,
 	if (!kbdev)
 		return -ENODEV;
 
-	err = kstrtou64(buf, 0, &timeout);
+	err = kbase_reset_gpu_try_prevent(kbdev);
 	if (err) {
+		dev_warn(kbdev->dev,
+			 "Couldn't process progress_timeout write operation for GPU reset.\n");
+		return -EBUSY;
+	}
+
+	err = kstrtou64(buf, 0, &timeout);
+	if (err)
 		dev_err(kbdev->dev,
 			"Couldn't process progress_timeout write operation.\n"
 			"Use format <progress_timeout>\n");
-		return err;
-	}
+	else
+		err = set_timeout(kbdev, timeout);
 
-	err = set_timeout(kbdev, timeout);
 	if (!err) {
 		kbase_csf_scheduler_pm_active(kbdev);
 
@@ -99,6 +107,7 @@ static ssize_t progress_timeout_store(struct device * const dev,
 		kbase_csf_scheduler_pm_idle(kbdev);
 	}
 
+	kbase_reset_gpu_allow(kbdev);
 	if (err)
 		return err;
 
diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_timeout.h b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_timeout.h
index d0156c09a60f..48c71818a0ab 100644
--- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_timeout.h
+++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_timeout.h
@@ -1,11 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  *
- * (C) COPYRIGHT 2019 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,8 +17,6 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 #ifndef _KBASE_CSF_TIMEOUT_H_
@@ -28,8 +27,8 @@ struct kbase_device;
 /**
  * kbase_csf_timeout_init - Initialize the progress timeout.
  *
- * @kbdev: Instance of a GPU platform device that implements a command
- *         stream front-end interface. Must be zero-initialized.
+ * @kbdev: Instance of a GPU platform device that implements a CSF interface.
+ *         Must be zero-initialized.
  *
  * The progress timeout is the number of GPU clock cycles allowed to elapse
  * before the driver terminates a GPU command queue group in which a task is
@@ -46,8 +45,7 @@ int kbase_csf_timeout_init(struct kbase_device *kbdev);
 /**
  * kbase_csf_timeout_term - Terminate the progress timeout.
  *
- * @kbdev: Instance of a GPU platform device that implements a command
- *         stream front-end interface.
+ * @kbdev: Instance of a GPU platform device that implements a CSF interface.
  *
  * Removes the sysfs file which allowed the timeout to be reconfigured.
  * Does nothing if called on a zero-initialized object.
@@ -57,8 +55,7 @@ void kbase_csf_timeout_term(struct kbase_device *kbdev);
 /**
  * kbase_csf_timeout_get - get the current global progress timeout.
  *
- * @kbdev: Instance of a GPU platform device that implements a command
- *         stream front-end interface.
+ * @kbdev: Instance of a GPU platform device that implements a CSF interface.
  *
  * Return: the maximum number of GPU cycles that is allowed to elapse without
  *         forward progress before the driver terminates a GPU command queue
diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tl_reader.c b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tl_reader.c
index 5079a8e5af8c..de292c1d0ca2 100644
--- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tl_reader.c
+++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tl_reader.c
@@ -1,11 +1,12 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  *
- * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,8 +17,6 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 #include "mali_kbase_csf_tl_reader.h"
@@ -40,18 +39,18 @@
 #include "tl/mali_kbase_timeline_priv.h"
 #include <linux/debugfs.h>
 
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 7, 0))
+#if (KERNEL_VERSION(4, 7, 0) > LINUX_VERSION_CODE)
 #define DEFINE_DEBUGFS_ATTRIBUTE DEFINE_SIMPLE_ATTRIBUTE
 #endif
 #endif
 
-/** Name of the CSFFW timeline tracebuffer. */
+/* Name of the CSFFW timeline tracebuffer. */
 #define KBASE_CSFFW_TRACEBUFFER_NAME "timeline"
-/** Name of the timeline header metatadata */
+/* Name of the timeline header metadata */
 #define KBASE_CSFFW_TIMELINE_HEADER_NAME "timeline_header"
 
 /**
- * CSFFW timeline message.
+ * struct kbase_csffw_tl_message - CSFFW timeline message.
  *
  * @msg_id: Message ID.
  * @timestamp: Timestamp of the event.
@@ -130,6 +129,7 @@ static void get_cpu_gpu_time(
  * kbase_ts_converter_init() - Initialize system timestamp converter.
  *
  * @self:	System Timestamp Converter instance.
+ * @kbdev:	Kbase device pointer
  *
  * Return: Zero on success, -1 otherwise.
  */
@@ -253,8 +253,9 @@ static void tl_reader_reset(struct kbase_csf_tl_reader *self)
 	self->tl_header.btc = 0;
 }
 
-void kbase_csf_tl_reader_flush_buffer(struct kbase_csf_tl_reader *self)
+int kbase_csf_tl_reader_flush_buffer(struct kbase_csf_tl_reader *self)
 {
+	int ret = 0;
 	struct kbase_device *kbdev = self->kbdev;
 	struct kbase_tlstream *stream = self->stream;
 
@@ -273,7 +274,7 @@ void kbase_csf_tl_reader_flush_buffer(struct kbase_csf_tl_reader *self)
 	/* If not running, early exit. */
 	if (!self->is_active) {
 		spin_unlock_irqrestore(&self->read_lock, flags);
-		return;
+		return -EBUSY;
 	}
 
 	/* Copying the whole buffer in a single shot. We assume
@@ -297,6 +298,7 @@ void kbase_csf_tl_reader_flush_buffer(struct kbase_csf_tl_reader *self)
 			dev_warn(
 				kbdev->dev,
 				"Unable to parse CSFFW tracebuffer event header.");
+				ret = -EBUSY;
 			break;
 		}
 
@@ -317,6 +319,7 @@ void kbase_csf_tl_reader_flush_buffer(struct kbase_csf_tl_reader *self)
 			dev_warn(kbdev->dev,
 				"event_id: %u, can't read with event_size: %u.",
 				event_id, event_size);
+				ret = -EBUSY;
 			break;
 		}
 
@@ -338,6 +341,7 @@ void kbase_csf_tl_reader_flush_buffer(struct kbase_csf_tl_reader *self)
 	}
 
 	spin_unlock_irqrestore(&self->read_lock, flags);
+	return ret;
 }
 
 static void kbasep_csf_tl_reader_read_callback(struct timer_list *timer)
@@ -420,39 +424,18 @@ static int tl_reader_init_late(
  * Update the first bit of a CSFFW tracebufer and then reset the GPU.
  * This is to make these changes visible to the MCU.
  *
- * Return: 0 on success, -EAGAIN if a GPU reset was in progress.
+ * Return: 0 on success, or negative error code for failure.
  */
 static int tl_reader_update_enable_bit(
 	struct kbase_csf_tl_reader *self,
 	bool value)
 {
-	struct kbase_device *kbdev = self->kbdev;
-	unsigned long flags;
+	int err = 0;
 
-	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
-
-	/* If there is already a GPU reset pending then inform
-	 * the User to retry the update.
-	 */
-	if (kbase_reset_gpu_silent(kbdev)) {
-		spin_unlock_irqrestore(
-			&kbdev->hwaccess_lock, flags);
-		dev_warn(
-			kbdev->dev,
-			"GPU reset already in progress when enabling firmware timeline.");
-		return -EAGAIN;
-	}
-
-	/* GPU reset request has been placed, now update the
-	 * firmware image. GPU reset will take place only after
-	 * hwaccess_lock is released.
-	 */
-	kbase_csf_firmware_trace_buffer_update_trace_enable_bit(
+	err = kbase_csf_firmware_trace_buffer_update_trace_enable_bit(
 		self->trace_buffer, 0, value);
 
-	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
-
-	return 0;
+	return err;
 }
 
 void kbase_csf_tl_reader_init(struct kbase_csf_tl_reader *self,
diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tl_reader.h b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tl_reader.h
index f5ce9d629f55..c691871cbdc5 100644
--- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tl_reader.h
+++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tl_reader.h
@@ -1,11 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  *
- * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,8 +17,6 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 #ifndef _KBASE_CSFFW_TL_READER_H_
@@ -27,13 +26,13 @@
 #include <linux/timer.h>
 #include <asm/page.h>
 
-/** The number of pages used for CSFFW trace buffer. Can be tweaked. */
-#define KBASE_CSF_TL_BUFFER_NR_PAGES 4
-/** CSFFW Timeline read polling minimum period in milliseconds. */
+/* The number of pages used for CSFFW trace buffer. Can be tweaked. */
+#define KBASE_CSF_TL_BUFFER_NR_PAGES 128
+/* CSFFW Timeline read polling minimum period in milliseconds. */
 #define KBASE_CSF_TL_READ_INTERVAL_MIN 20
-/** CSFFW Timeline read polling default period in milliseconds. */
+/* CSFFW Timeline read polling default period in milliseconds. */
 #define KBASE_CSF_TL_READ_INTERVAL_DEFAULT 200
-/** CSFFW Timeline read polling maximum period in milliseconds. */
+/* CSFFW Timeline read polling maximum period in milliseconds. */
 #define KBASE_CSF_TL_READ_INTERVAL_MAX (60*1000)
 
 struct firmware_trace_buffer;
@@ -41,6 +40,7 @@ struct kbase_tlstream;
 struct kbase_device;
 
 /**
+ * struct kbase_ts_converter -
  * System timestamp to CPU timestamp converter state.
  *
  * @multiplier:	Numerator of the converter's fraction.
@@ -80,6 +80,7 @@ struct kbase_ts_converter {
  *                     is copied.
  * @kbdev:             KBase device.
  * @trace_buffer:      CSF Firmware timeline tracebuffer.
+ * @tl_header:         CSFFW Timeline header
  * @tl_header.data:    CSFFW Timeline header content.
  * @tl_header.size:    CSFFW Timeline header size.
  * @tl_header.btc:     CSFFW Timeline header remaining bytes to copy to
@@ -92,6 +93,7 @@ struct kbase_ts_converter {
  *                     is only valid when got_first_event is true.
  * @read_buffer:       Temporary buffer used for CSFFW timeline data
  *                     reading from the tracebufer.
+ * @read_lock:         CSFFW timeline reader lock.
  */
 struct kbase_csf_tl_reader {
 	struct timer_list read_timer;
@@ -136,9 +138,11 @@ void kbase_csf_tl_reader_term(struct kbase_csf_tl_reader *self);
  *   Flush trace from buffer into CSFFW timeline stream.
  *
  * @self:    CSFFW TL Reader instance.
+ *
+ * Return: Zero on success, negative error code (-EBUSY) otherwise.
  */
 
-void kbase_csf_tl_reader_flush_buffer(struct kbase_csf_tl_reader *self);
+int kbase_csf_tl_reader_flush_buffer(struct kbase_csf_tl_reader *self);
 
 /**
  * kbase_csf_tl_reader_start() -
diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_trace_buffer.c b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_trace_buffer.c
index 4d68766b8b9a..afcc90b39b54 100644
--- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_trace_buffer.c
+++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_trace_buffer.c
@@ -1,11 +1,12 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  *
- * (C) COPYRIGHT 2018-2020 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2018-2021 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,8 +17,6 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 #include "mali_kbase.h"
@@ -30,6 +29,12 @@
 #include <linux/list.h>
 #include <linux/mman.h>
 
+#ifdef CONFIG_DEBUG_FS
+#if (KERNEL_VERSION(4, 7, 0) > LINUX_VERSION_CODE)
+#define DEFINE_DEBUGFS_ATTRIBUTE DEFINE_SIMPLE_ATTRIBUTE
+#endif
+#endif
+
 /**
  * struct firmware_trace_buffer - Trace Buffer within the MCU firmware
  *
@@ -43,25 +48,28 @@
  * @node:         List head linking all trace buffers to
  *                kbase_device:csf.firmware_trace_buffers
  * @data_mapping: MCU shared memory mapping used for the data buffer.
+ * @updatable:    Indicates whether config items can be updated with
+ *                FIRMWARE_CONFIG_UPDATE
  * @type:         The type of the trace buffer.
  * @trace_enable_entry_count: Number of Trace Enable bits.
- * @gpu_va:       Structure containing all the Firmware addresses
- *                that are accessed by the MCU.
- * @size_address:    The address where the MCU shall read the size of
- *                   the data buffer.
- * @insert_address:  The address that shall be dereferenced by the MCU
- *                   to write the Insert offset.
- * @extract_address: The address that shall be dereferenced by the MCU
- *                   to read the Extract offset.
- * @data_address:    The address that shall be dereferenced by the MCU
- *                   to write the Trace Buffer.
- * @trace_enable:    The address where the MCU shall read the array of
- *                   Trace Enable bits describing which trace points
- *                   and features shall be enabled.
- * @cpu_va:          Structure containing CPU addresses of variables which
- *                   are permanently mapped on the CPU address space.
- * @insert_cpu_va:   CPU virtual address of the Insert variable.
- * @extract_cpu_va:  CPU virtual address of the Extract variable.
+ * @gpu_va:                 Structure containing all the Firmware addresses
+ *                          that are accessed by the MCU.
+ * @gpu_va.size_address:    The address where the MCU shall read the size of
+ *                          the data buffer.
+ * @gpu_va.insert_address:  The address that shall be dereferenced by the MCU
+ *                          to write the Insert offset.
+ * @gpu_va.extract_address: The address that shall be dereferenced by the MCU
+ *                          to read the Extract offset.
+ * @gpu_va.data_address:    The address that shall be dereferenced by the MCU
+ *                          to write the Trace Buffer.
+ * @gpu_va.trace_enable:    The address where the MCU shall read the array of
+ *                          Trace Enable bits describing which trace points
+ *                          and features shall be enabled.
+ * @cpu_va:                 Structure containing CPU addresses of variables
+ *                          which are permanently mapped on the CPU address
+ *                          space.
+ * @cpu_va.insert_cpu_va:   CPU virtual address of the Insert variable.
+ * @cpu_va.extract_cpu_va:  CPU virtual address of the Extract variable.
  * @num_pages: Size of the data buffer, in pages.
  * @trace_enable_init_mask: Initial value for the trace enable bit mask.
  * @name:  NULL terminated string which contains the name of the trace buffer.
@@ -70,6 +78,7 @@ struct firmware_trace_buffer {
 	struct kbase_device *kbdev;
 	struct list_head node;
 	struct kbase_csf_mapping data_mapping;
+	bool updatable;
 	u32 type;
 	u32 trace_enable_entry_count;
 	struct gpu_va {
@@ -106,7 +115,7 @@ struct firmware_trace_buffer_data {
 	size_t size;
 };
 
-/**
+/*
  * Table of configuration data for trace buffers.
  *
  * This table contains the configuration data for the trace buffers that are
@@ -244,7 +253,9 @@ void kbase_csf_firmware_trace_buffers_term(struct kbase_device *kbdev)
 }
 
 int kbase_csf_firmware_parse_trace_buffer_entry(struct kbase_device *kbdev,
-		const u32 *entry, unsigned int size)
+						const u32 *entry,
+						unsigned int size,
+						bool updatable)
 {
 	const char *name = (char *)&entry[7];
 	const unsigned int name_len = size - TRACE_BUFFER_ENTRY_NAME_OFFSET;
@@ -268,6 +279,7 @@ int kbase_csf_firmware_parse_trace_buffer_entry(struct kbase_device *kbdev,
 			unsigned int j;
 
 			trace_buffer->kbdev = kbdev;
+			trace_buffer->updatable = updatable;
 			trace_buffer->type = entry[0];
 			trace_buffer->gpu_va.size_address = entry[1];
 			trace_buffer->gpu_va.insert_address = entry[2];
@@ -386,9 +398,13 @@ unsigned int kbase_csf_firmware_trace_buffer_get_trace_enable_bits_count(
 }
 EXPORT_SYMBOL(kbase_csf_firmware_trace_buffer_get_trace_enable_bits_count);
 
-void kbase_csf_firmware_trace_buffer_update_trace_enable_bit(
+static void kbasep_csf_firmware_trace_buffer_update_trace_enable_bit(
 	struct firmware_trace_buffer *tb, unsigned int bit, bool value)
 {
+	struct kbase_device *kbdev = tb->kbdev;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
 	if (bit < tb->trace_enable_entry_count) {
 		unsigned int trace_enable_reg_offset = bit >> 5;
 		u32 trace_enable_bit_mask = 1u << (bit & 0x1F);
@@ -408,11 +424,48 @@ void kbase_csf_firmware_trace_buffer_update_trace_enable_bit(
 		 * trace buffers, since firmware could continue to use the
 		 * value of bitmask it cached after the boot.
 		 */
-		kbase_csf_update_firmware_memory(tb->kbdev,
-			tb->gpu_va.trace_enable + trace_enable_reg_offset*4,
+		kbase_csf_update_firmware_memory(
+			kbdev,
+			tb->gpu_va.trace_enable + trace_enable_reg_offset * 4,
 			tb->trace_enable_init_mask[trace_enable_reg_offset]);
 	}
 }
+
+int kbase_csf_firmware_trace_buffer_update_trace_enable_bit(
+	struct firmware_trace_buffer *tb, unsigned int bit, bool value)
+{
+	struct kbase_device *kbdev = tb->kbdev;
+	int err = 0;
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	/* If trace buffer update cannot be performed with
+	 * FIRMWARE_CONFIG_UPDATE then we need to do a
+	 * silent reset before we update the memory.
+	 */
+	if (!tb->updatable) {
+		/* If there is already a GPU reset pending then inform
+		 * the User to retry the update.
+		 */
+		if (kbase_reset_gpu_silent(kbdev)) {
+			dev_warn(
+				kbdev->dev,
+				"GPU reset already in progress when enabling firmware timeline.");
+			spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+			return -EAGAIN;
+		}
+	}
+
+	kbasep_csf_firmware_trace_buffer_update_trace_enable_bit(tb, bit,
+								 value);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	if (tb->updatable)
+		err = kbase_csf_trigger_firmware_config_update(kbdev);
+
+	return err;
+}
 EXPORT_SYMBOL(kbase_csf_firmware_trace_buffer_update_trace_enable_bit);
 
 bool kbase_csf_firmware_trace_buffer_is_empty(
@@ -479,8 +532,8 @@ static void update_trace_buffer_active_mask64(struct firmware_trace_buffer *tb,
 	unsigned int i;
 
 	for (i = 0; i < tb->trace_enable_entry_count; i++)
-		kbase_csf_firmware_trace_buffer_update_trace_enable_bit(tb, i,
-							(mask >> i) & 1);
+		kbasep_csf_firmware_trace_buffer_update_trace_enable_bit(
+			tb, i, (mask >> i) & 1);
 }
 
 static int set_trace_buffer_active_mask64(struct firmware_trace_buffer *tb,
@@ -490,13 +543,25 @@ static int set_trace_buffer_active_mask64(struct firmware_trace_buffer *tb,
 	unsigned long flags;
 	int err = 0;
 
-	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
-	/* If there is already a GPU reset pending, need a retry */
-	if (kbase_reset_gpu_silent(kbdev))
-		err = -EAGAIN;
-	else
+	if (!tb->updatable) {
+		/* If there is already a GPU reset pending, need a retry */
+		spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+		if (kbase_reset_gpu_silent(kbdev))
+			err = -EAGAIN;
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+	}
+
+	if (!err) {
+		spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
 		update_trace_buffer_active_mask64(tb, mask);
-	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+		/* If the config is updatable, we only need to trigger
+		 * FIRMWARE_CONFIG_UPDATE.
+		 */
+		if (tb->updatable)
+			err = kbase_csf_trigger_firmware_config_update(kbdev);
+	}
 
 	return err;
 }
diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_trace_buffer.h b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_trace_buffer.h
index 2cac55e0664d..e6babef9250c 100644
--- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_trace_buffer.h
+++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_trace_buffer.h
@@ -1,11 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  *
- * (C) COPYRIGHT 2018-2020 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2018-2021 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,8 +17,6 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 #ifndef _KBASE_CSF_TRACE_BUFFER_H_
@@ -76,12 +75,15 @@ void kbase_csf_firmware_trace_buffers_term(struct kbase_device *kbdev);
  *
  * Return: 0 if successful, negative error code on failure.
  *
- * @kbdev: Kbase device structure
- * @entry: Pointer to the section
- * @size:  Size (in bytes) of the section
+ * @kbdev:     Kbase device structure
+ * @entry:     Pointer to the section
+ * @size:      Size (in bytes) of the section
+ * @updatable: Indicates whether config items can be updated with FIRMWARE_CONFIG_UPDATE
  */
 int kbase_csf_firmware_parse_trace_buffer_entry(struct kbase_device *kbdev,
-		const u32 *entry, unsigned int size);
+						const u32 *entry,
+						unsigned int size,
+						bool updatable);
 
 /**
  * kbase_csf_firmware_reload_trace_buffers_data -
@@ -134,9 +136,12 @@ unsigned int kbase_csf_firmware_trace_buffer_get_trace_enable_bits_count(
  * @trace_buffer: Trace buffer handle
  * @bit:          Bit to update
  * @value:        New value for the given bit
+ *
+ * Return: 0 if successful, negative error code on failure.
  */
-void kbase_csf_firmware_trace_buffer_update_trace_enable_bit(
-	struct firmware_trace_buffer *trace_buffer, unsigned int bit, bool value);
+int kbase_csf_firmware_trace_buffer_update_trace_enable_bit(
+	struct firmware_trace_buffer *trace_buffer, unsigned int bit,
+	bool value);
 
 /**
  * kbase_csf_firmware_trace_buffer_is_empty - Empty trace buffer predicate
diff --git a/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_ktrace_codes_csf.h b/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_ktrace_codes_csf.h
index 32181d711193..7ed62aa02972 100644
--- a/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_ktrace_codes_csf.h
+++ b/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_ktrace_codes_csf.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  *
  * (C) COPYRIGHT 2020 ARM Limited. All rights reserved.
@@ -5,7 +6,7 @@
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,8 +17,6 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 /*
@@ -109,6 +108,46 @@ int dummy_array[] = {
 	KBASE_KTRACE_CODE_MAKE_CODE(QUEUE_START),
 	KBASE_KTRACE_CODE_MAKE_CODE(QUEUE_STOP),
 
+	/*
+	 * KCPU queue events
+	 */
+	/* KTrace info_val == KCPU queue fence context
+	 * KCPU extra_info_val == N/A.
+	 */
+	KBASE_KTRACE_CODE_MAKE_CODE(KCPU_QUEUE_NEW),
+	/* KTrace info_val == Number of pending commands in KCPU queue when
+	 * it is destroyed.
+	 * KCPU extra_info_val == Number of CQS wait operations present in
+	 * the KCPU queue when it is destroyed.
+	 */
+	KBASE_KTRACE_CODE_MAKE_CODE(KCPU_QUEUE_DESTROY),
+	/* KTrace info_val == CQS event memory address
+	 * KCPU extra_info_val == Upper 32 bits of event memory, i.e. contents
+	 * of error field.
+	 */
+	KBASE_KTRACE_CODE_MAKE_CODE(CQS_SET),
+	/* KTrace info_val == Number of CQS objects to be waited upon
+	 * KCPU extra_info_val == N/A.
+	 */
+	KBASE_KTRACE_CODE_MAKE_CODE(CQS_WAIT_START),
+	/* KTrace info_val == CQS event memory address
+	 * KCPU extra_info_val == 1 if CQS was signaled with an error and queue
+	 * inherited the error, otherwise 0.
+	 */
+	KBASE_KTRACE_CODE_MAKE_CODE(CQS_WAIT_END),
+	/* KTrace info_val == Fence context
+	 * KCPU extra_info_val == Fence seqno.
+	 */
+	KBASE_KTRACE_CODE_MAKE_CODE(FENCE_SIGNAL),
+	/* KTrace info_val == Fence context
+	 * KCPU extra_info_val == Fence seqno.
+	 */
+	KBASE_KTRACE_CODE_MAKE_CODE(FENCE_WAIT_START),
+	/* KTrace info_val == Fence context
+	 * KCPU extra_info_val == Fence seqno.
+	 */
+	KBASE_KTRACE_CODE_MAKE_CODE(FENCE_WAIT_END),
+
 #if 0 /* Dummy section to avoid breaking formatting */
 };
 #endif
diff --git a/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_ktrace_codes_jm.h b/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_ktrace_codes_jm.h
index b201e49bd0f2..a616f2bc492d 100644
--- a/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_ktrace_codes_jm.h
+++ b/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_ktrace_codes_jm.h
@@ -1,11 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  *
- * (C) COPYRIGHT 2011-2015,2018-2020 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2011-2015, 2018-2020 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,8 +17,6 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 /*
diff --git a/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_ktrace_csf.c b/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_ktrace_csf.c
index 2ea901b666c2..9183520a33f0 100644
--- a/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_ktrace_csf.c
+++ b/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_ktrace_csf.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  *
  * (C) COPYRIGHT 2020 ARM Limited. All rights reserved.
@@ -5,7 +6,7 @@
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,9 +17,8 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
+
 #include <mali_kbase.h>
 #include "debug/mali_kbase_debug_ktrace_internal.h"
 #include "debug/backend/mali_kbase_debug_ktrace_csf.h"
@@ -28,30 +28,30 @@
 void kbasep_ktrace_backend_format_header(char *buffer, int sz, s32 *written)
 {
 	*written += MAX(snprintf(buffer + *written, MAX(sz - *written, 0),
-			"group,slot,prio,csi"), 0);
+			"group,slot,prio,csi,kcpu"), 0);
 }
 
 void kbasep_ktrace_backend_format_msg(struct kbase_ktrace_msg *trace_msg,
 		char *buffer, int sz, s32 *written)
 {
-	const struct kbase_ktrace_backend * const be_msg = &trace_msg->backend;
+	const union kbase_ktrace_backend * const be_msg = &trace_msg->backend;
 	/* At present, no need to check for KBASE_KTRACE_FLAG_BACKEND, as the
 	 * other backend-specific flags currently imply this anyway
 	 */
 
 	/* group parts */
-	if (be_msg->flags & KBASE_KTRACE_FLAG_CSF_GROUP) {
-		const s8 slot = be_msg->csg_nr;
+	if (be_msg->gpu.flags & KBASE_KTRACE_FLAG_CSF_GROUP) {
+		const s8 slot = be_msg->gpu.csg_nr;
 		/* group,slot, */
 		*written += MAX(snprintf(buffer + *written,
 				MAX(sz - *written, 0),
-				"%u,%d,", be_msg->group_handle, slot), 0);
+				"%u,%d,", be_msg->gpu.group_handle, slot), 0);
 
 		/* prio */
 		if (slot >= 0)
 			*written += MAX(snprintf(buffer + *written,
 					MAX(sz - *written, 0),
-					"%u", be_msg->slot_prio), 0);
+					"%u", be_msg->gpu.slot_prio), 0);
 
 		/* , */
 		*written += MAX(snprintf(buffer + *written,
@@ -65,10 +65,24 @@ void kbasep_ktrace_backend_format_msg(struct kbase_ktrace_msg *trace_msg,
 	}
 
 	/* queue parts: csi */
-	if (trace_msg->backend.flags & KBASE_KTRACE_FLAG_CSF_QUEUE)
+	if (trace_msg->backend.gpu.flags & KBASE_KTRACE_FLAG_CSF_QUEUE)
 		*written += MAX(snprintf(buffer + *written,
 				MAX(sz - *written, 0),
-				"%d", be_msg->csi_index), 0);
+				"%d", be_msg->gpu.csi_index), 0);
+
+	/* , */
+	*written += MAX(snprintf(buffer + *written,
+				MAX(sz - *written, 0),
+				","), 0);
+
+	if (be_msg->gpu.flags & KBASE_KTRACE_FLAG_CSF_KCPU) {
+		/* kcpu data */
+		*written += MAX(snprintf(buffer + *written,
+				MAX(sz - *written, 0),
+				"kcpu %d (0x%llx)",
+				be_msg->kcpu.id,
+				be_msg->kcpu.extra_info_val), 0);
+	}
 
 	/* Don't end with a trailing "," - this is a 'standalone' formatted
 	 * msg, caller will handle the delimiters
@@ -95,14 +109,14 @@ void kbasep_ktrace_add_csf(struct kbase_device *kbdev,
 	else if (queue)
 		kctx = queue->kctx;
 
-	/* Fill the common part of the message (including backend.flags) */
+	/* Fill the common part of the message (including backend.gpu.flags) */
 	kbasep_ktrace_msg_init(&kbdev->ktrace, trace_msg, code, kctx, flags,
 			info_val);
 
 	/* Indicate to the common code that backend-specific parts will be
 	 * valid
 	 */
-	trace_msg->backend.flags |= KBASE_KTRACE_FLAG_BACKEND;
+	trace_msg->backend.gpu.flags |= KBASE_KTRACE_FLAG_BACKEND;
 
 	/* Fill the CSF-specific parts of the message
 	 *
@@ -111,30 +125,66 @@ void kbasep_ktrace_add_csf(struct kbase_device *kbdev,
 	 */
 
 	if (queue) {
-		trace_msg->backend.flags |= KBASE_KTRACE_FLAG_CSF_QUEUE;
-		trace_msg->backend.csi_index = queue->csi_index;
+		trace_msg->backend.gpu.flags |= KBASE_KTRACE_FLAG_CSF_QUEUE;
+		trace_msg->backend.gpu.csi_index = queue->csi_index;
 	}
 
 	if (group) {
 		const s8 slot = group->csg_nr;
 
-		trace_msg->backend.flags |= KBASE_KTRACE_FLAG_CSF_GROUP;
+		trace_msg->backend.gpu.flags |= KBASE_KTRACE_FLAG_CSF_GROUP;
 
-		trace_msg->backend.csg_nr = slot;
+		trace_msg->backend.gpu.csg_nr = slot;
 
 		if (slot >= 0) {
-			struct kbase_csf_csg_slot *csg_slot = &kbdev->csf.scheduler.csg_slots[slot];
+			struct kbase_csf_csg_slot *csg_slot =
+				&kbdev->csf.scheduler.csg_slots[slot];
 
-			trace_msg->backend.slot_prio = csg_slot->priority;
+			trace_msg->backend.gpu.slot_prio =
+				csg_slot->priority;
 		}
 		/* slot >=0 indicates whether slot_prio valid, so no need to
 		 * initialize in the case where it's invalid
 		 */
 
-		trace_msg->backend.group_handle = group->handle;
+		trace_msg->backend.gpu.group_handle = group->handle;
 	}
 
-	WARN_ON((trace_msg->backend.flags & ~KBASE_KTRACE_FLAG_ALL));
+	WARN_ON((trace_msg->backend.gpu.flags & ~KBASE_KTRACE_FLAG_ALL));
+
+	/* Done */
+	spin_unlock_irqrestore(&kbdev->ktrace.lock, irqflags);
+}
+
+void kbasep_ktrace_add_csf_kcpu(struct kbase_device *kbdev,
+				enum kbase_ktrace_code code,
+				struct kbase_kcpu_command_queue *queue,
+				u64 info_val1, u64 info_val2)
+{
+	unsigned long irqflags;
+	struct kbase_ktrace_msg *trace_msg;
+	struct kbase_context *kctx = queue->kctx;
+
+	spin_lock_irqsave(&kbdev->ktrace.lock, irqflags);
+
+	/* Reserve and update indices */
+	trace_msg = kbasep_ktrace_reserve(&kbdev->ktrace);
+
+	/* Fill the common part of the message */
+	kbasep_ktrace_msg_init(&kbdev->ktrace, trace_msg, code, kctx, 0,
+		info_val1);
+
+	/* Indicate to the common code that backend-specific parts will be
+	 * valid
+	 */
+	trace_msg->backend.gpu.flags |= KBASE_KTRACE_FLAG_BACKEND;
+
+	/* Fill the KCPU-specific parts of the message */
+	trace_msg->backend.kcpu.id = queue->id;
+	trace_msg->backend.kcpu.extra_info_val = info_val2;
+	trace_msg->backend.gpu.flags |= KBASE_KTRACE_FLAG_CSF_KCPU;
+
+	WARN_ON((trace_msg->backend.gpu.flags & ~KBASE_KTRACE_FLAG_ALL));
 
 	/* Done */
 	spin_unlock_irqrestore(&kbdev->ktrace.lock, irqflags);
diff --git a/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_ktrace_csf.h b/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_ktrace_csf.h
index b055ff82a116..62cd1bca1292 100644
--- a/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_ktrace_csf.h
+++ b/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_ktrace_csf.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  *
  * (C) COPYRIGHT 2020 ARM Limited. All rights reserved.
@@ -5,7 +6,7 @@
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,8 +17,6 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 #ifndef _KBASE_DEBUG_KTRACE_CSF_H_
@@ -28,8 +27,7 @@
  */
 #if KBASE_KTRACE_TARGET_RBUF
 /**
- * kbasep_ktrace_add_csf - internal function to add trace about Command Stream
- *                        Frontend
+ * kbasep_ktrace_add_csf - internal function to add trace about CSF
  * @kbdev:    kbase device
  * @code:     trace code
  * @group:    queue group, or NULL if no queue group
@@ -45,9 +43,33 @@ void kbasep_ktrace_add_csf(struct kbase_device *kbdev,
 		struct kbase_queue *queue, kbase_ktrace_flag_t flags,
 		u64 info_val);
 
+/**
+ * kbasep_ktrace_add_csf_kcpu - internal function to add trace about the CSF
+ *				KCPU queues.
+ * @kbdev:      kbase device
+ * @code:       trace code
+ * @queue:      KCPU command queue; must not be NULL (it is dereferenced)
+ * @info_val1:  Main infoval variable with information based on the KCPU
+ *              ktrace call. Refer to mali_kbase_debug_ktrace_codes_csf.h
+ *              for information on the infoval values.
+ * @info_val2:  Extra infoval variable with information based on the KCPU
+ *              ktrace call. Refer to mali_kbase_debug_ktrace_codes_csf.h
+ *              for information on the infoval values.
+ *
+ * PRIVATE: do not use directly. Use KBASE_KTRACE_ADD_CSF_KCPU() instead.
+ */
+void kbasep_ktrace_add_csf_kcpu(struct kbase_device *kbdev,
+				enum kbase_ktrace_code code,
+				struct kbase_kcpu_command_queue *queue,
+				u64 info_val1, u64 info_val2);
+
 #define KBASE_KTRACE_RBUF_ADD_CSF(kbdev, code, group, queue, flags, info_val) \
 	kbasep_ktrace_add_csf(kbdev, KBASE_KTRACE_CODE(code), group, queue, \
-			flags, info_val)
+	flags, info_val)
+
+#define KBASE_KTRACE_RBUF_ADD_CSF_KCPU(kbdev, code, queue, info_val1, \
+	info_val2) kbasep_ktrace_add_csf_kcpu(kbdev, KBASE_KTRACE_CODE(code), \
+	queue, info_val1, info_val2)
 
 #else /* KBASE_KTRACE_TARGET_RBUF */
 
@@ -56,12 +78,21 @@ void kbasep_ktrace_add_csf(struct kbase_device *kbdev,
 		CSTD_UNUSED(kbdev);\
 		CSTD_NOP(code);\
 		CSTD_UNUSED(group);\
-		CSTD_UNUSED(queue);		\
+		CSTD_UNUSED(queue);\
 		CSTD_UNUSED(flags);\
 		CSTD_UNUSED(info_val);\
 		CSTD_NOP(0);\
 	} while (0)
 
+#define KBASE_KTRACE_RBUF_ADD_CSF_KCPU(kbdev, code, queue, info_val1, info_val2) \
+	do {\
+		CSTD_UNUSED(kbdev);\
+		CSTD_NOP(code);\
+		CSTD_UNUSED(queue);\
+		CSTD_UNUSED(info_val1);\
+		CSTD_UNUSED(info_val2);\
+	} while (0)
+
 #endif /* KBASE_KTRACE_TARGET_RBUF */
 
 /*
@@ -75,6 +106,9 @@ void kbasep_ktrace_add_csf(struct kbase_device *kbdev,
 #define KBASE_KTRACE_FTRACE_ADD_CSF(kbdev, code, group, queue, info_val) \
 	trace_mali_##code(kbdev, group, queue, info_val)
 
+#define KBASE_KTRACE_FTRACE_ADD_KCPU(code, queue, info_val1, info_val2) \
+	trace_mali_##code(queue, info_val1, info_val2)
+
 #else /* KBASE_KTRACE_TARGET_FTRACE */
 
 #define KBASE_KTRACE_FTRACE_ADD_CSF(kbdev, code, group, queue, info_val) \
@@ -87,6 +121,14 @@ void kbasep_ktrace_add_csf(struct kbase_device *kbdev,
 		CSTD_NOP(0);\
 	} while (0)
 
+#define KBASE_KTRACE_FTRACE_ADD_KCPU(code, queue, info_val1, info_val2) \
+	do {\
+		CSTD_NOP(code);\
+		CSTD_UNUSED(queue);\
+		CSTD_UNUSED(info_val1);\
+		CSTD_UNUSED(info_val2);\
+	} while (0)
+
 #endif /* KBASE_KTRACE_TARGET_FTRACE */
 
 /*
@@ -109,7 +151,7 @@ void kbasep_ktrace_add_csf(struct kbase_device *kbdev,
  */
 #define KBASE_KTRACE_ADD_CSF_GRP(kbdev, code, group, info_val) \
 	do { \
-		/* capture values that could come from non-pure function calls */ \
+		/* capture values that could come from non-pure fn calls */ \
 		struct kbase_queue_group *__group = group; \
 		u64 __info_val = info_val; \
 		KBASE_KTRACE_RBUF_ADD_CSF(kbdev, code, __group, NULL, 0u, \
@@ -135,7 +177,7 @@ void kbasep_ktrace_add_csf(struct kbase_device *kbdev,
  */
 #define KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, code, group, queue, info_val) \
 	do { \
-		/* capture values that could come from non-pure function calls */ \
+		/* capture values that could come from non-pure fn calls */ \
 		struct kbase_queue_group *__group = group; \
 		struct kbase_queue *__queue = queue; \
 		u64 __info_val = info_val; \
@@ -145,4 +187,17 @@ void kbasep_ktrace_add_csf(struct kbase_device *kbdev,
 				__queue, __info_val); \
 	} while (0)
 
+
+#define KBASE_KTRACE_ADD_CSF_KCPU(kbdev, code, queue, info_val1, info_val2) \
+	do { \
+		/* capture values that could come from non-pure fn calls */ \
+		struct kbase_kcpu_command_queue *__queue = queue; \
+		u64 __info_val1 = info_val1; \
+		u64 __info_val2 = info_val2; \
+		KBASE_KTRACE_RBUF_ADD_CSF_KCPU(kbdev, code, __queue, \
+					       __info_val1, __info_val2); \
+		KBASE_KTRACE_FTRACE_ADD_KCPU(code, __queue, \
+					     __info_val1, __info_val2); \
+	} while (0)
+
 #endif /* _KBASE_DEBUG_KTRACE_CSF_H_ */
diff --git a/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_ktrace_defs_csf.h b/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_ktrace_defs_csf.h
index f265fe9a9753..ef75afa98585 100644
--- a/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_ktrace_defs_csf.h
+++ b/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_ktrace_defs_csf.h
@@ -1,11 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  *
- * (C) COPYRIGHT 2020 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,8 +17,6 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 #ifndef _KBASE_DEBUG_KTRACE_DEFS_CSF_H_
@@ -38,9 +37,15 @@
  * ftrace backend now outputs kctx field (as %d_%u format).
  *
  * Add fields group, slot, prio, csi into backend-specific part.
+ *
+ * 1.2:
+ * There is a new class of KCPU traces; with this, a new KCPU column is added
+ * to the ringbuffer RBUF (mali_trace) between csi and info_val. It is empty
+ * for non-KCPU related traces, and usually displays the KCPU Queue ID and
+ * an extra information value. ftrace also displays these KCPU traces.
  */
 #define KBASE_KTRACE_VERSION_MAJOR 1
-#define KBASE_KTRACE_VERSION_MINOR 1
+#define KBASE_KTRACE_VERSION_MINOR 2
 
 /* indicates if the trace message has valid queue-group related info. */
 #define KBASE_KTRACE_FLAG_CSF_GROUP     (((kbase_ktrace_flag_t)1) << 0)
@@ -48,37 +53,58 @@
 /* indicates if the trace message has valid queue related info. */
 #define KBASE_KTRACE_FLAG_CSF_QUEUE     (((kbase_ktrace_flag_t)1) << 1)
 
+/* indicates if the trace message has valid KCPU-queue related info. */
+#define KBASE_KTRACE_FLAG_CSF_KCPU     (((kbase_ktrace_flag_t)1) << 2)
+
 /* Collect all the flags together for debug checking */
 #define KBASE_KTRACE_FLAG_BACKEND_ALL \
-		(KBASE_KTRACE_FLAG_CSF_GROUP | KBASE_KTRACE_FLAG_CSF_QUEUE)
-
+		(KBASE_KTRACE_FLAG_CSF_GROUP | KBASE_KTRACE_FLAG_CSF_QUEUE | \
+		 KBASE_KTRACE_FLAG_CSF_KCPU)
 
 /**
- * struct kbase_ktrace_backend - backend specific part of a trace message
- *
- * @code:         Identifies the event, refer to enum kbase_ktrace_code.
- * @flags:        indicates information about the trace message itself. Used
- *                during dumping of the message.
- * @group_handle: Handle identifying the associated queue group. Only valid
- *                when @flags contains KBASE_KTRACE_FLAG_CSF_GROUP.
- * @csg_nr:       Number/index of the associated queue group's command stream
- *                group to which it is mapped, or negative if none associated.
- *                Only valid when @flags contains KBASE_KTRACE_FLAG_CSF_GROUP.
- * @slot_prio:    The priority of the slot for the associated group, if it was
- *                scheduled. Hence, only valid when @csg_nr >=0 and @flags
- *                contains KBASE_KTRACE_FLAG_CSF_GROUP.
- * @csi_index:    ID of the associated queue's Command Stream HW interface.
- *                Only valid when @flags contains KBASE_KTRACE_FLAG_CSF_QUEUE.
+ * union kbase_ktrace_backend - backend specific part of a trace message
+ * @kcpu:           kcpu union member
+ * @kcpu.code:      Identifies the event, refer to enum kbase_ktrace_code.
+ * @kcpu.flags:     indicates information about the trace message itself. Used
+ *                  during dumping of the message.
+ * @kcpu.id:        ID of the KCPU queue.
+ * @kcpu.extra_info_val: value specific to the type of KCPU event being traced.
+ *                  Refer to the KCPU specific code in enum kbase_ktrace_code in
+ *                  mali_kbase_debug_ktrace_codes_csf.h
+ * @gpu:            gpu union member
+ * @gpu.code:       Identifies the event, refer to enum kbase_ktrace_code.
+ * @gpu.flags:      indicates information about the trace message itself. Used
+ *                  during dumping of the message.
+ * @gpu.group_handle: Handle identifying the associated queue group. Only valid
+ *                  when @gpu.flags contains KBASE_KTRACE_FLAG_CSF_GROUP.
+ * @gpu.csg_nr:     Number/index of the associated queue group's CS group to
+ *                  which it is mapped, or negative if none associated. Only
+ *                  valid when @gpu.flags contains KBASE_KTRACE_FLAG_CSF_GROUP.
+ * @gpu.slot_prio:  The priority of the slot for the associated group, if it
+ *                  was scheduled. Hence, only valid when @gpu.csg_nr >= 0
+ *                  and @gpu.flags contains KBASE_KTRACE_FLAG_CSF_GROUP.
+ * @gpu.csi_index:  ID of the associated queue's CS HW interface. Only
+ *                  valid when @gpu.flags contains KBASE_KTRACE_FLAG_CSF_QUEUE.
  */
-struct kbase_ktrace_backend {
+
+union kbase_ktrace_backend {
 	/* Place 64 and 32-bit members together */
 	/* Pack smaller members together */
-	kbase_ktrace_code_t code;
-	kbase_ktrace_flag_t flags;
-	u8 group_handle;
-	s8 csg_nr;
-	u8 slot_prio;
-	s8 csi_index;
+	struct {
+		kbase_ktrace_code_t code;
+		kbase_ktrace_flag_t flags;
+		u8 id;
+		u64 extra_info_val;
+	} kcpu;
+
+	struct {
+		kbase_ktrace_code_t code;
+		kbase_ktrace_flag_t flags;
+		u8 group_handle;
+		s8 csg_nr;
+		u8 slot_prio;
+		s8 csi_index;
+	} gpu;
 };
 
 #endif /* KBASE_KTRACE_TARGET_RBUF */
diff --git a/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_ktrace_defs_jm.h b/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_ktrace_defs_jm.h
index ea8e01a87f3f..abd35ff46e06 100644
--- a/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_ktrace_defs_jm.h
+++ b/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_ktrace_defs_jm.h
@@ -1,11 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  *
- * (C) COPYRIGHT 2020 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,8 +17,6 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 #ifndef _KBASE_DEBUG_KTRACE_DEFS_JM_H_
@@ -71,31 +70,39 @@
 		| KBASE_KTRACE_FLAG_JM_ATOM)
 
 /**
- * struct kbase_ktrace_backend - backend specific part of a trace message
+ * union kbase_ktrace_backend - backend specific part of a trace message
+ * Contains only a struct but is a union such that it is compatible with
+ * generic JM and CSF KTrace calls.
  *
- * @atom_udata:  Copy of the user data sent for the atom in base_jd_submit.
- *               Only valid if KBASE_KTRACE_FLAG_JM_ATOM is set in @flags
- * @gpu_addr:    GPU address, usually of the job-chain represented by an atom.
- * @atom_number: id of the atom for which trace message was added. Only valid
- *               if KBASE_KTRACE_FLAG_JM_ATOM is set in @flags
- * @code:        Identifies the event, refer to enum kbase_ktrace_code.
- * @flags:       indicates information about the trace message itself. Used
- *               during dumping of the message.
- * @jobslot:     job-slot for which trace message was added, valid only for
- *               job-slot management events.
- * @refcount:    reference count for the context, valid for certain events
- *               related to scheduler core and policy.
+ * @gpu:             gpu union member
+ * @gpu.atom_udata:  Copy of the user data sent for the atom in base_jd_submit.
+ *                   Only valid if KBASE_KTRACE_FLAG_JM_ATOM is set in @flags
+ * @gpu.gpu_addr:    GPU address, usually of the job-chain represented by an
+ *                   atom.
+ * @gpu.atom_number: id of the atom for which trace message was added. Only
+ *                   valid if KBASE_KTRACE_FLAG_JM_ATOM is set in @flags
+ * @gpu.code:        Identifies the event, refer to enum kbase_ktrace_code.
+ * @gpu.flags:       indicates information about the trace message itself. Used
+ *                   during dumping of the message.
+ * @gpu.jobslot:     job-slot for which trace message was added, valid only for
+ *                   job-slot management events.
+ * @gpu.refcount:    reference count for the context, valid for certain events
+ *                   related to scheduler core and policy.
  */
-struct kbase_ktrace_backend {
-	/* Place 64 and 32-bit members together */
-	u64 atom_udata[2]; /* Only valid for KBASE_KTRACE_FLAG_JM_ATOM */
-	u64 gpu_addr;
-	int atom_number; /* Only valid for KBASE_KTRACE_FLAG_JM_ATOM */
-	/* Pack smaller members together */
-	kbase_ktrace_code_t code;
-	kbase_ktrace_flag_t flags;
-	u8 jobslot;
-	u8 refcount;
+union kbase_ktrace_backend {
+	struct {
+		/* Place 64 and 32-bit members together */
+		u64 atom_udata[2]; /* Only valid for
+				    * KBASE_KTRACE_FLAG_JM_ATOM
+				    */
+		u64 gpu_addr;
+		int atom_number; /* Only valid for KBASE_KTRACE_FLAG_JM_ATOM */
+		/* Pack smaller members together */
+		kbase_ktrace_code_t code;
+		kbase_ktrace_flag_t flags;
+		u8 jobslot;
+		u8 refcount;
+	} gpu;
 };
 #endif /* KBASE_KTRACE_TARGET_RBUF */
 
diff --git a/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_ktrace_jm.c b/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_ktrace_jm.c
index 1b821281f09f..276290917a82 100644
--- a/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_ktrace_jm.c
+++ b/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_ktrace_jm.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  *
  * (C) COPYRIGHT 2020 ARM Limited. All rights reserved.
@@ -5,7 +6,7 @@
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,9 +17,8 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
+
 #include <mali_kbase.h>
 #include "debug/mali_kbase_debug_ktrace_internal.h"
 #include "debug/backend/mali_kbase_debug_ktrace_jm.h"
@@ -35,38 +35,39 @@ void kbasep_ktrace_backend_format_msg(struct kbase_ktrace_msg *trace_msg,
 		char *buffer, int sz, s32 *written)
 {
 	/* katom */
-	if (trace_msg->backend.flags & KBASE_KTRACE_FLAG_JM_ATOM)
+	if (trace_msg->backend.gpu.flags & KBASE_KTRACE_FLAG_JM_ATOM)
 		*written += MAX(snprintf(buffer + *written,
 				MAX(sz - *written, 0),
 				"atom %d (ud: 0x%llx 0x%llx)",
-				trace_msg->backend.atom_number,
-				trace_msg->backend.atom_udata[0],
-				trace_msg->backend.atom_udata[1]), 0);
+				trace_msg->backend.gpu.atom_number,
+				trace_msg->backend.gpu.atom_udata[0],
+				trace_msg->backend.gpu.atom_udata[1]), 0);
 
 	/* gpu_addr */
-	if (trace_msg->backend.flags & KBASE_KTRACE_FLAG_BACKEND)
+	if (trace_msg->backend.gpu.flags & KBASE_KTRACE_FLAG_BACKEND)
 		*written += MAX(snprintf(buffer + *written,
 				MAX(sz - *written, 0),
-				",%.8llx,", trace_msg->backend.gpu_addr), 0);
+				",%.8llx,", trace_msg->backend.gpu.gpu_addr),
+				0);
 	else
 		*written += MAX(snprintf(buffer + *written,
 				MAX(sz - *written, 0),
 				",,"), 0);
 
 	/* jobslot */
-	if (trace_msg->backend.flags & KBASE_KTRACE_FLAG_JM_JOBSLOT)
+	if (trace_msg->backend.gpu.flags & KBASE_KTRACE_FLAG_JM_JOBSLOT)
 		*written += MAX(snprintf(buffer + *written,
 				MAX(sz - *written, 0),
-				"%d", trace_msg->backend.jobslot), 0);
+				"%d", trace_msg->backend.gpu.jobslot), 0);
 
 	*written += MAX(snprintf(buffer + *written, MAX(sz - *written, 0),
 				","), 0);
 
 	/* refcount */
-	if (trace_msg->backend.flags & KBASE_KTRACE_FLAG_JM_REFCOUNT)
+	if (trace_msg->backend.gpu.flags & KBASE_KTRACE_FLAG_JM_REFCOUNT)
 		*written += MAX(snprintf(buffer + *written,
 				MAX(sz - *written, 0),
-				"%d", trace_msg->backend.refcount), 0);
+				"%d", trace_msg->backend.gpu.refcount), 0);
 }
 
 void kbasep_ktrace_add_jm(struct kbase_device *kbdev,
@@ -83,30 +84,31 @@ void kbasep_ktrace_add_jm(struct kbase_device *kbdev,
 	/* Reserve and update indices */
 	trace_msg = kbasep_ktrace_reserve(&kbdev->ktrace);
 
-	/* Fill the common part of the message (including backend.flags) */
+	/* Fill the common part of the message (including backend.gpu.flags) */
 	kbasep_ktrace_msg_init(&kbdev->ktrace, trace_msg, code, kctx, flags,
 			info_val);
 
 	/* Indicate to the common code that backend-specific parts will be
 	 * valid
 	 */
-	trace_msg->backend.flags |= KBASE_KTRACE_FLAG_BACKEND;
+	trace_msg->backend.gpu.flags |= KBASE_KTRACE_FLAG_BACKEND;
 
 	/* Fill the JM-specific parts of the message */
 	if (katom) {
-		trace_msg->backend.flags |= KBASE_KTRACE_FLAG_JM_ATOM;
+		trace_msg->backend.gpu.flags |= KBASE_KTRACE_FLAG_JM_ATOM;
 
-		trace_msg->backend.atom_number = kbase_jd_atom_id(katom->kctx, katom);
-		trace_msg->backend.atom_udata[0] = katom->udata.blob[0];
-		trace_msg->backend.atom_udata[1] = katom->udata.blob[1];
+		trace_msg->backend.gpu.atom_number =
+			kbase_jd_atom_id(katom->kctx, katom);
+		trace_msg->backend.gpu.atom_udata[0] = katom->udata.blob[0];
+		trace_msg->backend.gpu.atom_udata[1] = katom->udata.blob[1];
 	}
 
-	trace_msg->backend.gpu_addr = gpu_addr;
-	trace_msg->backend.jobslot = jobslot;
+	trace_msg->backend.gpu.gpu_addr = gpu_addr;
+	trace_msg->backend.gpu.jobslot = jobslot;
 	/* Clamp refcount */
-	trace_msg->backend.refcount = MIN((unsigned int)refcount, 0xFF);
+	trace_msg->backend.gpu.refcount = MIN((unsigned int)refcount, 0xFF);
 
-	WARN_ON((trace_msg->backend.flags & ~KBASE_KTRACE_FLAG_ALL));
+	WARN_ON((trace_msg->backend.gpu.flags & ~KBASE_KTRACE_FLAG_ALL));
 
 	/* Done */
 	spin_unlock_irqrestore(&kbdev->ktrace.lock, irqflags);
diff --git a/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_ktrace_jm.h b/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_ktrace_jm.h
index adfcb1aa556e..233d06f2eae7 100644
--- a/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_ktrace_jm.h
+++ b/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_ktrace_jm.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  *
  * (C) COPYRIGHT 2020 ARM Limited. All rights reserved.
@@ -5,7 +6,7 @@
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,8 +17,6 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 #ifndef _KBASE_DEBUG_KTRACE_JM_H_
diff --git a/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_linux_ktrace_csf.h b/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_linux_ktrace_csf.h
index d103e5766456..4ed175e53f49 100644
--- a/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_linux_ktrace_csf.h
+++ b/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_linux_ktrace_csf.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  *
  * (C) COPYRIGHT 2020 ARM Limited. All rights reserved.
@@ -5,7 +6,7 @@
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,8 +17,6 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 /*
@@ -144,4 +143,50 @@ DEFINE_MALI_CSF_GRP_Q_EVENT(QUEUE_STOP);
 
 #undef DEFINE_MALI_CSF_GRP_Q_EVENT
 
+/*
+ * KCPU queue events
+ */
+DECLARE_EVENT_CLASS(mali_csf_kcpu_queue_template,
+	TP_PROTO(struct kbase_kcpu_command_queue *queue,
+		 u64 info_val1, u64 info_val2),
+	TP_ARGS(queue, info_val1, info_val2),
+	TP_STRUCT__entry(
+		__field(u64, info_val1)
+		__field(u64, info_val2)
+		__field(pid_t, kctx_tgid)
+		__field(u32, kctx_id)
+		__field(u8, id)
+	),
+	TP_fast_assign(
+		{
+			__entry->info_val1 = info_val1;
+			__entry->info_val2 = info_val2;
+			__entry->kctx_id = queue->kctx->id;
+			__entry->kctx_tgid = queue->kctx->tgid;
+			__entry->id = queue->id;
+		}
+
+	),
+	TP_printk("kctx=%d_%u id=%u info_val1=0x%llx info_val2=0x%llx",
+			__entry->kctx_tgid, __entry->kctx_id, __entry->id,
+			__entry->info_val1, __entry->info_val2)
+);
+
+#define DEFINE_MALI_CSF_KCPU_EVENT(name)  \
+	DEFINE_EVENT(mali_csf_kcpu_queue_template, mali_##name, \
+	TP_PROTO(struct kbase_kcpu_command_queue *queue, \
+		 u64 info_val1, u64 info_val2), \
+	TP_ARGS(queue, info_val1, info_val2))
+
+DEFINE_MALI_CSF_KCPU_EVENT(KCPU_QUEUE_NEW);
+DEFINE_MALI_CSF_KCPU_EVENT(KCPU_QUEUE_DESTROY);
+DEFINE_MALI_CSF_KCPU_EVENT(CQS_SET);
+DEFINE_MALI_CSF_KCPU_EVENT(CQS_WAIT_START);
+DEFINE_MALI_CSF_KCPU_EVENT(CQS_WAIT_END);
+DEFINE_MALI_CSF_KCPU_EVENT(FENCE_SIGNAL);
+DEFINE_MALI_CSF_KCPU_EVENT(FENCE_WAIT_START);
+DEFINE_MALI_CSF_KCPU_EVENT(FENCE_WAIT_END);
+
+#undef DEFINE_MALI_CSF_KCPU_EVENT
+
 #endif /* !defined(_KBASE_DEBUG_LINUX_KTRACE_CSF_H_) || defined(TRACE_HEADER_MULTI_READ) */
diff --git a/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_linux_ktrace_jm.h b/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_linux_ktrace_jm.h
index 037b1edecd8e..b368a8510ad1 100644
--- a/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_linux_ktrace_jm.h
+++ b/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_linux_ktrace_jm.h
@@ -1,11 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  *
- * (C) COPYRIGHT 2014,2018,2020 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014, 2018, 2020 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,8 +17,6 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 /*
diff --git a/drivers/gpu/arm/bifrost/debug/mali_kbase_debug_ktrace.c b/drivers/gpu/arm/bifrost/debug/mali_kbase_debug_ktrace.c
index a13c0ba20c94..443b048d6d16 100644
--- a/drivers/gpu/arm/bifrost/debug/mali_kbase_debug_ktrace.c
+++ b/drivers/gpu/arm/bifrost/debug/mali_kbase_debug_ktrace.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  *
  * (C) COPYRIGHT 2020 ARM Limited. All rights reserved.
@@ -5,7 +6,7 @@
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,9 +17,8 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
+
 #include <mali_kbase.h>
 #include "debug/mali_kbase_debug_ktrace_internal.h"
 
@@ -93,7 +93,8 @@ static void kbasep_ktrace_format_msg(struct kbase_ktrace_msg *trace_msg,
 			(int)trace_msg->timestamp.tv_sec,
 			(int)(trace_msg->timestamp.tv_nsec / 1000),
 			trace_msg->thread_id, trace_msg->cpu,
-			kbasep_ktrace_code_string[trace_msg->backend.code]), 0);
+			kbasep_ktrace_code_string[trace_msg->backend.gpu.code]),
+			0);
 
 	/* kctx part: */
 	if (trace_msg->kctx_tgid) {
@@ -171,8 +172,8 @@ void kbasep_ktrace_msg_init(struct kbase_ktrace *ktrace,
 		trace_msg->kctx_id = 0;
 	}
 	trace_msg->info_val = info_val;
-	trace_msg->backend.code = code;
-	trace_msg->backend.flags = flags;
+	trace_msg->backend.gpu.code = code;
+	trace_msg->backend.gpu.flags = flags;
 }
 
 void kbasep_ktrace_add(struct kbase_device *kbdev, enum kbase_ktrace_code code,
@@ -189,7 +190,7 @@ void kbasep_ktrace_add(struct kbase_device *kbdev, enum kbase_ktrace_code code,
 	/* Reserve and update indices */
 	trace_msg = kbasep_ktrace_reserve(&kbdev->ktrace);
 
-	/* Fill the common part of the message (including backend.flags) */
+	/* Fill the common part of the message (including backend.gpu.flags) */
 	kbasep_ktrace_msg_init(&kbdev->ktrace, trace_msg, code, kctx, flags,
 			info_val);
 
diff --git a/drivers/gpu/arm/bifrost/debug/mali_kbase_debug_ktrace.h b/drivers/gpu/arm/bifrost/debug/mali_kbase_debug_ktrace.h
index e4e2e8c35001..b7545e08ab28 100644
--- a/drivers/gpu/arm/bifrost/debug/mali_kbase_debug_ktrace.h
+++ b/drivers/gpu/arm/bifrost/debug/mali_kbase_debug_ktrace.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  *
  * (C) COPYRIGHT 2020 ARM Limited. All rights reserved.
@@ -5,7 +6,7 @@
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,8 +17,6 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 /*
diff --git a/drivers/gpu/arm/bifrost/debug/mali_kbase_debug_ktrace_codes.h b/drivers/gpu/arm/bifrost/debug/mali_kbase_debug_ktrace_codes.h
index b50bceee4244..c5ca08e0a18b 100644
--- a/drivers/gpu/arm/bifrost/debug/mali_kbase_debug_ktrace_codes.h
+++ b/drivers/gpu/arm/bifrost/debug/mali_kbase_debug_ktrace_codes.h
@@ -1,11 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  *
- * (C) COPYRIGHT 2011-2015,2018-2020 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2011-2015, 2018-2020 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,8 +17,6 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 /*
@@ -115,6 +114,7 @@ int dummy_array[] = {
 	KBASE_KTRACE_CODE_MAKE_CODE(PM_CORES_CHANGE_DESIRED_TILER),
 	KBASE_KTRACE_CODE_MAKE_CODE(PM_CORES_CHANGE_AVAILABLE),
 	KBASE_KTRACE_CODE_MAKE_CODE(PM_CORES_CHANGE_AVAILABLE_TILER),
+	KBASE_KTRACE_CODE_MAKE_CODE(PM_CORES_CHANGE_AVAILABLE_L2),
 	KBASE_KTRACE_CODE_MAKE_CODE(PM_CORES_AVAILABLE),
 	KBASE_KTRACE_CODE_MAKE_CODE(PM_CORES_AVAILABLE_TILER),
 	KBASE_KTRACE_CODE_MAKE_CODE(PM_DESIRED_REACHED),
@@ -145,7 +145,14 @@ int dummy_array[] = {
 	KBASE_KTRACE_CODE_MAKE_CODE(SCHED_RETAIN_CTX_NOLOCK),
 	/* info_val == kctx->refcount */
 	KBASE_KTRACE_CODE_MAKE_CODE(SCHED_RELEASE_CTX),
-
+#ifdef CONFIG_MALI_ARBITER_SUPPORT
+	/*
+	 * Arbitration events
+	 */
+	KBASE_KTRACE_CODE_MAKE_CODE(ARB_GPU_LOST),
+	KBASE_KTRACE_CODE_MAKE_CODE(ARB_VM_STATE),
+	KBASE_KTRACE_CODE_MAKE_CODE(ARB_VM_EVT),
+#endif
 
 #if MALI_USE_CSF
 #include "debug/backend/mali_kbase_debug_ktrace_codes_csf.h"
diff --git a/drivers/gpu/arm/bifrost/debug/mali_kbase_debug_ktrace_defs.h b/drivers/gpu/arm/bifrost/debug/mali_kbase_debug_ktrace_defs.h
index c680feb86387..accf3cf17236 100644
--- a/drivers/gpu/arm/bifrost/debug/mali_kbase_debug_ktrace_defs.h
+++ b/drivers/gpu/arm/bifrost/debug/mali_kbase_debug_ktrace_defs.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  *
  * (C) COPYRIGHT 2020 ARM Limited. All rights reserved.
@@ -5,7 +6,7 @@
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,8 +17,6 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 #ifndef _KBASE_DEBUG_KTRACE_DEFS_H_
@@ -78,12 +77,18 @@ typedef u8 kbase_ktrace_code_t;
  */
 
 /*
- * struct kbase_ktrace_backend - backend specific part of a trace message
- *
- * At the very least, this must contain a kbase_ktrace_code_t 'code' member and
- * a kbase_ktrace_flag_t 'flags' member
+ * union kbase_ktrace_backend - backend specific part of a trace message.
+ * At the very least, this must contain a kbase_ktrace_code_t 'code' member
+ * and a kbase_ktrace_flag_t 'flags' inside a "gpu" sub-struct. Should a
+ * backend need several sub-structs in its union to optimize the data storage
+ * for different message types, then it can use a "common initial sequence" to
+ * allow 'flags' and 'code' to pack optimally without corrupting them.
+ * Different backends need not share common initial sequences between them; they
+ * only need to ensure they have gpu.flags and gpu.code members. It
+ * is up to each backend how to order these.
  */
-struct kbase_ktrace_backend;
+union kbase_ktrace_backend;
+
 #endif /* KBASE_KTRACE_TARGET_RBUF */
 
 #if MALI_USE_CSF
@@ -145,9 +150,9 @@ enum kbase_ktrace_code {
  * @kctx_id:   Unique identifier of the &kbase_context associated with the
  *             message. Only valid if @kctx_tgid != 0.
  * @info_val:  value specific to the type of event being traced. Refer to the
- *             specific code in enum kbase_ktrace_code
+ *             specific code in enum kbase_ktrace_code.
  * @backend:   backend-specific trace information. All backends must implement
- *             a minimum common set of members
+ *             a minimum common set of members.
  */
 struct kbase_ktrace_msg {
 	struct timespec64 timestamp;
@@ -156,8 +161,7 @@ struct kbase_ktrace_msg {
 	pid_t kctx_tgid;
 	u32 kctx_id;
 	u64 info_val;
-
-	struct kbase_ktrace_backend backend;
+	union kbase_ktrace_backend backend;
 };
 
 struct kbase_ktrace {
diff --git a/drivers/gpu/arm/bifrost/debug/mali_kbase_debug_ktrace_internal.h b/drivers/gpu/arm/bifrost/debug/mali_kbase_debug_ktrace_internal.h
index e450760e3426..1f0fa5ecf9ea 100644
--- a/drivers/gpu/arm/bifrost/debug/mali_kbase_debug_ktrace_internal.h
+++ b/drivers/gpu/arm/bifrost/debug/mali_kbase_debug_ktrace_internal.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  *
  * (C) COPYRIGHT 2020 ARM Limited. All rights reserved.
@@ -5,7 +6,7 @@
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,8 +17,6 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 #ifndef _KBASE_DEBUG_KTRACE_INTERNAL_H_
diff --git a/drivers/gpu/arm/bifrost/debug/mali_kbase_debug_linux_ktrace.h b/drivers/gpu/arm/bifrost/debug/mali_kbase_debug_linux_ktrace.h
index 27f687faf072..4a73da50029b 100644
--- a/drivers/gpu/arm/bifrost/debug/mali_kbase_debug_linux_ktrace.h
+++ b/drivers/gpu/arm/bifrost/debug/mali_kbase_debug_linux_ktrace.h
@@ -1,11 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  *
- * (C) COPYRIGHT 2014,2018,2020 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014, 2018, 2020-2021 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,8 +17,6 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 /*
@@ -86,6 +85,7 @@ DEFINE_MALI_ADD_EVENT(PM_CORES_AVAILABLE);
 DEFINE_MALI_ADD_EVENT(PM_CORES_AVAILABLE_TILER);
 DEFINE_MALI_ADD_EVENT(PM_CORES_CHANGE_AVAILABLE);
 DEFINE_MALI_ADD_EVENT(PM_CORES_CHANGE_AVAILABLE_TILER);
+DEFINE_MALI_ADD_EVENT(PM_CORES_CHANGE_AVAILABLE_L2);
 DEFINE_MALI_ADD_EVENT(PM_GPU_ON);
 DEFINE_MALI_ADD_EVENT(PM_GPU_OFF);
 DEFINE_MALI_ADD_EVENT(PM_SET_POLICY);
@@ -97,11 +97,17 @@ DEFINE_MALI_ADD_EVENT(PM_CONTEXT_IDLE);
 DEFINE_MALI_ADD_EVENT(PM_WAKE_WAITERS);
 DEFINE_MALI_ADD_EVENT(SCHED_RETAIN_CTX_NOLOCK);
 DEFINE_MALI_ADD_EVENT(SCHED_RELEASE_CTX);
+#ifdef CONFIG_MALI_ARBITER_SUPPORT
 
+DEFINE_MALI_ADD_EVENT(ARB_GPU_LOST);
+DEFINE_MALI_ADD_EVENT(ARB_VM_STATE);
+DEFINE_MALI_ADD_EVENT(ARB_VM_EVT);
+
+#endif
 #if MALI_USE_CSF
-#include "mali_kbase_debug_linux_ktrace_csf.h"
+#include "backend/mali_kbase_debug_linux_ktrace_csf.h"
 #else
-#include "mali_kbase_debug_linux_ktrace_jm.h"
+#include "backend/mali_kbase_debug_linux_ktrace_jm.h"
 #endif
 
 #undef DEFINE_MALI_ADD_EVENT
diff --git a/drivers/gpu/arm/bifrost/device/backend/mali_kbase_device_csf.c b/drivers/gpu/arm/bifrost/device/backend/mali_kbase_device_csf.c
index d8b3fff6a214..39a46276cf22 100644
--- a/drivers/gpu/arm/bifrost/device/backend/mali_kbase_device_csf.c
+++ b/drivers/gpu/arm/bifrost/device/backend/mali_kbase_device_csf.c
@@ -1,12 +1,12 @@
 // SPDX-License-Identifier: GPL-2.0
 /*
  *
- * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -17,18 +17,17 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 #include "../mali_kbase_device_internal.h"
 #include "../mali_kbase_device.h"
 
-#include <mali_kbase_config_defaults.h>
 #include <mali_kbase_hwaccess_backend.h>
+#include <mali_kbase_hwcnt_backend_csf_if_fw.h>
 #include <mali_kbase_ctx_sched.h>
 #include <mali_kbase_reset_gpu.h>
 #include <csf/mali_kbase_csf.h>
+#include <csf/ipa_control/mali_kbase_csf_ipa_control.h>
 
 #ifdef CONFIG_MALI_BIFROST_NO_MALI
 #include <mali_kbase_model_linux.h>
@@ -36,13 +35,12 @@
 
 #include <mali_kbase.h>
 #include <backend/gpu/mali_kbase_irq_internal.h>
-#include <backend/gpu/mali_kbase_js_internal.h>
 #include <backend/gpu/mali_kbase_pm_internal.h>
+#include <backend/gpu/mali_kbase_js_internal.h>
 #include <backend/gpu/mali_kbase_clk_rate_trace_mgr.h>
 
 static void kbase_device_csf_firmware_term(struct kbase_device *kbdev)
 {
-	kbase_clk_rate_trace_manager_term(kbdev);
 	kbase_csf_firmware_term(kbdev);
 }
 
@@ -64,9 +62,6 @@ static int kbase_device_csf_firmware_init(struct kbase_device *kbdev)
 	 */
 	kbase_pm_context_idle(kbdev);
 
-	if (!err)
-		kbase_clk_rate_trace_manager_init(kbdev);
-
 	return err;
 }
 
@@ -106,6 +101,15 @@ static int kbase_backend_late_init(struct kbase_device *kbdev)
 #endif /* !CONFIG_MALI_BIFROST_NO_MALI */
 #endif /* CONFIG_MALI_BIFROST_DEBUG */
 
+	kbase_ipa_control_init(kbdev);
+
+	/* Initialise the metrics subsystem; it couldn't be initialized earlier
+	 * due to its dependency on kbase_ipa_control.
+	 */
+	err = kbasep_pm_metrics_init(kbdev);
+	if (err)
+		goto fail_pm_metrics_init;
+
 	/* Do the initialisation of devfreq.
 	 * Devfreq needs backend_timer_init() for completion of its
 	 * initialisation and it also needs to catch the first callback
@@ -124,10 +128,16 @@ static int kbase_backend_late_init(struct kbase_device *kbdev)
 
 	init_waitqueue_head(&kbdev->hwaccess.backend.reset_wait);
 
+	/* kbase_pm_context_idle is called after the boot of firmware */
+
 	return 0;
 
 fail_update_l2_features:
+	kbase_backend_devfreq_term(kbdev);
 fail_devfreq_init:
+	kbasep_pm_metrics_term(kbdev);
+fail_pm_metrics_init:
+	kbase_ipa_control_term(kbdev);
 
 #ifdef CONFIG_MALI_BIFROST_DEBUG
 #ifndef CONFIG_MALI_BIFROST_NO_MALI
@@ -137,6 +147,7 @@ fail_interrupt_test:
 
 	kbase_backend_timer_term(kbdev);
 fail_timer:
+	kbase_pm_context_idle(kbdev);
 	kbase_hwaccess_pm_halt(kbdev);
 fail_pm_powerup:
 	kbase_reset_gpu_term(kbdev);
@@ -153,11 +164,84 @@ fail_reset_gpu_init:
 static void kbase_backend_late_term(struct kbase_device *kbdev)
 {
 	kbase_backend_devfreq_term(kbdev);
+	kbasep_pm_metrics_term(kbdev);
+	kbase_ipa_control_term(kbdev);
 	kbase_hwaccess_pm_halt(kbdev);
 	kbase_reset_gpu_term(kbdev);
 	kbase_hwaccess_pm_term(kbdev);
 }
 
+/**
+ * kbase_device_hwcnt_backend_csf_if_init - Create hardware counter backend
+ *                                          firmware interface.
+ * @kbdev:	Device pointer
+ */
+static int kbase_device_hwcnt_backend_csf_if_init(struct kbase_device *kbdev)
+{
+	return kbase_hwcnt_backend_csf_if_fw_create(
+		kbdev, &kbdev->hwcnt_backend_csf_if_fw);
+}
+
+/**
+ * kbase_device_hwcnt_backend_csf_if_term - Terminate hardware counter backend
+ *                                          firmware interface.
+ * @kbdev:	Device pointer
+ */
+static void kbase_device_hwcnt_backend_csf_if_term(struct kbase_device *kbdev)
+{
+	kbase_hwcnt_backend_csf_if_fw_destroy(&kbdev->hwcnt_backend_csf_if_fw);
+}
+
+/**
+ * kbase_device_hwcnt_backend_csf_init - Create hardware counter backend.
+ * @kbdev:	Device pointer
+ */
+
+static int kbase_device_hwcnt_backend_csf_init(struct kbase_device *kbdev)
+{
+	return kbase_hwcnt_backend_csf_create(
+		&kbdev->hwcnt_backend_csf_if_fw,
+		KBASE_HWCNT_BACKEND_CSF_RING_BUFFER_COUNT,
+		&kbdev->hwcnt_gpu_iface);
+}
+
+/**
+ * kbase_device_hwcnt_backend_csf_term - Terminate hardware counter backend.
+ * @kbdev:	Device pointer
+ */
+static void kbase_device_hwcnt_backend_csf_term(struct kbase_device *kbdev)
+{
+	kbase_hwcnt_backend_csf_destroy(&kbdev->hwcnt_gpu_iface);
+}
+
+/**
+ * kbase_device_hwcnt_backend_csf_metadata_init - Initialize hardware counter
+ *                                                metadata.
+ * @kbdev:	Device pointer
+ */
+static int
+kbase_device_hwcnt_backend_csf_metadata_init(struct kbase_device *kbdev)
+{
+	/* For CSF GPUs, HWC metadata needs to query information from CSF
+	 * firmware, so HWC metadata initialization can only happen after the
+	 * firmware is initialized. Firmware initialization in turn depends on
+	 * HWC backend initialization, so HWC backend metadata initialization
+	 * must be kept separate from HWC backend initialization.
+	 */
+	return kbase_hwcnt_backend_csf_metadata_init(&kbdev->hwcnt_gpu_iface);
+}
+
+/**
+ * kbase_device_hwcnt_backend_csf_metadata_term - Terminate hardware counter
+ *                                                metadata.
+ * @kbdev:	Device pointer
+ */
+static void
+kbase_device_hwcnt_backend_csf_metadata_term(struct kbase_device *kbdev)
+{
+	kbase_hwcnt_backend_csf_metadata_term(&kbdev->hwcnt_gpu_iface);
+}
+
 static const struct kbase_device_init dev_init[] = {
 #ifdef CONFIG_MALI_BIFROST_NO_MALI
 	{kbase_gpu_device_create, kbase_gpu_device_destroy,
@@ -178,6 +262,8 @@ static const struct kbase_device_init dev_init[] = {
 			"Populating max frequency failed"},
 	{kbase_device_misc_init, kbase_device_misc_term,
 			"Miscellaneous device initialization failed"},
+	{kbase_device_pcm_dev_init, kbase_device_pcm_dev_term,
+			"Priority control manager initialization failed"},
 	{kbase_ctx_sched_init, kbase_ctx_sched_term,
 			"Context scheduler initialization failed"},
 	{kbase_mem_init, kbase_mem_term,
@@ -195,20 +281,26 @@ static const struct kbase_device_init dev_init[] = {
 	{kbase_clk_rate_trace_manager_init,
 			kbase_clk_rate_trace_manager_term,
 			"Clock rate trace manager initialization failed"},
-	{kbase_device_hwcnt_backend_jm_init,
-			kbase_device_hwcnt_backend_jm_term,
+	{kbase_device_hwcnt_backend_csf_if_init,
+			kbase_device_hwcnt_backend_csf_if_term,
+			"GPU hwcnt backend CSF interface creation failed"},
+	{kbase_device_hwcnt_backend_csf_init,
+			kbase_device_hwcnt_backend_csf_term,
 			"GPU hwcnt backend creation failed"},
 	{kbase_device_hwcnt_context_init, kbase_device_hwcnt_context_term,
 			"GPU hwcnt context initialization failed"},
+	{kbase_backend_late_init, kbase_backend_late_term,
+			"Late backend initialization failed"},
+	{kbase_device_csf_firmware_init, kbase_device_csf_firmware_term,
+			"Firmware initialization failed"},
+	{kbase_device_hwcnt_backend_csf_metadata_init,
+			kbase_device_hwcnt_backend_csf_metadata_term,
+			"GPU hwcnt backend metadata creation failed"},
 	{kbase_device_hwcnt_virtualizer_init,
 			kbase_device_hwcnt_virtualizer_term,
 			"GPU hwcnt virtualizer initialization failed"},
 	{kbase_device_vinstr_init, kbase_device_vinstr_term,
 			"Virtual instrumentation initialization failed"},
-	{kbase_backend_late_init, kbase_backend_late_term,
-			"Late backend initialization failed"},
-	{kbase_device_csf_firmware_init, kbase_device_csf_firmware_term,
-			"Firmware initialization failed"},
 #ifdef MALI_KBASE_BUILD
 	{kbase_device_debugfs_init, kbase_device_debugfs_term,
 			"DebugFS initialization failed"},
@@ -224,12 +316,10 @@ static const struct kbase_device_init dev_init[] = {
 	 * paragraph that starts with "Word of warning", currently the
 	 * second-last paragraph.
 	 */
-	{kbase_sysfs_init, kbase_sysfs_term, "SysFS group creation failed"},
+	{kbase_sysfs_init, kbase_sysfs_term,
+			"SysFS group creation failed"},
 	{kbase_device_misc_register, kbase_device_misc_deregister,
 			"Misc device registration failed"},
-#ifdef CONFIG_MALI_BUSLOG
-	{buslog_init, buslog_term, "Bus log client registration failed"},
-#endif
 	{kbase_gpuprops_populate_user_buffer, kbase_gpuprops_free_user_buffer,
 			"GPU property population failed"},
 #endif
@@ -246,6 +336,7 @@ static void kbase_device_term_partial(struct kbase_device *kbdev,
 
 void kbase_device_term(struct kbase_device *kbdev)
 {
+	kbdev->csf.mali_file_inode = NULL;
 	kbase_device_term_partial(kbdev, ARRAY_SIZE(dev_init));
 	kbase_mem_halt(kbdev);
 }
diff --git a/drivers/gpu/arm/bifrost/device/backend/mali_kbase_device_hw_csf.c b/drivers/gpu/arm/bifrost/device/backend/mali_kbase_device_hw_csf.c
index 97bcc1d23aa3..259e42a6d3ea 100644
--- a/drivers/gpu/arm/bifrost/device/backend/mali_kbase_device_hw_csf.c
+++ b/drivers/gpu/arm/bifrost/device/backend/mali_kbase_device_hw_csf.c
@@ -1,12 +1,12 @@
 // SPDX-License-Identifier: GPL-2.0
 /*
  *
- * (C) COPYRIGHT 2020 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -17,8 +17,6 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 #include <mali_kbase.h>
@@ -72,7 +70,8 @@ static bool kbase_gpu_fault_interrupt(struct kbase_device *kbdev)
 			kbase_report_gpu_fault(kbdev, status, as_nr, as_valid);
 
 			dev_err(kbdev->dev, "GPU bus fault triggering gpu-reset ...\n");
-			if (kbase_prepare_to_reset_gpu(kbdev))
+			if (kbase_prepare_to_reset_gpu(
+				    kbdev, RESET_FLAGS_HWC_UNRECOVERABLE_ERROR))
 				kbase_reset_gpu(kbdev);
 		} else {
 			/* Handle Bus fault */
@@ -108,11 +107,26 @@ void kbase_gpu_interrupt(struct kbase_device *kbdev, u32 val)
 		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
 
 		kbase_csf_scheduler_spin_lock(kbdev, &flags);
-		if (!WARN_ON(!kbase_csf_scheduler_protected_mode_in_use(kbdev)))
+		if (!WARN_ON(!kbase_csf_scheduler_protected_mode_in_use(
+			    kbdev))) {
+			struct base_gpu_queue_group_error const
+				err_payload = { .error_type =
+							BASE_GPU_QUEUE_GROUP_ERROR_FATAL,
+						.payload = {
+							.fatal_group = {
+								.status =
+									GPU_EXCEPTION_TYPE_SW_FAULT_0,
+							} } };
+
 			scheduler->active_protm_grp->faulted = true;
+			kbase_csf_add_group_fatal_error(
+				scheduler->active_protm_grp, &err_payload);
+			kbase_event_wakeup(scheduler->active_protm_grp->kctx);
+		}
 		kbase_csf_scheduler_spin_unlock(kbdev, flags);
 
-		if (kbase_prepare_to_reset_gpu(kbdev))
+		if (kbase_prepare_to_reset_gpu(
+			    kbdev, RESET_FLAGS_HWC_UNRECOVERABLE_ERROR))
 			kbase_reset_gpu(kbdev);
 	}
 
diff --git a/drivers/gpu/arm/bifrost/device/backend/mali_kbase_device_hw_jm.c b/drivers/gpu/arm/bifrost/device/backend/mali_kbase_device_hw_jm.c
index a11d778071b5..33ebe0901fe2 100644
--- a/drivers/gpu/arm/bifrost/device/backend/mali_kbase_device_hw_jm.c
+++ b/drivers/gpu/arm/bifrost/device/backend/mali_kbase_device_hw_jm.c
@@ -6,7 +6,7 @@
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -17,8 +17,6 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 #include <mali_kbase.h>
diff --git a/drivers/gpu/arm/bifrost/device/backend/mali_kbase_device_jm.c b/drivers/gpu/arm/bifrost/device/backend/mali_kbase_device_jm.c
index 8e853eb82fa1..4f5de18d3571 100644
--- a/drivers/gpu/arm/bifrost/device/backend/mali_kbase_device_jm.c
+++ b/drivers/gpu/arm/bifrost/device/backend/mali_kbase_device_jm.c
@@ -1,12 +1,12 @@
 // SPDX-License-Identifier: GPL-2.0
 /*
  *
- * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -17,12 +17,11 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 #include "../mali_kbase_device_internal.h"
 #include "../mali_kbase_device.h"
+#include "../mali_kbase_hwaccess_instr.h"
 
 #include <mali_kbase_config_defaults.h>
 #include <mali_kbase_hwaccess_backend.h>
@@ -96,9 +95,6 @@ static int kbase_backend_late_init(struct kbase_device *kbdev)
 	if (err)
 		goto fail_devfreq_init;
 
-	/* Idle the GPU and/or cores, if the policy wants it to */
-	kbase_pm_context_idle(kbdev);
-
 	/* Update gpuprops with L2_FEATURES if applicable */
 	err = kbase_gpuprops_update_l2_features(kbdev);
 	if (err)
@@ -106,9 +102,13 @@ static int kbase_backend_late_init(struct kbase_device *kbdev)
 
 	init_waitqueue_head(&kbdev->hwaccess.backend.reset_wait);
 
+	/* Idle the GPU and/or cores, if the policy wants it to */
+	kbase_pm_context_idle(kbdev);
+
 	return 0;
 
 fail_update_l2_features:
+	kbase_backend_devfreq_term(kbdev);
 fail_devfreq_init:
 	kbase_job_slot_term(kbdev);
 fail_job_slot:
@@ -121,6 +121,7 @@ fail_interrupt_test:
 
 	kbase_backend_timer_term(kbdev);
 fail_timer:
+	kbase_pm_context_idle(kbdev);
 	kbase_hwaccess_pm_halt(kbdev);
 fail_pm_powerup:
 	kbase_reset_gpu_term(kbdev);
@@ -145,6 +146,16 @@ static void kbase_backend_late_term(struct kbase_device *kbdev)
 	kbase_hwaccess_pm_term(kbdev);
 }
 
+static int kbase_device_hwcnt_backend_jm_init(struct kbase_device *kbdev)
+{
+	return kbase_hwcnt_backend_jm_create(kbdev, &kbdev->hwcnt_gpu_iface);
+}
+
+static void kbase_device_hwcnt_backend_jm_term(struct kbase_device *kbdev)
+{
+	kbase_hwcnt_backend_jm_destroy(&kbdev->hwcnt_gpu_iface);
+}
+
 static const struct kbase_device_init dev_init[] = {
 #ifdef CONFIG_MALI_BIFROST_NO_MALI
 	{kbase_gpu_device_create, kbase_gpu_device_destroy,
@@ -165,6 +176,8 @@ static const struct kbase_device_init dev_init[] = {
 			"Populating max frequency failed"},
 	{kbase_device_misc_init, kbase_device_misc_term,
 			"Miscellaneous device initialization failed"},
+	{kbase_device_pcm_dev_init, kbase_device_pcm_dev_term,
+			"Priority control manager initialization failed"},
 	{kbase_ctx_sched_init, kbase_ctx_sched_term,
 			"Context scheduler initialization failed"},
 	{kbase_mem_init, kbase_mem_term,
@@ -182,6 +195,8 @@ static const struct kbase_device_init dev_init[] = {
 	{kbase_clk_rate_trace_manager_init,
 			kbase_clk_rate_trace_manager_term,
 			"Clock rate trace manager initialization failed"},
+	{kbase_instr_backend_init, kbase_instr_backend_term,
+			"Instrumentation backend initialization failed"},
 	{kbase_device_hwcnt_backend_jm_init,
 			kbase_device_hwcnt_backend_jm_term,
 			"GPU hwcnt backend creation failed"},
@@ -214,9 +229,6 @@ static const struct kbase_device_init dev_init[] = {
 	{kbase_sysfs_init, kbase_sysfs_term, "SysFS group creation failed"},
 	{kbase_device_misc_register, kbase_device_misc_deregister,
 			"Misc device registration failed"},
-#ifdef CONFIG_MALI_BUSLOG
-	{buslog_init, buslog_term, "Bus log client registration failed"},
-#endif
 	{kbase_gpuprops_populate_user_buffer, kbase_gpuprops_free_user_buffer,
 			"GPU property population failed"},
 #endif
@@ -253,7 +265,8 @@ int kbase_device_init(struct kbase_device *kbdev)
 	for (i = 0; i < ARRAY_SIZE(dev_init); i++) {
 		err = dev_init[i].init(kbdev);
 		if (err) {
-			dev_err(kbdev->dev, "%s error = %d\n",
+			if (err != -EPROBE_DEFER)
+				dev_err(kbdev->dev, "%s error = %d\n",
 						dev_init[i].err_mes, err);
 			kbase_device_term_partial(kbdev, i);
 			break;
diff --git a/drivers/gpu/arm/bifrost/device/mali_kbase_device.c b/drivers/gpu/arm/bifrost/device/mali_kbase_device.c
index 76fb33a5e881..5e900d0fd0d4 100644
--- a/drivers/gpu/arm/bifrost/device/mali_kbase_device.c
+++ b/drivers/gpu/arm/bifrost/device/mali_kbase_device.c
@@ -1,12 +1,12 @@
 // SPDX-License-Identifier: GPL-2.0
 /*
  *
- * (C) COPYRIGHT 2010-2020 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2021 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -17,8 +17,6 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 /*
@@ -38,6 +36,7 @@
 #include <mali_kbase_hwaccess_instr.h>
 #include <mali_kbase_hw.h>
 #include <mali_kbase_config_defaults.h>
+#include <linux/priority_control_manager.h>
 
 #include <tl/mali_kbase_timeline.h>
 #include "mali_kbase_vinstr.h"
@@ -45,6 +44,7 @@
 #include "mali_kbase_hwcnt_virtualizer.h"
 
 #include "mali_kbase_device.h"
+#include "mali_kbase_device_internal.h"
 #include "backend/gpu/mali_kbase_pm_internal.h"
 #include "backend/gpu/mali_kbase_irq_internal.h"
 #include "mali_kbase_regs_history_debugfs.h"
@@ -106,6 +106,81 @@ static void kbase_device_all_as_term(struct kbase_device *kbdev)
 		kbase_mmu_as_term(kbdev, i);
 }
 
+/**
+ * kbase_device_pcm_dev_init - Look up the optional priority control manager
+ * @kbdev: Device pointer
+ *
+ * When CONFIG_OF is set, resolves the "priority-control-manager" phandle of
+ * kbdev->dev's device-tree node. If it points at a platform device whose
+ * driver data is set, a reference is taken on the owning module and the driver
+ * data is stored in kbdev->pcm_dev. A missing phandle, or a phandle with no
+ * matching platform device, is only logged and is not treated as a failure.
+ *
+ * Return: 0 on success or when no manager is used, -EPROBE_DEFER if the
+ * manager's platform device has no driver data yet, -ENODEV if the module
+ * reference could not be taken.
+ */
+int kbase_device_pcm_dev_init(struct kbase_device *const kbdev)
+{
+	int err = 0;
+
+#ifdef CONFIG_OF
+	struct device_node *prio_ctrl_node;
+
+	/* Check to see whether or not a platform specific priority control manager
+	 * is available.
+	 */
+	prio_ctrl_node = of_parse_phandle(kbdev->dev->of_node,
+			"priority-control-manager", 0);
+	if (!prio_ctrl_node) {
+		dev_info(kbdev->dev,
+			"No priority control manager is configured");
+	} else {
+		struct platform_device *const pdev =
+			of_find_device_by_node(prio_ctrl_node);
+
+		if (!pdev) {
+			dev_err(kbdev->dev,
+				"The configured priority control manager was not found");
+		} else {
+			struct priority_control_manager_device *pcm_dev =
+						platform_get_drvdata(pdev);
+			if (!pcm_dev) {
+				dev_info(kbdev->dev, "Priority control manager is not ready");
+				err = -EPROBE_DEFER;
+			} else if (!try_module_get(pcm_dev->owner)) {
+				dev_err(kbdev->dev, "Failed to get priority control manager module");
+				err = -ENODEV;
+			} else {
+				dev_info(kbdev->dev, "Priority control manager successfully loaded");
+				kbdev->pcm_dev = pcm_dev;
+			}
+
+			/* of_find_device_by_node() took a reference on the
+			 * device; release it now that pdev is no longer needed.
+			 */
+			platform_device_put(pdev);
+		}
+		of_node_put(prio_ctrl_node);
+	}
+#endif /* CONFIG_OF */
+
+	return err;
+}
+
+/**
+ * kbase_device_pcm_dev_term - Release the priority control manager module
+ * @kbdev: Device pointer
+ *
+ * Drops the module reference taken by kbase_device_pcm_dev_init(), if a
+ * manager was recorded in kbdev->pcm_dev.
+ */
+void kbase_device_pcm_dev_term(struct kbase_device *const kbdev)
+{
+	if (kbdev->pcm_dev)
+		module_put(kbdev->pcm_dev->owner);
+}
+
 int kbase_device_misc_init(struct kbase_device * const kbdev)
 {
 	int err;
@@ -136,6 +185,7 @@ int kbase_device_misc_init(struct kbase_device * const kbdev)
 		}
 	}
 #endif /* CONFIG_ARM64 */
+
 	/* Get the list of workarounds for issues on the current HW
 	 * (identified by the GPU_ID register)
 	 */
@@ -152,11 +202,6 @@ int kbase_device_misc_init(struct kbase_device * const kbdev)
 	if (err)
 		goto fail;
 
-	/* On Linux 4.0+, dma coherency is determined from device tree */
-#if defined(CONFIG_ARM64) && LINUX_VERSION_CODE < KERNEL_VERSION(4, 0, 0)
-	set_dma_ops(kbdev->dev, &noncoherent_swiotlb_dma_ops);
-#endif
-
 	/* Workaround a pre-3.13 Linux issue, where dma_mask is NULL when our
 	 * device structure was created by device-tree
 	 */
@@ -179,8 +224,6 @@ int kbase_device_misc_init(struct kbase_device * const kbdev)
 	if (err)
 		goto dma_set_mask_failed;
 
-	spin_lock_init(&kbdev->hwcnt.lock);
-
 	err = kbase_ktrace_init(kbdev);
 	if (err)
 		goto term_as;
@@ -191,18 +234,11 @@ int kbase_device_misc_init(struct kbase_device * const kbdev)
 
 	atomic_set(&kbdev->ctx_num, 0);
 
-	err = kbase_instr_backend_init(kbdev);
-	if (err)
-		goto term_trace;
-
 	kbdev->pm.dvfs_period = DEFAULT_PM_DVFS_PERIOD;
 
 	kbdev->reset_timeout_ms = DEFAULT_RESET_TIMEOUT_MS;
 
-	if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_AARCH64_MMU))
-		kbdev->mmu_mode = kbase_mmu_mode_get_aarch64();
-	else
-		kbdev->mmu_mode = kbase_mmu_mode_get_lpae();
+	kbdev->mmu_mode = kbase_mmu_mode_get_aarch64();
 
 	mutex_init(&kbdev->kctx_list_lock);
 	INIT_LIST_HEAD(&kbdev->kctx_list);
@@ -210,8 +246,7 @@ int kbase_device_misc_init(struct kbase_device * const kbdev)
 	spin_lock_init(&kbdev->hwaccess_lock);
 
 	return 0;
-term_trace:
-	kbase_ktrace_term(kbdev);
+
 term_as:
 	kbase_device_all_as_term(kbdev);
 dma_set_mask_failed:
@@ -229,8 +264,6 @@ void kbase_device_misc_term(struct kbase_device *kbdev)
 	kbase_debug_assert_register_hook(NULL, NULL);
 #endif
 
-	kbase_instr_backend_term(kbdev);
-
 	kbase_ktrace_term(kbdev);
 
 	kbase_device_all_as_term(kbdev);
@@ -253,16 +286,6 @@ void kbase_increment_device_id(void)
 	kbase_dev_nr++;
 }
 
-int kbase_device_hwcnt_backend_jm_init(struct kbase_device *kbdev)
-{
-	return kbase_hwcnt_backend_jm_create(kbdev, &kbdev->hwcnt_gpu_iface);
-}
-
-void kbase_device_hwcnt_backend_jm_term(struct kbase_device *kbdev)
-{
-	kbase_hwcnt_backend_jm_destroy(&kbdev->hwcnt_gpu_iface);
-}
-
 int kbase_device_hwcnt_context_init(struct kbase_device *kbdev)
 {
 	return kbase_hwcnt_context_init(&kbdev->hwcnt_gpu_iface,
@@ -382,7 +405,14 @@ int kbase_device_early_init(struct kbase_device *kbdev)
 	/* We're done accessing the GPU registers for now. */
 	kbase_pm_register_access_disable(kbdev);
 
+#ifdef CONFIG_MALI_ARBITER_SUPPORT
+	if (kbdev->arb.arb_if)
+		err = kbase_arbiter_pm_install_interrupts(kbdev);
+	else
+		err = kbase_install_interrupts(kbdev);
+#else
 	err = kbase_install_interrupts(kbdev);
+#endif
 	if (err)
 		goto fail_interrupts;
 
diff --git a/drivers/gpu/arm/bifrost/device/mali_kbase_device.h b/drivers/gpu/arm/bifrost/device/mali_kbase_device.h
index 33264bcc0464..3a774fc3f9b0 100644
--- a/drivers/gpu/arm/bifrost/device/mali_kbase_device.h
+++ b/drivers/gpu/arm/bifrost/device/mali_kbase_device.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  *
  * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved.
@@ -5,7 +6,7 @@
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,8 +17,6 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 #include <mali_kbase.h>
diff --git a/drivers/gpu/arm/bifrost/device/mali_kbase_device_hw.c b/drivers/gpu/arm/bifrost/device/mali_kbase_device_hw.c
index 3a75c6c05cfa..4097296952d8 100644
--- a/drivers/gpu/arm/bifrost/device/mali_kbase_device_hw.c
+++ b/drivers/gpu/arm/bifrost/device/mali_kbase_device_hw.c
@@ -6,7 +6,7 @@
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -17,8 +17,6 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 #include <mali_kbase.h>
diff --git a/drivers/gpu/arm/bifrost/device/mali_kbase_device_internal.h b/drivers/gpu/arm/bifrost/device/mali_kbase_device_internal.h
index 54644582eac5..067f33ce0528 100644
--- a/drivers/gpu/arm/bifrost/device/mali_kbase_device_internal.h
+++ b/drivers/gpu/arm/bifrost/device/mali_kbase_device_internal.h
@@ -1,11 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  *
- * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,8 +17,6 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 #include <mali_kbase.h>
@@ -43,9 +42,6 @@ void kbase_device_vinstr_term(struct kbase_device *kbdev);
 int kbase_device_timeline_init(struct kbase_device *kbdev);
 void kbase_device_timeline_term(struct kbase_device *kbdev);
 
-int kbase_device_hwcnt_backend_jm_init(struct kbase_device *kbdev);
-void kbase_device_hwcnt_backend_jm_term(struct kbase_device *kbdev);
-
 int kbase_device_hwcnt_context_init(struct kbase_device *kbdev);
 void kbase_device_hwcnt_context_term(struct kbase_device *kbdev);
 
diff --git a/drivers/gpu/arm/bifrost/gpu/backend/mali_kbase_gpu_fault_csf.c b/drivers/gpu/arm/bifrost/gpu/backend/mali_kbase_gpu_fault_csf.c
index f7e9b125ba8b..ab0e3264615c 100644
--- a/drivers/gpu/arm/bifrost/gpu/backend/mali_kbase_gpu_fault_csf.c
+++ b/drivers/gpu/arm/bifrost/gpu/backend/mali_kbase_gpu_fault_csf.c
@@ -1,11 +1,12 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  *
- * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,12 +17,10 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 #include <mali_kbase.h>
-#include "csf/mali_gpu_csf_registers.h"
+#include <uapi/gpu/arm/bifrost/csf/mali_gpu_csf_registers.h>
 #include "../mali_kbase_gpu_fault.h"
 
 const char *kbase_gpu_exception_name(u32 const exception_code)
@@ -29,14 +28,14 @@ const char *kbase_gpu_exception_name(u32 const exception_code)
 	const char *e;
 
 	switch (exception_code) {
-	/* Command Stream exceptions */
+	/* CS exceptions */
 	case CS_FAULT_EXCEPTION_TYPE_CS_RESOURCE_TERMINATED:
 		e = "CS_RESOURCE_TERMINATED";
 		break;
 	case CS_FAULT_EXCEPTION_TYPE_CS_INHERIT_FAULT:
 		e = "CS_INHERIT_FAULT";
 		break;
-	/* Command Stream fatal exceptions */
+	/* CS fatal exceptions */
 	case CS_FATAL_EXCEPTION_TYPE_CS_CONFIG_FAULT:
 		e = "CS_CONFIG_FAULT";
 		break;
diff --git a/drivers/gpu/arm/bifrost/gpu/backend/mali_kbase_gpu_fault_jm.c b/drivers/gpu/arm/bifrost/gpu/backend/mali_kbase_gpu_fault_jm.c
index 56f541516489..110e5b3244b7 100644
--- a/drivers/gpu/arm/bifrost/gpu/backend/mali_kbase_gpu_fault_jm.c
+++ b/drivers/gpu/arm/bifrost/gpu/backend/mali_kbase_gpu_fault_jm.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  *
  * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved.
@@ -5,7 +6,7 @@
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,8 +17,6 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 #include <mali_kbase.h>
diff --git a/drivers/gpu/arm/bifrost/gpu/mali_kbase_gpu.c b/drivers/gpu/arm/bifrost/gpu/mali_kbase_gpu.c
index 3128db4cabfc..92aa59373bcd 100644
--- a/drivers/gpu/arm/bifrost/gpu/mali_kbase_gpu.c
+++ b/drivers/gpu/arm/bifrost/gpu/mali_kbase_gpu.c
@@ -1,11 +1,12 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  *
- * (C) COPYRIGHT 2019 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,12 +17,11 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 #include <mali_kbase.h>
 #include <mali_kbase_defs.h>
+#include <gpu/mali_kbase_gpu_fault.h>
 
 const char *kbase_gpu_access_type_name(u32 fault_status)
 {
diff --git a/drivers/gpu/arm/bifrost/gpu/mali_kbase_gpu_fault.h b/drivers/gpu/arm/bifrost/gpu/mali_kbase_gpu_fault.h
index e63c3881a3ca..9f4dc66bed94 100644
--- a/drivers/gpu/arm/bifrost/gpu/mali_kbase_gpu_fault.h
+++ b/drivers/gpu/arm/bifrost/gpu/mali_kbase_gpu_fault.h
@@ -1,11 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  *
- * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,15 +17,14 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 #ifndef _KBASE_GPU_FAULT_H_
 #define _KBASE_GPU_FAULT_H_
 
-/** Returns the name associated with a Mali exception code
- *
+/**
+ * kbase_gpu_exception_name() -
+ * Returns the name associated with a Mali exception code
  * @exception_code: exception code
  *
  * This function is called from the interrupt handler when a GPU fault occurs.
diff --git a/drivers/gpu/arm/bifrost/gpu/mali_kbase_gpu_regmap.h b/drivers/gpu/arm/bifrost/gpu/mali_kbase_gpu_regmap.h
index d8066f43768b..8e5941ed3f3f 100644
--- a/drivers/gpu/arm/bifrost/gpu/mali_kbase_gpu_regmap.h
+++ b/drivers/gpu/arm/bifrost/gpu/mali_kbase_gpu_regmap.h
@@ -1,11 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  *
- * (C) COPYRIGHT 2010-2020 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2021 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,413 +17,17 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 #ifndef _KBASE_GPU_REGMAP_H_
 #define _KBASE_GPU_REGMAP_H_
 
-#include "mali_kbase_gpu_coherency.h"
-#include "mali_kbase_gpu_id.h"
-#if MALI_USE_CSF
-#include "backend/mali_kbase_gpu_regmap_csf.h"
-#else
-#include "backend/mali_kbase_gpu_regmap_jm.h"
-#endif
-
-/* Begin Register Offsets */
-/* GPU control registers */
-
-#define GPU_CONTROL_BASE        0x0000
-#define GPU_CONTROL_REG(r)      (GPU_CONTROL_BASE + (r))
-#define GPU_ID                  0x000   /* (RO) GPU and revision identifier */
-#define L2_FEATURES             0x004   /* (RO) Level 2 cache features */
-#define TILER_FEATURES          0x00C   /* (RO) Tiler Features */
-#define MEM_FEATURES            0x010   /* (RO) Memory system features */
-#define MMU_FEATURES            0x014   /* (RO) MMU features */
-#define AS_PRESENT              0x018   /* (RO) Address space slots present */
-#define GPU_IRQ_RAWSTAT         0x020   /* (RW) */
-#define GPU_IRQ_CLEAR           0x024   /* (WO) */
-#define GPU_IRQ_MASK            0x028   /* (RW) */
-#define GPU_IRQ_STATUS          0x02C   /* (RO) */
-
-#define GPU_COMMAND             0x030   /* (WO) */
-#define GPU_STATUS              0x034   /* (RO) */
-
-#define GPU_DBGEN               (1 << 8)    /* DBGEN wire status */
-
-#define GPU_FAULTSTATUS         0x03C   /* (RO) GPU exception type and fault status */
-#define GPU_FAULTADDRESS_LO     0x040   /* (RO) GPU exception fault address, low word */
-#define GPU_FAULTADDRESS_HI     0x044   /* (RO) GPU exception fault address, high word */
-
-#define L2_CONFIG               0x048   /* (RW) Level 2 cache configuration */
-
-#define GROUPS_L2_COHERENT      (1 << 0) /* Cores groups are l2 coherent */
-#define SUPER_L2_COHERENT       (1 << 1) /* Shader cores within a core
-					  * supergroup are l2 coherent
-					  */
-
-#define PWR_KEY                 0x050   /* (WO) Power manager key register */
-#define PWR_OVERRIDE0           0x054   /* (RW) Power manager override settings */
-#define PWR_OVERRIDE1           0x058   /* (RW) Power manager override settings */
-
-#define CYCLE_COUNT_LO          0x090   /* (RO) Cycle counter, low word */
-#define CYCLE_COUNT_HI          0x094   /* (RO) Cycle counter, high word */
-#define TIMESTAMP_LO            0x098   /* (RO) Global time stamp counter, low word */
-#define TIMESTAMP_HI            0x09C   /* (RO) Global time stamp counter, high word */
-
-#define THREAD_MAX_THREADS      0x0A0   /* (RO) Maximum number of threads per core */
-#define THREAD_MAX_WORKGROUP_SIZE 0x0A4 /* (RO) Maximum workgroup size */
-#define THREAD_MAX_BARRIER_SIZE 0x0A8   /* (RO) Maximum threads waiting at a barrier */
-#define THREAD_FEATURES         0x0AC   /* (RO) Thread features */
-#define THREAD_TLS_ALLOC        0x310   /* (RO) Number of threads per core that TLS must be allocated for */
-
-#define TEXTURE_FEATURES_0      0x0B0   /* (RO) Support flags for indexed texture formats 0..31 */
-#define TEXTURE_FEATURES_1      0x0B4   /* (RO) Support flags for indexed texture formats 32..63 */
-#define TEXTURE_FEATURES_2      0x0B8   /* (RO) Support flags for indexed texture formats 64..95 */
-#define TEXTURE_FEATURES_3      0x0BC   /* (RO) Support flags for texture order */
-
-#define TEXTURE_FEATURES_REG(n) GPU_CONTROL_REG(TEXTURE_FEATURES_0 + ((n) << 2))
-
-#define SHADER_PRESENT_LO       0x100   /* (RO) Shader core present bitmap, low word */
-#define SHADER_PRESENT_HI       0x104   /* (RO) Shader core present bitmap, high word */
-
-#define TILER_PRESENT_LO        0x110   /* (RO) Tiler core present bitmap, low word */
-#define TILER_PRESENT_HI        0x114   /* (RO) Tiler core present bitmap, high word */
-
-#define L2_PRESENT_LO           0x120   /* (RO) Level 2 cache present bitmap, low word */
-#define L2_PRESENT_HI           0x124   /* (RO) Level 2 cache present bitmap, high word */
-
-#define STACK_PRESENT_LO        0xE00   /* (RO) Core stack present bitmap, low word */
-#define STACK_PRESENT_HI        0xE04   /* (RO) Core stack present bitmap, high word */
-
-#define SHADER_READY_LO         0x140   /* (RO) Shader core ready bitmap, low word */
-#define SHADER_READY_HI         0x144   /* (RO) Shader core ready bitmap, high word */
-
-#define TILER_READY_LO          0x150   /* (RO) Tiler core ready bitmap, low word */
-#define TILER_READY_HI          0x154   /* (RO) Tiler core ready bitmap, high word */
-
-#define L2_READY_LO             0x160   /* (RO) Level 2 cache ready bitmap, low word */
-#define L2_READY_HI             0x164   /* (RO) Level 2 cache ready bitmap, high word */
-
-#define STACK_READY_LO          0xE10   /* (RO) Core stack ready bitmap, low word */
-#define STACK_READY_HI          0xE14   /* (RO) Core stack ready bitmap, high word */
-
-#define SHADER_PWRON_LO         0x180   /* (WO) Shader core power on bitmap, low word */
-#define SHADER_PWRON_HI         0x184   /* (WO) Shader core power on bitmap, high word */
-
-#define TILER_PWRON_LO          0x190   /* (WO) Tiler core power on bitmap, low word */
-#define TILER_PWRON_HI          0x194   /* (WO) Tiler core power on bitmap, high word */
-
-#define L2_PWRON_LO             0x1A0   /* (WO) Level 2 cache power on bitmap, low word */
-#define L2_PWRON_HI             0x1A4   /* (WO) Level 2 cache power on bitmap, high word */
-
-#define STACK_PWRON_LO          0xE20   /* (RO) Core stack power on bitmap, low word */
-#define STACK_PWRON_HI          0xE24   /* (RO) Core stack power on bitmap, high word */
-
-#define SHADER_PWROFF_LO        0x1C0   /* (WO) Shader core power off bitmap, low word */
-#define SHADER_PWROFF_HI        0x1C4   /* (WO) Shader core power off bitmap, high word */
-
-#define TILER_PWROFF_LO         0x1D0   /* (WO) Tiler core power off bitmap, low word */
-#define TILER_PWROFF_HI         0x1D4   /* (WO) Tiler core power off bitmap, high word */
-
-#define L2_PWROFF_LO            0x1E0   /* (WO) Level 2 cache power off bitmap, low word */
-#define L2_PWROFF_HI            0x1E4   /* (WO) Level 2 cache power off bitmap, high word */
-
-#define STACK_PWROFF_LO         0xE30   /* (RO) Core stack power off bitmap, low word */
-#define STACK_PWROFF_HI         0xE34   /* (RO) Core stack power off bitmap, high word */
-
-#define SHADER_PWRTRANS_LO      0x200   /* (RO) Shader core power transition bitmap, low word */
-#define SHADER_PWRTRANS_HI      0x204   /* (RO) Shader core power transition bitmap, high word */
-
-#define TILER_PWRTRANS_LO       0x210   /* (RO) Tiler core power transition bitmap, low word */
-#define TILER_PWRTRANS_HI       0x214   /* (RO) Tiler core power transition bitmap, high word */
-
-#define L2_PWRTRANS_LO          0x220   /* (RO) Level 2 cache power transition bitmap, low word */
-#define L2_PWRTRANS_HI          0x224   /* (RO) Level 2 cache power transition bitmap, high word */
-
-#define STACK_PWRTRANS_LO       0xE40   /* (RO) Core stack power transition bitmap, low word */
-#define STACK_PWRTRANS_HI       0xE44   /* (RO) Core stack power transition bitmap, high word */
-
-#define SHADER_PWRACTIVE_LO     0x240   /* (RO) Shader core active bitmap, low word */
-#define SHADER_PWRACTIVE_HI     0x244   /* (RO) Shader core active bitmap, high word */
-
-#define TILER_PWRACTIVE_LO      0x250   /* (RO) Tiler core active bitmap, low word */
-#define TILER_PWRACTIVE_HI      0x254   /* (RO) Tiler core active bitmap, high word */
-
-#define L2_PWRACTIVE_LO         0x260   /* (RO) Level 2 cache active bitmap, low word */
-#define L2_PWRACTIVE_HI         0x264   /* (RO) Level 2 cache active bitmap, high word */
-
-#define COHERENCY_FEATURES      0x300   /* (RO) Coherency features present */
-#define COHERENCY_ENABLE        0x304   /* (RW) Coherency enable */
-
-#define SHADER_CONFIG           0xF04   /* (RW) Shader core configuration (implementation-specific) */
-#define TILER_CONFIG            0xF08   /* (RW) Tiler core configuration (implementation-specific) */
-#define L2_MMU_CONFIG           0xF0C   /* (RW) L2 cache and MMU configuration (implementation-specific) */
-
-/* Job control registers */
-
-#define JOB_CONTROL_BASE        0x1000
-
-#define JOB_CONTROL_REG(r)      (JOB_CONTROL_BASE + (r))
-
-#define JOB_IRQ_RAWSTAT         0x000   /* Raw interrupt status register */
-#define JOB_IRQ_CLEAR           0x004   /* Interrupt clear register */
-#define JOB_IRQ_MASK            0x008   /* Interrupt mask register */
-#define JOB_IRQ_STATUS          0x00C   /* Interrupt status register */
-
-/* MMU control registers */
-
-#define MEMORY_MANAGEMENT_BASE  0x2000
-#define MMU_REG(r)              (MEMORY_MANAGEMENT_BASE + (r))
-
-#define MMU_IRQ_RAWSTAT         0x000   /* (RW) Raw interrupt status register */
-#define MMU_IRQ_CLEAR           0x004   /* (WO) Interrupt clear register */
-#define MMU_IRQ_MASK            0x008   /* (RW) Interrupt mask register */
-#define MMU_IRQ_STATUS          0x00C   /* (RO) Interrupt status register */
-
-#define MMU_AS0                 0x400   /* Configuration registers for address space 0 */
-#define MMU_AS1                 0x440   /* Configuration registers for address space 1 */
-#define MMU_AS2                 0x480   /* Configuration registers for address space 2 */
-#define MMU_AS3                 0x4C0   /* Configuration registers for address space 3 */
-#define MMU_AS4                 0x500   /* Configuration registers for address space 4 */
-#define MMU_AS5                 0x540   /* Configuration registers for address space 5 */
-#define MMU_AS6                 0x580   /* Configuration registers for address space 6 */
-#define MMU_AS7                 0x5C0   /* Configuration registers for address space 7 */
-#define MMU_AS8                 0x600   /* Configuration registers for address space 8 */
-#define MMU_AS9                 0x640   /* Configuration registers for address space 9 */
-#define MMU_AS10                0x680   /* Configuration registers for address space 10 */
-#define MMU_AS11                0x6C0   /* Configuration registers for address space 11 */
-#define MMU_AS12                0x700   /* Configuration registers for address space 12 */
-#define MMU_AS13                0x740   /* Configuration registers for address space 13 */
-#define MMU_AS14                0x780   /* Configuration registers for address space 14 */
-#define MMU_AS15                0x7C0   /* Configuration registers for address space 15 */
-
-/* MMU address space control registers */
-
-#define MMU_AS_REG(n, r)        (MMU_REG(MMU_AS0 + ((n) << 6)) + (r))
-
-#define AS_TRANSTAB_LO         0x00	/* (RW) Translation Table Base Address for address space n, low word */
-#define AS_TRANSTAB_HI         0x04	/* (RW) Translation Table Base Address for address space n, high word */
-#define AS_MEMATTR_LO          0x08	/* (RW) Memory attributes for address space n, low word. */
-#define AS_MEMATTR_HI          0x0C	/* (RW) Memory attributes for address space n, high word. */
-#define AS_LOCKADDR_LO         0x10	/* (RW) Lock region address for address space n, low word */
-#define AS_LOCKADDR_HI         0x14	/* (RW) Lock region address for address space n, high word */
-#define AS_COMMAND             0x18	/* (WO) MMU command register for address space n */
-#define AS_FAULTSTATUS         0x1C	/* (RO) MMU fault status register for address space n */
-#define AS_FAULTADDRESS_LO     0x20	/* (RO) Fault Address for address space n, low word */
-#define AS_FAULTADDRESS_HI     0x24	/* (RO) Fault Address for address space n, high word */
-#define AS_STATUS              0x28	/* (RO) Status flags for address space n */
-
-/* (RW) Translation table configuration for address space n, low word */
-#define AS_TRANSCFG_LO         0x30
-/* (RW) Translation table configuration for address space n, high word */
-#define AS_TRANSCFG_HI         0x34
-/* (RO) Secondary fault address for address space n, low word */
-#define AS_FAULTEXTRA_LO       0x38
-/* (RO) Secondary fault address for address space n, high word */
-#define AS_FAULTEXTRA_HI       0x3C
-
-/* End Register Offsets */
+#include <uapi/gpu/arm/bifrost/gpu/mali_kbase_gpu_regmap.h>
 
 /* Include POWER_CHANGED_SINGLE in debug builds for use in irq latency test. */
 #ifdef CONFIG_MALI_BIFROST_DEBUG
+#undef GPU_IRQ_REG_ALL
 #define GPU_IRQ_REG_ALL (GPU_IRQ_REG_COMMON | POWER_CHANGED_SINGLE)
-#else /* CONFIG_MALI_BIFROST_DEBUG */
-#define GPU_IRQ_REG_ALL (GPU_IRQ_REG_COMMON)
 #endif /* CONFIG_MALI_BIFROST_DEBUG */
 
-/*
- * MMU_IRQ_RAWSTAT register values. Values are valid also for
- * MMU_IRQ_CLEAR, MMU_IRQ_MASK, MMU_IRQ_STATUS registers.
- */
-
-#define MMU_PAGE_FAULT_FLAGS    16
-
-/* Macros returning a bitmask to retrieve page fault or bus error flags from
- * MMU registers */
-#define MMU_PAGE_FAULT(n)       (1UL << (n))
-#define MMU_BUS_ERROR(n)        (1UL << ((n) + MMU_PAGE_FAULT_FLAGS))
-
-/*
- * Begin LPAE MMU TRANSTAB register values
- */
-#define AS_TRANSTAB_LPAE_ADDR_SPACE_MASK   0xfffff000
-#define AS_TRANSTAB_LPAE_ADRMODE_UNMAPPED  (0u << 0)
-#define AS_TRANSTAB_LPAE_ADRMODE_IDENTITY  (1u << 1)
-#define AS_TRANSTAB_LPAE_ADRMODE_TABLE     (3u << 0)
-#define AS_TRANSTAB_LPAE_READ_INNER        (1u << 2)
-#define AS_TRANSTAB_LPAE_SHARE_OUTER       (1u << 4)
-
-#define AS_TRANSTAB_LPAE_ADRMODE_MASK      0x00000003
-
-/*
- * Begin AARCH64 MMU TRANSTAB register values
- */
-#define MMU_HW_OUTA_BITS 40
-#define AS_TRANSTAB_BASE_MASK ((1ULL << MMU_HW_OUTA_BITS) - (1ULL << 4))
-
-/*
- * Begin MMU STATUS register values
- */
-#define AS_STATUS_AS_ACTIVE 0x01
-
-#define AS_FAULTSTATUS_EXCEPTION_CODE_MASK                      (0x7<<3)
-#define AS_FAULTSTATUS_EXCEPTION_CODE_TRANSLATION_FAULT         (0x0<<3)
-#define AS_FAULTSTATUS_EXCEPTION_CODE_PERMISSION_FAULT          (0x1<<3)
-#define AS_FAULTSTATUS_EXCEPTION_CODE_TRANSTAB_BUS_FAULT        (0x2<<3)
-#define AS_FAULTSTATUS_EXCEPTION_CODE_ACCESS_FLAG               (0x3<<3)
-#define AS_FAULTSTATUS_EXCEPTION_CODE_ADDRESS_SIZE_FAULT        (0x4<<3)
-#define AS_FAULTSTATUS_EXCEPTION_CODE_MEMORY_ATTRIBUTES_FAULT   (0x5<<3)
-
-#define AS_FAULTSTATUS_EXCEPTION_TYPE_SHIFT 0
-#define AS_FAULTSTATUS_EXCEPTION_TYPE_MASK (0xFF << AS_FAULTSTATUS_EXCEPTION_TYPE_SHIFT)
-#define AS_FAULTSTATUS_EXCEPTION_TYPE_GET(reg_val) \
-	(((reg_val)&AS_FAULTSTATUS_EXCEPTION_TYPE_MASK) >> AS_FAULTSTATUS_EXCEPTION_TYPE_SHIFT)
-#define AS_FAULTSTATUS_EXCEPTION_TYPE_TRANSLATION_FAULT_0 0xC0
-
-#define AS_FAULTSTATUS_ACCESS_TYPE_SHIFT 8
-#define AS_FAULTSTATUS_ACCESS_TYPE_MASK (0x3 << AS_FAULTSTATUS_ACCESS_TYPE_SHIFT)
-#define AS_FAULTSTATUS_ACCESS_TYPE_GET(reg_val) \
-	(((reg_val)&AS_FAULTSTATUS_ACCESS_TYPE_MASK) >> AS_FAULTSTATUS_ACCESS_TYPE_SHIFT)
-
-#define AS_FAULTSTATUS_ACCESS_TYPE_ATOMIC       (0x0)
-#define AS_FAULTSTATUS_ACCESS_TYPE_EX           (0x1)
-#define AS_FAULTSTATUS_ACCESS_TYPE_READ         (0x2)
-#define AS_FAULTSTATUS_ACCESS_TYPE_WRITE        (0x3)
-
-#define AS_FAULTSTATUS_SOURCE_ID_SHIFT 16
-#define AS_FAULTSTATUS_SOURCE_ID_MASK (0xFFFF << AS_FAULTSTATUS_SOURCE_ID_SHIFT)
-#define AS_FAULTSTATUS_SOURCE_ID_GET(reg_val) \
-	(((reg_val)&AS_FAULTSTATUS_SOURCE_ID_MASK) >> AS_FAULTSTATUS_SOURCE_ID_SHIFT)
-
-/*
- * Begin MMU TRANSCFG register values
- */
-#define AS_TRANSCFG_ADRMODE_LEGACY      0
-#define AS_TRANSCFG_ADRMODE_UNMAPPED    1
-#define AS_TRANSCFG_ADRMODE_IDENTITY    2
-#define AS_TRANSCFG_ADRMODE_AARCH64_4K  6
-#define AS_TRANSCFG_ADRMODE_AARCH64_64K 8
-
-#define AS_TRANSCFG_ADRMODE_MASK        0xF
-
-/*
- * Begin TRANSCFG register values
- */
-#define AS_TRANSCFG_PTW_MEMATTR_MASK (3ull << 24)
-#define AS_TRANSCFG_PTW_MEMATTR_NON_CACHEABLE (1ull << 24)
-#define AS_TRANSCFG_PTW_MEMATTR_WRITE_BACK (2ull << 24)
-
-#define AS_TRANSCFG_PTW_SH_MASK ((3ull << 28))
-#define AS_TRANSCFG_PTW_SH_OS (2ull << 28)
-#define AS_TRANSCFG_PTW_SH_IS (3ull << 28)
-#define AS_TRANSCFG_R_ALLOCATE (1ull << 30)
-
-/*
- * Begin Command Values
- */
-
-/* AS_COMMAND register commands */
-#define AS_COMMAND_NOP         0x00	/* NOP Operation */
-#define AS_COMMAND_UPDATE      0x01	/* Broadcasts the values in AS_TRANSTAB and ASn_MEMATTR to all MMUs */
-#define AS_COMMAND_LOCK        0x02	/* Issue a lock region command to all MMUs */
-#define AS_COMMAND_UNLOCK      0x03	/* Issue a flush region command to all MMUs */
-#define AS_COMMAND_FLUSH       0x04	/* Flush all L2 caches then issue a flush region command to all MMUs
-					   (deprecated - only for use with T60x) */
-#define AS_COMMAND_FLUSH_PT    0x04	/* Flush all L2 caches then issue a flush region command to all MMUs */
-#define AS_COMMAND_FLUSH_MEM   0x05	/* Wait for memory accesses to complete, flush all the L1s cache then
-					   flush all L2 caches then issue a flush region command to all MMUs */
-
-/* GPU_STATUS values */
-#define GPU_STATUS_PRFCNT_ACTIVE            (1 << 2)    /* Set if the performance counters are active. */
-#define GPU_STATUS_PROTECTED_MODE_ACTIVE    (1 << 7)    /* Set if protected mode is active */
-
-/* PRFCNT_CONFIG register values */
-#define PRFCNT_CONFIG_MODE_SHIFT        0 /* Counter mode position. */
-#define PRFCNT_CONFIG_AS_SHIFT          4 /* Address space bitmap position. */
-#define PRFCNT_CONFIG_SETSELECT_SHIFT   8 /* Set select position. */
-
-/* The performance counters are disabled. */
-#define PRFCNT_CONFIG_MODE_OFF          0
-/* The performance counters are enabled, but are only written out when a
- * PRFCNT_SAMPLE command is issued using the GPU_COMMAND register.
- */
-#define PRFCNT_CONFIG_MODE_MANUAL       1
-/* The performance counters are enabled, and are written out each time a tile
- * finishes rendering.
- */
-#define PRFCNT_CONFIG_MODE_TILE         2
-
-/* AS<n>_MEMATTR values from MMU_MEMATTR_STAGE1: */
-/* Use GPU implementation-defined caching policy. */
-#define AS_MEMATTR_IMPL_DEF_CACHE_POLICY 0x88ull
-/* The attribute set to force all resources to be cached. */
-#define AS_MEMATTR_FORCE_TO_CACHE_ALL    0x8Full
-/* Inner write-alloc cache setup, no outer caching */
-#define AS_MEMATTR_WRITE_ALLOC           0x8Dull
-
-/* Use GPU implementation-defined  caching policy. */
-#define AS_MEMATTR_LPAE_IMPL_DEF_CACHE_POLICY 0x48ull
-/* The attribute set to force all resources to be cached. */
-#define AS_MEMATTR_LPAE_FORCE_TO_CACHE_ALL    0x4Full
-/* Inner write-alloc cache setup, no outer caching */
-#define AS_MEMATTR_LPAE_WRITE_ALLOC           0x4Dull
-/* Set to implementation defined, outer caching */
-#define AS_MEMATTR_LPAE_OUTER_IMPL_DEF        0x88ull
-/* Set to write back memory, outer caching */
-#define AS_MEMATTR_LPAE_OUTER_WA              0x8Dull
-/* There is no LPAE support for non-cacheable, since the memory type is always
- * write-back.
- * Marking this setting as reserved for LPAE
- */
-#define AS_MEMATTR_LPAE_NON_CACHEABLE_RESERVED
-
-/* L2_MMU_CONFIG register */
-#define L2_MMU_CONFIG_ALLOW_SNOOP_DISPARITY_SHIFT       (23)
-#define L2_MMU_CONFIG_ALLOW_SNOOP_DISPARITY             (0x1 << L2_MMU_CONFIG_ALLOW_SNOOP_DISPARITY_SHIFT)
-
-/* End L2_MMU_CONFIG register */
-
-/* THREAD_* registers */
-
-/* THREAD_FEATURES IMPLEMENTATION_TECHNOLOGY values */
-#define IMPLEMENTATION_UNSPECIFIED  0
-#define IMPLEMENTATION_SILICON      1
-#define IMPLEMENTATION_FPGA         2
-#define IMPLEMENTATION_MODEL        3
-
-/* Default values when registers are not supported by the implemented hardware */
-#define THREAD_MT_DEFAULT     256
-#define THREAD_MWS_DEFAULT    256
-#define THREAD_MBS_DEFAULT    256
-#define THREAD_MR_DEFAULT     1024
-#define THREAD_MTQ_DEFAULT    4
-#define THREAD_MTGS_DEFAULT   10
-
-/* End THREAD_* registers */
-
-/* SHADER_CONFIG register */
-#define SC_LS_ALLOW_ATTR_TYPES      (1ul << 16)
-#define SC_TLS_HASH_ENABLE          (1ul << 17)
-#define SC_LS_ATTR_CHECK_DISABLE    (1ul << 18)
-#define SC_VAR_ALGORITHM            (1ul << 29)
-/* End SHADER_CONFIG register */
-
-/* TILER_CONFIG register */
-#define TC_CLOCK_GATE_OVERRIDE      (1ul << 0)
-/* End TILER_CONFIG register */
-
-/* L2_CONFIG register */
-#define L2_CONFIG_SIZE_SHIFT        16
-#define L2_CONFIG_SIZE_MASK         (0xFFul << L2_CONFIG_SIZE_SHIFT)
-#define L2_CONFIG_HASH_SHIFT        24
-#define L2_CONFIG_HASH_MASK         (0xFFul << L2_CONFIG_HASH_SHIFT)
-/* End L2_CONFIG register */
-
-/* IDVS_GROUP register */
-#define IDVS_GROUP_SIZE_SHIFT (16)
-#define IDVS_GROUP_MAX_SIZE (0x3F)
-
 #endif /* _KBASE_GPU_REGMAP_H_ */
diff --git a/drivers/gpu/arm/bifrost/ipa/Kbuild b/drivers/gpu/arm/bifrost/ipa/Kbuild
index 04aa9d82d7c5..4faa325732c7 100644
--- a/drivers/gpu/arm/bifrost/ipa/Kbuild
+++ b/drivers/gpu/arm/bifrost/ipa/Kbuild
@@ -1,10 +1,11 @@
+# SPDX-License-Identifier: GPL-2.0
 #
-# (C) COPYRIGHT 2016-2018 ARM Limited. All rights reserved.
+# (C) COPYRIGHT 2016-2018, 2020 ARM Limited. All rights reserved.
 #
 # This program is free software and is provided to you under the terms of the
 # GNU General Public License version 2 as published by the Free Software
 # Foundation, and any use by you of this program is subject to the terms
-# of such GNU licence.
+# of such GNU license.
 #
 # This program is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -15,14 +16,20 @@
 # along with this program; if not, you can access it online at
 # http://www.gnu.org/licenses/gpl-2.0.html.
 #
-# SPDX-License-Identifier: GPL-2.0
-#
 #
 
 bifrost_kbase-y += \
 	ipa/mali_kbase_ipa_simple.o \
-	ipa/mali_kbase_ipa.o \
-	ipa/mali_kbase_ipa_vinstr_g7x.o \
-	ipa/mali_kbase_ipa_vinstr_common.o
+	ipa/mali_kbase_ipa.o
 
 bifrost_kbase-$(CONFIG_DEBUG_FS) += ipa/mali_kbase_ipa_debugfs.o
+
+ifeq ($(MALI_USE_CSF),1)
+	bifrost_kbase-y += \
+		ipa/backend/mali_kbase_ipa_counter_csf.o \
+		ipa/backend/mali_kbase_ipa_counter_common_csf.o
+else
+	bifrost_kbase-y += \
+		ipa/backend/mali_kbase_ipa_counter_jm.o \
+		ipa/backend/mali_kbase_ipa_counter_common_jm.o
+endif
diff --git a/drivers/gpu/arm/bifrost/ipa/backend/mali_kbase_ipa_counter_common_csf.c b/drivers/gpu/arm/bifrost/ipa/backend/mali_kbase_ipa_counter_common_csf.c
new file mode 100644
index 000000000000..89bba49a7c98
--- /dev/null
+++ b/drivers/gpu/arm/bifrost/ipa/backend/mali_kbase_ipa_counter_common_csf.c
@@ -0,0 +1,457 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ *
+ * (C) COPYRIGHT 2020 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+#include "mali_kbase_ipa_counter_common_csf.h"
+#include "ipa/mali_kbase_ipa_debugfs.h"
+
+#define DEFAULT_SCALING_FACTOR 5
+
+/* If the value of GPU_ACTIVE is below this, use the simple model
+ * instead, to avoid extrapolating small amounts of counter data across
+ * large sample periods.
+ */
+#define DEFAULT_MIN_SAMPLE_CYCLES 10000
+
+/* Typical value for the sampling interval is expected to be less than 100ms,
+ * so 5 seconds is a reasonable upper limit for the time gap between the
+ * 2 samples.
+ */
+#define MAX_SAMPLE_INTERVAL_MS ((s64)5000)
+
+/* Maximum increment that is expected for a counter value during a sampling
+ * interval is derived assuming
+ * - max sampling interval of 1 second.
+ * - max GPU frequency of 2 GHz.
+ * - max number of cores as 32.
+ * - max increment of 4 in per core counter value at every clock cycle.
+ *
+ * So max increment = 2 * 10^9 * 32 * 4 = ~2^38.
+ * If a counter increases by an amount greater than this value, then an error
+ * will be returned and the simple power model will be used.
+ */
+#define MAX_COUNTER_INCREMENT (((u64)1 << 38) - 1)
+
+static inline s64 kbase_ipa_add_saturate(s64 a, s64 b)
+{
+	/* Saturating signed addition: a sum that would leave the s64 range is
+	 * pinned to S64_MAX or S64_MIN instead of being allowed to wrap.
+	 */
+	if (a > 0 && b > (S64_MAX - a))
+		return S64_MAX;
+
+	if (a < 0 && b < (S64_MIN - a))
+		return S64_MIN;
+
+	return a + b;
+}
+
+static s64 kbase_ipa_group_energy(s32 coeff, u64 counter_value)
+{
+	/* One counter's energy term, i.e. coefficient times counter value.
+	 * Range: -2^59 < ret < 2^59 (0 < counter_value < 2^38, |coeff| < 2^21)
+	 */
+	return counter_value * (s64)coeff;
+}
+
+/**
+ * kbase_ipa_attach_ipa_control() - register with kbase_ipa_control
+ * @model_data: Pointer to counter model data
+ *
+ * Register IPA counter model as a client of kbase_ipa_control, which
+ * provides an interface to retrieve the accumulated value of hardware
+ * counters to calculate energy consumption.
+ *
+ * Return: 0 on success, -ENOMEM or the kbase_ipa_control_register() error.
+ */
+static int
+kbase_ipa_attach_ipa_control(struct kbase_ipa_counter_model_data *model_data)
+{
+	struct kbase_device *kbdev = model_data->kbdev;
+	struct kbase_ipa_control_perf_counter *perf_counters;
+	u32 cnt_idx = 0;
+	int err;
+	size_t i;
+
+	/* The GPU_ACTIVE counter is queried as well (the leading 1 below). It
+	 * is required to normalize the top-level and shader core counters.
+	 */
+	model_data->num_counters = 1 + model_data->num_top_level_cntrs +
+				   model_data->num_shader_cores_cntrs;
+
+	perf_counters = kcalloc(model_data->num_counters,
+				sizeof(*perf_counters), GFP_KERNEL);
+
+	if (!perf_counters) {
+		dev_err(kbdev->dev,
+			"Failed to allocate memory for perf_counters array");
+		return -ENOMEM;
+	}
+
+	/* Entry 0 always describes GPU_ACTIVE, regardless of the energy model
+	 * used by the CSF GPUs. The top-level counter descriptions follow it
+	 * and the shader core counter descriptions come last.
+	 */
+	perf_counters[cnt_idx].type = KBASE_IPA_CORE_TYPE_CSHW;
+	perf_counters[cnt_idx].idx = GPU_ACTIVE_CNT_IDX;
+	perf_counters[cnt_idx].gpu_norm = false;
+	perf_counters[cnt_idx].scaling_factor = 1;
+	cnt_idx++;
+
+	for (i = 0; i < model_data->num_top_level_cntrs; ++i) {
+		const struct kbase_ipa_counter *counter =
+			&model_data->top_level_cntrs_def[i];
+
+		perf_counters[cnt_idx].type = counter->counter_block_type;
+		perf_counters[cnt_idx].idx = counter->counter_block_offset;
+		perf_counters[cnt_idx].gpu_norm = false;
+		perf_counters[cnt_idx].scaling_factor = 1;
+		cnt_idx++;
+	}
+
+	for (i = 0; i < model_data->num_shader_cores_cntrs; ++i) {
+		const struct kbase_ipa_counter *counter =
+			&model_data->shader_cores_cntrs_def[i];
+
+		perf_counters[cnt_idx].type = counter->counter_block_type;
+		perf_counters[cnt_idx].idx = counter->counter_block_offset;
+		perf_counters[cnt_idx].gpu_norm = false;
+		perf_counters[cnt_idx].scaling_factor = 1;
+		cnt_idx++;
+	}
+
+	err = kbase_ipa_control_register(kbdev, perf_counters,
+					 model_data->num_counters,
+					 &model_data->ipa_control_client);
+	if (err)
+		dev_err(kbdev->dev,
+			"Failed to register IPA with kbase_ipa_control");
+
+	kfree(perf_counters); /* released on success and on failure alike */
+	return err;
+}
+
+/**
+ * kbase_ipa_detach_ipa_control() - Unregister from kbase_ipa_control.
+ * @model_data: Counter model data whose client handle, if any, is released.
+ */
+static void
+kbase_ipa_detach_ipa_control(struct kbase_ipa_counter_model_data *model_data)
+{
+	if (!model_data->ipa_control_client)
+		return;
+	kbase_ipa_control_unregister(model_data->kbdev,
+				     model_data->ipa_control_client);
+	model_data->ipa_control_client = NULL;
+}
+
+/* Sum coeff * value over one block's counters, normalise by active_cycles and
+ * the reference voltage squared, apply the scaling factor and store the result
+ * in *coeffp. Return: 0, or -ERANGE if a counter is above the expected range.
+ */
+static int calculate_coeff(struct kbase_ipa_counter_model_data *model_data,
+			   const struct kbase_ipa_counter *const cnt_defs,
+			   size_t num_counters, s32 *counter_coeffs,
+			   u64 *counter_values, u32 active_cycles, u32 *coeffp)
+{
+	u64 coeff = 0, coeff_mul = 0;
+	s64 total_energy = 0;
+	size_t i;
+
+	/* Range for the 'counter_value' is [0, 2^38), which is checked before
+	 * the value is used. Range for the per-counter coefficient is
+	 * [-2^21, 2^21]. So range for the 'group_energy' is [-2^59, 2^59) and
+	 * range for the 'total_energy' is +/- 2^59 * number of IPA groups
+	 * (~16), i.e. [-2^63, 2^63).
+	 */
+	for (i = 0; i < num_counters; i++) {
+		s32 cnt_coeff = counter_coeffs[i];
+		u64 counter_value = counter_values[i];
+		s64 group_energy;
+
+		if (counter_value > MAX_COUNTER_INCREMENT) {
+			dev_warn(model_data->kbdev->dev,
+				 "Increment in counter %s more than expected",
+				 cnt_defs[i].name);
+			return -ERANGE;
+		}
+		group_energy = kbase_ipa_group_energy(cnt_coeff, counter_value);
+		total_energy =
+			kbase_ipa_add_saturate(total_energy, group_energy);
+	}
+
+	/* A negative total leaves coeff at 0. Range: 0 <= coeff < 2^63 */
+	if (total_energy >= 0)
+		coeff = total_energy;
+	else
+		dev_dbg(model_data->kbdev->dev,
+			"Energy value came negative as %lld", total_energy);
+
+	/* Range: 0 <= coeff < 2^63 (because active_cycles >= 1). However, this
+	 * can be constrained further: the counters used for dynamic power
+	 * estimation can only increment by about 128 maximum per clock cycle.
+	 * This is because the max number of shader cores is expected to be 32
+	 * (max number of L2 slices is expected to be 8) and some per shader
+	 * core counters, like SC_BEATS_RD_TEX_EXT & SC_EXEC_STARVE_ARITH, can
+	 * increment by 4 every clock cycle: each "beat" is defined as 128 bits
+	 * and each shader core can (currently) do 512 bits read and 512 bits
+	 * write to/from the L2 cache per cycle.
+	 * We can thus write the range of 'coeff' in terms of active_cycles:
+	 *
+	 * coeff = SUM(coeffN * counterN * num_cores_for_counterN)
+	 * coeff <= SUM(coeffN * counterN) * max_cores
+	 * coeff <= num_IPA_groups * max_coeff * max_counter * max_cores
+	 *       (substitute max_counter = 2^2 * active_cycles)
+	 * coeff <= num_IPA_groups * max_coeff * 2^2 * active_cycles * max_cores
+	 * coeff <=    2^4         *    2^21   * 2^2 * active_cycles * 2^5
+	 * coeff <= 2^32 * active_cycles
+	 *
+	 * So after the division: 0 <= coeff <= 2^32
+	 */
+	coeff = div_u64(coeff, active_cycles);
+
+	/* Not all models were derived at the same reference voltage. Voltage
+	 * scaling is done by multiplying by V^2, so we need to *divide* by
+	 * Vref^2 here, which is done as two successive divisions by Vref.
+	 * Range: 0 <= coeff <= 2^35 after the first, <= 2^38 after the second
+	 */
+	coeff = div_u64(coeff * 1000, max(model_data->reference_voltage, 1));
+	coeff = div_u64(coeff * 1000, max(model_data->reference_voltage, 1));
+
+	/* Scale by user-specified integer factor.
+	 * Range: 0 <= coeff_mul < 2^43
+	 */
+	coeff_mul = coeff * model_data->scaling_factor;
+
+	/* The power models have results with units mW/(MHz V^2), i.e.
+	 * nW/(Hz V^2). With precision of 1/1000000, this becomes fW/(Hz V^2),
+	 * which are the units of coeff_mul. kbase_scale_dynamic_power()
+	 * however expects units of pW/(Hz V^2), so divide by 1000.
+	 * Range: 0 <= coeff_mul < 2^33
+	 */
+	coeff_mul = div_u64(coeff_mul, 1000u);
+
+	/* Clamp to a sensible range - 2^16 gives about 14W at 400MHz/750mV */
+	*coeffp = clamp(coeff_mul, (u64)0, (u64)1 << 16);
+
+	return 0;
+}
+
+int kbase_ipa_counter_dynamic_coeff(struct kbase_ipa_model *model, u32 *coeffp)
+{
+	struct kbase_ipa_counter_model_data *model_data =
+		(struct kbase_ipa_counter_model_data *)model->model_data;
+	struct kbase_device *kbdev = model->kbdev;
+	s32 *counter_coeffs_p = model_data->counter_coeffs;
+	u64 *cnt_values_p = model_data->counter_values;
+	const u64 num_counters = model_data->num_counters;
+	u32 active_cycles;
+	ktime_t now, diff;
+	s64 diff_ms;
+	int ret;
+
+	lockdep_assert_held(&kbdev->ipa.lock);
+
+	/* The last argument is supposed to be a pointer to the location that
+	 * will store the time for which GPU has been in protected mode since
+	 * last query. This can be passed as NULL as counter model itself will
+	 * not be used when GPU enters protected mode, as IPA is supposed to
+	 * switch to the simple power model.
+	 */
+	ret = kbase_ipa_control_query(kbdev,
+				      model_data->ipa_control_client,
+				      cnt_values_p, num_counters, NULL);
+	if (WARN_ON(ret))
+		return ret;
+
+	now = ktime_get();
+	diff = ktime_sub(now, kbdev->ipa.last_sample_time);
+	diff_ms = ktime_to_ms(diff);
+
+	kbdev->ipa.last_sample_time = now; /* updated even if rejected below */
+
+	/* The counter values cannot be relied upon if the sampling interval was
+	 * too long. Typically this will happen when the polling is started
+	 * after the temperature has risen above a certain trip point. After
+	 * that regular calls every 25-100 ms interval are expected.
+	 */
+	if (diff_ms > MAX_SAMPLE_INTERVAL_MS) {
+		dev_dbg(kbdev->dev,
+			"Last sample was taken %lld milli seconds ago",
+			diff_ms);
+		return -EOVERFLOW;
+	}
+
+	/* Range: 0 (GPU not used at all), to the max sampling interval, say
+	 * 1 second, * max GPU frequency (GPU 100% utilized).
+	 * 0 <= active_cycles <= 1 * ~2GHz
+	 * 0 <= active_cycles < 2^31, so a value above U32_MAX is rejected.
+	 */
+	if (*cnt_values_p > U32_MAX) {
+		dev_warn(kbdev->dev,
+			 "Increment in GPU_ACTIVE counter more than expected");
+		return -ERANGE;
+	}
+
+	active_cycles = (u32)*cnt_values_p;
+
+	/* If the value of the active_cycles is less than the threshold, then
+	 * return an error so that IPA framework can approximate using the
+	 * cached simple model results instead. This may be more accurate
+	 * than extrapolating using a very small counter dump.
+	 */
+	if (active_cycles < (u32)max(model_data->min_sample_cycles, 0))
+		return -ENODATA;
+
+	/* Range: 1 <= active_cycles < 2^31 (it is used as a divisor) */
+	active_cycles = max(1u, active_cycles);
+
+	cnt_values_p++; /* step over the GPU_ACTIVE value in slot 0 */
+	ret = calculate_coeff(model_data, model_data->top_level_cntrs_def,
+			      model_data->num_top_level_cntrs,
+			      counter_coeffs_p, cnt_values_p, active_cycles,
+			      &coeffp[KBASE_IPA_BLOCK_TYPE_TOP_LEVEL]);
+	if (ret)
+		return ret;
+
+	cnt_values_p += model_data->num_top_level_cntrs;
+	counter_coeffs_p += model_data->num_top_level_cntrs;
+	ret = calculate_coeff(model_data, model_data->shader_cores_cntrs_def,
+			      model_data->num_shader_cores_cntrs,
+			      counter_coeffs_p, cnt_values_p, active_cycles,
+			      &coeffp[KBASE_IPA_BLOCK_TYPE_SHADER_CORES]);
+
+	return ret;
+}
+
+void kbase_ipa_counter_reset_data(struct kbase_ipa_model *model)
+{
+	struct kbase_ipa_counter_model_data *data =
+		(struct kbase_ipa_counter_model_data *)model->model_data;
+
+	lockdep_assert_held(&model->kbdev->ipa.lock);
+
+	/* The queried values are stored but not processed: the next call to
+	 * kbase_ipa_counter_dynamic_coeff() only sees increments from here on.
+	 */
+	WARN_ON(kbase_ipa_control_query(model->kbdev,
+					data->ipa_control_client,
+					data->counter_values,
+					data->num_counters, NULL));
+}
+
+int kbase_ipa_counter_common_model_init(struct kbase_ipa_model *model,
+		const struct kbase_ipa_counter *top_level_cntrs_def,
+		size_t num_top_level_cntrs,
+		const struct kbase_ipa_counter *shader_cores_cntrs_def,
+		size_t num_shader_cores_cntrs,
+		s32 reference_voltage)
+{
+	struct kbase_ipa_counter_model_data *model_data;
+	s32 *counter_coeffs_p;
+	int err = 0;
+	size_t i;
+
+	if (!model || !top_level_cntrs_def || !shader_cores_cntrs_def ||
+	    !num_top_level_cntrs || !num_shader_cores_cntrs)
+		return -EINVAL;
+
+	/* counter_values[] needs room for GPU_ACTIVE plus every counter. */
+	if (num_top_level_cntrs >= KBASE_IPA_MAX_COUNTER_DEF_NUM ||
+	    num_shader_cores_cntrs >=
+		    KBASE_IPA_MAX_COUNTER_DEF_NUM - num_top_level_cntrs)
+		return -EINVAL;
+
+	model_data = kzalloc(sizeof(*model_data), GFP_KERNEL);
+	if (!model_data)
+		return -ENOMEM;
+
+	model_data->kbdev = model->kbdev;
+	model_data->top_level_cntrs_def = top_level_cntrs_def;
+	model_data->num_top_level_cntrs = num_top_level_cntrs;
+	model_data->shader_cores_cntrs_def = shader_cores_cntrs_def;
+	model_data->num_shader_cores_cntrs = num_shader_cores_cntrs;
+	model->model_data = (void *)model_data;
+
+	/* Top-level coefficients are stored first, shader core ones after. */
+	counter_coeffs_p = model_data->counter_coeffs;
+	for (i = 0; i < num_top_level_cntrs; ++i, ++counter_coeffs_p) {
+		const struct kbase_ipa_counter *counter =
+			&model_data->top_level_cntrs_def[i];
+
+		*counter_coeffs_p = counter->coeff_default_value;
+
+		err = kbase_ipa_model_add_param_s32(
+			model, counter->name, counter_coeffs_p, 1, false);
+		if (err)
+			goto exit;
+	}
+
+	for (i = 0; i < num_shader_cores_cntrs; ++i, ++counter_coeffs_p) {
+		const struct kbase_ipa_counter *counter =
+			&model_data->shader_cores_cntrs_def[i];
+
+		*counter_coeffs_p = counter->coeff_default_value;
+
+		err = kbase_ipa_model_add_param_s32(
+			model, counter->name, counter_coeffs_p, 1, false);
+		if (err)
+			goto exit;
+	}
+
+	model_data->scaling_factor = DEFAULT_SCALING_FACTOR;
+	err = kbase_ipa_model_add_param_s32(
+		model, "scale", &model_data->scaling_factor, 1, false);
+	if (err)
+		goto exit;
+
+	model_data->min_sample_cycles = DEFAULT_MIN_SAMPLE_CYCLES;
+	err = kbase_ipa_model_add_param_s32(model, "min_sample_cycles",
+					    &model_data->min_sample_cycles, 1,
+					    false);
+	if (err)
+		goto exit;
+
+	model_data->reference_voltage = reference_voltage;
+	err = kbase_ipa_model_add_param_s32(model, "reference_voltage",
+					    &model_data->reference_voltage, 1,
+					    false);
+	if (err)
+		goto exit;
+
+	err = kbase_ipa_attach_ipa_control(model_data);
+
+exit:
+	if (err) {
+		kbase_ipa_model_param_free_all(model);
+		kfree(model_data);
+		model->model_data = NULL; /* no pointer to freed memory */
+	}
+	return err;
+}
+
+void kbase_ipa_counter_common_model_term(struct kbase_ipa_model *model)
+{
+	struct kbase_ipa_counter_model_data *model_data =
+		(struct kbase_ipa_counter_model_data *)model->model_data;
+
+	kbase_ipa_detach_ipa_control(model_data); /* unregister the client */
+	kfree(model_data); /* NOTE: model->model_data is not cleared here */
+}
diff --git a/drivers/gpu/arm/bifrost/ipa/backend/mali_kbase_ipa_counter_common_csf.h b/drivers/gpu/arm/bifrost/ipa/backend/mali_kbase_ipa_counter_common_csf.h
new file mode 100644
index 000000000000..8e299314d7de
--- /dev/null
+++ b/drivers/gpu/arm/bifrost/ipa/backend/mali_kbase_ipa_counter_common_csf.h
@@ -0,0 +1,159 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ *
+ * (C) COPYRIGHT 2020 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+#ifndef _KBASE_IPA_COUNTER_COMMON_CSF_H_
+#define _KBASE_IPA_COUNTER_COMMON_CSF_H_
+
+#include "mali_kbase.h"
+#include "csf/ipa_control/mali_kbase_csf_ipa_control.h"
+
+/* Maximum number of HW counters used by the IPA counter model. */
+#define KBASE_IPA_MAX_COUNTER_DEF_NUM 24
+
+struct kbase_ipa_counter_model_data;
+
+/**
+ * struct kbase_ipa_counter_model_data - IPA counter model context per device
+ * @kbdev:               Pointer to kbase device
+ * @ipa_control_client:  Handle returned on registering IPA counter model as a
+ *                       client of kbase_ipa_control.
+ * @top_level_cntrs_def: Array of description of HW counters used by the IPA
+ *                       counter model for top-level.
+ * @num_top_level_cntrs: Number of elements in @top_level_cntrs_def array.
+ * @shader_cores_cntrs_def: Array of description of HW counters used by the IPA
+ *                       counter model for shader cores.
+ * @num_shader_cores_cntrs: Number of elements in @shader_cores_cntrs_def array.
+ * @counter_coeffs:      Coefficients of the HW counters, top-level ones first
+ * @counter_values:      Buffer to store the accumulated value of HW counters
+ *                       retrieved from kbase_ipa_control, GPU_ACTIVE first.
+ * @num_counters:        Count of queried values: GPU_ACTIVE plus all counters.
+ * @reference_voltage:   voltage, in mV, of the operating point used when
+ *                       deriving the power model coefficients. Range approx
+ *                       0.1V - 5V (~= 8V): 2^7 <= reference_voltage <= 2^13
+ * @scaling_factor:      User-specified power scaling factor. This is an
+ *                       integer, which is multiplied by the power coefficient
+ *                       just before OPP scaling.
+ *                       Range approx 0-32: 0 < scaling_factor < 2^5
+ * @min_sample_cycles:   If the value of the GPU_ACTIVE counter (the number of
+ *                       cycles the GPU was working) is less than
+ *                       min_sample_cycles, the counter model will return an
+ *                       error, causing the IPA framework to approximate using
+ *                       the cached simple model results instead. This may be
+ *                       more accurate than extrapolating using a very small
+ *                       counter dump.
+ */
+struct kbase_ipa_counter_model_data {
+	struct kbase_device *kbdev;
+	void *ipa_control_client;
+	const struct kbase_ipa_counter *top_level_cntrs_def;
+	size_t num_top_level_cntrs;
+	const struct kbase_ipa_counter *shader_cores_cntrs_def;
+	size_t num_shader_cores_cntrs;
+	s32 counter_coeffs[KBASE_IPA_MAX_COUNTER_DEF_NUM];
+	u64 counter_values[KBASE_IPA_MAX_COUNTER_DEF_NUM];
+	u64 num_counters;
+	s32 reference_voltage;
+	s32 scaling_factor;
+	s32 min_sample_cycles;
+};
+
+/**
+ * struct kbase_ipa_counter - represents a single HW counter used by IPA model
+ * @name:                 Name of the HW counter; also used as the name of the
+ *                        model parameter that holds its coefficient.
+ * @coeff_default_value:  Default value of coefficient for the counter.
+ *                        Coefficients are interpreted as fractions where the
+ *                        denominator is 1000000.
+ * @counter_block_offset: Index to the counter within the counter block of
+ *                        type @counter_block_type.
+ * @counter_block_type:   Type of the counter block.
+ */
+struct kbase_ipa_counter {
+	const char *name;
+	s32 coeff_default_value;
+	u32 counter_block_offset;
+	enum kbase_ipa_core_type counter_block_type;
+};
+
+/**
+ * kbase_ipa_counter_dynamic_coeff() - calculate dynamic power based on HW counters
+ * @model:		pointer to instantiated model
+ * @coeffp:		pointer to location where calculated power, in
+ *			pW/(Hz V^2), is stored for top level and shader cores.
+ *
+ * This is a GPU-agnostic implementation of the get_dynamic_coeff()
+ * function of an IPA model. It relies on the model being populated
+ * with GPU-specific attributes at initialization time.
+ *
+ * Return: 0 on success, or an error code.
+ */
+int kbase_ipa_counter_dynamic_coeff(struct kbase_ipa_model *model, u32 *coeffp);
+
+/**
+ * kbase_ipa_counter_reset_data() - Reset the counters data used for dynamic
+ *                                  power estimation
+ * @model:		pointer to instantiated model
+ *
+ * Retrieve the accumulated value of HW counters from the kbase_ipa_control
+ * component, without doing any processing, which is effectively a reset as the
+ * next call to kbase_ipa_counter_dynamic_coeff() will see the increment in
+ * counter values from this point onwards.
+ */
+void kbase_ipa_counter_reset_data(struct kbase_ipa_model *model);
+
+/**
+ * kbase_ipa_counter_common_model_init() - initialize ipa power model
+ * @model:		 Pointer to the ipa power model to initialize
+ * @top_level_cntrs_def: Array corresponding to the HW counters used in the
+ *                       top level counter model, contains the counter index,
+ *                       default value of the coefficient.
+ * @num_top_level_cntrs: Number of elements in the array @top_level_cntrs_def
+ * @shader_cores_cntrs_def: Array corresponding to the HW counters used in the
+ *                       shader cores counter model, contains the counter index,
+ *                       default value of the coefficient.
+ * @num_shader_cores_cntrs: Number of elements in the array
+ *                          @shader_cores_cntrs_def.
+ * @reference_voltage:   voltage, in mV, of the operating point used when
+ *                       deriving the power model coefficients.
+ *
+ * This function performs initialization steps common for ipa counter based
+ * model of all CSF GPUs. The set of counters and their respective weights
+ * could be different for each GPU. The tuple of counter index and weight
+ * is passed via  @top_level_cntrs_def and @shader_cores_cntrs_def array.
+ *
+ * Return: 0 on success, error code otherwise
+ */
+int kbase_ipa_counter_common_model_init(struct kbase_ipa_model *model,
+		const struct kbase_ipa_counter *top_level_cntrs_def,
+		size_t num_top_level_cntrs,
+		const struct kbase_ipa_counter *shader_cores_cntrs_def,
+		size_t num_shader_cores_cntrs,
+		s32 reference_voltage);
+/**
+ * kbase_ipa_counter_common_model_term() - terminate ipa power model
+ * @model: ipa power model to terminate
+ *
+ * This function performs all necessary steps to terminate ipa power model
+ * including clean up of resources allocated to hold model data.
+ */
+void kbase_ipa_counter_common_model_term(struct kbase_ipa_model *model);
+
+#endif /* _KBASE_IPA_COUNTER_COMMON_CSF_H_ */
diff --git a/drivers/gpu/arm/bifrost/ipa/mali_kbase_ipa_vinstr_common.c b/drivers/gpu/arm/bifrost/ipa/backend/mali_kbase_ipa_counter_common_jm.c
similarity index 97%
rename from drivers/gpu/arm/bifrost/ipa/mali_kbase_ipa_vinstr_common.c
rename to drivers/gpu/arm/bifrost/ipa/backend/mali_kbase_ipa_counter_common_jm.c
index 702db1623101..076dcd0266df 100644
--- a/drivers/gpu/arm/bifrost/ipa/mali_kbase_ipa_vinstr_common.c
+++ b/drivers/gpu/arm/bifrost/ipa/backend/mali_kbase_ipa_counter_common_jm.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  *
  * (C) COPYRIGHT 2017-2020 ARM Limited. All rights reserved.
@@ -5,7 +6,7 @@
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,12 +17,10 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
-#include "mali_kbase_ipa_vinstr_common.h"
-#include "mali_kbase_ipa_debugfs.h"
+#include "mali_kbase_ipa_counter_common_jm.h"
+#include "ipa/mali_kbase_ipa_debugfs.h"
 
 #define DEFAULT_SCALING_FACTOR 5
 
@@ -273,6 +272,12 @@ err0:
 	return err;
 }
 
+void kbase_ipa_vinstr_reset_data(struct kbase_ipa_model *model)
+{
+	/* Not implemented yet: @model is ignored; warn once if ever called */
+	WARN_ON_ONCE(1);
+}
+
 int kbase_ipa_vinstr_common_model_init(struct kbase_ipa_model *model,
 				       const struct kbase_ipa_group *ipa_groups_def,
 				       size_t ipa_group_size,
diff --git a/drivers/gpu/arm/bifrost/ipa/mali_kbase_ipa_vinstr_common.h b/drivers/gpu/arm/bifrost/ipa/backend/mali_kbase_ipa_counter_common_jm.h
similarity index 88%
rename from drivers/gpu/arm/bifrost/ipa/mali_kbase_ipa_vinstr_common.h
rename to drivers/gpu/arm/bifrost/ipa/backend/mali_kbase_ipa_counter_common_jm.h
index 46e3cd4bc6e1..24602beed3d2 100644
--- a/drivers/gpu/arm/bifrost/ipa/mali_kbase_ipa_vinstr_common.h
+++ b/drivers/gpu/arm/bifrost/ipa/backend/mali_kbase_ipa_counter_common_jm.h
@@ -1,11 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  *
- * (C) COPYRIGHT 2017-2018 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2017-2018, 2020-2021 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,12 +17,10 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
-#ifndef _KBASE_IPA_VINSTR_COMMON_H_
-#define _KBASE_IPA_VINSTR_COMMON_H_
+#ifndef _KBASE_IPA_COUNTER_COMMON_JM_H_
+#define _KBASE_IPA_COUNTER_COMMON_JM_H_
 
 #include "mali_kbase.h"
 #include "mali_kbase_hwcnt_virtualizer.h"
@@ -47,6 +46,7 @@ typedef u32 (*kbase_ipa_get_active_cycles_callback)(struct kbase_ipa_model_vinst
 /**
  * struct kbase_ipa_model_vinstr_data - IPA context per device
  * @kbdev:               pointer to kbase device
+ * @group_values:        values of coefficients for IPA groups
  * @groups_def:          Array of IPA groups.
  * @groups_def_num:      Number of elements in the array of IPA groups.
  * @get_active_cycles:   Callback to return number of active cycles during
@@ -102,7 +102,7 @@ struct kbase_ipa_group {
  * @model_data:		pointer to model data
  * @coeff:		model coefficient. Unity is ~2^20, so range approx
  *			+/- 4.0: -2^22 < coeff < 2^22
- * @counter		offset in bytes of the counter used to calculate energy
+ * @counter:		offset in bytes of the counter used to calculate energy
  *			for IPA group
  *
  * Calculate energy estimation based on hardware counter `counter'
@@ -149,7 +149,7 @@ s64 kbase_ipa_single_counter(
 
 /**
  * attach_vinstr() - attach a vinstr_buffer to an IPA model.
- * @model_data		pointer to model data
+ * @model_data:		pointer to model data
  *
  * Attach a vinstr_buffer to an IPA model. The vinstr_buffer
  * allows access to the hardware counters used to calculate
@@ -161,7 +161,7 @@ int kbase_ipa_attach_vinstr(struct kbase_ipa_model_vinstr_data *model_data);
 
 /**
  * detach_vinstr() - detach a vinstr_buffer from an IPA model.
- * @model_data		pointer to model data
+ * @model_data:		pointer to model data
  *
  * Detach a vinstr_buffer from an IPA model.
  */
@@ -181,6 +181,19 @@ void kbase_ipa_detach_vinstr(struct kbase_ipa_model_vinstr_data *model_data);
  */
 int kbase_ipa_vinstr_dynamic_coeff(struct kbase_ipa_model *model, u32 *coeffp);
 
+/**
+ * kbase_ipa_vinstr_reset_data() - Reset the counters data used for dynamic
+ *                                 power estimation
+ * @model:		pointer to instantiated model
+ *
+ * Currently it is not implemented for JM GPUs.
+ * When implemented it is expected to retrieve the accumulated value of HW
+ * counters from the Vinstr component, without doing any processing, which is
+ * effectively a reset as the next call to kbase_ipa_counter_dynamic_coeff()
+ * will see the increment in counter values from this point onwards.
+ */
+void kbase_ipa_vinstr_reset_data(struct kbase_ipa_model *model);
+
 /**
  * kbase_ipa_vinstr_common_model_init() - initialize ipa power model
  * @model:		ipa power model to initialize
@@ -214,4 +227,4 @@ int kbase_ipa_vinstr_common_model_init(struct kbase_ipa_model *model,
  */
 void kbase_ipa_vinstr_common_model_term(struct kbase_ipa_model *model);
 
-#endif /* _KBASE_IPA_VINSTR_COMMON_H_ */
+#endif /* _KBASE_IPA_COUNTER_COMMON_JM_H_ */
diff --git a/drivers/gpu/arm/bifrost/ipa/backend/mali_kbase_ipa_counter_csf.c b/drivers/gpu/arm/bifrost/ipa/backend/mali_kbase_ipa_counter_csf.c
new file mode 100644
index 000000000000..9326b0195544
--- /dev/null
+++ b/drivers/gpu/arm/bifrost/ipa/backend/mali_kbase_ipa_counter_csf.c
@@ -0,0 +1,164 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ *
+ * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+#include "mali_kbase_ipa_counter_common_csf.h"
+#include "mali_kbase.h"
+
+/* MEMSYS counter block offsets */
+#define L2_RD_MSG_IN            (16)
+#define L2_EXT_WRITE_NOSNP_FULL (43)
+
+/* SC counter block offsets */
+#define FRAG_QUADS_EZS_UPDATE   (13)
+#define EXEC_INSTR_FMA          (27)
+#define TEX_FILT_NUM_OPS        (39)
+#define LS_MEM_READ_SHORT       (45)
+#define LS_MEM_WRITE_SHORT      (47)
+#define VARY_SLOT_16            (51)
+
+/* Tiler counter block offsets */
+#define IDVS_POS_SHAD_STALL     (23)
+#define PREFETCH_STALL          (25)
+#define VFETCH_POS_READ_WAIT    (29)
+#define IDVS_VAR_SHAD_STALL     (38)
+
+#define COUNTER_DEF(cnt_name, coeff, cnt_idx, block_type)	\
+	{							\
+		.name = cnt_name,				\
+		.coeff_default_value = coeff,			\
+		.counter_block_offset = cnt_idx,		\
+		.counter_block_type = block_type,		\
+	}
+
+#define CSHW_COUNTER_DEF(cnt_name, coeff, cnt_idx)	\
+	COUNTER_DEF(cnt_name, coeff, cnt_idx, KBASE_IPA_CORE_TYPE_CSHW)
+
+#define MEMSYS_COUNTER_DEF(cnt_name, coeff, cnt_idx)	\
+	COUNTER_DEF(cnt_name, coeff, cnt_idx, KBASE_IPA_CORE_TYPE_MEMSYS)
+
+#define SC_COUNTER_DEF(cnt_name, coeff, cnt_idx)	\
+	COUNTER_DEF(cnt_name, coeff, cnt_idx, KBASE_IPA_CORE_TYPE_SHADER)
+
+#define TILER_COUNTER_DEF(cnt_name, coeff, cnt_idx)	\
+	COUNTER_DEF(cnt_name, coeff, cnt_idx, KBASE_IPA_CORE_TYPE_TILER)
+
+/* Table of description of HW counters used by IPA counter model.
+ *
+ * This table provides a description of each performance counter
+ * used by the top level counter model for energy estimation.
+ */
+static const struct kbase_ipa_counter ipa_top_level_cntrs_def_todx[] = {
+	MEMSYS_COUNTER_DEF("l2_rd_msg_in", 295631, L2_RD_MSG_IN),
+	MEMSYS_COUNTER_DEF("l2_ext_write_nosnp_ull", 325168, L2_EXT_WRITE_NOSNP_FULL),
+
+	TILER_COUNTER_DEF("prefetch_stall", 145435, PREFETCH_STALL),
+	TILER_COUNTER_DEF("idvs_var_shad_stall", -171917, IDVS_VAR_SHAD_STALL),
+	TILER_COUNTER_DEF("idvs_pos_shad_stall", 109980, IDVS_POS_SHAD_STALL),
+	TILER_COUNTER_DEF("vfetch_pos_read_wait", -119118, VFETCH_POS_READ_WAIT),
+};
+
+/* This table provides a description of each performance counter
+ * used by the shader cores counter model for energy estimation.
+ */
+static const struct kbase_ipa_counter ipa_shader_core_cntrs_def_todx[] = {
+	SC_COUNTER_DEF("exec_instr_fma", 505449, EXEC_INSTR_FMA),
+	SC_COUNTER_DEF("tex_filt_num_operations", 574869, TEX_FILT_NUM_OPS),
+	SC_COUNTER_DEF("ls_mem_read_short", 60917, LS_MEM_READ_SHORT),
+	SC_COUNTER_DEF("frag_quads_ezs_update", 694555, FRAG_QUADS_EZS_UPDATE),
+	SC_COUNTER_DEF("ls_mem_write_short", 698290, LS_MEM_WRITE_SHORT),
+	SC_COUNTER_DEF("vary_slot_16", 181069, VARY_SLOT_16),
+};
+
+#define IPA_POWER_MODEL_OPS(gpu, init_token) \
+	const struct kbase_ipa_model_ops kbase_ ## gpu ## _ipa_model_ops = { \
+		.name = "mali-" #gpu "-power-model", \
+		.init = kbase_ ## init_token ## _power_model_init, \
+		.term = kbase_ipa_counter_common_model_term, \
+		.get_dynamic_coeff = kbase_ipa_counter_dynamic_coeff, \
+		.reset_counter_data = kbase_ipa_counter_reset_data, \
+	}; \
+	KBASE_EXPORT_TEST_API(kbase_ ## gpu ## _ipa_model_ops)
+
+#define STANDARD_POWER_MODEL(gpu, reference_voltage) \
+	static int kbase_ ## gpu ## _power_model_init(\
+			struct kbase_ipa_model *model) \
+	{ \
+		BUILD_BUG_ON((1 + \
+			      ARRAY_SIZE(ipa_top_level_cntrs_def_ ## gpu) +\
+			      ARRAY_SIZE(ipa_shader_core_cntrs_def_ ## gpu)) > \
+			      KBASE_IPA_MAX_COUNTER_DEF_NUM); \
+		return kbase_ipa_counter_common_model_init(model, \
+			ipa_top_level_cntrs_def_ ## gpu, \
+			ARRAY_SIZE(ipa_top_level_cntrs_def_ ## gpu), \
+			ipa_shader_core_cntrs_def_ ## gpu, \
+			ARRAY_SIZE(ipa_shader_core_cntrs_def_ ## gpu), \
+			(reference_voltage)); \
+	} \
+	IPA_POWER_MODEL_OPS(gpu, gpu)
+
+/* Define model ops named after @gpu that reuse the init of @as_gpu. */
+#define ALIAS_POWER_MODEL(gpu, as_gpu) \
+	IPA_POWER_MODEL_OPS(gpu, as_gpu)
+
+/* Reference voltage value is 750 mV.
+ */
+STANDARD_POWER_MODEL(todx, 750);
+
+/* Assuming LODX is an alias of TODX for IPA */
+ALIAS_POWER_MODEL(lodx, todx);
+
+static const struct kbase_ipa_model_ops *ipa_counter_model_ops[] = {
+	&kbase_todx_ipa_model_ops,
+	&kbase_lodx_ipa_model_ops
+};
+
+const struct kbase_ipa_model_ops *kbase_ipa_counter_model_ops_find(
+		struct kbase_device *kbdev, const char *name)
+{
+	size_t idx;
+
+	for (idx = 0; idx < ARRAY_SIZE(ipa_counter_model_ops); idx++) {
+		const struct kbase_ipa_model_ops *candidate =
+			ipa_counter_model_ops[idx];
+
+		if (strcmp(candidate->name, name) == 0)
+			return candidate;
+	}
+
+	/* No match: log the unknown name; the caller gets NULL. */
+	dev_err(kbdev->dev, "power model \'%s\' not found\n", name);
+	return NULL;
+}
+
+const char *kbase_ipa_counter_model_name_from_id(u32 gpu_id)
+{
+	const u32 product = (gpu_id & GPU_ID_VERSION_PRODUCT_ID) >>
+			    GPU_ID_VERSION_PRODUCT_ID_SHIFT;
+
+	switch (GPU_ID2_MODEL_MATCH_VALUE(product)) {
+	case GPU_ID2_PRODUCT_LODX:
+		return "mali-lodx-power-model";
+	case GPU_ID2_PRODUCT_TODX:
+		return "mali-todx-power-model";
+	default: /* no counter model name for this product */
+		return NULL;
+	}
+}
diff --git a/drivers/gpu/arm/bifrost/ipa/mali_kbase_ipa_vinstr_g7x.c b/drivers/gpu/arm/bifrost/ipa/backend/mali_kbase_ipa_counter_jm.c
similarity index 86%
rename from drivers/gpu/arm/bifrost/ipa/mali_kbase_ipa_vinstr_g7x.c
rename to drivers/gpu/arm/bifrost/ipa/backend/mali_kbase_ipa_counter_jm.c
index 83174eb66ded..8d33b139169f 100644
--- a/drivers/gpu/arm/bifrost/ipa/mali_kbase_ipa_vinstr_g7x.c
+++ b/drivers/gpu/arm/bifrost/ipa/backend/mali_kbase_ipa_counter_jm.c
@@ -1,11 +1,12 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  *
- * (C) COPYRIGHT 2016-2020 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2016-2021 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,14 +17,15 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
+
 #include <linux/thermal.h>
 
-#include "mali_kbase_ipa_vinstr_common.h"
+#include "mali_kbase_ipa_counter_common_jm.h"
 #include "mali_kbase.h"
-
+#ifdef CONFIG_MALI_BIFROST_NO_MALI
+#include <backend/gpu/mali_kbase_model_dummy.h>
+#endif
 
 /* Performance counter blocks base offsets */
 #define JM_BASE             (0 * KBASE_IPA_NR_BYTES_PER_BLOCK)
@@ -94,10 +96,15 @@ static u32 kbase_g7x_power_model_get_memsys_counter(struct kbase_ipa_model_vinst
 static u32 kbase_g7x_power_model_get_sc_counter(struct kbase_ipa_model_vinstr_data *model_data,
 						u32 counter_block_offset)
 {
+#ifdef CONFIG_MALI_BIFROST_NO_MALI
+	const u32 sc_base = MEMSYS_BASE +
+		(KBASE_DUMMY_MODEL_MAX_MEMSYS_BLOCKS *
+		 KBASE_IPA_NR_BYTES_PER_BLOCK);
+#else
 	const u32 sc_base = MEMSYS_BASE +
 		(model_data->kbdev->gpu_props.props.l2_props.num_l2_slices *
 		 KBASE_IPA_NR_BYTES_PER_BLOCK);
-
+#endif
 	return sc_base + counter_block_offset;
 }
 
@@ -178,7 +185,7 @@ static u32 kbase_g7x_get_active_cycles(
 	return kbase_ipa_single_counter(model_data, 1, counter);
 }
 
-/** Table of IPA group definitions.
+/* Table of IPA group definitions.
  *
  * For each IPA group, this table defines a function to access the given performance block counter (or counters,
  * if the operation needs to be iterated on multiple blocks) and calculate energy estimation.
@@ -455,6 +462,7 @@ static const struct kbase_ipa_group ipa_groups_def_tbax[] = {
 		.init = kbase_ ## init_token ## _power_model_init, \
 		.term = kbase_ipa_vinstr_common_model_term, \
 		.get_dynamic_coeff = kbase_ipa_vinstr_dynamic_coeff, \
+		.reset_counter_data = kbase_ipa_vinstr_reset_data, \
 	}; \
 	KBASE_EXPORT_TEST_API(kbase_ ## gpu ## _ipa_model_ops)
 
@@ -488,3 +496,68 @@ STANDARD_POWER_MODEL(tbax, 1000);
 ALIAS_POWER_MODEL(g52, g76);
 /* tnax is an alias of g77 (TTRX) for IPA */
 ALIAS_POWER_MODEL(tnax, g77);
+
+static const struct kbase_ipa_model_ops *ipa_counter_model_ops[] = {
+	&kbase_g71_ipa_model_ops,
+	&kbase_g72_ipa_model_ops,
+	&kbase_g76_ipa_model_ops,
+	&kbase_g52_ipa_model_ops,
+	&kbase_g52_r1_ipa_model_ops,
+	&kbase_g51_ipa_model_ops,
+	&kbase_g77_ipa_model_ops,
+	&kbase_tnax_ipa_model_ops,
+	&kbase_tbex_ipa_model_ops,
+	&kbase_tbax_ipa_model_ops
+};
+
+const struct kbase_ipa_model_ops *kbase_ipa_counter_model_ops_find(
+		struct kbase_device *kbdev, const char *name)
+{
+	size_t idx;
+
+	for (idx = 0; idx < ARRAY_SIZE(ipa_counter_model_ops); idx++) {
+		const struct kbase_ipa_model_ops *candidate =
+			ipa_counter_model_ops[idx];
+
+		if (strcmp(candidate->name, name) == 0)
+			return candidate;
+	}
+
+	/* No match: log the unknown name; the caller gets NULL. */
+	dev_err(kbdev->dev, "power model \'%s\' not found\n", name);
+	return NULL;
+}
+
+const char *kbase_ipa_counter_model_name_from_id(u32 gpu_id)
+{
+	const u32 product = (gpu_id & GPU_ID_VERSION_PRODUCT_ID) >>
+			    GPU_ID_VERSION_PRODUCT_ID_SHIFT;
+
+	switch (GPU_ID2_MODEL_MATCH_VALUE(product)) {
+	case GPU_ID2_PRODUCT_TMIX:
+		return "mali-g71-power-model";
+	case GPU_ID2_PRODUCT_THEX:
+		return "mali-g72-power-model";
+	case GPU_ID2_PRODUCT_TNOX:
+		return "mali-g76-power-model";
+	case GPU_ID2_PRODUCT_TSIX:
+		return "mali-g51-power-model";
+	case GPU_ID2_PRODUCT_TGOX:
+		/* Major version 0 is g52, which is aliased to g76
+		 * power-model's ops; other major versions are g52_r1.
+		 */
+		if (gpu_id & GPU_ID2_VERSION_MAJOR)
+			return "mali-g52_r1-power-model";
+		return "mali-g52-power-model";
+	case GPU_ID2_PRODUCT_TNAX:
+		return "mali-tnax-power-model";
+	case GPU_ID2_PRODUCT_TTRX:
+		return "mali-g77-power-model";
+	case GPU_ID2_PRODUCT_TBEX:
+		return "mali-tbex-power-model";
+	case GPU_ID2_PRODUCT_TBAX:
+		return "mali-tbax-power-model";
+	default:
+		return NULL;
+	}
+}
diff --git a/drivers/gpu/arm/bifrost/ipa/mali_kbase_ipa.c b/drivers/gpu/arm/bifrost/ipa/mali_kbase_ipa.c
index 67adb65306dd..e86a94317b0b 100644
--- a/drivers/gpu/arm/bifrost/ipa/mali_kbase_ipa.c
+++ b/drivers/gpu/arm/bifrost/ipa/mali_kbase_ipa.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  *
  * (C) COPYRIGHT 2016-2020 ARM Limited. All rights reserved.
@@ -5,7 +6,7 @@
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,9 +17,8 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
+
 #include <linux/thermal.h>
 #include <linux/devfreq_cooling.h>
 #include <linux/of.h>
@@ -27,31 +27,19 @@
 #include "mali_kbase_ipa_debugfs.h"
 #include "mali_kbase_ipa_simple.h"
 #include "backend/gpu/mali_kbase_pm_internal.h"
-
-#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 13, 0))
+#include "backend/gpu/mali_kbase_devfreq.h"
 #include <linux/pm_opp.h>
-#else
-#include <linux/opp.h>
-#define dev_pm_opp_find_freq_exact opp_find_freq_exact
-#define dev_pm_opp_get_voltage opp_get_voltage
-#define dev_pm_opp opp
-#endif
 
 #define KBASE_IPA_FALLBACK_MODEL_NAME "mali-simple-power-model"
 
-static const struct kbase_ipa_model_ops *kbase_ipa_all_model_ops[] = {
-	&kbase_simple_ipa_model_ops,
-	&kbase_g71_ipa_model_ops,
-	&kbase_g72_ipa_model_ops,
-	&kbase_g76_ipa_model_ops,
-	&kbase_g52_ipa_model_ops,
-	&kbase_g52_r1_ipa_model_ops,
-	&kbase_g51_ipa_model_ops,
-	&kbase_g77_ipa_model_ops,
-	&kbase_tnax_ipa_model_ops,
-	&kbase_tbex_ipa_model_ops,
-	&kbase_tbax_ipa_model_ops
-};
+/* Polling by the thermal governor starts when the temperature exceeds a
+ * certain trip point. To have meaningful counter values when polling starts
+ * and the first call to kbase_get_real_power() is made, the counter values
+ * must be reset every now and then.
+ * It is reasonable to do the reset every second if no polling is being done,
+ * the counter model implementation also assumes max sampling interval of 1 sec.
+ */
+#define RESET_INTERVAL_MS ((s64)1000)
 
 int kbase_ipa_model_recalculate(struct kbase_ipa_model *model)
 {
@@ -72,55 +60,24 @@ int kbase_ipa_model_recalculate(struct kbase_ipa_model *model)
 }
 
 const struct kbase_ipa_model_ops *kbase_ipa_model_ops_find(struct kbase_device *kbdev,
-							    const char *name)
+							   const char *name)
 {
-	int i;
+	if (!strcmp(name, kbase_simple_ipa_model_ops.name))
+		return &kbase_simple_ipa_model_ops;
 
-	for (i = 0; i < ARRAY_SIZE(kbase_ipa_all_model_ops); ++i) {
-		const struct kbase_ipa_model_ops *ops = kbase_ipa_all_model_ops[i];
-
-		if (!strcmp(ops->name, name))
-			return ops;
-	}
-
-	dev_err(kbdev->dev, "power model \'%s\' not found\n", name);
-
-	return NULL;
+	return kbase_ipa_counter_model_ops_find(kbdev, name);
 }
 KBASE_EXPORT_TEST_API(kbase_ipa_model_ops_find);
 
 const char *kbase_ipa_model_name_from_id(u32 gpu_id)
 {
-	const u32 prod_id = (gpu_id & GPU_ID_VERSION_PRODUCT_ID) >>
-			GPU_ID_VERSION_PRODUCT_ID_SHIFT;
+	const char *model_name =
+		kbase_ipa_counter_model_name_from_id(gpu_id);
 
-	switch (GPU_ID2_MODEL_MATCH_VALUE(prod_id)) {
-	case GPU_ID2_PRODUCT_TMIX:
-		return "mali-g71-power-model";
-	case GPU_ID2_PRODUCT_THEX:
-		return "mali-g72-power-model";
-	case GPU_ID2_PRODUCT_TNOX:
-		return "mali-g76-power-model";
-	case GPU_ID2_PRODUCT_TSIX:
-		return "mali-g51-power-model";
-	case GPU_ID2_PRODUCT_TGOX:
-		if ((gpu_id & GPU_ID2_VERSION_MAJOR) ==
-				(0 << GPU_ID2_VERSION_MAJOR_SHIFT))
-			/* g52 aliased to g76 power-model's ops */
-			return "mali-g52-power-model";
-		else
-			return "mali-g52_r1-power-model";
-	case GPU_ID2_PRODUCT_TNAX:
-		return "mali-tnax-power-model";
-	case GPU_ID2_PRODUCT_TTRX:
-		return "mali-g77-power-model";
-	case GPU_ID2_PRODUCT_TBEX:
-		return "mali-tbex-power-model";
-	case GPU_ID2_PRODUCT_TBAX:
-		return "mali-tbax-power-model";
-	default:
+	/* No counter model for this GPU: fall back to the simple model */
+	if (!model_name)
 		return KBASE_IPA_FALLBACK_MODEL_NAME;
-	}
+	return model_name;
 }
 KBASE_EXPORT_TEST_API(kbase_ipa_model_name_from_id);
 
@@ -367,6 +324,8 @@ int kbase_ipa_init(struct kbase_device *kbdev)
 		kbdev->ipa.configured_model = default_model;
 	}
 
+	kbdev->ipa.last_sample_time = ktime_get();
+
 end:
 	if (err)
 		kbase_ipa_term_locked(kbdev);
@@ -421,7 +380,8 @@ static u32 kbase_scale_dynamic_power(const u32 c, const u32 freq,
 	const u32 v2f = v2f_big / 1000;
 
 	/* Range (working backwards from next line): 0 < v2fc < 2^23 uW.
-	 * Must be < 2^42 to avoid overflowing the return value. */
+	 * Must be < 2^42 to avoid overflowing the return value.
+	 */
 	const u64 v2fc = (u64) c * (u64) v2f;
 
 	/* Range: 0 < v2fc / 1000 < 2^13 mW */
@@ -517,8 +477,9 @@ static u32 get_static_power_locked(struct kbase_device *kbdev,
 	return power;
 }
 
-#if defined(CONFIG_MALI_PWRSOFT_765) || \
-	LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0)
+#if KERNEL_VERSION(5, 10, 0) > LINUX_VERSION_CODE
+#if defined(CONFIG_MALI_PWRSOFT_765) ||                                        \
+	KERNEL_VERSION(4, 10, 0) <= LINUX_VERSION_CODE
 static unsigned long kbase_get_static_power(struct devfreq *df,
 					    unsigned long voltage)
 #else
@@ -527,8 +488,8 @@ static unsigned long kbase_get_static_power(unsigned long voltage)
 {
 	struct kbase_ipa_model *model;
 	u32 power = 0;
-#if defined(CONFIG_MALI_PWRSOFT_765) || \
-	LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0)
+#if defined(CONFIG_MALI_PWRSOFT_765) ||                                        \
+	KERNEL_VERSION(4, 10, 0) <= LINUX_VERSION_CODE
 	struct kbase_device *kbdev = dev_get_drvdata(&df->dev);
 #else
 	struct kbase_device *kbdev = kbase_find_device(-1);
@@ -544,16 +505,66 @@ static unsigned long kbase_get_static_power(unsigned long voltage)
 
 	mutex_unlock(&kbdev->ipa.lock);
 
-#if !(defined(CONFIG_MALI_PWRSOFT_765) || \
-	LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0))
+#if !(defined(CONFIG_MALI_PWRSOFT_765) ||                                      \
+	KERNEL_VERSION(4, 10, 0) <= LINUX_VERSION_CODE)
 	kbase_release_device(kbdev);
 #endif
 
 	return power;
 }
+#endif /* KERNEL_VERSION(5, 10, 0) > LINUX_VERSION_CODE */
 
-#if defined(CONFIG_MALI_PWRSOFT_765) || \
-	LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0)
+/**
+ * opp_translate_freq_voltage() - Translate nominal OPP frequency from
+ *                                devicetree into the real frequency for
+ *                                top-level and shader cores.
+ * @kbdev:            Device pointer
+ * @nominal_freq:     Nominal frequency in Hz.
+ * @nominal_voltage:  Nominal voltage, in mV (only read in NO_MALI builds).
+ * @freqs:            Output array of real frequencies, indexed by block type.
+ * @volts:            Output array of voltages, indexed by block type.
+ *
+ * If there are 2 clock domains, then top-level and shader cores can operate
+ * at different frequency and voltage level. The nominal frequency ("opp-hz")
+ * used by devfreq from the devicetree may not be same as the real frequency
+ * at which top-level and shader cores are operating, so a translation is
+ * needed.
+ * Nominal voltage shall always be same as the real voltage for top-level.
+ */
+static void opp_translate_freq_voltage(struct kbase_device *kbdev,
+				       unsigned long nominal_freq,
+				       unsigned long nominal_voltage,
+				       unsigned long *freqs,
+				       unsigned long *volts)
+{
+#ifndef CONFIG_MALI_BIFROST_NO_MALI
+	u64 core_mask;
+
+	kbase_devfreq_opp_translate(kbdev, nominal_freq, &core_mask,
+				    freqs, volts);
+	CSTD_UNUSED(core_mask);
+
+	if (kbdev->nr_clocks == 1) {
+		freqs[KBASE_IPA_BLOCK_TYPE_SHADER_CORES] =
+			freqs[KBASE_IPA_BLOCK_TYPE_TOP_LEVEL];
+		volts[KBASE_IPA_BLOCK_TYPE_SHADER_CORES] =
+			volts[KBASE_IPA_BLOCK_TYPE_TOP_LEVEL];
+	}
+#else
+	/* An arbitrary voltage and frequency value can be chosen for testing
+	 * in no mali configuration which may not match with any OPP level.
+	 */
+	freqs[KBASE_IPA_BLOCK_TYPE_TOP_LEVEL] = nominal_freq;
+	volts[KBASE_IPA_BLOCK_TYPE_TOP_LEVEL] = nominal_voltage;
+
+	freqs[KBASE_IPA_BLOCK_TYPE_SHADER_CORES] = nominal_freq;
+	volts[KBASE_IPA_BLOCK_TYPE_SHADER_CORES] = nominal_voltage;
+#endif
+}
+
+#if KERNEL_VERSION(5, 10, 0) > LINUX_VERSION_CODE
+#if defined(CONFIG_MALI_PWRSOFT_765) ||                                        \
+	KERNEL_VERSION(4, 10, 0) <= LINUX_VERSION_CODE
 static unsigned long kbase_get_dynamic_power(struct devfreq *df,
 					     unsigned long freq,
 					     unsigned long voltage)
@@ -563,10 +574,13 @@ static unsigned long kbase_get_dynamic_power(unsigned long freq,
 #endif
 {
 	struct kbase_ipa_model *model;
-	u32 power_coeff = 0, power = 0;
+	unsigned long freqs[KBASE_IPA_BLOCK_TYPE_NUM] = {0};
+	unsigned long volts[KBASE_IPA_BLOCK_TYPE_NUM] = {0};
+	u32 power_coeffs[KBASE_IPA_BLOCK_TYPE_NUM] = {0};
+	u32 power = 0;
 	int err = 0;
-#if defined(CONFIG_MALI_PWRSOFT_765) || \
-	LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0)
+#if defined(CONFIG_MALI_PWRSOFT_765) ||                                        \
+	KERNEL_VERSION(4, 10, 0) <= LINUX_VERSION_CODE
 	struct kbase_device *kbdev = dev_get_drvdata(&df->dev);
 #else
 	struct kbase_device *kbdev = kbase_find_device(-1);
@@ -579,34 +593,53 @@ static unsigned long kbase_get_dynamic_power(unsigned long freq,
 
 	model = kbdev->ipa.fallback_model;
 
-	err = model->ops->get_dynamic_coeff(model, &power_coeff);
+	err = model->ops->get_dynamic_coeff(model, power_coeffs);
 
-	if (!err)
-		power = kbase_scale_dynamic_power(power_coeff, freq, voltage);
-	else
+	if (!err) {
+		opp_translate_freq_voltage(kbdev, freq, voltage, freqs, volts);
+
+		power = kbase_scale_dynamic_power(
+			power_coeffs[KBASE_IPA_BLOCK_TYPE_TOP_LEVEL],
+			freqs[KBASE_IPA_BLOCK_TYPE_TOP_LEVEL],
+			volts[KBASE_IPA_BLOCK_TYPE_TOP_LEVEL]);
+
+		/* Here unlike kbase_get_real_power(), shader core frequency is
+		 * used for the scaling as simple power model is used to obtain
+		 * the value of dynamic coefficient (which is a fixed value
+		 * retrieved from the device tree).
+		 */
+		power += kbase_scale_dynamic_power(
+			 power_coeffs[KBASE_IPA_BLOCK_TYPE_SHADER_CORES],
+			 freqs[KBASE_IPA_BLOCK_TYPE_SHADER_CORES],
+			 volts[KBASE_IPA_BLOCK_TYPE_SHADER_CORES]);
+	} else
 		dev_err_ratelimited(kbdev->dev,
 				    "Model %s returned error code %d\n",
 				    model->ops->name, err);
 
 	mutex_unlock(&kbdev->ipa.lock);
 
-#if !(defined(CONFIG_MALI_PWRSOFT_765) || \
-	LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0))
+#if !(defined(CONFIG_MALI_PWRSOFT_765) ||                                      \
+	KERNEL_VERSION(4, 10, 0) <= LINUX_VERSION_CODE)
 	kbase_release_device(kbdev);
 #endif
 
 	return power;
 }
+#endif /* KERNEL_VERSION(5, 10, 0) > LINUX_VERSION_CODE */
 
 int kbase_get_real_power_locked(struct kbase_device *kbdev, u32 *power,
 				unsigned long freq,
 				unsigned long voltage)
 {
 	struct kbase_ipa_model *model;
-	u32 power_coeff = 0;
-	int err = 0;
+	unsigned long freqs[KBASE_IPA_BLOCK_TYPE_NUM] = {0};
+	unsigned long volts[KBASE_IPA_BLOCK_TYPE_NUM] = {0};
+	u32 power_coeffs[KBASE_IPA_BLOCK_TYPE_NUM] = {0};
 	struct kbasep_pm_metrics diff;
 	u64 total_time;
+	bool skip_utilization_scaling = false;
+	int err = 0;
 
 	lockdep_assert_held(&kbdev->ipa.lock);
 
@@ -614,30 +647,62 @@ int kbase_get_real_power_locked(struct kbase_device *kbdev, u32 *power,
 
 	model = get_current_model(kbdev);
 
-	err = model->ops->get_dynamic_coeff(model, &power_coeff);
+	err = model->ops->get_dynamic_coeff(model, power_coeffs);
 
 	/* If the counter model returns an error (e.g. switching back to
 	 * protected mode and failing to read counters, or a counter sample
 	 * with too few cycles), revert to the fallback model.
 	 */
 	if (err && model != kbdev->ipa.fallback_model) {
+		/* No meaningful scaling for GPU utilization can be done if
+		 * the sampling interval was too long. This is equivalent to
+		 * assuming GPU was busy throughout (similar to what is done
+		 * during protected mode).
+		 */
+		if (err == -EOVERFLOW)
+			skip_utilization_scaling = true;
+
 		model = kbdev->ipa.fallback_model;
-		err = model->ops->get_dynamic_coeff(model, &power_coeff);
+		err = model->ops->get_dynamic_coeff(model, power_coeffs);
 	}
 
-	if (err)
+	if (WARN_ON(err))
 		return err;
 
-	*power = kbase_scale_dynamic_power(power_coeff, freq, voltage);
+	opp_translate_freq_voltage(kbdev, freq, voltage, freqs, volts);
 
-	/* time_busy / total_time cannot be >1, so assigning the 64-bit
-	 * result of div_u64 to *power cannot overflow.
-	 */
-	total_time = diff.time_busy + (u64) diff.time_idle;
-	*power = div_u64(*power * (u64) diff.time_busy,
-			 max(total_time, 1ull));
+	*power = kbase_scale_dynamic_power(
+			power_coeffs[KBASE_IPA_BLOCK_TYPE_TOP_LEVEL],
+			freqs[KBASE_IPA_BLOCK_TYPE_TOP_LEVEL],
+			volts[KBASE_IPA_BLOCK_TYPE_TOP_LEVEL]);
 
-	*power += get_static_power_locked(kbdev, model, voltage);
+	if (power_coeffs[KBASE_IPA_BLOCK_TYPE_SHADER_CORES]) {
+		unsigned long freq = freqs[KBASE_IPA_BLOCK_TYPE_SHADER_CORES];
+
+		/* As per the HW team, the top-level frequency needs to be used
+		 * for the scaling if the counter based model was used as
+		 * counter values are normalized with the GPU_ACTIVE counter
+		 * value, which increments at the rate of top-level frequency.
+		 */
+		if (model != kbdev->ipa.fallback_model)
+			freq = freqs[KBASE_IPA_BLOCK_TYPE_TOP_LEVEL];
+
+		*power += kbase_scale_dynamic_power(
+				power_coeffs[KBASE_IPA_BLOCK_TYPE_SHADER_CORES],
+				freq, volts[KBASE_IPA_BLOCK_TYPE_SHADER_CORES]);
+	}
+
+	if (!skip_utilization_scaling) {
+		/* time_busy / total_time cannot be >1, so assigning the 64-bit
+		 * result of div_u64 to *power cannot overflow.
+		 */
+		total_time = diff.time_busy + (u64) diff.time_idle;
+		*power = div_u64(*power * (u64) diff.time_busy,
+				 max(total_time, 1ull));
+	}
+
+	*power += get_static_power_locked(kbdev, model,
+				volts[KBASE_IPA_BLOCK_TYPE_TOP_LEVEL]);
 
 	return err;
 }
@@ -661,12 +726,42 @@ int kbase_get_real_power(struct devfreq *df, u32 *power,
 }
 KBASE_EXPORT_TEST_API(kbase_get_real_power);
 
-#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 4, 0)
-struct devfreq_cooling_ops kbase_ipa_power_model_ops = {
-#else
 struct devfreq_cooling_power kbase_ipa_power_model_ops = {
-#endif
+#if KERNEL_VERSION(5, 10, 0) > LINUX_VERSION_CODE
 	.get_static_power = &kbase_get_static_power,
 	.get_dynamic_power = &kbase_get_dynamic_power,
+#endif /* KERNEL_VERSION(5, 10, 0) > LINUX_VERSION_CODE */
+#if defined(CONFIG_MALI_PWRSOFT_765) ||                                        \
+	KERNEL_VERSION(4, 10, 0) <= LINUX_VERSION_CODE
+	.get_real_power = &kbase_get_real_power,
+#endif
 };
 KBASE_EXPORT_TEST_API(kbase_ipa_power_model_ops);
+
+void kbase_ipa_reset_data(struct kbase_device *kbdev)
+{
+	ktime_t now, elapsed;
+	s64 elapsed_ms;
+
+	mutex_lock(&kbdev->ipa.lock);
+
+	now = ktime_get();
+	elapsed = ktime_sub(now, kbdev->ipa.last_sample_time);
+	elapsed_ms = ktime_to_ms(elapsed);
+	/* Reset only if the last sample is older than the reset interval */
+	if (elapsed_ms > RESET_INTERVAL_MS) {
+		struct kbasep_pm_metrics metrics_diff;
+		struct kbase_ipa_model *model;
+
+		kbase_pm_get_dvfs_metrics(
+			kbdev, &kbdev->ipa.last_metrics, &metrics_diff);
+		/* Counter data is only reset for non-fallback models */
+		model = get_current_model(kbdev);
+		if (model != kbdev->ipa.fallback_model)
+			model->ops->reset_counter_data(model);
+
+		kbdev->ipa.last_sample_time = ktime_get();
+	}
+
+	mutex_unlock(&kbdev->ipa.lock);
+}
diff --git a/drivers/gpu/arm/bifrost/ipa/mali_kbase_ipa.h b/drivers/gpu/arm/bifrost/ipa/mali_kbase_ipa.h
index f43f3d9416b4..beceb6ce7b55 100644
--- a/drivers/gpu/arm/bifrost/ipa/mali_kbase_ipa.h
+++ b/drivers/gpu/arm/bifrost/ipa/mali_kbase_ipa.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  *
  * (C) COPYRIGHT 2016-2020 ARM Limited. All rights reserved.
@@ -5,7 +6,7 @@
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,8 +17,6 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 #ifndef _KBASE_IPA_H_
@@ -27,6 +26,20 @@
 
 struct devfreq;
 
+/**
+ * enum kbase_ipa_block_type - Type of block for which power estimation is done.
+ *
+ * @KBASE_IPA_BLOCK_TYPE_TOP_LEVEL:    Top-level block, that covers CSHW,
+ *                                     MEMSYS, Tiler.
+ * @KBASE_IPA_BLOCK_TYPE_SHADER_CORES: All Shader cores.
+ * @KBASE_IPA_BLOCK_TYPE_NUM:          Number of blocks.
+ */
+enum kbase_ipa_block_type {
+	KBASE_IPA_BLOCK_TYPE_TOP_LEVEL,
+	KBASE_IPA_BLOCK_TYPE_SHADER_CORES,
+	KBASE_IPA_BLOCK_TYPE_NUM
+};
+
 /**
  * struct kbase_ipa_model - Object describing a particular IPA model.
  * @kbdev:                    pointer to kbase device
@@ -89,7 +102,8 @@ struct kbase_ipa_model_ops {
 	int (*init)(struct kbase_ipa_model *model);
 	/* Called immediately after init(), or when a parameter is changed, so
 	 * that any coefficients derived from model parameters can be
-	 * recalculated. */
+	 * recalculated.
+	 */
 	int (*recalculate)(struct kbase_ipa_model *model);
 	void (*term)(struct kbase_ipa_model *model);
 	/*
@@ -101,7 +115,9 @@ struct kbase_ipa_model_ops {
 	 * is then scaled by the IPA framework according to the current OPP's
 	 * frequency and voltage.
 	 *
-	 * Return: 0 on success, or an error code.
+	 * Return: 0 on success, or an error code. -EOVERFLOW error code will
+	 * indicate that sampling interval was too large and no meaningful
+	 * scaling for GPU utilization can be done.
 	 */
 	int (*get_dynamic_coeff)(struct kbase_ipa_model *model, u32 *coeffp);
 	/*
@@ -115,6 +131,18 @@ struct kbase_ipa_model_ops {
 	 * Return: 0 on success, or an error code.
 	 */
 	int (*get_static_coeff)(struct kbase_ipa_model *model, u32 *coeffp);
+
+	/*
+	 * reset_counter_data() - Reset the HW counter data used for calculating
+	 *                        dynamic power coefficient
+	 * @model:		  pointer to model
+	 *
+	 * This method is currently applicable only to the counter based model.
+	 * The next call to get_dynamic_coeff() will have to calculate the
+	 * dynamic power coefficient based on the HW counter data generated
+	 * from this point onwards.
+	 */
+	void (*reset_counter_data)(struct kbase_ipa_model *model);
 };
 
 /**
@@ -163,6 +191,17 @@ int kbase_ipa_model_recalculate(struct kbase_ipa_model *model);
 const struct kbase_ipa_model_ops *kbase_ipa_model_ops_find(struct kbase_device *kbdev,
 							   const char *name);
 
+/**
+ * kbase_ipa_counter_model_ops_find - Lookup an IPA counter model using its name
+ * @kbdev:      pointer to kbase device
+ * @name:       name of counter model to lookup
+ *
+ * Return: Pointer to counter model's 'ops' structure, or NULL if the lookup
+ *         failed.
+ */
+const struct kbase_ipa_model_ops *kbase_ipa_counter_model_ops_find(
+	struct kbase_device *kbdev, const char *name);
+
 /**
  * kbase_ipa_model_name_from_id - Find the best model for a given GPU ID
  * @gpu_id:     GPU ID of GPU the model will be used for
@@ -172,6 +211,16 @@ const struct kbase_ipa_model_ops *kbase_ipa_model_ops_find(struct kbase_device *
  */
 const char *kbase_ipa_model_name_from_id(u32 gpu_id);
 
+/**
+ * kbase_ipa_counter_model_name_from_id - Find the best counter model for a
+ *                                        given GPU ID
+ * @gpu_id:     GPU ID of GPU the counter model will be used for
+ *
+ * Return: The name of the appropriate counter-based model, or NULL if
+ *         no counter model exists.
+ */
+const char *kbase_ipa_counter_model_name_from_id(u32 gpu_id);
+
 /**
  * kbase_ipa_init_model - Initilaize the particular IPA model
  * @kbdev:      pointer to kbase device
@@ -183,7 +232,7 @@ const char *kbase_ipa_model_name_from_id(u32 gpu_id);
  * Return: pointer to kbase_ipa_model on success, NULL on error
  */
 struct kbase_ipa_model *kbase_ipa_init_model(struct kbase_device *kbdev,
-					     const struct kbase_ipa_model_ops *ops);
+					const struct kbase_ipa_model_ops *ops);
 /**
  * kbase_ipa_term_model - Terminate the particular IPA model
  * @model:      pointer to the IPA model object, already initialized
@@ -202,17 +251,6 @@ void kbase_ipa_term_model(struct kbase_ipa_model *model);
  */
 void kbase_ipa_protection_mode_switch_event(struct kbase_device *kbdev);
 
-extern const struct kbase_ipa_model_ops kbase_g71_ipa_model_ops;
-extern const struct kbase_ipa_model_ops kbase_g72_ipa_model_ops;
-extern const struct kbase_ipa_model_ops kbase_g76_ipa_model_ops;
-extern const struct kbase_ipa_model_ops kbase_g52_ipa_model_ops;
-extern const struct kbase_ipa_model_ops kbase_g52_r1_ipa_model_ops;
-extern const struct kbase_ipa_model_ops kbase_g51_ipa_model_ops;
-extern const struct kbase_ipa_model_ops kbase_g77_ipa_model_ops;
-extern const struct kbase_ipa_model_ops kbase_tnax_ipa_model_ops;
-extern const struct kbase_ipa_model_ops kbase_tbex_ipa_model_ops;
-extern const struct kbase_ipa_model_ops kbase_tbax_ipa_model_ops;
-
 /**
  * kbase_get_real_power() - get the real power consumption of the GPU
  * @df: dynamic voltage and frequency scaling information for the GPU.
@@ -238,11 +276,20 @@ int kbase_get_real_power_locked(struct kbase_device *kbdev, u32 *power,
 				unsigned long voltage);
 #endif /* MALI_UNIT_TEST */
 
-#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 4, 0)
-extern struct devfreq_cooling_ops kbase_ipa_power_model_ops;
-#else
 extern struct devfreq_cooling_power kbase_ipa_power_model_ops;
-#endif
+
+/**
+ * kbase_ipa_reset_data() - Reset the data required for power estimation.
+ * @kbdev:  Pointer to kbase device.
+ *
+ * This function is called to ensure a meaningful baseline for
+ * kbase_get_real_power(), when thermal governor starts the polling, and
+ * that is achieved by updating the GPU utilization metrics and retrieving
+ * the accumulated value of HW counters.
+ * Basically this function collects all the data required for power estimation
+ * but does not process it.
+ */
+void kbase_ipa_reset_data(struct kbase_device *kbdev);
 
 #else /* !(defined(CONFIG_MALI_BIFROST_DEVFREQ) && defined(CONFIG_DEVFREQ_THERMAL)) */
 
diff --git a/drivers/gpu/arm/bifrost/ipa/mali_kbase_ipa_debugfs.c b/drivers/gpu/arm/bifrost/ipa/mali_kbase_ipa_debugfs.c
index 30a3b7d1b3be..2672146e8e1d 100644
--- a/drivers/gpu/arm/bifrost/ipa/mali_kbase_ipa_debugfs.c
+++ b/drivers/gpu/arm/bifrost/ipa/mali_kbase_ipa_debugfs.c
@@ -1,11 +1,12 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  *
- * (C) COPYRIGHT 2017-2019 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2017-2020 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,8 +17,6 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 #include <linux/debugfs.h>
@@ -28,7 +27,7 @@
 #include "mali_kbase_ipa.h"
 #include "mali_kbase_ipa_debugfs.h"
 
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0))
+#if (KERNEL_VERSION(4, 7, 0) > LINUX_VERSION_CODE)
 #define DEFINE_DEBUGFS_ATTRIBUTE DEFINE_SIMPLE_ATTRIBUTE
 #endif
 
@@ -160,7 +159,8 @@ int kbase_ipa_model_param_add(struct kbase_ipa_model *model, const char *name,
 		return -ENOMEM;
 
 	/* 'name' is stack-allocated for array elements, so copy it into
-	 * heap-allocated storage */
+	 * heap-allocated storage
+	 */
 	param->name = kstrdup(name, GFP_KERNEL);
 
 	if (!param->name) {
diff --git a/drivers/gpu/arm/bifrost/ipa/mali_kbase_ipa_debugfs.h b/drivers/gpu/arm/bifrost/ipa/mali_kbase_ipa_debugfs.h
index a983d9c14216..189b43c609d6 100644
--- a/drivers/gpu/arm/bifrost/ipa/mali_kbase_ipa_debugfs.h
+++ b/drivers/gpu/arm/bifrost/ipa/mali_kbase_ipa_debugfs.h
@@ -1,11 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  *
- * (C) COPYRIGHT 2017 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2017, 2020-2021 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,8 +17,6 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 #ifndef _KBASE_IPA_DEBUGFS_H_
@@ -63,6 +62,9 @@ static inline int kbase_ipa_model_param_add(struct kbase_ipa_model *model,
 static inline void kbase_ipa_model_param_free_all(struct kbase_ipa_model *model)
 { }
 
+static inline void kbase_ipa_model_param_set_s32(struct kbase_ipa_model *model,
+						 const char *name, s32 val)
+{ }
 #endif /* CONFIG_DEBUG_FS */
 
 #endif /* _KBASE_IPA_DEBUGFS_H_ */
diff --git a/drivers/gpu/arm/bifrost/ipa/mali_kbase_ipa_simple.c b/drivers/gpu/arm/bifrost/ipa/mali_kbase_ipa_simple.c
index 9a11ee5b1e74..dce685a3b072 100644
--- a/drivers/gpu/arm/bifrost/ipa/mali_kbase_ipa_simple.c
+++ b/drivers/gpu/arm/bifrost/ipa/mali_kbase_ipa_simple.c
@@ -1,11 +1,12 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  *
- * (C) COPYRIGHT 2016-2018 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2016-2018, 2020 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,8 +17,6 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 #include <linux/freezer.h>
@@ -35,20 +34,18 @@
 #include "mali_kbase_ipa_simple.h"
 #include "mali_kbase_ipa_debugfs.h"
 
+/* This is used if the dynamic power for top-level is estimated separately
+ * through the counter model. To roughly match the contribution of top-level
+ * power in the total dynamic power, when calculated through counter model,
+ * this scalar is used for the dynamic coefficient specified in the device tree
+ * for simple power model. This value was provided by the HW team after
+ * taking all the power data collected and dividing top level power by shader
+ * core power and then averaging it across all samples.
+ */
+#define TOP_LEVEL_DYN_COEFF_SCALER (3)
+
 #if MALI_UNIT_TEST
 
-#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 3, 0)
-static unsigned long dummy_temp;
-
-static int kbase_simple_power_model_get_dummy_temp(
-	struct thermal_zone_device *tz,
-	unsigned long *temp)
-{
-	*temp = READ_ONCE(dummy_temp);
-	return 0;
-}
-
-#else
 static int dummy_temp;
 
 static int kbase_simple_power_model_get_dummy_temp(
@@ -58,7 +55,6 @@ static int kbase_simple_power_model_get_dummy_temp(
 	*temp = READ_ONCE(dummy_temp);
 	return 0;
 }
-#endif
 
 /* Intercept calls to the kernel function using a macro */
 #ifdef thermal_zone_get_temp
@@ -144,16 +140,13 @@ static u32 calculate_temp_scaling_factor(s32 ts[4], s64 t)
 
 /* We can't call thermal_zone_get_temp() directly in model_static_coeff(),
  * because we don't know if tz->lock is held in the same thread. So poll it in
- * a separate thread to get around this. */
+ * a separate thread to get around this.
+ */
 static int poll_temperature(void *data)
 {
 	struct kbase_ipa_model_simple_data *model_data =
 			(struct kbase_ipa_model_simple_data *) data;
-#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 3, 0)
-	unsigned long temp;
-#else
 	int temp;
-#endif
 
 	set_freezable();
 
@@ -213,7 +206,21 @@ static int model_dynamic_coeff(struct kbase_ipa_model *model, u32 *coeffp)
 	struct kbase_ipa_model_simple_data *model_data =
 		(struct kbase_ipa_model_simple_data *) model->model_data;
 
+#if MALI_USE_CSF
+	/* On CSF GPUs, the dynamic power for top-level and shader cores is
+	 * estimated separately. Currently there is a single dynamic
+	 * coefficient value provided in the device tree for simple model.
+	 * As per the discussion with HW team the coefficient value needs to
+	 * be scaled down for top-level to limit its contribution in the
+	 * total dynamic power.
+	 */
+	coeffp[KBASE_IPA_BLOCK_TYPE_TOP_LEVEL] =
+		model_data->dynamic_coefficient / TOP_LEVEL_DYN_COEFF_SCALER;
+	coeffp[KBASE_IPA_BLOCK_TYPE_SHADER_CORES] =
+		model_data->dynamic_coefficient;
+#else
 	*coeffp = model_data->dynamic_coefficient;
+#endif
 
 	return 0;
 }
diff --git a/drivers/gpu/arm/bifrost/ipa/mali_kbase_ipa_simple.h b/drivers/gpu/arm/bifrost/ipa/mali_kbase_ipa_simple.h
index 84534e07ec55..40cf0d151a32 100644
--- a/drivers/gpu/arm/bifrost/ipa/mali_kbase_ipa_simple.h
+++ b/drivers/gpu/arm/bifrost/ipa/mali_kbase_ipa_simple.h
@@ -1,11 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  *
- * (C) COPYRIGHT 2017 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2017, 2020 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,8 +17,6 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 #ifndef _KBASE_IPA_SIMPLE_H_
diff --git a/drivers/gpu/arm/bifrost/jm/mali_kbase_jm_defs.h b/drivers/gpu/arm/bifrost/jm/mali_kbase_jm_defs.h
index 4fb5d1d9c410..8b3e03f15964 100644
--- a/drivers/gpu/arm/bifrost/jm/mali_kbase_jm_defs.h
+++ b/drivers/gpu/arm/bifrost/jm/mali_kbase_jm_defs.h
@@ -1,11 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  *
- * (C) COPYRIGHT 2020 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,12 +17,8 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
-
-
 /*
  * Definitions (types, defines, etcs) specific to Job Manager Kbase.
  * They are placed here to allow the hierarchy of header files to work.
@@ -409,6 +406,16 @@ struct kbase_ext_res {
  *                         sync through soft jobs and for the implicit
  *                         synchronization required on access to external
  *                         resources.
+ * @dma_fence.fence_in:    Input fence
+ * @dma_fence.fence:       Points to the dma-buf output fence for this atom.
+ * @dma_fence.context:     The dma-buf fence context number for this atom. A
+ *                         unique context number is allocated to each katom in
+ *                         the context on context creation.
+ * @dma_fence.seqno:       The dma-buf fence sequence number for this atom. This
+ *                         is increased every time this katom uses dma-buf fence
+ * @dma_fence.callbacks:   List of all callbacks set up to wait on other fences
+ * @dma_fence.dep_count:   Atomic counter of number of outstanding dma-buf fence
+ *                         dependencies for this atom.
  * @event_code:            Event code for the job chain represented by the atom,
  *                         both HW and low-level SW events are represented by
  *                         event codes.
@@ -443,6 +450,8 @@ struct kbase_ext_res {
  * @blocked:               flag indicating that atom's resubmission to GPU is
  *                         blocked till the work item is scheduled to return the
  *                         atom to JS.
+ * @seq_nr:                user-space sequence number, to order atoms in some
+ *                         temporal order
  * @pre_dep:               Pointer to atom that this atom has same-slot
  *                         dependency on
  * @post_dep:              Pointer to atom that has same-slot dependency on
@@ -477,11 +486,19 @@ struct kbase_ext_res {
  *                         when transitioning into or out of protected mode.
  *                         Atom will be either entering or exiting the
  *                         protected mode.
+ * @protected_state.enter: entering the protected mode.
+ * @protected_state.exit:  exiting the protected mode.
  * @runnable_tree_node:    The node added to context's job slot specific rb tree
  *                         when the atom becomes runnable.
  * @age:                   Age of atom relative to other atoms in the context,
  *                         is snapshot of the age_count counter in kbase
  *                         context.
+ * @jobslot: Job slot to use when BASE_JD_REQ_JOB_SLOT is specified.
+ * @renderpass_id: Renderpass identifier used to associate an atom that has
+ *                 BASE_JD_REQ_START_RENDERPASS set in its core requirements
+ *                 with an atom that has BASE_JD_REQ_END_RENDERPASS set.
+ * @jc_fragment:          Set of GPU fragment job chains
+ * @retry_count:          TODO: Not used, to be removed
  */
 struct kbase_jd_atom {
 	struct work_struct work;
@@ -516,7 +533,6 @@ struct kbase_jd_atom {
 		 * when working with this sub struct
 		 */
 #if defined(CONFIG_SYNC_FILE)
-		/* Input fence */
 #if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE)
 		struct fence *fence_in;
 #else
@@ -539,14 +555,7 @@ struct kbase_jd_atom {
 #else
 		struct dma_fence *fence;
 #endif
-		/* The dma-buf fence context number for this atom. A unique
-		 * context number is allocated to each katom in the context on
-		 * context creation.
-		 */
 		unsigned int context;
-		/* The dma-buf fence sequence number for this atom. This is
-		 * increased every time this katom uses dma-buf fence.
-		 */
 		atomic_t seqno;
 		/* This contains a list of all callbacks set up to wait on
 		 * other fences.  This atom must be held back from JS until all
@@ -608,7 +617,6 @@ struct kbase_jd_atom {
 
 	atomic_t blocked;
 
-	/* user-space sequence number, to order atoms in some temporal order */
 	u64 seq_nr;
 
 	struct kbase_jd_atom *pre_dep;
@@ -781,6 +789,7 @@ struct kbase_jd_renderpass {
  * @jit_pending_alloc:        A list of just-in-time memory allocation
  *                            soft-jobs which will be reattempted after the
  *                            impending free of other active allocations.
+ * @max_priority:             Max priority level allowed for this context.
  */
 struct kbase_jd_context {
 	struct mutex lock;
@@ -801,6 +810,7 @@ struct kbase_jd_context {
 
 	struct list_head jit_atoms_head;
 	struct list_head jit_pending_alloc;
+	int max_priority;
 };
 
 /**
diff --git a/drivers/gpu/arm/bifrost/jm/mali_kbase_jm_js.h b/drivers/gpu/arm/bifrost/jm/mali_kbase_jm_js.h
index 6c222ceae8ee..e3275369b459 100644
--- a/drivers/gpu/arm/bifrost/jm/mali_kbase_jm_js.h
+++ b/drivers/gpu/arm/bifrost/jm/mali_kbase_jm_js.h
@@ -1,11 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  *
- * (C) COPYRIGHT 2020 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,8 +17,6 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 /*
@@ -32,6 +31,7 @@
 
 /**
  * kbasep_js_devdata_init - Initialize the Job Scheduler
+ * @kbdev: The kbase_device to operate on
  *
  * The struct kbasep_js_device_data sub-structure of kbdev must be zero
  * initialized before passing to the kbasep_js_devdata_init() function. This is
@@ -41,6 +41,7 @@ int kbasep_js_devdata_init(struct kbase_device * const kbdev);
 
 /**
  * kbasep_js_devdata_halt - Halt the Job Scheduler.
+ * @kbdev: The kbase_device to operate on
  *
  * It is safe to call this on kbdev even if it the kbasep_js_device_data
  * sub-structure was never initialized/failed initialization, to give efficient
@@ -58,6 +59,7 @@ void kbasep_js_devdata_halt(struct kbase_device *kbdev);
 
 /**
  * kbasep_js_devdata_term - Terminate the Job Scheduler
+ * @kbdev: The kbase_device to operate on
  *
  * It is safe to call this on kbdev even if it the kbasep_js_device_data
  * sub-structure was never initialized/failed initialization, to give efficient
@@ -75,6 +77,7 @@ void kbasep_js_devdata_term(struct kbase_device *kbdev);
 /**
  * kbasep_js_kctx_init - Initialize the Scheduling Component of a
  *                       struct kbase_context on the Job Scheduler.
+ * @kctx:  The kbase_context to operate on
  *
  * This effectively registers a struct kbase_context with a Job Scheduler.
  *
@@ -89,6 +92,7 @@ int kbasep_js_kctx_init(struct kbase_context *const kctx);
 /**
  * kbasep_js_kctx_term - Terminate the Scheduling Component of a
  *                       struct kbase_context on the Job Scheduler
+ * @kctx:  The kbase_context to operate on
  *
  * This effectively de-registers a struct kbase_context from its Job Scheduler
  *
@@ -108,6 +112,8 @@ void kbasep_js_kctx_term(struct kbase_context *kctx);
  * kbasep_js_add_job - Add a job chain to the Job Scheduler,
  *                     and take necessary actions to
  *                     schedule the context/run the job.
+ * @kctx:  The kbase_context to operate on
+ * @atom: Atom to add
  *
  * This atomically does the following:
  * * Update the numbers of jobs information
@@ -151,7 +157,10 @@ bool kbasep_js_add_job(struct kbase_context *kctx, struct kbase_jd_atom *atom);
 /**
  * kbasep_js_remove_job - Remove a job chain from the Job Scheduler,
  *                        except for its 'retained state'.
- *
+ * @kbdev: The kbase_device to operate on
+ * @kctx:  The kbase_context to operate on
+ * @atom: Atom to remove
+ *
  * Completely removing a job requires several calls:
  * * kbasep_js_copy_atom_retained_state(), to capture the 'retained state' of
  *   the atom
@@ -185,6 +194,9 @@ void kbasep_js_remove_job(struct kbase_device *kbdev,
  * kbasep_js_remove_cancelled_job - Completely remove a job chain from the
  *                                  Job Scheduler, in the case
  *                                  where the job chain was cancelled.
+ * @kbdev: The kbase_device to operate on
+ * @kctx:  The kbase_context to operate on
+ * @katom: Atom to remove
  *
  * This is a variant of kbasep_js_remove_job() that takes care of removing all
  * of the retained state too. This is generally useful for cancelled atoms,
@@ -215,6 +227,9 @@ bool kbasep_js_remove_cancelled_job(struct kbase_device *kbdev,
  * kbasep_js_runpool_requeue_or_kill_ctx - Handling the requeuing/killing of a
  *                                         context that was evicted from the
  *                                         policy queue or runpool.
+ * @kbdev: The kbase_device to operate on
+ * @kctx:  The kbase_context to operate on
+ * @has_pm_ref: tells whether to release Power Manager active reference
  *
  * This should be used whenever handing off a context that has been evicted
  * from the policy queue or the runpool:
@@ -242,6 +257,8 @@ void kbasep_js_runpool_requeue_or_kill_ctx(struct kbase_device *kbdev,
 /**
  * kbasep_js_runpool_release_ctx - Release a refcount of a context being busy,
  *                                 allowing it to be scheduled out.
+ * @kbdev: The kbase_device to operate on
+ * @kctx:  The kbase_context to operate on
  *
  * When the refcount reaches zero and the context might be scheduled out
  * (depending on whether the Scheduling Policy has deemed it so, or if it has
@@ -296,6 +313,9 @@ void kbasep_js_runpool_release_ctx(struct kbase_device *kbdev,
  * kbasep_js_runpool_release_ctx_and_katom_retained_state -  Variant of
  * kbasep_js_runpool_release_ctx() that handles additional
  * actions from completing an atom.
+ * @kbdev:                KBase device
+ * @kctx:                 KBase context
+ * @katom_retained_state: Retained state from the atom
  *
  * This is usually called as part of completing an atom and releasing the
  * refcount on the context held by the atom.
@@ -315,8 +335,12 @@ void kbasep_js_runpool_release_ctx_and_katom_retained_state(
 		struct kbasep_js_atom_retained_state *katom_retained_state);
 
 /**
- * kbasep_js_runpool_release_ctx_nolock -  Variant of
- * kbase_js_runpool_release_ctx() that assumes that
+ * kbasep_js_runpool_release_ctx_nolock -
+ * Variant of kbasep_js_runpool_release_ctx() without locks
+ * @kbdev: KBase device
+ * @kctx:  KBase context
+ *
+ * Variant of kbasep_js_runpool_release_ctx() that assumes that
  * kbasep_js_device_data::runpool_mutex and
  * kbasep_js_kctx_info::ctx::jsctx_mutex are held by the caller, and does not
  * attempt to schedule new contexts.
@@ -326,6 +350,8 @@ void kbasep_js_runpool_release_ctx_nolock(struct kbase_device *kbdev,
 
 /**
  * kbasep_js_schedule_privileged_ctx -  Schedule in a privileged context
+ * @kbdev: KBase device
+ * @kctx:  KBase context
  *
  * This schedules a context in regardless of the context priority.
  * If the runpool is full, a context will be forced out of the runpool and the
@@ -351,6 +377,8 @@ void kbasep_js_schedule_privileged_ctx(struct kbase_device *kbdev,
 /**
  * kbasep_js_release_privileged_ctx -  Release a privileged context,
  * allowing it to be scheduled out.
+ * @kbdev: KBase device
+ * @kctx:  KBase context
  *
  * See kbasep_js_runpool_release_ctx for potential side effects.
  *
@@ -368,6 +396,7 @@ void kbasep_js_release_privileged_ctx(struct kbase_device *kbdev,
 
 /**
  * kbase_js_try_run_jobs -  Try to submit the next job on each slot
+ * @kbdev: KBase device
  *
  * The following locks may be used:
  * * kbasep_js_device_data::runpool_mutex
@@ -378,6 +407,7 @@ void kbase_js_try_run_jobs(struct kbase_device *kbdev);
 /**
  * kbasep_js_suspend -  Suspend the job scheduler during a Power Management
  *                      Suspend event.
+ * @kbdev: KBase device
  *
  * Causes all contexts to be removed from the runpool, and prevents any
  * contexts from (re)entering the runpool.
@@ -401,6 +431,7 @@ void kbasep_js_suspend(struct kbase_device *kbdev);
 /**
  * kbasep_js_resume - Resume the Job Scheduler after a Power Management
  *                    Resume event.
+ * @kbdev: KBase device
  *
  * This restores the actions from kbasep_js_suspend():
  * * Schedules contexts back into the runpool
@@ -412,7 +443,7 @@ void kbasep_js_resume(struct kbase_device *kbdev);
  * kbase_js_dep_resolved_submit - Submit an atom to the job scheduler.
  *
  * @kctx:  Context pointer
- * @atom:  Pointer to the atom to submit
+ * @katom:  Pointer to the atom to submit
  *
  * The atom is enqueued on the context's ringbuffer. The caller must have
  * ensured that all dependencies can be represented in the ringbuffer.
@@ -457,7 +488,7 @@ struct kbase_jd_atom *kbase_js_pull(struct kbase_context *kctx, int js);
  * kbase_js_unpull - Return an atom to the job scheduler ringbuffer.
  *
  * @kctx:  Context pointer
- * @atom:  Pointer to the atom to unpull
+ * @katom:  Pointer to the atom to unpull
  *
  * An atom is 'unpulled' if execution is stopped but intended to be returned to
  * later. The most common reason for this is that the atom has been
@@ -584,7 +615,6 @@ void kbase_js_set_timeouts(struct kbase_device *kbdev);
  */
 void kbase_js_set_ctx_priority(struct kbase_context *kctx, int new_priority);
 
-
 /**
  * kbase_js_update_ctx_priority - update the context priority
  *
@@ -603,6 +633,8 @@ void kbase_js_update_ctx_priority(struct kbase_context *kctx);
 /**
  * kbasep_js_is_submit_allowed - Check that a context is allowed to submit
  *                               jobs on this policy
+ * @js_devdata: KBase Job Scheduler Device Data
+ * @kctx:       KBase context
  *
  * The purpose of this abstraction is to hide the underlying data size,
  * and wrap up the long repeated line of code.
@@ -625,13 +657,15 @@ static inline bool kbasep_js_is_submit_allowed(
 	test_bit = (u16) (1u << kctx->as_nr);
 
 	is_allowed = (bool) (js_devdata->runpool_irq.submit_allowed & test_bit);
-	dev_dbg(kctx->kbdev->dev, "JS: submit %s allowed on %p (as=%d)",
+	dev_dbg(kctx->kbdev->dev, "JS: submit %s allowed on %pK (as=%d)",
 			is_allowed ? "is" : "isn't", (void *)kctx, kctx->as_nr);
 	return is_allowed;
 }
 
 /**
  * kbasep_js_set_submit_allowed - Allow a context to submit jobs on this policy
+ * @js_devdata: KBase Job Scheduler Device Data
+ * @kctx:       KBase context
  *
  * The purpose of this abstraction is to hide the underlying data size,
  * and wrap up the long repeated line of code.
@@ -650,7 +684,7 @@ static inline void kbasep_js_set_submit_allowed(
 
 	set_bit = (u16) (1u << kctx->as_nr);
 
-	dev_dbg(kctx->kbdev->dev, "JS: Setting Submit Allowed on %p (as=%d)",
+	dev_dbg(kctx->kbdev->dev, "JS: Setting Submit Allowed on %pK (as=%d)",
 			kctx, kctx->as_nr);
 
 	js_devdata->runpool_irq.submit_allowed |= set_bit;
@@ -659,6 +693,8 @@ static inline void kbasep_js_set_submit_allowed(
 /**
  * kbasep_js_clear_submit_allowed - Prevent a context from submitting more
  *                                  jobs on this policy
+ * @js_devdata: KBase Job Scheduler Device Data
+ * @kctx:       KBase context
  *
  * The purpose of this abstraction is to hide the underlying data size,
  * and wrap up the long repeated line of code.
@@ -679,13 +715,17 @@ static inline void kbasep_js_clear_submit_allowed(
 	clear_bit = (u16) (1u << kctx->as_nr);
 	clear_mask = ~clear_bit;
 
-	dev_dbg(kctx->kbdev->dev, "JS: Clearing Submit Allowed on %p (as=%d)",
+	dev_dbg(kctx->kbdev->dev, "JS: Clearing Submit Allowed on %pK (as=%d)",
 			kctx, kctx->as_nr);
 
 	js_devdata->runpool_irq.submit_allowed &= clear_mask;
 }
 
 /**
+ * kbasep_js_atom_retained_state_init_invalid -
+ * Create an initial 'invalid' atom retained state
+ * @retained_state: pointer where to create and initialize the state
+ *
  * Create an initial 'invalid' atom retained state, that requires no
  * atom-related work to be done on releasing with
  * kbasep_js_runpool_release_ctx_and_katom_retained_state()
@@ -699,6 +739,10 @@ static inline void kbasep_js_atom_retained_state_init_invalid(
 }
 
 /**
+ * kbasep_js_atom_retained_state_copy() - Copy atom state
+ * @retained_state: where to copy
+ * @katom:          where to copy from
+ *
  * Copy atom state that can be made available after jd_done_nolock() is called
  * on that atom.
  */
@@ -743,7 +787,7 @@ static inline bool kbasep_js_has_atom_finished(
  *  kbasep_js_atom_retained_state_is_valid - Determine whether a struct
  *                                           kbasep_js_atom_retained_state
  *                                           is valid
- * @katom_retained_state        the atom's retained state to check
+ * @katom_retained_state:        the atom's retained state to check
  *
  * An invalid struct kbasep_js_atom_retained_state is allowed, and indicates
  * that the code should just ignore it.
@@ -759,6 +803,8 @@ static inline bool kbasep_js_atom_retained_state_is_valid(
 
 /**
  * kbase_js_runpool_inc_context_count - Increment number of running contexts.
+ * @kbdev: KBase device
+ * @kctx:  KBase context
  *
  * The following locking conditions are made on the caller:
  * * The caller must hold the kbasep_js_kctx_info::ctx::jsctx_mutex.
@@ -795,6 +841,8 @@ static inline void kbase_js_runpool_inc_context_count(
 /**
  * kbase_js_runpool_dec_context_count - decrement number of running contexts.
  *
+ * @kbdev: KBase device
+ * @kctx:  KBase context
  * The following locking conditions are made on the caller:
  * * The caller must hold the kbasep_js_kctx_info::ctx::jsctx_mutex.
  * * The caller must hold the kbasep_js_device_data::runpool_mutex
@@ -889,4 +937,17 @@ static inline base_jd_prio kbasep_js_sched_prio_to_atom_prio(int sched_prio)
 	return kbasep_js_relative_priority_to_atom[prio_idx];
 }
 
+/**
+ * kbase_js_priority_check - Check the priority requested
+ *
+ * @kbdev:    Device pointer
+ * @priority: Requested priority
+ *
+ * This will determine whether the requested priority can be satisfied.
+ *
+ * Return: The same or lower priority than requested.
+ */
+
+base_jd_prio kbase_js_priority_check(struct kbase_device *kbdev, base_jd_prio priority);
+
 #endif	/* _KBASE_JM_JS_H_ */
diff --git a/drivers/gpu/arm/bifrost/jm/mali_kbase_js_defs.h b/drivers/gpu/arm/bifrost/jm/mali_kbase_js_defs.h
index 900ecd2c1b8d..68660a9709c3 100644
--- a/drivers/gpu/arm/bifrost/jm/mali_kbase_js_defs.h
+++ b/drivers/gpu/arm/bifrost/jm/mali_kbase_js_defs.h
@@ -1,11 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  *
- * (C) COPYRIGHT 2011-2018, 2020 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2011-2018, 2020-2021 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,33 +17,15 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
-
 /**
- * @file mali_kbase_js.h
- * Job Scheduler Type Definitions
+ * DOC: Job Scheduler Type Definitions
  */
 
 #ifndef _KBASE_JS_DEFS_H_
 #define _KBASE_JS_DEFS_H_
 
-/**
- * @addtogroup base_api
- * @{
- */
-
-/**
- * @addtogroup base_kbase_api
- * @{
- */
-
-/**
- * @addtogroup kbase_js
- * @{
- */
 /* Forward decls */
 struct kbase_device;
 struct kbase_jd_atom;
@@ -50,11 +33,14 @@ struct kbase_jd_atom;
 
 typedef u32 kbase_context_flags;
 
-/** Callback function run on all of a context's jobs registered with the Job
- * Scheduler */
-typedef void (*kbasep_js_ctx_job_cb)(struct kbase_device *kbdev, struct kbase_jd_atom *katom);
+/*
+ * typedef kbasep_js_ctx_job_cb - Callback function run on all of a context's
+ * jobs registered with the Job Scheduler
+ */
+typedef void (*kbasep_js_ctx_job_cb)(struct kbase_device *kbdev,
+				     struct kbase_jd_atom *katom);
 
-/**
+/*
  * @brief Maximum number of jobs that can be submitted to a job slot whilst
  * inside the IRQ handler.
  *
@@ -65,7 +51,15 @@ typedef void (*kbasep_js_ctx_job_cb)(struct kbase_device *kbdev, struct kbase_jd
 #define KBASE_JS_MAX_JOB_SUBMIT_PER_SLOT_PER_IRQ 2
 
 /**
- * @brief Context attributes
+ * enum kbasep_js_ctx_attr - Context attributes
+ * @KBASEP_JS_CTX_ATTR_COMPUTE: Attribute indicating a context that contains
+ *                              Compute jobs.
+ * @KBASEP_JS_CTX_ATTR_NON_COMPUTE: Attribute indicating a context that contains
+ * 	Non-Compute jobs.
+ * @KBASEP_JS_CTX_ATTR_COMPUTE_ALL_CORES: Attribute indicating that a context
+ * 	contains compute-job atoms that aren't restricted to a coherent group,
+ * 	and can run on all cores.
+ * @KBASEP_JS_CTX_ATTR_COUNT: Must be the last in the enum
  *
  * Each context attribute can be thought of as a boolean value that caches some
  * state information about either the runpool, or the context:
@@ -82,61 +76,70 @@ typedef void (*kbasep_js_ctx_job_cb)(struct kbase_device *kbdev, struct kbase_jd
  * - The runpool holds a refcount of how many contexts in the runpool have this
  * attribute.
  * - The context holds a refcount of how many atoms have this attribute.
+ *
+ * KBASEP_JS_CTX_ATTR_COMPUTE:
+ * Attribute indicating a context that contains Compute jobs. That is,
+ * the context has jobs of type @ref BASE_JD_REQ_ONLY_COMPUTE
+ *
+ * @note A context can be both 'Compute' and 'Non Compute' if it contains
+ * both types of jobs.
+ *
+ * KBASEP_JS_CTX_ATTR_NON_COMPUTE:
+ * Attribute indicating a context that contains Non-Compute jobs. That is,
+ * the context has some jobs that are \b not of type @ref
+ * BASE_JD_REQ_ONLY_COMPUTE.
+ *
+ * @note A context can be both 'Compute' and 'Non Compute' if it contains
+ * both types of jobs.
+ *
+ * KBASEP_JS_CTX_ATTR_COMPUTE_ALL_CORES:
+ * Attribute indicating that a context contains compute-job atoms that
+ * aren't restricted to a coherent group, and can run on all cores.
+ *
+ * Specifically, this is when the atom's \a core_req satisfy:
+ * - (\a core_req & (BASE_JD_REQ_CS | BASE_JD_REQ_ONLY_COMPUTE | BASE_JD_REQ_T) // uses slot 1 or slot 2
+ * - && !(\a core_req & BASE_JD_REQ_COHERENT_GROUP) // not restricted to coherent groups
+ *
+ * Such atoms could be blocked from running if one of the coherent groups
+ * is being used by another job slot, so tracking this context attribute
+ * allows us to prevent such situations.
+ *
+ * @note This doesn't take into account the 1-coregroup case, where all
+ * compute atoms would effectively be able to run on 'all cores', but
+ * contexts will still not always get marked with this attribute. Instead,
+ * it is the caller's responsibility to take into account the number of
+ * coregroups when interpreting this attribute.
+ *
+ * @note Whilst Tiler atoms are normally combined with
+ * BASE_JD_REQ_COHERENT_GROUP, it is possible to send such atoms without
+ * BASE_JD_REQ_COHERENT_GROUP set. This is an unlikely case, but it's easy
+ * enough to handle anyway.
+ *
+ *
  */
 enum kbasep_js_ctx_attr {
-	/** Attribute indicating a context that contains Compute jobs. That is,
-	 * the context has jobs of type @ref BASE_JD_REQ_ONLY_COMPUTE
-	 *
-	 * @note A context can be both 'Compute' and 'Non Compute' if it contains
-	 * both types of jobs.
-	 */
 	KBASEP_JS_CTX_ATTR_COMPUTE,
-
-	/** Attribute indicating a context that contains Non-Compute jobs. That is,
-	 * the context has some jobs that are \b not of type @ref
-	 * BASE_JD_REQ_ONLY_COMPUTE.
-	 *
-	 * @note A context can be both 'Compute' and 'Non Compute' if it contains
-	 * both types of jobs.
-	 */
 	KBASEP_JS_CTX_ATTR_NON_COMPUTE,
-
-	/** Attribute indicating that a context contains compute-job atoms that
-	 * aren't restricted to a coherent group, and can run on all cores.
-	 *
-	 * Specifically, this is when the atom's \a core_req satisfy:
-	 * - (\a core_req & (BASE_JD_REQ_CS | BASE_JD_REQ_ONLY_COMPUTE | BASE_JD_REQ_T) // uses slot 1 or slot 2
-	 * - && !(\a core_req & BASE_JD_REQ_COHERENT_GROUP) // not restricted to coherent groups
-	 *
-	 * Such atoms could be blocked from running if one of the coherent groups
-	 * is being used by another job slot, so tracking this context attribute
-	 * allows us to prevent such situations.
-	 *
-	 * @note This doesn't take into account the 1-coregroup case, where all
-	 * compute atoms would effectively be able to run on 'all cores', but
-	 * contexts will still not always get marked with this attribute. Instead,
-	 * it is the caller's responsibility to take into account the number of
-	 * coregroups when interpreting this attribute.
-	 *
-	 * @note Whilst Tiler atoms are normally combined with
-	 * BASE_JD_REQ_COHERENT_GROUP, it is possible to send such atoms without
-	 * BASE_JD_REQ_COHERENT_GROUP set. This is an unlikely case, but it's easy
-	 * enough to handle anyway.
-	 */
 	KBASEP_JS_CTX_ATTR_COMPUTE_ALL_CORES,
-
-	/** Must be the last in the enum */
 	KBASEP_JS_CTX_ATTR_COUNT
 };
 
 enum {
-	/** Bit indicating that new atom should be started because this atom completed */
+	/*
+	 * Bit indicating that new atom should be started because this atom
+	 * completed
+	 */
 	KBASE_JS_ATOM_DONE_START_NEW_ATOMS = (1u << 0),
-	/** Bit indicating that the atom was evicted from the JS_NEXT registers */
+	/*
+	 * Bit indicating that the atom was evicted from the JS_NEXT registers
+	 */
 	KBASE_JS_ATOM_DONE_EVICTED_FROM_NEXT = (1u << 1)
 };
 
-/** Combination of KBASE_JS_ATOM_DONE_<...> bits */
+/**
+ * typedef kbasep_js_atom_done_code - Combination of KBASE_JS_ATOM_DONE_<...>
+ * bits
+ */
 typedef u32 kbasep_js_atom_done_code;
 
 /*
@@ -168,7 +171,9 @@ enum {
  * Internal atom priority defines for kbase_jd_atom::sched_prio
  */
 enum {
-	KBASE_JS_ATOM_SCHED_PRIO_HIGH = 0,
+	KBASE_JS_ATOM_SCHED_PRIO_FIRST = 0,
+	KBASE_JS_ATOM_SCHED_PRIO_REALTIME = KBASE_JS_ATOM_SCHED_PRIO_FIRST,
+	KBASE_JS_ATOM_SCHED_PRIO_HIGH,
 	KBASE_JS_ATOM_SCHED_PRIO_MED,
 	KBASE_JS_ATOM_SCHED_PRIO_LOW,
 	KBASE_JS_ATOM_SCHED_PRIO_COUNT,
@@ -183,7 +188,70 @@ enum {
 #define KBASE_JS_ATOM_SCHED_PRIO_DEFAULT KBASE_JS_ATOM_SCHED_PRIO_MED
 
 /**
- * @brief KBase Device Data Job Scheduler sub-structure
+ * struct kbasep_js_device_data - KBase Device Data Job Scheduler sub-structure
+ * @runpool_irq: Sub-structure to collect together Job Scheduling data used in
+ *	IRQ context. The hwaccess_lock must be held when accessing.
+ * @runpool_irq.submit_allowed: Bitvector indicating whether a currently
+ * 	scheduled context is allowed to submit jobs. When bit 'N' is set in
+ * 	this, it indicates whether the context bound to address space 'N' is
+ * 	allowed to submit jobs.
+ * @runpool_irq.ctx_attr_ref_count: Array of Context Attributes Ref_counters:
+ * 	  Each is large enough to hold a refcount of the number of contexts
+ * 	that can fit into the runpool. This is currently BASE_MAX_NR_AS.
+ * 	  Note that when BASE_MAX_NR_AS==16 we need 5 bits (not 4) to store
+ * 	the refcount. Hence, it's not worthwhile reducing this to
+ * 	bit-manipulation on u32s to save space (where in contrast, 4 bit
+ * 	sub-fields would be easy to do and would save space).
+ * 	  Whilst this must not become negative, the sign bit is used for:
+ * 	- error detection in debug builds
+ * 	- Optimization: it is undefined for a signed int to overflow, and so
+ * 	the compiler can optimize for that never happening (thus, no masking
+ * 	is required on updating the variable)
+ * @runpool_irq.slot_affinities: Affinity management and tracking. Bitvector
+ *	to aid affinity checking. Element 'n' bit 'i' indicates that slot 'n'
+ *	is using core i (i.e. slot_affinity_refcount[n][i] > 0)
+ * @runpool_irq.slot_affinity_refcount: Array of refcount for each core owned
+ *	by each slot. Used to generate the slot_affinities array of bitvectors.
+ *	  The value of the refcount will not exceed BASE_JM_SUBMIT_SLOTS,
+ *	because it is refcounted only when a job is definitely about to be
+ *	submitted to a slot, and is de-refcounted immediately after a job
+ *	finishes
+ * @schedule_sem: Scheduling semaphore. This must be held when calling
+ *	kbase_jm_kick()
+ * @ctx_list_pullable: List of contexts that can currently be pulled from
+ * @ctx_list_unpullable: List of contexts that can not currently be pulled
+ *	from, but have jobs currently running.
+ * @nr_user_contexts_running: Number of currently scheduled user contexts
+ *	(excluding ones that are not submitting jobs)
+ * @nr_all_contexts_running: Number of currently scheduled contexts (including
+ *	ones that are not submitting jobs)
+ * @js_reqs: Core Requirements to match up with base_js_atom's core_req member
+ *	@note This is a write-once member, and so no locking is required to
+ *	read
+ * @scheduling_period_ns:	Value for JS_SCHEDULING_PERIOD_NS
+ * @soft_stop_ticks:		Value for JS_SOFT_STOP_TICKS
+ * @soft_stop_ticks_cl:		Value for JS_SOFT_STOP_TICKS_CL
+ * @hard_stop_ticks_ss:		Value for JS_HARD_STOP_TICKS_SS
+ * @hard_stop_ticks_cl:		Value for JS_HARD_STOP_TICKS_CL
+ * @hard_stop_ticks_dumping:	Value for JS_HARD_STOP_TICKS_DUMPING
+ * @gpu_reset_ticks_ss:		Value for JS_RESET_TICKS_SS
+ * @gpu_reset_ticks_cl:		Value for JS_RESET_TICKS_CL
+ * @gpu_reset_ticks_dumping:	Value for JS_RESET_TICKS_DUMPING
+ * @ctx_timeslice_ns:		Value for JS_CTX_TIMESLICE_NS
+ * @suspended_soft_jobs_list:	List of suspended soft jobs
+ * @softstop_always:		Support soft-stop on a single context
+ * @init_status: The initialized-flag is placed at the end, to avoid
+ * 	cache-pollution (we should only be using this during init/term paths).
+ * 	@note This is a write-once member, and so no locking is required to
+ * 	read
+ * @nr_contexts_pullable: Number of contexts that can currently be pulled from
+ * @nr_contexts_runnable: Number of contexts that can either be pulled from or
+ *	are currently running
+ * @soft_job_timeout_ms: Value for JS_SOFT_JOB_TIMEOUT
+ * @queue_mutex: Queue Lock, used to access the Policy's queue of contexts
+ * 	independently of the Run Pool.
+ *	Of course, you don't need the Run Pool lock to access this.
+ * @runpool_mutex: Run Pool mutex, for managing contexts within the runpool.
  *
  * This encapsulates the current context of the Job Scheduler on a particular
  * device. This context is global to the device, and is not tied to any
@@ -191,121 +259,49 @@ enum {
  *
  * nr_contexts_running and as_free are optimized for packing together (by making
  * them smaller types than u32). The operations on them should rarely involve
- * masking. The use of signed types for arithmetic indicates to the compiler that
- * the value will not rollover (which would be undefined behavior), and so under
- * the Total License model, it is free to make optimizations based on that (i.e.
- * to remove masking).
+ * masking. The use of signed types for arithmetic indicates to the compiler
+ * that the value will not rollover (which would be undefined behavior), and so
+ * under the Total License model, it is free to make optimizations based on
+ * that (i.e. to remove masking).
  */
 struct kbasep_js_device_data {
-	/* Sub-structure to collect together Job Scheduling data used in IRQ
-	 * context. The hwaccess_lock must be held when accessing. */
 	struct runpool_irq {
-		/** Bitvector indicating whether a currently scheduled context is allowed to submit jobs.
-		 * When bit 'N' is set in this, it indicates whether the context bound to address space
-		 * 'N' is allowed to submit jobs.
-		 */
 		u16 submit_allowed;
-
-		/** Context Attributes:
-		 * Each is large enough to hold a refcount of the number of contexts
-		 * that can fit into the runpool. This is currently BASE_MAX_NR_AS
-		 *
-		 * Note that when BASE_MAX_NR_AS==16 we need 5 bits (not 4) to store
-		 * the refcount. Hence, it's not worthwhile reducing this to
-		 * bit-manipulation on u32s to save space (where in contrast, 4 bit
-		 * sub-fields would be easy to do and would save space).
-		 *
-		 * Whilst this must not become negative, the sign bit is used for:
-		 * - error detection in debug builds
-		 * - Optimization: it is undefined for a signed int to overflow, and so
-		 * the compiler can optimize for that never happening (thus, no masking
-		 * is required on updating the variable) */
 		s8 ctx_attr_ref_count[KBASEP_JS_CTX_ATTR_COUNT];
-
-		/*
-		 * Affinity management and tracking
-		 */
-		/** Bitvector to aid affinity checking. Element 'n' bit 'i' indicates
-		 * that slot 'n' is using core i (i.e. slot_affinity_refcount[n][i] > 0) */
 		u64 slot_affinities[BASE_JM_MAX_NR_SLOTS];
-		/** Refcount for each core owned by each slot. Used to generate the
-		 * slot_affinities array of bitvectors
-		 *
-		 * The value of the refcount will not exceed BASE_JM_SUBMIT_SLOTS,
-		 * because it is refcounted only when a job is definitely about to be
-		 * submitted to a slot, and is de-refcounted immediately after a job
-		 * finishes */
 		s8 slot_affinity_refcount[BASE_JM_MAX_NR_SLOTS][64];
 	} runpool_irq;
-
-	/**
-	 * Scheduling semaphore. This must be held when calling
-	 * kbase_jm_kick()
-	 */
 	struct semaphore schedule_sem;
-
-	/**
-	 * List of contexts that can currently be pulled from
-	 */
-	struct list_head ctx_list_pullable[BASE_JM_MAX_NR_SLOTS][KBASE_JS_ATOM_SCHED_PRIO_COUNT];
-	/**
-	 * List of contexts that can not currently be pulled from, but have
-	 * jobs currently running.
-	 */
-	struct list_head ctx_list_unpullable[BASE_JM_MAX_NR_SLOTS][KBASE_JS_ATOM_SCHED_PRIO_COUNT];
-
-	/** Number of currently scheduled user contexts (excluding ones that are not submitting jobs) */
+	struct list_head ctx_list_pullable[BASE_JM_MAX_NR_SLOTS]
+					  [KBASE_JS_ATOM_SCHED_PRIO_COUNT];
+	struct list_head ctx_list_unpullable[BASE_JM_MAX_NR_SLOTS]
+					    [KBASE_JS_ATOM_SCHED_PRIO_COUNT];
 	s8 nr_user_contexts_running;
-	/** Number of currently scheduled contexts (including ones that are not submitting jobs) */
 	s8 nr_all_contexts_running;
-
-	/** Core Requirements to match up with base_js_atom's core_req memeber
-	 * @note This is a write-once member, and so no locking is required to read */
 	base_jd_core_req js_reqs[BASE_JM_MAX_NR_SLOTS];
 
-	u32 scheduling_period_ns;    /*< Value for JS_SCHEDULING_PERIOD_NS */
-	u32 soft_stop_ticks;	     /*< Value for JS_SOFT_STOP_TICKS */
-	u32 soft_stop_ticks_cl;	     /*< Value for JS_SOFT_STOP_TICKS_CL */
-	u32 hard_stop_ticks_ss;	     /*< Value for JS_HARD_STOP_TICKS_SS */
-	u32 hard_stop_ticks_cl;	     /*< Value for JS_HARD_STOP_TICKS_CL */
-	u32 hard_stop_ticks_dumping; /*< Value for JS_HARD_STOP_TICKS_DUMPING */
-	u32 gpu_reset_ticks_ss;	     /*< Value for JS_RESET_TICKS_SS */
-	u32 gpu_reset_ticks_cl;	     /*< Value for JS_RESET_TICKS_CL */
-	u32 gpu_reset_ticks_dumping; /*< Value for JS_RESET_TICKS_DUMPING */
-	u32 ctx_timeslice_ns;		 /**< Value for JS_CTX_TIMESLICE_NS */
+	u32 scheduling_period_ns;
+	u32 soft_stop_ticks;
+	u32 soft_stop_ticks_cl;
+	u32 hard_stop_ticks_ss;
+	u32 hard_stop_ticks_cl;
+	u32 hard_stop_ticks_dumping;
+	u32 gpu_reset_ticks_ss;
+	u32 gpu_reset_ticks_cl;
+	u32 gpu_reset_ticks_dumping;
+	u32 ctx_timeslice_ns;
 
-	/** List of suspended soft jobs */
 	struct list_head suspended_soft_jobs_list;
 
 #ifdef CONFIG_MALI_BIFROST_DEBUG
-	/* Support soft-stop on a single context */
 	bool softstop_always;
 #endif				/* CONFIG_MALI_BIFROST_DEBUG */
-
-	/** The initalized-flag is placed at the end, to avoid cache-pollution (we should
-	 * only be using this during init/term paths).
-	 * @note This is a write-once member, and so no locking is required to read */
 	int init_status;
-
-	/* Number of contexts that can currently be pulled from */
 	u32 nr_contexts_pullable;
-
-	/* Number of contexts that can either be pulled from or are currently
-	 * running */
 	atomic_t nr_contexts_runnable;
-
-	/** Value for JS_SOFT_JOB_TIMEOUT */
 	atomic_t soft_job_timeout_ms;
-
-	/**
-	 * Queue Lock, used to access the Policy's queue of contexts
-	 * independently of the Run Pool.
-	 *
-	 * Of course, you don't need the Run Pool lock to access this.
-	 */
 	struct mutex queue_mutex;
-
-	/**
+	/*
 	 * Run Pool mutex, for managing contexts within the runpool.
 	 * Unless otherwise specified, you must hold this lock whilst accessing
 	 * any members that follow
@@ -317,61 +313,59 @@ struct kbasep_js_device_data {
 };
 
 /**
- * @brief KBase Context Job Scheduling information structure
+ * struct kbasep_js_kctx_info - KBase Context Job Scheduling information
+ *	structure
+ * @ctx: Job Scheduler Context information sub-structure. Its members are
+ *	accessed regardless of whether the context is:
+ *	- In the Policy's Run Pool
+ *	- In the Policy's Queue
+ *	- Not queued nor in the Run Pool.
+ *	You must obtain the @ctx.jsctx_mutex before accessing any other members
+ *	of this substructure.
+ *	You may not access any of its members from IRQ context.
+ * @ctx.jsctx_mutex: Job Scheduler Context lock
+ * @ctx.nr_jobs: Number of jobs ready to run - does not include
+ *	the jobs waiting in the dispatcher, and dependency-only
+ *	jobs. See kbase_jd_context::job_nr for such jobs
+ * @ctx.ctx_attr_ref_count: Context Attributes ref count. Each is large enough
+ *	to hold a refcount of the number of atoms on the context.
+ * @ctx.is_scheduled_wait: Wait queue to wait for KCTX_SCHEDULED flag state
+ *	changes.
+ * @ctx.ctx_list_entry: Link implementing JS queues. Context can be present on
+ *	one list per job slot.
+ * @init_status: The initialized-flag is placed at the end, to avoid
+ *	cache-pollution (we should only be using this during init/term paths)
  *
  * This is a substructure in the struct kbase_context that encapsulates all the
  * scheduling information.
  */
 struct kbasep_js_kctx_info {
-
-	/**
-	 * Job Scheduler Context information sub-structure. These members are
-	 * accessed regardless of whether the context is:
-	 * - In the Policy's Run Pool
-	 * - In the Policy's Queue
-	 * - Not queued nor in the Run Pool.
-	 *
-	 * You must obtain the jsctx_mutex before accessing any other members of
-	 * this substructure.
-	 *
-	 * You may not access any of these members from IRQ context.
-	 */
 	struct kbase_jsctx {
-		struct mutex jsctx_mutex;		    /**< Job Scheduler Context lock */
+		struct mutex jsctx_mutex;
 
-		/** Number of jobs <b>ready to run</b> - does \em not include the jobs waiting in
-		 * the dispatcher, and dependency-only jobs. See kbase_jd_context::job_nr
-		 * for such jobs*/
 		u32 nr_jobs;
-
-		/** Context Attributes:
-		 * Each is large enough to hold a refcount of the number of atoms on
-		 * the context. **/
 		u32 ctx_attr_ref_count[KBASEP_JS_CTX_ATTR_COUNT];
-
-		/**
-		 * Wait queue to wait for KCTX_SHEDULED flag state changes.
-		 * */
 		wait_queue_head_t is_scheduled_wait;
-
-		/** Link implementing JS queues. Context can be present on one
-		 * list per job slot
-		 */
 		struct list_head ctx_list_entry[BASE_JM_MAX_NR_SLOTS];
 	} ctx;
-
-	/* The initalized-flag is placed at the end, to avoid cache-pollution (we should
-	 * only be using this during init/term paths) */
 	int init_status;
 };
 
-/** Subset of atom state that can be available after jd_done_nolock() is called
+/**
+ * struct kbasep_js_atom_retained_state - Subset of atom state.
+ * @event_code: to determine whether the atom has finished
+ * @core_req: core requirements
+ * @sched_priority: priority
+ * @device_nr: Core group atom was executed on
+ *
+ * Subset of atom state that can be available after jd_done_nolock() is called
  * on that atom. A copy must be taken via kbasep_js_atom_retained_state_copy(),
- * because the original atom could disappear. */
+ * because the original atom could disappear.
+ */
 struct kbasep_js_atom_retained_state {
-	/** Event code - to determine whether the atom has finished */
+	/* Event code - to determine whether the atom has finished */
 	enum base_jd_event_code event_code;
-	/** core requirements */
+	/* core requirements */
 	base_jd_core_req core_req;
 	/* priority */
 	int sched_priority;
@@ -380,30 +374,23 @@ struct kbasep_js_atom_retained_state {
 
 };
 
-/**
+/*
  * Value signifying 'no retry on a slot required' for:
  * - kbase_js_atom_retained_state::retry_submit_on_slot
  * - kbase_jd_atom::retry_submit_on_slot
  */
 #define KBASEP_JS_RETRY_SUBMIT_SLOT_INVALID (-1)
 
-/**
- * base_jd_core_req value signifying 'invalid' for a kbase_jd_atom_retained_state.
- *
- * @see kbase_atom_retained_state_is_valid()
+/*
+ * base_jd_core_req value signifying 'invalid' for a
+ * kbase_jd_atom_retained_state. See kbase_atom_retained_state_is_valid()
  */
 #define KBASEP_JS_ATOM_RETAINED_STATE_CORE_REQ_INVALID BASE_JD_REQ_DEP
 
-/**
- * @brief The JS timer resolution, in microseconds
- *
+/*
+ * The JS timer resolution, in microseconds
  * Any non-zero difference in time will be at least this size.
  */
 #define KBASEP_JS_TICK_RESOLUTION_US 1
 
-
-	  /** @} *//* end group kbase_js */
-	  /** @} *//* end group base_kbase_api */
-	  /** @} *//* end group base_api */
-
-#endif				/* _KBASE_JS_DEFS_H_ */
+#endif /* _KBASE_JS_DEFS_H_ */
diff --git a/drivers/gpu/arm/bifrost/mali_base_hwconfig_features.h b/drivers/gpu/arm/bifrost/mali_base_hwconfig_features.h
index 0dc08381bee6..bdc769fd4f06 100644
--- a/drivers/gpu/arm/bifrost/mali_base_hwconfig_features.h
+++ b/drivers/gpu/arm/bifrost/mali_base_hwconfig_features.h
@@ -1,11 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  *
- * (C) COPYRIGHT 2014-2020 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2021 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,8 +17,6 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 /* AUTOMATICALLY GENERATED FILE. If you want to amend the issues/features,
@@ -50,12 +49,12 @@ enum base_hw_feature {
 	BASE_HW_FEATURE_FLUSH_REDUCTION,
 	BASE_HW_FEATURE_COHERENCY_REG,
 	BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
-	BASE_HW_FEATURE_AARCH64_MMU,
 	BASE_HW_FEATURE_TLS_HASHING,
 	BASE_HW_FEATURE_THREAD_GROUP_SPLIT,
 	BASE_HW_FEATURE_CLEAN_ONLY_SAFE,
 	BASE_HW_FEATURE_IDVS_GROUP_SIZE,
 	BASE_HW_FEATURE_L2_CONFIG,
+	BASE_HW_FEATURE_ASN_HASH,
 	BASE_HW_FEATURE_END
 };
 
@@ -85,7 +84,6 @@ static const enum base_hw_feature base_hw_features_tMIx[] = {
 	BASE_HW_FEATURE_THREAD_GROUP_SPLIT,
 	BASE_HW_FEATURE_FLUSH_REDUCTION,
 	BASE_HW_FEATURE_COHERENCY_REG,
-	BASE_HW_FEATURE_AARCH64_MMU,
 	BASE_HW_FEATURE_END
 };
 
@@ -112,7 +110,6 @@ static const enum base_hw_feature base_hw_features_tHEx[] = {
 	BASE_HW_FEATURE_FLUSH_REDUCTION,
 	BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
 	BASE_HW_FEATURE_COHERENCY_REG,
-	BASE_HW_FEATURE_AARCH64_MMU,
 	BASE_HW_FEATURE_END
 };
 
@@ -139,7 +136,6 @@ static const enum base_hw_feature base_hw_features_tSIx[] = {
 	BASE_HW_FEATURE_FLUSH_REDUCTION,
 	BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
 	BASE_HW_FEATURE_COHERENCY_REG,
-	BASE_HW_FEATURE_AARCH64_MMU,
 	BASE_HW_FEATURE_END
 };
 
@@ -166,7 +162,6 @@ static const enum base_hw_feature base_hw_features_tDVx[] = {
 	BASE_HW_FEATURE_FLUSH_REDUCTION,
 	BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
 	BASE_HW_FEATURE_COHERENCY_REG,
-	BASE_HW_FEATURE_AARCH64_MMU,
 	BASE_HW_FEATURE_END
 };
 
@@ -193,7 +188,6 @@ static const enum base_hw_feature base_hw_features_tNOx[] = {
 	BASE_HW_FEATURE_FLUSH_REDUCTION,
 	BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
 	BASE_HW_FEATURE_COHERENCY_REG,
-	BASE_HW_FEATURE_AARCH64_MMU,
 	BASE_HW_FEATURE_TLS_HASHING,
 	BASE_HW_FEATURE_IDVS_GROUP_SIZE,
 	BASE_HW_FEATURE_END
@@ -222,7 +216,6 @@ static const enum base_hw_feature base_hw_features_tGOx[] = {
 	BASE_HW_FEATURE_FLUSH_REDUCTION,
 	BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
 	BASE_HW_FEATURE_COHERENCY_REG,
-	BASE_HW_FEATURE_AARCH64_MMU,
 	BASE_HW_FEATURE_TLS_HASHING,
 	BASE_HW_FEATURE_IDVS_GROUP_SIZE,
 	BASE_HW_FEATURE_END
@@ -250,7 +243,6 @@ static const enum base_hw_feature base_hw_features_tTRx[] = {
 	BASE_HW_FEATURE_FLUSH_REDUCTION,
 	BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
 	BASE_HW_FEATURE_COHERENCY_REG,
-	BASE_HW_FEATURE_AARCH64_MMU,
 	BASE_HW_FEATURE_IDVS_GROUP_SIZE,
 	BASE_HW_FEATURE_CLEAN_ONLY_SAFE,
 	BASE_HW_FEATURE_END
@@ -278,7 +270,6 @@ static const enum base_hw_feature base_hw_features_tNAx[] = {
 	BASE_HW_FEATURE_FLUSH_REDUCTION,
 	BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
 	BASE_HW_FEATURE_COHERENCY_REG,
-	BASE_HW_FEATURE_AARCH64_MMU,
 	BASE_HW_FEATURE_IDVS_GROUP_SIZE,
 	BASE_HW_FEATURE_CLEAN_ONLY_SAFE,
 	BASE_HW_FEATURE_END
@@ -306,7 +297,6 @@ static const enum base_hw_feature base_hw_features_tBEx[] = {
 	BASE_HW_FEATURE_FLUSH_REDUCTION,
 	BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
 	BASE_HW_FEATURE_COHERENCY_REG,
-	BASE_HW_FEATURE_AARCH64_MMU,
 	BASE_HW_FEATURE_IDVS_GROUP_SIZE,
 	BASE_HW_FEATURE_L2_CONFIG,
 	BASE_HW_FEATURE_CLEAN_ONLY_SAFE,
@@ -335,7 +325,6 @@ static const enum base_hw_feature base_hw_features_tBAx[] = {
 	BASE_HW_FEATURE_FLUSH_REDUCTION,
 	BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
 	BASE_HW_FEATURE_COHERENCY_REG,
-	BASE_HW_FEATURE_AARCH64_MMU,
 	BASE_HW_FEATURE_IDVS_GROUP_SIZE,
 	BASE_HW_FEATURE_L2_CONFIG,
 	BASE_HW_FEATURE_CLEAN_ONLY_SAFE,
@@ -364,7 +353,6 @@ static const enum base_hw_feature base_hw_features_tDUx[] = {
 	BASE_HW_FEATURE_FLUSH_REDUCTION,
 	BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
 	BASE_HW_FEATURE_COHERENCY_REG,
-	BASE_HW_FEATURE_AARCH64_MMU,
 	BASE_HW_FEATURE_IDVS_GROUP_SIZE,
 	BASE_HW_FEATURE_L2_CONFIG,
 	BASE_HW_FEATURE_CLEAN_ONLY_SAFE,
@@ -393,123 +381,10 @@ static const enum base_hw_feature base_hw_features_tODx[] = {
 	BASE_HW_FEATURE_FLUSH_REDUCTION,
 	BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
 	BASE_HW_FEATURE_COHERENCY_REG,
-	BASE_HW_FEATURE_AARCH64_MMU,
 	BASE_HW_FEATURE_L2_CONFIG,
 	BASE_HW_FEATURE_CLEAN_ONLY_SAFE,
 	BASE_HW_FEATURE_END
 };
 
-static const enum base_hw_feature base_hw_features_tGRx[] = {
-	BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
-	BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
-	BASE_HW_FEATURE_XAFFINITY,
-	BASE_HW_FEATURE_WARPING,
-	BASE_HW_FEATURE_INTERPIPE_REG_ALIASING,
-	BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
-	BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
-	BASE_HW_FEATURE_BRNDOUT_CC,
-	BASE_HW_FEATURE_BRNDOUT_KILL,
-	BASE_HW_FEATURE_LD_ST_LEA_TEX,
-	BASE_HW_FEATURE_LD_ST_TILEBUFFER,
-	BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
-	BASE_HW_FEATURE_MRT,
-	BASE_HW_FEATURE_MSAA_16X,
-	BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE,
-	BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
-	BASE_HW_FEATURE_T7XX_PAIRING_RULES,
-	BASE_HW_FEATURE_TEST4_DATUM_MODE,
-	BASE_HW_FEATURE_FLUSH_REDUCTION,
-	BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
-	BASE_HW_FEATURE_COHERENCY_REG,
-	BASE_HW_FEATURE_AARCH64_MMU,
-	BASE_HW_FEATURE_L2_CONFIG,
-	BASE_HW_FEATURE_CLEAN_ONLY_SAFE,
-	BASE_HW_FEATURE_END
-};
-
-static const enum base_hw_feature base_hw_features_tVAx[] = {
-	BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
-	BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
-	BASE_HW_FEATURE_XAFFINITY,
-	BASE_HW_FEATURE_WARPING,
-	BASE_HW_FEATURE_INTERPIPE_REG_ALIASING,
-	BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
-	BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
-	BASE_HW_FEATURE_BRNDOUT_CC,
-	BASE_HW_FEATURE_BRNDOUT_KILL,
-	BASE_HW_FEATURE_LD_ST_LEA_TEX,
-	BASE_HW_FEATURE_LD_ST_TILEBUFFER,
-	BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
-	BASE_HW_FEATURE_MRT,
-	BASE_HW_FEATURE_MSAA_16X,
-	BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE,
-	BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
-	BASE_HW_FEATURE_T7XX_PAIRING_RULES,
-	BASE_HW_FEATURE_TEST4_DATUM_MODE,
-	BASE_HW_FEATURE_FLUSH_REDUCTION,
-	BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
-	BASE_HW_FEATURE_COHERENCY_REG,
-	BASE_HW_FEATURE_AARCH64_MMU,
-	BASE_HW_FEATURE_L2_CONFIG,
-	BASE_HW_FEATURE_CLEAN_ONLY_SAFE,
-	BASE_HW_FEATURE_END
-};
-
-static const enum base_hw_feature base_hw_features_tTUx[] = {
-	BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
-	BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
-	BASE_HW_FEATURE_XAFFINITY,
-	BASE_HW_FEATURE_WARPING,
-	BASE_HW_FEATURE_INTERPIPE_REG_ALIASING,
-	BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
-	BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
-	BASE_HW_FEATURE_BRNDOUT_CC,
-	BASE_HW_FEATURE_BRNDOUT_KILL,
-	BASE_HW_FEATURE_LD_ST_LEA_TEX,
-	BASE_HW_FEATURE_LD_ST_TILEBUFFER,
-	BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
-	BASE_HW_FEATURE_MRT,
-	BASE_HW_FEATURE_MSAA_16X,
-	BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE,
-	BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
-	BASE_HW_FEATURE_T7XX_PAIRING_RULES,
-	BASE_HW_FEATURE_TEST4_DATUM_MODE,
-	BASE_HW_FEATURE_FLUSH_REDUCTION,
-	BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
-	BASE_HW_FEATURE_COHERENCY_REG,
-	BASE_HW_FEATURE_AARCH64_MMU,
-	BASE_HW_FEATURE_L2_CONFIG,
-	BASE_HW_FEATURE_CLEAN_ONLY_SAFE,
-	BASE_HW_FEATURE_END
-};
-
-static const enum base_hw_feature base_hw_features_tE2x[] = {
-	BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
-	BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
-	BASE_HW_FEATURE_XAFFINITY,
-	BASE_HW_FEATURE_WARPING,
-	BASE_HW_FEATURE_INTERPIPE_REG_ALIASING,
-	BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
-	BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
-	BASE_HW_FEATURE_BRNDOUT_CC,
-	BASE_HW_FEATURE_BRNDOUT_KILL,
-	BASE_HW_FEATURE_LD_ST_LEA_TEX,
-	BASE_HW_FEATURE_LD_ST_TILEBUFFER,
-	BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
-	BASE_HW_FEATURE_MRT,
-	BASE_HW_FEATURE_MSAA_16X,
-	BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE,
-	BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
-	BASE_HW_FEATURE_T7XX_PAIRING_RULES,
-	BASE_HW_FEATURE_TEST4_DATUM_MODE,
-	BASE_HW_FEATURE_FLUSH_REDUCTION,
-	BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
-	BASE_HW_FEATURE_COHERENCY_REG,
-	BASE_HW_FEATURE_AARCH64_MMU,
-	BASE_HW_FEATURE_IDVS_GROUP_SIZE,
-	BASE_HW_FEATURE_L2_CONFIG,
-	BASE_HW_FEATURE_CLEAN_ONLY_SAFE,
-	BASE_HW_FEATURE_END
-};
 
 #endif /* _BASE_HWCONFIG_FEATURES_H_ */
diff --git a/drivers/gpu/arm/bifrost/mali_base_hwconfig_issues.h b/drivers/gpu/arm/bifrost/mali_base_hwconfig_issues.h
index c1ad3ac40705..a61eeb2ca200 100644
--- a/drivers/gpu/arm/bifrost/mali_base_hwconfig_issues.h
+++ b/drivers/gpu/arm/bifrost/mali_base_hwconfig_issues.h
@@ -1,11 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  *
- * (C) COPYRIGHT 2014-2020 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2021 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,8 +17,6 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 /* AUTOMATICALLY GENERATED FILE. If you want to amend the issues/features,
@@ -608,77 +607,5 @@ static const enum base_hw_issue base_hw_issues_model_tODx[] = {
 	BASE_HW_ISSUE_END
 };
 
-static const enum base_hw_issue base_hw_issues_tGRx_r0p0[] = {
-	BASE_HW_ISSUE_9435,
-	BASE_HW_ISSUE_TSIX_2033,
-	BASE_HW_ISSUE_TTRX_1337,
-	BASE_HW_ISSUE_GPU2019_3212,
-	BASE_HW_ISSUE_END
-};
-
-static const enum base_hw_issue base_hw_issues_model_tGRx[] = {
-	BASE_HW_ISSUE_5736,
-	BASE_HW_ISSUE_9435,
-	BASE_HW_ISSUE_TSIX_2033,
-	BASE_HW_ISSUE_TTRX_1337,
-	BASE_HW_ISSUE_GPU2019_3212,
-	BASE_HW_ISSUE_END
-};
-
-static const enum base_hw_issue base_hw_issues_tVAx_r0p0[] = {
-	BASE_HW_ISSUE_9435,
-	BASE_HW_ISSUE_TSIX_2033,
-	BASE_HW_ISSUE_TTRX_1337,
-	BASE_HW_ISSUE_GPU2019_3212,
-	BASE_HW_ISSUE_END
-};
-
-static const enum base_hw_issue base_hw_issues_model_tVAx[] = {
-	BASE_HW_ISSUE_5736,
-	BASE_HW_ISSUE_9435,
-	BASE_HW_ISSUE_TSIX_2033,
-	BASE_HW_ISSUE_TTRX_1337,
-	BASE_HW_ISSUE_GPU2019_3212,
-	BASE_HW_ISSUE_END
-};
-
-static const enum base_hw_issue base_hw_issues_tTUx_r0p0[] = {
-	BASE_HW_ISSUE_9435,
-	BASE_HW_ISSUE_TSIX_2033,
-	BASE_HW_ISSUE_TTRX_1337,
-	BASE_HW_ISSUE_GPU2019_3212,
-	BASE_HW_ISSUE_END
-};
-
-static const enum base_hw_issue base_hw_issues_model_tTUx[] = {
-	BASE_HW_ISSUE_5736,
-	BASE_HW_ISSUE_9435,
-	BASE_HW_ISSUE_TSIX_2033,
-	BASE_HW_ISSUE_TTRX_1337,
-	BASE_HW_ISSUE_GPU2019_3212,
-	BASE_HW_ISSUE_END
-};
-
-static const enum base_hw_issue base_hw_issues_tE2x_r0p0[] = {
-	BASE_HW_ISSUE_9435,
-	BASE_HW_ISSUE_TSIX_2033,
-	BASE_HW_ISSUE_TTRX_1337,
-	BASE_HW_ISSUE_TTRX_921,
-	BASE_HW_ISSUE_TTRX_3414,
-	BASE_HW_ISSUE_TTRX_3083,
-	BASE_HW_ISSUE_GPU2019_3212,
-	BASE_HW_ISSUE_END
-};
-
-static const enum base_hw_issue base_hw_issues_model_tE2x[] = {
-	BASE_HW_ISSUE_5736,
-	BASE_HW_ISSUE_9435,
-	BASE_HW_ISSUE_TSIX_2033,
-	BASE_HW_ISSUE_TTRX_1337,
-	BASE_HW_ISSUE_TTRX_3414,
-	BASE_HW_ISSUE_TTRX_3083,
-	BASE_HW_ISSUE_GPU2019_3212,
-	BASE_HW_ISSUE_END
-};
 
 #endif /* _BASE_HWCONFIG_ISSUES_H_ */
diff --git a/drivers/gpu/arm/bifrost/mali_kbase.h b/drivers/gpu/arm/bifrost/mali_kbase.h
index 8189d02ab910..52dfbe4f6e7f 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase.h
+++ b/drivers/gpu/arm/bifrost/mali_kbase.h
@@ -1,11 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  *
- * (C) COPYRIGHT 2010-2020 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2021 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,12 +17,8 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
-
-
 #ifndef _KBASE_H_
 #define _KBASE_H_
 
@@ -38,7 +35,7 @@
 #include <linux/mutex.h>
 #include <linux/rwsem.h>
 #include <linux/sched.h>
-#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 11, 0))
+#if (KERNEL_VERSION(4, 11, 0) <= LINUX_VERSION_CODE)
 #include <linux/sched/mm.h>
 #endif
 #include <linux/slab.h>
@@ -48,7 +45,7 @@
 #include <linux/workqueue.h>
 #include <linux/interrupt.h>
 
-#include "mali_base_kernel.h"
+#include <uapi/gpu/arm/bifrost/mali_base_kernel.h>
 #include <mali_kbase_linux.h>
 
 /*
@@ -67,7 +64,7 @@
 #include "mali_kbase_gpu_memory_debugfs.h"
 #include "mali_kbase_mem_profile_debugfs.h"
 #include "mali_kbase_gpuprops.h"
-#include "mali_kbase_ioctl.h"
+#include <uapi/gpu/arm/bifrost/mali_kbase_ioctl.h>
 #if !MALI_USE_CSF
 #include "mali_kbase_debug_job_fault.h"
 #include "mali_kbase_jd_debugfs.h"
@@ -93,11 +90,11 @@
 #endif
 
 #if MALI_USE_CSF
-/* Physical memory group ID for command stream frontend user I/O.
+/* Physical memory group ID for CSF user I/O.
  */
 #define KBASE_MEM_GROUP_CSF_IO BASE_MEM_GROUP_DEFAULT
 
-/* Physical memory group ID for command stream frontend firmware.
+/* Physical memory group ID for CSF firmware.
  */
 #define KBASE_MEM_GROUP_CSF_FW BASE_MEM_GROUP_DEFAULT
 #endif
@@ -155,9 +152,9 @@ void kbase_release_device(struct kbase_device *kbdev);
  * the flag @ref KBASE_REG_TILER_ALIGN_TOP (check the flags of the kbase
  * region):
  * - alignment offset is set to the difference between the kbase region
- * extent (converted from the original value in pages to bytes) and the kbase
+ * extension (converted from the original value in pages to bytes) and the kbase
  * region initial_commit (also converted from the original value in pages to
- * bytes); alignment mask is set to the kbase region extent in bytes and
+ * bytes); alignment mask is set to the kbase region extension in bytes and
  * decremented by 1.
  *
  * Return: if successful, address of the unmapped area aligned as required;
@@ -216,10 +213,6 @@ void registers_unmap(struct kbase_device *kbdev);
 
 int kbase_device_coherency_init(struct kbase_device *kbdev);
 
-#ifdef CONFIG_MALI_BUSLOG
-int buslog_init(struct kbase_device *kbdev);
-void buslog_term(struct kbase_device *kbdev);
-#endif
 
 #if !MALI_USE_CSF
 int kbase_jd_init(struct kbase_context *kctx);
@@ -413,13 +406,15 @@ void kbasep_as_do_poke(struct work_struct *work);
 
 /**
  * Check whether a system suspend is in progress, or has already been suspended
+ * @kbdev: The kbase device structure for the device
  *
  * The caller should ensure that either kbdev->pm.active_count_lock is held, or
  * a dmb was executed recently (to ensure the value is most
  * up-to-date). However, without a lock the value could change afterwards.
  *
- * @return false if a suspend is not in progress
- * @return !=false otherwise
+ * Return:
+ * * false if a suspend is not in progress
+ * * !=false otherwise
  */
 static inline bool kbase_pm_is_suspending(struct kbase_device *kbdev)
 {
@@ -456,7 +451,11 @@ static inline bool kbase_pm_is_gpu_lost(struct kbase_device *kbdev)
 static inline void kbase_pm_set_gpu_lost(struct kbase_device *kbdev,
 	bool gpu_lost)
 {
-	atomic_set(&kbdev->pm.gpu_lost, (gpu_lost ? 1 : 0));
+	const int new_val = (gpu_lost ? 1 : 0);
+	const int cur_val = atomic_xchg(&kbdev->pm.gpu_lost, new_val);
+
+	if (new_val != cur_val)
+		KBASE_KTRACE_ADD(kbdev, ARB_GPU_LOST, NULL, new_val);
 }
 #endif
 
@@ -496,6 +495,8 @@ void kbase_pm_metrics_stop(struct kbase_device *kbdev);
 /**
  * Return the atom's ID, as was originally supplied by userspace in
  * base_jd_atom::atom_number
+ * @kctx:  KBase context pointer
+ * @katom: Atom for which to return the ID
  */
 static inline int kbase_jd_atom_id(struct kbase_context *kctx, struct kbase_jd_atom *katom)
 {
@@ -547,7 +548,7 @@ static inline struct kbase_jd_atom *kbase_jd_atom_from_id(
  * The disjoint event counter is also incremented immediately whenever a job is soft stopped
  * and during context creation.
  *
- * @param kbdev The kbase device
+ * @kbdev: The kbase device
  *
  * Return: 0 on success and non-zero value on failure.
  */
@@ -557,7 +558,7 @@ void kbase_disjoint_init(struct kbase_device *kbdev);
  * Increase the count of disjoint events
  * called when a disjoint event has happened
  *
- * @param kbdev The kbase device
+ * @kbdev: The kbase device
  */
 void kbase_disjoint_event(struct kbase_device *kbdev);
 
@@ -567,14 +568,14 @@ void kbase_disjoint_event(struct kbase_device *kbdev);
  * This should be called when something happens which could be disjoint if the GPU
  * is in a disjoint state. The state refcount keeps track of this.
  *
- * @param kbdev The kbase device
+ * @kbdev: The kbase device
  */
 void kbase_disjoint_event_potential(struct kbase_device *kbdev);
 
 /**
  * Returns the count of disjoint events
  *
- * @param kbdev The kbase device
+ * @kbdev: The kbase device
  * @return the count of disjoint events
  */
 u32 kbase_disjoint_event_get(struct kbase_device *kbdev);
@@ -586,7 +587,7 @@ u32 kbase_disjoint_event_get(struct kbase_device *kbdev);
  * eventually after the disjoint state has completed @ref kbase_disjoint_state_down
  * should be called
  *
- * @param kbdev The kbase device
+ * @kbdev: The kbase device
  */
 void kbase_disjoint_state_up(struct kbase_device *kbdev);
 
@@ -597,10 +598,34 @@ void kbase_disjoint_state_up(struct kbase_device *kbdev);
  *
  * Called after @ref kbase_disjoint_state_up once the disjoint state is over
  *
- * @param kbdev The kbase device
+ * @kbdev: The kbase device
  */
 void kbase_disjoint_state_down(struct kbase_device *kbdev);
 
+/**
+ * kbase_device_pcm_dev_init() - Initialize the priority control manager device
+ *
+ * @kbdev: Pointer to the structure for the kbase device
+ *
+ * Pointer to the priority control manager device is retrieved from the device
+ * tree and a reference is taken on the module implementing the callbacks for
+ * priority control manager operations.
+ *
+ * Return: 0 if successful, or an error code on failure
+ */
+int kbase_device_pcm_dev_init(struct kbase_device *const kbdev);
+
+/**
+ * kbase_device_pcm_dev_term() - Performs priority control manager device
+ *                               deinitialization.
+ *
+ * @kbdev: Pointer to the structure for the kbase device
+ *
+ * Reference is released on the module implementing the callbacks for priority
+ * control manager operations.
+ */
+void kbase_device_pcm_dev_term(struct kbase_device *const kbdev);
+
 /**
  * If a job is soft stopped and the number of contexts is >= this value
  * it is reported as a disjoint event
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_as_fault_debugfs.c b/drivers/gpu/arm/bifrost/mali_kbase_as_fault_debugfs.c
index 76bbfffe03a0..18ca56cb7cec 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_as_fault_debugfs.c
+++ b/drivers/gpu/arm/bifrost/mali_kbase_as_fault_debugfs.c
@@ -1,11 +1,12 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  *
- * (C) COPYRIGHT 2016-2019 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2016-2020 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,8 +17,6 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 #include <linux/debugfs.h>
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_as_fault_debugfs.h b/drivers/gpu/arm/bifrost/mali_kbase_as_fault_debugfs.h
index 58d7fcf030a4..a9cf99c0aa55 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_as_fault_debugfs.h
+++ b/drivers/gpu/arm/bifrost/mali_kbase_as_fault_debugfs.h
@@ -1,11 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  *
- * (C) COPYRIGHT 2016 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2016, 2020 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,8 +17,6 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 #ifndef _KBASE_AS_FAULT_DEBUG_FS_H
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_bits.h b/drivers/gpu/arm/bifrost/mali_kbase_bits.h
index 2c110937a792..6f6dba1b2a02 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_bits.h
+++ b/drivers/gpu/arm/bifrost/mali_kbase_bits.h
@@ -1,11 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  *
- * (C) COPYRIGHT ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,17 +17,6 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
- *//* SPDX-License-Identifier: GPL-2.0 */
-/*
- *
- * (C) COPYRIGHT 2019 ARM Limited. All rights reserved.
- *
- * This program is free software and is provided to you under the terms of the
- * GNU General Public License version 2 as published by the Free Software
- * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
  */
 
 #ifndef _KBASE_BITS_H_
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_cache_policy.c b/drivers/gpu/arm/bifrost/mali_kbase_cache_policy.c
index 27a03cf02138..7f266aa53cac 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_cache_policy.c
+++ b/drivers/gpu/arm/bifrost/mali_kbase_cache_policy.c
@@ -1,11 +1,12 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  *
- * (C) COPYRIGHT 2012-2018 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2012-2018, 2020 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,12 +17,8 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
-
-
 /*
  * Cache Policy API.
  */
@@ -58,10 +55,11 @@ void kbase_sync_single_for_device(struct kbase_device *kbdev, dma_addr_t handle,
 {
 	dma_sync_single_for_device(kbdev->dev, handle, size, dir);
 }
-
+KBASE_EXPORT_TEST_API(kbase_sync_single_for_device);
 
 void kbase_sync_single_for_cpu(struct kbase_device *kbdev, dma_addr_t handle,
 		size_t size, enum dma_data_direction dir)
 {
 	dma_sync_single_for_cpu(kbdev->dev, handle, size, dir);
 }
+KBASE_EXPORT_TEST_API(kbase_sync_single_for_cpu);
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_cache_policy.h b/drivers/gpu/arm/bifrost/mali_kbase_cache_policy.h
index 8a1e5291bf5f..6799492be822 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_cache_policy.h
+++ b/drivers/gpu/arm/bifrost/mali_kbase_cache_policy.h
@@ -1,11 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  *
- * (C) COPYRIGHT 2012-2013, 2015 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2012-2013, 2015, 2020-2021 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,12 +17,8 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
-
-
 /*
  * Cache Policy API.
  */
@@ -30,7 +27,7 @@
 #define _KBASE_CACHE_POLICY_H_
 
 #include "mali_kbase.h"
-#include "mali_base_kernel.h"
+#include <uapi/gpu/arm/bifrost/mali_base_kernel.h>
 
 /**
  * kbase_cache_enabled - Choose the cache policy for a specific region
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_caps.h b/drivers/gpu/arm/bifrost/mali_kbase_caps.h
index b201a60fa6e3..478a3177e8ec 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_caps.h
+++ b/drivers/gpu/arm/bifrost/mali_kbase_caps.h
@@ -1,11 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  *
- * (C) COPYRIGHT 2020 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,15 +17,10 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
-
 /**
- * @file mali_kbase_caps.h
- *
- * Driver Capability Queries.
+ * DOC: Driver Capability Queries.
  */
 
 #ifndef _KBASE_CAPS_H_
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_ccswe.c b/drivers/gpu/arm/bifrost/mali_kbase_ccswe.c
index 87d5aaa6bb5d..624482ad6c15 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_ccswe.c
+++ b/drivers/gpu/arm/bifrost/mali_kbase_ccswe.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  *
  * (C) COPYRIGHT 2020 ARM Limited. All rights reserved.
@@ -5,7 +6,7 @@
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,8 +17,6 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 #include "mali_kbase_ccswe.h"
@@ -50,7 +49,6 @@ void kbase_ccswe_init(struct kbase_ccswe *self)
 
 	spin_lock_init(&self->access);
 }
-KBASE_EXPORT_TEST_API(kbase_ccswe_init);
 
 u64 kbase_ccswe_cycle_at(struct kbase_ccswe *self, u64 timestamp_ns)
 {
@@ -63,7 +61,6 @@ u64 kbase_ccswe_cycle_at(struct kbase_ccswe *self, u64 timestamp_ns)
 
 	return result;
 }
-KBASE_EXPORT_TEST_API(kbase_ccswe_cycle_at);
 
 void kbase_ccswe_freq_change(
 	struct kbase_ccswe *self, u64 timestamp_ns, u32 gpu_freq)
@@ -87,7 +84,6 @@ void kbase_ccswe_freq_change(
 exit:
 	spin_unlock_irqrestore(&self->access, flags);
 }
-KBASE_EXPORT_TEST_API(kbase_ccswe_freq_change);
 
 void kbase_ccswe_reset(struct kbase_ccswe *self)
 {
@@ -102,4 +98,3 @@ void kbase_ccswe_reset(struct kbase_ccswe *self)
 
 	spin_unlock_irqrestore(&self->access, flags);
 }
-
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_ccswe.h b/drivers/gpu/arm/bifrost/mali_kbase_ccswe.h
index 3a7cf73d9eac..5c440b8473d3 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_ccswe.h
+++ b/drivers/gpu/arm/bifrost/mali_kbase_ccswe.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  *
  * (C) COPYRIGHT 2020 ARM Limited. All rights reserved.
@@ -5,7 +6,7 @@
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,8 +17,6 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 #ifndef _KBASE_CCSWE_H_
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_config.c b/drivers/gpu/arm/bifrost/mali_kbase_config.c
index ce7070d1d634..fe71526fdf96 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_config.c
+++ b/drivers/gpu/arm/bifrost/mali_kbase_config.c
@@ -1,11 +1,12 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  *
- * (C) COPYRIGHT 2011-2015,2017 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2011-2015, 2017, 2020 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,12 +17,8 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
-
-
 #include <mali_kbase.h>
 #include <mali_kbase_defs.h>
 #include <mali_kbase_config_defaults.h>
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_config.h b/drivers/gpu/arm/bifrost/mali_kbase_config.h
index 57456e2b90db..fe21cf5fa69f 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_config.h
+++ b/drivers/gpu/arm/bifrost/mali_kbase_config.h
@@ -1,11 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  *
- * (C) COPYRIGHT 2010-2017, 2019-2020 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2017, 2019-2021 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,15 +17,10 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
-
-
 /**
- * @file mali_kbase_config.h
- * Configuration API and Attributes for KBase
+ * DOC: Configuration API and Attributes for KBase
  */
 
 #ifndef _KBASE_CONFIG_H_
@@ -35,26 +31,11 @@
 #include <mali_kbase_backend_config.h>
 #include <linux/rbtree.h>
 
-/**
- * @addtogroup base_api
- * @{
- */
-
-/**
- * @addtogroup base_kbase_api
- * @{
- */
-
-/**
- * @addtogroup kbase_config Configuration API and Attributes
- * @{
- */
-
 /* Forward declaration of struct kbase_device */
 struct kbase_device;
 
 /**
- * kbase_platform_funcs_conf - Specifies platform init/term function pointers
+ * struct kbase_platform_funcs_conf - Specifies platform init/term function pointers
  *
  * Specifies the functions pointers for platform specific initialization and
  * termination. By default no functions are required. No additional platform
@@ -62,7 +43,7 @@ struct kbase_device;
  */
 struct kbase_platform_funcs_conf {
 	/**
-	 * platform_init_func - platform specific init function pointer
+	 * @platform_init_func: platform specific init function pointer
 	 * @kbdev - kbase_device pointer
 	 *
 	 * Returns 0 on success, negative error code otherwise.
@@ -77,7 +58,7 @@ struct kbase_platform_funcs_conf {
 	 */
 	int (*platform_init_func)(struct kbase_device *kbdev);
 	/**
-	 * platform_term_func - platform specific termination function pointer
+	 * @platform_term_func: platform specific termination function pointer
 	 * @kbdev - kbase_device pointer
 	 *
 	 * Function pointer for platform specific termination or NULL if no
@@ -241,14 +222,15 @@ struct kbase_gpu_clk_notifier_data {
 };
 
 /**
- * kbase_clk_rate_trace_op_conf - Specifies GPU clock rate trace operations.
+ * struct kbase_clk_rate_trace_op_conf - Specifies GPU clock rate trace
+ * operations.
  *
  * Specifies the functions pointers for platform specific GPU clock rate trace
  * operations. By default no functions are required.
  */
 struct kbase_clk_rate_trace_op_conf {
 	/**
-	 * enumerate_gpu_clk - Enumerate a GPU clock on the given index
+	 * @enumerate_gpu_clk: Enumerate a GPU clock on the given index
 	 * @kbdev - kbase_device pointer
 	 * @index - GPU clock index
 	 *
@@ -262,7 +244,7 @@ struct kbase_clk_rate_trace_op_conf {
 		unsigned int index);
 
 	/**
-	 * get_gpu_clk_rate - Get the current rate for an enumerated clock.
+	 * @get_gpu_clk_rate: Get the current rate for an enumerated clock.
 	 * @kbdev          - kbase_device pointer
 	 * @gpu_clk_handle - Handle unique to the enumerated GPU clock
 	 *
@@ -272,7 +254,7 @@ struct kbase_clk_rate_trace_op_conf {
 		void *gpu_clk_handle);
 
 	/**
-	 * gpu_clk_notifier_register - Register a clock rate change notifier.
+	 * @gpu_clk_notifier_register: Register a clock rate change notifier.
 	 * @kbdev          - kbase_device pointer
 	 * @gpu_clk_handle - Handle unique to the enumerated GPU clock
 	 * @nb             - notifier block containing the callback function
@@ -291,7 +273,7 @@ struct kbase_clk_rate_trace_op_conf {
 		void *gpu_clk_handle, struct notifier_block *nb);
 
 	/**
-	 * gpu_clk_notifier_unregister - Unregister clock rate change notifier
+	 * @gpu_clk_notifier_unregister: Unregister clock rate change notifier
 	 * @kbdev          - kbase_device pointer
 	 * @gpu_clk_handle - Handle unique to the enumerated GPU clock
 	 * @nb             - notifier block containing the callback function
@@ -335,7 +317,7 @@ struct kbase_platform_config {
 #endif /* CONFIG_OF */
 
 /**
- * @brief Gets the pointer to platform config.
+ * kbase_get_platform_config - Gets the pointer to platform config.
  *
  * @return Pointer to the platform config
  */
@@ -386,8 +368,4 @@ int kbase_platform_register(void);
 void kbase_platform_unregister(void);
 #endif
 
-	  /** @} *//* end group kbase_config */
-	  /** @} *//* end group base_kbase_api */
-	  /** @} *//* end group base_api */
-
 #endif				/* _KBASE_CONFIG_H_ */
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_config_defaults.h b/drivers/gpu/arm/bifrost/mali_kbase_config_defaults.h
index e079281127ab..aad8e6c88e61 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_config_defaults.h
+++ b/drivers/gpu/arm/bifrost/mali_kbase_config_defaults.h
@@ -1,11 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  *
- * (C) COPYRIGHT 2013-2020 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2013-2021 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,14 +17,10 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 /**
- * @file mali_kbase_config_defaults.h
- *
- * Default values for configuration settings
+ * DOC: Default values for configuration settings
  *
  */
 
@@ -88,29 +85,38 @@ enum {
 };
 
 /**
- * Default period for DVFS sampling
+ * Default period for DVFS sampling (can be overridden by platform header)
  */
+#ifndef DEFAULT_PM_DVFS_PERIOD
 #define DEFAULT_PM_DVFS_PERIOD 100 /* 100ms */
+#endif
 
 /**
  * Power Management poweroff tick granuality. This is in nanoseconds to
- * allow HR timer support.
+ * allow HR timer support (can be overridden by platform header).
  *
  * On each scheduling tick, the power manager core may decide to:
  * -# Power off one or more shader cores
  * -# Power off the entire GPU
  */
+#ifndef DEFAULT_PM_GPU_POWEROFF_TICK_NS
 #define DEFAULT_PM_GPU_POWEROFF_TICK_NS (400000) /* 400us */
+#endif
 
 /**
  * Power Manager number of ticks before shader cores are powered off
+ * (can be overridden by platform header).
  */
+#ifndef DEFAULT_PM_POWEROFF_TICK_SHADER
 #define DEFAULT_PM_POWEROFF_TICK_SHADER (2) /* 400-800us */
+#endif
 
 /**
- * Default scheduling tick granuality
+ * Default scheduling tick granularity (can be overridden by platform header)
  */
+#ifndef DEFAULT_JS_SCHEDULING_PERIOD_NS
 #define DEFAULT_JS_SCHEDULING_PERIOD_NS    (100000000u) /* 100ms */
+#endif
 
 /**
  * Default minimum number of scheduling ticks before jobs are soft-stopped.
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_core_linux.c b/drivers/gpu/arm/bifrost/mali_kbase_core_linux.c
index 071b9236dee0..42539832b05e 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_core_linux.c
+++ b/drivers/gpu/arm/bifrost/mali_kbase_core_linux.c
@@ -1,11 +1,12 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  *
- * (C) COPYRIGHT 2010-2020 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2021 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,8 +17,6 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 #include <mali_kbase.h>
@@ -37,9 +36,9 @@
 #include <backend/gpu/mali_kbase_model_dummy.h>
 #endif /* CONFIG_MALI_BIFROST_NO_MALI */
 #include "mali_kbase_mem_profile_debugfs_buf_size.h"
-#include "mali_kbase_debug_mem_view.h"
 #include "mali_kbase_mem.h"
 #include "mali_kbase_mem_pool_debugfs.h"
+#include "mali_kbase_mem_pool_group.h"
 #include "mali_kbase_debugfs_helper.h"
 #if !MALI_CUSTOMER_RELEASE
 #include "mali_kbase_regs_dump_debugfs.h"
@@ -50,12 +49,11 @@
 #if !MALI_USE_CSF
 #include <mali_kbase_hwaccess_jm.h>
 #endif /* !MALI_USE_CSF */
-#ifdef CONFIG_MALI_PRFCNT_SET_SECONDARY_VIA_DEBUG_FS
+#ifdef CONFIG_MALI_PRFCNT_SET_SELECT_VIA_DEBUG_FS
 #include <mali_kbase_hwaccess_instr.h>
 #endif
-#include <mali_kbase_ctx_sched.h>
 #include <mali_kbase_reset_gpu.h>
-#include "mali_kbase_ioctl.h"
+#include <uapi/gpu/arm/bifrost/mali_kbase_ioctl.h>
 #if !MALI_USE_CSF
 #include "mali_kbase_kinstr_jm.h"
 #endif
@@ -66,8 +64,8 @@
 #if MALI_USE_CSF
 #include "csf/mali_kbase_csf_firmware.h"
 #include "csf/mali_kbase_csf_tiler_heap.h"
-#include "csf/mali_kbase_csf_kcpu_debugfs.h"
 #include "csf/mali_kbase_csf_csg_debugfs.h"
+#include "csf/mali_kbase_csf_cpu_queue_debugfs.h"
 #endif
 #ifdef CONFIG_MALI_ARBITER_SUPPORT
 #include "arbiter/mali_kbase_arbiter_pm.h"
@@ -79,6 +77,7 @@
 #include "mali_kbase_gwt.h"
 #endif
 #include "mali_kbase_pm_internal.h"
+#include "mali_kbase_dvfs_debugfs.h"
 
 #include <linux/module.h>
 #include <linux/init.h>
@@ -110,13 +109,8 @@
 #include <mali_kbase_config.h>
 
 
-#if (KERNEL_VERSION(3, 13, 0) <= LINUX_VERSION_CODE)
 #include <linux/pm_opp.h>
 #include <soc/rockchip/rockchip_opp_select.h>
-#else
-#include <linux/opp.h>
-#endif
-
 #include <linux/pm_runtime.h>
 
 #include <tl/mali_kbase_timeline.h>
@@ -135,7 +129,9 @@
 #define KERNEL_SIDE_DDK_VERSION_STRING "K:" MALI_RELEASE_NAME "(GPL)"
 
 /**
- * Kernel min/maj <=> API Version
+ * KBASE_API_VERSION - KBase API Version
+ * @major: Kernel major version
+ * @minor: Kernel minor version
  */
 #define KBASE_API_VERSION(major, minor) ((((major) & 0xFFF) << 20)  | \
 					 (((minor) & 0xFFF) << 8) | \
@@ -145,33 +141,14 @@
 #define KBASE_API_MAJ(api_version) ((api_version >> 20) & 0xFFF)
 
 /**
- * mali_kbase_api_version_to_maj_min - convert an api_version to a min/maj pair
- *
- * @api_version: API version to convert
- * @major:  Major version number (must not exceed 12 bits)
- * @minor:  Major version number (must not exceed 12 bits)
- */
-void mali_kbase_api_version_to_maj_min(unsigned long api_version, u16 *maj, u16 *min)
-{
-	if (WARN_ON(!maj))
-		return;
-
-	if (WARN_ON(!min))
-		return;
-
-	*maj = KBASE_API_MAJ(api_version);
-	*min = KBASE_API_MIN(api_version);
-}
-
-/**
- * kbase capabilities table
+ * typedef mali_kbase_capability_def - kbase capabilities table
  */
 typedef struct mali_kbase_capability_def {
 	u16 required_major;
 	u16 required_minor;
 } mali_kbase_capability_def;
 
-/**
+/*
  * This must be kept in-sync with mali_kbase_cap
  *
  * TODO: The alternative approach would be to embed the cap enum values
@@ -435,25 +412,6 @@ static int kbase_api_handshake_dummy(struct kbase_file *kfile,
 	return -EPERM;
 }
 
-/**
- * enum mali_error - Mali error codes shared with userspace
- *
- * This is subset of those common Mali errors that can be returned to userspace.
- * Values of matching user and kernel space enumerators MUST be the same.
- * MALI_ERROR_NONE is guaranteed to be 0.
- *
- * @MALI_ERROR_NONE: Success
- * @MALI_ERROR_OUT_OF_GPU_MEMORY: Not used in the kernel driver
- * @MALI_ERROR_OUT_OF_MEMORY: Memory allocation failure
- * @MALI_ERROR_FUNCTION_FAILED: Generic error code
- */
-enum mali_error {
-	MALI_ERROR_NONE = 0,
-	MALI_ERROR_OUT_OF_GPU_MEMORY,
-	MALI_ERROR_OUT_OF_MEMORY,
-	MALI_ERROR_FUNCTION_FAILED,
-};
-
 static struct kbase_device *to_kbase_device(struct device *dev)
 {
 	return dev_get_drvdata(dev);
@@ -531,9 +489,9 @@ void kbase_release_device(struct kbase_device *kbdev)
 EXPORT_SYMBOL(kbase_release_device);
 
 #ifdef CONFIG_DEBUG_FS
-#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0) && \
-		!(LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 28) && \
-		LINUX_VERSION_CODE < KERNEL_VERSION(4, 5, 0))
+#if KERNEL_VERSION(4, 6, 0) > LINUX_VERSION_CODE &&                            \
+	!(KERNEL_VERSION(4, 4, 28) <= LINUX_VERSION_CODE &&                    \
+	  KERNEL_VERSION(4, 5, 0) > LINUX_VERSION_CODE)
 /*
  * Older versions, before v4.6, of the kernel doesn't have
  * kstrtobool_from_user(), except longterm 4.4.y which had it added in 4.4.28
@@ -892,10 +850,8 @@ static int kbase_api_mem_alloc(struct kbase_context *kctx,
 	}
 #endif
 
-	reg = kbase_mem_alloc(kctx, alloc->in.va_pages,
-			alloc->in.commit_pages,
-			alloc->in.extent,
-			&flags, &gpu_va);
+	reg = kbase_mem_alloc(kctx, alloc->in.va_pages, alloc->in.commit_pages,
+			      alloc->in.extension, &flags, &gpu_va);
 
 	if (!reg)
 		return -ENOMEM;
@@ -1195,10 +1151,7 @@ static int kbase_api_mem_alias(struct kbase_context *kctx,
 	u64 flags;
 	int err;
 
-	if (alias->in.nents == 0 || alias->in.nents > 2048)
-		return -EINVAL;
-
-	if (alias->in.stride > (U64_MAX / 2048))
+	if (alias->in.nents == 0 || alias->in.nents > BASE_MEM_ALIAS_MAX_ENTS)
 		return -EINVAL;
 
 	ai = vmalloc(sizeof(*ai) * alias->in.nents);
@@ -1402,18 +1355,6 @@ static int kbase_api_sticky_resource_unmap(struct kbase_context *kctx,
 }
 
 #if MALI_UNIT_TEST
-static int kbase_api_tlstream_test(struct kbase_context *kctx,
-		struct kbase_ioctl_tlstream_test *test)
-{
-	kbase_timeline_test(
-			kctx->kbdev,
-			test->tpw_count,
-			test->msg_delay,
-			test->msg_count,
-			test->aux_msg);
-
-	return 0;
-}
 
 static int kbase_api_tlstream_stats(struct kbase_context *kctx,
 		struct kbase_ioctl_tlstream_stats *stats)
@@ -1553,14 +1494,11 @@ static int kbase_ioctl_cs_get_glb_iface(struct kbase_context *kctx,
 	}
 
 	if (!err) {
-		param->out.total_stream_num =
-			kbase_csf_firmware_get_glb_iface(kctx->kbdev,
-				group_data, max_group_num,
-				stream_data, max_total_stream_num,
-				&param->out.glb_version, &param->out.features,
-				&param->out.group_num, &param->out.prfcnt_size);
-
-		param->out.padding = 0;
+		param->out.total_stream_num = kbase_csf_firmware_get_glb_iface(
+			kctx->kbdev, group_data, max_group_num, stream_data,
+			max_total_stream_num, &param->out.glb_version,
+			&param->out.features, &param->out.group_num,
+			&param->out.prfcnt_size, &param->out.instr_features);
 
 		if (copy_to_user(user_groups, group_data,
 			MIN(max_group_num, param->out.group_num) *
@@ -1578,56 +1516,109 @@ static int kbase_ioctl_cs_get_glb_iface(struct kbase_context *kctx,
 	kfree(stream_data);
 	return err;
 }
+
+/* Pass the buffer and size fields of @cpu_queue_info to kbase_csf_cpu_queue_dump(). */
+static int kbasep_ioctl_cs_cpu_queue_dump(struct kbase_context *kctx,
+			struct kbase_ioctl_cs_cpu_queue_info *cpu_queue_info)
+{
+	return kbase_csf_cpu_queue_dump(kctx, cpu_queue_info->buffer,
+					cpu_queue_info->size);
+}
 #endif /* MALI_USE_CSF */
 
-#define KBASE_HANDLE_IOCTL(cmd, function, arg)    \
-	do {                                          \
-		BUILD_BUG_ON(_IOC_DIR(cmd) != _IOC_NONE); \
-		return function(arg);                     \
+/* Overwrite priority_check->priority with the backend priority-check result. */
+static int kbasep_ioctl_context_priority_check(struct kbase_context *kctx,
+			struct kbase_ioctl_context_priority_check *priority_check)
+{
+#if MALI_USE_CSF
+	priority_check->priority = kbase_csf_priority_check(kctx->kbdev, priority_check->priority);
+#else
+	priority_check->priority = (u8)kbase_js_priority_check(kctx->kbdev,
+					(base_jd_prio)priority_check->priority);
+#endif
+	return 0;
+}
+
+#define KBASE_HANDLE_IOCTL(cmd, function, arg)                                 \
+	do {                                                                   \
+		int ret;                                                       \
+		BUILD_BUG_ON(_IOC_DIR(cmd) != _IOC_NONE);                      \
+		dev_dbg(arg->kbdev->dev, "Enter ioctl %s\n", #function);       \
+		ret = function(arg);                                           \
+		dev_dbg(arg->kbdev->dev, "Return %d from ioctl %s\n", ret,     \
+			#function);                                            \
+		return ret;                                                    \
 	} while (0)
 
-#define KBASE_HANDLE_IOCTL_IN(cmd, function, type, arg)    \
-	do {                                                   \
-		type param;                                        \
-		int err;                                           \
-		BUILD_BUG_ON(_IOC_DIR(cmd) != _IOC_WRITE);         \
-		BUILD_BUG_ON(sizeof(param) != _IOC_SIZE(cmd));     \
-		err = copy_from_user(&param, uarg, sizeof(param)); \
-		if (err)                                           \
-			return -EFAULT;                                \
-		return function(arg, &param);                      \
+#define KBASE_HANDLE_IOCTL_IN(cmd, function, type, arg)                        \
+	do {                                                                   \
+		type param;                                                    \
+		int ret, err;                                                  \
+		dev_dbg(arg->kbdev->dev, "Enter ioctl %s\n", #function);       \
+		BUILD_BUG_ON(_IOC_DIR(cmd) != _IOC_WRITE);                     \
+		BUILD_BUG_ON(sizeof(param) != _IOC_SIZE(cmd));                 \
+		err = copy_from_user(&param, uarg, sizeof(param));             \
+		if (err)                                                       \
+			return -EFAULT;                                        \
+		ret = function(arg, &param);                                   \
+		dev_dbg(arg->kbdev->dev, "Return %d from ioctl %s\n", ret,     \
+			#function);                                            \
+		return ret;                                                    \
 	} while (0)
 
-#define KBASE_HANDLE_IOCTL_OUT(cmd, function, type, arg)   \
-	do {                                                   \
-		type param;                                        \
-		int ret, err;                                      \
-		BUILD_BUG_ON(_IOC_DIR(cmd) != _IOC_READ);          \
-		BUILD_BUG_ON(sizeof(param) != _IOC_SIZE(cmd));     \
-		memset(&param, 0, sizeof(param));                  \
-		ret = function(arg, &param);                       \
-		err = copy_to_user(uarg, &param, sizeof(param));   \
-		if (err)                                           \
-			return -EFAULT;                                \
-		return ret;                                        \
+#define KBASE_HANDLE_IOCTL_OUT(cmd, function, type, arg)                       \
+	do {                                                                   \
+		type param;                                                    \
+		int ret, err;                                                  \
+		dev_dbg(arg->kbdev->dev, "Enter ioctl %s\n", #function);       \
+		BUILD_BUG_ON(_IOC_DIR(cmd) != _IOC_READ);                      \
+		BUILD_BUG_ON(sizeof(param) != _IOC_SIZE(cmd));                 \
+		memset(&param, 0, sizeof(param));                              \
+		ret = function(arg, &param);                                   \
+		err = copy_to_user(uarg, &param, sizeof(param));               \
+		if (err)                                                       \
+			return -EFAULT;                                        \
+		dev_dbg(arg->kbdev->dev, "Return %d from ioctl %s\n", ret,     \
+			#function);                                            \
+		return ret;                                                    \
 	} while (0)
 
-#define KBASE_HANDLE_IOCTL_INOUT(cmd, function, type, arg)     \
-	do {                                                       \
-		type param;                                            \
-		int ret, err;                                          \
-		BUILD_BUG_ON(_IOC_DIR(cmd) != (_IOC_WRITE|_IOC_READ)); \
-		BUILD_BUG_ON(sizeof(param) != _IOC_SIZE(cmd));         \
-		err = copy_from_user(&param, uarg, sizeof(param));     \
-		if (err)                                               \
-			return -EFAULT;                                    \
-		ret = function(arg, &param);                           \
-		err = copy_to_user(uarg, &param, sizeof(param));       \
-		if (err)                                               \
-			return -EFAULT;                                    \
-		return ret;                                            \
+#define KBASE_HANDLE_IOCTL_INOUT(cmd, function, type, arg)                     \
+	do {                                                                   \
+		type param;                                                    \
+		int ret, err;                                                  \
+		dev_dbg(arg->kbdev->dev, "Enter ioctl %s\n", #function);       \
+		BUILD_BUG_ON(_IOC_DIR(cmd) != (_IOC_WRITE | _IOC_READ));       \
+		BUILD_BUG_ON(sizeof(param) != _IOC_SIZE(cmd));                 \
+		err = copy_from_user(&param, uarg, sizeof(param));             \
+		if (err)                                                       \
+			return -EFAULT;                                        \
+		ret = function(arg, &param);                                   \
+		err = copy_to_user(uarg, &param, sizeof(param));               \
+		if (err)                                                       \
+			return -EFAULT;                                        \
+		dev_dbg(arg->kbdev->dev, "Return %d from ioctl %s\n", ret,     \
+			#function);                                            \
+		return ret;                                                    \
 	} while (0)
 
+/* Set the context's limited core mask to the lowest max_core_count core bits. */
+static int kbasep_ioctl_set_limited_core_count(struct kbase_context *kctx,
+			struct kbase_ioctl_set_limited_core_count *set_limited_core_count)
+{
+	const u64 shader_core_mask =
+		kbase_pm_get_present_cores(kctx->kbdev, KBASE_PM_CORE_SHADER);
+	/* Shifting a u64 by 64 or more is undefined, so saturate to all bits. */
+	const u64 limited_core_mask = set_limited_core_count->max_core_count >= 64 ?
+		U64_MAX : ((u64)1 << set_limited_core_count->max_core_count) - 1;
+
+	/* Fail with -EINVAL unless the mask keeps at least one present shader core. */
+	if ((shader_core_mask & limited_core_mask) == 0)
+		return -EINVAL;
+	kctx->limited_core_mask = limited_core_mask;
+	return 0;
+}
+
 static long kbase_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 {
 	struct kbase_file *const kfile = filp->private_data;
@@ -1981,14 +1972,14 @@ static long kbase_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 				union kbase_ioctl_cs_get_glb_iface,
 				kctx);
 		break;
-#endif /* MALI_USE_CSF */
-#if MALI_UNIT_TEST
-	case KBASE_IOCTL_TLSTREAM_TEST:
-		KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_TLSTREAM_TEST,
-				kbase_api_tlstream_test,
-				struct kbase_ioctl_tlstream_test,
+	case KBASE_IOCTL_CS_CPU_QUEUE_DUMP:
+		KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_CS_CPU_QUEUE_DUMP,
+				kbasep_ioctl_cs_cpu_queue_dump,
+				struct kbase_ioctl_cs_cpu_queue_info,
 				kctx);
 		break;
+#endif /* MALI_USE_CSF */
+#if MALI_UNIT_TEST
 	case KBASE_IOCTL_TLSTREAM_STATS:
 		KBASE_HANDLE_IOCTL_OUT(KBASE_IOCTL_TLSTREAM_STATS,
 				kbase_api_tlstream_stats,
@@ -1996,6 +1987,18 @@ static long kbase_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 				kctx);
 		break;
 #endif /* MALI_UNIT_TEST */
+	case KBASE_IOCTL_CONTEXT_PRIORITY_CHECK:
+		KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_CONTEXT_PRIORITY_CHECK,
+				kbasep_ioctl_context_priority_check,
+				struct kbase_ioctl_context_priority_check,
+				kctx);
+		break;
+	case KBASE_IOCTL_SET_LIMITED_CORE_COUNT:
+		KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_SET_LIMITED_CORE_COUNT,
+				kbasep_ioctl_set_limited_core_count,
+				struct kbase_ioctl_set_limited_core_count,
+				kctx);
+		break;
 	}
 
 	dev_warn(kbdev->dev, "Unknown ioctl 0x%x nr:%d", cmd, _IOC_NR(cmd));
@@ -2023,13 +2026,17 @@ static ssize_t kbase_read(struct file *filp, char __user *buf, size_t count, lof
 		read_error = kbase_csf_read_error(kctx, &event_data);
 
 	if (!read_event && !read_error) {
+		bool dump = kbase_csf_cpu_queue_read_dump_req(kctx,
+							&event_data);
 		/* This condition is not treated as an error.
 		 * It is possible that event handling thread was woken up due
 		 * to a fault/error that occurred for a queue group, but before
 		 * the corresponding fault data was read by the thread the
 		 * queue group was already terminated by the userspace.
 		 */
-		dev_dbg(kctx->kbdev->dev, "Neither event nor error signaled");
+		if (!dump)
+			dev_dbg(kctx->kbdev->dev,
+				"Neither event nor error signaled");
 	}
 
 	if (copy_to_user(buf, &event_data, data_size) != 0) {
@@ -2108,7 +2115,8 @@ static unsigned int kbase_poll(struct file *filp, poll_table *wait)
 void kbase_event_wakeup(struct kbase_context *kctx)
 {
 	KBASE_DEBUG_ASSERT(kctx);
-
+	dev_dbg(kctx->kbdev->dev, "Waking event queue for context %pK\n",
+		(void *)kctx);
 	wake_up_interruptible(&kctx->event_queue);
 }
 
@@ -2120,7 +2128,8 @@ int kbase_event_pending(struct kbase_context *ctx)
 	WARN_ON_ONCE(!ctx);
 
 	return (atomic_read(&ctx->event_count) != 0) ||
-		kbase_csf_error_pending(ctx);
+		kbase_csf_error_pending(ctx) ||
+		kbase_csf_cpu_queue_dump_needed(ctx);
 }
 #else
 int kbase_event_pending(struct kbase_context *ctx)
@@ -2303,6 +2312,7 @@ static DEVICE_ATTR(power_policy, S_IRUGO | S_IWUSR, show_policy, set_policy);
 static ssize_t show_core_mask(struct device *dev, struct device_attribute *attr, char * const buf)
 {
 	struct kbase_device *kbdev;
+	unsigned long flags;
 	ssize_t ret = 0;
 
 	kbdev = to_kbase_device(dev);
@@ -2310,6 +2320,19 @@ static ssize_t show_core_mask(struct device *dev, struct device_attribute *attr,
 	if (!kbdev)
 		return -ENODEV;
 
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+#if MALI_USE_CSF
+	ret += scnprintf(buf + ret, PAGE_SIZE - ret,
+			 "Current debug core mask : 0x%llX\n",
+			 kbdev->pm.debug_core_mask);
+	ret += scnprintf(buf + ret, PAGE_SIZE - ret,
+			 "Current desired core mask : 0x%llX\n",
+			 kbase_pm_ca_get_core_mask(kbdev));
+	ret += scnprintf(buf + ret, PAGE_SIZE - ret,
+			 "Current in use core mask : 0x%llX\n",
+			 kbdev->pm.backend.shaders_avail);
+#else
 	ret += scnprintf(buf + ret, PAGE_SIZE - ret,
 			"Current core mask (JS0) : 0x%llX\n",
 			kbdev->pm.debug_core_mask[0]);
@@ -2319,10 +2342,14 @@ static ssize_t show_core_mask(struct device *dev, struct device_attribute *attr,
 	ret += scnprintf(buf + ret, PAGE_SIZE - ret,
 			"Current core mask (JS2) : 0x%llX\n",
 			kbdev->pm.debug_core_mask[2]);
+#endif /* MALI_USE_CSF */
+
 	ret += scnprintf(buf + ret, PAGE_SIZE - ret,
 			"Available core mask : 0x%llX\n",
 			kbdev->gpu_props.props.raw_props.shader_present);
 
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
 	return ret;
 }
 
@@ -2341,17 +2368,35 @@ static ssize_t show_core_mask(struct device *dev, struct device_attribute *attr,
 static ssize_t set_core_mask(struct device *dev, struct device_attribute *attr, const char *buf, size_t count)
 {
 	struct kbase_device *kbdev;
+#if MALI_USE_CSF
+	u64 new_core_mask;
+#else
 	u64 new_core_mask[3];
-	int items, i;
+	u64 group0_core_mask;
+	int i;
+#endif /* MALI_USE_CSF */
+
+	int items;
 	ssize_t err = count;
 	unsigned long flags;
-	u64 shader_present, group0_core_mask;
+	u64 shader_present;
 
 	kbdev = to_kbase_device(dev);
 
 	if (!kbdev)
 		return -ENODEV;
 
+#if MALI_USE_CSF
+	items = sscanf(buf, "%llx", &new_core_mask);
+
+	if (items != 1) {
+		dev_err(kbdev->dev,
+			"Couldn't process core mask write operation.\n"
+			"Use format <core_mask>\n");
+		err = -EINVAL;
+		goto end;
+	}
+#else
 	items = sscanf(buf, "%llx %llx %llx",
 			&new_core_mask[0], &new_core_mask[1],
 			&new_core_mask[2]);
@@ -2366,11 +2411,35 @@ static ssize_t set_core_mask(struct device *dev, struct device_attribute *attr,
 
 	if (items == 1)
 		new_core_mask[1] = new_core_mask[2] = new_core_mask[0];
+#endif
 
 	mutex_lock(&kbdev->pm.lock);
 	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
 
 	shader_present = kbdev->gpu_props.props.raw_props.shader_present;
+
+#if MALI_USE_CSF
+	if ((new_core_mask & shader_present) != new_core_mask) {
+		dev_err(dev,
+			"Invalid core mask 0x%llX: Includes non-existent cores (present = 0x%llX)",
+			new_core_mask, shader_present);
+		err = -EINVAL;
+		goto unlock;
+
+	} else if (!(new_core_mask & shader_present &
+		     kbdev->pm.backend.ca_cores_enabled)) {
+		dev_err(dev,
+			"Invalid core mask 0x%llX: No intersection with currently available cores (present = 0x%llX, CA enabled = 0x%llX\n",
+			new_core_mask,
+			kbdev->gpu_props.props.raw_props.shader_present,
+			kbdev->pm.backend.ca_cores_enabled);
+		err = -EINVAL;
+		goto unlock;
+	}
+
+	if (kbdev->pm.debug_core_mask != new_core_mask)
+		kbase_pm_set_debug_core_mask(kbdev, new_core_mask);
+#else
 	group0_core_mask = kbdev->gpu_props.props.coherency_info.group[0].core_mask;
 
 	for (i = 0; i < 3; ++i) {
@@ -2405,6 +2474,7 @@ static ssize_t set_core_mask(struct device *dev, struct device_attribute *attr,
 		kbase_pm_set_debug_core_mask(kbdev, new_core_mask[0],
 				new_core_mask[1], new_core_mask[2]);
 	}
+#endif /* MALI_USE_CSF */
 
 unlock:
 	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
@@ -2740,7 +2810,8 @@ static ssize_t set_js_scheduling_period(struct device *dev,
 
 	/* If no contexts have been scheduled since js_timeouts was last written
 	 * to, the new timeouts might not have been latched yet. So check if an
-	 * update is pending and use the new values if necessary. */
+	 * update is pending and use the new values if necessary.
+	 */
 
 	/* Use previous 'new' scheduling period as a base if present. */
 	old_period = js_data->scheduling_period_ns;
@@ -3015,25 +3086,15 @@ static ssize_t kbase_show_gpuinfo(struct device *dev,
 		{ .id = GPU_ID2_PRODUCT_TBEX >> GPU_ID_VERSION_PRODUCT_ID_SHIFT,
 		  .name = "Mali-G78" },
 		{ .id = GPU_ID2_PRODUCT_TBAX >> GPU_ID_VERSION_PRODUCT_ID_SHIFT,
-		  .name = "Mali-TBAX" },
+		  .name = "Mali-G78AE" },
 		{ .id = GPU_ID2_PRODUCT_LBEX >> GPU_ID_VERSION_PRODUCT_ID_SHIFT,
 		  .name = "Mali-G68" },
 		{ .id = GPU_ID2_PRODUCT_TNAX >> GPU_ID_VERSION_PRODUCT_ID_SHIFT,
 		  .name = "Mali-G57" },
 		{ .id = GPU_ID2_PRODUCT_TODX >> GPU_ID_VERSION_PRODUCT_ID_SHIFT,
 		  .name = "Mali-TODX" },
-		{ .id = GPU_ID2_PRODUCT_TGRX >> GPU_ID_VERSION_PRODUCT_ID_SHIFT,
-		  .name = "Mali-TGRX" },
-		{ .id = GPU_ID2_PRODUCT_TVAX >> GPU_ID_VERSION_PRODUCT_ID_SHIFT,
-		  .name = "Mali-TVAX" },
 		{ .id = GPU_ID2_PRODUCT_LODX >> GPU_ID_VERSION_PRODUCT_ID_SHIFT,
 		  .name = "Mali-LODX" },
-		{ .id = GPU_ID2_PRODUCT_TTUX >> GPU_ID_VERSION_PRODUCT_ID_SHIFT,
-		  .name = "Mali-TTUX" },
-		{ .id = GPU_ID2_PRODUCT_LTUX >> GPU_ID_VERSION_PRODUCT_ID_SHIFT,
-		  .name = "Mali-LTUX" },
-		{ .id = GPU_ID2_PRODUCT_TE2X >> GPU_ID_VERSION_PRODUCT_ID_SHIFT,
-		  .name = "Mali-TE2X" },
 	};
 	const char *product_name = "(Unknown Mali GPU)";
 	struct kbase_device *kbdev;
@@ -3224,6 +3285,75 @@ static ssize_t show_pm_poweroff(struct device *dev,
 static DEVICE_ATTR(pm_poweroff, S_IRUGO | S_IWUSR, show_pm_poweroff,
 		set_pm_poweroff);
 
+#if MALI_USE_CSF
+/**
+ * set_idle_hysteresis_time - Store callback for CSF idle_hysteresis_time
+ *                            sysfs file.
+ * @dev:   The device this sysfs file is for
+ * @attr:  The attributes of the sysfs file
+ * @buf:   The value written to the sysfs file
+ * @count: The number of bytes written to the sysfs file
+ *
+ * This function is called when the idle_hysteresis_time sysfs file is
+ * written to.
+ *
+ * This file contains values of the idle hysteresis duration.
+ *
+ * Return: @count if the function succeeded. An error code on failure.
+ */
+static ssize_t set_idle_hysteresis_time(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct kbase_device *kbdev;
+	u32 dur;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	if (kstrtou32(buf, 0, &dur)) {
+		dev_err(kbdev->dev, "Couldn't process idle_hysteresis_time write operation.\n"
+				"Use format <idle_hysteresis_time>\n");
+		return -EINVAL;
+	}
+
+	kbase_csf_firmware_set_gpu_idle_hysteresis_time(kbdev, dur);
+
+	return count;
+}
+
+/**
+ * show_idle_hysteresis_time - Show callback for CSF idle_hysteresis_time
+ *                             sysfs entry.
+ * @dev:  The device this sysfs file is for.
+ * @attr: The attributes of the sysfs file.
+ * @buf:  The output buffer to receive the idle hysteresis duration.
+ *
+ * This function is called to get the current idle hysteresis duration in ms.
+ *
+ * Return: The number of bytes output to @buf.
+ */
+static ssize_t show_idle_hysteresis_time(struct device *dev,
+		struct device_attribute *attr, char * const buf)
+{
+	struct kbase_device *kbdev;
+	ssize_t ret;
+	u32 dur;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	dur = kbase_csf_firmware_get_gpu_idle_hysteresis_time(kbdev);
+	ret = scnprintf(buf, PAGE_SIZE, "%u\n", dur);
+
+	return ret;
+}
+
+static DEVICE_ATTR(idle_hysteresis_time, S_IRUGO | S_IWUSR,
+		show_idle_hysteresis_time, set_idle_hysteresis_time);
+#endif
+
 /**
  * set_reset_timeout - Store callback for the reset_timeout sysfs file.
  * @dev:   The device with sysfs file is for
@@ -3460,6 +3590,203 @@ static ssize_t set_lp_mem_pool_max_size(struct device *dev,
 static DEVICE_ATTR(lp_mem_pool_max_size, S_IRUGO | S_IWUSR, show_lp_mem_pool_max_size,
 		set_lp_mem_pool_max_size);
 
+/**
+ * show_simplified_mem_pool_max_size - Show the maximum size for the memory
+ *                                     pool 0 of small (4KiB) pages.
+ * @dev:  The device this sysfs file is for.
+ * @attr: The attributes of the sysfs file.
+ * @buf:  The output buffer to receive the max size.
+ *
+ * This function is called to get the maximum size for the memory pool 0 of
+ * small (4KiB) pages. It is assumed that the maximum size value is the
+ * same for all the pools.
+ *
+ * Return: The number of bytes output to @buf.
+ */
+static ssize_t show_simplified_mem_pool_max_size(struct device *dev,
+		struct device_attribute *attr, char * const buf)
+{
+	struct kbase_device *const kbdev = to_kbase_device(dev);
+
+	if (!kbdev)
+		return -ENODEV;
+
+	return kbase_debugfs_helper_get_attr_to_string(buf, PAGE_SIZE,
+		kbdev->mem_pools.small, 1, kbase_mem_pool_debugfs_max_size);
+}
+
+/**
+ * set_simplified_mem_pool_max_size - Set the same maximum size for all the
+ *                                    memory pools of small (4KiB) pages.
+ * @dev:   The device this sysfs file is for
+ * @attr:  The attributes of the sysfs file
+ * @buf:   The value written to the sysfs file
+ * @count: The number of bytes written to the sysfs file
+ *
+ * This function is called to set the same maximum size for all the memory
+ * pools of small (4KiB) pages.
+ *
+ * Return: @count if the function succeeded. An error code on failure.
+ */
+static ssize_t set_simplified_mem_pool_max_size(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct kbase_device *const kbdev = to_kbase_device(dev);
+	unsigned long new_size;
+	int gid;
+	int err;
+
+	if (!kbdev)
+		return -ENODEV;
+
+	err = kstrtoul(buf, 0, &new_size);
+	if (err)
+		return -EINVAL;
+
+	for (gid = 0; gid < MEMORY_GROUP_MANAGER_NR_GROUPS; ++gid)
+		kbase_mem_pool_debugfs_set_max_size(
+			kbdev->mem_pools.small, gid, (size_t)new_size);
+
+	return count;
+}
+
+static DEVICE_ATTR(max_size, 0600, show_simplified_mem_pool_max_size,
+		set_simplified_mem_pool_max_size);
+
+/**
+ * show_simplified_lp_mem_pool_max_size - Show the maximum size for the memory
+ *                                        pool 0 of large (2MiB) pages.
+ * @dev:  The device this sysfs file is for.
+ * @attr: The attributes of the sysfs file.
+ * @buf:  The output buffer to receive the max size.
+ *
+ * This function is called to get the maximum size for the memory pool 0 of
+ * large (2MiB) pages. It is assumed that the maximum size value is the
+ * same for all the pools.
+ *
+ * Return: The number of bytes output to @buf.
+ */
+static ssize_t show_simplified_lp_mem_pool_max_size(struct device *dev,
+		struct device_attribute *attr, char * const buf)
+{
+	struct kbase_device *const kbdev = to_kbase_device(dev);
+
+	if (!kbdev)
+		return -ENODEV;
+
+	return kbase_debugfs_helper_get_attr_to_string(buf, PAGE_SIZE,
+		kbdev->mem_pools.large, 1, kbase_mem_pool_debugfs_max_size);
+}
+
+/**
+ * set_simplified_lp_mem_pool_max_size - Set the same maximum size for all the
+ *                                       memory pools of large (2MiB) pages.
+ * @dev:   The device this sysfs file is for
+ * @attr:  The attributes of the sysfs file
+ * @buf:   The value written to the sysfs file
+ * @count: The number of bytes written to the sysfs file
+ *
+ * This function is called to set the same maximum size for all the memory
+ * pools of large (2MiB) pages.
+ *
+ * Return: @count if the function succeeded. An error code on failure.
+ */
+static ssize_t set_simplified_lp_mem_pool_max_size(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct kbase_device *const kbdev = to_kbase_device(dev);
+	unsigned long new_size;
+	int gid;
+	int err;
+
+	if (!kbdev)
+		return -ENODEV;
+
+	err = kstrtoul(buf, 0, &new_size);
+	if (err)
+		return -EINVAL;
+
+	for (gid = 0; gid < MEMORY_GROUP_MANAGER_NR_GROUPS; ++gid)
+		kbase_mem_pool_debugfs_set_max_size(
+			kbdev->mem_pools.large, gid, (size_t)new_size);
+
+	return count;
+}
+
+static DEVICE_ATTR(lp_max_size, 0600, show_simplified_lp_mem_pool_max_size,
+		set_simplified_lp_mem_pool_max_size);
+
+/**
+ * show_simplified_ctx_default_max_size - Show the default maximum size for the
+ *                                        memory pool 0 of small (4KiB) pages.
+ * @dev:  The device this sysfs file is for.
+ * @attr: The attributes of the sysfs file.
+ * @buf:  The output buffer to receive the pool size.
+ *
+ * This function is called to get the default ctx maximum size for the memory
+ * pool 0 of small (4KiB) pages. It is assumed that the maximum size value is
+ * the same for all the pools. The maximum size for the pool of large (2MiB)
+ * pages is the same, in bytes, as the max size of the pool of small pages.
+ *
+ * Return: The number of bytes output to @buf.
+ */
+static ssize_t show_simplified_ctx_default_max_size(struct device *dev,
+		struct device_attribute *attr, char * const buf)
+{
+	struct kbase_device *kbdev = to_kbase_device(dev);
+	size_t max_size;
+
+	if (!kbdev)
+		return -ENODEV;
+
+	max_size = kbase_mem_pool_config_debugfs_max_size(
+			kbdev->mem_pool_defaults.small, 0);
+
+	return scnprintf(buf, PAGE_SIZE, "%zu\n", max_size);
+}
+
+/**
+ * set_simplified_ctx_default_max_size - Set the same default maximum size for
+ *                                       all the pools created for new
+ *                                       contexts. This covers the pool of
+ *                                       large pages as well and its max size
+ *                                       will be the same as that of the pool
+ *                                       of small pages in terms of bytes.
+ * @dev:  The device this sysfs file is for.
+ * @attr: The attributes of the sysfs file.
+ * @buf:  The value written to the sysfs file.
+ * @count: The number of bytes written to the sysfs file.
+ *
+ * This function is called to set the same maximum size for all pools created
+ * for new contexts.
+ *
+ * Return: @count if the function succeeded. An error code on failure.
+ */
+static ssize_t set_simplified_ctx_default_max_size(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct kbase_device *kbdev;
+	unsigned long new_size;
+	int err;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	err = kstrtoul(buf, 0, &new_size);
+	if (err)
+		return -EINVAL;
+
+	kbase_mem_pool_group_config_set_max_size(
+		&kbdev->mem_pool_defaults, (size_t)new_size);
+
+	return count;
+}
+
+static DEVICE_ATTR(ctx_default_max_size, 0600,
+		show_simplified_ctx_default_max_size,
+		set_simplified_ctx_default_max_size);
+
 #if !MALI_USE_CSF
 /**
  * show_js_ctx_scheduling_mode - Show callback for js_ctx_scheduling_mode sysfs
@@ -3767,21 +4094,28 @@ static void kbasep_protected_mode_hwcnt_disable_worker(struct work_struct *data)
 {
 	struct kbase_device *kbdev = container_of(data, struct kbase_device,
 		protected_mode_hwcnt_disable_work);
+	spinlock_t *backend_lock;
 	unsigned long flags;
 
 	bool do_disable;
 
-	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+#if MALI_USE_CSF
+	backend_lock = &kbdev->csf.scheduler.interrupt_lock;
+#else
+	backend_lock = &kbdev->hwaccess_lock;
+#endif
+
+	spin_lock_irqsave(backend_lock, flags);
 	do_disable = !kbdev->protected_mode_hwcnt_desired &&
 		!kbdev->protected_mode_hwcnt_disabled;
-	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+	spin_unlock_irqrestore(backend_lock, flags);
 
 	if (!do_disable)
 		return;
 
 	kbase_hwcnt_context_disable(kbdev->hwcnt_gpu_ctx);
 
-	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	spin_lock_irqsave(backend_lock, flags);
 	do_disable = !kbdev->protected_mode_hwcnt_desired &&
 		!kbdev->protected_mode_hwcnt_disabled;
 
@@ -3801,9 +4135,10 @@ static void kbasep_protected_mode_hwcnt_disable_worker(struct work_struct *data)
 		kbase_hwcnt_context_enable(kbdev->hwcnt_gpu_ctx);
 	}
 
-	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+	spin_unlock_irqrestore(backend_lock, flags);
 }
 
+#ifndef PLATFORM_PROTECTED_CALLBACKS
 static int kbasep_protected_mode_enable(struct protected_mode_device *pdev)
 {
 	struct kbase_device *kbdev = pdev->data;
@@ -3823,6 +4158,9 @@ static const struct protected_mode_ops kbasep_native_protected_ops = {
 	.protected_mode_disable = kbasep_protected_mode_disable
 };
 
+#define PLATFORM_PROTECTED_CALLBACKS (&kbasep_native_protected_ops)
+#endif /* PLATFORM_PROTECTED_CALLBACKS */
+
 int kbase_protected_mode_init(struct kbase_device *kbdev)
 {
 	/* Use native protected ops */
@@ -3831,7 +4169,7 @@ int kbase_protected_mode_init(struct kbase_device *kbdev)
 	if (!kbdev->protected_dev)
 		return -ENOMEM;
 	kbdev->protected_dev->data = kbdev;
-	kbdev->protected_ops = &kbasep_native_protected_ops;
+	kbdev->protected_ops = PLATFORM_PROTECTED_CALLBACKS;
 	INIT_WORK(&kbdev->protected_mode_hwcnt_disable_work,
 		kbasep_protected_mode_hwcnt_disable_worker);
 	kbdev->protected_mode_hwcnt_desired = true;
@@ -3999,6 +4337,7 @@ int kbase_device_pm_init(struct kbase_device *kbdev)
 	u32 gpu_model_id;
 
 	if (kbase_is_pv_enabled(kbdev->dev->of_node)) {
+		dev_info(kbdev->dev, "Arbitration interface enabled\n");
 		if (kbase_is_pm_enabled(kbdev->dev->of_node)) {
 			/* Arbitration AND power management invalid */
 			dev_err(kbdev->dev, "Invalid combination of arbitration AND power management\n");
@@ -4022,13 +4361,16 @@ int kbase_device_pm_init(struct kbase_device *kbdev)
 			gpu_model_id = GPU_ID2_MODEL_MATCH_VALUE(product_id);
 
 			if (gpu_model_id != GPU_ID2_PRODUCT_TGOX
-				&& gpu_model_id != GPU_ID2_PRODUCT_TNOX) {
+				&& gpu_model_id != GPU_ID2_PRODUCT_TNOX
+				&& gpu_model_id != GPU_ID2_PRODUCT_TBAX) {
 				kbase_arbiter_pm_early_term(kbdev);
 				dev_err(kbdev->dev, "GPU platform not suitable for arbitration\n");
 				return -EPERM;
 			}
 		}
 	} else {
+		kbdev->arb.arb_if = NULL;
+		kbdev->arb.arb_dev = NULL;
 		err = power_control_init(kbdev);
 	}
 #else
@@ -4053,7 +4395,7 @@ void kbase_device_pm_term(struct kbase_device *kbdev)
 
 int power_control_init(struct kbase_device *kbdev)
 {
-#if KERNEL_VERSION(3, 18, 0) > LINUX_VERSION_CODE || !defined(CONFIG_OF)
+#ifndef CONFIG_OF
 	/* Power control initialization requires at least the capability to get
 	 * regulators and clocks from the device tree, as well as parsing
 	 * arrays of unsigned integer values.
@@ -4143,6 +4485,11 @@ int power_control_init(struct kbase_device *kbdev)
 	kbdev->nr_clocks = i;
 	dev_dbg(&pdev->dev, "Clocks probed: %u\n", kbdev->nr_clocks);
 
+	/* Any error in parsing the OPP table from the device file
+	 * shall be ignored. The fact that the table may be absent or wrong
+	 * on the device tree of the platform shouldn't prevent the driver
+	 * from completing its initialization.
+	 */
 #if defined(CONFIG_PM_OPP)
 #if ((KERNEL_VERSION(4, 10, 0) <= LINUX_VERSION_CODE) && \
 	defined(CONFIG_REGULATOR))
@@ -4152,16 +4499,14 @@ int power_control_init(struct kbase_device *kbdev)
 	}
 #endif /* (KERNEL_VERSION(4, 10, 0) <= LINUX_VERSION_CODE */
 #ifdef CONFIG_ARCH_ROCKCHIP
-	err = kbase_platform_rk_init_opp_table(kbdev);
-	if (err)
-		dev_err(kbdev->dev, "Failed to init_opp_table (%d)\n", err);
+       err = kbase_platform_rk_init_opp_table(kbdev);
+       if (err)
+               dev_err(kbdev->dev, "Failed to init_opp_table (%d)\n", err);
 #else
 	err = dev_pm_opp_of_add_table(kbdev->dev);
 	CSTD_UNUSED(err);
 #endif
 #endif /* CONFIG_PM_OPP */
-
-#endif /* KERNEL_VERSION(4, 4, 0) > LINUX_VERSION_CODE */
 	return 0;
 
 clocks_probe_defer:
@@ -4170,19 +4515,13 @@ clocks_probe_defer:
 		regulator_put(kbdev->regulators[i]);
 #endif
 	return err;
+#endif /* CONFIG_OF */
 }
 
 void power_control_term(struct kbase_device *kbdev)
 {
 	unsigned int i;
 
-#if (KERNEL_VERSION(4, 4, 0) > LINUX_VERSION_CODE && \
-	!defined(LSK_OPPV2_BACKPORT))
-#if KERNEL_VERSION(3, 19, 0) <= LINUX_VERSION_CODE
-	of_free_opp_table(kbdev->dev);
-#endif
-#else
-
 #if defined(CONFIG_PM_OPP)
 	dev_pm_opp_of_remove_table(kbdev->dev);
 #if ((KERNEL_VERSION(4, 10, 0) <= LINUX_VERSION_CODE) && \
@@ -4192,8 +4531,6 @@ void power_control_term(struct kbase_device *kbdev)
 #endif /* (KERNEL_VERSION(4, 10, 0) <= LINUX_VERSION_CODE */
 #endif /* CONFIG_PM_OPP */
 
-#endif /* KERNEL_VERSION(4, 4, 0) > LINUX_VERSION_CODE */
-
 	for (i = 0; i < BASE_MAX_NR_CLOCKS_REGULATORS; i++) {
 		if (kbdev->clocks[i]) {
 			clk_unprepare(kbdev->clocks[i]);
@@ -4203,15 +4540,14 @@ void power_control_term(struct kbase_device *kbdev)
 			break;
 	}
 
-#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 12, 0)) && defined(CONFIG_OF) \
-			&& defined(CONFIG_REGULATOR)
+#if defined(CONFIG_OF) && defined(CONFIG_REGULATOR)
 	for (i = 0; i < BASE_MAX_NR_CLOCKS_REGULATORS; i++) {
 		if (kbdev->regulators[i]) {
 			regulator_put(kbdev->regulators[i]);
 			kbdev->regulators[i] = NULL;
 		}
 	}
-#endif /* LINUX_VERSION_CODE >= 3, 12, 0 */
+#endif
 }
 
 #ifdef MALI_KBASE_BUILD
@@ -4220,7 +4556,7 @@ void power_control_term(struct kbase_device *kbdev)
 static void trigger_reset(struct kbase_device *kbdev)
 {
 	kbase_pm_context_active(kbdev);
-	if (kbase_prepare_to_reset_gpu(kbdev))
+	if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_NONE))
 		kbase_reset_gpu(kbdev);
 	kbase_pm_context_idle(kbdev);
 }
@@ -4248,7 +4584,7 @@ DEFINE_SIMPLE_ATTRIBUTE(fops_##type##_quirks, type##_quirks_get,\
 MAKE_QUIRK_ACCESSORS(sc);
 MAKE_QUIRK_ACCESSORS(tiler);
 MAKE_QUIRK_ACCESSORS(mmu);
-MAKE_QUIRK_ACCESSORS(jm);
+MAKE_QUIRK_ACCESSORS(gpu);
 
 static ssize_t kbase_device_debugfs_reset_write(struct file *file,
 		const char __user *ubuf, size_t count, loff_t *ppos)
@@ -4369,7 +4705,9 @@ int kbase_device_debugfs_init(struct kbase_device *kbdev)
 	kbdev->mali_debugfs_directory = debugfs_create_dir(kbdev->devname,
 			NULL);
 	if (!kbdev->mali_debugfs_directory) {
-		dev_err(kbdev->dev, "Couldn't create mali debugfs directory\n");
+		dev_err(kbdev->dev,
+			"Couldn't create mali debugfs directory: %s\n",
+			kbdev->devname);
 		err = -ENOMEM;
 		goto out;
 	}
@@ -4409,11 +4747,12 @@ int kbase_device_debugfs_init(struct kbase_device *kbdev)
 
 	kbasep_gpu_memory_debugfs_init(kbdev);
 	kbase_as_fault_debugfs_init(kbdev);
-#ifdef CONFIG_MALI_PRFCNT_SET_SECONDARY_VIA_DEBUG_FS
+#ifdef CONFIG_MALI_PRFCNT_SET_SELECT_VIA_DEBUG_FS
 	kbase_instr_backend_debugfs_init(kbdev);
 #endif
 	/* fops_* variables created by invocations of macro
-	 * MAKE_QUIRK_ACCESSORS() above. */
+	 * MAKE_QUIRK_ACCESSORS() above.
+	 */
 	debugfs_create_file("quirks_sc", 0644,
 			kbdev->mali_debugfs_directory, kbdev,
 			&fops_sc_quirks);
@@ -4423,9 +4762,8 @@ int kbase_device_debugfs_init(struct kbase_device *kbdev)
 	debugfs_create_file("quirks_mmu", 0644,
 			kbdev->mali_debugfs_directory, kbdev,
 			&fops_mmu_quirks);
-	debugfs_create_file("quirks_jm", 0644,
-			kbdev->mali_debugfs_directory, kbdev,
-			&fops_jm_quirks);
+	debugfs_create_file("quirks_gpu", 0644, kbdev->mali_debugfs_directory,
+			    kbdev, &fops_gpu_quirks);
 
 	debugfs_create_bool("infinite_cache", mode,
 			debugfs_ctx_defaults_directory,
@@ -4468,6 +4806,8 @@ int kbase_device_debugfs_init(struct kbase_device *kbdev)
 			&kbasep_serialize_jobs_debugfs_fops);
 #endif
 
+	kbase_dvfs_status_debugfs_init(kbdev);
+
 	return 0;
 
 out:
@@ -4519,6 +4859,17 @@ int kbase_device_coherency_init(struct kbase_device *kbdev)
 
 		override_coherency = be32_to_cpup(coherency_override_dts);
 
+#if MALI_USE_CSF && !defined(CONFIG_MALI_BIFROST_NO_MALI)
+		/* ACE coherency mode is not supported by Driver on CSF GPUs.
+		 * Return an error to signal the invalid device tree configuration.
+		 */
+		if (override_coherency == COHERENCY_ACE) {
+			dev_err(kbdev->dev,
+				"ACE coherency not supported, wrong DT configuration");
+			return -EINVAL;
+		}
+#endif
+
 		if ((override_coherency <= COHERENCY_NONE) &&
 			(supported_coherency_bitmap &
 			 COHERENCY_FEATURE_BIT(override_coherency))) {
@@ -4542,40 +4893,149 @@ int kbase_device_coherency_init(struct kbase_device *kbdev)
 	return 0;
 }
 
-#ifdef CONFIG_MALI_BUSLOG
 
-/* Callback used by the kbase bus logger client, to initiate a GPU reset
- * when the bus log is restarted.  GPU reset is used as reference point
- * in HW bus log analyses.
+#if MALI_USE_CSF
+/**
+ * csg_scheduling_period_store - Store callback for the csg_scheduling_period
+ * sysfs file.
+ * @dev:   The device this sysfs file is for
+ * @attr:  The attributes of the sysfs file
+ * @buf:   The value written to the sysfs file
+ * @count: The number of bytes written to the sysfs file
+ *
+ * This function is called when the csg_scheduling_period sysfs file is written
+ * to. It checks the data written, and if valid updates the scheduling period.
+ *
+ * Return: @count if the function succeeded. An error code on failure.
  */
-static void kbase_logging_started_cb(void *data)
+static ssize_t csg_scheduling_period_store(struct device *dev,
+					   struct device_attribute *attr,
+					   const char *buf, size_t count)
 {
-	struct kbase_device *kbdev = (struct kbase_device *)data;
+	struct kbase_device *kbdev;
+	int ret;
+	unsigned int csg_scheduling_period;
 
-	if (kbase_prepare_to_reset_gpu(kbdev))
-		kbase_reset_gpu(kbdev);
-	dev_info(kbdev->dev, "KBASE - Bus logger restarted\n");
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	ret = kstrtouint(buf, 0, &csg_scheduling_period);
+	if (ret || csg_scheduling_period == 0) {
+		dev_err(kbdev->dev,
+			"Couldn't process csg_scheduling_period write operation.\n"
+			"Use format 'csg_scheduling_period_ms', and csg_scheduling_period_ms > 0\n");
+		return -EINVAL;
+	}
+
+	kbase_csf_scheduler_lock(kbdev);
+	kbdev->csf.scheduler.csg_scheduling_period_ms = csg_scheduling_period;
+	dev_dbg(kbdev->dev, "CSG scheduling period: %ums\n",
+		csg_scheduling_period);
+	kbase_csf_scheduler_unlock(kbdev);
+
+	return count;
 }
 
-int buslog_init(struct kbase_device *kbdev)
+/**
+ * csg_scheduling_period_show - Show callback for the csg_scheduling_period
+ * sysfs entry.
+ * @dev:  The device this sysfs file is for.
+ * @attr: The attributes of the sysfs file.
+ * @buf:  The output buffer to receive the CSG scheduling period.
+ *
+ * This function is called to get the current CSG scheduling period.
+ *
+ * Return: The number of bytes output to @buf.
+ */
+static ssize_t csg_scheduling_period_show(struct device *dev,
+					  struct device_attribute *attr,
+					  char *const buf)
 {
-	int err = 0;
+	struct kbase_device *kbdev;
+	ssize_t ret;
 
-	err = bl_core_client_register(kbdev->devname,
-					kbase_logging_started_cb,
-					kbdev, &kbdev->buslogger,
-					THIS_MODULE, NULL);
-	if (err == 0)
-		bl_core_set_threshold(kbdev->buslogger, 1024*1024*1024);
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
 
-	return err;
+	ret = scnprintf(buf, PAGE_SIZE, "%u\n",
+			kbdev->csf.scheduler.csg_scheduling_period_ms);
+
+	return ret;
 }
 
-void buslog_term(struct kbase_device *kbdev)
+static DEVICE_ATTR(csg_scheduling_period, 0644, csg_scheduling_period_show,
+		   csg_scheduling_period_store);
+
+/**
+ * fw_timeout_store - Store callback for the fw_timeout sysfs file.
+ * @dev:   The device this sysfs file is for
+ * @attr:  The attributes of the sysfs file
+ * @buf:   The value written to the sysfs file
+ * @count: The number of bytes written to the sysfs file
+ *
+ * This function is called when the fw_timeout sysfs file is written to. It
+ * checks the data written, and if valid updates the firmware timeout.
+ *
+ * Return: @count if the function succeeded. An error code on failure.
+ */
+static ssize_t fw_timeout_store(struct device *dev,
+				struct device_attribute *attr, const char *buf,
+				size_t count)
 {
-	bl_core_client_unregister(kbdev->buslogger);
+	struct kbase_device *kbdev;
+	int ret;
+	unsigned int fw_timeout;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	ret = kstrtouint(buf, 0, &fw_timeout);
+	if (ret || fw_timeout == 0) {
+		dev_err(kbdev->dev, "%s\n%s\n%u",
+			"Couldn't process fw_timeout write operation.",
+			"Use format 'fw_timeout_ms', and fw_timeout_ms > 0",
+			FIRMWARE_PING_INTERVAL_MS);
+		return -EINVAL;
+	}
+
+	kbase_csf_scheduler_lock(kbdev);
+	kbdev->csf.fw_timeout_ms = fw_timeout;
+	kbase_csf_scheduler_unlock(kbdev);
+	dev_dbg(kbdev->dev, "Firmware timeout: %ums\n", fw_timeout);
+
+	return count;
 }
-#endif
+
+/**
+ * fw_timeout_show - Show callback for the firmware timeout sysfs entry.
+ * @dev:  The device this sysfs file is for.
+ * @attr: The attributes of the sysfs file.
+ * @buf:  The output buffer to receive the firmware timeout value.
+ *
+ * This function is called to get the current firmware timeout.
+ *
+ * Return: The number of bytes output to @buf, or -ENODEV if there is no device.
+ */
+static ssize_t fw_timeout_show(struct device *dev,
+			       struct device_attribute *attr, char *const buf)
+{
+	struct kbase_device *kbdev;
+	ssize_t ret;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	ret = scnprintf(buf, PAGE_SIZE, "%u\n", kbdev->csf.fw_timeout_ms);
+
+	return ret;
+}
+
+static DEVICE_ATTR(fw_timeout, 0644, fw_timeout_show, fw_timeout_store);
+#endif /* MALI_USE_CSF */
 
 static struct attribute *kbase_scheduling_attrs[] = {
 #if !MALI_USE_CSF
@@ -4598,9 +5058,15 @@ static struct attribute *kbase_attrs[] = {
 	&dev_attr_gpuinfo.attr,
 	&dev_attr_dvfs_period.attr,
 	&dev_attr_pm_poweroff.attr,
+#if MALI_USE_CSF
+	&dev_attr_idle_hysteresis_time.attr,
+#endif
 	&dev_attr_reset_timeout.attr,
 #if !MALI_USE_CSF
 	&dev_attr_js_scheduling_period.attr,
+#else
+	&dev_attr_csg_scheduling_period.attr,
+	&dev_attr_fw_timeout.attr,
 #endif /* !MALI_USE_CSF */
 	&dev_attr_power_policy.attr,
 	&dev_attr_core_mask.attr,
@@ -4614,12 +5080,25 @@ static struct attribute *kbase_attrs[] = {
 	NULL
 };
 
+static struct attribute *kbase_mempool_attrs[] = {
+	&dev_attr_max_size.attr,
+	&dev_attr_lp_max_size.attr,
+	&dev_attr_ctx_default_max_size.attr,
+	NULL
+};
+
 #define SYSFS_SCHEDULING_GROUP "scheduling"
 static const struct attribute_group kbase_scheduling_attr_group = {
 	.name = SYSFS_SCHEDULING_GROUP,
 	.attrs = kbase_scheduling_attrs,
 };
 
+#define SYSFS_MEMPOOL_GROUP "mempool"
+static const struct attribute_group kbase_mempool_attr_group = {
+	.name = SYSFS_MEMPOOL_GROUP,
+	.attrs = kbase_mempool_attrs,
+};
+
 static const struct attribute_group kbase_attr_group = {
 	.attrs = kbase_attrs,
 };
@@ -4635,15 +5114,28 @@ int kbase_sysfs_init(struct kbase_device *kbdev)
 	kbdev->mdev.mode = 0666;
 
 	err = sysfs_create_group(&kbdev->dev->kobj, &kbase_attr_group);
-	if (!err) {
-		err = sysfs_create_group(&kbdev->dev->kobj,
-					 &kbase_scheduling_attr_group);
-		if (err) {
-			dev_err(kbdev->dev, "Creation of %s sysfs group failed",
-				SYSFS_SCHEDULING_GROUP);
-			sysfs_remove_group(&kbdev->dev->kobj,
-					   &kbase_attr_group);
-		}
+	if (err)
+		return err;
+
+	err = sysfs_create_group(&kbdev->dev->kobj,
+			&kbase_scheduling_attr_group);
+	if (err) {
+		dev_err(kbdev->dev, "Creation of %s sysfs group failed",
+			SYSFS_SCHEDULING_GROUP);
+		sysfs_remove_group(&kbdev->dev->kobj,
+			&kbase_attr_group);
+		return err;
+	}
+
+	err = sysfs_create_group(&kbdev->dev->kobj,
+			&kbase_mempool_attr_group);
+	if (err) {
+		dev_err(kbdev->dev, "Creation of %s sysfs group failed",
+			SYSFS_MEMPOOL_GROUP);
+		sysfs_remove_group(&kbdev->dev->kobj,
+			&kbase_scheduling_attr_group);
+		sysfs_remove_group(&kbdev->dev->kobj,
+			&kbase_attr_group);
 	}
 
 	return err;
@@ -4651,6 +5143,7 @@ int kbase_sysfs_init(struct kbase_device *kbdev)
 
 void kbase_sysfs_term(struct kbase_device *kbdev)
 {
+	sysfs_remove_group(&kbdev->dev->kobj, &kbase_mempool_attr_group);
 	sysfs_remove_group(&kbdev->dev->kobj, &kbase_scheduling_attr_group);
 	sysfs_remove_group(&kbdev->dev->kobj, &kbase_attr_group);
 	put_device(kbdev->dev);
@@ -4710,7 +5203,8 @@ static int kbase_platform_device_probe(struct platform_device *pdev)
 
 	if (err) {
 		if (err == -EPROBE_DEFER)
-			dev_err(kbdev->dev, "Device initialization Deferred\n");
+			dev_info(kbdev->dev,
+				"Device initialization Deferred\n");
 		else
 			dev_err(kbdev->dev, "Device initialization failed\n");
 
@@ -4752,8 +5246,11 @@ static int kbase_device_suspend(struct device *dev)
 
 	kbase_pm_suspend(kbdev);
 
-#if defined(CONFIG_MALI_BIFROST_DEVFREQ) && \
-		(LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0))
+#ifdef CONFIG_MALI_BIFROST_DVFS
+	kbase_pm_metrics_stop(kbdev);
+#endif
+
+#ifdef CONFIG_MALI_BIFROST_DEVFREQ
 	dev_dbg(dev, "Callback %s\n", __func__);
 	if (kbdev->devfreq) {
 		kbase_devfreq_enqueue_work(kbdev, DEVFREQ_WORK_SUSPEND);
@@ -4781,8 +5278,11 @@ static int kbase_device_resume(struct device *dev)
 
 	kbase_pm_resume(kbdev);
 
-#if defined(CONFIG_MALI_BIFROST_DEVFREQ) && \
-		(LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0))
+#ifdef CONFIG_MALI_BIFROST_DVFS
+	kbase_pm_metrics_start(kbdev);
+#endif
+
+#ifdef CONFIG_MALI_BIFROST_DEVFREQ
 	dev_dbg(dev, "Callback %s\n", __func__);
 	if (kbdev->devfreq) {
 		mutex_lock(&kbdev->pm.lock);
@@ -4814,8 +5314,13 @@ static int kbase_device_runtime_suspend(struct device *dev)
 	if (!kbdev)
 		return -ENODEV;
 
-#if defined(CONFIG_MALI_BIFROST_DEVFREQ) && \
-		(LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0))
+	dev_dbg(dev, "Callback %s\n", __func__);
+
+#ifdef CONFIG_MALI_BIFROST_DVFS
+	kbase_pm_metrics_stop(kbdev);
+#endif
+
+#ifdef CONFIG_MALI_BIFROST_DEVFREQ
 	if (kbdev->devfreq)
 		kbase_devfreq_enqueue_work(kbdev, DEVFREQ_WORK_SUSPEND);
 #endif
@@ -4853,8 +5358,11 @@ static int kbase_device_runtime_resume(struct device *dev)
 		dev_dbg(dev, "runtime resume\n");
 	}
 
-#if defined(CONFIG_MALI_BIFROST_DEVFREQ) && \
-		(LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0))
+#ifdef CONFIG_MALI_BIFROST_DVFS
+	kbase_pm_metrics_start(kbdev);
+#endif
+
+#ifdef CONFIG_MALI_BIFROST_DEVFREQ
 	if (kbdev->devfreq)
 		kbase_devfreq_enqueue_work(kbdev, DEVFREQ_WORK_RESUME);
 #endif
@@ -4920,7 +5428,6 @@ static struct platform_driver kbase_platform_driver = {
 	.remove = kbase_platform_device_remove,
 	.driver = {
 		   .name = kbase_drv_name,
-		   .owner = THIS_MODULE,
 		   .pm = &kbase_pm_ops,
 		   .of_match_table = of_match_ptr(kbase_dt_ids),
 	},
@@ -4965,6 +5472,7 @@ MODULE_LICENSE("GPL");
 MODULE_VERSION(MALI_RELEASE_NAME " (UK version " \
 		__stringify(BASE_UK_VERSION_MAJOR) "." \
 		__stringify(BASE_UK_VERSION_MINOR) ")");
+MODULE_SOFTDEP("pre: memory_group_manager");
 
 #define CREATE_TRACE_POINTS
 /* Create the trace points (otherwise we just get code to call a tracepoint) */
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_cs_experimental.h b/drivers/gpu/arm/bifrost/mali_kbase_cs_experimental.h
index caba2cd7a0e3..67cd5ee1ece7 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_cs_experimental.h
+++ b/drivers/gpu/arm/bifrost/mali_kbase_cs_experimental.h
@@ -1,11 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  *
- * (C) COPYRIGHT ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,18 +17,6 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
- *//* SPDX-License-Identifier: GPL-2.0 */
-
-/*
- * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved.
- *
- * This program is free software and is provided to you under the terms of the
- * GNU General Public License version 2 as published by the Free Software
- * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
- *
  */
 
 #ifndef _KBASE_CS_EXPERIMENTAL_H_
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_ctx_sched.c b/drivers/gpu/arm/bifrost/mali_kbase_ctx_sched.c
index 750dbd8c3924..c63bc8dfa70a 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_ctx_sched.c
+++ b/drivers/gpu/arm/bifrost/mali_kbase_ctx_sched.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  *
  * (C) COPYRIGHT 2017-2020 ARM Limited. All rights reserved.
@@ -5,7 +6,7 @@
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,13 +17,9 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 #include <mali_kbase.h>
-#include <mali_kbase_config_defaults.h>
-
 #include <mali_kbase_defs.h>
 #include "mali_kbase_ctx_sched.h"
 #include "tl/mali_kbase_tracepoints.h"
@@ -46,7 +43,8 @@ int kbase_ctx_sched_init(struct kbase_device *kbdev)
 	int as_present = (1U << kbdev->nr_hw_address_spaces) - 1;
 
 	/* These two must be recalculated if nr_hw_address_spaces changes
-	 * (e.g. for HW workarounds) */
+	 * (e.g. for HW workarounds)
+	 */
 	kbdev->nr_user_address_spaces = kbdev->nr_hw_address_spaces;
 	kbdev->as_free = as_present; /* All ASs initially free */
 
@@ -261,7 +259,7 @@ struct kbase_context *kbase_ctx_sched_as_to_ctx_refcount(
 
 	found_kctx = kbdev->as_to_kctx[as_nr];
 
-	if (found_kctx != NULL)
+	if (!WARN_ON(found_kctx == NULL))
 		kbase_ctx_sched_retain_ctx_refcount(found_kctx);
 
 	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
@@ -275,23 +273,35 @@ struct kbase_context *kbase_ctx_sched_as_to_ctx(struct kbase_device *kbdev,
 	unsigned long flags;
 	struct kbase_context *found_kctx;
 
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	found_kctx = kbase_ctx_sched_as_to_ctx_nolock(kbdev, as_nr);
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	return found_kctx;
+}
+
+struct kbase_context *kbase_ctx_sched_as_to_ctx_nolock(
+		struct kbase_device *kbdev, size_t as_nr)
+{
+	struct kbase_context *found_kctx;
+
 	if (WARN_ON(kbdev == NULL))
 		return NULL;
 
 	if (WARN_ON(as_nr >= BASE_MAX_NR_AS))
 		return NULL;
 
-	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	lockdep_assert_held(&kbdev->hwaccess_lock);
 
 	found_kctx = kbdev->as_to_kctx[as_nr];
 
 	if (found_kctx) {
-		if (WARN_ON(atomic_read(&found_kctx->refcount) <= 0))
+		if (atomic_read(&found_kctx->refcount) <= 0)
 			found_kctx = NULL;
 	}
 
-	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
-
 	return found_kctx;
 }
 
@@ -353,3 +363,40 @@ void kbase_ctx_sched_release_ctx_lock(struct kbase_context *kctx)
 
 	spin_unlock_irqrestore(&kctx->kbdev->hwaccess_lock, flags);
 }
+
+#if MALI_USE_CSF
+bool kbase_ctx_sched_inc_refcount_if_as_valid(struct kbase_context *kctx)
+{
+	struct kbase_device *kbdev;
+	bool added_ref = false;
+	unsigned long flags;
+
+	if (WARN_ON(kctx == NULL))
+		return added_ref;
+
+	kbdev = kctx->kbdev;
+
+	if (WARN_ON(kbdev == NULL))
+		return added_ref;
+	/* Hold both locks while checking and updating the AS assignment */
+	mutex_lock(&kbdev->mmu_hw_mutex);
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	/* Take the reference only if as_to_kctx[] maps the AS back to kctx */
+	if ((kctx->as_nr != KBASEP_AS_NR_INVALID) &&
+	    (kctx == kbdev->as_to_kctx[kctx->as_nr])) {
+		atomic_inc(&kctx->refcount);
+		/* Clear the slot's bit in as_free if it is currently set */
+		if (kbdev->as_free & (1u << kctx->as_nr))
+			kbdev->as_free &= ~(1u << kctx->as_nr);
+
+		KBASE_KTRACE_ADD(kbdev, SCHED_RETAIN_CTX_NOLOCK, kctx,
+				 kbase_ktrace_get_ctx_refcnt(kctx));
+		added_ref = true;
+	}
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+	mutex_unlock(&kbdev->mmu_hw_mutex);
+
+	return added_ref;
+}
+#endif
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_ctx_sched.h b/drivers/gpu/arm/bifrost/mali_kbase_ctx_sched.h
index 1affa719e6dc..cadb73538a85 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_ctx_sched.h
+++ b/drivers/gpu/arm/bifrost/mali_kbase_ctx_sched.h
@@ -1,11 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  *
- * (C) COPYRIGHT 2017-2018, 2020 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2017-2018, 2020-2021 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,8 +17,6 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 #ifndef _KBASE_CTX_SCHED_H_
@@ -26,7 +25,7 @@
 #include <mali_kbase.h>
 
 /**
- * The Context Scheduler manages address space assignment and reference
+ * DOC: The Context Scheduler manages address space assignment and reference
  * counting to kbase_context. The interface has been designed to minimise
  * interactions between the Job Scheduler and Power Management/MMU to support
  * the existing Job Scheduler interface.
@@ -41,7 +40,7 @@
  */
 
 /**
- * kbase_ctx_sched_init - Initialise the context scheduler
+ * kbase_ctx_sched_init() - Initialise the context scheduler
  * @kbdev: The device for which the context scheduler needs to be initialised
  *
  * This must be called during device initialisation. The number of hardware
@@ -167,6 +166,21 @@ struct kbase_context *kbase_ctx_sched_as_to_ctx_refcount(
 struct kbase_context *kbase_ctx_sched_as_to_ctx(struct kbase_device *kbdev,
 		size_t as_nr);
 
+/**
+ * kbase_ctx_sched_as_to_ctx_nolock - Lookup a context based on its current
+ * address space.
+ * @kbdev: The device for which the returned context must belong
+ * @as_nr: address space assigned to the context of interest
+ *
+ * The following lock must be held by the caller:
+ * * kbase_device::hwaccess_lock
+ *
+ * Return: a valid struct kbase_context on success or NULL on failure,
+ * indicating that no context was found in as_nr.
+ */
+struct kbase_context *kbase_ctx_sched_as_to_ctx_nolock(
+		struct kbase_device *kbdev, size_t as_nr);
+
 /**
  * kbase_ctx_sched_inc_refcount_nolock - Refcount a context as being busy,
  * preventing it from being scheduled out.
@@ -206,4 +220,22 @@ bool kbase_ctx_sched_inc_refcount(struct kbase_context *kctx);
  */
 void kbase_ctx_sched_release_ctx_lock(struct kbase_context *kctx);
 
+#if MALI_USE_CSF
+/**
+ * kbase_ctx_sched_inc_refcount_if_as_valid - Refcount the context if it has GPU
+ *                                            address space slot assigned to it.
+ *
+ * @kctx: Context to be refcounted
+ *
+ * This function takes a reference on the context if it has a GPU address space
+ * slot assigned to it. The address space slot will not be available for
+ * re-assignment until the reference is released.
+ *
+ * Return: true if refcount succeeded and the address space slot will not be
+ * reassigned, false if the refcount failed (because the address space slot
+ * was not assigned).
+ */
+bool kbase_ctx_sched_inc_refcount_if_as_valid(struct kbase_context *kctx);
+#endif
+
 #endif /* _KBASE_CTX_SCHED_H_ */
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_debug.c b/drivers/gpu/arm/bifrost/mali_kbase_debug.c
index 118f787fb74c..6caf56ca263e 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_debug.c
+++ b/drivers/gpu/arm/bifrost/mali_kbase_debug.c
@@ -1,11 +1,12 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  *
- * (C) COPYRIGHT 2012-2014 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2012-2014, 2020 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,12 +17,8 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
-
-
 #include <mali_kbase.h>
 
 static struct kbasep_debug_assert_cb kbasep_debug_assert_registered_cb = {
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_debug.h b/drivers/gpu/arm/bifrost/mali_kbase_debug.h
index f33413908405..87d3069d958b 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_debug.h
+++ b/drivers/gpu/arm/bifrost/mali_kbase_debug.h
@@ -1,11 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  *
- * (C) COPYRIGHT 2012-2015, 2017 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2012-2015, 2017, 2020-2021 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,12 +17,8 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
-
-
 #ifndef _KBASE_DEBUG_H
 #define _KBASE_DEBUG_H
 
@@ -51,9 +48,9 @@ struct kbasep_debug_assert_cb {
 };
 
 /**
- * @def KBASEP_DEBUG_PRINT_TRACE
- * @brief Private macro containing the format of the trace to display before every message
- * @sa KBASE_DEBUG_SKIP_TRACE, KBASE_DEBUG_SKIP_FUNCTION_NAME
+ * KBASEP_DEBUG_PRINT_TRACE - Private macro containing the format of the trace
+ * to display before every message. See also KBASE_DEBUG_SKIP_TRACE and
+ * KBASE_DEBUG_SKIP_FUNCTION_NAME.
  */
 #if !KBASE_DEBUG_SKIP_TRACE
 #define KBASEP_DEBUG_PRINT_TRACE \
@@ -68,21 +65,22 @@ struct kbasep_debug_assert_cb {
 #endif
 
 /**
- * @def KBASEP_DEBUG_ASSERT_OUT(trace, function, ...)
- * @brief (Private) system printing function associated to the @ref KBASE_DEBUG_ASSERT_MSG event.
- * @param trace location in the code from where the message is printed
- * @param function function from where the message is printed
- * @param ... Format string followed by format arguments.
+ * KBASEP_DEBUG_ASSERT_OUT() - (Private) system printing
+ * function associated to the @ref KBASE_DEBUG_ASSERT_MSG event.
+ * @trace: location in the code from where the message is printed
+ * @function: function from where the message is printed
+ * @...: Format string followed by format arguments.
+ *
  * @note function parameter cannot be concatenated with other strings
  */
 /* Select the correct system output function*/
 #ifdef CONFIG_MALI_BIFROST_DEBUG
-#define KBASEP_DEBUG_ASSERT_OUT(trace, function, ...)\
-		do { \
-			pr_err("Mali<ASSERT>: %s function:%s ", trace, function);\
-			pr_err(__VA_ARGS__);\
-			pr_err("\n");\
-		} while (false)
+#define KBASEP_DEBUG_ASSERT_OUT(trace, function, ...)                          \
+	do {                                                                   \
+		pr_err("Mali<ASSERT>: %s function:%s ", trace, function);      \
+		pr_err(__VA_ARGS__);                                           \
+		pr_err("\n");                                                  \
+	} while (false)
 #else
 #define KBASEP_DEBUG_ASSERT_OUT(trace, function, ...) CSTD_NOP()
 #endif
@@ -94,12 +92,12 @@ struct kbasep_debug_assert_cb {
 #endif
 
 /**
- * @def KBASE_DEBUG_ASSERT(expr)
- * @brief Calls @ref KBASE_PRINT_ASSERT and prints the expression @a expr if @a expr is false
+ * KBASE_DEBUG_ASSERT() - Calls @ref KBASE_PRINT_ASSERT and prints the
+ * expression @a expr if @a expr is false
+ * @expr: Boolean expression
  *
  * @note This macro does nothing if the flag @ref KBASE_DEBUG_DISABLE_ASSERTS is set to 1
  *
- * @param expr Boolean expression
  */
 #define KBASE_DEBUG_ASSERT(expr) \
 	KBASE_DEBUG_ASSERT_MSG(expr, #expr)
@@ -107,15 +105,15 @@ struct kbasep_debug_assert_cb {
 #if KBASE_DEBUG_DISABLE_ASSERTS
 #define KBASE_DEBUG_ASSERT_MSG(expr, ...) CSTD_NOP()
 #else
-	/**
-	 * @def KBASE_DEBUG_ASSERT_MSG(expr, ...)
-	 * @brief Calls @ref KBASEP_DEBUG_ASSERT_OUT and prints the given message if @a expr is false
-	 *
-	 * @note This macro does nothing if the flag @ref KBASE_DEBUG_DISABLE_ASSERTS is set to 1
-	 *
-	 * @param expr Boolean expression
-	 * @param ...  Message to display when @a expr is false, as a format string followed by format arguments.
-	 */
+/**
+ * KBASE_DEBUG_ASSERT_MSG() - Calls @ref KBASEP_DEBUG_ASSERT_OUT and prints the
+ * given message if @a expr is false
+ * @expr: Boolean expression
+ * @...:  Message to display when @a expr is false, as a format string followed
+ *        by format arguments.
+ *
+ * This macro does nothing if the flag KBASE_DEBUG_DISABLE_ASSERTS is set to 1
+ */
 #define KBASE_DEBUG_ASSERT_MSG(expr, ...) \
 		do { \
 			if (!(expr)) { \
@@ -127,10 +125,8 @@ struct kbasep_debug_assert_cb {
 #endif				/* KBASE_DEBUG_DISABLE_ASSERTS */
 
 /**
- * @def KBASE_DEBUG_CODE( X )
- * @brief Executes the code inside the macro only in debug mode
- *
- * @param X Code to compile only in debug mode.
+ * KBASE_DEBUG_CODE( X ) - Executes the code inside the macro only in debug mode
+ * @X: Code to compile only in debug mode.
  */
 #ifdef CONFIG_MALI_BIFROST_DEBUG
 #define KBASE_DEBUG_CODE(X) X
@@ -141,7 +137,9 @@ struct kbasep_debug_assert_cb {
 /** @} */
 
 /**
- * @brief Register a function to call on ASSERT
+ * kbase_debug_assert_register_hook - Register a function to call on ASSERT
+ * @func: the function to call when an assert is triggered.
+ * @param: the parameter to pass to \a func when calling it
  *
  * Such functions will \b only be called during Debug mode, and for debugging
  * features \b only. Do not rely on them to be called in general use.
@@ -151,13 +149,12 @@ struct kbasep_debug_assert_cb {
  * @note This function is not thread-safe, and should only be used to
  * register/deregister once in the module's lifetime.
  *
- * @param[in] func the function to call when an assert is triggered.
- * @param[in] param the parameter to pass to \a func when calling it
  */
 void kbase_debug_assert_register_hook(kbase_debug_assert_hook *func, void *param);
 
 /**
- * @brief Call a debug assert hook previously registered with kbase_debug_assert_register_hook()
+ * kbasep_debug_assert_call_hook - Call a debug assert hook previously
+ * registered with kbase_debug_assert_register_hook()
  *
  * @note This function is not thread-safe with respect to multiple threads
  * registering functions and parameters with
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_debug_job_fault.c b/drivers/gpu/arm/bifrost/mali_kbase_debug_job_fault.c
index dbc774d56ab4..7dfdff1b1f18 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_debug_job_fault.c
+++ b/drivers/gpu/arm/bifrost/mali_kbase_debug_job_fault.c
@@ -1,11 +1,12 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  *
- * (C) COPYRIGHT 2012-2016, 2018-2019 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2012-2016, 2018-2021 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,8 +17,6 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 #include <mali_kbase.h>
@@ -518,23 +517,24 @@ void kbase_debug_job_fault_dev_term(struct kbase_device *kbdev)
 /*
  *  Initialize the relevant data structure per context
  */
-void kbase_debug_job_fault_context_init(struct kbase_context *kctx)
+int kbase_debug_job_fault_context_init(struct kbase_context *kctx)
 {
 
 	/* We need allocate double size register range
 	 * Because this memory will keep the register address and value
 	 */
 	kctx->reg_dump = vmalloc(0x4000 * 2);
-	if (kctx->reg_dump == NULL)
-		return;
-
-	if (kbase_debug_job_fault_reg_snapshot_init(kctx, 0x4000) == false) {
-		vfree(kctx->reg_dump);
-		kctx->reg_dump = NULL;
+	if (kctx->reg_dump != NULL) {
+		if (kbase_debug_job_fault_reg_snapshot_init(kctx, 0x4000) ==
+		    false) {
+			vfree(kctx->reg_dump);
+			kctx->reg_dump = NULL;
+		}
+		INIT_LIST_HEAD(&kctx->job_fault_resume_event_list);
+		atomic_set(&kctx->job_fault_count, 0);
 	}
-	INIT_LIST_HEAD(&kctx->job_fault_resume_event_list);
-	atomic_set(&kctx->job_fault_count, 0);
 
+	return 0;
 }
 
 /*
@@ -549,6 +549,14 @@ void kbase_debug_job_fault_kctx_unblock(struct kbase_context *kctx)
 {
 	WARN_ON(!kbase_ctx_flag(kctx, KCTX_DYING));
 
+	/* Return early if the job fault part of the kbase_device is not
+	 * initialized yet. An error can happen during the device probe after
+	 * the privileged Kbase context was created for the HW counter dumping
+	 * but before the job fault part is initialized.
+	 */
+	if (!kctx->kbdev->job_fault_resume_workq)
+		return;
+
 	kbase_ctx_remove_pending_event(kctx);
 }
 
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_debug_job_fault.h b/drivers/gpu/arm/bifrost/mali_kbase_debug_job_fault.h
index ef69627cdce8..63ccb3d86b23 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_debug_job_fault.h
+++ b/drivers/gpu/arm/bifrost/mali_kbase_debug_job_fault.h
@@ -1,11 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  *
- * (C) COPYRIGHT 2012-2016, 2018 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2012-2016, 2018, 2020 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,8 +17,6 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 #ifndef _KBASE_DEBUG_JOB_FAULT_H
@@ -54,8 +53,9 @@ void kbase_debug_job_fault_dev_term(struct kbase_device *kbdev);
  * kbase_debug_job_fault_context_init - Initialize the relevant
  *		data structure per context
  * @kctx: KBase context pointer
+ * Return: 0 on success
  */
-void kbase_debug_job_fault_context_init(struct kbase_context *kctx);
+int kbase_debug_job_fault_context_init(struct kbase_context *kctx);
 
 /**
  * kbase_debug_job_fault_context_term - Release the relevant
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_debug_mem_view.c b/drivers/gpu/arm/bifrost/mali_kbase_debug_mem_view.c
index 478813705a41..9bdb76572df0 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_debug_mem_view.c
+++ b/drivers/gpu/arm/bifrost/mali_kbase_debug_mem_view.c
@@ -1,11 +1,12 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  *
- * (C) COPYRIGHT 2013-2019 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2013-2020 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,8 +17,6 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 /*
@@ -32,10 +31,6 @@
 
 #ifdef CONFIG_DEBUG_FS
 
-#if (KERNEL_VERSION(4, 1, 0) > LINUX_VERSION_CODE)
-#define get_file_rcu(x) atomic_long_inc_not_zero(&(x)->f_count)
-#endif
-
 struct debug_mem_mapping {
 	struct list_head node;
 
@@ -179,6 +174,13 @@ static int debug_mem_zone_open(struct rb_root *rbtree,
 			/* Empty region - ignore */
 			continue;
 
+		if (reg->flags & KBASE_REG_PROTECTED) {
+			/* CPU access to protected memory is forbidden - so
+			 * skip this GPU virtual region.
+			 */
+			continue;
+		}
+
 		mapping = kmalloc(sizeof(*mapping), GFP_KERNEL);
 		if (!mapping) {
 			ret = -ENOMEM;
@@ -222,19 +224,19 @@ static int debug_mem_open(struct inode *i, struct file *file)
 	kbase_gpu_vm_lock(kctx);
 
 	ret = debug_mem_zone_open(&kctx->reg_rbtree_same, mem_data);
-	if (0 != ret) {
+	if (ret != 0) {
 		kbase_gpu_vm_unlock(kctx);
 		goto out;
 	}
 
 	ret = debug_mem_zone_open(&kctx->reg_rbtree_custom, mem_data);
-	if (0 != ret) {
+	if (ret != 0) {
 		kbase_gpu_vm_unlock(kctx);
 		goto out;
 	}
 
 	ret = debug_mem_zone_open(&kctx->reg_rbtree_exec, mem_data);
-	if (0 != ret) {
+	if (ret != 0) {
 		kbase_gpu_vm_unlock(kctx);
 		goto out;
 	}
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_debug_mem_view.h b/drivers/gpu/arm/bifrost/mali_kbase_debug_mem_view.h
index b948b7cd9dd4..c913d5ce36de 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_debug_mem_view.h
+++ b/drivers/gpu/arm/bifrost/mali_kbase_debug_mem_view.h
@@ -1,11 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  *
- * (C) COPYRIGHT 2013-2015, 2019 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2013-2015, 2019-2020 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,8 +17,6 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 #ifndef _KBASE_DEBUG_MEM_VIEW_H
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_debugfs_helper.c b/drivers/gpu/arm/bifrost/mali_kbase_debugfs_helper.c
index 37e507b164c5..28df887b33dc 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_debugfs_helper.c
+++ b/drivers/gpu/arm/bifrost/mali_kbase_debugfs_helper.c
@@ -1,11 +1,12 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  *
- * (C) COPYRIGHT 2019 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,8 +17,6 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 #include <linux/debugfs.h>
@@ -90,6 +89,59 @@ static int set_attr_from_string(
 	return err;
 }
 
+int kbase_debugfs_string_validator(char *const buf)
+{
+	size_t index;
+	int err = 0;
+	char *ptr = buf;
+
+	for (index = 0; *ptr; ++index) {
+		unsigned long test_number;
+		size_t len;
+
+		/* Drop leading spaces */
+		while (*ptr == ' ')
+			ptr++;
+
+		/* Strings passed into the validator will be NULL terminated
+		 * by nature, so here strcspn only needs to delimit by
+		 * newlines, spaces and NULL terminator (delimited natively).
+		 */
+		len = strcspn(ptr, "\n ");
+		if (len == 0) {
+			/* No more values (allow this) */
+			break;
+		}
+
+		/* Substitute a nul terminator for a space character to make
+		 * the substring valid for kstrtoul, and then replace it back.
+		 */
+		if (ptr[len] == ' ') {
+			ptr[len] = '\0';
+			err = kstrtoul(ptr, 0, &test_number);
+			ptr[len] = ' ';
+
+			/* len should only be incremented if there is a valid
+			 * number to follow - otherwise this will skip over
+			 * the NULL terminator in cases with no ending newline
+			 */
+			len++;
+		} else {
+			/* This occurs for the last element, which is followed
+			 * by a newline or the NUL terminator.
+			 */
+			err = kstrtoul(ptr, 0, &test_number);
+		}
+
+		if (err)
+			break;
+		/* Skip the substring (including any premature nul terminator)
+		 */
+		ptr += len;
+	}
+	return err;
+}
+
 int kbase_debugfs_helper_set_attr_from_string(
 	const char *const buf, void *const array, size_t const nelems,
 	kbase_debugfs_helper_set_attr_fn const set_attr_fn)
@@ -100,6 +152,13 @@ int kbase_debugfs_helper_set_attr_from_string(
 	if (!wbuf)
 		return -ENOMEM;
 
+	/* validate string before actually writing values */
+	err = kbase_debugfs_string_validator(wbuf);
+	if (err) {
+		kfree(wbuf);
+		return err;
+	}
+
 	err = set_attr_from_string(wbuf, array, nelems,
 		set_attr_fn);
 
@@ -154,6 +213,14 @@ int kbase_debugfs_helper_seq_write(struct file *const file,
 	}
 
 	buf[count] = '\0';
+
+	/* validate string before actually writing values */
+	err = kbase_debugfs_string_validator(buf);
+	if (err) {
+		kfree(buf);
+		return err;
+	}
+
 	err = set_attr_from_string(buf,
 		array, nelems, set_attr_fn);
 	kfree(buf);
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_debugfs_helper.h b/drivers/gpu/arm/bifrost/mali_kbase_debugfs_helper.h
index c3c9efa14e65..5fcbb15b23e2 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_debugfs_helper.h
+++ b/drivers/gpu/arm/bifrost/mali_kbase_debugfs_helper.h
@@ -1,11 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  *
- * (C) COPYRIGHT 2019 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,8 +17,6 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 #ifndef _KBASE_DEBUGFS_HELPER_H_
@@ -58,6 +57,29 @@ int kbase_debugfs_helper_set_attr_from_string(
 	const char *buf, void *array, size_t nelems,
 	kbase_debugfs_helper_set_attr_fn set_attr_fn);
 
+/**
+ * kbase_debugfs_string_validator - Validate a string to be written to a
+ *                                  debugfs file for any incorrect formats
+ *                                  or wrong values.
+ *
+ * This function is to be used before any writes to debugfs values are done
+ * such that any strings with erroneous values (such as octal 09 or
+ * hexadecimal 0xGH) are fully ignored - without this validation, any correct
+ * values before the first incorrect one will still be entered into the
+ * debugfs file. This essentially iterates the values through kstrtoul to see
+ * if it is valid.
+ *
+ * It is largely similar to set_attr_from_string to iterate through the values
+ * of the input string. This function also requires the input string to be
+ * writable.
+ *
+ * @buf: Null-terminated string to validate.
+ *
+ * Return: 0 with no error, else the error code from kstrtoul (-EINVAL or
+ *         -ERANGE) if any value in the string was wrong or badly formatted.
+ */
+int kbase_debugfs_string_validator(char *const buf);
+
 /**
  * typedef kbase_debugfs_helper_get_attr_fn - Type of function to get an
  *                                            attribute value from an array
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_defs.h b/drivers/gpu/arm/bifrost/mali_kbase_defs.h
index 980cf09500ef..6cff28c90566 100755
--- a/drivers/gpu/arm/bifrost/mali_kbase_defs.h
+++ b/drivers/gpu/arm/bifrost/mali_kbase_defs.h
@@ -1,11 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  *
- * (C) COPYRIGHT 2011-2020 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2011-2021 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,17 +17,11 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
-
-
 /**
- * @file mali_kbase_defs.h
- *
- * Defintions (types, defines, etcs) common to Kbase. They are placed here to
- * allow the hierarchy of header files to work.
+ * DOC: Definitions (types, defines, etc.) common to Kbase. They are placed here
+ * to allow the hierarchy of header files to work.
  */
 
 #ifndef _KBASE_DEFS_H_
@@ -40,7 +35,11 @@
 #include <mali_kbase_instr_defs.h>
 #include <mali_kbase_pm.h>
 #include <mali_kbase_gpuprops_types.h>
+#if MALI_USE_CSF
+#include <mali_kbase_hwcnt_backend_csf.h>
+#else
 #include <mali_kbase_hwcnt_backend_jm.h>
+#endif
 #include <protected_mode_switcher.h>
 
 #include <linux/atomic.h>
@@ -49,9 +48,6 @@
 #include <linux/file.h>
 #include <linux/sizes.h>
 
-#ifdef CONFIG_MALI_BUSLOG
-#include <linux/bus_logger.h>
-#endif
 
 #if defined(CONFIG_SYNC)
 #include <sync.h>
@@ -75,8 +71,7 @@
 #include <linux/regulator/consumer.h>
 #include <linux/memory_group_manager.h>
 
-#if defined(CONFIG_PM_RUNTIME) || \
-	(defined(CONFIG_PM) && LINUX_VERSION_CODE >= KERNEL_VERSION(3, 19, 0))
+#if defined(CONFIG_PM_RUNTIME) || defined(CONFIG_PM)
 #define KBASE_PM_RUNTIME 1
 #endif
 
@@ -123,6 +118,11 @@
  */
 #define KBASE_LOCK_REGION_MIN_SIZE_LOG2 (15)
 
+/**
+ * Maximum number of GPU memory region zones
+ */
+#define KBASE_REG_ZONE_MAX 4ul
+
 #include "mali_kbase_hwaccess_defs.h"
 
 /* Maximum number of pages of memory that require a permanent mapping, per
@@ -138,24 +138,28 @@
  */
 #define KBASE_HWCNT_GPU_VIRTUALIZER_DUMP_THRESHOLD_NS (200 * NSEC_PER_USEC)
 
+#if MALI_USE_CSF
+/* The number of buffers in the CSF hwcnt backend ring buffer, which the CSF
+ * hwcnt backend allocates to communicate with CSF firmware for HWC dump
+ * samples.
+ * To meet the hardware requirement, this number MUST be a power of 2;
+ * otherwise, CSF hwcnt backend creation will fail.
+ */
+#define KBASE_HWCNT_BACKEND_CSF_RING_BUFFER_COUNT (128)
+#endif
+
 /* Maximum number of clock/regulator pairs that may be referenced by
  * the device node.
  * This is dependent on support for of_property_read_u64_array() in the
  * kernel.
  */
-#if (KERNEL_VERSION(4, 0, 0) <= LINUX_VERSION_CODE) || \
-			defined(LSK_OPPV2_BACKPORT)
 #define BASE_MAX_NR_CLOCKS_REGULATORS (2)
-#else
-#define BASE_MAX_NR_CLOCKS_REGULATORS (1)
-#endif
 
 /* Forward declarations */
 struct kbase_context;
 struct kbase_device;
 struct kbase_as;
 struct kbase_mmu_setup;
-struct kbase_ipa_model_vinstr_data;
 struct kbase_kinstr_jm;
 
 /**
@@ -179,11 +183,7 @@ struct kbase_io_access {
  * @buf: array of kbase_io_access
  */
 struct kbase_io_history {
-#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 0))
 	bool enabled;
-#else
-	u32 enabled;
-#endif
 
 	spinlock_t lock;
 	size_t count;
@@ -305,7 +305,7 @@ struct kbasep_mem_device {
 struct kbase_clk_rate_listener;
 
 /**
- * kbase_clk_rate_listener_on_change_t() - Frequency change callback
+ * typedef kbase_clk_rate_listener_on_change_t() - Frequency change callback
  *
  * @listener:     Clock frequency change listener.
  * @clk_index:    Index of the clock for which the change has occurred.
@@ -355,74 +355,62 @@ struct kbase_clk_rate_trace_manager {
 };
 
 /**
- * Data stored per device for power management.
- *
- * This structure contains data for the power management framework. There is one
- * instance of this structure per device in the system.
+ * struct kbase_pm_device_data - Data stored per device for power management.
+ * @lock: The lock protecting Power Management structures accessed outside of
+ * 	IRQ. This lock must also be held whenever the GPU is being powered on
+ * 	or off.
+ * @active_count: The reference count of active contexts on this device. Note
+ * 	that some code paths keep shaders/the tiler powered whilst this is 0.
+ * 	Use kbase_pm_is_active() instead to check for such cases.
+ * @suspending: Flag indicating suspending/suspended
+ * @gpu_lost: Flag indicating gpu lost
+ * @zero_active_count_wait: Wait queue set when active_count == 0
+ * @resume_wait: system resume of GPU device.
+ * @debug_core_mask: Bit masks identifying the available shader cores that are
+ * 	specified via sysfs. One mask per job slot.
+ * @debug_core_mask_all: Bit masks identifying the available shader cores that
+ * 	are specified via sysfs.
+ * @callback_power_runtime_init: Callback for initializing the runtime power
+ * 	management. Return 0 on success, else error code
+ * @callback_power_runtime_term: Callback for terminating the runtime power
+ * 	management.
+ * @dvfs_period: Time in milliseconds between each dvfs sample
+ * @backend: KBase PM backend data
+ * @arb_vm_state: The state of the arbiter VM machine
+ * @gpu_users_waiting: Used by virtualization to notify the arbiter that there
+ * 	are users waiting for the GPU so that it can request and resume the
+ * 	driver.
+ * @clk_rtm: The state of the GPU clock rate trace manager
+ *
+ * This structure contains data for the power management framework. There is one
+ * instance of this structure per device in the system.
+ */
 struct kbase_pm_device_data {
-	/**
-	 * The lock protecting Power Management structures accessed outside of
-	 * IRQ.
-	 *
-	 * This lock must also be held whenever the GPU is being powered on or
-	 * off.
-	 */
 	struct mutex lock;
-
-	/**
-	 * The reference count of active contexts on this device. Note that
-	 * some code paths keep shaders/the tiler powered whilst this is 0. Use
-	 * kbase_pm_is_active() instead to check for such cases.
-	 */
 	int active_count;
-	/** Flag indicating suspending/suspended */
 	bool suspending;
 #ifdef CONFIG_MALI_ARBITER_SUPPORT
-	/* Flag indicating gpu lost */
 	atomic_t gpu_lost;
 #endif /* CONFIG_MALI_ARBITER_SUPPORT */
-	/* Wait queue set when active_count == 0 */
 	wait_queue_head_t zero_active_count_wait;
+	wait_queue_head_t resume_wait;
 
-	/**
-	 * Bit masks identifying the available shader cores that are specified
-	 * via sysfs. One mask per job slot.
-	 */
+#if MALI_USE_CSF
+	u64 debug_core_mask;
+#else
+	/* One mask per job slot. */
 	u64 debug_core_mask[BASE_JM_MAX_NR_SLOTS];
 	u64 debug_core_mask_all;
+#endif /* MALI_USE_CSF */
 
-	/**
-	 * Callback for initializing the runtime power management.
-	 *
-	 * @param kbdev The kbase device
-	 *
-	 * @return 0 on success, else error code
-	 */
-	 int (*callback_power_runtime_init)(struct kbase_device *kbdev);
-
-	/**
-	 * Callback for terminating the runtime power management.
-	 *
-	 * @param kbdev The kbase device
-	 */
+	int (*callback_power_runtime_init)(struct kbase_device *kbdev);
 	void (*callback_power_runtime_term)(struct kbase_device *kbdev);
-
-	/* Time in milliseconds between each dvfs sample */
 	u32 dvfs_period;
-
 	struct kbase_pm_backend_data backend;
-
 #ifdef CONFIG_MALI_ARBITER_SUPPORT
-	/**
-	 * The state of the arbiter VM machine
-	 */
 	struct kbase_arbiter_vm_state *arb_vm_state;
+	atomic_t gpu_users_waiting;
 #endif /* CONFIG_MALI_ARBITER_SUPPORT */
-
-	/**
-	 * The state of the GPU clock rate trace manager
-	 */
 	struct kbase_clk_rate_trace_manager clk_rtm;
 };
 
@@ -563,7 +551,6 @@ struct kbase_mmu_mode {
 	unsigned long flags;
 };
 
-struct kbase_mmu_mode const *kbase_mmu_mode_get_lpae(void);
 struct kbase_mmu_mode const *kbase_mmu_mode_get_aarch64(void);
 
 #define DEVNAME_SIZE	16
@@ -633,8 +620,8 @@ struct kbase_process {
  *                         issues present in the GPU.
  * @hw_quirks_mmu:         Configuration to be used for the MMU as per the HW
  *                         issues present in the GPU.
- * @hw_quirks_jm:          Configuration to be used for the Job Manager as per
- *                         the HW issues present in the GPU.
+ * @hw_quirks_gpu:         Configuration to be used for the Job Manager or CSF/MCU
+ *                         subsystems as per the HW issues present in the GPU.
  * @entry:                 Links the device instance to the global list of GPU
  *                         devices. The list would have as many entries as there
  *                         are GPU device instances.
@@ -651,6 +638,8 @@ struct kbase_process {
  * @irqs:                  Array containing IRQ resource info for 3 types of
  *                         interrupts : Job scheduling, MMU & GPU events (like
  *                         power management, cache etc.)
+ * @irqs.irq:              irq number
+ * @irqs.flags:            irq flags
  * @clocks:                Pointer to the input clock resources referenced by
  *                         the GPU device node.
  * @nr_clocks:             Number of clocks set in the clocks array.
@@ -684,6 +673,7 @@ struct kbase_process {
  *                         accesses made by the driver.
  * @pm:                    Per device object for storing data for power management
  *                         framework.
+ * @csf:                   CSF object for the GPU device.
  * @js_data:               Per device object encapsulating the current context of
  *                         Job Scheduler, which is global to the device and is not
  *                         tied to any particular struct kbase_context running on
@@ -711,11 +701,21 @@ struct kbase_process {
  * @disjoint_event:        struct for keeping track of the disjoint information,
  *                         that whether the GPU is in a disjoint state and the
  *                         number of disjoint events that have occurred on GPU.
+ * @disjoint_event.count:  disjoint event count
+ * @disjoint_event.state:  disjoint event state
  * @nr_hw_address_spaces:  Number of address spaces actually available in the
  *                         GPU, remains constant after driver initialisation.
  * @nr_user_address_spaces: Number of address spaces available to user contexts
+ * @hwcnt_backend_csf_if_fw: Firmware interface to access CSF GPU performance
+ *                         counters.
  * @hwcnt:                  Structure used for instrumentation and HW counters
  *                         dumping
+ * @hwcnt.lock:            The lock should be used when accessing any of the
+ *                         following members
+ * @hwcnt.kctx:            kbase context
+ * @hwcnt.addr:            HW counter address
+ * @hwcnt.addr_bytes:      HW counter size in bytes
+ * @hwcnt.backend:         Kbase instrumentation backend
  * @hwcnt_gpu_iface:       Backend interface for GPU hardware counter access.
  * @hwcnt_gpu_ctx:         Context for GPU hardware counter access.
  *                         @hwaccess_lock must be held when calling
@@ -726,6 +726,7 @@ struct kbase_process {
  *                         are enabled. If zero, there is no timeline client and
  *                         therefore timeline is disabled.
  * @timeline:              Timeline context created per device.
+ * @ktrace:                kbase device's ktrace
  * @trace_lock:            Lock to serialize the access to trace buffer.
  * @trace_first_out:       Index/offset in the trace buffer at which the first
  *                         unread message is present.
@@ -751,6 +752,8 @@ struct kbase_process {
  *                         including any contexts that might be created for
  *                         hardware counters.
  * @kctx_list_lock:        Lock protecting concurrent accesses to @kctx_list.
+ * @group_max_uid_in_devices: Max value of any queue group UID in any kernel
+ *                            context in the kbase device.
  * @devfreq_profile:       Describes devfreq profile for the Mali GPU device, passed
  *                         to devfreq_add_device() to add devfreq feature to Mali
  *                         GPU device.
@@ -779,6 +782,7 @@ struct kbase_process {
  *                         table in devicetree.
  * @num_opps:              Number of operating performance points available for the Mali
  *                         GPU device.
+ * @last_devfreq_metrics:  last PM metrics
  * @devfreq_queue:         Per device object for storing data that manages devfreq
  *                         suspend & resume request queue and the related items.
  * @devfreq_cooling:       Pointer returned on registering devfreq cooling device
@@ -789,6 +793,17 @@ struct kbase_process {
  *                         previously entered protected mode.
  * @ipa:                   Top level structure for IPA, containing pointers to both
  *                         configured & fallback models.
+ * @ipa.lock:              Access to this struct must be with ipa.lock held
+ * @ipa.configured_model:  ipa model to use
+ * @ipa.fallback_model:    ipa fallback model
+ * @ipa.last_metrics:      Values of the PM utilization metrics from last time
+ *                         the power model was invoked. The utilization is
+ *                         calculated as the difference between last_metrics
+ *                         and the current values.
+ * @ipa.force_fallback_model: true if use of fallback model has been forced by
+ *                            the User
+ * @ipa.last_sample_time:  Records the time when counters, used for dynamic
+ *                         energy estimation, were last sampled.
  * @previous_frequency:    Previous frequency of GPU clock used for
  *                         BASE_HW_ISSUE_GPU2017_1336 workaround, This clock is
  *                         restored when L2 is powered on.
@@ -797,6 +812,7 @@ struct kbase_process {
  * @mali_debugfs_directory: Root directory for the debugfs files created by the driver
  * @debugfs_ctx_directory: Directory inside the @mali_debugfs_directory containing
  *                         a sub-directory for every context.
+ * @debugfs_instr_directory: Instrumentation debugfs directory
  * @debugfs_as_read_bitmap: bitmap of address spaces for which the bus or page fault
  *                         has occurred.
  * @job_fault_wq:          Waitqueue to block the job fault dumping daemon till the
@@ -813,6 +829,8 @@ struct kbase_process {
  * @job_fault_event_lock:  Lock to protect concurrent accesses to @job_fault_event_list
  * @regs_dump_debugfs_data: Contains the offset of register to be read through debugfs
  *                         file "read_register".
+ * @regs_dump_debugfs_data.reg_offset: Contains the offset of register to be
+ *                         read through debugfs file "read_register".
  * @ctx_num:               Total number of contexts created for the device.
  * @io_history:            Pointer to an object keeping a track of all recent
  *                         register accesses. The history of register accesses
@@ -871,6 +889,8 @@ struct kbase_process {
  *                          Job Scheduler
  * @l2_size_override:       Used to set L2 cache size via device tree blob
  * @l2_hash_override:       Used to set L2 cache hash via device tree blob
+ * @l2_hash_values_override: true if @l2_hash_values is valid.
+ * @l2_hash_values:         Used to set L2 asn_hash via device tree blob
  * @process_root:           rb_tree root node for maintaining a rb_tree of
  *                          kbase_process based on key tgid(thread group ID).
  * @dma_buf_root:           rb_tree root node for maintaining a rb_tree of
@@ -885,12 +905,20 @@ struct kbase_process {
  * @gpu_mem_usage_lock:     This spinlock should be held while accounting
  *                          @total_gpu_pages for both native and dma-buf imported
  *                          allocations.
+ * @dummy_job_wa:           struct for dummy job execution workaround for the
+ *                          GPU hang issue
+ * @dummy_job_wa.ctx:       dummy job workaround context
+ * @dummy_job_wa.jc:        dummy job workaround job
+ * @dummy_job_wa.slot:      dummy job workaround slot
+ * @dummy_job_wa.flags:     dummy job workaround flags
+ * @arb:                    Pointer to the arbiter device
+ * @pcm_dev:                The priority control manager device.
  */
 struct kbase_device {
 	u32 hw_quirks_sc;
 	u32 hw_quirks_tiler;
 	u32 hw_quirks_mmu;
-	u32 hw_quirks_jm;
+	u32 hw_quirks_gpu;
 
 	struct list_head entry;
 	struct device *dev;
@@ -953,8 +981,10 @@ struct kbase_device {
 	s8 nr_hw_address_spaces;
 	s8 nr_user_address_spaces;
 
+#if MALI_USE_CSF
+	struct kbase_hwcnt_backend_csf_if hwcnt_backend_csf_if_fw;
+#else
 	struct kbase_hwcnt {
-		/* The lock should be used when accessing any of the following members */
 		spinlock_t lock;
 
 		struct kbase_context *kctx;
@@ -963,6 +993,7 @@ struct kbase_device {
 
 		struct kbase_instr_backend backend;
 	} hwcnt;
+#endif
 
 	struct kbase_hwcnt_backend_interface hwcnt_gpu_iface;
 	struct kbase_hwcnt_context *hwcnt_gpu_ctx;
@@ -985,6 +1016,7 @@ struct kbase_device {
 
 	struct list_head        kctx_list;
 	struct mutex            kctx_list_lock;
+	atomic_t                group_max_uid_in_devices;
 
 #ifdef CONFIG_MALI_BIFROST_DEVFREQ
 	struct devfreq_dev_profile devfreq_profile;
@@ -998,16 +1030,10 @@ struct kbase_device {
 	struct kbasep_pm_metrics last_devfreq_metrics;
 	struct monitor_dev_info *mdev_info;
 	struct ipa_power_model_data *model_data;
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0)
 	struct kbase_devfreq_queue_info devfreq_queue;
-#endif
 
 #ifdef CONFIG_DEVFREQ_THERMAL
-#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 4, 0)
-	struct devfreq_cooling_device *devfreq_cooling;
-#else
 	struct thermal_cooling_device *devfreq_cooling;
-#endif
 	bool ipa_protection_mode_switched;
 	struct {
 		/* Access to this struct must be with ipa.lock held */
@@ -1020,11 +1046,13 @@ struct kbase_device {
 		 * the difference between last_metrics and the current values.
 		 */
 		struct kbasep_pm_metrics last_metrics;
-		/* Model data to pass to ipa_gpu_active/idle() */
-		struct kbase_ipa_model_vinstr_data *model_data;
 
 		/* true if use of fallback model has been forced by the User */
 		bool force_fallback_model;
+		/* Records the time when counters, used for dynamic energy
+		 * estimation, were last sampled.
+		 */
+		ktime_t last_sample_time;
 	} ipa;
 #endif /* CONFIG_DEVFREQ_THERMAL */
 #endif /* CONFIG_MALI_BIFROST_DEVFREQ */
@@ -1049,7 +1077,7 @@ struct kbase_device {
 
 #if !MALI_CUSTOMER_RELEASE
 	struct {
-		u16 reg_offset;
+		u32 reg_offset;
 	} regs_dump_debugfs_data;
 #endif /* !MALI_CUSTOMER_RELEASE */
 #endif /* CONFIG_DEBUG_FS */
@@ -1066,8 +1094,7 @@ struct kbase_device {
 
 	bool poweroff_pending;
 
-
-#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 0))
+#if (KERNEL_VERSION(4, 4, 0) <= LINUX_VERSION_CODE)
 	bool infinite_cache_active_default;
 #else
 	u32 infinite_cache_active_default;
@@ -1096,9 +1123,6 @@ struct kbase_device {
 
 	struct work_struct protected_mode_hwcnt_disable_work;
 
-#ifdef CONFIG_MALI_BUSLOG
-	struct bus_logger_client *buslogger;
-#endif
 
 	bool irq_reset_flush;
 
@@ -1110,9 +1134,11 @@ struct kbase_device {
 
 	u8 l2_size_override;
 	u8 l2_hash_override;
+	bool l2_hash_values_override;
+	u32 l2_hash_values[ASN_HASH_COUNT];
 
 #if MALI_USE_CSF
-	/* Command-stream front-end for the device. */
+	/* CSF object for the GPU device. */
 	struct kbase_csf_device csf;
 #else
 	struct kbasep_js_device_data js_data;
@@ -1144,9 +1170,10 @@ struct kbase_device {
 	} dummy_job_wa;
 
 #ifdef CONFIG_MALI_ARBITER_SUPPORT
-		/* Pointer to the arbiter device */
 		struct kbase_arbiter_device arb;
 #endif
+	/* Priority Control Manager device */
+	struct priority_control_manager_device *pcm_dev;
 };
 
 /**
@@ -1198,7 +1225,92 @@ struct kbase_file {
 	unsigned long         api_version;
 	atomic_t              setup_state;
 };
-
+#if MALI_JIT_PRESSURE_LIMIT_BASE
+/**
+ * enum kbase_context_flags - Flags for kbase contexts
+ *
+ * @KCTX_COMPAT: Set when the context process is a compat process, 32-bit
+ * process on a 64-bit kernel.
+ *
+ * @KCTX_RUNNABLE_REF: Set when context is counted in
+ * kbdev->js_data.nr_contexts_runnable. Must hold queue_mutex when accessing.
+ *
+ * @KCTX_ACTIVE: Set when the context is active.
+ *
+ * @KCTX_PULLED: Set when last kick() caused atoms to be pulled from this
+ * context.
+ *
+ * @KCTX_MEM_PROFILE_INITIALIZED: Set when the context's memory profile has been
+ * initialized.
+ *
+ * @KCTX_INFINITE_CACHE: Set when infinite cache is to be enabled for new
+ * allocations. Existing allocations will not change.
+ *
+ * @KCTX_SUBMIT_DISABLED: Set to prevent context from submitting any jobs.
+ *
+ * @KCTX_PRIVILEGED: Set if the context uses an address space and should be
+ * kept scheduled in.
+ *
+ * @KCTX_SCHEDULED: Set when the context is scheduled on the Run Pool.
+ * This is only ever updated whilst the jsctx_mutex is held.
+ *
+ * @KCTX_DYING: Set when the context process is in the process of being evicted.
+ *
+ * @KCTX_NO_IMPLICIT_SYNC: Set when explicit Android fences are in use on this
+ * context, to disable use of implicit dma-buf fences. This is used to avoid
+ * potential synchronization deadlocks.
+ *
+ * @KCTX_FORCE_SAME_VA: Set when BASE_MEM_SAME_VA should be forced on memory
+ * allocations. For 64-bit clients it is enabled by default, and disabled by
+ * default on 32-bit clients. Being able to clear this flag is only used for
+ * testing purposes of the custom zone allocation on 64-bit user-space builds,
+ * where we also require more control than is available through e.g. the JIT
+ * allocation mechanism. However, the 64-bit user-space client must still
+ * reserve a JIT region using KBASE_IOCTL_MEM_JIT_INIT.
+ *
+ * @KCTX_PULLED_SINCE_ACTIVE_JS0: Set when the context has had an atom pulled
+ * from it for job slot 0. This is reset when the context first goes active or
+ * is re-activated on that slot.
+ *
+ * @KCTX_PULLED_SINCE_ACTIVE_JS1: Set when the context has had an atom pulled
+ * from it for job slot 1. This is reset when the context first goes active or
+ * is re-activated on that slot.
+ *
+ * @KCTX_PULLED_SINCE_ACTIVE_JS2: Set when the context has had an atom pulled
+ * from it for job slot 2. This is reset when the context first goes active or
+ * is re-activated on that slot.
+ *
+ * @KCTX_AS_DISABLED_ON_FAULT: Set when the GPU address space is disabled for
+ * the context due to an unhandled page (or bus) fault. It is cleared when the
+ * refcount for the context drops to 0 or when the address spaces are
+ * re-enabled on GPU reset or power cycle.
+ *
+ * @KCTX_JPL_ENABLED: Set when JIT physical page limit is less than JIT virtual
+ * address page limit, so we must take care to not exceed the physical limit.
+ *
+ * All members need to be separate bits. This enum is intended for use in a
+ * bitmask where multiple values get OR-ed together.
+ */
+enum kbase_context_flags {
+	KCTX_COMPAT = 1U << 0,
+	KCTX_RUNNABLE_REF = 1U << 1,
+	KCTX_ACTIVE = 1U << 2,
+	KCTX_PULLED = 1U << 3,
+	KCTX_MEM_PROFILE_INITIALIZED = 1U << 4,
+	KCTX_INFINITE_CACHE = 1U << 5,
+	KCTX_SUBMIT_DISABLED = 1U << 6,
+	KCTX_PRIVILEGED = 1U << 7,
+	KCTX_SCHEDULED = 1U << 8,
+	KCTX_DYING = 1U << 9,
+	KCTX_NO_IMPLICIT_SYNC = 1U << 10,
+	KCTX_FORCE_SAME_VA = 1U << 11,
+	KCTX_PULLED_SINCE_ACTIVE_JS0 = 1U << 12,
+	KCTX_PULLED_SINCE_ACTIVE_JS1 = 1U << 13,
+	KCTX_PULLED_SINCE_ACTIVE_JS2 = 1U << 14,
+	KCTX_AS_DISABLED_ON_FAULT = 1U << 15,
+	KCTX_JPL_ENABLED = 1U << 16,
+};
+#else
 /**
  * enum kbase_context_flags - Flags for kbase contexts
  *
@@ -1278,14 +1390,8 @@ enum kbase_context_flags {
 	KCTX_PULLED_SINCE_ACTIVE_JS1 = 1U << 13,
 	KCTX_PULLED_SINCE_ACTIVE_JS2 = 1U << 14,
 	KCTX_AS_DISABLED_ON_FAULT = 1U << 15,
-#if MALI_JIT_PRESSURE_LIMIT_BASE
-	/*
-	 * Set when JIT physical page limit is less than JIT virtual address
-	 * page limit, so we must take care to not exceed the physical limit
-	 */
-	KCTX_JPL_ENABLED = 1U << 16,
-#endif /* !MALI_JIT_PRESSURE_LIMIT_BASE */
 };
+#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */
 
 struct kbase_sub_alloc {
 	struct list_head link;
@@ -1293,6 +1399,21 @@ struct kbase_sub_alloc {
 	DECLARE_BITMAP(sub_pages, SZ_2M / SZ_4K);
 };
 
+/**
+ * struct kbase_reg_zone - Information about GPU memory region zones
+ * @base_pfn: Page Frame Number in GPU virtual address space for the start of
+ *            the Zone
+ * @va_size_pages: Size of the Zone in pages
+ *
+ * Tracks information about a zone; see KBASE_REG_ZONE() and related macros.
+ * In future, this could also store the &rb_root members that are currently
+ * in &kbase_context.
+ */
+struct kbase_reg_zone {
+	u64 base_pfn;
+	u64 va_size_pages;
+};
+
 /**
  * struct kbase_context - Kernel base context
  *
@@ -1343,6 +1464,7 @@ struct kbase_sub_alloc {
  * @reg_rbtree_exec:      RB tree of the memory regions allocated from the EXEC_VA
  *                        zone of the GPU virtual address space. Used for GPU-executable
  *                        allocations which don't need the SAME_VA property.
+ * @reg_zone:             Zone information for the reg_rbtree_<...> members.
  * @cookies:              Bitmask containing of BITS_PER_LONG bits, used mainly for
  *                        SAME_VA allocations to defer the reservation of memory region
  *                        (from the GPU virtual address space) from base_mem_alloc
@@ -1367,6 +1489,7 @@ struct kbase_sub_alloc {
  *                        which actually created the context. This is usually,
  *                        but not necessarily, the same as the thread which
  *                        opened the device file /dev/malixx instance.
+ * @csf:                  kbase csf context
  * @jctx:                 object encapsulating all the Job dispatcher related state,
  *                        including the array of atoms.
  * @used_pages:           Keeps a track of the number of 4KB physical pages in use
@@ -1393,6 +1516,8 @@ struct kbase_sub_alloc {
  *                        waiting atoms and the waitqueue to process the work item
  *                        queued for the atoms blocked on the signaling of dma-buf
  *                        fences.
+ * @dma_fence.waiting_resource: list head for atoms waiting on dma-buf fences
+ * @dma_fence.wq:         waitqueue to process the work item queued
  * @as_nr:                id of the address space being used for the scheduled in
  *                        context. This is effectively part of the Run Pool, because
  *                        it only has a valid setting (!=KBASEP_AS_NR_INVALID) whilst
@@ -1418,9 +1543,6 @@ struct kbase_sub_alloc {
  *                        created the context. Used for accounting the physical
  *                        pages used for GPU allocations, done for the context,
  *                        to the memory consumed by the process.
- * @same_va_end:          End address of the SAME_VA zone (in 4KB page units)
- * @exec_va_start:        Start address of the EXEC_VA zone (in 4KB page units)
- *                        or U64_MAX if the EXEC_VA zone is uninitialized.
  * @gpu_va_end:           End address of the GPU va space (in 4KB page units)
  * @jit_va:               Indicates if a JIT_VA zone has been created.
  * @mem_profile_data:     Buffer containing the profiling information provided by
@@ -1547,6 +1669,10 @@ struct kbase_sub_alloc {
  * @atoms_count:          Number of GPU atoms currently in use, per priority
  * @create_flags:         Flags used in context creation.
  * @kinstr_jm:            Kernel job manager instrumentation context handle
+ * @tl_kctx_list_node:    List item into the device timeline's list of
+ *                        contexts, for timeline summarization.
+ * @limited_core_mask:    The mask that is applied to the affinity in case of atoms
+ *                        marked with BASE_JD_REQ_LIMITED_CORE_MASK.
  *
  * A kernel base context is an entity among which the GPU is scheduled.
  * Each context has its own GPU address space.
@@ -1584,6 +1710,7 @@ struct kbase_context {
 	struct rb_root reg_rbtree_same;
 	struct rb_root reg_rbtree_custom;
 	struct rb_root reg_rbtree_exec;
+	struct kbase_reg_zone reg_zone[KBASE_REG_ZONE_MAX];
 
 #if MALI_USE_CSF
 	struct kbase_csf_context csf;
@@ -1637,8 +1764,6 @@ struct kbase_context {
 
 	spinlock_t         mm_update_lock;
 	struct mm_struct __rcu *process_mm;
-	u64 same_va_end;
-	u64 exec_va_start;
 	u64 gpu_va_end;
 	bool jit_va;
 
@@ -1693,6 +1818,9 @@ struct kbase_context {
 #if !MALI_USE_CSF
 	struct kbase_kinstr_jm *kinstr_jm;
 #endif
+	struct list_head tl_kctx_list_node;
+
+	u64 limited_core_mask;
 };
 
 #ifdef CONFIG_MALI_CINSTR_GWT
@@ -1779,29 +1907,4 @@ static inline bool kbase_device_is_cpu_coherent(struct kbase_device *kbdev)
 /* Maximum number of loops polling the GPU for an AS command to complete before we assume the GPU has hung */
 #define KBASE_AS_INACTIVE_MAX_LOOPS     100000000
 
-/* JobDescriptorHeader - taken from the architecture specifications, the layout
- * is currently identical for all GPU archs. */
-struct job_descriptor_header {
-	u32 exception_status;
-	u32 first_incomplete_task;
-	u64 fault_pointer;
-	u8 job_descriptor_size : 1;
-	u8 job_type : 7;
-	u8 job_barrier : 1;
-	u8 _reserved_01 : 1;
-	u8 _reserved_1 : 1;
-	u8 _reserved_02 : 1;
-	u8 _reserved_03 : 1;
-	u8 _reserved_2 : 1;
-	u8 _reserved_04 : 1;
-	u8 _reserved_05 : 1;
-	u16 job_index;
-	u16 job_dependency_index_1;
-	u16 job_dependency_index_2;
-	union {
-		u64 _64;
-		u32 _32;
-	} next_job;
-};
-
 #endif				/* _KBASE_DEFS_H_ */
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_disjoint_events.c b/drivers/gpu/arm/bifrost/mali_kbase_disjoint_events.c
index b5ac414b1223..0b73f558ba06 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_disjoint_events.c
+++ b/drivers/gpu/arm/bifrost/mali_kbase_disjoint_events.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  *
  * (C) COPYRIGHT 2014, 2020 ARM Limited. All rights reserved.
@@ -5,7 +6,7 @@
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,8 +17,6 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 /*
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_dma_fence.c b/drivers/gpu/arm/bifrost/mali_kbase_dma_fence.c
index 1fac5e3e68f1..3bf80ea9601a 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_dma_fence.c
+++ b/drivers/gpu/arm/bifrost/mali_kbase_dma_fence.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  *
  * (C) COPYRIGHT 2011-2016, 2020 ARM Limited. All rights reserved.
@@ -5,7 +6,7 @@
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,8 +17,6 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 /* Include mali_kbase_dma_fence.h before checking for CONFIG_MALI_BIFROST_DMA_FENCE as
@@ -56,7 +55,7 @@ static int
 kbase_dma_fence_lock_reservations(struct kbase_dma_fence_resv_info *info,
 				  struct ww_acquire_ctx *ctx)
 {
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(5, 4, 0))
+#if (KERNEL_VERSION(5, 4, 0) > LINUX_VERSION_CODE)
 	struct reservation_object *content_res = NULL;
 #else
 	struct dma_resv *content_res = NULL;
@@ -206,7 +205,7 @@ out:
 }
 
 static void
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE)
 kbase_dma_fence_cb(struct fence *fence, struct fence_cb *cb)
 #else
 kbase_dma_fence_cb(struct dma_fence *fence, struct dma_fence_cb *cb)
@@ -226,7 +225,7 @@ kbase_dma_fence_cb(struct dma_fence *fence, struct dma_fence_cb *cb)
 		kbase_dma_fence_queue_work(katom);
 }
 
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(5, 4, 0))
+#if (KERNEL_VERSION(5, 4, 0) > LINUX_VERSION_CODE)
 static int
 kbase_dma_fence_add_reservation_callback(struct kbase_jd_atom *katom,
 					 struct reservation_object *resv,
@@ -238,7 +237,7 @@ kbase_dma_fence_add_reservation_callback(struct kbase_jd_atom *katom,
 					 bool exclusive)
 #endif
 {
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE)
 	struct fence *excl_fence = NULL;
 	struct fence **shared_fences = NULL;
 #else
@@ -302,7 +301,7 @@ out:
 	return err;
 }
 
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(5, 4, 0))
+#if (KERNEL_VERSION(5, 4, 0) > LINUX_VERSION_CODE)
 void kbase_dma_fence_add_reservation(struct reservation_object *resv,
 				     struct kbase_dma_fence_resv_info *info,
 				     bool exclusive)
@@ -331,7 +330,7 @@ int kbase_dma_fence_wait(struct kbase_jd_atom *katom,
 			 struct kbase_dma_fence_resv_info *info)
 {
 	int err, i;
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE)
 	struct fence *fence;
 #else
 	struct dma_fence *fence;
@@ -360,7 +359,7 @@ int kbase_dma_fence_wait(struct kbase_jd_atom *katom,
 	}
 
 	for (i = 0; i < info->dma_fence_resv_count; i++) {
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(5, 4, 0))
+#if (KERNEL_VERSION(5, 4, 0) > LINUX_VERSION_CODE)
 		struct reservation_object *obj = info->resv_objs[i];
 #else
 		struct dma_resv *obj = info->resv_objs[i];
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_dma_fence.h b/drivers/gpu/arm/bifrost/mali_kbase_dma_fence.h
index 3ac8186328a1..bedc8c0c5907 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_dma_fence.h
+++ b/drivers/gpu/arm/bifrost/mali_kbase_dma_fence.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  *
  * (C) COPYRIGHT 2010-2016, 2020 ARM Limited. All rights reserved.
@@ -5,7 +6,7 @@
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,8 +17,6 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 #ifndef _KBASE_DMA_FENCE_H_
@@ -44,7 +43,7 @@ struct kbase_context;
  * reservation objects.
  */
 struct kbase_dma_fence_resv_info {
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(5, 4, 0))
+#if (KERNEL_VERSION(5, 4, 0) > LINUX_VERSION_CODE)
 	struct reservation_object **resv_objs;
 #else
 	struct dma_resv **resv_objs;
@@ -63,7 +62,7 @@ struct kbase_dma_fence_resv_info {
  * reservation_objects. At the same time keeps track of which objects require
  * exclusive access in dma_fence_excl_bitmap.
  */
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(5, 4, 0))
+#if (KERNEL_VERSION(5, 4, 0) > LINUX_VERSION_CODE)
 void kbase_dma_fence_add_reservation(struct reservation_object *resv,
 				     struct kbase_dma_fence_resv_info *info,
 				     bool exclusive);
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_dummy_job_wa.c b/drivers/gpu/arm/bifrost/mali_kbase_dummy_job_wa.c
index a5a7ad744a8e..e7a87812aafa 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_dummy_job_wa.c
+++ b/drivers/gpu/arm/bifrost/mali_kbase_dummy_job_wa.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  *
  * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved.
@@ -5,7 +6,7 @@
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,8 +17,6 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 /*
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_dummy_job_wa.h b/drivers/gpu/arm/bifrost/mali_kbase_dummy_job_wa.h
index e19495055b48..4f3c2275ed44 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_dummy_job_wa.h
+++ b/drivers/gpu/arm/bifrost/mali_kbase_dummy_job_wa.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  *
  * (C) COPYRIGHT 2020 ARM Limited. All rights reserved.
@@ -5,7 +6,7 @@
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,8 +17,6 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 #ifndef _KBASE_DUMMY_JOB_WORKAROUND_
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_dvfs_debugfs.c b/drivers/gpu/arm/bifrost/mali_kbase_dvfs_debugfs.c
new file mode 100644
index 000000000000..ce23ede7e512
--- /dev/null
+++ b/drivers/gpu/arm/bifrost/mali_kbase_dvfs_debugfs.c
@@ -0,0 +1,98 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ *
+ * (C) COPYRIGHT 2020 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+#include "mali_kbase_dvfs_debugfs.h"
+#include <mali_kbase.h>
+#include <linux/seq_file.h>
+
+#ifdef CONFIG_DEBUG_FS
+
+/**
+ * kbasep_dvfs_utilization_debugfs_show() - Print the DVFS utilization info
+ *
+ * @file: The seq_file for printing to; its private data is the kbase_device
+ * @data: Unused by this function
+ *
+ * Return: Always 0.
+ */
+static int kbasep_dvfs_utilization_debugfs_show(struct seq_file *file, void *data)
+{
+	struct kbase_device *kbdev = file->private;
+
+#if MALI_USE_CSF
+	seq_printf(file, "busy_time: %u idle_time: %u protm_time: %u\n",
+		   kbdev->pm.backend.metrics.values.time_busy,
+		   kbdev->pm.backend.metrics.values.time_idle,
+		   kbdev->pm.backend.metrics.values.time_in_protm);
+#else
+	seq_printf(file, "busy_time: %u idle_time: %u\n",
+		   kbdev->pm.backend.metrics.values.time_busy,
+		   kbdev->pm.backend.metrics.values.time_idle);
+#endif
+
+	return 0;
+}
+
+static int kbasep_dvfs_utilization_debugfs_open(struct inode *in,
+						struct file *file)
+{
+	return single_open(file, kbasep_dvfs_utilization_debugfs_show,
+			   in->i_private);
+}
+
+static const struct file_operations kbasep_dvfs_utilization_debugfs_fops = {
+	.open = kbasep_dvfs_utilization_debugfs_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = single_release,
+};
+
+void kbase_dvfs_status_debugfs_init(struct kbase_device *kbdev)
+{
+	struct dentry *file;
+#if (KERNEL_VERSION(4, 7, 0) <= LINUX_VERSION_CODE)
+	const mode_t mode = 0444;
+#else
+	const mode_t mode = 0400;
+#endif
+
+	if (WARN_ON(!kbdev || IS_ERR_OR_NULL(kbdev->mali_debugfs_directory)))
+		return;
+
+	file = debugfs_create_file("dvfs_utilization", mode,
+				   kbdev->mali_debugfs_directory, kbdev,
+				   &kbasep_dvfs_utilization_debugfs_fops);
+
+	/* Failure to create the entry is not fatal, so it is only logged. */
+	if (IS_ERR_OR_NULL(file))
+		dev_warn(kbdev->dev,
+			 "Unable to create dvfs debugfs entry\n");
+}
+
+#else
+/*
+ * Stub used when debugfs is disabled: creates no debugfs entry.
+ */
+void kbase_dvfs_status_debugfs_init(struct kbase_device *kbdev)
+{
+}
+
+#endif /* CONFIG_DEBUG_FS */
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_dvfs_debugfs.h b/drivers/gpu/arm/bifrost/mali_kbase_dvfs_debugfs.h
new file mode 100644
index 000000000000..080331f8bdf5
--- /dev/null
+++ b/drivers/gpu/arm/bifrost/mali_kbase_dvfs_debugfs.h
@@ -0,0 +1,35 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ *
+ * (C) COPYRIGHT 2020 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+#ifndef _KBASE_DVFS_DEBUGFS_H_
+#define _KBASE_DVFS_DEBUGFS_H_
+
+/* Forward declaration */
+struct kbase_device;
+
+/**
+ * kbase_dvfs_status_debugfs_init() - Create a debugfs entry for DVFS queries
+ *
+ * @kbdev: Pointer to the GPU device for which to create the debugfs entry
+ */
+void kbase_dvfs_status_debugfs_init(struct kbase_device *kbdev);
+
+#endif /* _KBASE_DVFS_DEBUGFS_H_ */
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_event.c b/drivers/gpu/arm/bifrost/mali_kbase_event.c
index 5adb80f9bbd2..25a379d9f0e1 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_event.c
+++ b/drivers/gpu/arm/bifrost/mali_kbase_event.c
@@ -1,11 +1,12 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  *
- * (C) COPYRIGHT 2010-2016,2018-2020 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2016,2018-2021 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,12 +17,8 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
-
-
 #include <mali_kbase.h>
 #include <mali_kbase_debug.h>
 #include <tl/mali_kbase_tracepoints.h>
@@ -45,7 +42,7 @@ static struct base_jd_udata kbase_event_process(struct kbase_context *kctx, stru
 	KBASE_TLSTREAM_TL_DEL_ATOM(kbdev, katom);
 
 	katom->status = KBASE_JD_ATOM_STATE_UNUSED;
-	dev_dbg(kbdev->dev, "Atom %p status to unused\n", (void *)katom);
+	dev_dbg(kbdev->dev, "Atom %pK status to unused\n", (void *)katom);
 	wake_up(&katom->completed);
 
 	return data;
@@ -82,7 +79,7 @@ int kbase_event_dequeue(struct kbase_context *ctx, struct base_jd_event_v2 *ueve
 
 	mutex_unlock(&ctx->event_mutex);
 
-	dev_dbg(ctx->kbdev->dev, "event dequeuing %p\n", (void *)atom);
+	dev_dbg(ctx->kbdev->dev, "event dequeuing %pK\n", (void *)atom);
 	uevent->event_code = atom->event_code;
 
 	uevent->atom_number = (atom - ctx->jctx.atoms);
@@ -154,7 +151,8 @@ static int kbase_event_coalesce(struct kbase_context *kctx)
 	const int event_count = kctx->event_coalesce_count;
 
 	/* Join the list of pending events onto the tail of the main list
-	   and reset it */
+	 * and reset it
+	 */
 	list_splice_tail_init(&kctx->event_coalesce_list, &kctx->event_list);
 	kctx->event_coalesce_count = 0;
 
@@ -166,11 +164,11 @@ void kbase_event_post(struct kbase_context *ctx, struct kbase_jd_atom *atom)
 {
 	struct kbase_device *kbdev = ctx->kbdev;
 
-	dev_dbg(kbdev->dev, "Posting event for atom %p\n", (void *)atom);
+	dev_dbg(kbdev->dev, "Posting event for atom %pK\n", (void *)atom);
 
 	if (WARN_ON(atom->status != KBASE_JD_ATOM_STATE_COMPLETED)) {
 		dev_warn(kbdev->dev,
-				"%s: Atom %d (%p) not completed (status %d)\n",
+				"%s: Atom %d (%pK) not completed (status %d)\n",
 				__func__,
 				kbase_jd_atom_id(atom->kctx, atom),
 				atom->kctx,
@@ -237,7 +235,7 @@ int kbase_event_init(struct kbase_context *kctx)
 	kctx->event_coalesce_count = 0;
 	kctx->event_workq = alloc_workqueue("kbase_event", WQ_MEM_RECLAIM, 1);
 
-	if (NULL == kctx->event_workq)
+	if (kctx->event_workq == NULL)
 		return -EINVAL;
 
 	return 0;
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_fence.c b/drivers/gpu/arm/bifrost/mali_kbase_fence.c
index 5e04acf87892..d65b4ebf2535 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_fence.c
+++ b/drivers/gpu/arm/bifrost/mali_kbase_fence.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  *
  * (C) COPYRIGHT 2011-2020 ARM Limited. All rights reserved.
@@ -5,7 +6,7 @@
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,8 +17,6 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 #include <linux/atomic.h>
@@ -29,7 +28,7 @@
 /* Spin lock protecting all Mali fences as fence->lock. */
 static DEFINE_SPINLOCK(kbase_fence_lock);
 
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE)
 struct fence *
 kbase_fence_out_new(struct kbase_jd_atom *katom)
 #else
@@ -37,7 +36,7 @@ struct dma_fence *
 kbase_fence_out_new(struct kbase_jd_atom *katom)
 #endif
 {
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE)
 	struct fence *fence;
 #else
 	struct dma_fence *fence;
@@ -98,7 +97,7 @@ kbase_fence_free_callbacks(struct kbase_jd_atom *katom)
 	return res;
 }
 
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE)
 int
 kbase_fence_add_callback(struct kbase_jd_atom *katom,
 			 struct fence *fence,
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_fence.h b/drivers/gpu/arm/bifrost/mali_kbase_fence.h
index f319d9e1dce6..ff7dc8144a50 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_fence.h
+++ b/drivers/gpu/arm/bifrost/mali_kbase_fence.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  *
  * (C) COPYRIGHT 2010-2018, 2020 ARM Limited. All rights reserved.
@@ -5,7 +6,7 @@
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,8 +17,6 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 #ifndef _KBASE_FENCE_H_
@@ -35,7 +34,7 @@
 #include "mali_kbase_fence_defs.h"
 #include "mali_kbase.h"
 
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE)
 extern const struct fence_ops kbase_fence_ops;
 #else
 extern const struct dma_fence_ops kbase_fence_ops;
@@ -49,7 +48,7 @@ extern const struct dma_fence_ops kbase_fence_ops;
 * @node:     List head for linking this callback to the katom
 */
 struct kbase_fence_cb {
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE)
 	struct fence_cb fence_cb;
 	struct fence *fence;
 #else
@@ -66,7 +65,7 @@ struct kbase_fence_cb {
  *
  * return: A new fence object on success, NULL on failure.
  */
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE)
 struct fence *kbase_fence_out_new(struct kbase_jd_atom *katom);
 #else
 struct dma_fence *kbase_fence_out_new(struct kbase_jd_atom *katom);
@@ -169,7 +168,7 @@ static inline int kbase_fence_out_signal(struct kbase_jd_atom *katom,
  * Return: 0 on success: fence was either already signaled, or callback was
  * set up. Negative error code is returned on error.
  */
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE)
 int kbase_fence_add_callback(struct kbase_jd_atom *katom,
 			     struct fence *fence,
 			     fence_func_t callback);
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_fence_defs.h b/drivers/gpu/arm/bifrost/mali_kbase_fence_defs.h
index 303029639d38..006512d461f9 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_fence_defs.h
+++ b/drivers/gpu/arm/bifrost/mali_kbase_fence_defs.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  *
  * (C) COPYRIGHT 2010-2018, 2020 ARM Limited. All rights reserved.
@@ -5,7 +6,7 @@
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,8 +17,6 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 #ifndef _KBASE_FENCE_DEFS_H_
@@ -30,7 +29,7 @@
 
 #include <linux/version.h>
 
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE)
 
 #include <linux/fence.h>
 
@@ -53,7 +52,7 @@
 
 #include <linux/dma-fence.h>
 
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 11, 0))
+#if (KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE)
 #define dma_fence_get_status(a) (dma_fence_is_signaled(a) ? \
 	(a)->status ?: 1 \
 	: 0)
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_fence_ops.c b/drivers/gpu/arm/bifrost/mali_kbase_fence_ops.c
index c4703748bec6..4712ef4f0606 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_fence_ops.c
+++ b/drivers/gpu/arm/bifrost/mali_kbase_fence_ops.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  *
  * (C) COPYRIGHT 2020 ARM Limited. All rights reserved.
@@ -5,7 +6,7 @@
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,8 +17,6 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 #include <linux/atomic.h>
@@ -26,7 +25,7 @@
 #include <mali_kbase.h>
 
 static const char *
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE)
 kbase_fence_get_driver_name(struct fence *fence)
 #else
 kbase_fence_get_driver_name(struct dma_fence *fence)
@@ -36,7 +35,7 @@ kbase_fence_get_driver_name(struct dma_fence *fence)
 }
 
 static const char *
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE)
 kbase_fence_get_timeline_name(struct fence *fence)
 #else
 kbase_fence_get_timeline_name(struct dma_fence *fence)
@@ -46,7 +45,7 @@ kbase_fence_get_timeline_name(struct dma_fence *fence)
 }
 
 static bool
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE)
 kbase_fence_enable_signaling(struct fence *fence)
 #else
 kbase_fence_enable_signaling(struct dma_fence *fence)
@@ -56,7 +55,7 @@ kbase_fence_enable_signaling(struct dma_fence *fence)
 }
 
 static void
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE)
 kbase_fence_fence_value_str(struct fence *fence, char *str, int size)
 #else
 kbase_fence_fence_value_str(struct dma_fence *fence, char *str, int size)
@@ -69,7 +68,7 @@ kbase_fence_fence_value_str(struct dma_fence *fence, char *str, int size)
 #endif
 }
 
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE)
 const struct fence_ops kbase_fence_ops = {
 	.wait = fence_default_wait,
 #else
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_gator.h b/drivers/gpu/arm/bifrost/mali_kbase_gator.h
index 579c7b6ff3aa..180beda91252 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_gator.h
+++ b/drivers/gpu/arm/bifrost/mali_kbase_gator.h
@@ -1,11 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  *
- * (C) COPYRIGHT 2011-2019 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2011-2020 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,8 +17,6 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 /* NB taken from gator  */
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_gpu_memory_debugfs.c b/drivers/gpu/arm/bifrost/mali_kbase_gpu_memory_debugfs.c
index 569abd920fde..a10b2bb8a416 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_gpu_memory_debugfs.c
+++ b/drivers/gpu/arm/bifrost/mali_kbase_gpu_memory_debugfs.c
@@ -1,11 +1,12 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  *
- * (C) COPYRIGHT 2012-2017, 2019 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2012-2017, 2019-2021 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,24 +17,23 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 #include <mali_kbase.h>
 #include <device/mali_kbase_device.h>
 
 #ifdef CONFIG_DEBUG_FS
-/** Show callback for the @c gpu_memory debugfs file.
+/**
+ * kbasep_gpu_memory_seq_show - Show callback for the @c gpu_memory debugfs file
+ * @sfile: The debugfs entry
+ * @data: Data associated with the entry
  *
  * This function is called to get the contents of the @c gpu_memory debugfs
  * file. This is a report of current gpu memory usage.
  *
- * @param sfile The debugfs entry
- * @param data Data associated with the entry
- *
- * @return 0 if successfully prints data in debugfs entry file
- *         -1 if it encountered an error
+ * Return:
+ * * 0 if successfully prints data in debugfs entry file
+ * * -1 if it encountered an error
  */
 
 static int kbasep_gpu_memory_seq_show(struct seq_file *sfile, void *data)
@@ -54,8 +54,9 @@ static int kbasep_gpu_memory_seq_show(struct seq_file *sfile, void *data)
 		mutex_lock(&kbdev->kctx_list_lock);
 		list_for_each_entry(kctx, &kbdev->kctx_list, kctx_list_link) {
 			/* output the memory usage and cap for each kctx
-			* opened on this device */
-			seq_printf(sfile, "  %s-0x%p %10u\n",
+			 * opened on this device
+			 */
+			seq_printf(sfile, "  %s-0x%pK %10u\n",
 				"kctx",
 				kctx,
 				atomic_read(&(kctx->used_pages)));
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_gpu_memory_debugfs.h b/drivers/gpu/arm/bifrost/mali_kbase_gpu_memory_debugfs.h
index a45dabbb680f..023703bf2e1e 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_gpu_memory_debugfs.h
+++ b/drivers/gpu/arm/bifrost/mali_kbase_gpu_memory_debugfs.h
@@ -1,11 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  *
- * (C) COPYRIGHT 2012-2014, 2016, 2020 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2012-2014, 2016, 2020-2021 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,15 +17,10 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
-
-
 /**
- * @file mali_kbase_gpu_memory_debugfs.h
- * Header file for gpu_memory entry in debugfs
+ * DOC: Header file for gpu_memory entry in debugfs
  *
  */
 
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_gpuprops.c b/drivers/gpu/arm/bifrost/mali_kbase_gpuprops.c
index 020b5d853608..229188b0db9a 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_gpuprops.c
+++ b/drivers/gpu/arm/bifrost/mali_kbase_gpuprops.c
@@ -1,12 +1,12 @@
 // SPDX-License-Identifier: GPL-2.0
 /*
  *
- * (C) COPYRIGHT 2011-2020 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2011-2021 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -17,12 +17,8 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
-
-
 /*
  * Base kernel property query APIs
  */
@@ -32,7 +28,7 @@
 #include <mali_kbase_gpuprops.h>
 #include <mali_kbase_hwaccess_gpuprops.h>
 #include <mali_kbase_config_defaults.h>
-#include "mali_kbase_ioctl.h"
+#include <uapi/gpu/arm/bifrost/mali_kbase_ioctl.h>
 #include <linux/clk.h>
 #include <mali_kbase_pm_internal.h>
 #include <linux/of_platform.h>
@@ -48,7 +44,7 @@ static void kbase_gpuprops_construct_coherent_groups(
 	u64 first_set, first_set_prev;
 	u32 num_groups = 0;
 
-	KBASE_DEBUG_ASSERT(NULL != props);
+	KBASE_DEBUG_ASSERT(props != NULL);
 
 	props->coherency_info.coherency = props->raw_props.mem_features;
 	props->coherency_info.num_core_groups = hweight64(props->raw_props.l2_present);
@@ -107,6 +103,71 @@ static void kbase_gpuprops_construct_coherent_groups(
 	props->coherency_info.num_groups = num_groups;
 }
 
+/**
+ * kbase_gpuprops_get_curr_config_props - Get the current allocated resources
+ * @kbdev:       The &struct kbase_device structure for the device
+ * @curr_config: The &struct curr_config_props structure to receive the result
+ *
+ * Refresh @curr_config from the GPU configuration registers, but only when
+ * its update_needed flag is set; the flag is cleared after a refresh.
+ *
+ * Return: Zero on success, Linux error code on failure
+ */
+int kbase_gpuprops_get_curr_config_props(struct kbase_device *kbdev,
+	struct curr_config_props * const curr_config)
+{
+	struct kbase_current_config_regdump curr_config_regdump;
+	int err;
+
+	if (WARN_ON(!kbdev) || WARN_ON(!curr_config))
+		return -EINVAL;
+
+	/* No refresh was requested: keep the values already stored. */
+	if (!curr_config->update_needed)
+		return 0;
+
+	/* Dump the registers describing the current configuration */
+	err = kbase_backend_gpuprops_get_curr_config(kbdev,
+						     &curr_config_regdump);
+	if (err)
+		return err;
+
+	curr_config->l2_slices =
+		KBASE_UBFX32(curr_config_regdump.mem_features, 8U, 4) + 1;
+
+	curr_config->l2_present =
+		((u64) curr_config_regdump.l2_present_hi << 32) +
+		curr_config_regdump.l2_present_lo;
+
+	curr_config->shader_present =
+		((u64) curr_config_regdump.shader_present_hi << 32) +
+		curr_config_regdump.shader_present_lo;
+
+	curr_config->num_cores = hweight64(curr_config->shader_present);
+
+	curr_config->update_needed = false;
+
+	return 0;
+}
+
+/**
+ * kbase_gpuprops_req_curr_config_update - Request Current Config Update
+ * @kbdev: The &struct kbase_device structure for the device
+ *
+ * Sets curr_config.update_needed so that the next call to
+ * kbase_gpuprops_get_curr_config_props() re-reads the configuration.
+ *
+ * Return: Zero on success, -EINVAL if @kbdev is NULL
+ */
+int kbase_gpuprops_req_curr_config_update(struct kbase_device *kbdev)
+{
+	if (WARN_ON(!kbdev))
+		return -EINVAL;
+
+	kbdev->gpu_props.curr_config.update_needed = true;
+	return 0;
+}
+
 /**
  * kbase_gpuprops_get_props - Get the GPU configuration
  * @gpu_props: The &struct base_gpu_props structure
@@ -124,8 +185,8 @@ static int kbase_gpuprops_get_props(struct base_gpu_props * const gpu_props,
 	int i;
 	int err;
 
-	KBASE_DEBUG_ASSERT(NULL != kbdev);
-	KBASE_DEBUG_ASSERT(NULL != gpu_props);
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(gpu_props != NULL);
 
 	/* Dump relevant registers */
 	err = kbase_backend_gpuprops_get(kbdev, &regdump);
@@ -166,6 +227,10 @@ static int kbase_gpuprops_get_props(struct base_gpu_props * const gpu_props,
 	gpu_props->raw_props.thread_features = regdump.thread_features;
 	gpu_props->raw_props.thread_tls_alloc = regdump.thread_tls_alloc;
 
+	gpu_props->raw_props.gpu_features =
+		((u64) regdump.gpu_features_hi << 32) +
+		regdump.gpu_features_lo;
+
 	return 0;
 }
 
@@ -182,6 +247,59 @@ void kbase_gpuprops_update_core_props_gpu_id(
 		KBASE_UBFX32(gpu_props->raw_props.gpu_id, 16U, 16);
 }
 
+/**
+ * kbase_gpuprops_update_max_config_props - Update max config in base_gpu_props
+ * @base_props: The &struct base_gpu_props structure
+ * @kbdev:      The &struct kbase_device structure for the device
+ *
+ * Updates the &struct base_gpu_props structure with the max config properties.
+ * Does nothing when max_config.core_mask is zero, i.e. no max config was set.
+ */
+static void kbase_gpuprops_update_max_config_props(
+	struct base_gpu_props * const base_props, struct kbase_device *kbdev)
+{
+	int l2_n = 0;
+
+	if (WARN_ON(!kbdev) || WARN_ON(!base_props))
+		return;
+
+	/* return if the max_config is not set during arbif initialization */
+	if (kbdev->gpu_props.max_config.core_mask == 0)
+		return;
+
+	/*
+	 * Set the base_props with the maximum config values to ensure that the
+	 * user space will always be based on the maximum resources available.
+	 */
+	base_props->l2_props.num_l2_slices =
+		kbdev->gpu_props.max_config.l2_slices;
+	base_props->raw_props.shader_present =
+		kbdev->gpu_props.max_config.core_mask;
+	/*
+	 * Update l2_present in the raw data to be consistent with the
+	 * max_config.l2_slices number.
+	 */
+	base_props->raw_props.l2_present = 0;
+	for (l2_n = 0; l2_n < base_props->l2_props.num_l2_slices; l2_n++) {
+		base_props->raw_props.l2_present <<= 1;
+		base_props->raw_props.l2_present |= 0x1;
+	}
+	/*
+	 * Update the coherency_info data using just one core group. For
+	 * architectures where the max_config is provided by the arbiter it is
+	 * not necessary to split the shader core groups in different coherent
+	 * groups.
+	 */
+	base_props->coherency_info.coherency =
+		base_props->raw_props.mem_features;
+	base_props->coherency_info.num_core_groups = 1;
+	base_props->coherency_info.num_groups = 1;
+	base_props->coherency_info.group[0].core_mask =
+		kbdev->gpu_props.max_config.core_mask;
+	base_props->coherency_info.group[0].num_cores =
+		hweight32(kbdev->gpu_props.max_config.core_mask);
+}
+
 /**
  * kbase_gpuprops_calculate_props - Calculate the derived properties
  * @gpu_props: The &struct base_gpu_props structure
@@ -195,7 +313,6 @@ static void kbase_gpuprops_calculate_props(
 {
 	int i;
 	u32 gpu_id;
-	u32 product_id;
 
 	/* Populate the base_gpu_props structure */
 	kbase_gpuprops_update_core_props_gpu_id(gpu_props);
@@ -218,7 +335,8 @@ static void kbase_gpuprops_calculate_props(
 
 	/* Field with number of l2 slices is added to MEM_FEATURES register
 	 * since t76x. Below code assumes that for older GPU reserved bits will
-	 * be read as zero. */
+	 * be read as zero.
+	 */
 	gpu_props->l2_props.num_l2_slices =
 		KBASE_UBFX32(gpu_props->raw_props.mem_features, 8U, 4) + 1;
 
@@ -251,8 +369,6 @@ static void kbase_gpuprops_calculate_props(
 	 * Workaround for the incorrectly applied THREAD_FEATURES to tDUx.
 	 */
 	gpu_id = kbdev->gpu_props.props.raw_props.gpu_id;
-	product_id = gpu_id & GPU_ID_VERSION_PRODUCT_ID;
-	product_id >>= GPU_ID_VERSION_PRODUCT_ID_SHIFT;
 
 #if MALI_USE_CSF
 	gpu_props->thread_props.max_registers =
@@ -299,8 +415,30 @@ static void kbase_gpuprops_calculate_props(
 		gpu_props->thread_props.max_task_queue = THREAD_MTQ_DEFAULT;
 		gpu_props->thread_props.max_thread_group_split = THREAD_MTGS_DEFAULT;
 	}
-	/* Initialize the coherent_group structure for each group */
-	kbase_gpuprops_construct_coherent_groups(gpu_props);
+
+	/*
+	 * If the maximum resources information is available, base_gpu_props
+	 * must be updated from max_config so that userspace is given the
+	 * maximum configuration. This is applicable to systems that receive
+	 * this information from the arbiter.
+	 */
+	if (kbdev->gpu_props.max_config.core_mask)
+		/* Update the max config properties in the base_gpu_props */
+		kbase_gpuprops_update_max_config_props(gpu_props,
+						       kbdev);
+	else
+		/* Initialize the coherent_group structure for each group */
+		kbase_gpuprops_construct_coherent_groups(gpu_props);
+}
+
+void kbase_gpuprops_set_max_config(struct kbase_device *kbdev,
+	const struct max_config_props *max_config)
+{
+	if (WARN_ON(!kbdev) || WARN_ON(!max_config))
+		return;
+
+	kbdev->gpu_props.max_config.l2_slices = max_config->l2_slices;
+	kbdev->gpu_props.max_config.core_mask = max_config->core_mask;
 }
 
 void kbase_gpuprops_set(struct kbase_device *kbdev)
@@ -308,7 +446,8 @@ void kbase_gpuprops_set(struct kbase_device *kbdev)
 	struct kbase_gpu_props *gpu_props;
 	struct gpu_raw_gpu_props *raw;
 
-	KBASE_DEBUG_ASSERT(NULL != kbdev);
+	if (WARN_ON(!kbdev))
+		return;
 	gpu_props = &kbdev->gpu_props;
 	raw = &gpu_props->props.raw_props;
 
@@ -328,9 +467,19 @@ void kbase_gpuprops_set(struct kbase_device *kbdev)
 	gpu_props->mmu.pa_bits = KBASE_UBFX32(raw->mmu_features, 8U, 8);
 
 	gpu_props->num_cores = hweight64(raw->shader_present);
-	gpu_props->num_core_groups = hweight64(raw->l2_present);
+	gpu_props->num_core_groups =
+		gpu_props->props.coherency_info.num_core_groups;
 	gpu_props->num_address_spaces = hweight32(raw->as_present);
 	gpu_props->num_job_slots = hweight32(raw->js_present);
+
+	/*
+	 * Current configuration is used on HW interactions so that the maximum
+	 * config is just used for user space avoiding interactions with parts
+	 * of the hardware that might not be allocated to the kbase instance at
+	 * that moment.
+	 */
+	kbase_gpuprops_req_curr_config_update(kbdev);
+	kbase_gpuprops_get_curr_config_props(kbdev, &gpu_props->curr_config);
 }
 
 int kbase_gpuprops_set_features(struct kbase_device *kbdev)
@@ -368,13 +517,26 @@ int kbase_gpuprops_set_features(struct kbase_device *kbdev)
  * in sysfs.
  */
 static u8 override_l2_size;
-module_param(override_l2_size, byte, 0);
+module_param(override_l2_size, byte, 0000);
 MODULE_PARM_DESC(override_l2_size, "Override L2 size config for testing");
 
 static u8 override_l2_hash;
-module_param(override_l2_hash, byte, 0);
+module_param(override_l2_hash, byte, 0000);
 MODULE_PARM_DESC(override_l2_hash, "Override L2 hash config for testing");
 
+static u32 l2_hash_values[ASN_HASH_COUNT] = {
+	0,
+};
+static int num_override_l2_hash_values;
+module_param_array(l2_hash_values, uint, &num_override_l2_hash_values, 0000);
+MODULE_PARM_DESC(l2_hash_values, "Override L2 hash values config for testing");
+
+enum l2_config_override_result {
+	L2_CONFIG_OVERRIDE_FAIL = -1,
+	L2_CONFIG_OVERRIDE_NONE,
+	L2_CONFIG_OVERRIDE_OK,
+};
+
 /**
  * kbase_read_l2_config_from_dt - Read L2 configuration
  * @kbdev: The kbase device for which to get the L2 configuration.
@@ -383,15 +545,17 @@ MODULE_PARM_DESC(override_l2_hash, "Override L2 hash config for testing");
  * Override values in module parameters take priority over override values in
  * device tree.
  *
- * Return: true if either size or hash was overridden, false if no overrides
- * were found.
+ * Return: L2_CONFIG_OVERRIDE_OK if at least one of size, hash or hash values
+ *         was properly overridden, L2_CONFIG_OVERRIDE_NONE if no overrides
+ *         are provided, L2_CONFIG_OVERRIDE_FAIL otherwise.
  */
-static bool kbase_read_l2_config_from_dt(struct kbase_device * const kbdev)
+static enum l2_config_override_result
+kbase_read_l2_config_from_dt(struct kbase_device *const kbdev)
 {
 	struct device_node *np = kbdev->dev->of_node;
 
 	if (!np)
-		return false;
+		return L2_CONFIG_OVERRIDE_NONE;
 
 	if (override_l2_size)
 		kbdev->l2_size_override = override_l2_size;
@@ -403,10 +567,41 @@ static bool kbase_read_l2_config_from_dt(struct kbase_device * const kbdev)
 	else if (of_property_read_u8(np, "l2-hash", &kbdev->l2_hash_override))
 		kbdev->l2_hash_override = 0;
 
-	if (kbdev->l2_size_override || kbdev->l2_hash_override)
-		return true;
+	kbdev->l2_hash_values_override = false;
+	if (num_override_l2_hash_values) {
+		int i;
 
-	return false;
+		kbdev->l2_hash_values_override = true;
+		for (i = 0; i < num_override_l2_hash_values; i++)
+			kbdev->l2_hash_values[i] = l2_hash_values[i];
+	} else if (!of_property_read_u32_array(np, "l2-hash-values",
+					       kbdev->l2_hash_values,
+					       ASN_HASH_COUNT))
+		kbdev->l2_hash_values_override = true;
+
+	if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_ASN_HASH) &&
+	    (kbdev->l2_hash_override)) {
+		dev_err(kbdev->dev, "l2-hash not supported\n");
+		return L2_CONFIG_OVERRIDE_FAIL;
+	}
+
+	if (!kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_ASN_HASH) &&
+	    (kbdev->l2_hash_values_override)) {
+		dev_err(kbdev->dev, "l2-hash-values not supported\n");
+		return L2_CONFIG_OVERRIDE_FAIL;
+	}
+
+	if (kbdev->l2_hash_override && kbdev->l2_hash_values_override) {
+		dev_err(kbdev->dev,
+			"both l2-hash & l2-hash-values not supported\n");
+		return L2_CONFIG_OVERRIDE_FAIL;
+	}
+
+	if (kbdev->l2_size_override || kbdev->l2_hash_override ||
+	    kbdev->l2_hash_values_override)
+		return L2_CONFIG_OVERRIDE_OK;
+
+	return L2_CONFIG_OVERRIDE_NONE;
 }
 
 int kbase_gpuprops_update_l2_features(struct kbase_device *kbdev)
@@ -418,8 +613,25 @@ int kbase_gpuprops_update_l2_features(struct kbase_device *kbdev)
 		struct base_gpu_props *gpu_props = &kbdev->gpu_props.props;
 
 		/* Check for L2 cache size & hash overrides */
-		if (!kbase_read_l2_config_from_dt(kbdev))
-			return 0;
+		switch (kbase_read_l2_config_from_dt(kbdev)) {
+		case L2_CONFIG_OVERRIDE_FAIL:
+			err = -EIO;
+			goto exit;
+		case L2_CONFIG_OVERRIDE_NONE:
+			goto exit;
+		default:
+			break;
+		}
+
+		/* pm.active_count is expected to be 1 here, which is set in
+		 * kbase_hwaccess_pm_powerup().
+		 */
+		WARN_ON(kbdev->pm.active_count != 1);
+		/* The new settings for L2 cache can only be applied when it is
+		 * off, so first do the power down.
+		 */
+		kbase_pm_context_idle(kbdev);
+		kbase_pm_wait_for_desired_state(kbdev);
 
 		/* Need L2 to get powered to reflect to L2_FEATURES */
 		kbase_pm_context_active(kbdev);
@@ -430,21 +642,21 @@ int kbase_gpuprops_update_l2_features(struct kbase_device *kbdev)
 		/* Dump L2_FEATURES register */
 		err = kbase_backend_gpuprops_get_l2_features(kbdev, &regdump);
 		if (err)
-			goto idle_gpu;
+			goto exit;
 
 		dev_info(kbdev->dev, "Reflected L2_FEATURES is 0x%x\n",
-				regdump.l2_features);
+			 regdump.l2_features);
+		dev_info(kbdev->dev, "Reflected L2_CONFIG is 0x%08x\n",
+			 regdump.l2_config);
+
 
 		/* Update gpuprops with reflected L2_FEATURES */
 		gpu_props->raw_props.l2_features = regdump.l2_features;
 		gpu_props->l2_props.log2_cache_size =
 			KBASE_UBFX32(gpu_props->raw_props.l2_features, 16U, 8);
-
-idle_gpu:
-		/* Let GPU idle */
-		kbase_pm_context_idle(kbdev);
 	}
 
+exit:
 	return err;
 }
 
@@ -524,7 +736,7 @@ static struct {
 	PROP(RAW_THREAD_FEATURES,         raw_props.thread_features),
 	PROP(RAW_THREAD_TLS_ALLOC,        raw_props.thread_tls_alloc),
 	PROP(RAW_COHERENCY_MODE,          raw_props.coherency_mode),
-
+	PROP(RAW_GPU_FEATURES,            raw_props.gpu_features),
 	PROP(COHERENCY_NUM_GROUPS,        coherency_info.num_groups),
 	PROP(COHERENCY_NUM_CORE_GROUPS,   coherency_info.num_core_groups),
 	PROP(COHERENCY_COHERENCY,         coherency_info.coherency),
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_gpuprops.h b/drivers/gpu/arm/bifrost/mali_kbase_gpuprops.h
index 5eee7948381a..72f76c392fa1 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_gpuprops.h
+++ b/drivers/gpu/arm/bifrost/mali_kbase_gpuprops.h
@@ -1,11 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  *
- * (C) COPYRIGHT ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2011-2015, 2017, 2019-2021 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,29 +17,10 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
- *//* SPDX-License-Identifier: GPL-2.0 */
-/*
- *
- * (C) COPYRIGHT 2011-2015, 2017, 2019-2020 ARM Limited. All rights reserved.
- *
- * This program is free software and is provided to you under the terms of the
- * GNU General Public License version 2 as published by the Free Software
- * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
- *
- * A copy of the licence is included with the program, and can also be obtained
- * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
- * Boston, MA  02110-1301, USA.
- *
  */
 
-
-
 /**
- * @file mali_kbase_gpuprops.h
- * Base kernel property query APIs
+ * DOC: Base kernel property query APIs
  */
 
 #ifndef _KBASE_GPUPROPS_H_
@@ -64,11 +46,10 @@ struct kbase_device;
 	(((u32)(value) >> (u32)(offset)) & (u32)((1ULL << (u32)(size)) - 1))
 
 /**
- * @brief Set up Kbase GPU properties.
+ * kbase_gpuprops_set - Set up Kbase GPU properties.
+ * @kbdev: The struct kbase_device structure for the device
  *
  * Set up Kbase GPU properties with information from the GPU registers
- *
- * @param kbdev		The struct kbase_device structure for the device
  */
 void kbase_gpuprops_set(struct kbase_device *kbdev);
 
@@ -89,6 +70,8 @@ int kbase_gpuprops_set_features(struct kbase_device *kbdev);
  * @kbdev:   Device pointer
  *
  * This function updates l2_features and the log2 cache size.
+ * The function expects the GPU to be powered up and the value of
+ * pm.active_count to be 1.
  *
  * Return: Zero on success, Linux error code for failure
  */
@@ -132,4 +115,38 @@ int kbase_device_populate_max_freq(struct kbase_device *kbdev);
 void kbase_gpuprops_update_core_props_gpu_id(
 	struct base_gpu_props * const gpu_props);
 
+/**
+ * kbase_gpuprops_set_max_config - Set the max config information
+ * @kbdev:       Device pointer
+ * @max_config:  Maximum configuration data to be updated
+ *
+ * This function sets max_config in the kbase_gpu_props.
+ */
+void kbase_gpuprops_set_max_config(struct kbase_device *kbdev,
+	const struct max_config_props *max_config);
+
+/**
+ * kbase_gpuprops_get_curr_config_props - Get the current allocated resources
+ * @kbdev: The &struct kbase_device structure for the device
+ * @curr_config: The &struct curr_config_props structure to receive the result
+ *
+ * Fill the &struct curr_config_props structure with values from the GPU
+ * configuration registers.
+ *
+ * Return: Zero on success, Linux error code on failure
+ */
+int kbase_gpuprops_get_curr_config_props(struct kbase_device *kbdev,
+	struct curr_config_props * const curr_config);
+
+/**
+ * kbase_gpuprops_req_curr_config_update - Request Current Config Update
+ * @kbdev: The &struct kbase_device structure for the device
+ *
+ * Requests the current configuration to be updated the next time
+ * kbase_gpuprops_get_curr_config_props() is called.
+ *
+ * Return: Zero on success, Linux error code on failure
+ */
+int kbase_gpuprops_req_curr_config_update(struct kbase_device *kbdev);
+
 #endif				/* _KBASE_GPUPROPS_H_ */
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_gpuprops_types.h b/drivers/gpu/arm/bifrost/mali_kbase_gpuprops_types.h
index ec6f1c39ccb0..1d101e0a3e0c 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_gpuprops_types.h
+++ b/drivers/gpu/arm/bifrost/mali_kbase_gpuprops_types.h
@@ -1,11 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  *
- * (C) COPYRIGHT 2011-2018, 2020 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2011-2018, 2020-2021 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,21 +17,16 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
-
-
 /**
- * @file mali_kbase_gpuprops_types.h
- * Base kernel property query APIs
+ * DOC: Base kernel property query APIs
  */
 
 #ifndef _KBASE_GPUPROPS_TYPES_H_
 #define _KBASE_GPUPROPS_TYPES_H_
 
-#include "mali_base_kernel.h"
+#include <uapi/gpu/arm/bifrost/mali_base_kernel.h>
 
 #define KBASE_GPU_SPEED_MHZ    123
 #define KBASE_GPU_PC_SIZE_LOG2 24U
@@ -38,6 +34,7 @@
 struct kbase_gpuprops_regdump {
 	u32 gpu_id;
 	u32 l2_features;
+	u32 l2_config;
 	u32 core_features;
 	u32 tiler_features;
 	u32 mem_features;
@@ -60,6 +57,30 @@ struct kbase_gpuprops_regdump {
 	u32 stack_present_lo;
 	u32 stack_present_hi;
 	u32 coherency_features;
+	u32 gpu_features_lo;
+	u32 gpu_features_hi;
+};
+
+/**
+ * struct kbase_current_config_regdump - Register dump for current resources
+ *                                       allocated to the GPU.
+ * @mem_features: Memory system features. Contains information about the
+ *                features of the memory system. Used here to get the L2 slice
+ *                count.
+ * @shader_present_lo: Shader core present bitmap. Low word.
+ * @shader_present_hi: Shader core present bitmap. High word.
+ * @l2_present_lo: L2 cache present bitmap. Low word.
+ * @l2_present_hi: L2 cache present bitmap. High word.
+ *
+ * Register dump structure used to store the register data related to the
+ * current resources allocated to the GPU.
+ */
+struct kbase_current_config_regdump {
+	u32 mem_features;
+	u32 shader_present_lo;
+	u32 shader_present_hi;
+	u32 l2_present_lo;
+	u32 l2_present_hi;
 };
 
 struct kbase_gpu_cache_props {
@@ -76,6 +97,50 @@ struct kbase_gpu_mmu_props {
 	u8 pa_bits;
 };
 
+/**
+ * struct max_config_props - Properties based on the maximum resources
+ *                           available.
+ * @l2_slices: Maximum number of L2 slices that can be assigned to the GPU
+ *             during runtime.
+ * @padding:   Padding to a multiple of 64 bits.
+ * @core_mask: Largest core mask bitmap that can be assigned to the GPU during
+ *             runtime.
+ *
+ * Properties based on the maximum resources available (not necessarily
+ * allocated at that moment). Used to provide the maximum configuration to the
+ * userspace allowing the applications to allocate enough resources in case the
+ * real allocated resources change.
+ */
+struct max_config_props {
+	u8 l2_slices;
+	u8 padding[3];
+	u32 core_mask;
+};
+
+/**
+ * struct curr_config_props - Properties based on the current resources
+ *                            allocated to the GPU.
+ * @l2_present:     Current L2 present bitmap that is allocated to the GPU.
+ * @shader_present: Current shader present bitmap that is allocated to the GPU.
+ * @num_cores:      Current number of shader cores allocated to the GPU.
+ * @l2_slices:      Current number of L2 slices allocated to the GPU.
+ * @update_needed:  Defines if it is necessary to re-read the registers to
+ *                  update the current allocated resources.
+ * @padding:        Padding to a multiple of 64 bits.
+ *
+ * Properties based on the current resource available. Used for operations with
+ * hardware interactions to avoid using userspace data that can be based on
+ * the maximum resource available.
+ */
+struct curr_config_props {
+	u64 l2_present;
+	u64 shader_present;
+	u16 num_cores;
+	u8 l2_slices;
+	bool update_needed;
+	u8 padding[4];
+};
+
 struct kbase_gpu_props {
 	/* kernel-only properties */
 	u8 num_cores;
@@ -88,6 +153,12 @@ struct kbase_gpu_props {
 	struct kbase_gpu_mem_props mem;
 	struct kbase_gpu_mmu_props mmu;
 
+	/* Properties based on the current resource available */
+	struct curr_config_props curr_config;
+
+	/* Properties based on the maximum resource available */
+	struct max_config_props max_config;
+
 	/* Properties shared with userspace */
 	struct base_gpu_props props;
 
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_gwt.c b/drivers/gpu/arm/bifrost/mali_kbase_gwt.c
index 91dc4dbc0800..93acf8a82b4f 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_gwt.c
+++ b/drivers/gpu/arm/bifrost/mali_kbase_gwt.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  *
  * (C) COPYRIGHT 2010-2020 ARM Limited. All rights reserved.
@@ -5,7 +6,7 @@
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,8 +17,6 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 #include "mali_kbase_gwt.h"
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_gwt.h b/drivers/gpu/arm/bifrost/mali_kbase_gwt.h
index 7e7746e64915..d85833442aa6 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_gwt.h
+++ b/drivers/gpu/arm/bifrost/mali_kbase_gwt.h
@@ -1,11 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  *
- * (C) COPYRIGHT 2010-2017 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2017, 2020-2021 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,15 +17,13 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 #if !defined(_KBASE_GWT_H)
 #define _KBASE_GWT_H
 
 #include <mali_kbase.h>
-#include <mali_kbase_ioctl.h>
+#include <uapi/gpu/arm/bifrost/mali_kbase_ioctl.h>
 
 /**
  * kbase_gpu_gwt_start - Start the GPU write tracking
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_hw.c b/drivers/gpu/arm/bifrost/mali_kbase_hw.c
index dc58ffb931be..b1758d77aceb 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_hw.c
+++ b/drivers/gpu/arm/bifrost/mali_kbase_hw.c
@@ -1,11 +1,12 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  *
- * (C) COPYRIGHT 2012-2020 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2012-2021 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,12 +17,8 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
-
-
 /*
  * Run-time work-arounds helpers
  */
@@ -78,20 +75,6 @@ void kbase_hw_set_features_mask(struct kbase_device *kbdev)
 	case GPU_ID2_PRODUCT_LODX:
 		features = base_hw_features_tODx;
 		break;
-	case GPU_ID2_PRODUCT_TGRX:
-		features = base_hw_features_tGRx;
-		break;
-	case GPU_ID2_PRODUCT_TVAX:
-		features = base_hw_features_tVAx;
-		break;
-	case GPU_ID2_PRODUCT_TTUX:
-		/* Fallthrough */
-	case GPU_ID2_PRODUCT_LTUX:
-		features = base_hw_features_tTUx;
-		break;
-	case GPU_ID2_PRODUCT_TE2X:
-		features = base_hw_features_tE2x;
-		break;
 	default:
 		features = base_hw_features_generic;
 		break;
@@ -143,109 +126,91 @@ static const enum base_hw_issue *kbase_hw_get_issues_for_new_id(
 	};
 
 	static const struct base_hw_product base_hw_products[] = {
-		{GPU_ID2_PRODUCT_TMIX,
-		 {{GPU_ID2_VERSION_MAKE(0, 0, 1),
-		   base_hw_issues_tMIx_r0p0_05dev0},
-		  {GPU_ID2_VERSION_MAKE(0, 0, 2), base_hw_issues_tMIx_r0p0},
-		  {GPU_ID2_VERSION_MAKE(0, 1, 0), base_hw_issues_tMIx_r0p1},
-		  {U32_MAX /* sentinel value */, NULL} } },
+		{ GPU_ID2_PRODUCT_TMIX,
+		  { { GPU_ID2_VERSION_MAKE(0, 0, 1),
+		      base_hw_issues_tMIx_r0p0_05dev0 },
+		    { GPU_ID2_VERSION_MAKE(0, 0, 2), base_hw_issues_tMIx_r0p0 },
+		    { GPU_ID2_VERSION_MAKE(0, 1, 0), base_hw_issues_tMIx_r0p1 },
+		    { U32_MAX /* sentinel value */, NULL } } },
 
-		{GPU_ID2_PRODUCT_THEX,
-		 {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tHEx_r0p0},
-		  {GPU_ID2_VERSION_MAKE(0, 0, 1), base_hw_issues_tHEx_r0p0},
-		  {GPU_ID2_VERSION_MAKE(0, 1, 0), base_hw_issues_tHEx_r0p1},
-		  {GPU_ID2_VERSION_MAKE(0, 1, 1), base_hw_issues_tHEx_r0p1},
-		  {GPU_ID2_VERSION_MAKE(0, 2, 0), base_hw_issues_tHEx_r0p2},
-		  {GPU_ID2_VERSION_MAKE(0, 3, 0), base_hw_issues_tHEx_r0p3},
-		  {U32_MAX, NULL} } },
+		{ GPU_ID2_PRODUCT_THEX,
+		  { { GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tHEx_r0p0 },
+		    { GPU_ID2_VERSION_MAKE(0, 0, 1), base_hw_issues_tHEx_r0p0 },
+		    { GPU_ID2_VERSION_MAKE(0, 1, 0), base_hw_issues_tHEx_r0p1 },
+		    { GPU_ID2_VERSION_MAKE(0, 1, 1), base_hw_issues_tHEx_r0p1 },
+		    { GPU_ID2_VERSION_MAKE(0, 2, 0), base_hw_issues_tHEx_r0p2 },
+		    { GPU_ID2_VERSION_MAKE(0, 3, 0), base_hw_issues_tHEx_r0p3 },
+		    { U32_MAX, NULL } } },
 
-		{GPU_ID2_PRODUCT_TSIX,
-		 {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tSIx_r0p0},
-		  {GPU_ID2_VERSION_MAKE(0, 0, 1), base_hw_issues_tSIx_r0p0},
-		  {GPU_ID2_VERSION_MAKE(0, 1, 0), base_hw_issues_tSIx_r0p1},
-		  {GPU_ID2_VERSION_MAKE(1, 0, 0), base_hw_issues_tSIx_r1p0},
-		  {GPU_ID2_VERSION_MAKE(1, 1, 0), base_hw_issues_tSIx_r1p1},
-		  {U32_MAX, NULL} } },
+		{ GPU_ID2_PRODUCT_TSIX,
+		  { { GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tSIx_r0p0 },
+		    { GPU_ID2_VERSION_MAKE(0, 0, 1), base_hw_issues_tSIx_r0p0 },
+		    { GPU_ID2_VERSION_MAKE(0, 1, 0), base_hw_issues_tSIx_r0p1 },
+		    { GPU_ID2_VERSION_MAKE(1, 0, 0), base_hw_issues_tSIx_r1p0 },
+		    { GPU_ID2_VERSION_MAKE(1, 1, 0), base_hw_issues_tSIx_r1p1 },
+		    { U32_MAX, NULL } } },
 
-		{GPU_ID2_PRODUCT_TDVX,
-		 {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tDVx_r0p0},
-		  {U32_MAX, NULL} } },
+		{ GPU_ID2_PRODUCT_TDVX,
+		  { { GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tDVx_r0p0 },
+		    { U32_MAX, NULL } } },
 
-		{GPU_ID2_PRODUCT_TNOX,
-		 {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tNOx_r0p0},
-		  {U32_MAX, NULL} } },
+		{ GPU_ID2_PRODUCT_TNOX,
+		  { { GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tNOx_r0p0 },
+		    { U32_MAX, NULL } } },
 
-		{GPU_ID2_PRODUCT_TGOX,
-		 {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tGOx_r0p0},
-		  {GPU_ID2_VERSION_MAKE(1, 0, 0), base_hw_issues_tGOx_r1p0},
-		  {U32_MAX, NULL} } },
+		{ GPU_ID2_PRODUCT_TGOX,
+		  { { GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tGOx_r0p0 },
+		    { GPU_ID2_VERSION_MAKE(1, 0, 0), base_hw_issues_tGOx_r1p0 },
+		    { U32_MAX, NULL } } },
 
-		{GPU_ID2_PRODUCT_TTRX,
-		 {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tTRx_r0p0},
-		  {GPU_ID2_VERSION_MAKE(0, 0, 3), base_hw_issues_tTRx_r0p0},
-		  {GPU_ID2_VERSION_MAKE(0, 1, 0), base_hw_issues_tTRx_r0p1},
-		  {GPU_ID2_VERSION_MAKE(0, 1, 1), base_hw_issues_tTRx_r0p1},
-		  {GPU_ID2_VERSION_MAKE(0, 2, 0), base_hw_issues_tTRx_r0p2},
-		  {U32_MAX, NULL} } },
+		{ GPU_ID2_PRODUCT_TTRX,
+		  { { GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tTRx_r0p0 },
+		    { GPU_ID2_VERSION_MAKE(0, 0, 3), base_hw_issues_tTRx_r0p0 },
+		    { GPU_ID2_VERSION_MAKE(0, 1, 0), base_hw_issues_tTRx_r0p1 },
+		    { GPU_ID2_VERSION_MAKE(0, 1, 1), base_hw_issues_tTRx_r0p1 },
+		    { GPU_ID2_VERSION_MAKE(0, 2, 0), base_hw_issues_tTRx_r0p2 },
+		    { U32_MAX, NULL } } },
 
-		{GPU_ID2_PRODUCT_TNAX,
-		 {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tNAx_r0p0},
-		  {GPU_ID2_VERSION_MAKE(0, 0, 3), base_hw_issues_tNAx_r0p0},
-		  {GPU_ID2_VERSION_MAKE(0, 0, 4), base_hw_issues_tNAx_r0p0},
-		  {GPU_ID2_VERSION_MAKE(0, 0, 5), base_hw_issues_tNAx_r0p0},
-		  {GPU_ID2_VERSION_MAKE(0, 1, 0), base_hw_issues_tNAx_r0p1},
-		  {GPU_ID2_VERSION_MAKE(0, 1, 1), base_hw_issues_tNAx_r0p1},
-		  {U32_MAX, NULL} } },
+		{ GPU_ID2_PRODUCT_TNAX,
+		  { { GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tNAx_r0p0 },
+		    { GPU_ID2_VERSION_MAKE(0, 0, 3), base_hw_issues_tNAx_r0p0 },
+		    { GPU_ID2_VERSION_MAKE(0, 0, 4), base_hw_issues_tNAx_r0p0 },
+		    { GPU_ID2_VERSION_MAKE(0, 0, 5), base_hw_issues_tNAx_r0p0 },
+		    { GPU_ID2_VERSION_MAKE(0, 1, 0), base_hw_issues_tNAx_r0p1 },
+		    { GPU_ID2_VERSION_MAKE(0, 1, 1), base_hw_issues_tNAx_r0p1 },
+		    { U32_MAX, NULL } } },
 
-		{GPU_ID2_PRODUCT_LBEX,
-		 {{GPU_ID2_VERSION_MAKE(1, 0, 0), base_hw_issues_lBEx_r1p0},
-		  {GPU_ID2_VERSION_MAKE(1, 1, 0), base_hw_issues_lBEx_r1p1},
-		  {U32_MAX, NULL} } },
+		{ GPU_ID2_PRODUCT_LBEX,
+		  { { GPU_ID2_VERSION_MAKE(1, 0, 0), base_hw_issues_lBEx_r1p0 },
+		    { GPU_ID2_VERSION_MAKE(1, 1, 0), base_hw_issues_lBEx_r1p1 },
+		    { U32_MAX, NULL } } },
 
-		{GPU_ID2_PRODUCT_TBEX,
-		 {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tBEx_r0p0},
-		  {GPU_ID2_VERSION_MAKE(0, 0, 3), base_hw_issues_tBEx_r0p0},
-		  {GPU_ID2_VERSION_MAKE(0, 1, 0), base_hw_issues_tBEx_r0p1},
-		  {GPU_ID2_VERSION_MAKE(1, 0, 0), base_hw_issues_tBEx_r1p0},
-		  {U32_MAX, NULL} } },
+		{ GPU_ID2_PRODUCT_TBEX,
+		  { { GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tBEx_r0p0 },
+		    { GPU_ID2_VERSION_MAKE(0, 0, 3), base_hw_issues_tBEx_r0p0 },
+		    { GPU_ID2_VERSION_MAKE(0, 1, 0), base_hw_issues_tBEx_r0p1 },
+		    { GPU_ID2_VERSION_MAKE(1, 0, 0), base_hw_issues_tBEx_r1p0 },
+		    { U32_MAX, NULL } } },
 
-		{GPU_ID2_PRODUCT_TBAX,
-		 {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tBAx_r0p0},
-		  {GPU_ID2_VERSION_MAKE(0, 0, 3), base_hw_issues_tBAx_r0p0},
-		  {GPU_ID2_VERSION_MAKE(1, 0, 0), base_hw_issues_tBAx_r1p0},
-		  {U32_MAX, NULL} } },
+		{ GPU_ID2_PRODUCT_TBAX,
+		  { { GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tBAx_r0p0 },
+		    { GPU_ID2_VERSION_MAKE(0, 0, 1), base_hw_issues_tBAx_r0p0 },
+		    { GPU_ID2_VERSION_MAKE(0, 0, 2), base_hw_issues_tBAx_r0p0 },
+		    { U32_MAX, NULL } } },
 
-		{GPU_ID2_PRODUCT_TDUX,
-		 {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tDUx_r0p0},
-		  {U32_MAX, NULL} } },
+		{ GPU_ID2_PRODUCT_TDUX,
+		  { { GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tDUx_r0p0 },
+		    { U32_MAX, NULL } } },
 
-		{GPU_ID2_PRODUCT_TODX,
-		 {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tODx_r0p0},
-		  {U32_MAX, NULL} } },
+		{ GPU_ID2_PRODUCT_TODX,
+		  { { GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tODx_r0p0 },
+		    { GPU_ID2_VERSION_MAKE(0, 0, 4), base_hw_issues_tODx_r0p0 },
+		    { GPU_ID2_VERSION_MAKE(0, 0, 5), base_hw_issues_tODx_r0p0 },
+		    { U32_MAX, NULL } } },
 
-		{GPU_ID2_PRODUCT_LODX,
-		 {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tODx_r0p0},
-		  {U32_MAX, NULL} } },
-
-		{GPU_ID2_PRODUCT_TGRX,
-		 {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tGRx_r0p0},
-		  {U32_MAX, NULL} } },
-
-		{GPU_ID2_PRODUCT_TVAX,
-		 {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tVAx_r0p0},
-		  {U32_MAX, NULL} } },
-
-		{GPU_ID2_PRODUCT_TTUX,
-		 {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tTUx_r0p0},
-		  {U32_MAX, NULL} } },
-
-		{GPU_ID2_PRODUCT_LTUX,
-		 {{GPU_ID2_VERSION_MAKE(3, 0, 0), base_hw_issues_tTUx_r0p0},
-		  {U32_MAX, NULL} } },
-
-		{GPU_ID2_PRODUCT_TE2X,
-		 {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tE2x_r0p0},
-		  {U32_MAX, NULL} } },
+		{ GPU_ID2_PRODUCT_LODX,
+		  { { GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tODx_r0p0 },
+		    { U32_MAX, NULL } } },
 	};
 
 	u32 gpu_id = kbdev->gpu_props.props.raw_props.gpu_id;
@@ -278,8 +243,8 @@ static const enum base_hw_issue *kbase_hw_get_issues_for_new_id(
 			}
 
 			/* Check whether this is a candidate for most recent
-				known version not later than the actual
-				version. */
+			 * known version not later than the actual version.
+			 */
 			if ((version > product->map[v].version) &&
 				(product->map[v].version >= fallback_version)) {
 #if MALI_CUSTOMER_RELEASE
@@ -296,7 +261,8 @@ static const enum base_hw_issue *kbase_hw_get_issues_for_new_id(
 
 		if ((issues == NULL) && (fallback_issues != NULL)) {
 			/* Fall back to the issue set of the most recent known
-				version not later than the actual version. */
+			 * version not later than the actual version.
+			 */
 			issues = fallback_issues;
 
 #if MALI_CUSTOMER_RELEASE
@@ -349,7 +315,8 @@ int kbase_hw_set_issues_mask(struct kbase_device *kbdev)
 
 #if !MALI_CUSTOMER_RELEASE
 		/* The GPU ID might have been replaced with the last
-			known version of the same GPU. */
+		 * known version of the same GPU.
+		 */
 		gpu_id = kbdev->gpu_props.props.raw_props.gpu_id;
 #endif
 	} else {
@@ -393,19 +360,6 @@ int kbase_hw_set_issues_mask(struct kbase_device *kbdev)
 		case GPU_ID2_PRODUCT_LODX:
 			issues = base_hw_issues_model_tODx;
 			break;
-		case GPU_ID2_PRODUCT_TGRX:
-			issues = base_hw_issues_model_tGRx;
-			break;
-		case GPU_ID2_PRODUCT_TVAX:
-			issues = base_hw_issues_model_tVAx;
-			break;
-		case GPU_ID2_PRODUCT_TTUX:
-		case GPU_ID2_PRODUCT_LTUX:
-			issues = base_hw_issues_model_tTUx;
-			break;
-		case GPU_ID2_PRODUCT_TE2X:
-			issues = base_hw_issues_model_tE2x;
-			break;
 		default:
 			dev_err(kbdev->dev,
 				"Unknown GPU ID %x", gpu_id);
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_hw.h b/drivers/gpu/arm/bifrost/mali_kbase_hw.h
index f386b1624317..65b417c95e13 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_hw.h
+++ b/drivers/gpu/arm/bifrost/mali_kbase_hw.h
@@ -1,11 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  *
- * (C) COPYRIGHT 2012-2017 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2012-2017, 2020-2021 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,15 +17,10 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
-
-
 /**
- * @file
- * Run-time work-arounds helpers
+ * DOC: Run-time work-arounds helpers
  */
 
 #ifndef _KBASE_HW_H_
@@ -33,13 +29,17 @@
 #include "mali_kbase_defs.h"
 
 /**
- * @brief Tell whether a work-around should be enabled
+ * kbase_hw_has_issue - Tell whether a work-around should be enabled
+ * @kbdev: Device pointer
+ * @issue: issue to be checked
  */
 #define kbase_hw_has_issue(kbdev, issue)\
 	test_bit(issue, &(kbdev)->hw_issues_mask[0])
 
 /**
- * @brief Tell whether a feature is supported
+ * kbase_hw_has_feature - Tell whether a feature is supported
+ * @kbdev: Device pointer
+ * @feature: feature to be checked
  */
 #define kbase_hw_has_feature(kbdev, feature)\
 	test_bit(feature, &(kbdev)->hw_features_mask[0])
@@ -63,7 +63,8 @@
 int kbase_hw_set_issues_mask(struct kbase_device *kbdev);
 
 /**
- * @brief Set the features mask depending on the GPU ID
+ * kbase_hw_set_features_mask - Set the features mask depending on the GPU ID
+ * @kbdev: Device pointer
  */
 void kbase_hw_set_features_mask(struct kbase_device *kbdev);
 
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_hwaccess_backend.h b/drivers/gpu/arm/bifrost/mali_kbase_hwaccess_backend.h
index 89df2519ab97..90851e2aa85f 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_hwaccess_backend.h
+++ b/drivers/gpu/arm/bifrost/mali_kbase_hwaccess_backend.h
@@ -1,11 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  *
- * (C) COPYRIGHT 2014-2015, 2019 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2015, 2019-2020 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,11 +17,8 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
-
 /*
  * HW access backend common APIs
  */
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_hwaccess_defs.h b/drivers/gpu/arm/bifrost/mali_kbase_hwaccess_defs.h
index 124a2d9cf0c3..97a593fb6dbe 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_hwaccess_defs.h
+++ b/drivers/gpu/arm/bifrost/mali_kbase_hwaccess_defs.h
@@ -1,11 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  *
- * (C) COPYRIGHT 2014, 2016, 2018 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014, 2016-2018, 2020-2021 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,14 +17,10 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
-
 /**
- * @file mali_kbase_hwaccess_gpu_defs.h
- * HW access common definitions
+ * DOC: HW access common definitions
  */
 
 #ifndef _KBASE_HWACCESS_DEFS_H_
@@ -43,7 +40,9 @@
  * @backend:         GPU backend specific data for HW access layer
  */
 struct kbase_hwaccess_data {
+#if !MALI_USE_CSF
 	struct kbase_context *active_kctx[BASE_JM_MAX_NR_SLOTS];
+#endif
 
 	struct kbase_backend_data backend;
 };
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_hwaccess_gpuprops.h b/drivers/gpu/arm/bifrost/mali_kbase_hwaccess_gpuprops.h
index 3ae0dbe6886d..0fca83e4eb2a 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_hwaccess_gpuprops.h
+++ b/drivers/gpu/arm/bifrost/mali_kbase_hwaccess_gpuprops.h
@@ -1,11 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  *
- * (C) COPYRIGHT ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2015, 2018, 2019-2020 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,25 +17,8 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
- *//* SPDX-License-Identifier: GPL-2.0 */
-/*
- *
- * (C) COPYRIGHT 2014-2015, 2018, 2019-2020 ARM Limited. All rights reserved.
- *
- * This program is free software and is provided to you under the terms of the
- * GNU General Public License version 2 as published by the Free Software
- * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
- *
- * A copy of the licence is included with the program, and can also be obtained
- * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
- * Boston, MA  02110-1301, USA.
- *
  */
 
-
 /**
  * Base kernel property query backend APIs
  */
@@ -55,6 +39,23 @@
 int kbase_backend_gpuprops_get(struct kbase_device *kbdev,
 					struct kbase_gpuprops_regdump *regdump);
 
+/**
+ * kbase_backend_gpuprops_get_curr_config() - Fill @curr_config_regdump with
+ *                                            relevant GPU properties read from
+ *                                            the GPU registers.
+ * @kbdev:               Device pointer.
+ * @curr_config_regdump: Pointer to struct kbase_current_config_regdump
+ *                       structure.
+ *
+ * The caller should ensure that GPU remains powered-on during this function and
+ * the caller must ensure this function returns success before using the values
+ * returned in the curr_config_regdump in any part of the kernel.
+ *
+ * Return: Zero for success or a Linux error code
+ */
+int kbase_backend_gpuprops_get_curr_config(struct kbase_device *kbdev,
+		struct kbase_current_config_regdump *curr_config_regdump);
+
 /**
  * kbase_backend_gpuprops_get_features - Fill @regdump with GPU properties read
  *                                       from GPU
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_hwaccess_instr.h b/drivers/gpu/arm/bifrost/mali_kbase_hwaccess_instr.h
index 4fd2e3549268..959bfeb2096c 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_hwaccess_instr.h
+++ b/drivers/gpu/arm/bifrost/mali_kbase_hwaccess_instr.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  *
  * (C) COPYRIGHT 2014-2015, 2017-2018, 2020 ARM Limited. All rights reserved.
@@ -5,7 +6,7 @@
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,12 +17,8 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
-
-
 /*
  * HW Access instrumentation common APIs
  */
@@ -39,8 +36,7 @@
  * @shader_bm:         counters selection bitmask (Shader).
  * @tiler_bm:          counters selection bitmask (Tiler).
  * @mmu_l2_bm:         counters selection bitmask (MMU_L2).
- * @use_secondary:     use secondary performance counters set for applicable
- *                     counter blocks.
+ * @counter_set:       the performance counter set to use.
  */
 struct kbase_instr_hwcnt_enable {
 	u64 dump_buffer;
@@ -49,7 +45,7 @@ struct kbase_instr_hwcnt_enable {
 	u32 shader_bm;
 	u32 tiler_bm;
 	u32 mmu_l2_bm;
-	bool use_secondary;
+	u8 counter_set;
 };
 
 /**
@@ -139,7 +135,7 @@ int kbase_instr_backend_init(struct kbase_device *kbdev);
  */
 void kbase_instr_backend_term(struct kbase_device *kbdev);
 
-#ifdef CONFIG_MALI_PRFCNT_SET_SECONDARY_VIA_DEBUG_FS
+#ifdef CONFIG_MALI_PRFCNT_SET_SELECT_VIA_DEBUG_FS
 /**
  * kbase_instr_backend_debugfs_init() - Add a debugfs entry for the
  *                                      hardware counter set.
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_hwaccess_jm.h b/drivers/gpu/arm/bifrost/mali_kbase_hwaccess_jm.h
index f6ce17e4180f..0e513c757094 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_hwaccess_jm.h
+++ b/drivers/gpu/arm/bifrost/mali_kbase_hwaccess_jm.h
@@ -1,11 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  *
- * (C) COPYRIGHT 2014-2020 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2021 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,11 +17,8 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
-
 /*
  * HW access job manager common APIs
  */
@@ -31,7 +29,7 @@
 /**
  * kbase_backend_run_atom() - Run an atom on the GPU
  * @kbdev:	Device pointer
- * @atom:	Atom to run
+ * @katom:	Atom to run
  *
  * Caller must hold the HW access lock
  */
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_hwaccess_pm.h b/drivers/gpu/arm/bifrost/mali_kbase_hwaccess_pm.h
index bbaf6eaf8d88..4b2a53e960e9 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_hwaccess_pm.h
+++ b/drivers/gpu/arm/bifrost/mali_kbase_hwaccess_pm.h
@@ -1,11 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  *
- * (C) COPYRIGHT 2014-2015, 2018-2019 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2015, 2018-2021 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,14 +17,10 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
-
 /**
- * @file mali_kbase_hwaccess_pm.h
- * HW access power manager common APIs
+ * DOC: HW access power manager common APIs
  */
 
 #ifndef _KBASE_HWACCESS_PM_H_
@@ -80,24 +77,21 @@ int kbase_hwaccess_pm_powerup(struct kbase_device *kbdev,
  * the time this function returns, regardless of whether or not the active power
  * policy asks for the GPU to be powered off.
  *
- * @param kbdev The kbase device structure for the device (must be a valid
- *              pointer)
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
  */
 void kbase_hwaccess_pm_halt(struct kbase_device *kbdev);
 
 /**
  * Perform any backend-specific actions to suspend the GPU
  *
- * @param kbdev The kbase device structure for the device (must be a valid
- *              pointer)
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
  */
 void kbase_hwaccess_pm_suspend(struct kbase_device *kbdev);
 
 /**
  * Perform any backend-specific actions to resume the GPU from a suspend
  *
- * @param kbdev The kbase device structure for the device (must be a valid
- *              pointer)
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
  */
 void kbase_hwaccess_pm_resume(struct kbase_device *kbdev);
 
@@ -105,8 +99,7 @@ void kbase_hwaccess_pm_resume(struct kbase_device *kbdev);
  * Perform any required actions for activating the GPU. Called when the first
  * context goes active.
  *
- * @param kbdev The kbase device structure for the device (must be a valid
- *              pointer)
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
  */
 void kbase_hwaccess_pm_gpu_active(struct kbase_device *kbdev);
 
@@ -114,35 +107,43 @@ void kbase_hwaccess_pm_gpu_active(struct kbase_device *kbdev);
  * Perform any required actions for idling the GPU. Called when the last
  * context goes idle.
  *
- * @param kbdev The kbase device structure for the device (must be a valid
- *              pointer)
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
  */
 void kbase_hwaccess_pm_gpu_idle(struct kbase_device *kbdev);
 
-
+#if MALI_USE_CSF
 /**
  * Set the debug core mask.
  *
  * This determines which cores the power manager is allowed to use.
  *
- * @param kbdev         The kbase device structure for the device (must be a
- *                      valid pointer)
- * @param new_core_mask_js0 The core mask to use for job slot 0
- * @param new_core_mask_js0 The core mask to use for job slot 1
- * @param new_core_mask_js0 The core mask to use for job slot 2
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ * @new_core_mask: The core mask to use
+ */
+void kbase_pm_set_debug_core_mask(struct kbase_device *kbdev,
+				  u64 new_core_mask);
+#else
+/**
+ * Set the debug core mask.
+ *
+ * This determines which cores the power manager is allowed to use.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ * @new_core_mask_js0: The core mask to use for job slot 0
+ * @new_core_mask_js1: The core mask to use for job slot 1
+ * @new_core_mask_js2: The core mask to use for job slot 2
  */
 void kbase_pm_set_debug_core_mask(struct kbase_device *kbdev,
 		u64 new_core_mask_js0, u64 new_core_mask_js1,
 		u64 new_core_mask_js2);
-
+#endif /* MALI_USE_CSF */
 
 /**
  * Get the current policy.
  *
  * Returns the policy that is currently active.
  *
- * @param kbdev The kbase device structure for the device (must be a valid
- *              pointer)
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
  *
  * @return The current policy
  */
@@ -152,10 +153,9 @@ const struct kbase_pm_ca_policy
 /**
  * Change the policy to the one specified.
  *
- * @param kbdev  The kbase device structure for the device (must be a valid
- *               pointer)
- * @param policy The policy to change to (valid pointer returned from
- *               @ref kbase_pm_ca_list_policies)
+ * @kbdev:  The kbase device structure for the device (must be a valid pointer)
+ * @policy: The policy to change to (valid pointer returned from
+ *          @ref kbase_pm_ca_list_policies)
  */
 void kbase_pm_ca_set_policy(struct kbase_device *kbdev,
 				const struct kbase_pm_ca_policy *policy);
@@ -163,23 +163,20 @@ void kbase_pm_ca_set_policy(struct kbase_device *kbdev,
 /**
  * Retrieve a static list of the available policies.
  *
- * @param[out] policies An array pointer to take the list of policies. This may
- *                      be NULL. The contents of this array must not be
- *                      modified.
+ * @policies: An array pointer to take the list of policies. This may be NULL.
+ *            The contents of this array must not be modified.
  *
  * @return The number of policies
  */
 int
 kbase_pm_ca_list_policies(const struct kbase_pm_ca_policy * const **policies);
 
-
 /**
  * Get the current policy.
  *
  * Returns the policy that is currently active.
  *
- * @param kbdev The kbase device structure for the device (must be a valid
- *              pointer)
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
  *
  * @return The current policy
  */
@@ -188,9 +185,9 @@ const struct kbase_pm_policy *kbase_pm_get_policy(struct kbase_device *kbdev);
 /**
  * Change the policy to the one specified.
  *
- * @param kbdev  The kbase device structure for the device (must be a valid
+ * @kbdev:  The kbase device structure for the device (must be a valid
  *               pointer)
- * @param policy The policy to change to (valid pointer returned from
+ * @policy: The policy to change to (valid pointer returned from
  *               @ref kbase_pm_list_policies)
  */
 void kbase_pm_set_policy(struct kbase_device *kbdev,
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_hwaccess_time.h b/drivers/gpu/arm/bifrost/mali_kbase_hwaccess_time.h
index 94b7551b865e..8488a321c2ee 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_hwaccess_time.h
+++ b/drivers/gpu/arm/bifrost/mali_kbase_hwaccess_time.h
@@ -1,11 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  *
- * (C) COPYRIGHT 2014,2018-2020 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014, 2018-2021 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -15,13 +16,6 @@
  * You should have received a copy of the GNU General Public License
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
- *
- * SPDX-License-Identifier: GPL-2.0
- *
- */
-
-
-/**
  *
  */
 
@@ -30,10 +24,10 @@
 
 /**
  * kbase_backend_get_gpu_time() - Get current GPU time
- * @kbdev:		Device pointer
- * @cycle_counter:	Pointer to u64 to store cycle counter in
- * @system_time:	Pointer to u64 to store system time in
- * @ts:			Pointer to struct timespec to store current monotonic
+ * @kbdev:              Device pointer
+ * @cycle_counter:      Pointer to u64 to store cycle counter in.
+ * @system_time:        Pointer to u64 to store system time in
+ * @ts:                 Pointer to struct timespec to store current monotonic
  *			time in
  */
 void kbase_backend_get_gpu_time(struct kbase_device *kbdev, u64 *cycle_counter,
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_hwcnt.c b/drivers/gpu/arm/bifrost/mali_kbase_hwcnt.c
index 2708af78b292..6bd7d5f69663 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_hwcnt.c
+++ b/drivers/gpu/arm/bifrost/mali_kbase_hwcnt.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  *
  * (C) COPYRIGHT 2018, 2020 ARM Limited. All rights reserved.
@@ -5,7 +6,7 @@
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,8 +17,6 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 /*
@@ -28,9 +27,6 @@
 #include "mali_kbase_hwcnt_accumulator.h"
 #include "mali_kbase_hwcnt_backend.h"
 #include "mali_kbase_hwcnt_types.h"
-#include "mali_malisw.h"
-#include "mali_kbase_debug.h"
-#include "mali_kbase_linux.h"
 
 #include <linux/mutex.h>
 #include <linux/spinlock.h>
@@ -51,6 +47,7 @@ enum kbase_hwcnt_accum_state {
 
 /**
  * struct kbase_hwcnt_accumulator - Hardware counter accumulator structure.
+ * @metadata:               Pointer to immutable hwcnt metadata.
  * @backend:                Pointer to created counter backend.
  * @state:                  The current state of the accumulator.
  *                           - State transition from disabled->enabled or
@@ -89,6 +86,7 @@ enum kbase_hwcnt_accum_state {
  *                             accum_lock.
  */
 struct kbase_hwcnt_accumulator {
+	const struct kbase_hwcnt_metadata *metadata;
 	struct kbase_hwcnt_backend *backend;
 	enum kbase_hwcnt_accum_state state;
 	struct kbase_hwcnt_enable_map enable_map;
@@ -117,6 +115,10 @@ struct kbase_hwcnt_accumulator {
  *                    state_lock.
  *                  - Can be read while holding either lock.
  * @accum:         Hardware counter accumulator structure.
+ * @wq:            Centralized workqueue for users of hardware counters to
+ *                 submit async hardware counter related work. Never directly
+ *                 called, but it's expected that a lot of the functions in this
+ *                 API will end up called from the enqueued async work.
  */
 struct kbase_hwcnt_context {
 	const struct kbase_hwcnt_backend_interface *iface;
@@ -125,6 +127,7 @@ struct kbase_hwcnt_context {
 	struct mutex accum_lock;
 	bool accum_inited;
 	struct kbase_hwcnt_accumulator accum;
+	struct workqueue_struct *wq;
 };
 
 int kbase_hwcnt_context_init(
@@ -138,7 +141,7 @@ int kbase_hwcnt_context_init(
 
 	hctx = kzalloc(sizeof(*hctx), GFP_KERNEL);
 	if (!hctx)
-		return -ENOMEM;
+		goto err_alloc_hctx;
 
 	hctx->iface = iface;
 	spin_lock_init(&hctx->state_lock);
@@ -146,11 +149,21 @@ int kbase_hwcnt_context_init(
 	mutex_init(&hctx->accum_lock);
 	hctx->accum_inited = false;
 
+	hctx->wq =
+		alloc_workqueue("mali_kbase_hwcnt", WQ_HIGHPRI | WQ_UNBOUND, 0);
+	if (!hctx->wq)
+		goto err_alloc_workqueue;
+
 	*out_hctx = hctx;
 
 	return 0;
+
+	destroy_workqueue(hctx->wq);
+err_alloc_workqueue:
+	kfree(hctx);
+err_alloc_hctx:
+	return -ENOMEM;
 }
-KBASE_EXPORT_TEST_API(kbase_hwcnt_context_init);
 
 void kbase_hwcnt_context_term(struct kbase_hwcnt_context *hctx)
 {
@@ -159,9 +172,13 @@ void kbase_hwcnt_context_term(struct kbase_hwcnt_context *hctx)
 
 	/* Make sure we didn't leak the accumulator */
 	WARN_ON(hctx->accum_inited);
+
+	/* We don't expect any work to be pending on this workqueue.
+	 * Regardless, this will safely drain and complete the work.
+	 */
+	destroy_workqueue(hctx->wq);
 	kfree(hctx);
 }
-KBASE_EXPORT_TEST_API(kbase_hwcnt_context_term);
 
 /**
  * kbasep_hwcnt_accumulator_term() - Terminate the accumulator for the context.
@@ -197,22 +214,23 @@ static int kbasep_hwcnt_accumulator_init(struct kbase_hwcnt_context *hctx)
 	if (errcode)
 		goto error;
 
+	hctx->accum.metadata = hctx->iface->metadata(hctx->iface->info);
 	hctx->accum.state = ACCUM_STATE_ERROR;
 
-	errcode = kbase_hwcnt_enable_map_alloc(
-		hctx->iface->metadata, &hctx->accum.enable_map);
+	errcode = kbase_hwcnt_enable_map_alloc(hctx->accum.metadata,
+					       &hctx->accum.enable_map);
 	if (errcode)
 		goto error;
 
 	hctx->accum.enable_map_any_enabled = false;
 
-	errcode = kbase_hwcnt_dump_buffer_alloc(
-		hctx->iface->metadata, &hctx->accum.accum_buf);
+	errcode = kbase_hwcnt_dump_buffer_alloc(hctx->accum.metadata,
+						&hctx->accum.accum_buf);
 	if (errcode)
 		goto error;
 
-	errcode = kbase_hwcnt_enable_map_alloc(
-		hctx->iface->metadata, &hctx->accum.scratch_map);
+	errcode = kbase_hwcnt_enable_map_alloc(hctx->accum.metadata,
+					       &hctx->accum.scratch_map);
 	if (errcode)
 		goto error;
 
@@ -366,8 +384,8 @@ static int kbasep_hwcnt_accumulator_dump(
 	WARN_ON(!hctx);
 	WARN_ON(!ts_start_ns);
 	WARN_ON(!ts_end_ns);
-	WARN_ON(dump_buf && (dump_buf->metadata != hctx->iface->metadata));
-	WARN_ON(new_map && (new_map->metadata != hctx->iface->metadata));
+	WARN_ON(dump_buf && (dump_buf->metadata != hctx->accum.metadata));
+	WARN_ON(new_map && (new_map->metadata != hctx->accum.metadata));
 	WARN_ON(!hctx->accum_inited);
 	lockdep_assert_held(&hctx->accum_lock);
 
@@ -609,7 +627,6 @@ int kbase_hwcnt_accumulator_acquire(
 
 	return 0;
 }
-KBASE_EXPORT_TEST_API(kbase_hwcnt_accumulator_acquire);
 
 void kbase_hwcnt_accumulator_release(struct kbase_hwcnt_accumulator *accum)
 {
@@ -644,7 +661,6 @@ void kbase_hwcnt_accumulator_release(struct kbase_hwcnt_accumulator *accum)
 	spin_unlock_irqrestore(&hctx->state_lock, flags);
 	mutex_unlock(&hctx->accum_lock);
 }
-KBASE_EXPORT_TEST_API(kbase_hwcnt_accumulator_release);
 
 void kbase_hwcnt_context_disable(struct kbase_hwcnt_context *hctx)
 {
@@ -663,7 +679,6 @@ void kbase_hwcnt_context_disable(struct kbase_hwcnt_context *hctx)
 
 	mutex_unlock(&hctx->accum_lock);
 }
-KBASE_EXPORT_TEST_API(kbase_hwcnt_context_disable);
 
 bool kbase_hwcnt_context_disable_atomic(struct kbase_hwcnt_context *hctx)
 {
@@ -692,7 +707,6 @@ bool kbase_hwcnt_context_disable_atomic(struct kbase_hwcnt_context *hctx)
 
 	return atomic_disabled;
 }
-KBASE_EXPORT_TEST_API(kbase_hwcnt_context_disable_atomic);
 
 void kbase_hwcnt_context_enable(struct kbase_hwcnt_context *hctx)
 {
@@ -712,7 +726,6 @@ void kbase_hwcnt_context_enable(struct kbase_hwcnt_context *hctx)
 
 	spin_unlock_irqrestore(&hctx->state_lock, flags);
 }
-KBASE_EXPORT_TEST_API(kbase_hwcnt_context_enable);
 
 const struct kbase_hwcnt_metadata *kbase_hwcnt_context_metadata(
 	struct kbase_hwcnt_context *hctx)
@@ -720,9 +733,17 @@ const struct kbase_hwcnt_metadata *kbase_hwcnt_context_metadata(
 	if (!hctx)
 		return NULL;
 
-	return hctx->iface->metadata;
+	return hctx->iface->metadata(hctx->iface->info);
+}
+
+bool kbase_hwcnt_context_queue_work(struct kbase_hwcnt_context *hctx,
+				    struct work_struct *work)
+{
+	if (WARN_ON(!hctx) || WARN_ON(!work))
+		return false;
+
+	return queue_work(hctx->wq, work);
 }
-KBASE_EXPORT_TEST_API(kbase_hwcnt_context_metadata);
 
 int kbase_hwcnt_accumulator_set_counters(
 	struct kbase_hwcnt_accumulator *accum,
@@ -739,8 +760,8 @@ int kbase_hwcnt_accumulator_set_counters(
 
 	hctx = container_of(accum, struct kbase_hwcnt_context, accum);
 
-	if ((new_map->metadata != hctx->iface->metadata) ||
-	    (dump_buf && (dump_buf->metadata != hctx->iface->metadata)))
+	if ((new_map->metadata != hctx->accum.metadata) ||
+	    (dump_buf && (dump_buf->metadata != hctx->accum.metadata)))
 		return -EINVAL;
 
 	mutex_lock(&hctx->accum_lock);
@@ -752,7 +773,6 @@ int kbase_hwcnt_accumulator_set_counters(
 
 	return errcode;
 }
-KBASE_EXPORT_TEST_API(kbase_hwcnt_accumulator_set_counters);
 
 int kbase_hwcnt_accumulator_dump(
 	struct kbase_hwcnt_accumulator *accum,
@@ -768,7 +788,7 @@ int kbase_hwcnt_accumulator_dump(
 
 	hctx = container_of(accum, struct kbase_hwcnt_context, accum);
 
-	if (dump_buf && (dump_buf->metadata != hctx->iface->metadata))
+	if (dump_buf && (dump_buf->metadata != hctx->accum.metadata))
 		return -EINVAL;
 
 	mutex_lock(&hctx->accum_lock);
@@ -780,7 +800,6 @@ int kbase_hwcnt_accumulator_dump(
 
 	return errcode;
 }
-KBASE_EXPORT_TEST_API(kbase_hwcnt_accumulator_dump);
 
 u64 kbase_hwcnt_accumulator_timestamp_ns(struct kbase_hwcnt_accumulator *accum)
 {
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_hwcnt_accumulator.h b/drivers/gpu/arm/bifrost/mali_kbase_hwcnt_accumulator.h
index eb82ea4bfd14..fbd13a442691 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_hwcnt_accumulator.h
+++ b/drivers/gpu/arm/bifrost/mali_kbase_hwcnt_accumulator.h
@@ -1,11 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  *
- * (C) COPYRIGHT 2018 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2018, 2020 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,8 +17,6 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 /**
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_hwcnt_backend.h b/drivers/gpu/arm/bifrost/mali_kbase_hwcnt_backend.h
index 3a921b754b55..a3013a616579 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_hwcnt_backend.h
+++ b/drivers/gpu/arm/bifrost/mali_kbase_hwcnt_backend.h
@@ -1,11 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  *
- * (C) COPYRIGHT 2018, 2020 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2018, 2020-2021 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,8 +17,6 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 /*
@@ -41,11 +40,25 @@ struct kbase_hwcnt_dump_buffer;
 struct kbase_hwcnt_backend_info;
 
 /*
- * struct kbase_hwcnt_backend_info - Opaque pointer to a hardware counter
- *                                   backend, used to perform dumps.
+ * struct kbase_hwcnt_backend - Opaque pointer to a hardware counter
+ *                              backend, used to perform dumps.
  */
 struct kbase_hwcnt_backend;
 
+/*
+ * typedef kbase_hwcnt_backend_metadata_fn - Get the immutable hardware counter
+ *                                           metadata that describes the layout
+ *                                           of the counter data structures.
+ * @info:        Non-NULL pointer to backend info.
+ *
+ * Multiple calls to this function with the same info are guaranteed to return
+ * the same metadata object each time.
+ *
+ * Return: Non-NULL pointer to immutable hardware counter metadata.
+ */
+typedef const struct kbase_hwcnt_metadata *(*kbase_hwcnt_backend_metadata_fn)(
+	const struct kbase_hwcnt_backend_info *info);
+
 /**
  * typedef kbase_hwcnt_backend_init_fn - Initialise a counter backend.
  * @info:        Non-NULL pointer to backend info.
@@ -171,9 +184,9 @@ typedef int (*kbase_hwcnt_backend_dump_wait_fn)(
  * @accumulate:  True if counters should be accumulated into dump_buffer, rather
  *               than copied.
  *
- * If the backend is not enabled, returns an error.
- * If a dump is in progress (i.e. dump_wait has not yet returned successfully)
- * then the resultant contents of the dump buffer will be undefined.
+ * The resultant contents of the dump buffer are only well defined if a prior
+ * call to dump_wait returned successfully, and a new dump has not yet been
+ * requested by a call to dump_request.
  *
  * Return: 0 on success, else error code.
  */
@@ -186,9 +199,10 @@ typedef int (*kbase_hwcnt_backend_dump_get_fn)(
 /**
  * struct kbase_hwcnt_backend_interface - Hardware counter backend virtual
  *                                        interface.
- * @metadata:           Immutable hardware counter metadata.
  * @info:               Immutable info used to initialise an instance of the
  *                      backend.
+ * @metadata:           Function ptr to get the immutable hardware counter
+ *                      metadata.
  * @init:               Function ptr to initialise an instance of the backend.
  * @term:               Function ptr to terminate an instance of the backend.
  * @timestamp_ns:       Function ptr to get the current backend timestamp.
@@ -203,8 +217,8 @@ typedef int (*kbase_hwcnt_backend_dump_get_fn)(
  *                      buffer.
  */
 struct kbase_hwcnt_backend_interface {
-	const struct kbase_hwcnt_metadata *metadata;
 	const struct kbase_hwcnt_backend_info *info;
+	kbase_hwcnt_backend_metadata_fn metadata;
 	kbase_hwcnt_backend_init_fn init;
 	kbase_hwcnt_backend_term_fn term;
 	kbase_hwcnt_backend_timestamp_ns_fn timestamp_ns;
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_hwcnt_backend_csf.c b/drivers/gpu/arm/bifrost/mali_kbase_hwcnt_backend_csf.c
new file mode 100644
index 000000000000..d9592fd70529
--- /dev/null
+++ b/drivers/gpu/arm/bifrost/mali_kbase_hwcnt_backend_csf.c
@@ -0,0 +1,1859 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ *
+ * (C) COPYRIGHT 2021 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+#include "mali_kbase_hwcnt_backend_csf.h"
+#include "mali_kbase_hwcnt_gpu.h"
+#include "mali_kbase_hwcnt_types.h"
+
+#include <linux/log2.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/wait.h>
+#include <linux/workqueue.h>
+#include <linux/completion.h>
+
+#ifndef BASE_MAX_NR_CLOCKS_REGULATORS
+#define BASE_MAX_NR_CLOCKS_REGULATORS 2
+#endif
+
+/**
+ * enum kbase_hwcnt_backend_csf_dump_state - HWC CSF backend dumping states.
+ *
+ * @KBASE_HWCNT_BACKEND_CSF_DUMP_IDLE: Initial state, or the state if there is
+ * an error.
+ *
+ * @KBASE_HWCNT_BACKEND_CSF_DUMP_REQUESTED: A dump has been requested and we are
+ * waiting for an ACK. This ACK could come from PRFCNT_ACK, from
+ * PROTMODE_ENTER_ACK, or from an error occurring.
+ *
+ * @KBASE_HWCNT_BACKEND_CSF_DUMP_QUERYING_INSERT: Checking the insert
+ * immediately after receiving the ACK, so we know which index corresponds to
+ * the buffer we requested.
+ *
+ * @KBASE_HWCNT_BACKEND_CSF_DUMP_WORKER_LAUNCHED: The insert has been saved and
+ * now we have kicked off the worker.
+ *
+ * @KBASE_HWCNT_BACKEND_CSF_DUMP_ACCUMULATING: The worker is accumulating up to
+ * the saved insert and will then copy the delta to the user buffer to prepare
+ * for dump_get().
+ *
+ * @KBASE_HWCNT_BACKEND_CSF_DUMP_COMPLETED: The dump completed successfully.
+ *
+ * Valid state transitions:
+ * IDLE -> REQUESTED (on dump request)
+ * REQUESTED -> QUERYING_INSERT (on dump ack)
+ * QUERYING_INSERT -> WORKER_LAUNCHED (on worker submission)
+ * WORKER_LAUNCHED -> ACCUMULATING (while the worker is accumulating)
+ * ACCUMULATING -> COMPLETED (on accumulation completion)
+ * COMPLETED -> REQUESTED (on dump request)
+ * COMPLETED -> IDLE (on disable)
+ * ANY -> IDLE (on error)
+ */
+enum kbase_hwcnt_backend_csf_dump_state {
+	KBASE_HWCNT_BACKEND_CSF_DUMP_IDLE,
+	KBASE_HWCNT_BACKEND_CSF_DUMP_REQUESTED,
+	KBASE_HWCNT_BACKEND_CSF_DUMP_QUERYING_INSERT,
+	KBASE_HWCNT_BACKEND_CSF_DUMP_WORKER_LAUNCHED,
+	KBASE_HWCNT_BACKEND_CSF_DUMP_ACCUMULATING,
+	KBASE_HWCNT_BACKEND_CSF_DUMP_COMPLETED,
+};
+
+/**
+ * enum kbase_hwcnt_backend_csf_enable_state - HWC CSF backend enable states.
+ *
+ * @KBASE_HWCNT_BACKEND_CSF_DISABLED: Initial state, and the state when backend
+ * is disabled.
+ *
+ * @KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_ENABLED: Enable request is in
+ * progress, waiting for firmware acknowledgment.
+ *
+ * @KBASE_HWCNT_BACKEND_CSF_ENABLED: Enable request has been acknowledged,
+ * enable is done.
+ *
+ * @KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_DISABLED: Disable request is in
+ * progress, waiting for firmware acknowledgment.
+ *
+ * @KBASE_HWCNT_BACKEND_CSF_DISABLED_WAIT_FOR_WORKER: Disable request has been
+ * acknowledged, waiting for dump workers to be finished.
+ *
+ * @KBASE_HWCNT_BACKEND_CSF_UNRECOVERABLE_ERROR_WAIT_FOR_WORKER: An
+ * unrecoverable error happened, waiting for dump workers to be finished.
+ *
+ * @KBASE_HWCNT_BACKEND_CSF_UNRECOVERABLE_ERROR:  An unrecoverable error
+ * happened, and dump workers have finished, waiting for reset.
+ *
+ * Valid state transitions:
+ * DISABLED -> TRANSITIONING_TO_ENABLED (on enable)
+ * TRANSITIONING_TO_ENABLED -> ENABLED (on enable ack)
+ * ENABLED -> TRANSITIONING_TO_DISABLED (on disable)
+ * TRANSITIONING_TO_DISABLED -> DISABLED_WAIT_FOR_WORKER (on disable ack)
+ * DISABLED_WAIT_FOR_WORKER -> DISABLED (after workers are flushed)
+ * DISABLED -> UNRECOVERABLE_ERROR (on unrecoverable error)
+ * ANY but DISABLED -> UNRECOVERABLE_ERROR_WAIT_FOR_WORKER (on unrecoverable
+ *                                                          error)
+ * UNRECOVERABLE_ERROR -> DISABLED (on before reset)
+ */
+enum kbase_hwcnt_backend_csf_enable_state {
+	KBASE_HWCNT_BACKEND_CSF_DISABLED,
+	KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_ENABLED,
+	KBASE_HWCNT_BACKEND_CSF_ENABLED,
+	KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_DISABLED,
+	KBASE_HWCNT_BACKEND_CSF_DISABLED_WAIT_FOR_WORKER,
+	KBASE_HWCNT_BACKEND_CSF_UNRECOVERABLE_ERROR_WAIT_FOR_WORKER,
+	KBASE_HWCNT_BACKEND_CSF_UNRECOVERABLE_ERROR,
+};
+
+/**
+ * struct kbase_hwcnt_backend_csf_info - Information used to create an instance
+ *                                       of a CSF hardware counter backend.
+ * @backend:                      Pointer to access CSF backend.
+ * @fw_in_protected_mode:         True if FW is running in protected mode, else
+ *                                false.
+ * @unrecoverable_error_happened: True if an unrecoverable error happened, else
+ *                                false.
+ * @csf_if:                       CSF interface object pointer.
+ * @ring_buf_cnt:                 Dump buffer count in the ring buffer.
+ * @counter_set:                  The performance counter set to use.
+ * @metadata:                     Hardware counter metadata.
+ * @prfcnt_info:                  Performance counter information.
+ */
+struct kbase_hwcnt_backend_csf_info {
+	struct kbase_hwcnt_backend_csf *backend;
+	bool fw_in_protected_mode;
+	bool unrecoverable_error_happened;
+	struct kbase_hwcnt_backend_csf_if *csf_if;
+	u32 ring_buf_cnt;
+	enum kbase_hwcnt_set counter_set;
+	const struct kbase_hwcnt_metadata *metadata;
+	struct kbase_hwcnt_backend_csf_if_prfcnt_info prfcnt_info;
+};
+
+/**
+ * struct kbase_hwcnt_csf_physical_layout - HWC sample memory physical layout
+ *                                          information.
+ * @fe_cnt:             Front end block count.
+ * @tiler_cnt:          Tiler block count.
+ * @mmu_l2_cnt:         Memory system (MMU and L2 cache) block count.
+ * @shader_cnt:         Shader Core block count.
+ * @block_cnt:          Total block count (sum of all other block counts).
+ * @shader_avail_mask:  Bitmap of all shader cores in the system.
+ * @offset_enable_mask: Offset of enable mask in the block, in u32 values.
+ * @headers_per_block:  Number of header values per block.
+ * @counters_per_block: Number of counter values per block.
+ * @values_per_block:   Total number of u32 values per block.
+ */
+struct kbase_hwcnt_csf_physical_layout {
+	size_t fe_cnt;
+	size_t tiler_cnt;
+	size_t mmu_l2_cnt;
+	size_t shader_cnt;
+	size_t block_cnt;
+	u64 shader_avail_mask;
+	size_t offset_enable_mask;
+	size_t headers_per_block;
+	size_t counters_per_block;
+	size_t values_per_block;
+};
+
+/**
+ * struct kbase_hwcnt_backend_csf - Instance of a CSF hardware counter backend.
+ * @info:                       CSF Info used to create the backend.
+ * @dump_state:                 The dumping state of the backend.
+ * @enable_state:               The CSF backend internal enabled state.
+ * @insert_index_to_accumulate: The insert index in the ring buffer up to which
+ *                              samples need to be accumulated.
+ * @enable_state_waitq:         Wait queue object used to notify that an enable
+ *                              state change is done.
+ * @to_user_buf:                HWC sample buffer for client user.
+ * @accum_buf:                  HWC sample buffer used as an internal
+ *                              accumulator.
+ * @old_sample_buf:             HWC sample buffer to save the previous values
+ *                              for delta calculation.
+ * @ring_buf:                   Opaque pointer for ring buffer object.
+ * @ring_buf_cpu_base:          CPU base address of the allocated ring buffer.
+ * @clk_enable_map:             The enable map specifying enabled clock domains.
+ * @cycle_count_elapsed:        Cycle count elapsed for a given sample period.
+ * @prev_cycle_count:           Previous cycle count to calculate the cycle
+ *                              count for sample period.
+ * @phys_layout:                Physical memory layout information of HWC
+ *                              sample buffer.
+ * @dump_completed:             Completion signaled by the dump worker when
+ *                              it has finished accumulating up to the
+ *                              insert_index_to_accumulate.
+ *                              Should be initialized to the "complete" state.
+ * @hwc_dump_workq:             Single threaded work queue for HWC workers
+ *                              execution.
+ * @hwc_dump_work:              Worker to accumulate samples.
+ * @hwc_threshold_work:         Worker for consuming available samples when
+ *                              threshold interrupt raised.
+ */
+struct kbase_hwcnt_backend_csf {
+	struct kbase_hwcnt_backend_csf_info *info;
+	enum kbase_hwcnt_backend_csf_dump_state dump_state;
+	enum kbase_hwcnt_backend_csf_enable_state enable_state;
+	u32 insert_index_to_accumulate;
+	wait_queue_head_t enable_state_waitq;
+	u32 *to_user_buf;
+	u32 *accum_buf;
+	u32 *old_sample_buf;
+	struct kbase_hwcnt_backend_csf_if_ring_buf *ring_buf;
+	void *ring_buf_cpu_base;
+	u64 clk_enable_map;
+	u64 cycle_count_elapsed[BASE_MAX_NR_CLOCKS_REGULATORS];
+	u64 prev_cycle_count[BASE_MAX_NR_CLOCKS_REGULATORS];
+	struct kbase_hwcnt_csf_physical_layout phys_layout;
+	struct completion dump_completed;
+	struct workqueue_struct *hwc_dump_workq;
+	struct work_struct hwc_dump_work;
+	struct work_struct hwc_threshold_work;
+};
+
+bool kbasep_hwcnt_backend_csf_backend_exists(
+	struct kbase_hwcnt_backend_csf_info *csf_info)
+{
+	WARN_ON(!csf_info);
+	csf_info->csf_if->assert_lock_held(csf_info->csf_if->ctx);
+	return (csf_info->backend != NULL);
+}
+
+/**
+ * kbasep_hwcnt_backend_csf_cc_initial_sample() - Initialize cycle count
+ *                                                tracking.
+ *
+ * @backend_csf: Non-NULL pointer to backend.
+ * @enable_map:  Non-NULL pointer to enable map specifying enabled counters.
+ */
+static void kbasep_hwcnt_backend_csf_cc_initial_sample(
+	struct kbase_hwcnt_backend_csf *backend_csf,
+	const struct kbase_hwcnt_enable_map *enable_map)
+{
+	u64 clk_enable_map = enable_map->clk_enable_map;
+	u64 cycle_counts[BASE_MAX_NR_CLOCKS_REGULATORS];
+	size_t clk;
+
+	/* Read cycle count from CSF interface for both clock domains. */
+	backend_csf->info->csf_if->get_gpu_cycle_count(
+		backend_csf->info->csf_if->ctx, cycle_counts, clk_enable_map);
+
+	kbase_hwcnt_metadata_for_each_clock(enable_map->metadata, clk) {
+		if (kbase_hwcnt_clk_enable_map_enabled(clk_enable_map, clk))
+			backend_csf->prev_cycle_count[clk] = cycle_counts[clk];
+	}
+
+	/* Keep clk_enable_map for dump_request. */
+	backend_csf->clk_enable_map = clk_enable_map;
+}
+
+static void
+kbasep_hwcnt_backend_csf_cc_update(struct kbase_hwcnt_backend_csf *backend_csf)
+{
+	u64 cycle_counts[BASE_MAX_NR_CLOCKS_REGULATORS];
+	size_t clk;
+
+	backend_csf->info->csf_if->assert_lock_held(
+		backend_csf->info->csf_if->ctx);
+
+	backend_csf->info->csf_if->get_gpu_cycle_count(
+		backend_csf->info->csf_if->ctx, cycle_counts,
+		backend_csf->clk_enable_map);
+
+	kbase_hwcnt_metadata_for_each_clock(backend_csf->info->metadata, clk) {
+		if (kbase_hwcnt_clk_enable_map_enabled(
+			    backend_csf->clk_enable_map, clk)) {
+			backend_csf->cycle_count_elapsed[clk] =
+				cycle_counts[clk] -
+				backend_csf->prev_cycle_count[clk];
+			backend_csf->prev_cycle_count[clk] = cycle_counts[clk];
+		}
+	}
+}
+
+/* CSF backend implementation of kbase_hwcnt_backend_timestamp_ns_fn */
+static u64
+kbasep_hwcnt_backend_csf_timestamp_ns(struct kbase_hwcnt_backend *backend)
+{
+	struct kbase_hwcnt_backend_csf *backend_csf =
+		(struct kbase_hwcnt_backend_csf *)backend;
+
+	if (!backend_csf || !backend_csf->info || !backend_csf->info->csf_if)
+		return 0;
+
+	return backend_csf->info->csf_if->timestamp_ns(
+		backend_csf->info->csf_if->ctx);
+}
+
+/**
+ * kbasep_hwcnt_backend_csf_process_enable_map() - Process the enable_map to
+ *                                                 guarantee headers are enabled
+ *                                                 if any counter is required.
+ * @phys_enable_map: HWC physical enable map to be processed.
+ */
+static void kbasep_hwcnt_backend_csf_process_enable_map(
+	struct kbase_hwcnt_physical_enable_map *phys_enable_map)
+{
+	WARN_ON(!phys_enable_map);
+
+	/* Enable header if any counter is required from user, the header is
+	 * controlled by bit 0 of the enable mask.
+	 */
+	if (phys_enable_map->fe_bm)
+		phys_enable_map->fe_bm |= 1;
+
+	if (phys_enable_map->tiler_bm)
+		phys_enable_map->tiler_bm |= 1;
+
+	if (phys_enable_map->mmu_l2_bm)
+		phys_enable_map->mmu_l2_bm |= 1;
+
+	if (phys_enable_map->shader_bm)
+		phys_enable_map->shader_bm |= 1;
+}
+
+static void kbasep_hwcnt_backend_csf_init_layout(
+	const struct kbase_hwcnt_backend_csf_if_prfcnt_info *prfcnt_info,
+	struct kbase_hwcnt_csf_physical_layout *phys_layout)
+{
+	WARN_ON(!prfcnt_info);
+	WARN_ON(!phys_layout);
+
+	phys_layout->fe_cnt = 1;
+	phys_layout->tiler_cnt = 1;
+	phys_layout->mmu_l2_cnt = prfcnt_info->l2_count;
+	phys_layout->shader_cnt = fls64(prfcnt_info->core_mask);
+	phys_layout->block_cnt = phys_layout->fe_cnt + phys_layout->tiler_cnt +
+				 phys_layout->mmu_l2_cnt +
+				 phys_layout->shader_cnt;
+
+	phys_layout->shader_avail_mask = prfcnt_info->core_mask;
+
+	phys_layout->headers_per_block = KBASE_HWCNT_V5_HEADERS_PER_BLOCK;
+	phys_layout->counters_per_block = KBASE_HWCNT_V5_COUNTERS_PER_BLOCK;
+	phys_layout->values_per_block = KBASE_HWCNT_V5_VALUES_PER_BLOCK;
+	phys_layout->offset_enable_mask = KBASE_HWCNT_V5_PRFCNT_EN_HEADER;
+}
+
+/**
+ * kbasep_hwcnt_backend_csf_reset_internal_buffers() - Zero the user, the
+ *                                                     accumulation and the old
+ *                                                     sample buffers.
+ * @backend_csf: Non-NULL pointer to the CSF backend.
+ *
+ * Each buffer is cleared over prfcnt_info.dump_bytes bytes.
+ */
+static void kbasep_hwcnt_backend_csf_reset_internal_buffers(
+	struct kbase_hwcnt_backend_csf *backend_csf)
+{
+	const size_t dump_bytes = backend_csf->info->prfcnt_info.dump_bytes;
+
+	memset(backend_csf->to_user_buf, 0, dump_bytes);
+	memset(backend_csf->accum_buf, 0, dump_bytes);
+	memset(backend_csf->old_sample_buf, 0, dump_bytes);
+}
+
+/**
+ * kbasep_hwcnt_backend_csf_zero_sample_prfcnt_en_header() - Clear the enable
+ *                                                           mask header of
+ *                                                           every block in one
+ *                                                           sample.
+ * @backend_csf: Non-NULL pointer to the CSF backend; supplies the layout.
+ * @sample:      Start of the sample to modify.
+ */
+static void kbasep_hwcnt_backend_csf_zero_sample_prfcnt_en_header(
+	struct kbase_hwcnt_backend_csf *backend_csf, u32 *sample)
+{
+	const struct kbase_hwcnt_csf_physical_layout *layout =
+		&backend_csf->phys_layout;
+	u32 *block = sample;
+	u32 i;
+
+	/* Walk block by block, stepping a whole block of values each time. */
+	for (i = 0; i < layout->block_cnt; i++) {
+		block[layout->offset_enable_mask] = 0;
+		block += layout->values_per_block;
+	}
+}
+
+/**
+ * kbasep_hwcnt_backend_csf_zero_all_prfcnt_en_header() - Clear the enable mask
+ *                                                        headers of every
+ *                                                        sample in the ring
+ *                                                        buffer.
+ * @backend_csf: Non-NULL pointer to the CSF backend.
+ *
+ * Samples are dump_bytes apart, starting at the ring buffer CPU base address.
+ */
+static void kbasep_hwcnt_backend_csf_zero_all_prfcnt_en_header(
+	struct kbase_hwcnt_backend_csf *backend_csf)
+{
+	const size_t sample_bytes = backend_csf->info->prfcnt_info.dump_bytes;
+	char *cursor = (char *)backend_csf->ring_buf_cpu_base;
+	u32 i;
+
+	for (i = 0; i < backend_csf->info->ring_buf_cnt; i++) {
+		kbasep_hwcnt_backend_csf_zero_sample_prfcnt_en_header(
+			backend_csf, (u32 *)cursor);
+		cursor += sample_bytes;
+	}
+}
+
+/**
+ * kbasep_hwcnt_backend_csf_update_user_sample() - Move the accumulated values
+ *                                                 into the user buffer.
+ * @backend_csf: Non-NULL pointer to the CSF backend.
+ *
+ * The accumulation buffer is copied to the user buffer and then zeroed.
+ */
+static void kbasep_hwcnt_backend_csf_update_user_sample(
+	struct kbase_hwcnt_backend_csf *backend_csf)
+{
+	const size_t dump_bytes = backend_csf->info->prfcnt_info.dump_bytes;
+
+	/* Publish the accumulated data; the user picks it up from here. */
+	memcpy(backend_csf->to_user_buf, backend_csf->accum_buf, dump_bytes);
+
+	/* Start the next accumulation period (next request or threshold)
+	 * from zero.
+	 */
+	memset(backend_csf->accum_buf, 0, dump_bytes);
+}
+
+/**
+ * kbasep_hwcnt_backend_csf_accumulate_sample() - Fold one new sample into the
+ *                                                accumulation buffer.
+ * @phys_layout:      Physical layout describing the blocks of a sample.
+ * @dump_bytes:       Size of one sample in bytes; only used for the sanity
+ *                    checks at the end.
+ * @accum_buf:        Accumulation buffer to update.
+ * @old_sample_buf:   Previous sample.
+ * @new_sample_buf:   New sample to accumulate.
+ * @clearing_samples: True if the counters are cleared after each sample.
+ *
+ * Blocks whose enable mask in the new sample is zero are skipped. For all
+ * other blocks the headers are copied from the new sample and the counters
+ * are added to the accumulation buffer.
+ */
+static void kbasep_hwcnt_backend_csf_accumulate_sample(
+	const struct kbase_hwcnt_csf_physical_layout *phys_layout,
+	size_t dump_bytes, u32 *accum_buf, const u32 *old_sample_buf,
+	const u32 *new_sample_buf, bool clearing_samples)
+{
+	size_t blk, val;
+	const u32 *prev = old_sample_buf;
+	const u32 *cur = new_sample_buf;
+	u32 *acc = accum_buf;
+
+	/* All three cursors advance by one block per iteration, including
+	 * iterations that skip the block.
+	 */
+	for (blk = 0; blk < phys_layout->block_cnt;
+	     prev += phys_layout->values_per_block,
+	     cur += phys_layout->values_per_block,
+	     acc += phys_layout->values_per_block, blk++) {
+		const u32 prev_en = prev[phys_layout->offset_enable_mask];
+		const u32 cur_en = cur[phys_layout->offset_enable_mask];
+		bool use_delta;
+
+		/* Hardware block was unavailable or no counters were turned
+		 * on: nothing to accumulate.
+		 */
+		if (cur_en == 0)
+			continue;
+
+		/* The headers are always taken from the newest sample. */
+		memcpy(acc, cur,
+		       phys_layout->headers_per_block *
+			       KBASE_HWCNT_VALUE_BYTES);
+
+		/* The counter sampling method decides how to accumulate.
+		 *
+		 * The intention for CSF was that all HW uses counters which
+		 * wrap to zero at their maximum value. Combined with
+		 * non-clearing sampling this lets several concurrent users
+		 * request samples without disturbing each other, and the value
+		 * to add is the difference to the previous sample.
+		 *
+		 * Some early HW may not support wrapping counters; there the
+		 * counters are cleared on sample so they do not saturate, and
+		 * the new value is added as it is.
+		 *
+		 * If the block was previously unavailable (old enable mask is
+		 * zero) the previous values are known to be zero, so the new
+		 * value is added as it is in that case too.
+		 */
+		use_delta = !clearing_samples && (prev_en != 0);
+
+		for (val = phys_layout->headers_per_block;
+		     val < phys_layout->values_per_block; val++) {
+			if (use_delta)
+				acc[val] += cur[val] - prev[val];
+			else
+				acc[val] += cur[val];
+		}
+	}
+
+	/* Every cursor must have ended exactly one sample past its start. */
+	WARN_ON(prev != old_sample_buf + dump_bytes / KBASE_HWCNT_VALUE_BYTES);
+	WARN_ON(cur != new_sample_buf + dump_bytes / KBASE_HWCNT_VALUE_BYTES);
+	WARN_ON(acc != accum_buf + dump_bytes / KBASE_HWCNT_VALUE_BYTES);
+	(void)dump_bytes;
+}
+
+/**
+ * kbasep_hwcnt_backend_csf_accumulate_samples() - Accumulate a range of ring
+ *                                                 buffer samples and release
+ *                                                 them.
+ * @backend_csf:            Non-NULL pointer to the CSF backend.
+ * @extract_index_to_start: Raw index of the first sample to accumulate.
+ * @insert_index_to_stop:   Raw index at which to stop (not accumulated).
+ *
+ * Returns straight away when both indexes are equal.
+ */
+static void kbasep_hwcnt_backend_csf_accumulate_samples(
+	struct kbase_hwcnt_backend_csf *backend_csf, u32 extract_index_to_start,
+	u32 insert_index_to_stop)
+{
+	struct kbase_hwcnt_backend_csf_info *info = backend_csf->info;
+	u8 *ring_base = (u8 *)backend_csf->ring_buf_cpu_base;
+	const size_t ring_buf_cnt = info->ring_buf_cnt;
+	const size_t sample_bytes = info->prfcnt_info.dump_bytes;
+	bool clearing = info->prfcnt_info.clearing_samples;
+	u32 *prev_sample = backend_csf->old_sample_buf;
+	u32 *cur_sample;
+	unsigned long flags;
+	u32 raw_idx;
+
+	/* Nothing was produced in this range. */
+	if (extract_index_to_start == insert_index_to_stop)
+		return;
+
+	/* Make the sample data visible to the CPU before reading it. */
+	info->csf_if->ring_buf_sync(info->csf_if->ctx, backend_csf->ring_buf,
+				    extract_index_to_start,
+				    insert_index_to_stop, true);
+
+	/* The raw indexes are u32 and may wrap, hence '!=' rather than '<'. */
+	for (raw_idx = extract_index_to_start; raw_idx != insert_index_to_stop;
+	     raw_idx++) {
+		/* The ring buffer count must be a power of two, so masking
+		 * with (count - 1) is a modulo operation.
+		 */
+		const u32 buf_idx = raw_idx & (ring_buf_cnt - 1);
+
+		cur_sample = (u32 *)&ring_base[buf_idx * sample_bytes];
+
+		kbasep_hwcnt_backend_csf_accumulate_sample(
+			&backend_csf->phys_layout, sample_bytes,
+			backend_csf->accum_buf, prev_sample, cur_sample,
+			clearing);
+
+		prev_sample = cur_sample;
+	}
+
+	/* Keep a copy of the newest sample as the reference for next time. */
+	memcpy(backend_csf->old_sample_buf, cur_sample, sample_bytes);
+
+	/* Clear the prfcnt_en header of each consumed sample before it is
+	 * released.
+	 */
+	for (raw_idx = extract_index_to_start; raw_idx != insert_index_to_stop;
+	     raw_idx++) {
+		const u32 buf_idx = raw_idx & (ring_buf_cnt - 1);
+
+		kbasep_hwcnt_backend_csf_zero_sample_prfcnt_en_header(
+			backend_csf, (u32 *)&ring_base[buf_idx * sample_bytes]);
+	}
+
+	/* Sync the zeroed buffers to avoid coherency issues on future use. */
+	info->csf_if->ring_buf_sync(info->csf_if->ctx, backend_csf->ring_buf,
+				    extract_index_to_start,
+				    insert_index_to_stop, false);
+
+	/* Everything between the two indexes has been consumed: move the raw
+	 * extract index up to the insert index so that the sample buffers go
+	 * back to the ring buffer pool.
+	 */
+	info->csf_if->lock(info->csf_if->ctx, &flags);
+	info->csf_if->set_extract_index(info->csf_if->ctx,
+					insert_index_to_stop);
+	info->csf_if->unlock(info->csf_if->ctx, flags);
+}
+
+/**
+ * kbasep_hwcnt_backend_csf_change_es_and_wake_waiters() - Change the enable
+ *                                                         state and wake up
+ *                                                         the waiters.
+ * @backend_csf: Non-NULL pointer to the CSF backend.
+ * @new_state:   Enable state to switch to.
+ *
+ * Must be called with the CSF interface lock held. Nothing is woken up when
+ * the state does not actually change.
+ */
+static void kbasep_hwcnt_backend_csf_change_es_and_wake_waiters(
+	struct kbase_hwcnt_backend_csf *backend_csf,
+	enum kbase_hwcnt_backend_csf_enable_state new_state)
+{
+	backend_csf->info->csf_if->assert_lock_held(
+		backend_csf->info->csf_if->ctx);
+
+	if (backend_csf->enable_state == new_state)
+		return;
+
+	backend_csf->enable_state = new_state;
+	wake_up(&backend_csf->enable_state_waitq);
+}
+
+/**
+ * kbasep_hwcnt_backend_csf_dump_worker() - HWC dump worker.
+ * @work: Work structure, embedded in the backend as hwc_dump_work.
+ *
+ * Accumulates the ring buffer samples up to the insert index recorded for the
+ * dump request, copies the result into the user buffer and completes the
+ * dump.
+ */
+static void kbasep_hwcnt_backend_csf_dump_worker(struct work_struct *work)
+{
+	unsigned long flags;
+	struct kbase_hwcnt_backend_csf *backend_csf;
+	struct kbase_hwcnt_backend_csf_info *info;
+	u32 acc_stop_index;
+	u32 extract_index;
+	u32 insert_index;
+
+	WARN_ON(!work);
+	backend_csf = container_of(work, struct kbase_hwcnt_backend_csf,
+				   hwc_dump_work);
+	info = backend_csf->info;
+
+	info->csf_if->lock(info->csf_if->ctx, &flags);
+	/* The backend must not have been destroyed. */
+	WARN_ON(backend_csf != info->backend);
+
+	/* Bail out if the backend got disabled or hit an error while this
+	 * worker was being launched.
+	 */
+	if (backend_csf->enable_state != KBASE_HWCNT_BACKEND_CSF_ENABLED) {
+		WARN_ON(backend_csf->dump_state !=
+			KBASE_HWCNT_BACKEND_CSF_DUMP_IDLE);
+		WARN_ON(!completion_done(&backend_csf->dump_completed));
+		info->csf_if->unlock(info->csf_if->ctx, flags);
+		return;
+	}
+
+	WARN_ON(backend_csf->dump_state !=
+		KBASE_HWCNT_BACKEND_CSF_DUMP_WORKER_LAUNCHED);
+
+	backend_csf->dump_state = KBASE_HWCNT_BACKEND_CSF_DUMP_ACCUMULATING;
+	acc_stop_index = backend_csf->insert_index_to_accumulate;
+
+	/* Fetch the raw extract and insert indexes from the CSF interface. */
+	info->csf_if->get_indexes(info->csf_if->ctx, &extract_index,
+				  &insert_index);
+
+	info->csf_if->unlock(info->csf_if->ctx, flags);
+
+	/* Only accumulate up to the insert index that was saved for this
+	 * request, not up to the current one.
+	 */
+	kbasep_hwcnt_backend_csf_accumulate_samples(backend_csf, extract_index,
+						    acc_stop_index);
+
+	/* Move the result to the user buffer, so that a threshold interrupt
+	 * firing between now and get() leaves it untouched.
+	 */
+	kbasep_hwcnt_backend_csf_update_user_sample(backend_csf);
+
+	/* Dump done: take the lock again to move on to COMPLETED. */
+	info->csf_if->lock(info->csf_if->ctx, &flags);
+	/* The backend must not have been destroyed. */
+	WARN_ON(backend_csf != info->backend);
+
+	/* Bail out if the backend got disabled or hit an error while we were
+	 * accumulating.
+	 */
+	if (backend_csf->enable_state != KBASE_HWCNT_BACKEND_CSF_ENABLED) {
+		WARN_ON(backend_csf->dump_state !=
+			KBASE_HWCNT_BACKEND_CSF_DUMP_IDLE);
+		WARN_ON(!completion_done(&backend_csf->dump_completed));
+		info->csf_if->unlock(info->csf_if->ctx, flags);
+		return;
+	}
+
+	WARN_ON(backend_csf->dump_state !=
+		KBASE_HWCNT_BACKEND_CSF_DUMP_ACCUMULATING);
+
+	/* Mark the dump as completed and unblock anyone waiting for it. */
+	backend_csf->dump_state = KBASE_HWCNT_BACKEND_CSF_DUMP_COMPLETED;
+	complete_all(&backend_csf->dump_completed);
+	info->csf_if->unlock(info->csf_if->ctx, flags);
+}
+
+/**
+ * kbasep_hwcnt_backend_csf_threshold_worker() - Threshold worker.
+ *
+ * @work: Work structure, embedded in the backend as hwc_threshold_work.
+ *
+ * Runs after a HWC threshold interrupt and consumes all samples available in
+ * the ring buffer, unless the backend is not enabled or a dump is in progress.
+ */
+static void kbasep_hwcnt_backend_csf_threshold_worker(struct work_struct *work)
+{
+	unsigned long flags;
+	struct kbase_hwcnt_backend_csf *backend_csf;
+	struct kbase_hwcnt_backend_csf_info *info;
+	u32 extract_index;
+	u32 insert_index;
+	bool dump_in_progress;
+
+	WARN_ON(!work);
+
+	backend_csf = container_of(work, struct kbase_hwcnt_backend_csf,
+				   hwc_threshold_work);
+	info = backend_csf->info;
+
+	info->csf_if->lock(info->csf_if->ctx, &flags);
+
+	/* The backend must not have been destroyed. */
+	WARN_ON(backend_csf != info->backend);
+
+	/* Fetch the raw extract and insert indexes from the CSF interface. */
+	info->csf_if->get_indexes(info->csf_if->ctx, &extract_index,
+				  &insert_index);
+
+	/* Bail out if the backend got disabled or hit an error while this
+	 * worker was being launched.
+	 */
+	if (backend_csf->enable_state != KBASE_HWCNT_BACKEND_CSF_ENABLED) {
+		info->csf_if->unlock(info->csf_if->ctx, flags);
+		return;
+	}
+
+	/* Any dump state other than IDLE or COMPLETED means a concurrent dump
+	 * is in progress, which we must not interfere with.
+	 */
+	dump_in_progress =
+		(backend_csf->dump_state !=
+		 KBASE_HWCNT_BACKEND_CSF_DUMP_IDLE) &&
+		(backend_csf->dump_state !=
+		 KBASE_HWCNT_BACKEND_CSF_DUMP_COMPLETED);
+
+	info->csf_if->unlock(info->csf_if->ctx, flags);
+
+	if (dump_in_progress)
+		return;
+
+	/* Accumulate everything we possibly can. The insert index was read
+	 * right after taking the lock and before checking for a concurrent
+	 * dump. So if a dump was triggered between releasing the lock and
+	 * now, our insert index cannot exceed that dump's
+	 * insert_to_accumulate and we do not risk accumulating too much data.
+	 */
+	kbasep_hwcnt_backend_csf_accumulate_samples(backend_csf, extract_index,
+						    insert_index);
+
+	/* This is not a user dump request, so nobody needs waking up. */
+}
+
+/**
+ * kbase_hwcnt_backend_csf_submit_dump_worker() - Record the insert index and
+ *                                                queue the dump worker.
+ * @csf_info: Non-NULL pointer to the backend info.
+ *
+ * Must be called with the CSF interface lock held. The backend is expected to
+ * exist, to be enabled and to be in the QUERYING_INSERT dump state.
+ */
+static void kbase_hwcnt_backend_csf_submit_dump_worker(
+	struct kbase_hwcnt_backend_csf_info *csf_info)
+{
+	struct kbase_hwcnt_backend_csf *backend_csf;
+	u32 unused_extract_index;
+
+	WARN_ON(!csf_info);
+	csf_info->csf_if->assert_lock_held(csf_info->csf_if->ctx);
+
+	WARN_ON(!kbasep_hwcnt_backend_csf_backend_exists(csf_info));
+	backend_csf = csf_info->backend;
+
+	WARN_ON(backend_csf->enable_state != KBASE_HWCNT_BACKEND_CSF_ENABLED);
+	WARN_ON(backend_csf->dump_state !=
+		KBASE_HWCNT_BACKEND_CSF_DUMP_QUERYING_INSERT);
+
+	/* Remember the insert index now, so the dump worker accumulates only
+	 * the HWC data that belongs to this request. The extract index is not
+	 * kept: it has to be re-read when accumulating, to avoid re-reading
+	 * buffers that were already read and returned to the GPU.
+	 */
+	csf_info->csf_if->get_indexes(csf_info->csf_if->ctx,
+				      &unused_extract_index,
+				      &backend_csf->insert_index_to_accumulate);
+	backend_csf->dump_state = KBASE_HWCNT_BACKEND_CSF_DUMP_WORKER_LAUNCHED;
+
+	/* Hand the accumulation over to the work queue. */
+	queue_work(backend_csf->hwc_dump_workq, &backend_csf->hwc_dump_work);
+}
+
+/**
+ * kbasep_hwcnt_backend_csf_get_physical_enable() - Translate an enable map
+ *                                                  into the enable
+ *                                                  configuration passed to
+ *                                                  the CSF interface.
+ * @backend_csf: Non-NULL pointer to the CSF backend.
+ * @enable_map:  Enable map to translate.
+ * @enable:      Enable configuration to fill in.
+ */
+static void kbasep_hwcnt_backend_csf_get_physical_enable(
+	struct kbase_hwcnt_backend_csf *backend_csf,
+	const struct kbase_hwcnt_enable_map *enable_map,
+	struct kbase_hwcnt_backend_csf_if_enable *enable)
+{
+	struct kbase_hwcnt_physical_enable_map phys_map;
+	enum kbase_hwcnt_physical_set phys_set;
+
+	kbase_hwcnt_gpu_enable_map_to_physical(&phys_map, enable_map);
+
+	/* The block headers are needed for the delta calculation, so make
+	 * sure they are enabled wherever a counter is.
+	 */
+	kbasep_hwcnt_backend_csf_process_enable_map(&phys_map);
+
+	kbase_hwcnt_gpu_set_to_physical(&phys_set,
+					backend_csf->info->counter_set);
+
+	/* The processed map is what gets enabled at HW level. */
+	enable->counter_set = phys_set;
+	enable->clk_enable_map = enable_map->clk_enable_map;
+	enable->fe_bm = phys_map.fe_bm;
+	enable->tiler_bm = phys_map.tiler_bm;
+	enable->mmu_l2_bm = phys_map.mmu_l2_bm;
+	enable->shader_bm = phys_map.shader_bm;
+}
+
+/* CSF backend implementation of kbase_hwcnt_backend_dump_enable_nolock_fn.
+ *
+ * Must be called with the CSF interface lock held. Returns -EINVAL on bad
+ * arguments or mismatching metadata, -EIO if the backend is not in the
+ * DISABLED state, 0 otherwise.
+ */
+static int kbasep_hwcnt_backend_csf_dump_enable_nolock(
+	struct kbase_hwcnt_backend *backend,
+	const struct kbase_hwcnt_enable_map *enable_map)
+{
+	struct kbase_hwcnt_backend_csf *backend_csf =
+		(struct kbase_hwcnt_backend_csf *)backend;
+	struct kbase_hwcnt_backend_csf_info *info;
+	struct kbase_hwcnt_backend_csf_if_enable enable;
+
+	if (!backend_csf || !enable_map)
+		return -EINVAL;
+
+	info = backend_csf->info;
+	if (enable_map->metadata != info->metadata)
+		return -EINVAL;
+
+	info->csf_if->assert_lock_held(info->csf_if->ctx);
+
+	kbasep_hwcnt_backend_csf_get_physical_enable(backend_csf, enable_map,
+						     &enable);
+
+	/* Enabling is only allowed from the DISABLED state. */
+	if (backend_csf->enable_state != KBASE_HWCNT_BACKEND_CSF_DISABLED)
+		return -EIO;
+
+	backend_csf->dump_state = KBASE_HWCNT_BACKEND_CSF_DUMP_IDLE;
+	WARN_ON(!completion_done(&backend_csf->dump_completed));
+	kbasep_hwcnt_backend_csf_change_es_and_wake_waiters(
+		backend_csf, KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_ENABLED);
+
+	info->csf_if->dump_enable(info->csf_if->ctx, backend_csf->ring_buf,
+				  &enable);
+
+	kbasep_hwcnt_backend_csf_cc_initial_sample(backend_csf, enable_map);
+
+	return 0;
+}
+
+/* CSF backend implementation of kbase_hwcnt_backend_dump_enable_fn.
+ *
+ * Takes the CSF interface lock around the nolock variant.
+ */
+static int kbasep_hwcnt_backend_csf_dump_enable(
+	struct kbase_hwcnt_backend *backend,
+	const struct kbase_hwcnt_enable_map *enable_map)
+{
+	struct kbase_hwcnt_backend_csf *backend_csf =
+		(struct kbase_hwcnt_backend_csf *)backend;
+	struct kbase_hwcnt_backend_csf_info *info;
+	unsigned long flags;
+	int errcode;
+
+	if (!backend_csf)
+		return -EINVAL;
+
+	info = backend_csf->info;
+
+	info->csf_if->lock(info->csf_if->ctx, &flags);
+	errcode = kbasep_hwcnt_backend_csf_dump_enable_nolock(backend,
+							      enable_map);
+	info->csf_if->unlock(info->csf_if->ctx, flags);
+
+	return errcode;
+}
+
+/**
+ * kbasep_hwcnt_backend_csf_wait_enable_transition_complete() - Wait until the
+ *                                                              backend is in
+ *                                                              neither of the
+ *                                                              two
+ *                                                              transitioning
+ *                                                              enable states.
+ * @backend_csf: Non-NULL pointer to the CSF backend.
+ * @lock_flags:  Flags of the held CSF interface lock; rewritten each time the
+ *               lock is taken again.
+ *
+ * Must be called with the CSF interface lock held. The lock is dropped while
+ * sleeping and held again on return.
+ */
+static void kbasep_hwcnt_backend_csf_wait_enable_transition_complete(
+	struct kbase_hwcnt_backend_csf *backend_csf, unsigned long *lock_flags)
+{
+	struct kbase_hwcnt_backend_csf_info *info = backend_csf->info;
+
+	info->csf_if->assert_lock_held(info->csf_if->ctx);
+
+	for (;;) {
+		/* Checked under the lock: done once no transition is pending. */
+		if ((backend_csf->enable_state !=
+		     KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_ENABLED) &&
+		    (backend_csf->enable_state !=
+		     KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_DISABLED))
+			break;
+
+		info->csf_if->unlock(info->csf_if->ctx, *lock_flags);
+
+		wait_event(
+			backend_csf->enable_state_waitq,
+			(backend_csf->enable_state !=
+			 KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_ENABLED) &&
+				(backend_csf->enable_state !=
+				 KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_DISABLED));
+
+		/* Re-check with the lock held; the state may have changed
+		 * again after the wake up.
+		 */
+		info->csf_if->lock(info->csf_if->ctx, lock_flags);
+	}
+}
+
+/* CSF backend implementation of kbase_hwcnt_backend_dump_disable_fn.
+ *
+ * Waits for pending enable/disable transitions, disables the counters if they
+ * were enabled, flushes the dump work queue and resets all buffers so that
+ * the backend is ready for the next enable.
+ */
+static void
+kbasep_hwcnt_backend_csf_dump_disable(struct kbase_hwcnt_backend *backend)
+{
+	unsigned long flags;
+	struct kbase_hwcnt_backend_csf *backend_csf =
+		(struct kbase_hwcnt_backend_csf *)backend;
+	struct kbase_hwcnt_backend_csf_info *info;
+	bool do_disable = false;
+
+	WARN_ON(!backend_csf);
+
+	info = backend_csf->info;
+	info->csf_if->lock(info->csf_if->ctx, &flags);
+
+	/* Let any earlier enable or disable finish before doing anything. */
+	kbasep_hwcnt_backend_csf_wait_enable_transition_complete(backend_csf,
+								 &flags);
+
+	/* Already disabled, or in an unrecoverable error state: nothing left
+	 * to do.
+	 */
+	if (backend_csf->enable_state == KBASE_HWCNT_BACKEND_CSF_DISABLED ||
+	    backend_csf->enable_state ==
+		    KBASE_HWCNT_BACKEND_CSF_UNRECOVERABLE_ERROR) {
+		info->csf_if->unlock(info->csf_if->ctx, flags);
+		return;
+	}
+
+	if (backend_csf->enable_state == KBASE_HWCNT_BACKEND_CSF_ENABLED) {
+		kbasep_hwcnt_backend_csf_change_es_and_wake_waiters(
+			backend_csf,
+			KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_DISABLED);
+		backend_csf->dump_state = KBASE_HWCNT_BACKEND_CSF_DUMP_IDLE;
+		complete_all(&backend_csf->dump_completed);
+		/* The disable call is only issued when we were enabled before;
+		 * in every other case it has been made already.
+		 */
+		do_disable = true;
+	}
+
+	WARN_ON(backend_csf->dump_state != KBASE_HWCNT_BACKEND_CSF_DUMP_IDLE);
+	WARN_ON(!completion_done(&backend_csf->dump_completed));
+
+	info->csf_if->unlock(info->csf_if->ctx, flags);
+
+	/* Wait for all async work to finish. We have left the ENABLED state,
+	 * so no new work can be submitted concurrently.
+	 */
+	flush_workqueue(backend_csf->hwc_dump_workq);
+
+	info->csf_if->lock(info->csf_if->ctx, &flags);
+
+	if (do_disable)
+		info->csf_if->dump_disable(info->csf_if->ctx);
+
+	kbasep_hwcnt_backend_csf_wait_enable_transition_complete(backend_csf,
+								 &flags);
+
+	/* Leave the WAIT_FOR_WORKER state, now that the workers are flushed. */
+	if (backend_csf->enable_state ==
+	    KBASE_HWCNT_BACKEND_CSF_DISABLED_WAIT_FOR_WORKER) {
+		kbasep_hwcnt_backend_csf_change_es_and_wake_waiters(
+			backend_csf, KBASE_HWCNT_BACKEND_CSF_DISABLED);
+	} else if (backend_csf->enable_state ==
+		   KBASE_HWCNT_BACKEND_CSF_UNRECOVERABLE_ERROR_WAIT_FOR_WORKER) {
+		kbasep_hwcnt_backend_csf_change_es_and_wake_waiters(
+			backend_csf,
+			KBASE_HWCNT_BACKEND_CSF_UNRECOVERABLE_ERROR);
+	} else {
+		WARN_ON(true);
+	}
+
+	info->csf_if->unlock(info->csf_if->ctx, flags);
+
+	/* Zero the enable mask header of every buffer in the ring buffer to
+	 * prepare for the next enable.
+	 */
+	kbasep_hwcnt_backend_csf_zero_all_prfcnt_en_header(backend_csf);
+
+	/* Sync the zeroed buffers to avoid coherency issues on future use. */
+	info->csf_if->ring_buf_sync(info->csf_if->ctx, backend_csf->ring_buf,
+				    0, info->ring_buf_cnt, false);
+
+	/* Also clear the accumulator, old_sample_buf and the user sample for
+	 * the next enable.
+	 */
+	kbasep_hwcnt_backend_csf_reset_internal_buffers(backend_csf);
+}
+
+/* CSF backend implementation of kbase_hwcnt_backend_dump_request_fn.
+ *
+ * Returns -EINVAL for a NULL backend, -EIO when the backend is not enabled or
+ * a dump is already in flight, 0 otherwise. On success *dump_time_ns holds the
+ * timestamp taken for this request.
+ */
+static int
+kbasep_hwcnt_backend_csf_dump_request(struct kbase_hwcnt_backend *backend,
+				      u64 *dump_time_ns)
+{
+	unsigned long flags;
+	struct kbase_hwcnt_backend_csf *backend_csf =
+		(struct kbase_hwcnt_backend_csf *)backend;
+	struct kbase_hwcnt_backend_csf_info *info;
+	bool do_request = false;
+	int errcode = 0;
+
+	if (!backend_csf)
+		return -EINVAL;
+
+	info = backend_csf->info;
+	info->csf_if->lock(info->csf_if->ctx, &flags);
+
+	/* While transitioning to enabled there is nothing to accumulate and
+	 * the user dump buffer is already zeroed, so short circuit to the
+	 * DUMP_COMPLETED state.
+	 */
+	if (backend_csf->enable_state ==
+	    KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_ENABLED) {
+		backend_csf->dump_state =
+			KBASE_HWCNT_BACKEND_CSF_DUMP_COMPLETED;
+		*dump_time_ns = kbasep_hwcnt_backend_csf_timestamp_ns(backend);
+		kbasep_hwcnt_backend_csf_cc_update(backend_csf);
+		goto out_unlock;
+	}
+
+	/* In every other case the backend has to be enabled already. */
+	if (backend_csf->enable_state != KBASE_HWCNT_BACKEND_CSF_ENABLED) {
+		errcode = -EIO;
+		goto out_unlock;
+	}
+
+	/* This must be the first request since enable, or the previous dump
+	 * must have completed; otherwise we would cut into a running dump.
+	 */
+	if ((backend_csf->dump_state != KBASE_HWCNT_BACKEND_CSF_DUMP_IDLE) &&
+	    (backend_csf->dump_state !=
+	     KBASE_HWCNT_BACKEND_CSF_DUMP_COMPLETED)) {
+		/* HWC is disabled or another dump is ongoing, or we are on
+		 * fault.
+		 */
+		errcode = -EIO;
+		goto out_unlock;
+	}
+
+	/* Re-arm the completion so dump_wait() has something to wait on. */
+	reinit_completion(&backend_csf->dump_completed);
+
+	if ((backend_csf->enable_state == KBASE_HWCNT_BACKEND_CSF_ENABLED) &&
+	    !info->fw_in_protected_mode) {
+		/* Fully enabled and not in protected mode: issue a real
+		 * request.
+		 */
+		backend_csf->dump_state =
+			KBASE_HWCNT_BACKEND_CSF_DUMP_REQUESTED;
+		do_request = true;
+	} else {
+		/* No request and no ack to wait for: go straight to checking
+		 * the insert index and kicking off the worker.
+		 */
+		backend_csf->dump_state =
+			KBASE_HWCNT_BACKEND_CSF_DUMP_QUERYING_INSERT;
+	}
+
+	/* CSF firmware might enter protected mode now, but still call request.
+	 * That is fine, as we changed state while holding the lock, so the
+	 * protected mode enter function will query the insert and launch the
+	 * dumping worker.
+	 * At some point we will get the dump request ACK saying a dump is done,
+	 * but we can ignore it if we are not in the REQUESTED state and process
+	 * it in next round dumping worker.
+	 */
+
+	*dump_time_ns = kbasep_hwcnt_backend_csf_timestamp_ns(backend);
+	kbasep_hwcnt_backend_csf_cc_update(backend_csf);
+
+	if (do_request)
+		info->csf_if->dump_request(info->csf_if->ctx);
+	else
+		kbase_hwcnt_backend_csf_submit_dump_worker(info);
+
+out_unlock:
+	info->csf_if->unlock(info->csf_if->ctx, flags);
+	return errcode;
+}
+
+/* CSF backend implementation of kbase_hwcnt_backend_dump_wait_fn.
+ *
+ * Blocks until the dump completion is signalled. Returns 0 if the dump state
+ * is then COMPLETED, -EIO otherwise, and -EINVAL for a NULL backend.
+ */
+static int
+kbasep_hwcnt_backend_csf_dump_wait(struct kbase_hwcnt_backend *backend)
+{
+	struct kbase_hwcnt_backend_csf *backend_csf =
+		(struct kbase_hwcnt_backend_csf *)backend;
+	struct kbase_hwcnt_backend_csf_info *info;
+	unsigned long flags;
+	int errcode = -EIO;
+
+	if (!backend_csf)
+		return -EINVAL;
+
+	info = backend_csf->info;
+
+	wait_for_completion(&backend_csf->dump_completed);
+
+	info->csf_if->lock(info->csf_if->ctx, &flags);
+	/* The last dump only succeeded if it reached the COMPLETED state. */
+	if (backend_csf->dump_state == KBASE_HWCNT_BACKEND_CSF_DUMP_COMPLETED)
+		errcode = 0;
+	info->csf_if->unlock(info->csf_if->ctx, flags);
+
+	return errcode;
+}
+
+/* CSF backend implementation of kbase_hwcnt_backend_dump_clear_fn.
+ *
+ * Implemented as a dump request followed by a wait; the timestamp of the
+ * request is discarded.
+ */
+static int
+kbasep_hwcnt_backend_csf_dump_clear(struct kbase_hwcnt_backend *backend)
+{
+	struct kbase_hwcnt_backend_csf *backend_csf =
+		(struct kbase_hwcnt_backend_csf *)backend;
+	u64 unused_ts;
+	int errcode;
+
+	if (!backend_csf)
+		return -EINVAL;
+
+	/* Request a dump so that all current counters get cleared. */
+	errcode = kbasep_hwcnt_backend_csf_dump_request(backend, &unused_ts);
+	if (errcode)
+		return errcode;
+
+	/* Wait for the manual dump or auto dump to be done and for the
+	 * accumulator to be updated.
+	 */
+	return kbasep_hwcnt_backend_csf_dump_wait(backend);
+}
+
+/* CSF backend implementation of kbase_hwcnt_backend_dump_get_fn.
+ *
+ * Returns -EINVAL on NULL arguments or mismatching metadata, otherwise the
+ * result of kbase_hwcnt_csf_dump_get().
+ */
+static int kbasep_hwcnt_backend_csf_dump_get(
+	struct kbase_hwcnt_backend *backend,
+	struct kbase_hwcnt_dump_buffer *dst,
+	const struct kbase_hwcnt_enable_map *dst_enable_map, bool accumulate)
+{
+	struct kbase_hwcnt_backend_csf *backend_csf =
+		(struct kbase_hwcnt_backend_csf *)backend;
+	size_t clk;
+
+	if (!backend_csf || !dst || !dst_enable_map)
+		return -EINVAL;
+
+	/* Backend, destination and enable map must share one metadata. */
+	if ((backend_csf->info->metadata != dst->metadata) ||
+	    (dst_enable_map->metadata != dst->metadata))
+		return -EINVAL;
+
+	/* Hand out the elapsed cycle count of each enabled clock domain. */
+	kbase_hwcnt_metadata_for_each_clock(dst_enable_map->metadata, clk) {
+		if (kbase_hwcnt_clk_enable_map_enabled(
+			    dst_enable_map->clk_enable_map, clk)) {
+			dst->clk_cnt_buf[clk] =
+				backend_csf->cycle_count_elapsed[clk];
+		}
+	}
+
+	/* The current state is deliberately not checked: calling this
+	 * function without a prior successful dump_wait() is undefined, so
+	 * the user buffer is returned as it is.
+	 */
+	return kbase_hwcnt_csf_dump_get(dst, backend_csf->to_user_buf,
+					dst_enable_map, accumulate);
+}
+
+/**
+ * kbasep_hwcnt_backend_csf_destroy() - Destroy CSF backend.
+ * @backend_csf: Pointer to CSF backend to destroy, may be NULL.
+ *
+ * Releases the dump work queue, the ring buffer and the three internal sample
+ * buffers, then frees the backend itself.
+ *
+ * Intended to be callable on a backend in any state of partial construction:
+ * a work queue that was never allocated is skipped and kfree() accepts NULL.
+ * NOTE(review): ring_buf_free() is still called unconditionally and is
+ * assumed to cope with a ring buffer that was never allocated - confirm
+ * against the CSF interface implementation.
+ */
+static void
+kbasep_hwcnt_backend_csf_destroy(struct kbase_hwcnt_backend_csf *backend_csf)
+{
+	if (!backend_csf)
+		return;
+
+	/* destroy_workqueue() dereferences its argument, so it must not be
+	 * given a work queue that was never created.
+	 */
+	if (backend_csf->hwc_dump_workq)
+		destroy_workqueue(backend_csf->hwc_dump_workq);
+
+	backend_csf->info->csf_if->ring_buf_free(backend_csf->info->csf_if->ctx,
+						 backend_csf->ring_buf);
+
+	kfree(backend_csf->accum_buf);
+	backend_csf->accum_buf = NULL;
+
+	kfree(backend_csf->old_sample_buf);
+	backend_csf->old_sample_buf = NULL;
+
+	kfree(backend_csf->to_user_buf);
+	backend_csf->to_user_buf = NULL;
+
+	kfree(backend_csf);
+}
+
+/**
+ * kbasep_hwcnt_backend_csf_create() - Create a CSF backend instance.
+ *
+ * @csf_info:    Non-NULL pointer to backend info.
+ * @out_backend: Non-NULL pointer to where backend is stored on success.
+ *
+ * Allocates the backend, its accumulation, old sample and user buffers, the
+ * ring buffer and the dump work queue. The new backend starts in the DISABLED
+ * enable state and the IDLE dump state, with its dump completion signalled.
+ *
+ * On failure everything allocated so far is released and @out_backend is not
+ * written.
+ *
+ * Return: 0 on success, else error code.
+ */
+static int
+kbasep_hwcnt_backend_csf_create(struct kbase_hwcnt_backend_csf_info *csf_info,
+				struct kbase_hwcnt_backend_csf **out_backend)
+{
+	struct kbase_hwcnt_backend_csf *backend_csf = NULL;
+	int errcode = -ENOMEM;
+
+	WARN_ON(!csf_info);
+	WARN_ON(!out_backend);
+
+	backend_csf = kzalloc(sizeof(*backend_csf), GFP_KERNEL);
+	if (!backend_csf)
+		goto alloc_error;
+
+	backend_csf->info = csf_info;
+	kbasep_hwcnt_backend_csf_init_layout(&csf_info->prfcnt_info,
+					     &backend_csf->phys_layout);
+
+	backend_csf->accum_buf =
+		kzalloc(csf_info->prfcnt_info.dump_bytes, GFP_KERNEL);
+	if (!backend_csf->accum_buf)
+		goto err_alloc_acc_buf;
+
+	backend_csf->old_sample_buf =
+		kzalloc(csf_info->prfcnt_info.dump_bytes, GFP_KERNEL);
+	if (!backend_csf->old_sample_buf)
+		goto err_alloc_pre_sample_buf;
+
+	backend_csf->to_user_buf =
+		kzalloc(csf_info->prfcnt_info.dump_bytes, GFP_KERNEL);
+	if (!backend_csf->to_user_buf)
+		goto err_alloc_user_sample_buf;
+
+	/* From here on errcode holds the result of ring_buf_alloc(), i.e. 0
+	 * once the ring buffer has been allocated.
+	 */
+	errcode = csf_info->csf_if->ring_buf_alloc(
+		csf_info->csf_if->ctx, csf_info->ring_buf_cnt,
+		&backend_csf->ring_buf_cpu_base, &backend_csf->ring_buf);
+	if (errcode)
+		goto err_ring_buf_alloc;
+
+	/* Zero all performance enable header to prepare for first enable. */
+	kbasep_hwcnt_backend_csf_zero_all_prfcnt_en_header(backend_csf);
+
+	/* Sync zeroed buffers to avoid coherency issues on use. */
+	backend_csf->info->csf_if->ring_buf_sync(
+		backend_csf->info->csf_if->ctx, backend_csf->ring_buf, 0,
+		backend_csf->info->ring_buf_cnt, false);
+
+	init_completion(&backend_csf->dump_completed);
+
+	init_waitqueue_head(&backend_csf->enable_state_waitq);
+
+	/* Allocate a single threaded work queue for dump worker and threshold
+	 * worker.
+	 */
+	backend_csf->hwc_dump_workq =
+		alloc_workqueue("mali_hwc_dump_wq", WQ_HIGHPRI | WQ_UNBOUND, 1);
+	if (!backend_csf->hwc_dump_workq) {
+		/* errcode is still 0 from the successful ring_buf_alloc();
+		 * without this the failure would be reported as success while
+		 * the backend is freed and *out_backend is left unset.
+		 */
+		errcode = -ENOMEM;
+		goto err_alloc_workqueue;
+	}
+
+	INIT_WORK(&backend_csf->hwc_dump_work,
+		  kbasep_hwcnt_backend_csf_dump_worker);
+	INIT_WORK(&backend_csf->hwc_threshold_work,
+		  kbasep_hwcnt_backend_csf_threshold_worker);
+
+	backend_csf->enable_state = KBASE_HWCNT_BACKEND_CSF_DISABLED;
+	backend_csf->dump_state = KBASE_HWCNT_BACKEND_CSF_DUMP_IDLE;
+	/* No dump is pending, so waiters must not block. */
+	complete_all(&backend_csf->dump_completed);
+
+	*out_backend = backend_csf;
+	return 0;
+
+	/* Error unwinding: each label releases what was acquired before the
+	 * step that failed, in reverse order of acquisition.
+	 */
+err_alloc_workqueue:
+	backend_csf->info->csf_if->ring_buf_free(backend_csf->info->csf_if->ctx,
+						 backend_csf->ring_buf);
+err_ring_buf_alloc:
+	kfree(backend_csf->to_user_buf);
+	backend_csf->to_user_buf = NULL;
+err_alloc_user_sample_buf:
+	kfree(backend_csf->old_sample_buf);
+	backend_csf->old_sample_buf = NULL;
+err_alloc_pre_sample_buf:
+	kfree(backend_csf->accum_buf);
+	backend_csf->accum_buf = NULL;
+err_alloc_acc_buf:
+	kfree(backend_csf);
+alloc_error:
+	return errcode;
+}
+
+/* CSF backend implementation of kbase_hwcnt_backend_init_fn.
+ *
+ * Creates a backend and attaches it to the info object. Returns -EINVAL on
+ * NULL arguments, the error of the creation step if that fails, -EBUSY if the
+ * info object already has a backend attached, 0 otherwise.
+ */
+static int
+kbasep_hwcnt_backend_csf_init(const struct kbase_hwcnt_backend_info *info,
+			      struct kbase_hwcnt_backend **out_backend)
+{
+	struct kbase_hwcnt_backend_csf_info *csf_info =
+		(struct kbase_hwcnt_backend_csf_info *)info;
+	struct kbase_hwcnt_backend_csf *backend_csf = NULL;
+	unsigned long flags;
+	bool attached;
+	int errcode;
+
+	if (!info || !out_backend)
+		return -EINVAL;
+
+	/* Create the backend. */
+	errcode = kbasep_hwcnt_backend_csf_create(csf_info, &backend_csf);
+	if (errcode)
+		return errcode;
+
+	/* Attach the new backend to csf_info unless one is attached already.
+	 * The check and the attach happen under the CSF interface lock to
+	 * avoid concurrent initialization.
+	 */
+	backend_csf->info->csf_if->lock(backend_csf->info->csf_if->ctx, &flags);
+	attached = (csf_info->backend == NULL);
+	if (attached) {
+		csf_info->backend = backend_csf;
+		*out_backend = (struct kbase_hwcnt_backend *)backend_csf;
+		/* An error recorded before the backend existed is carried
+		 * over into its enable state.
+		 */
+		if (csf_info->unrecoverable_error_happened)
+			backend_csf->enable_state =
+				KBASE_HWCNT_BACKEND_CSF_UNRECOVERABLE_ERROR;
+	}
+	backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx,
+					  flags);
+
+	if (attached)
+		return 0;
+
+	/* A backend had been created before, so throw the new one away. This
+	 * does not happen as long as the client calls init() properly.
+	 */
+	kbasep_hwcnt_backend_csf_destroy(backend_csf);
+	return -EBUSY;
+}
+
+/* CSF backend implementation of kbase_hwcnt_backend_term_fn */
+static void kbasep_hwcnt_backend_csf_term(struct kbase_hwcnt_backend *backend)
+{
+	unsigned long flags;
+	struct kbase_hwcnt_backend_csf *backend_csf =
+		(struct kbase_hwcnt_backend_csf *)backend;
+
+	if (!backend)
+		return;
+
+	kbasep_hwcnt_backend_csf_dump_disable(backend);
+
+	/* Detach the backend from csf_info, with the csf_if lock held, so that
+	 * external notifications are no longer handled while we terminate.
+	 */
+	backend_csf->info->csf_if->lock(backend_csf->info->csf_if->ctx, &flags);
+	backend_csf->info->backend = NULL;
+	backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx,
+					  flags);
+
+	kbasep_hwcnt_backend_csf_destroy(backend_csf);
+}
+
+/**
+ * kbasep_hwcnt_backend_csf_info_destroy() - Destroy a CSF backend info.
+ * @info: Pointer to info to destroy.
+ *
+ * Can be safely called on a backend info in any state of partial
+ * construction, including NULL.
+ */
+static void kbasep_hwcnt_backend_csf_info_destroy(
+	const struct kbase_hwcnt_backend_csf_info *info)
+{
+	if (info == NULL)
+		return;
+
+	/* No backend may still be attached when the info is destroyed. */
+	WARN_ON(info->backend);
+
+	/* The metadata must have been terminated beforehand as well. */
+	WARN_ON(info->metadata);
+
+	kfree(info);
+}
+
+/**
+ * kbasep_hwcnt_backend_csf_info_create() - Create a CSF backend info.
+ * @csf_if:       Non-NULL pointer to a hwcnt backend CSF interface structure
+ *                used to create backend interface.
+ * @ring_buf_cnt: The buffer count of the CSF hwcnt backend ring buffer.
+ *                MUST be power of 2.
+ * @out_info:     Non-NULL pointer to where info is stored on success.
+ *
+ * Return: 0 on success, -ENOMEM if the info object cannot be allocated.
+ */
+static int kbasep_hwcnt_backend_csf_info_create(
+	struct kbase_hwcnt_backend_csf_if *csf_if, u32 ring_buf_cnt,
+	const struct kbase_hwcnt_backend_csf_info **out_info)
+{
+	struct kbase_hwcnt_backend_csf_info *info = NULL;
+
+	WARN_ON(!csf_if);
+	WARN_ON(!out_info);
+	WARN_ON(!is_power_of_2(ring_buf_cnt));
+
+	info = kzalloc(sizeof(*info), GFP_KERNEL);
+	if (!info)
+		return -ENOMEM;
+
+#if defined(CONFIG_MALI_BIFROST_PRFCNT_SET_SECONDARY)
+	info->counter_set = KBASE_HWCNT_SET_SECONDARY;
+#elif defined(CONFIG_MALI_PRFCNT_SET_TERTIARY)
+	info->counter_set = KBASE_HWCNT_SET_TERTIARY;
+#else
+	/* Default to primary */
+	info->counter_set = KBASE_HWCNT_SET_PRIMARY;
+#endif
+
+	info->backend = NULL;
+	info->csf_if = csf_if;
+	info->ring_buf_cnt = ring_buf_cnt;
+	info->fw_in_protected_mode = false;
+	info->unrecoverable_error_happened = false;
+
+	*out_info = info;
+
+	return 0;
+}
+
+/* CSF backend implementation of kbase_hwcnt_backend_metadata_fn */
+static const struct kbase_hwcnt_metadata *
+kbasep_hwcnt_backend_csf_metadata(const struct kbase_hwcnt_backend_info *info)
+{
+	if (!info)
+		return NULL;
+	/* Warn if no metadata is set; the NULL pointer is still returned. */
+	WARN_ON(!((const struct kbase_hwcnt_backend_csf_info *)info)->metadata);
+
+	return ((const struct kbase_hwcnt_backend_csf_info *)info)->metadata;
+}
+
+/* Move the backend into (or towards) the unrecoverable error state, unblock
+ * dump waiters and disable the counters if needed. Requires the csf_if lock.
+ */
+static void kbasep_hwcnt_backend_csf_handle_unrecoverable_error(
+	struct kbase_hwcnt_backend_csf *backend_csf)
+{
+	bool do_disable = false;
+
+	backend_csf->info->csf_if->assert_lock_held(
+		backend_csf->info->csf_if->ctx);
+
+	/* We are already in or transitioning to the unrecoverable error state.
+	 * Early out.
+	 */
+	if ((backend_csf->enable_state ==
+	     KBASE_HWCNT_BACKEND_CSF_UNRECOVERABLE_ERROR) ||
+	    (backend_csf->enable_state ==
+	     KBASE_HWCNT_BACKEND_CSF_UNRECOVERABLE_ERROR_WAIT_FOR_WORKER))
+		return;
+
+	/* If we are disabled, we know we have no pending workers, so skip the
+	 * waiting state.
+	 */
+	if (backend_csf->enable_state == KBASE_HWCNT_BACKEND_CSF_DISABLED) {
+		kbasep_hwcnt_backend_csf_change_es_and_wake_waiters(
+			backend_csf,
+			KBASE_HWCNT_BACKEND_CSF_UNRECOVERABLE_ERROR);
+		return;
+	}
+
+	/* Trigger a disable only if we are not already transitioning to
+	 * disabled; we don't want to disable twice if an unrecoverable error
+	 * happens while we are disabling.
+	 */
+	do_disable = (backend_csf->enable_state !=
+		      KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_DISABLED);
+
+	kbasep_hwcnt_backend_csf_change_es_and_wake_waiters(
+		backend_csf,
+		KBASE_HWCNT_BACKEND_CSF_UNRECOVERABLE_ERROR_WAIT_FOR_WORKER);
+
+	/* Transition the dump to the IDLE state and unblock any waiters. The
+	 * IDLE state signifies an error.
+	 */
+	backend_csf->dump_state = KBASE_HWCNT_BACKEND_CSF_DUMP_IDLE;
+	complete_all(&backend_csf->dump_completed);
+
+	/* Issue the disable decided above, before enable_state was changed. */
+	if (do_disable)
+		backend_csf->info->csf_if->dump_disable(
+			backend_csf->info->csf_if->ctx);
+}
+
+/* Handle a recoverable error. An enabled backend starts transitioning to
+ * disabled, an error while transitioning to enabled is escalated to
+ * unrecoverable, and any other state is left untouched. Requires the lock.
+ */
+static void kbasep_hwcnt_backend_csf_handle_recoverable_error(
+	struct kbase_hwcnt_backend_csf *backend_csf)
+{
+	backend_csf->info->csf_if->assert_lock_held(
+		backend_csf->info->csf_if->ctx);
+
+	switch (backend_csf->enable_state) {
+	case KBASE_HWCNT_BACKEND_CSF_DISABLED:
+	case KBASE_HWCNT_BACKEND_CSF_DISABLED_WAIT_FOR_WORKER:
+	case KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_DISABLED:
+	case KBASE_HWCNT_BACKEND_CSF_UNRECOVERABLE_ERROR:
+	case KBASE_HWCNT_BACKEND_CSF_UNRECOVERABLE_ERROR_WAIT_FOR_WORKER:
+		/* Already disabled, disabling or in error: nothing to do. */
+		return;
+	case KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_ENABLED:
+		/* A seemingly recoverable error that occurs while we are
+		 * transitioning to enabled is probably unrecoverable.
+		 */
+		kbasep_hwcnt_backend_csf_handle_unrecoverable_error(
+			backend_csf);
+		return;
+	case KBASE_HWCNT_BACKEND_CSF_ENABLED:
+		/* Start transitioning to the disabled state. We can't wait for
+		 * it as this recoverable error might be triggered from an
+		 * interrupt. The wait will be done in the eventual call to
+		 * disable().
+		 */
+		kbasep_hwcnt_backend_csf_change_es_and_wake_waiters(
+			backend_csf,
+			KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_DISABLED);
+		/* Unblock dump waiters; the IDLE state signifies an error. */
+		backend_csf->dump_state = KBASE_HWCNT_BACKEND_CSF_DUMP_IDLE;
+		complete_all(&backend_csf->dump_completed);
+
+		backend_csf->info->csf_if->dump_disable(
+			backend_csf->info->csf_if->ctx);
+		return;
+	}
+}
+
+void kbase_hwcnt_backend_csf_protm_entered(
+	struct kbase_hwcnt_backend_interface *iface)
+{
+	struct kbase_hwcnt_backend_csf_info *csf_info;
+
+	csf_info = (struct kbase_hwcnt_backend_csf_info *)iface->info;
+	csf_info->csf_if->assert_lock_held(csf_info->csf_if->ctx);
+	csf_info->fw_in_protected_mode = true;
+
+	/* If a dump request is currently pending, on_prfcnt_sample() triggers
+	 * collection of the protected mode entry auto-sample.
+	 */
+	kbase_hwcnt_backend_csf_on_prfcnt_sample(iface);
+}
+
+void kbase_hwcnt_backend_csf_protm_exited(
+	struct kbase_hwcnt_backend_interface *iface)
+{
+	struct kbase_hwcnt_backend_csf_info *csf_info =
+		(struct kbase_hwcnt_backend_csf_info *)iface->info;
+
+	csf_info->csf_if->assert_lock_held(csf_info->csf_if->ctx);
+	/* Record that protected mode has been left. */
+	csf_info->fw_in_protected_mode = false;
+}
+
+void kbase_hwcnt_backend_csf_on_unrecoverable_error(
+	struct kbase_hwcnt_backend_interface *iface)
+{
+	unsigned long flags;
+	struct kbase_hwcnt_backend_csf_info *csf_info =
+		(struct kbase_hwcnt_backend_csf_info *)iface->info;
+
+	csf_info->csf_if->lock(csf_info->csf_if->ctx, &flags);
+	/* Record the error on the info object first, so that it is kept even
+	 * when no backend exists.
+	 */
+	csf_info->unrecoverable_error_happened = true;
+
+	/* Only an existing backend has state that needs updating. */
+	if (kbasep_hwcnt_backend_csf_backend_exists(csf_info))
+		kbasep_hwcnt_backend_csf_handle_unrecoverable_error(
+			csf_info->backend);
+
+	csf_info->csf_if->unlock(csf_info->csf_if->ctx, flags);
+}
+
+/* Called before a reset: clear the latched error flag, force an existing
+ * backend towards disabled, and leave the unrecoverable error state.
+ */
+void kbase_hwcnt_backend_csf_on_before_reset(
+	struct kbase_hwcnt_backend_interface *iface)
+{
+	unsigned long flags;
+	struct kbase_hwcnt_backend_csf_info *csf_info;
+	struct kbase_hwcnt_backend_csf *backend_csf;
+
+	csf_info = (struct kbase_hwcnt_backend_csf_info *)iface->info;
+
+	csf_info->csf_if->lock(csf_info->csf_if->ctx, &flags);
+	csf_info->unrecoverable_error_happened = false;
+	/* Early out if the backend does not exist. */
+	if (!kbasep_hwcnt_backend_csf_backend_exists(csf_info)) {
+		csf_info->csf_if->unlock(csf_info->csf_if->ctx, flags);
+		return;
+	}
+	backend_csf = csf_info->backend;
+
+	if ((backend_csf->enable_state != KBASE_HWCNT_BACKEND_CSF_DISABLED) &&
+	    (backend_csf->enable_state !=
+	     KBASE_HWCNT_BACKEND_CSF_UNRECOVERABLE_ERROR)) {
+		/* Before a reset we should already be disabled (else we lose
+		 * data) or in the unrecoverable error state, with any workers
+		 * waited for. If not, fire off one more disable to make sure
+		 * everything is turned off before the power is pulled. We
+		 * can't wait for this disable to complete, but it doesn't
+		 * really matter, as the power is being pulled.
+		 */
+		kbasep_hwcnt_backend_csf_handle_unrecoverable_error(
+			csf_info->backend);
+	}
+
+	/* A reset is the only way to exit the unrecoverable error state. */
+	if (backend_csf->enable_state ==
+	    KBASE_HWCNT_BACKEND_CSF_UNRECOVERABLE_ERROR) {
+		kbasep_hwcnt_backend_csf_change_es_and_wake_waiters(
+			backend_csf, KBASE_HWCNT_BACKEND_CSF_DISABLED);
+	}
+
+	csf_info->csf_if->unlock(csf_info->csf_if->ctx, flags);
+}
+
+void kbase_hwcnt_backend_csf_on_prfcnt_sample(
+	struct kbase_hwcnt_backend_interface *iface)
+{
+	struct kbase_hwcnt_backend_csf_info *csf_info =
+		(struct kbase_hwcnt_backend_csf_info *)iface->info;
+	struct kbase_hwcnt_backend_csf *backend_csf;
+
+	csf_info->csf_if->assert_lock_held(csf_info->csf_if->ctx);
+
+	/* Early out if the backend does not exist. */
+	if (!kbasep_hwcnt_backend_csf_backend_exists(csf_info))
+		return;
+	backend_csf = csf_info->backend;
+
+	/* A sample that arrives while the dump state is not REQUESTED is
+	 * skipped here; it is processed in the next dump_request.
+	 */
+	if (backend_csf->dump_state != KBASE_HWCNT_BACKEND_CSF_DUMP_REQUESTED)
+		return;
+	/* Advance the dump state and submit the dump worker. */
+	backend_csf->dump_state = KBASE_HWCNT_BACKEND_CSF_DUMP_QUERYING_INSERT;
+	kbase_hwcnt_backend_csf_submit_dump_worker(csf_info);
+}
+
+void kbase_hwcnt_backend_csf_on_prfcnt_threshold(
+	struct kbase_hwcnt_backend_interface *iface)
+{
+	struct kbase_hwcnt_backend_csf_info *csf_info =
+		(struct kbase_hwcnt_backend_csf_info *)iface->info;
+	struct kbase_hwcnt_backend_csf *backend_csf;
+
+	csf_info->csf_if->assert_lock_held(csf_info->csf_if->ctx);
+
+	/* Early out if the backend does not exist. */
+	if (!kbasep_hwcnt_backend_csf_backend_exists(csf_info))
+		return;
+	backend_csf = csf_info->backend;
+
+	if (backend_csf->enable_state != KBASE_HWCNT_BACKEND_CSF_ENABLED)
+		return;
+
+	/* Queue the threshold work to consume the available samples. */
+	queue_work(backend_csf->hwc_dump_workq,
+		   &backend_csf->hwc_threshold_work);
+}
+
+void kbase_hwcnt_backend_csf_on_prfcnt_overflow(
+	struct kbase_hwcnt_backend_interface *iface)
+{
+	struct kbase_hwcnt_backend_csf_info *csf_info =
+		(struct kbase_hwcnt_backend_csf_info *)iface->info;
+
+	csf_info->csf_if->assert_lock_held(csf_info->csf_if->ctx);
+
+	/* Early out if the backend does not exist. */
+	if (!kbasep_hwcnt_backend_csf_backend_exists(csf_info))
+		return;
+
+	/* An overflow is treated as a recoverable error, so the backend
+	 * starts transitioning to the disabled state.
+	 * Handling it while staying enabled would be possible, but an
+	 * overflow is never expected in a real system, so complex recovery
+	 * code is not worth implementing when we can simply turn ourselves
+	 * off for a while instead.
+	 */
+	kbasep_hwcnt_backend_csf_handle_recoverable_error(csf_info->backend);
+}
+
+void kbase_hwcnt_backend_csf_on_prfcnt_enable(
+	struct kbase_hwcnt_backend_interface *iface)
+{
+	struct kbase_hwcnt_backend_csf_info *csf_info =
+		(struct kbase_hwcnt_backend_csf_info *)iface->info;
+	struct kbase_hwcnt_backend_csf *backend_csf;
+
+	csf_info->csf_if->assert_lock_held(csf_info->csf_if->ctx);
+
+	/* Early out if the backend does not exist. */
+	if (!kbasep_hwcnt_backend_csf_backend_exists(csf_info))
+		return;
+	backend_csf = csf_info->backend;
+
+	switch (backend_csf->enable_state) {
+	case KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_ENABLED:
+		kbasep_hwcnt_backend_csf_change_es_and_wake_waiters(
+			backend_csf, KBASE_HWCNT_BACKEND_CSF_ENABLED);
+		break;
+	case KBASE_HWCNT_BACKEND_CSF_ENABLED:
+		/* Unexpected, but already in the right state: ignore. */
+		break;
+	default:
+		/* Unexpected state change, assume everything is broken until
+		 * we reset.
+		 */
+		kbasep_hwcnt_backend_csf_handle_unrecoverable_error(
+			backend_csf);
+		break;
+	}
+}
+
+void kbase_hwcnt_backend_csf_on_prfcnt_disable(
+	struct kbase_hwcnt_backend_interface *iface)
+{
+	struct kbase_hwcnt_backend_csf_info *csf_info =
+		(struct kbase_hwcnt_backend_csf_info *)iface->info;
+	struct kbase_hwcnt_backend_csf *backend_csf;
+
+	csf_info->csf_if->assert_lock_held(csf_info->csf_if->ctx);
+
+	/* Early out if the backend does not exist. */
+	if (!kbasep_hwcnt_backend_csf_backend_exists(csf_info))
+		return;
+	backend_csf = csf_info->backend;
+
+	switch (backend_csf->enable_state) {
+	case KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_DISABLED:
+		kbasep_hwcnt_backend_csf_change_es_and_wake_waiters(
+			backend_csf,
+			KBASE_HWCNT_BACKEND_CSF_DISABLED_WAIT_FOR_WORKER);
+		break;
+	case KBASE_HWCNT_BACKEND_CSF_DISABLED:
+		/* Unexpected, but already in the right state: ignore. */
+		break;
+	default:
+		/* Unexpected state change, assume everything is broken until
+		 * we reset.
+		 */
+		kbasep_hwcnt_backend_csf_handle_unrecoverable_error(
+			backend_csf);
+		break;
+	}
+}
+
+/* Query the counter info via csf_if and create the backend's metadata. */
+int kbase_hwcnt_backend_csf_metadata_init(
+	struct kbase_hwcnt_backend_interface *iface)
+{
+	int errcode;
+	struct kbase_hwcnt_backend_csf_info *csf_info;
+	struct kbase_hwcnt_gpu_info gpu_info;
+
+	if (!iface)
+		return -EINVAL;
+
+	csf_info = (struct kbase_hwcnt_backend_csf_info *)iface->info;
+
+	/* Warn and fail instead of calling through a NULL callback. */
+	if (WARN_ON(!csf_info->csf_if->get_prfcnt_info))
+		return -EINVAL;
+
+	csf_info->csf_if->get_prfcnt_info(csf_info->csf_if->ctx,
+					  &csf_info->prfcnt_info);
+
+	/* Clock domains must not outnumber the supported clock regulators. */
+	if (csf_info->prfcnt_info.clk_cnt > BASE_MAX_NR_CLOCKS_REGULATORS)
+		return -EIO;
+
+	gpu_info.l2_count = csf_info->prfcnt_info.l2_count;
+	gpu_info.core_mask = csf_info->prfcnt_info.core_mask;
+	gpu_info.clk_cnt = csf_info->prfcnt_info.clk_cnt;
+	errcode = kbase_hwcnt_csf_metadata_create(
+		&gpu_info, csf_info->counter_set, &csf_info->metadata);
+	if (errcode)
+		return errcode;
+
+	/* Dump abstraction size should be exactly the same size and layout as
+	 * the physical dump size, for backwards compatibility.
+	 */
+	WARN_ON(csf_info->prfcnt_info.dump_bytes !=
+		csf_info->metadata->dump_buf_bytes);
+
+	return 0;
+}
+
+void kbase_hwcnt_backend_csf_metadata_term(
+	struct kbase_hwcnt_backend_interface *iface)
+{
+	struct kbase_hwcnt_backend_csf_info *csf_info;
+
+	if (iface == NULL)
+		return;
+
+	csf_info = (struct kbase_hwcnt_backend_csf_info *)iface->info;
+	if (csf_info->metadata == NULL)
+		return;
+	kbase_hwcnt_csf_metadata_destroy(csf_info->metadata);
+	csf_info->metadata = NULL;
+}
+
+int kbase_hwcnt_backend_csf_create(struct kbase_hwcnt_backend_csf_if *csf_if,
+				   u32 ring_buf_cnt,
+				   struct kbase_hwcnt_backend_interface *iface)
+{
+	int errcode;
+	const struct kbase_hwcnt_backend_csf_info *info = NULL;
+
+	/* Reject missing pointers and non-power-of-2 buffer counts. */
+	if (!iface || !csf_if || !is_power_of_2(ring_buf_cnt))
+		return -EINVAL;
+
+	errcode = kbasep_hwcnt_backend_csf_info_create(csf_if, ring_buf_cnt,
+						       &info);
+	if (errcode)
+		return errcode;
+
+	/* Publish the info object and the CSF implementations of the
+	 * backend entry points through the interface.
+	 */
+	iface->info = (struct kbase_hwcnt_backend_info *)info;
+	iface->metadata = kbasep_hwcnt_backend_csf_metadata;
+	iface->init = kbasep_hwcnt_backend_csf_init;
+	iface->term = kbasep_hwcnt_backend_csf_term;
+	iface->timestamp_ns = kbasep_hwcnt_backend_csf_timestamp_ns;
+	iface->dump_enable = kbasep_hwcnt_backend_csf_dump_enable;
+	iface->dump_enable_nolock = kbasep_hwcnt_backend_csf_dump_enable_nolock;
+	iface->dump_disable = kbasep_hwcnt_backend_csf_dump_disable;
+	iface->dump_clear = kbasep_hwcnt_backend_csf_dump_clear;
+	iface->dump_request = kbasep_hwcnt_backend_csf_dump_request;
+	iface->dump_wait = kbasep_hwcnt_backend_csf_dump_wait;
+	iface->dump_get = kbasep_hwcnt_backend_csf_dump_get;
+
+	return 0;
+}
+
+void kbase_hwcnt_backend_csf_destroy(struct kbase_hwcnt_backend_interface *iface)
+{
+	if (iface == NULL)
+		return;
+	/* Free the info first, then zero @iface so a repeated destroy is safe. */
+	kbasep_hwcnt_backend_csf_info_destroy(
+		(const struct kbase_hwcnt_backend_csf_info *)iface->info);
+	memset(iface, 0, sizeof(*iface));
+}
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_hwcnt_backend_csf.h b/drivers/gpu/arm/bifrost/mali_kbase_hwcnt_backend_csf.h
new file mode 100644
index 000000000000..75062744753a
--- /dev/null
+++ b/drivers/gpu/arm/bifrost/mali_kbase_hwcnt_backend_csf.h
@@ -0,0 +1,162 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ *
+ * (C) COPYRIGHT 2021 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+/*
+ * Concrete implementation of mali_kbase_hwcnt_backend interface for CSF
+ * backend.
+ */
+
+#ifndef _KBASE_HWCNT_BACKEND_CSF_H_
+#define _KBASE_HWCNT_BACKEND_CSF_H_
+
+#include "mali_kbase_hwcnt_backend.h"
+#include "mali_kbase_hwcnt_backend_csf_if.h"
+
+/**
+ * kbase_hwcnt_backend_csf_create() - Create a CSF hardware counter backend
+ *                                    interface.
+ * @csf_if:       Non-NULL pointer to a hwcnt backend CSF interface structure
+ *                used to create backend interface.
+ * @ring_buf_cnt: The buffer count of CSF hwcnt backend, used when allocating
+ *                the ring buffer. MUST be a power of 2.
+ * @iface:        Non-NULL pointer to backend interface structure that is filled
+ *                in on creation success.
+ *
+ * Calls to iface->dump_enable_nolock() require the CSF Scheduler IRQ lock.
+ *
+ * Return: 0 on success, else error code.
+ */
+int kbase_hwcnt_backend_csf_create(struct kbase_hwcnt_backend_csf_if *csf_if,
+				   u32 ring_buf_cnt,
+				   struct kbase_hwcnt_backend_interface *iface);
+
+/**
+ * kbase_hwcnt_backend_csf_metadata_init() - Initialize the metadata for a CSF
+ *                                           hardware counter backend.
+ * @iface: Non-NULL pointer to backend interface structure.
+ * Return: 0 on success, else error code.
+ */
+int kbase_hwcnt_backend_csf_metadata_init(
+	struct kbase_hwcnt_backend_interface *iface);
+
+/**
+ * kbase_hwcnt_backend_csf_metadata_term() - Terminate the metadata for a CSF
+ *                                           hardware counter backend.
+ * @iface: Non-NULL pointer to backend interface structure.
+ */
+void kbase_hwcnt_backend_csf_metadata_term(
+	struct kbase_hwcnt_backend_interface *iface);
+
+/**
+ * kbase_hwcnt_backend_csf_destroy() - Destroy a CSF hardware counter backend
+ *                                     interface.
+ * @iface: Pointer to interface to destroy.
+ *
+ * Can be safely called on an all-zeroed interface, or on an already destroyed
+ * interface.
+ */
+void kbase_hwcnt_backend_csf_destroy(
+	struct kbase_hwcnt_backend_interface *iface);
+
+/**
+ * kbase_hwcnt_backend_csf_protm_entered() - CSF HWC backend function to receive
+ *                                           notification that protected mode
+ *                                           has been entered.
+ * @iface: Non-NULL pointer to HWC backend interface.
+ */
+void kbase_hwcnt_backend_csf_protm_entered(
+	struct kbase_hwcnt_backend_interface *iface);
+
+/**
+ * kbase_hwcnt_backend_csf_protm_exited() - CSF HWC backend function to receive
+ *                                          notification that protected mode has
+ *                                          been exited.
+ * @iface: Non-NULL pointer to HWC backend interface.
+ */
+void kbase_hwcnt_backend_csf_protm_exited(
+	struct kbase_hwcnt_backend_interface *iface);
+
+/**
+ * kbase_hwcnt_backend_csf_on_unrecoverable_error() - CSF HWC backend function
+ *                                                    called when unrecoverable
+ *                                                    errors are detected.
+ * @iface: Non-NULL pointer to HWC backend interface.
+ *
+ * This should be called on encountering errors that can only be recovered from
+ * with reset, or that may put HWC logic in state that could result in hang. For
+ * example, on bus error, or when FW becomes unresponsive.
+ */
+void kbase_hwcnt_backend_csf_on_unrecoverable_error(
+	struct kbase_hwcnt_backend_interface *iface);
+
+/**
+ * kbase_hwcnt_backend_csf_on_before_reset() - CSF HWC backend function to be
+ *                                             called immediately before a
+ *                                             reset. Takes us out of the
+ *                                             unrecoverable error state, if we
+ *                                             were in it.
+ * @iface: Non-NULL pointer to HWC backend interface.
+ */
+void kbase_hwcnt_backend_csf_on_before_reset(
+	struct kbase_hwcnt_backend_interface *iface);
+
+/**
+ * kbase_hwcnt_backend_csf_on_prfcnt_sample() - CSF performance counter sample
+ *                                              complete interrupt handler.
+ * @iface: Non-NULL pointer to HWC backend interface.
+ */
+void kbase_hwcnt_backend_csf_on_prfcnt_sample(
+	struct kbase_hwcnt_backend_interface *iface);
+
+/**
+ * kbase_hwcnt_backend_csf_on_prfcnt_threshold() - CSF performance counter
+ *                                                 buffer reach threshold
+ *                                                 interrupt handler.
+ * @iface: Non-NULL pointer to HWC backend interface.
+ */
+void kbase_hwcnt_backend_csf_on_prfcnt_threshold(
+	struct kbase_hwcnt_backend_interface *iface);
+
+/**
+ * kbase_hwcnt_backend_csf_on_prfcnt_overflow() - CSF performance counter buffer
+ *                                                overflow interrupt handler.
+ * @iface: Non-NULL pointer to HWC backend interface.
+ */
+void kbase_hwcnt_backend_csf_on_prfcnt_overflow(
+	struct kbase_hwcnt_backend_interface *iface);
+
+/**
+ * kbase_hwcnt_backend_csf_on_prfcnt_enable() - CSF performance counter enabled
+ *                                              interrupt handler.
+ * @iface: Non-NULL pointer to HWC backend interface.
+ */
+void kbase_hwcnt_backend_csf_on_prfcnt_enable(
+	struct kbase_hwcnt_backend_interface *iface);
+
+/**
+ * kbase_hwcnt_backend_csf_on_prfcnt_disable() - CSF performance counter
+ *                                               disabled interrupt handler.
+ * @iface: Non-NULL pointer to HWC backend interface.
+ */
+void kbase_hwcnt_backend_csf_on_prfcnt_disable(
+	struct kbase_hwcnt_backend_interface *iface);
+
+#endif /* _KBASE_HWCNT_BACKEND_CSF_H_ */
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_hwcnt_backend_csf_if.h b/drivers/gpu/arm/bifrost/mali_kbase_hwcnt_backend_csf_if.h
new file mode 100644
index 000000000000..b4ddd31d3cb0
--- /dev/null
+++ b/drivers/gpu/arm/bifrost/mali_kbase_hwcnt_backend_csf_if.h
@@ -0,0 +1,307 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ *
+ * (C) COPYRIGHT 2021 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+/*
+ * Virtual interface for CSF hardware counter backend.
+ */
+
+#ifndef _KBASE_HWCNT_BACKEND_CSF_IF_H_
+#define _KBASE_HWCNT_BACKEND_CSF_IF_H_
+
+#include <linux/types.h>
+
+/**
+ * struct kbase_hwcnt_backend_csf_if_ctx - Opaque pointer to a CSF interface
+ *                                         context.
+ */
+struct kbase_hwcnt_backend_csf_if_ctx;
+
+/**
+ * struct kbase_hwcnt_backend_csf_if_ring_buf - Opaque pointer to a CSF
+ *                                              interface ring buffer.
+ */
+struct kbase_hwcnt_backend_csf_if_ring_buf;
+
+/**
+ * struct kbase_hwcnt_backend_csf_if_enable - enable hardware counter collection
+ *                                            structure.
+ * @fe_bm:          Front End counters selection bitmask.
+ * @shader_bm:      Shader counters selection bitmask.
+ * @tiler_bm:       Tiler counters selection bitmask.
+ * @mmu_l2_bm:      MMU_L2 counters selection bitmask.
+ * @counter_set:    The performance counter set to enable.
+ * @clk_enable_map: A u64 bitfield, each bit of which enables the cycle
+ *                  counter for a given clock domain.
+ */
+struct kbase_hwcnt_backend_csf_if_enable {
+	u32 fe_bm;
+	u32 shader_bm;
+	u32 tiler_bm;
+	u32 mmu_l2_bm;
+	u8 counter_set;
+	u64 clk_enable_map;
+};
+
+/**
+ * struct kbase_hwcnt_backend_csf_if_prfcnt_info - Performance counter
+ *                                                 information.
+ * @dump_bytes:       Bytes of GPU memory required to perform a performance
+ *                    counter dump.
+ * @l2_count:         The MMU L2 cache count.
+ * @core_mask:        Shader core mask.
+ * @clk_cnt:          Clock domain count in the system.
+ * @clearing_samples: True if the counters are cleared after each sample is
+ *                    taken, false otherwise.
+ */
+struct kbase_hwcnt_backend_csf_if_prfcnt_info {
+	size_t dump_bytes;
+	size_t l2_count;
+	u64 core_mask;
+	u8 clk_cnt;
+	bool clearing_samples;
+};
+
+/**
+ * typedef kbase_hwcnt_backend_csf_if_assert_lock_held_fn - Assert that the
+ *                                                          backend spinlock is
+ *                                                          held.
+ * @ctx: Non-NULL pointer to a CSF context.
+ */
+typedef void (*kbase_hwcnt_backend_csf_if_assert_lock_held_fn)(
+	struct kbase_hwcnt_backend_csf_if_ctx *ctx);
+
+/**
+ * typedef kbase_hwcnt_backend_csf_if_lock_fn - Acquire backend spinlock.
+ *
+ * @ctx:   Non-NULL pointer to a CSF context.
+ * @flags: Pointer to the memory location that would store the previous
+ *         interrupt state.
+ */
+typedef void (*kbase_hwcnt_backend_csf_if_lock_fn)(
+	struct kbase_hwcnt_backend_csf_if_ctx *ctx, unsigned long *flags);
+
+/**
+ * typedef kbase_hwcnt_backend_csf_if_unlock_fn - Release backend spinlock.
+ *
+ * @ctx:   Non-NULL pointer to a CSF context.
+ * @flags: Previously stored interrupt state when Scheduler interrupt
+ *         spinlock was acquired.
+ */
+typedef void (*kbase_hwcnt_backend_csf_if_unlock_fn)(
+	struct kbase_hwcnt_backend_csf_if_ctx *ctx, unsigned long flags);
+
+/**
+ * typedef kbase_hwcnt_backend_csf_if_get_prfcnt_info_fn - Get performance
+ *                                                         counter information.
+ * @ctx:          Non-NULL pointer to a CSF context.
+ * @prfcnt_info:  Non-NULL pointer to struct where performance counter
+ *                information should be stored.
+ */
+typedef void (*kbase_hwcnt_backend_csf_if_get_prfcnt_info_fn)(
+	struct kbase_hwcnt_backend_csf_if_ctx *ctx,
+	struct kbase_hwcnt_backend_csf_if_prfcnt_info *prfcnt_info);
+
+/**
+ * typedef kbase_hwcnt_backend_csf_if_ring_buf_alloc_fn - Allocate a ring buffer
+ *                                                        for CSF interface.
+ * @ctx:           Non-NULL pointer to a CSF context.
+ * @buf_count:     The buffer count in the ring buffer to be allocated,
+ *                 MUST be power of 2.
+ * @cpu_dump_base: Non-NULL pointer to where ring buffer CPU base address is
+ *                 stored on success.
+ * @ring_buf:      Non-NULL pointer to where ring buffer is stored on success.
+ *
+ * A ring buffer is needed by the CSF interface for both manual and automatic
+ * HWC samples. The buffer count in the ring buffer MUST be a power of 2 to
+ * meet the hardware requirement.
+ *
+ * Return: 0 on success, else error code.
+ */
+typedef int (*kbase_hwcnt_backend_csf_if_ring_buf_alloc_fn)(
+	struct kbase_hwcnt_backend_csf_if_ctx *ctx, u32 buf_count,
+	void **cpu_dump_base,
+	struct kbase_hwcnt_backend_csf_if_ring_buf **ring_buf);
+
+/**
+ * typedef kbase_hwcnt_backend_csf_if_ring_buf_sync_fn - Sync HWC dump buffers
+ *                                                       memory.
+ * @ctx:             Non-NULL pointer to a CSF context.
+ * @ring_buf:        Non-NULL pointer to the ring buffer.
+ * @buf_index_first: The first buffer index in the ring buffer to be synced,
+ *                   inclusive.
+ * @buf_index_last:  The last buffer index in the ring buffer to be synced,
+ *                   exclusive.
+ * @for_cpu:         The direction of sync to be applied, set to true when CPU
+ *                   cache needs invalidating before reading the buffer, and set
+ *                   to false after CPU writes to flush these before this memory
+ *                   is overwritten by the GPU.
+ *
+ * Flush cached HWC dump buffer data to ensure that all writes from GPU and CPU
+ * are correctly observed.
+ */
+typedef void (*kbase_hwcnt_backend_csf_if_ring_buf_sync_fn)(
+	struct kbase_hwcnt_backend_csf_if_ctx *ctx,
+	struct kbase_hwcnt_backend_csf_if_ring_buf *ring_buf,
+	u32 buf_index_first, u32 buf_index_last, bool for_cpu);
+
+/**
+ * typedef kbase_hwcnt_backend_csf_if_ring_buf_free_fn - Free a ring buffer for
+ *                                                       the CSF interface.
+ *
+ * @ctx:      Non-NULL pointer to a CSF interface context.
+ * @ring_buf: Non-NULL pointer to the ring buffer to be freed.
+ */
+typedef void (*kbase_hwcnt_backend_csf_if_ring_buf_free_fn)(
+	struct kbase_hwcnt_backend_csf_if_ctx *ctx,
+	struct kbase_hwcnt_backend_csf_if_ring_buf *ring_buf);
+
+/**
+ * typedef kbase_hwcnt_backend_csf_if_timestamp_ns_fn - Get the current
+ *                                                      timestamp of the CSF
+ *                                                      interface.
+ * @ctx: Non-NULL pointer to a CSF interface context.
+ *
+ * Return: Current CSF interface timestamp, in nanoseconds.
+ */
+typedef u64 (*kbase_hwcnt_backend_csf_if_timestamp_ns_fn)(
+	struct kbase_hwcnt_backend_csf_if_ctx *ctx);
+
+/**
+ * typedef kbase_hwcnt_backend_csf_if_dump_enable_fn - Setup and enable hardware
+ *                                                     counter in CSF interface.
+ * @ctx:      Non-NULL pointer to a CSF interface context.
+ * @ring_buf: Non-NULL pointer to the ring buffer used to set up the HWC.
+ * @enable:   Non-NULL pointer to the enable map of HWC.
+ *
+ * Requires the lock to be held by the caller.
+ */
+typedef void (*kbase_hwcnt_backend_csf_if_dump_enable_fn)(
+	struct kbase_hwcnt_backend_csf_if_ctx *ctx,
+	struct kbase_hwcnt_backend_csf_if_ring_buf *ring_buf,
+	struct kbase_hwcnt_backend_csf_if_enable *enable);
+
+/**
+ * typedef kbase_hwcnt_backend_csf_if_dump_disable_fn - Disable hardware counter
+ *                                                      in CSF interface.
+ * @ctx: Non-NULL pointer to a CSF interface context.
+ *
+ * Requires the lock to be held by the caller.
+ */
+typedef void (*kbase_hwcnt_backend_csf_if_dump_disable_fn)(
+	struct kbase_hwcnt_backend_csf_if_ctx *ctx);
+
+/**
+ * typedef kbase_hwcnt_backend_csf_if_dump_request_fn - Request a HWC dump.
+ *
+ * @ctx: Non-NULL pointer to a CSF interface context.
+ *
+ * Requires the lock to be held by the caller.
+ */
+typedef void (*kbase_hwcnt_backend_csf_if_dump_request_fn)(
+	struct kbase_hwcnt_backend_csf_if_ctx *ctx);
+
+/**
+ * typedef kbase_hwcnt_backend_csf_if_get_indexes_fn - Get current extract and
+ *                                                     insert indexes of the
+ *                                                     ring buffer.
+ *
+ * @ctx:           Non-NULL pointer to a CSF interface context.
+ * @extract_index: Non-NULL pointer where the current extract index is saved.
+ * @insert_index:  Non-NULL pointer where the current insert index is saved.
+ *
+ * Requires the lock to be held by the caller.
+ */
+typedef void (*kbase_hwcnt_backend_csf_if_get_indexes_fn)(
+	struct kbase_hwcnt_backend_csf_if_ctx *ctx, u32 *extract_index,
+	u32 *insert_index);
+
+/**
+ * typedef kbase_hwcnt_backend_csf_if_set_extract_index_fn - Update the extract
+ *                                                           index of the ring
+ *                                                           buffer.
+ *
+ * @ctx:            Non-NULL pointer to a CSF interface context.
+ * @extract_index:  New extract index to be set.
+ *
+ * Requires the lock to be held by the caller.
+ */
+typedef void (*kbase_hwcnt_backend_csf_if_set_extract_index_fn)(
+	struct kbase_hwcnt_backend_csf_if_ctx *ctx, u32 extract_index);
+
+/**
+ * typedef kbase_hwcnt_backend_csf_if_get_gpu_cycle_count_fn - Get the current
+ *                                                             GPU cycle count.
+ * @ctx:            Non-NULL pointer to a CSF interface context.
+ * @cycle_counts:   Non-NULL pointer to an array where cycle counts are saved,
+ *                  the array size should be at least as big as the number of
+ *                  clock domains returned by get_prfcnt_info interface.
+ * @clk_enable_map: A bitfield in which each set bit specifies an enabled clock
+ *                  domain.
+ *
+ * Requires the lock to be held by the caller.
+ */
+typedef void (*kbase_hwcnt_backend_csf_if_get_gpu_cycle_count_fn)(
+	struct kbase_hwcnt_backend_csf_if_ctx *ctx, u64 *cycle_counts,
+	u64 clk_enable_map);
+
+/**
+ * struct kbase_hwcnt_backend_csf_if - Hardware counter backend CSF virtual
+ *                                     interface.
+ * @ctx:                 CSF interface context.
+ * @assert_lock_held:    Function ptr to assert backend spinlock is held.
+ * @lock:                Function ptr to acquire backend spinlock.
+ * @unlock:              Function ptr to release backend spinlock.
+ * @get_prfcnt_info:     Function ptr to get performance counter related
+ *                       information.
+ * @ring_buf_alloc:      Function ptr to allocate ring buffer for CSF HWC.
+ * @ring_buf_sync:       Function ptr to sync ring buffer memory, either for
+ *                       the CPU or for the GPU.
+ * @ring_buf_free:       Function ptr to free ring buffer for CSF HWC.
+ * @timestamp_ns:        Function ptr to get the current CSF interface
+ *                       timestamp.
+ * @dump_enable:         Function ptr to enable dumping, called with the
+ *                       backend spinlock already held.
+ * @dump_disable:        Function ptr to disable dumping.
+ * @dump_request:        Function ptr to request a dump.
+ * @get_indexes:         Function ptr to get extract and insert indexes of the
+ *                       ring buffer.
+ * @set_extract_index:   Function ptr to set extract index of ring buffer.
+ * @get_gpu_cycle_count: Function ptr to get the GPU cycle count.
+ */
+struct kbase_hwcnt_backend_csf_if {
+	struct kbase_hwcnt_backend_csf_if_ctx *ctx;
+	kbase_hwcnt_backend_csf_if_assert_lock_held_fn assert_lock_held;
+	kbase_hwcnt_backend_csf_if_lock_fn lock;
+	kbase_hwcnt_backend_csf_if_unlock_fn unlock;
+	kbase_hwcnt_backend_csf_if_get_prfcnt_info_fn get_prfcnt_info;
+	kbase_hwcnt_backend_csf_if_ring_buf_alloc_fn ring_buf_alloc;
+	kbase_hwcnt_backend_csf_if_ring_buf_sync_fn ring_buf_sync;
+	kbase_hwcnt_backend_csf_if_ring_buf_free_fn ring_buf_free;
+	kbase_hwcnt_backend_csf_if_timestamp_ns_fn timestamp_ns;
+	kbase_hwcnt_backend_csf_if_dump_enable_fn dump_enable;
+	kbase_hwcnt_backend_csf_if_dump_disable_fn dump_disable;
+	kbase_hwcnt_backend_csf_if_dump_request_fn dump_request;
+	kbase_hwcnt_backend_csf_if_get_indexes_fn get_indexes;
+	kbase_hwcnt_backend_csf_if_set_extract_index_fn set_extract_index;
+	kbase_hwcnt_backend_csf_if_get_gpu_cycle_count_fn get_gpu_cycle_count;
+};
+
+#endif /* #define _KBASE_HWCNT_BACKEND_CSF_IF_H_ */
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_hwcnt_backend_csf_if_fw.c b/drivers/gpu/arm/bifrost/mali_kbase_hwcnt_backend_csf_if_fw.c
new file mode 100644
index 000000000000..35f1225acaae
--- /dev/null
+++ b/drivers/gpu/arm/bifrost/mali_kbase_hwcnt_backend_csf_if_fw.c
@@ -0,0 +1,786 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ *
+ * (C) COPYRIGHT 2021 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+/*
+ * CSF GPU HWC backend firmware interface APIs.
+ */
+
+#include <mali_kbase.h>
+#include <gpu/mali_kbase_gpu_regmap.h>
+#include <device/mali_kbase_device.h>
+#include "mali_kbase_hwcnt_gpu.h"
+#include "mali_kbase_hwcnt_types.h"
+#include <uapi/gpu/arm/bifrost/csf/mali_gpu_csf_registers.h>
+
+#include "csf/mali_kbase_csf_firmware.h"
+#include "mali_kbase_hwcnt_backend_csf_if_fw.h"
+#include "mali_kbase_hwaccess_time.h"
+#include "backend/gpu/mali_kbase_clk_rate_trace_mgr.h"
+
+#include <linux/log2.h>
+#include "mali_kbase_ccswe.h"
+#ifdef CONFIG_MALI_BIFROST_NO_MALI
+#include <backend/gpu/mali_kbase_model_dummy.h>
+#endif
+
+/* The number of nanoseconds in a second. */
+#define NSECS_IN_SEC 1000000000ull /* ns */
+
+/* The ring buffer GPU virtual address starts at 4GB. */
+#define KBASE_HWC_CSF_RING_BUFFER_VA_START (1ull << 32)
+
+/**
+ * struct kbase_hwcnt_backend_csf_if_fw_ring_buf - ring buffer for CSF interface
+ *                                                 used to save the manual and
+ *                                                 auto HWC samples from
+ *                                                 firmware.
+ * @gpu_dump_base: GPU virtual address of the start of the ring buffer.
+ * @cpu_dump_base: CPU virtual address of the start of the mapping.
+ * @buf_count:     Buffer count in the ring buffer, MUST be power of 2.
+ * @as_nr:         Address space number for the memory mapping.
+ * @phys:          Array of the physical pages backing the mapping.
+ * @num_pages:     Size of the mapping, in memory pages.
+ */
+struct kbase_hwcnt_backend_csf_if_fw_ring_buf {
+	u64 gpu_dump_base;
+	void *cpu_dump_base;
+	size_t buf_count;
+	u32 as_nr;
+	struct tagged_addr *phys;
+	size_t num_pages;
+};
+
+/**
+ * struct kbase_hwcnt_backend_csf_if_fw_ctx - Firmware context for the CSF
+ *                                            interface, used to communicate
+ *                                            with firmware.
+ * @kbdev:              KBase device.
+ * @buf_bytes:          The size in bytes for each buffer in the ring buffer.
+ * @clk_cnt:            Number of clock domains in the system (maximum 64).
+ * @clk_enable_map:     Clock enable map saved when cycle counting is enabled.
+ * @rate_listener:      Clock rate listener callback state.
+ * @ccswe_shader_cores: Shader cores cycle count software estimator.
+ */
+struct kbase_hwcnt_backend_csf_if_fw_ctx {
+	struct kbase_device *kbdev;
+	size_t buf_bytes;
+	u8 clk_cnt;
+	u64 clk_enable_map;
+	struct kbase_clk_rate_listener rate_listener;
+	struct kbase_ccswe ccswe_shader_cores;
+};
+
+/* Assert that the caller holds the CSF scheduler spinlock. */
+static void kbasep_hwcnt_backend_csf_if_fw_assert_lock_held(
+	struct kbase_hwcnt_backend_csf_if_ctx *ctx)
+{
+	struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx =
+		(struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx;
+
+	/* Flag a missing context before it gets dereferenced below. */
+	WARN_ON(!ctx);
+
+	/* The lock checked here belongs to the device of this context. */
+	kbase_csf_scheduler_spin_lock_assert_held(fw_ctx->kbdev);
+}
+
+/* Take the CSF scheduler spinlock on behalf of the backend; @flags is handed
+ * on unchanged to the scheduler lock helper.
+ */
+static void
+kbasep_hwcnt_backend_csf_if_fw_lock(struct kbase_hwcnt_backend_csf_if_ctx *ctx,
+				    unsigned long *flags)
+{
+	struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx =
+		(struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx;
+
+	WARN_ON(!ctx);
+
+	kbase_csf_scheduler_spin_lock(fw_ctx->kbdev, flags);
+}
+
+/* Release the CSF scheduler spinlock, which must be held on entry. */
+static void kbasep_hwcnt_backend_csf_if_fw_unlock(
+	struct kbase_hwcnt_backend_csf_if_ctx *ctx, unsigned long flags)
+{
+	struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx =
+		(struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx;
+	struct kbase_device *kbdev;
+
+	WARN_ON(!ctx);
+	kbdev = fw_ctx->kbdev;
+
+	kbase_csf_scheduler_spin_lock_assert_held(kbdev);
+	kbase_csf_scheduler_spin_unlock(kbdev, flags);
+}
+
+/**
+ * kbasep_hwcnt_backend_csf_if_fw_on_freq_change() - Clock rate change callback
+ *
+ * @rate_listener:    Listener embedded in the CSF firmware interface context.
+ * @clk_index:        Index of the clock domain the notification is about.
+ * @clk_rate_hz:      Clock frequency(hz)
+ *
+ * Only shader core clock changes are forwarded to the cycle count estimator.
+ */
+static void kbasep_hwcnt_backend_csf_if_fw_on_freq_change(
+	struct kbase_clk_rate_listener *rate_listener, u32 clk_index,
+	u32 clk_rate_hz)
+{
+	struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx;
+
+	if (clk_index != KBASE_CLOCK_DOMAIN_SHADER_CORES)
+		return;
+
+	fw_ctx = container_of(rate_listener,
+			      struct kbase_hwcnt_backend_csf_if_fw_ctx,
+			      rate_listener);
+	kbase_ccswe_freq_change(&fw_ctx->ccswe_shader_cores,
+				ktime_get_raw_ns(), clk_rate_hz);
+}
+
+/**
+ * kbasep_hwcnt_backend_csf_if_fw_cc_enable() - Enable cycle count tracking
+ *
+ * @fw_ctx:         Non-NULL pointer to CSF firmware interface context.
+ * @clk_enable_map: Bitmap specifying the enabled clock domains.
+ */
+static void kbasep_hwcnt_backend_csf_if_fw_cc_enable(
+	struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx, u64 clk_enable_map)
+{
+	struct kbase_clk_rate_trace_manager *rtm = &fw_ctx->kbdev->pm.clk_rtm;
+	const struct kbase_clk_data *shader_clk;
+	unsigned long irq_flags;
+	u64 now_ns;
+	u32 start_freq;
+
+	if (kbase_hwcnt_clk_enable_map_enabled(
+		    clk_enable_map, KBASE_CLOCK_DOMAIN_SHADER_CORES)) {
+		/* Shader core cycles are estimated in software. */
+		shader_clk = rtm->clks[KBASE_CLOCK_DOMAIN_SHADER_CORES];
+		now_ns = ktime_get_raw_ns();
+
+		spin_lock_irqsave(&rtm->lock, irq_flags);
+
+		/* Restart the estimator from the current clock value. */
+		start_freq = (u32)shader_clk->clock_val;
+		kbase_ccswe_reset(&fw_ctx->ccswe_shader_cores);
+		kbase_ccswe_freq_change(&fw_ctx->ccswe_shader_cores, now_ns,
+					start_freq);
+
+		kbase_clk_rate_trace_manager_subscribe_no_lock(
+			rtm, &fw_ctx->rate_listener);
+
+		spin_unlock_irqrestore(&rtm->lock, irq_flags);
+	}
+
+	/* Remember the map; the disable path reads it back. */
+	fw_ctx->clk_enable_map = clk_enable_map;
+}
+
+/**
+ * kbasep_hwcnt_backend_csf_if_fw_cc_disable() - Disable cycle count tracking
+ *
+ * @fw_ctx: Non-NULL pointer to CSF firmware interface context.
+ */
+static void kbasep_hwcnt_backend_csf_if_fw_cc_disable(
+	struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx)
+{
+	struct kbase_clk_rate_trace_manager *rtm = &fw_ctx->kbdev->pm.clk_rtm;
+
+	/* Only unsubscribe when the saved map has shader cores enabled. */
+	if (!kbase_hwcnt_clk_enable_map_enabled(
+		    fw_ctx->clk_enable_map, KBASE_CLOCK_DOMAIN_SHADER_CORES))
+		return;
+
+	kbase_clk_rate_trace_manager_unsubscribe(rtm, &fw_ctx->rate_listener);
+}
+
+/* Fill @prfcnt_info and record the per-buffer size in the context. */
+static void kbasep_hwcnt_backend_csf_if_fw_get_prfcnt_info(
+	struct kbase_hwcnt_backend_csf_if_ctx *ctx,
+	struct kbase_hwcnt_backend_csf_if_prfcnt_info *prfcnt_info)
+{
+	struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx =
+		(struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx;
+#ifdef CONFIG_MALI_BIFROST_NO_MALI
+
+	prfcnt_info->l2_count = KBASE_DUMMY_MODEL_MAX_MEMSYS_BLOCKS;
+	prfcnt_info->core_mask =
+		(1ull << KBASE_DUMMY_MODEL_MAX_SHADER_CORES) - 1;
+	prfcnt_info->dump_bytes = KBASE_DUMMY_MODEL_MAX_NUM_PERF_BLOCKS *
+				  KBASE_DUMMY_MODEL_BLOCK_SIZE;
+	prfcnt_info->clk_cnt = 1;
+	prfcnt_info->clearing_samples = false;
+	/* ring_buf_alloc and ring_buf_sync size the buffers from buf_bytes. */
+	fw_ctx->buf_bytes = prfcnt_info->dump_bytes;
+#else
+	struct kbase_device *kbdev;
+	u32 prfcnt_size, prfcnt_hw_size, prfcnt_fw_size;
+
+	WARN_ON(!ctx);
+	WARN_ON(!prfcnt_info);
+	kbdev = fw_ctx->kbdev;
+	prfcnt_size = kbdev->csf.global_iface.prfcnt_size;
+	/* Bits 7:0 and 31:16 give the HW and FW sizes in 256-byte units. */
+	prfcnt_hw_size = (prfcnt_size & 0xFF) << 8;
+	prfcnt_fw_size = (prfcnt_size >> 16) << 8;
+	fw_ctx->buf_bytes = prfcnt_hw_size + prfcnt_fw_size;
+	prfcnt_info->dump_bytes = fw_ctx->buf_bytes;
+	prfcnt_info->l2_count = kbdev->gpu_props.props.l2_props.num_l2_slices;
+	prfcnt_info->core_mask =
+		kbdev->gpu_props.props.coherency_info.group[0].core_mask;
+	prfcnt_info->clk_cnt = fw_ctx->clk_cnt;
+	prfcnt_info->clearing_samples = true;
+#endif
+}
+
+/* Allocate ring buffer pages and map them for both the CPU and the MCU. */
+static int kbasep_hwcnt_backend_csf_if_fw_ring_buf_alloc(
+	struct kbase_hwcnt_backend_csf_if_ctx *ctx, u32 buf_count,
+	void **cpu_dump_base,
+	struct kbase_hwcnt_backend_csf_if_ring_buf **out_ring_buf)
+{
+	struct kbase_device *kbdev;
+	struct tagged_addr *phys;
+	struct page **page_list;
+	void *cpu_addr;
+	int ret;
+	int i;
+	size_t num_pages;
+	u64 flags;
+	struct kbase_hwcnt_backend_csf_if_fw_ring_buf *fw_ring_buf;
+	pgprot_t cpu_map_prot = PAGE_KERNEL;
+	u64 gpu_va_base = KBASE_HWC_CSF_RING_BUFFER_VA_START;
+	struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx =
+		(struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx;
+
+	WARN_ON(!ctx);
+	WARN_ON(!cpu_dump_base);
+	WARN_ON(!out_ring_buf);
+
+	kbdev = fw_ctx->kbdev;
+
+	/* The buffer count must be power of 2 */
+	if (!is_power_of_2(buf_count))
+		return -EINVAL;
+
+	/* The GPU VA base must be 2048-byte aligned */
+	if (gpu_va_base & (2048 - 1))
+		return -EINVAL;
+
+	fw_ring_buf = kzalloc(sizeof(*fw_ring_buf), GFP_KERNEL);
+	if (!fw_ring_buf)
+		return -ENOMEM;
+
+	num_pages = PFN_UP(fw_ctx->buf_bytes * buf_count);
+	phys = kmalloc_array(num_pages, sizeof(*phys), GFP_KERNEL);
+	if (!phys)
+		goto phys_alloc_error;
+
+	page_list = kmalloc_array(num_pages, sizeof(*page_list), GFP_KERNEL);
+	if (!page_list)
+		goto page_list_alloc_error;
+
+	/* Get physical pages for the buffer from the CSF FW memory pool */
+	ret = kbase_mem_pool_alloc_pages(
+		&kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], num_pages,
+		phys, false);
+	if (ret != num_pages)
+		goto phys_mem_pool_alloc_error;
+
+	/* Get the CPU virtual address by mapping all the pages with vmap() */
+	for (i = 0; i < num_pages; i++)
+		page_list[i] = as_page(phys[i]);
+
+	cpu_addr = vmap(page_list, num_pages, VM_MAP, cpu_map_prot);
+	if (!cpu_addr)
+		goto vmap_error;
+
+	flags = KBASE_REG_GPU_WR | KBASE_REG_GPU_NX |
+		KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_NON_CACHEABLE);
+
+	/* Update the MCU MMU table so the pages appear at gpu_va_base */
+	ret = kbase_mmu_insert_pages(kbdev, &kbdev->csf.mcu_mmu,
+				     gpu_va_base >> PAGE_SHIFT, phys, num_pages,
+				     flags, MCU_AS_NR, KBASE_MEM_GROUP_CSF_FW);
+	if (ret)
+		goto mmu_insert_failed;
+
+	kfree(page_list); /* only needed to build the CPU mapping */
+
+	fw_ring_buf->gpu_dump_base = gpu_va_base;
+	fw_ring_buf->cpu_dump_base = cpu_addr;
+	fw_ring_buf->phys = phys;
+	fw_ring_buf->num_pages = num_pages;
+	fw_ring_buf->buf_count = buf_count;
+	fw_ring_buf->as_nr = MCU_AS_NR;
+
+	*cpu_dump_base = fw_ring_buf->cpu_dump_base;
+	*out_ring_buf =
+		(struct kbase_hwcnt_backend_csf_if_ring_buf *)fw_ring_buf;
+
+	return 0;
+
+	/* Unwind in reverse order; every failure here returns -ENOMEM */
+mmu_insert_failed:
+	vunmap(cpu_addr);
+vmap_error:
+	kbase_mem_pool_free_pages(
+		&kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], num_pages,
+		phys, false, false);
+phys_mem_pool_alloc_error:
+	kfree(page_list);
+page_list_alloc_error:
+	kfree(phys);
+phys_alloc_error:
+	kfree(fw_ring_buf);
+	return -ENOMEM;
+}
+
+/* Sync one ring buffer page in the direction selected by @for_cpu. */
+static void kbasep_hwcnt_backend_csf_if_fw_ring_buf_sync_page(
+	struct kbase_device *kbdev, struct page *pg, bool for_cpu)
+{
+	if (for_cpu)
+		kbase_sync_single_for_cpu(kbdev, kbase_dma_addr(pg), PAGE_SIZE,
+					  DMA_BIDIRECTIONAL);
+	else
+		kbase_sync_single_for_device(kbdev, kbase_dma_addr(pg),
+					     PAGE_SIZE, DMA_BIDIRECTIONAL);
+}
+
+/**
+ * kbasep_hwcnt_backend_csf_if_fw_ring_buf_sync() - Sync ring buffer pages
+ *
+ * @ctx:             Non-NULL pointer to the CSF firmware interface context.
+ * @ring_buf:        Non-NULL pointer to the ring buffer.
+ * @buf_index_first: First buffer index to be synced, inclusive.
+ * @buf_index_last:  Last buffer index to be synced, exclusive.
+ * @for_cpu:         True to sync for the CPU, false to sync for the device.
+ */
+static void kbasep_hwcnt_backend_csf_if_fw_ring_buf_sync(
+	struct kbase_hwcnt_backend_csf_if_ctx *ctx,
+	struct kbase_hwcnt_backend_csf_if_ring_buf *ring_buf,
+	u32 buf_index_first, u32 buf_index_last, bool for_cpu)
+{
+	struct kbase_hwcnt_backend_csf_if_fw_ring_buf *fw_ring_buf =
+		(struct kbase_hwcnt_backend_csf_if_fw_ring_buf *)ring_buf;
+	struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx =
+		(struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx;
+	u32 first_buf;
+	u32 last_buf;
+	u64 first_byte;
+	u64 last_byte;
+	size_t first_pg;
+	size_t last_pg;
+	size_t pg;
+
+	WARN_ON(!ctx);
+	WARN_ON(!ring_buf);
+
+	/* The caller passes an inclusive, exclusive range of raw indexes.
+	 * Mask both ends back to the available buffers, making the end
+	 * inclusive as well so that a full flush does not become 0 -> 0.
+	 */
+	first_buf = buf_index_first & (fw_ring_buf->buf_count - 1);
+	last_buf = (buf_index_last - 1) & (fw_ring_buf->buf_count - 1);
+
+	/* Byte offset of the first buffer, and the page that holds it. */
+	first_byte = fw_ctx->buf_bytes * first_buf;
+	first_pg = first_byte >> PAGE_SHIFT;
+
+	/* Offset of the last byte of the final buffer, and its page. */
+	last_byte = (fw_ctx->buf_bytes * (last_buf + 1)) - 1;
+	last_pg = last_byte >> PAGE_SHIFT;
+
+	if (first_byte > last_byte) {
+		/* The range wraps: sync up to the end of the ring buffer
+		 * first, then carry on from page 0.
+		 */
+		for (pg = first_pg; pg < fw_ring_buf->num_pages; pg++) {
+			kbasep_hwcnt_backend_csf_if_fw_ring_buf_sync_page(
+				fw_ctx->kbdev, as_page(fw_ring_buf->phys[pg]),
+				for_cpu);
+		}
+
+		first_pg = 0;
+	}
+
+	for (pg = first_pg; pg <= last_pg; pg++) {
+		kbasep_hwcnt_backend_csf_if_fw_ring_buf_sync_page(
+			fw_ctx->kbdev, as_page(fw_ring_buf->phys[pg]),
+			for_cpu);
+	}
+}
+
+static u64 kbasep_hwcnt_backend_csf_if_fw_timestamp_ns(
+	struct kbase_hwcnt_backend_csf_if_ctx *ctx)
+{
+	CSTD_UNUSED(ctx); /* The timestamp is not tied to a context. */
+	return ktime_get_raw_ns();
+}
+
+/* Unmap and release a ring buffer; a NULL @ring_buf is ignored. */
+static void kbasep_hwcnt_backend_csf_if_fw_ring_buf_free(
+	struct kbase_hwcnt_backend_csf_if_ctx *ctx,
+	struct kbase_hwcnt_backend_csf_if_ring_buf *ring_buf)
+{
+	struct kbase_hwcnt_backend_csf_if_fw_ring_buf *fw_ring_buf =
+		(struct kbase_hwcnt_backend_csf_if_fw_ring_buf *)ring_buf;
+	struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx =
+		(struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx;
+
+	if (!fw_ring_buf)
+		return;
+
+	if (fw_ring_buf->phys) {
+		/* Undo the MCU mapping, then the CPU mapping, then give the
+		 * pages back to the pool they were taken from.
+		 */
+		WARN_ON(kbase_mmu_teardown_pages(
+			fw_ctx->kbdev, &fw_ctx->kbdev->csf.mcu_mmu,
+			KBASE_HWC_CSF_RING_BUFFER_VA_START >> PAGE_SHIFT,
+			fw_ring_buf->num_pages, MCU_AS_NR));
+		vunmap(fw_ring_buf->cpu_dump_base);
+		kbase_mem_pool_free_pages(
+			&fw_ctx->kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW],
+			fw_ring_buf->num_pages, fw_ring_buf->phys, false,
+			false);
+		kfree(fw_ring_buf->phys);
+	}
+
+	/* Free the descriptor even when it owns no pages, so it cannot leak. */
+	kfree(fw_ring_buf);
+}
+
+/* Program the counter ring buffer and enable maps, then request enable. */
+static void kbasep_hwcnt_backend_csf_if_fw_dump_enable(
+	struct kbase_hwcnt_backend_csf_if_ctx *ctx,
+	struct kbase_hwcnt_backend_csf_if_ring_buf *ring_buf,
+	struct kbase_hwcnt_backend_csf_if_enable *enable)
+{
+	u32 prfcnt_config;
+	struct kbase_device *kbdev;
+	struct kbase_csf_global_iface *global_iface;
+	struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx =
+		(struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx;
+	struct kbase_hwcnt_backend_csf_if_fw_ring_buf *fw_ring_buf =
+		(struct kbase_hwcnt_backend_csf_if_fw_ring_buf *)ring_buf;
+
+	WARN_ON(!ctx);
+	WARN_ON(!ring_buf);
+	WARN_ON(!enable);
+	kbasep_hwcnt_backend_csf_if_fw_assert_lock_held(ctx);
+	kbdev = fw_ctx->kbdev;
+	global_iface = &kbdev->csf.global_iface;
+
+	/* Build the config word: buffer count plus counter set selection */
+	prfcnt_config = fw_ring_buf->buf_count;
+	prfcnt_config |= enable->counter_set << PRFCNT_CONFIG_SETSELECT_SHIFT;
+
+	/* Configure the ring buffer base address */
+	kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_JASID,
+					fw_ring_buf->as_nr);
+	kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_BASE_LO,
+					fw_ring_buf->gpu_dump_base & U32_MAX);
+	kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_BASE_HI,
+					fw_ring_buf->gpu_dump_base >> 32);
+
+	/* Set extract position to 0 */
+	kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_EXTRACT, 0);
+
+	/* Configure the enable bitmap */
+	kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_CSF_EN,
+					enable->fe_bm);
+	kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_SHADER_EN,
+					enable->shader_bm);
+	kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_MMU_L2_EN,
+					enable->mmu_l2_bm);
+	kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_TILER_EN,
+					enable->tiler_bm);
+
+	/* Configure the HWC set and buffer size */
+	kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_CONFIG,
+					prfcnt_config);
+
+	kbdev->csf.hwcnt.enable_pending = true;
+
+	/* Unmask the interrupts */
+	kbase_csf_firmware_global_input_mask(
+		global_iface, GLB_ACK_IRQ_MASK,
+		GLB_ACK_IRQ_MASK_PRFCNT_SAMPLE_MASK,
+		GLB_ACK_IRQ_MASK_PRFCNT_SAMPLE_MASK);
+	kbase_csf_firmware_global_input_mask(
+		global_iface, GLB_ACK_IRQ_MASK,
+		GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_MASK,
+		GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_MASK);
+	kbase_csf_firmware_global_input_mask(
+		global_iface, GLB_ACK_IRQ_MASK,
+		GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_MASK,
+		GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_MASK);
+	kbase_csf_firmware_global_input_mask(
+		global_iface, GLB_ACK_IRQ_MASK,
+		GLB_ACK_IRQ_MASK_PRFCNT_ENABLE_MASK,
+		GLB_ACK_IRQ_MASK_PRFCNT_ENABLE_MASK);
+
+	/* Request the enable and ring the kernel doorbell */
+	kbase_csf_firmware_global_input_mask(global_iface, GLB_REQ,
+					     (1 << GLB_REQ_PRFCNT_ENABLE_SHIFT),
+					     GLB_REQ_PRFCNT_ENABLE_MASK);
+	kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR);
+	/* NOTE(review): the value read back below is not used afterwards. */
+	prfcnt_config = kbase_csf_firmware_global_input_read(global_iface,
+							     GLB_PRFCNT_CONFIG);
+
+	kbasep_hwcnt_backend_csf_if_fw_cc_enable(fw_ctx,
+						 enable->clk_enable_map);
+}
+
+/* Request a counter disable from firmware and mask the sampling interrupts. */
+static void kbasep_hwcnt_backend_csf_if_fw_dump_disable(
+	struct kbase_hwcnt_backend_csf_if_ctx *ctx)
+{
+	struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx =
+		(struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx;
+	struct kbase_device *kbdev;
+	struct kbase_csf_global_iface *glb;
+
+	WARN_ON(!ctx);
+	kbasep_hwcnt_backend_csf_if_fw_assert_lock_held(ctx);
+
+	kbdev = fw_ctx->kbdev;
+	glb = &kbdev->csf.global_iface;
+
+	/* Clear the enable request bit and ring the kernel doorbell. */
+	kbdev->csf.hwcnt.enable_pending = true;
+	kbase_csf_firmware_global_input_mask(glb, GLB_REQ, 0,
+					     GLB_REQ_PRFCNT_ENABLE_MASK);
+	kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR);
+
+	/* Mask the sample, threshold and overflow interrupts. */
+	kbase_csf_firmware_global_input_mask(
+		glb, GLB_ACK_IRQ_MASK, 0, GLB_ACK_IRQ_MASK_PRFCNT_SAMPLE_MASK);
+	kbase_csf_firmware_global_input_mask(
+		glb, GLB_ACK_IRQ_MASK, 0,
+		GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_MASK);
+	kbase_csf_firmware_global_input_mask(
+		glb, GLB_ACK_IRQ_MASK, 0,
+		GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_MASK);
+
+	/* A previous dump request may still be in flight when the disable
+	 * happens, so clear the pending flag.
+	 */
+	kbdev->csf.hwcnt.request_pending = false;
+
+	kbasep_hwcnt_backend_csf_if_fw_cc_disable(fw_ctx);
+}
+
+static void kbasep_hwcnt_backend_csf_if_fw_dump_request(
+	struct kbase_hwcnt_backend_csf_if_ctx *ctx)
+{
+	struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx =
+		(struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx;
+	struct kbase_device *kbdev;
+	struct kbase_csf_global_iface *glb;
+	u32 toggled_req;
+
+	WARN_ON(!ctx);
+	kbasep_hwcnt_backend_csf_if_fw_assert_lock_held(ctx);
+
+	kbdev = fw_ctx->kbdev;
+	glb = &kbdev->csf.global_iface;
+	kbdev->csf.hwcnt.request_pending = true;
+
+	/* Toggle the sample request bit of GLB_REQ and ring the doorbell. */
+	toggled_req = kbase_csf_firmware_global_input_read(glb, GLB_REQ) ^
+		      GLB_REQ_PRFCNT_SAMPLE_MASK;
+	kbase_csf_firmware_global_input_mask(glb, GLB_REQ, toggled_req,
+					     GLB_REQ_PRFCNT_SAMPLE_MASK);
+	kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR);
+}
+
+/* Report the current extract and insert indexes of the ring buffer. */
+static void kbasep_hwcnt_backend_csf_if_fw_get_indexes(
+	struct kbase_hwcnt_backend_csf_if_ctx *ctx, u32 *extract_index,
+	u32 *insert_index)
+{
+	struct kbase_device *kbdev;
+
+	WARN_ON(!ctx);
+	WARN_ON(!extract_index);
+	WARN_ON(!insert_index);
+	kbasep_hwcnt_backend_csf_if_fw_assert_lock_held(ctx);
+	kbdev = ((struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx)->kbdev;
+	*extract_index = kbase_csf_firmware_global_input_read(
+		&kbdev->csf.global_iface, GLB_PRFCNT_EXTRACT);
+	*insert_index = kbase_csf_firmware_global_output(
+		&kbdev->csf.global_iface, GLB_PRFCNT_INSERT);
+}
+
+/* Write a new raw extract index, releasing buffers back to the ring buffer. */
+static void kbasep_hwcnt_backend_csf_if_fw_set_extract_index(
+	struct kbase_hwcnt_backend_csf_if_ctx *ctx, u32 extract_idx)
+{
+	struct kbase_device *kbdev;
+	struct kbase_csf_global_iface *glb;
+
+	WARN_ON(!ctx);
+	kbasep_hwcnt_backend_csf_if_fw_assert_lock_held(ctx);
+
+	kbdev = ((struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx)->kbdev;
+	glb = &kbdev->csf.global_iface;
+
+	kbase_csf_firmware_global_input(glb, GLB_PRFCNT_EXTRACT, extract_idx);
+}
+
+/* Fill @cycle_counts for every enabled clock domain of the interface. */
+static void kbasep_hwcnt_backend_csf_if_fw_get_gpu_cycle_count(
+	struct kbase_hwcnt_backend_csf_if_ctx *ctx, u64 *cycle_counts,
+	u64 clk_enable_map)
+{
+	struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx =
+		(struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx;
+	const u64 now_ns = ktime_get_raw_ns();
+	u8 domain;
+
+	WARN_ON(!ctx);
+	WARN_ON(!cycle_counts);
+	kbasep_hwcnt_backend_csf_if_fw_assert_lock_held(ctx);
+
+	for (domain = 0; domain < fw_ctx->clk_cnt; domain++) {
+		if (!(clk_enable_map & (1ull << domain)))
+			continue;
+		if (domain != KBASE_CLOCK_DOMAIN_TOP) {
+			/* Estimate cycle count for non-top clock domain. */
+			cycle_counts[domain] = kbase_ccswe_cycle_at(
+				&fw_ctx->ccswe_shader_cores, now_ns);
+			continue;
+		}
+		/* Read cycle count for top clock domain. */
+		kbase_backend_get_gpu_time_norequest(
+			fw_ctx->kbdev, &cycle_counts[domain], NULL, NULL);
+	}
+}
+
+/**
+ * kbasep_hwcnt_backend_csf_if_fw_ctx_destroy() - Destroy a CSF FW context.
+ *
+ * @fw_ctx: Pointer to context to destroy.
+ *
+ * A NULL @fw_ctx is ignored.
+ */
+static void kbasep_hwcnt_backend_csf_if_fw_ctx_destroy(
+	struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx)
+{
+	/* kfree() ignores NULL, so no explicit guard is needed. */
+	kfree(fw_ctx);
+}
+
+/**
+ * kbasep_hwcnt_backend_csf_if_fw_ctx_create() - Create a CSF Firmware context.
+ *
+ * @kbdev:   Non-NULL pointer to kbase device.
+ * @out_ctx: Non-NULL pointer to where the new context is stored on success.
+ *
+ * Return: 0 on success, -ENOMEM if the context could not be allocated.
+ */
+static int kbasep_hwcnt_backend_csf_if_fw_ctx_create(
+	struct kbase_device *kbdev,
+	struct kbase_hwcnt_backend_csf_if_fw_ctx **out_ctx)
+{
+	struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx;
+	u8 nr_clks = 0;
+
+	WARN_ON(!kbdev);
+	WARN_ON(!out_ctx);
+
+	fw_ctx = kzalloc(sizeof(*fw_ctx), GFP_KERNEL);
+	if (!fw_ctx)
+		return -ENOMEM;
+
+	fw_ctx->kbdev = kbdev;
+
+	/* Count the leading non-NULL entries of the clock table to find the
+	 * number of available clock domains.
+	 */
+	while (nr_clks < BASE_MAX_NR_CLOCKS_REGULATORS &&
+	       kbdev->pm.clk_rtm.clks[nr_clks] != NULL)
+		nr_clks++;
+
+	fw_ctx->clk_cnt = nr_clks;
+
+	/* Start with an empty clock enable map. */
+	fw_ctx->clk_enable_map = 0;
+	kbase_ccswe_init(&fw_ctx->ccswe_shader_cores);
+	fw_ctx->rate_listener.notify =
+		kbasep_hwcnt_backend_csf_if_fw_on_freq_change;
+
+	*out_ctx = fw_ctx;
+
+	return 0;
+}
+
+void kbase_hwcnt_backend_csf_if_fw_destroy(
+	struct kbase_hwcnt_backend_csf_if *if_fw)
+{
+	if (if_fw) {
+		kbasep_hwcnt_backend_csf_if_fw_ctx_destroy(
+			(struct kbase_hwcnt_backend_csf_if_fw_ctx *)if_fw->ctx);
+		/* Leave no stale context pointer or callbacks behind. */
+		memset(if_fw, 0, sizeof(*if_fw));
+	}
+}
+
+int kbase_hwcnt_backend_csf_if_fw_create(
+	struct kbase_device *kbdev, struct kbase_hwcnt_backend_csf_if *if_fw)
+{
+	struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx = NULL;
+	int err;
+
+	if (!kbdev || !if_fw)
+		return -EINVAL;
+
+	err = kbasep_hwcnt_backend_csf_if_fw_ctx_create(kbdev, &fw_ctx);
+	if (err)
+		return err;
+
+	/* Wire the firmware implementation into the interface callbacks. */
+	if_fw->ctx = (struct kbase_hwcnt_backend_csf_if_ctx *)fw_ctx;
+	if_fw->assert_lock_held =
+		kbasep_hwcnt_backend_csf_if_fw_assert_lock_held;
+	if_fw->lock = kbasep_hwcnt_backend_csf_if_fw_lock;
+	if_fw->unlock = kbasep_hwcnt_backend_csf_if_fw_unlock;
+	if_fw->get_prfcnt_info = kbasep_hwcnt_backend_csf_if_fw_get_prfcnt_info;
+	if_fw->ring_buf_alloc = kbasep_hwcnt_backend_csf_if_fw_ring_buf_alloc;
+	if_fw->ring_buf_sync = kbasep_hwcnt_backend_csf_if_fw_ring_buf_sync;
+	if_fw->ring_buf_free = kbasep_hwcnt_backend_csf_if_fw_ring_buf_free;
+	if_fw->timestamp_ns = kbasep_hwcnt_backend_csf_if_fw_timestamp_ns;
+	if_fw->dump_enable = kbasep_hwcnt_backend_csf_if_fw_dump_enable;
+	if_fw->dump_disable = kbasep_hwcnt_backend_csf_if_fw_dump_disable;
+	if_fw->dump_request = kbasep_hwcnt_backend_csf_if_fw_dump_request;
+	if_fw->get_indexes = kbasep_hwcnt_backend_csf_if_fw_get_indexes;
+	if_fw->set_extract_index =
+		kbasep_hwcnt_backend_csf_if_fw_set_extract_index;
+	if_fw->get_gpu_cycle_count =
+		kbasep_hwcnt_backend_csf_if_fw_get_gpu_cycle_count;
+	return 0;
+}
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_hwcnt_backend_csf_if_fw.h b/drivers/gpu/arm/bifrost/mali_kbase_hwcnt_backend_csf_if_fw.h
new file mode 100644
index 000000000000..f55efb6e896b
--- /dev/null
+++ b/drivers/gpu/arm/bifrost/mali_kbase_hwcnt_backend_csf_if_fw.h
@@ -0,0 +1,50 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ *
+ * (C) COPYRIGHT 2021 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+/*
+ * Concrete implementation of kbase_hwcnt_backend_csf_if interface for CSF FW
+ */
+
+#ifndef _KBASE_HWCNT_BACKEND_CSF_IF_FW_H_
+#define _KBASE_HWCNT_BACKEND_CSF_IF_FW_H_
+
+#include "mali_kbase_hwcnt_backend_csf_if.h"
+
+/**
+ * kbase_hwcnt_backend_csf_if_fw_create() - Create a firmware CSF interface
+ *                                          of hardware counter backend.
+ * @kbdev: Non-NULL pointer to Kbase device.
+ * @if_fw: Non-NULL pointer to backend interface structure that is filled in on
+ *         creation success.
+ * Return: 0 on success, -EINVAL if @kbdev or @if_fw is NULL, else error code.
+ */
+int kbase_hwcnt_backend_csf_if_fw_create(
+	struct kbase_device *kbdev, struct kbase_hwcnt_backend_csf_if *if_fw);
+
+/**
+ * kbase_hwcnt_backend_csf_if_fw_destroy() - Destroy a firmware CSF interface of
+ *                                           hardware counter backend.
+ * @if_fw: Pointer to a CSF interface to destroy. NULL is accepted as a no-op.
+ */
+void kbase_hwcnt_backend_csf_if_fw_destroy(
+	struct kbase_hwcnt_backend_csf_if *if_fw);
+
+#endif /* _KBASE_HWCNT_BACKEND_CSF_IF_FW_H_ */
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_hwcnt_backend_jm.c b/drivers/gpu/arm/bifrost/mali_kbase_hwcnt_backend_jm.c
index 9f65de41694f..ffacaeb0a748 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_hwcnt_backend_jm.c
+++ b/drivers/gpu/arm/bifrost/mali_kbase_hwcnt_backend_jm.c
@@ -1,11 +1,12 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  *
- * (C) COPYRIGHT 2018-2020 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2018-2021 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,8 +17,6 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 #include "mali_kbase_hwcnt_backend_jm.h"
@@ -34,25 +33,20 @@
 #endif
 #include "backend/gpu/mali_kbase_clk_rate_trace_mgr.h"
 
-#if MALI_USE_CSF
-#include "mali_kbase_ctx_sched.h"
-#else
 #include "backend/gpu/mali_kbase_pm_internal.h"
-#endif
 
 /**
  * struct kbase_hwcnt_backend_jm_info - Information used to create an instance
  *                                      of a JM hardware counter backend.
  * @kbdev:         KBase device.
- * @use_secondary: True if secondary performance counters should be used,
- *                 else false. Ignored if secondary counters are not supported.
+ * @counter_set:   The performance counter set to use.
  * @metadata:      Hardware counter metadata.
  * @dump_bytes:    Bytes of GPU memory required to perform a
  *                 hardware counter dump.
  */
 struct kbase_hwcnt_backend_jm_info {
 	struct kbase_device *kbdev;
-	bool use_secondary;
+	enum kbase_hwcnt_set counter_set;
 	const struct kbase_hwcnt_metadata *metadata;
 	size_t dump_bytes;
 };
@@ -68,6 +62,8 @@ struct kbase_hwcnt_backend_jm_info {
  * @enabled:          True if dumping has been enabled, else false.
  * @pm_core_mask:     PM state sync-ed shaders core mask for the enabled
  *                    dumping.
+ * @curr_config:      Current allocated hardware resources to correctly map the src
+ *                    raw dump buffer to the dst dump buffer.
  * @clk_enable_map:   The enable map specifying enabled clock domains.
  * @cycle_count_elapsed:
  *                    Cycle count elapsed for a given sample period.
@@ -87,6 +83,7 @@ struct kbase_hwcnt_backend_jm {
 	struct kbase_vmap_struct *vmap;
 	bool enabled;
 	u64 pm_core_mask;
+	struct kbase_hwcnt_curr_config curr_config;
 	u64 clk_enable_map;
 	u64 cycle_count_elapsed[BASE_MAX_NR_CLOCKS_REGULATORS];
 	u64 prev_cycle_count[BASE_MAX_NR_CLOCKS_REGULATORS];
@@ -94,6 +91,48 @@ struct kbase_hwcnt_backend_jm {
 	struct kbase_ccswe ccswe_shader_cores;
 };
 
+/**
+ * kbasep_hwcnt_backend_jm_gpu_info_init() - Initialise an info structure used
+ *                                           to create the hwcnt metadata.
+ * @kbdev: Non-NULL pointer to kbase device.
+ * @info:  Non-NULL pointer to data structure to be filled in.
+ *
+ * The initialised info struct will only be valid for use while kbdev is valid.
+ *
+ * Return: 0 on success, -EINVAL if @kbdev or @info is NULL.
+ */
+static int
+kbasep_hwcnt_backend_jm_gpu_info_init(struct kbase_device *kbdev,
+				      struct kbase_hwcnt_gpu_info *info)
+{
+	size_t clk;
+
+	if (!kbdev || !info)
+		return -EINVAL;
+
+#ifdef CONFIG_MALI_BIFROST_NO_MALI
+	info->l2_count = KBASE_DUMMY_MODEL_MAX_MEMSYS_BLOCKS;
+	info->core_mask = (1ull << KBASE_DUMMY_MODEL_MAX_SHADER_CORES) - 1;
+#else /* CONFIG_MALI_BIFROST_NO_MALI */
+	{
+		const struct base_gpu_props *props = &kbdev->gpu_props.props;
+
+		/* Assign directly: avoids narrowing the mask to size_t. */
+		info->l2_count = props->l2_props.num_l2_slices;
+		info->core_mask = props->coherency_info.group[0].core_mask;
+	}
+#endif /* CONFIG_MALI_BIFROST_NO_MALI */
+
+	/* Determine the number of available clock domains. */
+	for (clk = 0; clk < BASE_MAX_NR_CLOCKS_REGULATORS; clk++) {
+		if (kbdev->pm.clk_rtm.clks[clk] == NULL)
+			break;
+	}
+	info->clk_cnt = clk;
+
+	return 0;
+}
+
 /**
  * kbasep_hwcnt_backend_jm_on_freq_change() - On freq change callback
  *
@@ -121,7 +160,7 @@ static void kbasep_hwcnt_backend_jm_on_freq_change(
 /**
  * kbasep_hwcnt_backend_jm_cc_enable() - Enable cycle count tracking
  *
- * @backend:      Non-NULL pointer to backend.
+ * @backend_jm:      Non-NULL pointer to backend.
  * @enable_map:   Non-NULL pointer to enable map specifying enabled counters.
  * @timestamp_ns: Timestamp(ns) when HWCNT were enabled.
  */
@@ -136,10 +175,8 @@ static void kbasep_hwcnt_backend_jm_cc_enable(
 
 	if (kbase_hwcnt_clk_enable_map_enabled(
 		    clk_enable_map, KBASE_CLOCK_DOMAIN_TOP)) {
-#if !MALI_USE_CSF
 		/* turn on the cycle counter */
 		kbase_pm_request_gpu_cycle_counter_l2_is_on(kbdev);
-#endif
 		/* Read cycle count for top clock domain. */
 		kbase_backend_get_gpu_time_norequest(
 			kbdev, &cycle_count, NULL, NULL);
@@ -183,7 +220,7 @@ static void kbasep_hwcnt_backend_jm_cc_enable(
 /**
  * kbasep_hwcnt_backend_jm_cc_disable() - Disable cycle count tracking
  *
- * @backend:      Non-NULL pointer to backend.
+ * @backend_jm:      Non-NULL pointer to backend.
  */
 static void kbasep_hwcnt_backend_jm_cc_disable(
 	struct kbase_hwcnt_backend_jm *backend_jm)
@@ -192,13 +229,12 @@ static void kbasep_hwcnt_backend_jm_cc_disable(
 	struct kbase_clk_rate_trace_manager *rtm = &kbdev->pm.clk_rtm;
 	u64 clk_enable_map = backend_jm->clk_enable_map;
 
-#if !MALI_USE_CSF
 	if (kbase_hwcnt_clk_enable_map_enabled(
 		clk_enable_map, KBASE_CLOCK_DOMAIN_TOP)) {
 		/* turn off the cycle counter */
 		kbase_pm_release_gpu_cycle_counter(kbdev);
 	}
-#endif
+
 	if (kbase_hwcnt_clk_enable_map_enabled(
 		clk_enable_map, KBASE_CLOCK_DOMAIN_SHADER_CORES)) {
 
@@ -208,6 +244,37 @@ static void kbasep_hwcnt_backend_jm_cc_disable(
 }
 
 
+/**
+ * kbasep_hwcnt_gpu_update_curr_config() - Update the destination buffer with
+ *                                        current config information.
+ * @kbdev:       Non-NULL pointer to kbase device.
+ * @curr_config: Non-NULL pointer to return the current configuration of
+ *               hardware allocated to the GPU.
+ *
+ * The current configuration information is used for architectures where the
+ * max_config interface is available from the Arbiter. There the currently
+ * allocated hardware can vary, so this snapshot is used to map the allocated
+ * resources to the memory layout that is copied to user space. The caller
+ * must hold kbdev->hwaccess_lock.
+ *
+ * Return: 0 on success, -EINVAL if @kbdev or @curr_config is NULL.
+ */
+static int kbasep_hwcnt_gpu_update_curr_config(
+	struct kbase_device *kbdev,
+	struct kbase_hwcnt_curr_config *curr_config)
+{
+	if (WARN_ON(!kbdev) || WARN_ON(!curr_config))
+		return -EINVAL;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	curr_config->num_l2_slices =
+		kbdev->gpu_props.curr_config.l2_slices;
+	curr_config->shader_present =
+		kbdev->gpu_props.curr_config.shader_present;
+	return 0;
+}
+
 /* JM backend implementation of kbase_hwcnt_backend_timestamp_ns_fn */
 static u64 kbasep_hwcnt_backend_jm_timestamp_ns(
 	struct kbase_hwcnt_backend *backend)
@@ -226,7 +293,8 @@ static int kbasep_hwcnt_backend_jm_dump_enable_nolock(
 		(struct kbase_hwcnt_backend_jm *)backend;
 	struct kbase_context *kctx;
 	struct kbase_device *kbdev;
-	struct kbase_hwcnt_physical_enable_map phys;
+	struct kbase_hwcnt_physical_enable_map phys_enable_map;
+	enum kbase_hwcnt_physical_set phys_counter_set;
 	struct kbase_instr_hwcnt_enable enable;
 	u64 timestamp_ns;
 
@@ -239,23 +307,33 @@ static int kbasep_hwcnt_backend_jm_dump_enable_nolock(
 
 	lockdep_assert_held(&kbdev->hwaccess_lock);
 
-	kbase_hwcnt_gpu_enable_map_to_physical(&phys, enable_map);
+	kbase_hwcnt_gpu_enable_map_to_physical(&phys_enable_map, enable_map);
 
-	enable.fe_bm = phys.fe_bm;
-	enable.shader_bm = phys.shader_bm;
-	enable.tiler_bm = phys.tiler_bm;
-	enable.mmu_l2_bm = phys.mmu_l2_bm;
-	enable.use_secondary = backend_jm->info->use_secondary;
+	kbase_hwcnt_gpu_set_to_physical(&phys_counter_set,
+					backend_jm->info->counter_set);
+
+	enable.fe_bm = phys_enable_map.fe_bm;
+	enable.shader_bm = phys_enable_map.shader_bm;
+	enable.tiler_bm = phys_enable_map.tiler_bm;
+	enable.mmu_l2_bm = phys_enable_map.mmu_l2_bm;
+	enable.counter_set = phys_counter_set;
 	enable.dump_buffer = backend_jm->gpu_dump_va;
 	enable.dump_buffer_bytes = backend_jm->info->dump_bytes;
 
 	timestamp_ns = kbasep_hwcnt_backend_jm_timestamp_ns(backend);
 
+	/* Update the current configuration information. */
+	errcode = kbasep_hwcnt_gpu_update_curr_config(kbdev,
+						      &backend_jm->curr_config);
+	if (errcode)
+		goto error;
+
 	errcode = kbase_instr_hwcnt_enable_internal(kbdev, kctx, &enable);
 	if (errcode)
 		goto error;
 
 	backend_jm->pm_core_mask = kbase_pm_ca_get_instr_core_mask(kbdev);
+
 	backend_jm->enabled = true;
 
 	kbasep_hwcnt_backend_jm_cc_enable(backend_jm, enable_map, timestamp_ns);
@@ -336,7 +414,7 @@ static int kbasep_hwcnt_backend_jm_dump_request(
 	size_t clk;
 	int ret;
 
-	if (!backend_jm || !backend_jm->enabled)
+	if (!backend_jm || !backend_jm->enabled || !dump_time_ns)
 		return -EINVAL;
 
 	kbdev = backend_jm->kctx->kbdev;
@@ -405,6 +483,11 @@ static int kbasep_hwcnt_backend_jm_dump_get(
 	struct kbase_hwcnt_backend_jm *backend_jm =
 		(struct kbase_hwcnt_backend_jm *)backend;
 	size_t clk;
+#ifdef CONFIG_MALI_BIFROST_NO_MALI
+	struct kbase_device *kbdev;
+	unsigned long flags;
+	int errcode;
+#endif
 
 	if (!backend_jm || !dst || !dst_enable_map ||
 	    (backend_jm->info->metadata != dst->metadata) ||
@@ -424,9 +507,24 @@ static int kbasep_hwcnt_backend_jm_dump_get(
 		dst->clk_cnt_buf[clk] = backend_jm->cycle_count_elapsed[clk];
 	}
 
-	return kbase_hwcnt_gpu_dump_get(
-		dst, backend_jm->cpu_dump_va, dst_enable_map,
-		backend_jm->pm_core_mask, accumulate);
+#ifdef CONFIG_MALI_BIFROST_NO_MALI
+	kbdev = backend_jm->kctx->kbdev;
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	/* Update the current configuration information. */
+	errcode = kbasep_hwcnt_gpu_update_curr_config(kbdev,
+		&backend_jm->curr_config);
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	if (errcode)
+		return errcode;
+#endif
+
+	return kbase_hwcnt_jm_dump_get(dst, backend_jm->cpu_dump_va,
+				       dst_enable_map, backend_jm->pm_core_mask,
+				       &backend_jm->curr_config, accumulate);
 }
 
 /**
@@ -454,10 +552,8 @@ static int kbasep_hwcnt_backend_jm_dump_alloc(
 	flags = BASE_MEM_PROT_CPU_RD |
 		BASE_MEM_PROT_GPU_WR |
 		BASEP_MEM_PERMANENT_KERNEL_MAPPING |
-		BASE_MEM_CACHED_CPU;
-
-	if (kctx->kbdev->mmu_mode->flags & KBASE_MMU_MODE_HAS_NON_CACHEABLE)
-		flags |= BASE_MEM_UNCACHED_GPU;
+		BASE_MEM_CACHED_CPU |
+		BASE_MEM_UNCACHED_GPU;
 
 	nr_pages = PFN_UP(info->dump_bytes);
 
@@ -496,9 +592,6 @@ static void kbasep_hwcnt_backend_jm_destroy(
 		return;
 
 	if (backend->kctx) {
-#if MALI_USE_CSF
-		unsigned long flags;
-#endif
 		struct kbase_context *kctx = backend->kctx;
 		struct kbase_device *kbdev = kctx->kbdev;
 
@@ -509,13 +602,7 @@ static void kbasep_hwcnt_backend_jm_destroy(
 			kbasep_hwcnt_backend_jm_dump_free(
 				kctx, backend->gpu_dump_va);
 
-#if MALI_USE_CSF
-		spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
-		kbase_ctx_sched_release_ctx(kctx);
-		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
-#else
 		kbasep_js_release_privileged_ctx(kbdev, kctx);
-#endif
 		kbase_destroy_context(kctx);
 	}
 
@@ -533,9 +620,6 @@ static int kbasep_hwcnt_backend_jm_create(
 	const struct kbase_hwcnt_backend_jm_info *info,
 	struct kbase_hwcnt_backend_jm **out_backend)
 {
-#if MALI_USE_CSF
-	unsigned long flags;
-#endif
 	int errcode;
 	struct kbase_device *kbdev;
 	struct kbase_hwcnt_backend_jm *backend = NULL;
@@ -556,17 +640,7 @@ static int kbasep_hwcnt_backend_jm_create(
 	if (!backend->kctx)
 		goto alloc_error;
 
-#if MALI_USE_CSF
-	kbase_pm_context_active(kbdev);
-	mutex_lock(&kbdev->mmu_hw_mutex);
-	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
-	kbase_ctx_sched_retain_ctx(backend->kctx);
-	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
-	mutex_unlock(&kbdev->mmu_hw_mutex);
-	kbase_pm_context_idle(kbdev);
-#else
 	kbasep_js_schedule_privileged_ctx(kbdev, backend->kctx);
-#endif
 
 	errcode = kbasep_hwcnt_backend_jm_dump_alloc(
 		info, backend->kctx, &backend->gpu_dump_va);
@@ -596,6 +670,16 @@ error:
 	return errcode;
 }
 
+/* JM backend implementation of kbase_hwcnt_backend_metadata_fn */
+static const struct kbase_hwcnt_metadata *
+kbasep_hwcnt_backend_jm_metadata(const struct kbase_hwcnt_backend_info *info)
+{
+	const struct kbase_hwcnt_backend_jm_info *jm_info =
+		(const struct kbase_hwcnt_backend_jm_info *)info;
+
+	return jm_info ? jm_info->metadata : NULL;
+}
+
 /* JM backend implementation of kbase_hwcnt_backend_init_fn */
 static int kbasep_hwcnt_backend_jm_init(
 	const struct kbase_hwcnt_backend_info *info,
@@ -640,7 +724,7 @@ static void kbasep_hwcnt_backend_jm_info_destroy(
 	if (!info)
 		return;
 
-	kbase_hwcnt_gpu_metadata_destroy(info->metadata);
+	kbase_hwcnt_jm_metadata_destroy(info->metadata);
 	kfree(info);
 }
 
@@ -662,7 +746,7 @@ static int kbasep_hwcnt_backend_jm_info_create(
 	WARN_ON(!kbdev);
 	WARN_ON(!out_info);
 
-	errcode = kbase_hwcnt_gpu_info_init(kbdev, &hwcnt_gpu_info);
+	errcode = kbasep_hwcnt_backend_jm_gpu_info_init(kbdev, &hwcnt_gpu_info);
 	if (errcode)
 		return errcode;
 
@@ -673,15 +757,18 @@ static int kbasep_hwcnt_backend_jm_info_create(
 	info->kbdev = kbdev;
 
 #ifdef CONFIG_MALI_BIFROST_PRFCNT_SET_SECONDARY
-	info->use_secondary = true;
+	info->counter_set = KBASE_HWCNT_SET_SECONDARY;
+#elif defined(CONFIG_MALI_PRFCNT_SET_TERTIARY)
+	info->counter_set = KBASE_HWCNT_SET_TERTIARY;
 #else
-	info->use_secondary = false;
+	/* Default to primary */
+	info->counter_set = KBASE_HWCNT_SET_PRIMARY;
 #endif
 
-	errcode = kbase_hwcnt_gpu_metadata_create(
-		&hwcnt_gpu_info, info->use_secondary,
-		&info->metadata,
-		&info->dump_bytes);
+	errcode = kbase_hwcnt_jm_metadata_create(&hwcnt_gpu_info,
+						 info->counter_set,
+						 &info->metadata,
+						 &info->dump_bytes);
 	if (errcode)
 		goto error;
 
@@ -708,8 +795,8 @@ int kbase_hwcnt_backend_jm_create(
 	if (errcode)
 		return errcode;
 
-	iface->metadata = info->metadata;
 	iface->info = (struct kbase_hwcnt_backend_info *)info;
+	iface->metadata = kbasep_hwcnt_backend_jm_metadata;
 	iface->init = kbasep_hwcnt_backend_jm_init;
 	iface->term = kbasep_hwcnt_backend_jm_term;
 	iface->timestamp_ns = kbasep_hwcnt_backend_jm_timestamp_ns;
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_hwcnt_backend_jm.h b/drivers/gpu/arm/bifrost/mali_kbase_hwcnt_backend_jm.h
index f15faeba704a..5d1947ea4021 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_hwcnt_backend_jm.h
+++ b/drivers/gpu/arm/bifrost/mali_kbase_hwcnt_backend_jm.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  *
  * (C) COPYRIGHT 2018, 2020 ARM Limited. All rights reserved.
@@ -5,7 +6,7 @@
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,8 +17,6 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 /**
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_hwcnt_context.h b/drivers/gpu/arm/bifrost/mali_kbase_hwcnt_context.h
index bc50ad12c2f4..403b1c5ff149 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_hwcnt_context.h
+++ b/drivers/gpu/arm/bifrost/mali_kbase_hwcnt_context.h
@@ -1,11 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  *
- * (C) COPYRIGHT 2018 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2018, 2020 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,8 +17,6 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 /**
@@ -28,6 +27,7 @@
 #define _KBASE_HWCNT_CONTEXT_H_
 
 #include <linux/types.h>
+#include <linux/workqueue.h>
 
 struct kbase_hwcnt_backend_interface;
 struct kbase_hwcnt_context;
@@ -66,7 +66,7 @@ const struct kbase_hwcnt_metadata *kbase_hwcnt_context_metadata(
 
 /**
  * kbase_hwcnt_context_disable() - Increment the disable count of the context.
- * @hctx: Pointer to the hardware counter context.
+ * @hctx: Non-NULL pointer to the hardware counter context.
  *
  * If a call to this function increments the disable count from 0 to 1, and
  * an accumulator has been acquired, then a counter dump will be performed
@@ -84,7 +84,7 @@ void kbase_hwcnt_context_disable(struct kbase_hwcnt_context *hctx);
  * kbase_hwcnt_context_disable_atomic() - Increment the disable count of the
  *                                        context if possible in an atomic
  *                                        context.
- * @hctx: Pointer to the hardware counter context.
+ * @hctx: Non-NULL pointer to the hardware counter context.
  *
  * This function will only succeed if hardware counters are effectively already
  * disabled, i.e. there is no accumulator, the disable count is already
@@ -99,7 +99,7 @@ bool kbase_hwcnt_context_disable_atomic(struct kbase_hwcnt_context *hctx);
 
 /**
  * kbase_hwcnt_context_enable() - Decrement the disable count of the context.
- * @hctx: Pointer to the hardware counter context.
+ * @hctx: Non-NULL pointer to the hardware counter context.
  *
  * If a call to this function decrements the disable count from 1 to 0, and
  * an accumulator has been acquired, then counters will be re-enabled via the
@@ -116,4 +116,36 @@ bool kbase_hwcnt_context_disable_atomic(struct kbase_hwcnt_context *hctx);
  */
 void kbase_hwcnt_context_enable(struct kbase_hwcnt_context *hctx);
 
+/**
+ * kbase_hwcnt_context_queue_work() - Queue hardware counter related async
+ *                                    work on a workqueue specialized for
+ *                                    hardware counters.
+ * @hctx: Non-NULL pointer to the hardware counter context.
+ * @work: Non-NULL pointer to work to queue.
+ *
+ * Return: false if work was already on a queue, true otherwise.
+ *
+ * Performance counter related work is high priority, short running, and
+ * generally CPU locality is unimportant. There is no standard workqueue that
+ * can service this flavor of work.
+ *
+ * Rather than have each user of counters define their own workqueue, we have
+ * a centralized one in here that anybody using this hardware counter API
+ * should use.
+ *
+ * Before the context is destroyed, all work submitted must have been completed.
+ * Given that the work enqueued via this function is likely to be hardware
+ * counter related and will therefore use the context object, this is likely
+ * to be behavior that will occur naturally.
+ *
+ * Historical note: prior to this centralized workqueue, the system_highpri_wq
+ * was used. This was generally fine, except when a particularly long running,
+ * higher priority thread ended up scheduled on the enqueuing CPU core. Given
+ * that hardware counters require tight integration with power management,
+ * this meant progress through the power management states could be stalled
+ * for however long that higher priority thread took.
+ */
+bool kbase_hwcnt_context_queue_work(struct kbase_hwcnt_context *hctx,
+				    struct work_struct *work);
+
 #endif /* _KBASE_HWCNT_CONTEXT_H_ */
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_hwcnt_gpu.c b/drivers/gpu/arm/bifrost/mali_kbase_hwcnt_gpu.c
index 499f3bc23bec..4fba6b6d33c2 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_hwcnt_gpu.c
+++ b/drivers/gpu/arm/bifrost/mali_kbase_hwcnt_gpu.c
@@ -1,11 +1,12 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  *
- * (C) COPYRIGHT 2018-2020 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2018-2021 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,16 +17,13 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 #include "mali_kbase_hwcnt_gpu.h"
 #include "mali_kbase_hwcnt_types.h"
-#include "mali_kbase.h"
-#ifdef CONFIG_MALI_BIFROST_NO_MALI
-#include "backend/gpu/mali_kbase_model_dummy.h"
-#endif
+
+#include <linux/bug.h>
+#include <linux/err.h>
 
 #define KBASE_HWCNT_V5_BLOCK_TYPE_COUNT 4
 #define KBASE_HWCNT_V5_HEADERS_PER_BLOCK 4
@@ -35,20 +33,102 @@
 /* Index of the PRFCNT_EN header into a V5 counter block */
 #define KBASE_HWCNT_V5_PRFCNT_EN_HEADER 2
 
+/* kbasep_get_*_block_type(): always write *dst; UNDEFINED if unsupported. */
+static void kbasep_get_fe_block_type(u64 *dst, enum kbase_hwcnt_set counter_set,
+				     bool is_csf)
+{
+	switch (counter_set) {
+	case KBASE_HWCNT_SET_PRIMARY:
+		*dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE;
+		break;
+	case KBASE_HWCNT_SET_SECONDARY:
+		*dst = is_csf ? KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE2 :
+				KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_UNDEFINED;
+		break;
+	case KBASE_HWCNT_SET_TERTIARY:
+		*dst = is_csf ? KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE3 :
+				KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_UNDEFINED;
+		break;
+	default:
+		WARN_ON(true); /* unknown set: fall back to UNDEFINED */
+		*dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_UNDEFINED;
+		break;
+	}
+}
+
+static void kbasep_get_tiler_block_type(u64 *dst,
+					enum kbase_hwcnt_set counter_set)
+{
+	switch (counter_set) {
+	case KBASE_HWCNT_SET_PRIMARY:
+		*dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER;
+		break;
+	case KBASE_HWCNT_SET_SECONDARY:
+	case KBASE_HWCNT_SET_TERTIARY:
+		*dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_UNDEFINED;
+		break;
+	default:
+		WARN_ON(true); /* unknown set: fall back to UNDEFINED */
+		*dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_UNDEFINED;
+		break;
+	}
+}
+
+static void kbasep_get_sc_block_type(u64 *dst, enum kbase_hwcnt_set counter_set,
+				     bool is_csf)
+{
+	switch (counter_set) {
+	case KBASE_HWCNT_SET_PRIMARY:
+		*dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC;
+		break;
+	case KBASE_HWCNT_SET_SECONDARY:
+		*dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC2;
+		break;
+	case KBASE_HWCNT_SET_TERTIARY:
+		*dst = is_csf ? KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC3 :
+				KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_UNDEFINED;
+		break;
+	default:
+		WARN_ON(true); /* unknown set: fall back to UNDEFINED */
+		*dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_UNDEFINED;
+		break;
+	}
+}
+
+static void kbasep_get_memsys_block_type(u64 *dst,
+					 enum kbase_hwcnt_set counter_set)
+{
+	switch (counter_set) {
+	case KBASE_HWCNT_SET_PRIMARY:
+		*dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS;
+		break;
+	case KBASE_HWCNT_SET_SECONDARY:
+		*dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS2;
+		break;
+	case KBASE_HWCNT_SET_TERTIARY:
+		*dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_UNDEFINED;
+		break;
+	default:
+		WARN_ON(true); /* unknown set: fall back to UNDEFINED */
+		*dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_UNDEFINED;
+		break;
+	}
+}
+
 /**
- * kbasep_hwcnt_backend_gpu_metadata_v5_create() - Create hardware counter
- *                                                 metadata for a v5 GPU.
- * @v5_info:       Non-NULL pointer to hwcnt info for a v5 GPU.
- * @use_secondary: True if secondary performance counters should be used, else
- *                 false. Ignored if secondary counters are not supported.
+ * kbasep_hwcnt_backend_gpu_metadata_create() - Create hardware counter metadata
+ *                                              for the GPU.
+ * @gpu_info:      Non-NULL pointer to hwcnt info for current GPU.
+ * @is_csf:        true for CSF GPU, otherwise false.
+ * @counter_set:   The performance counter set to use.
  * @metadata:      Non-NULL pointer to where created metadata is stored
  *                 on success.
  *
  * Return: 0 on success, else error code.
  */
-static int kbasep_hwcnt_backend_gpu_metadata_v5_create(
-	const struct kbase_hwcnt_gpu_v5_info *v5_info,
-	bool use_secondary,
+static int kbasep_hwcnt_backend_gpu_metadata_create(
+	const struct kbase_hwcnt_gpu_info *gpu_info, const bool is_csf,
+	enum kbase_hwcnt_set counter_set,
 	const struct kbase_hwcnt_metadata **metadata)
 {
 	struct kbase_hwcnt_description desc;
@@ -58,13 +138,13 @@ static int kbasep_hwcnt_backend_gpu_metadata_v5_create(
 	size_t non_sc_block_count;
 	size_t sc_block_count;
 
-	WARN_ON(!v5_info);
+	WARN_ON(!gpu_info);
 	WARN_ON(!metadata);
 
 	/* Calculate number of block instances that aren't shader cores */
-	non_sc_block_count = 2 + v5_info->l2_count;
+	non_sc_block_count = 2 + gpu_info->l2_count;
 	/* Calculate number of block instances that are shader cores */
-	sc_block_count = fls64(v5_info->core_mask);
+	sc_block_count = fls64(gpu_info->core_mask);
 
 	/*
 	 * A system can have up to 64 shader cores, but the 64-bit
@@ -76,23 +156,21 @@ static int kbasep_hwcnt_backend_gpu_metadata_v5_create(
 	if ((sc_block_count + non_sc_block_count) > KBASE_HWCNT_AVAIL_MASK_BITS)
 		return -EINVAL;
 
-	/* One Job Manager block */
-	blks[0].type = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_JM;
+	/* One Front End block */
+	kbasep_get_fe_block_type(&blks[0].type, counter_set, is_csf);
 	blks[0].inst_cnt = 1;
 	blks[0].hdr_cnt = KBASE_HWCNT_V5_HEADERS_PER_BLOCK;
 	blks[0].ctr_cnt = KBASE_HWCNT_V5_COUNTERS_PER_BLOCK;
 
 	/* One Tiler block */
-	blks[1].type = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER;
+	kbasep_get_tiler_block_type(&blks[1].type, counter_set);
 	blks[1].inst_cnt = 1;
 	blks[1].hdr_cnt = KBASE_HWCNT_V5_HEADERS_PER_BLOCK;
 	blks[1].ctr_cnt = KBASE_HWCNT_V5_COUNTERS_PER_BLOCK;
 
 	/* l2_count memsys blks */
-	blks[2].type = use_secondary ?
-		KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS2 :
-		KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS;
-	blks[2].inst_cnt = v5_info->l2_count;
+	kbasep_get_memsys_block_type(&blks[2].type, counter_set);
+	blks[2].inst_cnt = gpu_info->l2_count;
 	blks[2].hdr_cnt = KBASE_HWCNT_V5_HEADERS_PER_BLOCK;
 	blks[2].ctr_cnt = KBASE_HWCNT_V5_COUNTERS_PER_BLOCK;
 
@@ -112,9 +190,7 @@ static int kbasep_hwcnt_backend_gpu_metadata_v5_create(
 	 * requirements, and embed the core mask into the availability mask so
 	 * we can determine later which shader cores physically exist.
 	 */
-	blks[3].type = use_secondary ?
-		KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC2 :
-		KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC;
+	kbasep_get_sc_block_type(&blks[3].type, counter_set, is_csf);
 	blks[3].inst_cnt = sc_block_count;
 	blks[3].hdr_cnt = KBASE_HWCNT_V5_HEADERS_PER_BLOCK;
 	blks[3].ctr_cnt = KBASE_HWCNT_V5_COUNTERS_PER_BLOCK;
@@ -127,72 +203,35 @@ static int kbasep_hwcnt_backend_gpu_metadata_v5_create(
 
 	desc.grp_cnt = 1;
 	desc.grps = &group;
-	desc.clk_cnt = v5_info->clk_cnt;
+	desc.clk_cnt = gpu_info->clk_cnt;
 
 	/* The JM, Tiler, and L2s are always available, and are before cores */
 	desc.avail_mask = (1ull << non_sc_block_count) - 1;
 	/* Embed the core mask directly in the availability mask */
-	desc.avail_mask |= (v5_info->core_mask << non_sc_block_count);
+	desc.avail_mask |= (gpu_info->core_mask << non_sc_block_count);
 
 	return kbase_hwcnt_metadata_create(&desc, metadata);
 }
 
 /**
- * kbasep_hwcnt_backend_gpu_v5_dump_bytes() - Get the raw dump buffer size for a
- *                                            V5 GPU.
- * @v5_info: Non-NULL pointer to hwcnt info for a v5 GPU.
+ * kbasep_hwcnt_backend_jm_dump_bytes() - Get the raw dump buffer size for the
+ *                                        GPU.
+ * @gpu_info: Non-NULL pointer to hwcnt info for the GPU.
  *
- * Return: Size of buffer the V5 GPU needs to perform a counter dump.
+ * Return: Size of buffer the GPU needs to perform a counter dump.
  */
-static size_t kbasep_hwcnt_backend_gpu_v5_dump_bytes(
-	const struct kbase_hwcnt_gpu_v5_info *v5_info)
+static size_t
+kbasep_hwcnt_backend_jm_dump_bytes(const struct kbase_hwcnt_gpu_info *gpu_info)
 {
-	WARN_ON(!v5_info);
-	return (2 + v5_info->l2_count + fls64(v5_info->core_mask)) *
-		KBASE_HWCNT_V5_VALUES_PER_BLOCK *
-		KBASE_HWCNT_VALUE_BYTES;
+	WARN_ON(!gpu_info);
+
+	return (2 + gpu_info->l2_count + fls64(gpu_info->core_mask)) *
+	       KBASE_HWCNT_V5_VALUES_PER_BLOCK * KBASE_HWCNT_VALUE_BYTES;
 }
 
-int kbase_hwcnt_gpu_info_init(
-	struct kbase_device *kbdev,
-	struct kbase_hwcnt_gpu_info *info)
-{
-	size_t clk;
-
-	if (!kbdev || !info)
-		return -EINVAL;
-
-#ifdef CONFIG_MALI_BIFROST_NO_MALI
-	/* NO_MALI uses V5 layout, regardless of the underlying platform. */
-	info->type = KBASE_HWCNT_GPU_GROUP_TYPE_V5;
-	info->v5.l2_count = KBASE_DUMMY_MODEL_MAX_MEMSYS_BLOCKS;
-	info->v5.core_mask = (1ull << KBASE_DUMMY_MODEL_MAX_SHADER_CORES) - 1;
-#else
-	{
-		const struct base_gpu_props *props = &kbdev->gpu_props.props;
-		const size_t l2_count = props->l2_props.num_l2_slices;
-		const size_t core_mask =
-			props->coherency_info.group[0].core_mask;
-
-		info->type = KBASE_HWCNT_GPU_GROUP_TYPE_V5;
-		info->v5.l2_count = l2_count;
-		info->v5.core_mask = core_mask;
-	}
-#endif
-
-	/* Determine the number of available clock domains. */
-	for (clk = 0; clk < BASE_MAX_NR_CLOCKS_REGULATORS; clk++) {
-		if (kbdev->pm.clk_rtm.clks[clk] == NULL)
-			break;
-	}
-	info->v5.clk_cnt = clk;
-
-	return 0;
-}
-
-int kbase_hwcnt_gpu_metadata_create(
-	const struct kbase_hwcnt_gpu_info *info,
-	bool use_secondary,
+int kbase_hwcnt_jm_metadata_create(
+	const struct kbase_hwcnt_gpu_info *gpu_info,
+	enum kbase_hwcnt_set counter_set,
 	const struct kbase_hwcnt_metadata **out_metadata,
 	size_t *out_dump_bytes)
 {
@@ -200,16 +239,19 @@ int kbase_hwcnt_gpu_metadata_create(
 	const struct kbase_hwcnt_metadata *metadata;
 	size_t dump_bytes;
 
-	if (!info || !out_metadata || !out_dump_bytes)
+	if (!gpu_info || !out_metadata || !out_dump_bytes)
 		return -EINVAL;
 
-	if (info->type == KBASE_HWCNT_GPU_GROUP_TYPE_V5) {
-		dump_bytes = kbasep_hwcnt_backend_gpu_v5_dump_bytes(&info->v5);
-		errcode = kbasep_hwcnt_backend_gpu_metadata_v5_create(
-			&info->v5, use_secondary, &metadata);
-	} else {
-		return -EINVAL;
-	}
+	/*
+	 * For architectures where a max_config interface is available
+	 * from the arbiter, the v5 dump bytes and the metadata v5 are
+	 * based on the maximum possible allocation of the HW in the
+	 * GPU because it needs to be prepared for the worst case where
+	 * all the available L2 cache and Shader cores are allocated.
+	 */
+	dump_bytes = kbasep_hwcnt_backend_jm_dump_bytes(gpu_info);
+	errcode = kbasep_hwcnt_backend_gpu_metadata_create(
+		gpu_info, false, counter_set, &metadata);
 	if (errcode)
 		return errcode;
 
@@ -224,9 +266,37 @@ int kbase_hwcnt_gpu_metadata_create(
 
 	return 0;
 }
-KBASE_EXPORT_TEST_API(kbase_hwcnt_gpu_metadata_create);
 
-void kbase_hwcnt_gpu_metadata_destroy(
+void kbase_hwcnt_jm_metadata_destroy(const struct kbase_hwcnt_metadata *metadata)
+{
+	if (!metadata)
+		return;
+
+	kbase_hwcnt_metadata_destroy(metadata);
+}
+
+int kbase_hwcnt_csf_metadata_create(
+	const struct kbase_hwcnt_gpu_info *gpu_info,
+	enum kbase_hwcnt_set counter_set,
+	const struct kbase_hwcnt_metadata **out_metadata)
+{
+	int errcode;
+	const struct kbase_hwcnt_metadata *metadata;
+
+	if (!gpu_info || !out_metadata)
+		return -EINVAL;
+
+	errcode = kbasep_hwcnt_backend_gpu_metadata_create(
+		gpu_info, true, counter_set, &metadata);
+	if (errcode)
+		return errcode;
+
+	*out_metadata = metadata;
+
+	return 0;
+}
+
+void kbase_hwcnt_csf_metadata_destroy(
 	const struct kbase_hwcnt_metadata *metadata)
 {
 	if (!metadata)
@@ -234,7 +304,6 @@ void kbase_hwcnt_gpu_metadata_destroy(
 
 	kbase_hwcnt_metadata_destroy(metadata);
 }
-KBASE_EXPORT_TEST_API(kbase_hwcnt_gpu_metadata_destroy);
 
 static bool is_block_type_shader(
 	const u64 grp_type,
@@ -248,24 +317,48 @@ static bool is_block_type_shader(
 		return false;
 
 	if (blk_type == KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC ||
-	    blk_type == KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC2)
+	    blk_type == KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC2 ||
+	    blk_type == KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC3)
 		is_shader = true;
 
 	return is_shader;
 }
 
-int kbase_hwcnt_gpu_dump_get(
-	struct kbase_hwcnt_dump_buffer *dst,
-	void *src,
-	const struct kbase_hwcnt_enable_map *dst_enable_map,
-	u64 pm_core_mask,
-	bool accumulate)
+static bool is_block_type_l2_cache(
+	const u64 grp_type,
+	const u64 blk_type)
+{
+	bool is_l2_cache = false;
+
+	switch (grp_type) {
+	case KBASE_HWCNT_GPU_GROUP_TYPE_V5:
+		if (blk_type == KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS ||
+		    blk_type == KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS2)
+			is_l2_cache = true;
+		break;
+	default:
+		/* Warn on unknown group type */
+		WARN_ON(true);
+	}
+
+	return is_l2_cache;
+}
+
+int kbase_hwcnt_jm_dump_get(struct kbase_hwcnt_dump_buffer *dst, void *src,
+			    const struct kbase_hwcnt_enable_map *dst_enable_map,
+			    u64 pm_core_mask,
+			    const struct kbase_hwcnt_curr_config *curr_config,
+			    bool accumulate)
 {
 	const struct kbase_hwcnt_metadata *metadata;
 	const u32 *dump_src;
 	size_t src_offset, grp, blk, blk_inst;
 	u64 core_mask = pm_core_mask;
 
+	/* Variables to deal with the current configuration */
+	int l2_count = 0;
+	bool hw_res_available = true;
+
 	if (!dst || !src || !dst_enable_map ||
 	    (dst_enable_map->metadata != dst->metadata))
 		return -EINVAL;
@@ -287,15 +380,43 @@ int kbase_hwcnt_gpu_dump_get(
 		const bool is_shader_core = is_block_type_shader(
 			kbase_hwcnt_metadata_group_type(metadata, grp),
 			blk_type, blk);
+		const bool is_l2_cache = is_block_type_l2_cache(
+			kbase_hwcnt_metadata_group_type(metadata, grp),
+			blk_type);
 
-		/* Early out if no values in the dest block are enabled */
+		/*
+		 * If more L2 blocks have been seen than there are L2 slices
+		 * currently allocated, there is no HW allocated to that block.
+		 */
+		if (is_l2_cache) {
+			l2_count++;
+			if (l2_count > curr_config->num_l2_slices)
+				hw_res_available = false;
+			else
+				hw_res_available = true;
+		}
+		/*
+		 * For the shader cores, the current shader_mask allocated is
+		 * always a subset of the maximum shader_mask, so after
+		 * skipping any L2 cache not available the available shader
+		 * cores will always have a matching set of blk instances
+		 * available to accumulate them.
+		 */
+		else {
+			hw_res_available = true;
+		}
+
+		/*
+		 * Early out if no values in the dest block are enabled or if
+		 * the resource target of the block is not available in the HW.
+		 */
 		if (kbase_hwcnt_enable_map_block_enabled(
 			dst_enable_map, grp, blk, blk_inst)) {
 			u32 *dst_blk = kbase_hwcnt_dump_buffer_block_instance(
 				dst, grp, blk, blk_inst);
 			const u32 *src_blk = dump_src + src_offset;
 
-			if (!is_shader_core || (core_mask & 1)) {
+			if ((!is_shader_core || (core_mask & 1)) && hw_res_available) {
 				if (accumulate) {
 					kbase_hwcnt_dump_buffer_block_accumulate(
 						dst_blk, src_blk, hdr_cnt,
@@ -311,14 +432,60 @@ int kbase_hwcnt_gpu_dump_get(
 			}
 		}
 
-		src_offset += (hdr_cnt + ctr_cnt);
+		/* Only advance src_offset if the HW is available */
+		if (hw_res_available)
+			src_offset += (hdr_cnt + ctr_cnt);
 		if (is_shader_core)
 			core_mask = core_mask >> 1;
 	}
 
 	return 0;
 }
-KBASE_EXPORT_TEST_API(kbase_hwcnt_gpu_dump_get);
+
+int kbase_hwcnt_csf_dump_get(struct kbase_hwcnt_dump_buffer *dst, void *src,
+			     const struct kbase_hwcnt_enable_map *dst_enable_map,
+			     bool accumulate)
+{
+	const struct kbase_hwcnt_metadata *metadata;
+	const u32 *dump_src;
+	size_t src_offset, grp, blk, blk_inst;
+
+	if (!dst || !src || !dst_enable_map ||
+	    (dst_enable_map->metadata != dst->metadata))
+		return -EINVAL;
+
+	metadata = dst->metadata;
+	dump_src = (const u32 *)src;
+	src_offset = 0;
+
+	kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) {
+		const size_t hdr_cnt = kbase_hwcnt_metadata_block_headers_count(
+			metadata, grp, blk);
+		const size_t ctr_cnt =
+			kbase_hwcnt_metadata_block_counters_count(metadata, grp,
+								  blk);
+
+		/* Early out if no values in the dest block are enabled */
+		if (kbase_hwcnt_enable_map_block_enabled(dst_enable_map, grp,
+							 blk, blk_inst)) {
+			u32 *dst_blk = kbase_hwcnt_dump_buffer_block_instance(
+				dst, grp, blk, blk_inst);
+			const u32 *src_blk = dump_src + src_offset;
+
+			if (accumulate) {
+				kbase_hwcnt_dump_buffer_block_accumulate(
+					dst_blk, src_blk, hdr_cnt, ctr_cnt);
+			} else {
+				kbase_hwcnt_dump_buffer_block_copy(
+					dst_blk, src_blk, (hdr_cnt + ctr_cnt));
+			}
+		}
+
+		src_offset += (hdr_cnt + ctr_cnt);
+	}
+
+	return 0;
+}
 
 /**
  * kbasep_hwcnt_backend_gpu_block_map_to_physical() - Convert from a block
@@ -437,7 +604,12 @@ void kbase_hwcnt_gpu_enable_map_to_physical(
 		    KBASE_HWCNT_GPU_GROUP_TYPE_V5) {
 			WARN_ON(blk_val_cnt != KBASE_HWCNT_V5_VALUES_PER_BLOCK);
 			switch ((enum kbase_hwcnt_gpu_v5_block_type)blk_type) {
-			case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_JM:
+			case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_UNDEFINED:
+				/* Nothing to do in this case. */
+				break;
+			case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE:
+			case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE2:
+			case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE3:
 				fe_bm |= *blk_map;
 				break;
 			case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER:
@@ -445,6 +617,7 @@ void kbase_hwcnt_gpu_enable_map_to_physical(
 				break;
 			case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC:
 			case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC2:
+			case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC3:
 				shader_bm |= *blk_map;
 				break;
 			case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS:
@@ -468,7 +641,24 @@ void kbase_hwcnt_gpu_enable_map_to_physical(
 	dst->mmu_l2_bm =
 		kbasep_hwcnt_backend_gpu_block_map_to_physical(mmu_l2_bm, 0);
 }
-KBASE_EXPORT_TEST_API(kbase_hwcnt_gpu_enable_map_to_physical);
+
+void kbase_hwcnt_gpu_set_to_physical(enum kbase_hwcnt_physical_set *dst,
+				     enum kbase_hwcnt_set src)
+{
+	switch (src) {
+	case KBASE_HWCNT_SET_PRIMARY:
+		*dst = KBASE_HWCNT_PHYSICAL_SET_PRIMARY;
+		break;
+	case KBASE_HWCNT_SET_SECONDARY:
+		*dst = KBASE_HWCNT_PHYSICAL_SET_SECONDARY;
+		break;
+	case KBASE_HWCNT_SET_TERTIARY:
+		*dst = KBASE_HWCNT_PHYSICAL_SET_TERTIARY;
+		break;
+	default:
+		WARN_ON(true);
+	}
+}
 
 void kbase_hwcnt_gpu_enable_map_from_physical(
 	struct kbase_hwcnt_enable_map *dst,
@@ -512,7 +702,12 @@ void kbase_hwcnt_gpu_enable_map_from_physical(
 		    KBASE_HWCNT_GPU_GROUP_TYPE_V5) {
 			WARN_ON(blk_val_cnt != KBASE_HWCNT_V5_VALUES_PER_BLOCK);
 			switch ((enum kbase_hwcnt_gpu_v5_block_type)blk_type) {
-			case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_JM:
+			case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_UNDEFINED:
+				/* Nothing to do in this case. */
+				break;
+			case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE:
+			case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE2:
+			case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE3:
 				*blk_map = fe_bm;
 				break;
 			case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER:
@@ -520,6 +715,7 @@ void kbase_hwcnt_gpu_enable_map_from_physical(
 				break;
 			case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC:
 			case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC2:
+			case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC3:
 				*blk_map = shader_bm;
 				break;
 			case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS:
@@ -534,7 +730,6 @@ void kbase_hwcnt_gpu_enable_map_from_physical(
 		}
 	}
 }
-KBASE_EXPORT_TEST_API(kbase_hwcnt_gpu_enable_map_from_physical);
 
 void kbase_hwcnt_gpu_patch_dump_headers(
 	struct kbase_hwcnt_dump_buffer *buf,
@@ -568,4 +763,3 @@ void kbase_hwcnt_gpu_patch_dump_headers(
 		}
 	}
 }
-KBASE_EXPORT_TEST_API(kbase_hwcnt_gpu_patch_dump_headers);
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_hwcnt_gpu.h b/drivers/gpu/arm/bifrost/mali_kbase_hwcnt_gpu.h
index f0d51763f7f7..9b846a94bc3a 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_hwcnt_gpu.h
+++ b/drivers/gpu/arm/bifrost/mali_kbase_hwcnt_gpu.h
@@ -1,11 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  *
- * (C) COPYRIGHT 2018, 2020 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2018, 2020-2021 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,8 +17,6 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 #ifndef _KBASE_HWCNT_GPU_H_
@@ -30,34 +29,67 @@ struct kbase_hwcnt_metadata;
 struct kbase_hwcnt_enable_map;
 struct kbase_hwcnt_dump_buffer;
 
+#define KBASE_HWCNT_V5_BLOCK_TYPE_COUNT 4
+#define KBASE_HWCNT_V5_HEADERS_PER_BLOCK 4
+#define KBASE_HWCNT_V5_COUNTERS_PER_BLOCK 60
+#define KBASE_HWCNT_V5_VALUES_PER_BLOCK                                        \
+	(KBASE_HWCNT_V5_HEADERS_PER_BLOCK + KBASE_HWCNT_V5_COUNTERS_PER_BLOCK)
+/** Index of the PRFCNT_EN header into a V5 counter block */
+#define KBASE_HWCNT_V5_PRFCNT_EN_HEADER 2
+
 /**
  * enum kbase_hwcnt_gpu_group_type - GPU hardware counter group types, used to
  *                                   identify metadata groups.
  * @KBASE_HWCNT_GPU_GROUP_TYPE_V5: GPU V5 group type.
  */
 enum kbase_hwcnt_gpu_group_type {
-	KBASE_HWCNT_GPU_GROUP_TYPE_V5 = 0x10,
+	KBASE_HWCNT_GPU_GROUP_TYPE_V5,
 };
 
 /**
  * enum kbase_hwcnt_gpu_v5_block_type - GPU V5 hardware counter block types,
  *                                      used to identify metadata blocks.
- * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_JM:      Job Manager block.
- * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER:   Tiler block.
- * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC:      Shader Core block.
- * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC2:     Secondary Shader Core block.
- * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS:  Memsys block.
- * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS2: Secondary Memsys block.
+ * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_UNDEFINED: Undefined block (e.g. if a
+ *                                                counter set that a block
+ *                                                doesn't support is used).
+ * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE:        Front End block (Job manager
+ *                                                or CSF HW).
+ * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE2:       Secondary Front End block (Job
+ *                                                manager or CSF HW).
+ * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE3:       Tertiary Front End block (Job
+ *                                                manager or CSF HW).
+ * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER:     Tiler block.
+ * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC:        Shader Core block.
+ * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC2:       Secondary Shader Core block.
+ * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC3:       Tertiary Shader Core block.
+ * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS:    Memsys block.
+ * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS2:   Secondary Memsys block.
  */
 enum kbase_hwcnt_gpu_v5_block_type {
-	KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_JM = 0x40,
+	KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_UNDEFINED,
+	KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE,
+	KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE2,
+	KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE3,
 	KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER,
 	KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC,
 	KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC2,
+	KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC3,
 	KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS,
 	KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS2,
 };
 
+/**
+ * enum kbase_hwcnt_set - GPU hardware counter sets
+ * @KBASE_HWCNT_SET_PRIMARY:   The Primary set of counters
+ * @KBASE_HWCNT_SET_SECONDARY: The Secondary set of counters
+ * @KBASE_HWCNT_SET_TERTIARY:  The Tertiary set of counters
+ */
+enum kbase_hwcnt_set {
+	KBASE_HWCNT_SET_PRIMARY,
+	KBASE_HWCNT_SET_SECONDARY,
+	KBASE_HWCNT_SET_TERTIARY,
+};
+
 /**
  * struct kbase_hwcnt_physical_enable_map - Representation of enable map
  *                                          directly used by GPU.
@@ -73,48 +105,77 @@ struct kbase_hwcnt_physical_enable_map {
 	u32 mmu_l2_bm;
 };
 
+/*
+ * Values for Hardware Counter SET_SELECT value.
+ * Directly passed to HW.
+ */
+enum kbase_hwcnt_physical_set {
+	KBASE_HWCNT_PHYSICAL_SET_PRIMARY = 0,
+	KBASE_HWCNT_PHYSICAL_SET_SECONDARY = 1,
+	KBASE_HWCNT_PHYSICAL_SET_TERTIARY = 2,
+};
+
 /**
- * struct kbase_hwcnt_gpu_v5_info - Information about hwcnt blocks on v5 GPUs.
+ * struct kbase_hwcnt_gpu_info - Information about hwcnt blocks on the GPUs.
  * @l2_count:   L2 cache count.
  * @core_mask:  Shader core mask. May be sparse.
  * @clk_cnt:    Number of clock domains available.
  */
-struct kbase_hwcnt_gpu_v5_info {
+struct kbase_hwcnt_gpu_info {
 	size_t l2_count;
 	u64 core_mask;
 	u8 clk_cnt;
 };
 
 /**
- * struct kbase_hwcnt_gpu_info - Tagged union with information about the current
- *                               GPU's hwcnt blocks.
- * @type: GPU type.
- * @v5:   Info filled in if a v5 GPU.
+ * struct kbase_hwcnt_curr_config - Current Configuration of HW allocated to the
+ *                                  GPU.
+ * @num_l2_slices:  Current number of L2 slices allocated to the GPU.
+ * @shader_present: Current shader present bitmap that is allocated to the GPU.
+ *
+ * For architectures with the max_config interface available from the Arbiter,
+ * the current resources allocated may change during runtime due to a
+ * re-partitioning (possible with partition manager). Thus, the HWC needs to be
+ * prepared to report any possible set of counters. For this reason the memory
+ * layout in the userspace is based on the maximum possible allocation. On the
+ * other hand, each partition has just the view of its currently allocated
+ * resources. Therefore, it is necessary to correctly map the dumped HWC values
+ * from the registers into this maximum memory layout so that it can be exposed
+ * to the userspace side correctly.
+ *
+ * For L2 cache just the number is enough, since the allocated ones will be
+ * accumulated on the first L2 slots available in the destination buffer.
+ *
+ * For the correct mapping of the shader cores it is necessary to skip all the
+ * L2 cache slots in the destination buffer that are not allocated. But, it is
+ * not necessary to add any logic to map the shader cores bitmap into the memory
+ * layout because the shader_present allocated will always be a subset of the
+ * maximum shader_present. This is possible because:
+ * 1 - Partitions are made of slices and they are always ordered from the ones
+ *     with more shader cores to the ones with less.
+ * 2 - The shader cores in a slice are always contiguous.
+ * 3 - A partition can only have a contiguous set of slices allocated to it.
+ * So, for example, if 4 slices are available in total, 1 with 4 cores, 2 with
+ * 3 cores and 1 with 2 cores, the maximum possible shader_present would be:
+ * 0b0011|0111|0111|1111 -> note the order and that the shader cores are
+ *                          contiguous in any slice.
+ * Supposing that a partition takes the two slices in the middle, the current
+ * config shader_present for this partition would be:
+ * 0b0111|0111 -> note that this is a subset of the maximum above and the slices
+ *                are contiguous.
+ * Therefore, by directly copying any subset of the maximum possible
+ * shader_present the mapping is already achieved.
  */
-struct kbase_hwcnt_gpu_info {
-	enum kbase_hwcnt_gpu_group_type type;
-	struct kbase_hwcnt_gpu_v5_info v5;
+struct kbase_hwcnt_curr_config {
+	size_t num_l2_slices;
+	u64 shader_present;
 };
 
 /**
- * kbase_hwcnt_gpu_info_init() - Initialise an info structure used to create the
- *                               hwcnt metadata.
- * @kbdev: Non-NULL pointer to kbase device.
- * @info:  Non-NULL pointer to data structure to be filled in.
- *
- * The initialised info struct will only be valid for use while kbdev is valid.
- */
-int kbase_hwcnt_gpu_info_init(
-	struct kbase_device *kbdev,
-	struct kbase_hwcnt_gpu_info *info);
-
-/**
- * kbase_hwcnt_gpu_metadata_create() - Create hardware counter metadata for the
- *                                     current GPU.
- * @info:           Non-NULL pointer to info struct initialised by
- *                  kbase_hwcnt_gpu_info_init.
- * @use_secondary:  True if secondary performance counters should be used, else
- *                  false. Ignored if secondary counters are not supported.
+ * kbase_hwcnt_jm_metadata_create() - Create hardware counter metadata for the
+ *                                    JM GPUs.
+ * @info:           Non-NULL pointer to info struct.
+ * @counter_set:    The performance counter set used.
  * @out_metadata:   Non-NULL pointer to where created metadata is stored on
  *                  success.
  * @out_dump_bytes: Non-NULL pointer to where the size of the GPU counter dump
@@ -122,44 +183,91 @@ int kbase_hwcnt_gpu_info_init(
  *
  * Return: 0 on success, else error code.
  */
-int kbase_hwcnt_gpu_metadata_create(
+int kbase_hwcnt_jm_metadata_create(
 	const struct kbase_hwcnt_gpu_info *info,
-	bool use_secondary,
+	enum kbase_hwcnt_set counter_set,
 	const struct kbase_hwcnt_metadata **out_metadata,
 	size_t *out_dump_bytes);
 
 /**
- * kbase_hwcnt_gpu_metadata_destroy() - Destroy GPU hardware counter metadata.
+ * kbase_hwcnt_jm_metadata_destroy() - Destroy JM GPU hardware counter metadata.
+ *
  * @metadata: Pointer to metadata to destroy.
  */
-void kbase_hwcnt_gpu_metadata_destroy(
+void kbase_hwcnt_jm_metadata_destroy(
 	const struct kbase_hwcnt_metadata *metadata);
 
 /**
- * kbase_hwcnt_gpu_dump_get() - Copy or accumulate enabled counters from the raw
+ * kbase_hwcnt_csf_metadata_create() - Create hardware counter metadata for the
+ *                                     CSF GPUs.
+ * @info:           Non-NULL pointer to info struct.
+ * @counter_set:    The performance counter set used.
+ * @out_metadata:   Non-NULL pointer to where created metadata is stored on
+ *                  success.
+ *
+ * Return: 0 on success, else error code.
+ */
+int kbase_hwcnt_csf_metadata_create(
+	const struct kbase_hwcnt_gpu_info *info,
+	enum kbase_hwcnt_set counter_set,
+	const struct kbase_hwcnt_metadata **out_metadata);
+
+/**
+ * kbase_hwcnt_csf_metadata_destroy() - Destroy CSF GPU hardware counter
+ *                                      metadata.
+ * @metadata: Pointer to metadata to destroy.
+ */
+void kbase_hwcnt_csf_metadata_destroy(
+	const struct kbase_hwcnt_metadata *metadata);
+
+/**
+ * kbase_hwcnt_jm_dump_get() - Copy or accumulate enabled counters from the raw
+ *                             dump buffer in src into the dump buffer
+ *                             abstraction in dst.
+ * @dst:            Non-NULL pointer to dst dump buffer.
+ * @src:            Non-NULL pointer to src raw dump buffer, of same length
+ *                  as returned in out_dump_bytes parameter of
+ *                  kbase_hwcnt_jm_metadata_create.
+ * @dst_enable_map: Non-NULL pointer to enable map specifying enabled values.
+ * @pm_core_mask:   Shader core mask from the PM state, synchronized with the dump.
+ * @curr_config:    Currently allocated hardware resources, used to correctly
+ *                  map the src raw dump buffer to the dst dump buffer.
+ * @accumulate:     True if counters in src should be accumulated into dst,
+ *                  rather than copied.
+ *
+ * The dst and dst_enable_map MUST have been created from the same metadata as
+ * returned from the call to kbase_hwcnt_jm_metadata_create as was used to get
+ * the length of src.
+ *
+ * Return: 0 on success, else error code.
+ */
+int kbase_hwcnt_jm_dump_get(struct kbase_hwcnt_dump_buffer *dst, void *src,
+			    const struct kbase_hwcnt_enable_map *dst_enable_map,
+			    const u64 pm_core_mask,
+			    const struct kbase_hwcnt_curr_config *curr_config,
+			    bool accumulate);
+
+/**
+ * kbase_hwcnt_csf_dump_get() - Copy or accumulate enabled counters from the raw
  *                              dump buffer in src into the dump buffer
  *                              abstraction in dst.
  * @dst:            Non-NULL pointer to dst dump buffer.
  * @src:            Non-NULL pointer to src raw dump buffer, of same length
  *                  as returned in out_dump_bytes parameter of
- *                  kbase_hwcnt_gpu_metadata_create.
+ *                  kbase_hwcnt_csf_metadata_create.
  * @dst_enable_map: Non-NULL pointer to enable map specifying enabled values.
- * @pm_core_mask:   PM state synchronized shaders core mask with the dump.
  * @accumulate:     True if counters in src should be accumulated into dst,
  *                  rather than copied.
  *
  * The dst and dst_enable_map MUST have been created from the same metadata as
- * returned from the call to kbase_hwcnt_gpu_metadata_create as was used to get
+ * returned from the call to kbase_hwcnt_csf_metadata_create as was used to get
  * the length of src.
  *
  * Return: 0 on success, else error code.
  */
-int kbase_hwcnt_gpu_dump_get(
-	struct kbase_hwcnt_dump_buffer *dst,
-	void *src,
-	const struct kbase_hwcnt_enable_map *dst_enable_map,
-	const u64 pm_core_mask,
-	bool accumulate);
+int kbase_hwcnt_csf_dump_get(struct kbase_hwcnt_dump_buffer *dst, void *src,
+			     const struct kbase_hwcnt_enable_map *dst_enable_map,
+			     bool accumulate);
 
 /**
  * kbase_hwcnt_gpu_enable_map_to_physical() - Convert an enable map abstraction
@@ -168,7 +276,7 @@ int kbase_hwcnt_gpu_dump_get(
  * @src: Non-NULL pointer to src enable map abstraction.
  *
  * The src must have been created from a metadata returned from a call to
- * kbase_hwcnt_gpu_metadata_create.
+ * kbase_hwcnt_jm_metadata_create or kbase_hwcnt_csf_metadata_create.
  *
  * This is a lossy conversion, as the enable map abstraction has one bit per
  * individual counter block value, but the physical enable map uses 1 bit for
@@ -178,6 +286,16 @@ void kbase_hwcnt_gpu_enable_map_to_physical(
 	struct kbase_hwcnt_physical_enable_map *dst,
 	const struct kbase_hwcnt_enable_map *src);
 
+/**
+ * kbase_hwcnt_gpu_set_to_physical() - Map counter set selection to physical
+ *                                     SET_SELECT value.
+ *
+ * @dst: Non-NULL pointer to dst physical SET_SELECT value.
+ * @src: Counter set selection to map.
+ */
+void kbase_hwcnt_gpu_set_to_physical(enum kbase_hwcnt_physical_set *dst,
+				     enum kbase_hwcnt_set src);
+
 /**
  * kbase_hwcnt_gpu_enable_map_from_physical() - Convert a physical enable map to
  *                                              an enable map abstraction.
@@ -185,7 +303,7 @@ void kbase_hwcnt_gpu_enable_map_to_physical(
  * @src: Non-NULL pointer to src physical enable map.
  *
  * The dst must have been created from a metadata returned from a call to
- * kbase_hwcnt_gpu_metadata_create.
+ * kbase_hwcnt_jm_metadata_create or kbase_hwcnt_csf_metadata_create.
  *
  * This is a lossy conversion, as the physical enable map can technically
  * support counter blocks with 128 counters each, but no hardware actually uses
@@ -204,7 +322,7 @@ void kbase_hwcnt_gpu_enable_map_from_physical(
  * @enable_map: Non-NULL pointer to enable map.
  *
  * The buf and enable_map must have been created from a metadata returned from
- * a call to kbase_hwcnt_gpu_metadata_create.
+ * a call to kbase_hwcnt_jm_metadata_create or kbase_hwcnt_csf_metadata_create.
  *
  * This function should be used before handing off a dump buffer over the
  * kernel-user boundary, to ensure the header is accurate for the enable map
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_hwcnt_legacy.c b/drivers/gpu/arm/bifrost/mali_kbase_hwcnt_legacy.c
index 794ef39e365c..45cd9fb12835 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_hwcnt_legacy.c
+++ b/drivers/gpu/arm/bifrost/mali_kbase_hwcnt_legacy.c
@@ -1,11 +1,12 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  *
- * (C) COPYRIGHT 2018, 2020 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2018, 2020-2021 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,15 +17,13 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 #include "mali_kbase_hwcnt_legacy.h"
 #include "mali_kbase_hwcnt_virtualizer.h"
 #include "mali_kbase_hwcnt_types.h"
 #include "mali_kbase_hwcnt_gpu.h"
-#include "mali_kbase_ioctl.h"
+#include <uapi/gpu/arm/bifrost/mali_kbase_ioctl.h>
 
 #include <linux/slab.h>
 #include <linux/uaccess.h>
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_hwcnt_legacy.h b/drivers/gpu/arm/bifrost/mali_kbase_hwcnt_legacy.h
index 7a610ae378a2..deaf7eb39236 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_hwcnt_legacy.h
+++ b/drivers/gpu/arm/bifrost/mali_kbase_hwcnt_legacy.h
@@ -1,11 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  *
- * (C) COPYRIGHT 2018 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2018, 2020 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,8 +17,6 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 /**
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_hwcnt_types.c b/drivers/gpu/arm/bifrost/mali_kbase_hwcnt_types.c
index 2b9fe02acd75..931fbf3f2051 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_hwcnt_types.c
+++ b/drivers/gpu/arm/bifrost/mali_kbase_hwcnt_types.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  *
  * (C) COPYRIGHT 2018, 2020 ARM Limited. All rights reserved.
@@ -5,7 +6,7 @@
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,12 +17,11 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 #include "mali_kbase_hwcnt_types.h"
-#include "mali_kbase.h"
+
+#include <linux/slab.h>
 
 /* Minimum alignment of each block of hardware counters */
 #define KBASE_HWCNT_BLOCK_BYTE_ALIGNMENT \
@@ -175,13 +175,11 @@ int kbase_hwcnt_metadata_create(
 	*out_metadata = metadata;
 	return 0;
 }
-KBASE_EXPORT_TEST_API(kbase_hwcnt_metadata_create);
 
 void kbase_hwcnt_metadata_destroy(const struct kbase_hwcnt_metadata *metadata)
 {
 	kfree(metadata);
 }
-KBASE_EXPORT_TEST_API(kbase_hwcnt_metadata_destroy);
 
 int kbase_hwcnt_enable_map_alloc(
 	const struct kbase_hwcnt_metadata *metadata,
@@ -205,7 +203,6 @@ int kbase_hwcnt_enable_map_alloc(
 	enable_map->hwcnt_enable_map = enable_map_buf;
 	return 0;
 }
-KBASE_EXPORT_TEST_API(kbase_hwcnt_enable_map_alloc);
 
 void kbase_hwcnt_enable_map_free(struct kbase_hwcnt_enable_map *enable_map)
 {
@@ -216,7 +213,6 @@ void kbase_hwcnt_enable_map_free(struct kbase_hwcnt_enable_map *enable_map)
 	enable_map->hwcnt_enable_map = NULL;
 	enable_map->metadata = NULL;
 }
-KBASE_EXPORT_TEST_API(kbase_hwcnt_enable_map_free);
 
 int kbase_hwcnt_dump_buffer_alloc(
 	const struct kbase_hwcnt_metadata *metadata,
@@ -243,7 +239,6 @@ int kbase_hwcnt_dump_buffer_alloc(
 
 	return 0;
 }
-KBASE_EXPORT_TEST_API(kbase_hwcnt_dump_buffer_alloc);
 
 void kbase_hwcnt_dump_buffer_free(struct kbase_hwcnt_dump_buffer *dump_buf)
 {
@@ -253,7 +248,6 @@ void kbase_hwcnt_dump_buffer_free(struct kbase_hwcnt_dump_buffer *dump_buf)
 	kfree(dump_buf->dump_buf);
 	memset(dump_buf, 0, sizeof(*dump_buf));
 }
-KBASE_EXPORT_TEST_API(kbase_hwcnt_dump_buffer_free);
 
 int kbase_hwcnt_dump_buffer_array_alloc(
 	const struct kbase_hwcnt_metadata *metadata,
@@ -309,7 +303,6 @@ int kbase_hwcnt_dump_buffer_array_alloc(
 
 	return 0;
 }
-KBASE_EXPORT_TEST_API(kbase_hwcnt_dump_buffer_array_alloc);
 
 void kbase_hwcnt_dump_buffer_array_free(
 	struct kbase_hwcnt_dump_buffer_array *dump_bufs)
@@ -321,7 +314,6 @@ void kbase_hwcnt_dump_buffer_array_free(
 	free_pages(dump_bufs->page_addr, dump_bufs->page_order);
 	memset(dump_bufs, 0, sizeof(*dump_bufs));
 }
-KBASE_EXPORT_TEST_API(kbase_hwcnt_dump_buffer_array_free);
 
 void kbase_hwcnt_dump_buffer_zero(
 	struct kbase_hwcnt_dump_buffer *dst,
@@ -356,7 +348,6 @@ void kbase_hwcnt_dump_buffer_zero(
 	memset(dst->clk_cnt_buf, 0,
 		sizeof(*dst->clk_cnt_buf) * metadata->clk_cnt);
 }
-KBASE_EXPORT_TEST_API(kbase_hwcnt_dump_buffer_zero);
 
 void kbase_hwcnt_dump_buffer_zero_strict(
 	struct kbase_hwcnt_dump_buffer *dst)
@@ -369,7 +360,6 @@ void kbase_hwcnt_dump_buffer_zero_strict(
 	memset(dst->clk_cnt_buf, 0,
 		sizeof(*dst->clk_cnt_buf) * dst->metadata->clk_cnt);
 }
-KBASE_EXPORT_TEST_API(kbase_hwcnt_dump_buffer_zero_strict);
 
 void kbase_hwcnt_dump_buffer_zero_non_enabled(
 	struct kbase_hwcnt_dump_buffer *dst,
@@ -409,7 +399,6 @@ void kbase_hwcnt_dump_buffer_zero_non_enabled(
 		}
 	}
 }
-KBASE_EXPORT_TEST_API(kbase_hwcnt_dump_buffer_zero_non_enabled);
 
 void kbase_hwcnt_dump_buffer_copy(
 	struct kbase_hwcnt_dump_buffer *dst,
@@ -455,7 +444,6 @@ void kbase_hwcnt_dump_buffer_copy(
 			dst->clk_cnt_buf[clk] = src->clk_cnt_buf[clk];
 	}
 }
-KBASE_EXPORT_TEST_API(kbase_hwcnt_dump_buffer_copy);
 
 void kbase_hwcnt_dump_buffer_copy_strict(
 	struct kbase_hwcnt_dump_buffer *dst,
@@ -502,7 +490,6 @@ void kbase_hwcnt_dump_buffer_copy_strict(
 		dst->clk_cnt_buf[clk] = clk_enabled ? src->clk_cnt_buf[clk] : 0;
 	}
 }
-KBASE_EXPORT_TEST_API(kbase_hwcnt_dump_buffer_copy_strict);
 
 void kbase_hwcnt_dump_buffer_accumulate(
 	struct kbase_hwcnt_dump_buffer *dst,
@@ -552,7 +539,6 @@ void kbase_hwcnt_dump_buffer_accumulate(
 			dst->clk_cnt_buf[clk] += src->clk_cnt_buf[clk];
 	}
 }
-KBASE_EXPORT_TEST_API(kbase_hwcnt_dump_buffer_accumulate);
 
 void kbase_hwcnt_dump_buffer_accumulate_strict(
 	struct kbase_hwcnt_dump_buffer *dst,
@@ -601,4 +587,3 @@ void kbase_hwcnt_dump_buffer_accumulate_strict(
 			dst->clk_cnt_buf[clk] = 0;
 	}
 }
-KBASE_EXPORT_TEST_API(kbase_hwcnt_dump_buffer_accumulate_strict);
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_hwcnt_types.h b/drivers/gpu/arm/bifrost/mali_kbase_hwcnt_types.h
index 3394b1271cc8..e775393b57dc 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_hwcnt_types.h
+++ b/drivers/gpu/arm/bifrost/mali_kbase_hwcnt_types.h
@@ -1,11 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  *
- * (C) COPYRIGHT 2018, 2020 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2018, 2020-2021 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,8 +17,6 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 /**
@@ -85,7 +84,6 @@
 #include <linux/kernel.h>
 #include <linux/string.h>
 #include <linux/types.h>
-#include "mali_malisw.h"
 
 /* Number of bytes in each bitfield */
 #define KBASE_HWCNT_BITFIELD_BYTES (sizeof(u64))
@@ -1115,10 +1113,10 @@ static inline void kbase_hwcnt_dump_buffer_block_accumulate_strict(
 }
 
 /**
- * @brief Iterate over each clock domain in the metadata.
+ * kbase_hwcnt_metadata_for_each_clock() - Iterate over each clock domain in the metadata.
  *
- * @param[in] md          Non-NULL pointer to metadata.
- * @param[in] clk         size_t variable used as clock iterator.
+ * @md:          Non-NULL pointer to metadata.
+ * @clk:         size_t variable used as clock iterator.
  */
 #define kbase_hwcnt_metadata_for_each_clock(md, clk)    \
 	for ((clk) = 0; (clk) < (md)->clk_cnt; (clk)++)
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_hwcnt_virtualizer.c b/drivers/gpu/arm/bifrost/mali_kbase_hwcnt_virtualizer.c
index 917e47cda0f9..4bb84890fb67 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_hwcnt_virtualizer.c
+++ b/drivers/gpu/arm/bifrost/mali_kbase_hwcnt_virtualizer.c
@@ -1,11 +1,12 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  *
- * (C) COPYRIGHT 2018 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2018, 2020 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,17 +17,12 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 #include "mali_kbase_hwcnt_virtualizer.h"
 #include "mali_kbase_hwcnt_accumulator.h"
 #include "mali_kbase_hwcnt_context.h"
 #include "mali_kbase_hwcnt_types.h"
-#include "mali_malisw.h"
-#include "mali_kbase_debug.h"
-#include "mali_kbase_linux.h"
 
 #include <linux/mutex.h>
 #include <linux/slab.h>
@@ -87,7 +83,6 @@ const struct kbase_hwcnt_metadata *kbase_hwcnt_virtualizer_metadata(
 
 	return hvirt->metadata;
 }
-KBASE_EXPORT_TEST_API(kbase_hwcnt_virtualizer_metadata);
 
 /**
  * kbasep_hwcnt_virtualizer_client_free - Free a virtualizer client's memory.
@@ -496,7 +491,6 @@ int kbase_hwcnt_virtualizer_client_set_counters(
 
 	return errcode;
 }
-KBASE_EXPORT_TEST_API(kbase_hwcnt_virtualizer_client_set_counters);
 
 /**
  * kbasep_hwcnt_virtualizer_client_dump - Perform a dump of the client's
@@ -686,7 +680,6 @@ int kbase_hwcnt_virtualizer_client_dump(
 
 	return errcode;
 }
-KBASE_EXPORT_TEST_API(kbase_hwcnt_virtualizer_client_dump);
 
 int kbase_hwcnt_virtualizer_client_create(
 	struct kbase_hwcnt_virtualizer *hvirt,
@@ -719,7 +712,6 @@ int kbase_hwcnt_virtualizer_client_create(
 	*out_hvcli = hvcli;
 	return 0;
 }
-KBASE_EXPORT_TEST_API(kbase_hwcnt_virtualizer_client_create);
 
 void kbase_hwcnt_virtualizer_client_destroy(
 	struct kbase_hwcnt_virtualizer_client *hvcli)
@@ -735,7 +727,6 @@ void kbase_hwcnt_virtualizer_client_destroy(
 
 	kbasep_hwcnt_virtualizer_client_free(hvcli);
 }
-KBASE_EXPORT_TEST_API(kbase_hwcnt_virtualizer_client_destroy);
 
 int kbase_hwcnt_virtualizer_init(
 	struct kbase_hwcnt_context *hctx,
@@ -766,7 +757,6 @@ int kbase_hwcnt_virtualizer_init(
 	*out_hvirt = virt;
 	return 0;
 }
-KBASE_EXPORT_TEST_API(kbase_hwcnt_virtualizer_init);
 
 void kbase_hwcnt_virtualizer_term(
 	struct kbase_hwcnt_virtualizer *hvirt)
@@ -787,4 +777,12 @@ void kbase_hwcnt_virtualizer_term(
 
 	kfree(hvirt);
 }
-KBASE_EXPORT_TEST_API(kbase_hwcnt_virtualizer_term);
+
+bool kbase_hwcnt_virtualizer_queue_work(struct kbase_hwcnt_virtualizer *hvirt,
+					struct work_struct *work)
+{
+	if (WARN_ON(!hvirt) || WARN_ON(!work))
+		return false;
+	/* Thin wrapper: hand the work to the context backing this virtualizer. */
+	return kbase_hwcnt_context_queue_work(hvirt->hctx, work);
+}
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_hwcnt_virtualizer.h b/drivers/gpu/arm/bifrost/mali_kbase_hwcnt_virtualizer.h
index 8f628c3306fc..1bce6914f11a 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_hwcnt_virtualizer.h
+++ b/drivers/gpu/arm/bifrost/mali_kbase_hwcnt_virtualizer.h
@@ -1,11 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  *
- * (C) COPYRIGHT 2018 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2018, 2020 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,8 +17,6 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 /**
@@ -31,6 +30,7 @@
 #define _KBASE_HWCNT_VIRTUALIZER_H_
 
 #include <linux/types.h>
+#include <linux/workqueue.h>
 
 struct kbase_hwcnt_context;
 struct kbase_hwcnt_virtualizer;
@@ -142,4 +142,19 @@ int kbase_hwcnt_virtualizer_client_dump(
 	u64 *ts_end_ns,
 	struct kbase_hwcnt_dump_buffer *dump_buf);
 
+/**
+ * kbase_hwcnt_virtualizer_queue_work() - Queue hardware counter related async
+ *                                        work on a workqueue specialized for
+ *                                        hardware counters.
+ * @hvirt: Non-NULL pointer to the hardware counter virtualizer.
+ * @work:  Non-NULL pointer to work to queue.
+ *
+ * Return: false if already queued or if @hvirt or @work is NULL, else true.
+ *
+ * This is a convenience function that directly calls the underlying
+ * kbase_hwcnt_context's kbase_hwcnt_context_queue_work.
+ */
+bool kbase_hwcnt_virtualizer_queue_work(struct kbase_hwcnt_virtualizer *hvirt,
+					struct work_struct *work);
+
 #endif /* _KBASE_HWCNT_VIRTUALIZER_H_ */
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_jd.c b/drivers/gpu/arm/bifrost/mali_kbase_jd.c
index d0674d1bd8f4..f680a5ee144b 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_jd.c
+++ b/drivers/gpu/arm/bifrost/mali_kbase_jd.c
@@ -1,11 +1,12 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  *
- * (C) COPYRIGHT 2010-2020 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2021 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,12 +17,8 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
-
-
 #include <linux/dma-buf.h>
 #ifdef CONFIG_COMPAT
 #include <linux/compat.h>
@@ -30,6 +27,7 @@
 #include <linux/random.h>
 #include <linux/version.h>
 #include <linux/ratelimit.h>
+#include <linux/priority_control_manager.h>
 
 #include <mali_kbase_jm.h>
 #include <mali_kbase_kinstr_jm.h>
@@ -44,13 +42,9 @@
 
 #define beenthere(kctx, f, a...)  dev_dbg(kctx->kbdev->dev, "%s:" f, __func__, ##a)
 
-#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 8, 0)
-/* random32 was renamed to prandom_u32 in 3.8 */
-#define prandom_u32 random32
-#endif
-
 /* Return whether katom will run on the GPU or not. Currently only soft jobs and
- * dependency-only atoms do not run on the GPU */
+ * dependency-only atoms do not run on the GPU
+ */
 #define IS_GPU_ATOM(katom) (!((katom->core_req & BASE_JD_REQ_SOFT_JOB) ||  \
 			((katom->core_req & BASE_JD_REQ_ATOM_TYPE) ==    \
 							BASE_JD_REQ_DEP)))
@@ -80,7 +74,7 @@ static void jd_mark_atom_complete(struct kbase_jd_atom *katom)
 {
 	katom->status = KBASE_JD_ATOM_STATE_COMPLETED;
 	kbase_kinstr_jm_atom_complete(katom);
-	dev_dbg(katom->kctx->kbdev->dev, "Atom %p status to completed\n",
+	dev_dbg(katom->kctx->kbdev->dev, "Atom %pK status to completed\n",
 		(void *)katom);
 }
 
@@ -95,7 +89,7 @@ static bool jd_run_atom(struct kbase_jd_atom *katom)
 {
 	struct kbase_context *kctx = katom->kctx;
 
-	dev_dbg(kctx->kbdev->dev, "JD run atom %p in kctx %p\n",
+	dev_dbg(kctx->kbdev->dev, "JD run atom %pK in kctx %pK\n",
 		(void *)katom, (void *)kctx);
 
 	KBASE_DEBUG_ASSERT(katom->status != KBASE_JD_ATOM_STATE_UNUSED);
@@ -105,23 +99,23 @@ static bool jd_run_atom(struct kbase_jd_atom *katom)
 		trace_sysgraph(SGR_SUBMIT, kctx->id,
 				kbase_jd_atom_id(katom->kctx, katom));
 		jd_mark_atom_complete(katom);
-		return 0;
+		return false;
 	} else if (katom->core_req & BASE_JD_REQ_SOFT_JOB) {
 		/* Soft-job */
 		if (katom->will_fail_event_code) {
 			kbase_finish_soft_job(katom);
 			jd_mark_atom_complete(katom);
-			return 0;
+			return false;
 		}
 		if (kbase_process_soft_job(katom) == 0) {
 			kbase_finish_soft_job(katom);
 			jd_mark_atom_complete(katom);
 		}
-		return 0;
+		return false;
 	}
 
 	katom->status = KBASE_JD_ATOM_STATE_IN_JS;
-	dev_dbg(kctx->kbdev->dev, "Atom %p status to in JS\n", (void *)katom);
+	dev_dbg(kctx->kbdev->dev, "Atom %pK status to in JS\n", (void *)katom);
 	/* Queue an action about whether we should try scheduling a context */
 	return kbasep_js_add_job(kctx, katom);
 }
@@ -243,7 +237,8 @@ static int kbase_jd_pre_external_resources(struct kbase_jd_atom *katom, const st
 
 	/* copy user buffer to the end of our real buffer.
 	 * Make sure the struct sizes haven't changed in a way
-	 * we don't support */
+	 * we don't support
+	 */
 	BUILD_BUG_ON(sizeof(*input_extres) > sizeof(*katom->extres));
 	input_extres = (struct base_external_resource *)
 			(((unsigned char *)katom->extres) +
@@ -259,13 +254,14 @@ static int kbase_jd_pre_external_resources(struct kbase_jd_atom *katom, const st
 
 #ifdef CONFIG_MALI_BIFROST_DMA_FENCE
 	if (implicit_sync) {
-		info.resv_objs = kmalloc_array(katom->nr_extres,
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(5, 4, 0))
-					sizeof(struct reservation_object *),
+		info.resv_objs =
+			kmalloc_array(katom->nr_extres,
+#if (KERNEL_VERSION(5, 4, 0) > LINUX_VERSION_CODE)
+				      sizeof(struct reservation_object *),
 #else
-					sizeof(struct dma_resv *),
+				      sizeof(struct dma_resv *),
 #endif
-					GFP_KERNEL);
+				      GFP_KERNEL);
 		if (!info.resv_objs) {
 			err_ret_val = -ENOMEM;
 			goto early_err_out;
@@ -319,7 +315,7 @@ static int kbase_jd_pre_external_resources(struct kbase_jd_atom *katom, const st
 #ifdef CONFIG_MALI_BIFROST_DMA_FENCE
 		if (implicit_sync &&
 		    reg->gpu_alloc->type == KBASE_MEM_TYPE_IMPORTED_UMM) {
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(5, 4, 0))
+#if (KERNEL_VERSION(5, 4, 0) > LINUX_VERSION_CODE)
 			struct reservation_object *resv;
 #else
 			struct dma_resv *resv;
@@ -336,7 +332,7 @@ static int kbase_jd_pre_external_resources(struct kbase_jd_atom *katom, const st
 		 * at least not before the first write) as we overwrite elements
 		 * as we loop and could be overwriting ourself, so no writes
 		 * until the last read for an element.
-		 * */
+		 */
 		katom->extres[res_no].gpu_address = reg->start_pfn << PAGE_SHIFT; /* save the start_pfn (as an address, not pfn) to use fast lookup later */
 		katom->extres[res_no].alloc = alloc;
 	}
@@ -463,9 +459,6 @@ static inline void jd_resolve_dep(struct list_head *out_list,
 #endif /* CONFIG_MALI_BIFROST_DMA_FENCE */
 
 			if (dep_satisfied) {
-				trace_sysgraph(SGR_DEP_RES,
-				   dep_atom->kctx->id,
-				   kbase_jd_atom_id(katom->kctx, dep_atom));
 				dep_atom->in_jd_list = true;
 				list_add_tail(&dep_atom->jd_item, out_list);
 			}
@@ -489,7 +482,8 @@ static inline void jd_resolve_dep(struct list_head *out_list,
 static bool is_dep_valid(struct kbase_jd_atom *katom)
 {
 	/* If there's no dependency then this is 'valid' from the perspective of
-	 * early dependency submission */
+	 * early dependency submission
+	 */
 	if (!katom)
 		return true;
 
@@ -498,7 +492,8 @@ static bool is_dep_valid(struct kbase_jd_atom *katom)
 		return false;
 
 	/* If dependency has completed and has failed or will fail then it is
-	 * not valid */
+	 * not valid
+	 */
 	if (katom->status >= KBASE_JD_ATOM_STATE_HW_COMPLETED &&
 			(katom->event_code != BASE_JD_EVENT_DONE ||
 			katom->will_fail_event_code))
@@ -552,10 +547,6 @@ static void jd_try_submitting_deps(struct list_head *out_list,
 #endif /* CONFIG_MALI_BIFROST_DMA_FENCE */
 
 				if (dep0_valid && dep1_valid && dep_satisfied) {
-					trace_sysgraph(SGR_DEP_RES,
-					    dep_atom->kctx->id,
-					    kbase_jd_atom_id(dep_atom->kctx,
-					    dep_atom));
 					dep_atom->in_jd_list = true;
 					list_add(&dep_atom->jd_item, out_list);
 				}
@@ -640,8 +631,8 @@ static void jd_update_jit_usage(struct kbase_jd_atom *katom)
 			u64 addr_end;
 
 			if (reg->flags & KBASE_REG_TILER_ALIGN_TOP) {
-				const unsigned long extent_bytes = reg->extent
-					<< PAGE_SHIFT;
+				const unsigned long extension_bytes =
+					reg->extension << PAGE_SHIFT;
 				const u64 low_ptr = ptr[LOW];
 				const u64 high_ptr = ptr[HIGH];
 
@@ -662,8 +653,8 @@ static void jd_update_jit_usage(struct kbase_jd_atom *katom)
 				 * this, but here to avoid future maintenance
 				 * hazards
 				 */
-				WARN_ON(!is_power_of_2(extent_bytes));
-				addr_end = ALIGN(read_val, extent_bytes);
+				WARN_ON(!is_power_of_2(extension_bytes));
+				addr_end = ALIGN(read_val, extension_bytes);
 			} else {
 				addr_end = read_val = READ_ONCE(*ptr);
 			}
@@ -735,7 +726,8 @@ bool jd_done_nolock(struct kbase_jd_atom *katom,
 #endif /* MALI_JIT_PRESSURE_LIMIT_BASE */
 
 	/* This is needed in case an atom is failed due to being invalid, this
-	 * can happen *before* the jobs that the atom depends on have completed */
+	 * can happen *before* the jobs that the atom depends on have completed
+	 */
 	for (i = 0; i < 2; i++) {
 		if (kbase_jd_katom_dep_atom(&katom->dep[i])) {
 			list_del(&katom->dep_item[i]);
@@ -766,7 +758,7 @@ bool jd_done_nolock(struct kbase_jd_atom *katom,
 			list_del(runnable_jobs.next);
 			node->in_jd_list = false;
 
-			dev_dbg(kctx->kbdev->dev, "List node %p has status %d\n",
+			dev_dbg(kctx->kbdev->dev, "List node %pK has status %d\n",
 				node, node->status);
 
 			KBASE_DEBUG_ASSERT(node->status != KBASE_JD_ATOM_STATE_UNUSED);
@@ -793,7 +785,8 @@ bool jd_done_nolock(struct kbase_jd_atom *katom,
 					!node->will_fail_event_code) {
 				/* Node successfully submitted, try submitting
 				 * dependencies as they may now be representable
-				 * in JS */
+				 * in JS
+				 */
 				jd_try_submitting_deps(&runnable_jobs, node);
 			}
 		}
@@ -809,10 +802,14 @@ bool jd_done_nolock(struct kbase_jd_atom *katom,
 
 		/* Decrement and check the TOTAL number of jobs. This includes
 		 * those not tracked by the scheduler: 'not ready to run' and
-		 * 'dependency-only' jobs. */
+		 * 'dependency-only' jobs.
+		 */
 		if (--kctx->jctx.job_nr == 0)
-			wake_up(&kctx->jctx.zero_jobs_wait);	/* All events are safely queued now, and we can signal any waiter
-								 * that we've got no more jobs (so we can be safely terminated) */
+			/* All events are safely queued now, and we can signal
+			 * any waiter that we've got no more jobs (so we can be
+			 * safely terminated)
+			 */
+			wake_up(&kctx->jctx.zero_jobs_wait);
 	}
 
 	return need_to_try_schedule_context;
@@ -904,13 +901,14 @@ static bool jd_submit_atom(struct kbase_context *const kctx,
 	unsigned long flags;
 	enum kbase_jd_atom_state status;
 
-	dev_dbg(kbdev->dev, "User did JD submit atom %p\n", (void *)katom);
+	dev_dbg(kbdev->dev, "User did JD submit atom %pK\n", (void *)katom);
 
 	/* Update the TOTAL number of jobs. This includes those not tracked by
-	 * the scheduler: 'not ready to run' and 'dependency-only' jobs. */
+	 * the scheduler: 'not ready to run' and 'dependency-only' jobs.
+	 */
 	jctx->job_nr++;
 
-#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)
+#if KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE
 	katom->start_timestamp.tv64 = 0;
 #else
 	katom->start_timestamp = 0;
@@ -978,12 +976,13 @@ static bool jd_submit_atom(struct kbase_context *const kctx,
 				katom->event_code = BASE_JD_EVENT_JOB_CONFIG_FAULT;
 				katom->status = KBASE_JD_ATOM_STATE_COMPLETED;
 				dev_dbg(kbdev->dev,
-					"Atom %p status to completed\n",
+					"Atom %pK status to completed\n",
 					(void *)katom);
 
 				/* Wrong dependency setup. Atom will be sent
 				 * back to user space. Do not record any
-				 * dependencies. */
+				 * dependencies.
+				 */
 				jd_trace_atom_submit(kctx, katom, NULL);
 
 				return jd_done_nolock(katom, NULL);
@@ -1020,7 +1019,7 @@ static bool jd_submit_atom(struct kbase_context *const kctx,
 			/* Atom has completed, propagate the error code if any */
 			katom->event_code = dep_atom->event_code;
 			katom->status = KBASE_JD_ATOM_STATE_QUEUED;
-			dev_dbg(kbdev->dev, "Atom %p status to queued\n",
+			dev_dbg(kbdev->dev, "Atom %pK status to queued\n",
 				(void *)katom);
 
 			/* This atom will be sent back to user space.
@@ -1053,6 +1052,8 @@ static bool jd_submit_atom(struct kbase_context *const kctx,
 
 			return jd_done_nolock(katom, NULL);
 		}
+
+		katom->will_fail_event_code = katom->event_code;
 	}
 
 	/* These must occur after the above loop to ensure that an atom
@@ -1061,13 +1062,16 @@ static bool jd_submit_atom(struct kbase_context *const kctx,
 	 */
 	katom->event_code = BASE_JD_EVENT_DONE;
 	katom->status = KBASE_JD_ATOM_STATE_QUEUED;
-	dev_dbg(kbdev->dev, "Atom %p status to queued\n", (void *)katom);
+	dev_dbg(kbdev->dev, "Atom %pK status to queued\n", (void *)katom);
 
 	/* For invalid priority, be most lenient and choose the default */
 	sched_prio = kbasep_js_atom_prio_to_sched_prio(user_atom->prio);
 	if (sched_prio == KBASE_JS_ATOM_SCHED_PRIO_INVALID)
 		sched_prio = KBASE_JS_ATOM_SCHED_PRIO_DEFAULT;
-	katom->sched_priority = sched_prio;
+
+	/* Cap the priority to jctx.max_priority */
+	katom->sched_priority = (sched_prio < kctx->jctx.max_priority) ?
+			kctx->jctx.max_priority : sched_prio;
 
 	/* Create a new atom. */
 	jd_trace_atom_submit(kctx, katom, &katom->sched_priority);
@@ -1195,7 +1199,7 @@ static bool jd_submit_atom(struct kbase_context *const kctx,
 		bool need_to_try_schedule_context;
 
 		katom->status = KBASE_JD_ATOM_STATE_IN_JS;
-		dev_dbg(kctx->kbdev->dev, "Atom %p status to in JS\n",
+		dev_dbg(kctx->kbdev->dev, "Atom %pK status to in JS\n",
 			(void *)katom);
 
 		need_to_try_schedule_context = kbasep_js_add_job(kctx, katom);
@@ -1266,7 +1270,7 @@ int kbase_jd_submit(struct kbase_context *kctx,
 
 		if (unlikely(jd_atom_is_v2)) {
 			if (copy_from_user(&user_atom.jc, user_addr, sizeof(struct base_jd_atom_v2)) != 0) {
-				dev_err(kbdev->dev,
+				dev_dbg(kbdev->dev,
 					"Invalid atom address %p passed to job_submit\n",
 					user_addr);
 				err = -EFAULT;
@@ -1277,7 +1281,7 @@ int kbase_jd_submit(struct kbase_context *kctx,
 			user_atom.seq_nr = 0;
 		} else {
 			if (copy_from_user(&user_atom, user_addr, stride) != 0) {
-				dev_err(kbdev->dev,
+				dev_dbg(kbdev->dev,
 					"Invalid atom address %p passed to job_submit\n",
 					user_addr);
 				err = -EFAULT;
@@ -1416,7 +1420,7 @@ void kbase_jd_done_worker(struct work_struct *data)
 	js_kctx_info = &kctx->jctx.sched_info;
 	js_devdata = &kbdev->js_data;
 
-	dev_dbg(kbdev->dev, "Enter atom %p done worker for kctx %p\n",
+	dev_dbg(kbdev->dev, "Enter atom %pK done worker for kctx %pK\n",
 		(void *)katom, (void *)kctx);
 
 	KBASE_KTRACE_ADD_JM(kbdev, JD_DONE_WORKER, kctx, katom, katom->jc, 0);
@@ -1440,7 +1444,7 @@ void kbase_jd_done_worker(struct work_struct *data)
 	if (katom->event_code == BASE_JD_EVENT_STOPPED) {
 		unsigned long flags;
 
-		dev_dbg(kbdev->dev, "Atom %p has been promoted to stopped\n",
+		dev_dbg(kbdev->dev, "Atom %pK has been promoted to stopped\n",
 			(void *)katom);
 		mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
 		mutex_unlock(&js_devdata->queue_mutex);
@@ -1448,7 +1452,7 @@ void kbase_jd_done_worker(struct work_struct *data)
 		spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
 
 		katom->status = KBASE_JD_ATOM_STATE_IN_JS;
-		dev_dbg(kctx->kbdev->dev, "Atom %p status to in JS\n",
+		dev_dbg(kctx->kbdev->dev, "Atom %pK status to in JS\n",
 			(void *)katom);
 		kbase_js_unpull(kctx, katom);
 
@@ -1533,7 +1537,9 @@ void kbase_jd_done_worker(struct work_struct *data)
 	mutex_unlock(&jctx->lock);
 
 	/* Job is now no longer running, so can now safely release the context
-	 * reference, and handle any actions that were logged against the atom's retained state */
+	 * reference, and handle any actions that were logged against the
+	 * atom's retained state
+	 */
 
 	kbasep_js_runpool_release_ctx_and_katom_retained_state(kbdev, kctx, &katom_retained_state);
 
@@ -1541,7 +1547,8 @@ void kbase_jd_done_worker(struct work_struct *data)
 
 	if (!atomic_dec_return(&kctx->work_count)) {
 		/* If worker now idle then post all events that jd_done_nolock()
-		 * has queued */
+		 * has queued
+		 */
 		mutex_lock(&jctx->lock);
 		while (!list_empty(&kctx->completed_jobs)) {
 			struct kbase_jd_atom *atom = list_entry(
@@ -1561,7 +1568,7 @@ void kbase_jd_done_worker(struct work_struct *data)
 
 	KBASE_KTRACE_ADD_JM(kbdev, JD_DONE_WORKER_END, kctx, NULL, cache_jc, 0);
 
-	dev_dbg(kbdev->dev, "Leave atom %p done worker for kctx %p\n",
+	dev_dbg(kbdev->dev, "Leave atom %pK done worker for kctx %pK\n",
 		(void *)katom, (void *)kctx);
 }
 
@@ -1616,7 +1623,8 @@ static void jd_cancel_worker(struct work_struct *data)
 	need_to_try_schedule_context = jd_done_nolock(katom, NULL);
 	/* Because we're zapping, we're not adding any more jobs to this ctx, so no need to
 	 * schedule the context. There's also no need for the jsctx_mutex to have been taken
-	 * around this too. */
+	 * around this too.
+	 */
 	KBASE_DEBUG_ASSERT(!need_to_try_schedule_context);
 
 	/* katom may have been freed now, do not use! */
@@ -1685,12 +1693,12 @@ void kbase_jd_cancel(struct kbase_device *kbdev, struct kbase_jd_atom *katom)
 {
 	struct kbase_context *kctx;
 
-	KBASE_DEBUG_ASSERT(NULL != kbdev);
-	KBASE_DEBUG_ASSERT(NULL != katom);
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(katom != NULL);
 	kctx = katom->kctx;
-	KBASE_DEBUG_ASSERT(NULL != kctx);
+	KBASE_DEBUG_ASSERT(kctx != NULL);
 
-	dev_dbg(kbdev->dev, "JD: cancelling atom %p\n", (void *)katom);
+	dev_dbg(kbdev->dev, "JD: cancelling atom %pK\n", (void *)katom);
 	KBASE_KTRACE_ADD_JM(kbdev, JD_CANCEL, kctx, katom, katom->jc, 0);
 
 	/* This should only be done from a context that is not scheduled */
@@ -1759,12 +1767,15 @@ int kbase_jd_init(struct kbase_context *kctx)
 {
 	int i;
 	int mali_err = 0;
+	struct priority_control_manager_device *pcm_device = NULL;
 
 	KBASE_DEBUG_ASSERT(kctx);
+	pcm_device = kctx->kbdev->pcm_dev;
+	kctx->jctx.max_priority = KBASE_JS_ATOM_SCHED_PRIO_REALTIME;
 
 	kctx->jctx.job_done_wq = alloc_workqueue("mali_jd",
 			WQ_HIGHPRI | WQ_UNBOUND, 1);
-	if (NULL == kctx->jctx.job_done_wq) {
+	if (kctx->jctx.job_done_wq == NULL) {
 		mali_err = -ENOMEM;
 		goto out1;
 	}
@@ -1800,6 +1811,11 @@ int kbase_jd_init(struct kbase_context *kctx)
 	INIT_LIST_HEAD(&kctx->completed_jobs);
 	atomic_set(&kctx->work_count, 0);
 
+	/* Check if there are platform rules for maximum priority */
+	if (pcm_device)
+		kctx->jctx.max_priority = pcm_device->ops.pcm_scheduler_priority_check(
+				pcm_device, current, KBASE_JS_ATOM_SCHED_PRIO_REALTIME);
+
 	return 0;
 
  out1:
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_jd_debugfs.c b/drivers/gpu/arm/bifrost/mali_kbase_jd_debugfs.c
index 6b0c36d6b93f..2fa140cbf64a 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_jd_debugfs.c
+++ b/drivers/gpu/arm/bifrost/mali_kbase_jd_debugfs.c
@@ -1,11 +1,12 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  *
- * (C) COPYRIGHT 2014-2020 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2021 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,8 +17,6 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 #ifdef CONFIG_DEBUG_FS
@@ -29,7 +28,7 @@
 #if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE)
 #include <mali_kbase_sync.h>
 #endif
-#include <mali_kbase_ioctl.h>
+#include <uapi/gpu/arm/bifrost/mali_kbase_ioctl.h>
 
 struct kbase_jd_debugfs_depinfo {
 	u8 id;
@@ -47,13 +46,13 @@ static void kbase_jd_debugfs_fence_info(struct kbase_jd_atom *atom,
 	case BASE_JD_REQ_SOFT_FENCE_TRIGGER:
 		res = kbase_sync_fence_out_info_get(atom, &info);
 		if (res == 0)
-			seq_printf(sfile, "Sa([%p]%d) ",
+			seq_printf(sfile, "Sa([%pK]%d) ",
 				   info.fence, info.status);
 		break;
 	case BASE_JD_REQ_SOFT_FENCE_WAIT:
 		res = kbase_sync_fence_in_info_get(atom, &info);
 		if (res == 0)
-			seq_printf(sfile, "Wa([%p]%d) ",
+			seq_printf(sfile, "Wa([%pK]%d) ",
 				   info.fence, info.status);
 		break;
 	default:
@@ -66,42 +65,40 @@ static void kbase_jd_debugfs_fence_info(struct kbase_jd_atom *atom,
 		struct kbase_fence_cb *cb;
 
 		if (atom->dma_fence.fence) {
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE)
 			struct fence *fence = atom->dma_fence.fence;
 #else
 			struct dma_fence *fence = atom->dma_fence.fence;
 #endif
 
 			seq_printf(sfile,
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 8, 0))
-					"Sd(%u#%u: %s) ",
+#if (KERNEL_VERSION(4, 8, 0) > LINUX_VERSION_CODE)
+				   "Sd(%u#%u: %s) ",
 #else
-					"Sd(%llu#%u: %s) ",
+				   "Sd(%llu#%u: %s) ",
 #endif
-					fence->context,
-					fence->seqno,
-					dma_fence_is_signaled(fence) ?
-						"signaled" : "active");
+				   fence->context, fence->seqno,
+				   dma_fence_is_signaled(fence) ? "signaled" :
+								  "active");
 		}
 
 		list_for_each_entry(cb, &atom->dma_fence.callbacks,
 				    node) {
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE)
 			struct fence *fence = cb->fence;
 #else
 			struct dma_fence *fence = cb->fence;
 #endif
 
 			seq_printf(sfile,
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 8, 0))
-					"Wd(%u#%u: %s) ",
+#if (KERNEL_VERSION(4, 8, 0) > LINUX_VERSION_CODE)
+				   "Wd(%u#%u: %s) ",
 #else
-					"Wd(%llu#%u: %s) ",
+				   "Wd(%llu#%u: %s) ",
 #endif
-					fence->context,
-					fence->seqno,
-					dma_fence_is_signaled(fence) ?
-						"signaled" : "active");
+				   fence->context, fence->seqno,
+				   dma_fence_is_signaled(fence) ? "signaled" :
+								  "active");
 		}
 	}
 #endif /* CONFIG_MALI_BIFROST_DMA_FENCE */
@@ -180,7 +177,8 @@ static int kbasep_jd_debugfs_atoms_show(struct seq_file *sfile, void *data)
 
 		/* start_timestamp is cleared as soon as the atom leaves UNUSED state
 		 * and set before a job is submitted to the h/w, a non-zero value means
-		 * it is valid */
+		 * it is valid
+		 */
 		if (ktime_to_ns(atom->start_timestamp))
 			start_timestamp = ktime_to_ns(
 					ktime_sub(ktime_get(), atom->start_timestamp));
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_jd_debugfs.h b/drivers/gpu/arm/bifrost/mali_kbase_jd_debugfs.h
index 697bdef4d434..f183a9fee70e 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_jd_debugfs.h
+++ b/drivers/gpu/arm/bifrost/mali_kbase_jd_debugfs.h
@@ -1,11 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  *
- * (C) COPYRIGHT 2014-2018 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2018, 2020-2021 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,13 +17,10 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 /**
- * @file mali_kbase_jd_debugfs.h
- * Header file for job dispatcher-related entries in debugfs
+ * DOC: Header file for job dispatcher-related entries in debugfs
  */
 
 #ifndef _KBASE_JD_DEBUGFS_H
@@ -38,7 +36,7 @@ struct kbase_context;
 /**
  * kbasep_jd_debugfs_ctx_init() - Add debugfs entries for JD system
  *
- * @kctx Pointer to kbase_context
+ * @kctx: Pointer to kbase_context
  */
 void kbasep_jd_debugfs_ctx_init(struct kbase_context *kctx);
 
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_jm.c b/drivers/gpu/arm/bifrost/mali_kbase_jm.c
index fb15a8c1727a..73e9905ab036 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_jm.c
+++ b/drivers/gpu/arm/bifrost/mali_kbase_jm.c
@@ -1,11 +1,12 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  *
- * (C) COPYRIGHT 2014-2020 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2021 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,11 +17,8 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
-
 /*
  * HW access job manager common APIs
  */
@@ -47,7 +45,7 @@ static bool kbase_jm_next_job(struct kbase_device *kbdev, int js,
 
 	kctx = kbdev->hwaccess.active_kctx[js];
 	dev_dbg(kbdev->dev,
-		"Trying to run the next %d jobs in kctx %p (s:%d)\n",
+		"Trying to run the next %d jobs in kctx %pK (s:%d)\n",
 		nr_jobs_to_submit, (void *)kctx, js);
 
 	if (!kctx)
@@ -110,7 +108,6 @@ void kbase_jm_try_kick_all(struct kbase_device *kbdev)
 		up(&js_devdata->schedule_sem);
 	}
 }
-#endif /* !MALI_USE_CSF */
 
 void kbase_jm_idle_ctx(struct kbase_device *kbdev, struct kbase_context *kctx)
 {
@@ -120,20 +117,19 @@ void kbase_jm_idle_ctx(struct kbase_device *kbdev, struct kbase_context *kctx)
 
 	for (js = 0; js < BASE_JM_MAX_NR_SLOTS; js++) {
 		if (kbdev->hwaccess.active_kctx[js] == kctx) {
-			dev_dbg(kbdev->dev, "Marking kctx %p as inactive (s:%d)\n",
+			dev_dbg(kbdev->dev, "Marking kctx %pK as inactive (s:%d)\n",
 					(void *)kctx, js);
 			kbdev->hwaccess.active_kctx[js] = NULL;
 		}
 	}
 }
 
-#if !MALI_USE_CSF
 struct kbase_jd_atom *kbase_jm_return_atom_to_js(struct kbase_device *kbdev,
 				struct kbase_jd_atom *katom)
 {
 	lockdep_assert_held(&kbdev->hwaccess_lock);
 
-	dev_dbg(kbdev->dev, "Atom %p is returning with event code 0x%x\n",
+	dev_dbg(kbdev->dev, "Atom %pK is returning with event code 0x%x\n",
 		(void *)katom, katom->event_code);
 
 	if (katom->event_code != BASE_JD_EVENT_STOPPED &&
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_jm.h b/drivers/gpu/arm/bifrost/mali_kbase_jm.h
index b3fd421a1ff3..47202b9e9bd3 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_jm.h
+++ b/drivers/gpu/arm/bifrost/mali_kbase_jm.h
@@ -1,11 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  *
- * (C) COPYRIGHT 2014, 2016, 2019 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2013-2014, 2016, 2019-2020 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,11 +17,8 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
-
 /*
  * Job manager common APIs
  */
@@ -76,6 +74,7 @@ void kbase_jm_try_kick(struct kbase_device *kbdev, u32 js_mask);
 void kbase_jm_try_kick_all(struct kbase_device *kbdev);
 #endif /* !MALI_USE_CSF */
 
+#if !MALI_USE_CSF
 /**
  * kbase_jm_idle_ctx() - Mark a context as idle.
  * @kbdev:	Device pointer
@@ -91,7 +90,6 @@ void kbase_jm_try_kick_all(struct kbase_device *kbdev);
  */
 void kbase_jm_idle_ctx(struct kbase_device *kbdev, struct kbase_context *kctx);
 
-#if !MALI_USE_CSF
 /**
  * kbase_jm_return_atom_to_js() - Return an atom to the job scheduler that has
  *				  been soft-stopped or will fail due to a
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_js.c b/drivers/gpu/arm/bifrost/mali_kbase_js.c
index 9b338eb66531..c16469d0a1d9 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_js.c
+++ b/drivers/gpu/arm/bifrost/mali_kbase_js.c
@@ -1,11 +1,12 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  *
- * (C) COPYRIGHT 2011-2020 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2011-2021 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,12 +17,8 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
-
-
 /*
  * Job Scheduler Implementation
  */
@@ -37,6 +34,7 @@
 
 #include "mali_kbase_jm.h"
 #include "mali_kbase_hwaccess_jm.h"
+#include <linux/priority_control_manager.h>
 
 /*
  * Private types
@@ -45,26 +43,30 @@
 /* Bitpattern indicating the result of releasing a context */
 enum {
 	/* The context was descheduled - caller should try scheduling in a new
-	 * one to keep the runpool full */
+	 * one to keep the runpool full
+	 */
 	KBASEP_JS_RELEASE_RESULT_WAS_DESCHEDULED = (1u << 0),
 	/* Ctx attributes were changed - caller should try scheduling all
-	 * contexts */
+	 * contexts
+	 */
 	KBASEP_JS_RELEASE_RESULT_SCHED_ALL = (1u << 1)
 };
 
 typedef u32 kbasep_js_release_result;
 
 const int kbasep_js_atom_priority_to_relative[BASE_JD_NR_PRIO_LEVELS] = {
-	KBASE_JS_ATOM_SCHED_PRIO_MED, /* BASE_JD_PRIO_MEDIUM */
-	KBASE_JS_ATOM_SCHED_PRIO_HIGH, /* BASE_JD_PRIO_HIGH */
-	KBASE_JS_ATOM_SCHED_PRIO_LOW  /* BASE_JD_PRIO_LOW */
+	KBASE_JS_ATOM_SCHED_PRIO_MED,      /* BASE_JD_PRIO_MEDIUM */
+	KBASE_JS_ATOM_SCHED_PRIO_HIGH,     /* BASE_JD_PRIO_HIGH */
+	KBASE_JS_ATOM_SCHED_PRIO_LOW,      /* BASE_JD_PRIO_LOW */
+	KBASE_JS_ATOM_SCHED_PRIO_REALTIME  /* BASE_JD_PRIO_REALTIME */
 };
 
 const base_jd_prio
 kbasep_js_relative_priority_to_atom[KBASE_JS_ATOM_SCHED_PRIO_COUNT] = {
-	BASE_JD_PRIO_HIGH,   /* KBASE_JS_ATOM_SCHED_PRIO_HIGH */
-	BASE_JD_PRIO_MEDIUM, /* KBASE_JS_ATOM_SCHED_PRIO_MED */
-	BASE_JD_PRIO_LOW     /* KBASE_JS_ATOM_SCHED_PRIO_LOW */
+	BASE_JD_PRIO_REALTIME,   /* KBASE_JS_ATOM_SCHED_PRIO_REALTIME */
+	BASE_JD_PRIO_HIGH,       /* KBASE_JS_ATOM_SCHED_PRIO_HIGH */
+	BASE_JD_PRIO_MEDIUM,     /* KBASE_JS_ATOM_SCHED_PRIO_MED */
+	BASE_JD_PRIO_LOW         /* KBASE_JS_ATOM_SCHED_PRIO_LOW */
 };
 
 
@@ -160,7 +162,7 @@ jsctx_rb_none_to_pull_prio(struct kbase_context *kctx, int js, int prio)
 	none_to_pull = RB_EMPTY_ROOT(&rb->runnable_tree);
 
 	dev_dbg(kctx->kbdev->dev,
-		"Slot %d (prio %d) is %spullable in kctx %p\n",
+		"Slot %d (prio %d) is %spullable in kctx %pK\n",
 		js, prio, none_to_pull ? "not " : "", kctx);
 
 	return none_to_pull;
@@ -184,7 +186,7 @@ jsctx_rb_none_to_pull(struct kbase_context *kctx, int js)
 
 	lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
 
-	for (prio = KBASE_JS_ATOM_SCHED_PRIO_HIGH;
+	for (prio = KBASE_JS_ATOM_SCHED_PRIO_FIRST;
 		prio < KBASE_JS_ATOM_SCHED_PRIO_COUNT; prio++) {
 		if (!jsctx_rb_none_to_pull_prio(kctx, js, prio))
 			return false;
@@ -234,7 +236,7 @@ jsctx_queue_foreach_prio(struct kbase_context *kctx, int js, int prio,
 			WARN_ON(!(entry->core_req &
 				BASE_JD_REQ_END_RENDERPASS));
 			dev_dbg(kctx->kbdev->dev,
-				"Del runnable atom %p from X_DEP list\n",
+				"Del runnable atom %pK from X_DEP list\n",
 				(void *)entry);
 
 			list_del(&entry->queue);
@@ -250,7 +252,7 @@ jsctx_queue_foreach_prio(struct kbase_context *kctx, int js, int prio,
 		WARN_ON(!(entry->atom_flags &
 			KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST));
 		dev_dbg(kctx->kbdev->dev,
-			"Del blocked atom %p from X_DEP list\n",
+			"Del blocked atom %pK from X_DEP list\n",
 			(void *)entry);
 
 		list_del(queue->x_dep_head.next);
@@ -277,7 +279,7 @@ jsctx_queue_foreach(struct kbase_context *kctx, int js,
 {
 	int prio;
 
-	for (prio = KBASE_JS_ATOM_SCHED_PRIO_HIGH;
+	for (prio = KBASE_JS_ATOM_SCHED_PRIO_FIRST;
 		prio < KBASE_JS_ATOM_SCHED_PRIO_COUNT; prio++)
 		jsctx_queue_foreach_prio(kctx, js, prio, callback);
 }
@@ -301,7 +303,7 @@ jsctx_rb_peek_prio(struct kbase_context *kctx, int js, int prio)
 
 	lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
 	dev_dbg(kctx->kbdev->dev,
-		"Peeking runnable tree of kctx %p for prio %d (s:%d)\n",
+		"Peeking runnable tree of kctx %pK for prio %d (s:%d)\n",
 		(void *)kctx, prio, js);
 
 	node = rb_first(&rb->runnable_tree);
@@ -319,7 +321,7 @@ jsctx_rb_peek_prio(struct kbase_context *kctx, int js, int prio)
  * @js:   Job slot id to check.
  *
  * Check the ring buffers for all priorities, starting from
- * KBASE_JS_ATOM_SCHED_PRIO_HIGH, for the specified @js and @prio and return a
+ * KBASE_JS_ATOM_SCHED_PRIO_REALTIME, for the specified @js and return a
  * pointer to the next atom, unless all the priority's ring buffers are empty.
  *
  * Caller must hold the hwaccess_lock.
@@ -333,7 +335,7 @@ jsctx_rb_peek(struct kbase_context *kctx, int js)
 
 	lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
 
-	for (prio = KBASE_JS_ATOM_SCHED_PRIO_HIGH;
+	for (prio = KBASE_JS_ATOM_SCHED_PRIO_FIRST;
 		prio < KBASE_JS_ATOM_SCHED_PRIO_COUNT; prio++) {
 		struct kbase_jd_atom *katom;
 
@@ -363,7 +365,7 @@ jsctx_rb_pull(struct kbase_context *kctx, struct kbase_jd_atom *katom)
 
 	lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
 
-	dev_dbg(kctx->kbdev->dev, "Erasing atom %p from runnable tree of kctx %p\n",
+	dev_dbg(kctx->kbdev->dev, "Erasing atom %pK from runnable tree of kctx %pK\n",
 		(void *)katom, (void *)kctx);
 
 	/* Atoms must be pulled in the correct order. */
@@ -385,7 +387,7 @@ jsctx_tree_add(struct kbase_context *kctx, struct kbase_jd_atom *katom)
 
 	lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
 
-	dev_dbg(kbdev->dev, "Adding atom %p to runnable tree of kctx %p (s:%d)\n",
+	dev_dbg(kbdev->dev, "Adding atom %pK to runnable tree of kctx %pK (s:%d)\n",
 		(void *)katom, (void *)kctx, js);
 
 	while (*new) {
@@ -448,7 +450,8 @@ int kbasep_js_devdata_init(struct kbase_device * const kbdev)
 
 #ifdef CONFIG_MALI_BIFROST_DEBUG
 	/* Soft-stop will be disabled on a single context by default unless
-	 * softstop_always is set */
+	 * softstop_always is set
+	 */
 	jsdd->softstop_always = false;
 #endif				/* CONFIG_MALI_BIFROST_DEBUG */
 	jsdd->nr_all_contexts_running = 0;
@@ -531,14 +534,15 @@ int kbasep_js_devdata_init(struct kbase_device * const kbdev)
 			kbdev->gpu_props.props.raw_props.js_features[i]);
 
 	/* On error, we could continue on: providing none of the below resources
-	 * rely on the ones above */
+	 * rely on the ones above
+	 */
 
 	mutex_init(&jsdd->runpool_mutex);
 	mutex_init(&jsdd->queue_mutex);
 	sema_init(&jsdd->schedule_sem, 1);
 
 	for (i = 0; i < kbdev->gpu_props.num_job_slots; ++i) {
-		for (j = 0; j < KBASE_JS_ATOM_SCHED_PRIO_COUNT; ++j) {
+		for (j = KBASE_JS_ATOM_SCHED_PRIO_FIRST; j < KBASE_JS_ATOM_SCHED_PRIO_COUNT; ++j) {
 			INIT_LIST_HEAD(&jsdd->ctx_list_pullable[i][j]);
 			INIT_LIST_HEAD(&jsdd->ctx_list_unpullable[i][j]);
 		}
@@ -595,16 +599,18 @@ int kbasep_js_kctx_init(struct kbase_context *const kctx)
 			sizeof(js_kctx_info->ctx.ctx_attr_ref_count));
 
 	/* Initially, the context is disabled from submission until the create
-	 * flags are set */
+	 * flags are set
+	 */
 	kbase_ctx_flag_set(kctx, KCTX_SUBMIT_DISABLED);
 
 	/* On error, we could continue on: providing none of the below resources
-	 * rely on the ones above */
+	 * rely on the ones above
+	 */
 	mutex_init(&js_kctx_info->ctx.jsctx_mutex);
 
 	init_waitqueue_head(&js_kctx_info->ctx.is_scheduled_wait);
 
-	for (i = 0; i < KBASE_JS_ATOM_SCHED_PRIO_COUNT; i++) {
+	for (i = KBASE_JS_ATOM_SCHED_PRIO_FIRST; i < KBASE_JS_ATOM_SCHED_PRIO_COUNT; i++) {
 		for (j = 0; j < BASE_JM_MAX_NR_SLOTS; j++) {
 			INIT_LIST_HEAD(&kctx->jsctx_queue[i][j].x_dep_head);
 			kctx->jsctx_queue[i][j].runnable_tree = RB_ROOT;
@@ -678,7 +684,7 @@ static bool kbase_js_ctx_list_add_pullable_nolock(struct kbase_device *kbdev,
 	bool ret = false;
 
 	lockdep_assert_held(&kbdev->hwaccess_lock);
-	dev_dbg(kbdev->dev, "Add pullable tail kctx %p (s:%d)\n",
+	dev_dbg(kbdev->dev, "Add pullable tail kctx %pK (s:%d)\n",
 		(void *)kctx, js);
 
 	if (!list_empty(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]))
@@ -720,7 +726,7 @@ static bool kbase_js_ctx_list_add_pullable_head_nolock(
 	bool ret = false;
 
 	lockdep_assert_held(&kbdev->hwaccess_lock);
-	dev_dbg(kbdev->dev, "Add pullable head kctx %p (s:%d)\n",
+	dev_dbg(kbdev->dev, "Add pullable head kctx %pK (s:%d)\n",
 		(void *)kctx, js);
 
 	if (!list_empty(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]))
@@ -796,7 +802,7 @@ static bool kbase_js_ctx_list_add_unpullable_nolock(struct kbase_device *kbdev,
 	bool ret = false;
 
 	lockdep_assert_held(&kbdev->hwaccess_lock);
-	dev_dbg(kbdev->dev, "Add unpullable tail kctx %p (s:%d)\n",
+	dev_dbg(kbdev->dev, "Add unpullable tail kctx %pK (s:%d)\n",
 		(void *)kctx, js);
 
 	list_move_tail(&kctx->jctx.sched_info.ctx.ctx_list_entry[js],
@@ -879,7 +885,7 @@ static struct kbase_context *kbase_js_ctx_list_pop_head_nolock(
 
 	lockdep_assert_held(&kbdev->hwaccess_lock);
 
-	for (i = 0; i < KBASE_JS_ATOM_SCHED_PRIO_COUNT; i++) {
+	for (i = KBASE_JS_ATOM_SCHED_PRIO_FIRST; i < KBASE_JS_ATOM_SCHED_PRIO_COUNT; i++) {
 		if (list_empty(&kbdev->js_data.ctx_list_pullable[js][i]))
 			continue;
 
@@ -889,7 +895,7 @@ static struct kbase_context *kbase_js_ctx_list_pop_head_nolock(
 
 		list_del_init(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]);
 		dev_dbg(kbdev->dev,
-			"Popped %p from the pullable queue (s:%d)\n",
+			"Popped %pK from the pullable queue (s:%d)\n",
 			(void *)kctx, js);
 		return kctx;
 	}
@@ -943,25 +949,25 @@ static bool kbase_js_ctx_pullable(struct kbase_context *kctx, int js,
 
 	if (is_scheduled) {
 		if (!kbasep_js_is_submit_allowed(js_devdata, kctx)) {
-			dev_dbg(kbdev->dev, "JS: No submit allowed for kctx %p\n",
+			dev_dbg(kbdev->dev, "JS: No submit allowed for kctx %pK\n",
 				(void *)kctx);
 			return false;
 		}
 	}
 	katom = jsctx_rb_peek(kctx, js);
 	if (!katom) {
-		dev_dbg(kbdev->dev, "JS: No pullable atom in kctx %p (s:%d)\n",
+		dev_dbg(kbdev->dev, "JS: No pullable atom in kctx %pK (s:%d)\n",
 			(void *)kctx, js);
 		return false; /* No pullable atoms */
 	}
 	if (kctx->blocked_js[js][katom->sched_priority]) {
 		dev_dbg(kbdev->dev,
-			"JS: kctx %p is blocked from submitting atoms at priority %d (s:%d)\n",
+			"JS: kctx %pK is blocked from submitting atoms at priority %d (s:%d)\n",
 			(void *)kctx, katom->sched_priority, js);
 		return false;
 	}
 	if (atomic_read(&katom->blocked)) {
-		dev_dbg(kbdev->dev, "JS: Atom %p is blocked in js_ctx_pullable\n",
+		dev_dbg(kbdev->dev, "JS: Atom %pK is blocked in js_ctx_pullable\n",
 			(void *)katom);
 		return false; /* next atom blocked */
 	}
@@ -970,20 +976,20 @@ static bool kbase_js_ctx_pullable(struct kbase_context *kctx, int js,
 				KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB ||
 				katom->x_pre_dep->will_fail_event_code) {
 			dev_dbg(kbdev->dev,
-				"JS: X pre-dep %p is not present in slot FIFO or will fail\n",
+				"JS: X pre-dep %pK is not present in slot FIFO or will fail\n",
 				(void *)katom->x_pre_dep);
 			return false;
 		}
 		if ((katom->atom_flags & KBASE_KATOM_FLAG_FAIL_BLOCKER) &&
 			kbase_backend_nr_atoms_on_slot(kctx->kbdev, js)) {
 			dev_dbg(kbdev->dev,
-				"JS: Atom %p has cross-slot fail dependency and atoms on slot (s:%d)\n",
+				"JS: Atom %pK has cross-slot fail dependency and atoms on slot (s:%d)\n",
 				(void *)katom, js);
 			return false;
 		}
 	}
 
-	dev_dbg(kbdev->dev, "JS: Atom %p is pullable in kctx %p (s:%d)\n",
+	dev_dbg(kbdev->dev, "JS: Atom %pK is pullable in kctx %pK (s:%d)\n",
 		(void *)katom, (void *)kctx, js);
 
 	return true;
@@ -1007,7 +1013,7 @@ static bool kbase_js_dep_validate(struct kbase_context *kctx,
 			int dep_prio = dep_atom->sched_priority;
 
 			dev_dbg(kbdev->dev,
-				"Checking dep %d of atom %p (s:%d) on %p (s:%d)\n",
+				"Checking dep %d of atom %pK (s:%d) on %pK (s:%d)\n",
 				i, (void *)katom, js, (void *)dep_atom, dep_js);
 
 			/* Dependent atom must already have been submitted */
@@ -1020,7 +1026,8 @@ static bool kbase_js_dep_validate(struct kbase_context *kctx,
 			}
 
 			/* Dependencies with different priorities can't
-			  be represented in the ringbuffer */
+			 * be represented in the ringbuffer
+			 */
 			if (prio != dep_prio) {
 				dev_dbg(kbdev->dev,
 					"Different atom priorities\n");
@@ -1030,7 +1037,8 @@ static bool kbase_js_dep_validate(struct kbase_context *kctx,
 
 			if (js == dep_js) {
 				/* Only one same-slot dependency can be
-				 * represented in the ringbuffer */
+				 * represented in the ringbuffer
+				 */
 				if (has_dep) {
 					dev_dbg(kbdev->dev,
 						"Too many same-slot deps\n");
@@ -1038,7 +1046,8 @@ static bool kbase_js_dep_validate(struct kbase_context *kctx,
 					break;
 				}
 				/* Each dependee atom can only have one
-				 * same-slot dependency */
+				 * same-slot dependency
+				 */
 				if (dep_atom->post_dep) {
 					dev_dbg(kbdev->dev,
 						"Too many same-slot successors\n");
@@ -1048,7 +1057,8 @@ static bool kbase_js_dep_validate(struct kbase_context *kctx,
 				has_dep = true;
 			} else {
 				/* Only one cross-slot dependency can be
-				 * represented in the ringbuffer */
+				 * represented in the ringbuffer
+				 */
 				if (has_x_dep) {
 					dev_dbg(kbdev->dev,
 						"Too many cross-slot deps\n");
@@ -1056,7 +1066,8 @@ static bool kbase_js_dep_validate(struct kbase_context *kctx,
 					break;
 				}
 				/* Each dependee atom can only have one
-				 * cross-slot dependency */
+				 * cross-slot dependency
+				 */
 				if (dep_atom->x_post_dep) {
 					dev_dbg(kbdev->dev,
 						"Too many cross-slot successors\n");
@@ -1064,7 +1075,8 @@ static bool kbase_js_dep_validate(struct kbase_context *kctx,
 					break;
 				}
 				/* The dependee atom can not already be in the
-				 * HW access ringbuffer */
+				 * HW access ringbuffer
+				 */
 				if (dep_atom->gpu_rb_state !=
 					KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB) {
 					dev_dbg(kbdev->dev,
@@ -1074,7 +1086,8 @@ static bool kbase_js_dep_validate(struct kbase_context *kctx,
 					break;
 				}
 				/* The dependee atom can not already have
-				 * completed */
+				 * completed
+				 */
 				if (dep_atom->status !=
 						KBASE_JD_ATOM_STATE_IN_JS) {
 					dev_dbg(kbdev->dev,
@@ -1092,7 +1105,8 @@ static bool kbase_js_dep_validate(struct kbase_context *kctx,
 	}
 
 	/* If dependencies can be represented by ringbuffer then clear them from
-	 * atom structure */
+	 * atom structure
+	 */
 	if (ret) {
 		for (i = 0; i < 2; i++) {
 			struct kbase_jd_atom *dep_atom = katom->dep[i].atom;
@@ -1101,7 +1115,7 @@ static bool kbase_js_dep_validate(struct kbase_context *kctx,
 				int dep_js = kbase_js_get_slot(kbdev, dep_atom);
 
 				dev_dbg(kbdev->dev,
-					"Clearing dep %d of atom %p (s:%d) on %p (s:%d)\n",
+					"Clearing dep %d of atom %pK (s:%d) on %pK (s:%d)\n",
 					i, (void *)katom, js, (void *)dep_atom,
 					dep_js);
 
@@ -1116,7 +1130,7 @@ static bool kbase_js_dep_validate(struct kbase_context *kctx,
 					katom->atom_flags |=
 						KBASE_KATOM_FLAG_X_DEP_BLOCKED;
 
-					dev_dbg(kbdev->dev, "Set X_DEP flag on atom %p\n",
+					dev_dbg(kbdev->dev, "Set X_DEP flag on atom %pK\n",
 						(void *)katom);
 
 					katom->x_pre_dep = dep_atom;
@@ -1140,7 +1154,7 @@ static bool kbase_js_dep_validate(struct kbase_context *kctx,
 		}
 	} else {
 		dev_dbg(kbdev->dev,
-			"Deps of atom %p (s:%d) could not be represented\n",
+			"Deps of atom %pK (s:%d) could not be represented\n",
 			(void *)katom, js);
 	}
 
@@ -1181,7 +1195,7 @@ void kbase_js_update_ctx_priority(struct kbase_context *kctx)
 		/* Determine the new priority for context, as per the priority
 		 * of currently in-use atoms.
 		 */
-		for (prio = KBASE_JS_ATOM_SCHED_PRIO_HIGH;
+		for (prio = KBASE_JS_ATOM_SCHED_PRIO_FIRST;
 			prio < KBASE_JS_ATOM_SCHED_PRIO_COUNT; prio++) {
 			if (kctx->atoms_count[prio]) {
 				new_priority = prio;
@@ -1192,6 +1206,7 @@ void kbase_js_update_ctx_priority(struct kbase_context *kctx)
 
 	kbase_js_set_ctx_priority(kctx, new_priority);
 }
+KBASE_EXPORT_TEST_API(kbase_js_update_ctx_priority);
 
 /**
  * js_add_start_rp() - Add an atom that starts a renderpass to the job scheduler
@@ -1222,7 +1237,7 @@ static int js_add_start_rp(struct kbase_jd_atom *const start_katom)
 	if (rp->state != KBASE_JD_RP_COMPLETE)
 		return -EINVAL;
 
-	dev_dbg(kctx->kbdev->dev, "JS add start atom %p of RP %d\n",
+	dev_dbg(kctx->kbdev->dev, "JS add start atom %pK of RP %d\n",
 		(void *)start_katom, start_katom->renderpass_id);
 
 	/* The following members are read when updating the job slot
@@ -1265,7 +1280,7 @@ static int js_add_end_rp(struct kbase_jd_atom *const end_katom)
 
 	rp = &kctx->jctx.renderpasses[end_katom->renderpass_id];
 
-	dev_dbg(kbdev->dev, "JS add end atom %p in state %d of RP %d\n",
+	dev_dbg(kbdev->dev, "JS add end atom %pK in state %d of RP %d\n",
 		(void *)end_katom, (int)rp->state, end_katom->renderpass_id);
 
 	if (rp->state == KBASE_JD_RP_COMPLETE)
@@ -1332,7 +1347,7 @@ bool kbasep_js_add_job(struct kbase_context *kctx,
 	/* Refcount ctx.nr_jobs */
 	KBASE_DEBUG_ASSERT(js_kctx_info->ctx.nr_jobs < U32_MAX);
 	++(js_kctx_info->ctx.nr_jobs);
-	dev_dbg(kbdev->dev, "Add atom %p to kctx %p; now %d in ctx\n",
+	dev_dbg(kbdev->dev, "Add atom %pK to kctx %pK; now %d in ctx\n",
 		(void *)atom, (void *)kctx, js_kctx_info->ctx.nr_jobs);
 
 	/* Lock for state available during IRQ */
@@ -1345,13 +1360,14 @@ bool kbasep_js_add_job(struct kbase_context *kctx,
 		/* Dependencies could not be represented */
 		--(js_kctx_info->ctx.nr_jobs);
 		dev_dbg(kbdev->dev,
-			"Remove atom %p from kctx %p; now %d in ctx\n",
+			"Remove atom %pK from kctx %pK; now %d in ctx\n",
 			(void *)atom, (void *)kctx, js_kctx_info->ctx.nr_jobs);
 
 		/* Setting atom status back to queued as it still has unresolved
-		 * dependencies */
+		 * dependencies
+		 */
 		atom->status = KBASE_JD_ATOM_STATE_QUEUED;
-		dev_dbg(kbdev->dev, "Atom %p status to queued\n", (void *)atom);
+		dev_dbg(kbdev->dev, "Atom %pK status to queued\n", (void *)atom);
 
 		/* Undo the count, as the atom will get added again later but
 		 * leave the context priority adjusted or boosted, in case if
@@ -1389,7 +1405,8 @@ bool kbasep_js_add_job(struct kbase_context *kctx,
 					kbdev, kctx, atom->slot_nr);
 	}
 	/* If this context is active and the atom is the first on its slot,
-	 * kick the job manager to attempt to fast-start the atom */
+	 * kick the job manager to attempt to fast-start the atom
+	 */
 	if (enqueue_required && kctx ==
 			kbdev->hwaccess.active_kctx[atom->slot_nr])
 		kbase_jm_try_kick(kbdev, 1 << atom->slot_nr);
@@ -1404,22 +1421,25 @@ bool kbasep_js_add_job(struct kbase_context *kctx,
 		if (kbase_ctx_flag(kctx, KCTX_DYING)) {
 			/* A job got added while/after kbase_job_zap_context()
 			 * was called on a non-scheduled context. Kill that job
-			 * by killing the context. */
+			 * by killing the context.
+			 */
 			kbasep_js_runpool_requeue_or_kill_ctx(kbdev, kctx,
 					false);
 		} else if (js_kctx_info->ctx.nr_jobs == 1) {
 			/* Handle Refcount going from 0 to 1: schedule the
-			 * context on the Queue */
+			 * context on the Queue
+			 */
 			KBASE_DEBUG_ASSERT(!kbase_ctx_flag(kctx, KCTX_SCHEDULED));
-			dev_dbg(kbdev->dev, "JS: Enqueue Context %p", kctx);
+			dev_dbg(kbdev->dev, "JS: Enqueue Context %pK", kctx);
 
-			/* Queue was updated - caller must try to
-			 * schedule the head context */
+			/* Queue was updated - caller must try to schedule the
+			 * head context
+			 */
 			WARN_ON(!enqueue_required);
 		}
 	}
 out_unlock:
-	dev_dbg(kbdev->dev, "Enqueue of kctx %p is %srequired\n",
+	dev_dbg(kbdev->dev, "Enqueue of kctx %pK is %srequired\n",
 		kctx, enqueue_required ? "" : "not ");
 
 	mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
@@ -1448,7 +1468,7 @@ void kbasep_js_remove_job(struct kbase_device *kbdev,
 	KBASE_DEBUG_ASSERT(js_kctx_info->ctx.nr_jobs > 0);
 	--(js_kctx_info->ctx.nr_jobs);
 	dev_dbg(kbdev->dev,
-		"Remove atom %p from kctx %p; now %d in ctx\n",
+		"Remove atom %pK from kctx %pK; now %d in ctx\n",
 		(void *)atom, (void *)kctx, js_kctx_info->ctx.nr_jobs);
 
 	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
@@ -1478,7 +1498,8 @@ bool kbasep_js_remove_cancelled_job(struct kbase_device *kbdev,
 	 *
 	 * This is because it returns false for soft-stopped atoms, but we
 	 * want to override that, because we're cancelling an atom regardless of
-	 * whether it was soft-stopped or not */
+	 * whether it was soft-stopped or not
+	 */
 	attr_state_changed = kbasep_js_ctx_attr_ctx_release_atom(kbdev, kctx,
 			&katom_retained_state);
 	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
@@ -1525,7 +1546,8 @@ static kbasep_js_release_result kbasep_js_run_jobs_after_ctx_and_atom_release(
 
 	if (js_devdata->nr_user_contexts_running != 0 && runpool_ctx_attr_change) {
 		/* A change in runpool ctx attributes might mean we can
-		 * run more jobs than before  */
+		 * run more jobs than before
+		 */
 		result = KBASEP_JS_RELEASE_RESULT_SCHED_ALL;
 
 		KBASE_KTRACE_ADD_JM_SLOT(kbdev, JD_DONE_TRY_RUN_NEXT_JOB,
@@ -1624,7 +1646,8 @@ static kbasep_js_release_result kbasep_js_runpool_release_ctx_internal(
 
 	/* Make a set of checks to see if the context should be scheduled out.
 	 * Note that there'll always be at least 1 reference to the context
-	 * which was previously acquired by kbasep_js_schedule_ctx(). */
+	 * which was previously acquired by kbasep_js_schedule_ctx().
+	 */
 	if (new_ref_count == 1 &&
 		(!kbasep_js_is_submit_allowed(js_devdata, kctx) ||
 #ifdef CONFIG_MALI_ARBITER_SUPPORT
@@ -1635,8 +1658,9 @@ static kbasep_js_release_result kbasep_js_runpool_release_ctx_internal(
 		int slot;
 
 		/* Last reference, and we've been told to remove this context
-		 * from the Run Pool */
-		dev_dbg(kbdev->dev, "JS: RunPool Remove Context %p because refcount=%d, jobs=%d, allowed=%d",
+		 * from the Run Pool
+		 */
+		dev_dbg(kbdev->dev, "JS: RunPool Remove Context %pK because refcount=%d, jobs=%d, allowed=%d",
 				kctx, new_ref_count, js_kctx_info->ctx.nr_jobs,
 				kbasep_js_is_submit_allowed(js_devdata, kctx));
 
@@ -1646,7 +1670,7 @@ static kbasep_js_release_result kbasep_js_runpool_release_ctx_internal(
 
 		for (slot = 0; slot < num_slots; slot++) {
 			if (kbdev->hwaccess.active_kctx[slot] == kctx) {
-				dev_dbg(kbdev->dev, "Marking kctx %p as inactive (s:%d)\n",
+				dev_dbg(kbdev->dev, "Marking kctx %pK as inactive (s:%d)\n",
 					(void *)kctx, slot);
 				kbdev->hwaccess.active_kctx[slot] = NULL;
 			}
@@ -1662,7 +1686,8 @@ static kbasep_js_release_result kbasep_js_runpool_release_ctx_internal(
 			kbasep_js_ctx_attr_runpool_release_ctx(kbdev, kctx);
 
 		/* Releasing the context and katom retained state can allow
-		 * more jobs to run */
+		 * more jobs to run
+		 */
 		release_result |=
 			kbasep_js_run_jobs_after_ctx_and_atom_release(kbdev,
 						kctx, katom_retained_state,
@@ -1702,7 +1727,8 @@ static kbasep_js_release_result kbasep_js_runpool_release_ctx_internal(
 		kbase_ctx_flag_clear(kctx, KCTX_SCHEDULED);
 		/* Signal any waiter that the context is not scheduled, so is
 		 * safe for termination - once the jsctx_mutex is also dropped,
-		 * and jobs have finished. */
+		 * and jobs have finished.
+		 */
 		wake_up(&js_kctx_info->ctx.is_scheduled_wait);
 
 		/* Queue an action to occur after we've dropped the lock */
@@ -1744,9 +1770,10 @@ void kbasep_js_runpool_requeue_or_kill_ctx(struct kbase_device *kbdev,
 
 	if (kbase_ctx_flag(kctx, KCTX_DYING)) {
 		/* Dying: don't requeue, but kill all jobs on the context. This
-		 * happens asynchronously */
+		 * happens asynchronously
+		 */
 		dev_dbg(kbdev->dev,
-			"JS: ** Killing Context %p on RunPool Remove **", kctx);
+			"JS: ** Killing Context %pK on RunPool Remove **", kctx);
 		kbase_js_foreach_ctx_job(kctx, &kbase_jd_cancel);
 	}
 }
@@ -1798,7 +1825,8 @@ void kbasep_js_runpool_release_ctx(struct kbase_device *kbdev,
 }
 
 /* Variant of kbasep_js_runpool_release_ctx() that doesn't call into
- * kbase_js_sched_all() */
+ * kbase_js_sched_all()
+ */
 static void kbasep_js_runpool_release_ctx_no_schedule(
 		struct kbase_device *kbdev, struct kbase_context *kctx)
 {
@@ -1851,7 +1879,7 @@ static bool kbasep_js_schedule_ctx(struct kbase_device *kbdev,
 	bool kctx_suspended = false;
 	int as_nr;
 
-	dev_dbg(kbdev->dev, "Scheduling kctx %p (s:%d)\n", kctx, js);
+	dev_dbg(kbdev->dev, "Scheduling kctx %pK (s:%d)\n", kctx, js);
 
 	js_devdata = &kbdev->js_data;
 	js_kctx_info = &kctx->jctx.sched_info;
@@ -1867,7 +1895,8 @@ static bool kbasep_js_schedule_ctx(struct kbase_device *kbdev,
 				kbdev, kctx);
 		if (as_nr != KBASEP_AS_NR_INVALID) {
 			/* Attempt to retain the context again, this should
-			 * succeed */
+			 * succeed
+			 */
 			mutex_lock(&kbdev->mmu_hw_mutex);
 			spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
 			as_nr = kbase_ctx_sched_retain_ctx(kctx);
@@ -1926,7 +1955,8 @@ static bool kbasep_js_schedule_ctx(struct kbase_device *kbdev,
 	KBASE_TLSTREAM_TL_RET_AS_CTX(kbdev, &kbdev->as[kctx->as_nr], kctx);
 
 	/* Cause any future waiter-on-termination to wait until the context is
-	 * descheduled */
+	 * descheduled
+	 */
 	wake_up(&js_kctx_info->ctx.is_scheduled_wait);
 
 	/* Re-check for suspending: a suspend could've occurred, and all the
@@ -1939,7 +1969,8 @@ static bool kbasep_js_schedule_ctx(struct kbase_device *kbdev,
 	 * was taken (i.e. this condition doesn't execute), then the
 	 * kbasep_js_suspend() code will cleanup this context instead (by virtue
 	 * of it being called strictly after the suspend flag is set, and will
-	 * wait for this lock to drop) */
+	 * wait for this lock to drop)
+	 */
 #ifdef CONFIG_MALI_ARBITER_SUPPORT
 	if (kbase_pm_is_suspending(kbdev) || kbase_pm_is_gpu_lost(kbdev)) {
 #else
@@ -1967,13 +1998,15 @@ static bool kbasep_js_schedule_ctx(struct kbase_device *kbdev,
 	mutex_unlock(&js_devdata->runpool_mutex);
 	mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
 	/* Note: after this point, the context could potentially get scheduled
-	 * out immediately */
+	 * out immediately
+	 */
 
 	if (kctx_suspended) {
 		/* Finishing forcing out the context due to a suspend. Use a
 		 * variant of kbasep_js_runpool_release_ctx() that doesn't
 		 * schedule a new context, to prevent a risk of recursion back
-		 * into this function */
+		 * into this function
+		 */
 		kbasep_js_runpool_release_ctx_no_schedule(kbdev, kctx);
 		return false;
 	}
@@ -1992,7 +2025,7 @@ static bool kbase_js_use_ctx(struct kbase_device *kbdev,
 			kbase_backend_use_ctx_sched(kbdev, kctx, js)) {
 
 		dev_dbg(kbdev->dev,
-			"kctx %p already has ASID - mark as active (s:%d)\n",
+			"kctx %pK already has ASID - mark as active (s:%d)\n",
 			(void *)kctx, js);
 
 		if (kbdev->hwaccess.active_kctx[js] != kctx) {
@@ -2059,7 +2092,8 @@ void kbasep_js_schedule_privileged_ctx(struct kbase_device *kbdev,
 			kbase_js_sync_timers(kbdev);
 
 		/* Fast-starting requires the jsctx_mutex to be dropped,
-		 * because it works on multiple ctxs */
+		 * because it works on multiple ctxs
+		 */
 		mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
 		mutex_unlock(&js_devdata->queue_mutex);
 
@@ -2071,7 +2105,8 @@ void kbasep_js_schedule_privileged_ctx(struct kbase_device *kbdev,
 			   kbase_ctx_flag(kctx, KCTX_SCHEDULED));
 	} else {
 		/* Already scheduled in - We need to retain it to keep the
-		 * corresponding address space */
+		 * corresponding address space
+		 */
 		WARN_ON(!kbase_ctx_sched_inc_refcount(kctx));
 		mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
 		mutex_unlock(&js_devdata->queue_mutex);
@@ -2116,7 +2151,8 @@ void kbasep_js_suspend(struct kbase_device *kbdev)
 	js_devdata->runpool_irq.submit_allowed = 0;
 
 	/* Retain each of the contexts, so we can cause it to leave even if it
-	 * had no refcount to begin with */
+	 * had no refcount to begin with
+	 */
 	for (i = BASE_MAX_NR_AS - 1; i >= 0; --i) {
 		struct kbase_context *kctx = kbdev->as_to_kctx[i];
 
@@ -2137,7 +2173,8 @@ void kbasep_js_suspend(struct kbase_device *kbdev)
 	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
 
 	/* De-ref the previous retain to ensure each context gets pulled out
-	 * sometime later. */
+	 * sometime later.
+	 */
 	for (i = 0;
 		 i < BASE_MAX_NR_AS;
 		 ++i, retained = retained >> 1) {
@@ -2148,7 +2185,8 @@ void kbasep_js_suspend(struct kbase_device *kbdev)
 	}
 
 	/* Caller must wait for all Power Manager active references to be
-	 * dropped */
+	 * dropped
+	 */
 }
 
 void kbasep_js_resume(struct kbase_device *kbdev)
@@ -2162,7 +2200,7 @@ void kbasep_js_resume(struct kbase_device *kbdev)
 
 	mutex_lock(&js_devdata->queue_mutex);
 	for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) {
-		for (prio = KBASE_JS_ATOM_SCHED_PRIO_HIGH;
+		for (prio = KBASE_JS_ATOM_SCHED_PRIO_FIRST;
 			prio < KBASE_JS_ATOM_SCHED_PRIO_COUNT; prio++) {
 			struct kbase_context *kctx, *n;
 			unsigned long flags;
@@ -2283,7 +2321,8 @@ bool kbase_js_dep_resolved_submit(struct kbase_context *kctx,
 	lockdep_assert_held(&kctx->jctx.lock);
 
 	/* If slot will transition from unpullable to pullable then add to
-	 * pullable list */
+	 * pullable list
+	 */
 	if (jsctx_rb_none_to_pull(kctx, katom->slot_nr)) {
 		enqueue_required = true;
 	} else {
@@ -2297,7 +2336,7 @@ bool kbase_js_dep_resolved_submit(struct kbase_context *kctx,
 		int js = katom->slot_nr;
 		struct jsctx_queue *queue = &kctx->jsctx_queue[prio][js];
 
-		dev_dbg(kctx->kbdev->dev, "Add atom %p to X_DEP list (s:%d)\n",
+		dev_dbg(kctx->kbdev->dev, "Add atom %pK to X_DEP list (s:%d)\n",
 			(void *)katom, js);
 
 		list_add_tail(&katom->queue, &queue->x_dep_head);
@@ -2307,7 +2346,7 @@ bool kbase_js_dep_resolved_submit(struct kbase_context *kctx,
 			add_required = false;
 		}
 	} else {
-		dev_dbg(kctx->kbdev->dev, "Atom %p not added to X_DEP list\n",
+		dev_dbg(kctx->kbdev->dev, "Atom %pK not added to X_DEP list\n",
 			(void *)katom);
 	}
 
@@ -2321,7 +2360,7 @@ bool kbase_js_dep_resolved_submit(struct kbase_context *kctx,
 	}
 
 	dev_dbg(kctx->kbdev->dev,
-		"Enqueue of kctx %p is %srequired to submit atom %p\n",
+		"Enqueue of kctx %pK is %srequired to submit atom %pK\n",
 		kctx, enqueue_required ? "" : "not ", katom);
 
 	return enqueue_required;
@@ -2348,7 +2387,7 @@ static void kbase_js_move_to_tree(struct kbase_jd_atom *katom)
 
 		if (!kbase_js_atom_blocked_on_x_dep(katom)) {
 			dev_dbg(kctx->kbdev->dev,
-				"Del atom %p from X_DEP list in js_move_to_tree\n",
+				"Del atom %pK from X_DEP list in js_move_to_tree\n",
 				(void *)katom);
 
 			list_del(&katom->queue);
@@ -2366,7 +2405,7 @@ static void kbase_js_move_to_tree(struct kbase_jd_atom *katom)
 			}
 		} else {
 			dev_dbg(kctx->kbdev->dev,
-				"Atom %p blocked on x-dep in js_move_to_tree\n",
+				"Atom %pK blocked on x-dep in js_move_to_tree\n",
 				(void *)katom);
 			break;
 		}
@@ -2409,10 +2448,8 @@ static void kbase_js_evict_deps(struct kbase_context *kctx,
 				KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST))) {
 		/* Remove dependency.*/
 		x_dep->atom_flags &= ~KBASE_KATOM_FLAG_X_DEP_BLOCKED;
-		trace_sysgraph(SGR_DEP_RES, kctx->id,
-				kbase_jd_atom_id(kctx, x_dep));
 
-		dev_dbg(kctx->kbdev->dev, "Cleared X_DEP flag on atom %p\n",
+		dev_dbg(kctx->kbdev->dev, "Cleared X_DEP flag on atom %pK\n",
 			(void *)x_dep);
 
 		/* Fail if it had a data dependency. */
@@ -2434,14 +2471,14 @@ struct kbase_jd_atom *kbase_js_pull(struct kbase_context *kctx, int js)
 	KBASE_DEBUG_ASSERT(kctx);
 
 	kbdev = kctx->kbdev;
-	dev_dbg(kbdev->dev, "JS: pulling an atom from kctx %p (s:%d)\n",
+	dev_dbg(kbdev->dev, "JS: pulling an atom from kctx %pK (s:%d)\n",
 		(void *)kctx, js);
 
 	js_devdata = &kbdev->js_data;
 	lockdep_assert_held(&kbdev->hwaccess_lock);
 
 	if (!kbasep_js_is_submit_allowed(js_devdata, kctx)) {
-		dev_dbg(kbdev->dev, "JS: No submit allowed for kctx %p\n",
+		dev_dbg(kbdev->dev, "JS: No submit allowed for kctx %pK\n",
 			(void *)kctx);
 		return NULL;
 	}
@@ -2454,25 +2491,26 @@ struct kbase_jd_atom *kbase_js_pull(struct kbase_context *kctx, int js)
 
 	katom = jsctx_rb_peek(kctx, js);
 	if (!katom) {
-		dev_dbg(kbdev->dev, "JS: No pullable atom in kctx %p (s:%d)\n",
+		dev_dbg(kbdev->dev, "JS: No pullable atom in kctx %pK (s:%d)\n",
 			(void *)kctx, js);
 		return NULL;
 	}
 	if (kctx->blocked_js[js][katom->sched_priority]) {
 		dev_dbg(kbdev->dev,
-			"JS: kctx %p is blocked from submitting atoms at priority %d (s:%d)\n",
+			"JS: kctx %pK is blocked from submitting atoms at priority %d (s:%d)\n",
 			(void *)kctx, katom->sched_priority, js);
 		return NULL;
 	}
 	if (atomic_read(&katom->blocked)) {
-		dev_dbg(kbdev->dev, "JS: Atom %p is blocked in js_pull\n",
+		dev_dbg(kbdev->dev, "JS: Atom %pK is blocked in js_pull\n",
 			(void *)katom);
 		return NULL;
 	}
 
 	/* Due to ordering restrictions when unpulling atoms on failure, we do
 	 * not allow multiple runs of fail-dep atoms from the same context to be
-	 * present on the same slot */
+	 * present on the same slot
+	 */
 	if (katom->pre_dep && atomic_read(&kctx->atoms_pulled_slot[js])) {
 		struct kbase_jd_atom *prev_atom =
 				kbase_backend_inspect_tail(kbdev, js);
@@ -2486,14 +2524,14 @@ struct kbase_jd_atom *kbase_js_pull(struct kbase_context *kctx, int js)
 				KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB ||
 				katom->x_pre_dep->will_fail_event_code)	{
 			dev_dbg(kbdev->dev,
-				"JS: X pre-dep %p is not present in slot FIFO or will fail\n",
+				"JS: X pre-dep %pK is not present in slot FIFO or will fail\n",
 				(void *)katom->x_pre_dep);
 			return NULL;
 		}
 		if ((katom->atom_flags & KBASE_KATOM_FLAG_FAIL_BLOCKER) &&
 				kbase_backend_nr_atoms_on_slot(kbdev, js)) {
 			dev_dbg(kbdev->dev,
-				"JS: Atom %p has cross-slot fail dependency and atoms on slot (s:%d)\n",
+				"JS: Atom %pK has cross-slot fail dependency and atoms on slot (s:%d)\n",
 				(void *)katom, js);
 			return NULL;
 		}
@@ -2518,7 +2556,7 @@ struct kbase_jd_atom *kbase_js_pull(struct kbase_context *kctx, int js)
 
 	katom->ticks = 0;
 
-	dev_dbg(kbdev->dev, "JS: successfully pulled atom %p from kctx %p (s:%d)\n",
+	dev_dbg(kbdev->dev, "JS: successfully pulled atom %pK from kctx %pK (s:%d)\n",
 		(void *)katom, (void *)kctx, js);
 
 	return katom;
@@ -2561,7 +2599,7 @@ static void js_return_of_start_rp(struct kbase_jd_atom *const start_katom)
 		return;
 
 	dev_dbg(kctx->kbdev->dev,
-		"JS return start atom %p in state %d of RP %d\n",
+		"JS return start atom %pK in state %d of RP %d\n",
 		(void *)start_katom, (int)rp->state,
 		start_katom->renderpass_id);
 
@@ -2589,7 +2627,7 @@ static void js_return_of_start_rp(struct kbase_jd_atom *const start_katom)
 	/* Prevent the tiler job being pulled for execution in the
 	 * job scheduler again.
 	 */
-	dev_dbg(kbdev->dev, "Blocking start atom %p\n",
+	dev_dbg(kbdev->dev, "Blocking start atom %pK\n",
 		(void *)start_katom);
 	atomic_inc(&start_katom->blocked);
 
@@ -2601,14 +2639,14 @@ static void js_return_of_start_rp(struct kbase_jd_atom *const start_katom)
 	/* Was the fragment job chain submitted to kbase yet? */
 	end_katom = rp->end_katom;
 	if (end_katom) {
-		dev_dbg(kctx->kbdev->dev, "JS return add end atom %p\n",
+		dev_dbg(kctx->kbdev->dev, "JS return add end atom %pK\n",
 			(void *)end_katom);
 
 		if (rp->state == KBASE_JD_RP_RETRY_OOM) {
 			/* Allow the end of the renderpass to be pulled for
 			 * execution again to continue incremental rendering.
 			 */
-			dev_dbg(kbdev->dev, "Unblocking end atom %p\n",
+			dev_dbg(kbdev->dev, "Unblocking end atom %pK\n",
 				(void *)end_katom);
 			atomic_dec(&end_katom->blocked);
 			WARN_ON(!(end_katom->atom_flags &
@@ -2670,7 +2708,7 @@ static void js_return_of_end_rp(struct kbase_jd_atom *const end_katom)
 		return;
 
 	dev_dbg(kctx->kbdev->dev,
-		"JS return end atom %p in state %d of RP %d\n",
+		"JS return end atom %pK in state %d of RP %d\n",
 		(void *)end_katom, (int)rp->state, end_katom->renderpass_id);
 
 	if (WARN_ON(rp->state != KBASE_JD_RP_OOM &&
@@ -2692,14 +2730,14 @@ static void js_return_of_end_rp(struct kbase_jd_atom *const end_katom)
 		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
 
 		dev_dbg(kbdev->dev,
-			"Reset backing to %zu pages for region %p\n",
+			"Reset backing to %zu pages for region %pK\n",
 			reg->threshold_pages, (void *)reg);
 
 		if (!WARN_ON(reg->flags & KBASE_REG_VA_FREED))
 			kbase_mem_shrink(kctx, reg, reg->threshold_pages);
 
 		spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
-		dev_dbg(kbdev->dev, "Deleting region %p from list\n",
+		dev_dbg(kbdev->dev, "Deleting region %pK from list\n",
 			(void *)reg);
 		list_del_init(&reg->link);
 		kbase_va_region_alloc_put(kctx, reg);
@@ -2717,7 +2755,7 @@ static void js_return_of_end_rp(struct kbase_jd_atom *const end_katom)
 	 */
 	start_katom = rp->start_katom;
 	if (!WARN_ON(!start_katom)) {
-		dev_dbg(kbdev->dev, "Unblocking start atom %p\n",
+		dev_dbg(kbdev->dev, "Unblocking start atom %pK\n",
 			(void *)start_katom);
 		atomic_dec(&start_katom->blocked);
 		(void)kbase_js_ctx_list_add_pullable_head_nolock(kbdev, kctx,
@@ -2743,7 +2781,7 @@ static void js_return_worker(struct work_struct *data)
 	unsigned long flags;
 	base_jd_core_req core_req = katom->core_req;
 
-	dev_dbg(kbdev->dev, "%s for atom %p with event code 0x%x\n",
+	dev_dbg(kbdev->dev, "%s for atom %pK with event code 0x%x\n",
 		__func__, (void *)katom, katom->event_code);
 
 	if (katom->event_code != BASE_JD_EVENT_END_RP_DONE)
@@ -2771,13 +2809,15 @@ static void js_return_worker(struct work_struct *data)
 		timer_sync |= kbase_js_ctx_list_remove_nolock(kbdev, kctx, js);
 
 	/* If this slot has been blocked due to soft-stopped atoms, and all
-	 * atoms have now been processed, then unblock the slot */
+	 * atoms have now been processed, then unblock the slot
+	 */
 	if (!kctx->atoms_pulled_slot_pri[js][prio] &&
 			kctx->blocked_js[js][prio]) {
 		kctx->blocked_js[js][prio] = false;
 
 		/* Only mark the slot as pullable if the context is not idle -
-		 * that case is handled below */
+		 * that case is handled below
+		 */
 		if (atomic_read(&kctx->atoms_pulled) &&
 				kbase_js_ctx_pullable(kctx, js, true))
 			timer_sync |= kbase_js_ctx_list_add_pullable_nolock(
@@ -2786,12 +2826,12 @@ static void js_return_worker(struct work_struct *data)
 
 	if (!atomic_read(&kctx->atoms_pulled)) {
 		dev_dbg(kbdev->dev,
-			"No atoms currently pulled from context %p\n",
+			"No atoms currently pulled from context %pK\n",
 			(void *)kctx);
 
 		if (!kctx->slots_pullable) {
 			dev_dbg(kbdev->dev,
-				"Context %p %s counted as runnable\n",
+				"Context %pK %s counted as runnable\n",
 				(void *)kctx,
 				kbase_ctx_flag(kctx, KCTX_RUNNABLE_REF) ?
 					"is" : "isn't");
@@ -2827,7 +2867,7 @@ static void js_return_worker(struct work_struct *data)
 
 	if (context_idle) {
 		dev_dbg(kbdev->dev,
-			"Context %p %s counted as active\n",
+			"Context %pK %s counted as active\n",
 			(void *)kctx,
 			kbase_ctx_flag(kctx, KCTX_ACTIVE) ?
 				"is" : "isn't");
@@ -2866,13 +2906,13 @@ static void js_return_worker(struct work_struct *data)
 
 	kbase_backend_complete_wq_post_sched(kbdev, core_req);
 
-	dev_dbg(kbdev->dev, "Leaving %s for atom %p\n",
+	dev_dbg(kbdev->dev, "Leaving %s for atom %pK\n",
 		__func__, (void *)katom);
 }
 
 void kbase_js_unpull(struct kbase_context *kctx, struct kbase_jd_atom *katom)
 {
-	dev_dbg(kctx->kbdev->dev, "Unpulling atom %p in kctx %p\n",
+	dev_dbg(kctx->kbdev->dev, "Unpulling atom %pK in kctx %pK\n",
 		(void *)katom, (void *)kctx);
 
 	lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
@@ -2927,7 +2967,7 @@ static bool js_complete_start_rp(struct kbase_context *kctx,
 		return false;
 
 	dev_dbg(kctx->kbdev->dev,
-		"Start atom %p is done in state %d of RP %d\n",
+		"Start atom %pK is done in state %d of RP %d\n",
 		(void *)start_katom, (int)rp->state,
 		start_katom->renderpass_id);
 
@@ -2939,7 +2979,7 @@ static bool js_complete_start_rp(struct kbase_context *kctx,
 		unsigned long flags;
 
 		dev_dbg(kctx->kbdev->dev,
-			"Start atom %p completed before soft-stop\n",
+			"Start atom %pK completed before soft-stop\n",
 			(void *)start_katom);
 
 		kbase_gpu_vm_lock(kctx);
@@ -2951,7 +2991,7 @@ static bool js_complete_start_rp(struct kbase_context *kctx,
 						 struct kbase_va_region, link);
 
 			WARN_ON(reg->flags & KBASE_REG_VA_FREED);
-			dev_dbg(kctx->kbdev->dev, "Deleting region %p from list\n",
+			dev_dbg(kctx->kbdev->dev, "Deleting region %pK from list\n",
 				(void *)reg);
 			list_del_init(&reg->link);
 			kbase_va_region_alloc_put(kctx, reg);
@@ -2961,7 +3001,7 @@ static bool js_complete_start_rp(struct kbase_context *kctx,
 		kbase_gpu_vm_unlock(kctx);
 	} else {
 		dev_dbg(kctx->kbdev->dev,
-			"Start atom %p did not exceed memory threshold\n",
+			"Start atom %pK did not exceed memory threshold\n",
 			(void *)start_katom);
 
 		WARN_ON(rp->state != KBASE_JD_RP_START &&
@@ -2978,7 +3018,7 @@ static bool js_complete_start_rp(struct kbase_context *kctx,
 			/* Allow the end of the renderpass to be pulled for
 			 * execution again to continue incremental rendering.
 			 */
-			dev_dbg(kbdev->dev, "Unblocking end atom %p!\n",
+			dev_dbg(kbdev->dev, "Unblocking end atom %pK!\n",
 				(void *)end_katom);
 			atomic_dec(&end_katom->blocked);
 
@@ -3022,7 +3062,7 @@ static void js_complete_end_rp(struct kbase_context *kctx,
 	if (WARN_ON(rp->end_katom != end_katom))
 		return;
 
-	dev_dbg(kbdev->dev, "End atom %p is done in state %d of RP %d\n",
+	dev_dbg(kbdev->dev, "End atom %pK is done in state %d of RP %d\n",
 		(void *)end_katom, (int)rp->state, end_katom->renderpass_id);
 
 	if (WARN_ON(rp->state == KBASE_JD_RP_COMPLETE) ||
@@ -3056,7 +3096,7 @@ bool kbase_js_complete_atom_wq(struct kbase_context *kctx,
 	kbdev = kctx->kbdev;
 	atom_slot = katom->slot_nr;
 
-	dev_dbg(kbdev->dev, "%s for atom %p (s:%d)\n",
+	dev_dbg(kbdev->dev, "%s for atom %pK (s:%d)\n",
 		__func__, (void *)katom, atom_slot);
 
 	/* Update the incremental rendering state machine.
@@ -3075,7 +3115,7 @@ bool kbase_js_complete_atom_wq(struct kbase_context *kctx,
 	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
 
 	if (katom->atom_flags & KBASE_KATOM_FLAG_JSCTX_IN_TREE) {
-		dev_dbg(kbdev->dev, "Atom %p is in runnable_tree\n",
+		dev_dbg(kbdev->dev, "Atom %pK is in runnable_tree\n",
 			(void *)katom);
 
 		context_idle = !atomic_dec_return(&kctx->atoms_pulled);
@@ -3091,11 +3131,12 @@ bool kbase_js_complete_atom_wq(struct kbase_context *kctx,
 		}
 
 		/* If this slot has been blocked due to soft-stopped atoms, and
-		 * all atoms have now been processed, then unblock the slot */
+		 * all atoms have now been processed, then unblock the slot
+		 */
 		if (!kctx->atoms_pulled_slot_pri[atom_slot][prio]
 				&& kctx->blocked_js[atom_slot][prio]) {
 			dev_dbg(kbdev->dev,
-				"kctx %p is no longer blocked from submitting on slot %d at priority %d\n",
+				"kctx %pK is no longer blocked from submitting on slot %d at priority %d\n",
 				(void *)kctx, atom_slot, prio);
 
 			kctx->blocked_js[atom_slot][prio] = false;
@@ -3149,7 +3190,7 @@ bool kbase_js_complete_atom_wq(struct kbase_context *kctx,
 	 * jd_done_worker().
 	 */
 	if (context_idle) {
-		dev_dbg(kbdev->dev, "kctx %p is no longer active\n",
+		dev_dbg(kbdev->dev, "kctx %pK is no longer active\n",
 			(void *)kctx);
 		kbase_ctx_flag_clear(kctx, KCTX_ACTIVE);
 	}
@@ -3200,7 +3241,7 @@ static bool js_end_rp_is_complete(struct kbase_jd_atom *const end_katom)
 		return true;
 
 	dev_dbg(kbdev->dev,
-		"JS complete end atom %p in state %d of RP %d\n",
+		"JS complete end atom %pK in state %d of RP %d\n",
 		(void *)end_katom, (int)rp->state,
 		end_katom->renderpass_id);
 
@@ -3229,7 +3270,7 @@ struct kbase_jd_atom *kbase_js_complete_atom(struct kbase_jd_atom *katom,
 	struct kbase_jd_atom *x_dep = katom->x_post_dep;
 
 	kbdev = kctx->kbdev;
-	dev_dbg(kbdev->dev, "Atom %p complete in kctx %p (post-dep %p)\n",
+	dev_dbg(kbdev->dev, "Atom %pK complete in kctx %pK (post-dep %pK)\n",
 		(void *)katom, (void *)kctx, (void *)x_dep);
 
 	lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
@@ -3245,7 +3286,7 @@ struct kbase_jd_atom *kbase_js_complete_atom(struct kbase_jd_atom *katom,
 		katom->event_code = katom->will_fail_event_code;
 
 	katom->status = KBASE_JD_ATOM_STATE_HW_COMPLETED;
-	dev_dbg(kbdev->dev, "Atom %p status to HW completed\n", (void *)katom);
+	dev_dbg(kbdev->dev, "Atom %pK status to HW completed\n", (void *)katom);
 
 	if (katom->event_code != BASE_JD_EVENT_DONE) {
 		kbase_js_evict_deps(kctx, katom, katom->slot_nr,
@@ -3267,9 +3308,7 @@ struct kbase_jd_atom *kbase_js_complete_atom(struct kbase_jd_atom *katom,
 		bool was_pullable = kbase_js_ctx_pullable(kctx, x_dep->slot_nr,
 				false);
 		x_dep->atom_flags &= ~KBASE_KATOM_FLAG_X_DEP_BLOCKED;
-		trace_sysgraph(SGR_DEP_RES, kctx->id,
-				kbase_jd_atom_id(katom->kctx, x_dep));
-		dev_dbg(kbdev->dev, "Cleared X_DEP flag on atom %p\n",
+		dev_dbg(kbdev->dev, "Cleared X_DEP flag on atom %pK\n",
 			(void *)x_dep);
 
 		kbase_js_move_to_tree(x_dep);
@@ -3280,13 +3319,13 @@ struct kbase_jd_atom *kbase_js_complete_atom(struct kbase_jd_atom *katom,
 					x_dep->slot_nr);
 
 		if (x_dep->atom_flags & KBASE_KATOM_FLAG_JSCTX_IN_TREE) {
-			dev_dbg(kbdev->dev, "Atom %p is in runnable tree\n",
+			dev_dbg(kbdev->dev, "Atom %pK is in runnable tree\n",
 				(void *)x_dep);
 			return x_dep;
 		}
 	} else {
 		dev_dbg(kbdev->dev,
-			"No cross-slot dep to unblock for atom %p\n",
+			"No cross-slot dep to unblock for atom %pK\n",
 			(void *)katom);
 	}
 
@@ -3317,13 +3356,13 @@ bool kbase_js_atom_blocked_on_x_dep(struct kbase_jd_atom *const katom)
 
 	if (!(katom->atom_flags &
 			KBASE_KATOM_FLAG_X_DEP_BLOCKED)) {
-		dev_dbg(kbdev->dev, "Atom %p is not blocked on a cross-slot dependency",
+		dev_dbg(kbdev->dev, "Atom %pK is not blocked on a cross-slot dependency",
 			(void *)katom);
 		return false;
 	}
 
 	if (!(katom->core_req & BASE_JD_REQ_END_RENDERPASS)) {
-		dev_dbg(kbdev->dev, "Atom %p is blocked on a cross-slot dependency",
+		dev_dbg(kbdev->dev, "Atom %pK is blocked on a cross-slot dependency",
 			(void *)katom);
 		return true;
 	}
@@ -3349,12 +3388,12 @@ bool kbase_js_atom_blocked_on_x_dep(struct kbase_jd_atom *const katom)
 	 * if it only depends on the tiler job chain.
 	 */
 	if (katom->x_pre_dep != rp->start_katom) {
-		dev_dbg(kbdev->dev, "Dependency is on %p not start atom %p\n",
+		dev_dbg(kbdev->dev, "Dependency is on %pK not start atom %pK\n",
 			(void *)katom->x_pre_dep, (void *)rp->start_katom);
 		return true;
 	}
 
-	dev_dbg(kbdev->dev, "Ignoring cross-slot dep on atom %p\n",
+	dev_dbg(kbdev->dev, "Ignoring cross-slot dep on atom %pK\n",
 		(void *)katom->x_pre_dep);
 
 	return false;
@@ -3368,7 +3407,7 @@ void kbase_js_sched(struct kbase_device *kbdev, int js_mask)
 	bool ctx_waiting[BASE_JM_MAX_NR_SLOTS];
 	int js;
 
-	dev_dbg(kbdev->dev, "%s kbdev %p mask 0x%x\n",
+	dev_dbg(kbdev->dev, "%s kbdev %pK mask 0x%x\n",
 		__func__, (void *)kbdev, (unsigned int)js_mask);
 
 	js_devdata = &kbdev->js_data;
@@ -3403,7 +3442,7 @@ void kbase_js_sched(struct kbase_device *kbdev, int js_mask)
 				context_idle = true;
 
 				dev_dbg(kbdev->dev,
-					"kctx %p is not active (s:%d)\n",
+					"kctx %pK is not active (s:%d)\n",
 					(void *)kctx, js);
 
 				if (kbase_pm_context_active_handle_suspend(
@@ -3412,7 +3451,8 @@ void kbase_js_sched(struct kbase_device *kbdev, int js_mask)
 					dev_dbg(kbdev->dev,
 						"Suspend pending (s:%d)\n", js);
 					/* Suspend pending - return context to
-					 * queue and stop scheduling */
+					 * queue and stop scheduling
+					 */
 					mutex_lock(
 					&kctx->jctx.sched_info.ctx.jsctx_mutex);
 					if (kbase_js_ctx_list_add_pullable_head(
@@ -3432,7 +3472,7 @@ void kbase_js_sched(struct kbase_device *kbdev, int js_mask)
 					&kctx->jctx.sched_info.ctx.jsctx_mutex);
 
 				dev_dbg(kbdev->dev,
-					"kctx %p cannot be used at this time\n",
+					"kctx %pK cannot be used at this time\n",
 					kctx);
 
 				spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
@@ -3474,7 +3514,7 @@ void kbase_js_sched(struct kbase_device *kbdev, int js_mask)
 				bool pullable;
 
 				dev_dbg(kbdev->dev,
-					"No atoms pulled from kctx %p (s:%d)\n",
+					"No atoms pulled from kctx %pK (s:%d)\n",
 					(void *)kctx, js);
 
 				pullable = kbase_js_ctx_pullable(kctx, js,
@@ -3483,7 +3523,8 @@ void kbase_js_sched(struct kbase_device *kbdev, int js_mask)
 				/* Failed to pull jobs - push to head of list.
 				 * Unless this context is already 'active', in
 				 * which case it's effectively already scheduled
-				 * so push it to the back of the list. */
+				 * so push it to the back of the list.
+				 */
 				if (pullable && kctx == last_active[js] &&
 						kbase_ctx_flag(kctx,
 						(KCTX_PULLED_SINCE_ACTIVE_JS0 <<
@@ -3508,7 +3549,8 @@ void kbase_js_sched(struct kbase_device *kbdev, int js_mask)
 				 * slot, then we need to remove the active
 				 * marker to prevent it from submitting atoms in
 				 * the IRQ handler, which would prevent this
-				 * context from making progress. */
+				 * context from making progress.
+				 */
 				if (last_active[js] && kctx != last_active[js]
 						&& kbase_js_ctx_pullable(
 						last_active[js], js, true))
@@ -3534,7 +3576,7 @@ void kbase_js_sched(struct kbase_device *kbdev, int js_mask)
 				break; /* Could not run atoms on this slot */
 			}
 
-			dev_dbg(kbdev->dev, "Push kctx %p to back of list\n",
+			dev_dbg(kbdev->dev, "Push kctx %pK to back of list\n",
 				(void *)kctx);
 			if (kbase_js_ctx_pullable(kctx, js, true))
 				timer_sync |=
@@ -3556,7 +3598,7 @@ void kbase_js_sched(struct kbase_device *kbdev, int js_mask)
 	for (js = 0; js < BASE_JM_MAX_NR_SLOTS; js++) {
 		if (kbdev->hwaccess.active_kctx[js] == last_active[js] &&
 				ctx_waiting[js]) {
-			dev_dbg(kbdev->dev, "Marking kctx %p as inactive (s:%d)\n",
+			dev_dbg(kbdev->dev, "Marking kctx %pK as inactive (s:%d)\n",
 					(void *)last_active[js], js);
 			kbdev->hwaccess.active_kctx[js] = NULL;
 		}
@@ -3580,13 +3622,14 @@ void kbase_js_zap_context(struct kbase_context *kctx)
 
 	/* First, atomically do the following:
 	 * - mark the context as dying
-	 * - try to evict it from the queue */
+	 * - try to evict it from the queue
+	 */
 	mutex_lock(&kctx->jctx.lock);
 	mutex_lock(&js_devdata->queue_mutex);
 	mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
 	kbase_ctx_flag_set(kctx, KCTX_DYING);
 
-	dev_dbg(kbdev->dev, "Zap: Try Evict Ctx %p", kctx);
+	dev_dbg(kbdev->dev, "Zap: Try Evict Ctx %pK", kctx);
 
 	/*
 	 * At this point we know:
@@ -3650,13 +3693,14 @@ void kbase_js_zap_context(struct kbase_context *kctx)
 
 		KBASE_KTRACE_ADD_JM(kbdev, JM_ZAP_NON_SCHEDULED, kctx, NULL, 0u, kbase_ctx_flag(kctx, KCTX_SCHEDULED));
 
-		dev_dbg(kbdev->dev, "Zap: Ctx %p scheduled=0", kctx);
+		dev_dbg(kbdev->dev, "Zap: Ctx %pK scheduled=0", kctx);
 
 		/* Only cancel jobs when we evicted from the
 		 * queue. No Power Manager active reference was held.
 		 *
-		 * Having is_dying set ensures that this kills, and
-		 * doesn't requeue */
+		 * Having is_dying set ensures that this kills, and doesn't
+		 * requeue
+		 */
 		kbasep_js_runpool_requeue_or_kill_ctx(kbdev, kctx, false);
 
 		mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
@@ -3667,9 +3711,10 @@ void kbase_js_zap_context(struct kbase_context *kctx)
 		bool was_retained;
 
 		/* Case c: didn't evict, but it is scheduled - it's in the Run
-		 * Pool */
+		 * Pool
+		 */
 		KBASE_KTRACE_ADD_JM(kbdev, JM_ZAP_SCHEDULED, kctx, NULL, 0u, kbase_ctx_flag(kctx, KCTX_SCHEDULED));
-		dev_dbg(kbdev->dev, "Zap: Ctx %p is in RunPool", kctx);
+		dev_dbg(kbdev->dev, "Zap: Ctx %pK is in RunPool", kctx);
 
 		/* Disable the ctx from submitting any more jobs */
 		spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
@@ -3678,18 +3723,21 @@ void kbase_js_zap_context(struct kbase_context *kctx)
 
 		/* Retain and (later) release the context whilst it is is now
 		 * disallowed from submitting jobs - ensures that someone
-		 * somewhere will be removing the context later on */
+		 * somewhere will be removing the context later on
+		 */
 		was_retained = kbase_ctx_sched_inc_refcount_nolock(kctx);
 
 		/* Since it's scheduled and we have the jsctx_mutex, it must be
-		 * retained successfully */
+		 * retained successfully
+		 */
 		KBASE_DEBUG_ASSERT(was_retained);
 
-		dev_dbg(kbdev->dev, "Zap: Ctx %p Kill Any Running jobs", kctx);
+		dev_dbg(kbdev->dev, "Zap: Ctx %pK Kill Any Running jobs", kctx);
 
 		/* Cancel any remaining running jobs for this kctx - if any.
 		 * Submit is disallowed which takes effect immediately, so no
-		 * more new jobs will appear after we do this. */
+		 * more new jobs will appear after we do this.
+		 */
 		kbase_backend_jm_kill_running_jobs_from_kctx(kctx);
 
 		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
@@ -3697,7 +3745,7 @@ void kbase_js_zap_context(struct kbase_context *kctx)
 		mutex_unlock(&js_devdata->queue_mutex);
 		mutex_unlock(&kctx->jctx.lock);
 
-		dev_dbg(kbdev->dev, "Zap: Ctx %p Release (may or may not schedule out immediately)",
+		dev_dbg(kbdev->dev, "Zap: Ctx %pK Release (may or may not schedule out immediately)",
 									kctx);
 
 		kbasep_js_runpool_release_ctx(kbdev, kctx);
@@ -3711,7 +3759,8 @@ void kbase_js_zap_context(struct kbase_context *kctx)
 	 * to be destroyed, and the context to be de-scheduled (if it was on the
 	 * runpool).
 	 *
-	 * kbase_jd_zap_context() will do this. */
+	 * kbase_jd_zap_context() will do this.
+	 */
 }
 
 static inline int trace_get_refcnt(struct kbase_device *kbdev,
@@ -3758,3 +3807,18 @@ static void kbase_js_foreach_ctx_job(struct kbase_context *kctx,
 
 	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
 }
+
+/* Filter @priority through kbdev's priority control manager, when one is set. */
+base_jd_prio kbase_js_priority_check(struct kbase_device *kbdev, base_jd_prio priority)
+{
+	struct priority_control_manager_device *pcm_device = kbdev->pcm_dev;
+	int sched_prio;
+
+	if (!pcm_device)
+		return priority;
+
+	sched_prio = kbasep_js_atom_prio_to_sched_prio(priority);
+	sched_prio = pcm_device->ops.pcm_scheduler_priority_check(pcm_device, current, sched_prio);
+	return kbasep_js_sched_prio_to_atom_prio(sched_prio);
+}
+
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_js.h b/drivers/gpu/arm/bifrost/mali_kbase_js.h
index 541acd4afed7..a4dc2079f339 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_js.h
+++ b/drivers/gpu/arm/bifrost/mali_kbase_js.h
@@ -1,11 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  *
- * (C) COPYRIGHT 2011-2020 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2011-2021 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,15 +17,10 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
-
-
 /**
- * @file mali_kbase_js.h
- * Job Scheduler APIs.
+ * DOC: Job Scheduler APIs.
  */
 
 #ifndef _KBASE_JS_H_
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_js_ctx_attr.c b/drivers/gpu/arm/bifrost/mali_kbase_js_ctx_attr.c
index 141d04a385cb..40967cb18597 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_js_ctx_attr.c
+++ b/drivers/gpu/arm/bifrost/mali_kbase_js_ctx_attr.c
@@ -1,11 +1,12 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  *
- * (C) COPYRIGHT 2012-2016, 2018, 2020 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2012-2016, 2018, 2020-2021 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,11 +17,8 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
-
 #include <mali_kbase.h>
 #include <mali_kbase_config.h>
 
@@ -29,8 +27,11 @@
  */
 
 /**
- * @brief Check whether a ctx has a certain attribute, and if so, retain that
+ * Check whether a ctx has a certain attribute, and if so, retain that
  * attribute on the runpool.
+ * @kbdev: Device pointer
+ * @kctx:  KBase context
+ * @attribute: Attribute to check/retain
  *
  * Requires:
  * - jsctx mutex
@@ -75,8 +76,11 @@ static bool kbasep_js_ctx_attr_runpool_retain_attr(struct kbase_device *kbdev, s
 }
 
 /**
- * @brief Check whether a ctx has a certain attribute, and if so, release that
+ * Check whether a ctx has a certain attribute, and if so, release that
  * attribute on the runpool.
+ * @kbdev: Device pointer
+ * @kctx:  KBase context
+ * @attribute: Attribute to release
  *
  * Requires:
  * - jsctx mutex
@@ -120,8 +124,11 @@ static bool kbasep_js_ctx_attr_runpool_release_attr(struct kbase_device *kbdev,
 }
 
 /**
- * @brief Retain a certain attribute on a ctx, also retaining it on the runpool
+ * Retain a certain attribute on a ctx, also retaining it on the runpool
  * if the context is scheduled.
+ * @kbdev: Device pointer
+ * @kctx:  KBase context
+ * @attribute: Attribute to retain
  *
  * Requires:
  * - jsctx mutex
@@ -156,9 +163,12 @@ static bool kbasep_js_ctx_attr_ctx_retain_attr(struct kbase_device *kbdev, struc
 	return runpool_state_changed;
 }
 
-/*
- * @brief Release a certain attribute on a ctx, also releasing it from the runpool
+/**
+ * Release a certain attribute on a ctx, also releasing it from the runpool
  * if the context is scheduled.
+ * @kbdev: Device pointer
+ * @kctx:  KBase context
+ * @attribute: Attribute to release
  *
  * Requires:
  * - jsctx mutex
@@ -211,7 +221,8 @@ void kbasep_js_ctx_attr_runpool_retain_ctx(struct kbase_device *kbdev, struct kb
 
 			/* We don't need to know about state changed, because retaining a
 			 * context occurs on scheduling it, and that itself will also try
-			 * to run new atoms */
+			 * to run new atoms
+			 */
 			CSTD_UNUSED(runpool_state_changed);
 		}
 	}
@@ -251,9 +262,9 @@ void kbasep_js_ctx_attr_ctx_retain_atom(struct kbase_device *kbdev, struct kbase
 		runpool_state_changed |= kbasep_js_ctx_attr_ctx_retain_attr(kbdev, kctx, KBASEP_JS_CTX_ATTR_COMPUTE_ALL_CORES);
 	}
 
-	/* We don't need to know about state changed, because retaining an
-	 * atom occurs on adding it, and that itself will also try to run
-	 * new atoms */
+	/* We don't need to know about state changed, because retaining an atom
+	 * occurs on adding it, and that itself will also try to run new atoms
+	 */
 	CSTD_UNUSED(runpool_state_changed);
 }
 
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_js_ctx_attr.h b/drivers/gpu/arm/bifrost/mali_kbase_js_ctx_attr.h
index 25fd39787c71..1477b1d55659 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_js_ctx_attr.h
+++ b/drivers/gpu/arm/bifrost/mali_kbase_js_ctx_attr.h
@@ -1,11 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  *
- * (C) COPYRIGHT 2012-2015, 2018 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2012-2015, 2018, 2020-2021 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,37 +17,19 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
-
-
 /**
- * @file mali_kbase_js_ctx_attr.h
- * Job Scheduler Context Attribute APIs
+ * DOC: Job Scheduler Context Attribute APIs
  */
 
 #ifndef _KBASE_JS_CTX_ATTR_H_
 #define _KBASE_JS_CTX_ATTR_H_
 
-/**
- * @addtogroup base_api
- * @{
- */
-
-/**
- * @addtogroup base_kbase_api
- * @{
- */
-
-/**
- * @addtogroup kbase_js
- * @{
- */
-
 /**
  * Retain all attributes of a context
+ * @kbdev: KBase device
+ * @kctx:  KBase context
  *
  * This occurs on scheduling in the context on the runpool (but after
  * is_scheduled is set)
@@ -60,6 +43,8 @@ void kbasep_js_ctx_attr_runpool_retain_ctx(struct kbase_device *kbdev, struct kb
 
 /**
  * Release all attributes of a context
+ * @kbdev: KBase device
+ * @kctx:  KBase context
  *
  * This occurs on scheduling out the context from the runpool (but before
  * is_scheduled is cleared)
@@ -79,6 +64,9 @@ bool kbasep_js_ctx_attr_runpool_release_ctx(struct kbase_device *kbdev, struct k
 
 /**
  * Retain all attributes of an atom
+ * @kbdev: KBase device
+ * @kctx:  KBase context
+ * @katom: Atom
  *
  * This occurs on adding an atom to a context
  *
@@ -90,6 +78,9 @@ void kbasep_js_ctx_attr_ctx_retain_atom(struct kbase_device *kbdev, struct kbase
 
 /**
  * Release all attributes of an atom, given its retained state.
+ * @kbdev: KBase device
+ * @kctx:  KBase context
+ * @katom_retained_state: Retained state
  *
  * This occurs after (permanently) removing an atom from a context
  *
@@ -107,7 +98,7 @@ void kbasep_js_ctx_attr_ctx_retain_atom(struct kbase_device *kbdev, struct kbase
  */
 bool kbasep_js_ctx_attr_ctx_release_atom(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbasep_js_atom_retained_state *katom_retained_state);
 
-/**
+/*
  * Requires:
  * - runpool_irq spinlock
  */
@@ -122,7 +113,7 @@ static inline s8 kbasep_js_ctx_attr_count_on_runpool(struct kbase_device *kbdev,
 	return js_devdata->runpool_irq.ctx_attr_ref_count[attribute];
 }
 
-/**
+/*
  * Requires:
  * - runpool_irq spinlock
  */
@@ -132,7 +123,7 @@ static inline bool kbasep_js_ctx_attr_is_attr_on_runpool(struct kbase_device *kb
 	return (bool) kbasep_js_ctx_attr_count_on_runpool(kbdev, attribute);
 }
 
-/**
+/*
  * Requires:
  * - jsctx mutex
  */
@@ -148,8 +139,4 @@ static inline bool kbasep_js_ctx_attr_is_attr_on_ctx(struct kbase_context *kctx,
 	return (bool) (js_kctx_info->ctx.ctx_attr_ref_count[attribute]);
 }
 
-	  /** @} *//* end group kbase_js */
-	  /** @} *//* end group base_kbase_api */
-	  /** @} *//* end group base_api */
-
 #endif				/* _KBASE_JS_DEFS_H_ */
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_kinstr_jm.c b/drivers/gpu/arm/bifrost/mali_kbase_kinstr_jm.c
index fd1ea8815b16..7b02b681fcf7 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_kinstr_jm.c
+++ b/drivers/gpu/arm/bifrost/mali_kbase_kinstr_jm.c
@@ -1,11 +1,12 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  *
- * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,8 +17,6 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 /*
@@ -26,7 +25,7 @@
  */
 
 #include "mali_kbase_kinstr_jm.h"
-#include "mali_kbase_kinstr_jm_reader.h"
+#include <uapi/gpu/arm/bifrost/mali_kbase_kinstr_jm_reader.h>
 
 #include "mali_kbase.h"
 #include "mali_kbase_linux.h"
@@ -38,6 +37,7 @@
 #include <linux/circ_buf.h>
 #include <linux/fs.h>
 #include <linux/kref.h>
+#include <linux/ktime.h>
 #include <linux/log2.h>
 #include <linux/mutex.h>
 #include <linux/rculist_bl.h>
@@ -69,15 +69,9 @@ typedef unsigned int __poll_t;
 /* Allows us to perform ASM goto for the tracing
  * https://www.kernel.org/doc/Documentation/static-keys.txt
  */
-#if KERNEL_VERSION(4, 3, 0) <= LINUX_VERSION_CODE
 DEFINE_STATIC_KEY_FALSE(basep_kinstr_jm_reader_static_key);
-#else
-struct static_key basep_kinstr_jm_reader_static_key = STATIC_KEY_INIT_FALSE;
-#define static_branch_inc(key) static_key_slow_inc(key)
-#define static_branch_dec(key) static_key_slow_dec(key)
-#endif /* KERNEL_VERSION(4 ,3, 0) <= LINUX_VERSION_CODE */
 
-#define KBASE_KINSTR_JM_VERSION 1
+#define KBASE_KINSTR_JM_VERSION 2
 
 /**
  * struct kbase_kinstr_jm - The context for the kernel job manager atom tracing
@@ -105,6 +99,11 @@ struct kbase_kinstr_jm {
  *             KBASE_KINSTR_JM_ATOM_STATE_FLAG_* defines.
  * @reserved:  Reserved for future use.
  * @data:      Extra data for the state change. Active member depends on state.
+ * @data.start:      Data recorded for the
+ *                   KBASE_KINSTR_JM_READER_ATOM_STATE_START state change.
+ * @data.start.slot: Job slot number of the atom, filled in for the start
+ *                   state change.
+ * @data.padding:    Padding
  *
  * We can add new fields to the structure and old user code will gracefully
  * ignore the new fields.
@@ -831,7 +830,7 @@ void kbasep_kinstr_jm_atom_state(
 
 	switch (state) {
 	case KBASE_KINSTR_JM_READER_ATOM_STATE_START:
-		change.data.start.slot = katom->jobslot;
+		change.data.start.slot = katom->slot_nr;
 		break;
 	default:
 		break;
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_kinstr_jm.h b/drivers/gpu/arm/bifrost/mali_kbase_kinstr_jm.h
index 555edfeef77c..e2588d704126 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_kinstr_jm.h
+++ b/drivers/gpu/arm/bifrost/mali_kbase_kinstr_jm.h
@@ -1,11 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  *
- * (C) COPYRIGHT 2019,2020 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,8 +17,6 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 /*
@@ -64,7 +63,7 @@
 #ifndef _KBASE_KINSTR_JM_H_
 #define _KBASE_KINSTR_JM_H_
 
-#include "mali_kbase_kinstr_jm_reader.h"
+#include <uapi/gpu/arm/bifrost/mali_kbase_kinstr_jm_reader.h>
 
 #ifdef __KERNEL__
 #include <linux/version.h>
@@ -127,14 +126,7 @@ void kbasep_kinstr_jm_atom_state(
  * shouldn't be changed externally, but if you do, make sure you use
  * a static_key_inc()/static_key_dec() pair.
  */
-#if KERNEL_VERSION(4, 3, 0) <= LINUX_VERSION_CODE
 extern struct static_key_false basep_kinstr_jm_reader_static_key;
-#else
-/* Pre-4.3 kernels have a different API for static keys, but work
- * mostly the same with less type safety. */
-extern struct static_key basep_kinstr_jm_reader_static_key;
-#define static_branch_unlikely(key) static_key_false(key)
-#endif /* KERNEL_VERSION(4, 3, 0) <= LINUX_VERSION_CODE */
 
 /**
  * kbase_kinstr_jm_atom_state() - Signifies that an atom has changed state
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_linux.h b/drivers/gpu/arm/bifrost/mali_kbase_linux.h
index 003ac9e68a76..ff29337d70db 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_linux.h
+++ b/drivers/gpu/arm/bifrost/mali_kbase_linux.h
@@ -1,11 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  *
- * (C) COPYRIGHT 2010-2014 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2014, 2020-2021 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,15 +17,10 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
-
-
 /**
- * @file mali_kbase_linux.h
- * Base kernel APIs, Linux implementation.
+ * DOC: Base kernel APIs, Linux implementation.
  */
 
 #ifndef _KBASE_LINUX_H_
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_mem.c b/drivers/gpu/arm/bifrost/mali_kbase_mem.c
index 82a799c2d673..326917c9df53 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_mem.c
+++ b/drivers/gpu/arm/bifrost/mali_kbase_mem.c
@@ -1,11 +1,12 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  *
- * (C) COPYRIGHT 2010-2020 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2021 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,8 +17,6 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 /**
@@ -99,27 +98,34 @@ static size_t kbase_get_num_cpu_va_bits(struct kbase_context *kctx)
 }
 
 /* This function finds out which RB tree the given pfn from the GPU VA belongs
- * to based on the memory zone the pfn refers to */
+ * to based on the memory zone the pfn refers to
+ */
 static struct rb_root *kbase_gpu_va_to_rbtree(struct kbase_context *kctx,
 								    u64 gpu_pfn)
 {
 	struct rb_root *rbtree = NULL;
+	struct kbase_reg_zone *exec_va_zone =
+		kbase_ctx_reg_zone_get(kctx, KBASE_REG_ZONE_EXEC_VA);
 
 	/* The gpu_pfn can only be greater than the starting pfn of the EXEC_VA
 	 * zone if this has been initialized.
 	 */
-	if (gpu_pfn >= kctx->exec_va_start)
+	if (gpu_pfn >= exec_va_zone->base_pfn)
 		rbtree = &kctx->reg_rbtree_exec;
 	else {
 		u64 same_va_end;
 
 #ifdef CONFIG_64BIT
-		if (kbase_ctx_flag(kctx, KCTX_COMPAT))
+		if (kbase_ctx_flag(kctx, KCTX_COMPAT)) {
 #endif /* CONFIG_64BIT */
 			same_va_end = KBASE_REG_ZONE_CUSTOM_VA_BASE;
 #ifdef CONFIG_64BIT
-		else
-			same_va_end = kctx->same_va_end;
+		} else {
+			struct kbase_reg_zone *same_va_zone =
+				kbase_ctx_reg_zone_get(kctx,
+						       KBASE_REG_ZONE_SAME_VA);
+			same_va_end = kbase_reg_zone_end_pfn(same_va_zone);
+		}
 #endif /* CONFIG_64BIT */
 
 		if (gpu_pfn >= same_va_end)
@@ -229,7 +235,7 @@ struct kbase_va_region *kbase_region_tracker_find_region_enclosing_address(
 	u64 gpu_pfn = gpu_addr >> PAGE_SHIFT;
 	struct rb_root *rbtree = NULL;
 
-	KBASE_DEBUG_ASSERT(NULL != kctx);
+	KBASE_DEBUG_ASSERT(kctx != NULL);
 
 	lockdep_assert_held(&kctx->reg_lock);
 
@@ -289,7 +295,8 @@ static struct kbase_va_region *kbase_region_tracker_find_region_meeting_reqs(
 	struct rb_root *rbtree = NULL;
 
 	/* Note that this search is a linear search, as we do not have a target
-	   address in mind, so does not benefit from the rbtree search */
+	 * address in mind, so does not benefit from the rbtree search
+	 */
 	rbtree = reg_reqs->rbtree;
 
 	for (rbnode = rb_first(rbtree); rbnode; rbnode = rb_next(rbnode)) {
@@ -304,7 +311,8 @@ static struct kbase_va_region *kbase_region_tracker_find_region_meeting_reqs(
 			 *   (start_pfn + align_mask) & ~(align_mask)
 			 *
 			 * Otherwise, it aligns to n*align + offset, for the
-			 * lowest value n that makes this still >start_pfn */
+			 * lowest value n that makes this still >start_pfn
+			 */
 			start_pfn += align_mask;
 			start_pfn -= (start_pfn - align_offset) & (align_mask);
 
@@ -342,7 +350,8 @@ static struct kbase_va_region *kbase_region_tracker_find_region_meeting_reqs(
 }
 
 /**
- * @brief Remove a region object from the global list.
+ * Remove a region object from the global list.
+ * @reg: Region object to remove
  *
  * The region reg is removed, possibly by merging with other free and
  * compatible adjacent regions.  It must be called with the context
@@ -368,8 +377,9 @@ int kbase_remove_va_region(struct kbase_va_region *reg)
 	if (rbprev) {
 		prev = rb_entry(rbprev, struct kbase_va_region, rblink);
 		if (prev->flags & KBASE_REG_FREE) {
-			/* We're compatible with the previous VMA,
-			 * merge with it */
+			/* We're compatible with the previous VMA, merge with
+			 * it
+			 */
 			WARN_ON((prev->flags & KBASE_REG_ZONE_MASK) !=
 					    (reg->flags & KBASE_REG_ZONE_MASK));
 			prev->nr_pages += reg->nr_pages;
@@ -512,8 +522,8 @@ int kbase_add_va_region(struct kbase_context *kctx,
 	int gpu_pc_bits =
 		kbdev->gpu_props.props.core_props.log2_program_counter_size;
 
-	KBASE_DEBUG_ASSERT(NULL != kctx);
-	KBASE_DEBUG_ASSERT(NULL != reg);
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	KBASE_DEBUG_ASSERT(reg != NULL);
 
 	lockdep_assert_held(&kctx->reg_lock);
 
@@ -620,8 +630,8 @@ int kbase_add_va_region_rbtree(struct kbase_device *kbdev,
 			WARN(align > 1, "%s with align %lx might not be honored for KBASE_REG_TILER_ALIGN_TOP memory",
 					__func__,
 					(unsigned long)align);
-			align_mask  = reg->extent - 1;
-			align_offset = reg->extent - reg->initial_commit;
+			align_mask = reg->extension - 1;
+			align_offset = reg->extension - reg->initial_commit;
 		}
 #endif /* !MALI_USE_CSF */
 
@@ -646,7 +656,7 @@ exit:
 	return err;
 }
 
-/**
+/*
  * @brief Initialize the internal region tracker data structure.
  */
 static void kbase_region_tracker_ds_init(struct kbase_context *kctx,
@@ -726,21 +736,24 @@ int kbase_region_tracker_init(struct kbase_context *kctx)
 	u64 custom_va_size = KBASE_REG_ZONE_CUSTOM_VA_SIZE;
 	u64 gpu_va_limit = (1ULL << kctx->kbdev->gpu_props.mmu.va_bits) >> PAGE_SHIFT;
 	u64 same_va_pages;
+	u64 same_va_base = 1u;
 	int err;
 
 	/* Take the lock as kbase_free_alloced_region requires it */
 	kbase_gpu_vm_lock(kctx);
 
-	same_va_pages = (1ULL << (same_va_bits - PAGE_SHIFT)) - 1;
+	same_va_pages = (1ULL << (same_va_bits - PAGE_SHIFT)) - same_va_base;
 	/* all have SAME_VA */
-	same_va_reg = kbase_alloc_free_region(&kctx->reg_rbtree_same, 1,
-			same_va_pages,
-			KBASE_REG_ZONE_SAME_VA);
+	same_va_reg =
+		kbase_alloc_free_region(&kctx->reg_rbtree_same, same_va_base,
+					same_va_pages, KBASE_REG_ZONE_SAME_VA);
 
 	if (!same_va_reg) {
 		err = -ENOMEM;
 		goto fail_unlock;
 	}
+	kbase_ctx_reg_zone_init(kctx, KBASE_REG_ZONE_SAME_VA, same_va_base,
+				same_va_pages);
 
 #ifdef CONFIG_64BIT
 	/* 32-bit clients have custom VA zones */
@@ -766,17 +779,23 @@ int kbase_region_tracker_init(struct kbase_context *kctx)
 			err = -ENOMEM;
 			goto fail_free_same_va;
 		}
+		kbase_ctx_reg_zone_init(kctx, KBASE_REG_ZONE_CUSTOM_VA,
+					KBASE_REG_ZONE_CUSTOM_VA_BASE,
+					custom_va_size);
 #ifdef CONFIG_64BIT
 	} else {
 		custom_va_size = 0;
 	}
 #endif
+	/* EXEC_VA zone's codepaths are slightly easier when its base_pfn is
+	 * initially U64_MAX
+	 */
+	kbase_ctx_reg_zone_init(kctx, KBASE_REG_ZONE_EXEC_VA, U64_MAX, 0u);
+	/* Other zones are 0: kbase_create_context() uses vzalloc */
 
 	kbase_region_tracker_ds_init(kctx, same_va_reg, custom_va_reg);
 
-	kctx->same_va_end = same_va_pages + 1;
-	kctx->gpu_va_end = kctx->same_va_end + custom_va_size;
-	kctx->exec_va_start = U64_MAX;
+	kctx->gpu_va_end = same_va_base + same_va_pages + custom_va_size;
 	kctx->jit_va = false;
 
 #if MALI_USE_CSF
@@ -793,44 +812,147 @@ fail_unlock:
 	return err;
 }
 
+static bool kbase_has_exec_va_zone_locked(struct kbase_context *kctx)
+{
+	struct kbase_reg_zone *exec_va_zone;
+
+	lockdep_assert_held(&kctx->reg_lock);
+	exec_va_zone = kbase_ctx_reg_zone_get(kctx, KBASE_REG_ZONE_EXEC_VA);
+	/* base_pfn stays U64_MAX until the EXEC_VA zone is created */
+	return (exec_va_zone->base_pfn != U64_MAX);
+}
+
+bool kbase_has_exec_va_zone(struct kbase_context *kctx)
+{
+	bool has_exec_va_zone;
+
+	kbase_gpu_vm_lock(kctx);
+	has_exec_va_zone = kbase_has_exec_va_zone_locked(kctx);
+	kbase_gpu_vm_unlock(kctx);
+	/* Result was sampled between the lock/unlock calls above */
+	return has_exec_va_zone;
+}
+
+/**
+ * Determine if any allocations have been made on a context's region tracker
+ * @kctx: KBase context
+ *
+ * Check the context to determine if any allocations have been made yet from
+ * any of its zones. This check should be done before resizing a zone, e.g. to
+ * make space to add a second zone.
+ *
+ * Whilst a zone without allocations can be resized whilst other zones have
+ * allocations, we still check all of @kctx 's zones anyway: this is a stronger
+ * guarantee and should be adhered to when creating new zones anyway.
+ *
+ * Allocations from kbdev zones are not counted. Caller must hold reg_lock.
+ *
+ * Return: true if any zone has allocs (or fails a sanity check), else false
+ */
+static bool kbase_region_tracker_has_allocs(struct kbase_context *kctx)
+{
+	unsigned int zone_idx;
+
+	lockdep_assert_held(&kctx->reg_lock);
+
+	for (zone_idx = 0; zone_idx < KBASE_REG_ZONE_MAX; ++zone_idx) {
+		struct kbase_reg_zone *zone;
+		struct kbase_va_region *reg;
+		u64 zone_base_addr;
+		unsigned long zone_bits = KBASE_REG_ZONE(zone_idx);
+		unsigned long reg_zone;
+
+		zone = kbase_ctx_reg_zone_get(kctx, zone_bits);
+		zone_base_addr = zone->base_pfn << PAGE_SHIFT;
+
+		reg = kbase_region_tracker_find_region_base_address(
+			kctx, zone_base_addr);
+		/* Zero-sized zone (e.g. unset EXEC_VA): expect no region */
+		if (!zone->va_size_pages) {
+			WARN(reg,
+			     "Should not have found a region that starts at 0x%.16llx for zone 0x%lx",
+			     (unsigned long long)zone_base_addr, zone_bits);
+			continue;
+		}
+
+		if (WARN(!reg,
+			 "There should always be a region that starts at 0x%.16llx for zone 0x%lx, couldn't find it",
+			 (unsigned long long)zone_base_addr, zone_bits))
+			return true; /* Safest return value */
+
+		reg_zone = reg->flags & KBASE_REG_ZONE_MASK;
+		if (WARN(reg_zone != zone_bits,
+			 "The region that starts at 0x%.16llx should be in zone 0x%lx but was found in the wrong zone 0x%lx",
+			 (unsigned long long)zone_base_addr, zone_bits,
+			 reg_zone))
+			return true; /* Safest return value */
+
+		/* The zone is untouched only if its first region is still free
+		 * and still spans the whole zone; anything else means allocs
+		 */
+		if ((!(reg->flags & KBASE_REG_FREE)) ||
+		    (reg->nr_pages != zone->va_size_pages))
+			return true;
+	}
+
+	/* Every sized zone is still one free region covering the whole zone,
+	 * so there are no allocs
+	 */
+	return false;
+}
+
 #ifdef CONFIG_64BIT
 static int kbase_region_tracker_init_jit_64(struct kbase_context *kctx,
 		u64 jit_va_pages)
 {
-	struct kbase_va_region *same_va;
+	struct kbase_va_region *same_va_reg;
+	struct kbase_reg_zone *same_va_zone;
+	u64 same_va_zone_base_addr;
+	const unsigned long same_va_zone_bits = KBASE_REG_ZONE_SAME_VA;
 	struct kbase_va_region *custom_va_reg;
+	u64 jit_va_start;
 
 	lockdep_assert_held(&kctx->reg_lock);
 
-	/* First verify that a JIT_VA zone has not been created already. */
-	if (kctx->jit_va)
-		return -EINVAL;
-
 	/*
-	 * Modify the same VA free region after creation. Be careful to ensure
-	 * that allocations haven't been made as they could cause an overlap
-	 * to happen with existing same VA allocations and the custom VA zone.
+	 * Modify the same VA free region after creation. The caller has
+	 * ensured that allocations haven't been made, as any allocations could
+	 * cause an overlap to happen with existing same VA allocations and the
+	 * custom VA zone.
 	 */
-	same_va = kbase_region_tracker_find_region_base_address(kctx,
-			PAGE_SIZE);
-	if (!same_va)
+	same_va_zone = kbase_ctx_reg_zone_get(kctx, same_va_zone_bits);
+	same_va_zone_base_addr = same_va_zone->base_pfn << PAGE_SHIFT;
+
+	same_va_reg = kbase_region_tracker_find_region_base_address(
+		kctx, same_va_zone_base_addr);
+	if (WARN(!same_va_reg,
+		 "Already found a free region at the start of every zone, but now cannot find any region for zone base 0x%.16llx zone 0x%lx",
+		 (unsigned long long)same_va_zone_base_addr, same_va_zone_bits))
 		return -ENOMEM;
 
-	if (same_va->nr_pages < jit_va_pages || kctx->same_va_end < jit_va_pages)
+	/* kbase_region_tracker_has_allocs() in the caller has already ensured
+	 * that all of the zones have no allocs, so no need to check that again
+	 * on same_va_reg
+	 */
+	WARN_ON((!(same_va_reg->flags & KBASE_REG_FREE)) ||
+		same_va_reg->nr_pages != same_va_zone->va_size_pages);
+
+	if (same_va_reg->nr_pages < jit_va_pages ||
+	    same_va_zone->va_size_pages < jit_va_pages)
 		return -ENOMEM;
 
 	/* It's safe to adjust the same VA zone now */
-	same_va->nr_pages -= jit_va_pages;
-	kctx->same_va_end -= jit_va_pages;
+	same_va_reg->nr_pages -= jit_va_pages;
+	same_va_zone->va_size_pages -= jit_va_pages;
+	jit_va_start = kbase_reg_zone_end_pfn(same_va_zone);
 
 	/*
 	 * Create a custom VA zone at the end of the VA for allocations which
 	 * JIT can use so it doesn't have to allocate VA from the kernel.
 	 */
-	custom_va_reg = kbase_alloc_free_region(&kctx->reg_rbtree_custom,
-				kctx->same_va_end,
-				jit_va_pages,
-				KBASE_REG_ZONE_CUSTOM_VA);
+	custom_va_reg =
+		kbase_alloc_free_region(&kctx->reg_rbtree_custom, jit_va_start,
+					jit_va_pages, KBASE_REG_ZONE_CUSTOM_VA);
 
 	/*
 	 * The context will be destroyed if we fail here so no point
@@ -838,6 +960,11 @@ static int kbase_region_tracker_init_jit_64(struct kbase_context *kctx,
 	 */
 	if (!custom_va_reg)
 		return -ENOMEM;
+	/* Since this is 64-bit, the custom zone will not have been
+	 * initialized, so initialize it now
+	 */
+	kbase_ctx_reg_zone_init(kctx, KBASE_REG_ZONE_CUSTOM_VA, jit_va_start,
+				jit_va_pages);
 
 	kbase_region_tracker_insert(custom_va_reg);
 	return 0;
@@ -866,6 +993,23 @@ int kbase_region_tracker_init_jit(struct kbase_context *kctx, u64 jit_va_pages,
 
 	kbase_gpu_vm_lock(kctx);
 
+	/* Verify that a JIT_VA zone has not been created already. */
+	if (kctx->jit_va) {
+		err = -EINVAL;
+		goto exit_unlock;
+	}
+
+	/* If in 64-bit, we always look up the SAME_VA zone. To ensure it has no
+	 * allocs, we can ensure there are no allocs anywhere.
+	 *
+	 * This check is also useful in 32-bit, just to make sure init of the
+	 * zone is always done before any allocs.
+	 */
+	if (kbase_region_tracker_has_allocs(kctx)) {
+		err = -ENOMEM;
+		goto exit_unlock;
+	}
+
 #ifdef CONFIG_64BIT
 	if (!kbase_ctx_flag(kctx, KCTX_COMPAT))
 		err = kbase_region_tracker_init_jit_64(kctx, jit_va_pages);
@@ -887,6 +1031,7 @@ int kbase_region_tracker_init_jit(struct kbase_context *kctx, u64 jit_va_pages,
 #endif /* MALI_JIT_PRESSURE_LIMIT_BASE */
 	}
 
+exit_unlock:
 	kbase_gpu_vm_unlock(kctx);
 
 	return err;
@@ -894,24 +1039,33 @@ int kbase_region_tracker_init_jit(struct kbase_context *kctx, u64 jit_va_pages,
 
 int kbase_region_tracker_init_exec(struct kbase_context *kctx, u64 exec_va_pages)
 {
-	struct kbase_va_region *shrinking_va_reg;
 	struct kbase_va_region *exec_va_reg;
-	u64 exec_va_start, exec_va_base_addr;
+	struct kbase_reg_zone *exec_va_zone;
+	struct kbase_reg_zone *target_zone;
+	struct kbase_va_region *target_reg;
+	u64 target_zone_base_addr;
+	unsigned long target_zone_bits;
+	u64 exec_va_start;
 	int err;
 
-	/* The EXEC_VA zone shall be created by making space at the end of the
-	 * address space. Firstly, verify that the number of EXEC_VA pages
-	 * requested by the client is reasonable and then make sure that it is
-	 * not greater than the address space itself before calculating the base
-	 * address of the new zone.
+	/* The EXEC_VA zone shall be created by making space either:
+	 * - for 64-bit clients, at the end of the process's address space
+	 * - for 32-bit clients, in the CUSTOM zone
+	 *
+	 * Firstly, verify that the number of EXEC_VA pages requested by the
+	 * client is reasonable and then make sure that it is not greater than
+	 * the address space itself before calculating the base address of the
+	 * new zone.
 	 */
 	if (exec_va_pages == 0 || exec_va_pages > KBASE_REG_ZONE_EXEC_VA_MAX_PAGES)
 		return -EINVAL;
 
 	kbase_gpu_vm_lock(kctx);
 
-	/* First verify that a JIT_VA zone has not been created already. */
-	if (kctx->jit_va) {
+	/* Verify that we've not already created an EXEC_VA zone, and that the
+	 * JIT's CUSTOM_VA zone has not been created: EXEC_VA must come first.
+	 */
+	if (kbase_has_exec_va_zone_locked(kctx) || kctx->jit_va) {
 		err = -EPERM;
 		goto exit_unlock;
 	}
@@ -921,27 +1075,49 @@ int kbase_region_tracker_init_exec(struct kbase_context *kctx, u64 exec_va_pages
 		goto exit_unlock;
 	}
 
-	exec_va_start = kctx->gpu_va_end - exec_va_pages;
-	exec_va_base_addr = exec_va_start << PAGE_SHIFT;
-
-	shrinking_va_reg = kbase_region_tracker_find_region_enclosing_address(kctx,
-			exec_va_base_addr);
-	if (!shrinking_va_reg) {
+	/* Verify no allocations have already been made */
+	if (kbase_region_tracker_has_allocs(kctx)) {
 		err = -ENOMEM;
 		goto exit_unlock;
 	}
 
-	/* Make sure that the EXEC_VA region is still uninitialized */
-	if ((shrinking_va_reg->flags & KBASE_REG_ZONE_MASK) ==
-			KBASE_REG_ZONE_EXEC_VA) {
-		err = -EPERM;
-		goto exit_unlock;
+#ifdef CONFIG_64BIT
+	if (kbase_ctx_flag(kctx, KCTX_COMPAT)) {
+#endif
+		/* 32-bit client: take from CUSTOM_VA zone */
+		target_zone_bits = KBASE_REG_ZONE_CUSTOM_VA;
+#ifdef CONFIG_64BIT
+	} else {
+		/* 64-bit client: take from SAME_VA zone */
+		target_zone_bits = KBASE_REG_ZONE_SAME_VA;
 	}
+#endif
+	target_zone = kbase_ctx_reg_zone_get(kctx, target_zone_bits);
+	target_zone_base_addr = target_zone->base_pfn << PAGE_SHIFT;
 
-	if (shrinking_va_reg->nr_pages <= exec_va_pages) {
+	target_reg = kbase_region_tracker_find_region_base_address(
+		kctx, target_zone_base_addr);
+	if (WARN(!target_reg,
+		 "Already found a free region at the start of every zone, but now cannot find any region for zone base 0x%.16llx zone 0x%lx",
+		 (unsigned long long)target_zone_base_addr, target_zone_bits)) {
 		err = -ENOMEM;
 		goto exit_unlock;
 	}
+	/* kbase_region_tracker_has_allocs() above has already ensured that all
+	 * of the zones have no allocs, so no need to check that again on
+	 * target_reg
+	 */
+	WARN_ON((!(target_reg->flags & KBASE_REG_FREE)) ||
+		target_reg->nr_pages != target_zone->va_size_pages);
+
+	if (target_reg->nr_pages <= exec_va_pages ||
+	    target_zone->va_size_pages <= exec_va_pages) {
+		err = -ENOMEM;
+		goto exit_unlock;
+	}
+
+	/* Taken from the end of the target zone */
+	exec_va_start = kbase_reg_zone_end_pfn(target_zone) - exec_va_pages;
 
 	exec_va_reg = kbase_alloc_free_region(&kctx->reg_rbtree_exec,
 			exec_va_start,
@@ -951,13 +1127,17 @@ int kbase_region_tracker_init_exec(struct kbase_context *kctx, u64 exec_va_pages
 		err = -ENOMEM;
 		goto exit_unlock;
 	}
+	/* Update EXEC_VA zone
+	 *
+	 * not using kbase_ctx_reg_zone_init() - it was already initialized
+	 */
+	exec_va_zone = kbase_ctx_reg_zone_get(kctx, KBASE_REG_ZONE_EXEC_VA);
+	exec_va_zone->base_pfn = exec_va_start;
+	exec_va_zone->va_size_pages = exec_va_pages;
 
-	shrinking_va_reg->nr_pages -= exec_va_pages;
-#ifdef CONFIG_64BIT
-	if (!kbase_ctx_flag(kctx, KCTX_COMPAT))
-		kctx->same_va_end -= exec_va_pages;
-#endif
-	kctx->exec_va_start = exec_va_start;
+	/* Update target zone and corresponding region */
+	target_reg->nr_pages -= exec_va_pages;
+	target_zone->va_size_pages -= exec_va_pages;
 
 	kbase_region_tracker_insert(exec_va_reg);
 	err = 0;
@@ -1108,7 +1288,11 @@ void kbase_mem_term(struct kbase_device *kbdev)
 KBASE_EXPORT_TEST_API(kbase_mem_term);
 
 /**
- * @brief Allocate a free region object.
+ * Allocate a free region object.
+ * @rbtree:    Backlink to the red-black tree of memory regions.
+ * @start_pfn: The Page Frame Number in GPU virtual address space.
+ * @nr_pages:  The size of the region in pages.
+ * @zone:      KBASE_REG_ZONE_CUSTOM_VA or KBASE_REG_ZONE_SAME_VA
  *
  * The allocated object is not part of any list yet, and is flagged as
  * KBASE_REG_FREE. No mapping is allocated yet.
@@ -1181,7 +1365,8 @@ static struct kbase_context *kbase_reg_flags_to_kctx(
 }
 
 /**
- * @brief Free a region object.
+ * Free a region object.
+ * @reg: Region
  *
  * The described region must be freed of any mapping.
  *
@@ -1208,7 +1393,7 @@ void kbase_free_alloced_region(struct kbase_va_region *reg)
 		if (WARN_ON(kbase_is_region_invalid(reg)))
 			return;
 
-		dev_dbg(kctx->kbdev->dev, "Freeing memory region %p\n",
+		dev_dbg(kctx->kbdev->dev, "Freeing memory region %pK\n",
 			(void *)reg);
 #if MALI_USE_CSF
 		if (reg->flags & KBASE_REG_CSF_EVENT)
@@ -1293,8 +1478,8 @@ int kbase_gpu_mmap(struct kbase_context *kctx, struct kbase_va_region *reg, u64
 	else
 		attr = KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_WRITE_ALLOC);
 
-	KBASE_DEBUG_ASSERT(NULL != kctx);
-	KBASE_DEBUG_ASSERT(NULL != reg);
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	KBASE_DEBUG_ASSERT(reg != NULL);
 
 	err = kbase_add_va_region(kctx, reg, addr, nr_pages, align);
 	if (err)
@@ -1320,7 +1505,9 @@ int kbase_gpu_mmap(struct kbase_context *kctx, struct kbase_va_region *reg, u64
 				if (err)
 					goto bad_insert;
 
-				kbase_mem_phy_alloc_gpu_mapped(alloc->imported.alias.aliased[i].alloc);
+				/* Note: mapping count is tracked at alias
+				 * creation time
+				 */
 			} else {
 				err = kbase_mmu_insert_single_page(kctx,
 					reg->start_pfn + i * stride,
@@ -1379,13 +1566,6 @@ bad_insert:
 				 reg->start_pfn, reg->nr_pages,
 				 kctx->as_nr);
 
-	if (alloc->type == KBASE_MEM_TYPE_ALIAS) {
-		KBASE_DEBUG_ASSERT(alloc->imported.alias.aliased);
-		while (i--)
-			if (alloc->imported.alias.aliased[i].alloc)
-				kbase_mem_phy_alloc_gpu_unmapped(alloc->imported.alias.aliased[i].alloc);
-	}
-
 	kbase_remove_va_region(reg);
 
 	return err;
@@ -1399,7 +1579,6 @@ static void kbase_jd_user_buf_unmap(struct kbase_context *kctx,
 int kbase_gpu_munmap(struct kbase_context *kctx, struct kbase_va_region *reg)
 {
 	int err = 0;
-	size_t i;
 
 	if (reg->start_pfn == 0)
 		return 0;
@@ -1424,10 +1603,9 @@ int kbase_gpu_munmap(struct kbase_context *kctx, struct kbase_va_region *reg)
 	/* Update tracking, and other cleanup, depending on memory type. */
 	switch (reg->gpu_alloc->type) {
 	case KBASE_MEM_TYPE_ALIAS:
-		KBASE_DEBUG_ASSERT(reg->gpu_alloc->imported.alias.aliased);
-		for (i = 0; i < reg->gpu_alloc->imported.alias.nents; i++)
-			if (reg->gpu_alloc->imported.alias.aliased[i].alloc)
-				kbase_mem_phy_alloc_gpu_unmapped(reg->gpu_alloc->imported.alias.aliased[i].alloc);
+		/* We mark the source allocs as unmapped from the GPU when
+		 * putting reg's allocs
+		 */
 		break;
 	case KBASE_MEM_TYPE_IMPORTED_USER_BUF: {
 			struct kbase_alloc_import_user_buf *user_buf =
@@ -1736,9 +1914,9 @@ int kbase_mem_free_region(struct kbase_context *kctx, struct kbase_va_region *re
 {
 	int err;
 
-	KBASE_DEBUG_ASSERT(NULL != kctx);
-	KBASE_DEBUG_ASSERT(NULL != reg);
-	dev_dbg(kctx->kbdev->dev, "%s %p in kctx %p\n",
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	KBASE_DEBUG_ASSERT(reg != NULL);
+	dev_dbg(kctx->kbdev->dev, "%s %pK in kctx %pK\n",
 		__func__, (void *)reg, (void *)kctx);
 	lockdep_assert_held(&kctx->reg_lock);
 
@@ -1784,7 +1962,9 @@ int kbase_mem_free_region(struct kbase_context *kctx, struct kbase_va_region *re
 KBASE_EXPORT_TEST_API(kbase_mem_free_region);
 
 /**
- * @brief Free the region from the GPU and unregister it.
+ * Free the region from the GPU and unregister it.
+ * @kctx:  KBase context
+ * @gpu_addr: GPU address to free
  *
  * This function implements the free operation on a memory segment.
  * It will loudly fail if called with outstanding mappings.
@@ -1795,7 +1975,7 @@ int kbase_mem_free(struct kbase_context *kctx, u64 gpu_addr)
 	struct kbase_va_region *reg;
 
 	KBASE_DEBUG_ASSERT(kctx != NULL);
-	dev_dbg(kctx->kbdev->dev, "%s 0x%llx in kctx %p\n",
+	dev_dbg(kctx->kbdev->dev, "%s 0x%llx in kctx %pK\n",
 		__func__, gpu_addr, (void *)kctx);
 
 	if ((gpu_addr & ~PAGE_MASK) && (gpu_addr >= PAGE_SIZE)) {
@@ -1803,7 +1983,7 @@ int kbase_mem_free(struct kbase_context *kctx, u64 gpu_addr)
 		return -EINVAL;
 	}
 
-	if (0 == gpu_addr) {
+	if (gpu_addr == 0) {
 		dev_warn(kctx->kbdev->dev, "gpu_addr 0 is reserved for the ringbuffer and it's an error to try to free it using kbase_mem_free\n");
 		return -EINVAL;
 	}
@@ -1856,7 +2036,7 @@ KBASE_EXPORT_TEST_API(kbase_mem_free);
 int kbase_update_region_flags(struct kbase_context *kctx,
 		struct kbase_va_region *reg, unsigned long flags)
 {
-	KBASE_DEBUG_ASSERT(NULL != reg);
+	KBASE_DEBUG_ASSERT(reg != NULL);
 	KBASE_DEBUG_ASSERT((flags & ~((1ul << BASE_MEM_FLAGS_NR_BITS) - 1)) == 0);
 
 	reg->flags |= kbase_cache_enabled(flags, reg->nr_pages);
@@ -1988,7 +2168,8 @@ int kbase_alloc_phy_pages_helper(struct kbase_mem_phy_alloc *alloc,
 		&kctx->kbdev->memdev.used_pages);
 
 	/* Increase mm counters before we allocate pages so that this
-	 * allocation is visible to the OOM killer */
+	 * allocation is visible to the OOM killer
+	 */
 	kbase_process_page_usage_inc(kctx, nr_pages_requested);
 
 	tp = alloc->pages + alloc->nents;
@@ -2392,7 +2573,7 @@ int kbase_free_phy_pages_helper(
 	}
 
 	/* early out if nothing to do */
-	if (0 == nr_pages_to_free)
+	if (nr_pages_to_free == 0)
 		return 0;
 
 	start_free = alloc->pages + alloc->nents - nr_pages_to_free;
@@ -2591,6 +2772,7 @@ void kbase_free_phy_pages_helper_locked(struct kbase_mem_phy_alloc *alloc,
 		kbase_trace_gpu_mem_usage_dec(kctx->kbdev, kctx, freed);
 	}
 }
+KBASE_EXPORT_TEST_API(kbase_free_phy_pages_helper_locked);
 
 #if MALI_USE_CSF
 /**
@@ -2640,8 +2822,10 @@ void kbase_mem_kref_free(struct kref *kref)
 		aliased = alloc->imported.alias.aliased;
 		if (aliased) {
 			for (i = 0; i < alloc->imported.alias.nents; i++)
-				if (aliased[i].alloc)
+				if (aliased[i].alloc) {
+					kbase_mem_phy_alloc_gpu_unmapped(aliased[i].alloc);
 					kbase_mem_phy_alloc_put(aliased[i].alloc);
+				}
 			vfree(aliased);
 		}
 		break;
@@ -2692,7 +2876,7 @@ KBASE_EXPORT_TEST_API(kbase_mem_kref_free);
 
 int kbase_alloc_phy_pages(struct kbase_va_region *reg, size_t vsize, size_t size)
 {
-	KBASE_DEBUG_ASSERT(NULL != reg);
+	KBASE_DEBUG_ASSERT(reg != NULL);
 	KBASE_DEBUG_ASSERT(vsize > 0);
 
 	/* validate user provided arguments */
@@ -2705,7 +2889,7 @@ int kbase_alloc_phy_pages(struct kbase_va_region *reg, size_t vsize, size_t size
 	if ((size_t) vsize > ((size_t) -1 / sizeof(*reg->cpu_alloc->pages)))
 		goto out_term;
 
-	KBASE_DEBUG_ASSERT(0 != vsize);
+	KBASE_DEBUG_ASSERT(vsize != 0);
 
 	if (kbase_alloc_phy_pages_helper(reg->cpu_alloc, size) != 0)
 		goto out_term;
@@ -2755,7 +2939,7 @@ bool kbase_check_alloc_flags(unsigned long flags)
 
 #if !MALI_USE_CSF
 	/* GPU executable memory also cannot have the top of its initial
-	 * commit aligned to 'extent'
+	 * commit aligned to 'extension'
 	 */
 	if ((flags & BASE_MEM_PROT_GPU_EX) && (flags &
 			BASE_MEM_TILER_ALIGN_TOP))
@@ -2777,7 +2961,8 @@ bool kbase_check_alloc_flags(unsigned long flags)
 #endif /* !MALI_USE_CSF */
 
 	/* GPU should have at least read or write access otherwise there is no
-	   reason for allocating. */
+	 * reason for allocating.
+	 */
 	if ((flags & (BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR)) == 0)
 		return false;
 
@@ -2785,14 +2970,15 @@ bool kbase_check_alloc_flags(unsigned long flags)
 	if ((flags & BASE_MEM_IMPORT_SHARED) == BASE_MEM_IMPORT_SHARED)
 		return false;
 
-	/* BASE_MEM_IMPORT_SYNC_ON_MAP_UNMAP is only valid for imported
-	 * memory */
+	/* BASE_MEM_IMPORT_SYNC_ON_MAP_UNMAP is only valid for imported memory
+	 */
 	if ((flags & BASE_MEM_IMPORT_SYNC_ON_MAP_UNMAP) ==
 			BASE_MEM_IMPORT_SYNC_ON_MAP_UNMAP)
 		return false;
 
 	/* Should not combine BASE_MEM_COHERENT_LOCAL with
-	 * BASE_MEM_COHERENT_SYSTEM */
+	 * BASE_MEM_COHERENT_SYSTEM
+	 */
 	if ((flags & (BASE_MEM_COHERENT_LOCAL | BASE_MEM_COHERENT_SYSTEM)) ==
 			(BASE_MEM_COHERENT_LOCAL | BASE_MEM_COHERENT_SYSTEM))
 		return false;
@@ -2825,7 +3011,8 @@ bool kbase_check_import_flags(unsigned long flags)
 #endif /* !MALI_USE_CSF */
 
 	/* GPU should have at least read or write access otherwise there is no
-	   reason for importing. */
+	 * reason for importing.
+	 */
 	if ((flags & (BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR)) == 0)
 		return false;
 
@@ -2837,19 +3024,19 @@ bool kbase_check_import_flags(unsigned long flags)
 }
 
 int kbase_check_alloc_sizes(struct kbase_context *kctx, unsigned long flags,
-		u64 va_pages, u64 commit_pages, u64 large_extent)
+			    u64 va_pages, u64 commit_pages, u64 large_extension)
 {
 	struct device *dev = kctx->kbdev->dev;
 	int gpu_pc_bits = kctx->kbdev->gpu_props.props.core_props.log2_program_counter_size;
 	u64 gpu_pc_pages_max = 1ULL << gpu_pc_bits >> PAGE_SHIFT;
 	struct kbase_va_region test_reg;
 
-	/* kbase_va_region's extent member can be of variable size, so check against that type */
-	test_reg.extent = large_extent;
+	/* kbase_va_region's extension member can be of variable size, so check against that type */
+	test_reg.extension = large_extension;
 
 #define KBASE_MSG_PRE "GPU allocation attempted with "
 
-	if (0 == va_pages) {
+	if (va_pages == 0) {
 		dev_warn(dev, KBASE_MSG_PRE "0 va_pages!");
 		return -EINVAL;
 	}
@@ -2861,7 +3048,8 @@ int kbase_check_alloc_sizes(struct kbase_context *kctx, unsigned long flags,
 	}
 
 	/* Note: commit_pages is checked against va_pages during
-	 * kbase_alloc_phy_pages() */
+	 * kbase_alloc_phy_pages()
+	 */
 
 	/* Limit GPU executable allocs to GPU PC size */
 	if ((flags & BASE_MEM_PROT_GPU_EX) && (va_pages > gpu_pc_pages_max)) {
@@ -2872,25 +3060,30 @@ int kbase_check_alloc_sizes(struct kbase_context *kctx, unsigned long flags,
 		return -EINVAL;
 	}
 
-	if ((flags & BASE_MEM_GROW_ON_GPF) && (test_reg.extent == 0)) {
-		dev_warn(dev, KBASE_MSG_PRE "BASE_MEM_GROW_ON_GPF but extent == 0\n");
+	if ((flags & BASE_MEM_GROW_ON_GPF) && (test_reg.extension == 0)) {
+		dev_warn(dev, KBASE_MSG_PRE
+			 "BASE_MEM_GROW_ON_GPF but extension == 0\n");
 		return -EINVAL;
 	}
 
 #if !MALI_USE_CSF
-	if ((flags & BASE_MEM_TILER_ALIGN_TOP) && (test_reg.extent == 0)) {
-		dev_warn(dev, KBASE_MSG_PRE "BASE_MEM_TILER_ALIGN_TOP but extent == 0\n");
+	if ((flags & BASE_MEM_TILER_ALIGN_TOP) && (test_reg.extension == 0)) {
+		dev_warn(dev, KBASE_MSG_PRE
+			 "BASE_MEM_TILER_ALIGN_TOP but extension == 0\n");
 		return -EINVAL;
 	}
 
 	if (!(flags & (BASE_MEM_GROW_ON_GPF | BASE_MEM_TILER_ALIGN_TOP)) &&
-			test_reg.extent != 0) {
-		dev_warn(dev, KBASE_MSG_PRE "neither BASE_MEM_GROW_ON_GPF nor BASE_MEM_TILER_ALIGN_TOP set but extent != 0\n");
+	    test_reg.extension != 0) {
+		dev_warn(
+			dev, KBASE_MSG_PRE
+			"neither BASE_MEM_GROW_ON_GPF nor BASE_MEM_TILER_ALIGN_TOP set but extension != 0\n");
 		return -EINVAL;
 	}
 #else
-	if (!(flags & BASE_MEM_GROW_ON_GPF) && test_reg.extent != 0) {
-		dev_warn(dev, KBASE_MSG_PRE "BASE_MEM_GROW_ON_GPF not set but extent != 0\n");
+	if (!(flags & BASE_MEM_GROW_ON_GPF) && test_reg.extension != 0) {
+		dev_warn(dev, KBASE_MSG_PRE
+			 "BASE_MEM_GROW_ON_GPF not set but extension != 0\n");
 		return -EINVAL;
 	}
 #endif /* !MALI_USE_CSF */
@@ -2899,28 +3092,36 @@ int kbase_check_alloc_sizes(struct kbase_context *kctx, unsigned long flags,
 	/* BASE_MEM_TILER_ALIGN_TOP memory has a number of restrictions */
 	if (flags & BASE_MEM_TILER_ALIGN_TOP) {
 #define KBASE_MSG_PRE_FLAG KBASE_MSG_PRE "BASE_MEM_TILER_ALIGN_TOP and "
-		unsigned long small_extent;
+		unsigned long small_extension;
 
-		if (large_extent > BASE_MEM_TILER_ALIGN_TOP_EXTENT_MAX_PAGES) {
-			dev_warn(dev, KBASE_MSG_PRE_FLAG "extent==%lld pages exceeds limit %lld",
-					(unsigned long long)large_extent,
-					BASE_MEM_TILER_ALIGN_TOP_EXTENT_MAX_PAGES);
+		if (large_extension >
+		    BASE_MEM_TILER_ALIGN_TOP_EXTENSION_MAX_PAGES) {
+			dev_warn(dev,
+				 KBASE_MSG_PRE_FLAG
+				 "extension==%lld pages exceeds limit %lld",
+				 (unsigned long long)large_extension,
+				 BASE_MEM_TILER_ALIGN_TOP_EXTENSION_MAX_PAGES);
 			return -EINVAL;
 		}
 		/* For use with is_power_of_2, which takes unsigned long, so
-		 * must ensure e.g. on 32-bit kernel it'll fit in that type */
-		small_extent = (unsigned long)large_extent;
+		 * must ensure e.g. on 32-bit kernel it'll fit in that type
+		 */
+		small_extension = (unsigned long)large_extension;
 
-		if (!is_power_of_2(small_extent)) {
-			dev_warn(dev, KBASE_MSG_PRE_FLAG "extent==%ld not a non-zero power of 2",
-					small_extent);
+		if (!is_power_of_2(small_extension)) {
+			dev_warn(dev,
+				 KBASE_MSG_PRE_FLAG
+				 "extension==%ld not a non-zero power of 2",
+				 small_extension);
 			return -EINVAL;
 		}
 
-		if (commit_pages > large_extent) {
-			dev_warn(dev, KBASE_MSG_PRE_FLAG "commit_pages==%ld exceeds extent==%ld",
-					(unsigned long)commit_pages,
-					(unsigned long)large_extent);
+		if (commit_pages > large_extension) {
+			dev_warn(dev,
+				 KBASE_MSG_PRE_FLAG
+				 "commit_pages==%ld exceeds extension==%ld",
+				 (unsigned long)commit_pages,
+				 (unsigned long)large_extension);
 			return -EINVAL;
 		}
 #undef KBASE_MSG_PRE_FLAG
@@ -2939,7 +3140,8 @@ int kbase_check_alloc_sizes(struct kbase_context *kctx, unsigned long flags,
 }
 
 /**
- * @brief Acquire the per-context region list lock
+ * kbase_gpu_vm_lock - Acquire the per-context region list lock
+ * @kctx:  KBase context
  */
 void kbase_gpu_vm_lock(struct kbase_context *kctx)
 {
@@ -2950,7 +3152,8 @@ void kbase_gpu_vm_lock(struct kbase_context *kctx)
 KBASE_EXPORT_TEST_API(kbase_gpu_vm_lock);
 
 /**
- * @brief Release the per-context region list lock
+ * kbase_gpu_vm_unlock - Release the per-context region list lock
+ * @kctx:  KBase context
  */
 void kbase_gpu_vm_unlock(struct kbase_context *kctx)
 {
@@ -3013,7 +3216,7 @@ static ssize_t kbase_jit_debugfs_common_read(struct file *file,
 		}
 
 		size = scnprintf(data->buffer, sizeof(data->buffer),
-				"%llu,%llu,%llu", data->active_value,
+				"%llu,%llu,%llu\n", data->active_value,
 				data->pool_value, data->destroy_value);
 	}
 
@@ -3311,7 +3514,7 @@ static bool meet_size_and_tiler_align_top_requirements(
 
 #if !MALI_USE_CSF
 	if (meet_reqs && (info->flags & BASE_JIT_ALLOC_MEM_TILER_ALIGN_TOP)) {
-		size_t align = info->extent;
+		size_t align = info->extension;
 		size_t align_mask = align - 1;
 
 		if ((walker->start_pfn + info->commit_pages) & align_mask)
@@ -3366,20 +3569,20 @@ static int kbase_mem_jit_trim_pages_from_region(struct kbase_context *kctx,
 			KBASE_GPU_ALLOCATED_OBJECT_ALIGN_BYTES);
 	} else if (reg->flags & KBASE_REG_TILER_ALIGN_TOP) {
 		/* The GPU could report being ready to write to the next
-		 * 'extent' sized chunk, but didn't actually write to it, so we
-		 * can report up to 'extent' size pages more than the backed
+		 * 'extension' sized chunk, but didn't actually write to it, so we
+		 * can report up to 'extension' size pages more than the backed
 		 * size.
 		 *
 		 * Note, this is allowed to exceed reg->nr_pages.
 		 */
-		max_allowed_pages += reg->extent;
+		max_allowed_pages += reg->extension;
 
 		/* Also note that in these GPUs, the GPU may make a large (>1
 		 * page) initial allocation but not actually write out to all
 		 * of it. Hence it might report that a much higher amount of
 		 * memory was used than actually was written to. This does not
 		 * result in a real warning because on growing this memory we
-		 * round up the size of the allocation up to an 'extent' sized
+		 * round up the size of the allocation up to an 'extension' sized
 		 * chunk, hence automatically bringing the backed size up to
 		 * the reported size.
 		 */
@@ -3605,7 +3808,7 @@ done:
 
 	/* Update attributes of JIT allocation taken from the pool */
 	reg->initial_commit = info->commit_pages;
-	reg->extent = info->extent;
+	reg->extension = info->extension;
 
 update_failed:
 	return ret;
@@ -3963,7 +4166,7 @@ struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx,
 		kbase_gpu_vm_unlock(kctx);
 
 		reg = kbase_mem_alloc(kctx, info->va_pages, info->commit_pages,
-				info->extent, &flags, &gpu_addr);
+				      info->extension, &flags, &gpu_addr);
 		if (!reg) {
 			/* Most likely not enough GPU virtual space left for
 			 * the new JIT allocation.
@@ -4031,8 +4234,11 @@ void kbase_jit_free(struct kbase_context *kctx, struct kbase_va_region *reg)
 			div_u64(old_pages * (100 - kctx->trim_level), 100));
 		u64 delta = old_pages - new_size;
 
-		if (delta)
+		if (delta) {
+			mutex_lock(&kctx->reg_lock);
 			kbase_mem_shrink(kctx, reg, old_pages - delta);
+			mutex_unlock(&kctx->reg_lock);
+		}
 	}
 
 #if MALI_JIT_PRESSURE_LIMIT_BASE
@@ -4248,17 +4454,6 @@ void kbase_jit_report_update_pressure(struct kbase_context *kctx,
 }
 #endif /* MALI_JIT_PRESSURE_LIMIT_BASE */
 
-bool kbase_has_exec_va_zone(struct kbase_context *kctx)
-{
-	bool has_exec_va_zone;
-
-	kbase_gpu_vm_lock(kctx);
-	has_exec_va_zone = (kctx->exec_va_start != U64_MAX);
-	kbase_gpu_vm_unlock(kctx);
-
-	return has_exec_va_zone;
-}
-
 #if MALI_USE_CSF
 static void kbase_jd_user_buf_unpin_pages(struct kbase_mem_phy_alloc *alloc)
 {
@@ -4297,7 +4492,7 @@ int kbase_jd_user_buf_pin_pages(struct kbase_context *kctx,
 	if (WARN_ON(reg->gpu_alloc->imported.user_buf.mm != current->mm))
 		return -EINVAL;
 
-#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0)
+#if KERNEL_VERSION(4, 6, 0) > LINUX_VERSION_CODE
 	pinned_pages = get_user_pages(NULL, mm,
 			address,
 			alloc->imported.user_buf.nr_pages,
@@ -4309,19 +4504,19 @@ KERNEL_VERSION(4, 5, 0) > LINUX_VERSION_CODE
 			reg->flags & KBASE_REG_GPU_WR,
 			0, pages, NULL);
 #endif
-#elif LINUX_VERSION_CODE < KERNEL_VERSION(4, 9, 0)
+#elif KERNEL_VERSION(4, 9, 0) > LINUX_VERSION_CODE
 	pinned_pages = get_user_pages_remote(NULL, mm,
 			address,
 			alloc->imported.user_buf.nr_pages,
 			reg->flags & KBASE_REG_GPU_WR,
 			0, pages, NULL);
-#elif LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)
+#elif KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE
 	pinned_pages = get_user_pages_remote(NULL, mm,
 			address,
 			alloc->imported.user_buf.nr_pages,
 			reg->flags & KBASE_REG_GPU_WR ? FOLL_WRITE : 0,
 			pages, NULL);
-#elif LINUX_VERSION_CODE < KERNEL_VERSION(5, 9, 0)
+#elif KERNEL_VERSION(5, 9, 0) > LINUX_VERSION_CODE
 	pinned_pages = get_user_pages_remote(NULL, mm,
 			address,
 			alloc->imported.user_buf.nr_pages,
@@ -4513,7 +4708,8 @@ struct kbase_mem_phy_alloc *kbase_map_external_resource(
 			goto exit;
 
 		reg->gpu_alloc->imported.user_buf.current_mapping_usage_count++;
-		if (1 == reg->gpu_alloc->imported.user_buf.current_mapping_usage_count) {
+		if (reg->gpu_alloc->imported.user_buf
+			    .current_mapping_usage_count == 1) {
 			err = kbase_jd_user_buf_map(kctx, reg);
 			if (err) {
 				reg->gpu_alloc->imported.user_buf.current_mapping_usage_count--;
@@ -4548,7 +4744,7 @@ void kbase_unmap_external_resource(struct kbase_context *kctx,
 	case KBASE_MEM_TYPE_IMPORTED_USER_BUF: {
 		alloc->imported.user_buf.current_mapping_usage_count--;
 
-		if (0 == alloc->imported.user_buf.current_mapping_usage_count) {
+		if (alloc->imported.user_buf.current_mapping_usage_count == 0) {
 			bool writeable = true;
 
 			if (!kbase_is_region_invalid_or_free(reg) &&
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_mem.h b/drivers/gpu/arm/bifrost/mali_kbase_mem.h
index 2238fbfe9e99..8a9e93437655 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_mem.h
+++ b/drivers/gpu/arm/bifrost/mali_kbase_mem.h
@@ -1,11 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  *
- * (C) COPYRIGHT 2010-2020 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2021 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,15 +17,10 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
-
-
 /**
- * @file mali_kbase_mem.h
- * Base kernel memory APIs
+ * DOC: Base kernel memory APIs
  */
 
 #ifndef _KBASE_MEM_H_
@@ -35,7 +31,7 @@
 #endif
 
 #include <linux/kref.h>
-#include "mali_base_kernel.h"
+#include <uapi/gpu/arm/bifrost/mali_base_kernel.h>
 #include <mali_kbase_hw.h>
 #include "mali_kbase_pm.h"
 #include "mali_kbase_defs.h"
@@ -48,10 +44,13 @@ static inline void kbase_process_page_usage_inc(struct kbase_context *kctx,
 /* Part of the workaround for uTLB invalid pages is to ensure we grow/shrink tmem by 4 pages at a time */
 #define KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_LOG2_HW_ISSUE_8316 (2)	/* round to 4 pages */
 
-/* Part of the workaround for PRLAM-9630 requires us to grow/shrink memory by 8 pages.
-The MMU reads in 8 page table entries from memory at a time, if we have more than one page fault within the same 8 pages and
-page tables are updated accordingly, the MMU does not re-read the page table entries from memory for the subsequent page table
-updates and generates duplicate page faults as the page table information used by the MMU is not valid.   */
+/* Part of the workaround for PRLAM-9630 requires us to grow/shrink memory by
+ * 8 pages. The MMU reads in 8 page table entries from memory at a time, if we
+ * have more than one page fault within the same 8 pages and page tables are
+ * updated accordingly, the MMU does not re-read the page table entries from
+ * memory for the subsequent page table updates and generates duplicate page
+ * faults as the page table information used by the MMU is not valid.
+ */
 #define KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_LOG2_HW_ISSUE_9630 (3)	/* round to 8 pages */
 
 #define KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_LOG2 (0)	/* round to 1 page */
@@ -60,7 +59,8 @@ updates and generates duplicate page faults as the page table information used b
 #define KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES (1u << KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_LOG2)
 #define KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_HW_ISSUE_8316 (1u << KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_LOG2_HW_ISSUE_8316)
 #define KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_HW_ISSUE_9630 (1u << KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_LOG2_HW_ISSUE_9630)
-/**
+
+/*
  * A CPU mapping
  */
 struct kbase_cpu_mapping {
@@ -81,16 +81,15 @@ enum kbase_memory_type {
 };
 
 /* internal structure, mirroring base_mem_aliasing_info,
- * but with alloc instead of a gpu va (handle) */
+ * but with alloc instead of a gpu va (handle)
+ */
 struct kbase_aliased {
 	struct kbase_mem_phy_alloc *alloc; /* NULL for special, non-NULL for native */
 	u64 offset; /* in pages */
 	u64 length; /* in pages */
 };
 
-/**
- * @brief Physical pages tracking object properties
-  */
+/* Physical pages tracking object properties */
 #define KBASE_MEM_PHY_ALLOC_ACCESSED_CACHED  (1u << 0)
 #define KBASE_MEM_PHY_ALLOC_LARGE            (1u << 1)
 
@@ -105,7 +104,13 @@ struct kbase_aliased {
  * updated as part of the change.
  *
  * @kref: number of users of this alloc
- * @gpu_mappings: count number of times mapped on the GPU
+ * @gpu_mappings: count number of times mapped on the GPU. Indicates the number
+ *                of references there are to the physical pages from different
+ *                GPU VA regions.
+ * @kernel_mappings: count number of times mapped on the CPU, specifically in
+ *                   the kernel. Indicates the number of references there are
+ *                   to the physical pages to prevent flag changes or shrink
+ *                   while maps are still held.
  * @nents: 0..N
  * @pages: N elements, only 0..nents are valid
  * @mappings: List of CPU mappings of this physical memory allocation.
@@ -128,6 +133,7 @@ struct kbase_aliased {
 struct kbase_mem_phy_alloc {
 	struct kref           kref;
 	atomic_t              gpu_mappings;
+	atomic_t              kernel_mappings;
 	size_t                nents;
 	struct tagged_addr    *pages;
 	struct list_head      mappings;
@@ -211,12 +217,36 @@ static inline void kbase_mem_phy_alloc_gpu_unmapped(struct kbase_mem_phy_alloc *
 	KBASE_DEBUG_ASSERT(alloc);
 	/* we only track mappings of NATIVE buffers */
 	if (alloc->type == KBASE_MEM_TYPE_NATIVE)
-		if (0 > atomic_dec_return(&alloc->gpu_mappings)) {
+		if (atomic_dec_return(&alloc->gpu_mappings) < 0) {
 			pr_err("Mismatched %s:\n", __func__);
 			dump_stack();
 		}
 }
 
+/**
+ * kbase_mem_phy_alloc_kernel_mapped - Atomically increment kernel_mappings
+ * counter for a memory region to prevent commit and flag changes
+ *
+ * @alloc:  Pointer to physical pages tracking object
+ */
+static inline void
+kbase_mem_phy_alloc_kernel_mapped(struct kbase_mem_phy_alloc *alloc)
+{
+	atomic_inc(&alloc->kernel_mappings);
+}
+
+/**
+ * kbase_mem_phy_alloc_kernel_unmapped - Decrement kernel_mappings counter
+ * for a memory region to allow commit and flag changes; WARNs on underflow
+ *
+ * @alloc:  Pointer to physical pages tracking object
+ */
+static inline void
+kbase_mem_phy_alloc_kernel_unmapped(struct kbase_mem_phy_alloc *alloc)
+{
+	WARN_ON(atomic_dec_return(&alloc->kernel_mappings) < 0);
+}
+
 /**
  * kbase_mem_is_imported - Indicate whether a memory type is imported
  *
@@ -249,7 +279,7 @@ static inline struct kbase_mem_phy_alloc *kbase_mem_phy_alloc_put(struct kbase_m
 }
 
 /**
- * A GPU memory region, and attributes for CPU mappings.
+ * struct kbase_va_region - A GPU memory region, and attributes for CPU mappings
  *
  * @rblink: Node in a red-black tree of memory regions within the same zone of
  *          the GPU's virtual address space.
@@ -263,13 +293,31 @@ static inline struct kbase_mem_phy_alloc *kbase_mem_phy_alloc_put(struct kbase_m
  * @threshold_pages: If non-zero and the amount of memory committed to a region
  *                   that can grow on page fault exceeds this number of pages
  *                   then the driver switches to incremental rendering.
- * @extent:    Number of pages allocated on page fault.
+ * @flags:           Flags
+ * @extension:    Number of pages allocated on page fault.
  * @cpu_alloc: The physical memory we mmap to the CPU when mapping this region.
  * @gpu_alloc: The physical memory we mmap to the GPU when mapping this region.
  * @jit_node:     Links to neighboring regions in the just-in-time memory pool.
  * @jit_usage_id: The last just-in-time memory usage ID for this region.
  * @jit_bin_id:   The just-in-time memory bin this region came from.
  * @va_refcnt:    Number of users of this region. Protected by reg_lock.
+ * @heap_info_gpu_addr: Pointer to an object in GPU memory defining an end of
+ *                      an allocated region
+ *                      The object can be one of:
+ *                      - u32 value defining the size of the region
+ *                      - u64 pointer to the first unused byte in the region
+ *                      The interpretation of the object depends on
+ *                      BASE_JIT_ALLOC_HEAP_INFO_IS_SIZE flag in
+ *                      jit_info_flags - if it is set, the heap info object
+ *                      should be interpreted as size.
+ * @used_pages: The current estimate of the number of pages used, which in
+ *              normal use is either:
+ *              - the initial estimate == va_pages
+ *              - the actual pages used, as found by a JIT usage report
+ *              Note that since the value is calculated from GPU memory after a
+ *              JIT usage report, at any point in time it is allowed to take a
+ *              random value that is no greater than va_pages (e.g. it may be
+ *              greater than gpu_alloc->nents)
  */
 struct kbase_va_region {
 	struct rb_node rblink;
@@ -309,8 +357,13 @@ struct kbase_va_region {
 #define KBASE_REG_SHARE_BOTH        (1ul << 10)
 
 /* Space for 4 different zones */
-#define KBASE_REG_ZONE_MASK         (3ul << 11)
-#define KBASE_REG_ZONE(x)           (((x) & 3) << 11)
+#define KBASE_REG_ZONE_MASK         ((KBASE_REG_ZONE_MAX - 1ul) << 11)
+#define KBASE_REG_ZONE(x)           (((x) & (KBASE_REG_ZONE_MAX - 1ul)) << 11)
+#define KBASE_REG_ZONE_IDX(x)       (((x) & KBASE_REG_ZONE_MASK) >> 11)
+
+#if ((KBASE_REG_ZONE_MAX - 1) & 0x3) != (KBASE_REG_ZONE_MAX - 1)
+#error KBASE_REG_ZONE_MAX too large for allocation of KBASE_REG_<...> bits
+#endif
 
 /* GPU read access */
 #define KBASE_REG_GPU_RD            (1ul<<13)
@@ -341,8 +394,9 @@ struct kbase_va_region {
 #endif
 
 #if !MALI_USE_CSF
-/* The top of the initial commit is aligned to extent pages.
- * Extent must be a power of 2 */
+/* The top of the initial commit is aligned to extension pages.
+ * Extension must be a power of 2
+ */
 #define KBASE_REG_TILER_ALIGN_TOP   (1ul << 23)
 #else
 /* Bit 23 is reserved.
@@ -416,7 +470,7 @@ struct kbase_va_region {
 #endif
 
 	unsigned long flags;
-	size_t extent;
+	size_t extension;
 	struct kbase_mem_phy_alloc *cpu_alloc;
 	struct kbase_mem_phy_alloc *gpu_alloc;
 	struct list_head jit_node;
@@ -495,7 +549,7 @@ static inline struct kbase_va_region *kbase_va_region_alloc_get(
 	WARN_ON(!region->va_refcnt);
 
 	/* non-atomic as kctx->reg_lock is held */
-	dev_dbg(kctx->kbdev->dev, "va_refcnt %d before get %p\n",
+	dev_dbg(kctx->kbdev->dev, "va_refcnt %d before get %pK\n",
 		region->va_refcnt, (void *)region);
 	region->va_refcnt++;
 
@@ -512,7 +566,7 @@ static inline struct kbase_va_region *kbase_va_region_alloc_put(
 
 	/* non-atomic as kctx->reg_lock is held */
 	region->va_refcnt--;
-	dev_dbg(kctx->kbdev->dev, "va_refcnt %d after put %p\n",
+	dev_dbg(kctx->kbdev->dev, "va_refcnt %d after put %pK\n",
 		region->va_refcnt, (void *)region);
 	if (!region->va_refcnt)
 		kbase_region_refcnt_free(region);
@@ -604,6 +658,7 @@ static inline struct kbase_mem_phy_alloc *kbase_alloc_create(
 
 	kref_init(&alloc->kref);
 	atomic_set(&alloc->gpu_mappings, 0);
+	atomic_set(&alloc->kernel_mappings, 0);
 	alloc->nents = 0;
 	alloc->pages = (void *)(alloc + 1);
 	INIT_LIST_HEAD(&alloc->mappings);
@@ -1043,7 +1098,9 @@ struct kbase_va_region *kbase_find_region_enclosing_address(
 		struct rb_root *rbtree, u64 gpu_addr);
 
 /**
- * @brief Check that a pointer is actually a valid region.
+ * Check that a pointer is actually a valid region.
+ * @kctx: kbase context containing the region
+ * @gpu_addr: pointer to check
  *
  * Must be called with context lock held.
  */
@@ -1072,7 +1129,7 @@ bool kbase_check_import_flags(unsigned long flags);
  * @flags:        The flags passed from user space
  * @va_pages:     The size of the requested region, in pages.
  * @commit_pages: Number of pages to commit initially.
- * @extent:       Number of pages to grow by on GPU page fault and/or alignment
+ * @extension:       Number of pages to grow by on GPU page fault and/or alignment
  *                (depending on flags)
  *
  * Makes checks on the size parameters passed in from user space for a memory
@@ -1081,7 +1138,7 @@ bool kbase_check_import_flags(unsigned long flags);
  * Return: 0 if sizes are valid for these flags, negative error code otherwise
  */
 int kbase_check_alloc_sizes(struct kbase_context *kctx, unsigned long flags,
-		u64 va_pages, u64 commit_pages, u64 extent);
+			    u64 va_pages, u64 commit_pages, u64 extension);
 
 /**
  * kbase_update_region_flags - Convert user space flags to kernel region flags
@@ -1104,14 +1161,21 @@ void kbase_gpu_vm_unlock(struct kbase_context *kctx);
 int kbase_alloc_phy_pages(struct kbase_va_region *reg, size_t vsize, size_t size);
 
 /**
- * @brief Register region and map it on the GPU.
+ * kbase_gpu_mmap - Register region and map it on the GPU.
+ * @kctx: kbase context containing the region
+ * @reg: the region to add
+ * @addr: the address to insert the region at
+ * @nr_pages: the number of pages in the region
+ * @align: the minimum alignment in pages
  *
  * Call kbase_add_va_region() and map the region on the GPU.
  */
 int kbase_gpu_mmap(struct kbase_context *kctx, struct kbase_va_region *reg, u64 addr, size_t nr_pages, size_t align);
 
 /**
- * @brief Remove the region from the GPU and unregister it.
+ * Remove the region from the GPU and unregister it.
+ * @kctx:  KBase context
+ * @reg:   The region to remove
  *
  * Must be called with context lock held.
  */
@@ -1400,7 +1464,8 @@ static inline void kbase_set_dma_addr(struct page *p, dma_addr_t dma_addr)
 		/* on 32-bit ARM with LPAE dma_addr_t becomes larger, but the
 		 * private field stays the same. So we have to be clever and
 		 * use the fact that we only store DMA addresses of whole pages,
-		 * so the low bits should be zero */
+		 * so the low bits should be zero
+		 */
 		KBASE_DEBUG_ASSERT(!(dma_addr & (PAGE_SIZE - 1)));
 		set_page_private(p, dma_addr >> PAGE_SHIFT);
 	} else {
@@ -1959,4 +2024,76 @@ int kbase_mem_copy_to_pinned_user_pages(struct page **dest_pages,
 		void *src_page, size_t *to_copy, unsigned int nr_pages,
 		unsigned int *target_page_nr, size_t offset);
 
+/**
+ * kbase_reg_zone_end_pfn - return the end Page Frame Number of @zone
+ * @zone: zone to query
+ *
+ * Return: base_pfn + va_size_pages of @zone, i.e. the first PFN past the zone
+ */
+static inline u64 kbase_reg_zone_end_pfn(struct kbase_reg_zone *zone)
+{
+	return zone->base_pfn + zone->va_size_pages;
+}
+
+/**
+ * kbase_ctx_reg_zone_init - initialize a zone in @kctx
+ * @kctx: Pointer to kbase context; its reg_lock must be held by the caller
+ * @zone_bits: A KBASE_REG_ZONE_<...> to initialize
+ * @base_pfn: Page Frame Number in GPU virtual address space for the start of
+ *            the Zone
+ * @va_size_pages: Size of the Zone in pages
+ */
+static inline void kbase_ctx_reg_zone_init(struct kbase_context *kctx,
+					   unsigned long zone_bits,
+					   u64 base_pfn, u64 va_size_pages)
+{
+	struct kbase_reg_zone *zone;
+
+	lockdep_assert_held(&kctx->reg_lock);
+	WARN_ON((zone_bits & KBASE_REG_ZONE_MASK) != zone_bits);
+
+	zone = &kctx->reg_zone[KBASE_REG_ZONE_IDX(zone_bits)];
+	*zone = (struct kbase_reg_zone){
+		.base_pfn = base_pfn, .va_size_pages = va_size_pages,
+	};
+}
+
+/**
+ * kbase_ctx_reg_zone_get_nolock - get a zone from @kctx where the caller does
+ *                                 not have @kctx 's region lock
+ * @kctx: Pointer to kbase context
+ * @zone_bits: A KBASE_REG_ZONE_<...> to retrieve (WARNs if other bits are set)
+ *
+ * This should only be used in performance-critical paths where the code is
+ * resilient to a race with the zone changing.
+ *
+ * Return: The zone corresponding to @zone_bits
+ */
+static inline struct kbase_reg_zone *
+kbase_ctx_reg_zone_get_nolock(struct kbase_context *kctx,
+			      unsigned long zone_bits)
+{
+	WARN_ON((zone_bits & KBASE_REG_ZONE_MASK) != zone_bits);
+
+	return &kctx->reg_zone[KBASE_REG_ZONE_IDX(zone_bits)];
+}
+
+/**
+ * kbase_ctx_reg_zone_get - get a zone from @kctx
+ * @kctx: Pointer to kbase context; its reg_lock must be held by the caller
+ * @zone_bits: A KBASE_REG_ZONE_<...> to retrieve
+ *
+ * The get is not refcounted - there is no corresponding 'put' operation
+ *
+ * Return: The zone corresponding to @zone_bits
+ */
+static inline struct kbase_reg_zone *
+kbase_ctx_reg_zone_get(struct kbase_context *kctx, unsigned long zone_bits)
+{
+	lockdep_assert_held(&kctx->reg_lock);
+	WARN_ON((zone_bits & KBASE_REG_ZONE_MASK) != zone_bits);
+
+	return &kctx->reg_zone[KBASE_REG_ZONE_IDX(zone_bits)];
+}
+
 #endif				/* _KBASE_MEM_H_ */
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_mem_linux.c b/drivers/gpu/arm/bifrost/mali_kbase_mem_linux.c
index 99b5b852667e..f58fdf3c1e3a 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_mem_linux.c
+++ b/drivers/gpu/arm/bifrost/mali_kbase_mem_linux.c
@@ -1,11 +1,12 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  *
- * (C) COPYRIGHT 2010-2020 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2021 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,15 +17,10 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
-
-
 /**
- * @file mali_kbase_mem_linux.c
- * Base kernel memory APIs, Linux implementation.
+ * DOC: Base kernel memory APIs, Linux implementation.
  */
 
 #include <linux/compat.h>
@@ -35,10 +31,9 @@
 #include <linux/fs.h>
 #include <linux/version.h>
 #include <linux/dma-mapping.h>
-#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0)) && \
-	(LINUX_VERSION_CODE < KERNEL_VERSION(4, 8, 0))
+#if (KERNEL_VERSION(4, 8, 0) > LINUX_VERSION_CODE)
 #include <linux/dma-attrs.h>
-#endif /* LINUX_VERSION_CODE >= 3.5.0 && < 4.8.0 */
+#endif /* LINUX_VERSION_CODE < 4.8.0 */
 #include <linux/dma-buf.h>
 #include <linux/shrinker.h>
 #include <linux/cache.h>
@@ -47,10 +42,11 @@
 #include <mali_kbase.h>
 #include <mali_kbase_mem_linux.h>
 #include <tl/mali_kbase_tracepoints.h>
-#include <mali_kbase_ioctl.h>
+#include <uapi/gpu/arm/bifrost/mali_kbase_ioctl.h>
 #include <mmu/mali_kbase_mmu.h>
 #include <mali_kbase_caps.h>
 #include <mali_kbase_trace_gpu_mem.h>
+#include <mali_kbase_reset_gpu.h>
 
 #if ((KERNEL_VERSION(5, 3, 0) <= LINUX_VERSION_CODE) || \
 	(KERNEL_VERSION(5, 0, 0) > LINUX_VERSION_CODE))
@@ -296,8 +292,8 @@ void kbase_phy_alloc_mapping_put(struct kbase_context *kctx,
 }
 
 struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx,
-		u64 va_pages, u64 commit_pages, u64 extent, u64 *flags,
-		u64 *gpu_va)
+					u64 va_pages, u64 commit_pages,
+					u64 extension, u64 *flags, u64 *gpu_va)
 {
 	int zone;
 	struct kbase_va_region *reg;
@@ -309,8 +305,9 @@ struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx,
 	KBASE_DEBUG_ASSERT(gpu_va);
 
 	dev = kctx->kbdev->dev;
-	dev_dbg(dev, "Allocating %lld va_pages, %lld commit_pages, %lld extent, 0x%llX flags\n",
-		va_pages, commit_pages, extent, *flags);
+	dev_dbg(dev,
+		"Allocating %lld va_pages, %lld commit_pages, %lld extension, 0x%llX flags\n",
+		va_pages, commit_pages, extension, *flags);
 
 #if MALI_USE_CSF
 	*gpu_va = 0; /* return 0 on failure */
@@ -356,7 +353,8 @@ struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx,
 		*flags &= ~BASE_MEM_COHERENT_SYSTEM;
 	}
 
-	if (kbase_check_alloc_sizes(kctx, *flags, va_pages, commit_pages, extent))
+	if (kbase_check_alloc_sizes(kctx, *flags, va_pages, commit_pages,
+				    extension))
 		goto bad_sizes;
 
 #ifdef CONFIG_MALI_MEMORY_FULLY_BACKED
@@ -413,15 +411,16 @@ struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx,
 		reg->threshold_pages = 0;
 
 	if (*flags & BASE_MEM_GROW_ON_GPF) {
-		/* kbase_check_alloc_sizes() already checks extent is valid for
-		 * assigning to reg->extent */
-		reg->extent = extent;
+		/* kbase_check_alloc_sizes() already checks extension is valid for
+		 * assigning to reg->extension
+		 */
+		reg->extension = extension;
 #if !MALI_USE_CSF
 	} else if (*flags & BASE_MEM_TILER_ALIGN_TOP) {
-		reg->extent = extent;
+		reg->extension = extension;
 #endif /* !MALI_USE_CSF */
 	} else {
-		reg->extent = 0;
+		reg->extension = 0;
 	}
 
 	if (kbase_alloc_phy_pages(reg, va_pages, commit_pages) != 0) {
@@ -448,14 +447,6 @@ struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx,
 		}
 	}
 
-#if MALI_USE_CSF
-	if (reg->flags & KBASE_REG_CSF_EVENT) {
-		WARN_ON(!(*flags & BASE_MEM_SAME_VA));
-
-		kbase_link_event_mem_page(kctx, reg);
-	}
-#endif
-
 	/* mmap needed to setup VA? */
 	if (*flags & BASE_MEM_SAME_VA) {
 		unsigned long cookie, cookie_nr;
@@ -503,13 +494,6 @@ struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx,
 
 no_mmap:
 no_cookie:
-#if MALI_USE_CSF
-	if (reg->flags & KBASE_REG_CSF_EVENT) {
-		kbase_gpu_vm_lock(kctx);
-		kbase_unlink_event_mem_page(kctx, reg);
-		kbase_gpu_vm_unlock(kctx);
-	}
-#endif
 no_kern_mapping:
 no_mem:
 #if MALI_JIT_PRESSURE_LIMIT_BASE
@@ -657,6 +641,13 @@ unsigned long kbase_mem_evictable_reclaim_count_objects(struct shrinker *s,
 
 	kctx = container_of(s, struct kbase_context, reclaim);
 
+	WARN((sc->gfp_mask & __GFP_ATOMIC),
+	     "Shrinkers cannot be called for GFP_ATOMIC allocations. Check kernel mm for problems. gfp_mask==%x\n",
+	     sc->gfp_mask);
+	WARN(in_atomic(),
+	     "Shrinker called whilst in atomic context. The caller must switch to using GFP_ATOMIC or similar. gfp_mask==%x\n",
+	     sc->gfp_mask);
+
 	mutex_lock(&kctx->jit_evict_lock);
 
 	list_for_each_entry(alloc, &kctx->evict_list, evict_node)
@@ -739,35 +730,18 @@ out_unlock:
 	return freed;
 }
 
-#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 0)
-static int kbase_mem_evictable_reclaim_shrink(struct shrinker *s,
-		struct shrink_control *sc)
-{
-	if (sc->nr_to_scan == 0)
-		return kbase_mem_evictable_reclaim_count_objects(s, sc);
-
-	return kbase_mem_evictable_reclaim_scan_objects(s, sc);
-}
-#endif
-
 int kbase_mem_evictable_init(struct kbase_context *kctx)
 {
 	INIT_LIST_HEAD(&kctx->evict_list);
 	mutex_init(&kctx->jit_evict_lock);
 
-	/* Register shrinker */
-#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 0)
-	kctx->reclaim.shrink = kbase_mem_evictable_reclaim_shrink;
-#else
 	kctx->reclaim.count_objects = kbase_mem_evictable_reclaim_count_objects;
 	kctx->reclaim.scan_objects = kbase_mem_evictable_reclaim_scan_objects;
-#endif
 	kctx->reclaim.seeks = DEFAULT_SEEKS;
 	/* Kernel versions prior to 3.1 :
-	 * struct shrinker does not define batch */
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 1, 0)
+	 * struct shrinker does not define batch
+	 */
 	kctx->reclaim.batch = 0;
-#endif
 	register_shrinker(&kctx->reclaim);
 	return 0;
 }
@@ -945,10 +919,18 @@ int kbase_mem_flags_change(struct kbase_context *kctx, u64 gpu_addr, unsigned in
 	prev_needed = (KBASE_REG_DONT_NEED & reg->flags) == KBASE_REG_DONT_NEED;
 	new_needed = (BASE_MEM_DONT_NEED & flags) == BASE_MEM_DONT_NEED;
 	if (prev_needed != new_needed) {
-		/* Aliased allocations can't be made ephemeral */
+		/* Aliased allocations can't be shrunk as the code doesn't
+		 * support looking up:
+		 * - all physical pages assigned to different GPU VAs
+		 * - CPU mappings for the physical pages at different vm_pgoff
+		 *   (==GPU VA) locations.
+		 */
 		if (atomic_read(&reg->cpu_alloc->gpu_mappings) > 1)
 			goto out_unlock;
 
+		if (atomic_read(&reg->cpu_alloc->kernel_mappings) > 0)
+			goto out_unlock;
+
 		if (new_needed) {
 			/* Only native allocations can be marked not needed */
 			if (reg->cpu_alloc->type != KBASE_MEM_TYPE_NATIVE) {
@@ -1122,7 +1104,7 @@ int kbase_mem_do_sync_imported(struct kbase_context *kctx,
 				dir);
 #endif /* KBASE_MEM_ION_SYNC_WORKAROUND */
 		break;
-	};
+	}
 
 	if (unlikely(ret))
 		dev_warn(kctx->kbdev->dev,
@@ -1483,7 +1465,7 @@ static struct kbase_va_region *kbase_mem_from_umm(struct kbase_context *kctx,
 	reg->gpu_alloc->imported.umm.current_mapping_usage_count = 0;
 	reg->gpu_alloc->imported.umm.need_sync = need_sync;
 	reg->gpu_alloc->imported.umm.kctx = kctx;
-	reg->extent = 0;
+	reg->extension = 0;
 
 	if (!IS_ENABLED(CONFIG_MALI_DMA_BUF_MAP_ON_DEMAND)) {
 		int err;
@@ -1536,6 +1518,7 @@ static struct kbase_va_region *kbase_mem_from_user_buffer(
 	u32 cache_line_alignment = kbase_get_cache_line_alignment(kctx->kbdev);
 	struct kbase_alloc_import_user_buf *user_buf;
 	struct page **pages = NULL;
+	int write;
 
 	/* Flag supported only for dma-buf imported memory */
 	if (*flags & BASE_MEM_IMPORT_SYNC_ON_MAP_UNMAP)
@@ -1649,22 +1632,22 @@ static struct kbase_va_region *kbase_mem_from_user_buffer(
 
 	down_read(kbase_mem_get_process_mmap_lock());
 
-#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0)
+	write = reg->flags & (KBASE_REG_CPU_WR | KBASE_REG_GPU_WR);
+
+#if KERNEL_VERSION(4, 6, 0) > LINUX_VERSION_CODE
 	faulted_pages = get_user_pages(current, current->mm, address, *va_pages,
 #if KERNEL_VERSION(4, 4, 168) <= LINUX_VERSION_CODE && \
 KERNEL_VERSION(4, 5, 0) > LINUX_VERSION_CODE
-			reg->flags & KBASE_REG_GPU_WR ? FOLL_WRITE : 0,
-			pages, NULL);
+			write ? FOLL_WRITE : 0, pages, NULL);
 #else
-			reg->flags & KBASE_REG_GPU_WR, 0, pages, NULL);
+			write, 0, pages, NULL);
 #endif
-#elif LINUX_VERSION_CODE < KERNEL_VERSION(4, 9, 0)
+#elif KERNEL_VERSION(4, 9, 0) > LINUX_VERSION_CODE
 	faulted_pages = get_user_pages(address, *va_pages,
-			reg->flags & KBASE_REG_GPU_WR, 0, pages, NULL);
+			write, 0, pages, NULL);
 #else
 	faulted_pages = get_user_pages(address, *va_pages,
-			reg->flags & KBASE_REG_GPU_WR ? FOLL_WRITE : 0,
-			pages, NULL);
+			write ? FOLL_WRITE : 0, pages, NULL);
 #endif
 
 	up_read(kbase_mem_get_process_mmap_lock());
@@ -1673,7 +1656,7 @@ KERNEL_VERSION(4, 5, 0) > LINUX_VERSION_CODE
 		goto fault_mismatch;
 
 	reg->gpu_alloc->nents = 0;
-	reg->extent = 0;
+	reg->extension = 0;
 
 	if (pages) {
 		struct device *dev = kctx->kbdev->dev;
@@ -1775,7 +1758,8 @@ u64 kbase_mem_alias(struct kbase_context *kctx, u64 *flags, u64 stride,
 #ifdef CONFIG_64BIT
 	if (!kbase_ctx_flag(kctx, KCTX_COMPAT)) {
 		/* 64-bit tasks must MMAP anyway, but not expose this address to
-		 * clients */
+		 * clients
+		 */
 		*flags |= BASE_MEM_NEED_MMAP;
 		reg = kbase_alloc_free_region(&kctx->reg_rbtree_same, 0,
 				*num_pages,
@@ -1821,7 +1805,8 @@ u64 kbase_mem_alias(struct kbase_context *kctx, u64 *flags, u64 stride,
 				goto bad_handle; /* must be > 0 */
 			if (ai[i].length > stride)
 				goto bad_handle; /* can't be larger than the
-						    stride */
+						  * stride
+						  */
 			reg->gpu_alloc->imported.alias.aliased[i].length = ai[i].length;
 		} else {
 			struct kbase_va_region *aliasing_reg;
@@ -1836,6 +1821,15 @@ u64 kbase_mem_alias(struct kbase_context *kctx, u64 *flags, u64 stride,
 				goto bad_handle; /* Not found/already free */
 			if (aliasing_reg->flags & KBASE_REG_DONT_NEED)
 				goto bad_handle; /* Ephemeral region */
+			if (aliasing_reg->flags & KBASE_REG_NO_USER_FREE)
+				goto bad_handle; /* JIT regions can't be
+						  * aliased. NO_USER_FREE flag
+						  * covers the entire lifetime
+						  * of JIT regions. The other
+						  * types of regions covered
+						  * by this flag also shall
+						  * not be aliased.
+						  */
 			if (!(aliasing_reg->flags & KBASE_REG_GPU_CACHED))
 				goto bad_handle; /* GPU uncached memory */
 			if (!aliasing_reg->gpu_alloc)
@@ -1843,16 +1837,18 @@ u64 kbase_mem_alias(struct kbase_context *kctx, u64 *flags, u64 stride,
 			if (aliasing_reg->gpu_alloc->type != KBASE_MEM_TYPE_NATIVE)
 				goto bad_handle; /* Not a native alloc */
 			if (coherent != ((aliasing_reg->flags & KBASE_REG_SHARE_BOTH) != 0))
-				goto bad_handle;
-				/* Non-coherent memory cannot alias
-				   coherent memory, and vice versa.*/
+				goto bad_handle; /* Non-coherent memory cannot
+						  * alias coherent memory, and
+						  * vice versa.
+						  */
 
 			/* check size against stride */
 			if (!ai[i].length)
 				goto bad_handle; /* must be > 0 */
 			if (ai[i].length > stride)
 				goto bad_handle; /* can't be larger than the
-						    stride */
+						  * stride
+						  */
 
 			alloc = aliasing_reg->gpu_alloc;
 
@@ -1865,6 +1861,18 @@ u64 kbase_mem_alias(struct kbase_context *kctx, u64 *flags, u64 stride,
 			reg->gpu_alloc->imported.alias.aliased[i].alloc = kbase_mem_phy_alloc_get(alloc);
 			reg->gpu_alloc->imported.alias.aliased[i].length = ai[i].length;
 			reg->gpu_alloc->imported.alias.aliased[i].offset = ai[i].offset;
+
+			/* Ensure the underlying alloc is marked as being
+			 * mapped at >1 different GPU VA immediately, even
+			 * though mapping might not happen until later.
+			 *
+			 * Otherwise, we would (incorrectly) allow shrinking of
+			 * the source region (aliasing_reg) and so freeing the
+			 * physical pages (without freeing the entire alloc)
+			 * whilst we still hold an implicit reference on those
+			 * physical pages.
+			 */
+			kbase_mem_phy_alloc_gpu_mapped(alloc);
 		}
 	}
 
@@ -1908,6 +1916,10 @@ no_cookie:
 #endif
 no_mmap:
 bad_handle:
+	/* Marking the source allocs as not being mapped on the GPU and putting
+	 * them is handled by putting reg's allocs, so no rollback of those
+	 * actions is done here.
+	 */
 	kbase_gpu_vm_unlock(kctx);
 no_aliased_array:
 invalid_flags:
@@ -2161,9 +2173,20 @@ int kbase_mem_commit(struct kbase_context *kctx, u64 gpu_addr, u64 new_pages)
 	if (new_pages > reg->nr_pages)
 		goto out_unlock;
 
-	/* can't be mapped more than once on the GPU */
+	/* Can't shrink when physical pages are mapped to different GPU
+	 * VAs. The code doesn't support looking up:
+	 * - all physical pages assigned to different GPU VAs
+	 * - CPU mappings for the physical pages at different vm_pgoff
+	 *   (==GPU VA) locations.
+	 *
+	 * Note that for Native allocs mapped at multiple GPU VAs, growth of
+	 * such allocs is not a supported use-case.
+	 */
 	if (atomic_read(&reg->gpu_alloc->gpu_mappings) > 1)
 		goto out_unlock;
+
+	if (atomic_read(&reg->cpu_alloc->kernel_mappings) > 0)
+		goto out_unlock;
 	/* can't grow regions which are ephemeral */
 	if (reg->flags & KBASE_REG_DONT_NEED)
 		goto out_unlock;
@@ -2463,11 +2486,7 @@ static int kbase_cpu_mmap(struct kbase_context *kctx,
 	 * See MIDBASE-1057
 	 */
 
-#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0))
 	vma->vm_flags |= VM_DONTCOPY | VM_DONTDUMP | VM_DONTEXPAND | VM_IO;
-#else
-	vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND | VM_RESERVED | VM_IO;
-#endif
 	vma->vm_ops = &kbase_vm_ops;
 	vma->vm_private_data = map;
 
@@ -2648,7 +2667,8 @@ static int kbasep_reg_mmap(struct kbase_context *kctx,
 		/* incorrect mmap size */
 		/* leave the cookie for a potential later
 		 * mapping, or to be reclaimed later when the
-		 * context is freed */
+		 * context is freed
+		 */
 		err = -ENOMEM;
 		goto out;
 	}
@@ -2677,6 +2697,11 @@ static int kbasep_reg_mmap(struct kbase_context *kctx,
 	kctx->pending_regions[cookie] = NULL;
 	bitmap_set(kctx->cookies, cookie, 1);
 
+#if MALI_USE_CSF
+	if (reg->flags & KBASE_REG_CSF_EVENT)
+		kbase_link_event_mem_page(kctx, reg);
+#endif
+
 	/*
 	 * Overwrite the offset with the region start_pfn, so we effectively
 	 * map from offset 0 in the region. However subtract the aligned
@@ -2696,7 +2721,7 @@ int kbase_context_mmap(struct kbase_context *const kctx,
 {
 	struct kbase_va_region *reg = NULL;
 	void *kaddr = NULL;
-	size_t nr_pages = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
+	size_t nr_pages = vma_pages(vma);
 	int err = 0;
 	int free_on_close = 0;
 	struct device *dev = kctx->kbdev->dev;
@@ -2709,7 +2734,7 @@ int kbase_context_mmap(struct kbase_context *const kctx,
 	if (!(vma->vm_flags & VM_WRITE))
 		vma->vm_flags &= ~VM_MAYWRITE;
 
-	if (0 == nr_pages) {
+	if (nr_pages == 0) {
 		err = -EINVAL;
 		goto out;
 	}
@@ -2730,7 +2755,8 @@ int kbase_context_mmap(struct kbase_context *const kctx,
 	/* if not the MTP, verify that the MTP has been mapped */
 	rcu_read_lock();
 	/* catches both when the special page isn't present or
-	 * when we've forked */
+	 * when we've forked
+	 */
 	if (rcu_dereference(kctx->process_mm) != current->mm) {
 		err = -EINVAL;
 		rcu_read_unlock();
@@ -2747,7 +2773,7 @@ int kbase_context_mmap(struct kbase_context *const kctx,
 	case PFN_DOWN(BASE_MEM_MMU_DUMP_HANDLE):
 		/* MMU dump */
 		err = kbase_mmu_dump_mmap(kctx, vma, &reg, &kaddr);
-		if (0 != err)
+		if (err != 0)
 			goto out_unlock;
 		/* free the region on munmap */
 		free_on_close = 1;
@@ -2770,7 +2796,7 @@ int kbase_context_mmap(struct kbase_context *const kctx,
 	     PFN_DOWN(BASE_MEM_FIRST_FREE_ADDRESS) - 1: {
 		err = kbasep_reg_mmap(kctx, vma, &reg, &nr_pages,
 							&aligned_offset);
-		if (0 != err)
+		if (err != 0)
 			goto out_unlock;
 		/* free the region on munmap */
 		free_on_close = 1;
@@ -2843,8 +2869,21 @@ int kbase_context_mmap(struct kbase_context *const kctx,
 
 	if (vma->vm_pgoff == PFN_DOWN(BASE_MEM_MMU_DUMP_HANDLE)) {
 		/* MMU dump - userspace should now have a reference on
-		 * the pages, so we can now free the kernel mapping */
+		 * the pages, so we can now free the kernel mapping
+		 */
 		vfree(kaddr);
+		/* CPU mapping of GPU allocations have GPU VA as the vm_pgoff
+		 * and that is used to shrink the mapping when the commit size
+		 * is reduced. So vm_pgoff for CPU mapping created to get the
+		 * snapshot of GPU page tables shall not match with any GPU VA.
+		 * That can be ensured by setting vm_pgoff as vma->vm_start
+		 * because,
+		 * - GPU VA of any SAME_VA allocation cannot match with
+		 *   vma->vm_start, as CPU VAs are unique.
+		 * - GPU VA of CUSTOM_VA allocations are outside the CPU
+		 *   virtual address space.
+		 */
+		vma->vm_pgoff = PFN_DOWN(vma->vm_start);
 	}
 
 out_unlock:
@@ -2939,8 +2978,8 @@ static int kbase_vmap_phy_pages(struct kbase_context *kctx,
 
 	/* Note: enforcing a RO prot_request onto prot is not done, since:
 	 * - CPU-arch-specific integration required
-	 * - kbase_vmap() requires no access checks to be made/enforced */
-
+	 * - kbase_vmap() requires no access checks to be made/enforced
+	 */
 	cpu_addr = vmap(pages, page_count, VM_MAP, prot);
 
 	kfree(pages);
@@ -2961,6 +3000,7 @@ static int kbase_vmap_phy_pages(struct kbase_context *kctx,
 	if (map->sync_needed)
 		kbase_sync_mem_regions(kctx, map, KBASE_SYNC_TO_CPU);
 
+	kbase_mem_phy_alloc_kernel_mapped(reg->cpu_alloc);
 	return 0;
 }
 
@@ -3016,7 +3056,8 @@ void *kbase_vmap(struct kbase_context *kctx, u64 gpu_addr, size_t size,
 	 * be made.
 	 *
 	 * As mentioned in kbase_vmap_prot() this means that a kernel-side
-	 * CPU-RO mapping is not enforced to allow this to work */
+	 * CPU-RO mapping is not enforced to allow this to work
+	 */
 	return kbase_vmap_prot(kctx, gpu_addr, size, 0u, map);
 }
 KBASE_EXPORT_TEST_API(kbase_vmap);
@@ -3030,6 +3071,7 @@ static void kbase_vunmap_phy_pages(struct kbase_context *kctx,
 	if (map->sync_needed)
 		kbase_sync_mem_regions(kctx, map, KBASE_SYNC_TO_DEVICE);
 
+	kbase_mem_phy_alloc_kernel_unmapped(map->cpu_alloc);
 	map->offset_in_page = 0;
 	map->cpu_pages = NULL;
 	map->gpu_pages = NULL;
@@ -3048,7 +3090,7 @@ KBASE_EXPORT_TEST_API(kbase_vunmap);
 
 static void kbasep_add_mm_counter(struct mm_struct *mm, int member, long value)
 {
-#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 19, 0))
+#if (KERNEL_VERSION(4, 19, 0) <= LINUX_VERSION_CODE)
 	/* To avoid the build breakage due to an unexported kernel symbol
 	 * 'mm_trace_rss_stat' from later kernels, i.e. from V4.19.0 onwards,
 	 * we inline here the equivalent of 'add_mm_counter()' from linux
@@ -3132,11 +3174,7 @@ static int kbase_tracking_page_setup(struct kbase_context *kctx, struct vm_area_
 
 	/* no real access */
 	vma->vm_flags &= ~(VM_READ | VM_MAYREAD | VM_WRITE | VM_MAYWRITE | VM_EXEC | VM_MAYEXEC);
-#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0))
 	vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND | VM_DONTDUMP | VM_IO;
-#else
-	vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND | VM_RESERVED | VM_IO;
-#endif
 	vma->vm_ops = &kbase_vm_special_ops;
 	vma->vm_private_data = kctx;
 
@@ -3171,16 +3209,32 @@ static void kbase_csf_user_io_pages_vm_close(struct vm_area_struct *vma)
 {
 	struct kbase_queue *queue = vma->vm_private_data;
 	struct kbase_context *kctx;
+	struct kbase_device *kbdev;
+	int err;
+	bool reset_prevented = false;
 
 	if (WARN_ON(!queue))
 		return;
 
 	kctx = queue->kctx;
+	kbdev = kctx->kbdev;
+
+	/* Hold off GPU resets, when possible, for the duration of the unbind */
+	err = kbase_reset_gpu_prevent_and_wait(kbdev);
+	if (err)
+		dev_warn(kbdev->dev,
+			 "Unsuccessful GPU reset detected when unbinding queue (csi_index=%d), attempting to unbind regardless\n",
+			 queue->csi_index);
+	else
+		reset_prevented = true;
 
 	mutex_lock(&kctx->csf.lock);
 	kbase_csf_queue_unbind(queue);
 	mutex_unlock(&kctx->csf.lock);
 
+	if (reset_prevented)
+		kbase_reset_gpu_allow(kbdev);
+
 	/* Now as the vma is closed, drop the reference on mali device file */
 	fput(kctx->filp);
 }
@@ -3282,7 +3336,7 @@ static int kbase_csf_cpu_mmap_user_io_pages(struct kbase_context *kctx,
 {
 	unsigned long cookie =
 		vma->vm_pgoff - PFN_DOWN(BASEP_MEM_CSF_USER_IO_PAGES_HANDLE);
-	size_t nr_pages = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
+	size_t nr_pages = vma_pages(vma);
 	struct kbase_queue *queue;
 	int err = 0;
 
@@ -3315,11 +3369,7 @@ static int kbase_csf_cpu_mmap_user_io_pages(struct kbase_context *kctx,
 	if (err)
 		goto map_failed;
 
-#if (KERNEL_VERSION(3, 7, 0) <= LINUX_VERSION_CODE)
 	vma->vm_flags |= VM_DONTCOPY | VM_DONTDUMP | VM_DONTEXPAND | VM_IO;
-#else
-	vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND | VM_RESERVED | VM_IO;
-#endif
 	/* TODO use VM_MIXEDMAP, since it is more appropriate as both types of
 	 * memory with and without "struct page" backing are being inserted here.
 	 * Hw Doorbell pages comes from the device register area so kernel does
@@ -3342,7 +3392,13 @@ static int kbase_csf_cpu_mmap_user_io_pages(struct kbase_context *kctx,
 	return 0;
 
 map_failed:
-	kbase_csf_queue_unbind(queue);
+	/* The queue cannot have got to KBASE_CSF_QUEUE_BOUND state if we
+	 * reached here, so safe to use a variant of unbind that only works on
+	 * stopped queues
+	 *
+	 * This is so we don't enter the CSF scheduler from this path.
+	 */
+	kbase_csf_queue_unbind_stopped(queue);
 
 	return err;
 }
@@ -3367,8 +3423,10 @@ static vm_fault_t kbase_csf_user_reg_vm_fault(struct vm_fault *vmf)
 #endif
 	struct kbase_context *kctx = vma->vm_private_data;
 	struct kbase_device *kbdev = kctx->kbdev;
+	struct memory_group_manager_device *mgm_dev = kbdev->mgm_dev;
 	unsigned long pfn = PFN_DOWN(kbdev->reg_start + USER_BASE);
 	size_t nr_pages = PFN_DOWN(vma->vm_end - vma->vm_start);
+	vm_fault_t ret = VM_FAULT_SIGBUS;
 
 	/* Few sanity checks up front */
 	if (WARN_ON(nr_pages != 1) ||
@@ -3377,11 +3435,22 @@ static vm_fault_t kbase_csf_user_reg_vm_fault(struct vm_fault *vmf)
 			PFN_DOWN(BASEP_MEM_CSF_USER_REG_PAGE_HANDLE)))
 		return VM_FAULT_SIGBUS;
 
-	/* TODO: check PM state here and don't map in the actual register page
-	 * if GPU is powered down or is about to be powered down.
-	 */
+	mutex_lock(&kbdev->pm.lock);
 
-	return vmf_insert_pfn_prot(vma, vma->vm_start, pfn, vma->vm_page_prot);
+	/* Don't map in the actual register page if GPU is powered down.
+	 * Always map in the dummy page in no mali builds.
+	 */
+	if (!kbdev->pm.backend.gpu_powered || IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI))
+		pfn = PFN_DOWN(as_phys_addr_t(kbdev->csf.dummy_user_reg_page));
+
+	ret = mgm_dev->ops.mgm_vmf_insert_pfn_prot(mgm_dev,
+						   KBASE_MEM_GROUP_CSF_FW, vma,
+						   vma->vm_start, pfn,
+						   vma->vm_page_prot);
+
+	mutex_unlock(&kbdev->pm.lock);
+
+	return ret;
 }
 
 static const struct vm_operations_struct kbase_csf_user_reg_vm_ops = {
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_mem_linux.h b/drivers/gpu/arm/bifrost/mali_kbase_mem_linux.h
index 85e030ab751a..c04b7fe59b0a 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_mem_linux.h
+++ b/drivers/gpu/arm/bifrost/mali_kbase_mem_linux.h
@@ -1,11 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  *
- * (C) COPYRIGHT 2010, 2012-2020 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010, 2012-2021 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,21 +17,16 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
-
-
 /**
- * @file mali_kbase_mem_linux.h
  * Base kernel memory APIs, Linux implementation.
  */
 
 #ifndef _KBASE_MEM_LINUX_H_
 #define _KBASE_MEM_LINUX_H_
 
-/** A HWC dump mapping */
+/* A HWC dump mapping */
 struct kbase_hwc_dma_mapping {
 	void       *cpu_va;
 	dma_addr_t  dma_pa;
@@ -43,7 +39,7 @@ struct kbase_hwc_dma_mapping {
  * @kctx:         The kernel context
  * @va_pages:     The number of pages of virtual address space to reserve
  * @commit_pages: The number of physical pages to allocate upfront
- * @extent:       The number of extra pages to allocate on each GPU fault which
+ * @extension:    The number of extra pages to allocate on each GPU fault which
  *                grows the region.
  * @flags:        bitmask of BASE_MEM_* flags to convey special requirements &
  *                properties for the new allocation.
@@ -53,8 +49,8 @@ struct kbase_hwc_dma_mapping {
  * Return: 0 on success or error code
  */
 struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx,
-		u64 va_pages, u64 commit_pages, u64 extent, u64 *flags,
-		u64 *gpu_va);
+					u64 va_pages, u64 commit_pages,
+					u64 extension, u64 *flags, u64 *gpu_va);
 
 /**
  * kbase_mem_query - Query properties of a GPU memory region
@@ -468,11 +464,11 @@ static inline vm_fault_t vmf_insert_pfn_prot(struct vm_area_struct *vma,
  */
 static inline struct rw_semaphore *kbase_mem_get_process_mmap_lock(void)
 {
-#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 8, 0)
+#if KERNEL_VERSION(5, 8, 0) > LINUX_VERSION_CODE
 	return &current->mm->mmap_sem;
-#else /* LINUX_VERSION_CODE >= KERNEL_VERSION(5, 8, 0) */
+#else /* KERNEL_VERSION(5, 8, 0) > LINUX_VERSION_CODE */
 	return &current->mm->mmap_lock;
-#endif /* LINUX_VERSION_CODE < KERNEL_VERSION(5, 8, 0) */
+#endif /* KERNEL_VERSION(5, 8, 0) > LINUX_VERSION_CODE */
 }
 
 #endif				/* _KBASE_MEM_LINUX_H_ */
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_mem_lowlevel.h b/drivers/gpu/arm/bifrost/mali_kbase_mem_lowlevel.h
index 70116030f233..ab09ec9b2b34 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_mem_lowlevel.h
+++ b/drivers/gpu/arm/bifrost/mali_kbase_mem_lowlevel.h
@@ -1,11 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  *
- * (C) COPYRIGHT 2012-2014,2018 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2012-2014, 2016-2018, 2020-2021 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,12 +17,8 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
-
-
 #ifndef _KBASE_MEM_LOWLEVEL_H
 #define _KBASE_MEM_LOWLEVEL_H
 
@@ -31,9 +28,7 @@
 
 #include <linux/dma-mapping.h>
 
-/**
- * @brief Flags for kbase_phy_allocator_pages_alloc
- */
+/* Flags for kbase_phy_allocator_pages_alloc */
 #define KBASE_PHY_PAGES_FLAG_DEFAULT (0)	/** Default allocation flag */
 #define KBASE_PHY_PAGES_FLAG_CLEAR   (1 << 0)	/** Clear the pages after allocation */
 #define KBASE_PHY_PAGES_FLAG_POISON  (1 << 1)	/** Fill the memory with a poison value */
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_mem_pool.c b/drivers/gpu/arm/bifrost/mali_kbase_mem_pool.c
index 0723e32e2003..1874a6f9afd7 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_mem_pool.c
+++ b/drivers/gpu/arm/bifrost/mali_kbase_mem_pool.c
@@ -1,11 +1,12 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  *
- * (C) COPYRIGHT 2015-2019 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2015-2021 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,8 +17,6 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 #include <mali_kbase.h>
@@ -154,20 +153,12 @@ static void kbase_mem_pool_spill(struct kbase_mem_pool *next_pool,
 struct page *kbase_mem_alloc_page(struct kbase_mem_pool *pool)
 {
 	struct page *p;
-	gfp_t gfp;
+	gfp_t gfp = GFP_HIGHUSER | __GFP_ZERO;
 	struct kbase_device *const kbdev = pool->kbdev;
 	struct device *const dev = kbdev->dev;
 	dma_addr_t dma_addr;
 	int i;
 
-#if defined(CONFIG_ARM) && !defined(CONFIG_HAVE_DMA_ATTRS) && \
-	LINUX_VERSION_CODE < KERNEL_VERSION(3, 5, 0)
-	/* DMA cache sync fails for HIGHMEM before 3.5 on ARM */
-	gfp = GFP_USER | __GFP_ZERO;
-#else
-	gfp = GFP_HIGHUSER | __GFP_ZERO;
-#endif
-
 	/* don't warn on higher order failures */
 	if (pool->order)
 		gfp |= __GFP_NOWARN;
@@ -318,7 +309,7 @@ void kbase_mem_pool_set_max_size(struct kbase_mem_pool *pool, size_t max_size)
 
 	kbase_mem_pool_unlock(pool);
 }
-
+KBASE_EXPORT_TEST_API(kbase_mem_pool_set_max_size);
 
 static unsigned long kbase_mem_pool_reclaim_count_objects(struct shrinker *s,
 		struct shrink_control *sc)
@@ -364,17 +355,6 @@ static unsigned long kbase_mem_pool_reclaim_scan_objects(struct shrinker *s,
 	return freed;
 }
 
-#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 0)
-static int kbase_mem_pool_reclaim_shrink(struct shrinker *s,
-		struct shrink_control *sc)
-{
-	if (sc->nr_to_scan == 0)
-		return kbase_mem_pool_reclaim_count_objects(s, sc);
-
-	return kbase_mem_pool_reclaim_scan_objects(s, sc);
-}
-#endif
-
 int kbase_mem_pool_init(struct kbase_mem_pool *pool,
 		const struct kbase_mem_pool_config *config,
 		unsigned int order,
@@ -398,19 +378,13 @@ int kbase_mem_pool_init(struct kbase_mem_pool *pool,
 	spin_lock_init(&pool->pool_lock);
 	INIT_LIST_HEAD(&pool->page_list);
 
-	/* Register shrinker */
-#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 0)
-	pool->reclaim.shrink = kbase_mem_pool_reclaim_shrink;
-#else
 	pool->reclaim.count_objects = kbase_mem_pool_reclaim_count_objects;
 	pool->reclaim.scan_objects = kbase_mem_pool_reclaim_scan_objects;
-#endif
 	pool->reclaim.seeks = DEFAULT_SEEKS;
 	/* Kernel versions prior to 3.1 :
-	 * struct shrinker does not define batch */
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 1, 0)
+	 * struct shrinker does not define batch
+	 */
 	pool->reclaim.batch = 0;
-#endif
 	register_shrinker(&pool->reclaim);
 
 	pool_dbg(pool, "initialized\n");
@@ -830,8 +804,8 @@ void kbase_mem_pool_free_pages_locked(struct kbase_mem_pool *pool,
 		nr_to_pool = kbase_mem_pool_capacity(pool);
 		nr_to_pool = min(nr_pages, nr_to_pool);
 
-		kbase_mem_pool_add_array_locked(pool, nr_pages, pages, false,
-				dirty);
+		kbase_mem_pool_add_array_locked(pool, nr_to_pool, pages, false,
+						dirty);
 
 		i += nr_to_pool;
 	}
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_mem_pool_debugfs.c b/drivers/gpu/arm/bifrost/mali_kbase_mem_pool_debugfs.c
index 5879fdf85b1d..e7d8fdc82e27 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_mem_pool_debugfs.c
+++ b/drivers/gpu/arm/bifrost/mali_kbase_mem_pool_debugfs.c
@@ -1,11 +1,12 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  *
- * (C) COPYRIGHT 2014-2019 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2020 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,8 +17,6 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 #include <linux/debugfs.h>
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_mem_pool_debugfs.h b/drivers/gpu/arm/bifrost/mali_kbase_mem_pool_debugfs.h
index 2932945b3185..b2a94d7b775b 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_mem_pool_debugfs.h
+++ b/drivers/gpu/arm/bifrost/mali_kbase_mem_pool_debugfs.h
@@ -1,11 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  *
- * (C) COPYRIGHT 2014-2019 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2020 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,8 +17,6 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 #ifndef _KBASE_MEM_POOL_DEBUGFS_H_
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_mem_pool_group.c b/drivers/gpu/arm/bifrost/mali_kbase_mem_pool_group.c
index aa2554805b5b..72a17b268502 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_mem_pool_group.c
+++ b/drivers/gpu/arm/bifrost/mali_kbase_mem_pool_group.c
@@ -1,11 +1,12 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  *
- * (C) COPYRIGHT 2019 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,8 +17,6 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 #include <mali_kbase.h>
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_mem_pool_group.h b/drivers/gpu/arm/bifrost/mali_kbase_mem_pool_group.h
index 0484f5940ad1..35333e916ae1 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_mem_pool_group.h
+++ b/drivers/gpu/arm/bifrost/mali_kbase_mem_pool_group.h
@@ -1,11 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  *
- * (C) COPYRIGHT 2019 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,8 +17,6 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 #ifndef _KBASE_MEM_POOL_GROUP_H_
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_mem_profile_debugfs.c b/drivers/gpu/arm/bifrost/mali_kbase_mem_profile_debugfs.c
index 85723f825054..113b69e4d410 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_mem_profile_debugfs.c
+++ b/drivers/gpu/arm/bifrost/mali_kbase_mem_profile_debugfs.c
@@ -1,11 +1,12 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  *
- * (C) COPYRIGHT 2012-2017, 2019-2020 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2012-2017, 2019-2021 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,23 +17,23 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 #include <mali_kbase.h>
 
 #ifdef CONFIG_DEBUG_FS
 
-/** Show callback for the @c mem_profile debugfs file.
+/**
+ * Show callback for the @c mem_profile debugfs file.
  *
  * This function is called to get the contents of the @c mem_profile debugfs
  * file. This is a report of current memory usage and distribution in userspace.
  *
- * @param sfile The debugfs entry
- * @param data Data associated with the entry
+ * @sfile: The debugfs entry
+ * @data:  Data associated with the entry
  *
- * @return 0 if it successfully prints data in debugfs entry file, non-zero otherwise
+ * Return: 0 if it successfully prints data in debugfs entry file, non-zero
+ * otherwise
  */
 static int kbasep_mem_profile_seq_show(struct seq_file *sfile, void *data)
 {
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_mem_profile_debugfs.h b/drivers/gpu/arm/bifrost/mali_kbase_mem_profile_debugfs.h
index 1462247c3bca..7b5695d44ae3 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_mem_profile_debugfs.h
+++ b/drivers/gpu/arm/bifrost/mali_kbase_mem_profile_debugfs.h
@@ -1,11 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  *
- * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2012-2016, 2020-2021 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,14 +17,9 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
-
-
 /**
- * @file mali_kbase_mem_profile_debugfs.h
  * Header file for mem profiles entries in debugfs
  *
  */
@@ -35,12 +31,17 @@
 #include <linux/seq_file.h>
 
 /**
- * @brief Remove entry from Mali memory profile debugfs
+ * Remove entry from Mali memory profile debugfs
+ * @kctx: The context whose debugfs file should be removed
  */
 void kbasep_mem_profile_debugfs_remove(struct kbase_context *kctx);
 
 /**
- * @brief Insert @p data to the debugfs file so it can be read by userspace
+ * Insert @p data to the debugfs file so it can be read by userspace
+ * @kctx: The context whose debugfs file @p data should be inserted to
+ * @data: A NULL-terminated string to be inserted to the debugfs file,
+ *             without the trailing new line character
+ * @size: The length of the @p data string
  *
  * The function takes ownership of @p data and frees it later when new data
  * is inserted.
@@ -48,10 +49,6 @@ void kbasep_mem_profile_debugfs_remove(struct kbase_context *kctx);
  * If the debugfs entry corresponding to the @p kctx doesn't exist,
  * an attempt will be made to create it.
  *
- * @param kctx The context whose debugfs file @p data should be inserted to
- * @param data A NULL-terminated string to be inserted to the debugfs file,
- *             without the trailing new line character
- * @param size The length of the @p data string
  * @return 0 if @p data inserted correctly
  *         -EAGAIN in case of error
  * @post @ref mem_profile_initialized will be set to @c true
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_mem_profile_debugfs_buf_size.h b/drivers/gpu/arm/bifrost/mali_kbase_mem_profile_debugfs_buf_size.h
index d55cc854c415..8489c550fc6b 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_mem_profile_debugfs_buf_size.h
+++ b/drivers/gpu/arm/bifrost/mali_kbase_mem_profile_debugfs_buf_size.h
@@ -1,11 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  *
- * (C) COPYRIGHT 2014, 2018-2019 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014, 2017-2021 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,12 +17,9 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 /**
- * @file mali_kbase_mem_profile_debugfs_buf_size.h
  * Header file for the size of the buffer to accumulate the histogram report text in
  */
 
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_mipe_gen_header.h b/drivers/gpu/arm/bifrost/mali_kbase_mipe_gen_header.h
index 72acadfae993..d1ea7ad24792 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_mipe_gen_header.h
+++ b/drivers/gpu/arm/bifrost/mali_kbase_mipe_gen_header.h
@@ -1,11 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  *
- * (C) COPYRIGHT 2010-2020 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2021 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,8 +17,6 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 /* THIS FILE IS AUTOGENERATED BY mali_trace_generator.py.
@@ -40,14 +39,14 @@
  * defined. See documentation below:
  */
 
-/**
+/*
  * The name of the variable where the result BLOB will be stored.
  */
 #if !defined(MIPE_HEADER_BLOB_VAR_NAME)
 #error "MIPE_HEADER_BLOB_VAR_NAME must be defined!"
 #endif
 
-/**
+/*
  * A compiler attribute for the BLOB variable.
  *
  * e.g. __attribute__((section("my_section")))
@@ -58,6 +57,17 @@
 #define MIPE_HEADER_BLOB_VAR_ATTRIBUTE
 #endif
 
+/*
+ * A compiler attribute for packing structures
+ *
+ * e.g. __packed
+ *
+ * Default value is __attribute__((__packed__))
+ */
+#if !defined(MIPE_HEADER_PACKED_ATTRIBUTE)
+#define MIPE_HEADER_PACKED_ATTRIBUTE __attribute__((__packed__))
+#endif
+
 /**
  * MIPE stream id.
  *
@@ -67,7 +77,7 @@
 #error "MIPE_HEADER_STREAM_ID must be defined!"
 #endif
 
-/**
+/*
  * MIPE packet class.
  *
  * See enum tl_packet_class.
@@ -76,7 +86,7 @@
 #error "MIPE_HEADER_PKT_CLASS must be defined!"
 #endif
 
-/**
+/*
  * The list of tracepoints to process.
  *
  * It should be defined as follows:
@@ -95,14 +105,14 @@
 #error "MIPE_HEADER_TRACEPOINT_LIST must be defined!"
 #endif
 
-/**
+/*
  * The number of entries in MIPE_HEADER_TRACEPOINT_LIST.
  */
 #if !defined(MIPE_HEADER_TRACEPOINT_LIST_SIZE)
 #error "MIPE_HEADER_TRACEPOINT_LIST_SIZE must be defined!"
 #endif
 
-/**
+/*
  * The list of enums to process.
  *
  * It should be defined as follows:
@@ -119,7 +129,7 @@
  */
 #if defined(MIPE_HEADER_ENUM_LIST)
 
-/**
+/*
  * Tracepoint message ID used for enums declaration.
  */
 #if !defined(MIPE_HEADER_ENUM_MSG_ID)
@@ -151,7 +161,7 @@ const struct
 		char _arg_types[sizeof(arg_types)];	\
 		u32  _size_arg_names;		\
 		char _arg_names[sizeof(arg_names)];	\
-	} __attribute__ ((__packed__)) __ ## name;
+	} MIPE_HEADER_PACKED_ATTRIBUTE __ ## name;
 
 #define ENUM_DESC(arg_name, value)					\
 	struct {							\
@@ -161,13 +171,13 @@ const struct
 		u32 _value;						\
 		u32 _value_str_len;					\
 		char _value_str[sizeof(#value)];			\
-	} __attribute__ ((__packed__)) __ ## arg_name ## _ ## value;
+	} MIPE_HEADER_PACKED_ATTRIBUTE __ ## arg_name ## _ ## value;
 
 	MIPE_HEADER_TRACEPOINT_LIST
 	MIPE_HEADER_ENUM_LIST
 #undef TRACEPOINT_DESC
 #undef ENUM_DESC
-} __attribute__((packed)) MIPE_HEADER_BLOB_VAR_NAME MIPE_HEADER_BLOB_VAR_ATTRIBUTE = {
+} MIPE_HEADER_PACKED_ATTRIBUTE MIPE_HEADER_BLOB_VAR_NAME MIPE_HEADER_BLOB_VAR_ATTRIBUTE = {
 	._mipe_w0 = MIPE_PACKET_HEADER_W0(
 		TL_PACKET_FAMILY_TL,
 		MIPE_HEADER_PKT_CLASS,
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_mipe_proto.h b/drivers/gpu/arm/bifrost/mali_kbase_mipe_proto.h
index 54667cfc6304..ee88ee69c0fc 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_mipe_proto.h
+++ b/drivers/gpu/arm/bifrost/mali_kbase_mipe_proto.h
@@ -1,11 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  *
- * (C) COPYRIGHT 2010-2020 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2021 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,8 +17,6 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 /* THIS FILE IS AUTOGENERATED BY mali_trace_generator.py.
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_native_mgm.c b/drivers/gpu/arm/bifrost/mali_kbase_native_mgm.c
index 38ae46e0ddf1..957d884b9c11 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_native_mgm.c
+++ b/drivers/gpu/arm/bifrost/mali_kbase_native_mgm.c
@@ -1,11 +1,12 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  *
- * (C) COPYRIGHT 2019 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,8 +17,6 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 #include <linux/gfp.h>
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_native_mgm.h b/drivers/gpu/arm/bifrost/mali_kbase_native_mgm.h
index 431b1f4cb5db..f14fee42ca12 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_native_mgm.h
+++ b/drivers/gpu/arm/bifrost/mali_kbase_native_mgm.h
@@ -1,11 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  *
- * (C) COPYRIGHT 2019 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,8 +17,6 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 #ifndef _KBASE_NATIVE_MGM_H_
@@ -25,7 +24,7 @@
 
 #include <linux/memory_group_manager.h>
 
-/**
+/*
  * kbase_native_mgm_dev - Native memory group manager device
  *
  * An implementation of the memory group manager interface that is intended for
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_platform_fake.c b/drivers/gpu/arm/bifrost/mali_kbase_platform_fake.c
index fbb090e6c21f..02bfb256ce2a 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_platform_fake.c
+++ b/drivers/gpu/arm/bifrost/mali_kbase_platform_fake.c
@@ -1,11 +1,12 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  *
- * (C) COPYRIGHT 2011-2014, 2016-2017 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2011-2014, 2016-2017, 2020-2021 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,8 +17,6 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 #include <linux/errno.h>
@@ -41,14 +40,13 @@ static struct platform_device *mali_device;
 
 #ifndef CONFIG_OF
 /**
- * @brief Convert data in struct kbase_io_resources struct to Linux-specific resources
+ * Convert data in struct kbase_io_resources struct to Linux-specific resources
+ * @io_resources:      Input IO resource data
+ * @linux_resources:  Pointer to output array of Linux resource structures
  *
  * Function converts data in struct kbase_io_resources struct to an array of Linux resource structures. Note that function
  * assumes that size of linux_resource array is at least PLATFORM_CONFIG_RESOURCE_COUNT.
  * Resources are put in fixed order: I/O memory region, job IRQ, MMU IRQ, GPU IRQ.
- *
- * @param[in]  io_resource      Input IO resource data
- * @param[out] linux_resources  Pointer to output array of Linux resource structures
  */
 static void kbasep_config_parse_io_resources(const struct kbase_io_resources *io_resources, struct resource *const linux_resources)
 {
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_pm.c b/drivers/gpu/arm/bifrost/mali_kbase_pm.c
index 630ab1550045..3ded47ba85c5 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_pm.c
+++ b/drivers/gpu/arm/bifrost/mali_kbase_pm.c
@@ -1,11 +1,12 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  *
- * (C) COPYRIGHT 2010-2020 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2021 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,15 +17,10 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
-
-
 /**
- * @file mali_kbase_pm.c
- * Base kernel power management APIs
+ * DOC: Base kernel power management APIs
  */
 
 #include <mali_kbase.h>
@@ -191,7 +187,8 @@ void kbase_pm_driver_suspend(struct kbase_device *kbdev)
 
 #if !MALI_USE_CSF
 	/* Suspend job scheduler and associated components, so that it releases all
-	 * the PM active count references */
+	 * the PM active count references
+	 */
 	kbasep_js_suspend(kbdev);
 #else
 	kbase_csf_scheduler_pm_suspend(kbdev);
@@ -259,9 +256,15 @@ void kbase_pm_driver_resume(struct kbase_device *kbdev, bool arb_gpu_start)
 	kbase_pm_context_idle(kbdev);
 
 	/* Re-enable GPU hardware counters */
+#if MALI_USE_CSF
+	kbase_csf_scheduler_spin_lock(kbdev, &flags);
+	kbase_hwcnt_context_enable(kbdev->hwcnt_gpu_ctx);
+	kbase_csf_scheduler_spin_unlock(kbdev, flags);
+#else
 	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
 	kbase_hwcnt_context_enable(kbdev->hwcnt_gpu_ctx);
 	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+#endif
 
 	/* Resume vinstr */
 	kbase_vinstr_resume(kbdev->vinstr_ctx);
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_pm.h b/drivers/gpu/arm/bifrost/mali_kbase_pm.h
index 13565186c11f..f7340dd4d903 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_pm.h
+++ b/drivers/gpu/arm/bifrost/mali_kbase_pm.h
@@ -1,11 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  *
- * (C) COPYRIGHT 2010-2020 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2021 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,14 +17,9 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
-
-
 /**
- * @file mali_kbase_pm.h
  * Power management API definitions
  */
 
@@ -66,12 +62,12 @@ int kbase_pm_powerup(struct kbase_device *kbdev, unsigned int flags);
 
 /**
  * Halt the power management framework.
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
  * Should ensure that no new interrupts are generated,
  * but allow any currently running interrupt handlers to complete successfully.
  * The GPU is forced off by the time this function returns, regardless of
  * whether or not the active power policy asks for the GPU to be powered off.
- *
- * @param kbdev     The kbase device structure for the device (must be a valid pointer)
  */
 void kbase_pm_halt(struct kbase_device *kbdev);
 
@@ -161,6 +157,7 @@ void kbase_pm_context_idle(struct kbase_device *kbdev);
 /**
  * Suspend the GPU and prevent any further register accesses to it from Kernel
  * threads.
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
  *
  * This is called in response to an OS suspend event, and calls into the various
  * kbase components to complete the suspend.
@@ -168,21 +165,18 @@ void kbase_pm_context_idle(struct kbase_device *kbdev);
  * @note the mechanisms used here rely on all user-space threads being frozen
  * by the OS before we suspend. Otherwise, an IOCTL could occur that powers up
  * the GPU e.g. via atom submission.
- *
- * @param kbdev     The kbase device structure for the device (must be a valid pointer)
  */
 void kbase_pm_suspend(struct kbase_device *kbdev);
 
 /**
  * Resume the GPU, allow register accesses to it, and resume running atoms on
  * the GPU.
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
  *
  * This is called in response to an OS resume event, and calls into the various
  * kbase components to complete the resume.
  *
  * Also called when using VM arbiter, when GPU access has been granted.
- *
- * @param kbdev     The kbase device structure for the device (must be a valid pointer)
  */
 void kbase_pm_resume(struct kbase_device *kbdev);
 
@@ -199,8 +193,7 @@ void kbase_pm_vsync_callback(int buffer_updated, void *data);
 
 /**
  * kbase_pm_driver_suspend() - Put GPU and driver in suspend state
- * @param kbdev     The kbase device structure for the device
- *                  (must be a valid pointer)
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
  *
  * Suspend the GPU and prevent any further register accesses to it from Kernel
  * threads.
@@ -219,8 +212,8 @@ void kbase_pm_driver_suspend(struct kbase_device *kbdev);
 
 /**
  * kbase_pm_driver_resume() - Put GPU and driver in resume
- * @param kbdev     The kbase device structure for the device
- *                  (must be a valid pointer)
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ * @arb_gpu_start: Arbiter has notified we can use GPU
  *
  * Resume the GPU, allow register accesses to it, and resume running atoms on
  * the GPU.
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_regs_history_debugfs.c b/drivers/gpu/arm/bifrost/mali_kbase_regs_history_debugfs.c
index 7b86c58440db..0e7b7f033fda 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_regs_history_debugfs.c
+++ b/drivers/gpu/arm/bifrost/mali_kbase_regs_history_debugfs.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  *
  * (C) COPYRIGHT 2014, 2016, 2019-2020 ARM Limited. All rights reserved.
@@ -5,7 +6,7 @@
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,8 +17,6 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 #include "mali_kbase.h"
@@ -118,7 +117,7 @@ void kbase_io_history_add(struct kbase_io_history *h,
 void kbase_io_history_dump(struct kbase_device *kbdev)
 {
 	struct kbase_io_history *const h = &kbdev->io_history;
-	u16 i;
+	size_t i;
 	size_t iters;
 	unsigned long flags;
 
@@ -136,7 +135,7 @@ void kbase_io_history_dump(struct kbase_device *kbdev)
 			&h->buf[(h->count - iters + i) % h->size];
 		char const access = (io->addr & 1) ? 'w' : 'r';
 
-		dev_err(kbdev->dev, "%6i: %c: reg 0x%016lx val %08x\n", i,
+		dev_err(kbdev->dev, "%6zu: %c: reg 0x%016lx val %08x\n", i,
 			access, (unsigned long)(io->addr & ~0x1), io->value);
 	}
 
@@ -180,7 +179,7 @@ DEFINE_SIMPLE_ATTRIBUTE(regs_history_size_fops,
 static int regs_history_show(struct seq_file *sfile, void *data)
 {
 	struct kbase_io_history *const h = sfile->private;
-	u16 i;
+	size_t i;
 	size_t iters;
 	unsigned long flags;
 
@@ -199,8 +198,8 @@ static int regs_history_show(struct seq_file *sfile, void *data)
 			&h->buf[(h->count - iters + i) % h->size];
 		char const access = (io->addr & 1) ? 'w' : 'r';
 
-		seq_printf(sfile, "%6i: %c: reg 0x%016lx val %08x\n", i, access,
-				(unsigned long)(io->addr & ~0x1), io->value);
+		seq_printf(sfile, "%6zu: %c: reg 0x%016lx val %08x\n", i,
+			   access, (unsigned long)(io->addr & ~0x1), io->value);
 	}
 
 	spin_unlock_irqrestore(&h->lock, flags);
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_regs_history_debugfs.h b/drivers/gpu/arm/bifrost/mali_kbase_regs_history_debugfs.h
index 200c0c2d8de8..b202b22256c8 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_regs_history_debugfs.h
+++ b/drivers/gpu/arm/bifrost/mali_kbase_regs_history_debugfs.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  *
  * (C) COPYRIGHT 2014, 2016, 2020 ARM Limited. All rights reserved.
@@ -5,7 +6,7 @@
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,8 +17,6 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 /**
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_reset_gpu.h b/drivers/gpu/arm/bifrost/mali_kbase_reset_gpu.h
index 61bbb0b48490..cb8a082f6293 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_reset_gpu.h
+++ b/drivers/gpu/arm/bifrost/mali_kbase_reset_gpu.h
@@ -1,11 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  *
- * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,16 +17,142 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 #ifndef _KBASE_RESET_GPU_H_
 #define _KBASE_RESET_GPU_H_
 
+/**
+ * kbase_reset_gpu_prevent_and_wait - Prevent GPU resets from starting whilst
+ *                                    the current thread is accessing the GPU,
+ *                                    and wait for any in-flight reset to
+ *                                    finish.
+ * @kbdev: Device pointer
+ *
+ * This should be used when a potential access to the HW is going to be made
+ * from a non-atomic context.
+ *
+ * It will wait for any in-flight reset to finish before returning. Hence,
+ * correct lock ordering must be observed with respect to the calling thread
+ * and the reset worker thread.
+ *
+ * This does not synchronize general access to the HW, and so multiple threads
+ * can prevent GPU reset concurrently, whilst not being serialized. This is
+ * advantageous as the threads can make this call at points where they do not
+ * know for sure yet whether they will indeed access the GPU (for example, to
+ * respect lock ordering), without unnecessarily blocking others.
+ *
+ * Threads must still use other synchronization to ensure they access the HW
+ * consistently, at a point where they are certain it needs to be accessed.
+ *
+ * On success, ensure that when access to the GPU by the caller thread has
+ * finished, that it calls kbase_reset_gpu_allow() again to allow resets to
+ * happen.
+ *
+ * This may return a failure in cases such as a previous failure to reset the
+ * GPU within a reasonable time. If that happens, the GPU might be
+ * non-operational and the caller should not attempt any further access.
+ *
+ * Note:
+ * For atomic context, instead check kbase_reset_gpu_is_active().
+ *
+ * Return: 0 on success, or negative error code on failure.
+ */
+int kbase_reset_gpu_prevent_and_wait(struct kbase_device *kbdev);
+
+/**
+ * kbase_reset_gpu_try_prevent - Attempt to prevent GPU resets from starting
+ *                               whilst the current thread is accessing the
+ *                               GPU, unless a reset is already in progress.
+ * @kbdev: Device pointer
+ *
+ * Similar to kbase_reset_gpu_prevent_and_wait(), but it does not wait for an
+ * existing reset to complete. This can be used on codepaths that the Reset
+ * worker waits on, where use of kbase_reset_gpu_prevent_and_wait() would
+ * otherwise deadlock.
+ *
+ * Instead, a reset that is currently happening will cause this function to
+ * return an error code indicating that, and further resets will not have been
+ * prevented.
+ *
+ * In such cases, the caller must check for -EAGAIN, and take similar actions
+ * as for handling reset in atomic context. That is, they must cancel any
+ * actions that depended on reset being prevented, possibly deferring them
+ * until after the reset.
+ *
+ * Otherwise a successful return means that the caller can continue its actions
+ * safely in the knowledge that reset is prevented, and the reset worker will
+ * correctly wait instead of deadlocking against this thread.
+ *
+ * On success, ensure that when access to the GPU by the caller thread has
+ * finished, that it calls kbase_reset_gpu_allow() again to allow resets to
+ * happen.
+ *
+ * Refer to kbase_reset_gpu_prevent_and_wait() for more information.
+ *
+ * Return: 0 on success. -EAGAIN if a reset is currently happening. Other
+ * negative error codes on failure.
+ */
+int kbase_reset_gpu_try_prevent(struct kbase_device *kbdev);
+
+/**
+ * kbase_reset_gpu_allow - Allow GPU resets to happen again after having been
+ *                         previously prevented.
+ * @kbdev: Device pointer
+ *
+ * This should be used when a potential access to the HW has finished from a
+ * non-atomic context.
+ *
+ * It must be used from the same thread that originally made a previous call
+ * to kbase_reset_gpu_prevent_and_wait(). It must not be deferred to another
+ * thread.
+ */
+void kbase_reset_gpu_allow(struct kbase_device *kbdev);
+
+/**
+ * kbase_reset_gpu_assert_prevented - Make debugging checks that GPU reset is
+ *                                    currently prevented by the current
+ *                                    thread.
+ * @kbdev: Device pointer
+ *
+ * Make debugging checks that the current thread has made a call to
+ * kbase_reset_gpu_prevent_and_wait(), but has yet to make a subsequent call to
+ * kbase_reset_gpu_allow().
+ *
+ * CONFIG_LOCKDEP is required to prove that reset is indeed
+ * prevented. Otherwise only limited debugging checks can be made.
+ */
+void kbase_reset_gpu_assert_prevented(struct kbase_device *kbdev);
+
+/**
+ * kbase_reset_gpu_assert_failed_or_prevented - Make debugging checks that
+ *                                              either GPU reset previously
+ *                                              failed, or is currently
+ *                                              prevented.
+ *
+ * @kbdev: Device pointer
+ *
+ * As with kbase_reset_gpu_assert_prevented(), but also allow for paths where
+ * reset was not prevented due to a failure, yet we still need to execute the
+ * cleanup code following.
+ *
+ * Cleanup code following this call must handle any inconsistent state modified
+ * by the failed GPU reset, and must timeout any blocking operations instead of
+ * waiting forever.
+ */
+void kbase_reset_gpu_assert_failed_or_prevented(struct kbase_device *kbdev);
+
+/*
+ * Flags for kbase_prepare_to_reset_gpu
+ */
+#define RESET_FLAGS_NONE ((unsigned int)0)
+/* This reset should be treated as an unrecoverable error by HW counter logic */
+#define RESET_FLAGS_HWC_UNRECOVERABLE_ERROR ((unsigned int)(1 << 0))
+
 /**
  * kbase_prepare_to_reset_gpu_locked - Prepare for resetting the GPU.
  * @kbdev: Device pointer
+ * @flags: Bitfield indicating impact of reset (see flag defines)
  *
  * Caller is expected to hold the kbdev->hwaccess_lock.
  *
@@ -34,18 +161,20 @@
  * - false - Another thread is performing a reset, kbase_reset_gpu should
  *           not be called.
  */
-bool kbase_prepare_to_reset_gpu_locked(struct kbase_device *kbdev);
+bool kbase_prepare_to_reset_gpu_locked(struct kbase_device *kbdev,
+				       unsigned int flags);
 
 /**
  * kbase_prepare_to_reset_gpu - Prepare for resetting the GPU.
  * @kbdev: Device pointer
- *
+ * @flags: Bitfield indicating impact of reset (see flag defines)
+ *
  * Return: a boolean which should be interpreted as follows:
  * - true  - Prepared for reset, kbase_reset_gpu should be called.
  * - false - Another thread is performing a reset, kbase_reset_gpu should
  *           not be called.
  */
-bool kbase_prepare_to_reset_gpu(struct kbase_device *kbdev);
+bool kbase_prepare_to_reset_gpu(struct kbase_device *kbdev, unsigned int flags);
 
 /**
  * kbase_reset_gpu - Reset the GPU
@@ -95,8 +224,13 @@ int kbase_reset_gpu_silent(struct kbase_device *kbdev);
  * kbase_reset_gpu_is_active - Reports if the GPU is being reset
  * @kbdev: Device pointer
  *
- * Return: True if the GPU is in the process of being reset (or if the reset of
- * GPU failed, not applicable to Job Manager GPUs).
+ * Any changes made to the HW when this returns true may be lost, overwritten
+ * or corrupted.
+ *
+ * Note that unless appropriate locks are held when using this function, the
+ * state could change immediately afterwards.
+ *
+ * Return: True if the GPU is in the process of being reset.
  */
 bool kbase_reset_gpu_is_active(struct kbase_device *kbdev);
 
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_smc.c b/drivers/gpu/arm/bifrost/mali_kbase_smc.c
index b5c7b1289846..82c5a10a15f5 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_smc.c
+++ b/drivers/gpu/arm/bifrost/mali_kbase_smc.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  *
  * (C) COPYRIGHT 2015, 2018, 2020 ARM Limited. All rights reserved.
@@ -5,7 +6,7 @@
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,8 +17,6 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 #ifdef CONFIG_ARM64
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_smc.h b/drivers/gpu/arm/bifrost/mali_kbase_smc.h
index 221eb21a8c7f..9b89c321bf19 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_smc.h
+++ b/drivers/gpu/arm/bifrost/mali_kbase_smc.h
@@ -1,11 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  *
- * (C) COPYRIGHT 2015 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2015, 2020 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,12 +17,8 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
-
-
 #ifndef _KBASE_SMC_H_
 #define _KBASE_SMC_H_
 
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_softjobs.c b/drivers/gpu/arm/bifrost/mali_kbase_softjobs.c
index c164719b3d7b..f78063835ddd 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_softjobs.c
+++ b/drivers/gpu/arm/bifrost/mali_kbase_softjobs.c
@@ -1,11 +1,12 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  *
- * (C) COPYRIGHT 2011-2020 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2011-2021 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,12 +17,8 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
-
-
 #include <mali_kbase.h>
 
 #include <linux/dma-buf.h>
@@ -30,7 +27,7 @@
 #include <mali_kbase_sync.h>
 #endif
 #include <linux/dma-mapping.h>
-#include <mali_base_kernel.h>
+#include <uapi/gpu/arm/bifrost/mali_base_kernel.h>
 #include <mali_kbase_hwaccess_time.h>
 #include <mali_kbase_kinstr_jm.h>
 #include <mali_kbase_mem_linux.h>
@@ -45,9 +42,7 @@
 
 #if !MALI_USE_CSF
 /**
- * @file mali_kbase_softjobs.c
- *
- * This file implements the logic behind software only jobs that are
+ * DOC: This file implements the logic behind software only jobs that are
  * executed within the driver rather than being handed over to the GPU.
  */
 
@@ -138,7 +133,7 @@ static int kbase_dump_cpu_gpu_time(struct kbase_jd_atom *katom)
 	void *user_result;
 	struct timespec64 ts;
 	struct base_dump_cpu_gpu_counters data;
-	u64 system_time;
+	u64 system_time = 0ULL;
 	u64 cycle_counter;
 	u64 jc = katom->jc;
 	struct kbase_context *kctx = katom->kctx;
@@ -148,7 +143,11 @@ static int kbase_dump_cpu_gpu_time(struct kbase_jd_atom *katom)
 
 	/* Take the PM active reference as late as possible - otherwise, it could
 	 * delay suspend until we process the atom (which may be at the end of a
-	 * long chain of dependencies */
+	 * long chain of dependencies)
+	 */
+#ifdef CONFIG_MALI_ARBITER_SUPPORT
+	atomic_inc(&kctx->kbdev->pm.gpu_users_waiting);
+#endif /* CONFIG_MALI_ARBITER_SUPPORT */
 	pm_active_err = kbase_pm_context_active_handle_suspend(kctx->kbdev, KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE);
 	if (pm_active_err) {
 		struct kbasep_js_device_data *js_devdata = &kctx->kbdev->js_data;
@@ -166,6 +165,10 @@ static int kbase_dump_cpu_gpu_time(struct kbase_jd_atom *katom)
 
 		return pm_active_err;
 	}
+#ifdef CONFIG_MALI_ARBITER_SUPPORT
+	else
+		atomic_dec(&kctx->kbdev->pm.gpu_users_waiting);
+#endif /* CONFIG_MALI_ARBITER_SUPPORT */
 
 	kbase_backend_get_gpu_time(kctx->kbdev, &cycle_counter, &system_time,
 									&ts);
@@ -183,7 +186,8 @@ static int kbase_dump_cpu_gpu_time(struct kbase_jd_atom *katom)
 	/* GPU_WR access is checked on the range for returning the result to
 	 * userspace for the following reasons:
 	 * - security, this is currently how imported user bufs are checked.
-	 * - userspace ddk guaranteed to assume region was mapped as GPU_WR */
+	 * - userspace ddk guaranteed to assume region was mapped as GPU_WR
+	 */
 	user_result = kbase_vmap_prot(kctx, jc, sizeof(data), KBASE_REG_GPU_WR, &map);
 	if (!user_result)
 		return 0;
@@ -294,7 +298,7 @@ static void kbase_fence_debug_check_atom(struct kbase_jd_atom *katom)
 
 				if (!kbase_sync_fence_in_info_get(dep, &info)) {
 					dev_warn(dev,
-						 "\tVictim trigger atom %d fence [%p] %s: %s\n",
+						 "\tVictim trigger atom %d fence [%pK] %s: %s\n",
 						 kbase_jd_atom_id(kctx, dep),
 						 info.fence,
 						 info.name,
@@ -323,11 +327,11 @@ static void kbase_fence_debug_wait_timeout(struct kbase_jd_atom *katom)
 		return;
 	}
 
-	dev_warn(dev, "ctx %d_%d: Atom %d still waiting for fence [%p] after %dms\n",
+	dev_warn(dev, "ctx %d_%d: Atom %d still waiting for fence [%pK] after %dms\n",
 		 kctx->tgid, kctx->id,
 		 kbase_jd_atom_id(kctx, katom),
 		 info.fence, timeout_ms);
-	dev_warn(dev, "\tGuilty fence [%p] %s: %s\n",
+	dev_warn(dev, "\tGuilty fence [%pK] %s: %s\n",
 		 info.fence, info.name,
 		 kbase_sync_status_string(info.status));
 
@@ -715,7 +719,8 @@ out_unlock:
 
 out_cleanup:
 	/* Frees allocated memory for kbase_debug_copy_job struct, including
-	 * members, and sets jc to 0 */
+	 * members, and sets jc to 0
+	 */
 	kbase_debug_copy_finish(katom);
 	kfree(user_buffers);
 
@@ -723,7 +728,7 @@ out_cleanup:
 }
 #endif /* !MALI_USE_CSF */
 
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 6, 0)
+#if KERNEL_VERSION(5, 6, 0) <= LINUX_VERSION_CODE
 static void *dma_buf_kmap_page(struct kbase_mem_phy_alloc *gpu_alloc,
 	unsigned long page_num, struct page **page)
 {
@@ -804,16 +809,16 @@ int kbase_mem_copy_from_extres(struct kbase_context *kctx,
 		dma_to_copy = min(dma_buf->size,
 			(size_t)(buf_data->nr_extres_pages * PAGE_SIZE));
 		ret = dma_buf_begin_cpu_access(dma_buf,
-#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0) && !defined(CONFIG_CHROMEOS)
-				0, dma_to_copy,
+#if KERNEL_VERSION(4, 6, 0) > LINUX_VERSION_CODE && !defined(CONFIG_CHROMEOS)
+					       0, dma_to_copy,
 #endif
-				DMA_FROM_DEVICE);
+					       DMA_FROM_DEVICE);
 		if (ret)
 			goto out_unlock;
 
 		for (i = 0; i < dma_to_copy/PAGE_SIZE &&
 				target_page_nr < buf_data->nr_pages; i++) {
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 6, 0)
+#if KERNEL_VERSION(5, 6, 0) <= LINUX_VERSION_CODE
 			struct page *pg;
 			void *extres_page = dma_buf_kmap_page(gpu_alloc, i, &pg);
 #else
@@ -825,20 +830,20 @@ int kbase_mem_copy_from_extres(struct kbase_context *kctx,
 						buf_data->nr_pages,
 						&target_page_nr, offset);
 
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 6, 0)
+#if KERNEL_VERSION(5, 6, 0) <= LINUX_VERSION_CODE
 				kunmap(pg);
 #else
 				dma_buf_kunmap(dma_buf, i, extres_page);
 #endif
 				if (ret)
-					goto out_unlock;
+					break;
 			}
 		}
 		dma_buf_end_cpu_access(dma_buf,
-#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0) && !defined(CONFIG_CHROMEOS)
-				0, dma_to_copy,
+#if KERNEL_VERSION(4, 6, 0) > LINUX_VERSION_CODE && !defined(CONFIG_CHROMEOS)
+				       0, dma_to_copy,
 #endif
-				DMA_FROM_DEVICE);
+				       DMA_FROM_DEVICE);
 		break;
 	}
 	default:
@@ -926,11 +931,6 @@ int kbasep_jit_alloc_validate(struct kbase_context *kctx,
 
 #if !MALI_USE_CSF
 
-#if (KERNEL_VERSION(3, 18, 63) > LINUX_VERSION_CODE)
-#define offsetofend(TYPE, MEMBER) \
-	(offsetof(TYPE, MEMBER) + sizeof(((TYPE *)0)->MEMBER))
-#endif
-
 /*
  * Sizes of user data to copy for each just-in-time memory interface version
  *
@@ -1006,10 +1006,10 @@ static int kbase_jit_allocate_prepare(struct kbase_jd_atom *katom)
 		ret = kbasep_jit_alloc_validate(kctx, info);
 		if (ret)
 			goto free_info;
-		KBASE_TLSTREAM_TL_ATTRIB_ATOM_JITALLOCINFO(kbdev, katom,
-			info->va_pages, info->commit_pages, info->extent,
-			info->id, info->bin_id, info->max_allocations,
-			info->flags, info->usage_id);
+		KBASE_TLSTREAM_TL_ATTRIB_ATOM_JITALLOCINFO(
+			kbdev, katom, info->va_pages, info->commit_pages,
+			info->extension, info->id, info->bin_id,
+			info->max_allocations, info->flags, info->usage_id);
 	}
 
 	katom->jit_blocked = false;
@@ -1024,7 +1024,7 @@ static int kbase_jit_allocate_prepare(struct kbase_jd_atom *katom)
 	 * though the region is valid it doesn't represent the
 	 * same thing it used to.
 	 *
-	 * Complete validation of va_pages, commit_pages and extent
+	 * Complete validation of va_pages, commit_pages and extension
 	 * isn't done here as it will be done during the call to
 	 * kbase_mem_alloc.
 	 */
@@ -1100,7 +1100,7 @@ static int kbase_jit_allocate_process(struct kbase_jd_atom *katom)
 	}
 
 #if MALI_JIT_PRESSURE_LIMIT_BASE
-	/**
+	/*
 	 * If this is the only JIT_ALLOC atom in-flight or if JIT pressure limit
 	 * is disabled at the context scope, then bypass JIT pressure limit
 	 * logic in kbase_jit_allocate().
@@ -1228,10 +1228,10 @@ static int kbase_jit_allocate_process(struct kbase_jd_atom *katom)
 			 MIDGARD_MMU_BOTTOMLEVEL, kctx->jit_group_id);
 #endif
 
-		KBASE_TLSTREAM_TL_ATTRIB_ATOM_JIT(kbdev, katom,
-			info->gpu_alloc_addr, new_addr, info->flags,
-			entry_mmu_flags, info->id, info->commit_pages,
-			info->extent, info->va_pages);
+		KBASE_TLSTREAM_TL_ATTRIB_ATOM_JIT(
+			kbdev, katom, info->gpu_alloc_addr, new_addr,
+			info->flags, entry_mmu_flags, info->id,
+			info->commit_pages, info->extension, info->va_pages);
 		kbase_vunmap(kctx, &mapping);
 
 		kbase_trace_jit_report_gpu_mem(kctx, reg,
@@ -1429,41 +1429,27 @@ static int kbase_ext_res_prepare(struct kbase_jd_atom *katom)
 	struct base_external_resource_list *ext_res;
 	u64 count = 0;
 	size_t copy_size;
-	int ret;
 
 	user_ext_res = (__user struct base_external_resource_list *)
 			(uintptr_t) katom->jc;
 
 	/* Fail the job if there is no info structure */
-	if (!user_ext_res) {
-		ret = -EINVAL;
-		goto fail;
-	}
+	if (!user_ext_res)
+		return -EINVAL;
 
-	if (copy_from_user(&count, &user_ext_res->count, sizeof(u64)) != 0) {
-		ret = -EINVAL;
-		goto fail;
-	}
+	if (copy_from_user(&count, &user_ext_res->count, sizeof(u64)) != 0)
+		return -EINVAL;
 
 	/* Is the number of external resources in range? */
-	if (!count || count > BASE_EXT_RES_COUNT_MAX) {
-		ret = -EINVAL;
-		goto fail;
-	}
+	if (!count || count > BASE_EXT_RES_COUNT_MAX)
+		return -EINVAL;
 
 	/* Copy the information for safe access and future storage */
 	copy_size = sizeof(*ext_res);
 	copy_size += sizeof(struct base_external_resource) * (count - 1);
-	ext_res = kzalloc(copy_size, GFP_KERNEL);
-	if (!ext_res) {
-		ret = -ENOMEM;
-		goto fail;
-	}
-
-	if (copy_from_user(ext_res, user_ext_res, copy_size) != 0) {
-		ret = -EINVAL;
-		goto free_info;
-	}
+	ext_res = memdup_user(user_ext_res, copy_size);
+	if (IS_ERR(ext_res))
+		return PTR_ERR(ext_res);
 
 	/*
 	 * Overwrite the count with the first value incase it was changed
@@ -1474,11 +1460,6 @@ static int kbase_ext_res_prepare(struct kbase_jd_atom *katom)
 	katom->softjob_data = ext_res;
 
 	return 0;
-
-free_info:
-	kfree(ext_res);
-fail:
-	return ret;
 }
 
 static void kbase_ext_res_process(struct kbase_jd_atom *katom, bool map)
@@ -1654,7 +1635,9 @@ int kbase_prepare_soft_job(struct kbase_jd_atom *katom)
 			struct base_fence fence;
 			int fd;
 
-			if (0 != copy_from_user(&fence, (__user void *)(uintptr_t) katom->jc, sizeof(fence)))
+			if (copy_from_user(&fence,
+					   (__user void *)(uintptr_t)katom->jc,
+					   sizeof(fence)) != 0)
 				return -EINVAL;
 
 			fd = kbase_sync_fence_out_create(katom,
@@ -1663,7 +1646,8 @@ int kbase_prepare_soft_job(struct kbase_jd_atom *katom)
 				return -EINVAL;
 
 			fence.basep.fd = fd;
-			if (0 != copy_to_user((__user void *)(uintptr_t) katom->jc, &fence, sizeof(fence))) {
+			if (copy_to_user((__user void *)(uintptr_t)katom->jc,
+					 &fence, sizeof(fence)) != 0) {
 				kbase_sync_fence_out_remove(katom);
 				kbase_sync_fence_close_fd(fd);
 				fence.basep.fd = -EINVAL;
@@ -1676,7 +1660,9 @@ int kbase_prepare_soft_job(struct kbase_jd_atom *katom)
 			struct base_fence fence;
 			int ret;
 
-			if (0 != copy_from_user(&fence, (__user void *)(uintptr_t) katom->jc, sizeof(fence)))
+			if (copy_from_user(&fence,
+					   (__user void *)(uintptr_t)katom->jc,
+					   sizeof(fence)) != 0)
 				return -EINVAL;
 
 			/* Get a reference to the fence object */
@@ -1795,6 +1781,9 @@ void kbase_resume_suspended_soft_jobs(struct kbase_device *kbdev)
 		if (kbase_process_soft_job(katom_iter) == 0) {
 			kbase_finish_soft_job(katom_iter);
 			resched |= jd_done_nolock(katom_iter, NULL);
+#ifdef CONFIG_MALI_ARBITER_SUPPORT
+			atomic_dec(&kbdev->pm.gpu_users_waiting);
+#endif /* CONFIG_MALI_ARBITER_SUPPORT */
 		}
 		mutex_unlock(&kctx->jctx.lock);
 	}
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_strings.c b/drivers/gpu/arm/bifrost/mali_kbase_strings.c
index 22caa4a6d814..f38093031494 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_strings.c
+++ b/drivers/gpu/arm/bifrost/mali_kbase_strings.c
@@ -1,11 +1,12 @@
- /*
+// SPDX-License-Identifier: GPL-2.0
+/*
  *
- * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2016, 2020 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,9 +17,8 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
+
 #include "mali_kbase_strings.h"
 
 #define KBASE_DRV_NAME "mali"
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_strings.h b/drivers/gpu/arm/bifrost/mali_kbase_strings.h
index d2f1825314fe..e6ec7f3bc050 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_strings.h
+++ b/drivers/gpu/arm/bifrost/mali_kbase_strings.h
@@ -1,11 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  *
- * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2016, 2020 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,8 +17,6 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 extern const char kbase_drv_name[];
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_sync.h b/drivers/gpu/arm/bifrost/mali_kbase_sync.h
index 4e5ab3ca557a..ae3601f9bee4 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_sync.h
+++ b/drivers/gpu/arm/bifrost/mali_kbase_sync.h
@@ -1,11 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  *
- * (C) COPYRIGHT 2012-2016, 2018-2019 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2012-2016, 2018-2021 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,14 +17,10 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 /**
- * @file mali_kbase_sync.h
- *
- * This file contains our internal "API" for explicit fences.
+ * DOC: This file contains our internal "API" for explicit fences.
  * It hides the implementation details of the actual explicit fence mechanism
  * used (Android fences or sync file with DMA fences).
  */
@@ -31,6 +28,7 @@
 #ifndef MALI_KBASE_SYNC_H
 #define MALI_KBASE_SYNC_H
 
+#include <linux/fdtable.h>
 #include <linux/syscalls.h>
 #ifdef CONFIG_SYNC
 #include <sync.h>
@@ -165,7 +163,9 @@ void kbase_sync_fence_out_remove(struct kbase_jd_atom *katom);
  */
 static inline void kbase_sync_fence_close_fd(int fd)
 {
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 17, 0)
+#if KERNEL_VERSION(5, 11, 0) <= LINUX_VERSION_CODE
+	close_fd(fd);
+#elif KERNEL_VERSION(4, 17, 0) <= LINUX_VERSION_CODE
 	ksys_close(fd);
 #else
 	sys_close(fd);
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_sync_android.c b/drivers/gpu/arm/bifrost/mali_kbase_sync_android.c
index 41f740a7bc8c..e3d2e5555d23 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_sync_android.c
+++ b/drivers/gpu/arm/bifrost/mali_kbase_sync_android.c
@@ -1,11 +1,12 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  *
- * (C) COPYRIGHT 2012-2017 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2012-2017, 2020 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,8 +17,6 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 /*
@@ -50,15 +49,6 @@ struct mali_sync_pt {
 	int result;
 };
 
-#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 17, 0)
-/* For backwards compatibility with kernels before 3.17. After 3.17
- * sync_pt_parent is included in the kernel. */
-static inline struct sync_timeline *sync_pt_parent(struct sync_pt *pt)
-{
-	return pt->parent;
-}
-#endif
-
 static struct mali_sync_timeline *to_mali_sync_timeline(
 						struct sync_timeline *timeline)
 {
@@ -196,6 +186,7 @@ int kbase_sync_fence_stream_create(const char *name, int *const out_fd)
 	return 0;
 }
 
+#if !MALI_USE_CSF
 /* Allocates a sync point within the timeline.
  *
  * The timeline must be the one allocated by kbase_sync_timeline_alloc
@@ -225,10 +216,6 @@ int kbase_sync_fence_out_create(struct kbase_jd_atom *katom, int tl_fd)
 	struct sync_timeline *tl;
 	struct sync_pt *pt;
 	struct sync_fence *fence;
-#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 7, 0)
-	struct files_struct *files;
-	struct fdtable *fdt;
-#endif
 	int fd;
 	struct file *tl_file;
 
@@ -259,29 +246,11 @@ int kbase_sync_fence_out_create(struct kbase_jd_atom *katom, int tl_fd)
 	/* from here the fence owns the sync_pt */
 
 	/* create a fd representing the fence */
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0)
 	fd = get_unused_fd_flags(O_RDWR | O_CLOEXEC);
 	if (fd < 0) {
 		sync_fence_put(fence);
 		goto out;
 	}
-#else
-	fd = get_unused_fd();
-	if (fd < 0) {
-		sync_fence_put(fence);
-		goto out;
-	}
-
-	files = current->files;
-	spin_lock(&files->file_lock);
-	fdt = files_fdtable(files);
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 4, 0)
-	__set_close_on_exec(fd, fdt);
-#else
-	FD_SET(fd, fdt->close_on_exec);
-#endif
-	spin_unlock(&files->file_lock);
-#endif  /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0) */
 
 	/* bind fence to the new fd */
 	sync_fence_install(fence, fd);
@@ -289,7 +258,8 @@ int kbase_sync_fence_out_create(struct kbase_jd_atom *katom, int tl_fd)
 	katom->fence = sync_fence_fdget(fd);
 	if (katom->fence == NULL) {
 		/* The only way the fence can be NULL is if userspace closed it
-		 * for us, so we don't need to clear it up */
+		 * for us, so we don't need to clear it up
+		 */
 		fd = -EINVAL;
 		goto out;
 	}
@@ -305,6 +275,7 @@ int kbase_sync_fence_in_from_fd(struct kbase_jd_atom *katom, int fd)
 	katom->fence = sync_fence_fdget(fd);
 	return katom->fence ? 0 : -ENOENT;
 }
+#endif /* !MALI_USE_CSF */
 
 int kbase_sync_fence_validate(int fd)
 {
@@ -318,6 +289,7 @@ int kbase_sync_fence_validate(int fd)
 	return 0;
 }
 
+#if !MALI_USE_CSF
 /* Returns true if the specified timeline is allocated by Mali */
 static int kbase_sync_timeline_is_ours(struct sync_timeline *timeline)
 {
@@ -376,22 +348,14 @@ kbase_sync_fence_out_trigger(struct kbase_jd_atom *katom, int result)
 	if (!katom->fence)
 		return BASE_JD_EVENT_JOB_CANCELLED;
 
-#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 17, 0)
-	if (!list_is_singular(&katom->fence->pt_list_head)) {
-#else
 	if (katom->fence->num_fences != 1) {
-#endif
 		/* Not exactly one item in the list - so it didn't (directly)
-		 * come from us */
+		 * come from us
+		 */
 		return BASE_JD_EVENT_JOB_CANCELLED;
 	}
 
-#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 17, 0)
-	pt = list_first_entry(&katom->fence->pt_list_head,
-			      struct sync_pt, pt_list);
-#else
 	pt = container_of(katom->fence->cbs[0].sync_pt, struct sync_pt, base);
-#endif
 	timeline = sync_pt_parent(pt);
 
 	if (!kbase_sync_timeline_is_ours(timeline)) {
@@ -413,11 +377,7 @@ static inline int kbase_fence_get_status(struct sync_fence *fence)
 	if (!fence)
 		return -ENOENT;
 
-#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 17, 0)
-	return fence->status;
-#else
 	return atomic_read(&fence->status);
-#endif
 }
 
 static void kbase_fence_wait_callback(struct sync_fence *fence,
@@ -461,7 +421,8 @@ int kbase_sync_fence_in_wait(struct kbase_jd_atom *katom)
 	if (ret < 0) {
 		katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
 		/* We should cause the dependent jobs in the bag to be failed,
-		 * to do this we schedule the work queue to complete this job */
+		 * to do this we schedule the work queue to complete this job
+		 */
 		INIT_WORK(&katom->work, kbase_sync_fence_wait_worker);
 		queue_work(katom->kctx->jctx.job_done_wq, &katom->work);
 	}
@@ -473,7 +434,8 @@ void kbase_sync_fence_in_cancel_wait(struct kbase_jd_atom *katom)
 {
 	if (sync_fence_cancel_async(katom->fence, &katom->sync_waiter) != 0) {
 		/* The wait wasn't cancelled - leave the cleanup for
-		 * kbase_fence_wait_callback */
+		 * kbase_fence_wait_callback
+		 */
 		return;
 	}
 
@@ -540,3 +502,4 @@ void kbase_sync_fence_in_dump(struct kbase_jd_atom *katom)
 		sync_fence_wait(katom->fence, 1);
 }
 #endif
+#endif /* !MALI_USE_CSF */
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_sync_common.c b/drivers/gpu/arm/bifrost/mali_kbase_sync_common.c
index 866894bd0f94..39a68c268bd1 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_sync_common.c
+++ b/drivers/gpu/arm/bifrost/mali_kbase_sync_common.c
@@ -1,11 +1,12 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  *
- * (C) COPYRIGHT 2012-2016, 2018-2019 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2012-2016, 2018-2021 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,12 +17,10 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 /*
- * @file mali_kbase_sync_common.c
+ * @file
  *
  * Common code for our explicit fence functionality
  */
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_sync_file.c b/drivers/gpu/arm/bifrost/mali_kbase_sync_file.c
index 271873b9fe29..76ce17d4bd1f 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_sync_file.c
+++ b/drivers/gpu/arm/bifrost/mali_kbase_sync_file.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  *
  * (C) COPYRIGHT 2012-2020 ARM Limited. All rights reserved.
@@ -5,7 +6,7 @@
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,8 +17,6 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 /*
@@ -62,7 +61,7 @@ int kbase_sync_fence_stream_create(const char *name, int *const out_fd)
 #if !MALI_USE_CSF
 int kbase_sync_fence_out_create(struct kbase_jd_atom *katom, int stream_fd)
 {
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE)
 	struct fence *fence;
 #else
 	struct dma_fence *fence;
@@ -107,7 +106,7 @@ int kbase_sync_fence_out_create(struct kbase_jd_atom *katom, int stream_fd)
 
 int kbase_sync_fence_in_from_fd(struct kbase_jd_atom *katom, int fd)
 {
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE)
 	struct fence *fence = sync_file_get_fence(fd);
 #else
 	struct dma_fence *fence = sync_file_get_fence(fd);
@@ -124,7 +123,7 @@ int kbase_sync_fence_in_from_fd(struct kbase_jd_atom *katom, int fd)
 
 int kbase_sync_fence_validate(int fd)
 {
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE)
 	struct fence *fence = sync_file_get_fence(fd);
 #else
 	struct dma_fence *fence = sync_file_get_fence(fd);
@@ -160,7 +159,7 @@ kbase_sync_fence_out_trigger(struct kbase_jd_atom *katom, int result)
 	return (result != 0) ? BASE_JD_EVENT_JOB_CANCELLED : BASE_JD_EVENT_DONE;
 }
 
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE)
 static void kbase_fence_wait_callback(struct fence *fence,
 				      struct fence_cb *cb)
 #else
@@ -203,7 +202,7 @@ static void kbase_fence_wait_callback(struct dma_fence *fence,
 int kbase_sync_fence_in_wait(struct kbase_jd_atom *katom)
 {
 	int err;
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE)
 	struct fence *fence;
 #else
 	struct dma_fence *fence;
@@ -236,8 +235,8 @@ int kbase_sync_fence_in_wait(struct kbase_jd_atom *katom)
 		katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
 
 		/* We should cause the dependent jobs in the bag to be failed,
-		 * to do this we schedule the work queue to complete this job */
-
+		 * to do this we schedule the work queue to complete this job
+		 */
 		INIT_WORK(&katom->work, kbase_sync_fence_wait_worker);
 		queue_work(katom->kctx->jctx.job_done_wq, &katom->work);
 	}
@@ -249,7 +248,8 @@ void kbase_sync_fence_in_cancel_wait(struct kbase_jd_atom *katom)
 {
 	if (!kbase_fence_free_callbacks(katom)) {
 		/* The wait wasn't cancelled -
-		 * leave the cleanup for kbase_fence_wait_callback */
+		 * leave the cleanup for kbase_fence_wait_callback
+		 */
 		return;
 	}
 
@@ -325,7 +325,7 @@ void kbase_sync_fence_info_get(struct dma_fence *fence,
 int kbase_sync_fence_in_info_get(struct kbase_jd_atom *katom,
 				 struct kbase_sync_fence_info *info)
 {
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE)
 	struct fence *fence;
 #else
 	struct dma_fence *fence;
@@ -345,7 +345,7 @@ int kbase_sync_fence_in_info_get(struct kbase_jd_atom *katom,
 int kbase_sync_fence_out_info_get(struct kbase_jd_atom *katom,
 				  struct kbase_sync_fence_info *info)
 {
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE)
 	struct fence *fence;
 #else
 	struct dma_fence *fence;
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_trace_gpu_mem.c b/drivers/gpu/arm/bifrost/mali_kbase_trace_gpu_mem.c
index 7669895b3c5d..0458e17c48a2 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_trace_gpu_mem.c
+++ b/drivers/gpu/arm/bifrost/mali_kbase_trace_gpu_mem.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  *
  * (C) COPYRIGHT 2020 ARM Limited. All rights reserved.
@@ -5,7 +6,7 @@
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,8 +17,6 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 #include <mali_kbase.h>
@@ -127,31 +126,31 @@ static bool kbase_capture_dma_buf_mapping(struct kbase_context *kctx,
 	}
 
 	if (unique_buf_imported) {
-		struct kbase_dma_buf *buf_node =
-			kzalloc(sizeof(*buf_node), GFP_KERNEL);
+		struct kbase_dma_buf *new_buf_node =
+			kzalloc(sizeof(*new_buf_node), GFP_KERNEL);
 
-		if (buf_node == NULL) {
+		if (new_buf_node == NULL) {
 			dev_err(kctx->kbdev->dev, "Error allocating memory for kbase_dma_buf\n");
 			/* Dont account for it if we fail to allocate memory */
 			unique_buf_imported = false;
 		} else {
 			struct rb_node **new = &(root->rb_node), *parent = NULL;
 
-			buf_node->dma_buf = dma_buf;
-			buf_node->import_count = 1;
+			new_buf_node->dma_buf = dma_buf;
+			new_buf_node->import_count = 1;
 			while (*new) {
-				struct kbase_dma_buf *node;
+				struct kbase_dma_buf *new_node;
 
 				parent = *new;
-				node = rb_entry(parent, struct kbase_dma_buf,
-						dma_buf_node);
-				if (dma_buf < node->dma_buf)
+				new_node = rb_entry(parent, struct kbase_dma_buf,
+						   dma_buf_node);
+				if (dma_buf < new_node->dma_buf)
 					new = &(*new)->rb_left;
 				else
 					new = &(*new)->rb_right;
 			}
-			rb_link_node(&buf_node->dma_buf_node, parent, new);
-			rb_insert_color(&buf_node->dma_buf_node, root);
+			rb_link_node(&new_buf_node->dma_buf_node, parent, new);
+			rb_insert_color(&new_buf_node->dma_buf_node, root);
 		}
 	} else if (!WARN_ON(!buf_node)) {
 		buf_node->import_count++;
@@ -220,8 +219,3 @@ void kbase_add_dma_buf_usage(struct kbase_context *kctx,
 
 	mutex_unlock(&kbdev->dma_buf_lock);
 }
-
-#if !defined(CONFIG_TRACE_GPU_MEM) && !MALI_CUSTOMER_RELEASE
-#define CREATE_TRACE_POINTS
-#include "mali_gpu_mem_trace.h"
-#endif
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_trace_gpu_mem.h b/drivers/gpu/arm/bifrost/mali_kbase_trace_gpu_mem.h
index 7e95956f3132..b78b553809b5 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_trace_gpu_mem.h
+++ b/drivers/gpu/arm/bifrost/mali_kbase_trace_gpu_mem.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  *
  * (C) COPYRIGHT 2020 ARM Limited. All rights reserved.
@@ -5,7 +6,7 @@
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,8 +17,6 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 #ifndef _KBASE_TRACE_GPU_MEM_H_
@@ -25,8 +24,6 @@
 
 #ifdef CONFIG_TRACE_GPU_MEM
 #include <trace/events/gpu_mem.h>
-#elif !MALI_CUSTOMER_RELEASE
-#include "mali_gpu_mem_trace.h"
 #endif
 
 #define DEVICE_TGID ((u32) 0U)
@@ -34,9 +31,9 @@
 static void kbase_trace_gpu_mem_usage(struct kbase_device *kbdev,
 				      struct kbase_context *kctx)
 {
+#ifdef CONFIG_TRACE_GPU_MEM
 	lockdep_assert_held(&kbdev->gpu_mem_usage_lock);
 
-#if defined(CONFIG_TRACE_GPU_MEM) || !MALI_CUSTOMER_RELEASE
 	trace_gpu_mem_total(kbdev->id, DEVICE_TGID,
 			    kbdev->total_gpu_pages << PAGE_SHIFT);
 
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_utility.h b/drivers/gpu/arm/bifrost/mali_kbase_utility.h
index 8d4f044376a9..5911969d2ecd 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_utility.h
+++ b/drivers/gpu/arm/bifrost/mali_kbase_utility.h
@@ -1,11 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  *
- * (C) COPYRIGHT 2012-2013, 2015, 2018 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2012-2013, 2015, 2018, 2020 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,12 +17,8 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
-
-
 #ifndef _KBASE_UTILITY_H
 #define _KBASE_UTILITY_H
 
@@ -32,7 +29,7 @@
 static inline void kbase_timer_setup(struct timer_list *timer,
 				     void (*callback)(struct timer_list *timer))
 {
-#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 14, 0)
+#if KERNEL_VERSION(4, 14, 0) > LINUX_VERSION_CODE
 	setup_timer(timer, (void (*)(unsigned long)) callback,
 			(unsigned long) timer);
 #else
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_vinstr.c b/drivers/gpu/arm/bifrost/mali_kbase_vinstr.c
index 3b0e2d6855ce..bc992bb947a0 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_vinstr.c
+++ b/drivers/gpu/arm/bifrost/mali_kbase_vinstr.c
@@ -1,11 +1,12 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  *
- * (C) COPYRIGHT 2011-2020 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2011-2021 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,16 +17,14 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 #include "mali_kbase_vinstr.h"
 #include "mali_kbase_hwcnt_virtualizer.h"
 #include "mali_kbase_hwcnt_types.h"
-#include "mali_kbase_hwcnt_reader.h"
+#include <uapi/gpu/arm/bifrost/mali_kbase_hwcnt_reader.h>
 #include "mali_kbase_hwcnt_gpu.h"
-#include "mali_kbase_ioctl.h"
+#include <uapi/gpu/arm/bifrost/mali_kbase_ioctl.h>
 #include "mali_malisw.h"
 #include "mali_kbase_debug.h"
 
@@ -33,6 +32,7 @@
 #include <linux/fcntl.h>
 #include <linux/fs.h>
 #include <linux/hrtimer.h>
+#include <linux/log2.h>
 #include <linux/mm.h>
 #include <linux/mutex.h>
 #include <linux/poll.h>
@@ -359,11 +359,7 @@ static enum hrtimer_restart kbasep_vinstr_dump_timer(struct hrtimer *timer)
 	 * cancelled, and the worker itself won't reschedule this timer if
 	 * suspend_count != 0.
 	 */
-#if KERNEL_VERSION(3, 16, 0) > LINUX_VERSION_CODE
-	queue_work(system_wq, &vctx->dump_work);
-#else
-	queue_work(system_highpri_wq, &vctx->dump_work);
-#endif
+	kbase_hwcnt_virtualizer_queue_work(vctx->hvirt, &vctx->dump_work);
 	return HRTIMER_NORESTART;
 }
 
@@ -389,7 +385,7 @@ static void kbasep_vinstr_client_destroy(struct kbase_vinstr_client *vcli)
  *                                 the vinstr context.
  * @vctx:     Non-NULL pointer to vinstr context.
  * @setup:    Non-NULL pointer to hardware counter ioctl setup structure.
- *            setup->buffer_count must not be 0.
+ *            setup->buffer_count must not be 0 and must be a power of 2.
  * @out_vcli: Non-NULL pointer to where created client will be stored on
  *            success.
  *
@@ -407,6 +403,7 @@ static int kbasep_vinstr_client_create(
 	WARN_ON(!vctx);
 	WARN_ON(!setup);
 	WARN_ON(setup->buffer_count == 0);
+	WARN_ON(!is_power_of_2(setup->buffer_count));
 
 	vcli = kzalloc(sizeof(*vcli), GFP_KERNEL);
 	if (!vcli)
@@ -565,11 +562,8 @@ void kbase_vinstr_resume(struct kbase_vinstr_context *vctx)
 			}
 
 			if (has_periodic_clients)
-#if KERNEL_VERSION(3, 16, 0) > LINUX_VERSION_CODE
-				queue_work(system_wq, &vctx->dump_work);
-#else
-				queue_work(system_highpri_wq, &vctx->dump_work);
-#endif
+				kbase_hwcnt_virtualizer_queue_work(
+					vctx->hvirt, &vctx->dump_work);
 		}
 	}
 
@@ -586,7 +580,8 @@ int kbase_vinstr_hwcnt_reader_setup(
 
 	if (!vctx || !setup ||
 	    (setup->buffer_count == 0) ||
-	    (setup->buffer_count > MAX_BUFFER_COUNT))
+	    (setup->buffer_count > MAX_BUFFER_COUNT) ||
+	    !is_power_of_2(setup->buffer_count))
 		return -EINVAL;
 
 	errcode = kbasep_vinstr_client_create(vctx, setup, &vcli);
@@ -719,7 +714,9 @@ static long kbasep_vinstr_hwcnt_reader_ioctl_get_buffer(
 	if (unlikely(copy_to_user(buffer, meta, min_size)))
 		return -EFAULT;
 
-	atomic_inc(&cli->meta_idx);
+	/* Compare exchange meta idx to protect against concurrent getters */
+	if (meta_idx != atomic_cmpxchg(&cli->meta_idx, meta_idx, meta_idx + 1))
+		return -EBUSY;
 
 	return 0;
 }
@@ -791,7 +788,13 @@ static long kbasep_vinstr_hwcnt_reader_ioctl_put_buffer(
 		goto out;
 	}
 
-	atomic_inc(&cli->read_idx);
+	/* Compare exchange read idx to protect against concurrent putters */
+	if (read_idx !=
+	    atomic_cmpxchg(&cli->read_idx, read_idx, read_idx + 1)) {
+		ret = -EPERM;
+		goto out;
+	}
+
 out:
 	if (unlikely(kbuf != stack_kbuf))
 		kfree(kbuf);
@@ -823,11 +826,8 @@ static long kbasep_vinstr_hwcnt_reader_ioctl_set_interval(
 	 * worker is already queued.
 	 */
 	if ((interval != 0) && (cli->vctx->suspend_count == 0))
-#if KERNEL_VERSION(3, 16, 0) > LINUX_VERSION_CODE
-		queue_work(system_wq, &cli->vctx->dump_work);
-#else
-		queue_work(system_highpri_wq, &cli->vctx->dump_work);
-#endif
+		kbase_hwcnt_virtualizer_queue_work(cli->vctx->hvirt,
+						   &cli->vctx->dump_work);
 
 	mutex_unlock(&cli->vctx->lock);
 
@@ -898,11 +898,12 @@ static long kbasep_vinstr_hwcnt_reader_ioctl_get_api_version(
 	struct kbase_vinstr_client *cli, unsigned long arg, size_t size)
 {
 	long ret = -EINVAL;
-	u8 clk_cnt = cli->vctx->metadata->clk_cnt;
 
 	if (size == sizeof(u32)) {
 		ret = put_user(HWCNT_READER_API, (u32 __user *)arg);
 	} else if (size == sizeof(struct kbase_hwcnt_reader_api_version)) {
+		u8 clk_cnt = cli->vctx->metadata->clk_cnt;
+		unsigned long bytes = 0;
 		struct kbase_hwcnt_reader_api_version api_version = {
 			.version = HWCNT_READER_API,
 			.features = KBASE_HWCNT_READER_API_VERSION_NO_FEATURE,
@@ -915,8 +916,16 @@ static long kbasep_vinstr_hwcnt_reader_ioctl_get_api_version(
 			api_version.features |=
 			    KBASE_HWCNT_READER_API_VERSION_FEATURE_CYCLES_SHADER_CORES;
 
-		ret = copy_to_user(
+		bytes = copy_to_user(
 			(void __user *)arg, &api_version, sizeof(api_version));
+
+		/* copy_to_user returns zero in case of success.
+		 * If it fails, it returns the number of bytes that could NOT be copied
+		 */
+		if (bytes == 0)
+			ret = 0;
+		else
+			ret = -EFAULT;
 	}
 	return ret;
 }
@@ -1042,7 +1051,16 @@ static int kbasep_vinstr_hwcnt_reader_mmap(
 		return -EINVAL;
 
 	vm_size = vma->vm_end - vma->vm_start;
-	size = cli->dump_bufs.buf_cnt * cli->vctx->metadata->dump_buf_bytes;
+
+	/* The mapping is allowed to span the entirety of the page allocation,
+	 * not just the chunk where the dump buffers are allocated.
+	 * This accommodates the corner case where the combined size of the
+	 * dump buffers is smaller than a single page.
+	 * This does not pose a security risk as the pages are zeroed on
+	 * allocation, and anything out of bounds of the dump buffers is never
+	 * written to.
+	 */
+	size = (1ull << cli->dump_bufs.page_order) * PAGE_SIZE;
 
 	if (vma->vm_pgoff > (size >> PAGE_SHIFT))
 		return -EINVAL;
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_vinstr.h b/drivers/gpu/arm/bifrost/mali_kbase_vinstr.h
index 81d315f95567..a20f31aaf5d0 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_vinstr.h
+++ b/drivers/gpu/arm/bifrost/mali_kbase_vinstr.h
@@ -1,11 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  *
- * (C) COPYRIGHT 2015-2018 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2015-2018, 2020 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,8 +17,6 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 /*
diff --git a/drivers/gpu/arm/bifrost/mali_linux_trace.h b/drivers/gpu/arm/bifrost/mali_linux_trace.h
index be812f62c862..6fe48ff2f7da 100644
--- a/drivers/gpu/arm/bifrost/mali_linux_trace.h
+++ b/drivers/gpu/arm/bifrost/mali_linux_trace.h
@@ -1,11 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  *
- * (C) COPYRIGHT 2011-2016, 2018-2020 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2011-2016, 2018-2021 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,8 +17,6 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 #undef TRACE_SYSTEM
@@ -31,7 +30,7 @@
 #if defined(CONFIG_MALI_BIFROST_GATOR_SUPPORT)
 #define MALI_JOB_SLOTS_EVENT_CHANGED
 
-/**
+/*
  * mali_job_slots_event - Reports change of job slot status.
  * @gpu_id:   Kbase device id
  * @event_id: ORed together bitfields representing a type of event,
@@ -348,10 +347,7 @@ TRACE_EVENT(mali_jit_report,
 #endif /* MALI_JIT_PRESSURE_LIMIT_BASE */
 #endif /* !MALI_USE_CSF */
 
-#if (KERNEL_VERSION(4, 1, 0) <= LINUX_VERSION_CODE)
 TRACE_DEFINE_ENUM(KBASE_JIT_REPORT_ON_ALLOC_OR_FREE);
-#endif
-
 #if MALI_JIT_PRESSURE_LIMIT_BASE
 /* trace_mali_jit_report_pressure
  *
@@ -393,7 +389,6 @@ TRACE_EVENT(mali_jit_report_pressure,
 /* Enum of sysgraph message IDs */
 enum sysgraph_msg {
 	SGR_ARRIVE,
-	SGR_DEP_RES,
 	SGR_SUBMIT,
 	SGR_COMPLETE,
 	SGR_POST,
@@ -421,7 +416,7 @@ TRACE_EVENT(sysgraph,
 		__entry->message    = message;
 		__entry->atom_id    = atom_id;
 	),
-	TP_printk("msg=%u proc_id=%u, param1=%d\n", __entry->message,
+	TP_printk("msg=%u proc_id=%u, param1=%d", __entry->message,
 		 __entry->proc_id,  __entry->atom_id)
 );
 
@@ -447,7 +442,7 @@ TRACE_EVENT(sysgraph_gpu,
 		__entry->atom_id    = atom_id;
 		__entry->js         = js;
 	),
-	TP_printk("msg=%u proc_id=%u, param1=%d, param2=%d\n",
+	TP_printk("msg=%u proc_id=%u, param1=%d, param2=%d",
 		  __entry->message,  __entry->proc_id,
 		  __entry->atom_id, __entry->js)
 );
@@ -536,7 +531,7 @@ TRACE_EVENT(mali_jit_trim,
 	TP_printk("freed_pages=%zu", __entry->freed_pages)
 );
 
-#include "mali_kbase_debug_linux_ktrace.h"
+#include "debug/mali_kbase_debug_linux_ktrace.h"
 
 #endif /* _TRACE_MALI_H */
 
diff --git a/drivers/gpu/arm/bifrost/mali_malisw.h b/drivers/gpu/arm/bifrost/mali_malisw.h
index 3a4db10bdb3d..c0640440839d 100644
--- a/drivers/gpu/arm/bifrost/mali_malisw.h
+++ b/drivers/gpu/arm/bifrost/mali_malisw.h
@@ -1,11 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  *
- * (C) COPYRIGHT 2014-2015, 2018 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2015, 2018, 2020-2021 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,8 +17,6 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 /**
@@ -28,26 +27,11 @@
 #define _MALISW_H_
 
 #include <linux/version.h>
-#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 14, 0)
-#define U8_MAX          ((u8)~0U)
-#define S8_MAX          ((s8)(U8_MAX>>1))
-#define S8_MIN          ((s8)(-S8_MAX - 1))
-#define U16_MAX         ((u16)~0U)
-#define S16_MAX         ((s16)(U16_MAX>>1))
-#define S16_MIN         ((s16)(-S16_MAX - 1))
-#define U32_MAX         ((u32)~0U)
-#define S32_MAX         ((s32)(U32_MAX>>1))
-#define S32_MIN         ((s32)(-S32_MAX - 1))
-#define U64_MAX         ((u64)~0ULL)
-#define S64_MAX         ((s64)(U64_MAX>>1))
-#define S64_MIN         ((s64)(-S64_MAX - 1))
-#endif /* LINUX_VERSION_CODE */
-#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 5, 0)
-#define SIZE_MAX        (~(size_t)0)
-#endif /* LINUX_VERSION_CODE */
 
 /**
  * MIN - Return the lesser of two values.
+ * @x: value1
+ * @y: value2
  *
  * As a macro it may evaluate its arguments more than once.
  * Refer to MAX macro for more details
@@ -55,7 +39,9 @@
 #define MIN(x, y)	((x) < (y) ? (x) : (y))
 
 /**
- * MAX -  Return the greater of two values.
+ * MAX - Return the greater of two values.
+ * @x: value1
+ * @y: value2
  *
  * As a macro it may evaluate its arguments more than once.
  * If called on the same two arguments as MIN it is guaranteed to return
@@ -67,24 +53,27 @@
 #define MAX(x, y)	((x) < (y) ? (y) : (x))
 
 /**
- * @hideinitializer
- * Function-like macro for suppressing unused variable warnings. Where possible
- * such variables should be removed; this macro is present for cases where we
- * much support API backwards compatibility.
+ * Function-like macro for suppressing unused variable warnings.
+ * @x: unused variable
+ *
+ * Where possible such variables should be removed; this macro is present for
+ * cases where we must support API backwards compatibility.
  */
 #define CSTD_UNUSED(x)	((void)(x))
 
 /**
- * @hideinitializer
- * Function-like macro for use where "no behavior" is desired. This is useful
- * when compile time macros turn a function-like macro in to a no-op, but
- * where having no statement is otherwise invalid.
+ * Function-like macro for use where "no behavior" is desired.
+ * @...: no-op
+ *
+ * This is useful when compile time macros turn a function-like macro into a
+ * no-op, but where having no statement is otherwise invalid.
  */
 #define CSTD_NOP(...)	((void)#__VA_ARGS__)
 
 /**
- * @hideinitializer
  * Function-like macro for stringizing a single level macro.
+ * @x: macro's value
+ *
  * @code
  * #define MY_MACRO 32
  * CSTD_STR1( MY_MACRO )
@@ -94,10 +83,11 @@
 #define CSTD_STR1(x)	#x
 
 /**
- * @hideinitializer
- * Function-like macro for stringizing a macro's value. This should not be used
- * if the macro is defined in a way which may have no value; use the
- * alternative @c CSTD_STR2N macro should be used instead.
+ * Function-like macro for stringizing a macro's value.
+ * @x: macro's value
+ *
+ * This should not be used if the macro is defined in a way which may have no
+ * value; the alternative @c CSTD_STR2N macro should be used instead.
  * @code
  * #define MY_MACRO 32
  * CSTD_STR2( MY_MACRO )
diff --git a/drivers/gpu/arm/bifrost/mali_power_gpu_frequency_trace.c b/drivers/gpu/arm/bifrost/mali_power_gpu_frequency_trace.c
index b6fb5a094fab..f548b04d9a8c 100644
--- a/drivers/gpu/arm/bifrost/mali_power_gpu_frequency_trace.c
+++ b/drivers/gpu/arm/bifrost/mali_power_gpu_frequency_trace.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  *
  * (C) COPYRIGHT 2020 ARM Limited. All rights reserved.
@@ -5,7 +6,7 @@
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,8 +17,6 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 /* Create the trace point if not configured in kernel */
diff --git a/drivers/gpu/arm/bifrost/mali_power_gpu_frequency_trace.h b/drivers/gpu/arm/bifrost/mali_power_gpu_frequency_trace.h
index 3b90ae437db9..d6909a4cf911 100644
--- a/drivers/gpu/arm/bifrost/mali_power_gpu_frequency_trace.h
+++ b/drivers/gpu/arm/bifrost/mali_power_gpu_frequency_trace.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  *
  * (C) COPYRIGHT 2020 ARM Limited. All rights reserved.
@@ -5,7 +6,7 @@
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,8 +17,6 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 #ifndef _TRACE_POWER_GPU_FREQUENCY_MALI
diff --git a/drivers/gpu/arm/bifrost/mmu/backend/mali_kbase_mmu_csf.c b/drivers/gpu/arm/bifrost/mmu/backend/mali_kbase_mmu_csf.c
index 1d106999228a..8240817cc135 100644
--- a/drivers/gpu/arm/bifrost/mmu/backend/mali_kbase_mmu_csf.c
+++ b/drivers/gpu/arm/bifrost/mmu/backend/mali_kbase_mmu_csf.c
@@ -1,11 +1,12 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  *
- * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,18 +17,15 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 /**
- * Base kernel MMU management specific for CSF GPU.
+ * DOC: Base kernel MMU management specific for CSF GPU.
  */
 
 #include <mali_kbase.h>
 #include <gpu/mali_kbase_gpu_fault.h>
 #include <mali_kbase_ctx_sched.h>
-#include <mali_kbase_hwaccess_jm.h>
 #include <mali_kbase_reset_gpu.h>
 #include <mali_kbase_as_fault_debugfs.h>
 #include "../mali_kbase_mmu_internal.h"
@@ -70,17 +68,36 @@ void kbase_mmu_get_as_setup(struct kbase_mmu_table *mmut,
 static void submit_work_pagefault(struct kbase_device *kbdev, u32 as_nr,
 		struct kbase_fault *fault)
 {
+	unsigned long flags;
 	struct kbase_as *const as = &kbdev->as[as_nr];
+	struct kbase_context *kctx;
 
-	as->pf_data = (struct kbase_fault) {
-		.status = fault->status,
-		.addr = fault->addr,
-	};
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	kctx = kbase_ctx_sched_as_to_ctx_nolock(kbdev, as_nr);
 
-	if (kbase_ctx_sched_as_to_ctx_refcount(kbdev, as_nr)) {
-		WARN_ON(!queue_work(as->pf_wq, &as->work_pagefault));
-		atomic_inc(&kbdev->faults_pending);
+	if (kctx) {
+		kbase_ctx_sched_retain_ctx_refcount(kctx);
+
+		as->pf_data = (struct kbase_fault) {
+			.status = fault->status,
+			.addr = fault->addr,
+		};
+
+		/*
+		 * A page fault work item could already be pending for the
+		 * context's address space, when the page fault occurs for
+		 * MCU's address space.
+		 */
+		if (!queue_work(as->pf_wq, &as->work_pagefault)) {
+			dev_dbg(kbdev->dev,
+				"Page fault is already pending for as %u\n",
+				as_nr);
+			kbase_ctx_sched_release_ctx(kctx);
+		} else {
+			atomic_inc(&kbdev->faults_pending);
+		}
 	}
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
 }
 
 void kbase_mmu_report_mcu_as_fault_and_reset(struct kbase_device *kbdev,
@@ -107,11 +124,11 @@ void kbase_mmu_report_mcu_as_fault_and_reset(struct kbase_device *kbdev,
 
 	/* Report MMU fault for all address spaces (except MCU_AS_NR) */
 	for (as_no = 1; as_no < kbdev->nr_hw_address_spaces; as_no++)
-		if (kbase_ctx_sched_as_to_ctx(kbdev, as_no))
-			submit_work_pagefault(kbdev, as_no, fault);
+		submit_work_pagefault(kbdev, as_no, fault);
 
 	/* GPU reset is required to recover */
-	if (kbase_prepare_to_reset_gpu(kbdev))
+	if (kbase_prepare_to_reset_gpu(kbdev,
+				       RESET_FLAGS_HWC_UNRECOVERABLE_ERROR))
 		kbase_reset_gpu(kbdev);
 }
 KBASE_EXPORT_TEST_API(kbase_mmu_report_mcu_as_fault_and_reset);
@@ -172,7 +189,7 @@ void kbase_gpu_report_bus_fault_and_kill(struct kbase_context *kctx,
 	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
 }
 
-/**
+/*
  * The caller must ensure it's retained the ctx to prevent it from being
  * scheduled out whilst it's being worked on.
  */
@@ -483,18 +500,25 @@ static void submit_work_gpufault(struct kbase_device *kbdev, u32 status,
 {
 	unsigned long flags;
 	struct kbase_as *const as = &kbdev->as[as_nr];
+	struct kbase_context *kctx;
 
 	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
-	as->gf_data = (struct kbase_fault) {
+	kctx = kbase_ctx_sched_as_to_ctx_nolock(kbdev, as_nr);
+
+	if (kctx) {
+		kbase_ctx_sched_retain_ctx_refcount(kctx);
+
+		as->gf_data = (struct kbase_fault) {
 			.status = status,
 			.addr = address,
-	};
-	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+		};
 
-	if (kbase_ctx_sched_as_to_ctx_refcount(kbdev, as_nr)) {
-		WARN_ON(!queue_work(as->pf_wq, &as->work_gpufault));
-		atomic_inc(&kbdev->faults_pending);
+		if (WARN_ON(!queue_work(as->pf_wq, &as->work_gpufault)))
+			kbase_ctx_sched_release_ctx(kctx);
+		else
+			atomic_inc(&kbdev->faults_pending);
 	}
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
 }
 
 void kbase_mmu_gpu_fault_interrupt(struct kbase_device *kbdev, u32 status,
diff --git a/drivers/gpu/arm/bifrost/mmu/backend/mali_kbase_mmu_jm.c b/drivers/gpu/arm/bifrost/mmu/backend/mali_kbase_mmu_jm.c
index b0187a46b733..ae334c182a69 100644
--- a/drivers/gpu/arm/bifrost/mmu/backend/mali_kbase_mmu_jm.c
+++ b/drivers/gpu/arm/bifrost/mmu/backend/mali_kbase_mmu_jm.c
@@ -1,11 +1,12 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  *
- * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,12 +17,10 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 /**
- * Base kernel MMU management specific for Job Manager GPU.
+ * DOC: Base kernel MMU management specific for Job Manager GPU.
  */
 
 #include <mali_kbase.h>
@@ -97,7 +96,7 @@ void kbase_gpu_report_bus_fault_and_kill(struct kbase_context *kctx,
 				 KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED);
 }
 
-/**
+/*
  * The caller must ensure it's retained the ctx to prevent it from being
  * scheduled out whilst it's being worked on.
  */
@@ -144,6 +143,7 @@ void kbase_mmu_report_fault_and_kill(struct kbase_context *kctx,
 		kctx->pid);
 
 	/* hardware counters dump fault handling */
+	spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
 	if ((kbdev->hwcnt.kctx) && (kbdev->hwcnt.kctx->as_nr == as_no) &&
 			(kbdev->hwcnt.backend.state ==
 						KBASE_INSTR_STATE_DUMPING)) {
@@ -152,6 +152,7 @@ void kbase_mmu_report_fault_and_kill(struct kbase_context *kctx,
 					kbdev->hwcnt.addr_bytes)))
 			kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_FAULT;
 	}
+	spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
 
 	/* Stop the kctx from submitting more jobs and cause it to be scheduled
 	 * out/rescheduled - this will occur on releasing the context's refcount
@@ -200,10 +201,12 @@ static void kbase_mmu_interrupt_process(struct kbase_device *kbdev,
 		struct kbase_context *kctx, struct kbase_as *as,
 		struct kbase_fault *fault)
 {
+	unsigned long flags;
+
 	lockdep_assert_held(&kbdev->hwaccess_lock);
 
 	dev_dbg(kbdev->dev,
-		"Entering %s kctx %p, as %p\n",
+		"Entering %s kctx %pK, as %pK\n",
 		__func__, (void *)kctx, (void *)as);
 
 	if (!kctx) {
@@ -237,11 +240,13 @@ static void kbase_mmu_interrupt_process(struct kbase_device *kbdev,
 		 * hw counters dumping in progress, signal the
 		 * other thread that it failed
 		 */
+  		spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
 		if ((kbdev->hwcnt.kctx == kctx) &&
 		    (kbdev->hwcnt.backend.state ==
 					KBASE_INSTR_STATE_DUMPING))
 			kbdev->hwcnt.backend.state =
 						KBASE_INSTR_STATE_FAULT;
+  		spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
 
 		/*
 		 * Stop the kctx from submitting more jobs and cause it
@@ -250,14 +255,10 @@ static void kbase_mmu_interrupt_process(struct kbase_device *kbdev,
 		 */
 		kbasep_js_clear_submit_allowed(js_devdata, kctx);
 
-		if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_AARCH64_MMU))
-			dev_warn(kbdev->dev,
-					"Bus error in AS%d at VA=0x%016llx, IPA=0x%016llx\n",
-					as->number, fault->addr,
-					fault->extra_addr);
-		else
-			dev_warn(kbdev->dev, "Bus error in AS%d at 0x%016llx\n",
-					as->number, fault->addr);
+		dev_warn(kbdev->dev,
+				"Bus error in AS%d at VA=0x%016llx, IPA=0x%016llx\n",
+				as->number, fault->addr,
+				fault->extra_addr);
 
 		/*
 		 * We need to switch to UNMAPPED mode - but we do this in a
@@ -271,7 +272,7 @@ static void kbase_mmu_interrupt_process(struct kbase_device *kbdev,
 	}
 
 	dev_dbg(kbdev->dev,
-		"Leaving %s kctx %p, as %p\n",
+		"Leaving %s kctx %pK, as %pK\n",
 		__func__, (void *)kctx, (void *)as);
 }
 
@@ -370,14 +371,11 @@ void kbase_mmu_interrupt(struct kbase_device *kbdev, u32 irq_stat)
 		/* record the fault status */
 		fault->status = kbase_reg_read(kbdev, MMU_AS_REG(as_no,
 				AS_FAULTSTATUS));
-
-		if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_AARCH64_MMU)) {
-			fault->extra_addr = kbase_reg_read(kbdev,
-					MMU_AS_REG(as_no, AS_FAULTEXTRA_HI));
-			fault->extra_addr <<= 32;
-			fault->extra_addr |= kbase_reg_read(kbdev,
-					MMU_AS_REG(as_no, AS_FAULTEXTRA_LO));
-		}
+		fault->extra_addr = kbase_reg_read(kbdev,
+				MMU_AS_REG(as_no, AS_FAULTEXTRA_HI));
+		fault->extra_addr <<= 32;
+		fault->extra_addr |= kbase_reg_read(kbdev,
+				MMU_AS_REG(as_no, AS_FAULTEXTRA_LO));
 
 		if (kbase_as_has_bus_fault(as, fault)) {
 			/* Mark bus fault as handled.
@@ -418,7 +416,7 @@ int kbase_mmu_switch_to_ir(struct kbase_context *const kctx,
 	struct kbase_va_region *const reg)
 {
 	dev_dbg(kctx->kbdev->dev,
-		"Switching to incremental rendering for region %p\n",
+		"Switching to incremental rendering for region %pK\n",
 		(void *)reg);
 	return kbase_job_slot_softstop_start_rp(kctx, reg);
 }
diff --git a/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu.c b/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu.c
index a5cda009426d..0761f68c1234 100644
--- a/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu.c
+++ b/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu.c
@@ -1,11 +1,12 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  *
- * (C) COPYRIGHT 2010-2020 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2021 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,13 +17,10 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 /**
- * @file mali_kbase_mmu.c
- * Base kernel MMU management.
+ * DOC: Base kernel MMU management.
  */
 
 #include <linux/kernel.h>
@@ -37,8 +35,6 @@
 #include <mali_kbase_defs.h>
 #include <mali_kbase_hw.h>
 #include <mmu/mali_kbase_mmu_hw.h>
-#include <mali_kbase_hwaccess_jm.h>
-#include <mali_kbase_hwaccess_time.h>
 #include <mali_kbase_mem.h>
 #include <mali_kbase_reset_gpu.h>
 #include <mmu/mali_kbase_mmu.h>
@@ -84,21 +80,20 @@ static void kbase_mmu_flush_invalidate_no_ctx(struct kbase_device *kbdev,
 		u64 vpfn, size_t nr, bool sync, int as_nr);
 
 /**
- * kbase_mmu_sync_pgd - sync page directory to memory
+ * kbase_mmu_sync_pgd() - sync page directory to memory when needed.
  * @kbdev:	Device pointer.
  * @handle:	Address of DMA region.
  * @size:       Size of the region to sync.
  *
  * This should be called after each page directory update.
  */
-
 static void kbase_mmu_sync_pgd(struct kbase_device *kbdev,
 		dma_addr_t handle, size_t size)
 {
-	/* If page table is not coherent then ensure the gpu can read
+	/* In a non-coherent system, ensure the GPU can read
 	 * the pages from memory
 	 */
-	if (kbdev->system_coherency != COHERENCY_ACE)
+	if (kbdev->system_coherency == COHERENCY_NONE)
 		dma_sync_single_for_device(kbdev->dev, handle, size,
 				DMA_TO_DEVICE);
 }
@@ -119,7 +114,7 @@ static int kbase_mmu_update_pages_no_flush(struct kbase_context *kctx, u64 vpfn,
 /**
  * reg_grow_calc_extra_pages() - Calculate the number of backed pages to add to
  *                               a region on a GPU page fault
- *
+ * @kbdev:         KBase device
  * @reg:           The region that will be backed with more pages
  * @fault_rel_pfn: PFN of the fault relative to the start of the region
  *
@@ -135,20 +130,21 @@ static int kbase_mmu_update_pages_no_flush(struct kbase_context *kctx, u64 vpfn,
 static size_t reg_grow_calc_extra_pages(struct kbase_device *kbdev,
 		struct kbase_va_region *reg, size_t fault_rel_pfn)
 {
-	size_t multiple = reg->extent;
+	size_t multiple = reg->extension;
 	size_t reg_current_size = kbase_reg_current_backed_size(reg);
 	size_t minimum_extra = fault_rel_pfn - reg_current_size + 1;
 	size_t remainder;
 
 	if (!multiple) {
-		dev_warn(kbdev->dev,
-			"VA Region 0x%llx extent was 0, allocator needs to set this properly for KBASE_REG_PF_GROW\n",
+		dev_warn(
+			kbdev->dev,
+			"VA Region 0x%llx extension was 0, allocator needs to set this properly for KBASE_REG_PF_GROW\n",
 			((unsigned long long)reg->start_pfn) << PAGE_SHIFT);
 		return minimum_extra;
 	}
 
 	/* Calculate the remainder to subtract from minimum_extra to make it
-	 * the desired (rounded down) multiple of the extent.
+	 * the desired (rounded down) multiple of the extension.
 	 * Depending on reg's flags, the base used for calculating multiples is
 	 * different
 	 */
@@ -565,7 +561,7 @@ void kbase_mmu_page_fault_worker(struct work_struct *data)
 
 	kbdev = container_of(faulting_as, struct kbase_device, as[as_no]);
 	dev_dbg(kbdev->dev,
-		"Entering %s %p, fault_pfn %lld, as_no %d\n",
+		"Entering %s %pK, fault_pfn %lld, as_no %d\n",
 		__func__, (void *)data, fault_pfn, as_no);
 
 	/* Grab the context that was already refcounted in kbase_mmu_interrupt()
@@ -638,21 +634,13 @@ void kbase_mmu_page_fault_worker(struct work_struct *data)
 		goto fault_done;
 
 	case AS_FAULTSTATUS_EXCEPTION_CODE_ADDRESS_SIZE_FAULT:
-		if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_AARCH64_MMU))
-			kbase_mmu_report_fault_and_kill(kctx, faulting_as,
-					"Address size fault", fault);
-		else
-			kbase_mmu_report_fault_and_kill(kctx, faulting_as,
-					"Unknown fault code", fault);
+		kbase_mmu_report_fault_and_kill(kctx, faulting_as,
+				"Address size fault", fault);
 		goto fault_done;
 
 	case AS_FAULTSTATUS_EXCEPTION_CODE_MEMORY_ATTRIBUTES_FAULT:
-		if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_AARCH64_MMU))
-			kbase_mmu_report_fault_and_kill(kctx, faulting_as,
-					"Memory attributes fault", fault);
-		else
-			kbase_mmu_report_fault_and_kill(kctx, faulting_as,
-					"Unknown fault code", fault);
+		kbase_mmu_report_fault_and_kill(kctx, faulting_as,
+				"Memory attributes fault", fault);
 		goto fault_done;
 
 	default:
@@ -718,6 +706,10 @@ page_fault_retry:
 		goto fault_done;
 	}
 
+	if (AS_FAULTSTATUS_ACCESS_TYPE_GET(fault_status) ==
+		AS_FAULTSTATUS_ACCESS_TYPE_READ)
+		dev_warn(kbdev->dev, "Grow on pagefault while reading");
+
 	/* find the size we need to grow it by
 	 * we know the result fit in a size_t due to
 	 * kbase_region_tracker_find_region_enclosing_address
@@ -852,7 +844,7 @@ page_fault_retry:
 
 			if (kbase_mmu_switch_to_ir(kctx, region) >= 0) {
 				dev_dbg(kctx->kbdev->dev,
-					"Get region %p for IR\n",
+					"Get region %pK for IR\n",
 					(void *)region);
 				kbase_va_region_alloc_get(kctx, region);
 			}
@@ -980,7 +972,7 @@ fault_done:
 	release_ctx(kbdev, kctx);
 
 	atomic_dec(&kbdev->faults_pending);
-	dev_dbg(kbdev->dev, "Leaving page_fault_worker %p\n", (void *)data);
+	dev_dbg(kbdev->dev, "Leaving page_fault_worker %pK\n", (void *)data);
 }
 
 static phys_addr_t kbase_mmu_alloc_pgd(struct kbase_device *kbdev,
@@ -1557,7 +1549,7 @@ static void kbase_mmu_flush_invalidate_noretain(struct kbase_context *kctx,
 		 */
 		dev_err(kbdev->dev, "Flush for GPU page table update did not complete. Issuing GPU soft-reset to recover\n");
 
-		if (kbase_prepare_to_reset_gpu_locked(kbdev))
+		if (kbase_prepare_to_reset_gpu_locked(kbdev, RESET_FLAGS_NONE))
 			kbase_reset_gpu_locked(kbdev);
 	}
 }
@@ -1570,10 +1562,29 @@ static void kbase_mmu_flush_invalidate_as(struct kbase_device *kbdev,
 {
 	int err;
 	u32 op;
+	bool gpu_powered;
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	gpu_powered = kbdev->pm.backend.gpu_powered;
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	/* GPU is off so there's no need to perform flush/invalidate.
+	 * But even if GPU is not actually powered down, after gpu_powered flag
+	 * was set to false, it is still safe to skip the flush/invalidate.
+	 * The TLB invalidation will anyway be performed due to AS_COMMAND_UPDATE
+	 * which is sent when address spaces are restored after gpu_powered flag
+	 * is set to true. Flushing of L2 cache is certainly not required as L2
+	 * cache is definitely off if gpu_powered is false.
+	 */
+	if (!gpu_powered)
+		return;
 
 	if (kbase_pm_context_active_handle_suspend(kbdev,
 				KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE)) {
-		/* GPU is off so there's no need to perform flush/invalidate */
+		/* GPU has just been powered off due to system suspend.
+		 * So again, no need to perform flush/invalidate.
+		 */
 		return;
 	}
 
@@ -1592,9 +1603,10 @@ static void kbase_mmu_flush_invalidate_as(struct kbase_device *kbdev,
 		/* Flush failed to complete, assume the GPU has hung and
 		 * perform a reset to recover
 		 */
-		dev_err(kbdev->dev, "Flush for GPU page table update did not complete. Issueing GPU soft-reset to recover\n");
+		dev_err(kbdev->dev, "Flush for GPU page table update did not complete. Issuing GPU soft-reset to recover\n");
 
-		if (kbase_prepare_to_reset_gpu(kbdev))
+		if (kbase_prepare_to_reset_gpu(
+			    kbdev, RESET_FLAGS_HWC_UNRECOVERABLE_ERROR))
 			kbase_reset_gpu(kbdev);
 	}
 
@@ -1627,10 +1639,10 @@ static void kbase_mmu_flush_invalidate(struct kbase_context *kctx,
 	kbdev = kctx->kbdev;
 #if !MALI_USE_CSF
 	mutex_lock(&kbdev->js_data.queue_mutex);
-#endif /* !MALI_USE_CSF */
 	ctx_is_in_runpool = kbase_ctx_sched_inc_refcount(kctx);
-#if !MALI_USE_CSF
 	mutex_unlock(&kbdev->js_data.queue_mutex);
+#else
+	ctx_is_in_runpool = kbase_ctx_sched_inc_refcount_if_as_valid(kctx);
 #endif /* !MALI_USE_CSF */
 
 	if (ctx_is_in_runpool) {
@@ -1673,6 +1685,7 @@ void kbase_mmu_disable(struct kbase_context *kctx)
 	KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID);
 
 	lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
+	lockdep_assert_held(&kctx->kbdev->mmu_hw_mutex);
 
 	/*
 	 * The address space is being disabled, drain all knowledge of it out
diff --git a/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu.h b/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu.h
index f2613e881dac..bf4fd91d27bc 100644
--- a/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu.h
+++ b/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  *
  * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved.
@@ -5,7 +6,7 @@
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,8 +17,6 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 #ifndef _KBASE_MMU_H_
diff --git a/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu_hw.h b/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu_hw.h
index e6eef86d7ac0..ea088e921b03 100644
--- a/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu_hw.h
+++ b/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu_hw.h
@@ -1,11 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  *
- * (C) COPYRIGHT 2014-2015, 2018-2019 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2015, 2018-2021 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,8 +17,6 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 /**
@@ -39,6 +38,11 @@ struct kbase_context;
 
 /**
  * enum kbase_mmu_fault_type - MMU fault type descriptor.
+ * @KBASE_MMU_FAULT_TYPE_UNKNOWN:         unknown fault
+ * @KBASE_MMU_FAULT_TYPE_PAGE:            page fault
+ * @KBASE_MMU_FAULT_TYPE_BUS:             bus fault
+ * @KBASE_MMU_FAULT_TYPE_PAGE_UNEXPECTED: page_unexpected fault
+ * @KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED:  bus_unexpected fault
  */
 enum kbase_mmu_fault_type {
 	KBASE_MMU_FAULT_TYPE_UNKNOWN = 0,
diff --git a/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu_hw_direct.c b/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu_hw_direct.c
index a820ab24ac05..88fd9cf9a864 100644
--- a/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu_hw_direct.c
+++ b/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu_hw_direct.c
@@ -1,11 +1,12 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  *
- * (C) COPYRIGHT 2014-2020 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2021 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,8 +17,6 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 #include <linux/bitops.h>
@@ -26,7 +25,6 @@
 #include <mmu/mali_kbase_mmu_hw.h>
 #include <tl/mali_kbase_tracepoints.h>
 #include <device/mali_kbase_device.h>
-#include <mali_kbase_as_fault_debugfs.h>
 
 /**
  * lock_region() - Generate lockaddr to lock memory region in MMU
@@ -126,38 +124,33 @@ void kbase_mmu_hw_configure(struct kbase_device *kbdev, struct kbase_as *as)
 	struct kbase_mmu_setup *current_setup = &as->current_setup;
 	u64 transcfg = 0;
 
-	if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_AARCH64_MMU)) {
-		transcfg = current_setup->transcfg;
+	transcfg = current_setup->transcfg;
 
-		/* Set flag AS_TRANSCFG_PTW_MEMATTR_WRITE_BACK
-		 * Clear PTW_MEMATTR bits
+	/* Set flag AS_TRANSCFG_PTW_MEMATTR_WRITE_BACK
+	 * Clear PTW_MEMATTR bits
+	 */
+	transcfg &= ~AS_TRANSCFG_PTW_MEMATTR_MASK;
+	/* Enable correct PTW_MEMATTR bits */
+	transcfg |= AS_TRANSCFG_PTW_MEMATTR_WRITE_BACK;
+	/* Ensure page-tables reads use read-allocate cache-policy in
+	 * the L2
+	 */
+	transcfg |= AS_TRANSCFG_R_ALLOCATE;
+
+	if (kbdev->system_coherency != COHERENCY_NONE) {
+		/* Set flag AS_TRANSCFG_PTW_SH_OS (outer shareable)
+		 * Clear PTW_SH bits
 		 */
-		transcfg &= ~AS_TRANSCFG_PTW_MEMATTR_MASK;
-		/* Enable correct PTW_MEMATTR bits */
-		transcfg |= AS_TRANSCFG_PTW_MEMATTR_WRITE_BACK;
-		/* Ensure page-tables reads use read-allocate cache-policy in
-		 * the L2
-		 */
-		transcfg |= AS_TRANSCFG_R_ALLOCATE;
-
-		if (kbdev->system_coherency == COHERENCY_ACE) {
-			/* Set flag AS_TRANSCFG_PTW_SH_OS (outer shareable)
-			 * Clear PTW_SH bits
-			 */
-			transcfg = (transcfg & ~AS_TRANSCFG_PTW_SH_MASK);
-			/* Enable correct PTW_SH bits */
-			transcfg = (transcfg | AS_TRANSCFG_PTW_SH_OS);
-		}
-
-		kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_TRANSCFG_LO),
-				transcfg);
-		kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_TRANSCFG_HI),
-				(transcfg >> 32) & 0xFFFFFFFFUL);
-	} else {
-		if (kbdev->system_coherency == COHERENCY_ACE)
-			current_setup->transtab |= AS_TRANSTAB_LPAE_SHARE_OUTER;
+		transcfg = (transcfg & ~AS_TRANSCFG_PTW_SH_MASK);
+		/* Enable correct PTW_SH bits */
+		transcfg = (transcfg | AS_TRANSCFG_PTW_SH_OS);
 	}
 
+	kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_TRANSCFG_LO),
+			transcfg);
+	kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_TRANSCFG_HI),
+			(transcfg >> 32) & 0xFFFFFFFFUL);
+
 	kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_TRANSTAB_LO),
 			current_setup->transtab & 0xFFFFFFFFUL);
 	kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_TRANSTAB_HI),
diff --git a/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu_internal.h b/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu_internal.h
index 8ecb14d72327..d3fcd3939ab6 100644
--- a/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu_internal.h
+++ b/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu_internal.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  *
  * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved.
@@ -5,7 +6,7 @@
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,8 +17,6 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 #ifndef _KBASE_MMU_INTERNAL_H_
diff --git a/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu_mode_aarch64.c b/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu_mode_aarch64.c
index 02493e9b2621..fac515cea403 100644
--- a/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu_mode_aarch64.c
+++ b/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu_mode_aarch64.c
@@ -1,11 +1,12 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  *
- * (C) COPYRIGHT 2010-2014, 2016-2019 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2014, 2016-2020 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,8 +17,6 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 #include "mali_kbase.h"
@@ -48,25 +47,7 @@
  */
 static inline void page_table_entry_set(u64 *pte, u64 phy)
 {
-#if KERNEL_VERSION(3, 18, 13) <= LINUX_VERSION_CODE
 	WRITE_ONCE(*pte, phy);
-#else
-#ifdef CONFIG_64BIT
-	barrier();
-	*pte = phy;
-	barrier();
-#elif defined(CONFIG_ARM)
-	barrier();
-	asm volatile("ldrd r0, [%1]\n\t"
-		     "strd r0, %0\n\t"
-		     : "=m" (*pte)
-		     : "r" (&phy)
-		     : "r0", "r1");
-	barrier();
-#else
-#error "64-bit atomic write must be implemented for your architecture"
-#endif
-#endif
 }
 
 static void mmu_update(struct kbase_device *kbdev, struct kbase_mmu_table *mmut,
diff --git a/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu_mode_lpae.c b/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu_mode_lpae.c
deleted file mode 100644
index 91a2d7ac4dcb..000000000000
--- a/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu_mode_lpae.c
+++ /dev/null
@@ -1,215 +0,0 @@
-/*
- *
- * (C) COPYRIGHT 2010-2019 ARM Limited. All rights reserved.
- *
- * This program is free software and is provided to you under the terms of the
- * GNU General Public License version 2 as published by the Free Software
- * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, you can access it online at
- * http://www.gnu.org/licenses/gpl-2.0.html.
- *
- * SPDX-License-Identifier: GPL-2.0
- *
- */
-
-
-#include "mali_kbase.h"
-#include <gpu/mali_kbase_gpu_regmap.h>
-#include "mali_kbase_defs.h"
-
-#define ENTRY_TYPE_MASK     3ULL
-#define ENTRY_IS_ATE        1ULL
-#define ENTRY_IS_INVAL      2ULL
-#define ENTRY_IS_PTE        3ULL
-
-#define ENTRY_ATTR_BITS (7ULL << 2)	/* bits 4:2 */
-#define ENTRY_RD_BIT (1ULL << 6)
-#define ENTRY_WR_BIT (1ULL << 7)
-#define ENTRY_SHARE_BITS (3ULL << 8)	/* bits 9:8 */
-#define ENTRY_ACCESS_BIT (1ULL << 10)
-#define ENTRY_NX_BIT (1ULL << 54)
-
-#define ENTRY_FLAGS_MASK (ENTRY_ATTR_BITS | ENTRY_RD_BIT | ENTRY_WR_BIT | \
-		ENTRY_SHARE_BITS | ENTRY_ACCESS_BIT | ENTRY_NX_BIT)
-
-/* Helper Function to perform assignment of page table entries, to
- * ensure the use of strd, which is required on LPAE systems.
- */
-static inline void page_table_entry_set(u64 *pte, u64 phy)
-{
-#if KERNEL_VERSION(3, 18, 13) <= LINUX_VERSION_CODE
-	WRITE_ONCE(*pte, phy);
-#else
-#ifdef CONFIG_64BIT
-	barrier();
-	*pte = phy;
-	barrier();
-#elif defined(CONFIG_ARM)
-	barrier();
-	asm volatile("ldrd r0, [%1]\n\t"
-		     "strd r0, %0\n\t"
-		     : "=m" (*pte)
-		     : "r" (&phy)
-		     : "r0", "r1");
-	barrier();
-#else
-#error "64-bit atomic write must be implemented for your architecture"
-#endif
-#endif
-}
-
-static void mmu_get_as_setup(struct kbase_mmu_table *mmut,
-		struct kbase_mmu_setup * const setup)
-{
-	/* Set up the required caching policies at the correct indices
-	 * in the memattr register.
-	 */
-	setup->memattr =
-		(AS_MEMATTR_LPAE_IMPL_DEF_CACHE_POLICY <<
-		(AS_MEMATTR_INDEX_IMPL_DEF_CACHE_POLICY * 8)) |
-		(AS_MEMATTR_LPAE_FORCE_TO_CACHE_ALL    <<
-		(AS_MEMATTR_INDEX_FORCE_TO_CACHE_ALL * 8))    |
-		(AS_MEMATTR_LPAE_WRITE_ALLOC           <<
-		(AS_MEMATTR_INDEX_WRITE_ALLOC * 8))           |
-		(AS_MEMATTR_LPAE_OUTER_IMPL_DEF        <<
-		(AS_MEMATTR_INDEX_OUTER_IMPL_DEF * 8))        |
-		(AS_MEMATTR_LPAE_OUTER_WA              <<
-		(AS_MEMATTR_INDEX_OUTER_WA * 8))              |
-		0; /* The other indices are unused for now */
-
-	setup->transtab = ((u64)mmut->pgd &
-		((0xFFFFFFFFULL << 32) | AS_TRANSTAB_LPAE_ADDR_SPACE_MASK)) |
-		AS_TRANSTAB_LPAE_ADRMODE_TABLE |
-		AS_TRANSTAB_LPAE_READ_INNER;
-
-	setup->transcfg = 0;
-}
-
-static void mmu_update(struct kbase_device *kbdev,
-		struct kbase_mmu_table *mmut,
-		int as_nr)
-{
-	struct kbase_as *as;
-	struct kbase_mmu_setup *current_setup;
-
-	if (WARN_ON(as_nr == KBASEP_AS_NR_INVALID))
-		return;
-
-	as = &kbdev->as[as_nr];
-	current_setup = &as->current_setup;
-
-	mmu_get_as_setup(mmut, current_setup);
-
-	/* Apply the address space setting */
-	kbase_mmu_hw_configure(kbdev, as);
-}
-
-static void mmu_disable_as(struct kbase_device *kbdev, int as_nr)
-{
-	struct kbase_as * const as = &kbdev->as[as_nr];
-	struct kbase_mmu_setup * const current_setup = &as->current_setup;
-
-	current_setup->transtab = AS_TRANSTAB_LPAE_ADRMODE_UNMAPPED;
-
-	/* Apply the address space setting */
-	kbase_mmu_hw_configure(kbdev, as);
-}
-
-static phys_addr_t pte_to_phy_addr(u64 entry)
-{
-	if (!(entry & 1))
-		return 0;
-
-	return entry & ~0xFFF;
-}
-
-static int ate_is_valid(u64 ate, int const level)
-{
-	return ((ate & ENTRY_TYPE_MASK) == ENTRY_IS_ATE);
-}
-
-static int pte_is_valid(u64 pte, int const level)
-{
-	return ((pte & ENTRY_TYPE_MASK) == ENTRY_IS_PTE);
-}
-
-/*
- * Map KBASE_REG flags to MMU flags
- */
-static u64 get_mmu_flags(unsigned long flags)
-{
-	u64 mmu_flags;
-	unsigned long memattr_idx;
-
-	memattr_idx = KBASE_REG_MEMATTR_VALUE(flags);
-	if (WARN(memattr_idx == AS_MEMATTR_INDEX_NON_CACHEABLE,
-			"Legacy Mode MMU cannot honor GPU non-cachable memory, will use default instead\n"))
-		memattr_idx = AS_MEMATTR_INDEX_DEFAULT;
-	/* store mem_attr index as 4:2, noting that:
-	 * - macro called above ensures 3 bits already
-	 * - all AS_MEMATTR_INDEX_<...> macros only use 3 bits
-	 */
-	mmu_flags = memattr_idx << 2;
-
-	/* write perm if requested */
-	mmu_flags |= (flags & KBASE_REG_GPU_WR) ? ENTRY_WR_BIT : 0;
-	/* read perm if requested */
-	mmu_flags |= (flags & KBASE_REG_GPU_RD) ? ENTRY_RD_BIT : 0;
-	/* nx if requested */
-	mmu_flags |= (flags & KBASE_REG_GPU_NX) ? ENTRY_NX_BIT : 0;
-
-	if (flags & KBASE_REG_SHARE_BOTH) {
-		/* inner and outer shareable */
-		mmu_flags |= SHARE_BOTH_BITS;
-	} else if (flags & KBASE_REG_SHARE_IN) {
-		/* inner shareable coherency */
-		mmu_flags |= SHARE_INNER_BITS;
-	}
-
-	return mmu_flags;
-}
-
-static void entry_set_ate(u64 *entry,
-		struct tagged_addr phy,
-		unsigned long flags,
-		int const level)
-{
-	page_table_entry_set(entry, as_phys_addr_t(phy) | get_mmu_flags(flags) |
-			     ENTRY_IS_ATE);
-}
-
-static void entry_set_pte(u64 *entry, phys_addr_t phy)
-{
-	page_table_entry_set(entry, (phy & ~0xFFF) | ENTRY_IS_PTE);
-}
-
-static void entry_invalidate(u64 *entry)
-{
-	page_table_entry_set(entry, ENTRY_IS_INVAL);
-}
-
-static struct kbase_mmu_mode const lpae_mode = {
-	.update = mmu_update,
-	.get_as_setup = mmu_get_as_setup,
-	.disable_as = mmu_disable_as,
-	.pte_to_phy_addr = pte_to_phy_addr,
-	.ate_is_valid = ate_is_valid,
-	.pte_is_valid = pte_is_valid,
-	.entry_set_ate = entry_set_ate,
-	.entry_set_pte = entry_set_pte,
-	.entry_invalidate = entry_invalidate,
-	.flags = 0
-};
-
-struct kbase_mmu_mode const *kbase_mmu_mode_get_lpae(void)
-{
-	return &lpae_mode;
-}
diff --git a/drivers/gpu/arm/bifrost/platform/Kconfig b/drivers/gpu/arm/bifrost/platform/Kconfig
index ef9fb963ecf5..ac385aa6b021 100644
--- a/drivers/gpu/arm/bifrost/platform/Kconfig
+++ b/drivers/gpu/arm/bifrost/platform/Kconfig
@@ -1,10 +1,11 @@
+# SPDX-License-Identifier: GPL-2.0
 #
 # (C) COPYRIGHT 2012-2013, 2017 ARM Limited. All rights reserved.
 #
 # This program is free software and is provided to you under the terms of the
 # GNU General Public License version 2 as published by the Free Software
 # Foundation, and any use by you of this program is subject to the terms
-# of such GNU licence.
+# of such GNU license.
 #
 # This program is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -15,11 +16,7 @@
 # along with this program; if not, you can access it online at
 # http://www.gnu.org/licenses/gpl-2.0.html.
 #
-# SPDX-License-Identifier: GPL-2.0
 #
-#
-
-
 
 # Add your platform specific Kconfig file here
 #
@@ -27,4 +24,3 @@
 #
 # Where xxx is the platform name is the name set in MALI_PLATFORM_NAME
 #
-
diff --git a/drivers/gpu/arm/bifrost/platform/devicetree/Kbuild b/drivers/gpu/arm/bifrost/platform/devicetree/Kbuild
index 78343c0570d1..f151ee8680ad 100644
--- a/drivers/gpu/arm/bifrost/platform/devicetree/Kbuild
+++ b/drivers/gpu/arm/bifrost/platform/devicetree/Kbuild
@@ -1,10 +1,11 @@
+# SPDX-License-Identifier: GPL-2.0
 #
 # (C) COPYRIGHT 2012-2017, 2020 ARM Limited. All rights reserved.
 #
 # This program is free software and is provided to you under the terms of the
 # GNU General Public License version 2 as published by the Free Software
 # Foundation, and any use by you of this program is subject to the terms
-# of such GNU licence.
+# of such GNU license.
 #
 # This program is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -15,11 +16,9 @@
 # along with this program; if not, you can access it online at
 # http://www.gnu.org/licenses/gpl-2.0.html.
 #
-# SPDX-License-Identifier: GPL-2.0
-#
 #
 
-mali_kbase-y += \
+bifrost_kbase-y += \
 	$(MALI_PLATFORM_DIR)/mali_kbase_config_devicetree.o \
 	$(MALI_PLATFORM_DIR)/mali_kbase_runtime_pm.o \
 	$(MALI_PLATFORM_DIR)/mali_kbase_clk_rate_trace.o
diff --git a/drivers/gpu/arm/bifrost/platform/devicetree/mali_kbase_clk_rate_trace.c b/drivers/gpu/arm/bifrost/platform/devicetree/mali_kbase_clk_rate_trace.c
index 11a8b77dca06..f149554c28ed 100644
--- a/drivers/gpu/arm/bifrost/platform/devicetree/mali_kbase_clk_rate_trace.c
+++ b/drivers/gpu/arm/bifrost/platform/devicetree/mali_kbase_clk_rate_trace.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  *
  * (C) COPYRIGHT 2015, 2017-2020 ARM Limited. All rights reserved.
@@ -5,7 +6,7 @@
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,8 +17,6 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 #include <mali_kbase.h>
@@ -25,18 +24,45 @@
 #include <linux/clk.h>
 #include "mali_kbase_config_platform.h"
 
+#if MALI_USE_CSF
+#include <asm/arch_timer.h>
+#endif
+
 static void *enumerate_gpu_clk(struct kbase_device *kbdev,
 		unsigned int index)
 {
 	if (index >= kbdev->nr_clocks)
 		return NULL;
 
+#if MALI_USE_CSF
+	if (of_machine_is_compatible("arm,juno"))
+		WARN_ON(kbdev->nr_clocks != 1);
+#endif
+
 	return kbdev->clocks[index];
 }
 
 static unsigned long get_gpu_clk_rate(struct kbase_device *kbdev,
 		void *gpu_clk_handle)
 {
+#if MALI_USE_CSF
+	/* On Juno FPGA platforms, the GPU clock rate is reported as 600 MHz at
+	 * boot time. After the first call to kbase_devfreq_target() the clock
+	 * rate is reported as 450 MHz, and it does not change after that.
+	 * However, the frequency at which the GPU actually operates is always
+	 * 50 MHz, which equals the frequency of the system counter; the HW
+	 * counters also increment at that rate.
+	 * DVFS, which is a client of kbase_ipa_control, needs to normalize the
+	 * GPU_ACTIVE counter to calculate how long the GPU has been busy, so
+	 * for correct normalization we need to return the system counter
+	 * frequency value.
+	 * This is a reasonable workaround as the frequency value remains the
+	 * same throughout. It can be removed after GPUCORE-25693.
+	 */
+	if (of_machine_is_compatible("arm,juno"))
+		return arch_timer_get_cntfrq();
+#endif
+
 	return clk_get_rate((struct clk *)gpu_clk_handle);
 }
 
@@ -51,12 +77,23 @@ static int gpu_clk_notifier_register(struct kbase_device *kbdev,
 	     sizeof(((struct kbase_gpu_clk_notifier_data *)0)->gpu_clk_handle),
 	     "mismatch in the size of clk member");
 
+#if MALI_USE_CSF
+	/* Frequency is fixed on Juno platforms */
+	if (of_machine_is_compatible("arm,juno"))
+		return 0;
+#endif
+
 	return clk_notifier_register((struct clk *)gpu_clk_handle, nb);
 }
 
 static void gpu_clk_notifier_unregister(struct kbase_device *kbdev,
 		void *gpu_clk_handle, struct notifier_block *nb)
 {
+#if MALI_USE_CSF
+	if (of_machine_is_compatible("arm,juno"))
+		return;
+#endif
+
 	clk_notifier_unregister((struct clk *)gpu_clk_handle, nb);
 }
 
diff --git a/drivers/gpu/arm/bifrost/platform/devicetree/mali_kbase_config_devicetree.c b/drivers/gpu/arm/bifrost/platform/devicetree/mali_kbase_config_devicetree.c
index ccefddf882fd..5f300b1913e1 100644
--- a/drivers/gpu/arm/bifrost/platform/devicetree/mali_kbase_config_devicetree.c
+++ b/drivers/gpu/arm/bifrost/platform/devicetree/mali_kbase_config_devicetree.c
@@ -1,11 +1,12 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  *
- * (C) COPYRIGHT 2015, 2017 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2015, 2017, 2020 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,8 +17,6 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 #include <mali_kbase_config.h>
diff --git a/drivers/gpu/arm/bifrost/platform/devicetree/mali_kbase_config_platform.h b/drivers/gpu/arm/bifrost/platform/devicetree/mali_kbase_config_platform.h
index 2137b425c1ab..ee9ae52c8608 100644
--- a/drivers/gpu/arm/bifrost/platform/devicetree/mali_kbase_config_platform.h
+++ b/drivers/gpu/arm/bifrost/platform/devicetree/mali_kbase_config_platform.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  *
  * (C) COPYRIGHT 2014-2017, 2020 ARM Limited. All rights reserved.
@@ -5,7 +6,7 @@
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,8 +17,6 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 /**
diff --git a/drivers/gpu/arm/bifrost/platform/devicetree/mali_kbase_runtime_pm.c b/drivers/gpu/arm/bifrost/platform/devicetree/mali_kbase_runtime_pm.c
index 8772edb56f73..008f8a45f86d 100644
--- a/drivers/gpu/arm/bifrost/platform/devicetree/mali_kbase_runtime_pm.c
+++ b/drivers/gpu/arm/bifrost/platform/devicetree/mali_kbase_runtime_pm.c
@@ -1,11 +1,12 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  *
- * (C) COPYRIGHT 2015, 2017-2019 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2015-2020 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,8 +17,6 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 #include <mali_kbase.h>
diff --git a/drivers/gpu/arm/bifrost/platform/vexpress/Kbuild b/drivers/gpu/arm/bifrost/platform/vexpress/Kbuild
index 6780e4c9433b..c1646478a9dc 100644
--- a/drivers/gpu/arm/bifrost/platform/vexpress/Kbuild
+++ b/drivers/gpu/arm/bifrost/platform/vexpress/Kbuild
@@ -1,10 +1,11 @@
+# SPDX-License-Identifier: GPL-2.0
 #
-# (C) COPYRIGHT 2012-2013, 2016-2017 ARM Limited. All rights reserved.
+# (C) COPYRIGHT 2012-2013, 2016-2017, 2020 ARM Limited. All rights reserved.
 #
 # This program is free software and is provided to you under the terms of the
 # GNU General Public License version 2 as published by the Free Software
 # Foundation, and any use by you of this program is subject to the terms
-# of such GNU licence.
+# of such GNU license.
 #
 # This program is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -15,10 +16,8 @@
 # along with this program; if not, you can access it online at
 # http://www.gnu.org/licenses/gpl-2.0.html.
 #
-# SPDX-License-Identifier: GPL-2.0
-#
 #
 
-mali_kbase-y += \
+bifrost_kbase-y += \
 	$(MALI_PLATFORM_DIR)/mali_kbase_config_vexpress.o \
 	mali_kbase_platform_fake.o
diff --git a/drivers/gpu/arm/bifrost/platform/vexpress/mali_kbase_config_platform.h b/drivers/gpu/arm/bifrost/platform/vexpress/mali_kbase_config_platform.h
index fac3cd52182f..97fdd13f22fb 100644
--- a/drivers/gpu/arm/bifrost/platform/vexpress/mali_kbase_config_platform.h
+++ b/drivers/gpu/arm/bifrost/platform/vexpress/mali_kbase_config_platform.h
@@ -1,11 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  *
- * (C) COPYRIGHT 2014-2017 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2017, 2020 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,8 +17,6 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 /**
diff --git a/drivers/gpu/arm/bifrost/platform/vexpress/mali_kbase_config_vexpress.c b/drivers/gpu/arm/bifrost/platform/vexpress/mali_kbase_config_vexpress.c
index d165ce262814..181681797030 100644
--- a/drivers/gpu/arm/bifrost/platform/vexpress/mali_kbase_config_vexpress.c
+++ b/drivers/gpu/arm/bifrost/platform/vexpress/mali_kbase_config_vexpress.c
@@ -1,11 +1,12 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  *
- * (C) COPYRIGHT 2011-2017 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2011-2017, 2020 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,12 +17,8 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
-
-
 #include <linux/ioport.h>
 #include <mali_kbase.h>
 #include <mali_kbase_defs.h>
diff --git a/drivers/gpu/arm/bifrost/platform/vexpress_1xv7_a57/Kbuild b/drivers/gpu/arm/bifrost/platform/vexpress_1xv7_a57/Kbuild
index 51b408efd48a..51ac81b1f189 100644
--- a/drivers/gpu/arm/bifrost/platform/vexpress_1xv7_a57/Kbuild
+++ b/drivers/gpu/arm/bifrost/platform/vexpress_1xv7_a57/Kbuild
@@ -1,10 +1,11 @@
+# SPDX-License-Identifier: GPL-2.0
 #
-# (C) COPYRIGHT 2013-2014, 2016-2017 ARM Limited. All rights reserved.
+# (C) COPYRIGHT 2013-2014, 2016-2017, 2020 ARM Limited. All rights reserved.
 #
 # This program is free software and is provided to you under the terms of the
 # GNU General Public License version 2 as published by the Free Software
 # Foundation, and any use by you of this program is subject to the terms
-# of such GNU licence.
+# of such GNU license.
 #
 # This program is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -15,10 +16,8 @@
 # along with this program; if not, you can access it online at
 # http://www.gnu.org/licenses/gpl-2.0.html.
 #
-# SPDX-License-Identifier: GPL-2.0
-#
 #
 
-mali_kbase-y += \
+bifrost_kbase-y += \
 	$(MALI_PLATFORM_DIR)/mali_kbase_config_vexpress.o \
 	mali_kbase_platform_fake.o
diff --git a/drivers/gpu/arm/bifrost/platform/vexpress_1xv7_a57/mali_kbase_config_platform.h b/drivers/gpu/arm/bifrost/platform/vexpress_1xv7_a57/mali_kbase_config_platform.h
index fac3cd52182f..97fdd13f22fb 100644
--- a/drivers/gpu/arm/bifrost/platform/vexpress_1xv7_a57/mali_kbase_config_platform.h
+++ b/drivers/gpu/arm/bifrost/platform/vexpress_1xv7_a57/mali_kbase_config_platform.h
@@ -1,11 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  *
- * (C) COPYRIGHT 2014-2017 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2017, 2020 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,8 +17,6 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 /**
diff --git a/drivers/gpu/arm/bifrost/platform/vexpress_1xv7_a57/mali_kbase_config_vexpress.c b/drivers/gpu/arm/bifrost/platform/vexpress_1xv7_a57/mali_kbase_config_vexpress.c
index efca0a5b3493..3bb5caf1bbf6 100644
--- a/drivers/gpu/arm/bifrost/platform/vexpress_1xv7_a57/mali_kbase_config_vexpress.c
+++ b/drivers/gpu/arm/bifrost/platform/vexpress_1xv7_a57/mali_kbase_config_vexpress.c
@@ -1,11 +1,12 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  *
- * (C) COPYRIGHT 2011-2014, 2017 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2011-2014, 2017, 2020 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,8 +17,6 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 #include <linux/ioport.h>
diff --git a/drivers/gpu/arm/bifrost/platform/vexpress_6xvirtex7_10mhz/Kbuild b/drivers/gpu/arm/bifrost/platform/vexpress_6xvirtex7_10mhz/Kbuild
index e07709c9b1a5..0000b6d8307c 100644
--- a/drivers/gpu/arm/bifrost/platform/vexpress_6xvirtex7_10mhz/Kbuild
+++ b/drivers/gpu/arm/bifrost/platform/vexpress_6xvirtex7_10mhz/Kbuild
@@ -1,10 +1,11 @@
+# SPDX-License-Identifier: GPL-2.0
 #
 # (C) COPYRIGHT 2012-2013, 2016-2017 ARM Limited. All rights reserved.
 #
 # This program is free software and is provided to you under the terms of the
 # GNU General Public License version 2 as published by the Free Software
 # Foundation, and any use by you of this program is subject to the terms
-# of such GNU licence.
+# of such GNU license.
 #
 # This program is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -15,11 +16,9 @@
 # along with this program; if not, you can access it online at
 # http://www.gnu.org/licenses/gpl-2.0.html.
 #
-# SPDX-License-Identifier: GPL-2.0
-#
 #
 
-mali_kbase-y += \
+bifrost_kbase-y += \
 	$(MALI_PLATFORM_DIR)/mali_kbase_config_vexpress.o \
 	$(MALI_PLATFORM_DIR)/mali_kbase_cpu_vexpress.o \
 	mali_kbase_platform_fake.o
diff --git a/drivers/gpu/arm/bifrost/platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_platform.h b/drivers/gpu/arm/bifrost/platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_platform.h
index fac3cd52182f..97fdd13f22fb 100644
--- a/drivers/gpu/arm/bifrost/platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_platform.h
+++ b/drivers/gpu/arm/bifrost/platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_platform.h
@@ -1,11 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  *
- * (C) COPYRIGHT 2014-2017 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2017, 2020 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,8 +17,6 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 /**
diff --git a/drivers/gpu/arm/bifrost/platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_vexpress.c b/drivers/gpu/arm/bifrost/platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_vexpress.c
index b6714b95b776..a7a842abf2d1 100644
--- a/drivers/gpu/arm/bifrost/platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_vexpress.c
+++ b/drivers/gpu/arm/bifrost/platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_vexpress.c
@@ -1,11 +1,12 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  *
- * (C) COPYRIGHT 2011-2014, 2017 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2011-2014, 2017, 2020 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,12 +17,8 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
-
-
 #include <linux/ioport.h>
 #include <mali_kbase.h>
 #include <mali_kbase_defs.h>
diff --git a/drivers/gpu/arm/bifrost/protected_mode_switcher.h b/drivers/gpu/arm/bifrost/protected_mode_switcher.h
index 8778d812aea0..d9bab5622f4b 100644
--- a/drivers/gpu/arm/bifrost/protected_mode_switcher.h
+++ b/drivers/gpu/arm/bifrost/protected_mode_switcher.h
@@ -1,11 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  *
- * (C) COPYRIGHT 2017 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2017, 2020-2021 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,8 +17,6 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 #ifndef _PROTECTED_MODE_SWITCH_H_
@@ -28,35 +27,23 @@ struct protected_mode_device;
 /**
  * struct protected_mode_ops - Callbacks for protected mode switch operations
  *
- * @protected_mode_enable:  Callback to enable protected mode for device
+ * @protected_mode_enable:  Callback to enable protected mode for device, and
+ *                          reset device
+ *                          Returns 0 on success, non-zero on error
  * @protected_mode_disable: Callback to disable protected mode for device
+ *                          Returns 0 on success, non-zero on error
  */
 struct protected_mode_ops {
-	/**
-	 * protected_mode_enable() - Enable protected mode on device
-	 * @dev:	The struct device
-	 *
-	 * Return: 0 on success, non-zero on error
-	 */
 	int (*protected_mode_enable)(
 			struct protected_mode_device *protected_dev);
-
-	/**
-	 * protected_mode_disable() - Disable protected mode on device, and
-	 *                            reset device
-	 * @dev:	The struct device
-	 *
-	 * Return: 0 on success, non-zero on error
-	 */
 	int (*protected_mode_disable)(
 			struct protected_mode_device *protected_dev);
 };
 
 /**
  * struct protected_mode_device - Device structure for protected mode devices
- *
- * @ops  - Callbacks associated with this device
- * @data - Pointer to device private data
+ * @ops:  Callbacks associated with this device
+ * @data: Pointer to device private data
  *
  * This structure should be registered with the platform device using
  * platform_set_drvdata().
diff --git a/drivers/gpu/arm/bifrost/tests/Kbuild b/drivers/gpu/arm/bifrost/tests/Kbuild
index c26bef780781..cf9d7fdc9b17 100644
--- a/drivers/gpu/arm/bifrost/tests/Kbuild
+++ b/drivers/gpu/arm/bifrost/tests/Kbuild
@@ -1,10 +1,11 @@
+# SPDX-License-Identifier: GPL-2.0
 #
 # (C) COPYRIGHT 2017, 2020 ARM Limited. All rights reserved.
 #
 # This program is free software and is provided to you under the terms of the
 # GNU General Public License version 2 as published by the Free Software
 # Foundation, and any use by you of this program is subject to the terms
-# of such GNU licence.
+# of such GNU license.
 #
 # This program is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -15,8 +16,6 @@
 # along with this program; if not, you can access it online at
 # http://www.gnu.org/licenses/gpl-2.0.html.
 #
-# SPDX-License-Identifier: GPL-2.0
-#
 #
 
 obj-$(CONFIG_MALI_KUTF) += kutf/
diff --git a/drivers/gpu/arm/bifrost/tests/Kconfig b/drivers/gpu/arm/bifrost/tests/Kconfig
index 83a4d7764a50..cf2a23a7df11 100644
--- a/drivers/gpu/arm/bifrost/tests/Kconfig
+++ b/drivers/gpu/arm/bifrost/tests/Kconfig
@@ -1,10 +1,11 @@
+# SPDX-License-Identifier: GPL-2.0
 #
 # (C) COPYRIGHT 2017, 2020 ARM Limited. All rights reserved.
 #
 # This program is free software and is provided to you under the terms of the
 # GNU General Public License version 2 as published by the Free Software
 # Foundation, and any use by you of this program is subject to the terms
-# of such GNU licence.
+# of such GNU license.
 #
 # This program is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -15,8 +16,6 @@
 # along with this program; if not, you can access it online at
 # http://www.gnu.org/licenses/gpl-2.0.html.
 #
-# SPDX-License-Identifier: GPL-2.0
-#
 #
 
 source "drivers/gpu/arm/midgard/tests/kutf/Kconfig"
diff --git a/drivers/gpu/arm/bifrost/tests/Mconfig b/drivers/gpu/arm/bifrost/tests/Mconfig
index bba96b3d9e48..e9313ef14fd1 100644
--- a/drivers/gpu/arm/bifrost/tests/Mconfig
+++ b/drivers/gpu/arm/bifrost/tests/Mconfig
@@ -1,19 +1,26 @@
+# SPDX-License-Identifier: GPL-2.0
 #
-# (C) COPYRIGHT 2018-2020 ARM Limited. All rights reserved.
+# (C) COPYRIGHT 2018-2021 ARM Limited. All rights reserved.
 #
 # This program is free software and is provided to you under the terms of the
 # GNU General Public License version 2 as published by the Free Software
 # Foundation, and any use by you of this program is subject to the terms
-# of such GNU licence.
+# of such GNU license.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
 #
-# A copy of the licence is included with the program, and can also be obtained
-# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
-# Boston, MA  02110-1301, USA.
 #
 
 config UNIT_TEST_KERNEL_MODULES
 	bool
-	default y if UNIT_TEST_CODE && BUILD_KERNEL_MODULES
+	default y if UNIT_TEST_CODE && BACKEND_KERNEL
 	default n
 
 config BUILD_IPA_TESTS
@@ -33,6 +40,10 @@ config BUILD_CSF_TESTS
 
 config BUILD_ARBIF_TESTS
 	bool
-	default y if UNIT_TEST_KERNEL_MODULES && MALI_ARBITER_SUPPORT
+	default y if UNIT_TEST_CODE && MALI_ARBITER_SUPPORT
 	default n
 
+config BUILD_ARBIF_KERNEL_TESTS
+	bool
+	default y if BUILD_KERNEL_MODULES && BUILD_ARBIF_TESTS
+	default n
diff --git a/drivers/gpu/arm/bifrost/tests/include/kutf/kutf_helpers.h b/drivers/gpu/arm/bifrost/tests/include/kutf/kutf_helpers.h
index 858b9c38b49a..3b8613a06f68 100644
--- a/drivers/gpu/arm/bifrost/tests/include/kutf/kutf_helpers.h
+++ b/drivers/gpu/arm/bifrost/tests/include/kutf/kutf_helpers.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  *
  * (C) COPYRIGHT 2017, 2020 ARM Limited. All rights reserved.
@@ -5,7 +6,7 @@
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,8 +17,6 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 #ifndef _KERNEL_UTF_HELPERS_H_
diff --git a/drivers/gpu/arm/bifrost/tests/include/kutf/kutf_helpers_user.h b/drivers/gpu/arm/bifrost/tests/include/kutf/kutf_helpers_user.h
index 3b1300e1ce6f..49aad2921327 100644
--- a/drivers/gpu/arm/bifrost/tests/include/kutf/kutf_helpers_user.h
+++ b/drivers/gpu/arm/bifrost/tests/include/kutf/kutf_helpers_user.h
@@ -1,11 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  *
- * (C) COPYRIGHT 2017 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2017, 2020 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,8 +17,6 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 #ifndef _KERNEL_UTF_HELPERS_USER_H_
@@ -63,7 +62,8 @@ struct kutf_helper_named_val {
  * unrecoverable)
  *
  * Positive values indicate correct access but invalid parsing (can be
- * recovered from assuming data in the future is correct) */
+ * recovered from assuming data in the future is correct)
+ */
 enum kutf_helper_err {
 	/* No error - must be zero */
 	KUTF_HELPER_ERR_NONE = 0,
@@ -71,14 +71,16 @@ enum kutf_helper_err {
 	KUTF_HELPER_ERR_INVALID_NAME,
 	/* Named value parsing of string or u64 type encountered extra
 	 * characters after the value (after the last digit for a u64 type or
-	 * after the string end delimiter for string type) */
+	 * after the string end delimiter for string type)
+	 */
 	KUTF_HELPER_ERR_CHARS_AFTER_VAL,
 	/* Named value parsing of string type couldn't find the string end
 	 * delimiter.
 	 *
 	 * This cannot be encountered when the NAME="value" message exceeds the
 	 * textbuf's maximum line length, because such messages are not checked
-	 * for an end string delimiter */
+	 * for an end string delimiter
+	 */
 	KUTF_HELPER_ERR_NO_END_DELIMITER,
 	/* Named value didn't parse as any of the known types */
 	KUTF_HELPER_ERR_INVALID_VALUE,
@@ -122,7 +124,8 @@ int kutf_helper_max_str_len_for_kern(const char *val_name, int kern_buf_sz);
  *
  * Any failure will be logged on the suite's current test fixture
  *
- * Returns 0 on success, non-zero on failure */
+ * Returns 0 on success, non-zero on failure
+ */
 int kutf_helper_send_named_str(struct kutf_context *context,
 		const char *val_name, const char *val_str);
 
@@ -138,7 +141,8 @@ int kutf_helper_send_named_str(struct kutf_context *context,
  *
  * Returns 0 on success. Negative value on failure to receive from the 'run'
  * file, positive value indicates an enum kutf_helper_err value for correct
- * reception of data but invalid parsing */
+ * reception of data but invalid parsing
+ */
 int kutf_helper_receive_named_val(
 		struct kutf_context *context,
 		struct kutf_helper_named_val *named_val);
@@ -165,7 +169,8 @@ int kutf_helper_receive_named_val(
  * - return value will be 0 to indicate success
  *
  * The rationale behind this is that we'd prefer to continue the rest of the
- * test with failures propagated, rather than hitting a timeout */
+ * test with failures propagated, rather than hitting a timeout
+ */
 int kutf_helper_receive_check_val(
 		struct kutf_helper_named_val *named_val,
 		struct kutf_context *context,
diff --git a/drivers/gpu/arm/bifrost/tests/include/kutf/kutf_mem.h b/drivers/gpu/arm/bifrost/tests/include/kutf/kutf_mem.h
index 988559de1edf..47c4beacf12f 100644
--- a/drivers/gpu/arm/bifrost/tests/include/kutf/kutf_mem.h
+++ b/drivers/gpu/arm/bifrost/tests/include/kutf/kutf_mem.h
@@ -1,11 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  *
- * (C) COPYRIGHT 2014, 2017 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014, 2017, 2020 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,8 +17,6 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 #ifndef _KERNEL_UTF_MEM_H_
diff --git a/drivers/gpu/arm/bifrost/tests/include/kutf/kutf_resultset.h b/drivers/gpu/arm/bifrost/tests/include/kutf/kutf_resultset.h
index 49ebeb4ec546..1f2dcfab49ec 100644
--- a/drivers/gpu/arm/bifrost/tests/include/kutf/kutf_resultset.h
+++ b/drivers/gpu/arm/bifrost/tests/include/kutf/kutf_resultset.h
@@ -1,11 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  *
- * (C) COPYRIGHT 2014, 2017 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014, 2017, 2020 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,8 +17,6 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 #ifndef _KERNEL_UTF_RESULTSET_H_
diff --git a/drivers/gpu/arm/bifrost/tests/include/kutf/kutf_suite.h b/drivers/gpu/arm/bifrost/tests/include/kutf/kutf_suite.h
index 8d75f506f9eb..426e435a2c67 100644
--- a/drivers/gpu/arm/bifrost/tests/include/kutf/kutf_suite.h
+++ b/drivers/gpu/arm/bifrost/tests/include/kutf/kutf_suite.h
@@ -1,11 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  *
- * (C) COPYRIGHT 2014, 2017 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014, 2017, 2020 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,8 +17,6 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 #ifndef _KERNEL_UTF_SUITE_H_
@@ -264,9 +263,10 @@ struct kutf_suite {
 	struct list_head               test_list;
 };
 
-/* ============================================================================
-	Application functions
-============================================================================ */
+/* ============================================================================
+ * Application functions
+ * ============================================================================
+ */
 
 /**
  * kutf_create_application() - Create an in kernel test application.
@@ -284,9 +284,10 @@ struct kutf_application *kutf_create_application(const char *name);
  */
 void kutf_destroy_application(struct kutf_application *app);
 
-/* ============================================================================
-	Suite functions
-============================================================================ */
+/** ===========================================================================
+ * Suite functions
+ * ============================================================================
+ */
 
 /**
  * kutf_create_suite() - Create a kernel test suite.
@@ -416,10 +417,10 @@ void kutf_add_test_with_filters_and_data(
 		unsigned int filters,
 		union kutf_callback_data test_data);
 
-
-/* ============================================================================
-	Test functions
-============================================================================ */
+/** ===========================================================================
+ * Test functions
+ * ============================================================================
+ */
 /**
  * kutf_test_log_result_external() - Log a result which has been created
  *                                   externally into a in a standard form
diff --git a/drivers/gpu/arm/bifrost/tests/include/kutf/kutf_utils.h b/drivers/gpu/arm/bifrost/tests/include/kutf/kutf_utils.h
index 25b8285500d7..e5e2f02621df 100644
--- a/drivers/gpu/arm/bifrost/tests/include/kutf/kutf_utils.h
+++ b/drivers/gpu/arm/bifrost/tests/include/kutf/kutf_utils.h
@@ -1,11 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  *
- * (C) COPYRIGHT 2014, 2017 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014, 2017, 2020 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,8 +17,6 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 #ifndef _KERNEL_UTF_UTILS_H_
diff --git a/drivers/gpu/arm/bifrost/tests/kutf/Kbuild b/drivers/gpu/arm/bifrost/tests/kutf/Kbuild
index 2531d41ca28d..bd6540c5006e 100644
--- a/drivers/gpu/arm/bifrost/tests/kutf/Kbuild
+++ b/drivers/gpu/arm/bifrost/tests/kutf/Kbuild
@@ -1,10 +1,11 @@
+# SPDX-License-Identifier: GPL-2.0
 #
-# (C) COPYRIGHT 2017 ARM Limited. All rights reserved.
+# (C) COPYRIGHT 2017, 2020 ARM Limited. All rights reserved.
 #
 # This program is free software and is provided to you under the terms of the
 # GNU General Public License version 2 as published by the Free Software
 # Foundation, and any use by you of this program is subject to the terms
-# of such GNU licence.
+# of such GNU license.
 #
 # This program is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -15,8 +16,6 @@
 # along with this program; if not, you can access it online at
 # http://www.gnu.org/licenses/gpl-2.0.html.
 #
-# SPDX-License-Identifier: GPL-2.0
-#
 #
 
 ccflags-y += -I$(src)/../include
diff --git a/drivers/gpu/arm/bifrost/tests/kutf/Kconfig b/drivers/gpu/arm/bifrost/tests/kutf/Kconfig
index 0cdb474c06a3..7ea95b600160 100644
--- a/drivers/gpu/arm/bifrost/tests/kutf/Kconfig
+++ b/drivers/gpu/arm/bifrost/tests/kutf/Kconfig
@@ -1,10 +1,11 @@
+# SPDX-License-Identifier: GPL-2.0
 #
-# (C) COPYRIGHT 2017 ARM Limited. All rights reserved.
+# (C) COPYRIGHT 2017, 2020 ARM Limited. All rights reserved.
 #
 # This program is free software and is provided to you under the terms of the
 # GNU General Public License version 2 as published by the Free Software
 # Foundation, and any use by you of this program is subject to the terms
-# of such GNU licence.
+# of such GNU license.
 #
 # This program is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -15,10 +16,7 @@
 # along with this program; if not, you can access it online at
 # http://www.gnu.org/licenses/gpl-2.0.html.
 #
-# SPDX-License-Identifier: GPL-2.0
 #
-#
-
 
 config MALI_KUTF
  tristate "Mali Kernel Unit Test Framework"
diff --git a/drivers/gpu/arm/bifrost/tests/kutf/Makefile b/drivers/gpu/arm/bifrost/tests/kutf/Makefile
index d848e8774bd0..041f5394bf32 100644
--- a/drivers/gpu/arm/bifrost/tests/kutf/Makefile
+++ b/drivers/gpu/arm/bifrost/tests/kutf/Makefile
@@ -1,10 +1,11 @@
+# SPDX-License-Identifier: GPL-2.0
 #
-# (C) COPYRIGHT 2014-2017 ARM Limited. All rights reserved.
+# (C) COPYRIGHT 2014-2017, 2020 ARM Limited. All rights reserved.
 #
 # This program is free software and is provided to you under the terms of the
 # GNU General Public License version 2 as published by the Free Software
 # Foundation, and any use by you of this program is subject to the terms
-# of such GNU licence.
+# of such GNU license.
 #
 # This program is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -15,8 +16,6 @@
 # along with this program; if not, you can access it online at
 # http://www.gnu.org/licenses/gpl-2.0.html.
 #
-# SPDX-License-Identifier: GPL-2.0
-#
 #
 
 # linux build system bootstrap for out-of-tree module
diff --git a/drivers/gpu/arm/bifrost/tests/kutf/build.bp b/drivers/gpu/arm/bifrost/tests/kutf/build.bp
index 32eab143e669..707a0531b3b9 100644
--- a/drivers/gpu/arm/bifrost/tests/kutf/build.bp
+++ b/drivers/gpu/arm/bifrost/tests/kutf/build.bp
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  *
  * (C) COPYRIGHT 2018-2020 ARM Limited. All rights reserved.
@@ -5,11 +6,16 @@
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
- * A copy of the licence is included with the program, and can also be obtained
- * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
- * Boston, MA 02110-1301, USA.
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
  *
  */
 
diff --git a/drivers/gpu/arm/bifrost/tests/kutf/kutf_helpers.c b/drivers/gpu/arm/bifrost/tests/kutf/kutf_helpers.c
index 4463b04792f5..13923fa3121f 100644
--- a/drivers/gpu/arm/bifrost/tests/kutf/kutf_helpers.c
+++ b/drivers/gpu/arm/bifrost/tests/kutf/kutf_helpers.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  *
  * (C) COPYRIGHT 2017, 2020 ARM Limited. All rights reserved.
@@ -5,7 +6,7 @@
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,8 +17,6 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 /* Kernel UTF test helpers */
diff --git a/drivers/gpu/arm/bifrost/tests/kutf/kutf_helpers_user.c b/drivers/gpu/arm/bifrost/tests/kutf/kutf_helpers_user.c
index 108fa82d9b21..84c63be0bed7 100644
--- a/drivers/gpu/arm/bifrost/tests/kutf/kutf_helpers_user.c
+++ b/drivers/gpu/arm/bifrost/tests/kutf/kutf_helpers_user.c
@@ -1,11 +1,12 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  *
- * (C) COPYRIGHT 2017 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2017, 2020 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,8 +17,6 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 /* Kernel UTF test helpers that mirror those for kutf-userside */
@@ -42,7 +41,8 @@ static const char *get_val_type_name(enum kutf_helper_valtype valtype)
 	 * a) "<0 comparison on unsigned type" warning - if we did both upper
 	 *    and lower bound check
 	 * b) incorrect range checking if it was a signed type - if we did
-	 *    upper bound check only */
+	 *    upper bound check only
+	 */
 	unsigned int type_idx = (unsigned int)valtype;
 
 	if (type_idx >= (unsigned int)KUTF_HELPER_VALTYPE_COUNT)
@@ -54,7 +54,8 @@ static const char *get_val_type_name(enum kutf_helper_valtype valtype)
 /* Check up to str_len chars of val_str to see if it's a valid value name:
  *
  * - Has between 1 and KUTF_HELPER_MAX_VAL_NAME_LEN characters before the \0 terminator
- * - And, each char is in the character set [A-Z0-9_] */
+ * - And, each char is in the character set [A-Z0-9_]
+ */
 static int validate_val_name(const char *val_str, int str_len)
 {
 	int i = 0;
@@ -87,7 +88,8 @@ static int validate_val_name(const char *val_str, int str_len)
  * e.g. "str"
  *
  * That is, before any '\\', '\n' or '"' characters. This is so we don't have
- * to escape the string */
+ * to escape the string
+ */
 static int find_quoted_string_valid_len(const char *str)
 {
 	char *ptr;
@@ -207,7 +209,8 @@ int kutf_helper_send_named_str(struct kutf_context *context,
 	str_buf_sz = val_name_len + start_delim_len + val_str_len + end_delim_len + 1;
 
 	/* Using kmalloc() here instead of mempool since we know we need to free
-	 * before we return */
+	 * before we return
+	 */
 	str_buf = kmalloc(str_buf_sz, GFP_KERNEL);
 	if (!str_buf) {
 		errmsg = kutf_dsprintf(&context->fixture_pool,
@@ -218,7 +221,8 @@ int kutf_helper_send_named_str(struct kutf_context *context,
 	copy_ptr = str_buf;
 
 	/* Manually copy each string component instead of snprintf because
-	 * val_str may need to end early, and less error path handling */
+	 * val_str may need to end early, and less error path handling
+	 */
 
 	/* name */
 	memcpy(copy_ptr, val_name, val_name_len);
@@ -331,7 +335,8 @@ int kutf_helper_receive_named_val(
 		/* possibly a number value - strtoull will parse it */
 		err = kstrtoull(recv_str, 0, &u64val);
 		/* unlike userspace can't get an end ptr, but if kstrtoull()
-		 * reads characters after the number it'll report -EINVAL */
+		 * reads characters after the number it'll report -EINVAL
+		 */
 		if (!err) {
 			int len_remain = strnlen(recv_str, recv_sz);
 
@@ -399,7 +404,8 @@ int kutf_helper_receive_check_val(
 		goto out_fail_and_fixup;
 	}
 
-	if (strcmp(named_val->val_name, expect_val_name) != 0) {
+	if (named_val->val_name != NULL &&
+			strcmp(named_val->val_name, expect_val_name) != 0) {
 		const char *msg = kutf_dsprintf(&context->fixture_pool,
 				"Expecting to receive value named '%s' but got '%s'",
 				expect_val_name, named_val->val_name);
diff --git a/drivers/gpu/arm/bifrost/tests/kutf/kutf_mem.c b/drivers/gpu/arm/bifrost/tests/kutf/kutf_mem.c
index fd98beaeb84a..b005b683474c 100644
--- a/drivers/gpu/arm/bifrost/tests/kutf/kutf_mem.c
+++ b/drivers/gpu/arm/bifrost/tests/kutf/kutf_mem.c
@@ -1,11 +1,12 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  *
- * (C) COPYRIGHT 2014, 2017 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014, 2017, 2020 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,8 +17,6 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 /* Kernel UTF memory management functions */
diff --git a/drivers/gpu/arm/bifrost/tests/kutf/kutf_resultset.c b/drivers/gpu/arm/bifrost/tests/kutf/kutf_resultset.c
index 94ecfa4421e1..1eea08c38a85 100644
--- a/drivers/gpu/arm/bifrost/tests/kutf/kutf_resultset.c
+++ b/drivers/gpu/arm/bifrost/tests/kutf/kutf_resultset.c
@@ -1,11 +1,12 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  *
- * (C) COPYRIGHT 2014, 2017 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014, 2017, 2020 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,8 +17,6 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 /* Kernel UTF result management functions */
diff --git a/drivers/gpu/arm/bifrost/tests/kutf/kutf_suite.c b/drivers/gpu/arm/bifrost/tests/kutf/kutf_suite.c
index 9dc6e2b4bad4..4b1dde455e8e 100644
--- a/drivers/gpu/arm/bifrost/tests/kutf/kutf_suite.c
+++ b/drivers/gpu/arm/bifrost/tests/kutf/kutf_suite.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  *
  * (C) COPYRIGHT 2014, 2017-2020 ARM Limited. All rights reserved.
@@ -5,7 +6,7 @@
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,12 +17,11 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 /* Kernel UTF suite, test and fixture management including user to kernel
- * interaction */
+ * interaction
+ */
 
 #include <linux/list.h>
 #include <linux/slab.h>
@@ -598,7 +598,7 @@ static int create_fixture_variant(struct kutf_test_function *test_func,
 		goto fail_file;
 	}
 
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 7, 0)
+#if KERNEL_VERSION(4, 7, 0) <= LINUX_VERSION_CODE
 	tmp = debugfs_create_file_unsafe(
 #else
 	tmp = debugfs_create_file(
@@ -634,7 +634,7 @@ static void kutf_remove_test_variant(struct kutf_test_fixture *test_fix)
 	kfree(test_fix);
 }
 
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 5, 0)
+#if KERNEL_VERSION(5, 5, 0) <= LINUX_VERSION_CODE
 /* Adapting to the upstream debugfs_create_x32() change */
 static int ktufp_u32_get(void *data, u64 *val)
 {
@@ -679,7 +679,7 @@ void kutf_add_test_with_filters_and_data(
 	}
 
 	test_func->filters = filters;
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 5, 0)
+#if KERNEL_VERSION(5, 5, 0) <= LINUX_VERSION_CODE
 	tmp = debugfs_create_file_unsafe("filters", S_IROTH, test_func->dir,
 					 &test_func->filters, &kutfp_fops_x32_ro);
 #else
@@ -692,7 +692,7 @@ void kutf_add_test_with_filters_and_data(
 	}
 
 	test_func->test_id = id;
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 5, 0)
+#if KERNEL_VERSION(5, 5, 0) <= LINUX_VERSION_CODE
 	debugfs_create_u32("test_id", S_IROTH, test_func->dir,
                        &test_func->test_id);
 #else
diff --git a/drivers/gpu/arm/bifrost/tests/kutf/kutf_utils.c b/drivers/gpu/arm/bifrost/tests/kutf/kutf_utils.c
index 7f5ac517fdb4..f0dad1f68ffb 100644
--- a/drivers/gpu/arm/bifrost/tests/kutf/kutf_utils.c
+++ b/drivers/gpu/arm/bifrost/tests/kutf/kutf_utils.c
@@ -1,11 +1,12 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  *
- * (C) COPYRIGHT 2014, 2017 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014, 2017, 2020 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,8 +17,6 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 /* Kernel UTF utility functions */
diff --git a/drivers/gpu/arm/bifrost/tests/mali_kutf_clk_rate_trace/kernel/Kbuild b/drivers/gpu/arm/bifrost/tests/mali_kutf_clk_rate_trace/kernel/Kbuild
index f5565d30f9cf..00b2a41607e1 100644
--- a/drivers/gpu/arm/bifrost/tests/mali_kutf_clk_rate_trace/kernel/Kbuild
+++ b/drivers/gpu/arm/bifrost/tests/mali_kutf_clk_rate_trace/kernel/Kbuild
@@ -1,10 +1,11 @@
+# SPDX-License-Identifier: GPL-2.0
 #
 # (C) COPYRIGHT 2020 ARM Limited. All rights reserved.
 #
 # This program is free software and is provided to you under the terms of the
 # GNU General Public License version 2 as published by the Free Software
 # Foundation, and any use by you of this program is subject to the terms
-# of such GNU licence.
+# of such GNU license.
 #
 # This program is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -15,8 +16,6 @@
 # along with this program; if not, you can access it online at
 # http://www.gnu.org/licenses/gpl-2.0.html.
 #
-# SPDX-License-Identifier: GPL-2.0
-#
 #
 
 ccflags-y += -I$(src)/../include -I$(src)/../../../ -I$(src)/../../ -I$(src)/../../backend/gpu -I$(srctree)/drivers/staging/android
diff --git a/drivers/gpu/arm/bifrost/tests/mali_kutf_clk_rate_trace/kernel/Kconfig b/drivers/gpu/arm/bifrost/tests/mali_kutf_clk_rate_trace/kernel/Kconfig
index 8196e4cc6b37..17081ba0f6cf 100644
--- a/drivers/gpu/arm/bifrost/tests/mali_kutf_clk_rate_trace/kernel/Kconfig
+++ b/drivers/gpu/arm/bifrost/tests/mali_kutf_clk_rate_trace/kernel/Kconfig
@@ -1,10 +1,11 @@
+# SPDX-License-Identifier: GPL-2.0
 #
 # (C) COPYRIGHT 2020 ARM Limited. All rights reserved.
 #
 # This program is free software and is provided to you under the terms of the
 # GNU General Public License version 2 as published by the Free Software
 # Foundation, and any use by you of this program is subject to the terms
-# of such GNU licence.
+# of such GNU license.
 #
 # This program is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -15,8 +16,6 @@
 # along with this program; if not, you can access it online at
 # http://www.gnu.org/licenses/gpl-2.0.html.
 #
-# SPDX-License-Identifier: GPL-2.0
-#
 #
 
 config CONFIG_MALI_CLK_RATE_TRACE_PORTAL
diff --git a/drivers/gpu/arm/bifrost/tests/mali_kutf_clk_rate_trace/kernel/Makefile b/drivers/gpu/arm/bifrost/tests/mali_kutf_clk_rate_trace/kernel/Makefile
index 71c78b84830c..950acd89b267 100644
--- a/drivers/gpu/arm/bifrost/tests/mali_kutf_clk_rate_trace/kernel/Makefile
+++ b/drivers/gpu/arm/bifrost/tests/mali_kutf_clk_rate_trace/kernel/Makefile
@@ -1,10 +1,11 @@
+# SPDX-License-Identifier: GPL-2.0
 #
 # (C) COPYRIGHT 2020 ARM Limited. All rights reserved.
 #
 # This program is free software and is provided to you under the terms of the
 # GNU General Public License version 2 as published by the Free Software
 # Foundation, and any use by you of this program is subject to the terms
-# of such GNU licence.
+# of such GNU license.
 #
 # This program is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -15,8 +16,6 @@
 # along with this program; if not, you can access it online at
 # http://www.gnu.org/licenses/gpl-2.0.html.
 #
-# SPDX-License-Identifier: GPL-2.0
-#
 #
 
 ifneq ($(KERNELRELEASE),)
diff --git a/drivers/gpu/arm/bifrost/tests/mali_kutf_clk_rate_trace/kernel/build.bp b/drivers/gpu/arm/bifrost/tests/mali_kutf_clk_rate_trace/kernel/build.bp
index 0cc2904db542..c16b3dea0918 100644
--- a/drivers/gpu/arm/bifrost/tests/mali_kutf_clk_rate_trace/kernel/build.bp
+++ b/drivers/gpu/arm/bifrost/tests/mali_kutf_clk_rate_trace/kernel/build.bp
@@ -1,14 +1,22 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
+ *
  * (C) COPYRIGHT 2020 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * A copy of the licence is included with the program, and can also be obtained
- * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
- * Boston, MA  02110-1301, USA.
  */
 
 bob_kernel_module {
diff --git a/drivers/gpu/arm/bifrost/tests/mali_kutf_clk_rate_trace/kernel/mali_kutf_clk_rate_trace_test.c b/drivers/gpu/arm/bifrost/tests/mali_kutf_clk_rate_trace/kernel/mali_kutf_clk_rate_trace_test.c
index d74a278bffa7..bd091fa6bc8f 100644
--- a/drivers/gpu/arm/bifrost/tests/mali_kutf_clk_rate_trace/kernel/mali_kutf_clk_rate_trace_test.c
+++ b/drivers/gpu/arm/bifrost/tests/mali_kutf_clk_rate_trace/kernel/mali_kutf_clk_rate_trace_test.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  *
  * (C) COPYRIGHT 2020 ARM Limited. All rights reserved.
@@ -5,7 +6,7 @@
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,8 +17,6 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 #include <linux/fdtable.h>
diff --git a/drivers/gpu/arm/bifrost/tests/mali_kutf_clk_rate_trace/mali_kutf_clk_rate_trace_test.h b/drivers/gpu/arm/bifrost/tests/mali_kutf_clk_rate_trace/mali_kutf_clk_rate_trace_test.h
index f46afd5086bd..600b025795df 100644
--- a/drivers/gpu/arm/bifrost/tests/mali_kutf_clk_rate_trace/mali_kutf_clk_rate_trace_test.h
+++ b/drivers/gpu/arm/bifrost/tests/mali_kutf_clk_rate_trace/mali_kutf_clk_rate_trace_test.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  *
  * (C) COPYRIGHT 2020 ARM Limited. All rights reserved.
@@ -5,7 +6,7 @@
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,8 +17,6 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 #ifndef _KUTF_CLK_RATE_TRACE_TEST_H_
diff --git a/drivers/gpu/arm/bifrost/tests/mali_kutf_irq_test/Kbuild b/drivers/gpu/arm/bifrost/tests/mali_kutf_irq_test/Kbuild
index ca8c51273b4c..cc38cb84406c 100644
--- a/drivers/gpu/arm/bifrost/tests/mali_kutf_irq_test/Kbuild
+++ b/drivers/gpu/arm/bifrost/tests/mali_kutf_irq_test/Kbuild
@@ -1,10 +1,11 @@
+# SPDX-License-Identifier: GPL-2.0
 #
-# (C) COPYRIGHT 2017 ARM Limited. All rights reserved.
+# (C) COPYRIGHT 2017, 2020 ARM Limited. All rights reserved.
 #
 # This program is free software and is provided to you under the terms of the
 # GNU General Public License version 2 as published by the Free Software
 # Foundation, and any use by you of this program is subject to the terms
-# of such GNU licence.
+# of such GNU license.
 #
 # This program is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -15,8 +16,6 @@
 # along with this program; if not, you can access it online at
 # http://www.gnu.org/licenses/gpl-2.0.html.
 #
-# SPDX-License-Identifier: GPL-2.0
-#
 #
 
 ccflags-y += -I$(src)/../include -I$(src)/../../../ -I$(src)/../../ -I$(src)/../../backend/gpu -I$(srctree)/drivers/staging/android
diff --git a/drivers/gpu/arm/bifrost/tests/mali_kutf_irq_test/Kconfig b/drivers/gpu/arm/bifrost/tests/mali_kutf_irq_test/Kconfig
index 78283307713d..90f4ec0a7c7f 100644
--- a/drivers/gpu/arm/bifrost/tests/mali_kutf_irq_test/Kconfig
+++ b/drivers/gpu/arm/bifrost/tests/mali_kutf_irq_test/Kconfig
@@ -1,10 +1,11 @@
+# SPDX-License-Identifier: GPL-2.0
 #
-# (C) COPYRIGHT 2017 ARM Limited. All rights reserved.
+# (C) COPYRIGHT 2017, 2020 ARM Limited. All rights reserved.
 #
 # This program is free software and is provided to you under the terms of the
 # GNU General Public License version 2 as published by the Free Software
 # Foundation, and any use by you of this program is subject to the terms
-# of such GNU licence.
+# of such GNU license.
 #
 # This program is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -15,8 +16,6 @@
 # along with this program; if not, you can access it online at
 # http://www.gnu.org/licenses/gpl-2.0.html.
 #
-# SPDX-License-Identifier: GPL-2.0
-#
 #
 
 config MALI_IRQ_LATENCY
diff --git a/drivers/gpu/arm/bifrost/tests/mali_kutf_irq_test/Makefile b/drivers/gpu/arm/bifrost/tests/mali_kutf_irq_test/Makefile
index bc4d654a90ca..cbec0d3ce8ca 100644
--- a/drivers/gpu/arm/bifrost/tests/mali_kutf_irq_test/Makefile
+++ b/drivers/gpu/arm/bifrost/tests/mali_kutf_irq_test/Makefile
@@ -1,10 +1,11 @@
+# SPDX-License-Identifier: GPL-2.0
 #
 # (C) COPYRIGHT 2015, 2017-2018, 2020 ARM Limited. All rights reserved.
 #
 # This program is free software and is provided to you under the terms of the
 # GNU General Public License version 2 as published by the Free Software
 # Foundation, and any use by you of this program is subject to the terms
-# of such GNU licence.
+# of such GNU license.
 #
 # This program is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -15,8 +16,6 @@
 # along with this program; if not, you can access it online at
 # http://www.gnu.org/licenses/gpl-2.0.html.
 #
-# SPDX-License-Identifier: GPL-2.0
-#
 #
 
 # linux build system bootstrap for out-of-tree module
diff --git a/drivers/gpu/arm/bifrost/tests/mali_kutf_irq_test/build.bp b/drivers/gpu/arm/bifrost/tests/mali_kutf_irq_test/build.bp
index 90efdcf9ad9c..58021c80188c 100644
--- a/drivers/gpu/arm/bifrost/tests/mali_kutf_irq_test/build.bp
+++ b/drivers/gpu/arm/bifrost/tests/mali_kutf_irq_test/build.bp
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  *
  * (C) COPYRIGHT 2018-2020 ARM Limited. All rights reserved.
@@ -5,11 +6,16 @@
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
- * A copy of the licence is included with the program, and can also be obtained
- * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
- * Boston, MA 02110-1301, USA.
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
  *
  */
 
diff --git a/drivers/gpu/arm/bifrost/tests/mali_kutf_irq_test/mali_kutf_irq_test_main.c b/drivers/gpu/arm/bifrost/tests/mali_kutf_irq_test/mali_kutf_irq_test_main.c
index 5f27c3a7e9b2..e9f276ec4f92 100644
--- a/drivers/gpu/arm/bifrost/tests/mali_kutf_irq_test/mali_kutf_irq_test_main.c
+++ b/drivers/gpu/arm/bifrost/tests/mali_kutf_irq_test/mali_kutf_irq_test_main.c
@@ -1,11 +1,12 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  *
- * (C) COPYRIGHT 2016-2018, 2020 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2016-2018, 2020-2021 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,8 +17,6 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 #include <linux/module.h>
@@ -25,8 +24,8 @@
 #include <linux/interrupt.h>
 
 #include "mali_kbase.h"
-#include <midgard/device/mali_kbase_device.h>
-#include <midgard/backend/gpu/mali_kbase_pm_internal.h>
+#include <device/mali_kbase_device.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
 
 #include <kutf/kutf_suite.h>
 #include <kutf/kutf_utils.h>
@@ -242,7 +241,7 @@ int mali_kutf_irq_test_main_init(void)
 
 	irq_app = kutf_create_application("irq");
 
-	if (NULL == irq_app) {
+	if (irq_app == NULL) {
 		pr_warn("Creation of test application failed!\n");
 		return -ENOMEM;
 	}
@@ -251,7 +250,7 @@ int mali_kutf_irq_test_main_init(void)
 			1, mali_kutf_irq_default_create_fixture,
 			mali_kutf_irq_default_remove_fixture);
 
-	if (NULL == suite) {
+	if (suite == NULL) {
 		pr_warn("Creation of test suite failed!\n");
 		kutf_destroy_application(irq_app);
 		return -ENOMEM;
diff --git a/drivers/gpu/arm/bifrost/thirdparty/mali_kbase_mmap.c b/drivers/gpu/arm/bifrost/thirdparty/mali_kbase_mmap.c
index cd90ea0ec285..a3b4a74ba94a 100644
--- a/drivers/gpu/arm/bifrost/thirdparty/mali_kbase_mmap.c
+++ b/drivers/gpu/arm/bifrost/thirdparty/mali_kbase_mmap.c
@@ -1,24 +1,4 @@
 /*
- *
- * (C) COPYRIGHT ARM Limited. All rights reserved.
- *
- * This program is free software and is provided to you under the terms of the
- * GNU General Public License version 2 as published by the Free Software
- * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, you can access it online at
- * http://www.gnu.org/licenses/gpl-2.0.html.
- *
- * SPDX-License-Identifier: GPL-2.0
- *
- *//*
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
@@ -209,7 +189,8 @@ check_current:
 			return -ENOMEM;
 		if (gap_start <= high_limit && gap_end - gap_start >= length) {
 			/* We found a suitable gap. Clip it with the original
-			 * high_limit. */
+			 * high_limit.
+			 */
 			if (gap_end > info->high_limit)
 				gap_end = info->high_limit;
 
@@ -270,6 +251,26 @@ unsigned long kbase_context_get_unmapped_area(struct kbase_context *const kctx,
 	bool is_same_4gb_page = false;
 	unsigned long ret;
 
+	/* the 'nolock' form is used here:
+	 * - the base_pfn of the SAME_VA zone does not change
+	 * - in normal use, va_size_pages is constant once the first allocation
+	 *   begins
+	 *
+	 * However, in abnormal use this function could be processing whilst
+	 * another new zone is being set up in a different thread (e.g. to
+	 * borrow part of the SAME_VA zone). In the worst case, this path may
+	 * witness a higher SAME_VA end_pfn than the code setting up the new
+	 * zone.
+	 *
+	 * This is safe because once we reach the main allocation functions,
+	 * we'll see the updated SAME_VA end_pfn and will determine that there
+	 * is no free region at the address found originally by too large a
+	 * same_va_end_addr here, and will fail the allocation gracefully.
+	 */
+	struct kbase_reg_zone *zone =
+		kbase_ctx_reg_zone_get_nolock(kctx, KBASE_REG_ZONE_SAME_VA);
+	u64 same_va_end_addr = kbase_reg_zone_end_pfn(zone) << PAGE_SHIFT;
+
 	/* err on fixed address */
 	if ((flags & MAP_FIXED) || addr)
 		return -EINVAL;
@@ -280,9 +281,8 @@ unsigned long kbase_context_get_unmapped_area(struct kbase_context *const kctx,
 		return -ENOMEM;
 
 	if (!kbase_ctx_flag(kctx, KCTX_COMPAT)) {
-
-		high_limit = min_t(unsigned long, mm->mmap_base,
-				(kctx->same_va_end << PAGE_SHIFT));
+		high_limit =
+			min_t(unsigned long, mm->mmap_base, same_va_end_addr);
 
 		/* If there's enough (> 33 bits) of GPU VA space, align
 		 * to 2MB boundaries.
@@ -319,18 +319,22 @@ unsigned long kbase_context_get_unmapped_area(struct kbase_context *const kctx,
 				}
 #if !MALI_USE_CSF
 			} else if (reg->flags & KBASE_REG_TILER_ALIGN_TOP) {
-				unsigned long extent_bytes =
-				     (unsigned long)(reg->extent << PAGE_SHIFT);
+				unsigned long extension_bytes =
+					(unsigned long)(reg->extension
+							<< PAGE_SHIFT);
 				/* kbase_check_alloc_sizes() already satisfies
 				 * these checks, but they're here to avoid
 				 * maintenance hazards due to the assumptions
-				 * involved */
-				WARN_ON(reg->extent > (ULONG_MAX >> PAGE_SHIFT));
+				 * involved
+				 */
+				WARN_ON(reg->extension >
+					(ULONG_MAX >> PAGE_SHIFT));
 				WARN_ON(reg->initial_commit > (ULONG_MAX >> PAGE_SHIFT));
-				WARN_ON(!is_power_of_2(extent_bytes));
-				align_mask = extent_bytes - 1;
+				WARN_ON(!is_power_of_2(extension_bytes));
+				align_mask = extension_bytes - 1;
 				align_offset =
-				      extent_bytes - (reg->initial_commit << PAGE_SHIFT);
+					extension_bytes -
+					(reg->initial_commit << PAGE_SHIFT);
 #endif /* !MALI_USE_CSF */
 			} else if (reg->flags & KBASE_REG_GPU_VA_SAME_4GB_PAGE) {
 				is_same_4gb_page = true;
@@ -354,11 +358,10 @@ unsigned long kbase_context_get_unmapped_area(struct kbase_context *const kctx,
 			is_same_4gb_page);
 
 	if (IS_ERR_VALUE(ret) && high_limit == mm->mmap_base &&
-			high_limit < (kctx->same_va_end << PAGE_SHIFT)) {
+	    high_limit < same_va_end_addr) {
 		/* Retry above mmap_base */
 		info.low_limit = mm->mmap_base;
-		info.high_limit = min_t(u64, TASK_SIZE,
-					(kctx->same_va_end << PAGE_SHIFT));
+		info.high_limit = min_t(u64, TASK_SIZE, same_va_end_addr);
 
 		ret = kbase_unmapped_area_topdown(&info, is_shader_code,
 				is_same_4gb_page);
diff --git a/drivers/gpu/arm/bifrost/tl/backend/mali_kbase_timeline_csf.c b/drivers/gpu/arm/bifrost/tl/backend/mali_kbase_timeline_csf.c
index abaa6bb12b9d..7455ce28843d 100644
--- a/drivers/gpu/arm/bifrost/tl/backend/mali_kbase_timeline_csf.c
+++ b/drivers/gpu/arm/bifrost/tl/backend/mali_kbase_timeline_csf.c
@@ -1,11 +1,12 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  *
- * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,8 +17,6 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 #include "../mali_kbase_tracepoints.h"
@@ -60,7 +59,7 @@ void kbase_create_timeline_objects(struct kbase_device *kbdev)
 	/* Lock the context list, to ensure no changes to the list are made
 	 * while we're summarizing the contexts and their contents.
 	 */
-	mutex_lock(&kbdev->kctx_list_lock);
+	mutex_lock(&timeline->tl_kctx_list_lock);
 
 	/* Hold the scheduler lock while we emit the current state
 	 * We also need to continue holding the lock until after the first body
@@ -90,7 +89,7 @@ void kbase_create_timeline_objects(struct kbase_device *kbdev)
 	mutex_unlock(&kbdev->csf.scheduler.lock);
 
 	/* For each context in the device... */
-	list_for_each_entry(kctx, &kbdev->kctx_list, kctx_list_link) {
+	list_for_each_entry(kctx, &timeline->tl_kctx_list, tl_kctx_list_node) {
 		size_t i;
 		struct kbase_tlstream *body =
 			&timeline->streams[TL_STREAM_TYPE_OBJ];
@@ -160,9 +159,9 @@ void kbase_create_timeline_objects(struct kbase_device *kbdev)
 		 * this iteration of the loop, so will start to correctly update
 		 * the object model state.
 		 */
-	};
+	}
 
-	mutex_unlock(&kbdev->kctx_list_lock);
+	mutex_unlock(&timeline->tl_kctx_list_lock);
 
 	/* Static object are placed into summary packet that needs to be
 	 * transmitted first. Flush all streams to make it available to
diff --git a/drivers/gpu/arm/bifrost/tl/backend/mali_kbase_timeline_jm.c b/drivers/gpu/arm/bifrost/tl/backend/mali_kbase_timeline_jm.c
index c368ac7288da..6659d2dc2eb0 100644
--- a/drivers/gpu/arm/bifrost/tl/backend/mali_kbase_timeline_jm.c
+++ b/drivers/gpu/arm/bifrost/tl/backend/mali_kbase_timeline_jm.c
@@ -1,11 +1,12 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  *
- * (C) COPYRIGHT 2019 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,8 +17,6 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 #include "../mali_kbase_tracepoints.h"
@@ -66,16 +65,16 @@ void kbase_create_timeline_objects(struct kbase_device *kbdev)
 	/* Lock the context list, to ensure no changes to the list are made
 	 * while we're summarizing the contexts and their contents.
 	 */
-	mutex_lock(&kbdev->kctx_list_lock);
+	mutex_lock(&timeline->tl_kctx_list_lock);
 
 	/* For each context in the device... */
-	list_for_each_entry(kctx, &kbdev->kctx_list, kctx_list_link) {
+	list_for_each_entry(kctx, &timeline->tl_kctx_list, tl_kctx_list_node) {
 		/* Summarize the context itself */
 		__kbase_tlstream_tl_new_ctx(summary,
 				kctx,
 				kctx->id,
 				(u32)(kctx->tgid));
-	};
+	}
 
 	/* Reset body stream buffers while holding the kctx lock.
 	 * This ensures we can't fire both summary and normal tracepoints for
@@ -87,11 +86,11 @@ void kbase_create_timeline_objects(struct kbase_device *kbdev)
 	 */
 	kbase_timeline_streams_body_reset(timeline);
 
-	mutex_unlock(&kbdev->kctx_list_lock);
+	mutex_unlock(&timeline->tl_kctx_list_lock);
 
 	/* Static object are placed into summary packet that needs to be
 	 * transmitted first. Flush all streams to make it available to
 	 * user space.
 	 */
 	kbase_timeline_streams_flush(timeline);
-}
\ No newline at end of file
+}
diff --git a/drivers/gpu/arm/bifrost/tl/mali_kbase_timeline.c b/drivers/gpu/arm/bifrost/tl/mali_kbase_timeline.c
index 8d8834fdcda6..3370343f8020 100644
--- a/drivers/gpu/arm/bifrost/tl/mali_kbase_timeline.c
+++ b/drivers/gpu/arm/bifrost/tl/mali_kbase_timeline.c
@@ -1,11 +1,12 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  *
- * (C) COPYRIGHT 2015-2020 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2015-2021 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,8 +17,6 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 #include "mali_kbase_timeline.h"
@@ -116,7 +115,7 @@ int kbase_timeline_init(struct kbase_timeline **timeline,
 	if (!timeline || !timeline_flags)
 		return -EINVAL;
 
-	result = kzalloc(sizeof(*result), GFP_KERNEL);
+	result = vzalloc(sizeof(*result));
 	if (!result)
 		return -ENOMEM;
 
@@ -128,6 +127,10 @@ int kbase_timeline_init(struct kbase_timeline **timeline,
 		kbase_tlstream_init(&result->streams[i], i,
 			&result->event_queue);
 
+	/* Initialize the kctx list */
+	mutex_init(&result->tl_kctx_list_lock);
+	INIT_LIST_HEAD(&result->tl_kctx_list);
+
 	/* Initialize autoflush timer. */
 	atomic_set(&result->autoflush_timer_active, 0);
 	kbase_timer_setup(&result->autoflush_timer,
@@ -154,10 +157,12 @@ void kbase_timeline_term(struct kbase_timeline *timeline)
 	kbase_csf_tl_reader_term(&timeline->csf_tl_reader);
 #endif
 
+	WARN_ON(!list_empty(&timeline->tl_kctx_list));
+
 	for (i = (enum tl_stream_type)0; i < TL_STREAM_TYPE_COUNT; i++)
 		kbase_tlstream_term(&timeline->streams[i]);
 
-	kfree(timeline);
+	vfree(timeline);
 }
 
 #ifdef CONFIG_MALI_BIFROST_DEVFREQ
@@ -172,11 +177,7 @@ static void kbase_tlstream_current_devfreq_target(struct kbase_device *kbdev)
 		unsigned long cur_freq = 0;
 
 		mutex_lock(&devfreq->lock);
-#if KERNEL_VERSION(4, 3, 0) > LINUX_VERSION_CODE
-		cur_freq = kbdev->current_nominal_freq;
-#else
 		cur_freq = devfreq->last_status.current_frequency;
-#endif
 		KBASE_TLSTREAM_AUX_DEVFREQ_TARGET(kbdev, (u64)cur_freq);
 		mutex_unlock(&devfreq->lock);
 	}
@@ -185,7 +186,7 @@ static void kbase_tlstream_current_devfreq_target(struct kbase_device *kbdev)
 
 int kbase_timeline_io_acquire(struct kbase_device *kbdev, u32 flags)
 {
-	int ret;
+	int ret = 0;
 	u32 timeline_flags = TLSTREAM_ENABLED | flags;
 	struct kbase_timeline *timeline = kbdev->timeline;
 
@@ -261,19 +262,30 @@ int kbase_timeline_io_acquire(struct kbase_device *kbdev, u32 flags)
 		ret = -EBUSY;
 	}
 
+	if (ret >= 0)
+		timeline->last_acquire_time = ktime_get();
+
 	return ret;
 }
 
-void kbase_timeline_streams_flush(struct kbase_timeline *timeline)
+int kbase_timeline_streams_flush(struct kbase_timeline *timeline)
 {
 	enum tl_stream_type stype;
-
+	bool has_bytes = false; /* set when any flush call returns > 0 */
+	size_t nbytes = 0;
 #if MALI_USE_CSF
-	kbase_csf_tl_reader_flush_buffer(&timeline->csf_tl_reader);
+	int ret = kbase_csf_tl_reader_flush_buffer(&timeline->csf_tl_reader);
+
+	if (ret > 0)
+		has_bytes = true;
 #endif
 
-	for (stype = 0; stype < TL_STREAM_TYPE_COUNT; stype++)
-		kbase_tlstream_flush_stream(&timeline->streams[stype]);
+	for (stype = 0; stype < TL_STREAM_TYPE_COUNT; stype++) {
+		nbytes = kbase_tlstream_flush_stream(&timeline->streams[stype]);
+		if (nbytes > 0)
+			has_bytes = true;
+	}
+	return has_bytes ? 0 : -EIO; /* -EIO if no flush call returned > 0 */
 }
 
 void kbase_timeline_streams_body_reset(struct kbase_timeline *timeline)
@@ -288,6 +300,74 @@ void kbase_timeline_streams_body_reset(struct kbase_timeline *timeline)
 #endif
 }
 
+void kbase_timeline_pre_kbase_context_destroy(struct kbase_context *kctx)
+{
+	struct kbase_timeline *const tl = kctx->kbdev->timeline;
+	struct mutex *const list_lock = &tl->tl_kctx_list_lock;
+
+	/* Unlink the context from the timeline's context list before it is
+	 * torn down, so that summarization never visits a dying context.
+	 *
+	 * Summarizing a context mid-destruction would be unsafe: the locks
+	 * it would take and the structures it would walk may already have
+	 * been destroyed.
+	 *
+	 * If the tlstream is acquired after this call but before the post
+	 * destroy call, a context destroy tracepoint is emitted with no
+	 * matching context create tracepoint; the object model remains
+	 * correct regardless.
+	 */
+	mutex_lock(list_lock);
+	list_del_init(&kctx->tl_kctx_list_node);
+	mutex_unlock(list_lock);
+}
+
+void kbase_timeline_post_kbase_context_create(struct kbase_context *kctx)
+{
+	struct kbase_device *const kbdev = kctx->kbdev;
+	struct kbase_timeline *const tl = kbdev->timeline;
+
+	/* Register the new context on the timeline's context list so that it
+	 * is included in the summary produced when the timeline is acquired.
+	 */
+	mutex_lock(&tl->tl_kctx_list_lock);
+
+	list_add(&kctx->tl_kctx_list_node, &tl->tl_kctx_list);
+
+	/* The creation tracepoints are emitted while the list lock is still
+	 * held: that way they land strictly before or strictly after a
+	 * summarization pass, never alongside one, which could otherwise
+	 * yield duplicate creation tracepoints.
+	 */
+#if MALI_USE_CSF
+	KBASE_TLSTREAM_TL_KBASE_NEW_CTX(kbdev, kctx->id,
+					kbdev->gpu_props.props.raw_props.gpu_id);
+#endif
+	/* The AOM tracepoint is emitted in CSF builds too, for dumping */
+	KBASE_TLSTREAM_TL_NEW_CTX(kbdev, kctx, kctx->id, 0);
+
+	mutex_unlock(&tl->tl_kctx_list_lock);
+}
+
+void kbase_timeline_post_kbase_context_destroy(struct kbase_context *kctx)
+{
+	struct kbase_device *const kbdev = kctx->kbdev;
+
+	/* The AOM tracepoint is emitted in CSF builds too, for dumping */
+	KBASE_TLSTREAM_TL_DEL_CTX(kbdev, kctx);
+#if MALI_USE_CSF
+	KBASE_TLSTREAM_TL_KBASE_DEL_CTX(kbdev, kctx->id);
+#endif
+
+	/* Flush so the user can see the termination tracepoints. Flushing is
+	 * safe with the timeline disabled; skipping it is an optimization.
+	 */
+	if (atomic_read(&kbdev->timeline_flags) == 0)
+		return;
+
+	kbase_timeline_streams_flush(kbdev->timeline);
+}
+
 #if MALI_UNIT_TEST
 void kbase_timeline_stats(struct kbase_timeline *timeline,
 		u32 *bytes_collected, u32 *bytes_generated)
diff --git a/drivers/gpu/arm/bifrost/tl/mali_kbase_timeline.h b/drivers/gpu/arm/bifrost/tl/mali_kbase_timeline.h
index cd48411b45cf..04653521285d 100644
--- a/drivers/gpu/arm/bifrost/tl/mali_kbase_timeline.h
+++ b/drivers/gpu/arm/bifrost/tl/mali_kbase_timeline.h
@@ -1,11 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  *
- * (C) COPYRIGHT 2015-2020 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2015-2021 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,8 +17,6 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 #if !defined(_KBASE_TIMELINE_H)
@@ -70,8 +69,10 @@ int kbase_timeline_io_acquire(struct kbase_device *kbdev, u32 flags);
  * @timeline:     Timeline instance
  *
  * Function will flush pending data in all timeline streams.
+ *
+ * Return: 0 if any flush call returned a positive count, else -EIO.
  */
-void kbase_timeline_streams_flush(struct kbase_timeline *timeline);
+int kbase_timeline_streams_flush(struct kbase_timeline *timeline);
 
 /**
  * kbase_timeline_streams_body_reset - reset timeline body streams.
@@ -81,33 +82,31 @@ void kbase_timeline_streams_flush(struct kbase_timeline *timeline);
  */
 void kbase_timeline_streams_body_reset(struct kbase_timeline *timeline);
 
-#if MALI_UNIT_TEST
 /**
- * kbase_timeline_test - start timeline stream data generator
- * @kbdev:     Kernel common context
- * @tpw_count: Number of trace point writers in each context
- * @msg_delay: Time delay in milliseconds between trace points written by one
- *             writer
- * @msg_count: Number of trace points written by one writer
- * @aux_msg:   If non-zero aux messages will be included
- *
- * This test starts a requested number of asynchronous writers in both IRQ and
- * thread context. Each writer will generate required number of test
- * tracepoints (tracepoints with embedded information about writer that
- * should be verified by user space reader). Tracepoints will be emitted in
- * all timeline body streams. If aux_msg is non-zero writer will also
- * generate not testable tracepoints (tracepoints without information about
- * writer). These tracepoints are used to check correctness of remaining
- * timeline message generating functions. Writer will wait requested time
- * between generating another set of messages. This call blocks until all
- * writers finish.
+ * kbase_timeline_post_kbase_context_create - Inform timeline that a new KBase
+ *                                            Context has been created.
+ * @kctx:    KBase Context
  */
-void kbase_timeline_test(
-	struct kbase_device *kbdev,
-	unsigned int tpw_count,
-	unsigned int msg_delay,
-	unsigned int msg_count,
-	int          aux_msg);
+void kbase_timeline_post_kbase_context_create(struct kbase_context *kctx);
+
+/**
+ * kbase_timeline_pre_kbase_context_destroy - Inform timeline that a KBase
+ *                                            Context is about to be destroyed.
+ * @kctx:    KBase Context
+ */
+void kbase_timeline_pre_kbase_context_destroy(struct kbase_context *kctx);
+
+/**
+ * kbase_timeline_post_kbase_context_destroy - Inform timeline that a KBase
+ *                                             Context has been destroyed.
+ * @kctx:    KBase Context
+ *
+ * Should be called immediately before the memory is freed, and the context ID
+ * and kbdev pointer should still be valid.
+ */
+void kbase_timeline_post_kbase_context_destroy(struct kbase_context *kctx);
+
+#if MALI_UNIT_TEST
 
 /**
  * kbase_timeline_stats - read timeline stream statistics
diff --git a/drivers/gpu/arm/bifrost/tl/mali_kbase_timeline_io.c b/drivers/gpu/arm/bifrost/tl/mali_kbase_timeline_io.c
index 724f5fa23725..e3b6fbc1eaeb 100644
--- a/drivers/gpu/arm/bifrost/tl/mali_kbase_timeline_io.c
+++ b/drivers/gpu/arm/bifrost/tl/mali_kbase_timeline_io.c
@@ -1,11 +1,12 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  *
- * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,35 +17,38 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 #include "mali_kbase_timeline_priv.h"
 #include "mali_kbase_tlstream.h"
 #include "mali_kbase_tracepoints.h"
+#include "mali_kbase_timeline.h"
 
+#include <linux/delay.h>
 #include <linux/poll.h>
 
 /* The timeline stream file operations functions. */
-static ssize_t kbasep_timeline_io_read(
-		struct file *filp,
-		char __user *buffer,
-		size_t      size,
-		loff_t      *f_pos);
-static unsigned int kbasep_timeline_io_poll(struct file *filp, poll_table *wait);
+static ssize_t kbasep_timeline_io_read(struct file *filp, char __user *buffer,
+				       size_t size, loff_t *f_pos);
+static unsigned int kbasep_timeline_io_poll(struct file *filp,
+					    poll_table *wait);
 static int kbasep_timeline_io_release(struct inode *inode, struct file *filp);
+static int kbasep_timeline_io_fsync(struct file *filp, loff_t start, loff_t end,
+				    int datasync);
 
 /* The timeline stream file operations structure. */
 const struct file_operations kbasep_tlstream_fops = {
 	.owner = THIS_MODULE,
 	.release = kbasep_timeline_io_release,
-	.read    = kbasep_timeline_io_read,
-	.poll    = kbasep_timeline_io_poll,
+	.read = kbasep_timeline_io_read,
+	.poll = kbasep_timeline_io_poll,
+	.fsync = kbasep_timeline_io_fsync, /* flushes all timeline streams */
 };
 
 /**
- * kbasep_timeline_io_packet_pending - check timeline streams for pending packets
+ * kbasep_timeline_io_packet_pending - check timeline streams for pending
+ *                                     packets
+ *
  * @timeline:      Timeline instance
  * @ready_stream:  Pointer to variable where stream will be placed
  * @rb_idx_raw:    Pointer to variable where read buffer index will be placed
@@ -56,10 +60,10 @@ const struct file_operations kbasep_tlstream_fops = {
  *
  * Return: non-zero if any of timeline streams has at last one packet ready
  */
-static int kbasep_timeline_io_packet_pending(
-		struct kbase_timeline  *timeline,
-		struct kbase_tlstream **ready_stream,
-		unsigned int           *rb_idx_raw)
+static int
+kbasep_timeline_io_packet_pending(struct kbase_timeline *timeline,
+				  struct kbase_tlstream **ready_stream,
+				  unsigned int *rb_idx_raw)
 {
 	enum tl_stream_type i;
 
@@ -78,27 +82,24 @@ static int kbasep_timeline_io_packet_pending(
 			*ready_stream = stream;
 			return 1;
 		}
-
 	}
 
 	return 0;
 }
 
 /**
- * kbasep_timeline_has_header_data() -
- *	check timeline headers for pending packets
+ * kbasep_timeline_has_header_data() - check whether any header btc counter
+ *                                     (bytes left to copy) is non-zero
  *
  * @timeline:      Timeline instance
  *
  * Return: non-zero if any of timeline headers has at last one packet ready.
  */
-static int kbasep_timeline_has_header_data(
-	struct kbase_timeline *timeline)
+static int kbasep_timeline_has_header_data(struct kbase_timeline *timeline)
 {
-	return timeline->obj_header_btc
-		|| timeline->aux_header_btc
+	return timeline->obj_header_btc || timeline->aux_header_btc
 #if MALI_USE_CSF
-		|| timeline->csf_tl_reader.tl_header.btc
+	       || timeline->csf_tl_reader.tl_header.btc
 #endif
 		;
 }
@@ -116,11 +117,9 @@ static int kbasep_timeline_has_header_data(
  *
  * Returns: 0 if success, -1 otherwise.
  */
-static inline int copy_stream_header(
-	char __user *buffer, size_t size, ssize_t *copy_len,
-	const char *hdr,
-	size_t hdr_size,
-	size_t *hdr_btc)
+static inline int copy_stream_header(char __user *buffer, size_t size,
+				     ssize_t *copy_len, const char *hdr,
+				     size_t hdr_size, size_t *hdr_btc)
 {
 	const size_t offset = hdr_size - *hdr_btc;
 	const size_t copy_size = MIN(size - *copy_len, *hdr_btc);
@@ -142,6 +141,7 @@ static inline int copy_stream_header(
 
 /**
  * kbasep_timeline_copy_header - copy timeline headers to the user
+ *
  * @timeline:    Timeline instance
  * @buffer:      Pointer to the buffer provided by user
  * @size:        Maximum amount of data that can be stored in the buffer
@@ -154,36 +154,30 @@ static inline int copy_stream_header(
  *
  * Returns: 0 if success, -1 if copy_to_user has failed.
  */
-static inline int kbasep_timeline_copy_headers(
-	struct kbase_timeline *timeline,
-	char __user *buffer,
-	size_t size,
-	ssize_t *copy_len)
+static inline int kbasep_timeline_copy_headers(struct kbase_timeline *timeline,
+					       char __user *buffer, size_t size,
+					       ssize_t *copy_len)
 {
-	if (copy_stream_header(buffer, size, copy_len,
-			obj_desc_header,
-			obj_desc_header_size,
-			&timeline->obj_header_btc))
+	if (copy_stream_header(buffer, size, copy_len, obj_desc_header,
+			       obj_desc_header_size, &timeline->obj_header_btc))
 		return -1;
 
-	if (copy_stream_header(buffer, size, copy_len,
-			aux_desc_header,
-			aux_desc_header_size,
-			&timeline->aux_header_btc))
+	if (copy_stream_header(buffer, size, copy_len, aux_desc_header,
+			       aux_desc_header_size, &timeline->aux_header_btc))
 		return -1;
 #if MALI_USE_CSF
 	if (copy_stream_header(buffer, size, copy_len,
-			timeline->csf_tl_reader.tl_header.data,
-			timeline->csf_tl_reader.tl_header.size,
-			&timeline->csf_tl_reader.tl_header.btc))
+			       timeline->csf_tl_reader.tl_header.data,
+			       timeline->csf_tl_reader.tl_header.size,
+			       &timeline->csf_tl_reader.tl_header.btc))
 		return -1;
 #endif
 	return 0;
 }
 
-
 /**
  * kbasep_timeline_io_read - copy data from streams to buffer provided by user
+ *
  * @filp:   Pointer to file structure
  * @buffer: Pointer to the buffer provided by user
  * @size:   Maximum amount of data that can be stored in the buffer
@@ -191,11 +185,8 @@ static inline int kbasep_timeline_copy_headers(
  *
  * Return: number of bytes stored in the buffer
  */
-static ssize_t kbasep_timeline_io_read(
-		struct file *filp,
-		char __user *buffer,
-		size_t      size,
-		loff_t      *f_pos)
+static ssize_t kbasep_timeline_io_read(struct file *filp, char __user *buffer,
+				       size_t size, loff_t *f_pos)
 {
 	ssize_t copy_len = 0;
 	struct kbase_timeline *timeline;
@@ -206,25 +197,25 @@ static ssize_t kbasep_timeline_io_read(
 	if (WARN_ON(!filp->private_data))
 		return -EFAULT;
 
-	timeline = (struct kbase_timeline *) filp->private_data;
+	timeline = (struct kbase_timeline *)filp->private_data;
 
 	if (!buffer)
 		return -EINVAL;
 
-	if ((*f_pos < 0) || (size < PACKET_SIZE))
+	if (*f_pos < 0)
 		return -EINVAL;
 
 	mutex_lock(&timeline->reader_lock);
 
 	while (copy_len < size) {
 		struct kbase_tlstream *stream = NULL;
-		unsigned int        rb_idx_raw = 0;
-		unsigned int        wb_idx_raw;
-		unsigned int        rb_idx;
-		size_t              rb_size;
+		unsigned int rb_idx_raw = 0;
+		unsigned int wb_idx_raw;
+		unsigned int rb_idx;
+		size_t rb_size;
 
-		if (kbasep_timeline_copy_headers(
-			    timeline, buffer, size, &copy_len)) {
+		if (kbasep_timeline_copy_headers(timeline, buffer, size,
+						 &copy_len)) {
 			copy_len = -EFAULT;
 			break;
 		}
@@ -236,17 +227,13 @@ static ssize_t kbasep_timeline_io_read(
 		 */
 		if (copy_len > 0) {
 			if (!kbasep_timeline_io_packet_pending(
-						timeline,
-						&stream,
-						&rb_idx_raw))
+				    timeline, &stream, &rb_idx_raw))
 				break;
 		} else {
 			if (wait_event_interruptible(
-						timeline->event_queue,
-						kbasep_timeline_io_packet_pending(
-							timeline,
-							&stream,
-							&rb_idx_raw))) {
+				    timeline->event_queue,
+				    kbasep_timeline_io_packet_pending(
+					    timeline, &stream, &rb_idx_raw))) {
 				copy_len = -ERESTARTSYS;
 				break;
 			}
@@ -264,10 +251,8 @@ static ssize_t kbasep_timeline_io_read(
 		rb_size = atomic_read(&stream->buffer[rb_idx].size);
 		if (rb_size > size - copy_len)
 			break;
-		if (copy_to_user(
-					&buffer[copy_len],
-					stream->buffer[rb_idx].data,
-					rb_size)) {
+		if (copy_to_user(&buffer[copy_len], stream->buffer[rb_idx].data,
+				 rb_size)) {
 			copy_len = -EFAULT;
 			break;
 		}
@@ -309,7 +294,7 @@ static ssize_t kbasep_timeline_io_read(
 static unsigned int kbasep_timeline_io_poll(struct file *filp, poll_table *wait)
 {
 	struct kbase_tlstream *stream;
-	unsigned int        rb_idx;
+	unsigned int rb_idx;
 	struct kbase_timeline *timeline;
 
 	KBASE_DEBUG_ASSERT(filp);
@@ -318,7 +303,7 @@ static unsigned int kbasep_timeline_io_poll(struct file *filp, poll_table *wait)
 	if (WARN_ON(!filp->private_data))
 		return -EFAULT;
 
-	timeline = (struct kbase_timeline *) filp->private_data;
+	timeline = (struct kbase_timeline *)filp->private_data;
 
 	/* If there are header bytes to copy, read will not block */
 	if (kbasep_timeline_has_header_data(timeline))
@@ -340,6 +325,8 @@ static unsigned int kbasep_timeline_io_poll(struct file *filp, poll_table *wait)
 static int kbasep_timeline_io_release(struct inode *inode, struct file *filp)
 {
 	struct kbase_timeline *timeline;
+	ktime_t elapsed_time;
+	s64 elapsed_time_ms, time_to_sleep;
 
 	KBASE_DEBUG_ASSERT(inode);
 	KBASE_DEBUG_ASSERT(filp);
@@ -347,7 +334,19 @@ static int kbasep_timeline_io_release(struct inode *inode, struct file *filp)
 
 	CSTD_UNUSED(inode);
 
-	timeline = (struct kbase_timeline *) filp->private_data;
+	timeline = (struct kbase_timeline *)filp->private_data;
+
+	/* Get the amount of time passed since the timeline was acquired and ensure
+	 * we sleep for long enough such that it has been at least
+	 * TIMELINE_HYSTERESIS_TIMEOUT_MS amount of time between acquire and release.
+	 * This prevents userspace from spamming acquire and release too quickly.
+	 */
+	elapsed_time = ktime_sub(ktime_get(), timeline->last_acquire_time);
+	elapsed_time_ms = ktime_to_ms(elapsed_time);
+	time_to_sleep = MIN(TIMELINE_HYSTERESIS_TIMEOUT_MS,
+	                    TIMELINE_HYSTERESIS_TIMEOUT_MS - elapsed_time_ms);
+	if (time_to_sleep > 0)
+		msleep(time_to_sleep);
 
 #if MALI_USE_CSF
 	kbase_csf_tl_reader_stop(&timeline->csf_tl_reader);
@@ -360,3 +359,20 @@ static int kbasep_timeline_io_release(struct inode *inode, struct file *filp)
 	atomic_set(timeline->timeline_flags, 0);
 	return 0;
 }
+
+static int kbasep_timeline_io_fsync(struct file *filp, loff_t start, loff_t end,
+				    int datasync)
+{
+	struct kbase_timeline *const tl = filp->private_data;
+
+	/* The range and datasync hints are ignored: every stream is flushed */
+	CSTD_UNUSED(start);
+	CSTD_UNUSED(end);
+	CSTD_UNUSED(datasync);
+
+	if (WARN_ON(!tl))
+		return -EFAULT;
+
+	/* Propagate the flush result to the caller */
+
+	return kbase_timeline_streams_flush(tl);
+}
diff --git a/drivers/gpu/arm/bifrost/tl/mali_kbase_timeline_priv.h b/drivers/gpu/arm/bifrost/tl/mali_kbase_timeline_priv.h
index 35eec467af90..8a58a13a78ea 100644
--- a/drivers/gpu/arm/bifrost/tl/mali_kbase_timeline_priv.h
+++ b/drivers/gpu/arm/bifrost/tl/mali_kbase_timeline_priv.h
@@ -1,11 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  *
- * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,8 +17,6 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 #if !defined(_KBASE_TIMELINE_PRIV_H)
@@ -35,9 +34,16 @@
 #include <linux/atomic.h>
 #include <linux/mutex.h>
 
+/* The minimum amount of time the timeline must stay acquired before release
+ * is allowed, to prevent DoS attacks.
+ */
+#define TIMELINE_HYSTERESIS_TIMEOUT_MS ((s64)500)
+
 /**
  * struct kbase_timeline - timeline state structure
  * @streams:                The timeline streams generated by kernel
+ * @tl_kctx_list:           List of contexts for timeline.
+ * @tl_kctx_list_lock:      Lock to protect @tl_kctx_list.
  * @autoflush_timer:        Autoflush timer
  * @autoflush_timer_active: If non-zero autoflush timer is active
  * @reader_lock:            Reader lock. Only one reader is allowed to
@@ -48,9 +54,13 @@
  *                          otherwise. See kbase_timeline_io_acquire().
  * @obj_header_btc:         Remaining bytes to copy for the object stream header
  * @aux_header_btc:         Remaining bytes to copy for the aux stream header
+ * @last_acquire_time:      The time at which timeline was last acquired.
+ * @csf_tl_reader:          CSFFW timeline reader
  */
 struct kbase_timeline {
 	struct kbase_tlstream streams[TL_STREAM_TYPE_COUNT];
+	struct list_head  tl_kctx_list;
+	struct mutex      tl_kctx_list_lock;
 	struct timer_list autoflush_timer;
 	atomic_t          autoflush_timer_active;
 	struct mutex      reader_lock;
@@ -61,6 +71,7 @@ struct kbase_timeline {
 	atomic_t         *timeline_flags;
 	size_t            obj_header_btc;
 	size_t            aux_header_btc;
+	ktime_t           last_acquire_time;
 #if MALI_USE_CSF
 	struct kbase_csf_tl_reader csf_tl_reader;
 #endif
diff --git a/drivers/gpu/arm/bifrost/tl/mali_kbase_tl_serialize.h b/drivers/gpu/arm/bifrost/tl/mali_kbase_tl_serialize.h
index 3e378279cf2c..f8cad4a6f8f8 100644
--- a/drivers/gpu/arm/bifrost/tl/mali_kbase_tl_serialize.h
+++ b/drivers/gpu/arm/bifrost/tl/mali_kbase_tl_serialize.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  *
  * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved.
@@ -5,7 +6,7 @@
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,8 +17,6 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 #if !defined(_KBASE_TL_SERIALIZE_H)
diff --git a/drivers/gpu/arm/bifrost/tl/mali_kbase_tlstream.c b/drivers/gpu/arm/bifrost/tl/mali_kbase_tlstream.c
index f4239cfafb9d..202c12f57572 100644
--- a/drivers/gpu/arm/bifrost/tl/mali_kbase_tlstream.c
+++ b/drivers/gpu/arm/bifrost/tl/mali_kbase_tlstream.c
@@ -1,11 +1,12 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  *
- * (C) COPYRIGHT 2015-2020 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2015-2021 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,8 +17,6 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 #include "mali_kbase_tlstream.h"
@@ -57,20 +56,19 @@ static void kbasep_packet_header_setup(
  * @numbered:   non-zero if the stream is numbered
  *
  * Function updates mutable part of packet header in the given buffer.
- * Note that value of data_size must not including size of the header.
+ * Note that value of data_size must not include size of the header.
  */
 static void kbasep_packet_header_update(
 		char  *buffer,
 		size_t data_size,
 		int    numbered)
 {
-	u32 word0;
 	u32 word1 = MIPE_PACKET_HEADER_W1((u32)data_size, !!numbered);
 
 	KBASE_DEBUG_ASSERT(buffer);
-	CSTD_UNUSED(word0);
 
-	memcpy(&buffer[sizeof(word0)], &word1, sizeof(word1));
+	/* we copy the contents of word1 to its respective position in the buffer */
+	memcpy(&buffer[sizeof(u32)], &word1, sizeof(word1));
 }
 
 /**
@@ -149,12 +147,12 @@ void kbase_tlstream_init(
 	unsigned int i;
 
 	KBASE_DEBUG_ASSERT(stream);
-	KBASE_DEBUG_ASSERT(TL_STREAM_TYPE_COUNT > stream_type);
+	KBASE_DEBUG_ASSERT(stream_type < TL_STREAM_TYPE_COUNT);
 
 	spin_lock_init(&stream->lock);
 
 	/* All packets carrying tracepoints shall be numbered. */
-	if (TL_PACKET_TYPE_BODY == tl_stream_cfg[stream_type].pkt_type)
+	if (tl_stream_cfg[stream_type].pkt_type == TL_PACKET_TYPE_BODY)
 		stream->numbered = 1;
 	else
 		stream->numbered = 0;
@@ -217,7 +215,8 @@ static size_t kbasep_tlstream_msgbuf_submit(
 
 	/* Increasing write buffer index will expose this packet to the reader.
 	 * As stream->lock is not taken on reader side we must make sure memory
-	 * is updated correctly before this will happen. */
+	 * is updated correctly before this will happen.
+	 */
 	smp_wmb();
 	atomic_inc(&stream->wbi);
 
@@ -251,7 +250,7 @@ char *kbase_tlstream_msgbuf_acquire(
 	wb_size    = atomic_read(&stream->buffer[wb_idx].size);
 
 	/* Select next buffer if data will not fit into current one. */
-	if (PACKET_SIZE < wb_size + msg_size) {
+	if (wb_size + msg_size > PACKET_SIZE) {
 		wb_size = kbasep_tlstream_msgbuf_submit(
 				stream, wb_idx_raw, wb_size);
 		wb_idx  = (wb_idx_raw + 1) % PACKET_COUNT;
@@ -277,7 +276,7 @@ void kbase_tlstream_msgbuf_release(
 	spin_unlock_irqrestore(&stream->lock, flags);
 }
 
-void kbase_tlstream_flush_stream(
+size_t kbase_tlstream_flush_stream(
 	struct kbase_tlstream *stream)
 {
 	unsigned long    flags;
@@ -286,6 +285,7 @@ void kbase_tlstream_flush_stream(
 	size_t           wb_size;
 	size_t           min_size = PACKET_HEADER_SIZE;
 
+
 	if (stream->numbered)
 		min_size += PACKET_NUMBER_SIZE;
 
@@ -300,7 +300,14 @@ void kbase_tlstream_flush_stream(
 				stream, wb_idx_raw, wb_size);
 		wb_idx = (wb_idx_raw + 1) % PACKET_COUNT;
 		atomic_set(&stream->buffer[wb_idx].size, wb_size);
+	} else {
+		/* Report that there are no bytes to be read. */
+		/* Timeline io fsync will use this info to decide whether
+		 * fsync should return an error.
+		 */
+		wb_size = 0;
 	}
-	spin_unlock_irqrestore(&stream->lock, flags);
-}
 
+	spin_unlock_irqrestore(&stream->lock, flags);
+	return wb_size;
+}
diff --git a/drivers/gpu/arm/bifrost/tl/mali_kbase_tlstream.h b/drivers/gpu/arm/bifrost/tl/mali_kbase_tlstream.h
index faf88d676b5d..2d3bbc8ccac9 100644
--- a/drivers/gpu/arm/bifrost/tl/mali_kbase_tlstream.h
+++ b/drivers/gpu/arm/bifrost/tl/mali_kbase_tlstream.h
@@ -1,11 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  *
- * (C) COPYRIGHT 2015-2020 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2015-2021 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,8 +17,6 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 #if !defined(_KBASE_TLSTREAM_H)
@@ -44,6 +43,8 @@
  * struct kbase_tlstream - timeline stream structure
  * @lock:              Message order lock
  * @buffer:            Array of buffers
+ * @buffer.size:       Number of bytes in buffer
+ * @buffer.data:       Buffer's data
  * @wbi:               Write buffer index
  * @rbi:               Read buffer index
  * @numbered:          If non-zero stream's packets are sequentially numbered
@@ -76,8 +77,8 @@ struct kbase_tlstream {
 	spinlock_t lock;
 
 	struct {
-		atomic_t size;              /* number of bytes in buffer */
-		char     data[PACKET_SIZE]; /* buffer's data */
+		atomic_t size;
+		char data[PACKET_SIZE];
 	} buffer[PACKET_COUNT];
 
 	atomic_t wbi;
@@ -162,8 +163,10 @@ void kbase_tlstream_msgbuf_release(struct kbase_tlstream *stream,
  * @stream:     Pointer to the stream structure
  *
  * Flush pending data in the timeline stream.
+ *
+ * Return: Number of bytes flushed and available to be read
+ *
  */
-void kbase_tlstream_flush_stream(struct kbase_tlstream *stream);
+size_t kbase_tlstream_flush_stream(struct kbase_tlstream *stream);
 
 #endif /* _KBASE_TLSTREAM_H */
-
diff --git a/drivers/gpu/arm/bifrost/tl/mali_kbase_tracepoints.c b/drivers/gpu/arm/bifrost/tl/mali_kbase_tracepoints.c
index de76fa57051e..ece23b318b31 100644
--- a/drivers/gpu/arm/bifrost/tl/mali_kbase_tracepoints.c
+++ b/drivers/gpu/arm/bifrost/tl/mali_kbase_tracepoints.c
@@ -1,11 +1,12 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  *
- * (C) COPYRIGHT 2010-2020 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2021 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,8 +17,6 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 /*
@@ -70,6 +69,7 @@ enum tl_msg_id_obj {
 	KBASE_TL_ARBITER_STARTED,
 	KBASE_TL_ARBITER_STOP_REQUESTED,
 	KBASE_TL_ARBITER_STOPPED,
+	KBASE_TL_ARBITER_REQUESTED,
 	KBASE_JD_GPU_SOFT_RESET,
 	KBASE_TL_KBASE_NEW_DEVICE,
 	KBASE_TL_KBASE_DEVICE_PROGRAM_CSG,
@@ -87,6 +87,8 @@ enum tl_msg_id_obj {
 	KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_MAP_IMPORT,
 	KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_UNMAP_IMPORT,
 	KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_UNMAP_IMPORT_FORCE,
+	KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_ERROR_BARRIER,
+	KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_GROUP_SUSPEND,
 	KBASE_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_JIT_ALLOC,
 	KBASE_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_JIT_ALLOC,
 	KBASE_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_JIT_ALLOC,
@@ -114,7 +116,9 @@ enum tl_msg_id_obj {
 	KBASE_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_EXECUTE_JIT_FREE_END,
 	KBASE_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_EXECUTE_JIT_FREE_END,
 	KBASE_TL_KBASE_ARRAY_END_KCPUQUEUE_EXECUTE_JIT_FREE_END,
-	KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_ERRORBARRIER,
+	KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_ERROR_BARRIER,
+	KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_GROUP_SUSPEND_START,
+	KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_GROUP_SUSPEND_END,
 	KBASE_TL_KBASE_CSFFW_TLSTREAM_OVERFLOW,
 	KBASE_TL_KBASE_CSFFW_RESET,
 	KBASE_OBJ_MSG_COUNT,
@@ -131,6 +135,7 @@ enum tl_msg_id_aux {
 	KBASE_AUX_PROTECTED_LEAVE_START,
 	KBASE_AUX_PROTECTED_LEAVE_END,
 	KBASE_AUX_JIT_STATS,
+	KBASE_AUX_TILER_HEAP_STATS,
 	KBASE_AUX_EVENT_JOB_SLOT,
 	KBASE_AUX_MSG_COUNT,
 };
@@ -284,6 +289,10 @@ enum tl_msg_id_aux {
 		"Driver has stopped using gpu", \
 		"@p", \
 		"gpu") \
+	TRACEPOINT_DESC(KBASE_TL_ARBITER_REQUESTED, \
+		"Driver has requested the arbiter for gpu access", \
+		"@p", \
+		"gpu") \
 	TRACEPOINT_DESC(KBASE_JD_GPU_SOFT_RESET, \
 		"gpu soft reset", \
 		"@p", \
@@ -334,8 +343,8 @@ enum tl_msg_id_aux {
 		"kcpu_queue,fence") \
 	TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_WAIT, \
 		"KCPU Queue enqueues Wait on Cross Queue Sync Object", \
-		"@pLI", \
-		"kcpu_queue,cqs_obj_gpu_addr,cqs_obj_compare_value") \
+		"@pLII", \
+		"kcpu_queue,cqs_obj_gpu_addr,cqs_obj_compare_value,cqs_obj_inherit_error") \
 	TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_SET, \
 		"KCPU Queue enqueues Set on Cross Queue Sync Object", \
 		"@pL", \
@@ -352,6 +361,14 @@ enum tl_msg_id_aux {
 		"KCPU Queue enqueues Unmap Import ignoring reference count", \
 		"@pL", \
 		"kcpu_queue,map_import_buf_gpu_addr") \
+	TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_ERROR_BARRIER, \
+		"KCPU Queue enqueues Error Barrier", \
+		"@p", \
+		"kcpu_queue") \
+	TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_GROUP_SUSPEND, \
+		"KCPU Queue enqueues Group Suspend", \
+		"@ppI", \
+		"kcpu_queue,group_suspend_buf,gpu_cmdq_grp_handle") \
 	TRACEPOINT_DESC(KBASE_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_JIT_ALLOC, \
 		"Begin array of KCPU Queue enqueues JIT Alloc", \
 		"@p", \
@@ -382,52 +399,52 @@ enum tl_msg_id_aux {
 		"kcpu_queue") \
 	TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_SIGNAL_END, \
 		"KCPU Queue ends a Signal on Fence", \
-		"@p", \
-		"kcpu_queue") \
+		"@pI", \
+		"kcpu_queue,execute_error") \
 	TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_WAIT_START, \
 		"KCPU Queue starts a Wait on Fence", \
 		"@p", \
 		"kcpu_queue") \
 	TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_WAIT_END, \
 		"KCPU Queue ends a Wait on Fence", \
-		"@p", \
-		"kcpu_queue") \
+		"@pI", \
+		"kcpu_queue,execute_error") \
 	TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_START, \
 		"KCPU Queue starts a Wait on an array of Cross Queue Sync Objects", \
 		"@p", \
 		"kcpu_queue") \
 	TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_END, \
 		"KCPU Queue ends a Wait on an array of Cross Queue Sync Objects", \
-		"@p", \
-		"kcpu_queue") \
+		"@pI", \
+		"kcpu_queue,execute_error") \
 	TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_SET, \
 		"KCPU Queue executes a Set on an array of Cross Queue Sync Objects", \
-		"@p", \
-		"kcpu_queue") \
+		"@pI", \
+		"kcpu_queue,execute_error") \
 	TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_MAP_IMPORT_START, \
 		"KCPU Queue starts a Map Import", \
 		"@p", \
 		"kcpu_queue") \
 	TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_MAP_IMPORT_END, \
 		"KCPU Queue ends a Map Import", \
-		"@p", \
-		"kcpu_queue") \
+		"@pI", \
+		"kcpu_queue,execute_error") \
 	TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_START, \
 		"KCPU Queue starts an Unmap Import", \
 		"@p", \
 		"kcpu_queue") \
 	TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_END, \
 		"KCPU Queue ends an Unmap Import", \
-		"@p", \
-		"kcpu_queue") \
+		"@pI", \
+		"kcpu_queue,execute_error") \
 	TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_FORCE_START, \
 		"KCPU Queue starts an Unmap Import ignoring reference count", \
 		"@p", \
 		"kcpu_queue") \
 	TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_FORCE_END, \
 		"KCPU Queue ends an Unmap Import ignoring reference count", \
-		"@p", \
-		"kcpu_queue") \
+		"@pI", \
+		"kcpu_queue,execute_error") \
 	TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_ALLOC_START, \
 		"KCPU Queue starts an array of JIT Allocs", \
 		"@p", \
@@ -438,8 +455,8 @@ enum tl_msg_id_aux {
 		"kcpu_queue") \
 	TRACEPOINT_DESC(KBASE_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_EXECUTE_JIT_ALLOC_END, \
 		"Array item of KCPU Queue ends an array of JIT Allocs", \
-		"@pLL", \
-		"kcpu_queue,jit_alloc_gpu_alloc_addr,jit_alloc_mmu_flags") \
+		"@pILL", \
+		"kcpu_queue,execute_error,jit_alloc_gpu_alloc_addr,jit_alloc_mmu_flags") \
 	TRACEPOINT_DESC(KBASE_TL_KBASE_ARRAY_END_KCPUQUEUE_EXECUTE_JIT_ALLOC_END, \
 		"End array of KCPU Queue ends an array of JIT Allocs", \
 		"@p", \
@@ -454,16 +471,24 @@ enum tl_msg_id_aux {
 		"kcpu_queue") \
 	TRACEPOINT_DESC(KBASE_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_EXECUTE_JIT_FREE_END, \
 		"Array item of KCPU Queue ends an array of JIT Frees", \
-		"@pL", \
-		"kcpu_queue,jit_free_pages_used") \
+		"@pIL", \
+		"kcpu_queue,execute_error,jit_free_pages_used") \
 	TRACEPOINT_DESC(KBASE_TL_KBASE_ARRAY_END_KCPUQUEUE_EXECUTE_JIT_FREE_END, \
 		"End array of KCPU Queue ends an array of JIT Frees", \
 		"@p", \
 		"kcpu_queue") \
-	TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_ERRORBARRIER, \
+	TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_ERROR_BARRIER, \
 		"KCPU Queue executes an Error Barrier", \
 		"@p", \
 		"kcpu_queue") \
+	TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_GROUP_SUSPEND_START, \
+		"KCPU Queue starts a group suspend", \
+		"@p", \
+		"kcpu_queue") \
+	TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_GROUP_SUSPEND_END, \
+		"KCPU Queue ends a group suspend", \
+		"@pI", \
+		"kcpu_queue,execute_error") \
 	TRACEPOINT_DESC(KBASE_TL_KBASE_CSFFW_TLSTREAM_OVERFLOW, \
 		"An overflow has happened with the CSFFW Timeline stream", \
 		"@LL", \
@@ -521,6 +546,10 @@ const size_t  obj_desc_header_size = sizeof(__obj_desc_header);
 		"per-bin JIT statistics", \
 		"@IIIIII", \
 		"ctx_nr,bid,max_allocs,allocs,va_pages,ph_pages") \
+	TRACEPOINT_DESC(KBASE_AUX_TILER_HEAP_STATS, \
+		"Tiler Heap statistics", \
+		"@ILIIIIIII", \
+		"ctx_nr,heap_id,va_pages,ph_pages,max_chunks,chunk_size,chunk_count,target_in_flight,nr_in_flight") \
 	TRACEPOINT_DESC(KBASE_AUX_EVENT_JOB_SLOT, \
 		"event on a given job slot", \
 		"@pIII", \
@@ -1541,6 +1570,28 @@ void __kbase_tlstream_tl_arbiter_stopped(
 	kbase_tlstream_msgbuf_release(stream, acq_flags);
 }
 
+void __kbase_tlstream_tl_arbiter_requested(
+	struct kbase_tlstream *stream,
+	const void *gpu)
+{
+	const u32 msg_id = KBASE_TL_ARBITER_REQUESTED;
+	const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+		+ sizeof(gpu)
+		; /* u64 term presumably sizes the timestamp field - confirm */
+	char *buffer;
+	unsigned long acq_flags;
+	size_t pos = 0;
+
+	buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+	/* Wire order: message id, timestamp, then the gpu pointer value. */
+	pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_serialize_timestamp(buffer, pos);
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &gpu, sizeof(gpu));
+
+	kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
 void __kbase_tlstream_jd_gpu_soft_reset(
 	struct kbase_tlstream *stream,
 	const void *gpu)
@@ -1797,6 +1848,60 @@ void __kbase_tlstream_aux_jit_stats(
 	kbase_tlstream_msgbuf_release(stream, acq_flags);
 }
 
+void __kbase_tlstream_aux_tiler_heap_stats(
+	struct kbase_tlstream *stream,
+	u32 ctx_nr,
+	u64 heap_id,
+	u32 va_pages,
+	u32 ph_pages,
+	u32 max_chunks,
+	u32 chunk_size,
+	u32 chunk_count,
+	u32 target_in_flight,
+	u32 nr_in_flight)
+{
+	const u32 msg_id = KBASE_AUX_TILER_HEAP_STATS;
+	const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+		+ sizeof(ctx_nr)
+		+ sizeof(heap_id)
+		+ sizeof(va_pages)
+		+ sizeof(ph_pages)
+		+ sizeof(max_chunks)
+		+ sizeof(chunk_size)
+		+ sizeof(chunk_count)
+		+ sizeof(target_in_flight)
+		+ sizeof(nr_in_flight)
+		; /* u64 term presumably sizes the timestamp field - confirm */
+	char *buffer;
+	unsigned long acq_flags;
+	size_t pos = 0;
+
+	buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+	/* Wire order: message id, timestamp, then fields in parameter order. */
+	pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_serialize_timestamp(buffer, pos);
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &ctx_nr, sizeof(ctx_nr));
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &heap_id, sizeof(heap_id));
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &va_pages, sizeof(va_pages));
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &ph_pages, sizeof(ph_pages));
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &max_chunks, sizeof(max_chunks));
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &chunk_size, sizeof(chunk_size));
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &chunk_count, sizeof(chunk_count));
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &target_in_flight, sizeof(target_in_flight));
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &nr_in_flight, sizeof(nr_in_flight));
+
+	kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
 void __kbase_tlstream_aux_event_job_slot(
 	struct kbase_tlstream *stream,
 	const void *ctx,
@@ -2125,13 +2230,15 @@ void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_cqs_wait(
 	struct kbase_tlstream *stream,
 	const void *kcpu_queue,
 	u64 cqs_obj_gpu_addr,
-	u32 cqs_obj_compare_value)
+	u32 cqs_obj_compare_value,
+	u32 cqs_obj_inherit_error)
 {
 	const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_WAIT;
 	const size_t msg_size = sizeof(msg_id) + sizeof(u64)
 		+ sizeof(kcpu_queue)
 		+ sizeof(cqs_obj_gpu_addr)
 		+ sizeof(cqs_obj_compare_value)
+		+ sizeof(cqs_obj_inherit_error)
 		;
 	char *buffer;
 	unsigned long acq_flags;
@@ -2147,6 +2254,8 @@ void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_cqs_wait(
 		pos, &cqs_obj_gpu_addr, sizeof(cqs_obj_gpu_addr));
 	pos = kbasep_serialize_bytes(buffer,
 		pos, &cqs_obj_compare_value, sizeof(cqs_obj_compare_value));
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &cqs_obj_inherit_error, sizeof(cqs_obj_inherit_error));
 
 	kbase_tlstream_msgbuf_release(stream, acq_flags);
 }
@@ -2255,6 +2364,58 @@ void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_unmap_import_force(
 	kbase_tlstream_msgbuf_release(stream, acq_flags);
 }
 
+void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_error_barrier(
+	struct kbase_tlstream *stream,
+	const void *kcpu_queue)
+{
+	const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_ERROR_BARRIER;
+	const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+		+ sizeof(kcpu_queue)
+		; /* u64 term presumably sizes the timestamp field - confirm */
+	char *buffer;
+	unsigned long acq_flags;
+	size_t pos = 0;
+
+	buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+	/* Wire order: message id, timestamp, then the queue pointer value. */
+	pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_serialize_timestamp(buffer, pos);
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &kcpu_queue, sizeof(kcpu_queue));
+
+	kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_group_suspend(
+	struct kbase_tlstream *stream,
+	const void *kcpu_queue,
+	const void *group_suspend_buf,
+	u32 gpu_cmdq_grp_handle)
+{
+	const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_GROUP_SUSPEND;
+	const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+		+ sizeof(kcpu_queue)
+		+ sizeof(group_suspend_buf)
+		+ sizeof(gpu_cmdq_grp_handle)
+		; /* u64 term presumably sizes the timestamp field - confirm */
+	char *buffer;
+	unsigned long acq_flags;
+	size_t pos = 0;
+
+	buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+	/* Both pointers are written as values; the buffers are not copied. */
+	pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_serialize_timestamp(buffer, pos);
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &kcpu_queue, sizeof(kcpu_queue));
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &group_suspend_buf, sizeof(group_suspend_buf));
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &gpu_cmdq_grp_handle, sizeof(gpu_cmdq_grp_handle));
+
+	kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
 void __kbase_tlstream_tl_kbase_array_begin_kcpuqueue_enqueue_jit_alloc(
 	struct kbase_tlstream *stream,
 	const void *kcpu_queue)
@@ -2451,11 +2612,13 @@ void __kbase_tlstream_tl_kbase_kcpuqueue_execute_fence_signal_start(
 
 void __kbase_tlstream_tl_kbase_kcpuqueue_execute_fence_signal_end(
 	struct kbase_tlstream *stream,
-	const void *kcpu_queue)
+	const void *kcpu_queue,
+	u32 execute_error)
 {
 	const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_SIGNAL_END;
 	const size_t msg_size = sizeof(msg_id) + sizeof(u64)
 		+ sizeof(kcpu_queue)
+		+ sizeof(execute_error)
 		;
 	char *buffer;
 	unsigned long acq_flags;
@@ -2467,6 +2630,8 @@ void __kbase_tlstream_tl_kbase_kcpuqueue_execute_fence_signal_end(
 	pos = kbasep_serialize_timestamp(buffer, pos);
 	pos = kbasep_serialize_bytes(buffer,
 		pos, &kcpu_queue, sizeof(kcpu_queue));
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &execute_error, sizeof(execute_error));
 
 	kbase_tlstream_msgbuf_release(stream, acq_flags);
 }
@@ -2495,11 +2660,13 @@ void __kbase_tlstream_tl_kbase_kcpuqueue_execute_fence_wait_start(
 
 void __kbase_tlstream_tl_kbase_kcpuqueue_execute_fence_wait_end(
 	struct kbase_tlstream *stream,
-	const void *kcpu_queue)
+	const void *kcpu_queue,
+	u32 execute_error)
 {
 	const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_WAIT_END;
 	const size_t msg_size = sizeof(msg_id) + sizeof(u64)
 		+ sizeof(kcpu_queue)
+		+ sizeof(execute_error)
 		;
 	char *buffer;
 	unsigned long acq_flags;
@@ -2511,6 +2678,8 @@ void __kbase_tlstream_tl_kbase_kcpuqueue_execute_fence_wait_end(
 	pos = kbasep_serialize_timestamp(buffer, pos);
 	pos = kbasep_serialize_bytes(buffer,
 		pos, &kcpu_queue, sizeof(kcpu_queue));
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &execute_error, sizeof(execute_error));
 
 	kbase_tlstream_msgbuf_release(stream, acq_flags);
 }
@@ -2539,11 +2708,13 @@ void __kbase_tlstream_tl_kbase_kcpuqueue_execute_cqs_wait_start(
 
 void __kbase_tlstream_tl_kbase_kcpuqueue_execute_cqs_wait_end(
 	struct kbase_tlstream *stream,
-	const void *kcpu_queue)
+	const void *kcpu_queue,
+	u32 execute_error)
 {
 	const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_END;
 	const size_t msg_size = sizeof(msg_id) + sizeof(u64)
 		+ sizeof(kcpu_queue)
+		+ sizeof(execute_error)
 		;
 	char *buffer;
 	unsigned long acq_flags;
@@ -2555,17 +2726,21 @@ void __kbase_tlstream_tl_kbase_kcpuqueue_execute_cqs_wait_end(
 	pos = kbasep_serialize_timestamp(buffer, pos);
 	pos = kbasep_serialize_bytes(buffer,
 		pos, &kcpu_queue, sizeof(kcpu_queue));
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &execute_error, sizeof(execute_error));
 
 	kbase_tlstream_msgbuf_release(stream, acq_flags);
 }
 
 void __kbase_tlstream_tl_kbase_kcpuqueue_execute_cqs_set(
 	struct kbase_tlstream *stream,
-	const void *kcpu_queue)
+	const void *kcpu_queue,
+	u32 execute_error)
 {
 	const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_SET;
 	const size_t msg_size = sizeof(msg_id) + sizeof(u64)
 		+ sizeof(kcpu_queue)
+		+ sizeof(execute_error)
 		;
 	char *buffer;
 	unsigned long acq_flags;
@@ -2577,6 +2752,8 @@ void __kbase_tlstream_tl_kbase_kcpuqueue_execute_cqs_set(
 	pos = kbasep_serialize_timestamp(buffer, pos);
 	pos = kbasep_serialize_bytes(buffer,
 		pos, &kcpu_queue, sizeof(kcpu_queue));
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &execute_error, sizeof(execute_error));
 
 	kbase_tlstream_msgbuf_release(stream, acq_flags);
 }
@@ -2605,11 +2782,13 @@ void __kbase_tlstream_tl_kbase_kcpuqueue_execute_map_import_start(
 
 void __kbase_tlstream_tl_kbase_kcpuqueue_execute_map_import_end(
 	struct kbase_tlstream *stream,
-	const void *kcpu_queue)
+	const void *kcpu_queue,
+	u32 execute_error)
 {
 	const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_MAP_IMPORT_END;
 	const size_t msg_size = sizeof(msg_id) + sizeof(u64)
 		+ sizeof(kcpu_queue)
+		+ sizeof(execute_error)
 		;
 	char *buffer;
 	unsigned long acq_flags;
@@ -2621,6 +2800,8 @@ void __kbase_tlstream_tl_kbase_kcpuqueue_execute_map_import_end(
 	pos = kbasep_serialize_timestamp(buffer, pos);
 	pos = kbasep_serialize_bytes(buffer,
 		pos, &kcpu_queue, sizeof(kcpu_queue));
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &execute_error, sizeof(execute_error));
 
 	kbase_tlstream_msgbuf_release(stream, acq_flags);
 }
@@ -2649,11 +2830,13 @@ void __kbase_tlstream_tl_kbase_kcpuqueue_execute_unmap_import_start(
 
 void __kbase_tlstream_tl_kbase_kcpuqueue_execute_unmap_import_end(
 	struct kbase_tlstream *stream,
-	const void *kcpu_queue)
+	const void *kcpu_queue,
+	u32 execute_error)
 {
 	const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_END;
 	const size_t msg_size = sizeof(msg_id) + sizeof(u64)
 		+ sizeof(kcpu_queue)
+		+ sizeof(execute_error)
 		;
 	char *buffer;
 	unsigned long acq_flags;
@@ -2665,6 +2848,8 @@ void __kbase_tlstream_tl_kbase_kcpuqueue_execute_unmap_import_end(
 	pos = kbasep_serialize_timestamp(buffer, pos);
 	pos = kbasep_serialize_bytes(buffer,
 		pos, &kcpu_queue, sizeof(kcpu_queue));
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &execute_error, sizeof(execute_error));
 
 	kbase_tlstream_msgbuf_release(stream, acq_flags);
 }
@@ -2693,11 +2878,13 @@ void __kbase_tlstream_tl_kbase_kcpuqueue_execute_unmap_import_force_start(
 
 void __kbase_tlstream_tl_kbase_kcpuqueue_execute_unmap_import_force_end(
 	struct kbase_tlstream *stream,
-	const void *kcpu_queue)
+	const void *kcpu_queue,
+	u32 execute_error)
 {
 	const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_FORCE_END;
 	const size_t msg_size = sizeof(msg_id) + sizeof(u64)
 		+ sizeof(kcpu_queue)
+		+ sizeof(execute_error)
 		;
 	char *buffer;
 	unsigned long acq_flags;
@@ -2709,6 +2896,8 @@ void __kbase_tlstream_tl_kbase_kcpuqueue_execute_unmap_import_force_end(
 	pos = kbasep_serialize_timestamp(buffer, pos);
 	pos = kbasep_serialize_bytes(buffer,
 		pos, &kcpu_queue, sizeof(kcpu_queue));
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &execute_error, sizeof(execute_error));
 
 	kbase_tlstream_msgbuf_release(stream, acq_flags);
 }
@@ -2760,12 +2949,14 @@ void __kbase_tlstream_tl_kbase_array_begin_kcpuqueue_execute_jit_alloc_end(
 void __kbase_tlstream_tl_kbase_array_item_kcpuqueue_execute_jit_alloc_end(
 	struct kbase_tlstream *stream,
 	const void *kcpu_queue,
+	u32 execute_error,
 	u64 jit_alloc_gpu_alloc_addr,
 	u64 jit_alloc_mmu_flags)
 {
 	const u32 msg_id = KBASE_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_EXECUTE_JIT_ALLOC_END;
 	const size_t msg_size = sizeof(msg_id) + sizeof(u64)
 		+ sizeof(kcpu_queue)
+		+ sizeof(execute_error)
 		+ sizeof(jit_alloc_gpu_alloc_addr)
 		+ sizeof(jit_alloc_mmu_flags)
 		;
@@ -2779,6 +2970,8 @@ void __kbase_tlstream_tl_kbase_array_item_kcpuqueue_execute_jit_alloc_end(
 	pos = kbasep_serialize_timestamp(buffer, pos);
 	pos = kbasep_serialize_bytes(buffer,
 		pos, &kcpu_queue, sizeof(kcpu_queue));
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &execute_error, sizeof(execute_error));
 	pos = kbasep_serialize_bytes(buffer,
 		pos, &jit_alloc_gpu_alloc_addr, sizeof(jit_alloc_gpu_alloc_addr));
 	pos = kbasep_serialize_bytes(buffer,
@@ -2856,11 +3049,13 @@ void __kbase_tlstream_tl_kbase_array_begin_kcpuqueue_execute_jit_free_end(
 void __kbase_tlstream_tl_kbase_array_item_kcpuqueue_execute_jit_free_end(
 	struct kbase_tlstream *stream,
 	const void *kcpu_queue,
+	u32 execute_error,
 	u64 jit_free_pages_used)
 {
 	const u32 msg_id = KBASE_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_EXECUTE_JIT_FREE_END;
 	const size_t msg_size = sizeof(msg_id) + sizeof(u64)
 		+ sizeof(kcpu_queue)
+		+ sizeof(execute_error)
 		+ sizeof(jit_free_pages_used)
 		;
 	char *buffer;
@@ -2873,6 +3068,8 @@ void __kbase_tlstream_tl_kbase_array_item_kcpuqueue_execute_jit_free_end(
 	pos = kbasep_serialize_timestamp(buffer, pos);
 	pos = kbasep_serialize_bytes(buffer,
 		pos, &kcpu_queue, sizeof(kcpu_queue));
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &execute_error, sizeof(execute_error));
 	pos = kbasep_serialize_bytes(buffer,
 		pos, &jit_free_pages_used, sizeof(jit_free_pages_used));
 
@@ -2901,11 +3098,11 @@ void __kbase_tlstream_tl_kbase_array_end_kcpuqueue_execute_jit_free_end(
 	kbase_tlstream_msgbuf_release(stream, acq_flags);
 }
 
-void __kbase_tlstream_tl_kbase_kcpuqueue_execute_errorbarrier(
+void __kbase_tlstream_tl_kbase_kcpuqueue_execute_error_barrier(
 	struct kbase_tlstream *stream,
 	const void *kcpu_queue)
 {
-	const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_ERRORBARRIER;
+	const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_ERROR_BARRIER;
 	const size_t msg_size = sizeof(msg_id) + sizeof(u64)
 		+ sizeof(kcpu_queue)
 		;
@@ -2923,6 +3120,54 @@ void __kbase_tlstream_tl_kbase_kcpuqueue_execute_errorbarrier(
 	kbase_tlstream_msgbuf_release(stream, acq_flags);
 }
 
+void __kbase_tlstream_tl_kbase_kcpuqueue_execute_group_suspend_start(
+	struct kbase_tlstream *stream,
+	const void *kcpu_queue)
+{
+	const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_GROUP_SUSPEND_START;
+	const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+		+ sizeof(kcpu_queue)
+		;
+	char *buffer;
+	unsigned long acq_flags;
+	size_t pos = 0;
+
+	buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+	pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_serialize_timestamp(buffer, pos);
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &kcpu_queue, sizeof(kcpu_queue));
+
+	kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_tl_kbase_kcpuqueue_execute_group_suspend_end(
+	struct kbase_tlstream *stream,
+	const void *kcpu_queue,
+	u32 execute_error)
+{
+	const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_GROUP_SUSPEND_END;
+	const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+		+ sizeof(kcpu_queue)
+		+ sizeof(execute_error)
+		;
+	char *buffer;
+	unsigned long acq_flags;
+	size_t pos = 0;
+
+	buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+	pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_serialize_timestamp(buffer, pos);
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &kcpu_queue, sizeof(kcpu_queue));
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &execute_error, sizeof(execute_error));
+
+	kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
 void __kbase_tlstream_tl_kbase_csffw_tlstream_overflow(
 	struct kbase_tlstream *stream,
 	u64 csffw_timestamp,
diff --git a/drivers/gpu/arm/bifrost/tl/mali_kbase_tracepoints.h b/drivers/gpu/arm/bifrost/tl/mali_kbase_tracepoints.h
index 5651f0a0fc57..7a8164b06291 100644
--- a/drivers/gpu/arm/bifrost/tl/mali_kbase_tracepoints.h
+++ b/drivers/gpu/arm/bifrost/tl/mali_kbase_tracepoints.h
@@ -1,11 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  *
- * (C) COPYRIGHT 2010-2020 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2021 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,8 +17,6 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 /*
@@ -238,6 +237,9 @@ void __kbase_tlstream_tl_arbiter_stop_requested(
 void __kbase_tlstream_tl_arbiter_stopped(
 	struct kbase_tlstream *stream,
 	const void *gpu);
+void __kbase_tlstream_tl_arbiter_requested(
+	struct kbase_tlstream *stream,
+	const void *gpu);
 void __kbase_tlstream_jd_gpu_soft_reset(
 	struct kbase_tlstream *stream,
 	const void *gpu);
@@ -277,6 +279,17 @@ void __kbase_tlstream_aux_jit_stats(
 	u32 allocs,
 	u32 va_pages,
 	u32 ph_pages);
+void __kbase_tlstream_aux_tiler_heap_stats(
+	struct kbase_tlstream *stream,
+	u32 ctx_nr,
+	u64 heap_id,
+	u32 va_pages,
+	u32 ph_pages,
+	u32 max_chunks,
+	u32 chunk_size,
+	u32 chunk_count,
+	u32 target_in_flight,
+	u32 nr_in_flight);
 void __kbase_tlstream_aux_event_job_slot(
 	struct kbase_tlstream *stream,
 	const void *ctx,
@@ -332,7 +345,8 @@ void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_cqs_wait(
 	struct kbase_tlstream *stream,
 	const void *kcpu_queue,
 	u64 cqs_obj_gpu_addr,
-	u32 cqs_obj_compare_value);
+	u32 cqs_obj_compare_value,
+	u32 cqs_obj_inherit_error);
 void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_cqs_set(
 	struct kbase_tlstream *stream,
 	const void *kcpu_queue,
@@ -349,6 +363,14 @@ void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_unmap_import_force(
 	struct kbase_tlstream *stream,
 	const void *kcpu_queue,
 	u64 map_import_buf_gpu_addr);
+void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_error_barrier(
+	struct kbase_tlstream *stream,
+	const void *kcpu_queue);
+void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_group_suspend(
+	struct kbase_tlstream *stream,
+	const void *kcpu_queue,
+	const void *group_suspend_buf,
+	u32 gpu_cmdq_grp_handle);
 void __kbase_tlstream_tl_kbase_array_begin_kcpuqueue_enqueue_jit_alloc(
 	struct kbase_tlstream *stream,
 	const void *kcpu_queue);
@@ -382,40 +404,47 @@ void __kbase_tlstream_tl_kbase_kcpuqueue_execute_fence_signal_start(
 	const void *kcpu_queue);
 void __kbase_tlstream_tl_kbase_kcpuqueue_execute_fence_signal_end(
 	struct kbase_tlstream *stream,
-	const void *kcpu_queue);
+	const void *kcpu_queue,
+	u32 execute_error);
 void __kbase_tlstream_tl_kbase_kcpuqueue_execute_fence_wait_start(
 	struct kbase_tlstream *stream,
 	const void *kcpu_queue);
 void __kbase_tlstream_tl_kbase_kcpuqueue_execute_fence_wait_end(
 	struct kbase_tlstream *stream,
-	const void *kcpu_queue);
+	const void *kcpu_queue,
+	u32 execute_error);
 void __kbase_tlstream_tl_kbase_kcpuqueue_execute_cqs_wait_start(
 	struct kbase_tlstream *stream,
 	const void *kcpu_queue);
 void __kbase_tlstream_tl_kbase_kcpuqueue_execute_cqs_wait_end(
 	struct kbase_tlstream *stream,
-	const void *kcpu_queue);
+	const void *kcpu_queue,
+	u32 execute_error);
 void __kbase_tlstream_tl_kbase_kcpuqueue_execute_cqs_set(
 	struct kbase_tlstream *stream,
-	const void *kcpu_queue);
+	const void *kcpu_queue,
+	u32 execute_error);
 void __kbase_tlstream_tl_kbase_kcpuqueue_execute_map_import_start(
 	struct kbase_tlstream *stream,
 	const void *kcpu_queue);
 void __kbase_tlstream_tl_kbase_kcpuqueue_execute_map_import_end(
 	struct kbase_tlstream *stream,
-	const void *kcpu_queue);
+	const void *kcpu_queue,
+	u32 execute_error);
 void __kbase_tlstream_tl_kbase_kcpuqueue_execute_unmap_import_start(
 	struct kbase_tlstream *stream,
 	const void *kcpu_queue);
 void __kbase_tlstream_tl_kbase_kcpuqueue_execute_unmap_import_end(
 	struct kbase_tlstream *stream,
-	const void *kcpu_queue);
+	const void *kcpu_queue,
+	u32 execute_error);
 void __kbase_tlstream_tl_kbase_kcpuqueue_execute_unmap_import_force_start(
 	struct kbase_tlstream *stream,
 	const void *kcpu_queue);
 void __kbase_tlstream_tl_kbase_kcpuqueue_execute_unmap_import_force_end(
 	struct kbase_tlstream *stream,
-	const void *kcpu_queue);
+	const void *kcpu_queue,
+	u32 execute_error);
 void __kbase_tlstream_tl_kbase_kcpuqueue_execute_jit_alloc_start(
 	struct kbase_tlstream *stream,
 	const void *kcpu_queue);
@@ -425,6 +454,7 @@ void __kbase_tlstream_tl_kbase_array_begin_kcpuqueue_execute_jit_alloc_end(
 void __kbase_tlstream_tl_kbase_array_item_kcpuqueue_execute_jit_alloc_end(
 	struct kbase_tlstream *stream,
 	const void *kcpu_queue,
+	u32 execute_error,
 	u64 jit_alloc_gpu_alloc_addr,
 	u64 jit_alloc_mmu_flags);
 void __kbase_tlstream_tl_kbase_array_end_kcpuqueue_execute_jit_alloc_end(
@@ -439,13 +469,21 @@ void __kbase_tlstream_tl_kbase_array_begin_kcpuqueue_execute_jit_free_end(
 void __kbase_tlstream_tl_kbase_array_item_kcpuqueue_execute_jit_free_end(
 	struct kbase_tlstream *stream,
 	const void *kcpu_queue,
+	u32 execute_error,
 	u64 jit_free_pages_used);
 void __kbase_tlstream_tl_kbase_array_end_kcpuqueue_execute_jit_free_end(
 	struct kbase_tlstream *stream,
 	const void *kcpu_queue);
-void __kbase_tlstream_tl_kbase_kcpuqueue_execute_errorbarrier(
+void __kbase_tlstream_tl_kbase_kcpuqueue_execute_error_barrier(
 	struct kbase_tlstream *stream,
 	const void *kcpu_queue);
+void __kbase_tlstream_tl_kbase_kcpuqueue_execute_group_suspend_start(
+	struct kbase_tlstream *stream,
+	const void *kcpu_queue);
+void __kbase_tlstream_tl_kbase_kcpuqueue_execute_group_suspend_end(
+	struct kbase_tlstream *stream,
+	const void *kcpu_queue,
+	u32 execute_error);
 void __kbase_tlstream_tl_kbase_csffw_tlstream_overflow(
 	struct kbase_tlstream *stream,
 	u64 csffw_timestamp,
@@ -1265,6 +1303,25 @@ struct kbase_tlstream;
 				gpu);	\
 	} while (0)
 
+/**
+ * KBASE_TLSTREAM_TL_ARBITER_REQUESTED -
+ *   Driver has requested GPU access from the arbiter
+ *
+ * @kbdev: Kbase device
+ * @gpu: Name of the GPU object
+ */
+#define KBASE_TLSTREAM_TL_ARBITER_REQUESTED(	\
+	kbdev,	\
+	gpu	\
+	)	\
+	do {	\
+		int enabled = atomic_read(&kbdev->timeline_flags);	\
+		if (enabled & TLSTREAM_ENABLED)	\
+			__kbase_tlstream_tl_arbiter_requested(	\
+				__TL_DISPATCH_STREAM(kbdev, obj),	\
+				gpu);	\
+	} while (0)
+
 /**
  * KBASE_TLSTREAM_JD_GPU_SOFT_RESET -
  *   gpu soft reset
@@ -1474,6 +1531,42 @@ struct kbase_tlstream;
 				ctx_nr, bid, max_allocs, allocs, va_pages, ph_pages);	\
 	} while (0)
 
+/**
+ * KBASE_TLSTREAM_AUX_TILER_HEAP_STATS -
+ *   Tiler Heap statistics
+ *
+ * @kbdev: Kbase device
+ * @ctx_nr: Kernel context number
+ * @heap_id: Unique id used to represent a heap under a context
+ * @va_pages: Number of virtual pages allocated in this heap
+ * @ph_pages: Number of physical pages allocated in this heap
+ * @max_chunks: The maximum number of chunks that the heap should be allowed to use
+ * @chunk_size: Size of each chunk in tiler heap, in bytes
+ * @chunk_count: The number of chunks currently allocated in the tiler heap
+ * @target_in_flight: Number of render-passes that the driver should attempt
+ * to keep in flight for which allocation of new chunks is allowed
+ * @nr_in_flight: Number of render-passes that are in flight
+ */
+#define KBASE_TLSTREAM_AUX_TILER_HEAP_STATS(	\
+	kbdev,	\
+	ctx_nr,	\
+	heap_id,	\
+	va_pages,	\
+	ph_pages,	\
+	max_chunks,	\
+	chunk_size,	\
+	chunk_count,	\
+	target_in_flight,	\
+	nr_in_flight	\
+	)	\
+	do {	\
+		int enabled = atomic_read(&kbdev->timeline_flags);	\
+		if (enabled & TLSTREAM_ENABLED)	\
+			__kbase_tlstream_aux_tiler_heap_stats(	\
+				__TL_DISPATCH_STREAM(kbdev, aux),	\
+				ctx_nr, heap_id, va_pages, ph_pages, max_chunks, chunk_size, chunk_count, target_in_flight, nr_in_flight);	\
+	} while (0)
+
 /**
  * KBASE_TLSTREAM_AUX_EVENT_JOB_SLOT -
  *   event on a given job slot
@@ -1842,27 +1935,30 @@ struct kbase_tlstream;
  * @cqs_obj_gpu_addr: CQS Object GPU ptr
  * @cqs_obj_compare_value: Semaphore value that should be exceeded
  * for the WAIT to pass
+ * @cqs_obj_inherit_error: Indicates whether the error state should be inherited into the queue
  */
 #if MALI_USE_CSF
 #define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_WAIT(	\
 	kbdev,	\
 	kcpu_queue,	\
 	cqs_obj_gpu_addr,	\
-	cqs_obj_compare_value	\
+	cqs_obj_compare_value,	\
+	cqs_obj_inherit_error	\
 	)	\
 	do {	\
 		int enabled = atomic_read(&kbdev->timeline_flags);	\
 		if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS)	\
 			__kbase_tlstream_tl_kbase_kcpuqueue_enqueue_cqs_wait(	\
 				__TL_DISPATCH_STREAM(kbdev, obj),	\
-				kcpu_queue, cqs_obj_gpu_addr, cqs_obj_compare_value);	\
+				kcpu_queue, cqs_obj_gpu_addr, cqs_obj_compare_value, cqs_obj_inherit_error);	\
 	} while (0)
 #else
 #define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_WAIT(	\
 	kbdev,	\
 	kcpu_queue,	\
 	cqs_obj_gpu_addr,	\
-	cqs_obj_compare_value	\
+	cqs_obj_compare_value,	\
+	cqs_obj_inherit_error	\
 	)	\
 	do { } while (0)
 #endif /* MALI_USE_CSF */
@@ -1987,6 +2083,66 @@ struct kbase_tlstream;
 	do { } while (0)
 #endif /* MALI_USE_CSF */
 
+/**
+ * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_ERROR_BARRIER -
+ *   KCPU Queue enqueues Error Barrier
+ *
+ * @kbdev: Kbase device
+ * @kcpu_queue: KCPU queue
+ */
+#if MALI_USE_CSF
+#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_ERROR_BARRIER(	\
+	kbdev,	\
+	kcpu_queue	\
+	)	\
+	do {	\
+		int enabled = atomic_read(&kbdev->timeline_flags);	\
+		if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS)	\
+			__kbase_tlstream_tl_kbase_kcpuqueue_enqueue_error_barrier(	\
+				__TL_DISPATCH_STREAM(kbdev, obj),	\
+				kcpu_queue);	\
+	} while (0)
+#else
+#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_ERROR_BARRIER(	\
+	kbdev,	\
+	kcpu_queue	\
+	)	\
+	do { } while (0)
+#endif /* MALI_USE_CSF */
+
+/**
+ * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_GROUP_SUSPEND -
+ *   KCPU Queue enqueues Group Suspend
+ *
+ * @kbdev: Kbase device
+ * @kcpu_queue: KCPU queue
+ * @group_suspend_buf: Pointer to the suspend buffer structure
+ * @gpu_cmdq_grp_handle: GPU Command Queue Group handle which will match userspace
+ */
+#if MALI_USE_CSF
+#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_GROUP_SUSPEND(	\
+	kbdev,	\
+	kcpu_queue,	\
+	group_suspend_buf,	\
+	gpu_cmdq_grp_handle	\
+	)	\
+	do {	\
+		int enabled = atomic_read(&kbdev->timeline_flags);	\
+		if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS)	\
+			__kbase_tlstream_tl_kbase_kcpuqueue_enqueue_group_suspend(	\
+				__TL_DISPATCH_STREAM(kbdev, obj),	\
+				kcpu_queue, group_suspend_buf, gpu_cmdq_grp_handle);	\
+	} while (0)
+#else
+#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_GROUP_SUSPEND(	\
+	kbdev,	\
+	kcpu_queue,	\
+	group_suspend_buf,	\
+	gpu_cmdq_grp_handle	\
+	)	\
+	do { } while (0)
+#endif /* MALI_USE_CSF */
+
 /**
  * KBASE_TLSTREAM_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_JIT_ALLOC -
  *   Begin array of KCPU Queue enqueues JIT Alloc
@@ -2223,23 +2379,26 @@ struct kbase_tlstream;
  *
  * @kbdev: Kbase device
  * @kcpu_queue: KCPU queue
+ * @execute_error: Non-zero error code if KCPU Queue item completed with error, else zero
  */
 #if MALI_USE_CSF
 #define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_SIGNAL_END(	\
 	kbdev,	\
-	kcpu_queue	\
+	kcpu_queue,	\
+	execute_error	\
 	)	\
 	do {	\
 		int enabled = atomic_read(&kbdev->timeline_flags);	\
 		if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS)	\
 			__kbase_tlstream_tl_kbase_kcpuqueue_execute_fence_signal_end(	\
 				__TL_DISPATCH_STREAM(kbdev, obj),	\
-				kcpu_queue);	\
+				kcpu_queue, execute_error);	\
 	} while (0)
 #else
 #define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_SIGNAL_END(	\
 	kbdev,	\
-	kcpu_queue	\
+	kcpu_queue,	\
+	execute_error	\
 	)	\
 	do { } while (0)
 #endif /* MALI_USE_CSF */
@@ -2277,23 +2436,26 @@ struct kbase_tlstream;
  *
  * @kbdev: Kbase device
  * @kcpu_queue: KCPU queue
+ * @execute_error: Non-zero error code if KCPU Queue item completed with error, else zero
  */
 #if MALI_USE_CSF
 #define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_WAIT_END(	\
 	kbdev,	\
-	kcpu_queue	\
+	kcpu_queue,	\
+	execute_error	\
 	)	\
 	do {	\
 		int enabled = atomic_read(&kbdev->timeline_flags);	\
 		if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS)	\
 			__kbase_tlstream_tl_kbase_kcpuqueue_execute_fence_wait_end(	\
 				__TL_DISPATCH_STREAM(kbdev, obj),	\
-				kcpu_queue);	\
+				kcpu_queue, execute_error);	\
 	} while (0)
 #else
 #define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_WAIT_END(	\
 	kbdev,	\
-	kcpu_queue	\
+	kcpu_queue,	\
+	execute_error	\
 	)	\
 	do { } while (0)
 #endif /* MALI_USE_CSF */
@@ -2331,23 +2493,26 @@ struct kbase_tlstream;
  *
  * @kbdev: Kbase device
  * @kcpu_queue: KCPU queue
+ * @execute_error: Non-zero error code if KCPU Queue item completed with error, else zero
  */
 #if MALI_USE_CSF
 #define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_END(	\
 	kbdev,	\
-	kcpu_queue	\
+	kcpu_queue,	\
+	execute_error	\
 	)	\
 	do {	\
 		int enabled = atomic_read(&kbdev->timeline_flags);	\
 		if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS)	\
 			__kbase_tlstream_tl_kbase_kcpuqueue_execute_cqs_wait_end(	\
 				__TL_DISPATCH_STREAM(kbdev, obj),	\
-				kcpu_queue);	\
+				kcpu_queue, execute_error);	\
 	} while (0)
 #else
 #define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_END(	\
 	kbdev,	\
-	kcpu_queue	\
+	kcpu_queue,	\
+	execute_error	\
 	)	\
 	do { } while (0)
 #endif /* MALI_USE_CSF */
@@ -2358,23 +2523,26 @@ struct kbase_tlstream;
  *
  * @kbdev: Kbase device
  * @kcpu_queue: KCPU queue
+ * @execute_error: Non-zero error code if KCPU Queue item completed with error, else zero
  */
 #if MALI_USE_CSF
 #define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_SET(	\
 	kbdev,	\
-	kcpu_queue	\
+	kcpu_queue,	\
+	execute_error	\
 	)	\
 	do {	\
 		int enabled = atomic_read(&kbdev->timeline_flags);	\
 		if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS)	\
 			__kbase_tlstream_tl_kbase_kcpuqueue_execute_cqs_set(	\
 				__TL_DISPATCH_STREAM(kbdev, obj),	\
-				kcpu_queue);	\
+				kcpu_queue, execute_error);	\
 	} while (0)
 #else
 #define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_SET(	\
 	kbdev,	\
-	kcpu_queue	\
+	kcpu_queue,	\
+	execute_error	\
 	)	\
 	do { } while (0)
 #endif /* MALI_USE_CSF */
@@ -2412,23 +2580,26 @@ struct kbase_tlstream;
  *
  * @kbdev: Kbase device
  * @kcpu_queue: KCPU queue
+ * @execute_error: Non-zero error code if KCPU Queue item completed with error, else zero
  */
 #if MALI_USE_CSF
 #define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_MAP_IMPORT_END(	\
 	kbdev,	\
-	kcpu_queue	\
+	kcpu_queue,	\
+	execute_error	\
 	)	\
 	do {	\
 		int enabled = atomic_read(&kbdev->timeline_flags);	\
 		if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS)	\
 			__kbase_tlstream_tl_kbase_kcpuqueue_execute_map_import_end(	\
 				__TL_DISPATCH_STREAM(kbdev, obj),	\
-				kcpu_queue);	\
+				kcpu_queue, execute_error);	\
 	} while (0)
 #else
 #define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_MAP_IMPORT_END(	\
 	kbdev,	\
-	kcpu_queue	\
+	kcpu_queue,	\
+	execute_error	\
 	)	\
 	do { } while (0)
 #endif /* MALI_USE_CSF */
@@ -2466,23 +2637,26 @@ struct kbase_tlstream;
  *
  * @kbdev: Kbase device
  * @kcpu_queue: KCPU queue
+ * @execute_error: Non-zero error code if KCPU Queue item completed with error, else zero
  */
 #if MALI_USE_CSF
 #define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_END(	\
 	kbdev,	\
-	kcpu_queue	\
+	kcpu_queue,	\
+	execute_error	\
 	)	\
 	do {	\
 		int enabled = atomic_read(&kbdev->timeline_flags);	\
 		if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS)	\
 			__kbase_tlstream_tl_kbase_kcpuqueue_execute_unmap_import_end(	\
 				__TL_DISPATCH_STREAM(kbdev, obj),	\
-				kcpu_queue);	\
+				kcpu_queue, execute_error);	\
 	} while (0)
 #else
 #define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_END(	\
 	kbdev,	\
-	kcpu_queue	\
+	kcpu_queue,	\
+	execute_error	\
 	)	\
 	do { } while (0)
 #endif /* MALI_USE_CSF */
@@ -2520,23 +2694,26 @@ struct kbase_tlstream;
  *
  * @kbdev: Kbase device
  * @kcpu_queue: KCPU queue
+ * @execute_error: Non-zero error code if KCPU Queue item completed with error, else zero
  */
 #if MALI_USE_CSF
 #define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_FORCE_END(	\
 	kbdev,	\
-	kcpu_queue	\
+	kcpu_queue,	\
+	execute_error	\
 	)	\
 	do {	\
 		int enabled = atomic_read(&kbdev->timeline_flags);	\
 		if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS)	\
 			__kbase_tlstream_tl_kbase_kcpuqueue_execute_unmap_import_force_end(	\
 				__TL_DISPATCH_STREAM(kbdev, obj),	\
-				kcpu_queue);	\
+				kcpu_queue, execute_error);	\
 	} while (0)
 #else
 #define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_FORCE_END(	\
 	kbdev,	\
-	kcpu_queue	\
+	kcpu_queue,	\
+	execute_error	\
 	)	\
 	do { } while (0)
 #endif /* MALI_USE_CSF */
@@ -2601,6 +2778,7 @@ struct kbase_tlstream;
  *
  * @kbdev: Kbase device
  * @kcpu_queue: KCPU queue
+ * @execute_error: Non-zero error code if KCPU Queue item completed with error, else zero
  * @jit_alloc_gpu_alloc_addr: The JIT allocated GPU virtual address
  * @jit_alloc_mmu_flags: The MMU flags for the JIT allocation
  */
@@ -2608,6 +2786,7 @@ struct kbase_tlstream;
 #define KBASE_TLSTREAM_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_EXECUTE_JIT_ALLOC_END(	\
 	kbdev,	\
 	kcpu_queue,	\
+	execute_error,	\
 	jit_alloc_gpu_alloc_addr,	\
 	jit_alloc_mmu_flags	\
 	)	\
@@ -2616,12 +2795,13 @@ struct kbase_tlstream;
 		if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS)	\
 			__kbase_tlstream_tl_kbase_array_item_kcpuqueue_execute_jit_alloc_end(	\
 				__TL_DISPATCH_STREAM(kbdev, obj),	\
-				kcpu_queue, jit_alloc_gpu_alloc_addr, jit_alloc_mmu_flags);	\
+				kcpu_queue, execute_error, jit_alloc_gpu_alloc_addr, jit_alloc_mmu_flags);	\
 	} while (0)
 #else
 #define KBASE_TLSTREAM_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_EXECUTE_JIT_ALLOC_END(	\
 	kbdev,	\
 	kcpu_queue,	\
+	execute_error,	\
 	jit_alloc_gpu_alloc_addr,	\
 	jit_alloc_mmu_flags	\
 	)	\
@@ -2715,6 +2895,7 @@ struct kbase_tlstream;
  *
  * @kbdev: Kbase device
  * @kcpu_queue: KCPU queue
+ * @execute_error: Non-zero error code if KCPU Queue item completed with error, else zero
  * @jit_free_pages_used: The actual number of pages used by the JIT
  * allocation
  */
@@ -2722,6 +2903,7 @@ struct kbase_tlstream;
 #define KBASE_TLSTREAM_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_EXECUTE_JIT_FREE_END(	\
 	kbdev,	\
 	kcpu_queue,	\
+	execute_error,	\
 	jit_free_pages_used	\
 	)	\
 	do {	\
@@ -2729,12 +2911,13 @@ struct kbase_tlstream;
 		if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS)	\
 			__kbase_tlstream_tl_kbase_array_item_kcpuqueue_execute_jit_free_end(	\
 				__TL_DISPATCH_STREAM(kbdev, obj),	\
-				kcpu_queue, jit_free_pages_used);	\
+				kcpu_queue, execute_error, jit_free_pages_used);	\
 	} while (0)
 #else
 #define KBASE_TLSTREAM_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_EXECUTE_JIT_FREE_END(	\
 	kbdev,	\
 	kcpu_queue,	\
+	execute_error,	\
 	jit_free_pages_used	\
 	)	\
 	do { } while (0)
@@ -2768,32 +2951,89 @@ struct kbase_tlstream;
 #endif /* MALI_USE_CSF */
 
 /**
- * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_ERRORBARRIER -
+ * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_ERROR_BARRIER -
  *   KCPU Queue executes an Error Barrier
  *
  * @kbdev: Kbase device
  * @kcpu_queue: KCPU queue
  */
 #if MALI_USE_CSF
-#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_ERRORBARRIER(	\
+#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_ERROR_BARRIER(	\
 	kbdev,	\
 	kcpu_queue	\
 	)	\
 	do {	\
 		int enabled = atomic_read(&kbdev->timeline_flags);	\
 		if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS)	\
-			__kbase_tlstream_tl_kbase_kcpuqueue_execute_errorbarrier(	\
+			__kbase_tlstream_tl_kbase_kcpuqueue_execute_error_barrier(	\
 				__TL_DISPATCH_STREAM(kbdev, obj),	\
 				kcpu_queue);	\
 	} while (0)
 #else
-#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_ERRORBARRIER(	\
+#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_ERROR_BARRIER(	\
 	kbdev,	\
 	kcpu_queue	\
 	)	\
 	do { } while (0)
 #endif /* MALI_USE_CSF */
 
+/**
+ * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_GROUP_SUSPEND_START -
+ *   KCPU Queue starts a group suspend
+ *
+ * @kbdev: Kbase device
+ * @kcpu_queue: KCPU queue
+ */
+#if MALI_USE_CSF
+#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_GROUP_SUSPEND_START(	\
+	kbdev,	\
+	kcpu_queue	\
+	)	\
+	do {	\
+		int enabled = atomic_read(&kbdev->timeline_flags);	\
+		if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS)	\
+			__kbase_tlstream_tl_kbase_kcpuqueue_execute_group_suspend_start(	\
+				__TL_DISPATCH_STREAM(kbdev, obj),	\
+				kcpu_queue);	\
+	} while (0)
+#else
+#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_GROUP_SUSPEND_START(	\
+	kbdev,	\
+	kcpu_queue	\
+	)	\
+	do { } while (0)
+#endif /* MALI_USE_CSF */
+
+/**
+ * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_GROUP_SUSPEND_END -
+ *   KCPU Queue ends a group suspend
+ *
+ * @kbdev: Kbase device
+ * @kcpu_queue: KCPU queue
+ * @execute_error: Non-zero error code if KCPU Queue item completed with error, else zero
+ */
+#if MALI_USE_CSF
+#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_GROUP_SUSPEND_END(	\
+	kbdev,	\
+	kcpu_queue,	\
+	execute_error	\
+	)	\
+	do {	\
+		int enabled = atomic_read(&kbdev->timeline_flags);	\
+		if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS)	\
+			__kbase_tlstream_tl_kbase_kcpuqueue_execute_group_suspend_end(	\
+				__TL_DISPATCH_STREAM(kbdev, obj),	\
+				kcpu_queue, execute_error);	\
+	} while (0)
+#else
+#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_GROUP_SUSPEND_END(	\
+	kbdev,	\
+	kcpu_queue,	\
+	execute_error	\
+	)	\
+	do { } while (0)
+#endif /* MALI_USE_CSF */
+
 /**
  * KBASE_TLSTREAM_TL_KBASE_CSFFW_TLSTREAM_OVERFLOW -
  *   An overflow has happened with the CSFFW Timeline stream
diff --git a/include/linux/memory_group_manager.h b/include/linux/memory_group_manager.h
index b1ac253d9e15..2045840e1742 100644
--- a/include/linux/memory_group_manager.h
+++ b/include/linux/memory_group_manager.h
@@ -1,11 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  *
- * (C) COPYRIGHT 2019 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,8 +17,6 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 #ifndef _MEMORY_GROUP_MANAGER_H_
diff --git a/include/linux/priority_control_manager.h b/include/linux/priority_control_manager.h
new file mode 100644
index 000000000000..df3b3cd07a14
--- /dev/null
+++ b/include/linux/priority_control_manager.h
@@ -0,0 +1,77 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ *
+ * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+#ifndef _PRIORITY_CONTROL_MANAGER_H_
+#define _PRIORITY_CONTROL_MANAGER_H_
+
+#include <linux/mm.h>
+#include <linux/of.h>
+#include <linux/version.h>
+
+struct priority_control_manager_device;
+
+/**
+ * struct priority_control_manager_ops - Callbacks for priority control manager operations
+ *
+ * @pcm_scheduler_priority_check: Callback to check if scheduling priority level can be requested
+ */
+struct priority_control_manager_ops {
+	/**
+	 * pcm_scheduler_priority_check: This function can be used to check what priority a task's
+	 *                               work would be treated as, based on requested_priority.
+	 *
+	 * @pcm_dev:                     The priority control manager through which the request is
+	 *                               being made.
+	 * @task:                        The task struct of the process requesting the priority check.
+	 * @requested_priority:          The priority level being requested.
+	 *
+	 * The returned value will be:
+	 *   The same as requested_priority if the process has permission to use requested_priority
+	 *   A lower priority value if the process does not have permission to use requested_priority
+	 *
+	 * requested_priority has the following value range:
+	 *   0-3 : Priority level, 0 being highest and 3 being lowest
+	 *
+	 * Return: The priority that would actually be given, could be lower than requested_priority
+	 */
+	int (*pcm_scheduler_priority_check)(
+		struct priority_control_manager_device *pcm_dev,
+		struct task_struct *task, int requested_priority);
+};
+
+/**
+ * struct priority_control_manager_device - Device structure for priority
+ *                                          control manager
+ *
+ * @ops:   Callbacks associated with this device
+ * @data:  Pointer to device private data
+ * @owner: Pointer to the module owner
+ *
+ * This structure should be registered with the platform device using
+ * platform_set_drvdata().
+ */
+struct priority_control_manager_device {
+	struct priority_control_manager_ops ops;
+	void *data;
+	struct module *owner;
+};
+
+#endif /* _PRIORITY_CONTROL_MANAGER_H_ */
diff --git a/include/linux/protected_memory_allocator.h b/include/linux/protected_memory_allocator.h
new file mode 100644
index 000000000000..1ccb403cc0d6
--- /dev/null
+++ b/include/linux/protected_memory_allocator.h
@@ -0,0 +1,109 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ *
+ * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+#ifndef _PROTECTED_MEMORY_ALLOCATOR_H_
+#define _PROTECTED_MEMORY_ALLOCATOR_H_
+
+#include <linux/mm.h>
+
+/**
+ * struct protected_memory_allocation - Protected memory allocation
+ *
+ * @pa:    Physical address of the protected memory allocation.
+ * @order: Size of memory allocation in pages, as a base-2 logarithm.
+ */
+struct protected_memory_allocation {
+	phys_addr_t pa;
+	unsigned int order;
+};
+
+struct protected_memory_allocator_device;
+
+/**
+ * struct protected_memory_allocator_ops - Callbacks for protected memory
+ *                                         allocator operations
+ *
+ * @pma_alloc_page:    Callback to allocate protected memory
+ * @pma_get_phys_addr: Callback to get the physical address of an allocation
+ * @pma_free_page:     Callback to free protected memory
+ */
+struct protected_memory_allocator_ops {
+	/**
+	 * pma_alloc_page - Allocate protected memory pages
+	 *
+	 * @pma_dev: The protected memory allocator the request is being made
+	 *           through.
+	 * @order:   How many pages to allocate, as a base-2 logarithm.
+	 *
+	 * Return: Pointer to allocated memory, or NULL if allocation failed.
+	 */
+	struct protected_memory_allocation *(*pma_alloc_page)(
+		struct protected_memory_allocator_device *pma_dev,
+		unsigned int order);
+
+	/**
+	 * pma_get_phys_addr - Get the physical address of the protected memory
+	 *                     allocation
+	 *
+	 * @pma_dev: The protected memory allocator the request is being made
+	 *           through.
+	 * @pma:     The protected memory allocation whose physical address
+	 *           shall be retrieved
+	 *
+	 * Return: The physical address of the given allocation.
+	 */
+	phys_addr_t (*pma_get_phys_addr)(
+		struct protected_memory_allocator_device *pma_dev,
+		struct protected_memory_allocation *pma);
+
+	/**
+	 * pma_free_page - Free a page of memory
+	 *
+	 * @pma_dev: The protected memory allocator the request is being made
+	 *           through.
+	 * @pma:     The protected memory allocation to free.
+	 */
+	void (*pma_free_page)(
+		struct protected_memory_allocator_device *pma_dev,
+		struct protected_memory_allocation *pma);
+};
+
+/**
+ * struct protected_memory_allocator_device - Device structure for protected
+ *                                            memory allocator
+ *
+ * @ops:   Callbacks associated with this device
+ * @owner: Pointer to the module owner
+ *
+ * In order for a system integrator to provide custom behaviors for protected
+ * memory operations performed by the kbase module (controller driver),
+ * they shall provide a platform-specific driver module which implements
+ * this interface.
+ *
+ * This structure should be registered with the platform device using
+ * platform_set_drvdata().
+ */
+struct protected_memory_allocator_device {
+	struct protected_memory_allocator_ops ops;
+	struct module *owner;
+};
+
+#endif /* _PROTECTED_MEMORY_ALLOCATOR_H_ */
diff --git a/include/linux/protected_mode_switcher.h b/include/linux/protected_mode_switcher.h
new file mode 100644
index 000000000000..d2c7eef764a9
--- /dev/null
+++ b/include/linux/protected_mode_switcher.h
@@ -0,0 +1,68 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ *
+ * (C) COPYRIGHT 2017, 2020 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+#ifndef _PROTECTED_MODE_SWITCH_H_
+#define _PROTECTED_MODE_SWITCH_H_
+
+struct protected_mode_device;
+
+/**
+ * struct protected_mode_ops - Callbacks for protected mode switch operations
+ *
+ * @protected_mode_enable:  Callback to enable protected mode for device
+ * @protected_mode_disable: Callback to disable protected mode for device
+ */
+struct protected_mode_ops {
+	/**
+	 * protected_mode_enable() - Enable protected mode on device
+	 * @protected_dev:	The struct protected_mode_device
+	 *
+	 * Return: 0 on success, non-zero on error
+	 */
+	int (*protected_mode_enable)(
+			struct protected_mode_device *protected_dev);
+
+	/**
+	 * protected_mode_disable() - Disable protected mode on device, and
+	 *                            reset device
+	 * @protected_dev:	The struct protected_mode_device
+	 *
+	 * Return: 0 on success, non-zero on error
+	 */
+	int (*protected_mode_disable)(
+			struct protected_mode_device *protected_dev);
+};
+
+/**
+ * struct protected_mode_device - Device structure for protected mode devices
+ *
+ * @ops:  Callbacks associated with this device
+ * @data: Pointer to device private data
+ *
+ * This structure should be registered with the platform device using
+ * platform_set_drvdata().
+ */
+struct protected_mode_device {
+	struct protected_mode_ops ops;
+	void *data;
+};
+
+#endif /* _PROTECTED_MODE_SWITCH_H_ */
diff --git a/include/uapi/gpu/arm/bifrost/backend/gpu/mali_kbase_model_dummy.h b/include/uapi/gpu/arm/bifrost/backend/gpu/mali_kbase_model_dummy.h
new file mode 100644
index 000000000000..1223f6eade49
--- /dev/null
+++ b/include/uapi/gpu/arm/bifrost/backend/gpu/mali_kbase_model_dummy.h
@@ -0,0 +1,53 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ *
+ * (C) COPYRIGHT 2021 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+/*
+ * Dummy Model interface
+ */
+
+#ifndef _UAPI_KBASE_MODEL_DUMMY_H_
+#define _UAPI_KBASE_MODEL_DUMMY_H_
+
+#include <linux/types.h>
+
+#define KBASE_DUMMY_MODEL_COUNTER_HEADER_DWORDS (4)
+#define KBASE_DUMMY_MODEL_COUNTER_PER_CORE      (60)
+#define KBASE_DUMMY_MODEL_COUNTER_PER_CORE_TYPE  \
+		(64*KBASE_DUMMY_MODEL_COUNTER_PER_CORE)
+#define KBASE_DUMMY_MODEL_COUNTERS_PER_BIT      (4) /* counters gated by each enable_mask bit */
+#define KBASE_DUMMY_MODEL_COUNTER_ENABLED(enable_mask, ctr_idx) /* non-zero if ctr_idx is enabled */ \
+	((enable_mask) & (1 << ((ctr_idx) / KBASE_DUMMY_MODEL_COUNTERS_PER_BIT)))
+
+#define KBASE_DUMMY_MODEL_HEADERS_PER_BLOCK 4
+#define KBASE_DUMMY_MODEL_COUNTERS_PER_BLOCK 60
+#define KBASE_DUMMY_MODEL_VALUES_PER_BLOCK                                     \
+	(KBASE_DUMMY_MODEL_COUNTERS_PER_BLOCK +                                \
+	 KBASE_DUMMY_MODEL_HEADERS_PER_BLOCK)
+#define KBASE_DUMMY_MODEL_BLOCK_SIZE                                           \
+	(KBASE_DUMMY_MODEL_VALUES_PER_BLOCK * sizeof(__u32))
+#define KBASE_DUMMY_MODEL_MAX_MEMSYS_BLOCKS      8
+#define KBASE_DUMMY_MODEL_MAX_SHADER_CORES       32
+#define KBASE_DUMMY_MODEL_MAX_NUM_PERF_BLOCKS    \
+	(1 + 1 + KBASE_DUMMY_MODEL_MAX_MEMSYS_BLOCKS + KBASE_DUMMY_MODEL_MAX_SHADER_CORES)
+#define KBASE_DUMMY_MODEL_COUNTER_TOTAL          \
+	(KBASE_DUMMY_MODEL_MAX_NUM_PERF_BLOCKS * KBASE_DUMMY_MODEL_COUNTER_PER_CORE_TYPE)
+
+#endif /* _UAPI_KBASE_MODEL_DUMMY_H_ */
diff --git a/drivers/gpu/arm/bifrost/csf/mali_base_csf_kernel.h b/include/uapi/gpu/arm/bifrost/csf/mali_base_csf_kernel.h
similarity index 63%
rename from drivers/gpu/arm/bifrost/csf/mali_base_csf_kernel.h
rename to include/uapi/gpu/arm/bifrost/csf/mali_base_csf_kernel.h
index 301146cbedd3..72572e5cf319 100644
--- a/drivers/gpu/arm/bifrost/csf/mali_base_csf_kernel.h
+++ b/include/uapi/gpu/arm/bifrost/csf/mali_base_csf_kernel.h
@@ -1,11 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
 /*
  *
- * (C) COPYRIGHT 2020 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,12 +17,12 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
-#ifndef _BASE_CSF_KERNEL_H_
-#define _BASE_CSF_KERNEL_H_
+#ifndef _UAPI_BASE_CSF_KERNEL_H_
+#define _UAPI_BASE_CSF_KERNEL_H_
+
+#include <linux/types.h>
 
 /* Memory allocation, access/hint flags.
  *
@@ -204,7 +205,7 @@
 /**
  * Valid set of just-in-time memory allocation flags
  */
-#define BASE_JIT_ALLOC_VALID_FLAGS ((u8)0)
+#define BASE_JIT_ALLOC_VALID_FLAGS ((__u8)0)
 
 /* Flags to pass to ::base_context_init.
  * Flags can be ORed together to enable multiple things.
@@ -212,7 +213,7 @@
  * These share the same space as BASEP_CONTEXT_FLAG_*, and so must
  * not collide with them.
  */
-typedef u32 base_context_create_flags;
+typedef __u32 base_context_create_flags;
 
 /* No flags set */
 #define BASE_CONTEXT_CREATE_FLAG_NONE ((base_context_create_flags)0)
@@ -229,11 +230,10 @@ typedef u32 base_context_create_flags;
 #define BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED \
 	((base_context_create_flags)1 << 1)
 
-/* Create CSF event thread.
+/* Base context creates a CSF event notification thread.
  *
- * The creation of a CSF event thread is conditional and only allowed in
- * unit tests for the moment, in order to avoid clashes with the existing
- * Base unit tests.
+ * The creation of a CSF event notification thread is conditional but
+ * mandatory for the handling of CSF events.
  */
 #define BASE_CONTEXT_CSF_EVENT_THREAD ((base_context_create_flags)1 << 2)
 
@@ -290,25 +290,75 @@ typedef u32 base_context_create_flags;
 
 #define BASE_QUEUE_MAX_PRIORITY (15U)
 
-/* CQS Sync object is an array of u32 event_mem[2], error field index is 1 */
+/* CQS Sync object is an array of __u32 event_mem[2], error field index is 1 */
 #define BASEP_EVENT_VAL_INDEX (0U)
 #define BASEP_EVENT_ERR_INDEX (1U)
 
 /* The upper limit for number of objects that could be waited/set per command.
  * This limit is now enforced as internally the error inherit inputs are
- * converted to 32-bit flags in a u32 variable occupying a previously padding
+ * converted to 32-bit flags in a __u32 variable occupying a previously padding
  * field.
  */
 #define BASEP_KCPU_CQS_MAX_NUM_OBJS ((size_t)32)
 
+#if MALI_UNIT_TEST
 /**
  * enum base_kcpu_command_type - Kernel CPU queue command type.
+ * @BASE_KCPU_COMMAND_TYPE_FENCE_SIGNAL:       fence_signal,
+ * @BASE_KCPU_COMMAND_TYPE_FENCE_WAIT:         fence_wait,
+ * @BASE_KCPU_COMMAND_TYPE_CQS_WAIT:           cqs_wait,
+ * @BASE_KCPU_COMMAND_TYPE_CQS_SET:            cqs_set,
+ * @BASE_KCPU_COMMAND_TYPE_CQS_WAIT_OPERATION: cqs_wait_operation,
+ * @BASE_KCPU_COMMAND_TYPE_CQS_SET_OPERATION:  cqs_set_operation,
+ * @BASE_KCPU_COMMAND_TYPE_MAP_IMPORT:         map_import,
+ * @BASE_KCPU_COMMAND_TYPE_UNMAP_IMPORT:       unmap_import,
+ * @BASE_KCPU_COMMAND_TYPE_UNMAP_IMPORT_FORCE: unmap_import_force,
+ * @BASE_KCPU_COMMAND_TYPE_JIT_ALLOC:          jit_alloc,
+ * @BASE_KCPU_COMMAND_TYPE_JIT_FREE:           jit_free,
+ * @BASE_KCPU_COMMAND_TYPE_GROUP_SUSPEND:      group_suspend,
+ * @BASE_KCPU_COMMAND_TYPE_ERROR_BARRIER:      error_barrier,
+ * @BASE_KCPU_COMMAND_TYPE_SAMPLE_TIME:        sample_time,
  */
 enum base_kcpu_command_type {
 	BASE_KCPU_COMMAND_TYPE_FENCE_SIGNAL,
 	BASE_KCPU_COMMAND_TYPE_FENCE_WAIT,
 	BASE_KCPU_COMMAND_TYPE_CQS_WAIT,
 	BASE_KCPU_COMMAND_TYPE_CQS_SET,
+	BASE_KCPU_COMMAND_TYPE_CQS_WAIT_OPERATION,
+	BASE_KCPU_COMMAND_TYPE_CQS_SET_OPERATION,
+	BASE_KCPU_COMMAND_TYPE_MAP_IMPORT,
+	BASE_KCPU_COMMAND_TYPE_UNMAP_IMPORT,
+	BASE_KCPU_COMMAND_TYPE_UNMAP_IMPORT_FORCE,
+	BASE_KCPU_COMMAND_TYPE_JIT_ALLOC,
+	BASE_KCPU_COMMAND_TYPE_JIT_FREE,
+	BASE_KCPU_COMMAND_TYPE_GROUP_SUSPEND,
+	BASE_KCPU_COMMAND_TYPE_ERROR_BARRIER,
+	BASE_KCPU_COMMAND_TYPE_SAMPLE_TIME,
+};
+#else
+/**
+ * enum base_kcpu_command_type - Kernel CPU queue command type.
+ * @BASE_KCPU_COMMAND_TYPE_FENCE_SIGNAL:       fence_signal,
+ * @BASE_KCPU_COMMAND_TYPE_FENCE_WAIT:         fence_wait,
+ * @BASE_KCPU_COMMAND_TYPE_CQS_WAIT:           cqs_wait,
+ * @BASE_KCPU_COMMAND_TYPE_CQS_SET:            cqs_set,
+ * @BASE_KCPU_COMMAND_TYPE_CQS_WAIT_OPERATION: cqs_wait_operation,
+ * @BASE_KCPU_COMMAND_TYPE_CQS_SET_OPERATION:  cqs_set_operation,
+ * @BASE_KCPU_COMMAND_TYPE_MAP_IMPORT:         map_import,
+ * @BASE_KCPU_COMMAND_TYPE_UNMAP_IMPORT:       unmap_import,
+ * @BASE_KCPU_COMMAND_TYPE_UNMAP_IMPORT_FORCE: unmap_import_force,
+ * @BASE_KCPU_COMMAND_TYPE_JIT_ALLOC:          jit_alloc,
+ * @BASE_KCPU_COMMAND_TYPE_JIT_FREE:           jit_free,
+ * @BASE_KCPU_COMMAND_TYPE_GROUP_SUSPEND:      group_suspend,
+ * @BASE_KCPU_COMMAND_TYPE_ERROR_BARRIER:      error_barrier,
+ */
+enum base_kcpu_command_type {
+	BASE_KCPU_COMMAND_TYPE_FENCE_SIGNAL,
+	BASE_KCPU_COMMAND_TYPE_FENCE_WAIT,
+	BASE_KCPU_COMMAND_TYPE_CQS_WAIT,
+	BASE_KCPU_COMMAND_TYPE_CQS_SET,
+	BASE_KCPU_COMMAND_TYPE_CQS_WAIT_OPERATION,
+	BASE_KCPU_COMMAND_TYPE_CQS_SET_OPERATION,
 	BASE_KCPU_COMMAND_TYPE_MAP_IMPORT,
 	BASE_KCPU_COMMAND_TYPE_UNMAP_IMPORT,
 	BASE_KCPU_COMMAND_TYPE_UNMAP_IMPORT_FORCE,
@@ -317,17 +367,20 @@ enum base_kcpu_command_type {
 	BASE_KCPU_COMMAND_TYPE_GROUP_SUSPEND,
 	BASE_KCPU_COMMAND_TYPE_ERROR_BARRIER,
 };
+#endif /* MALI_UNIT_TEST */
 
 /**
  * enum base_queue_group_priority - Priority of a GPU Command Queue Group.
- * @BASE_QUEUE_GROUP_PRIORITY_HIGH:   GPU Command Queue Group is of high
- *                                    priority.
- * @BASE_QUEUE_GROUP_PRIORITY_MEDIUM: GPU Command Queue Group is of medium
- *                                    priority.
- * @BASE_QUEUE_GROUP_PRIORITY_LOW:    GPU Command Queue Group is of low
- *                                    priority.
- * @BASE_QUEUE_GROUP_PRIORITY_COUNT:  Number of GPU Command Queue Group
- *                                    priority levels.
+ * @BASE_QUEUE_GROUP_PRIORITY_HIGH:     GPU Command Queue Group is of high
+ *                                      priority.
+ * @BASE_QUEUE_GROUP_PRIORITY_MEDIUM:   GPU Command Queue Group is of medium
+ *                                      priority.
+ * @BASE_QUEUE_GROUP_PRIORITY_LOW:      GPU Command Queue Group is of low
+ *                                      priority.
+ * @BASE_QUEUE_GROUP_PRIORITY_REALTIME: GPU Command Queue Group is of real-time
+ *                                      priority.
+ * @BASE_QUEUE_GROUP_PRIORITY_COUNT:    Number of GPU Command Queue Group
+ *                                      priority levels.
  *
  * Currently this is in order of highest to lowest, but if new levels are added
  * then those new levels may be out of order to preserve the ABI compatibility
@@ -342,33 +395,123 @@ enum base_queue_group_priority {
 	BASE_QUEUE_GROUP_PRIORITY_HIGH = 0,
 	BASE_QUEUE_GROUP_PRIORITY_MEDIUM,
 	BASE_QUEUE_GROUP_PRIORITY_LOW,
+	BASE_QUEUE_GROUP_PRIORITY_REALTIME,
 	BASE_QUEUE_GROUP_PRIORITY_COUNT
 };
 
 struct base_kcpu_command_fence_info {
-	u64 fence;
+	__u64 fence;
 };
 
-struct base_cqs_wait {
-	u64 addr;
-	u32 val;
-	u32 padding;
+struct base_cqs_wait_info {
+	__u64 addr;
+	__u32 val;
+	__u32 padding;
 };
 
 struct base_kcpu_command_cqs_wait_info {
-	u64 objs;
-	u32 nr_objs;
-	u32 inherit_err_flags;
+	__u64 objs;
+	__u32 nr_objs;
+	__u32 inherit_err_flags;
 };
 
 struct base_cqs_set {
-	u64 addr;
+	__u64 addr;
 };
 
 struct base_kcpu_command_cqs_set_info {
-	u64 objs;
-	u32 nr_objs;
-	u32 propagate_flags;
+	__u64 objs;
+	__u32 nr_objs;
+	__u32 padding;
+};
+
+/**
+ * basep_cqs_data_type - Enumeration of CQS Data Types
+ *
+ * @BASEP_CQS_DATA_TYPE_U32: The Data Type of a CQS Object's value
+ *                           is an unsigned 32-bit integer
+ * @BASEP_CQS_DATA_TYPE_U64: The Data Type of a CQS Object's value
+ *                           is an unsigned 64-bit integer
+ */
+typedef enum PACKED {
+	BASEP_CQS_DATA_TYPE_U32 = 0,
+	BASEP_CQS_DATA_TYPE_U64 = 1,
+} basep_cqs_data_type;
+
+/**
+ * basep_cqs_wait_operation_op - Enumeration of CQS Object Wait
+ *                                Operation conditions
+ *
+ * @BASEP_CQS_WAIT_OPERATION_LE: CQS Wait Operation indicating that a
+ *                                wait will be satisfied when a CQS Object's
+ *                                value is Less than or Equal to
+ *                                the Wait Operation value
+ * @BASEP_CQS_WAIT_OPERATION_GT: CQS Wait Operation indicating that a
+ *                                wait will be satisfied when a CQS Object's
+ *                                value is Greater than the Wait Operation value
+ */
+typedef enum {
+	BASEP_CQS_WAIT_OPERATION_LE = 0,
+	BASEP_CQS_WAIT_OPERATION_GT = 1,
+} basep_cqs_wait_operation_op;
+
+struct base_cqs_wait_operation_info {
+	__u64 addr;
+	__u64 val;
+	__u8 operation;
+	__u8 data_type;
+	__u8 padding[6];
+};
+
+/**
+ * struct base_kcpu_command_cqs_wait_operation_info - structure which contains information
+ *		about the Timeline CQS wait objects
+ *
+ * @objs:              An array of Timeline CQS waits.
+ * @nr_objs:           Number of Timeline CQS waits in the array.
+ * @inherit_err_flags: Bit-pattern for the CQSs in the array whose error field
+ *                     is to serve as the source for importing into the
+ *                     queue's error-state.
+ */
+struct base_kcpu_command_cqs_wait_operation_info {
+	__u64 objs;
+	__u32 nr_objs;
+	__u32 inherit_err_flags;
+};
+
+/**
+ * basep_cqs_set_operation_op - Enumeration of CQS Set Operations
+ *
+ * @BASEP_CQS_SET_OPERATION_ADD: CQS Set operation for adding a value
+ *                                to a synchronization object
+ * @BASEP_CQS_SET_OPERATION_SET: CQS Set operation for setting the value
+ *                                of a synchronization object
+ */
+typedef enum {
+	BASEP_CQS_SET_OPERATION_ADD = 0,
+	BASEP_CQS_SET_OPERATION_SET = 1,
+} basep_cqs_set_operation_op;
+
+struct base_cqs_set_operation_info {
+	__u64 addr;
+	__u64 val;
+	__u8 operation;
+	__u8 data_type;
+	__u8 padding[6];
+};
+
+/**
+ * struct base_kcpu_command_cqs_set_operation_info - structure which contains information
+ *		about the Timeline CQS set objects
+ *
+ * @objs:    An array of Timeline CQS sets.
+ * @nr_objs: Number of Timeline CQS sets in the array.
+ * @padding: Structure padding, unused bytes.
+ */
+struct base_kcpu_command_cqs_set_operation_info {
+	__u64 objs;
+	__u32 nr_objs;
+	__u32 padding;
 };
 
 /**
@@ -378,7 +521,7 @@ struct base_kcpu_command_cqs_set_info {
  * @handle:	Address of imported user buffer.
  */
 struct base_kcpu_command_import_info {
-	u64 handle;
+	__u64 handle;
 };
 
 /**
@@ -391,9 +534,9 @@ struct base_kcpu_command_import_info {
  * @padding:	Padding to a multiple of 64 bits.
  */
 struct base_kcpu_command_jit_alloc_info {
-	u64 info;
-	u8 count;
-	u8 padding[7];
+	__u64 info;
+	__u8 count;
+	__u8 padding[7];
 };
 
 /**
@@ -405,9 +548,9 @@ struct base_kcpu_command_jit_alloc_info {
  * @padding:	Padding to a multiple of 64 bits.
  */
 struct base_kcpu_command_jit_free_info {
-	u64 ids;
-	u8 count;
-	u8 padding[7];
+	__u64 ids;
+	__u8 count;
+	__u8 padding[7];
 };
 
 /**
@@ -416,52 +559,71 @@ struct base_kcpu_command_jit_free_info {
  *
  * @buffer:		Pointer to an array of elements of the type char.
  * @size:		Number of elements in the @buffer array.
- * @group_handle:	Handle to the mapping of command stream group.
+ * @group_handle:	Handle to the mapping of CSG.
  * @padding:		padding to a multiple of 64 bits.
  */
 struct base_kcpu_command_group_suspend_info {
-	u64 buffer;
-	u32 size;
-	u8 group_handle;
-	u8 padding[3];
+	__u64 buffer;
+	__u32 size;
+	__u8 group_handle;
+	__u8 padding[3];
 };
 
+#if MALI_UNIT_TEST
+struct base_kcpu_command_sample_time_info {
+	__u64 time;
+};
+#endif /* MALI_UNIT_TEST */
+
 /**
  * struct base_kcpu_command - kcpu command.
- *
  * @type:	type of the kcpu command, one enum base_kcpu_command_type
+ * @padding:	padding to a multiple of 64 bits
  * @info:	structure which contains information about the kcpu command;
  *		actual type is determined by @p type
- * @padding:	padding to a multiple of 64 bits
+ * @info.fence:            Fence
+ * @info.cqs_wait:         CQS wait
+ * @info.cqs_set:          CQS set
+ * @info.import:           import
+ * @info.jit_alloc:        jit allocation
+ * @info.jit_free:         jit deallocation
+ * @info.suspend_buf_copy: suspend buffer copy
+ * @info.sample_time:      sample time
+ * @info.padding:          padding
  */
 struct base_kcpu_command {
-	u8 type;
-	u8 padding[sizeof(u64) - sizeof(u8)];
+	__u8 type;
+	__u8 padding[sizeof(__u64) - sizeof(__u8)];
 	union {
 		struct base_kcpu_command_fence_info fence;
 		struct base_kcpu_command_cqs_wait_info cqs_wait;
 		struct base_kcpu_command_cqs_set_info cqs_set;
+		struct base_kcpu_command_cqs_wait_operation_info cqs_wait_operation;
+		struct base_kcpu_command_cqs_set_operation_info cqs_set_operation;
 		struct base_kcpu_command_import_info import;
 		struct base_kcpu_command_jit_alloc_info jit_alloc;
 		struct base_kcpu_command_jit_free_info jit_free;
 		struct base_kcpu_command_group_suspend_info suspend_buf_copy;
-		u64 padding[2]; /* No sub-struct should be larger */
+#if MALI_UNIT_TEST
+		struct base_kcpu_command_sample_time_info sample_time;
+#endif /* MALI_UNIT_TEST */
+		__u64 padding[2]; /* No sub-struct should be larger */
 	} info;
 };
 
 /**
- * struct basep_cs_stream_control - Command Stream interface capabilities.
+ * struct basep_cs_stream_control - CSI capabilities.
  *
  * @features: Features of this stream
  * @padding:  Padding to a multiple of 64 bits.
  */
 struct basep_cs_stream_control {
-	u32 features;
-	u32 padding;
+	__u32 features;
+	__u32 padding;
 };
 
 /**
- * struct basep_cs_group_control - Command Stream Group interface capabilities.
+ * struct basep_cs_group_control - CSG interface capabilities.
  *
  * @features:     Features of this group
  * @stream_num:   Number of streams in this group
@@ -469,10 +631,10 @@ struct basep_cs_stream_control {
  * @padding:      Padding to a multiple of 64 bits.
  */
 struct basep_cs_group_control {
-	u32 features;
-	u32 stream_num;
-	u32 suspend_size;
-	u32 padding;
+	__u32 features;
+	__u32 stream_num;
+	__u32 suspend_size;
+	__u32 padding;
 };
 
 /**
@@ -487,9 +649,9 @@ struct basep_cs_group_control {
  * @padding:      Padding to make multiple of 64bits
  */
 struct base_gpu_queue_group_error_fatal_payload {
-	u64 sideband;
-	u32 status;
-	u32 padding;
+	__u64 sideband;
+	__u32 status;
+	__u32 padding;
 };
 
 /**
@@ -505,10 +667,10 @@ struct base_gpu_queue_group_error_fatal_payload {
  * @padding:      Padding to make multiple of 64bits
  */
 struct base_gpu_queue_error_fatal_payload {
-	u64 sideband;
-	u32 status;
-	u8 csi_index;
-	u8 padding[3];
+	__u64 sideband;
+	__u32 status;
+	__u8 csi_index;
+	__u8 padding[3];
 };
 
 /**
@@ -536,19 +698,17 @@ enum base_gpu_queue_group_error_type {
 
 /**
  * struct base_gpu_queue_group_error - Unrecoverable fault information
- *
- * @error_type:   Error type of @base_gpu_queue_group_error_type
- *                indicating which field in union payload is filled
- * @padding:      Unused bytes for 64bit boundary
- * @fatal_group:  Unrecoverable fault error associated with
- *                GPU command queue group
- * @fatal_queue:  Unrecoverable fault error associated with command queue
- *
- * @payload:      Input Payload
+ * @error_type:          Error type of @base_gpu_queue_group_error_type
+ *                       indicating which field in union payload is filled
+ * @padding:             Unused bytes for 64bit boundary
+ * @payload:             Input Payload
+ * @payload.fatal_group: Unrecoverable fault error associated with
+ *                       GPU command queue group
+ * @payload.fatal_queue: Unrecoverable fault error associated with command queue
  */
 struct base_gpu_queue_group_error {
-	u8 error_type;
-	u8 padding[7];
+	__u8 error_type;
+	__u8 padding[7];
 	union {
 		struct base_gpu_queue_group_error_fatal_payload fatal_group;
 		struct base_gpu_queue_error_fatal_payload fatal_queue;
@@ -561,6 +721,8 @@ struct base_gpu_queue_group_error {
  * @BASE_CSF_NOTIFICATION_EVENT:                 Notification with kernel event
  * @BASE_CSF_NOTIFICATION_GPU_QUEUE_GROUP_ERROR: Notification with GPU fatal
  *                                               error
+ * @BASE_CSF_NOTIFICATION_CPU_QUEUE_DUMP:        Notification with dumping cpu
+ *                                               queue
  * @BASE_CSF_NOTIFICATION_COUNT:                 The number of notification type
  *
  * This type is used for &struct_base_csf_notification.type.
@@ -568,31 +730,36 @@ struct base_gpu_queue_group_error {
 enum base_csf_notification_type {
 	BASE_CSF_NOTIFICATION_EVENT = 0,
 	BASE_CSF_NOTIFICATION_GPU_QUEUE_GROUP_ERROR,
+	BASE_CSF_NOTIFICATION_CPU_QUEUE_DUMP,
 	BASE_CSF_NOTIFICATION_COUNT
 };
 
 /**
  * struct base_csf_notification - Event or error notification
  *
- * @type:         Notification type of @base_csf_notification_type
- * @padding:      Padding for 64bit boundary
- * @handle:       Handle of GPU command queue group associated with fatal error
- * @error:        Unrecoverable fault error
- * @align:        To fit the struct into a 64-byte cache line
+ * @type:                      Notification type of @base_csf_notification_type
+ * @padding:                   Padding for 64bit boundary
+ * @payload:                   Input Payload
+ * @payload.align:             To fit the struct into a 64-byte cache line
+ * @payload.csg_error:         CSG error
+ * @payload.csg_error.handle:  Handle of GPU command queue group associated with
+ *                             fatal error
+ * @payload.csg_error.padding: Padding
+ * @payload.csg_error.error:   Unrecoverable fault error
  *
- * @payload:      Input Payload
  */
 struct base_csf_notification {
-	u8 type;
-	u8 padding[7];
+	__u8 type;
+	__u8 padding[7];
 	union {
 		struct {
-			u8 handle;
-			u8 padding[7];
+			__u8 handle;
+			__u8 padding[7];
 			struct base_gpu_queue_group_error error;
 		} csg_error;
-		u8 align[56];
+
+		__u8 align[56];
 	} payload;
 };
 
-#endif /* _BASE_CSF_KERNEL_H_ */
+#endif /* _UAPI_BASE_CSF_KERNEL_H_ */
diff --git a/drivers/gpu/arm/bifrost/csf/mali_gpu_csf_control_registers.h b/include/uapi/gpu/arm/bifrost/csf/mali_gpu_csf_control_registers.h
similarity index 74%
rename from drivers/gpu/arm/bifrost/csf/mali_gpu_csf_control_registers.h
rename to include/uapi/gpu/arm/bifrost/csf/mali_gpu_csf_control_registers.h
index 4fff80ca4023..b62a8b0eb76b 100644
--- a/drivers/gpu/arm/bifrost/csf/mali_gpu_csf_control_registers.h
+++ b/include/uapi/gpu/arm/bifrost/csf/mali_gpu_csf_control_registers.h
@@ -1,11 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
 /*
  *
- * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,18 +17,16 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 /*
  * This header was autogenerated, it should not be edited.
  */
 
-#ifndef _GPU_CSF_CONTROL_REGISTERS_H_
-#define _GPU_CSF_CONTROL_REGISTERS_H_
+#ifndef _UAPI_GPU_CSF_CONTROL_REGISTERS_H_
+#define _UAPI_GPU_CSF_CONTROL_REGISTERS_H_
 
 /* GPU_REGISTERS register offsets */
 #define GPU_CONTROL_MCU 0x3000 /* () MCU control registers */
 
-#endif /* _GPU_CSF_CONTROL_REGISTERS_H_ */
+#endif /* _UAPI_GPU_CSF_CONTROL_REGISTERS_H_ */
diff --git a/drivers/gpu/arm/bifrost/csf/mali_gpu_csf_registers.h b/include/uapi/gpu/arm/bifrost/csf/mali_gpu_csf_registers.h
similarity index 87%
rename from drivers/gpu/arm/bifrost/csf/mali_gpu_csf_registers.h
rename to include/uapi/gpu/arm/bifrost/csf/mali_gpu_csf_registers.h
index 5c03445f3c79..d3b43fc2ea7d 100644
--- a/drivers/gpu/arm/bifrost/csf/mali_gpu_csf_registers.h
+++ b/include/uapi/gpu/arm/bifrost/csf/mali_gpu_csf_registers.h
@@ -1,11 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
 /*
  *
- * (C) COPYRIGHT 2018-2020 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2018-2021 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,16 +17,14 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 /*
  * This header was autogenerated, it should not be edited.
  */
 
-#ifndef _GPU_CSF_REGISTERS_H_
-#define _GPU_CSF_REGISTERS_H_
+#ifndef _UAPI_GPU_CSF_REGISTERS_H_
+#define _UAPI_GPU_CSF_REGISTERS_H_
 
 /*
  * Begin register sets
@@ -91,8 +90,8 @@
 #define DB_BLK_DOORBELL 0x0000 /* (WO) Doorbell request */
 
 /* CS_KERNEL_INPUT_BLOCK register offsets */
-#define CS_REQ 0x0000 /* () Command stream request flags */
-#define CS_CONFIG 0x0004 /* () Command stream configuration */
+#define CS_REQ 0x0000 /* () CS request flags */
+#define CS_CONFIG 0x0004 /* () CS configuration */
 #define CS_ACK_IRQ_MASK 0x000C /* () Command steam interrupt mask */
 #define CS_BASE_LO 0x0010 /* () Base pointer for the ring buffer, low word */
 #define CS_BASE_HI 0x0014 /* () Base pointer for the ring buffer, high word */
@@ -107,14 +106,16 @@
 #define CS_USER_OUTPUT_HI 0x003C /* () CS user mode input page address, high word */
 
 /* CS_KERNEL_OUTPUT_BLOCK register offsets */
-#define CS_ACK 0x0000 /* () Command stream acknowledge flags */
+#define CS_ACK 0x0000 /* () CS acknowledge flags */
 #define CS_STATUS_CMD_PTR_LO 0x0040 /* () Program pointer current value, low word */
 #define CS_STATUS_CMD_PTR_HI 0x0044 /* () Program pointer current value, high word */
 #define CS_STATUS_WAIT 0x0048 /* () Wait condition status register */
-#define CS_STATUS_REQ_RESOURCE 0x004C /* () Indicates the resources requested by the command stream */
+#define CS_STATUS_REQ_RESOURCE 0x004C /* () Indicates the resources requested by the CS */
 #define CS_STATUS_WAIT_SYNC_POINTER_LO 0x0050 /* () Sync object pointer, low word */
 #define CS_STATUS_WAIT_SYNC_POINTER_HI 0x0054 /* () Sync object pointer, high word */
 #define CS_STATUS_WAIT_SYNC_VALUE 0x0058 /* () Sync object test value */
+#define CS_STATUS_SCOREBOARDS 0x005C /* () Scoreboard status */
+#define CS_STATUS_BLOCKED_REASON 0x0060 /* () Blocked reason */
 #define CS_FAULT 0x0080 /* () Recoverable fault information */
 #define CS_FATAL 0x0084 /* () Unrecoverable fault information */
 #define CS_FAULT_INFO_LO 0x0088 /* () Additional information about a recoverable fault, low word */
@@ -136,13 +137,13 @@
 /* CS_USER_OUTPUT_BLOCK register offsets */
 #define CS_EXTRACT_LO 0x0000 /* () Current extract offset for ring buffer, low word */
 #define CS_EXTRACT_HI 0x0004 /* () Current extract offset for ring buffer, high word */
-#define CS_ACTIVE 0x0008 /* () Initial extract offset when the command stream is started */
+#define CS_ACTIVE 0x0008 /* () Initial extract offset when the CS is started */
 
 /* CSG_INPUT_BLOCK register offsets */
 #define CSG_REQ 0x0000 /* () CSG request */
 #define CSG_ACK_IRQ_MASK 0x0004 /* () Global acknowledge interrupt mask */
 #define CSG_DB_REQ 0x0008 /* () Global doorbell request */
-#define CSG_IRQ_ACK 0x000C /* () Command stream IRQ acknowledge */
+#define CSG_IRQ_ACK 0x000C /* () CS IRQ acknowledge */
 #define CSG_ALLOW_COMPUTE_LO 0x0020 /* () Allowed compute endpoints, low word */
 #define CSG_ALLOW_COMPUTE_HI 0x0024 /* () Allowed compute endpoints, high word */
 #define CSG_ALLOW_FRAGMENT_LO 0x0028 /* () Allowed fragment endpoints, low word */
@@ -154,11 +155,12 @@
 #define CSG_PROTM_SUSPEND_BUF_LO 0x0048 /* () Protected mode suspend buffer, low word */
 #define CSG_PROTM_SUSPEND_BUF_HI 0x004C /* () Protected mode suspend buffer, high word */
 #define CSG_CONFIG 0x0050 /* () CSG configuration options */
+#define CSG_ITER_TRACE_CONFIG 0x0054 /* () CSG trace configuration */
 
 /* CSG_OUTPUT_BLOCK register offsets */
-#define CSG_ACK 0x0000 /* () Command stream group acknowledge flags */
-#define CSG_DB_ACK 0x0008 /* () Command stream kernel doorbell acknowledge flags */
-#define CSG_IRQ_REQ 0x000C /* () Command stream interrupt request flags */
+#define CSG_ACK 0x0000 /* () CSG acknowledge flags */
+#define CSG_DB_ACK 0x0008 /* () CS kernel doorbell acknowledge flags */
+#define CSG_IRQ_REQ 0x000C /* () CS interrupt request flags */
 #define CSG_STATUS_EP_CURRENT 0x0010 /* () Endpoint allocation status register */
 #define CSG_STATUS_EP_REQ 0x0014 /* () Endpoint request status register */
 #define CSG_RESOURCE_DEP 0x001C /* () Current resource dependencies */
@@ -171,18 +173,19 @@
 #define GLB_GROUP_NUM 0x0010 /* () Number of CSG interfaces */
 #define GLB_GROUP_STRIDE 0x0014 /* () Stride between CSG interfaces */
 #define GLB_PRFCNT_SIZE 0x0018 /* () Size of CSF performance counters */
+#define GLB_INSTR_FEATURES 0x001C /* () TRACE_POINT instrumentation features */
 #define GROUP_CONTROL_0 0x1000 /* () CSG control and capabilities */
 #define GROUP_CONTROL(n) (GROUP_CONTROL_0 + (n)*256)
 #define GROUP_CONTROL_REG(n, r) (GROUP_CONTROL(n) + GROUP_CONTROL_BLOCK_REG(r))
 #define GROUP_CONTROL_COUNT 16
 
 /* STREAM_CONTROL_BLOCK register offsets */
-#define STREAM_FEATURES 0x0000 /* () Command Stream interface features */
+#define STREAM_FEATURES 0x0000 /* () CSI features */
 #define STREAM_INPUT_VA 0x0004 /* () Address of CS_KERNEL_INPUT_BLOCK */
 #define STREAM_OUTPUT_VA 0x0008 /* () Address of CS_KERNEL_OUTPUT_BLOCK */
 
 /* GROUP_CONTROL_BLOCK register offsets */
-#define GROUP_FEATURES 0x0000 /* () Command Stream Group interface features */
+#define GROUP_FEATURES 0x0000 /* () CSG interface features */
 #define GROUP_INPUT_VA 0x0004 /* () Address of CSG_INPUT_BLOCK */
 #define GROUP_OUTPUT_VA 0x0008 /* () Address of CSG_OUTPUT_BLOCK */
 #define GROUP_SUSPEND_SIZE 0x000C /* () Size of CSG suspend buffer */
@@ -207,6 +210,7 @@
 #define GLB_PRFCNT_JASID 0x0024 /* () Performance counter address space */
 #define GLB_PRFCNT_BASE_LO 0x0028 /* () Performance counter buffer address, low word */
 #define GLB_PRFCNT_BASE_HI 0x002C /* () Performance counter buffer address, high word */
+#define GLB_PRFCNT_EXTRACT 0x0030 /* () Performance counter buffer extract index */
 #define GLB_PRFCNT_CONFIG 0x0040 /* () Performance counter configuration */
 #define GLB_PRFCNT_CSG_SELECT 0x0044 /* () CSG performance counting enable */
 #define GLB_PRFCNT_FW_EN 0x0048 /* () Performance counter enable for firmware */
@@ -228,12 +232,20 @@
 #define GLB_DB_ACK 0x0008 /* () Global doorbell acknowledge */
 #define GLB_HALT_STATUS 0x0010 /* () Global halt status */
 #define GLB_PRFCNT_STATUS 0x0014 /* () Performance counter status */
+#define GLB_PRFCNT_INSERT 0x0018 /* () Performance counter buffer insert index */
 #define GLB_DEBUG_FWUTF_RESULT 0x0FE0 /* () Firmware debug test result */
 #define GLB_DEBUG_ACK 0x0FFC /* () Global debug acknowledge */
 
+/* USER register offsets */
+#define LATEST_FLUSH 0x0000 /* () Flush ID of latest clean-and-invalidate operation */
+
 /* End register offsets */
 
 /* CS_KERNEL_INPUT_BLOCK register set definitions */
+/* GLB_VERSION register */
+#define GLB_VERSION_PATCH_SHIFT (0)
+#define GLB_VERSION_MINOR_SHIFT (16)
+#define GLB_VERSION_MAJOR_SHIFT (24)
 
 /* CS_REQ register */
 #define CS_REQ_STATE_SHIFT 0
@@ -251,15 +263,6 @@
 #define CS_REQ_EXTRACT_EVENT_SET(reg_val, value) \
 	(((reg_val) & ~CS_REQ_EXTRACT_EVENT_MASK) | (((value) << CS_REQ_EXTRACT_EVENT_SHIFT) & CS_REQ_EXTRACT_EVENT_MASK))
 
-/* From 10.x.5, CS_REQ_ERROR_MODE is removed but TI2 bitfile upload not finished.
- * Need to remove on GPUCORE-23972
- */
-#define CS_REQ_ERROR_MODE_SHIFT 5
-#define CS_REQ_ERROR_MODE_MASK (0x1 << CS_REQ_ERROR_MODE_SHIFT)
-#define CS_REQ_ERROR_MODE_GET(reg_val) ((reg_val & CS_REQ_ERROR_MODE_MASK) >> CS_REQ_ERROR_MODE_SHIFT)
-#define CS_REQ_ERROR_MODE_SET(reg_val, value) \
-         ((reg_val & ~CS_REQ_ERROR_MODE_MASK) | ((value << CS_REQ_ERROR_MODE_SHIFT) & CS_REQ_ERROR_MODE_MASK))
-
 #define CS_REQ_IDLE_SYNC_WAIT_SHIFT 8
 #define CS_REQ_IDLE_SYNC_WAIT_MASK (0x1 << CS_REQ_IDLE_SYNC_WAIT_SHIFT)
 #define CS_REQ_IDLE_SYNC_WAIT_GET(reg_val) (((reg_val)&CS_REQ_IDLE_SYNC_WAIT_MASK) >> CS_REQ_IDLE_SYNC_WAIT_SHIFT)
@@ -551,6 +554,39 @@
 	(((reg_val) & ~CS_STATUS_WAIT_SYNC_VALUE_VALUE_MASK) |  \
 	 (((value) << CS_STATUS_WAIT_SYNC_VALUE_VALUE_SHIFT) & CS_STATUS_WAIT_SYNC_VALUE_VALUE_MASK))
 
+/* CS_STATUS_SCOREBOARDS register */
+#define CS_STATUS_SCOREBOARDS_NONZERO_SHIFT (0)
+#define CS_STATUS_SCOREBOARDS_NONZERO_MASK                                     \
+	((0xFFFF) << CS_STATUS_SCOREBOARDS_NONZERO_SHIFT)
+#define CS_STATUS_SCOREBOARDS_NONZERO_GET(reg_val)                             \
+	(((reg_val)&CS_STATUS_SCOREBOARDS_NONZERO_MASK) >>                     \
+	 CS_STATUS_SCOREBOARDS_NONZERO_SHIFT)
+#define CS_STATUS_SCOREBOARDS_NONZERO_SET(reg_val, value)                      \
+	(((reg_val) & ~CS_STATUS_SCOREBOARDS_NONZERO_MASK) |                   \
+	 (((value) << CS_STATUS_SCOREBOARDS_NONZERO_SHIFT) &                   \
+	  CS_STATUS_SCOREBOARDS_NONZERO_MASK))
+
+/* CS_STATUS_BLOCKED_REASON register */
+#define CS_STATUS_BLOCKED_REASON_REASON_SHIFT (0)
+#define CS_STATUS_BLOCKED_REASON_REASON_MASK                                   \
+	((0xF) << CS_STATUS_BLOCKED_REASON_REASON_SHIFT)
+#define CS_STATUS_BLOCKED_REASON_REASON_GET(reg_val)                           \
+	(((reg_val)&CS_STATUS_BLOCKED_REASON_REASON_MASK) >>                   \
+	 CS_STATUS_BLOCKED_REASON_REASON_SHIFT)
+#define CS_STATUS_BLOCKED_REASON_REASON_SET(reg_val, value)                    \
+	(((reg_val) & ~CS_STATUS_BLOCKED_REASON_REASON_MASK) |                 \
+	 (((value) << CS_STATUS_BLOCKED_REASON_REASON_SHIFT) &                 \
+	  CS_STATUS_BLOCKED_REASON_REASON_MASK))
+/* CS_STATUS_BLOCKED_REASON_reason values */
+#define CS_STATUS_BLOCKED_REASON_REASON_UNBLOCKED 0x0
+#define CS_STATUS_BLOCKED_REASON_REASON_WAIT 0x1
+#define CS_STATUS_BLOCKED_REASON_REASON_PROGRESS_WAIT 0x2
+#define CS_STATUS_BLOCKED_REASON_REASON_SYNC_WAIT 0x3
+#define CS_STATUS_BLOCKED_REASON_REASON_DEFERRED 0x4
+#define CS_STATUS_BLOCKED_REASON_REASON_RESOURCE 0x5
+#define CS_STATUS_BLOCKED_REASON_REASON_FLUSH 0x6
+/* End of CS_STATUS_BLOCKED_REASON_reason values */
+
 /* CS_FAULT register */
 #define CS_FAULT_EXCEPTION_TYPE_SHIFT 0
 #define CS_FAULT_EXCEPTION_TYPE_MASK (0xFF << CS_FAULT_EXCEPTION_TYPE_SHIFT)
@@ -1060,6 +1096,16 @@
 #define GLB_REQ_PING_GET(reg_val) (((reg_val)&GLB_REQ_PING_MASK) >> GLB_REQ_PING_SHIFT)
 #define GLB_REQ_PING_SET(reg_val, value) \
 	(((reg_val) & ~GLB_REQ_PING_MASK) | (((value) << GLB_REQ_PING_SHIFT) & GLB_REQ_PING_MASK))
+#define GLB_REQ_FIRMWARE_CONFIG_UPDATE_SHIFT 9
+#define GLB_REQ_FIRMWARE_CONFIG_UPDATE_MASK                                    \
+	(0x1 << GLB_REQ_FIRMWARE_CONFIG_UPDATE_SHIFT)
+#define GLB_REQ_FIRMWARE_CONFIG_UPDATE_GET(reg_val)                            \
+	(((reg_val)&GLB_REQ_FIRMWARE_CONFIG_UPDATE_MASK) >>                    \
+	 GLB_REQ_FIRMWARE_CONFIG_UPDATE_SHIFT)
+#define GLB_REQ_FIRMWARE_CONFIG_UPDATE_SET(reg_val, value)                     \
+	(((reg_val) & ~GLB_REQ_FIRMWARE_CONFIG_UPDATE_MASK) |                  \
+	 (((value) << GLB_REQ_FIRMWARE_CONFIG_UPDATE_SHIFT) &                  \
+	  GLB_REQ_FIRMWARE_CONFIG_UPDATE_MASK))
 #define GLB_REQ_INACTIVE_COMPUTE_SHIFT 20
 #define GLB_REQ_INACTIVE_COMPUTE_MASK (0x1 << GLB_REQ_INACTIVE_COMPUTE_SHIFT)
 #define GLB_REQ_INACTIVE_COMPUTE_GET(reg_val) \
@@ -1085,6 +1131,24 @@
 #define GLB_REQ_PROTM_EXIT_GET(reg_val) (((reg_val)&GLB_REQ_PROTM_EXIT_MASK) >> GLB_REQ_PROTM_EXIT_SHIFT)
 #define GLB_REQ_PROTM_EXIT_SET(reg_val, value) \
 	(((reg_val) & ~GLB_REQ_PROTM_EXIT_MASK) | (((value) << GLB_REQ_PROTM_EXIT_SHIFT) & GLB_REQ_PROTM_EXIT_MASK))
+#define GLB_REQ_PRFCNT_THRESHOLD_SHIFT 24
+#define GLB_REQ_PRFCNT_THRESHOLD_MASK (0x1 << GLB_REQ_PRFCNT_THRESHOLD_SHIFT)
+#define GLB_REQ_PRFCNT_THRESHOLD_GET(reg_val) \
+	(((reg_val)&GLB_REQ_PRFCNT_THRESHOLD_MASK) >> \
+	 GLB_REQ_PRFCNT_THRESHOLD_SHIFT)
+#define GLB_REQ_PRFCNT_THRESHOLD_SET(reg_val, value) \
+	(((reg_val) & ~GLB_REQ_PRFCNT_THRESHOLD_MASK) | \
+	 (((value) << GLB_REQ_PRFCNT_THRESHOLD_SHIFT) & \
+	  GLB_REQ_PRFCNT_THRESHOLD_MASK))
+#define GLB_REQ_PRFCNT_OVERFLOW_SHIFT 25
+#define GLB_REQ_PRFCNT_OVERFLOW_MASK (0x1 << GLB_REQ_PRFCNT_OVERFLOW_SHIFT)
+#define GLB_REQ_PRFCNT_OVERFLOW_GET(reg_val) \
+	(((reg_val)&GLB_REQ_PRFCNT_OVERFLOW_MASK) >> \
+	 GLB_REQ_PRFCNT_OVERFLOW_SHIFT)
+#define GLB_REQ_PRFCNT_OVERFLOW_SET(reg_val, value) \
+	(((reg_val) & ~GLB_REQ_PRFCNT_OVERFLOW_MASK) | \
+	 (((value) << GLB_REQ_PRFCNT_OVERFLOW_SHIFT) & \
+	  GLB_REQ_PRFCNT_OVERFLOW_MASK))
 #define GLB_REQ_DEBUG_CSF_REQ_SHIFT 30
 #define GLB_REQ_DEBUG_CSF_REQ_MASK (0x1 << GLB_REQ_DEBUG_CSF_REQ_SHIFT)
 #define GLB_REQ_DEBUG_CSF_REQ_GET(reg_val) (((reg_val)&GLB_REQ_DEBUG_CSF_REQ_MASK) >> GLB_REQ_DEBUG_CSF_REQ_SHIFT)
@@ -1160,6 +1224,16 @@
 #define GLB_ACK_IRQ_MASK_PING_SET(reg_val, value) \
 	(((reg_val) & ~GLB_ACK_IRQ_MASK_PING_MASK) |  \
 	 (((value) << GLB_ACK_IRQ_MASK_PING_SHIFT) & GLB_ACK_IRQ_MASK_PING_MASK))
+#define GLB_ACK_IRQ_MASK_FIRMWARE_CONFIG_UPDATE_SHIFT 9
+#define GLB_ACK_IRQ_MASK_FIRMWARE_CONFIG_UPDATE_MASK                           \
+	(0x1 << GLB_ACK_IRQ_MASK_FIRMWARE_CONFIG_UPDATE_SHIFT)
+#define GLB_ACK_IRQ_MASK_FIRMWARE_CONFIG_UPDATE_GET(reg_val)                   \
+	(((reg_val)&GLB_ACK_IRQ_MASK_FIRMWARE_CONFIG_UPDATE_MASK) >>           \
+	 GLB_ACK_IRQ_MASK_FIRMWARE_CONFIG_UPDATE_SHIFT)
+#define GLB_ACK_IRQ_MASK_FIRMWARE_CONFIG_UPDATE_SET(reg_val, value)            \
+	(((reg_val) & ~GLB_ACK_IRQ_MASK_FIRMWARE_CONFIG_UPDATE_MASK) |         \
+	 (((value) << GLB_ACK_IRQ_MASK_FIRMWARE_CONFIG_UPDATE_SHIFT) &         \
+	  GLB_ACK_IRQ_MASK_FIRMWARE_CONFIG_UPDATE_MASK))
 #define GLB_ACK_IRQ_MASK_INACTIVE_COMPUTE_SHIFT 20
 #define GLB_ACK_IRQ_MASK_INACTIVE_COMPUTE_MASK (0x1 << GLB_ACK_IRQ_MASK_INACTIVE_COMPUTE_SHIFT)
 #define GLB_ACK_IRQ_MASK_INACTIVE_COMPUTE_GET(reg_val) \
@@ -1188,6 +1262,26 @@
 #define GLB_ACK_IRQ_MASK_PROTM_EXIT_SET(reg_val, value) \
 	(((reg_val) & ~GLB_ACK_IRQ_MASK_PROTM_EXIT_MASK) |  \
 	 (((value) << GLB_ACK_IRQ_MASK_PROTM_EXIT_SHIFT) & GLB_ACK_IRQ_MASK_PROTM_EXIT_MASK))
+#define GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_SHIFT 24
+#define GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_MASK \
+	(0x1 << GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_SHIFT)
+#define GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_GET(reg_val) \
+	(((reg_val)&GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_MASK) >> \
+	 GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_SHIFT)
+#define GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_SET(reg_val, value) \
+	(((reg_val) & ~GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_MASK) | \
+	 (((value) << GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_SHIFT) & \
+	  GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_MASK))
+#define GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_SHIFT 25
+#define GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_MASK \
+	(0x1 << GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_SHIFT)
+#define GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_GET(reg_val) \
+	(((reg_val)&GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_MASK) >> \
+	 GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_SHIFT)
+#define GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_SET(reg_val, value) \
+	(((reg_val) & ~GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_MASK) | \
+	 (((value) << GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_SHIFT) & \
+	  GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_MASK))
 #define GLB_ACK_IRQ_MASK_DEBUG_CSF_REQ_SHIFT 30
 #define GLB_ACK_IRQ_MASK_DEBUG_CSF_REQ_MASK (0x1 << GLB_ACK_IRQ_MASK_DEBUG_CSF_REQ_SHIFT)
 #define GLB_ACK_IRQ_MASK_DEBUG_CSF_REQ_GET(reg_val) \
@@ -1212,6 +1306,26 @@
 	(((reg_val) & ~GLB_PROGRESS_TIMER_TIMEOUT_MASK) |  \
 	 (((value) << GLB_PROGRESS_TIMER_TIMEOUT_SHIFT) & GLB_PROGRESS_TIMER_TIMEOUT_MASK))
 
+/* GLB_PWROFF_TIMER register: TIMEOUT in bits 30:0, TIMER_SOURCE in bit 31 */
+#define GLB_PWROFF_TIMER_TIMEOUT_SHIFT 0
+#define GLB_PWROFF_TIMER_TIMEOUT_MASK (0x7FFFFFFF << GLB_PWROFF_TIMER_TIMEOUT_SHIFT)
+#define GLB_PWROFF_TIMER_TIMEOUT_GET(reg_val) \
+	(((reg_val)&GLB_PWROFF_TIMER_TIMEOUT_MASK) >> GLB_PWROFF_TIMER_TIMEOUT_SHIFT)
+#define GLB_PWROFF_TIMER_TIMEOUT_SET(reg_val, value) \
+	(((reg_val) & ~GLB_PWROFF_TIMER_TIMEOUT_MASK) |  \
+	 (((value) << GLB_PWROFF_TIMER_TIMEOUT_SHIFT) & GLB_PWROFF_TIMER_TIMEOUT_MASK))
+#define GLB_PWROFF_TIMER_TIMER_SOURCE_SHIFT 31
+#define GLB_PWROFF_TIMER_TIMER_SOURCE_MASK (0x1U << GLB_PWROFF_TIMER_TIMER_SOURCE_SHIFT)
+#define GLB_PWROFF_TIMER_TIMER_SOURCE_GET(reg_val) \
+	(((reg_val)&GLB_PWROFF_TIMER_TIMER_SOURCE_MASK) >> GLB_PWROFF_TIMER_TIMER_SOURCE_SHIFT)
+#define GLB_PWROFF_TIMER_TIMER_SOURCE_SET(reg_val, value) \
+	(((reg_val) & ~GLB_PWROFF_TIMER_TIMER_SOURCE_MASK) |  \
+	 (((value) << GLB_PWROFF_TIMER_TIMER_SOURCE_SHIFT) & GLB_PWROFF_TIMER_TIMER_SOURCE_MASK))
+/* GLB_PWROFF_TIMER_TIMER_SOURCE values */
+#define GLB_PWROFF_TIMER_TIMER_SOURCE_SYSTEM_TIMESTAMP 0x0
+#define GLB_PWROFF_TIMER_TIMER_SOURCE_GPU_COUNTER 0x1
+/* End of GLB_PWROFF_TIMER_TIMER_SOURCE values */
+
 /* GLB_ALLOC_EN register */
 #define GLB_ALLOC_EN_MASK_SHIFT 0
 #define GLB_ALLOC_EN_MASK_MASK (0xFFFFFFFFFFFFFFFF << GLB_ALLOC_EN_MASK_SHIFT)
@@ -1249,4 +1363,52 @@
 	(((reg_val) & ~GLB_ACK_CFG_ALLOC_EN_MASK) | (((value) << GLB_ACK_CFG_ALLOC_EN_SHIFT) & GLB_ACK_CFG_ALLOC_EN_MASK))
 /* End of GLB_OUTPUT_BLOCK register set definitions */
 
-#endif /* _GPU_CSF_REGISTERS_H_ */
+/* The following register and fields are for headers before 10.x.7/11.x.4 */
+#define GLB_REQ_IDLE_ENABLE_SHIFT (10)
+#define GLB_REQ_REQ_IDLE_ENABLE (1 << GLB_REQ_IDLE_ENABLE_SHIFT)
+#define GLB_REQ_REQ_IDLE_DISABLE (0 << GLB_REQ_IDLE_ENABLE_SHIFT)
+#define GLB_REQ_IDLE_ENABLE_MASK (0x1 << GLB_REQ_IDLE_ENABLE_SHIFT)
+#define GLB_REQ_IDLE_DISABLE_MASK (0x1 << GLB_REQ_IDLE_ENABLE_SHIFT)
+#define GLB_REQ_IDLE_EVENT_SHIFT (26)
+#define GLB_REQ_IDLE_EVENT_MASK (0x1 << GLB_REQ_IDLE_EVENT_SHIFT)
+#define GLB_ACK_IDLE_ENABLE_SHIFT (10)
+#define GLB_ACK_ACK_IDLE_ENABLE (1 << GLB_ACK_IDLE_ENABLE_SHIFT)
+#define GLB_ACK_ACK_IDLE_DISABLE (0 << GLB_ACK_IDLE_ENABLE_SHIFT)
+#define GLB_ACK_IDLE_ENABLE_MASK (0x1 << GLB_ACK_IDLE_ENABLE_SHIFT)
+#define GLB_ACK_IDLE_EVENT_SHIFT (26)
+#define GLB_ACK_IDLE_EVENT_MASK (0x1 << GLB_ACK_IDLE_EVENT_SHIFT)
+
+#define GLB_ACK_IRQ_MASK_IDLE_EVENT_SHIFT (26)
+#define GLB_ACK_IRQ_MASK_IDLE_EVENT_MASK (0x1 << GLB_ACK_IRQ_MASK_IDLE_EVENT_SHIFT)
+
+#define GLB_IDLE_TIMER (0x0080)
+/* GLB_IDLE_TIMER register: TIMEOUT in bits 30:0, TIMER_SOURCE in bit 31 */
+#define GLB_IDLE_TIMER_TIMEOUT_SHIFT (0)
+#define GLB_IDLE_TIMER_TIMEOUT_MASK ((0x7FFFFFFF) << GLB_IDLE_TIMER_TIMEOUT_SHIFT)
+#define GLB_IDLE_TIMER_TIMEOUT_GET(reg_val) (((reg_val)&GLB_IDLE_TIMER_TIMEOUT_MASK) >> GLB_IDLE_TIMER_TIMEOUT_SHIFT)
+#define GLB_IDLE_TIMER_TIMEOUT_SET(reg_val, value) \
+	(((reg_val) & ~GLB_IDLE_TIMER_TIMEOUT_MASK) |  \
+	 (((value) << GLB_IDLE_TIMER_TIMEOUT_SHIFT) & GLB_IDLE_TIMER_TIMEOUT_MASK))
+#define GLB_IDLE_TIMER_TIMER_SOURCE_SHIFT (31)
+#define GLB_IDLE_TIMER_TIMER_SOURCE_MASK ((0x1U) << GLB_IDLE_TIMER_TIMER_SOURCE_SHIFT)
+#define GLB_IDLE_TIMER_TIMER_SOURCE_GET(reg_val) \
+	(((reg_val)&GLB_IDLE_TIMER_TIMER_SOURCE_MASK) >> GLB_IDLE_TIMER_TIMER_SOURCE_SHIFT)
+#define GLB_IDLE_TIMER_TIMER_SOURCE_SET(reg_val, value) \
+	(((reg_val) & ~GLB_IDLE_TIMER_TIMER_SOURCE_MASK) |  \
+	 (((value) << GLB_IDLE_TIMER_TIMER_SOURCE_SHIFT) & GLB_IDLE_TIMER_TIMER_SOURCE_MASK))
+/* GLB_IDLE_TIMER_TIMER_SOURCE values */
+#define GLB_IDLE_TIMER_TIMER_SOURCE_SYSTEM_TIMESTAMP 0x0
+#define GLB_IDLE_TIMER_TIMER_SOURCE_GPU_COUNTER 0x1
+/* End of GLB_IDLE_TIMER_TIMER_SOURCE values */
+
+#define CSG_STATUS_STATE (0x0018) /* CSG state status register */
+/* CSG_STATUS_STATE register */
+#define CSG_STATUS_STATE_IDLE_SHIFT (0)
+#define CSG_STATUS_STATE_IDLE_MASK ((0x1) << CSG_STATUS_STATE_IDLE_SHIFT)
+#define CSG_STATUS_STATE_IDLE_GET(reg_val) \
+	(((reg_val)&CSG_STATUS_STATE_IDLE_MASK) >> CSG_STATUS_STATE_IDLE_SHIFT)
+#define CSG_STATUS_STATE_IDLE_SET(reg_val, value) \
+	(((reg_val) & ~CSG_STATUS_STATE_IDLE_MASK) |  \
+	(((value) << CSG_STATUS_STATE_IDLE_SHIFT) & CSG_STATUS_STATE_IDLE_MASK))
+
+#endif /* _UAPI_GPU_CSF_REGISTERS_H_ */
diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_ioctl.h b/include/uapi/gpu/arm/bifrost/csf/mali_kbase_csf_ioctl.h
similarity index 61%
rename from drivers/gpu/arm/bifrost/csf/mali_kbase_csf_ioctl.h
rename to include/uapi/gpu/arm/bifrost/csf/mali_kbase_csf_ioctl.h
index e9bb8d299754..15fac5e69061 100644
--- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_ioctl.h
+++ b/include/uapi/gpu/arm/bifrost/csf/mali_kbase_csf_ioctl.h
@@ -1,11 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
 /*
  *
- * (C) COPYRIGHT 2020 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,12 +17,10 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
-#ifndef _KBASE_CSF_IOCTL_H_
-#define _KBASE_CSF_IOCTL_H_
+#ifndef _UAPI_KBASE_CSF_IOCTL_H_
+#define _UAPI_KBASE_CSF_IOCTL_H_
 
 #include <asm-generic/ioctl.h>
 #include <linux/types.h>
@@ -29,10 +28,22 @@
 /*
  * 1.0:
  * - CSF IOCTL header separated from JM
+ * 1.1:
+ * - Add a new priority level BASE_QUEUE_GROUP_PRIORITY_REALTIME
+ * - Add ioctl 54: This controls the priority setting.
+ * 1.2:
+ * - Add new CSF GPU_FEATURES register into the property structure
+ *   returned by KBASE_IOCTL_GET_GPUPROPS
+ * 1.3:
+ * - Add __u32 group_uid member to
+ *   &struct_kbase_ioctl_cs_queue_group_create.out
+ * 1.4:
+ * - Replace padding in kbase_ioctl_cs_get_glb_iface with
+ *   instr_features member of same size
  */
 
 #define BASE_UK_VERSION_MAJOR 1
-#define BASE_UK_VERSION_MINOR 0
+#define BASE_UK_VERSION_MINOR 4
 
 /**
  * struct kbase_ioctl_version_check - Check version compatibility between
@@ -46,9 +57,6 @@ struct kbase_ioctl_version_check {
 	__u16 minor;
 };
 
-#define KBASE_IOCTL_VERSION_CHECK \
-	_IOWR(KBASE_IOCTL_TYPE, 52, struct kbase_ioctl_version_check)
-
 #define KBASE_IOCTL_VERSION_CHECK_RESERVED \
 	_IOWR(KBASE_IOCTL_TYPE, 0, struct kbase_ioctl_version_check)
 
@@ -88,16 +96,14 @@ struct kbase_ioctl_cs_queue_kick {
 /**
  * union kbase_ioctl_cs_queue_bind - Bind a GPU command queue to a group
  *
- * @buffer_gpu_addr: GPU address of the buffer backing the queue
- * @group_handle: Handle of the group to which the queue should be bound
- * @csi_index: Index of the CSF interface the queue should be bound to
- * @padding: Currently unused, must be zero
- * @mmap_handle: Handle to be used for creating the mapping of command stream
- *               input/output pages
- *
- * @in: Input parameters
- * @out: Output parameters
- *
+ * @in:                 Input parameters
+ * @in.buffer_gpu_addr: GPU address of the buffer backing the queue
+ * @in.group_handle:    Handle of the group to which the queue should be bound
+ * @in.csi_index:       Index of the CSF interface the queue should be bound to
+ * @in.padding:         Currently unused, must be zero
+ * @out:                Output parameters
+ * @out.mmap_handle:    Handle to be used for creating the mapping of CS
+ *                      input/output pages
  */
 union kbase_ioctl_cs_queue_bind {
 	struct {
@@ -130,24 +136,23 @@ struct kbase_ioctl_cs_queue_terminate {
 
 /**
  * union kbase_ioctl_cs_queue_group_create - Create a GPU command queue group
- *
- * @tiler_mask:		Mask of tiler endpoints the group is allowed to use.
- * @fragment_mask:	Mask of fragment endpoints the group is allowed to use.
- * @compute_mask:	Mask of compute endpoints the group is allowed to use.
- * @cs_min:		Minimum number of command streams required.
- * @priority:		Queue group's priority within a process.
- * @tiler_max:		Maximum number of tiler endpoints the group is allowed
- *			to use.
- * @fragment_max:	Maximum number of fragment endpoints the group is
- *			allowed to use.
- * @compute_max:	Maximum number of compute endpoints the group is allowed
- *			to use.
- * @padding:		Currently unused, must be zero
- * @group_handle:	Handle of a newly created queue group.
- *
- * @in: Input parameters
- * @out: Output parameters
- *
+ * @in:               Input parameters
+ * @in.tiler_mask:    Mask of tiler endpoints the group is allowed to use.
+ * @in.fragment_mask: Mask of fragment endpoints the group is allowed to use.
+ * @in.compute_mask:  Mask of compute endpoints the group is allowed to use.
+ * @in.cs_min:        Minimum number of CSs required.
+ * @in.priority:      Queue group's priority within a process.
+ * @in.tiler_max:     Maximum number of tiler endpoints the group is allowed
+ *                    to use.
+ * @in.fragment_max:  Maximum number of fragment endpoints the group is
+ *                    allowed to use.
+ * @in.compute_max:   Maximum number of compute endpoints the group is allowed
+ *                    to use.
+ * @in.padding:       Currently unused, must be zero
+ * @out:              Output parameters
+ * @out.group_handle: Handle of a newly created queue group.
+ * @out.padding:      Currently unused, must be zero
+ * @out.group_uid:    UID of the queue group available to base.
  */
 union kbase_ioctl_cs_queue_group_create {
 	struct {
@@ -164,7 +169,8 @@ union kbase_ioctl_cs_queue_group_create {
 	} in;
 	struct {
 		__u8 group_handle;
-		__u8 padding[7];
+		__u8 padding[3];
+		__u32 group_uid;
 	} out;
 };
 
@@ -238,23 +244,21 @@ struct kbase_ioctl_kcpu_queue_enqueue {
 
 /**
  * union kbase_ioctl_cs_tiler_heap_init - Initialize chunked tiler memory heap
- *
- * @chunk_size: Size of each chunk.
- * @initial_chunks: Initial number of chunks that heap will be created with.
- * @max_chunks: Maximum number of chunks that the heap is allowed to use.
- * @target_in_flight: Number of render-passes that the driver should attempt to
- *                    keep in flight for which allocation of new chunks is
- *                    allowed.
- * @group_id: Group ID to be used for physical allocations.
- * @gpu_heap_va: GPU VA (virtual address) of Heap context that was set up for
- *               the heap.
- * @first_chunk_va: GPU VA of the first chunk allocated for the heap, actually
- *                  points to the header of heap chunk and not to the low
- *                  address of free memory in the chunk.
- *
- * @in: Input parameters
- * @out: Output parameters
- *
+ * @in:                Input parameters
+ * @in.chunk_size:     Size of each chunk.
+ * @in.initial_chunks: Initial number of chunks that heap will be created with.
+ * @in.max_chunks:     Maximum number of chunks that the heap is allowed to use.
+ * @in.target_in_flight: Number of render-passes that the driver should attempt to
+ *                     keep in flight for which allocation of new chunks is
+ *                     allowed.
+ * @in.group_id:       Group ID to be used for physical allocations.
+ * @in.padding:        Padding
+ * @out:               Output parameters
+ * @out.gpu_heap_va:   GPU VA (virtual address) of Heap context that was set up
+ *                     for the heap.
+ * @out.first_chunk_va: GPU VA of the first chunk allocated for the heap,
+ *                     actually points to the header of heap chunk and not to
+ *                     the low address of free memory in the chunk.
  */
 union kbase_ioctl_cs_tiler_heap_init {
 	struct {
@@ -291,28 +295,25 @@ struct kbase_ioctl_cs_tiler_heap_term {
  * union kbase_ioctl_cs_get_glb_iface - Request the global control block
  *                                        of CSF interface capabilities
  *
- * @max_group_num:        The maximum number of groups to be read. Can be 0, in
- *                        which case groups_ptr is unused.
- * @max_total_stream_num: The maximum number of streams to be read. Can be 0, in
- *                        which case streams_ptr is unused.
- * @groups_ptr:       Pointer where to store all the group data (sequentially).
- * @streams_ptr:      Pointer where to store all the stream data (sequentially).
- * @glb_version:      Global interface version. Bits 31:16 hold the major
- *                    version number and 15:0 hold the minor version number.
- *                    A higher minor version is backwards-compatible with a
- *                    lower minor version for the same major version.
- * @features:         Bit mask of features (e.g. whether certain types of job
- *                    can be suspended).
- * @group_num:        Number of command stream groups supported.
- * @prfcnt_size:      Size of CSF performance counters, in bytes. Bits 31:16
- *                    hold the size of firmware performance counter data
- *                    and 15:0 hold the size of hardware performance counter
- *                    data.
- * @total_stream_num: Total number of command streams, summed across all groups.
- * @padding:          Will be zeroed.
- *
- * @in: Input parameters
- * @out: Output parameters
+ * @in:                    Input parameters
+ * @in.max_group_num:      The maximum number of groups to be read. Can be 0, in
+ *                         which case groups_ptr is unused.
+ * @in.max_total_stream_num: The maximum number of CSs to be read. Can be 0, in
+ *                         which case streams_ptr is unused.
+ * @in.groups_ptr:         Pointer where to store all the group data (sequentially).
+ * @in.streams_ptr:        Pointer where to store all the CS data (sequentially).
+ * @out:                   Output parameters
+ * @out.glb_version:       Global interface version.
+ * @out.features:          Bit mask of features (e.g. whether certain types of job
+ *                         can be suspended).
+ * @out.group_num:         Number of CSGs supported.
+ * @out.prfcnt_size:       Size of CSF performance counters, in bytes. Bits 31:16
+ *                         hold the size of firmware performance counter data
+ *                         and 15:0 hold the size of hardware performance counter
+ *                         data.
+ * @out.total_stream_num:  Total number of CSs, summed across all groups.
+ * @out.instr_features:    Instrumentation features. Bits 7:4 hold the maximum
+ *                         size of events. Bits 3:0 hold the offset update rate.
  *
  */
 union kbase_ioctl_cs_get_glb_iface {
@@ -328,13 +329,24 @@ union kbase_ioctl_cs_get_glb_iface {
 		__u32 group_num;
 		__u32 prfcnt_size;
 		__u32 total_stream_num;
-		__u32 padding;
+		__u32 instr_features;
 	} out;
 };
 
 #define KBASE_IOCTL_CS_GET_GLB_IFACE \
 	_IOWR(KBASE_IOCTL_TYPE, 51, union kbase_ioctl_cs_get_glb_iface)
 
+struct kbase_ioctl_cs_cpu_queue_info {
+	__u64 buffer;
+	__u64 size;
+};
+
+#define KBASE_IOCTL_VERSION_CHECK \
+	_IOWR(KBASE_IOCTL_TYPE, 52, struct kbase_ioctl_version_check)
+
+#define KBASE_IOCTL_CS_CPU_QUEUE_DUMP \
+	_IOW(KBASE_IOCTL_TYPE, 53, struct kbase_ioctl_cs_cpu_queue_info)
+
 /***************
  * test ioctls *
  ***************/
@@ -357,12 +369,11 @@ struct kbase_ioctl_cs_event_memory_write {
 
 /**
  * union kbase_ioctl_cs_event_memory_read - Read an event memory address
- * @cpu_addr: Memory address to read
- * @value: Value read
- * @padding: Currently unused, must be zero
- *
  * @in: Input parameters
+ * @in.cpu_addr: Memory address to read
  * @out: Output parameters
+ * @out.value: Value read
+ * @out.padding: Currently unused, must be zero
  */
 union kbase_ioctl_cs_event_memory_read {
 	struct {
@@ -376,4 +387,4 @@ union kbase_ioctl_cs_event_memory_read {
 
 #endif /* MALI_UNIT_TEST */
 
-#endif /* _KBASE_CSF_IOCTL_H_ */
+#endif /* _UAPI_KBASE_CSF_IOCTL_H_ */
diff --git a/drivers/gpu/arm/bifrost/gpu/backend/mali_kbase_gpu_regmap_csf.h b/include/uapi/gpu/arm/bifrost/gpu/backend/mali_kbase_gpu_regmap_csf.h
similarity index 78%
rename from drivers/gpu/arm/bifrost/gpu/backend/mali_kbase_gpu_regmap_csf.h
rename to include/uapi/gpu/arm/bifrost/gpu/backend/mali_kbase_gpu_regmap_csf.h
index ff6e4ae47184..20417395750f 100644
--- a/drivers/gpu/arm/bifrost/gpu/backend/mali_kbase_gpu_regmap_csf.h
+++ b/include/uapi/gpu/arm/bifrost/gpu/backend/mali_kbase_gpu_regmap_csf.h
@@ -1,11 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
 /*
  *
- * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,20 +17,53 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
-#ifndef _KBASE_GPU_REGMAP_CSF_H_
-#define _KBASE_GPU_REGMAP_CSF_H_
+#ifndef _UAPI_KBASE_GPU_REGMAP_CSF_H_
+#define _UAPI_KBASE_GPU_REGMAP_CSF_H_
 
-#if !MALI_USE_CSF
+#include <linux/types.h>
+
+#if !MALI_USE_CSF && defined(__KERNEL__)
 #error "Cannot be compiled with JM"
 #endif
 
-#include "csf/mali_gpu_csf_control_registers.h"
-#define GPU_CONTROL_MCU_REG(r)  (GPU_CONTROL_MCU + (r))
+/* IPA control registers */
 
+#define IPA_CONTROL_BASE       0x40000
+#define IPA_CONTROL_REG(r)     (IPA_CONTROL_BASE+(r))
+#define COMMAND                0x000 /* (WO) Command register */
+#define STATUS                 0x004 /* (RO) Status register */
+#define TIMER                  0x008 /* (RW) Timer control register */
+
+#define SELECT_CSHW_LO         0x010 /* (RW) Counter select for CS hardware, low word */
+#define SELECT_CSHW_HI         0x014 /* (RW) Counter select for CS hardware, high word */
+#define SELECT_MEMSYS_LO       0x018 /* (RW) Counter select for Memory system, low word */
+#define SELECT_MEMSYS_HI       0x01C /* (RW) Counter select for Memory system, high word */
+#define SELECT_TILER_LO        0x020 /* (RW) Counter select for Tiler cores, low word */
+#define SELECT_TILER_HI        0x024 /* (RW) Counter select for Tiler cores, high word */
+#define SELECT_SHADER_LO       0x028 /* (RW) Counter select for Shader cores, low word */
+#define SELECT_SHADER_HI       0x02C /* (RW) Counter select for Shader cores, high word */
+
+/* Accumulated counter values for CS hardware */
+#define VALUE_CSHW_BASE        0x100
+#define VALUE_CSHW_REG_LO(n)   (VALUE_CSHW_BASE + ((n) << 3))       /* (RO) Counter value #n, low word */
+#define VALUE_CSHW_REG_HI(n)   (VALUE_CSHW_BASE + ((n) << 3) + 4)   /* (RO) Counter value #n, high word */
+
+/* Accumulated counter values for memory system */
+#define VALUE_MEMSYS_BASE      0x140
+#define VALUE_MEMSYS_REG_LO(n) (VALUE_MEMSYS_BASE + ((n) << 3))     /* (RO) Counter value #n, low word */
+#define VALUE_MEMSYS_REG_HI(n) (VALUE_MEMSYS_BASE + ((n) << 3) + 4) /* (RO) Counter value #n, high word */
+
+#define VALUE_TILER_BASE       0x180
+#define VALUE_TILER_REG_LO(n)  (VALUE_TILER_BASE + ((n) << 3))      /* (RO) Counter value #n, low word */
+#define VALUE_TILER_REG_HI(n)  (VALUE_TILER_BASE + ((n) << 3) + 4)  /* (RO) Counter value #n, high word */
+
+#define VALUE_SHADER_BASE      0x1C0
+#define VALUE_SHADER_REG_LO(n) (VALUE_SHADER_BASE + ((n) << 3))     /* (RO) Counter value #n, low word */
+#define VALUE_SHADER_REG_HI(n) (VALUE_SHADER_BASE + ((n) << 3) + 4) /* (RO) Counter value #n, high word */
+
+#include "../../csf/mali_gpu_csf_control_registers.h"
 
 /* Set to implementation defined, outer caching */
 #define AS_MEMATTR_AARCH64_OUTER_IMPL_DEF 0x88ull
@@ -68,13 +102,14 @@
 /* Normal memory, shared between MCU and Host */
 #define AS_MEMATTR_INDEX_SHARED                6
 
-/* Configuration bits for the Command Stream Frontend. */
+/* Configuration bits for the CSF. */
 #define CSF_CONFIG 0xF00
 
 /* CSF_CONFIG register */
 #define CSF_CONFIG_FORCE_COHERENCY_FEATURES_SHIFT 2
 
 /* GPU control registers */
+#define CORE_FEATURES           0x008   /* () Shader Core Features */
 #define MCU_CONTROL             0x700
 #define MCU_STATUS              0x704
 
@@ -95,7 +130,7 @@
 				 */
 
 #define PRFCNT_CSHW_EN   0x06C  /* (RW) Performance counter
-				 * enable for Command Stream Hardware
+				 * enable for CS Hardware
 				 */
 
 #define PRFCNT_SHADER_EN 0x070  /* (RW) Performance counter enable
@@ -128,7 +163,7 @@
  */
 #define GPU_COMMAND_RESET_PAYLOAD_FAST_RESET 0x00
 
-/* This will leave the state of active command streams UNDEFINED, but will leave the external bus in a defined and
+/* This will leave the state of active CSs UNDEFINED, but will leave the external bus in a defined and
  * idle state.
  */
 #define GPU_COMMAND_RESET_PAYLOAD_SOFT_RESET 0x01
@@ -154,7 +189,7 @@
 
 /* GPU_COMMAND command + payload */
 #define GPU_COMMAND_CODE_PAYLOAD(opcode, payload) \
-	((u32)opcode | ((u32)payload << 8))
+	((__u32)(opcode) | ((__u32)(payload) << 8))
 
 /* Final GPU_COMMAND form */
 /* No operation, nothing happens */
@@ -245,9 +280,12 @@
 #define GPU_FAULTSTATUS_ACCESS_TYPE_WRITE 0x3
 /* End of GPU_FAULTSTATUS_ACCESS_TYPE values */
 
-/* TODO: Remove once 10.x.6 headers became available */
-#define GPU_EXCEPTION_TYPE_SW_FAULT_0 ((u8)0x70)
-#define GPU_EXCEPTION_TYPE_SW_FAULT_1 ((u8)0x71)
+/* Implementation-dependent exception codes used to indicate CSG
+ * and CS errors that are not specified in the specs.
+ */
+#define GPU_EXCEPTION_TYPE_SW_FAULT_0 ((__u8)0x70)
+#define GPU_EXCEPTION_TYPE_SW_FAULT_1 ((__u8)0x71)
+#define GPU_EXCEPTION_TYPE_SW_FAULT_2 ((__u8)0x72)
 
 /* GPU_FAULTSTATUS_EXCEPTION_TYPE values */
 #define GPU_FAULTSTATUS_EXCEPTION_TYPE_OK 0x00
@@ -294,4 +332,4 @@
 /* GPU_CONTROL_MCU.GPU_IRQ_RAWSTAT */
 #define PRFCNT_SAMPLE_COMPLETED (1 << 16)   /* Set when performance count sample has completed */
 
-#endif /* _KBASE_GPU_REGMAP_CSF_H_ */
+#endif /* _UAPI_KBASE_GPU_REGMAP_CSF_H_ */
diff --git a/drivers/gpu/arm/bifrost/gpu/backend/mali_kbase_gpu_regmap_jm.h b/include/uapi/gpu/arm/bifrost/gpu/backend/mali_kbase_gpu_regmap_jm.h
similarity index 96%
rename from drivers/gpu/arm/bifrost/gpu/backend/mali_kbase_gpu_regmap_jm.h
rename to include/uapi/gpu/arm/bifrost/gpu/backend/mali_kbase_gpu_regmap_jm.h
index c9c2fbd49058..1be3541c930f 100644
--- a/drivers/gpu/arm/bifrost/gpu/backend/mali_kbase_gpu_regmap_jm.h
+++ b/include/uapi/gpu/arm/bifrost/gpu/backend/mali_kbase_gpu_regmap_jm.h
@@ -1,11 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
 /*
  *
- * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,14 +17,12 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
-#ifndef _KBASE_GPU_REGMAP_JM_H_
-#define _KBASE_GPU_REGMAP_JM_H_
+#ifndef _UAPI_KBASE_GPU_REGMAP_JM_H_
+#define _UAPI_KBASE_GPU_REGMAP_JM_H_
 
-#if MALI_USE_CSF
+#if MALI_USE_CSF && defined(__KERNEL__)
 #error "Cannot be compiled with CSF"
 #endif
 
@@ -139,8 +138,8 @@
 #define JS_AFFINITY_LO         0x10	/* (RO) Core affinity mask for job slot n, low word */
 #define JS_AFFINITY_HI         0x14	/* (RO) Core affinity mask for job slot n, high word */
 #define JS_CONFIG              0x18	/* (RO) Configuration settings for job slot n */
-#define JS_XAFFINITY           0x1C	/* (RO) Extended affinity mask for job
-					   slot n */
+/* (RO) Extended affinity mask for job slot n */
+#define JS_XAFFINITY           0x1C
 
 #define JS_COMMAND             0x20	/* (WO) Command register for job slot n */
 #define JS_STATUS              0x24	/* (RO) Status register for job slot n */
@@ -151,8 +150,8 @@
 #define JS_AFFINITY_NEXT_LO    0x50	/* (RW) Next core affinity mask for job slot n, low word */
 #define JS_AFFINITY_NEXT_HI    0x54	/* (RW) Next core affinity mask for job slot n, high word */
 #define JS_CONFIG_NEXT         0x58	/* (RW) Next configuration settings for job slot n */
-#define JS_XAFFINITY_NEXT      0x5C	/* (RW) Next extended affinity mask for
-					   job slot n */
+/* (RW) Next extended affinity mask for job slot n */
+#define JS_XAFFINITY_NEXT      0x5C
 
 #define JS_COMMAND_NEXT        0x60	/* (RW) Next command register for job slot n */
 
@@ -285,4 +284,4 @@
 #define GPU_IRQ_REG_COMMON (GPU_FAULT | MULTIPLE_GPU_FAULTS | RESET_COMPLETED \
 		| POWER_CHANGED_ALL | PRFCNT_SAMPLE_COMPLETED)
 
-#endif /* _KBASE_GPU_REGMAP_JM_H_ */
+#endif /* _UAPI_KBASE_GPU_REGMAP_JM_H_ */
diff --git a/drivers/gpu/arm/bifrost/gpu/mali_kbase_gpu_coherency.h b/include/uapi/gpu/arm/bifrost/gpu/mali_kbase_gpu_coherency.h
similarity index 75%
rename from drivers/gpu/arm/bifrost/gpu/mali_kbase_gpu_coherency.h
rename to include/uapi/gpu/arm/bifrost/gpu/mali_kbase_gpu_coherency.h
index bb2b1613aa47..83d84137a034 100644
--- a/drivers/gpu/arm/bifrost/gpu/mali_kbase_gpu_coherency.h
+++ b/include/uapi/gpu/arm/bifrost/gpu/mali_kbase_gpu_coherency.h
@@ -1,11 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
 /*
  *
- * (C) COPYRIGHT 2015-2020 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2015-2021 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,16 +17,14 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
-#ifndef _KBASE_GPU_COHERENCY_H_
-#define _KBASE_GPU_COHERENCY_H_
+#ifndef _UAPI_KBASE_GPU_COHERENCY_H_
+#define _UAPI_KBASE_GPU_COHERENCY_H_
 
 #define COHERENCY_ACE_LITE 0
 #define COHERENCY_ACE      1
 #define COHERENCY_NONE     31
 #define COHERENCY_FEATURE_BIT(x) (1 << (x))
 
-#endif /* _KBASE_GPU_COHERENCY_H_ */
+#endif /* _UAPI_KBASE_GPU_COHERENCY_H_ */
diff --git a/drivers/gpu/arm/bifrost/gpu/mali_kbase_gpu_id.h b/include/uapi/gpu/arm/bifrost/gpu/mali_kbase_gpu_id.h
similarity index 72%
rename from drivers/gpu/arm/bifrost/gpu/mali_kbase_gpu_id.h
rename to include/uapi/gpu/arm/bifrost/gpu/mali_kbase_gpu_id.h
index 31d55264c67f..2f0c065df161 100644
--- a/drivers/gpu/arm/bifrost/gpu/mali_kbase_gpu_id.h
+++ b/include/uapi/gpu/arm/bifrost/gpu/mali_kbase_gpu_id.h
@@ -1,11 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
 /*
  *
- * (C) COPYRIGHT 2015-2020 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2015-2021 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,12 +17,12 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
-#ifndef _KBASE_GPU_ID_H_
-#define _KBASE_GPU_ID_H_
+#ifndef _UAPI_KBASE_GPU_ID_H_
+#define _UAPI_KBASE_GPU_ID_H_
+
+#include <linux/types.h>
 
 /* GPU_ID register */
 #define GPU_ID_VERSION_STATUS_SHIFT       0
@@ -53,19 +54,21 @@
 								GPU_ID2_VERSION_STATUS)
 
 /* Helper macro to create a partial GPU_ID (new format) that defines
-   a product ignoring its version. */
+ * a product ignoring its version.
+ */
 #define GPU_ID2_PRODUCT_MAKE(arch_major, arch_minor, arch_rev, product_major) \
-		((((u32)arch_major) << GPU_ID2_ARCH_MAJOR_SHIFT)  | \
-		 (((u32)arch_minor) << GPU_ID2_ARCH_MINOR_SHIFT)  | \
-		 (((u32)arch_rev) << GPU_ID2_ARCH_REV_SHIFT)      | \
-		 (((u32)product_major) << GPU_ID2_PRODUCT_MAJOR_SHIFT))
+		((((__u32)arch_major) << GPU_ID2_ARCH_MAJOR_SHIFT)  | \
+		 (((__u32)arch_minor) << GPU_ID2_ARCH_MINOR_SHIFT)  | \
+		 (((__u32)arch_rev) << GPU_ID2_ARCH_REV_SHIFT)      | \
+		 (((__u32)product_major) << GPU_ID2_PRODUCT_MAJOR_SHIFT))
 
 /* Helper macro to create a partial GPU_ID (new format) that specifies the
-   revision (major, minor, status) of a product */
+ * revision (major, minor, status) of a product
+ */
 #define GPU_ID2_VERSION_MAKE(version_major, version_minor, version_status) \
-		((((u32)version_major) << GPU_ID2_VERSION_MAJOR_SHIFT)  | \
-		 (((u32)version_minor) << GPU_ID2_VERSION_MINOR_SHIFT)  | \
-		 (((u32)version_status) << GPU_ID2_VERSION_STATUS_SHIFT))
+		((((__u32)version_major) << GPU_ID2_VERSION_MAJOR_SHIFT)  | \
+		 (((__u32)version_minor) << GPU_ID2_VERSION_MINOR_SHIFT)  | \
+		 (((__u32)version_status) << GPU_ID2_VERSION_STATUS_SHIFT))
 
 /* Helper macro to create a complete GPU_ID (new format) */
 #define GPU_ID2_MAKE(arch_major, arch_minor, arch_rev, product_major, \
@@ -76,16 +79,18 @@
 			version_status))
 
 /* Helper macro to create a partial GPU_ID (new format) that identifies
-   a particular GPU model by its arch_major and product_major. */
+ * a particular GPU model by its arch_major and product_major.
+ */
 #define GPU_ID2_MODEL_MAKE(arch_major, product_major) \
-		((((u32)arch_major) << GPU_ID2_ARCH_MAJOR_SHIFT)  | \
-		(((u32)product_major) << GPU_ID2_PRODUCT_MAJOR_SHIFT))
+		((((__u32)arch_major) << GPU_ID2_ARCH_MAJOR_SHIFT)  | \
+		(((__u32)product_major) << GPU_ID2_PRODUCT_MAJOR_SHIFT))
 
 /* Strip off the non-relevant bits from a product_id value and make it suitable
-   for comparison against the GPU_ID2_PRODUCT_xxx values which identify a GPU
-   model. */
+ * for comparison against the GPU_ID2_PRODUCT_xxx values which identify a GPU
+ * model.
+ */
 #define GPU_ID2_MODEL_MATCH_VALUE(product_id) \
-		((((u32)product_id) << GPU_ID2_PRODUCT_MAJOR_SHIFT) & \
+		((((__u32)product_id) << GPU_ID2_PRODUCT_MAJOR_SHIFT) & \
 		    GPU_ID2_PRODUCT_MODEL)
 
 #define GPU_ID2_PRODUCT_TMIX              GPU_ID2_MODEL_MAKE(6, 0)
@@ -101,19 +106,15 @@
 #define GPU_ID2_PRODUCT_TBAX              GPU_ID2_MODEL_MAKE(9, 5)
 #define GPU_ID2_PRODUCT_TDUX              GPU_ID2_MODEL_MAKE(10, 1)
 #define GPU_ID2_PRODUCT_TODX              GPU_ID2_MODEL_MAKE(10, 2)
-#define GPU_ID2_PRODUCT_TGRX              GPU_ID2_MODEL_MAKE(10, 3)
-#define GPU_ID2_PRODUCT_TVAX              GPU_ID2_MODEL_MAKE(10, 4)
 #define GPU_ID2_PRODUCT_LODX              GPU_ID2_MODEL_MAKE(10, 7)
-#define GPU_ID2_PRODUCT_TTUX              GPU_ID2_MODEL_MAKE(11, 2)
-#define GPU_ID2_PRODUCT_LTUX              GPU_ID2_MODEL_MAKE(11, 3)
-#define GPU_ID2_PRODUCT_TE2X              GPU_ID2_MODEL_MAKE(11, 1)
 
 /* Helper macro to create a GPU_ID assuming valid values for id, major,
-   minor, status */
+ * minor, status
+ */
 #define GPU_ID_MAKE(id, major, minor, status) \
-		((((u32)id) << GPU_ID_VERSION_PRODUCT_ID_SHIFT) | \
-		(((u32)major) << GPU_ID_VERSION_MAJOR_SHIFT) |   \
-		(((u32)minor) << GPU_ID_VERSION_MINOR_SHIFT) |   \
-		(((u32)status) << GPU_ID_VERSION_STATUS_SHIFT))
+		((((__u32)id) << GPU_ID_VERSION_PRODUCT_ID_SHIFT) | \
+		(((__u32)major) << GPU_ID_VERSION_MAJOR_SHIFT) |   \
+		(((__u32)minor) << GPU_ID_VERSION_MINOR_SHIFT) |   \
+		(((__u32)status) << GPU_ID_VERSION_STATUS_SHIFT))
 
-#endif /* _KBASE_GPU_ID_H_ */
+#endif /* _UAPI_KBASE_GPU_ID_H_ */
diff --git a/include/uapi/gpu/arm/bifrost/gpu/mali_kbase_gpu_regmap.h b/include/uapi/gpu/arm/bifrost/gpu/mali_kbase_gpu_regmap.h
new file mode 100644
index 000000000000..be9c38984d3b
--- /dev/null
+++ b/include/uapi/gpu/arm/bifrost/gpu/mali_kbase_gpu_regmap.h
@@ -0,0 +1,424 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ *
+ * (C) COPYRIGHT 2021 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+#ifndef _UAPI_KBASE_GPU_REGMAP_H_
+#define _UAPI_KBASE_GPU_REGMAP_H_
+
+#include "mali_kbase_gpu_coherency.h"
+#include "mali_kbase_gpu_id.h"
+#if MALI_USE_CSF
+#include "backend/mali_kbase_gpu_regmap_csf.h"
+#else
+#include "backend/mali_kbase_gpu_regmap_jm.h"
+#endif
+
+/* Begin Register Offsets */
+/* GPU control registers */
+
+#define GPU_CONTROL_BASE        0x0000
+#define GPU_CONTROL_REG(r)      (GPU_CONTROL_BASE + (r))
+#define GPU_ID                  0x000   /* (RO) GPU and revision identifier */
+#define L2_FEATURES             0x004   /* (RO) Level 2 cache features */
+#define TILER_FEATURES          0x00C   /* (RO) Tiler Features */
+#define MEM_FEATURES            0x010   /* (RO) Memory system features */
+#define MMU_FEATURES            0x014   /* (RO) MMU features */
+#define AS_PRESENT              0x018   /* (RO) Address space slots present */
+#define GPU_IRQ_RAWSTAT         0x020   /* (RW) */
+#define GPU_IRQ_CLEAR           0x024   /* (WO) */
+#define GPU_IRQ_MASK            0x028   /* (RW) */
+#define GPU_IRQ_STATUS          0x02C   /* (RO) */
+
+#define GPU_COMMAND             0x030   /* (WO) */
+#define GPU_STATUS              0x034   /* (RO) */
+
+#define GPU_DBGEN               (1 << 8)    /* DBGEN wire status */
+
+#define GPU_FAULTSTATUS         0x03C   /* (RO) GPU exception type and fault status */
+#define GPU_FAULTADDRESS_LO     0x040   /* (RO) GPU exception fault address, low word */
+#define GPU_FAULTADDRESS_HI     0x044   /* (RO) GPU exception fault address, high word */
+
+#define L2_CONFIG               0x048   /* (RW) Level 2 cache configuration */
+
+#define GROUPS_L2_COHERENT      (1 << 0) /* Core groups are L2 coherent */
+#define SUPER_L2_COHERENT       (1 << 1) /* Shader cores within a core
+					  * supergroup are L2 coherent
+					  */
+
+#define PWR_KEY                 0x050   /* (WO) Power manager key register */
+#define PWR_OVERRIDE0           0x054   /* (RW) Power manager override settings */
+#define PWR_OVERRIDE1           0x058   /* (RW) Power manager override settings */
+#define GPU_FEATURES_LO         0x060   /* (RO) GPU features, low word */
+#define GPU_FEATURES_HI         0x064   /* (RO) GPU features, high word */
+#define CYCLE_COUNT_LO          0x090   /* (RO) Cycle counter, low word */
+#define CYCLE_COUNT_HI          0x094   /* (RO) Cycle counter, high word */
+#define TIMESTAMP_LO            0x098   /* (RO) Global time stamp counter, low word */
+#define TIMESTAMP_HI            0x09C   /* (RO) Global time stamp counter, high word */
+
+#define THREAD_MAX_THREADS      0x0A0   /* (RO) Maximum number of threads per core */
+#define THREAD_MAX_WORKGROUP_SIZE 0x0A4 /* (RO) Maximum workgroup size */
+#define THREAD_MAX_BARRIER_SIZE 0x0A8   /* (RO) Maximum threads waiting at a barrier */
+#define THREAD_FEATURES         0x0AC   /* (RO) Thread features */
+#define THREAD_TLS_ALLOC        0x310   /* (RO) Number of threads per core that TLS must be allocated for */
+
+#define TEXTURE_FEATURES_0      0x0B0   /* (RO) Support flags for indexed texture formats 0..31 */
+#define TEXTURE_FEATURES_1      0x0B4   /* (RO) Support flags for indexed texture formats 32..63 */
+#define TEXTURE_FEATURES_2      0x0B8   /* (RO) Support flags for indexed texture formats 64..95 */
+#define TEXTURE_FEATURES_3      0x0BC   /* (RO) Support flags for texture order */
+
+#define TEXTURE_FEATURES_REG(n) GPU_CONTROL_REG(TEXTURE_FEATURES_0 + ((n) << 2))
+
+#define SHADER_PRESENT_LO       0x100   /* (RO) Shader core present bitmap, low word */
+#define SHADER_PRESENT_HI       0x104   /* (RO) Shader core present bitmap, high word */
+
+#define TILER_PRESENT_LO        0x110   /* (RO) Tiler core present bitmap, low word */
+#define TILER_PRESENT_HI        0x114   /* (RO) Tiler core present bitmap, high word */
+
+#define L2_PRESENT_LO           0x120   /* (RO) Level 2 cache present bitmap, low word */
+#define L2_PRESENT_HI           0x124   /* (RO) Level 2 cache present bitmap, high word */
+
+#define STACK_PRESENT_LO        0xE00   /* (RO) Core stack present bitmap, low word */
+#define STACK_PRESENT_HI        0xE04   /* (RO) Core stack present bitmap, high word */
+
+#define SHADER_READY_LO         0x140   /* (RO) Shader core ready bitmap, low word */
+#define SHADER_READY_HI         0x144   /* (RO) Shader core ready bitmap, high word */
+
+#define TILER_READY_LO          0x150   /* (RO) Tiler core ready bitmap, low word */
+#define TILER_READY_HI          0x154   /* (RO) Tiler core ready bitmap, high word */
+
+#define L2_READY_LO             0x160   /* (RO) Level 2 cache ready bitmap, low word */
+#define L2_READY_HI             0x164   /* (RO) Level 2 cache ready bitmap, high word */
+
+#define STACK_READY_LO          0xE10   /* (RO) Core stack ready bitmap, low word */
+#define STACK_READY_HI          0xE14   /* (RO) Core stack ready bitmap, high word */
+
+#define SHADER_PWRON_LO         0x180   /* (WO) Shader core power on bitmap, low word */
+#define SHADER_PWRON_HI         0x184   /* (WO) Shader core power on bitmap, high word */
+
+#define TILER_PWRON_LO          0x190   /* (WO) Tiler core power on bitmap, low word */
+#define TILER_PWRON_HI          0x194   /* (WO) Tiler core power on bitmap, high word */
+
+#define L2_PWRON_LO             0x1A0   /* (WO) Level 2 cache power on bitmap, low word */
+#define L2_PWRON_HI             0x1A4   /* (WO) Level 2 cache power on bitmap, high word */
+
+#define STACK_PWRON_LO          0xE20   /* (WO) Core stack power on bitmap, low word */
+#define STACK_PWRON_HI          0xE24   /* (WO) Core stack power on bitmap, high word */
+
+#define SHADER_PWROFF_LO        0x1C0   /* (WO) Shader core power off bitmap, low word */
+#define SHADER_PWROFF_HI        0x1C4   /* (WO) Shader core power off bitmap, high word */
+
+#define TILER_PWROFF_LO         0x1D0   /* (WO) Tiler core power off bitmap, low word */
+#define TILER_PWROFF_HI         0x1D4   /* (WO) Tiler core power off bitmap, high word */
+
+#define L2_PWROFF_LO            0x1E0   /* (WO) Level 2 cache power off bitmap, low word */
+#define L2_PWROFF_HI            0x1E4   /* (WO) Level 2 cache power off bitmap, high word */
+
+#define STACK_PWROFF_LO         0xE30   /* (WO) Core stack power off bitmap, low word */
+#define STACK_PWROFF_HI         0xE34   /* (WO) Core stack power off bitmap, high word */
+
+#define SHADER_PWRTRANS_LO      0x200   /* (RO) Shader core power transition bitmap, low word */
+#define SHADER_PWRTRANS_HI      0x204   /* (RO) Shader core power transition bitmap, high word */
+
+#define TILER_PWRTRANS_LO       0x210   /* (RO) Tiler core power transition bitmap, low word */
+#define TILER_PWRTRANS_HI       0x214   /* (RO) Tiler core power transition bitmap, high word */
+
+#define L2_PWRTRANS_LO          0x220   /* (RO) Level 2 cache power transition bitmap, low word */
+#define L2_PWRTRANS_HI          0x224   /* (RO) Level 2 cache power transition bitmap, high word */
+
+#define ASN_HASH_0              0x02C0 /* (RW) ASN hash function argument 0 */
+#define ASN_HASH(n)             (ASN_HASH_0 + (n)*4)
+#define ASN_HASH_COUNT          3
+
+#define STACK_PWRTRANS_LO       0xE40   /* (RO) Core stack power transition bitmap, low word */
+#define STACK_PWRTRANS_HI       0xE44   /* (RO) Core stack power transition bitmap, high word */
+
+#define SHADER_PWRACTIVE_LO     0x240   /* (RO) Shader core active bitmap, low word */
+#define SHADER_PWRACTIVE_HI     0x244   /* (RO) Shader core active bitmap, high word */
+
+#define TILER_PWRACTIVE_LO      0x250   /* (RO) Tiler core active bitmap, low word */
+#define TILER_PWRACTIVE_HI      0x254   /* (RO) Tiler core active bitmap, high word */
+
+#define L2_PWRACTIVE_LO         0x260   /* (RO) Level 2 cache active bitmap, low word */
+#define L2_PWRACTIVE_HI         0x264   /* (RO) Level 2 cache active bitmap, high word */
+
+#define COHERENCY_FEATURES      0x300   /* (RO) Coherency features present */
+#define COHERENCY_ENABLE        0x304   /* (RW) Coherency enable */
+
+#define SHADER_CONFIG           0xF04   /* (RW) Shader core configuration (implementation-specific) */
+#define TILER_CONFIG            0xF08   /* (RW) Tiler core configuration (implementation-specific) */
+#define L2_MMU_CONFIG           0xF0C   /* (RW) L2 cache and MMU configuration (implementation-specific) */
+
+/* Job control registers */
+
+#define JOB_CONTROL_BASE        0x1000
+
+#define JOB_CONTROL_REG(r)      (JOB_CONTROL_BASE + (r))
+
+#define JOB_IRQ_RAWSTAT         0x000   /* Raw interrupt status register */
+#define JOB_IRQ_CLEAR           0x004   /* Interrupt clear register */
+#define JOB_IRQ_MASK            0x008   /* Interrupt mask register */
+#define JOB_IRQ_STATUS          0x00C   /* Interrupt status register */
+
+/* MMU control registers */
+
+#define MEMORY_MANAGEMENT_BASE  0x2000
+#define MMU_REG(r)              (MEMORY_MANAGEMENT_BASE + (r))
+
+#define MMU_IRQ_RAWSTAT         0x000   /* (RW) Raw interrupt status register */
+#define MMU_IRQ_CLEAR           0x004   /* (WO) Interrupt clear register */
+#define MMU_IRQ_MASK            0x008   /* (RW) Interrupt mask register */
+#define MMU_IRQ_STATUS          0x00C   /* (RO) Interrupt status register */
+
+#define MMU_AS0                 0x400   /* Configuration registers for address space 0 */
+#define MMU_AS1                 0x440   /* Configuration registers for address space 1 */
+#define MMU_AS2                 0x480   /* Configuration registers for address space 2 */
+#define MMU_AS3                 0x4C0   /* Configuration registers for address space 3 */
+#define MMU_AS4                 0x500   /* Configuration registers for address space 4 */
+#define MMU_AS5                 0x540   /* Configuration registers for address space 5 */
+#define MMU_AS6                 0x580   /* Configuration registers for address space 6 */
+#define MMU_AS7                 0x5C0   /* Configuration registers for address space 7 */
+#define MMU_AS8                 0x600   /* Configuration registers for address space 8 */
+#define MMU_AS9                 0x640   /* Configuration registers for address space 9 */
+#define MMU_AS10                0x680   /* Configuration registers for address space 10 */
+#define MMU_AS11                0x6C0   /* Configuration registers for address space 11 */
+#define MMU_AS12                0x700   /* Configuration registers for address space 12 */
+#define MMU_AS13                0x740   /* Configuration registers for address space 13 */
+#define MMU_AS14                0x780   /* Configuration registers for address space 14 */
+#define MMU_AS15                0x7C0   /* Configuration registers for address space 15 */
+
+/* MMU address space control registers */
+
+#define MMU_AS_REG(n, r)        (MMU_REG(MMU_AS0 + ((n) << 6)) + (r))
+
+#define AS_TRANSTAB_LO         0x00	/* (RW) Translation Table Base Address for address space n, low word */
+#define AS_TRANSTAB_HI         0x04	/* (RW) Translation Table Base Address for address space n, high word */
+#define AS_MEMATTR_LO          0x08	/* (RW) Memory attributes for address space n, low word. */
+#define AS_MEMATTR_HI          0x0C	/* (RW) Memory attributes for address space n, high word. */
+#define AS_LOCKADDR_LO         0x10	/* (RW) Lock region address for address space n, low word */
+#define AS_LOCKADDR_HI         0x14	/* (RW) Lock region address for address space n, high word */
+#define AS_COMMAND             0x18	/* (WO) MMU command register for address space n */
+#define AS_FAULTSTATUS         0x1C	/* (RO) MMU fault status register for address space n */
+#define AS_FAULTADDRESS_LO     0x20	/* (RO) Fault Address for address space n, low word */
+#define AS_FAULTADDRESS_HI     0x24	/* (RO) Fault Address for address space n, high word */
+#define AS_STATUS              0x28	/* (RO) Status flags for address space n */
+
+/* (RW) Translation table configuration for address space n, low word */
+#define AS_TRANSCFG_LO         0x30
+/* (RW) Translation table configuration for address space n, high word */
+#define AS_TRANSCFG_HI         0x34
+/* (RO) Secondary fault address for address space n, low word */
+#define AS_FAULTEXTRA_LO       0x38
+/* (RO) Secondary fault address for address space n, high word */
+#define AS_FAULTEXTRA_HI       0x3C
+
+/* End Register Offsets */
+
+#define GPU_IRQ_REG_ALL (GPU_IRQ_REG_COMMON)
+
+/*
+ * MMU_IRQ_RAWSTAT register values. Values are also valid for the
+ * MMU_IRQ_CLEAR, MMU_IRQ_MASK and MMU_IRQ_STATUS registers.
+ */
+
+#define MMU_PAGE_FAULT_FLAGS    16
+
+/* Macros returning a bitmask to retrieve page fault or bus error flags from
+ * MMU registers
+ */
+#define MMU_PAGE_FAULT(n)       (1UL << (n))
+#define MMU_BUS_ERROR(n)        (1UL << ((n) + MMU_PAGE_FAULT_FLAGS))
+
+/*
+ * Begin AARCH64 MMU TRANSTAB register values
+ */
+#define MMU_HW_OUTA_BITS 40
+#define AS_TRANSTAB_BASE_MASK ((1ULL << MMU_HW_OUTA_BITS) - (1ULL << 4))
+
+/*
+ * Begin MMU STATUS register values
+ */
+#define AS_STATUS_AS_ACTIVE 0x01
+
+#define AS_FAULTSTATUS_EXCEPTION_CODE_MASK                      (0x7<<3)
+#define AS_FAULTSTATUS_EXCEPTION_CODE_TRANSLATION_FAULT         (0x0<<3)
+#define AS_FAULTSTATUS_EXCEPTION_CODE_PERMISSION_FAULT          (0x1<<3)
+#define AS_FAULTSTATUS_EXCEPTION_CODE_TRANSTAB_BUS_FAULT        (0x2<<3)
+#define AS_FAULTSTATUS_EXCEPTION_CODE_ACCESS_FLAG               (0x3<<3)
+#define AS_FAULTSTATUS_EXCEPTION_CODE_ADDRESS_SIZE_FAULT        (0x4<<3)
+#define AS_FAULTSTATUS_EXCEPTION_CODE_MEMORY_ATTRIBUTES_FAULT   (0x5<<3)
+
+#define AS_FAULTSTATUS_EXCEPTION_TYPE_SHIFT 0
+#define AS_FAULTSTATUS_EXCEPTION_TYPE_MASK (0xFF << AS_FAULTSTATUS_EXCEPTION_TYPE_SHIFT)
+#define AS_FAULTSTATUS_EXCEPTION_TYPE_GET(reg_val) \
+	(((reg_val)&AS_FAULTSTATUS_EXCEPTION_TYPE_MASK) >> AS_FAULTSTATUS_EXCEPTION_TYPE_SHIFT)
+#define AS_FAULTSTATUS_EXCEPTION_TYPE_TRANSLATION_FAULT_0 0xC0
+
+#define AS_FAULTSTATUS_ACCESS_TYPE_SHIFT 8
+#define AS_FAULTSTATUS_ACCESS_TYPE_MASK (0x3 << AS_FAULTSTATUS_ACCESS_TYPE_SHIFT)
+#define AS_FAULTSTATUS_ACCESS_TYPE_GET(reg_val) \
+	(((reg_val)&AS_FAULTSTATUS_ACCESS_TYPE_MASK) >> AS_FAULTSTATUS_ACCESS_TYPE_SHIFT)
+
+#define AS_FAULTSTATUS_ACCESS_TYPE_ATOMIC       (0x0)
+#define AS_FAULTSTATUS_ACCESS_TYPE_EX           (0x1)
+#define AS_FAULTSTATUS_ACCESS_TYPE_READ         (0x2)
+#define AS_FAULTSTATUS_ACCESS_TYPE_WRITE        (0x3)
+
+#define AS_FAULTSTATUS_SOURCE_ID_SHIFT 16
+#define AS_FAULTSTATUS_SOURCE_ID_MASK (0xFFFF << AS_FAULTSTATUS_SOURCE_ID_SHIFT)
+#define AS_FAULTSTATUS_SOURCE_ID_GET(reg_val) \
+	(((reg_val)&AS_FAULTSTATUS_SOURCE_ID_MASK) >> AS_FAULTSTATUS_SOURCE_ID_SHIFT)
+
+/*
+ * Begin MMU TRANSCFG register values
+ */
+#define AS_TRANSCFG_ADRMODE_LEGACY      0
+#define AS_TRANSCFG_ADRMODE_UNMAPPED    1
+#define AS_TRANSCFG_ADRMODE_IDENTITY    2
+#define AS_TRANSCFG_ADRMODE_AARCH64_4K  6
+#define AS_TRANSCFG_ADRMODE_AARCH64_64K 8
+
+#define AS_TRANSCFG_ADRMODE_MASK        0xF
+
+/*
+ * Begin TRANSCFG register values
+ */
+#define AS_TRANSCFG_PTW_MEMATTR_MASK (3ull << 24)
+#define AS_TRANSCFG_PTW_MEMATTR_NON_CACHEABLE (1ull << 24)
+#define AS_TRANSCFG_PTW_MEMATTR_WRITE_BACK (2ull << 24)
+
+#define AS_TRANSCFG_PTW_SH_MASK ((3ull << 28))
+#define AS_TRANSCFG_PTW_SH_OS (2ull << 28)
+#define AS_TRANSCFG_PTW_SH_IS (3ull << 28)
+#define AS_TRANSCFG_R_ALLOCATE (1ull << 30)
+
+/*
+ * Begin Command Values
+ */
+
+/* AS_COMMAND register commands */
+#define AS_COMMAND_NOP         0x00	/* NOP Operation */
+#define AS_COMMAND_UPDATE      0x01	/* Broadcasts the values in AS_TRANSTAB and ASn_MEMATTR to all MMUs */
+#define AS_COMMAND_LOCK        0x02	/* Issue a lock region command to all MMUs */
+#define AS_COMMAND_UNLOCK      0x03	/* Issue a flush region command to all MMUs */
+/* Flush all L2 caches then issue a flush region command to all MMUs
+ * (deprecated - only for use with T60x)
+ */
+#define AS_COMMAND_FLUSH 0x04
+/* Flush all L2 caches then issue a flush region command to all MMUs */
+#define AS_COMMAND_FLUSH_PT 0x04
+/* Wait for memory accesses to complete, flush all the L1 caches, then flush
+ * all L2 caches, then issue a flush region command to all MMUs
+ */
+#define AS_COMMAND_FLUSH_MEM 0x05
+
+/* GPU_STATUS values */
+#define GPU_STATUS_PRFCNT_ACTIVE            (1 << 2)    /* Set if the performance counters are active. */
+#define GPU_STATUS_CYCLE_COUNT_ACTIVE       (1 << 6)    /* Set if the cycle counter is active. */
+#define GPU_STATUS_PROTECTED_MODE_ACTIVE    (1 << 7)    /* Set if protected mode is active */
+
+/* PRFCNT_CONFIG register values */
+#define PRFCNT_CONFIG_MODE_SHIFT        0 /* Counter mode position. */
+#define PRFCNT_CONFIG_AS_SHIFT          4 /* Address space bitmap position. */
+#define PRFCNT_CONFIG_SETSELECT_SHIFT   8 /* Set select position. */
+
+/* The performance counters are disabled. */
+#define PRFCNT_CONFIG_MODE_OFF          0
+/* The performance counters are enabled, but are only written out when a
+ * PRFCNT_SAMPLE command is issued using the GPU_COMMAND register.
+ */
+#define PRFCNT_CONFIG_MODE_MANUAL       1
+/* The performance counters are enabled, and are written out each time a tile
+ * finishes rendering.
+ */
+#define PRFCNT_CONFIG_MODE_TILE         2
+
+/* AS<n>_MEMATTR values from MMU_MEMATTR_STAGE1: */
+/* Use GPU implementation-defined caching policy. */
+#define AS_MEMATTR_IMPL_DEF_CACHE_POLICY 0x88ull
+/* The attribute set to force all resources to be cached. */
+#define AS_MEMATTR_FORCE_TO_CACHE_ALL    0x8Full
+/* Inner write-alloc cache setup, no outer caching */
+#define AS_MEMATTR_WRITE_ALLOC           0x8Dull
+
+/* Use GPU implementation-defined caching policy. */
+#define AS_MEMATTR_LPAE_IMPL_DEF_CACHE_POLICY 0x48ull
+/* The attribute set to force all resources to be cached. */
+#define AS_MEMATTR_LPAE_FORCE_TO_CACHE_ALL    0x4Full
+/* Inner write-alloc cache setup, no outer caching */
+#define AS_MEMATTR_LPAE_WRITE_ALLOC           0x4Dull
+/* Set to implementation defined, outer caching */
+#define AS_MEMATTR_LPAE_OUTER_IMPL_DEF        0x88ull
+/* Set to write back memory, outer caching */
+#define AS_MEMATTR_LPAE_OUTER_WA              0x8Dull
+/* There is no LPAE support for non-cacheable, since the memory type is always
+ * write-back.
+ * Marking this setting as reserved for LPAE
+ */
+#define AS_MEMATTR_LPAE_NON_CACHEABLE_RESERVED
+
+/* L2_MMU_CONFIG register */
+#define L2_MMU_CONFIG_ALLOW_SNOOP_DISPARITY_SHIFT       (23)
+#define L2_MMU_CONFIG_ALLOW_SNOOP_DISPARITY             (0x1 << L2_MMU_CONFIG_ALLOW_SNOOP_DISPARITY_SHIFT)
+
+/* End L2_MMU_CONFIG register */
+
+/* THREAD_* registers */
+
+/* THREAD_FEATURES IMPLEMENTATION_TECHNOLOGY values */
+#define IMPLEMENTATION_UNSPECIFIED  0
+#define IMPLEMENTATION_SILICON      1
+#define IMPLEMENTATION_FPGA         2
+#define IMPLEMENTATION_MODEL        3
+
+/* Default values when registers are not supported by the implemented hardware */
+#define THREAD_MT_DEFAULT     256
+#define THREAD_MWS_DEFAULT    256
+#define THREAD_MBS_DEFAULT    256
+#define THREAD_MR_DEFAULT     1024
+#define THREAD_MTQ_DEFAULT    4
+#define THREAD_MTGS_DEFAULT   10
+
+/* End THREAD_* registers */
+
+/* SHADER_CONFIG register */
+#define SC_LS_ALLOW_ATTR_TYPES      (1ul << 16)
+#define SC_TLS_HASH_ENABLE          (1ul << 17)
+#define SC_LS_ATTR_CHECK_DISABLE    (1ul << 18)
+#define SC_VAR_ALGORITHM            (1ul << 29)
+/* End SHADER_CONFIG register */
+
+/* TILER_CONFIG register */
+#define TC_CLOCK_GATE_OVERRIDE      (1ul << 0)
+/* End TILER_CONFIG register */
+
+/* L2_CONFIG register */
+#define L2_CONFIG_SIZE_SHIFT        16
+#define L2_CONFIG_SIZE_MASK         (0xFFul << L2_CONFIG_SIZE_SHIFT)
+#define L2_CONFIG_HASH_SHIFT        24
+#define L2_CONFIG_HASH_MASK         (0xFFul << L2_CONFIG_HASH_SHIFT)
+#define L2_CONFIG_ASN_HASH_ENABLE_SHIFT        24
+#define L2_CONFIG_ASN_HASH_ENABLE_MASK         (1ul << L2_CONFIG_ASN_HASH_ENABLE_SHIFT)
+/* End L2_CONFIG register */
+
+/* IDVS_GROUP register */
+#define IDVS_GROUP_SIZE_SHIFT (16)
+#define IDVS_GROUP_MAX_SIZE (0x3F)
+
+#endif /* _UAPI_KBASE_GPU_REGMAP_H_ */
diff --git a/drivers/gpu/arm/bifrost/jm/mali_base_jm_kernel.h b/include/uapi/gpu/arm/bifrost/jm/mali_base_jm_kernel.h
similarity index 80%
rename from drivers/gpu/arm/bifrost/jm/mali_base_jm_kernel.h
rename to include/uapi/gpu/arm/bifrost/jm/mali_base_jm_kernel.h
index 9367cc5431cf..749e1fa62883 100644
--- a/drivers/gpu/arm/bifrost/jm/mali_base_jm_kernel.h
+++ b/include/uapi/gpu/arm/bifrost/jm/mali_base_jm_kernel.h
@@ -1,11 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
 /*
  *
- * (C) COPYRIGHT 2020 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,11 +17,12 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
-#ifndef _BASE_JM_KERNEL_H_
-#define _BASE_JM_KERNEL_H_
+
+#ifndef _UAPI_BASE_JM_KERNEL_H_
+#define _UAPI_BASE_JM_KERNEL_H_
+
+#include <linux/types.h>
 
 /* Memory allocation, access/hint flags.
  *
@@ -121,9 +123,9 @@
 #define BASE_MEM_RESERVED_BIT_19 ((base_mem_alloc_flags)1 << 19)
 
 /**
- * Memory starting from the end of the initial commit is aligned to 'extent'
- * pages, where 'extent' must be a power of 2 and no more than
- * BASE_MEM_TILER_ALIGN_TOP_EXTENT_MAX_PAGES
+ * Memory starting from the end of the initial commit is aligned to 'extension'
+ * pages, where 'extension' must be a power of 2 and no more than
+ * BASE_MEM_TILER_ALIGN_TOP_EXTENSION_MAX_PAGES
  */
 #define BASE_MEM_TILER_ALIGN_TOP ((base_mem_alloc_flags)1 << 20)
 
@@ -201,14 +203,14 @@
 						BASE_MEM_COOKIE_BASE)
 
 /* Similar to BASE_MEM_TILER_ALIGN_TOP, memory starting from the end of the
- * initial commit is aligned to 'extent' pages, where 'extent' must be a power
- * of 2 and no more than BASE_MEM_TILER_ALIGN_TOP_EXTENT_MAX_PAGES
+ * initial commit is aligned to 'extension' pages, where 'extension' must be a power
+ * of 2 and no more than BASE_MEM_TILER_ALIGN_TOP_EXTENSION_MAX_PAGES
  */
 #define BASE_JIT_ALLOC_MEM_TILER_ALIGN_TOP  (1 << 0)
 
 /**
- * If set, the heap info address points to a u32 holding the used size in bytes;
- * otherwise it points to a u64 holding the lowest address of unused memory.
+ * If set, the heap info address points to a __u32 holding the used size in bytes;
+ * otherwise it points to a __u64 holding the lowest address of unused memory.
  */
 #define BASE_JIT_ALLOC_HEAP_INFO_IS_SIZE  (1 << 1)
 
@@ -230,7 +232,7 @@
  * These share the same space as BASEP_CONTEXT_FLAG_*, and so must
  * not collide with them.
  */
-typedef u32 base_context_create_flags;
+typedef __u32 base_context_create_flags;
 
 /* No flags set */
 #define BASE_CONTEXT_CREATE_FLAG_NONE ((base_context_create_flags)0)
@@ -320,7 +322,7 @@ typedef u32 base_context_create_flags;
  * @blob: per-job data array
  */
 struct base_jd_udata {
-	u64 blob[2];
+	__u64 blob[2];
 };
 
 /**
@@ -333,7 +335,7 @@ struct base_jd_udata {
  * When the flag is set for a particular dependency to signal that it is an
  * ordering only dependency then errors will not be propagated.
  */
-typedef u8 base_jd_dep_type;
+typedef __u8 base_jd_dep_type;
 
 #define BASE_JD_DEP_TYPE_INVALID  (0)       /**< Invalid dependency */
 #define BASE_JD_DEP_TYPE_DATA     (1U << 0) /**< Data dependency */
@@ -349,7 +351,7 @@ typedef u8 base_jd_dep_type;
  * Special case is ::BASE_JD_REQ_DEP, which is used to express complex
  * dependencies, and that doesn't execute anything on the hardware.
  */
-typedef u32 base_jd_core_req;
+typedef __u32 base_jd_core_req;
 
 /* Requirements that come from the HW */
 
@@ -581,6 +583,13 @@ typedef u32 base_jd_core_req;
  */
 #define BASE_JD_REQ_END_RENDERPASS ((base_jd_core_req)1 << 19)
 
+/* SW-only requirement: The atom needs to run on a limited core mask affinity.
+ *
+ * If this bit is set then the kbase_context.limited_core_mask will be applied
+ * to the affinity.
+ */
+#define BASE_JD_REQ_LIMITED_CORE_MASK ((base_jd_core_req)1 << 20)
+
 /* These requirement bits are currently unused in base_jd_core_req
  */
 #define BASEP_JD_REQ_RESERVED \
@@ -591,7 +600,7 @@ typedef u32 base_jd_core_req;
 	BASE_JD_REQ_FS_AFBC | BASE_JD_REQ_PERMON | \
 	BASE_JD_REQ_SKIP_CACHE_START | BASE_JD_REQ_SKIP_CACHE_END | \
 	BASE_JD_REQ_JOB_SLOT | BASE_JD_REQ_START_RENDERPASS | \
-	BASE_JD_REQ_END_RENDERPASS))
+	BASE_JD_REQ_END_RENDERPASS | BASE_JD_REQ_LIMITED_CORE_MASK))
 
 /* Mask of all bits in base_jd_core_req that control the type of the atom.
  *
@@ -636,7 +645,7 @@ enum kbase_jd_atom_state {
 /**
  * typedef base_atom_id - Type big enough to store an atom number in.
  */
-typedef u8 base_atom_id;
+typedef __u8 base_atom_id;
 
 /**
  * struct base_dependency -
@@ -699,10 +708,10 @@ struct base_dependency {
  * BASE_JD_REQ_END_RENDERPASS is set in the base_jd_core_req.
  */
 struct base_jd_fragment {
-	u64 norm_read_norm_write;
-	u64 norm_read_forced_write;
-	u64 forced_read_forced_write;
-	u64 forced_read_norm_write;
+	__u64 norm_read_norm_write;
+	__u64 norm_read_forced_write;
+	__u64 forced_read_forced_write;
+	__u64 forced_read_norm_write;
 };
 
 /**
@@ -742,7 +751,7 @@ struct base_jd_fragment {
  * the same context. See KBASE_JS_SYSTEM_PRIORITY_MODE and
  * KBASE_JS_PROCESS_LOCAL_PRIORITY_MODE for more details.
  */
-typedef u8 base_jd_prio;
+typedef __u8 base_jd_prio;
 
 /* Medium atom priority. This is a priority higher than BASE_JD_PRIO_LOW */
 #define BASE_JD_PRIO_MEDIUM  ((base_jd_prio)0)
@@ -752,11 +761,15 @@ typedef u8 base_jd_prio;
 #define BASE_JD_PRIO_HIGH    ((base_jd_prio)1)
 /* Low atom priority. */
 #define BASE_JD_PRIO_LOW     ((base_jd_prio)2)
+/* Real-Time atom priority. This is a priority higher than BASE_JD_PRIO_HIGH,
+ * BASE_JD_PRIO_MEDIUM, and BASE_JD_PRIO_LOW
+ */
+#define BASE_JD_PRIO_REALTIME    ((base_jd_prio)3)
 
 /* Count of the number of priority levels. This itself is not a valid
  * base_jd_prio setting
  */
-#define BASE_JD_NR_PRIO_LEVELS 3
+#define BASE_JD_NR_PRIO_LEVELS 4
 
 /**
  * struct base_jd_atom_v2 - Node of a dependency graph used to submit a
@@ -789,32 +802,32 @@ typedef u8 base_jd_prio;
  * @padding:       Unused. Must be zero.
  *
  * This structure has changed since UK 10.2 for which base_jd_core_req was a
- * u16 value.
+ * __u16 value.
  *
- * In UK 10.3 a core_req field of a u32 type was added to the end of the
- * structure, and the place in the structure previously occupied by u16
+ * In UK 10.3 a core_req field of a __u32 type was added to the end of the
+ * structure, and the place in the structure previously occupied by __u16
  * core_req was kept but renamed to compat_core_req.
  *
- * From UK 11.20 - compat_core_req is now occupied by u8 jit_id[2].
+ * From UK 11.20 - compat_core_req is now occupied by __u8 jit_id[2].
  * Compatibility with UK 10.x from UK 11.y is not handled because
  * the major version increase prevents this.
  *
  * For UK 11.20 jit_id[2] must be initialized to zero.
  */
 struct base_jd_atom_v2 {
-	u64 jc;
+	__u64 jc;
 	struct base_jd_udata udata;
-	u64 extres_list;
-	u16 nr_extres;
-	u8 jit_id[2];
+	__u64 extres_list;
+	__u16 nr_extres;
+	__u8 jit_id[2];
 	struct base_dependency pre_dep[2];
 	base_atom_id atom_number;
 	base_jd_prio prio;
-	u8 device_nr;
-	u8 jobslot;
+	__u8 device_nr;
+	__u8 jobslot;
 	base_jd_core_req core_req;
-	u8 renderpass_id;
-	u8 padding[7];
+	__u8 renderpass_id;
+	__u8 padding[7];
 };
 
 /**
@@ -849,20 +862,20 @@ struct base_jd_atom_v2 {
  * @padding:       Unused. Must be zero.
  */
 typedef struct base_jd_atom {
-	u64 seq_nr;
-	u64 jc;
+	__u64 seq_nr;
+	__u64 jc;
 	struct base_jd_udata udata;
-	u64 extres_list;
-	u16 nr_extres;
-	u8 jit_id[2];
+	__u64 extres_list;
+	__u16 nr_extres;
+	__u8 jit_id[2];
 	struct base_dependency pre_dep[2];
 	base_atom_id atom_number;
 	base_jd_prio prio;
-	u8 device_nr;
-	u8 jobslot;
+	__u8 device_nr;
+	__u8 jobslot;
 	base_jd_core_req core_req;
-	u8 renderpass_id;
-	u8 padding[7];
+	__u8 renderpass_id;
+	__u8 padding[7];
 } base_jd_atom;
 
 /* Job chain event code bits
@@ -912,6 +925,109 @@ enum {
  *                                         Such codes are never returned to
  *                                         user-space.
  * @BASE_JD_EVENT_RANGE_KERNEL_ONLY_END: End of kernel-only status codes.
+ * @BASE_JD_EVENT_DONE: atom has completed successfully
+ * @BASE_JD_EVENT_JOB_CONFIG_FAULT: Atom dependencies configuration error which
+ *                                  shall result in a failed atom
+ * @BASE_JD_EVENT_JOB_POWER_FAULT:  The job could not be executed because the
+ *                                  part of the memory system required to access
+ *                                  job descriptors was not powered on
+ * @BASE_JD_EVENT_JOB_READ_FAULT:   Reading a job descriptor into the Job
+ *                                  manager failed
+ * @BASE_JD_EVENT_JOB_WRITE_FAULT:  Writing a job descriptor from the Job
+ *                                  manager failed
+ * @BASE_JD_EVENT_JOB_AFFINITY_FAULT: The job could not be executed because the
+ *                                    specified affinity mask does not intersect
+ *                                    any available cores
+ * @BASE_JD_EVENT_JOB_BUS_FAULT:    A bus access failed while executing a job
+ * @BASE_JD_EVENT_INSTR_INVALID_PC: A shader instruction with an illegal program
+ *                                  counter was executed.
+ * @BASE_JD_EVENT_INSTR_INVALID_ENC: A shader instruction with an illegal
+ *                                  encoding was executed.
+ * @BASE_JD_EVENT_INSTR_TYPE_MISMATCH: A shader instruction was executed where
+ *                                  the instruction encoding did not match the
+ *                                  instruction type encoded in the program
+ *                                  counter.
+ * @BASE_JD_EVENT_INSTR_OPERAND_FAULT: A shader instruction was executed that
+ *                                  contained invalid combinations of operands.
+ * @BASE_JD_EVENT_INSTR_TLS_FAULT:  A shader instruction was executed that tried
+ *                                  to access the thread local storage section
+ *                                  of another thread.
+ * @BASE_JD_EVENT_INSTR_ALIGN_FAULT: A shader instruction was executed that
+ *                                  tried to do an unsupported unaligned memory
+ *                                  access.
+ * @BASE_JD_EVENT_INSTR_BARRIER_FAULT: A shader instruction was executed that
+ *                                  failed to complete an instruction barrier.
+ * @BASE_JD_EVENT_DATA_INVALID_FAULT: Any data structure read as part of the job
+ *                                  contains invalid combinations of data.
+ * @BASE_JD_EVENT_TILE_RANGE_FAULT: Tile or fragment shading was asked to
+ *                                  process a tile that is entirely outside the
+ *                                  bounding box of the frame.
+ * @BASE_JD_EVENT_STATE_FAULT:      Matches ADDR_RANGE_FAULT. A virtual address
+ *                                  has been found that exceeds the virtual
+ *                                  address range.
+ * @BASE_JD_EVENT_OUT_OF_MEMORY:    The tiler ran out of memory when executing a job.
+ * @BASE_JD_EVENT_UNKNOWN:          If multiple jobs in a job chain fail, only
+ *                                  the first one that reports an error will set
+ *                                  and return full error information.
+ *                                  Subsequent failing jobs will not update the
+ *                                  error status registers, and may write an
+ *                                  error status of UNKNOWN.
+ * @BASE_JD_EVENT_DELAYED_BUS_FAULT: The GPU received a bus fault for access to
+ *                                  physical memory where the original virtual
+ *                                  address is no longer available.
+ * @BASE_JD_EVENT_SHAREABILITY_FAULT: Matches GPU_SHAREABILITY_FAULT. A cache
+ *                                  has detected that the same line has been
+ *                                  accessed as both shareable and non-shareable
+ *                                  memory from inside the GPU.
+ * @BASE_JD_EVENT_TRANSLATION_FAULT_LEVEL1: A memory access hit an invalid table
+ *                                  entry at level 1 of the translation table.
+ * @BASE_JD_EVENT_TRANSLATION_FAULT_LEVEL2: A memory access hit an invalid table
+ *                                  entry at level 2 of the translation table.
+ * @BASE_JD_EVENT_TRANSLATION_FAULT_LEVEL3: A memory access hit an invalid table
+ *                                  entry at level 3 of the translation table.
+ * @BASE_JD_EVENT_TRANSLATION_FAULT_LEVEL4: A memory access hit an invalid table
+ *                                  entry at level 4 of the translation table.
+ * @BASE_JD_EVENT_PERMISSION_FAULT: A memory access could not be allowed due to
+ *                                  the permission flags set in translation
+ *                                  table
+ * @BASE_JD_EVENT_TRANSTAB_BUS_FAULT_LEVEL1: A bus fault occurred while reading
+ *                                  level 0 of the translation tables.
+ * @BASE_JD_EVENT_TRANSTAB_BUS_FAULT_LEVEL2: A bus fault occurred while reading
+ *                                  level 1 of the translation tables.
+ * @BASE_JD_EVENT_TRANSTAB_BUS_FAULT_LEVEL3: A bus fault occurred while reading
+ *                                  level 2 of the translation tables.
+ * @BASE_JD_EVENT_TRANSTAB_BUS_FAULT_LEVEL4: A bus fault occurred while reading
+ *                                  level 3 of the translation tables.
+ * @BASE_JD_EVENT_ACCESS_FLAG:      Matches ACCESS_FLAG_0. A memory access hit a
+ *                                  translation table entry with the ACCESS_FLAG
+ *                                  bit set to zero in level 0 of the
+ *                                  page table, and the DISABLE_AF_FAULT flag
+ *                                  was not set.
+ * @BASE_JD_EVENT_MEM_GROWTH_FAILED: raised for JIT_ALLOC atoms that failed to
+ *                                   grow memory on demand
+ * @BASE_JD_EVENT_JOB_CANCELLED: raised when this atom was hard-stopped or its
+ *                               dependencies failed
+ * @BASE_JD_EVENT_JOB_INVALID: raised for many reasons, including invalid data
+ *                             in the atom which overlaps with
+ *                             BASE_JD_EVENT_JOB_CONFIG_FAULT, or if the
+ *                             platform doesn't support the feature specified in
+ *                             the atom.
+ * @BASE_JD_EVENT_PM_EVENT:   TODO: remove as it's not used
+ * @BASE_JD_EVENT_TIMED_OUT:   TODO: remove as it's not used
+ * @BASE_JD_EVENT_BAG_INVALID:   TODO: remove as it's not used
+ * @BASE_JD_EVENT_PROGRESS_REPORT:   TODO: remove as it's not used
+ * @BASE_JD_EVENT_BAG_DONE:   TODO: remove as it's not used
+ * @BASE_JD_EVENT_DRV_TERMINATED: this is a special event generated to indicate
+ *                                to userspace that the KBase context has been
+ *                                destroyed and Base should stop listening for
+ *                                further events
+ * @BASE_JD_EVENT_REMOVED_FROM_NEXT: raised when an atom that was configured in
+ *                                   the GPU has to be retried (but it has not
+ *                                   started) due to e.g., GPU reset
+ * @BASE_JD_EVENT_END_RP_DONE: this is used for incremental rendering to signal
+ *                             the completion of a renderpass. This value
+ *                             shouldn't be returned to userspace but I haven't
+ *                             seen where it is reset back to JD_EVENT_DONE.
  *
  * HW and low-level SW events are represented by event codes.
  * The status of jobs which succeeded are also represented by
@@ -1058,6 +1174,11 @@ struct base_jd_event_v2 {
  * struct base_dump_cpu_gpu_counters - Structure for
  *                                     BASE_JD_REQ_SOFT_DUMP_CPU_GPU_COUNTERS
  *                                     jobs.
+ * @system_time:   gpu timestamp
+ * @cycle_counter: gpu cycle count
+ * @sec:           cpu time(sec)
+ * @usec:          cpu time(usec)
+ * @padding:       padding
  *
  * This structure is stored into the memory pointed to by the @jc field
  * of &struct base_jd_atom.
@@ -1069,11 +1190,11 @@ struct base_jd_event_v2 {
  */
 
 struct base_dump_cpu_gpu_counters {
-	u64 system_time;
-	u64 cycle_counter;
-	u64 sec;
-	u32 usec;
-	u8 padding[36];
+	__u64 system_time;
+	__u64 cycle_counter;
+	__u64 sec;
+	__u32 usec;
+	__u8 padding[36];
 };
 
-#endif /* _BASE_JM_KERNEL_H_ */
+#endif /* _UAPI_BASE_JM_KERNEL_H_ */
diff --git a/drivers/gpu/arm/bifrost/jm/mali_kbase_jm_ioctl.h b/include/uapi/gpu/arm/bifrost/jm/mali_kbase_jm_ioctl.h
similarity index 93%
rename from drivers/gpu/arm/bifrost/jm/mali_kbase_jm_ioctl.h
rename to include/uapi/gpu/arm/bifrost/jm/mali_kbase_jm_ioctl.h
index 305a9eb221ae..72d75cb34ec5 100644
--- a/drivers/gpu/arm/bifrost/jm/mali_kbase_jm_ioctl.h
+++ b/include/uapi/gpu/arm/bifrost/jm/mali_kbase_jm_ioctl.h
@@ -1,11 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
 /*
  *
- * (C) COPYRIGHT 2020 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,12 +17,10 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
-#ifndef _KBASE_JM_IOCTL_H_
-#define _KBASE_JM_IOCTL_H_
+#ifndef _UAPI_KBASE_JM_IOCTL_H_
+#define _UAPI_KBASE_JM_IOCTL_H_
 
 #include <asm-generic/ioctl.h>
 #include <linux/types.h>
@@ -106,17 +105,23 @@
  *   'scheduling'.
  * 11.25:
  * - Enabled JIT pressure limit in base/kbase by default
- * 11.26:
+ * 11.26:
  * - Added kinstr_jm API
- * 11.27:
+ * 11.27
  * - Backwards compatible extension to HWC ioctl.
  * 11.28:
  * - Added kernel side cache ops needed hint
  * 11.29:
  * - Reserve ioctl 52
+ * 11.30:
+ * - Add a new priority level BASE_JD_PRIO_REALTIME
+ * - Add ioctl 54: This controls the priority setting.
+ * 11.31:
+ * - Added BASE_JD_REQ_LIMITED_CORE_MASK.
+ * - Added ioctl 55: set_limited_core_count.
  */
 #define BASE_UK_VERSION_MAJOR 11
-#define BASE_UK_VERSION_MINOR 29
+#define BASE_UK_VERSION_MINOR 31
 
 /**
  * struct kbase_ioctl_version_check - Check version compatibility between
@@ -133,8 +138,6 @@ struct kbase_ioctl_version_check {
 #define KBASE_IOCTL_VERSION_CHECK \
 	_IOWR(KBASE_IOCTL_TYPE, 0, struct kbase_ioctl_version_check)
 
-#define KBASE_IOCTL_VERSION_CHECK_RESERVED \
-	_IOWR(KBASE_IOCTL_TYPE, 52, struct kbase_ioctl_version_check)
 
 /**
  * struct kbase_ioctl_job_submit - Submit jobs/atoms to the kernel
@@ -213,4 +216,8 @@ union kbase_kinstr_jm_fd {
 #define KBASE_IOCTL_KINSTR_JM_FD \
 	_IOWR(KBASE_IOCTL_TYPE, 51, union kbase_kinstr_jm_fd)
 
-#endif /* _KBASE_JM_IOCTL_H_ */
+
+#define KBASE_IOCTL_VERSION_CHECK_RESERVED \
+	_IOWR(KBASE_IOCTL_TYPE, 52, struct kbase_ioctl_version_check)
+
+#endif /* _UAPI_KBASE_JM_IOCTL_H_ */
diff --git a/drivers/gpu/arm/bifrost/mali_base_kernel.h b/include/uapi/gpu/arm/bifrost/mali_base_kernel.h
similarity index 72%
rename from drivers/gpu/arm/bifrost/mali_base_kernel.h
rename to include/uapi/gpu/arm/bifrost/mali_base_kernel.h
index 086171adb6e5..a46c41f89af0 100644
--- a/drivers/gpu/arm/bifrost/mali_base_kernel.h
+++ b/include/uapi/gpu/arm/bifrost/mali_base_kernel.h
@@ -1,11 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
 /*
  *
- * (C) COPYRIGHT 2010-2020 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2021 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,48 +17,52 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
-
-
 /*
  * Base structures shared with the kernel.
  */
 
-#ifndef _BASE_KERNEL_H_
-#define _BASE_KERNEL_H_
+#ifndef _UAPI_BASE_KERNEL_H_
+#define _UAPI_BASE_KERNEL_H_
+
+#include <linux/types.h>
 
 struct base_mem_handle {
 	struct {
-		u64 handle;
+		__u64 handle;
 	} basep;
 };
 
 #include "mali_base_mem_priv.h"
-#include "gpu/mali_kbase_gpu_coherency.h"
 #include "gpu/mali_kbase_gpu_id.h"
+#include "gpu/mali_kbase_gpu_coherency.h"
 
 #define BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS 4
 
 #define BASE_MAX_COHERENT_GROUPS 16
 
-#if defined CDBG_ASSERT
+#if defined(CDBG_ASSERT)
 #define LOCAL_ASSERT CDBG_ASSERT
-#elif defined KBASE_DEBUG_ASSERT
+#elif defined(KBASE_DEBUG_ASSERT)
 #define LOCAL_ASSERT KBASE_DEBUG_ASSERT
 #else
+#if defined(__KERNEL__)
 #error assert macro not defined!
+#else
+#define LOCAL_ASSERT(...)	((void)#__VA_ARGS__)
+#endif
 #endif
 
 #if defined(PAGE_MASK) && defined(PAGE_SHIFT)
 #define LOCAL_PAGE_SHIFT PAGE_SHIFT
 #define LOCAL_PAGE_LSB ~PAGE_MASK
 #else
-#include <osu/mali_osu.h>
+#ifndef OSU_CONFIG_CPU_PAGE_SIZE_LOG2
+#define OSU_CONFIG_CPU_PAGE_SIZE_LOG2 12
+#endif
 
-#if defined OSU_CONFIG_CPU_PAGE_SIZE_LOG2
+#if defined(OSU_CONFIG_CPU_PAGE_SIZE_LOG2)
 #define LOCAL_PAGE_SHIFT OSU_CONFIG_CPU_PAGE_SIZE_LOG2
 #define LOCAL_PAGE_LSB ((1ul << OSU_CONFIG_CPU_PAGE_SIZE_LOG2) - 1)
 #else
@@ -85,7 +90,7 @@ struct base_mem_handle {
  * More flags can be added to this list, as long as they don't clash
  * (see BASE_MEM_FLAGS_NR_BITS for the number of the first free bit).
  */
-typedef u32 base_mem_alloc_flags;
+typedef __u32 base_mem_alloc_flags;
 
 /* A mask for all the flags which are modifiable via the base_mem_set_flags
  * interface.
@@ -121,7 +126,7 @@ typedef u32 base_mem_alloc_flags;
  */
 enum base_mem_import_type {
 	BASE_MEM_IMPORT_TYPE_INVALID = 0,
-	/**
+	/*
 	 * Import type with value 1 is deprecated.
 	 */
 	BASE_MEM_IMPORT_TYPE_UMM = 2,
@@ -138,8 +143,8 @@ enum base_mem_import_type {
  */
 
 struct base_mem_import_user_buffer {
-	u64 ptr;
-	u64 length;
+	__u64 ptr;
+	__u64 length;
 };
 
 /* Mask to detect 4GB boundary alignment */
@@ -147,15 +152,15 @@ struct base_mem_import_user_buffer {
 /* Mask to detect 4GB boundary (in page units) alignment */
 #define BASE_MEM_PFN_MASK_4GB  (BASE_MEM_MASK_4GB >> LOCAL_PAGE_SHIFT)
 
-/* Limit on the 'extent' parameter for an allocation with the
+/* Limit on the 'extension' parameter for an allocation with the
  * BASE_MEM_TILER_ALIGN_TOP flag set
  *
  * This is the same as the maximum limit for a Buffer Descriptor's chunk size
  */
-#define BASE_MEM_TILER_ALIGN_TOP_EXTENT_MAX_PAGES_LOG2 \
-		(21u - (LOCAL_PAGE_SHIFT))
-#define BASE_MEM_TILER_ALIGN_TOP_EXTENT_MAX_PAGES \
-		(1ull << (BASE_MEM_TILER_ALIGN_TOP_EXTENT_MAX_PAGES_LOG2))
+#define BASE_MEM_TILER_ALIGN_TOP_EXTENSION_MAX_PAGES_LOG2                      \
+	(21u - (LOCAL_PAGE_SHIFT))
+#define BASE_MEM_TILER_ALIGN_TOP_EXTENSION_MAX_PAGES                           \
+	(1ull << (BASE_MEM_TILER_ALIGN_TOP_EXTENSION_MAX_PAGES_LOG2))
 
 /* Bit mask of cookies used for for memory allocation setup */
 #define KBASE_COOKIE_MASK  ~1UL /* bit 0 is reserved */
@@ -163,7 +168,7 @@ struct base_mem_import_user_buffer {
 /* Maximum size allowed in a single KBASE_IOCTL_MEM_ALLOC call */
 #define KBASE_MEM_ALLOC_MAX_SIZE ((8ull << 30) >> PAGE_SHIFT) /* 8 GB */
 
-/**
+/*
  * struct base_fence - Cross-device synchronisation fence.
  *
  * A fence is used to signal when the GPU has finished accessing a resource that
@@ -200,8 +205,8 @@ struct base_fence {
  */
 struct base_mem_aliasing_info {
 	struct base_mem_handle handle;
-	u64 offset;
-	u64 length;
+	__u64 offset;
+	__u64 length;
 };
 
 /* Maximum percentage of just-in-time memory allocation trimming to perform
@@ -223,11 +228,11 @@ struct base_mem_aliasing_info {
  * An array of structures was not supported
  */
 struct base_jit_alloc_info_10_2 {
-	u64 gpu_alloc_addr;
-	u64 va_pages;
-	u64 commit_pages;
-	u64 extent;
-	u8 id;
+	__u64 gpu_alloc_addr;
+	__u64 va_pages;
+	__u64 commit_pages;
+	__u64 extension;
+	__u8 id;
 };
 
 /* base_jit_alloc_info introduced by kernel driver version 11.5, and in use up
@@ -250,16 +255,16 @@ struct base_jit_alloc_info_10_2 {
  * 11.10: Arrays of this structure are supported
  */
 struct base_jit_alloc_info_11_5 {
-	u64 gpu_alloc_addr;
-	u64 va_pages;
-	u64 commit_pages;
-	u64 extent;
-	u8 id;
-	u8 bin_id;
-	u8 max_allocations;
-	u8 flags;
-	u8 padding[2];
-	u16 usage_id;
+	__u64 gpu_alloc_addr;
+	__u64 va_pages;
+	__u64 commit_pages;
+	__u64 extension;
+	__u8 id;
+	__u8 bin_id;
+	__u8 max_allocations;
+	__u8 flags;
+	__u8 padding[2];
+	__u16 usage_id;
 };
 
 /**
@@ -270,7 +275,7 @@ struct base_jit_alloc_info_11_5 {
  * @va_pages:                   The minimum number of virtual pages required.
  * @commit_pages:               The minimum number of physical pages which
  *                              should back the allocation.
- * @extent:                     Granularity of physical pages to grow the
+ * @extension:                  Granularity of physical pages to grow the
  *                              allocation by during a fault.
  * @id:                         Unique ID provided by the caller, this is used
  *                              to pair allocation and free requests.
@@ -305,17 +310,17 @@ struct base_jit_alloc_info_11_5 {
  * 11.20: added @heap_info_gpu_addr
  */
 struct base_jit_alloc_info {
-	u64 gpu_alloc_addr;
-	u64 va_pages;
-	u64 commit_pages;
-	u64 extent;
-	u8 id;
-	u8 bin_id;
-	u8 max_allocations;
-	u8 flags;
-	u8 padding[2];
-	u16 usage_id;
-	u64 heap_info_gpu_addr;
+	__u64 gpu_alloc_addr;
+	__u64 va_pages;
+	__u64 commit_pages;
+	__u64 extension;
+	__u8 id;
+	__u8 bin_id;
+	__u8 max_allocations;
+	__u8 flags;
+	__u8 padding[2];
+	__u16 usage_id;
+	__u64 heap_info_gpu_addr;
 };
 
 enum base_external_resource_access {
@@ -324,7 +329,7 @@ enum base_external_resource_access {
 };
 
 struct base_external_resource {
-	u64 ext_resource;
+	__u64 ext_resource;
 };
 
 
@@ -342,13 +347,13 @@ struct base_external_resource {
  *                                      sized at allocation time.
  */
 struct base_external_resource_list {
-	u64 count;
+	__u64 count;
 	struct base_external_resource ext_res[1];
 };
 
 struct base_jd_debug_copy_buffer {
-	u64 address;
-	u64 size;
+	__u64 address;
+	__u64 size;
 	struct base_external_resource extres;
 };
 
@@ -460,7 +465,7 @@ struct base_jd_debug_copy_buffer {
  * population count, since faulty cores may be disabled during production,
  * producing a non-contiguous mask.
  *
- * The memory requirements for this algorithm can be determined either by a u64
+ * The memory requirements for this algorithm can be determined either by a __u64
  * population count on the L2_PRESENT mask (a LUT helper already is
  * required for the above), or simple assumption that there can be no more than
  * 16 coherent groups, since core groups are typically 4 cores.
@@ -469,165 +474,170 @@ struct base_jd_debug_copy_buffer {
 #define BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS 4
 
 #define BASE_MAX_COHERENT_GROUPS 16
-
+/**
+ * struct mali_base_gpu_core_props - GPU core props info
+ * @product_id: Product specific value.
+ * @version_status: Status of the GPU release. No defined values, but starts at
+ * 	0 and increases by one for each release status (alpha, beta, EAC, etc.).
+ * 	4 bit values (0-15).
+ * @minor_revision: Minor release number of the GPU. "P" part of an "RnPn"
+ * 	release number.
+ * 	8 bit values (0-255).
+ * @major_revision: Major release number of the GPU. "R" part of an "RnPn"
+ * 	release number.
+ * 	4 bit values (0-15).
+ * @padding: padding to align to 8 bytes
+ * @gpu_freq_khz_max: The maximum GPU frequency. Reported to applications by
+ * 	clGetDeviceInfo()
+ * @log2_program_counter_size: Size of the shader program counter, in bits.
+ * @texture_features: TEXTURE_FEATURES_x registers, as exposed by the GPU. This
+ * 	is a bitpattern where a set bit indicates that the format is supported.
+ * 	Before using a texture format, it is recommended that the corresponding
+ * 	bit be checked.
+ * @gpu_available_memory_size: Theoretical maximum memory available to the GPU.
+ * 	It is unlikely that a client will be able to allocate all of this memory
+ * 	for their own purposes, but this at least provides an upper bound on the
+ * 	memory available to the GPU.
+ * 	This is required for OpenCL's clGetDeviceInfo() call when
+ * 	CL_DEVICE_GLOBAL_MEM_SIZE is requested, for OpenCL GPU devices. The
+ * 	client will not be expecting to allocate anywhere near this value.
+ * @num_exec_engines: The number of execution engines.
+ */
 struct mali_base_gpu_core_props {
-	/**
-	 * Product specific value.
-	 */
-	u32 product_id;
-
-	/**
-	 * Status of the GPU release.
-	 * No defined values, but starts at 0 and increases by one for each
-	 * release status (alpha, beta, EAC, etc.).
-	 * 4 bit values (0-15).
-	 */
-	u16 version_status;
-
-	/**
-	 * Minor release number of the GPU. "P" part of an "RnPn" release number.
-     * 8 bit values (0-255).
-	 */
-	u16 minor_revision;
-
-	/**
-	 * Major release number of the GPU. "R" part of an "RnPn" release number.
-     * 4 bit values (0-15).
-	 */
-	u16 major_revision;
-
-	u16 padding;
-
-	/* The maximum GPU frequency. Reported to applications by
-	 * clGetDeviceInfo()
-	 */
-	u32 gpu_freq_khz_max;
-
-	/**
-	 * Size of the shader program counter, in bits.
-	 */
-	u32 log2_program_counter_size;
-
-	/**
-	 * TEXTURE_FEATURES_x registers, as exposed by the GPU. This is a
-	 * bitpattern where a set bit indicates that the format is supported.
-	 *
-	 * Before using a texture format, it is recommended that the corresponding
-	 * bit be checked.
-	 */
-	u32 texture_features[BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS];
-
-	/**
-	 * Theoretical maximum memory available to the GPU. It is unlikely that a
-	 * client will be able to allocate all of this memory for their own
-	 * purposes, but this at least provides an upper bound on the memory
-	 * available to the GPU.
-	 *
-	 * This is required for OpenCL's clGetDeviceInfo() call when
-	 * CL_DEVICE_GLOBAL_MEM_SIZE is requested, for OpenCL GPU devices. The
-	 * client will not be expecting to allocate anywhere near this value.
-	 */
-	u64 gpu_available_memory_size;
-
-	/**
-	 * The number of execution engines.
-	 */
-	u8 num_exec_engines;
+	__u32 product_id;
+	__u16 version_status;
+	__u16 minor_revision;
+	__u16 major_revision;
+	__u16 padding;
+	__u32 gpu_freq_khz_max;
+	__u32 log2_program_counter_size;
+	__u32 texture_features[BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS];
+	__u64 gpu_available_memory_size;
+	__u8 num_exec_engines;
 };
 
-/**
- *
+/*
  * More information is possible - but associativity and bus width are not
  * required by upper-level apis.
  */
 struct mali_base_gpu_l2_cache_props {
-	u8 log2_line_size;
-	u8 log2_cache_size;
-	u8 num_l2_slices; /* Number of L2C slices. 1 or higher */
-	u8 padding[5];
+	__u8 log2_line_size;
+	__u8 log2_cache_size;
+	__u8 num_l2_slices; /* Number of L2C slices. 1 or higher */
+	__u8 padding[5];
 };
 
 struct mali_base_gpu_tiler_props {
-	u32 bin_size_bytes;	/* Max is 4*2^15 */
-	u32 max_active_levels;	/* Max is 2^15 */
+	__u32 bin_size_bytes;	/* Max is 4*2^15 */
+	__u32 max_active_levels;	/* Max is 2^15 */
 };
 
 /**
- * GPU threading system details.
+ * struct mali_base_gpu_thread_props - GPU threading system details.
+ * @max_threads: Max. number of threads per core
+ * @max_workgroup_size:     Max. number of threads per workgroup
+ * @max_barrier_size:       Max. number of threads that can synchronize on a
+ *                          simple barrier
+ * @max_registers:          Total size [1..65535] of the register file available
+ *                          per core.
+ * @max_task_queue:         Max. tasks [1..255] which may be sent to a core
+ *                          before it becomes blocked.
+ * @max_thread_group_split: Max. allowed value [1..15] of the Thread Group Split
+ *                          field.
+ * @impl_tech:              0 = Not specified, 1 = Silicon, 2 = FPGA,
+ *                          3 = SW Model/Emulation
+ * @padding:                padding to align to 8 bytes
+ * @tls_alloc:              Number of threads per core that TLS must be
+ *                          allocated for
  */
 struct mali_base_gpu_thread_props {
-	u32 max_threads;            /* Max. number of threads per core */
-	u32 max_workgroup_size;     /* Max. number of threads per workgroup */
-	u32 max_barrier_size;       /* Max. number of threads that can synchronize on a simple barrier */
-	u16 max_registers;          /* Total size [1..65535] of the register file available per core. */
-	u8  max_task_queue;         /* Max. tasks [1..255] which may be sent to a core before it becomes blocked. */
-	u8  max_thread_group_split; /* Max. allowed value [1..15] of the Thread Group Split field. */
-	u8  impl_tech;              /* 0 = Not specified, 1 = Silicon, 2 = FPGA, 3 = SW Model/Emulation */
-	u8  padding[3];
-	u32 tls_alloc;              /* Number of threads per core that TLS must
-				     * be allocated for
-				     */
+	__u32 max_threads;
+	__u32 max_workgroup_size;
+	__u32 max_barrier_size;
+	__u16 max_registers;
+	__u8 max_task_queue;
+	__u8 max_thread_group_split;
+	__u8 impl_tech;
+	__u8  padding[3];
+	__u32 tls_alloc;
 };
 
 /**
  * struct mali_base_gpu_coherent_group - descriptor for a coherent group
+ * @core_mask: Core restriction mask required for the group
+ * @num_cores: Number of cores in the group
+ * @padding:   padding to align to 8 bytes
  *
  * \c core_mask exposes all cores in that coherent group, and \c num_cores
- * provides a cached population-count for that mask.
+ * 	provides a cached population-count for that mask.
  *
  * @note Whilst all cores are exposed in the mask, not all may be available to
- * the application, depending on the Kernel Power policy.
+ * 	the application, depending on the Kernel Power policy.
  *
- * @note if u64s must be 8-byte aligned, then this structure has 32-bits of wastage.
+ * @note if u64s must be 8-byte aligned, then this structure has 32-bits of
+ * 	wastage.
  */
 struct mali_base_gpu_coherent_group {
-	u64 core_mask;	       /**< Core restriction mask required for the group */
-	u16 num_cores;	       /**< Number of cores in the group */
-	u16 padding[3];
+	__u64 core_mask;
+	__u16 num_cores;
+	__u16 padding[3];
 };
 
 /**
  * struct mali_base_gpu_coherent_group_info - Coherency group information
+ * @num_groups: Number of coherent groups in the GPU.
+ * @num_core_groups: Number of core groups (coherent or not) in the GPU.
+ * 	Equivalent to the number of L2 Caches.
+ * 	  The GPU Counter dumping writes 2048 bytes per core group, regardless
+ * 	of whether the core groups are coherent or not. Hence this member is
+ * 	needed to calculate how much memory is required for dumping.
+ * 	  @note Do not use it to work out how many valid elements are in the
+ * 	group[] member. Use num_groups instead.
+ * @coherency: Coherency features of the memory, accessed by gpu_mem_features
+ * 	methods
+ * @padding: padding to align to 8 bytes
+ * @group: Descriptors of coherent groups
  *
  * Note that the sizes of the members could be reduced. However, the \c group
- * member might be 8-byte aligned to ensure the u64 core_mask is 8-byte
+ * member might be 8-byte aligned to ensure the __u64 core_mask is 8-byte
  * aligned, thus leading to wastage if the other members sizes were reduced.
  *
  * The groups are sorted by core mask. The core masks are non-repeating and do
  * not intersect.
  */
 struct mali_base_gpu_coherent_group_info {
-	u32 num_groups;
-
-	/**
-	 * Number of core groups (coherent or not) in the GPU. Equivalent to the number of L2 Caches.
-	 *
-	 * The GPU Counter dumping writes 2048 bytes per core group, regardless of
-	 * whether the core groups are coherent or not. Hence this member is needed
-	 * to calculate how much memory is required for dumping.
-	 *
-	 * @note Do not use it to work out how many valid elements are in the
-	 * group[] member. Use num_groups instead.
-	 */
-	u32 num_core_groups;
-
-	/**
-	 * Coherency features of the memory, accessed by gpu_mem_features
-	 * methods
-	 */
-	u32 coherency;
-
-	u32 padding;
-
-	/**
-	 * Descriptors of coherent groups
-	 */
+	__u32 num_groups;
+	__u32 num_core_groups;
+	__u32 coherency;
+	__u32 padding;
 	struct mali_base_gpu_coherent_group group[BASE_MAX_COHERENT_GROUPS];
 };
 
 /**
  * struct gpu_raw_gpu_props - A complete description of the GPU's Hardware
  *                            Configuration Discovery registers.
+ * @shader_present: Shader core present bitmap
+ * @tiler_present: Tiler core present bitmap
+ * @l2_present: Level 2 cache present bitmap
+ * @stack_present: Core stack present bitmap
+ * @l2_features: L2 features
+ * @core_features: Core features
+ * @mem_features: Mem features
+ * @mmu_features: Mmu features
+ * @as_present: Bitmap of address spaces present
+ * @js_present: Job slots present
+ * @js_features: Array of job slot features.
+ * @tiler_features: Tiler features
+ * @texture_features: TEXTURE_FEATURES_x registers, as exposed by the GPU
+ * @gpu_id: GPU and revision identifier
+ * @thread_max_threads: Maximum number of threads per core
+ * @thread_max_workgroup_size: Maximum number of threads per workgroup
+ * @thread_max_barrier_size: Maximum number of threads per barrier
+ * @thread_features: Thread features
+ * @coherency_mode: Note: This is the _selected_ coherency mode rather than the
+ * 	available modes as exposed in the coherency_features register
+ * @thread_tls_alloc: Number of threads per core that TLS must be allocated for
+ * @gpu_features: GPU features
  *
  * The information is presented inefficiently for access. For frequent access,
  * the values should be better expressed in an unpacked form in the
@@ -642,55 +652,58 @@ struct mali_base_gpu_coherent_group_info {
  *
  */
 struct gpu_raw_gpu_props {
-	u64 shader_present;
-	u64 tiler_present;
-	u64 l2_present;
-	u64 stack_present;
+	__u64 shader_present;
+	__u64 tiler_present;
+	__u64 l2_present;
+	__u64 stack_present;
+	__u32 l2_features;
+	__u32 core_features;
+	__u32 mem_features;
+	__u32 mmu_features;
 
-	u32 l2_features;
-	u32 core_features;
-	u32 mem_features;
-	u32 mmu_features;
+	__u32 as_present;
 
-	u32 as_present;
+	__u32 js_present;
+	__u32 js_features[GPU_MAX_JOB_SLOTS];
+	__u32 tiler_features;
+	__u32 texture_features[BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS];
 
-	u32 js_present;
-	u32 js_features[GPU_MAX_JOB_SLOTS];
-	u32 tiler_features;
-	u32 texture_features[BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS];
+	__u32 gpu_id;
 
-	u32 gpu_id;
-
-	u32 thread_max_threads;
-	u32 thread_max_workgroup_size;
-	u32 thread_max_barrier_size;
-	u32 thread_features;
+	__u32 thread_max_threads;
+	__u32 thread_max_workgroup_size;
+	__u32 thread_max_barrier_size;
+	__u32 thread_features;
 
 	/*
 	 * Note: This is the _selected_ coherency mode rather than the
 	 * available modes as exposed in the coherency_features register.
 	 */
-	u32 coherency_mode;
+	__u32 coherency_mode;
 
-	u32 thread_tls_alloc;
+	__u32 thread_tls_alloc;
+	__u64 gpu_features;
 };
 
 /**
  * struct base_gpu_props - Return structure for base_get_gpu_props().
+ * @core_props:     Core props.
+ * @l2_props:       L2 props.
+ * @unused_1:       Keep for backwards compatibility.
+ * @tiler_props:    Tiler props.
+ * @thread_props:   Thread props.
+ * @raw_props:      This member is large, likely to be 128 bytes.
+ * @coherency_info: This must be last member of the structure.
  *
  * NOTE: the raw_props member in this data structure contains the register
  * values from which the value of the other members are derived. The derived
  * members exist to allow for efficient access and/or shielding the details
  * of the layout of the registers.
- *
- * @unused_1:       Keep for backwards compatibility.
- * @raw_props:      This member is large, likely to be 128 bytes.
- * @coherency_info: This must be last member of the structure.
- */
+ */
 struct base_gpu_props {
 	struct mali_base_gpu_core_props core_props;
 	struct mali_base_gpu_l2_cache_props l2_props;
-	u64 unused_1;
+	__u64 unused_1;
 	struct mali_base_gpu_tiler_props tiler_props;
 	struct mali_base_gpu_thread_props thread_props;
 	struct gpu_raw_gpu_props raw_props;
@@ -712,7 +725,7 @@ struct base_gpu_props {
  *
  * Return: group ID(0~15) extracted from the parameter
  */
-static inline int base_mem_group_id_get(base_mem_alloc_flags flags)
+static __inline__ int base_mem_group_id_get(base_mem_alloc_flags flags)
 {
 	LOCAL_ASSERT((flags & ~BASE_MEM_FLAGS_INPUT_MASK) == 0);
 	return (int)((flags & BASE_MEM_GROUP_ID_MASK) >>
@@ -731,7 +744,7 @@ static inline int base_mem_group_id_get(base_mem_alloc_flags flags)
  * The return value can be combined with other flags against base_mem_alloc
  * to identify a specific memory group.
  */
-static inline base_mem_alloc_flags base_mem_group_id_set(int id)
+static __inline__ base_mem_alloc_flags base_mem_group_id_set(int id)
 {
 	if ((id < 0) || (id >= BASE_MEM_GROUP_COUNT)) {
 		/* Set to default value when id is out of range. */
@@ -752,7 +765,7 @@ static inline base_mem_alloc_flags base_mem_group_id_set(int id)
  *
  * Return: Bitmask of flags to pass to base_context_init.
  */
-static inline base_context_create_flags base_context_mmu_group_id_set(
+static __inline__ base_context_create_flags base_context_mmu_group_id_set(
 	int const group_id)
 {
 	LOCAL_ASSERT(group_id >= 0);
@@ -772,7 +785,7 @@ static inline base_context_create_flags base_context_mmu_group_id_set(
  *
  * Return: Physical memory group ID. Valid range is 0..(BASE_MEM_GROUP_COUNT-1).
  */
-static inline int base_context_mmu_group_id_get(
+static __inline__ int base_context_mmu_group_id_get(
 	base_context_create_flags const flags)
 {
 	LOCAL_ASSERT(flags == (flags & BASEP_CONTEXT_CREATE_ALLOWED_FLAGS));
@@ -804,4 +817,10 @@ static inline int base_context_mmu_group_id_get(
 		BASE_TIMEINFO_KERNEL_SOURCE_FLAG | \
 		BASE_TIMEINFO_USER_SOURCE_FLAG)
 
-#endif				/* _BASE_KERNEL_H_ */
+/* Maximum number of source allocations allowed to create an alias allocation.
+ * This needs to be 4096 * 6 to allow cube map arrays with up to 4096 array
+ * layers, since each cube map in the array will have 6 faces.
+ */
+#define BASE_MEM_ALIAS_MAX_ENTS ((size_t)24576)
+
+#endif /* _UAPI_BASE_KERNEL_H_ */
diff --git a/drivers/gpu/arm/bifrost/mali_base_mem_priv.h b/include/uapi/gpu/arm/bifrost/mali_base_mem_priv.h
similarity index 80%
rename from drivers/gpu/arm/bifrost/mali_base_mem_priv.h
rename to include/uapi/gpu/arm/bifrost/mali_base_mem_priv.h
index 844a025b715d..304a334a4986 100644
--- a/drivers/gpu/arm/bifrost/mali_base_mem_priv.h
+++ b/include/uapi/gpu/arm/bifrost/mali_base_mem_priv.h
@@ -1,11 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
 /*
  *
- * (C) COPYRIGHT 2010-2015, 2020 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2015, 2020-2021 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,14 +17,14 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
+#ifndef _UAPI_BASE_MEM_PRIV_H_
+#define _UAPI_BASE_MEM_PRIV_H_
 
+#include <linux/types.h>
 
-#ifndef _BASE_MEM_PRIV_H_
-#define _BASE_MEM_PRIV_H_
+#include "mali_base_kernel.h"
 
 #define BASE_SYNCSET_OP_MSYNC	(1U << 0)
 #define BASE_SYNCSET_OP_CSYNC	(1U << 1)
@@ -48,10 +49,10 @@
  */
 struct basep_syncset {
 	struct base_mem_handle mem_handle;
-	u64 user_addr;
-	u64 size;
-	u8 type;
-	u8 padding[7];
+	__u64 user_addr;
+	__u64 size;
+	__u8 type;
+	__u8 padding[7];
 };
 
-#endif
+#endif /* _UAPI_BASE_MEM_PRIV_H_ */
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_hwcnt_reader.h b/include/uapi/gpu/arm/bifrost/mali_kbase_hwcnt_reader.h
similarity index 83%
rename from drivers/gpu/arm/bifrost/mali_kbase_hwcnt_reader.h
rename to include/uapi/gpu/arm/bifrost/mali_kbase_hwcnt_reader.h
index 8cd3835595f7..9baaec11e096 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_hwcnt_reader.h
+++ b/include/uapi/gpu/arm/bifrost/mali_kbase_hwcnt_reader.h
@@ -1,11 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
 /*
  *
- * (C) COPYRIGHT 2015, 2020 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2015, 2020-2021 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,21 +17,20 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
-#ifndef _KBASE_HWCNT_READER_H_
-#define _KBASE_HWCNT_READER_H_
+#ifndef _UAPI_KBASE_HWCNT_READER_H_
+#define _UAPI_KBASE_HWCNT_READER_H_
 
 #include <stddef.h>
+#include <linux/types.h>
 
 /* The ids of ioctl commands. */
 #define KBASE_HWCNT_READER 0xBE
-#define KBASE_HWCNT_READER_GET_HWVER       _IOR(KBASE_HWCNT_READER, 0x00, u32)
-#define KBASE_HWCNT_READER_GET_BUFFER_SIZE _IOR(KBASE_HWCNT_READER, 0x01, u32)
-#define KBASE_HWCNT_READER_DUMP            _IOW(KBASE_HWCNT_READER, 0x10, u32)
-#define KBASE_HWCNT_READER_CLEAR           _IOW(KBASE_HWCNT_READER, 0x11, u32)
+#define KBASE_HWCNT_READER_GET_HWVER       _IOR(KBASE_HWCNT_READER, 0x00, __u32)
+#define KBASE_HWCNT_READER_GET_BUFFER_SIZE _IOR(KBASE_HWCNT_READER, 0x01, __u32)
+#define KBASE_HWCNT_READER_DUMP            _IOW(KBASE_HWCNT_READER, 0x10, __u32)
+#define KBASE_HWCNT_READER_CLEAR           _IOW(KBASE_HWCNT_READER, 0x11, __u32)
 #define KBASE_HWCNT_READER_GET_BUFFER      _IOC(_IOC_READ, KBASE_HWCNT_READER, 0x20,\
 		offsetof(struct kbase_hwcnt_reader_metadata, cycles))
 #define KBASE_HWCNT_READER_GET_BUFFER_WITH_CYCLES      _IOR(KBASE_HWCNT_READER, 0x20,\
@@ -39,10 +39,10 @@
 		offsetof(struct kbase_hwcnt_reader_metadata, cycles))
 #define KBASE_HWCNT_READER_PUT_BUFFER_WITH_CYCLES      _IOW(KBASE_HWCNT_READER, 0x21,\
 		struct kbase_hwcnt_reader_metadata)
-#define KBASE_HWCNT_READER_SET_INTERVAL    _IOW(KBASE_HWCNT_READER, 0x30, u32)
-#define KBASE_HWCNT_READER_ENABLE_EVENT    _IOW(KBASE_HWCNT_READER, 0x40, u32)
-#define KBASE_HWCNT_READER_DISABLE_EVENT   _IOW(KBASE_HWCNT_READER, 0x41, u32)
-#define KBASE_HWCNT_READER_GET_API_VERSION _IOW(KBASE_HWCNT_READER, 0xFF, u32)
+#define KBASE_HWCNT_READER_SET_INTERVAL    _IOW(KBASE_HWCNT_READER, 0x30, __u32)
+#define KBASE_HWCNT_READER_ENABLE_EVENT    _IOW(KBASE_HWCNT_READER, 0x40, __u32)
+#define KBASE_HWCNT_READER_DISABLE_EVENT   _IOW(KBASE_HWCNT_READER, 0x41, __u32)
+#define KBASE_HWCNT_READER_GET_API_VERSION _IOW(KBASE_HWCNT_READER, 0xFF, __u32)
 #define KBASE_HWCNT_READER_GET_API_VERSION_WITH_FEATURES \
 		_IOW(KBASE_HWCNT_READER, 0xFF, \
 		     struct kbase_hwcnt_reader_api_version)
@@ -54,8 +54,8 @@
  * @shader_cores:  the cycles that have elapsed on the GPU shader cores
  */
 struct kbase_hwcnt_reader_metadata_cycles {
-	u64 top;
-	u64 shader_cores;
+	__u64 top;
+	__u64 shader_cores;
 };
 
 /**
@@ -66,9 +66,9 @@ struct kbase_hwcnt_reader_metadata_cycles {
  * @cycles:     the GPU cycles that occurred since the last sample
  */
 struct kbase_hwcnt_reader_metadata {
-	u64 timestamp;
-	u32 event_id;
-	u32 buffer_idx;
+	__u64 timestamp;
+	__u32 event_id;
+	__u32 buffer_idx;
 	struct kbase_hwcnt_reader_metadata_cycles cycles;
 };
 
@@ -85,22 +85,21 @@ enum base_hwcnt_reader_event {
 	BASE_HWCNT_READER_EVENT_PERIODIC,
 	BASE_HWCNT_READER_EVENT_PREJOB,
 	BASE_HWCNT_READER_EVENT_POSTJOB,
-
 	BASE_HWCNT_READER_EVENT_COUNT
 };
 
+#define KBASE_HWCNT_READER_API_VERSION_NO_FEATURE (0)
+#define KBASE_HWCNT_READER_API_VERSION_FEATURE_CYCLES_TOP (1 << 0)
+#define KBASE_HWCNT_READER_API_VERSION_FEATURE_CYCLES_SHADER_CORES (1 << 1)
 /**
  * struct kbase_hwcnt_reader_api_version - hwcnt reader API version
- * @versoin:  API version
+ * @version:  API version
  * @features: available features in this API version
  */
-#define KBASE_HWCNT_READER_API_VERSION_NO_FEATURE                  (0)
-#define KBASE_HWCNT_READER_API_VERSION_FEATURE_CYCLES_TOP          (1 << 0)
-#define KBASE_HWCNT_READER_API_VERSION_FEATURE_CYCLES_SHADER_CORES (1 << 1)
 struct kbase_hwcnt_reader_api_version {
-	u32 version;
-	u32 features;
+	__u32 version;
+	__u32 features;
 };
 
-#endif /* _KBASE_HWCNT_READER_H_ */
+#endif /* _UAPI_KBASE_HWCNT_READER_H_ */
 
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_ioctl.h b/include/uapi/gpu/arm/bifrost/mali_kbase_ioctl.h
similarity index 84%
rename from drivers/gpu/arm/bifrost/mali_kbase_ioctl.h
rename to include/uapi/gpu/arm/bifrost/mali_kbase_ioctl.h
index fed45100b4be..29ff32aeeb5c 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_ioctl.h
+++ b/include/uapi/gpu/arm/bifrost/mali_kbase_ioctl.h
@@ -1,11 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
 /*
  *
- * (C) COPYRIGHT 2017-2020 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2017-2021 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,12 +17,10 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
-#ifndef _KBASE_IOCTL_H_
-#define _KBASE_IOCTL_H_
+#ifndef _UAPI_KBASE_IOCTL_H_
+#define _UAPI_KBASE_IOCTL_H_
 
 #ifdef __cpluscplus
 extern "C" {
@@ -65,16 +64,16 @@ struct kbase_ioctl_set_flags {
  * @flags may be used in the future to request a different format for the
  * buffer. With @flags == 0 the following format is used.
  *
- * The buffer will be filled with pairs of values, a u32 key identifying the
+ * The buffer will be filled with pairs of values, a __u32 key identifying the
  * property followed by the value. The size of the value is identified using
  * the bottom bits of the key. The value then immediately followed the key and
  * is tightly packed (there is no padding). All keys and values are
  * little-endian.
  *
- * 00 = u8
- * 01 = u16
- * 10 = u32
- * 11 = u64
+ * 00 = __u8
+ * 01 = __u16
+ * 10 = __u32
+ * 11 = __u64
  */
 struct kbase_ioctl_get_gpuprops {
 	__u64 buffer;
@@ -87,22 +86,20 @@ struct kbase_ioctl_get_gpuprops {
 
 /**
  * union kbase_ioctl_mem_alloc - Allocate memory on the GPU
- *
- * @va_pages: The number of pages of virtual address space to reserve
- * @commit_pages: The number of physical pages to allocate
- * @extent: The number of extra pages to allocate on each GPU fault which grows
- *          the region
- * @flags: Flags
- * @gpu_va: The GPU virtual address which is allocated
- *
  * @in: Input parameters
+ * @in.va_pages: The number of pages of virtual address space to reserve
+ * @in.commit_pages: The number of physical pages to allocate
+ * @in.extension: The number of extra pages to allocate on each GPU fault which grows the region
+ * @in.flags: Flags
  * @out: Output parameters
+ * @out.flags: Flags
+ * @out.gpu_va: The GPU virtual address which is allocated
  */
 union kbase_ioctl_mem_alloc {
 	struct {
 		__u64 va_pages;
 		__u64 commit_pages;
-		__u64 extent;
+		__u64 extension;
 		__u64 flags;
 	} in;
 	struct {
@@ -116,14 +113,13 @@ union kbase_ioctl_mem_alloc {
 
 /**
  * struct kbase_ioctl_mem_query - Query properties of a GPU memory region
- * @gpu_addr: A GPU address contained within the region
- * @query: The type of query
- * @value: The result of the query
+ * @in: Input parameters
+ * @in.gpu_addr: A GPU address contained within the region
+ * @in.query: The type of query
+ * @out: Output parameters
+ * @out.value: The result of the query
  *
  * Use a %KBASE_MEM_QUERY_xxx flag as input for @query.
- *
- * @in: Input parameters
- * @out: Output parameters
  */
 union kbase_ioctl_mem_query {
 	struct {
@@ -138,9 +134,9 @@ union kbase_ioctl_mem_query {
 #define KBASE_IOCTL_MEM_QUERY \
 	_IOWR(KBASE_IOCTL_TYPE, 6, union kbase_ioctl_mem_query)
 
-#define KBASE_MEM_QUERY_COMMIT_SIZE	((u64)1)
-#define KBASE_MEM_QUERY_VA_SIZE		((u64)2)
-#define KBASE_MEM_QUERY_FLAGS		((u64)3)
+#define KBASE_MEM_QUERY_COMMIT_SIZE	((__u64)1)
+#define KBASE_MEM_QUERY_VA_SIZE		((__u64)2)
+#define KBASE_MEM_QUERY_FLAGS		((__u64)3)
 
 /**
  * struct kbase_ioctl_mem_free - Free a memory region
@@ -343,13 +339,12 @@ struct kbase_ioctl_mem_sync {
 /**
  * union kbase_ioctl_mem_find_cpu_offset - Find the offset of a CPU pointer
  *
- * @gpu_addr: The GPU address of the memory region
- * @cpu_addr: The CPU address to locate
- * @size: A size in bytes to validate is contained within the region
- * @offset: The offset from the start of the memory region to @cpu_addr
- *
  * @in: Input parameters
+ * @in.gpu_addr: The GPU address of the memory region
+ * @in.cpu_addr: The CPU address to locate
+ * @in.size: A size in bytes to validate is contained within the region
  * @out: Output parameters
+ * @out.offset: The offset from the start of the memory region to @cpu_addr
  */
 union kbase_ioctl_mem_find_cpu_offset {
 	struct {
@@ -414,15 +409,15 @@ struct kbase_ioctl_mem_commit {
 
 /**
  * union kbase_ioctl_mem_alias - Create an alias of memory regions
- * @flags: Flags, see BASE_MEM_xxx
- * @stride: Bytes between start of each memory region
- * @nents: The number of regions to pack together into the alias
- * @aliasing_info: Pointer to an array of struct base_mem_aliasing_info
- * @gpu_va: Address of the new alias
- * @va_pages: Size of the new alias
- *
  * @in: Input parameters
+ * @in.flags: Flags, see BASE_MEM_xxx
+ * @in.stride: Bytes between start of each memory region
+ * @in.nents: The number of regions to pack together into the alias
+ * @in.aliasing_info: Pointer to an array of struct base_mem_aliasing_info
  * @out: Output parameters
+ * @out.flags: Flags, see BASE_MEM_xxx
+ * @out.gpu_va: Address of the new alias
+ * @out.va_pages: Size of the new alias
  */
 union kbase_ioctl_mem_alias {
 	struct {
@@ -443,15 +438,15 @@ union kbase_ioctl_mem_alias {
 
 /**
  * union kbase_ioctl_mem_import - Import memory for use by the GPU
- * @flags: Flags, see BASE_MEM_xxx
- * @phandle: Handle to the external memory
- * @type: Type of external memory, see base_mem_import_type
- * @padding: Amount of extra VA pages to append to the imported buffer
- * @gpu_va: Address of the new alias
- * @va_pages: Size of the new alias
- *
  * @in: Input parameters
+ * @in.flags: Flags, see BASE_MEM_xxx
+ * @in.phandle: Handle to the external memory
+ * @in.type: Type of external memory, see base_mem_import_type
+ * @in.padding: Amount of extra VA pages to append to the imported buffer
  * @out: Output parameters
+ * @out.flags: Flags, see BASE_MEM_xxx
+ * @out.gpu_va: Address of the new alias
+ * @out.va_pages: Size of the new alias
  */
 union kbase_ioctl_mem_import {
 	struct {
@@ -534,7 +529,7 @@ struct kbase_ioctl_mem_profile_add {
 /**
  * struct kbase_ioctl_sticky_resource_map - Permanently map an external resource
  * @count: Number of resources
- * @address: Array of u64 GPU addresses of the external resources to map
+ * @address: Array of __u64 GPU addresses of the external resources to map
  */
 struct kbase_ioctl_sticky_resource_map {
 	__u64 count;
@@ -548,7 +543,7 @@ struct kbase_ioctl_sticky_resource_map {
  * struct kbase_ioctl_sticky_resource_map - Unmap a resource mapped which was
  *                                          previously permanently mapped
  * @count: Number of resources
- * @address: Array of u64 GPU addresses of the external resources to unmap
+ * @address: Array of __u64 GPU addresses of the external resources to unmap
  */
 struct kbase_ioctl_sticky_resource_unmap {
 	__u64 count;
@@ -564,15 +559,13 @@ struct kbase_ioctl_sticky_resource_unmap {
  *                                                   the given gpu address and
  *                                                   the offset of that address
  *                                                   into the region
- *
- * @gpu_addr: GPU virtual address
- * @size: Size in bytes within the region
- * @start: Address of the beginning of the memory region enclosing @gpu_addr
- *         for the length of @offset bytes
- * @offset: The offset from the start of the memory region to @gpu_addr
- *
  * @in: Input parameters
+ * @in.gpu_addr: GPU virtual address
+ * @in.size: Size in bytes within the region
  * @out: Output parameters
+ * @out.start: Address of the beginning of the memory region enclosing @gpu_addr
+ *             for the length of @offset bytes
+ * @out.offset: The offset from the start of the memory region to @gpu_addr
  */
 union kbase_ioctl_mem_find_gpu_start_and_offset {
 	struct {
@@ -588,7 +581,6 @@ union kbase_ioctl_mem_find_gpu_start_and_offset {
 #define KBASE_IOCTL_MEM_FIND_GPU_START_AND_OFFSET \
 	_IOWR(KBASE_IOCTL_TYPE, 31, union kbase_ioctl_mem_find_gpu_start_and_offset)
 
-
 #define KBASE_IOCTL_CINSTR_GWT_START \
 	_IO(KBASE_IOCTL_TYPE, 33)
 
@@ -597,14 +589,15 @@ union kbase_ioctl_mem_find_gpu_start_and_offset {
 
 /**
  * union kbase_ioctl_gwt_dump - Used to collect all GPU write fault addresses.
- * @addr_buffer: Address of buffer to hold addresses of gpu modified areas.
- * @size_buffer: Address of buffer to hold size of modified areas (in pages)
- * @len: Number of addresses the buffers can hold.
- * @more_data_available: Status indicating if more addresses are available.
- * @no_of_addr_collected: Number of addresses collected into addr_buffer.
- *
  * @in: Input parameters
+ * @in.addr_buffer: Address of buffer to hold addresses of gpu modified areas.
+ * @in.size_buffer: Address of buffer to hold size of modified areas (in pages)
+ * @in.len: Number of addresses the buffers can hold.
+ * @in.padding: padding
  * @out: Output parameters
+ * @out.no_of_addr_collected: Number of addresses collected into addr_buffer.
+ * @out.more_data_available: Status indicating if more addresses are available.
+ * @out.padding: padding
  *
  * This structure is used when performing a call to dump GPU write fault
  * addresses.
@@ -642,18 +635,15 @@ struct kbase_ioctl_mem_exec_init {
 /**
  * union kbase_ioctl_get_cpu_gpu_timeinfo - Request zero or more types of
  *                                          cpu/gpu time (counter values)
- *
- * @request_flags: Bit-flags indicating the requested types.
- * @paddings:      Unused, size alignment matching the out.
- * @sec:           Integer field of the monotonic time, unit in seconds.
- * @nsec:          Fractional sec of the monotonic time, in nano-seconds.
- * @padding:       Unused, for u64 alignment
- * @timestamp:     System wide timestamp (counter) value.
- * @cycle_counter: GPU cycle counter value.
- *
  * @in: Input parameters
+ * @in.request_flags: Bit-flags indicating the requested types.
+ * @in.paddings:      Unused, pads @in to match the size of @out.
  * @out: Output parameters
- *
+ * @out.sec:           Integer field of the monotonic time, unit in seconds.
+ * @out.nsec:          Fractional part of the monotonic time, in nanoseconds.
+ * @out.padding:       Unused, for __u64 alignment
+ * @out.timestamp:     System wide timestamp (counter) value.
+ * @out.cycle_counter: GPU cycle counter value.
  */
 union kbase_ioctl_get_cpu_gpu_timeinfo {
 	struct {
@@ -672,6 +662,31 @@ union kbase_ioctl_get_cpu_gpu_timeinfo {
 #define KBASE_IOCTL_GET_CPU_GPU_TIMEINFO \
 	_IOWR(KBASE_IOCTL_TYPE, 50, union kbase_ioctl_get_cpu_gpu_timeinfo)
 
+/**
+ * struct kbase_ioctl_context_priority_check - Check the max possible priority
+ * @priority: Input priority & output priority
+ */
+
+struct kbase_ioctl_context_priority_check {
+	__u8 priority;
+};
+
+#define KBASE_IOCTL_CONTEXT_PRIORITY_CHECK \
+	_IOWR(KBASE_IOCTL_TYPE, 54, struct kbase_ioctl_context_priority_check)
+
+/**
+ * struct kbase_ioctl_set_limited_core_count - Set the limited core count.
+ *
+ * @max_core_count: Maximum core count
+ */
+struct kbase_ioctl_set_limited_core_count {
+	__u8 max_core_count;
+};
+
+#define KBASE_IOCTL_SET_LIMITED_CORE_COUNT \
+	_IOW(KBASE_IOCTL_TYPE, 55, struct kbase_ioctl_set_limited_core_count)
+
+
 /***************
  * test ioctls *
  ***************/
@@ -682,23 +697,6 @@ union kbase_ioctl_get_cpu_gpu_timeinfo {
 
 #define KBASE_IOCTL_TEST_TYPE (KBASE_IOCTL_TYPE + 1)
 
-/**
- * struct kbase_ioctl_tlstream_test - Start a timeline stream test
- *
- * @tpw_count: number of trace point writers in each context
- * @msg_delay: time delay between tracepoints from one writer in milliseconds
- * @msg_count: number of trace points written by one writer
- * @aux_msg:   if non-zero aux messages will be included
- */
-struct kbase_ioctl_tlstream_test {
-	__u32 tpw_count;
-	__u32 msg_delay;
-	__u32 msg_count;
-	__u32 aux_msg;
-};
-
-#define KBASE_IOCTL_TLSTREAM_TEST \
-	_IOW(KBASE_IOCTL_TEST_TYPE, 1, struct kbase_ioctl_tlstream_test)
 
 /**
  * struct kbase_ioctl_tlstream_stats - Read tlstream stats for test purposes
@@ -826,13 +824,13 @@ struct kbase_ioctl_tlstream_stats {
 #define KBASE_GPUPROP_TEXTURE_FEATURES_3		80
 #define KBASE_GPUPROP_RAW_TEXTURE_FEATURES_3		81
 
-#define KBASE_GPUPROP_NUM_EXEC_ENGINES                  82
+#define KBASE_GPUPROP_NUM_EXEC_ENGINES			82
 
 #define KBASE_GPUPROP_RAW_THREAD_TLS_ALLOC		83
 #define KBASE_GPUPROP_TLS_ALLOC				84
-
+#define KBASE_GPUPROP_RAW_GPU_FEATURES			85
 #ifdef __cpluscplus
 }
 #endif
 
-#endif
+#endif /* _UAPI_KBASE_IOCTL_H_ */
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_kinstr_jm_reader.h b/include/uapi/gpu/arm/bifrost/mali_kbase_kinstr_jm_reader.h
similarity index 90%
rename from drivers/gpu/arm/bifrost/mali_kbase_kinstr_jm_reader.h
rename to include/uapi/gpu/arm/bifrost/mali_kbase_kinstr_jm_reader.h
index e267e6bc44de..72e1b9dbf245 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_kinstr_jm_reader.h
+++ b/include/uapi/gpu/arm/bifrost/mali_kbase_kinstr_jm_reader.h
@@ -1,11 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
 /*
  *
- * (C) COPYRIGHT 2020 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,8 +17,6 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
 /*
@@ -35,8 +34,8 @@
  *    8. Close the file descriptor
  */
 
-#ifndef _KBASE_KINSTR_JM_READER_H_
-#define _KBASE_KINSTR_JM_READER_H_
+#ifndef _UAPI_KBASE_KINSTR_JM_READER_H_
+#define _UAPI_KBASE_KINSTR_JM_READER_H_
 
 /**
  * enum kbase_kinstr_jm_reader_atom_state - Determines the work state of an atom
@@ -67,4 +66,4 @@ enum kbase_kinstr_jm_reader_atom_state {
 	KBASE_KINSTR_JM_READER_ATOM_STATE_COUNT
 };
 
-#endif /* _KBASE_KINSTR_JM_READER_H_ */
+#endif /* _UAPI_KBASE_KINSTR_JM_READER_H_ */
diff --git a/drivers/gpu/arm/bifrost/mali_uk.h b/include/uapi/gpu/arm/bifrost/mali_uk.h
similarity index 69%
rename from drivers/gpu/arm/bifrost/mali_uk.h
rename to include/uapi/gpu/arm/bifrost/mali_uk.h
index 701f3909042f..fcb6cb88980d 100644
--- a/drivers/gpu/arm/bifrost/mali_uk.h
+++ b/include/uapi/gpu/arm/bifrost/mali_uk.h
@@ -1,11 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
 /*
  *
- * (C) COPYRIGHT 2010, 2012-2015, 2018 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010, 2012-2015, 2018, 2020-2021 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,32 +17,22 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
-
-
 /**
- * @file mali_uk.h
  * Types and definitions that are common across OSs for both the user
  * and kernel side of the User-Kernel interface.
  */
 
-#ifndef _UK_H_
-#define _UK_H_
+#ifndef _UAPI_UK_H_
+#define _UAPI_UK_H_
 
 #ifdef __cplusplus
 extern "C" {
-#endif				/* __cplusplus */
+#endif /* __cplusplus */
 
 /**
- * @addtogroup base_api
- * @{
- */
-
-/**
- * @defgroup uk_api User-Kernel Interface API
+ * DOC: uk_api User-Kernel Interface API
  *
  * The User-Kernel Interface abstracts the communication mechanism between the user and kernel-side code of device
  * drivers developed as part of the Midgard DDK. Currently that includes the Base driver.
@@ -51,12 +42,16 @@ extern "C" {
  *
  * This API is internal to the Midgard DDK and is not exposed to any applications.
  *
- * @{
  */
 
 /**
- * These are identifiers for kernel-side drivers implementing a UK interface, aka UKK clients. The
- * UK module maps this to an OS specific device name, e.g. "gpu_base" -> "GPU0:". Specify this
+ * enum uk_client_id - These are identifiers for kernel-side drivers
+ * implementing a UK interface, aka UKK clients.
+ * @UK_CLIENT_MALI_T600_BASE: Value used to identify the Base driver UK client.
+ * @UK_CLIENT_COUNT:          The number of uk clients supported. This must be
+ *                            the last member of the enum
+ *
+ * The UK module maps this to an OS specific device name, e.g. "gpu_base" -> "GPU0:". Specify this
  * identifier to select a UKK client to the uku_open() function.
  *
  * When a new UKK client driver is created a new identifier needs to be added to the uk_client_id
@@ -65,20 +60,11 @@ extern "C" {
  *
  */
 enum uk_client_id {
-	/**
-	 * Value used to identify the Base driver UK client.
-	 */
 	UK_CLIENT_MALI_T600_BASE,
-
-	/** The number of uk clients supported. This must be the last member of the enum */
 	UK_CLIENT_COUNT
 };
 
-/** @} end group uk_api */
-
-/** @} *//* end group base_api */
-
 #ifdef __cplusplus
 }
-#endif				/* __cplusplus */
-#endif				/* _UK_H_ */
+#endif /* __cplusplus */
+#endif /* _UAPI_UK_H_ */