MALI: rockchip: upgrade bifrost DDK to g18p0-01eac0, from g17p0-01eac0

Change-Id: I2c7e002c4b1a1834f89c52e4113e3b2f48f9cba6
Signed-off-by: Zhen Chen <chenzhen@rock-chips.com>
This commit is contained in:
Zhen Chen
2023-04-04 10:34:07 +08:00
committed by Tao Huang
parent 038247591f
commit 7a76350c1e
82 changed files with 3200 additions and 1595 deletions

View File

@@ -62,8 +62,11 @@ What: /sys/class/misc/mali%u/device/idle_hysteresis_time
Description:
This attribute is available only with mali platform
device-driver that supports a CSF GPU. This attribute is
used to set the duration value in milliseconds for the
configuring hysteresis field for determining GPU idle detection.
used to configure the timeout value in microseconds for the
GPU idle handling. If GPU has been idle for this timeout
period, then it is put to sleep for GPUs where sleep feature
is supported or is powered down after suspending command
stream groups.
What: /sys/class/misc/mali%u/device/js_ctx_scheduling_mode
Description:

View File

@@ -96,6 +96,12 @@ ifeq ($(CONFIG_GCOV_KERNEL), y)
EXTRA_CFLAGS += -DGCOV_PROFILE=1
endif
ifeq ($(CONFIG_MALI_KCOV),y)
KBUILD_CFLAGS += $(call cc-option, -fsanitize-coverage=trace-cmp)
EXTRA_CFLAGS += -DKCOV=1
EXTRA_CFLAGS += -DKCOV_ENABLE_COMPARISONS=1
endif
# The following were added to align with W=1 in scripts/Makefile.extrawarn
# from the Linux source tree (v5.18.14)
KBUILD_CFLAGS += -Wextra -Wunused -Wno-unused-parameter

View File

@@ -1,6 +1,6 @@
# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
#
# (C) COPYRIGHT 2012-2022 ARM Limited. All rights reserved.
# (C) COPYRIGHT 2012-2023 ARM Limited. All rights reserved.
#
# This program is free software and is provided to you under the terms of the
# GNU General Public License version 2 as published by the Free Software
@@ -69,7 +69,7 @@ endif
#
# Driver version string which is returned to userspace via an ioctl
MALI_RELEASE_NAME ?= '"g17p0-01eac0"'
MALI_RELEASE_NAME ?= '"g18p0-01eac0"'
# Set up defaults if not defined by build system
ifeq ($(CONFIG_MALI_BIFROST_DEBUG), y)
MALI_UNIT_TEST = 1

View File

@@ -1,6 +1,6 @@
# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
#
# (C) COPYRIGHT 2012-2022 ARM Limited. All rights reserved.
# (C) COPYRIGHT 2012-2023 ARM Limited. All rights reserved.
#
# This program is free software and is provided to you under the terms of the
# GNU General Public License version 2 as published by the Free Software
@@ -112,21 +112,6 @@ config MALI_BIFROST_ENABLE_TRACE
Enables tracing in kbase. Trace log available through
the "mali_trace" debugfs file, when the CONFIG_DEBUG_FS is enabled
config MALI_FW_CORE_DUMP
bool "Enable support for FW core dump"
depends on MALI_BIFROST && MALI_CSF_SUPPORT
default n
help
Adds ability to request firmware core dump through the "fw_core_dump"
debugfs file
Example:
* To explicitly request core dump:
echo 1 > /sys/kernel/debug/mali0/fw_core_dump
* To output current core dump (after explicitly requesting a core dump,
or kernel driver reported an internal firmware error):
cat /sys/kernel/debug/mali0/fw_core_dump
config MALI_ARBITER_SUPPORT
bool "Enable arbiter support for Mali"
depends on MALI_BIFROST && !MALI_CSF_SUPPORT
@@ -178,7 +163,19 @@ menuconfig MALI_BIFROST_EXPERT
if MALI_BIFROST_EXPERT
config MALI_2MB_ALLOC
config LARGE_PAGE_ALLOC_OVERRIDE
bool "Override default setting of 2MB pages"
depends on MALI_BIFROST && MALI_BIFROST_EXPERT
default n
help
An override config for LARGE_PAGE_ALLOC config.
When LARGE_PAGE_ALLOC_OVERRIDE is Y, 2MB page allocation will be
enabled by LARGE_PAGE_ALLOC. When this is N, the feature will be
enabled when GPU HW satisfies requirements.
If in doubt, say N
config LARGE_PAGE_ALLOC
bool "Attempt to allocate 2MB pages"
depends on MALI_BIFROST && MALI_BIFROST_EXPERT
default n
@@ -187,6 +184,10 @@ config MALI_2MB_ALLOC
allocate 2MB pages from the kernel. This reduces TLB pressure and
helps to prevent memory fragmentation.
Note this config applies only when LARGE_PAGE_ALLOC_OVERRIDE config
is enabled and enabling this on a GPU HW that does not satisfy
requirements can cause serious problem.
If in doubt, say N
config MALI_MEMORY_FULLY_BACKED
@@ -222,14 +223,6 @@ config MALI_BIFROST_ERROR_INJECT
help
Enables insertion of errors to test module failure and recovery mechanisms.
config MALI_GEM5_BUILD
bool "Enable build of Mali kernel driver for GEM5"
depends on MALI_BIFROST && MALI_BIFROST_EXPERT
default n
help
This option is to do a Mali GEM5 build.
If unsure, say N.
comment "Debug options"
depends on MALI_BIFROST && MALI_BIFROST_EXPERT

View File

@@ -1,6 +1,6 @@
# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
#
# (C) COPYRIGHT 2010-2022 ARM Limited. All rights reserved.
# (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved.
#
# This program is free software and is provided to you under the terms of the
# GNU General Public License version 2 as published by the Free Software
@@ -58,10 +58,7 @@ ifeq ($(CONFIG_MALI_BIFROST),m)
endif
ifeq ($(CONFIG_MALI_CSF_SUPPORT), y)
CONFIG_MALI_FW_CORE_DUMP ?= y
CONFIG_MALI_CORESIGHT ?= n
else
CONFIG_MALI_FW_CORE_DUMP ?= n
endif
#
@@ -101,7 +98,8 @@ ifeq ($(CONFIG_MALI_BIFROST),m)
else
# Prevent misuse when CONFIG_MALI_BIFROST_EXPERT=n
CONFIG_MALI_CORESTACK = n
CONFIG_MALI_2MB_ALLOC = n
CONFIG_LARGE_PAGE_ALLOC_OVERRIDE = n
CONFIG_LARGE_PAGE_ALLOC = n
CONFIG_MALI_PWRSOFT_765 = n
CONFIG_MALI_MEMORY_FULLY_BACKED = n
CONFIG_MALI_JOB_DUMP = n
@@ -143,7 +141,6 @@ else
CONFIG_MALI_KUTF_IRQ_TEST = n
CONFIG_MALI_KUTF_CLK_RATE_TRACE = n
CONFIG_MALI_KUTF_MGM_INTEGRATION_TEST = n
CONFIG_MALI_FW_CORE_DUMP = n
endif
# All Mali CONFIG should be listed here
@@ -155,14 +152,14 @@ CONFIGS := \
CONFIG_MALI_ARBITRATION \
CONFIG_MALI_PARTITION_MANAGER \
CONFIG_MALI_REAL_HW \
CONFIG_MALI_GEM5_BUILD \
CONFIG_MALI_BIFROST_DEVFREQ \
CONFIG_MALI_BIFROST_DVFS \
CONFIG_MALI_DMA_BUF_MAP_ON_DEMAND \
CONFIG_MALI_DMA_BUF_LEGACY_COMPAT \
CONFIG_MALI_BIFROST_EXPERT \
CONFIG_MALI_CORESTACK \
CONFIG_MALI_2MB_ALLOC \
CONFIG_LARGE_PAGE_ALLOC_OVERRIDE \
CONFIG_LARGE_PAGE_ALLOC \
CONFIG_MALI_PWRSOFT_765 \
CONFIG_MALI_MEMORY_FULLY_BACKED \
CONFIG_MALI_JOB_DUMP \
@@ -183,7 +180,6 @@ CONFIGS := \
CONFIG_MALI_KUTF_CLK_RATE_TRACE \
CONFIG_MALI_KUTF_MGM_INTEGRATION_TEST \
CONFIG_MALI_XEN \
CONFIG_MALI_FW_CORE_DUMP \
CONFIG_MALI_CORESIGHT
@@ -267,6 +263,12 @@ ifeq ($(CONFIG_GCOV_KERNEL),y)
EXTRA_CFLAGS += -DGCOV_PROFILE=1
endif
ifeq ($(CONFIG_MALI_KCOV),y)
KBUILD_CFLAGS += $(call cc-option, -fsanitize-coverage=trace-cmp)
EXTRA_CFLAGS += -DKCOV=1
EXTRA_CFLAGS += -DKCOV_ENABLE_COMPARISONS=1
endif
all:
$(MAKE) -C $(KDIR) M=$(CURDIR) $(MAKE_ARGS) EXTRA_CFLAGS="$(EXTRA_CFLAGS)" KBUILD_EXTRA_SYMBOLS="$(EXTRA_SYMBOLS)" modules

View File

@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
* (C) COPYRIGHT 2010-2022 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -190,6 +190,27 @@ static u64 select_job_chain(struct kbase_jd_atom *katom)
return jc;
}
static inline bool kbasep_jm_wait_js_free(struct kbase_device *kbdev, unsigned int js,
struct kbase_context *kctx)
{
const ktime_t wait_loop_start = ktime_get_raw();
const s64 max_timeout = (s64)kbdev->js_data.js_free_wait_time_ms;
s64 diff = 0;
/* wait for the JS_COMMAND_NEXT register to reach the given status value */
do {
if (!kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_COMMAND_NEXT)))
return true;
diff = ktime_to_ms(ktime_sub(ktime_get_raw(), wait_loop_start));
} while (diff < max_timeout);
dev_err(kbdev->dev, "Timeout in waiting for job slot %u to become free for ctx %d_%u", js,
kctx->tgid, kctx->id);
return false;
}
int kbase_job_hw_submit(struct kbase_device *kbdev, struct kbase_jd_atom *katom, unsigned int js)
{
struct kbase_context *kctx;
@@ -203,8 +224,7 @@ int kbase_job_hw_submit(struct kbase_device *kbdev, struct kbase_jd_atom *katom,
kctx = katom->kctx;
/* Command register must be available */
if (WARN(!kbasep_jm_is_js_free(kbdev, js, kctx),
"Attempting to assign to occupied slot %d in kctx %pK\n", js, (void *)kctx))
if (!kbasep_jm_wait_js_free(kbdev, js, kctx))
return -EPERM;
dev_dbg(kctx->kbdev->dev, "Write JS_HEAD_NEXT 0x%llx for atom %pK\n",

View File

@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
* (C) COPYRIGHT 2011-2016, 2018-2022 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2011-2016, 2018-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -52,14 +52,6 @@ static inline char *kbasep_make_job_slot_string(unsigned int js, char *js_string
}
#endif
#if !MALI_USE_CSF
static inline int kbasep_jm_is_js_free(struct kbase_device *kbdev, unsigned int js,
struct kbase_context *kctx)
{
return !kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_COMMAND_NEXT));
}
#endif
/**
* kbase_job_hw_submit() - Submit a job to the GPU
* @kbdev: Device pointer

View File

@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
* (C) COPYRIGHT 2014-2022 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2014-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -1001,17 +1001,11 @@ void kbase_backend_slot_update(struct kbase_device *kbdev)
other_slots_busy(kbdev, js))
break;
#ifdef CONFIG_MALI_GEM5_BUILD
if (!kbasep_jm_is_js_free(kbdev, js,
katom[idx]->kctx))
break;
#endif
/* Check if this job needs the cycle counter
* enabled before submission
*/
if (katom[idx]->core_req & BASE_JD_REQ_PERMON)
kbase_pm_request_gpu_cycle_counter_l2_is_on(
kbdev);
kbase_pm_request_gpu_cycle_counter_l2_is_on(kbdev);
if (!kbase_job_hw_submit(kbdev, katom[idx], js)) {
katom[idx]->gpu_rb_state = KBASE_ATOM_GPU_RB_SUBMITTED;
@@ -1025,9 +1019,12 @@ void kbase_backend_slot_update(struct kbase_device *kbdev)
/* Inform platform at start/finish of atom */
kbasep_platform_event_atom_submit(katom[idx]);
}
else
} else {
if (katom[idx]->core_req & BASE_JD_REQ_PERMON)
kbase_pm_release_gpu_cycle_counter_nolock(kbdev);
break;
}
/* ***TRANSITION TO HIGHER STATE*** */
fallthrough;

View File

@@ -2024,8 +2024,6 @@ void midgard_model_read_reg(void *h, u32 addr, u32 *const value)
*value = gpu_model_get_prfcnt_value(KBASE_IPA_CORE_TYPE_SHADER,
counter_index, is_low_word);
} else if (addr == USER_REG(LATEST_FLUSH)) {
*value = 0;
}
#endif
else if (addr == GPU_CONTROL_REG(GPU_FEATURES_LO)) {

View File

@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
* (C) COPYRIGHT 2014-2015, 2018-2022 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2014-2015, 2018-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -25,14 +25,17 @@
static struct kbase_error_atom *error_track_list;
unsigned int rand_seed;
#ifdef CONFIG_MALI_ERROR_INJECT_RANDOM
/** Kernel 6.1.0 has dropped prandom_u32(), use get_random_u32() */
#if (KERNEL_VERSION(6, 1, 0) <= LINUX_VERSION_CODE)
#define prandom_u32 get_random_u32
#endif
/*following error probability are set quite high in order to stress the driver*/
unsigned int error_probability = 50; /* to be set between 0 and 100 */
static unsigned int error_probability = 50; /* to be set between 0 and 100 */
/* probability to have multiple error give that there is an error */
unsigned int multiple_error_probability = 50;
#ifdef CONFIG_MALI_ERROR_INJECT_RANDOM
static unsigned int multiple_error_probability = 50;
/* all the error conditions supported by the model */
#define TOTAL_FAULTS 27

View File

@@ -105,7 +105,7 @@ static void serve_mmu_irq(struct work_struct *work)
kmem_cache_free(kbdev->irq_slab, data);
}
void gpu_device_raise_irq(void *model, enum model_linux_irqs irq)
void gpu_device_raise_irq(void *model, u32 irq)
{
struct model_irq_data *data;
struct kbase_device *kbdev = gpu_device_get_data(model);

View File

@@ -124,7 +124,7 @@ void midgard_model_read_reg(void *h, u32 addr, u32 *const value);
*
* This hook is global to the model Linux framework.
*/
void gpu_device_raise_irq(void *model, enum model_linux_irqs irq);
void gpu_device_raise_irq(void *model, u32 irq);
/**
* gpu_device_set_data() - Private model set data function.

View File

@@ -2575,26 +2575,33 @@ void kbase_pm_disable_interrupts(struct kbase_device *kbdev)
KBASE_EXPORT_TEST_API(kbase_pm_disable_interrupts);
#if MALI_USE_CSF
/**
* update_user_reg_page_mapping - Update the mapping for USER Register page
*
* @kbdev: The kbase device structure for the device.
*
* This function must be called to unmap the dummy or real page from USER Register page
* mapping whenever GPU is powered up or down. The dummy or real page would get
* appropriately mapped in when Userspace reads the LATEST_FLUSH value.
*/
static void update_user_reg_page_mapping(struct kbase_device *kbdev)
{
struct kbase_context *kctx, *n;
lockdep_assert_held(&kbdev->pm.lock);
mutex_lock(&kbdev->csf.reg_lock);
/* Only if the mappings for USER page exist, update all PTEs associated to it */
if (kbdev->csf.nr_user_page_mapped > 0) {
if (likely(kbdev->csf.mali_file_inode)) {
/* This would zap the pte corresponding to the mapping of User
* register page for all the Kbase contexts.
*/
unmap_mapping_range(kbdev->csf.mali_file_inode->i_mapping,
BASEP_MEM_CSF_USER_REG_PAGE_HANDLE, PAGE_SIZE, 1);
} else {
dev_err(kbdev->dev,
"Device file inode not exist even if USER page previously mapped");
}
list_for_each_entry_safe(kctx, n, &kbdev->csf.user_reg.list, csf.user_reg.link) {
/* This would zap the PTE corresponding to the mapping of User
* Register page of the kbase context. The mapping will be reestablished
* when the context (user process) needs to access to the page.
*/
unmap_mapping_range(kbdev->csf.user_reg.filp->f_inode->i_mapping,
kctx->csf.user_reg.file_offset << PAGE_SHIFT, PAGE_SIZE, 1);
list_del_init(&kctx->csf.user_reg.link);
dev_dbg(kbdev->dev, "Updated USER Reg page mapping of ctx %d_%d", kctx->tgid,
kctx->id);
}
mutex_unlock(&kbdev->csf.reg_lock);
}
#endif

View File

@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
* (C) COPYRIGHT 2014-2016, 2018-2022 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2014-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -22,6 +22,8 @@
#include <mali_kbase.h>
#include <mali_kbase_hwaccess_time.h>
#if MALI_USE_CSF
#include <asm/arch_timer.h>
#include <linux/gcd.h>
#include <csf/mali_kbase_csf_timeout.h>
#endif
#include <device/mali_kbase_device.h>
@@ -121,20 +123,29 @@ unsigned int kbase_get_timeout_ms(struct kbase_device *kbdev,
/* Only for debug messages, safe default in case it's mis-maintained */
const char *selector_str = "(unknown)";
if (WARN(!kbdev->lowest_gpu_freq_khz,
"Lowest frequency uninitialized! Using reference frequency for scaling")) {
if (!kbdev->lowest_gpu_freq_khz) {
dev_dbg(kbdev->dev,
"Lowest frequency uninitialized! Using reference frequency for scaling");
freq_khz = DEFAULT_REF_TIMEOUT_FREQ_KHZ;
} else {
freq_khz = kbdev->lowest_gpu_freq_khz;
}
switch (selector) {
case MMU_AS_INACTIVE_WAIT_TIMEOUT:
selector_str = "MMU_AS_INACTIVE_WAIT_TIMEOUT";
nr_cycles = MMU_AS_INACTIVE_WAIT_TIMEOUT_CYCLES;
break;
case KBASE_TIMEOUT_SELECTOR_COUNT:
default:
#if !MALI_USE_CSF
WARN(1, "Invalid timeout selector used! Using default value");
nr_cycles = JM_DEFAULT_TIMEOUT_CYCLES;
break;
case JM_DEFAULT_JS_FREE_TIMEOUT:
selector_str = "JM_DEFAULT_JS_FREE_TIMEOUT";
nr_cycles = JM_DEFAULT_JS_FREE_TIMEOUT_CYCLES;
break;
#else
/* Use Firmware timeout if invalid selection */
WARN(1,
@@ -204,3 +215,65 @@ u64 kbase_backend_get_cycle_cnt(struct kbase_device *kbdev)
return lo | (((u64) hi1) << 32);
}
#if MALI_USE_CSF
u64 __maybe_unused kbase_backend_time_convert_gpu_to_cpu(struct kbase_device *kbdev, u64 gpu_ts)
{
if (WARN_ON(!kbdev))
return 0;
return div64_u64(gpu_ts * kbdev->backend_time.multiplier, kbdev->backend_time.divisor) +
kbdev->backend_time.offset;
}
/**
* get_cpu_gpu_time() - Get current CPU and GPU timestamps.
*
* @kbdev: Kbase device.
* @cpu_ts: Output CPU timestamp.
* @gpu_ts: Output GPU timestamp.
* @gpu_cycle: Output GPU cycle counts.
*/
static void get_cpu_gpu_time(struct kbase_device *kbdev, u64 *cpu_ts, u64 *gpu_ts, u64 *gpu_cycle)
{
struct timespec64 ts;
kbase_backend_get_gpu_time(kbdev, gpu_cycle, gpu_ts, &ts);
if (cpu_ts)
*cpu_ts = ts.tv_sec * NSEC_PER_SEC + ts.tv_nsec;
}
#endif
int kbase_backend_time_init(struct kbase_device *kbdev)
{
#if MALI_USE_CSF
u64 cpu_ts = 0;
u64 gpu_ts = 0;
u64 freq;
u64 common_factor;
get_cpu_gpu_time(kbdev, &cpu_ts, &gpu_ts, NULL);
freq = arch_timer_get_cntfrq();
if (!freq) {
dev_warn(kbdev->dev, "arch_timer_get_rate() is zero!");
return -EINVAL;
}
common_factor = gcd(NSEC_PER_SEC, freq);
kbdev->backend_time.multiplier = div64_u64(NSEC_PER_SEC, common_factor);
kbdev->backend_time.divisor = div64_u64(freq, common_factor);
if (!kbdev->backend_time.divisor) {
dev_warn(kbdev->dev, "CPU to GPU divisor is zero!");
return -EINVAL;
}
kbdev->backend_time.offset = cpu_ts - div64_u64(gpu_ts * kbdev->backend_time.multiplier,
kbdev->backend_time.divisor);
#endif
return 0;
}

View File

@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
* (C) COPYRIGHT 2017-2022 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2017-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -62,8 +62,11 @@ bob_defaults {
mali_dma_buf_legacy_compat: {
kbuild_options: ["CONFIG_MALI_DMA_BUF_LEGACY_COMPAT=y"],
},
large_page_alloc_override: {
kbuild_options: ["CONFIG_LARGE_PAGE_ALLOC_OVERRIDE=y"],
},
large_page_alloc: {
kbuild_options: ["CONFIG_MALI_2MB_ALLOC=y"],
kbuild_options: ["CONFIG_LARGE_PAGE_ALLOC=y"],
},
mali_memory_fully_backed: {
kbuild_options: ["CONFIG_MALI_MEMORY_FULLY_BACKED=y"],
@@ -86,9 +89,6 @@ bob_defaults {
mali_error_inject: {
kbuild_options: ["CONFIG_MALI_BIFROST_ERROR_INJECT=y"],
},
mali_gem5_build: {
kbuild_options: ["CONFIG_MALI_GEM5_BUILD=y"],
},
mali_debug: {
kbuild_options: [
"CONFIG_MALI_BIFROST_DEBUG=y",
@@ -137,9 +137,6 @@ bob_defaults {
platform_is_fpga: {
kbuild_options: ["CONFIG_MALI_IS_FPGA=y"],
},
mali_fw_core_dump: {
kbuild_options: ["CONFIG_MALI_FW_CORE_DUMP=y"],
},
mali_coresight: {
kbuild_options: ["CONFIG_MALI_CORESIGHT=y"],
},
@@ -194,6 +191,15 @@ bob_kernel_module {
"platform/*/*.c",
"platform/*/*.h",
"platform/*/Kbuild",
"platform/*/*/*.c",
"platform/*/*/*.h",
"platform/*/*/Kbuild",
"platform/*/*/*.c",
"platform/*/*/*.h",
"platform/*/*/Kbuild",
"platform/*/*/*/*.c",
"platform/*/*/*/*.h",
"platform/*/*/*/Kbuild",
"thirdparty/*.c",
"thirdparty/Kbuild",
"debug/*.c",

View File

@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
* (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -22,6 +22,12 @@
/*
* Base kernel context APIs
*/
#include <linux/version.h>
#if KERNEL_VERSION(4, 11, 0) <= LINUX_VERSION_CODE
#include <linux/sched/task.h>
#else
#include <linux/sched.h>
#endif
#include <mali_kbase.h>
#include <gpu/mali_kbase_gpu_regmap.h>
@@ -129,13 +135,51 @@ int kbase_context_common_init(struct kbase_context *kctx)
/* creating a context is considered a disjoint event */
kbase_disjoint_event(kctx->kbdev);
spin_lock_init(&kctx->mm_update_lock);
kctx->process_mm = NULL;
kctx->task = NULL;
atomic_set(&kctx->nonmapped_pages, 0);
atomic_set(&kctx->permanent_mapped_pages, 0);
kctx->tgid = current->tgid;
kctx->pid = current->pid;
/* Check if this is a Userspace created context */
if (likely(kctx->filp)) {
struct pid *pid_struct;
rcu_read_lock();
pid_struct = find_get_pid(kctx->tgid);
if (likely(pid_struct)) {
struct task_struct *task = pid_task(pid_struct, PIDTYPE_PID);
if (likely(task)) {
/* Take a reference on the task to avoid slow lookup
* later on from the page allocation loop.
*/
get_task_struct(task);
kctx->task = task;
} else {
dev_err(kctx->kbdev->dev,
"Failed to get task pointer for %s/%d",
current->comm, current->pid);
err = -ESRCH;
}
put_pid(pid_struct);
} else {
dev_err(kctx->kbdev->dev,
"Failed to get pid pointer for %s/%d",
current->comm, current->pid);
err = -ESRCH;
}
rcu_read_unlock();
if (unlikely(err))
return err;
kbase_mem_mmgrab();
kctx->process_mm = current->mm;
}
atomic_set(&kctx->used_pages, 0);
mutex_init(&kctx->reg_lock);
@@ -168,13 +212,16 @@ int kbase_context_common_init(struct kbase_context *kctx)
kctx->id = atomic_add_return(1, &(kctx->kbdev->ctx_num)) - 1;
mutex_lock(&kctx->kbdev->kctx_list_lock);
err = kbase_insert_kctx_to_process(kctx);
if (err)
dev_err(kctx->kbdev->dev,
"(err:%d) failed to insert kctx to kbase_process\n", err);
mutex_unlock(&kctx->kbdev->kctx_list_lock);
if (err) {
dev_err(kctx->kbdev->dev,
"(err:%d) failed to insert kctx to kbase_process", err);
if (likely(kctx->filp)) {
mmdrop(kctx->process_mm);
put_task_struct(kctx->task);
}
}
return err;
}
@@ -260,6 +307,11 @@ void kbase_context_common_term(struct kbase_context *kctx)
kbase_remove_kctx_from_process(kctx);
mutex_unlock(&kctx->kbdev->kctx_list_lock);
if (likely(kctx->filp)) {
mmdrop(kctx->process_mm);
put_task_struct(kctx->task);
}
KBASE_KTRACE_ADD(kctx->kbdev, CORE_CTX_DESTROY, kctx, 0u);
}

View File

@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
* (C) COPYRIGHT 2011-2017, 2019-2021 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2011-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -92,6 +92,19 @@ static inline bool kbase_ctx_flag(struct kbase_context *kctx,
return atomic_read(&kctx->flags) & flag;
}
/**
* kbase_ctx_compat_mode - Indicate whether a kbase context needs to operate
* in compatibility mode for 32-bit userspace.
* @kctx: kbase context
*
* Return: True if needs to maintain compatibility, False otherwise.
*/
static inline bool kbase_ctx_compat_mode(struct kbase_context *kctx)
{
return !IS_ENABLED(CONFIG_64BIT) ||
(IS_ENABLED(CONFIG_64BIT) && kbase_ctx_flag(kctx, KCTX_COMPAT));
}
/**
* kbase_ctx_flag_clear - Clear @flag on @kctx
* @kctx: Pointer to kbase context

View File

@@ -39,7 +39,9 @@
#define CS_REQ_EXCEPTION_MASK (CS_REQ_FAULT_MASK | CS_REQ_FATAL_MASK)
#define CS_ACK_EXCEPTION_MASK (CS_ACK_FAULT_MASK | CS_ACK_FATAL_MASK)
#define POWER_DOWN_LATEST_FLUSH_VALUE ((u32)1)
#define CS_RING_BUFFER_MAX_SIZE ((uint32_t)(1 << 31)) /* 2GiB */
#define CS_RING_BUFFER_MIN_SIZE ((uint32_t)4096)
#define PROTM_ALLOC_MAX_RETRIES ((u8)5)
@@ -73,6 +75,38 @@ struct irq_idle_and_protm_track {
s8 idle_slot;
};
/**
* kbasep_ctx_user_reg_page_mapping_term() - Terminate resources for USER Register Page.
*
* @kctx: Pointer to the kbase context
*/
static void kbasep_ctx_user_reg_page_mapping_term(struct kbase_context *kctx)
{
struct kbase_device *kbdev = kctx->kbdev;
if (unlikely(kctx->csf.user_reg.vma))
dev_err(kbdev->dev, "VMA for USER Register page exist on termination of ctx %d_%d",
kctx->tgid, kctx->id);
if (WARN_ON_ONCE(!list_empty(&kctx->csf.user_reg.link)))
list_del_init(&kctx->csf.user_reg.link);
}
/**
* kbasep_ctx_user_reg_page_mapping_init() - Initialize resources for USER Register Page.
*
* @kctx: Pointer to the kbase context
*
* @return: 0 on success.
*/
static int kbasep_ctx_user_reg_page_mapping_init(struct kbase_context *kctx)
{
INIT_LIST_HEAD(&kctx->csf.user_reg.link);
kctx->csf.user_reg.vma = NULL;
kctx->csf.user_reg.file_offset = 0;
return 0;
}
static void put_user_pages_mmap_handle(struct kbase_context *kctx,
struct kbase_queue *queue)
{
@@ -267,7 +301,7 @@ int kbase_csf_alloc_command_stream_user_pages(struct kbase_context *kctx, struct
ret = kbase_mem_pool_alloc_pages(&kctx->mem_pools.small[KBASE_MEM_GROUP_CSF_IO],
KBASEP_NUM_CS_USER_IO_PAGES,
queue->phys, false);
queue->phys, false, kctx->task);
if (ret != KBASEP_NUM_CS_USER_IO_PAGES) {
/* Marking both the phys to zero for indicating there is no phys allocated */
queue->phys[0].tagged_addr = 0;
@@ -293,11 +327,8 @@ int kbase_csf_alloc_command_stream_user_pages(struct kbase_context *kctx, struct
queue->db_file_offset = kbdev->csf.db_file_offsets;
kbdev->csf.db_file_offsets += BASEP_QUEUE_NR_MMAP_USER_PAGES;
#if (KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE)
WARN(atomic_read(&queue->refcount) != 1, "Incorrect refcounting for queue object\n");
#else
WARN(refcount_read(&queue->refcount) != 1, "Incorrect refcounting for queue object\n");
#endif
WARN(kbase_refcount_read(&queue->refcount) != 1,
"Incorrect refcounting for queue object\n");
/* This is the second reference taken on the queue object and
* would be dropped only when the IO mapping is removed either
* explicitly by userspace or implicitly by kernel on process exit.
@@ -369,21 +400,13 @@ static struct kbase_queue *find_queue(struct kbase_context *kctx, u64 base_addr)
static void get_queue(struct kbase_queue *queue)
{
#if (KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE)
WARN_ON(!atomic_inc_not_zero(&queue->refcount));
#else
WARN_ON(!refcount_inc_not_zero(&queue->refcount));
#endif
WARN_ON(!kbase_refcount_inc_not_zero(&queue->refcount));
}
static void release_queue(struct kbase_queue *queue)
{
lockdep_assert_held(&queue->kctx->csf.lock);
#if (KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE)
if (atomic_dec_and_test(&queue->refcount)) {
#else
if (refcount_dec_and_test(&queue->refcount)) {
#endif
if (kbase_refcount_dec_and_test(&queue->refcount)) {
/* The queue can't still be on the per context list. */
WARN_ON(!list_empty(&queue->link));
WARN_ON(queue->group);
@@ -399,7 +422,7 @@ static void release_queue(struct kbase_queue *queue)
* would free up the GPU queue memory.
*/
kbase_gpu_vm_lock(queue->kctx);
kbase_va_region_no_user_free_put(queue->kctx, queue->queue_reg);
kbase_va_region_no_user_free_dec(queue->queue_reg);
kbase_gpu_vm_unlock(queue->kctx);
kfree(queue);
@@ -505,17 +528,16 @@ static int csf_queue_register_internal(struct kbase_context *kctx,
queue->kctx = kctx;
queue->base_addr = queue_addr;
queue->queue_reg = kbase_va_region_no_user_free_get(kctx, region);
queue->queue_reg = region;
kbase_va_region_no_user_free_inc(region);
queue->size = (queue_size << PAGE_SHIFT);
queue->csi_index = KBASEP_IF_NR_INVALID;
queue->enabled = false;
queue->priority = reg->priority;
#if (KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE)
atomic_set(&queue->refcount, 1);
#else
refcount_set(&queue->refcount, 1);
#endif
kbase_refcount_set(&queue->refcount, 1);
queue->group = NULL;
queue->bind_state = KBASE_CSF_QUEUE_UNBOUND;
@@ -572,6 +594,13 @@ out:
int kbase_csf_queue_register(struct kbase_context *kctx,
struct kbase_ioctl_cs_queue_register *reg)
{
/* Validate the ring buffer configuration parameters */
if (reg->buffer_size < CS_RING_BUFFER_MIN_SIZE ||
reg->buffer_size > CS_RING_BUFFER_MAX_SIZE ||
reg->buffer_size & (reg->buffer_size - 1) || !reg->buffer_gpu_addr ||
reg->buffer_gpu_addr & ~PAGE_MASK)
return -EINVAL;
return csf_queue_register_internal(kctx, reg, NULL);
}
@@ -590,6 +619,13 @@ int kbase_csf_queue_register_ex(struct kbase_context *kctx,
if (glb_version < kbase_csf_interface_version(1, 1, 0))
return -EINVAL;
/* Validate the ring buffer configuration parameters */
if (reg->buffer_size < CS_RING_BUFFER_MIN_SIZE ||
reg->buffer_size > CS_RING_BUFFER_MAX_SIZE ||
reg->buffer_size & (reg->buffer_size - 1) || !reg->buffer_gpu_addr ||
reg->buffer_gpu_addr & ~PAGE_MASK)
return -EINVAL;
/* Validate the cs_trace configuration parameters */
if (reg->ex_buffer_size &&
((reg->ex_event_size > max_size) ||
@@ -909,6 +945,9 @@ static void unbind_stopped_queue(struct kbase_context *kctx,
{
lockdep_assert_held(&kctx->csf.lock);
if (WARN_ON(queue->csi_index < 0))
return;
if (queue->bind_state != KBASE_CSF_QUEUE_UNBOUND) {
unsigned long flags;
@@ -922,6 +961,7 @@ static void unbind_stopped_queue(struct kbase_context *kctx,
kbase_csf_scheduler_spin_unlock(kctx->kbdev, flags);
put_user_pages_mmap_handle(kctx, queue);
WARN_ON_ONCE(queue->doorbell_nr != KBASEP_USER_DB_NR_INVALID);
queue->bind_state = KBASE_CSF_QUEUE_UNBOUND;
}
}
@@ -1099,7 +1139,7 @@ static int create_normal_suspend_buffer(struct kbase_context *const kctx,
/* Get physical page for a normal suspend buffer */
err = kbase_mem_pool_alloc_pages(&kctx->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], nr_pages,
&s_buf->phy[0], false);
&s_buf->phy[0], false, kctx->task);
if (err < 0) {
kfree(s_buf->phy);
@@ -1539,6 +1579,7 @@ void kbase_csf_queue_group_terminate(struct kbase_context *kctx,
}
KBASE_EXPORT_TEST_API(kbase_csf_queue_group_terminate);
#if IS_ENABLED(CONFIG_MALI_VECTOR_DUMP) || MALI_UNIT_TEST
int kbase_csf_queue_group_suspend(struct kbase_context *kctx,
struct kbase_suspend_copy_buffer *sus_buf,
u8 group_handle)
@@ -1569,6 +1610,7 @@ int kbase_csf_queue_group_suspend(struct kbase_context *kctx,
return err;
}
#endif
void kbase_csf_add_group_fatal_error(
struct kbase_queue_group *const group,
@@ -1637,8 +1679,6 @@ int kbase_csf_ctx_init(struct kbase_context *kctx)
kbase_csf_event_init(kctx);
kctx->csf.user_reg_vma = NULL;
/* Mark all the cookies as 'free' */
bitmap_fill(kctx->csf.cookies, KBASE_CSF_NUM_USER_IO_PAGES_HANDLE);
@@ -1658,7 +1698,14 @@ int kbase_csf_ctx_init(struct kbase_context *kctx)
mutex_init(&kctx->csf.lock);
INIT_WORK(&kctx->csf.pending_submission_work,
pending_submission_worker);
} else
err = kbasep_ctx_user_reg_page_mapping_init(kctx);
if (unlikely(err))
kbase_csf_tiler_heap_context_term(kctx);
}
if (unlikely(err))
kbase_csf_kcpu_queue_context_term(kctx);
}
@@ -1816,17 +1863,14 @@ void kbase_csf_ctx_term(struct kbase_context *kctx)
* only one reference left that was taken when queue was
* registered.
*/
#if (KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE)
WARN_ON(atomic_read(&queue->refcount) != 1);
#else
WARN_ON(refcount_read(&queue->refcount) != 1);
#endif
WARN_ON(kbase_refcount_read(&queue->refcount) != 1);
list_del_init(&queue->link);
release_queue(queue);
}
mutex_unlock(&kctx->csf.lock);
kbasep_ctx_user_reg_page_mapping_term(kctx);
kbase_csf_tiler_heap_context_term(kctx);
kbase_csf_kcpu_queue_context_term(kctx);
kbase_csf_scheduler_context_term(kctx);
@@ -2746,6 +2790,9 @@ static void process_csg_interrupts(struct kbase_device *const kbdev, int const c
if ((req ^ ack) & CSG_REQ_IDLE_MASK) {
struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
KBASE_TLSTREAM_TL_KBASE_DEVICE_CSG_IDLE(
kbdev, kbdev->gpu_props.props.raw_props.gpu_id, csg_nr);
kbase_csf_firmware_csg_input_mask(ginfo, CSG_REQ, ack,
CSG_REQ_IDLE_MASK);
@@ -3159,12 +3206,12 @@ int kbase_csf_doorbell_mapping_init(struct kbase_device *kbdev)
struct file *filp;
int ret;
filp = shmem_file_setup("mali csf", MAX_LFS_FILESIZE, VM_NORESERVE);
filp = shmem_file_setup("mali csf db", MAX_LFS_FILESIZE, VM_NORESERVE);
if (IS_ERR(filp))
return PTR_ERR(filp);
ret = kbase_mem_pool_alloc_pages(&kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], 1, &phys,
false);
false, NULL);
if (ret <= 0) {
fput(filp);
@@ -3180,29 +3227,34 @@ int kbase_csf_doorbell_mapping_init(struct kbase_device *kbdev)
void kbase_csf_free_dummy_user_reg_page(struct kbase_device *kbdev)
{
if (as_phys_addr_t(kbdev->csf.dummy_user_reg_page)) {
struct page *page = as_page(kbdev->csf.dummy_user_reg_page);
if (kbdev->csf.user_reg.filp) {
struct page *page = as_page(kbdev->csf.user_reg.dummy_page);
kbase_mem_pool_free(
&kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], page,
false);
kbase_mem_pool_free(&kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], page, false);
fput(kbdev->csf.user_reg.filp);
}
}
int kbase_csf_setup_dummy_user_reg_page(struct kbase_device *kbdev)
{
struct tagged_addr phys;
struct file *filp;
struct page *page;
u32 *addr;
int ret;
kbdev->csf.dummy_user_reg_page = as_tagged(0);
kbdev->csf.user_reg.filp = NULL;
ret = kbase_mem_pool_alloc_pages(&kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], 1, &phys,
false);
filp = shmem_file_setup("mali csf user_reg", MAX_LFS_FILESIZE, VM_NORESERVE);
if (IS_ERR(filp)) {
dev_err(kbdev->dev, "failed to get an unlinked file for user_reg");
return PTR_ERR(filp);
}
if (ret <= 0)
return ret;
if (kbase_mem_pool_alloc_pages(&kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], 1, &phys,
false, NULL) <= 0) {
fput(filp);
return -ENOMEM;
}
page = as_page(phys);
addr = kmap_atomic(page);
@@ -3212,12 +3264,13 @@ int kbase_csf_setup_dummy_user_reg_page(struct kbase_device *kbdev)
*/
addr[LATEST_FLUSH / sizeof(u32)] = POWER_DOWN_LATEST_FLUSH_VALUE;
kbase_sync_single_for_device(kbdev, kbase_dma_addr(page), sizeof(u32),
kbase_sync_single_for_device(kbdev, kbase_dma_addr(page) + LATEST_FLUSH, sizeof(u32),
DMA_BIDIRECTIONAL);
kunmap_atomic(addr);
kbdev->csf.dummy_user_reg_page = phys;
kbdev->csf.user_reg.filp = filp;
kbdev->csf.user_reg.dummy_page = phys;
kbdev->csf.user_reg.file_offset = 0;
return 0;
}

View File

@@ -274,6 +274,7 @@ void kbase_csf_queue_group_terminate(struct kbase_context *kctx,
*/
void kbase_csf_term_descheduled_queue_group(struct kbase_queue_group *group);
#if IS_ENABLED(CONFIG_MALI_VECTOR_DUMP) || MALI_UNIT_TEST
/**
* kbase_csf_queue_group_suspend - Suspend a GPU command queue group
*
@@ -291,6 +292,7 @@ void kbase_csf_term_descheduled_queue_group(struct kbase_queue_group *group);
*/
int kbase_csf_queue_group_suspend(struct kbase_context *kctx,
struct kbase_suspend_copy_buffer *sus_buf, u8 group_handle);
#endif
/**
* kbase_csf_add_group_fatal_error - Report a fatal group error to userspace

View File

@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
* (C) COPYRIGHT 2018-2022 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -30,6 +30,7 @@
#include <linux/wait.h>
#include "mali_kbase_csf_firmware.h"
#include "mali_kbase_refcount_defs.h"
#include "mali_kbase_csf_event.h"
#include <uapi/gpu/arm/bifrost/csf/mali_kbase_csf_errors_dumpfault.h>
@@ -269,6 +270,8 @@ enum kbase_queue_group_priority {
* @CSF_FIRMWARE_PING_TIMEOUT: Maximum time to wait for firmware to respond
* to a ping from KBase.
* @CSF_SCHED_PROTM_PROGRESS_TIMEOUT: Timeout used to prevent protected mode execution hang.
* @MMU_AS_INACTIVE_WAIT_TIMEOUT: Maximum waiting time in ms for the completion
* of a MMU operation
* @KBASE_TIMEOUT_SELECTOR_COUNT: Number of timeout selectors. Must be last in
* the enum.
*/
@@ -280,6 +283,7 @@ enum kbase_timeout_selector {
CSF_FIRMWARE_BOOT_TIMEOUT,
CSF_FIRMWARE_PING_TIMEOUT,
CSF_SCHED_PROTM_PROGRESS_TIMEOUT,
MMU_AS_INACTIVE_WAIT_TIMEOUT,
/* Must be the last in the enum */
KBASE_TIMEOUT_SELECTOR_COUNT
@@ -387,11 +391,7 @@ struct kbase_queue {
int doorbell_nr;
unsigned long db_file_offset;
struct list_head link;
#if (KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE)
atomic_t refcount;
#else
refcount_t refcount;
#endif
kbase_refcount_t refcount;
struct kbase_queue_group *group;
struct kbase_va_region *queue_reg;
struct work_struct oom_event_work;
@@ -778,6 +778,23 @@ struct kbase_csf_event {
spinlock_t lock;
};
/**
* struct kbase_csf_user_reg_context - Object containing members to manage the mapping
* of USER Register page for a context.
*
* @vma: Pointer to the VMA corresponding to the virtual mapping
* of the USER register page.
* @file_offset: File offset value that is assigned to userspace mapping
* of the USER Register page. It is in page units.
* @link: Links the context to the device list when mapping is pointing to
* either the dummy or the real Register page.
*/
struct kbase_csf_user_reg_context {
struct vm_area_struct *vma;
u32 file_offset;
struct list_head link;
};
/**
* struct kbase_csf_context - Object representing CSF for a GPU address space.
*
@@ -816,13 +833,11 @@ struct kbase_csf_event {
* used by GPU command queues, and progress timeout events.
* @link: Link to this csf context in the 'runnable_kctxs' list of
* the scheduler instance
* @user_reg_vma: Pointer to the vma corresponding to the virtual mapping
* of the USER register page. Currently used only for sanity
* checking.
* @sched: Object representing the scheduler's context
* @pending_submission_work: Work item to process pending kicked GPU command queues.
* @cpu_queue: CPU queue information. Only be available when DEBUG_FS
* is enabled.
* @user_reg: Collective information to support mapping to USER Register page.
*/
struct kbase_csf_context {
struct list_head event_pages_head;
@@ -837,12 +852,12 @@ struct kbase_csf_context {
struct kbase_csf_tiler_heap_context tiler_heaps;
struct workqueue_struct *wq;
struct list_head link;
struct vm_area_struct *user_reg_vma;
struct kbase_csf_scheduler_context sched;
struct work_struct pending_submission_work;
#if IS_ENABLED(CONFIG_DEBUG_FS)
struct kbase_csf_cpu_queue_context cpu_queue;
#endif
struct kbase_csf_user_reg_context user_reg;
};
/**
@@ -1426,6 +1441,37 @@ struct kbase_csf_dump_on_fault {
};
#endif /* CONFIG_DEBUG_FS*/
/**
* struct kbase_csf_user_reg - Object containing members to manage the mapping
* of USER Register page for all contexts
*
* @dummy_page: Address of a dummy page that is mapped in place
* of the real USER Register page just before the GPU
* is powered down. The USER Register page is mapped
* in the address space of every process, that created
* a Base context, to enable the access to LATEST_FLUSH
* register from userspace.
* @filp: Pointer to a dummy file, that along with @file_offset,
* facilitates the use of unique file offset for the userspace mapping
* created for USER Register page.
* The userspace mapping is made to point to this file
* inside the mmap handler.
* @file_offset: Counter that is incremented every time Userspace creates a mapping of
* USER Register page, to provide a unique file offset range for
* @filp file, so that the CPU PTE of the Userspace mapping can be zapped
* through the kernel function unmap_mapping_range().
* It is incremented in page units.
* @list: Linked list to maintain user processes(contexts)
* having the mapping to USER Register page.
* It's protected by &kbase_csf_device.reg_lock.
*/
struct kbase_csf_user_reg {
struct tagged_addr dummy_page;
struct file *filp;
u32 file_offset;
struct list_head list;
};
/**
* struct kbase_csf_device - Object representing CSF for an instance of GPU
* platform device.
@@ -1463,20 +1509,6 @@ struct kbase_csf_dump_on_fault {
* of the real Hw doorbell page for the active GPU
* command queues after they are stopped or after the
* GPU is powered down.
* @dummy_user_reg_page: Address of the dummy page that is mapped in place
* of the real User register page just before the GPU
* is powered down. The User register page is mapped
* in the address space of every process, that created
* a Base context, to enable the access to LATEST_FLUSH
* register from userspace.
* @nr_user_page_mapped: The number of clients using the mapping of USER page.
* This is used to maintain backward compatibility.
* It's protected by @reg_lock.
* @mali_file_inode: Pointer to the inode corresponding to mali device
* file. This is needed in order to switch to the
* @dummy_user_reg_page on GPU power down.
* All instances of the mali device file will point to
* the same inode. It's protected by @reg_lock.
* @reg_lock: Lock to serialize the MCU firmware related actions
* that affect all contexts such as allocation of
* regions from shared interface area, assignment of
@@ -1531,7 +1563,7 @@ struct kbase_csf_dump_on_fault {
* the @p mcu_core_pwroff_dur_count as an update
* to the latter is asynchronous.
* @gpu_idle_hysteresis_us: Sysfs attribute for the idle hysteresis time
* window in unit of microseconds. The firmware does not
* window in unit of microseconds. The firmware does not
* use it directly.
* @gpu_idle_dur_count: The counterpart of the hysteresis time window in
* interface required format, ready to be used
@@ -1545,6 +1577,8 @@ struct kbase_csf_dump_on_fault {
* @fw_core_dump: Contain members required for handling the firmware
* core dump.
* @dof: Structure for dump on fault.
* @user_reg: Collective information to support the mapping to
* USER Register page for user processes.
*/
struct kbase_csf_device {
struct kbase_mmu_table mcu_mmu;
@@ -1558,9 +1592,6 @@ struct kbase_csf_device {
struct file *db_filp;
u32 db_file_offsets;
struct tagged_addr dummy_db_page;
struct tagged_addr dummy_user_reg_page;
u32 nr_user_page_mapped;
struct inode *mali_file_inode;
struct mutex reg_lock;
wait_queue_head_t event_wait;
bool interrupt_received;
@@ -1597,6 +1628,7 @@ struct kbase_csf_device {
*/
struct kbase_debug_coresight_device coresight;
#endif /* IS_ENABLED(CONFIG_MALI_CORESIGHT) */
struct kbase_csf_user_reg user_reg;
};
/**
@@ -1613,6 +1645,10 @@ struct kbase_csf_device {
* @bf_data: Data relating to Bus fault.
* @gf_data: Data relating to GPU fault.
* @current_setup: Stores the MMU configuration for this address space.
* @is_unresponsive: Flag to indicate MMU is not responding.
* Set if a MMU command isn't completed within
* &kbase_device:mmu_as_inactive_wait_time_ms.
* Clear by kbase_ctx_sched_restore_all_as() after GPU reset completes.
*/
struct kbase_as {
int number;
@@ -1624,6 +1660,7 @@ struct kbase_as {
struct kbase_fault bf_data;
struct kbase_fault gf_data;
struct kbase_mmu_setup current_setup;
bool is_unresponsive;
};
#endif /* _KBASE_CSF_DEFS_H_ */

View File

@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
* (C) COPYRIGHT 2018-2022 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -201,8 +201,8 @@ static int setup_shared_iface_static_region(struct kbase_device *kbdev)
if (!interface)
return -EINVAL;
reg = kbase_alloc_free_region(&kbdev->csf.shared_reg_rbtree, 0,
interface->num_pages_aligned, KBASE_REG_ZONE_MCU_SHARED);
reg = kbase_alloc_free_region(kbdev, &kbdev->csf.shared_reg_rbtree, 0,
interface->num_pages_aligned, KBASE_REG_ZONE_MCU_SHARED);
if (reg) {
mutex_lock(&kbdev->csf.reg_lock);
ret = kbase_add_va_region_rbtree(kbdev, reg,
@@ -296,19 +296,41 @@ static void boot_csf_firmware(struct kbase_device *kbdev)
wait_for_firmware_boot(kbdev);
}
static void wait_ready(struct kbase_device *kbdev)
/**
* wait_ready() - Wait for previously issued MMU command to complete.
*
* @kbdev: Kbase device to wait for a MMU command to complete.
*
* Reset GPU if the wait for previously issued command times out.
*
* Return: 0 on success, error code otherwise.
*/
static int wait_ready(struct kbase_device *kbdev)
{
u32 max_loops = KBASE_AS_INACTIVE_MAX_LOOPS;
u32 val;
const ktime_t wait_loop_start = ktime_get_raw();
const u32 mmu_as_inactive_wait_time_ms = kbdev->mmu_as_inactive_wait_time_ms;
s64 diff;
val = kbase_reg_read(kbdev, MMU_AS_REG(MCU_AS_NR, AS_STATUS));
do {
unsigned int i;
/* Wait for a while for the update command to take effect */
while (--max_loops && (val & AS_STATUS_AS_ACTIVE))
val = kbase_reg_read(kbdev, MMU_AS_REG(MCU_AS_NR, AS_STATUS));
for (i = 0; i < 1000; i++) {
/* Wait for the MMU status to indicate there is no active command */
if (!(kbase_reg_read(kbdev, MMU_AS_REG(MCU_AS_NR, AS_STATUS)) &
AS_STATUS_AS_ACTIVE))
return 0;
}
if (max_loops == 0)
dev_err(kbdev->dev, "AS_ACTIVE bit stuck, might be caused by slow/unstable GPU clock or possible faulty FPGA connector\n");
diff = ktime_to_ms(ktime_sub(ktime_get_raw(), wait_loop_start));
} while (diff < mmu_as_inactive_wait_time_ms);
dev_err(kbdev->dev,
"AS_ACTIVE bit stuck for MCU AS. Might be caused by unstable GPU clk/pwr or faulty system");
if (kbase_prepare_to_reset_gpu_locked(kbdev, RESET_FLAGS_HWC_UNRECOVERABLE_ERROR))
kbase_reset_gpu_locked(kbdev);
return -ETIMEDOUT;
}
static void unload_mmu_tables(struct kbase_device *kbdev)
@@ -323,7 +345,7 @@ static void unload_mmu_tables(struct kbase_device *kbdev)
mutex_unlock(&kbdev->mmu_hw_mutex);
}
static void load_mmu_tables(struct kbase_device *kbdev)
static int load_mmu_tables(struct kbase_device *kbdev)
{
unsigned long irq_flags;
@@ -334,7 +356,7 @@ static void load_mmu_tables(struct kbase_device *kbdev)
mutex_unlock(&kbdev->mmu_hw_mutex);
/* Wait for a while for the update command to take effect */
wait_ready(kbdev);
return wait_ready(kbdev);
}
/**
@@ -695,7 +717,7 @@ static int parse_memory_setup_entry(struct kbase_device *kbdev,
ret = kbase_mem_pool_alloc_pages(
kbase_mem_pool_group_select(kbdev, KBASE_MEM_GROUP_CSF_FW,
is_small_page),
num_pages_aligned, phys, false);
num_pages_aligned, phys, false, NULL);
ignore_page_migration = false;
}
}
@@ -2240,6 +2262,7 @@ int kbase_csf_firmware_early_init(struct kbase_device *kbdev)
INIT_LIST_HEAD(&kbdev->csf.firmware_config);
INIT_LIST_HEAD(&kbdev->csf.firmware_timeline_metadata);
INIT_LIST_HEAD(&kbdev->csf.firmware_trace_buffers.list);
INIT_LIST_HEAD(&kbdev->csf.user_reg.list);
INIT_WORK(&kbdev->csf.firmware_reload_work,
kbase_csf_firmware_reload_worker);
INIT_WORK(&kbdev->csf.fw_error_work, firmware_error_worker);
@@ -2403,7 +2426,9 @@ int kbase_csf_firmware_load_init(struct kbase_device *kbdev)
kbase_pm_wait_for_l2_powered(kbdev);
/* Load the MMU tables into the selected address space */
load_mmu_tables(kbdev);
ret = load_mmu_tables(kbdev);
if (ret != 0)
goto err_out;
boot_csf_firmware(kbdev);
@@ -2445,9 +2470,8 @@ int kbase_csf_firmware_load_init(struct kbase_device *kbdev)
goto err_out;
}
#ifdef CONFIG_MALI_FW_CORE_DUMP
kbase_csf_firmware_core_dump_init(kbdev);
#endif
if (kbdev->csf.fw_core_dump.available)
kbase_csf_firmware_core_dump_init(kbdev);
/* Firmware loaded successfully, ret = 0 */
KBASE_KTRACE_ADD(kbdev, CSF_FIRMWARE_BOOT, NULL,
@@ -3029,7 +3053,7 @@ int kbase_csf_firmware_mcu_shared_mapping_init(
goto page_list_alloc_error;
ret = kbase_mem_pool_alloc_pages(&kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], num_pages,
phys, false);
phys, false, NULL);
if (ret <= 0)
goto phys_mem_pool_alloc_error;
@@ -3040,8 +3064,8 @@ int kbase_csf_firmware_mcu_shared_mapping_init(
if (!cpu_addr)
goto vmap_error;
va_reg = kbase_alloc_free_region(&kbdev->csf.shared_reg_rbtree, 0,
num_pages, KBASE_REG_ZONE_MCU_SHARED);
va_reg = kbase_alloc_free_region(kbdev, &kbdev->csf.shared_reg_rbtree, 0, num_pages,
KBASE_REG_ZONE_MCU_SHARED);
if (!va_reg)
goto va_region_alloc_error;

View File

@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
* (C) COPYRIGHT 2018-2022 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -1124,6 +1124,7 @@ int kbase_csf_firmware_early_init(struct kbase_device *kbdev)
INIT_LIST_HEAD(&kbdev->csf.firmware_interfaces);
INIT_LIST_HEAD(&kbdev->csf.firmware_config);
INIT_LIST_HEAD(&kbdev->csf.firmware_trace_buffers.list);
INIT_LIST_HEAD(&kbdev->csf.user_reg.list);
INIT_WORK(&kbdev->csf.firmware_reload_work,
kbase_csf_firmware_reload_worker);
INIT_WORK(&kbdev->csf.fw_error_work, firmware_error_worker);
@@ -1569,7 +1570,7 @@ int kbase_csf_firmware_mcu_shared_mapping_init(
goto page_list_alloc_error;
ret = kbase_mem_pool_alloc_pages(&kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], num_pages,
phys, false);
phys, false, NULL);
if (ret <= 0)
goto phys_mem_pool_alloc_error;
@@ -1580,8 +1581,8 @@ int kbase_csf_firmware_mcu_shared_mapping_init(
if (!cpu_addr)
goto vmap_error;
va_reg = kbase_alloc_free_region(&kbdev->csf.shared_reg_rbtree, 0,
num_pages, KBASE_REG_ZONE_MCU_SHARED);
va_reg = kbase_alloc_free_region(kbdev, &kbdev->csf.shared_reg_rbtree, 0, num_pages,
KBASE_REG_ZONE_MCU_SHARED);
if (!va_reg)
goto va_region_alloc_error;

View File

@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
* (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -100,10 +100,10 @@ static void evict_heap_context(struct kbase_csf_heap_context_allocator *const ct
lockdep_assert_held(&ctx_alloc->lock);
/* There is no need to take vm_lock here as the ctx_alloc region is no_user_free
* refcounted. The region and the backing page can't disappear whilst this
* function is executing.
* Flush type is passed as FLUSH_PT to CLN+INV L2 only.
/* There is no need to take vm_lock here as the ctx_alloc region is protected
* via a nonzero no_user_free_count. The region and the backing page can't
* disappear whilst this function is executing. Flush type is passed as FLUSH_PT
* to CLN+INV L2 only.
*/
kbase_mmu_flush_pa_range(kctx->kbdev, kctx,
heap_context_pa, ctx_alloc->heap_context_size_aligned,
@@ -181,14 +181,9 @@ void kbase_csf_heap_context_allocator_term(
if (ctx_alloc->region) {
kbase_gpu_vm_lock(kctx);
/*
* We can't enforce (nor check) the no_user_free refcount
* to be 0 here as other code regions can take such a reference.
* Anyway, this isn't an issue as the region will eventually
* be freed by the region tracker if its refcount didn't drop
* to 0.
*/
kbase_va_region_no_user_free_put(kctx, ctx_alloc->region);
WARN_ON(!kbase_va_region_is_no_user_free(ctx_alloc->region));
kbase_va_region_no_user_free_dec(ctx_alloc->region);
kbase_mem_free_region(kctx, ctx_alloc->region);
kbase_gpu_vm_unlock(kctx);
}

View File

@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
* (C) COPYRIGHT 2018-2022 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -365,15 +365,16 @@ static int kbase_kcpu_jit_allocate_prepare(
{
struct kbase_context *const kctx = kcpu_queue->kctx;
void __user *data = u64_to_user_ptr(alloc_info->info);
struct base_jit_alloc_info *info;
struct base_jit_alloc_info *info = NULL;
u32 count = alloc_info->count;
int ret = 0;
u32 i;
lockdep_assert_held(&kcpu_queue->lock);
if (!data || count > kcpu_queue->kctx->jit_max_allocations ||
count > ARRAY_SIZE(kctx->jit_alloc)) {
if ((count == 0) || (count > ARRAY_SIZE(kctx->jit_alloc)) ||
(count > kcpu_queue->kctx->jit_max_allocations) || (!data) ||
!kbase_mem_allow_alloc(kctx)) {
ret = -EINVAL;
goto out;
}
@@ -610,6 +611,7 @@ out:
return ret;
}
#if IS_ENABLED(CONFIG_MALI_VECTOR_DUMP) || MALI_UNIT_TEST
static int kbase_csf_queue_group_suspend_prepare(
struct kbase_kcpu_command_queue *kcpu_queue,
struct base_kcpu_command_group_suspend_info *suspend_buf,
@@ -681,8 +683,7 @@ static int kbase_csf_queue_group_suspend_prepare(
(kbase_reg_current_backed_size(reg) < nr_pages) ||
!(reg->flags & KBASE_REG_CPU_WR) ||
(reg->gpu_alloc->type != KBASE_MEM_TYPE_NATIVE) ||
(kbase_is_region_shrinkable(reg)) ||
(kbase_va_region_is_no_user_free(kctx, reg))) {
(kbase_is_region_shrinkable(reg)) || (kbase_va_region_is_no_user_free(reg))) {
ret = -EINVAL;
goto out_clean_pages;
}
@@ -726,6 +727,7 @@ static int kbase_csf_queue_group_suspend_process(struct kbase_context *kctx,
{
return kbase_csf_queue_group_suspend(kctx, sus_buf, group_handle);
}
#endif
static enum kbase_csf_event_callback_action event_cqs_callback(void *param)
{
@@ -1037,9 +1039,12 @@ static int kbase_kcpu_cqs_wait_operation_process(struct kbase_device *kbdev,
queue->kctx, cqs_wait_operation->objs[i].addr, &mapping);
u64 val = 0;
/* GPUCORE-28172 RDT to review */
if (!queue->command_started)
if (!queue->command_started) {
queue->command_started = true;
KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_OPERATION_START(
kbdev, queue);
}
if (!evt) {
dev_warn(kbdev->dev,
@@ -1089,7 +1094,8 @@ static int kbase_kcpu_cqs_wait_operation_process(struct kbase_device *kbdev,
queue->has_error = true;
}
/* GPUCORE-28172 RDT to review */
KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_OPERATION_END(
kbdev, queue, *(u32 *)evt);
queue->command_started = false;
}
@@ -1232,8 +1238,6 @@ static void kbase_kcpu_cqs_set_operation_process(
evt = (uintptr_t)kbase_phy_alloc_mapping_get(
queue->kctx, cqs_set_operation->objs[i].addr, &mapping);
/* GPUCORE-28172 RDT to review */
if (!evt) {
dev_warn(kbdev->dev,
"Sync memory %llx already freed", cqs_set_operation->objs[i].addr);
@@ -1258,7 +1262,8 @@ static void kbase_kcpu_cqs_set_operation_process(
break;
}
/* GPUCORE-28172 RDT to review */
KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_SET_OPERATION(
kbdev, queue, *(u32 *)evt ? 1 : 0);
/* Always propagate errors */
*(u32 *)evt = queue->has_error;
@@ -1622,11 +1627,7 @@ static int kbasep_kcpu_fence_signal_init(struct kbase_kcpu_command_queue *kcpu_q
/* Set reference to KCPU metadata and increment refcount */
kcpu_fence->metadata = kcpu_queue->metadata;
#if (KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE)
WARN_ON(!atomic_inc_not_zero(&kcpu_fence->metadata->refcount));
#else
WARN_ON(!refcount_inc_not_zero(&kcpu_fence->metadata->refcount));
#endif
WARN_ON(!kbase_refcount_inc_not_zero(&kcpu_fence->metadata->refcount));
/* create a sync_file fd representing the fence */
*sync_file = sync_file_create(fence_out);
@@ -2056,7 +2057,7 @@ static void kcpu_queue_process(struct kbase_kcpu_command_queue *queue,
break;
}
case BASE_KCPU_COMMAND_TYPE_JIT_FREE:
case BASE_KCPU_COMMAND_TYPE_JIT_FREE: {
KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_FREE_START(kbdev, queue);
status = kbase_kcpu_jit_free_process(queue, cmd);
@@ -2066,6 +2067,8 @@ static void kcpu_queue_process(struct kbase_kcpu_command_queue *queue,
KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_FREE_END(
kbdev, queue);
break;
}
#if IS_ENABLED(CONFIG_MALI_VECTOR_DUMP) || MALI_UNIT_TEST
case BASE_KCPU_COMMAND_TYPE_GROUP_SUSPEND: {
struct kbase_suspend_copy_buffer *sus_buf =
cmd->info.suspend_buf_copy.sus_buf;
@@ -2082,24 +2085,25 @@ static void kcpu_queue_process(struct kbase_kcpu_command_queue *queue,
KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_GROUP_SUSPEND_END(
kbdev, queue, status);
}
if (!sus_buf->cpu_alloc) {
int i;
if (!sus_buf->cpu_alloc) {
int i;
for (i = 0; i < sus_buf->nr_pages; i++)
put_page(sus_buf->pages[i]);
} else {
kbase_mem_phy_alloc_kernel_unmapped(
sus_buf->cpu_alloc);
kbase_mem_phy_alloc_put(
sus_buf->cpu_alloc);
}
for (i = 0; i < sus_buf->nr_pages; i++)
put_page(sus_buf->pages[i]);
} else {
kbase_mem_phy_alloc_kernel_unmapped(
sus_buf->cpu_alloc);
kbase_mem_phy_alloc_put(
sus_buf->cpu_alloc);
}
kfree(sus_buf->pages);
kfree(sus_buf);
break;
}
#endif
default:
dev_dbg(kbdev->dev,
"Unrecognized command type");
@@ -2174,12 +2178,29 @@ static void KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_COMMAND(
}
case BASE_KCPU_COMMAND_TYPE_CQS_WAIT_OPERATION:
{
/* GPUCORE-28172 RDT to review */
const struct base_cqs_wait_operation_info *waits =
cmd->info.cqs_wait_operation.objs;
u32 inherit_err_flags = cmd->info.cqs_wait_operation.inherit_err_flags;
unsigned int i;
for (i = 0; i < cmd->info.cqs_wait_operation.nr_objs; i++) {
KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_WAIT_OPERATION(
kbdev, queue, waits[i].addr, waits[i].val,
waits[i].operation, waits[i].data_type,
(inherit_err_flags & ((uint32_t)1 << i)) ? 1 : 0);
}
break;
}
case BASE_KCPU_COMMAND_TYPE_CQS_SET_OPERATION:
{
/* GPUCORE-28172 RDT to review */
const struct base_cqs_set_operation_info *sets = cmd->info.cqs_set_operation.objs;
unsigned int i;
for (i = 0; i < cmd->info.cqs_set_operation.nr_objs; i++) {
KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_SET_OPERATION(
kbdev, queue, sets[i].addr, sets[i].val,
sets[i].operation, sets[i].data_type);
}
break;
}
case BASE_KCPU_COMMAND_TYPE_ERROR_BARRIER:
@@ -2226,11 +2247,13 @@ static void KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_COMMAND(
KBASE_TLSTREAM_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_JIT_FREE(kbdev, queue);
break;
}
#if IS_ENABLED(CONFIG_MALI_VECTOR_DUMP) || MALI_UNIT_TEST
case BASE_KCPU_COMMAND_TYPE_GROUP_SUSPEND:
KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_GROUP_SUSPEND(
kbdev, queue, cmd->info.suspend_buf_copy.sus_buf,
cmd->info.suspend_buf_copy.group_handle);
break;
#endif
default:
dev_dbg(kbdev->dev, "Unknown command type %u", cmd->type);
break;
@@ -2387,11 +2410,13 @@ int kbase_csf_kcpu_queue_enqueue(struct kbase_context *kctx,
ret = kbase_kcpu_jit_free_prepare(queue,
&command.info.jit_free, kcpu_cmd);
break;
#if IS_ENABLED(CONFIG_MALI_VECTOR_DUMP) || MALI_UNIT_TEST
case BASE_KCPU_COMMAND_TYPE_GROUP_SUSPEND:
ret = kbase_csf_queue_group_suspend_prepare(queue,
&command.info.suspend_buf_copy,
kcpu_cmd);
break;
#endif
default:
dev_dbg(queue->kctx->kbdev->dev,
"Unknown command type %u", command.type);
@@ -2467,6 +2492,7 @@ int kbase_csf_kcpu_queue_new(struct kbase_context *kctx,
{
struct kbase_kcpu_command_queue *queue;
int idx;
int n;
int ret = 0;
#if IS_ENABLED(CONFIG_SYNC_FILE)
struct kbase_kcpu_dma_fence_meta *metadata;
@@ -2519,6 +2545,7 @@ int kbase_csf_kcpu_queue_new(struct kbase_context *kctx,
metadata = kzalloc(sizeof(*metadata), GFP_KERNEL);
if (!metadata) {
destroy_workqueue(queue->wq);
kfree(queue);
ret = -ENOMEM;
goto out;
@@ -2526,14 +2553,17 @@ int kbase_csf_kcpu_queue_new(struct kbase_context *kctx,
metadata->kbdev = kctx->kbdev;
metadata->kctx_id = kctx->id;
snprintf(metadata->timeline_name, MAX_TIMELINE_NAME, "%d-%d_%d-%lld-kcpu", kctx->kbdev->id,
kctx->tgid, kctx->id, queue->fence_context);
n = snprintf(metadata->timeline_name, MAX_TIMELINE_NAME, "%d-%d_%d-%lld-kcpu",
kctx->kbdev->id, kctx->tgid, kctx->id, queue->fence_context);
if (WARN_ON(n >= MAX_TIMELINE_NAME)) {
destroy_workqueue(queue->wq);
kfree(queue);
kfree(metadata);
ret = -EINVAL;
goto out;
}
#if (KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE)
atomic_set(&metadata->refcount, 1);
#else
refcount_set(&metadata->refcount, 1);
#endif
kbase_refcount_set(&metadata->refcount, 1);
queue->metadata = metadata;
atomic_inc(&kctx->kbdev->live_fence_metadata);
#endif /* CONFIG_SYNC_FILE */

View File

@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
* (C) COPYRIGHT 2018-2022 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -186,6 +186,7 @@ struct kbase_suspend_copy_buffer {
struct kbase_mem_phy_alloc *cpu_alloc;
};
#if IS_ENABLED(CONFIG_MALI_VECTOR_DUMP) || MALI_UNIT_TEST
/**
* struct kbase_kcpu_command_group_suspend_info - structure which contains
* suspend buffer data captured for a suspended queue group.
@@ -198,6 +199,7 @@ struct kbase_kcpu_command_group_suspend_info {
struct kbase_suspend_copy_buffer *sus_buf;
u8 group_handle;
};
#endif
/**
@@ -232,7 +234,9 @@ struct kbase_kcpu_command {
struct kbase_kcpu_command_import_info import;
struct kbase_kcpu_command_jit_alloc_info jit_alloc;
struct kbase_kcpu_command_jit_free_info jit_free;
#if IS_ENABLED(CONFIG_MALI_VECTOR_DUMP) || MALI_UNIT_TEST
struct kbase_kcpu_command_group_suspend_info suspend_buf_copy;
#endif
} info;
};

View File

@@ -613,7 +613,7 @@ static int shared_mcu_csg_reg_init(struct kbase_device *kbdev,
int err, i;
INIT_LIST_HEAD(&csg_reg->link);
reg = kbase_alloc_free_region(&kbdev->csf.shared_reg_rbtree, 0, nr_csg_reg_pages,
reg = kbase_alloc_free_region(kbdev, &kbdev->csf.shared_reg_rbtree, 0, nr_csg_reg_pages,
KBASE_REG_ZONE_MCU_SHARED);
if (!reg) {
@@ -668,16 +668,17 @@ fail_userio_pages_map_fail:
while (i-- > 0) {
vpfn = CSG_REG_USERIO_VPFN(reg, i, nr_susp_pages);
kbase_mmu_teardown_pages(kbdev, &kbdev->csf.mcu_mmu, vpfn, shared_regs->dummy_phys,
KBASEP_NUM_CS_USER_IO_PAGES, MCU_AS_NR, true);
KBASEP_NUM_CS_USER_IO_PAGES, KBASEP_NUM_CS_USER_IO_PAGES,
MCU_AS_NR, true);
}
vpfn = CSG_REG_PMOD_BUF_VPFN(reg, nr_susp_pages);
kbase_mmu_teardown_pages(kbdev, &kbdev->csf.mcu_mmu, vpfn, shared_regs->dummy_phys,
nr_susp_pages, MCU_AS_NR, true);
nr_susp_pages, nr_susp_pages, MCU_AS_NR, true);
fail_pmod_map_fail:
vpfn = CSG_REG_SUSP_BUF_VPFN(reg, nr_susp_pages);
kbase_mmu_teardown_pages(kbdev, &kbdev->csf.mcu_mmu, vpfn, shared_regs->dummy_phys,
nr_susp_pages, MCU_AS_NR, true);
nr_susp_pages, nr_susp_pages, MCU_AS_NR, true);
fail_susp_map_fail:
mutex_lock(&kbdev->csf.reg_lock);
kbase_remove_va_region(kbdev, reg);
@@ -701,15 +702,16 @@ static void shared_mcu_csg_reg_term(struct kbase_device *kbdev,
for (i = 0; i < nr_csis; i++) {
vpfn = CSG_REG_USERIO_VPFN(reg, i, nr_susp_pages);
kbase_mmu_teardown_pages(kbdev, &kbdev->csf.mcu_mmu, vpfn, shared_regs->dummy_phys,
KBASEP_NUM_CS_USER_IO_PAGES, MCU_AS_NR, true);
KBASEP_NUM_CS_USER_IO_PAGES, KBASEP_NUM_CS_USER_IO_PAGES,
MCU_AS_NR, true);
}
vpfn = CSG_REG_PMOD_BUF_VPFN(reg, nr_susp_pages);
kbase_mmu_teardown_pages(kbdev, &kbdev->csf.mcu_mmu, vpfn, shared_regs->dummy_phys,
nr_susp_pages, MCU_AS_NR, true);
nr_susp_pages, nr_susp_pages, MCU_AS_NR, true);
vpfn = CSG_REG_SUSP_BUF_VPFN(reg, nr_susp_pages);
kbase_mmu_teardown_pages(kbdev, &kbdev->csf.mcu_mmu, vpfn, shared_regs->dummy_phys,
nr_susp_pages, MCU_AS_NR, true);
nr_susp_pages, nr_susp_pages, MCU_AS_NR, true);
mutex_lock(&kbdev->csf.reg_lock);
kbase_remove_va_region(kbdev, reg);
@@ -738,7 +740,7 @@ int kbase_csf_mcu_shared_regs_data_init(struct kbase_device *kbdev)
return -ENOMEM;
if (kbase_mem_pool_alloc_pages(&kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], 1,
&shared_regs->dummy_phys[0], false) <= 0)
&shared_regs->dummy_phys[0], false, NULL) <= 0)
return -ENOMEM;
shared_regs->dummy_phys_allocated = true;

View File

@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
* (C) COPYRIGHT 2018-2022 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -31,10 +31,6 @@
* Begin register sets
*/
/* DOORBELLS base address */
#define DOORBELLS_BASE 0x0080000
#define DOORBELLS_REG(r) (DOORBELLS_BASE + (r))
/* CS_KERNEL_INPUT_BLOCK base address */
#define CS_KERNEL_INPUT_BLOCK_BASE 0x0000
#define CS_KERNEL_INPUT_BLOCK_REG(r) (CS_KERNEL_INPUT_BLOCK_BASE + (r))
@@ -71,10 +67,6 @@
#define GLB_OUTPUT_BLOCK_BASE 0x0000
#define GLB_OUTPUT_BLOCK_REG(r) (GLB_OUTPUT_BLOCK_BASE + (r))
/* USER base address */
#define USER_BASE 0x0010000
#define USER_REG(r) (USER_BASE + (r))
/* End register sets */
/*
@@ -267,9 +259,6 @@
#define GLB_DEBUG_ARG_OUT0 0x0FE0
#endif /* CONFIG_MALI_CORESIGHT */
/* USER register offsets */
#define LATEST_FLUSH 0x0000 /* () Flush ID of latest clean-and-invalidate operation */
/* End register offsets */
/* CS_KERNEL_INPUT_BLOCK register set definitions */
@@ -728,6 +717,27 @@
#define CS_FAULT_EXCEPTION_TYPE_ADDR_RANGE_FAULT 0x5A
#define CS_FAULT_EXCEPTION_TYPE_IMPRECISE_FAULT 0x5B
#define CS_FAULT_EXCEPTION_TYPE_RESOURCE_EVICTION_TIMEOUT 0x69
#define CS_FAULT_EXCEPTION_TYPE_TRANSLATION_FAULT_L0 0xC0
#define CS_FAULT_EXCEPTION_TYPE_TRANSLATION_FAULT_L1 0xC1
#define CS_FAULT_EXCEPTION_TYPE_TRANSLATION_FAULT_L2 0xC2
#define CS_FAULT_EXCEPTION_TYPE_TRANSLATION_FAULT_L3 0xC3
#define CS_FAULT_EXCEPTION_TYPE_TRANSLATION_FAULT_L4 0xC4
#define CS_FAULT_EXCEPTION_TYPE_PERMISSION_FAULT_0 0xC8
#define CS_FAULT_EXCEPTION_TYPE_PERMISSION_FAULT_1 0xC9
#define CS_FAULT_EXCEPTION_TYPE_PERMISSION_FAULT_2 0xCA
#define CS_FAULT_EXCEPTION_TYPE_PERMISSION_FAULT_3 0xCB
#define CS_FAULT_EXCEPTION_TYPE_ACCESS_FLAG_1 0xD9
#define CS_FAULT_EXCEPTION_TYPE_ACCESS_FLAG_2 0xDA
#define CS_FAULT_EXCEPTION_TYPE_ACCESS_FLAG_3 0xDB
#define CS_FAULT_EXCEPTION_TYPE_ADDRESS_SIZE_FAULT_IN 0xE0
#define CS_FAULT_EXCEPTION_TYPE_ADDRESS_SIZE_FAULT_OUT_0 0xE4
#define CS_FAULT_EXCEPTION_TYPE_ADDRESS_SIZE_FAULT_OUT_1 0xE5
#define CS_FAULT_EXCEPTION_TYPE_ADDRESS_SIZE_FAULT_OUT_2 0xE6
#define CS_FAULT_EXCEPTION_TYPE_ADDRESS_SIZE_FAULT_OUT_3 0xE7
#define CS_FAULT_EXCEPTION_TYPE_MEMORY_ATTRIBUTE_FAULT_0 0xE8
#define CS_FAULT_EXCEPTION_TYPE_MEMORY_ATTRIBUTE_FAULT_1 0xE9
#define CS_FAULT_EXCEPTION_TYPE_MEMORY_ATTRIBUTE_FAULT_2 0xEA
#define CS_FAULT_EXCEPTION_TYPE_MEMORY_ATTRIBUTE_FAULT_3 0xEB
/* End of CS_FAULT_EXCEPTION_TYPE values */
#define CS_FAULT_EXCEPTION_DATA_SHIFT 8
#define CS_FAULT_EXCEPTION_DATA_MASK (0xFFFFFF << CS_FAULT_EXCEPTION_DATA_SHIFT)

View File

@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
* (C) COPYRIGHT 2018-2022 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -1562,11 +1562,13 @@ static void program_cs(struct kbase_device *kbdev,
WARN_ON(csi_index >= ginfo->stream_num))
return;
assign_user_doorbell_to_queue(kbdev, queue);
if (queue->doorbell_nr == KBASEP_USER_DB_NR_INVALID)
return;
if (queue->enabled) {
assign_user_doorbell_to_queue(kbdev, queue);
if (queue->doorbell_nr == KBASEP_USER_DB_NR_INVALID)
return;
WARN_ON(queue->doorbell_nr != queue->group->doorbell_nr);
WARN_ON(queue->doorbell_nr != queue->group->doorbell_nr);
}
if (queue->enabled && queue_group_suspended_locked(group))
program_cs_extract_init(queue);
@@ -1868,6 +1870,7 @@ static void halt_csg_slot(struct kbase_queue_group *group, bool suspend)
unsigned long flags;
struct kbase_csf_cmd_stream_group_info *ginfo =
&global_iface->groups[slot];
u32 halt_cmd = suspend ? CSG_REQ_STATE_SUSPEND :
CSG_REQ_STATE_TERMINATE;
@@ -1885,8 +1888,8 @@ static void halt_csg_slot(struct kbase_queue_group *group, bool suspend)
csg_slot[slot].trigger_jiffies = jiffies;
KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_STOP_REQ, group, halt_cmd);
KBASE_TLSTREAM_TL_KBASE_DEVICE_HALT_CSG(
kbdev, kbdev->gpu_props.props.raw_props.gpu_id, slot);
KBASE_TLSTREAM_TL_KBASE_DEVICE_HALTING_CSG(
kbdev, kbdev->gpu_props.props.raw_props.gpu_id, slot, suspend);
}
}
@@ -3441,6 +3444,9 @@ static void program_suspending_csg_slots(struct kbase_device *kbdev)
/* The on slot csg is now stopped */
clear_bit(i, slot_mask);
KBASE_TLSTREAM_TL_KBASE_DEVICE_SUSPEND_CSG(
kbdev, kbdev->gpu_props.props.raw_props.gpu_id, i);
if (likely(group)) {
bool as_fault;
/* Only do save/cleanup if the
@@ -5076,6 +5082,9 @@ static int wait_csg_slots_suspend(struct kbase_device *kbdev, unsigned long *slo
/* The on slot csg is now stopped */
clear_bit(i, slot_mask_local);
KBASE_TLSTREAM_TL_KBASE_DEVICE_SUSPEND_CSG(
kbdev, kbdev->gpu_props.props.raw_props.gpu_id, i);
group = scheduler->csg_slots[i].resident_group;
if (likely(group)) {
/* Only do save/cleanup if the
@@ -5134,8 +5143,13 @@ static void evict_lru_or_blocked_csg(struct kbase_device *kbdev)
if (all_addr_spaces_used) {
for (i = 0; i != total_csg_slots; ++i) {
if (scheduler->csg_slots[i].resident_group != NULL)
if (scheduler->csg_slots[i].resident_group != NULL) {
if (WARN_ON(scheduler->csg_slots[i].resident_group->kctx->as_nr <
0))
continue;
as_usage[scheduler->csg_slots[i].resident_group->kctx->as_nr]++;
}
}
}
@@ -5156,6 +5170,9 @@ static void evict_lru_or_blocked_csg(struct kbase_device *kbdev)
(group->priority != BASE_QUEUE_GROUP_PRIORITY_REALTIME) &&
((lru_idle_group == NULL) ||
(lru_idle_group->prepared_seq_num < group->prepared_seq_num))) {
if (WARN_ON(group->kctx->as_nr < 0))
continue;
/* If all address spaces are used, we need to ensure the group does not
* share the AS with other active CSGs. Or CSG would be freed without AS
* and this optimization would not work.

View File

@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
* (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -228,11 +228,11 @@ static void remove_unlinked_chunk(struct kbase_context *kctx,
kbase_vunmap(kctx, &chunk->map);
/* KBASE_REG_DONT_NEED regions will be confused with ephemeral regions (inc freed JIT
* regions), and so we must clear that flag too before freeing.
* For "no user free", we check that the refcount is 1 as it is a shrinkable region;
* For "no user free count", we check that the count is 1 as it is a shrinkable region;
* no other code part within kbase can take a reference to it.
*/
WARN_ON(chunk->region->no_user_free_refcnt > 1);
kbase_va_region_no_user_free_put(kctx, chunk->region);
WARN_ON(atomic_read(&chunk->region->no_user_free_count) > 1);
kbase_va_region_no_user_free_dec(chunk->region);
#if !defined(CONFIG_MALI_VECTOR_DUMP)
chunk->region->flags &= ~KBASE_REG_DONT_NEED;
#endif
@@ -315,8 +315,8 @@ static struct kbase_csf_tiler_heap_chunk *alloc_new_chunk(struct kbase_context *
* It should be fine and not a security risk if we let the region leak till
* region tracker termination in such a case.
*/
if (unlikely(chunk->region->no_user_free_refcnt > 1)) {
dev_err(kctx->kbdev->dev, "Chunk region has no_user_free_refcnt > 1!\n");
if (unlikely(atomic_read(&chunk->region->no_user_free_count) > 1)) {
dev_err(kctx->kbdev->dev, "Chunk region has no_user_free_count > 1!\n");
goto unroll_region;
}
@@ -371,7 +371,7 @@ unroll_region:
/* KBASE_REG_DONT_NEED regions will be confused with ephemeral regions (inc freed JIT
* regions), and so we must clear that flag too before freeing.
*/
kbase_va_region_no_user_free_put(kctx, chunk->region);
kbase_va_region_no_user_free_dec(chunk->region);
#if !defined(CONFIG_MALI_VECTOR_DUMP)
chunk->region->flags &= ~KBASE_REG_DONT_NEED;
#endif
@@ -531,7 +531,7 @@ static void delete_heap(struct kbase_csf_tiler_heap *heap)
if (heap->buf_desc_reg) {
kbase_vunmap(kctx, &heap->buf_desc_map);
kbase_gpu_vm_lock(kctx);
kbase_va_region_no_user_free_put(kctx, heap->buf_desc_reg);
kbase_va_region_no_user_free_dec(heap->buf_desc_reg);
kbase_gpu_vm_unlock(kctx);
}
@@ -741,7 +741,8 @@ int kbase_csf_tiler_heap_init(struct kbase_context *const kctx, u32 const chunk_
*/
heap->buf_desc_va = buf_desc_va;
heap->buf_desc_reg = kbase_va_region_no_user_free_get(kctx, buf_desc_reg);
heap->buf_desc_reg = buf_desc_reg;
kbase_va_region_no_user_free_inc(buf_desc_reg);
vmap_ptr = kbase_vmap_reg(kctx, buf_desc_reg, buf_desc_va, TILER_BUF_DESC_SIZE,
KBASE_REG_CPU_RD, &heap->buf_desc_map,
@@ -834,7 +835,7 @@ heap_context_alloc_failed:
buf_desc_vmap_failed:
if (heap->buf_desc_reg) {
kbase_gpu_vm_lock(kctx);
kbase_va_region_no_user_free_put(kctx, heap->buf_desc_reg);
kbase_va_region_no_user_free_dec(heap->buf_desc_reg);
kbase_gpu_vm_unlock(kctx);
}
buf_desc_not_suitable:
@@ -967,7 +968,12 @@ int kbase_csf_tiler_heap_alloc_new_chunk(struct kbase_context *kctx,
err = validate_allocation_request(heap, nr_in_flight, pending_frag_count);
if (unlikely(err)) {
dev_err(kctx->kbdev->dev,
/* The allocation request can be legitimate, but be invoked on a heap
* that has already reached the maximum pre-configured capacity. This
* is useful debug information, but should not be treated as an error,
* since the request will be re-sent at a later point.
*/
dev_dbg(kctx->kbdev->dev,
"Not allocating new chunk for heap 0x%llX due to current heap state (err %d)",
gpu_heap_va, err);
mutex_unlock(&kctx->csf.tiler_heaps.lock);

View File

@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
* (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -31,9 +31,7 @@
#include "mali_kbase_pm.h"
#include "mali_kbase_hwaccess_time.h"
#include <linux/gcd.h>
#include <linux/math64.h>
#include <asm/arch_timer.h>
#if IS_ENABLED(CONFIG_DEBUG_FS)
#include "tl/mali_kbase_timeline_priv.h"
@@ -96,81 +94,6 @@ void kbase_csf_tl_reader_debugfs_init(struct kbase_device *kbdev)
}
#endif
/**
* get_cpu_gpu_time() - Get current CPU and GPU timestamps.
*
* @kbdev: Kbase device.
* @cpu_ts: Output CPU timestamp.
* @gpu_ts: Output GPU timestamp.
* @gpu_cycle: Output GPU cycle counts.
*/
static void get_cpu_gpu_time(
struct kbase_device *kbdev,
u64 *cpu_ts,
u64 *gpu_ts,
u64 *gpu_cycle)
{
struct timespec64 ts;
kbase_pm_context_active(kbdev);
kbase_backend_get_gpu_time(kbdev, gpu_cycle, gpu_ts, &ts);
kbase_pm_context_idle(kbdev);
if (cpu_ts)
*cpu_ts = ts.tv_sec * NSEC_PER_SEC + ts.tv_nsec;
}
/**
* kbase_ts_converter_init() - Initialize system timestamp converter.
*
* @self: System Timestamp Converter instance.
* @kbdev: Kbase device pointer
*
* Return: Zero on success, -1 otherwise.
*/
static int kbase_ts_converter_init(
struct kbase_ts_converter *self,
struct kbase_device *kbdev)
{
u64 cpu_ts = 0;
u64 gpu_ts = 0;
u64 freq;
u64 common_factor;
get_cpu_gpu_time(kbdev, &cpu_ts, &gpu_ts, NULL);
freq = arch_timer_get_cntfrq();
if (!freq) {
dev_warn(kbdev->dev, "arch_timer_get_rate() is zero!");
return -1;
}
common_factor = gcd(NSEC_PER_SEC, freq);
self->multiplier = div64_u64(NSEC_PER_SEC, common_factor);
self->divisor = div64_u64(freq, common_factor);
self->offset =
cpu_ts - div64_u64(gpu_ts * self->multiplier, self->divisor);
return 0;
}
/**
* kbase_ts_converter_convert() - Convert GPU timestamp to CPU timestamp.
*
* @self: System Timestamp Converter instance.
* @gpu_ts: System timestamp value to converter.
*
* Return: The CPU timestamp.
*/
static u64 __maybe_unused
kbase_ts_converter_convert(const struct kbase_ts_converter *self, u64 gpu_ts)
{
return div64_u64(gpu_ts * self->multiplier, self->divisor) +
self->offset;
}
/**
* tl_reader_overflow_notify() - Emit stream overflow tracepoint.
*
@@ -321,8 +244,8 @@ int kbase_csf_tl_reader_flush_buffer(struct kbase_csf_tl_reader *self)
{
struct kbase_csffw_tl_message *msg =
(struct kbase_csffw_tl_message *) csffw_data_it;
msg->timestamp = kbase_ts_converter_convert(&self->ts_converter,
msg->timestamp);
msg->timestamp =
kbase_backend_time_convert_gpu_to_cpu(kbdev, msg->timestamp);
}
/* Copy the message out to the tl_stream. */
@@ -396,9 +319,6 @@ static int tl_reader_init_late(
return -1;
}
if (kbase_ts_converter_init(&self->ts_converter, kbdev))
return -1;
self->kbdev = kbdev;
self->trace_buffer = tb;
self->tl_header.data = hdr;

View File

@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
* (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -39,37 +39,6 @@ struct firmware_trace_buffer;
struct kbase_tlstream;
struct kbase_device;
/**
* struct kbase_ts_converter - System timestamp to CPU timestamp converter state.
*
* @multiplier: Numerator of the converter's fraction.
* @divisor: Denominator of the converter's fraction.
* @offset: Converter's offset term.
*
* According to Generic timer spec, system timer:
* - Increments at a fixed frequency
* - Starts operating from zero
*
* Hence CPU time is a linear function of System Time.
*
* CPU_ts = alpha * SYS_ts + beta
*
* Where
* - alpha = 10^9/SYS_ts_freq
* - beta is calculated by two timer samples taken at the same time:
* beta = CPU_ts_s - SYS_ts_s * alpha
*
* Since alpha is a rational number, we minimizing possible
* rounding error by simplifying the ratio. Thus alpha is stored
* as a simple `multiplier / divisor` ratio.
*
*/
struct kbase_ts_converter {
u64 multiplier;
u64 divisor;
s64 offset;
};
/**
* struct kbase_csf_tl_reader - CSFFW timeline reader state.
*
@@ -106,7 +75,6 @@ struct kbase_csf_tl_reader {
size_t size;
size_t btc;
} tl_header;
struct kbase_ts_converter ts_converter;
bool got_first_event;
bool is_active;

View File

@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
* (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -123,6 +123,10 @@ static int kbase_backend_late_init(struct kbase_device *kbdev)
if (err)
goto fail_update_l2_features;
err = kbase_backend_time_init(kbdev);
if (err)
goto fail_update_l2_features;
init_waitqueue_head(&kbdev->hwaccess.backend.reset_wait);
kbase_pm_context_idle(kbdev);
@@ -285,8 +289,10 @@ static const struct kbase_device_init dev_init[] = {
"Dummy model initialization failed" },
#else /* !IS_ENABLED(CONFIG_MALI_REAL_HW) */
{ assign_irqs, NULL, "IRQ search failed" },
{ registers_map, registers_unmap, "Register map failed" },
#endif /* !IS_ENABLED(CONFIG_MALI_REAL_HW) */
#if !IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI)
{ registers_map, registers_unmap, "Register map failed" },
#endif /* !IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) */
{ power_control_init, power_control_term, "Power control initialization failed" },
{ kbase_device_io_history_init, kbase_device_io_history_term,
"Register access history initialization failed" },
@@ -359,7 +365,6 @@ static void kbase_device_term_partial(struct kbase_device *kbdev,
void kbase_device_term(struct kbase_device *kbdev)
{
kbdev->csf.mali_file_inode = NULL;
kbase_device_term_partial(kbdev, ARRAY_SIZE(dev_init));
kbase_mem_halt(kbdev);
}

View File

@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
* (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -100,6 +100,10 @@ static int kbase_backend_late_init(struct kbase_device *kbdev)
if (err)
goto fail_update_l2_features;
err = kbase_backend_time_init(kbdev);
if (err)
goto fail_update_l2_features;
init_waitqueue_head(&kbdev->hwaccess.backend.reset_wait);
/* Idle the GPU and/or cores, if the policy wants it to */
@@ -211,17 +215,19 @@ static void kbase_device_hwcnt_backend_jm_watchdog_term(struct kbase_device *kbd
static const struct kbase_device_init dev_init[] = {
#if !IS_ENABLED(CONFIG_MALI_REAL_HW)
{ kbase_gpu_device_create, kbase_gpu_device_destroy,
"Dummy model initialization failed" },
{ kbase_gpu_device_create, kbase_gpu_device_destroy, "Dummy model initialization failed" },
#else /* !IS_ENABLED(CONFIG_MALI_REAL_HW) */
{ assign_irqs, NULL, "IRQ search failed" },
{ registers_map, registers_unmap, "Register map failed" },
#endif /* !IS_ENABLED(CONFIG_MALI_REAL_HW) */
#if !IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI)
{ registers_map, registers_unmap, "Register map failed" },
#endif /* !IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) */
{ kbase_device_io_history_init, kbase_device_io_history_term,
"Register access history initialization failed" },
{ kbase_device_pm_init, kbase_device_pm_term, "Power management initialization failed" },
{ kbase_device_early_init, kbase_device_early_term, "Early device initialization failed" },
{ kbase_device_populate_max_freq, NULL, "Populating max frequency failed" },
{ kbase_pm_lowest_gpu_freq_init, NULL, "Lowest freq initialization failed" },
{ kbase_device_misc_init, kbase_device_misc_term,
"Miscellaneous device initialization failed" },
{ kbase_device_pcm_dev_init, kbase_device_pcm_dev_term,
@@ -237,7 +243,6 @@ static const struct kbase_device_init dev_init[] = {
"Timeline stream initialization failed" },
{ kbase_clk_rate_trace_manager_init, kbase_clk_rate_trace_manager_term,
"Clock rate trace manager initialization failed" },
{ kbase_pm_lowest_gpu_freq_init, NULL, "Lowest freq initialization failed" },
{ kbase_instr_backend_init, kbase_instr_backend_term,
"Instrumentation backend initialization failed" },
{ kbase_device_hwcnt_watchdog_if_init, kbase_device_hwcnt_watchdog_if_term,

View File

@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
* (C) COPYRIGHT 2010-2022 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -35,6 +35,7 @@
#include <mali_kbase.h>
#include <mali_kbase_defs.h>
#include <mali_kbase_hwaccess_instr.h>
#include <mali_kbase_hwaccess_time.h>
#include <mali_kbase_hw.h>
#include <mali_kbase_config_defaults.h>
#include <linux/priority_control_manager.h>
@@ -308,7 +309,8 @@ int kbase_device_misc_init(struct kbase_device * const kbdev)
#endif /* MALI_USE_CSF */
kbdev->mmu_mode = kbase_mmu_mode_get_aarch64();
kbdev->mmu_as_inactive_wait_time_ms =
kbase_get_timeout_ms(kbdev, MMU_AS_INACTIVE_WAIT_TIMEOUT);
mutex_init(&kbdev->kctx_list_lock);
INIT_LIST_HEAD(&kbdev->kctx_list);

View File

@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
* (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -105,6 +105,70 @@ const char *kbase_gpu_exception_name(u32 const exception_code)
case GPU_FAULTSTATUS_EXCEPTION_TYPE_GPU_CACHEABILITY_FAULT:
e = "GPU_CACHEABILITY_FAULT";
break;
/* MMU Fault */
case CS_FAULT_EXCEPTION_TYPE_TRANSLATION_FAULT_L0:
e = "TRANSLATION_FAULT at level 0";
break;
case CS_FAULT_EXCEPTION_TYPE_TRANSLATION_FAULT_L1:
e = "TRANSLATION_FAULT at level 1";
break;
case CS_FAULT_EXCEPTION_TYPE_TRANSLATION_FAULT_L2:
e = "TRANSLATION_FAULT at level 2";
break;
case CS_FAULT_EXCEPTION_TYPE_TRANSLATION_FAULT_L3:
e = "TRANSLATION_FAULT at level 3";
break;
case CS_FAULT_EXCEPTION_TYPE_TRANSLATION_FAULT_L4:
e = "TRANSLATION_FAULT";
break;
case CS_FAULT_EXCEPTION_TYPE_PERMISSION_FAULT_0:
e = "PERMISSION_FAULT at level 0";
break;
case CS_FAULT_EXCEPTION_TYPE_PERMISSION_FAULT_1:
e = "PERMISSION_FAULT at level 1";
break;
case CS_FAULT_EXCEPTION_TYPE_PERMISSION_FAULT_2:
e = "PERMISSION_FAULT at level 2";
break;
case CS_FAULT_EXCEPTION_TYPE_PERMISSION_FAULT_3:
e = "PERMISSION_FAULT at level 3";
break;
case CS_FAULT_EXCEPTION_TYPE_ACCESS_FLAG_1:
e = "ACCESS_FLAG at level 1";
break;
case CS_FAULT_EXCEPTION_TYPE_ACCESS_FLAG_2:
e = "ACCESS_FLAG at level 2";
break;
case CS_FAULT_EXCEPTION_TYPE_ACCESS_FLAG_3:
e = "ACCESS_FLAG at level 3";
break;
case CS_FAULT_EXCEPTION_TYPE_ADDRESS_SIZE_FAULT_IN:
e = "ADDRESS_SIZE_FAULT_IN";
break;
case CS_FAULT_EXCEPTION_TYPE_ADDRESS_SIZE_FAULT_OUT_0:
e = "ADDRESS_SIZE_FAULT_OUT_0 at level 0";
break;
case CS_FAULT_EXCEPTION_TYPE_ADDRESS_SIZE_FAULT_OUT_1:
e = "ADDRESS_SIZE_FAULT_OUT_1 at level 1";
break;
case CS_FAULT_EXCEPTION_TYPE_ADDRESS_SIZE_FAULT_OUT_2:
e = "ADDRESS_SIZE_FAULT_OUT_2 at level 2";
break;
case CS_FAULT_EXCEPTION_TYPE_ADDRESS_SIZE_FAULT_OUT_3:
e = "ADDRESS_SIZE_FAULT_OUT_3 at level 3";
break;
case CS_FAULT_EXCEPTION_TYPE_MEMORY_ATTRIBUTE_FAULT_0:
e = "MEMORY_ATTRIBUTE_FAULT_0 at level 0";
break;
case CS_FAULT_EXCEPTION_TYPE_MEMORY_ATTRIBUTE_FAULT_1:
e = "MEMORY_ATTRIBUTE_FAULT_1 at level 1";
break;
case CS_FAULT_EXCEPTION_TYPE_MEMORY_ATTRIBUTE_FAULT_2:
e = "MEMORY_ATTRIBUTE_FAULT_2 at level 2";
break;
case CS_FAULT_EXCEPTION_TYPE_MEMORY_ATTRIBUTE_FAULT_3:
e = "MEMORY_ATTRIBUTE_FAULT_3 at level 3";
break;
/* Any other exception code is unknown */
default:
e = "UNKNOWN";

View File

@@ -108,7 +108,6 @@
#define JOB_IRQ_JS_STATE 0x010 /* status==active and _next == busy snapshot from last JOB_IRQ_CLEAR */
#define JOB_IRQ_THROTTLE 0x014 /* cycles to delay delivering an interrupt externally. The JOB_IRQ_STATUS is NOT affected by this, just the delivery of the interrupt. */
#define JOB_SLOT0 0x800 /* Configuration registers for job slot 0 */
#define JOB_SLOT1 0x880 /* Configuration registers for job slot 1 */
#define JOB_SLOT2 0x900 /* Configuration registers for job slot 2 */
#define JOB_SLOT3 0x980 /* Configuration registers for job slot 3 */
@@ -125,8 +124,6 @@
#define JOB_SLOT14 0xF00 /* Configuration registers for job slot 14 */
#define JOB_SLOT15 0xF80 /* Configuration registers for job slot 15 */
#define JOB_SLOT_REG(n, r) (JOB_CONTROL_REG(JOB_SLOT0 + ((n) << 7)) + (r))
#define JS_XAFFINITY 0x1C /* (RO) Extended affinity mask for job slot n*/
#define JS_COMMAND 0x20 /* (WO) Command register for job slot n */

View File

@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
* (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -27,9 +27,9 @@
*
* @exception_code: exception code
*
* This function is called from the interrupt handler when a GPU fault occurs.
* This function is called by error handlers when GPU reports an error.
*
* Return: name associated with the exception code
* Return: Error string associated with the exception code
*/
const char *kbase_gpu_exception_name(u32 exception_code);

View File

@@ -51,9 +51,7 @@
#define MMU_FEATURES 0x014 /* (RO) MMU features */
#define AS_PRESENT 0x018 /* (RO) Address space slots present */
#define GPU_IRQ_RAWSTAT 0x020 /* (RW) */
#define GPU_IRQ_CLEAR 0x024 /* (WO) */
#define GPU_IRQ_MASK 0x028 /* (RW) */
#define GPU_IRQ_STATUS 0x02C /* (RO) */
#define GPU_COMMAND 0x030 /* (WO) */
#define GPU_STATUS 0x034 /* (RO) */
@@ -176,14 +174,9 @@
/* Job control registers */
#define JOB_IRQ_RAWSTAT 0x000 /* Raw interrupt status register */
#define JOB_IRQ_STATUS 0x00C /* Interrupt status register */
/* MMU control registers */
#define MMU_IRQ_CLEAR 0x004 /* (WO) Interrupt clear register */
#define MMU_IRQ_MASK 0x008 /* (RW) Interrupt mask register */
#define MMU_IRQ_STATUS 0x00C /* (RO) Interrupt status register */
#define MMU_AS1 0x440 /* Configuration registers for address space 1 */
#define MMU_AS2 0x480 /* Configuration registers for address space 2 */
#define MMU_AS3 0x4C0 /* Configuration registers for address space 3 */

View File

@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
* (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2021-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -289,6 +289,8 @@ kbasep_hwcnt_backend_csf_cc_initial_sample(struct kbase_hwcnt_backend_csf *backe
u64 cycle_counts[BASE_MAX_NR_CLOCKS_REGULATORS];
size_t clk;
memset(cycle_counts, 0, sizeof(cycle_counts));
/* Read cycle count from CSF interface for both clock domains. */
backend_csf->info->csf_if->get_gpu_cycle_count(backend_csf->info->csf_if->ctx, cycle_counts,
clk_enable_map);
@@ -308,6 +310,8 @@ static void kbasep_hwcnt_backend_csf_cc_update(struct kbase_hwcnt_backend_csf *b
u64 cycle_counts[BASE_MAX_NR_CLOCKS_REGULATORS];
size_t clk;
memset(cycle_counts, 0, sizeof(cycle_counts));
backend_csf->info->csf_if->assert_lock_held(backend_csf->info->csf_if->ctx);
backend_csf->info->csf_if->get_gpu_cycle_count(backend_csf->info->csf_if->ctx, cycle_counts,
@@ -558,7 +562,7 @@ static void kbasep_hwcnt_backend_csf_accumulate_samples(struct kbase_hwcnt_backe
u32 insert_index_to_stop)
{
u32 raw_idx;
unsigned long flags;
unsigned long flags = 0UL;
u8 *cpu_dump_base = (u8 *)backend_csf->ring_buf_cpu_base;
const size_t ring_buf_cnt = backend_csf->info->ring_buf_cnt;
const size_t buf_dump_bytes = backend_csf->info->prfcnt_info.dump_bytes;
@@ -639,7 +643,7 @@ static void kbasep_hwcnt_backend_watchdog_timer_cb(void *info)
{
struct kbase_hwcnt_backend_csf_info *csf_info = info;
struct kbase_hwcnt_backend_csf *backend_csf;
unsigned long flags;
unsigned long flags = 0UL;
csf_info->csf_if->lock(csf_info->csf_if->ctx, &flags);
@@ -658,8 +662,8 @@ static void kbasep_hwcnt_backend_watchdog_timer_cb(void *info)
/* 3. dump state indicates no other dumping is in progress. */
((backend_csf->dump_state == KBASE_HWCNT_BACKEND_CSF_DUMP_IDLE) ||
(backend_csf->dump_state == KBASE_HWCNT_BACKEND_CSF_DUMP_COMPLETED))) {
u32 extract_index;
u32 insert_index;
u32 extract_index = 0U;
u32 insert_index = 0U;
/* Read the raw extract and insert indexes from the CSF interface. */
csf_info->csf_if->get_indexes(csf_info->csf_if->ctx, &extract_index, &insert_index);
@@ -700,11 +704,11 @@ static void kbasep_hwcnt_backend_watchdog_timer_cb(void *info)
*/
static void kbasep_hwcnt_backend_csf_dump_worker(struct work_struct *work)
{
unsigned long flags;
unsigned long flags = 0ULL;
struct kbase_hwcnt_backend_csf *backend_csf;
u32 insert_index_to_acc;
u32 extract_index;
u32 insert_index;
u32 extract_index = 0U;
u32 insert_index = 0U;
WARN_ON(!work);
backend_csf = container_of(work, struct kbase_hwcnt_backend_csf, hwc_dump_work);
@@ -776,10 +780,10 @@ static void kbasep_hwcnt_backend_csf_dump_worker(struct work_struct *work)
*/
static void kbasep_hwcnt_backend_csf_threshold_worker(struct work_struct *work)
{
unsigned long flags;
unsigned long flags = 0ULL;
struct kbase_hwcnt_backend_csf *backend_csf;
u32 extract_index;
u32 insert_index;
u32 extract_index = 0U;
u32 insert_index = 0U;
WARN_ON(!work);
@@ -920,7 +924,7 @@ static int kbasep_hwcnt_backend_csf_dump_enable(struct kbase_hwcnt_backend *back
const struct kbase_hwcnt_enable_map *enable_map)
{
int errcode;
unsigned long flags;
unsigned long flags = 0UL;
struct kbase_hwcnt_backend_csf *backend_csf = (struct kbase_hwcnt_backend_csf *)backend;
if (!backend_csf)
@@ -954,7 +958,7 @@ static void kbasep_hwcnt_backend_csf_wait_enable_transition_complete(
/* CSF backend implementation of kbase_hwcnt_backend_dump_disable_fn */
static void kbasep_hwcnt_backend_csf_dump_disable(struct kbase_hwcnt_backend *backend)
{
unsigned long flags;
unsigned long flags = 0UL;
struct kbase_hwcnt_backend_csf *backend_csf = (struct kbase_hwcnt_backend_csf *)backend;
bool do_disable = false;
@@ -1050,7 +1054,7 @@ static void kbasep_hwcnt_backend_csf_dump_disable(struct kbase_hwcnt_backend *ba
static int kbasep_hwcnt_backend_csf_dump_request(struct kbase_hwcnt_backend *backend,
u64 *dump_time_ns)
{
unsigned long flags;
unsigned long flags = 0UL;
struct kbase_hwcnt_backend_csf *backend_csf = (struct kbase_hwcnt_backend_csf *)backend;
bool do_request = false;
bool watchdog_dumping = false;
@@ -1157,7 +1161,7 @@ static int kbasep_hwcnt_backend_csf_dump_request(struct kbase_hwcnt_backend *bac
/* CSF backend implementation of kbase_hwcnt_backend_dump_wait_fn */
static int kbasep_hwcnt_backend_csf_dump_wait(struct kbase_hwcnt_backend *backend)
{
unsigned long flags;
unsigned long flags = 0UL;
struct kbase_hwcnt_backend_csf *backend_csf = (struct kbase_hwcnt_backend_csf *)backend;
int errcode;
@@ -1365,7 +1369,7 @@ alloc_error:
static int kbasep_hwcnt_backend_csf_init(const struct kbase_hwcnt_backend_info *info,
struct kbase_hwcnt_backend **out_backend)
{
unsigned long flags;
unsigned long flags = 0UL;
struct kbase_hwcnt_backend_csf *backend_csf = NULL;
struct kbase_hwcnt_backend_csf_info *csf_info = (struct kbase_hwcnt_backend_csf_info *)info;
int errcode;
@@ -1407,7 +1411,7 @@ static int kbasep_hwcnt_backend_csf_init(const struct kbase_hwcnt_backend_info *
/* CSF backend implementation of kbase_hwcnt_backend_term_fn */
static void kbasep_hwcnt_backend_csf_term(struct kbase_hwcnt_backend *backend)
{
unsigned long flags;
unsigned long flags = 0UL;
struct kbase_hwcnt_backend_csf *backend_csf = (struct kbase_hwcnt_backend_csf *)backend;
if (!backend)
@@ -1619,7 +1623,7 @@ void kbase_hwcnt_backend_csf_protm_exited(struct kbase_hwcnt_backend_interface *
void kbase_hwcnt_backend_csf_on_unrecoverable_error(struct kbase_hwcnt_backend_interface *iface)
{
unsigned long flags;
unsigned long flags = 0UL;
struct kbase_hwcnt_backend_csf_info *csf_info;
csf_info = (struct kbase_hwcnt_backend_csf_info *)iface->info;
@@ -1639,7 +1643,7 @@ void kbase_hwcnt_backend_csf_on_unrecoverable_error(struct kbase_hwcnt_backend_i
void kbase_hwcnt_backend_csf_on_before_reset(struct kbase_hwcnt_backend_interface *iface)
{
unsigned long flags;
unsigned long flags = 0UL;
struct kbase_hwcnt_backend_csf_info *csf_info;
struct kbase_hwcnt_backend_csf *backend_csf;

View File

@@ -329,7 +329,7 @@ static int kbasep_hwcnt_backend_csf_if_fw_ring_buf_alloc(
/* Get physical page for the buffer */
ret = kbase_mem_pool_alloc_pages(&kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], num_pages,
phys, false);
phys, false, NULL);
if (ret != num_pages)
goto phys_mem_pool_alloc_error;
@@ -482,7 +482,8 @@ kbasep_hwcnt_backend_csf_if_fw_ring_buf_free(struct kbase_hwcnt_backend_csf_if_c
WARN_ON(kbase_mmu_teardown_pages(fw_ctx->kbdev, &fw_ctx->kbdev->csf.mcu_mmu,
gpu_va_base >> PAGE_SHIFT, fw_ring_buf->phys,
fw_ring_buf->num_pages, MCU_AS_NR, true));
fw_ring_buf->num_pages, fw_ring_buf->num_pages,
MCU_AS_NR, true));
vunmap(fw_ring_buf->cpu_dump_base);

View File

@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
* (C) COPYRIGHT 2018, 2020-2022 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -362,7 +362,7 @@ static int kbasep_hwcnt_accumulator_dump(struct kbase_hwcnt_context *hctx, u64 *
bool cur_map_any_enabled;
struct kbase_hwcnt_enable_map *cur_map;
bool new_map_any_enabled = false;
u64 dump_time_ns;
u64 dump_time_ns = 0;
struct kbase_hwcnt_accumulator *accum;
WARN_ON(!hctx);

View File

@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
* (C) COPYRIGHT 2016-2022 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2016-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -455,16 +455,14 @@ static const struct kbase_ipa_group ipa_groups_def_tbax[] = {
},
};
#define IPA_POWER_MODEL_OPS(gpu, init_token) \
const struct kbase_ipa_model_ops kbase_ ## gpu ## _ipa_model_ops = { \
.name = "mali-" #gpu "-power-model", \
.init = kbase_ ## init_token ## _power_model_init, \
.term = kbase_ipa_vinstr_common_model_term, \
.get_dynamic_coeff = kbase_ipa_vinstr_dynamic_coeff, \
.reset_counter_data = kbase_ipa_vinstr_reset_data, \
}; \
KBASE_EXPORT_TEST_API(kbase_ ## gpu ## _ipa_model_ops)
#define IPA_POWER_MODEL_OPS(gpu, init_token) \
static const struct kbase_ipa_model_ops kbase_##gpu##_ipa_model_ops = { \
.name = "mali-" #gpu "-power-model", \
.init = kbase_##init_token##_power_model_init, \
.term = kbase_ipa_vinstr_common_model_term, \
.get_dynamic_coeff = kbase_ipa_vinstr_dynamic_coeff, \
.reset_counter_data = kbase_ipa_vinstr_reset_data, \
}
#define STANDARD_POWER_MODEL(gpu, reference_voltage) \
static int kbase_ ## gpu ## _power_model_init(\

View File

@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
* (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -127,10 +127,17 @@
/**
* enum kbase_timeout_selector - The choice of which timeout to get scaled
* using the lowest GPU frequency.
* @MMU_AS_INACTIVE_WAIT_TIMEOUT: Maximum waiting time in ms for the completion
* of a MMU operation
* @JM_DEFAULT_JS_FREE_TIMEOUT: Maximum timeout to wait for JS_COMMAND_NEXT
* to be updated on HW side so a Job Slot is
* considered free.
* @KBASE_TIMEOUT_SELECTOR_COUNT: Number of timeout selectors. Must be last in
* the enum.
*/
enum kbase_timeout_selector {
MMU_AS_INACTIVE_WAIT_TIMEOUT,
JM_DEFAULT_JS_FREE_TIMEOUT,
/* Must be the last in the enum */
KBASE_TIMEOUT_SELECTOR_COUNT
@@ -852,6 +859,10 @@ struct jsctx_queue {
* @pf_data: Data relating to Page fault.
* @bf_data: Data relating to Bus fault.
* @current_setup: Stores the MMU configuration for this address space.
* @is_unresponsive: Flag to indicate MMU is not responding.
* Set if a MMU command isn't completed within
* &kbase_device:mmu_as_inactive_wait_time_ms.
* Clear by kbase_ctx_sched_restore_all_as() after GPU reset completes.
*/
struct kbase_as {
int number;
@@ -861,6 +872,7 @@ struct kbase_as {
struct kbase_fault pf_data;
struct kbase_fault bf_data;
struct kbase_mmu_setup current_setup;
bool is_unresponsive;
};
#endif /* _KBASE_JM_DEFS_H_ */

View File

@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
* (C) COPYRIGHT 2011-2018, 2020-2021 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2011-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -277,6 +277,7 @@ typedef u32 kbase_atom_ordering_flag_t;
* @nr_contexts_runnable:Number of contexts that can either be pulled from or
* arecurrently running
* @soft_job_timeout_ms:Value for JS_SOFT_JOB_TIMEOUT
* @js_free_wait_time_ms: Maximum waiting time in ms for a Job Slot to be seen free.
* @queue_mutex: Queue Lock, used to access the Policy's queue of contexts
* independently of the Run Pool.
* Of course, you don't need the Run Pool lock to access this.
@@ -329,6 +330,8 @@ struct kbasep_js_device_data {
u32 nr_contexts_pullable;
atomic_t nr_contexts_runnable;
atomic_t soft_job_timeout_ms;
u32 js_free_wait_time_ms;
struct mutex queue_mutex;
/*
* Run Pool mutex, for managing contexts within the runpool.

View File

@@ -40,6 +40,7 @@ enum base_hw_feature {
BASE_HW_FEATURE_FLUSH_INV_SHADER_OTHER,
BASE_HW_FEATURE_CORE_FEATURES,
BASE_HW_FEATURE_PBHA_HWU,
BASE_HW_FEATURE_LARGE_PAGE_ALLOC,
BASE_HW_FEATURE_END
};

View File

@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
* (C) COPYRIGHT 2014-2022 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2014-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -796,6 +796,19 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tTUx_r1p2
BASE_HW_ISSUE_END
};
__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tTUx_r1p3[] = {
BASE_HW_ISSUE_TSIX_2033,
BASE_HW_ISSUE_TTRX_1337,
BASE_HW_ISSUE_GPU2019_3878,
BASE_HW_ISSUE_TURSEHW_2716,
BASE_HW_ISSUE_GPU2019_3901,
BASE_HW_ISSUE_GPU2021PRO_290,
BASE_HW_ISSUE_TITANHW_2710,
BASE_HW_ISSUE_TITANHW_2679,
BASE_HW_ISSUE_GPU2022PRO_148,
BASE_HW_ISSUE_END
};
__attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tTIx[] = {
BASE_HW_ISSUE_TSIX_2033,
BASE_HW_ISSUE_TTRX_1337,

View File

@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
* (C) COPYRIGHT 2013-2022 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2013-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -221,6 +221,16 @@ enum {
*/
#define JM_DEFAULT_RESET_TIMEOUT_MS (1) /* 1 ms */
/* Default timeout in clock cycles to be used when checking if JS_COMMAND_NEXT
* is updated on HW side so a Job Slot is considered free.
* This timeout will only take effect on GPUs with low value for the minimum
* GPU clock frequency (<= 100MHz).
*
* Based on 1ms timeout at 100MHz. Will default to 0ms on GPUs with higher
* value for minimum GPU clock frequency.
*/
#define JM_DEFAULT_JS_FREE_TIMEOUT_CYCLES (100000)
#endif /* MALI_USE_CSF */
/* Default timeslice that a context is scheduled in for, in nanoseconds.
@@ -257,5 +267,12 @@ enum {
*/
#define DEFAULT_IR_THRESHOLD (192)
/* Waiting time in clock cycles for the completion of a MMU operation.
*
* Ideally 1.6M GPU cycles required for the L2 cache (512KiB slice) flush.
*
* As a pessimistic value, 50M GPU cycles ( > 30 times bigger ) is chosen.
* It corresponds to 0.5s in GPU @ 100Mhz.
*/
#define MMU_AS_INACTIVE_WAIT_TIMEOUT_CYCLES ((u64)50 * 1024 * 1024)
#endif /* _KBASE_CONFIG_DEFAULTS_H_ */

View File

@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
* (C) COPYRIGHT 2010-2022 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -1573,7 +1573,6 @@ static int kbasep_ioctl_cs_cpu_queue_dump(struct kbase_context *kctx,
cpu_queue_info->size);
}
#define POWER_DOWN_LATEST_FLUSH_VALUE ((u32)1)
static int kbase_ioctl_read_user_page(struct kbase_context *kctx,
union kbase_ioctl_read_user_page *user_page)
{
@@ -2059,6 +2058,7 @@ static long kbase_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
struct kbase_ioctl_cs_cpu_queue_info,
kctx);
break;
/* This IOCTL will be kept for backward compatibility */
case KBASE_IOCTL_READ_USER_PAGE:
KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_READ_USER_PAGE, kbase_ioctl_read_user_page,
union kbase_ioctl_read_user_page, kctx);
@@ -2225,7 +2225,10 @@ KBASE_EXPORT_TEST_API(kbase_event_wakeup);
#if MALI_USE_CSF
int kbase_event_pending(struct kbase_context *ctx)
{
WARN_ON_ONCE(!ctx);
KBASE_DEBUG_ASSERT(ctx);
if (unlikely(!ctx))
return -EPERM;
return (atomic_read(&ctx->event_count) != 0) ||
kbase_csf_event_error_pending(ctx) ||
@@ -2236,6 +2239,9 @@ int kbase_event_pending(struct kbase_context *ctx)
{
KBASE_DEBUG_ASSERT(ctx);
if (unlikely(!ctx))
return -EPERM;
return (atomic_read(&ctx->event_count) != 0) ||
(atomic_read(&ctx->event_closed) != 0);
}
@@ -4284,7 +4290,7 @@ void kbase_protected_mode_term(struct kbase_device *kbdev)
kfree(kbdev->protected_dev);
}
#if !IS_ENABLED(CONFIG_MALI_REAL_HW)
#if IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI)
static int kbase_common_reg_map(struct kbase_device *kbdev)
{
return 0;
@@ -4292,7 +4298,7 @@ static int kbase_common_reg_map(struct kbase_device *kbdev)
static void kbase_common_reg_unmap(struct kbase_device * const kbdev)
{
}
#else /* !IS_ENABLED(CONFIG_MALI_REAL_HW) */
#else /* !IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) */
static int kbase_common_reg_map(struct kbase_device *kbdev)
{
int err = 0;
@@ -4328,7 +4334,7 @@ static void kbase_common_reg_unmap(struct kbase_device * const kbdev)
kbdev->reg_size = 0;
}
}
#endif /* !IS_ENABLED(CONFIG_MALI_REAL_HW) */
#endif /* !IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) */
int registers_map(struct kbase_device * const kbdev)
{

View File

@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
* (C) COPYRIGHT 2017-2022 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2017-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -242,6 +242,7 @@ void kbase_ctx_sched_restore_all_as(struct kbase_device *kbdev)
for (i = 0; i != kbdev->nr_hw_address_spaces; ++i) {
struct kbase_context *kctx;
kbdev->as[i].is_unresponsive = false;
#if MALI_USE_CSF
if ((i == MCU_AS_NR) && kbdev->csf.firmware_inited) {
kbase_mmu_update(kbdev, &kbdev->csf.mcu_mmu,

View File

@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
* (C) COPYRIGHT 2022 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2022-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -26,7 +26,7 @@
#include "mali_kbase_debug_mem_allocs.h"
#include "mali_kbase.h"
#include <string.h>
#include <linux/string.h>
#include <linux/list.h>
#include <linux/file.h>

View File

@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
* (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -90,11 +90,10 @@ set_attr_from_string(char *const buf, void *const array, size_t const nelems,
int kbase_debugfs_string_validator(char *const buf)
{
size_t index;
int err = 0;
char *ptr = buf;
for (index = 0; *ptr; ++index) {
while (*ptr) {
unsigned long test_number;
size_t len;

View File

@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
* (C) COPYRIGHT 2011-2022 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2011-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -245,12 +245,25 @@ struct kbase_fault {
bool protected_mode;
};
/** Maximum number of memory pages that should be allocated for the array
* of pointers to free PGDs.
*
* This number has been pre-calculated to deal with the maximum allocation
* size expressed by the default value of KBASE_MEM_ALLOC_MAX_SIZE.
* This is supposed to be enough for almost the entirety of MMU operations.
* Any size greater than KBASE_MEM_ALLOC_MAX_SIZE requires being broken down
* into multiple iterations, each dealing with at most KBASE_MEM_ALLOC_MAX_SIZE
* bytes.
*
* Please update this value if KBASE_MEM_ALLOC_MAX_SIZE changes.
*/
#define MAX_PAGES_FOR_FREE_PGDS ((size_t)9)
/* Maximum number of pointers to free PGDs */
#define MAX_FREE_PGDS ((PAGE_SIZE / sizeof(struct page *)) * MAX_PAGES_FOR_FREE_PGDS)
/**
* struct kbase_mmu_table - object representing a set of GPU page tables
* @mmu_teardown_pages: Array containing pointers to 3 separate pages, used
* to cache the entries of top (L0) & intermediate level
* page tables (L1 & L2) to avoid repeated calls to
* kmap_atomic() during the MMU teardown.
* @mmu_lock: Lock to serialize the accesses made to multi level GPU
* page tables
* @pgd: Physical address of the page allocated for the top
@@ -262,14 +275,40 @@ struct kbase_fault {
* Valid range is 0..(MEMORY_GROUP_MANAGER_NR_GROUPS-1).
* @kctx: If this set of MMU tables belongs to a context then
* this is a back-reference to the context, otherwise
* it is NULL
* it is NULL.
* @scratch_mem: Scratch memory used for MMU operations, which are
* serialized by the @mmu_lock.
*/
struct kbase_mmu_table {
u64 *mmu_teardown_pages[MIDGARD_MMU_BOTTOMLEVEL];
struct mutex mmu_lock;
phys_addr_t pgd;
u8 group_id;
struct kbase_context *kctx;
union {
/**
* @teardown_pages: Scratch memory used for backup copies of whole
* PGD pages when tearing down levels upon
* termination of the MMU table.
*/
struct {
/**
* @levels: Array of PGD pages, large enough to copy one PGD
* for each level of the MMU table.
*/
u64 levels[MIDGARD_MMU_BOTTOMLEVEL][PAGE_SIZE / sizeof(u64)];
} teardown_pages;
/**
* @free_pgds: Scratch memory user for insertion, update and teardown
* operations to store a temporary list of PGDs to be freed
* at the end of the operation.
*/
struct {
/** @pgds: Array of pointers to PGDs to free. */
struct page *pgds[MAX_FREE_PGDS];
/** @head_index: Index of first free element in the PGDs array. */
size_t head_index;
} free_pgds;
} scratch_mem;
};
/**
@@ -293,6 +332,8 @@ struct kbase_reg_zone {
#include "jm/mali_kbase_jm_defs.h"
#endif
#include "mali_kbase_hwaccess_time.h"
static inline int kbase_as_has_bus_fault(struct kbase_as *as,
struct kbase_fault *fault)
{
@@ -762,6 +803,8 @@ struct kbase_mem_migrate {
* GPU adrress spaces assigned to them.
* @mmu_mask_change: Lock to serialize the access to MMU interrupt mask
* register used in the handling of Bus & Page faults.
* @pagesize_2mb: Boolean to determine whether 2MiB page sizes are
* supported and used where possible.
* @gpu_props: Object containing complete information about the
* configuration/properties of GPU HW device in use.
* @hw_issues_mask: List of SW workarounds for HW issues
@@ -807,6 +850,7 @@ struct kbase_mem_migrate {
* GPU reset.
* @lowest_gpu_freq_khz: Lowest frequency in KHz that the GPU can run at. Used
* to calculate suitable timeouts for wait operations.
* @backend_time: Kbase backend time related attributes.
* @cache_clean_in_progress: Set when a cache clean has been started, and
* cleared when it has finished. This prevents multiple
* cache cleans being done simultaneously.
@@ -1001,6 +1045,9 @@ struct kbase_mem_migrate {
* KCPU queue. These structures may outlive kbase module
* itself. Therefore, in such a case, a warning should be
* be produced.
* @mmu_as_inactive_wait_time_ms: Maximum waiting time in ms for the completion of
* a MMU operation
* @va_region_slab: kmem_cache (slab) for allocated kbase_va_region structures.
*/
struct kbase_device {
u32 hw_quirks_sc;
@@ -1057,6 +1104,8 @@ struct kbase_device {
spinlock_t mmu_mask_change;
bool pagesize_2mb;
struct kbase_gpu_props gpu_props;
unsigned long hw_issues_mask[(BASE_HW_ISSUE_END + BITS_PER_LONG - 1) / BITS_PER_LONG];
@@ -1110,6 +1159,10 @@ struct kbase_device {
u64 lowest_gpu_freq_khz;
#if MALI_USE_CSF
struct kbase_backend_time backend_time;
#endif
bool cache_clean_in_progress;
u32 cache_clean_queued;
wait_queue_head_t cache_clean_wait;
@@ -1308,6 +1361,8 @@ struct kbase_device {
#if MALI_USE_CSF && IS_ENABLED(CONFIG_SYNC_FILE)
atomic_t live_fence_metadata;
#endif
u32 mmu_as_inactive_wait_time_ms;
struct kmem_cache *va_region_slab;
};
/**
@@ -1661,11 +1716,13 @@ struct kbase_sub_alloc {
* is scheduled in and an atom is pulled from the context's per
* slot runnable tree in JM GPU or GPU command queue
* group is programmed on CSG slot in CSF GPU.
* @mm_update_lock: lock used for handling of special tracking page.
* @process_mm: Pointer to the memory descriptor of the process which
* created the context. Used for accounting the physical
* pages used for GPU allocations, done for the context,
* to the memory consumed by the process.
* to the memory consumed by the process. A reference is taken
* on this descriptor for the Userspace created contexts so that
* Kbase can safely access it to update the memory usage counters.
* The reference is dropped on context termination.
* @gpu_va_end: End address of the GPU va space (in 4KB page units)
* @running_total_tiler_heap_nr_chunks: Running total of number of chunks in all
* tiler heaps of the kbase context.
@@ -1787,6 +1844,10 @@ struct kbase_sub_alloc {
* @limited_core_mask: The mask that is applied to the affinity in case of atoms
* marked with BASE_JD_REQ_LIMITED_CORE_MASK.
* @platform_data: Pointer to platform specific per-context data.
* @task: Pointer to the task structure of the main thread of the process
* that created the Kbase context. It would be set only for the
* contexts created by the Userspace and not for the contexts
* created internally by the Kbase.
*
* A kernel base context is an entity among which the GPU is scheduled.
* Each context has its own GPU address space.
@@ -1874,8 +1935,7 @@ struct kbase_context {
atomic_t refcount;
spinlock_t mm_update_lock;
struct mm_struct __rcu *process_mm;
struct mm_struct *process_mm;
u64 gpu_va_end;
#if MALI_USE_CSF
u32 running_total_tiler_heap_nr_chunks;
@@ -1938,6 +1998,8 @@ struct kbase_context {
#if !MALI_USE_CSF
void *platform_data;
#endif
struct task_struct *task;
};
#ifdef CONFIG_MALI_CINSTR_GWT
@@ -2040,5 +2102,4 @@ static inline u64 kbase_get_lock_region_min_size_log2(struct kbase_gpu_props con
#define KBASE_AS_INACTIVE_MAX_LOOPS 100000000
/* Maximum number of loops polling the GPU PRFCNT_ACTIVE bit before we assume the GPU has hung */
#define KBASE_PRFCNT_ACTIVE_MAX_LOOPS 100000000
#endif /* _KBASE_DEFS_H_ */

View File

@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
* (C) COPYRIGHT 2010-2018, 2020-2022 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -32,6 +32,7 @@
#include <linux/list.h>
#include "mali_kbase_fence_defs.h"
#include "mali_kbase.h"
#include "mali_kbase_refcount_defs.h"
#if MALI_USE_CSF
/* Maximum number of characters in DMA fence timeline name. */
@@ -49,11 +50,7 @@
* @timeline_name: String of timeline name for associated fence object.
*/
struct kbase_kcpu_dma_fence_meta {
#if (KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE)
atomic_t refcount;
#else
refcount_t refcount;
#endif
kbase_refcount_t refcount;
struct kbase_device *kbdev;
int kctx_id;
char timeline_name[MAX_TIMELINE_NAME];
@@ -225,11 +222,7 @@ static inline struct kbase_kcpu_dma_fence *kbase_kcpu_dma_fence_get(struct dma_f
static inline void kbase_kcpu_dma_fence_meta_put(struct kbase_kcpu_dma_fence_meta *metadata)
{
#if (KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE)
if (atomic_dec_and_test(&metadata->refcount)) {
#else
if (refcount_dec_and_test(&metadata->refcount)) {
#endif
if (kbase_refcount_dec_and_test(&metadata->refcount)) {
atomic_dec(&metadata->kbdev->live_fence_metadata);
kfree(metadata);
}

View File

@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
* (C) COPYRIGHT 2010-2021 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2010-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -125,14 +125,17 @@ int kbase_gpu_gwt_stop(struct kbase_context *kctx)
return 0;
}
#if (KERNEL_VERSION(5, 13, 0) <= LINUX_VERSION_CODE)
static int list_cmp_function(void *priv, const struct list_head *a, const struct list_head *b)
#else
static int list_cmp_function(void *priv, struct list_head *a,
struct list_head *b)
#endif
{
struct kbasep_gwt_list_element *elementA = container_of(a,
struct kbasep_gwt_list_element, link);
struct kbasep_gwt_list_element *elementB = container_of(b,
struct kbasep_gwt_list_element, link);
const struct kbasep_gwt_list_element *elementA =
container_of(a, struct kbasep_gwt_list_element, link);
const struct kbasep_gwt_list_element *elementB =
container_of(b, struct kbasep_gwt_list_element, link);
CSTD_UNUSED(priv);

View File

@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
* (C) COPYRIGHT 2012-2022 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2012-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -232,6 +232,7 @@ static const enum base_hw_issue *kbase_hw_get_issues_for_new_id(
{ GPU_ID2_VERSION_MAKE(1, 0, 0), base_hw_issues_tTUx_r1p0 },
{ GPU_ID2_VERSION_MAKE(1, 1, 0), base_hw_issues_tTUx_r1p1 },
{ GPU_ID2_VERSION_MAKE(1, 2, 0), base_hw_issues_tTUx_r1p2 },
{ GPU_ID2_VERSION_MAKE(1, 3, 0), base_hw_issues_tTUx_r1p3 },
{ U32_MAX, NULL } } },
{ GPU_ID2_PRODUCT_LTUX,
@@ -239,6 +240,7 @@ static const enum base_hw_issue *kbase_hw_get_issues_for_new_id(
{ GPU_ID2_VERSION_MAKE(1, 0, 0), base_hw_issues_tTUx_r1p0 },
{ GPU_ID2_VERSION_MAKE(1, 1, 0), base_hw_issues_tTUx_r1p1 },
{ GPU_ID2_VERSION_MAKE(1, 2, 0), base_hw_issues_tTUx_r1p2 },
{ GPU_ID2_VERSION_MAKE(1, 3, 0), base_hw_issues_tTUx_r1p3 },
{ U32_MAX, NULL } } },
{ GPU_ID2_PRODUCT_TTIX,
@@ -303,21 +305,20 @@ static const enum base_hw_issue *kbase_hw_get_issues_for_new_id(
*/
issues = fallback_issues;
dev_warn(kbdev->dev,
"GPU hardware issue table may need updating:\n"
"r%dp%d status %d is unknown; treating as r%dp%d status %d",
(gpu_id & GPU_ID2_VERSION_MAJOR) >>
GPU_ID2_VERSION_MAJOR_SHIFT,
(gpu_id & GPU_ID2_VERSION_MINOR) >>
GPU_ID2_VERSION_MINOR_SHIFT,
(gpu_id & GPU_ID2_VERSION_STATUS) >>
GPU_ID2_VERSION_STATUS_SHIFT,
(fallback_version & GPU_ID2_VERSION_MAJOR) >>
GPU_ID2_VERSION_MAJOR_SHIFT,
(fallback_version & GPU_ID2_VERSION_MINOR) >>
GPU_ID2_VERSION_MINOR_SHIFT,
(fallback_version & GPU_ID2_VERSION_STATUS) >>
GPU_ID2_VERSION_STATUS_SHIFT);
dev_notice(kbdev->dev, "r%dp%d status %d not found in HW issues table;\n",
(gpu_id & GPU_ID2_VERSION_MAJOR) >> GPU_ID2_VERSION_MAJOR_SHIFT,
(gpu_id & GPU_ID2_VERSION_MINOR) >> GPU_ID2_VERSION_MINOR_SHIFT,
(gpu_id & GPU_ID2_VERSION_STATUS) >>
GPU_ID2_VERSION_STATUS_SHIFT);
dev_notice(kbdev->dev, "falling back to closest match: r%dp%d status %d\n",
(fallback_version & GPU_ID2_VERSION_MAJOR) >>
GPU_ID2_VERSION_MAJOR_SHIFT,
(fallback_version & GPU_ID2_VERSION_MINOR) >>
GPU_ID2_VERSION_MINOR_SHIFT,
(fallback_version & GPU_ID2_VERSION_STATUS) >>
GPU_ID2_VERSION_STATUS_SHIFT);
dev_notice(kbdev->dev,
"Execution proceeding normally with fallback match\n");
gpu_id &= ~GPU_ID2_VERSION;
gpu_id |= fallback_version;
@@ -343,7 +344,7 @@ int kbase_hw_set_issues_mask(struct kbase_device *kbdev)
issues = kbase_hw_get_issues_for_new_id(kbdev);
if (issues == NULL) {
dev_err(kbdev->dev,
"Unknown GPU ID %x", gpu_id);
"HW product - Unknown GPU ID %x", gpu_id);
return -EINVAL;
}
@@ -407,7 +408,7 @@ int kbase_hw_set_issues_mask(struct kbase_device *kbdev)
break;
default:
dev_err(kbdev->dev,
"Unknown GPU ID %x", gpu_id);
"HW issues - Unknown GPU ID %x", gpu_id);
return -EINVAL;
}
}

View File

@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
* (C) COPYRIGHT 2014, 2018-2021 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2014, 2018-2021, 2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -22,6 +22,49 @@
#ifndef _KBASE_BACKEND_TIME_H_
#define _KBASE_BACKEND_TIME_H_
#if MALI_USE_CSF
/**
* struct kbase_backend_time - System timestamp attributes.
*
* @multiplier: Numerator of the converter's fraction.
* @divisor: Denominator of the converter's fraction.
* @offset: Converter's offset term.
*
* According to Generic timer spec, system timer:
* - Increments at a fixed frequency
* - Starts operating from zero
*
* Hence CPU time is a linear function of System Time.
*
* CPU_ts = alpha * SYS_ts + beta
*
* Where
* - alpha = 10^9/SYS_ts_freq
* - beta is calculated by two timer samples taken at the same time:
* beta = CPU_ts_s - SYS_ts_s * alpha
*
* Since alpha is a rational number, we minimizing possible
* rounding error by simplifying the ratio. Thus alpha is stored
* as a simple `multiplier / divisor` ratio.
*
*/
struct kbase_backend_time {
u64 multiplier;
u64 divisor;
s64 offset;
};
/**
* kbase_backend_time_convert_gpu_to_cpu() - Convert GPU timestamp to CPU timestamp.
*
* @kbdev: Kbase device pointer
* @gpu_ts: System timestamp value to converter.
*
* Return: The CPU timestamp.
*/
u64 __maybe_unused kbase_backend_time_convert_gpu_to_cpu(struct kbase_device *kbdev, u64 gpu_ts);
#endif
/**
* kbase_backend_get_gpu_time() - Get current GPU time
* @kbdev: Device pointer
@@ -46,9 +89,6 @@ void kbase_backend_get_gpu_time_norequest(struct kbase_device *kbdev,
u64 *cycle_counter,
u64 *system_time,
struct timespec64 *ts);
#endif /* _KBASE_BACKEND_TIME_H_ */
/**
* kbase_get_timeout_ms - Choose a timeout value to get a timeout scaled
* GPU frequency, using a choice from
@@ -70,3 +110,17 @@ unsigned int kbase_get_timeout_ms(struct kbase_device *kbdev,
* Return: Snapshot of the GPU cycle count register.
*/
u64 kbase_backend_get_cycle_cnt(struct kbase_device *kbdev);
/**
* kbase_backend_time_init() - Initialize system timestamp converter.
*
* @kbdev: Kbase device pointer
*
* This function should only be called after GPU is powered-up and
* L2 cached power-up has been initiated.
*
* Return: Zero on success, error code otherwise.
*/
int kbase_backend_time_init(struct kbase_device *kbdev);
#endif /* _KBASE_BACKEND_TIME_H_ */

View File

@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
* (C) COPYRIGHT 2011-2022 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2011-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -34,6 +34,7 @@
#include "mali_kbase_jm.h"
#include "mali_kbase_hwaccess_jm.h"
#include <mali_kbase_hwaccess_time.h>
#include <linux/priority_control_manager.h>
/*
@@ -531,6 +532,7 @@ int kbasep_js_devdata_init(struct kbase_device * const kbdev)
jsdd->gpu_reset_ticks_dumping = DEFAULT_JS_RESET_TICKS_DUMPING;
jsdd->ctx_timeslice_ns = DEFAULT_JS_CTX_TIMESLICE_NS;
atomic_set(&jsdd->soft_job_timeout_ms, DEFAULT_JS_SOFT_JOB_TIMEOUT);
jsdd->js_free_wait_time_ms = kbase_get_timeout_ms(kbdev, JM_DEFAULT_JS_FREE_TIMEOUT);
dev_dbg(kbdev->dev, "JS Config Attribs: ");
dev_dbg(kbdev->dev, "\tscheduling_period_ns:%u",
@@ -555,6 +557,7 @@ int kbasep_js_devdata_init(struct kbase_device * const kbdev)
jsdd->ctx_timeslice_ns);
dev_dbg(kbdev->dev, "\tsoft_job_timeout:%i",
atomic_read(&jsdd->soft_job_timeout_ms));
dev_dbg(kbdev->dev, "\tjs_free_wait_time_ms:%u", jsdd->js_free_wait_time_ms);
if (!(jsdd->soft_stop_ticks < jsdd->hard_stop_ticks_ss &&
jsdd->hard_stop_ticks_ss < jsdd->gpu_reset_ticks_ss &&

View File

@@ -44,6 +44,9 @@
#include <mali_kbase_config_defaults.h>
#include <mali_kbase_trace_gpu_mem.h>
#define VA_REGION_SLAB_NAME_PREFIX "va-region-slab-"
#define VA_REGION_SLAB_NAME_SIZE (DEVNAME_SIZE + sizeof(VA_REGION_SLAB_NAME_PREFIX) + 1)
#if MALI_JIT_PRESSURE_LIMIT_BASE
/*
@@ -92,10 +95,8 @@ static size_t kbase_get_num_cpu_va_bits(struct kbase_context *kctx)
#error "Unknown CPU VA width for this architecture"
#endif
#if IS_ENABLED(CONFIG_64BIT)
if (kbase_ctx_flag(kctx, KCTX_COMPAT))
if (kbase_ctx_compat_mode(kctx))
cpu_va_bits = 32;
#endif
return cpu_va_bits;
}
@@ -130,18 +131,14 @@ static struct rb_root *kbase_gpu_va_to_rbtree(struct kbase_context *kctx,
else {
u64 same_va_end;
#if IS_ENABLED(CONFIG_64BIT)
if (kbase_ctx_flag(kctx, KCTX_COMPAT)) {
#endif /* CONFIG_64BIT */
if (kbase_ctx_compat_mode(kctx)) {
same_va_end = KBASE_REG_ZONE_CUSTOM_VA_BASE;
#if IS_ENABLED(CONFIG_64BIT)
} else {
struct kbase_reg_zone *same_va_zone =
kbase_ctx_reg_zone_get(kctx,
KBASE_REG_ZONE_SAME_VA);
same_va_end = kbase_reg_zone_end_pfn(same_va_zone);
}
#endif /* CONFIG_64BIT */
if (gpu_pfn >= same_va_end)
rbtree = &kctx->reg_rbtree_custom;
@@ -383,6 +380,7 @@ void kbase_remove_va_region(struct kbase_device *kbdev,
struct rb_node *rbnext;
struct kbase_va_region *next = NULL;
struct rb_root *reg_rbtree = NULL;
struct kbase_va_region *orig_reg = reg;
int merged_front = 0;
int merged_back = 0;
@@ -447,9 +445,8 @@ void kbase_remove_va_region(struct kbase_device *kbdev,
*/
struct kbase_va_region *free_reg;
free_reg = kbase_alloc_free_region(reg_rbtree,
reg->start_pfn, reg->nr_pages,
reg->flags & KBASE_REG_ZONE_MASK);
free_reg = kbase_alloc_free_region(kbdev, reg_rbtree, reg->start_pfn, reg->nr_pages,
reg->flags & KBASE_REG_ZONE_MASK);
if (!free_reg) {
/* In case of failure, we cannot allocate a replacement
* free region, so we will be left with a 'gap' in the
@@ -480,6 +477,12 @@ void kbase_remove_va_region(struct kbase_device *kbdev,
rb_replace_node(&(reg->rblink), &(free_reg->rblink), reg_rbtree);
}
/* This operation is always safe because the function never frees
* the region. If the region has been merged to both front and back,
* then it's the previous region that is supposed to be freed.
*/
orig_reg->start_pfn = 0;
out:
return;
}
@@ -490,6 +493,7 @@ KBASE_EXPORT_TEST_API(kbase_remove_va_region);
* kbase_insert_va_region_nolock - Insert a VA region to the list,
* replacing the existing one.
*
* @kbdev: The kbase device
* @new_reg: The new region to insert
* @at_reg: The region to replace
* @start_pfn: The Page Frame Number to insert at
@@ -497,8 +501,10 @@ KBASE_EXPORT_TEST_API(kbase_remove_va_region);
*
* Return: 0 on success, error code otherwise.
*/
static int kbase_insert_va_region_nolock(struct kbase_va_region *new_reg,
struct kbase_va_region *at_reg, u64 start_pfn, size_t nr_pages)
static int kbase_insert_va_region_nolock(struct kbase_device *kbdev,
struct kbase_va_region *new_reg,
struct kbase_va_region *at_reg, u64 start_pfn,
size_t nr_pages)
{
struct rb_root *reg_rbtree = NULL;
int err = 0;
@@ -542,10 +548,9 @@ static int kbase_insert_va_region_nolock(struct kbase_va_region *new_reg,
else {
struct kbase_va_region *new_front_reg;
new_front_reg = kbase_alloc_free_region(reg_rbtree,
at_reg->start_pfn,
start_pfn - at_reg->start_pfn,
at_reg->flags & KBASE_REG_ZONE_MASK);
new_front_reg = kbase_alloc_free_region(kbdev, reg_rbtree, at_reg->start_pfn,
start_pfn - at_reg->start_pfn,
at_reg->flags & KBASE_REG_ZONE_MASK);
if (new_front_reg) {
at_reg->nr_pages -= nr_pages + new_front_reg->nr_pages;
@@ -682,8 +687,7 @@ int kbase_add_va_region_rbtree(struct kbase_device *kbdev,
goto exit;
}
err = kbase_insert_va_region_nolock(reg, tmp, gpu_pfn,
nr_pages);
err = kbase_insert_va_region_nolock(kbdev, reg, tmp, gpu_pfn, nr_pages);
if (err) {
dev_warn(dev, "Failed to insert va region");
err = -ENOMEM;
@@ -708,8 +712,7 @@ int kbase_add_va_region_rbtree(struct kbase_device *kbdev,
nr_pages, align_offset, align_mask,
&start_pfn);
if (tmp) {
err = kbase_insert_va_region_nolock(reg, tmp,
start_pfn, nr_pages);
err = kbase_insert_va_region_nolock(kbdev, reg, tmp, start_pfn, nr_pages);
if (unlikely(err)) {
dev_warn(dev, "Failed to insert region: 0x%08llx start_pfn, %zu nr_pages",
start_pfn, nr_pages);
@@ -847,7 +850,7 @@ static void kbase_region_tracker_erase_rbtree(struct rb_root *rbtree)
if (rbnode) {
rb_erase(rbnode, rbtree);
reg = rb_entry(rbnode, struct kbase_va_region, rblink);
WARN_ON(reg->va_refcnt != 1);
WARN_ON(kbase_refcount_read(&reg->va_refcnt) != 1);
if (kbase_page_migration_enabled)
kbase_gpu_munmap(kbase_reg_flags_to_kctx(reg), reg);
/* Reset the start_pfn - as the rbtree is being
@@ -933,9 +936,8 @@ int kbase_region_tracker_init(struct kbase_context *kctx)
#endif
/* all have SAME_VA */
same_va_reg =
kbase_alloc_free_region(&kctx->reg_rbtree_same, same_va_base,
same_va_pages, KBASE_REG_ZONE_SAME_VA);
same_va_reg = kbase_alloc_free_region(kctx->kbdev, &kctx->reg_rbtree_same, same_va_base,
same_va_pages, KBASE_REG_ZONE_SAME_VA);
if (!same_va_reg) {
err = -ENOMEM;
@@ -944,10 +946,7 @@ int kbase_region_tracker_init(struct kbase_context *kctx)
kbase_ctx_reg_zone_init(kctx, KBASE_REG_ZONE_SAME_VA, same_va_base,
same_va_pages);
#if IS_ENABLED(CONFIG_64BIT)
/* 32-bit clients have custom VA zones */
if (kbase_ctx_flag(kctx, KCTX_COMPAT)) {
#endif
if (kbase_ctx_compat_mode(kctx)) {
if (gpu_va_limit <= KBASE_REG_ZONE_CUSTOM_VA_BASE) {
err = -EINVAL;
goto fail_free_same_va;
@@ -959,10 +958,9 @@ int kbase_region_tracker_init(struct kbase_context *kctx)
if ((KBASE_REG_ZONE_CUSTOM_VA_BASE + KBASE_REG_ZONE_CUSTOM_VA_SIZE) >= gpu_va_limit)
custom_va_size = gpu_va_limit - KBASE_REG_ZONE_CUSTOM_VA_BASE;
custom_va_reg = kbase_alloc_free_region(
&kctx->reg_rbtree_custom,
KBASE_REG_ZONE_CUSTOM_VA_BASE,
custom_va_size, KBASE_REG_ZONE_CUSTOM_VA);
custom_va_reg = kbase_alloc_free_region(kctx->kbdev, &kctx->reg_rbtree_custom,
KBASE_REG_ZONE_CUSTOM_VA_BASE,
custom_va_size, KBASE_REG_ZONE_CUSTOM_VA);
if (!custom_va_reg) {
err = -ENOMEM;
@@ -971,11 +969,9 @@ int kbase_region_tracker_init(struct kbase_context *kctx)
kbase_ctx_reg_zone_init(kctx, KBASE_REG_ZONE_CUSTOM_VA,
KBASE_REG_ZONE_CUSTOM_VA_BASE,
custom_va_size);
#if IS_ENABLED(CONFIG_64BIT)
} else {
custom_va_size = 0;
}
#endif
#if MALI_USE_CSF
/* The position of EXEC_VA depends on whether the client is 32-bit or 64-bit. */
@@ -986,17 +982,15 @@ int kbase_region_tracker_init(struct kbase_context *kctx)
*/
fixed_va_end = KBASE_REG_ZONE_FIXED_VA_END_64;
#if IS_ENABLED(CONFIG_64BIT)
if (kbase_ctx_flag(kctx, KCTX_COMPAT)) {
if (kbase_ctx_compat_mode(kctx)) {
exec_va_base = KBASE_REG_ZONE_EXEC_VA_BASE_32;
fixed_va_end = KBASE_REG_ZONE_FIXED_VA_END_32;
}
#endif
kbase_ctx_reg_zone_init(kctx, KBASE_REG_ZONE_EXEC_VA, exec_va_base,
KBASE_REG_ZONE_EXEC_VA_SIZE);
exec_va_reg = kbase_alloc_free_region(&kctx->reg_rbtree_exec, exec_va_base,
exec_va_reg = kbase_alloc_free_region(kctx->kbdev, &kctx->reg_rbtree_exec, exec_va_base,
KBASE_REG_ZONE_EXEC_VA_SIZE, KBASE_REG_ZONE_EXEC_VA);
if (!exec_va_reg) {
@@ -1010,8 +1004,8 @@ int kbase_region_tracker_init(struct kbase_context *kctx)
KBASE_REG_ZONE_EXEC_FIXED_VA_SIZE);
exec_fixed_va_reg =
kbase_alloc_free_region(&kctx->reg_rbtree_exec_fixed, exec_fixed_va_base,
KBASE_REG_ZONE_EXEC_FIXED_VA_SIZE,
kbase_alloc_free_region(kctx->kbdev, &kctx->reg_rbtree_exec_fixed,
exec_fixed_va_base, KBASE_REG_ZONE_EXEC_FIXED_VA_SIZE,
KBASE_REG_ZONE_EXEC_FIXED_VA);
if (!exec_fixed_va_reg) {
@@ -1024,7 +1018,7 @@ int kbase_region_tracker_init(struct kbase_context *kctx)
kbase_ctx_reg_zone_init(kctx, KBASE_REG_ZONE_FIXED_VA, fixed_va_base, fixed_va_pages);
fixed_va_reg = kbase_alloc_free_region(&kctx->reg_rbtree_fixed, fixed_va_base,
fixed_va_reg = kbase_alloc_free_region(kctx->kbdev, &kctx->reg_rbtree_fixed, fixed_va_base,
fixed_va_pages, KBASE_REG_ZONE_FIXED_VA);
kctx->gpu_va_end = fixed_va_end;
@@ -1163,7 +1157,6 @@ static bool kbase_region_tracker_has_allocs(struct kbase_context *kctx)
return false;
}
#if IS_ENABLED(CONFIG_64BIT)
static int kbase_region_tracker_init_jit_64(struct kbase_context *kctx,
u64 jit_va_pages)
{
@@ -1212,9 +1205,8 @@ static int kbase_region_tracker_init_jit_64(struct kbase_context *kctx,
* Create a custom VA zone at the end of the VA for allocations which
* JIT can use so it doesn't have to allocate VA from the kernel.
*/
custom_va_reg =
kbase_alloc_free_region(&kctx->reg_rbtree_custom, jit_va_start,
jit_va_pages, KBASE_REG_ZONE_CUSTOM_VA);
custom_va_reg = kbase_alloc_free_region(kctx->kbdev, &kctx->reg_rbtree_custom, jit_va_start,
jit_va_pages, KBASE_REG_ZONE_CUSTOM_VA);
/*
* The context will be destroyed if we fail here so no point
@@ -1231,7 +1223,6 @@ static int kbase_region_tracker_init_jit_64(struct kbase_context *kctx,
kbase_region_tracker_insert(custom_va_reg);
return 0;
}
#endif
int kbase_region_tracker_init_jit(struct kbase_context *kctx, u64 jit_va_pages,
int max_allocations, int trim_level, int group_id,
@@ -1272,10 +1263,8 @@ int kbase_region_tracker_init_jit(struct kbase_context *kctx, u64 jit_va_pages,
goto exit_unlock;
}
#if IS_ENABLED(CONFIG_64BIT)
if (!kbase_ctx_flag(kctx, KCTX_COMPAT))
if (!kbase_ctx_compat_mode(kctx))
err = kbase_region_tracker_init_jit_64(kctx, jit_va_pages);
#endif
/*
* Nothing to do for 32-bit clients, JIT uses the existing
* custom VA zone.
@@ -1351,17 +1340,14 @@ int kbase_region_tracker_init_exec(struct kbase_context *kctx, u64 exec_va_pages
goto exit_unlock;
}
#if IS_ENABLED(CONFIG_64BIT)
if (kbase_ctx_flag(kctx, KCTX_COMPAT)) {
#endif
if (kbase_ctx_compat_mode(kctx)) {
/* 32-bit client: take from CUSTOM_VA zone */
target_zone_bits = KBASE_REG_ZONE_CUSTOM_VA;
#if IS_ENABLED(CONFIG_64BIT)
} else {
/* 64-bit client: take from SAME_VA zone */
target_zone_bits = KBASE_REG_ZONE_SAME_VA;
}
#endif
target_zone = kbase_ctx_reg_zone_get(kctx, target_zone_bits);
target_zone_base_addr = target_zone->base_pfn << PAGE_SHIFT;
@@ -1389,10 +1375,8 @@ int kbase_region_tracker_init_exec(struct kbase_context *kctx, u64 exec_va_pages
/* Taken from the end of the target zone */
exec_va_start = kbase_reg_zone_end_pfn(target_zone) - exec_va_pages;
exec_va_reg = kbase_alloc_free_region(&kctx->reg_rbtree_exec,
exec_va_start,
exec_va_pages,
KBASE_REG_ZONE_EXEC_VA);
exec_va_reg = kbase_alloc_free_region(kctx->kbdev, &kctx->reg_rbtree_exec, exec_va_start,
exec_va_pages, KBASE_REG_ZONE_EXEC_VA);
if (!exec_va_reg) {
err = -ENOMEM;
goto exit_unlock;
@@ -1435,10 +1419,9 @@ int kbase_mcu_shared_interface_region_tracker_init(struct kbase_device *kbdev)
kbdev->csf.shared_reg_rbtree = RB_ROOT;
shared_reg = kbase_alloc_free_region(&kbdev->csf.shared_reg_rbtree,
shared_reg_start_pfn,
shared_reg_size,
KBASE_REG_ZONE_MCU_SHARED);
shared_reg =
kbase_alloc_free_region(kbdev, &kbdev->csf.shared_reg_rbtree, shared_reg_start_pfn,
shared_reg_size, KBASE_REG_ZONE_MCU_SHARED);
if (!shared_reg)
return -ENOMEM;
@@ -1447,10 +1430,30 @@ int kbase_mcu_shared_interface_region_tracker_init(struct kbase_device *kbdev)
}
#endif
static void kbasep_mem_page_size_init(struct kbase_device *kbdev)
{
#if IS_ENABLED(CONFIG_LARGE_PAGE_ALLOC_OVERRIDE)
#if IS_ENABLED(CONFIG_LARGE_PAGE_ALLOC)
kbdev->pagesize_2mb = true;
if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_LARGE_PAGE_ALLOC) != 1) {
dev_warn(
kbdev->dev,
"2MB page is enabled by force while current GPU-HW doesn't meet the requirement to do so.\n");
}
#else /* IS_ENABLED(CONFIG_LARGE_PAGE_ALLOC) */
kbdev->pagesize_2mb = false;
#endif /* IS_ENABLED(CONFIG_LARGE_PAGE_ALLOC) */
#else /* IS_ENABLED(CONFIG_LARGE_PAGE_ALLOC_OVERRIDE) */
/* Set it to the default based on which GPU is present */
kbdev->pagesize_2mb = kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_LARGE_PAGE_ALLOC);
#endif /* IS_ENABLED(CONFIG_LARGE_PAGE_ALLOC_OVERRIDE) */
}
int kbase_mem_init(struct kbase_device *kbdev)
{
int err = 0;
struct kbasep_mem_device *memdev;
char va_region_slab_name[VA_REGION_SLAB_NAME_SIZE];
#if IS_ENABLED(CONFIG_OF)
struct device_node *mgm_node = NULL;
#endif
@@ -1459,6 +1462,19 @@ int kbase_mem_init(struct kbase_device *kbdev)
memdev = &kbdev->memdev;
kbasep_mem_page_size_init(kbdev);
scnprintf(va_region_slab_name, VA_REGION_SLAB_NAME_SIZE, VA_REGION_SLAB_NAME_PREFIX "%s",
kbdev->devname);
/* Initialize slab cache for kbase_va_regions */
kbdev->va_region_slab =
kmem_cache_create(va_region_slab_name, sizeof(struct kbase_va_region), 0, 0, NULL);
if (kbdev->va_region_slab == NULL) {
dev_err(kbdev->dev, "Failed to create va_region_slab\n");
return -ENOMEM;
}
kbase_mem_migrate_init(kbdev);
kbase_mem_pool_group_config_set_max_size(&kbdev->mem_pool_defaults,
KBASE_MEM_POOL_MAX_SIZE_KCTX);
@@ -1550,6 +1566,9 @@ void kbase_mem_term(struct kbase_device *kbdev)
kbase_mem_migrate_term(kbdev);
kmem_cache_destroy(kbdev->va_region_slab);
kbdev->va_region_slab = NULL;
WARN_ON(kbdev->total_gpu_pages);
WARN_ON(!RB_EMPTY_ROOT(&kbdev->process_root));
WARN_ON(!RB_EMPTY_ROOT(&kbdev->dma_buf_root));
@@ -1563,6 +1582,7 @@ KBASE_EXPORT_TEST_API(kbase_mem_term);
/**
* kbase_alloc_free_region - Allocate a free region object.
*
* @kbdev: kbase device
* @rbtree: Backlink to the red-black tree of memory regions.
* @start_pfn: The Page Frame Number in GPU virtual address space.
* @nr_pages: The size of the region in pages.
@@ -1575,8 +1595,8 @@ KBASE_EXPORT_TEST_API(kbase_mem_term);
*
* Return: pointer to the allocated region object on success, NULL otherwise.
*/
struct kbase_va_region *kbase_alloc_free_region(struct rb_root *rbtree,
u64 start_pfn, size_t nr_pages, int zone)
struct kbase_va_region *kbase_alloc_free_region(struct kbase_device *kbdev, struct rb_root *rbtree,
u64 start_pfn, size_t nr_pages, int zone)
{
struct kbase_va_region *new_reg;
@@ -1588,13 +1608,13 @@ struct kbase_va_region *kbase_alloc_free_region(struct rb_root *rbtree,
/* 64-bit address range is the max */
KBASE_DEBUG_ASSERT(start_pfn + nr_pages <= (U64_MAX / PAGE_SIZE));
new_reg = kzalloc(sizeof(*new_reg), GFP_KERNEL);
new_reg = kmem_cache_zalloc(kbdev->va_region_slab, GFP_KERNEL);
if (!new_reg)
return NULL;
new_reg->va_refcnt = 1;
new_reg->no_user_free_refcnt = 0;
kbase_refcount_set(&new_reg->va_refcnt, 1);
atomic_set(&new_reg->no_user_free_count, 0);
new_reg->cpu_alloc = NULL; /* no alloc bound yet */
new_reg->gpu_alloc = NULL; /* no alloc bound yet */
new_reg->rbtree = rbtree;
@@ -1726,7 +1746,6 @@ int kbase_gpu_mmap(struct kbase_context *kctx, struct kbase_va_region *reg,
unsigned long gwt_mask = ~0;
int group_id;
struct kbase_mem_phy_alloc *alloc;
bool ignore_page_migration = false;
#ifdef CONFIG_MALI_CINSTR_GWT
if (kctx->gwt_enabled)
@@ -1755,41 +1774,46 @@ int kbase_gpu_mmap(struct kbase_context *kctx, struct kbase_va_region *reg,
KBASE_DEBUG_ASSERT(alloc->imported.alias.aliased);
for (i = 0; i < alloc->imported.alias.nents; i++) {
if (alloc->imported.alias.aliased[i].alloc) {
err = kbase_mmu_insert_pages(
err = kbase_mmu_insert_aliased_pages(
kctx->kbdev, &kctx->mmu, reg->start_pfn + (i * stride),
alloc->imported.alias.aliased[i].alloc->pages +
alloc->imported.alias.aliased[i].offset,
alloc->imported.alias.aliased[i].length,
reg->flags & gwt_mask, kctx->as_nr, group_id, mmu_sync_info,
NULL, ignore_page_migration);
NULL);
if (err)
goto bad_insert;
goto bad_aliased_insert;
/* Note: mapping count is tracked at alias
* creation time
*/
} else {
err = kbase_mmu_insert_single_page(
kctx, reg->start_pfn + i * stride,
kctx->aliasing_sink_page,
err = kbase_mmu_insert_single_aliased_page(
kctx, reg->start_pfn + i * stride, kctx->aliasing_sink_page,
alloc->imported.alias.aliased[i].length,
(reg->flags & mask & gwt_mask) | attr,
group_id, mmu_sync_info);
(reg->flags & mask & gwt_mask) | attr, group_id,
mmu_sync_info);
if (err)
goto bad_insert;
goto bad_aliased_insert;
}
}
} else {
if (reg->gpu_alloc->type == KBASE_MEM_TYPE_IMPORTED_UMM ||
reg->gpu_alloc->type == KBASE_MEM_TYPE_IMPORTED_USER_BUF)
ignore_page_migration = true;
reg->gpu_alloc->type == KBASE_MEM_TYPE_IMPORTED_USER_BUF) {
err = kbase_mmu_insert_imported_pages(
kctx->kbdev, &kctx->mmu, reg->start_pfn,
kbase_get_gpu_phy_pages(reg), kbase_reg_current_backed_size(reg),
reg->flags & gwt_mask, kctx->as_nr, group_id, mmu_sync_info, reg);
} else {
err = kbase_mmu_insert_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn,
kbase_get_gpu_phy_pages(reg),
kbase_reg_current_backed_size(reg),
reg->flags & gwt_mask, kctx->as_nr, group_id,
mmu_sync_info, reg, true);
}
err = kbase_mmu_insert_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn,
kbase_get_gpu_phy_pages(reg),
kbase_reg_current_backed_size(reg),
reg->flags & gwt_mask, kctx->as_nr, group_id,
mmu_sync_info, reg, ignore_page_migration);
if (err)
goto bad_insert;
kbase_mem_phy_alloc_gpu_mapped(alloc);
@@ -1799,9 +1823,9 @@ int kbase_gpu_mmap(struct kbase_context *kctx, struct kbase_va_region *reg,
!WARN_ON(reg->nr_pages < reg->gpu_alloc->nents) &&
reg->gpu_alloc->type == KBASE_MEM_TYPE_IMPORTED_UMM &&
reg->gpu_alloc->imported.umm.current_mapping_usage_count) {
/* For padded imported dma-buf memory, map the dummy aliasing
* page from the end of the dma-buf pages, to the end of the
* region using a read only mapping.
/* For padded imported dma-buf or user-buf memory, map the dummy
* aliasing page from the end of the imported pages, to the end of
* the region using a read only mapping.
*
* Only map when it's imported dma-buf memory that is currently
* mapped.
@@ -1809,22 +1833,32 @@ int kbase_gpu_mmap(struct kbase_context *kctx, struct kbase_va_region *reg,
* Assume reg->gpu_alloc->nents is the number of actual pages
* in the dma-buf memory.
*/
err = kbase_mmu_insert_single_page(
kctx, reg->start_pfn + reg->gpu_alloc->nents,
kctx->aliasing_sink_page,
err = kbase_mmu_insert_single_imported_page(
kctx, reg->start_pfn + reg->gpu_alloc->nents, kctx->aliasing_sink_page,
reg->nr_pages - reg->gpu_alloc->nents,
(reg->flags | KBASE_REG_GPU_RD) & ~KBASE_REG_GPU_WR,
KBASE_MEM_GROUP_SINK, mmu_sync_info);
(reg->flags | KBASE_REG_GPU_RD) & ~KBASE_REG_GPU_WR, KBASE_MEM_GROUP_SINK,
mmu_sync_info);
if (err)
goto bad_insert;
}
return err;
bad_insert:
kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn, alloc->pages,
reg->nr_pages, kctx->as_nr, ignore_page_migration);
bad_aliased_insert:
while (i-- > 0) {
struct tagged_addr *phys_alloc = NULL;
u64 const stride = alloc->imported.alias.stride;
if (alloc->imported.alias.aliased[i].alloc != NULL)
phys_alloc = alloc->imported.alias.aliased[i].alloc->pages +
alloc->imported.alias.aliased[i].offset;
kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn + (i * stride),
phys_alloc, alloc->imported.alias.aliased[i].length,
alloc->imported.alias.aliased[i].length, kctx->as_nr,
false);
}
bad_insert:
kbase_remove_va_region(kctx->kbdev, reg);
return err;
@@ -1870,26 +1904,49 @@ int kbase_gpu_munmap(struct kbase_context *kctx, struct kbase_va_region *reg)
kctx->kbdev, &kctx->mmu,
reg->start_pfn + (i * alloc->imported.alias.stride),
phys_alloc, alloc->imported.alias.aliased[i].length,
kctx->as_nr, false);
alloc->imported.alias.aliased[i].length, kctx->as_nr,
false);
if (WARN_ON_ONCE(err_loop))
err = err_loop;
}
}
break;
case KBASE_MEM_TYPE_IMPORTED_UMM:
err = kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn,
alloc->pages, reg->nr_pages, kctx->as_nr, true);
case KBASE_MEM_TYPE_IMPORTED_UMM: {
size_t nr_phys_pages = reg->nr_pages;
size_t nr_virt_pages = reg->nr_pages;
/* If the region has import padding and falls under the threshold for
* issuing a partial GPU cache flush, we want to reduce the number of
* physical pages that get flushed.
* This is symmetric with case of mapping the memory, which first maps
* each imported physical page to a separate virtual page, and then
* maps the single aliasing sink page to each of the virtual padding
* pages.
*/
if (reg->flags & KBASE_REG_IMPORT_PAD)
nr_phys_pages = alloc->nents + 1;
err = kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn,
alloc->pages, nr_phys_pages, nr_virt_pages,
kctx->as_nr, true);
}
break;
case KBASE_MEM_TYPE_IMPORTED_USER_BUF:
err = kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn,
alloc->pages, kbase_reg_current_backed_size(reg),
kctx->as_nr, true);
case KBASE_MEM_TYPE_IMPORTED_USER_BUF: {
size_t nr_reg_pages = kbase_reg_current_backed_size(reg);
err = kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn,
alloc->pages, nr_reg_pages, nr_reg_pages,
kctx->as_nr, true);
}
break;
default:
err = kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn,
alloc->pages, kbase_reg_current_backed_size(reg),
kctx->as_nr, false);
default: {
size_t nr_reg_pages = kbase_reg_current_backed_size(reg);
err = kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn,
alloc->pages, nr_reg_pages, nr_reg_pages,
kctx->as_nr, false);
}
break;
}
@@ -2214,7 +2271,7 @@ int kbase_mem_free_region(struct kbase_context *kctx, struct kbase_va_region *re
__func__, (void *)reg, (void *)kctx);
lockdep_assert_held(&kctx->reg_lock);
if (kbase_va_region_is_no_user_free(kctx, reg)) {
if (kbase_va_region_is_no_user_free(reg)) {
dev_warn(kctx->kbdev->dev, "Attempt to free GPU memory whose freeing by user space is forbidden!\n");
return -EINVAL;
}
@@ -2435,7 +2492,7 @@ int kbase_update_region_flags(struct kbase_context *kctx,
if (flags & BASEP_MEM_NO_USER_FREE) {
kbase_gpu_vm_lock(kctx);
kbase_va_region_no_user_free_get(kctx, reg);
kbase_va_region_no_user_free_inc(reg);
kbase_gpu_vm_unlock(kctx);
}
@@ -2489,15 +2546,14 @@ int kbase_alloc_phy_pages_helper(struct kbase_mem_phy_alloc *alloc,
tp = alloc->pages + alloc->nents;
#ifdef CONFIG_MALI_2MB_ALLOC
/* Check if we have enough pages requested so we can allocate a large
* page (512 * 4KB = 2MB )
*/
if (nr_left >= (SZ_2M / SZ_4K)) {
if (kbdev->pagesize_2mb && nr_left >= (SZ_2M / SZ_4K)) {
int nr_lp = nr_left / (SZ_2M / SZ_4K);
res = kbase_mem_pool_alloc_pages(&kctx->mem_pools.large[alloc->group_id],
nr_lp * (SZ_2M / SZ_4K), tp, true);
nr_lp * (SZ_2M / SZ_4K), tp, true, kctx->task);
if (res > 0) {
nr_left -= res;
@@ -2551,7 +2607,7 @@ int kbase_alloc_phy_pages_helper(struct kbase_mem_phy_alloc *alloc,
err = kbase_mem_pool_grow(
&kctx->mem_pools.large[alloc->group_id],
1);
1, kctx->task);
if (err)
break;
} while (1);
@@ -2592,12 +2648,11 @@ int kbase_alloc_phy_pages_helper(struct kbase_mem_phy_alloc *alloc,
}
}
}
no_new_partial:
#endif
no_new_partial:
if (nr_left) {
res = kbase_mem_pool_alloc_pages(&kctx->mem_pools.small[alloc->group_id], nr_left,
tp, false);
tp, false, kctx->task);
if (res <= 0)
goto alloc_failed;
}
@@ -2656,18 +2711,17 @@ struct tagged_addr *kbase_alloc_phy_pages_helper_locked(
lockdep_assert_held(&pool->pool_lock);
#if !defined(CONFIG_MALI_2MB_ALLOC)
WARN_ON(pool->order);
#endif
kctx = alloc->imported.native.kctx;
kbdev = kctx->kbdev;
if (!kbdev->pagesize_2mb)
WARN_ON(pool->order);
if (alloc->reg) {
if (nr_pages_requested > alloc->reg->nr_pages - alloc->nents)
goto invalid_request;
}
kctx = alloc->imported.native.kctx;
kbdev = kctx->kbdev;
lockdep_assert_held(&kctx->mem_partials_lock);
if (nr_pages_requested == 0)
@@ -2686,8 +2740,7 @@ struct tagged_addr *kbase_alloc_phy_pages_helper_locked(
tp = alloc->pages + alloc->nents;
new_pages = tp;
#ifdef CONFIG_MALI_2MB_ALLOC
if (pool->order) {
if (kbdev->pagesize_2mb && pool->order) {
int nr_lp = nr_left / (SZ_2M / SZ_4K);
res = kbase_mem_pool_alloc_pages_locked(pool,
@@ -2771,15 +2824,12 @@ struct tagged_addr *kbase_alloc_phy_pages_helper_locked(
if (nr_left)
goto alloc_failed;
} else {
#endif
res = kbase_mem_pool_alloc_pages_locked(pool,
nr_left,
tp);
if (res <= 0)
goto alloc_failed;
#ifdef CONFIG_MALI_2MB_ALLOC
}
#endif
KBASE_TLSTREAM_AUX_PAGESALLOC(
kbdev,
@@ -2800,8 +2850,7 @@ alloc_failed:
struct tagged_addr *start_free = alloc->pages + alloc->nents;
#ifdef CONFIG_MALI_2MB_ALLOC
if (pool->order) {
if (kbdev->pagesize_2mb && pool->order) {
while (nr_pages_to_free) {
if (is_huge_head(*start_free)) {
kbase_mem_pool_free_pages_locked(
@@ -2819,15 +2868,12 @@ alloc_failed:
}
}
} else {
#endif
kbase_mem_pool_free_pages_locked(pool,
nr_pages_to_free,
start_free,
false, /* not dirty */
true); /* return to pool */
#ifdef CONFIG_MALI_2MB_ALLOC
}
#endif
}
kbase_process_page_usage_dec(kctx, nr_pages_requested);
@@ -3816,8 +3862,8 @@ static void kbase_jit_destroy_worker(struct work_struct *work)
* by implementing "free on putting the last reference",
* but only for JIT regions.
*/
WARN_ON(reg->no_user_free_refcnt > 1);
kbase_va_region_no_user_free_put(kctx, reg);
WARN_ON(atomic_read(&reg->no_user_free_count) > 1);
kbase_va_region_no_user_free_dec(reg);
kbase_mem_free_region(kctx, reg);
kbase_gpu_vm_unlock(kctx);
} while (1);
@@ -4078,18 +4124,14 @@ static int kbase_jit_grow(struct kbase_context *kctx,
delta = info->commit_pages - reg->gpu_alloc->nents;
pages_required = delta;
#ifdef CONFIG_MALI_2MB_ALLOC
if (pages_required >= (SZ_2M / SZ_4K)) {
if (kctx->kbdev->pagesize_2mb && pages_required >= (SZ_2M / SZ_4K)) {
pool = &kctx->mem_pools.large[kctx->jit_group_id];
/* Round up to number of 2 MB pages required */
pages_required += ((SZ_2M / SZ_4K) - 1);
pages_required /= (SZ_2M / SZ_4K);
} else {
#endif
pool = &kctx->mem_pools.small[kctx->jit_group_id];
#ifdef CONFIG_MALI_2MB_ALLOC
}
#endif
if (reg->cpu_alloc != reg->gpu_alloc)
pages_required *= 2;
@@ -4110,7 +4152,7 @@ static int kbase_jit_grow(struct kbase_context *kctx,
spin_unlock(&kctx->mem_partials_lock);
kbase_gpu_vm_unlock(kctx);
ret = kbase_mem_pool_grow(pool, pool_delta);
ret = kbase_mem_pool_grow(pool, pool_delta, kctx->task);
kbase_gpu_vm_lock(kctx);
if (ret)
@@ -4374,14 +4416,14 @@ struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx,
if (!jit_allow_allocate(kctx, info, ignore_pressure_limit))
return NULL;
#ifdef CONFIG_MALI_2MB_ALLOC
/* Preallocate memory for the sub-allocation structs */
for (i = 0; i != ARRAY_SIZE(prealloc_sas); ++i) {
prealloc_sas[i] = kmalloc(sizeof(*prealloc_sas[i]), GFP_KERNEL);
if (!prealloc_sas[i])
goto end;
if (kctx->kbdev->pagesize_2mb) {
/* Preallocate memory for the sub-allocation structs */
for (i = 0; i != ARRAY_SIZE(prealloc_sas); ++i) {
prealloc_sas[i] = kmalloc(sizeof(*prealloc_sas[i]), GFP_KERNEL);
if (!prealloc_sas[i])
goto end;
}
}
#endif
kbase_gpu_vm_lock(kctx);
mutex_lock(&kctx->jit_evict_lock);
@@ -4561,7 +4603,7 @@ struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx,
/* Similarly to tiler heap init, there is a short window of time
* where the (either recycled or newly allocated, in our case) region has
* "no user free" refcount incremented but is still missing the DONT_NEED flag, and
* "no user free" count incremented but is still missing the DONT_NEED flag, and
* doesn't yet have the ACTIVE_JIT_ALLOC flag either. Temporarily leaking the
* allocation is the least bad option that doesn't lead to a security issue down the
* line (it will eventually be cleaned up during context termination).
@@ -4570,9 +4612,9 @@ struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx,
* flags.
*/
kbase_gpu_vm_lock(kctx);
if (unlikely(reg->no_user_free_refcnt > 1)) {
if (unlikely(atomic_read(&reg->no_user_free_count) > 1)) {
kbase_gpu_vm_unlock(kctx);
dev_err(kctx->kbdev->dev, "JIT region has no_user_free_refcnt > 1!\n");
dev_err(kctx->kbdev->dev, "JIT region has no_user_free_count > 1!\n");
mutex_lock(&kctx->jit_evict_lock);
list_move(&reg->jit_node, &kctx->jit_pool_head);
@@ -4728,8 +4770,8 @@ bool kbase_jit_evict(struct kbase_context *kctx)
* by implementing "free on putting the last reference",
* but only for JIT regions.
*/
WARN_ON(reg->no_user_free_refcnt > 1);
kbase_va_region_no_user_free_put(kctx, reg);
WARN_ON(atomic_read(&reg->no_user_free_count) > 1);
kbase_va_region_no_user_free_dec(reg);
kbase_mem_free_region(kctx, reg);
}
@@ -4757,8 +4799,8 @@ void kbase_jit_term(struct kbase_context *kctx)
* by implementing "free on putting the last reference",
* but only for JIT regions.
*/
WARN_ON(walker->no_user_free_refcnt > 1);
kbase_va_region_no_user_free_put(kctx, walker);
WARN_ON(atomic_read(&walker->no_user_free_count) > 1);
kbase_va_region_no_user_free_dec(walker);
kbase_mem_free_region(kctx, walker);
mutex_lock(&kctx->jit_evict_lock);
}
@@ -4776,8 +4818,8 @@ void kbase_jit_term(struct kbase_context *kctx)
* by implementing "free on putting the last reference",
* but only for JIT regions.
*/
WARN_ON(walker->no_user_free_refcnt > 1);
kbase_va_region_no_user_free_put(kctx, walker);
WARN_ON(atomic_read(&walker->no_user_free_count) > 1);
kbase_va_region_no_user_free_dec(walker);
kbase_mem_free_region(kctx, walker);
mutex_lock(&kctx->jit_evict_lock);
}
@@ -5023,9 +5065,13 @@ static int kbase_jd_user_buf_map(struct kbase_context *kctx,
* region, otherwise the initial content of memory would be wrong.
*/
for (i = 0; i < pinned_pages; i++) {
dma_addr_t dma_addr = dma_map_page_attrs(dev, pages[i], 0, PAGE_SIZE,
DMA_BIDIRECTIONAL, DMA_ATTR_SKIP_CPU_SYNC);
dma_addr_t dma_addr;
#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE)
dma_addr = dma_map_page(dev, pages[i], 0, PAGE_SIZE, DMA_BIDIRECTIONAL);
#else
dma_addr = dma_map_page_attrs(dev, pages[i], 0, PAGE_SIZE, DMA_BIDIRECTIONAL,
DMA_ATTR_SKIP_CPU_SYNC);
#endif
err = dma_mapping_error(dev, dma_addr);
if (err)
goto unwind;
@@ -5041,9 +5087,10 @@ static int kbase_jd_user_buf_map(struct kbase_context *kctx,
gwt_mask = ~KBASE_REG_GPU_WR;
#endif
err = kbase_mmu_insert_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn, pa,
kbase_reg_current_backed_size(reg), reg->flags & gwt_mask,
kctx->as_nr, alloc->group_id, mmu_sync_info, NULL, true);
err = kbase_mmu_insert_imported_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn, pa,
kbase_reg_current_backed_size(reg),
reg->flags & gwt_mask, kctx->as_nr, alloc->group_id,
mmu_sync_info, NULL);
if (err == 0)
return 0;
@@ -5064,8 +5111,12 @@ unwind:
dma_addr_t dma_addr = alloc->imported.user_buf.dma_addrs[i];
dma_sync_single_for_device(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE)
dma_unmap_page(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
#else
dma_unmap_page_attrs(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL,
DMA_ATTR_SKIP_CPU_SYNC);
#endif
}
/* The user buffer could already have been previously pinned before
@@ -5182,9 +5233,13 @@ static void kbase_jd_user_buf_unmap(struct kbase_context *kctx, struct kbase_mem
}
/* Notice: use the original DMA address to unmap the whole memory page. */
#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE)
dma_unmap_page(kctx->kbdev->dev, alloc->imported.user_buf.dma_addrs[i], PAGE_SIZE,
DMA_BIDIRECTIONAL);
#else
dma_unmap_page_attrs(kctx->kbdev->dev, alloc->imported.user_buf.dma_addrs[i],
PAGE_SIZE, DMA_BIDIRECTIONAL, DMA_ATTR_SKIP_CPU_SYNC);
#endif
if (writeable)
set_page_dirty_lock(pages[i]);
#if !MALI_USE_CSF
@@ -5308,6 +5363,7 @@ void kbase_unmap_external_resource(struct kbase_context *kctx, struct kbase_va_r
kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn,
alloc->pages,
kbase_reg_current_backed_size(reg),
kbase_reg_current_backed_size(reg),
kctx->as_nr, true);
}

View File

@@ -38,6 +38,7 @@
/* Required for kbase_mem_evictable_unmake */
#include "mali_kbase_mem_linux.h"
#include "mali_kbase_mem_migrate.h"
#include "mali_kbase_refcount_defs.h"
static inline void kbase_process_page_usage_inc(struct kbase_context *kctx,
int pages);
@@ -419,8 +420,8 @@ static inline struct kbase_mem_phy_alloc *kbase_mem_phy_alloc_put(struct kbase_m
* @jit_usage_id: The last just-in-time memory usage ID for this region.
* @jit_bin_id: The just-in-time memory bin this region came from.
* @va_refcnt: Number of users of this region. Protected by reg_lock.
* @no_user_free_refcnt: Number of users that want to prevent the region from
* being freed by userspace.
* @no_user_free_count: Number of contexts that want to prevent the region
* from being freed by userspace.
* @heap_info_gpu_addr: Pointer to an object in GPU memory defining an end of
* an allocated region
* The object can be one of:
@@ -681,8 +682,8 @@ struct kbase_va_region {
size_t used_pages;
#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */
int va_refcnt;
int no_user_free_refcnt;
kbase_refcount_t va_refcnt;
atomic_t no_user_free_count;
};
/**
@@ -759,15 +760,12 @@ static inline void kbase_region_refcnt_free(struct kbase_device *kbdev,
static inline struct kbase_va_region *kbase_va_region_alloc_get(
struct kbase_context *kctx, struct kbase_va_region *region)
{
lockdep_assert_held(&kctx->reg_lock);
WARN_ON(!kbase_refcount_read(&region->va_refcnt));
WARN_ON(kbase_refcount_read(&region->va_refcnt) == INT_MAX);
WARN_ON(!region->va_refcnt);
WARN_ON(region->va_refcnt == INT_MAX);
/* non-atomic as kctx->reg_lock is held */
dev_dbg(kctx->kbdev->dev, "va_refcnt %d before get %pK\n",
region->va_refcnt, (void *)region);
region->va_refcnt++;
kbase_refcount_read(&region->va_refcnt), (void *)region);
kbase_refcount_inc(&region->va_refcnt);
return region;
}
@@ -775,17 +773,14 @@ static inline struct kbase_va_region *kbase_va_region_alloc_get(
static inline struct kbase_va_region *kbase_va_region_alloc_put(
struct kbase_context *kctx, struct kbase_va_region *region)
{
lockdep_assert_held(&kctx->reg_lock);
WARN_ON(region->va_refcnt <= 0);
WARN_ON(kbase_refcount_read(&region->va_refcnt) <= 0);
WARN_ON(region->flags & KBASE_REG_FREE);
/* non-atomic as kctx->reg_lock is held */
region->va_refcnt--;
dev_dbg(kctx->kbdev->dev, "va_refcnt %d after put %pK\n",
region->va_refcnt, (void *)region);
if (!region->va_refcnt)
if (kbase_refcount_dec_and_test(&region->va_refcnt))
kbase_region_refcnt_free(kctx->kbdev, region);
else
dev_dbg(kctx->kbdev->dev, "va_refcnt %d after put %pK\n",
kbase_refcount_read(&region->va_refcnt), (void *)region);
return NULL;
}
@@ -799,58 +794,44 @@ static inline struct kbase_va_region *kbase_va_region_alloc_put(
* Hence, callers cannot rely on this check alone to determine if a region might be shrunk
* by any part of kbase. Instead they should use kbase_is_region_shrinkable().
*
* @kctx: Pointer to kbase context.
* @region: Pointer to region.
*
* Return: true if userspace cannot free the region, false if userspace can free the region.
*/
static inline bool kbase_va_region_is_no_user_free(struct kbase_context *kctx,
struct kbase_va_region *region)
static inline bool kbase_va_region_is_no_user_free(struct kbase_va_region *region)
{
lockdep_assert_held(&kctx->reg_lock);
return region->no_user_free_refcnt > 0;
return atomic_read(&region->no_user_free_count) > 0;
}
/**
* kbase_va_region_no_user_free_get - Increment "no user free" refcount for a region.
* kbase_va_region_no_user_free_inc - Increment "no user free" count for a region.
* Calling this function will prevent the region to be shrunk by parts of kbase that
* don't own the region (as long as the refcount stays above zero). Refer to
* don't own the region (as long as the count stays above zero). Refer to
* kbase_va_region_is_no_user_free() for more information.
*
* @kctx: Pointer to kbase context.
* @region: Pointer to region (not shrinkable).
*
* Return: the pointer to the region passed as argument.
*/
static inline struct kbase_va_region *
kbase_va_region_no_user_free_get(struct kbase_context *kctx, struct kbase_va_region *region)
static inline void kbase_va_region_no_user_free_inc(struct kbase_va_region *region)
{
lockdep_assert_held(&kctx->reg_lock);
WARN_ON(kbase_is_region_shrinkable(region));
WARN_ON(region->no_user_free_refcnt == INT_MAX);
WARN_ON(atomic_read(&region->no_user_free_count) == INT_MAX);
/* non-atomic as kctx->reg_lock is held */
region->no_user_free_refcnt++;
return region;
atomic_inc(&region->no_user_free_count);
}
/**
* kbase_va_region_no_user_free_put - Decrement "no user free" refcount for a region.
* kbase_va_region_no_user_free_dec - Decrement "no user free" count for a region.
*
* @kctx: Pointer to kbase context.
* @region: Pointer to region (not shrinkable).
*/
static inline void kbase_va_region_no_user_free_put(struct kbase_context *kctx,
struct kbase_va_region *region)
static inline void kbase_va_region_no_user_free_dec(struct kbase_va_region *region)
{
lockdep_assert_held(&kctx->reg_lock);
WARN_ON(!kbase_va_region_is_no_user_free(region));
WARN_ON(!kbase_va_region_is_no_user_free(kctx, region));
/* non-atomic as kctx->reg_lock is held */
region->no_user_free_refcnt--;
atomic_dec(&region->no_user_free_count);
}
/* Common functions */
@@ -1148,6 +1129,9 @@ void kbase_mem_pool_free_locked(struct kbase_mem_pool *pool, struct page *p,
* @pages: Pointer to array where the physical address of the allocated
* pages will be stored.
* @partial_allowed: If fewer pages allocated is allowed
* @page_owner: Pointer to the task that created the Kbase context for which
* the pages are being allocated. It can be NULL if the pages
* won't be associated with any Kbase context.
*
* Like kbase_mem_pool_alloc() but optimized for allocating many pages.
*
@@ -1164,7 +1148,8 @@ void kbase_mem_pool_free_locked(struct kbase_mem_pool *pool, struct page *p,
* this lock, it should use kbase_mem_pool_alloc_pages_locked() instead.
*/
int kbase_mem_pool_alloc_pages(struct kbase_mem_pool *pool, size_t nr_4k_pages,
struct tagged_addr *pages, bool partial_allowed);
struct tagged_addr *pages, bool partial_allowed,
struct task_struct *page_owner);
/**
* kbase_mem_pool_alloc_pages_locked - Allocate pages from memory pool
@@ -1276,13 +1261,17 @@ void kbase_mem_pool_set_max_size(struct kbase_mem_pool *pool, size_t max_size);
* kbase_mem_pool_grow - Grow the pool
* @pool: Memory pool to grow
* @nr_to_grow: Number of pages to add to the pool
* @page_owner: Pointer to the task that created the Kbase context for which
* the memory pool is being grown. It can be NULL if the pages
* to be allocated won't be associated with any Kbase context.
*
* Adds @nr_to_grow pages to the pool. Note that this may cause the pool to
* become larger than the maximum size specified.
*
* Return: 0 on success, -ENOMEM if unable to allocate sufficent pages
*/
int kbase_mem_pool_grow(struct kbase_mem_pool *pool, size_t nr_to_grow);
int kbase_mem_pool_grow(struct kbase_mem_pool *pool, size_t nr_to_grow,
struct task_struct *page_owner);
/**
* kbase_mem_pool_trim - Grow or shrink the pool to a new size
@@ -1398,8 +1387,8 @@ struct kbase_va_region *kbase_region_tracker_find_region_base_address(
struct kbase_va_region *kbase_find_region_base_address(struct rb_root *rbtree,
u64 gpu_addr);
struct kbase_va_region *kbase_alloc_free_region(struct rb_root *rbtree,
u64 start_pfn, size_t nr_pages, int zone);
struct kbase_va_region *kbase_alloc_free_region(struct kbase_device *kbdev, struct rb_root *rbtree,
u64 start_pfn, size_t nr_pages, int zone);
void kbase_free_alloced_region(struct kbase_va_region *reg);
int kbase_add_va_region(struct kbase_context *kctx, struct kbase_va_region *reg,
u64 addr, size_t nr_pages, size_t align);
@@ -1410,6 +1399,32 @@ int kbase_add_va_region_rbtree(struct kbase_device *kbdev,
bool kbase_check_alloc_flags(unsigned long flags);
bool kbase_check_import_flags(unsigned long flags);
static inline bool kbase_import_size_is_valid(struct kbase_device *kbdev, u64 va_pages)
{
if (va_pages > KBASE_MEM_ALLOC_MAX_SIZE) {
dev_dbg(
kbdev->dev,
"Import attempted with va_pages==%lld larger than KBASE_MEM_ALLOC_MAX_SIZE!",
(unsigned long long)va_pages);
return false;
}
return true;
}
static inline bool kbase_alias_size_is_valid(struct kbase_device *kbdev, u64 va_pages)
{
if (va_pages > KBASE_MEM_ALLOC_MAX_SIZE) {
dev_dbg(
kbdev->dev,
"Alias attempted with va_pages==%lld larger than KBASE_MEM_ALLOC_MAX_SIZE!",
(unsigned long long)va_pages);
return false;
}
return true;
}
/**
* kbase_check_alloc_sizes - check user space sizes parameters for an
* allocation
@@ -1737,7 +1752,7 @@ int kbase_alloc_phy_pages_helper(struct kbase_mem_phy_alloc *alloc,
*
* @prealloc_sa: Information about the partial allocation if the amount of memory requested
* is not a multiple of 2MB. One instance of struct kbase_sub_alloc must be
* allocated by the caller iff CONFIG_MALI_2MB_ALLOC is enabled.
* allocated by the caller if kbdev->pagesize_2mb is enabled.
*
* Allocates @nr_pages_requested and updates the alloc object. This function does not allocate new
* pages from the kernel, and therefore will never trigger the OoM killer. Therefore, it can be
@@ -1765,7 +1780,7 @@ int kbase_alloc_phy_pages_helper(struct kbase_mem_phy_alloc *alloc,
* This ensures that the pool can be grown to the required size and that the allocation can
* complete without another thread using the newly grown pages.
*
* If CONFIG_MALI_2MB_ALLOC is defined and the allocation is >= 2MB, then @pool must be one of the
* If kbdev->pagesize_2mb is enabled and the allocation is >= 2MB, then @pool must be one of the
* pools from alloc->imported.native.kctx->mem_pools.large[]. Otherwise it must be one of the
* mempools from alloc->imported.native.kctx->mem_pools.small[].
*
@@ -2494,8 +2509,7 @@ kbase_ctx_reg_zone_get(struct kbase_context *kctx, unsigned long zone_bits)
* kbase_mem_allow_alloc - Check if allocation of GPU memory is allowed
* @kctx: Pointer to kbase context
*
* Don't allow the allocation of GPU memory until user space has set up the
* tracking page (which sets kctx->process_mm) or if the ioctl has been issued
* Don't allow the allocation of GPU memory if the ioctl has been issued
* from the forked child process using the mali device file fd inherited from
* the parent process.
*
@@ -2503,13 +2517,23 @@ kbase_ctx_reg_zone_get(struct kbase_context *kctx, unsigned long zone_bits)
*/
static inline bool kbase_mem_allow_alloc(struct kbase_context *kctx)
{
bool allow_alloc = true;
return (kctx->process_mm == current->mm);
}
rcu_read_lock();
allow_alloc = (rcu_dereference(kctx->process_mm) == current->mm);
rcu_read_unlock();
return allow_alloc;
/**
* kbase_mem_mmgrab - Wrapper function to take reference on mm_struct of current process
*/
static inline void kbase_mem_mmgrab(void)
{
/* This merely takes a reference on the memory descriptor structure
* i.e. mm_struct of current process and not on its address space and
* so won't block the freeing of address space on process exit.
*/
#if KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE
atomic_inc(&current->mm->mm_count);
#else
mmgrab(current->mm);
#endif
}
/**

View File

@@ -37,7 +37,7 @@
#include <linux/memory_group_manager.h>
#include <linux/math64.h>
#include <linux/migrate.h>
#include <linux/version.h>
#include <mali_kbase.h>
#include <mali_kbase_mem_linux.h>
#include <tl/mali_kbase_tracepoints.h>
@@ -385,8 +385,7 @@ struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx, u64 va_pages
zone = KBASE_REG_ZONE_CUSTOM_VA;
}
reg = kbase_alloc_free_region(rbtree, PFN_DOWN(*gpu_va),
va_pages, zone);
reg = kbase_alloc_free_region(kctx->kbdev, rbtree, PFN_DOWN(*gpu_va), va_pages, zone);
if (!reg) {
dev_err(dev, "Failed to allocate free region");
@@ -481,22 +480,22 @@ struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx, u64 va_pages
} else /* we control the VA */ {
size_t align = 1;
#ifdef CONFIG_MALI_2MB_ALLOC
/* If there's enough (> 33 bits) of GPU VA space, align to 2MB
* boundaries. The similar condition is used for mapping from
* the SAME_VA zone inside kbase_context_get_unmapped_area().
*/
if (kctx->kbdev->gpu_props.mmu.va_bits > 33) {
if (va_pages >= (SZ_2M / SZ_4K))
align = (SZ_2M / SZ_4K);
}
if (*gpu_va)
align = 1;
if (kctx->kbdev->pagesize_2mb) {
/* If there's enough (> 33 bits) of GPU VA space, align to 2MB
* boundaries. The similar condition is used for mapping from
* the SAME_VA zone inside kbase_context_get_unmapped_area().
*/
if (kctx->kbdev->gpu_props.mmu.va_bits > 33) {
if (va_pages >= (SZ_2M / SZ_4K))
align = (SZ_2M / SZ_4K);
}
if (*gpu_va)
align = 1;
#if !MALI_USE_CSF
if (reg->flags & KBASE_REG_TILER_ALIGN_TOP)
align = 1;
if (reg->flags & KBASE_REG_TILER_ALIGN_TOP)
align = 1;
#endif /* !MALI_USE_CSF */
#endif /* CONFIG_MALI_2MB_ALLOC */
}
if (kbase_gpu_mmap(kctx, reg, *gpu_va, va_pages, align,
mmu_sync_info) != 0) {
dev_warn(dev, "Failed to map memory on GPU");
@@ -999,7 +998,7 @@ int kbase_mem_flags_change(struct kbase_context *kctx, u64 gpu_addr, unsigned in
* & GPU queue ringbuffer and none of them needs to be explicitly marked
* as evictable by Userspace.
*/
if (kbase_va_region_is_no_user_free(kctx, reg))
if (kbase_va_region_is_no_user_free(reg))
goto out_unlock;
/* Is the region being transitioning between not needed and needed? */
@@ -1322,10 +1321,11 @@ int kbase_mem_umm_map(struct kbase_context *kctx,
gwt_mask = ~KBASE_REG_GPU_WR;
#endif
err = kbase_mmu_insert_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn,
kbase_get_gpu_phy_pages(reg),
kbase_reg_current_backed_size(reg), reg->flags & gwt_mask,
kctx->as_nr, alloc->group_id, mmu_sync_info, NULL, true);
err = kbase_mmu_insert_imported_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn,
kbase_get_gpu_phy_pages(reg),
kbase_reg_current_backed_size(reg),
reg->flags & gwt_mask, kctx->as_nr, alloc->group_id,
mmu_sync_info, NULL);
if (err)
goto bad_insert;
@@ -1338,11 +1338,11 @@ int kbase_mem_umm_map(struct kbase_context *kctx,
* Assume alloc->nents is the number of actual pages in the
* dma-buf memory.
*/
err = kbase_mmu_insert_single_page(
kctx, reg->start_pfn + alloc->nents,
kctx->aliasing_sink_page, reg->nr_pages - alloc->nents,
(reg->flags | KBASE_REG_GPU_RD) & ~KBASE_REG_GPU_WR,
KBASE_MEM_GROUP_SINK, mmu_sync_info);
err = kbase_mmu_insert_single_imported_page(
kctx, reg->start_pfn + alloc->nents, kctx->aliasing_sink_page,
reg->nr_pages - alloc->nents,
(reg->flags | KBASE_REG_GPU_RD) & ~KBASE_REG_GPU_WR, KBASE_MEM_GROUP_SINK,
mmu_sync_info);
if (err)
goto bad_pad_insert;
}
@@ -1351,7 +1351,7 @@ int kbase_mem_umm_map(struct kbase_context *kctx,
bad_pad_insert:
kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn, alloc->pages,
alloc->nents, kctx->as_nr, true);
alloc->nents, alloc->nents, kctx->as_nr, true);
bad_insert:
kbase_mem_umm_unmap_attachment(kctx, alloc);
bad_map_attachment:
@@ -1380,7 +1380,8 @@ void kbase_mem_umm_unmap(struct kbase_context *kctx,
int err;
err = kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn,
alloc->pages, reg->nr_pages, kctx->as_nr, true);
alloc->pages, reg->nr_pages, reg->nr_pages,
kctx->as_nr, true);
WARN_ON(err);
}
@@ -1452,6 +1453,9 @@ static struct kbase_va_region *kbase_mem_from_umm(struct kbase_context *kctx,
return NULL;
}
if (!kbase_import_size_is_valid(kctx->kbdev, *va_pages))
return NULL;
/* ignore SAME_VA */
*flags &= ~BASE_MEM_SAME_VA;
@@ -1472,23 +1476,21 @@ static struct kbase_va_region *kbase_mem_from_umm(struct kbase_context *kctx,
if (*flags & BASE_MEM_IMPORT_SYNC_ON_MAP_UNMAP)
need_sync = true;
#if IS_ENABLED(CONFIG_64BIT)
if (!kbase_ctx_flag(kctx, KCTX_COMPAT)) {
if (!kbase_ctx_compat_mode(kctx)) {
/*
* 64-bit tasks require us to reserve VA on the CPU that we use
* on the GPU.
*/
shared_zone = true;
}
#endif
if (shared_zone) {
*flags |= BASE_MEM_NEED_MMAP;
reg = kbase_alloc_free_region(&kctx->reg_rbtree_same,
0, *va_pages, KBASE_REG_ZONE_SAME_VA);
reg = kbase_alloc_free_region(kctx->kbdev, &kctx->reg_rbtree_same, 0, *va_pages,
KBASE_REG_ZONE_SAME_VA);
} else {
reg = kbase_alloc_free_region(&kctx->reg_rbtree_custom,
0, *va_pages, KBASE_REG_ZONE_CUSTOM_VA);
reg = kbase_alloc_free_region(kctx->kbdev, &kctx->reg_rbtree_custom, 0, *va_pages,
KBASE_REG_ZONE_CUSTOM_VA);
}
if (!reg) {
@@ -1621,21 +1623,22 @@ static struct kbase_va_region *kbase_mem_from_user_buffer(
/* 64-bit address range is the max */
goto bad_size;
if (!kbase_import_size_is_valid(kctx->kbdev, *va_pages))
goto bad_size;
/* SAME_VA generally not supported with imported memory (no known use cases) */
*flags &= ~BASE_MEM_SAME_VA;
if (*flags & BASE_MEM_IMPORT_SHARED)
shared_zone = true;
#if IS_ENABLED(CONFIG_64BIT)
if (!kbase_ctx_flag(kctx, KCTX_COMPAT)) {
if (!kbase_ctx_compat_mode(kctx)) {
/*
* 64-bit tasks require us to reserve VA on the CPU that we use
* on the GPU.
*/
shared_zone = true;
}
#endif
if (shared_zone) {
*flags |= BASE_MEM_NEED_MMAP;
@@ -1644,7 +1647,7 @@ static struct kbase_va_region *kbase_mem_from_user_buffer(
} else
rbtree = &kctx->reg_rbtree_custom;
reg = kbase_alloc_free_region(rbtree, 0, *va_pages, zone);
reg = kbase_alloc_free_region(kctx->kbdev, rbtree, 0, *va_pages, zone);
if (!reg)
goto no_region;
@@ -1670,11 +1673,7 @@ static struct kbase_va_region *kbase_mem_from_user_buffer(
user_buf->address = address;
user_buf->nr_pages = *va_pages;
user_buf->mm = current->mm;
#if KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE
atomic_inc(&current->mm->mm_count);
#else
mmgrab(current->mm);
#endif
kbase_mem_mmgrab();
if (reg->gpu_alloc->properties & KBASE_MEM_PHY_ALLOC_LARGE)
user_buf->pages = vmalloc(*va_pages * sizeof(struct page *));
else
@@ -1749,10 +1748,13 @@ static struct kbase_va_region *kbase_mem_from_user_buffer(
* region, otherwise the initial content of memory would be wrong.
*/
for (i = 0; i < faulted_pages; i++) {
dma_addr_t dma_addr =
dma_map_page_attrs(dev, pages[i], 0, PAGE_SIZE, DMA_BIDIRECTIONAL,
DMA_ATTR_SKIP_CPU_SYNC);
dma_addr_t dma_addr;
#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE)
dma_addr = dma_map_page(dev, pages[i], 0, PAGE_SIZE, DMA_BIDIRECTIONAL);
#else
dma_addr = dma_map_page_attrs(dev, pages[i], 0, PAGE_SIZE,
DMA_BIDIRECTIONAL, DMA_ATTR_SKIP_CPU_SYNC);
#endif
if (dma_mapping_error(dev, dma_addr))
goto unwind_dma_map;
@@ -1779,8 +1781,12 @@ unwind_dma_map:
dma_addr_t dma_addr = user_buf->dma_addrs[i];
dma_sync_single_for_device(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE)
dma_unmap_page(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
#else
dma_unmap_page_attrs(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL,
DMA_ATTR_SKIP_CPU_SYNC);
#endif
}
fault_mismatch:
if (pages) {
@@ -1856,22 +1862,19 @@ u64 kbase_mem_alias(struct kbase_context *kctx, u64 *flags, u64 stride,
/* calculate the number of pages this alias will cover */
*num_pages = nents * stride;
#if IS_ENABLED(CONFIG_64BIT)
if (!kbase_ctx_flag(kctx, KCTX_COMPAT)) {
if (!kbase_alias_size_is_valid(kctx->kbdev, *num_pages))
goto bad_size;
if (!kbase_ctx_compat_mode(kctx)) {
/* 64-bit tasks must MMAP anyway, but not expose this address to
* clients
*/
*flags |= BASE_MEM_NEED_MMAP;
reg = kbase_alloc_free_region(&kctx->reg_rbtree_same, 0,
*num_pages,
KBASE_REG_ZONE_SAME_VA);
reg = kbase_alloc_free_region(kctx->kbdev, &kctx->reg_rbtree_same, 0, *num_pages,
KBASE_REG_ZONE_SAME_VA);
} else {
#else
if (1) {
#endif
reg = kbase_alloc_free_region(&kctx->reg_rbtree_custom,
0, *num_pages,
KBASE_REG_ZONE_CUSTOM_VA);
reg = kbase_alloc_free_region(kctx->kbdev, &kctx->reg_rbtree_custom, 0, *num_pages,
KBASE_REG_ZONE_CUSTOM_VA);
}
if (!reg)
@@ -1922,7 +1925,7 @@ u64 kbase_mem_alias(struct kbase_context *kctx, u64 *flags, u64 stride,
goto bad_handle; /* Not found/already free */
if (kbase_is_region_shrinkable(aliasing_reg))
goto bad_handle; /* Ephemeral region */
if (kbase_va_region_is_no_user_free(kctx, aliasing_reg))
if (kbase_va_region_is_no_user_free(aliasing_reg))
goto bad_handle; /* JIT regions can't be
* aliased. NO_USER_FREE flag
* covers the entire lifetime
@@ -1977,8 +1980,7 @@ u64 kbase_mem_alias(struct kbase_context *kctx, u64 *flags, u64 stride,
}
}
#if IS_ENABLED(CONFIG_64BIT)
if (!kbase_ctx_flag(kctx, KCTX_COMPAT)) {
if (!kbase_ctx_compat_mode(kctx)) {
/* Bind to a cookie */
if (bitmap_empty(kctx->cookies, BITS_PER_LONG)) {
dev_err(kctx->kbdev->dev, "No cookies available for allocation!");
@@ -1993,10 +1995,8 @@ u64 kbase_mem_alias(struct kbase_context *kctx, u64 *flags, u64 stride,
/* relocate to correct base */
gpu_va += PFN_DOWN(BASE_MEM_COOKIE_BASE);
gpu_va <<= PAGE_SHIFT;
} else /* we control the VA */ {
#else
if (1) {
#endif
} else {
/* we control the VA */
if (kbase_gpu_mmap(kctx, reg, 0, *num_pages, 1,
mmu_sync_info) != 0) {
dev_warn(kctx->kbdev->dev, "Failed to map memory on GPU");
@@ -2013,9 +2013,7 @@ u64 kbase_mem_alias(struct kbase_context *kctx, u64 *flags, u64 stride,
return gpu_va;
#if IS_ENABLED(CONFIG_64BIT)
no_cookie:
#endif
no_mmap:
bad_handle:
/* Marking the source allocs as not being mapped on the GPU and putting
@@ -2230,7 +2228,7 @@ int kbase_mem_shrink_gpu_mapping(struct kbase_context *const kctx,
int ret = 0;
ret = kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn + new_pages,
alloc->pages + new_pages, delta, kctx->as_nr, false);
alloc->pages + new_pages, delta, delta, kctx->as_nr, false);
return ret;
}
@@ -2298,7 +2296,7 @@ int kbase_mem_commit(struct kbase_context *kctx, u64 gpu_addr, u64 new_pages)
if (kbase_is_region_shrinkable(reg))
goto out_unlock;
if (kbase_va_region_is_no_user_free(kctx, reg))
if (kbase_va_region_is_no_user_free(reg))
goto out_unlock;
#ifdef CONFIG_MALI_MEMORY_FULLY_BACKED
@@ -2401,18 +2399,19 @@ int kbase_mem_shrink(struct kbase_context *const kctx,
kbase_free_phy_pages_helper(reg->cpu_alloc, delta);
if (reg->cpu_alloc != reg->gpu_alloc)
kbase_free_phy_pages_helper(reg->gpu_alloc, delta);
#ifdef CONFIG_MALI_2MB_ALLOC
if (kbase_reg_current_backed_size(reg) > new_pages) {
old_pages = new_pages;
new_pages = kbase_reg_current_backed_size(reg);
/* Update GPU mapping. */
err = kbase_mem_grow_gpu_mapping(kctx, reg,
new_pages, old_pages, CALLER_MMU_ASYNC);
if (kctx->kbdev->pagesize_2mb) {
if (kbase_reg_current_backed_size(reg) > new_pages) {
old_pages = new_pages;
new_pages = kbase_reg_current_backed_size(reg);
/* Update GPU mapping. */
err = kbase_mem_grow_gpu_mapping(kctx, reg, new_pages, old_pages,
CALLER_MMU_ASYNC);
}
} else {
WARN_ON(kbase_reg_current_backed_size(reg) != new_pages);
}
#else
WARN_ON(kbase_reg_current_backed_size(reg) != new_pages);
#endif
}
return err;
@@ -2710,8 +2709,8 @@ static int kbase_mmu_dump_mmap(struct kbase_context *kctx,
goto out;
}
new_reg = kbase_alloc_free_region(&kctx->reg_rbtree_same, 0, nr_pages,
KBASE_REG_ZONE_SAME_VA);
new_reg = kbase_alloc_free_region(kctx->kbdev, &kctx->reg_rbtree_same, 0, nr_pages,
KBASE_REG_ZONE_SAME_VA);
if (!new_reg) {
err = -ENOMEM;
WARN_ON(1);
@@ -3381,79 +3380,29 @@ static void kbasep_add_mm_counter(struct mm_struct *mm, int member, long value)
void kbasep_os_process_page_usage_update(struct kbase_context *kctx, int pages)
{
struct mm_struct *mm;
struct mm_struct *mm = kctx->process_mm;
rcu_read_lock();
mm = rcu_dereference(kctx->process_mm);
if (mm) {
atomic_add(pages, &kctx->nonmapped_pages);
#ifdef SPLIT_RSS_COUNTING
kbasep_add_mm_counter(mm, MM_FILEPAGES, pages);
#else
spin_lock(&mm->page_table_lock);
kbasep_add_mm_counter(mm, MM_FILEPAGES, pages);
spin_unlock(&mm->page_table_lock);
#endif
}
rcu_read_unlock();
}
static void kbasep_os_process_page_usage_drain(struct kbase_context *kctx)
{
int pages;
struct mm_struct *mm;
spin_lock(&kctx->mm_update_lock);
mm = rcu_dereference_protected(kctx->process_mm, lockdep_is_held(&kctx->mm_update_lock));
if (!mm) {
spin_unlock(&kctx->mm_update_lock);
if (unlikely(!mm))
return;
}
rcu_assign_pointer(kctx->process_mm, NULL);
spin_unlock(&kctx->mm_update_lock);
synchronize_rcu();
pages = atomic_xchg(&kctx->nonmapped_pages, 0);
atomic_add(pages, &kctx->nonmapped_pages);
#ifdef SPLIT_RSS_COUNTING
kbasep_add_mm_counter(mm, MM_FILEPAGES, -pages);
kbasep_add_mm_counter(mm, MM_FILEPAGES, pages);
#else
spin_lock(&mm->page_table_lock);
kbasep_add_mm_counter(mm, MM_FILEPAGES, -pages);
kbasep_add_mm_counter(mm, MM_FILEPAGES, pages);
spin_unlock(&mm->page_table_lock);
#endif
}
static void kbase_special_vm_close(struct vm_area_struct *vma)
{
struct kbase_context *kctx;
kctx = vma->vm_private_data;
kbasep_os_process_page_usage_drain(kctx);
}
static const struct vm_operations_struct kbase_vm_special_ops = {
.close = kbase_special_vm_close,
};
static int kbase_tracking_page_setup(struct kbase_context *kctx, struct vm_area_struct *vma)
{
/* check that this is the only tracking page */
spin_lock(&kctx->mm_update_lock);
if (rcu_dereference_protected(kctx->process_mm, lockdep_is_held(&kctx->mm_update_lock))) {
spin_unlock(&kctx->mm_update_lock);
return -EFAULT;
}
rcu_assign_pointer(kctx->process_mm, current->mm);
spin_unlock(&kctx->mm_update_lock);
if (vma_pages(vma) != 1)
return -EINVAL;
/* no real access */
vma->vm_flags &= ~(VM_READ | VM_MAYREAD | VM_WRITE | VM_MAYWRITE | VM_EXEC | VM_MAYEXEC);
vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND | VM_DONTDUMP | VM_IO;
vma->vm_ops = &kbase_vm_special_ops;
vma->vm_private_data = kctx;
return 0;
}
@@ -3726,23 +3675,27 @@ static void kbase_csf_user_reg_vm_open(struct vm_area_struct *vma)
static void kbase_csf_user_reg_vm_close(struct vm_area_struct *vma)
{
struct kbase_context *kctx = vma->vm_private_data;
struct kbase_device *kbdev;
if (!kctx) {
if (unlikely(!kctx)) {
pr_debug("Close function called for the unexpected mapping");
return;
}
if (unlikely(!kctx->csf.user_reg_vma))
dev_warn(kctx->kbdev->dev, "user_reg_vma pointer unexpectedly NULL");
kbdev = kctx->kbdev;
kctx->csf.user_reg_vma = NULL;
if (unlikely(!kctx->csf.user_reg.vma))
dev_warn(kbdev->dev, "user_reg VMA pointer unexpectedly NULL for ctx %d_%d",
kctx->tgid, kctx->id);
mutex_lock(&kctx->kbdev->csf.reg_lock);
if (unlikely(kctx->kbdev->csf.nr_user_page_mapped == 0))
dev_warn(kctx->kbdev->dev, "Unexpected value for the USER page mapping counter");
else
kctx->kbdev->csf.nr_user_page_mapped--;
mutex_unlock(&kctx->kbdev->csf.reg_lock);
mutex_lock(&kbdev->csf.reg_lock);
list_del_init(&kctx->csf.user_reg.link);
mutex_unlock(&kbdev->csf.reg_lock);
kctx->csf.user_reg.vma = NULL;
/* Now as the VMA is closed, drop the reference on mali device file */
fput(kctx->filp);
}
/**
@@ -3787,10 +3740,11 @@ static vm_fault_t kbase_csf_user_reg_vm_fault(struct vm_fault *vmf)
unsigned long flags;
/* Few sanity checks up front */
if (!kctx || (nr_pages != 1) || (vma != kctx->csf.user_reg_vma) ||
(vma->vm_pgoff != PFN_DOWN(BASEP_MEM_CSF_USER_REG_PAGE_HANDLE))) {
pr_warn("Unexpected CPU page fault on USER page mapping for process %s tgid %d pid %d\n",
current->comm, current->tgid, current->pid);
if (!kctx || (nr_pages != 1) || (vma != kctx->csf.user_reg.vma) ||
(vma->vm_pgoff != kctx->csf.user_reg.file_offset)) {
pr_err("Unexpected CPU page fault on USER page mapping for process %s tgid %d pid %d\n",
current->comm, current->tgid, current->pid);
return VM_FAULT_SIGBUS;
}
@@ -3799,22 +3753,22 @@ static vm_fault_t kbase_csf_user_reg_vm_fault(struct vm_fault *vmf)
pfn = PFN_DOWN(kbdev->reg_start + USER_BASE);
mutex_lock(&kbdev->csf.reg_lock);
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
/* Don't map in the actual register page if GPU is powered down.
* Always map in the dummy page in no mali builds.
/* Dummy page will be mapped during GPU off.
*
* In no mail builds, always map in the dummy page.
*/
#if IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI)
pfn = PFN_DOWN(as_phys_addr_t(kbdev->csf.dummy_user_reg_page));
#else
if (!kbdev->pm.backend.gpu_powered)
pfn = PFN_DOWN(as_phys_addr_t(kbdev->csf.dummy_user_reg_page));
#endif
if (IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) || !kbdev->pm.backend.gpu_powered)
pfn = PFN_DOWN(as_phys_addr_t(kbdev->csf.user_reg.dummy_page));
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
list_move_tail(&kctx->csf.user_reg.link, &kbdev->csf.user_reg.list);
ret = mgm_dev->ops.mgm_vmf_insert_pfn_prot(mgm_dev,
KBASE_MEM_GROUP_CSF_FW, vma,
vma->vm_start, pfn,
vma->vm_page_prot);
mutex_unlock(&kbdev->csf.reg_lock);
return ret;
@@ -3827,20 +3781,6 @@ static const struct vm_operations_struct kbase_csf_user_reg_vm_ops = {
.fault = kbase_csf_user_reg_vm_fault
};
/**
* kbase_csf_cpu_mmap_user_reg_page - Memory map method for USER page.
*
* @kctx: Pointer of the kernel context.
* @vma: Pointer to the struct containing the information about
* the userspace mapping of USER page.
*
* Return: 0 on success, error code otherwise.
*
* Note:
* New Base will request Kbase to read the LATEST_FLUSH of USER page on its behalf.
* But this function needs to be kept for backward-compatibility as old Base (<=1.12)
* will try to mmap USER page for direct access when it creates a base context.
*/
static int kbase_csf_cpu_mmap_user_reg_page(struct kbase_context *kctx,
struct vm_area_struct *vma)
{
@@ -3848,7 +3788,7 @@ static int kbase_csf_cpu_mmap_user_reg_page(struct kbase_context *kctx,
struct kbase_device *kbdev = kctx->kbdev;
/* Few sanity checks */
if (kctx->csf.user_reg_vma)
if (kctx->csf.user_reg.vma)
return -EBUSY;
if (nr_pages != 1)
@@ -3867,19 +3807,21 @@ static int kbase_csf_cpu_mmap_user_reg_page(struct kbase_context *kctx,
*/
vma->vm_flags |= VM_PFNMAP;
kctx->csf.user_reg_vma = vma;
kctx->csf.user_reg.vma = vma;
mutex_lock(&kbdev->csf.reg_lock);
kbdev->csf.nr_user_page_mapped++;
if (!kbdev->csf.mali_file_inode)
kbdev->csf.mali_file_inode = kctx->filp->f_inode;
if (unlikely(kbdev->csf.mali_file_inode != kctx->filp->f_inode))
dev_warn(kbdev->dev, "Device file inode pointer not same for all contexts");
kctx->csf.user_reg.file_offset = kbdev->csf.user_reg.file_offset++;
mutex_unlock(&kbdev->csf.reg_lock);
/* Make VMA point to the special internal file, but don't drop the
* reference on mali device file (that would be done later when the
* VMA is closed).
*/
vma->vm_file = kctx->kbdev->csf.user_reg.filp;
get_file(vma->vm_file);
/* Also adjust the vm_pgoff */
vma->vm_pgoff = kctx->csf.user_reg.file_offset;
vma->vm_ops = &kbase_csf_user_reg_vm_ops;
vma->vm_private_data = kctx;

View File

@@ -28,6 +28,11 @@
#include <linux/shrinker.h>
#include <linux/atomic.h>
#include <linux/version.h>
#if KERNEL_VERSION(4, 11, 0) <= LINUX_VERSION_CODE
#include <linux/sched/signal.h>
#else
#include <linux/signal.h>
#endif
#define pool_dbg(pool, format, ...) \
dev_dbg(pool->kbdev->dev, "%s-pool [%zu/%zu]: " format, \
@@ -39,6 +44,47 @@
#define NOT_DIRTY false
#define NOT_RECLAIMED false
/**
* can_alloc_page() - Check if the current thread can allocate a physical page
*
* @pool: Pointer to the memory pool.
* @page_owner: Pointer to the task/process that created the Kbase context
* for which a page needs to be allocated. It can be NULL if
* the page won't be associated with Kbase context.
* @alloc_from_kthread: Flag indicating that the current thread is a kernel thread.
*
* This function checks if the current thread is a kernel thread and can make a
* request to kernel to allocate a physical page. If the kernel thread is allocating
* a page for the Kbase context and the process that created the context is exiting
* or is being killed, then there is no point in doing a page allocation.
*
* The check done by the function is particularly helpful when the system is running
* low on memory. When a page is allocated from the context of a kernel thread, OoM
* killer doesn't consider the kernel thread for killing and kernel keeps retrying
* to allocate the page as long as the OoM killer is able to kill processes.
* The check allows kernel thread to quickly exit the page allocation loop once OoM
* killer has initiated the killing of @page_owner, thereby unblocking the context
* termination for @page_owner and freeing of GPU memory allocated by it. This helps
* in preventing the kernel panic and also limits the number of innocent processes
* that get killed.
*
* Return: true if the page can be allocated otherwise false.
*/
static inline bool can_alloc_page(struct kbase_mem_pool *pool, struct task_struct *page_owner,
const bool alloc_from_kthread)
{
if (likely(!alloc_from_kthread || !page_owner))
return true;
if ((page_owner->flags & PF_EXITING) || fatal_signal_pending(page_owner)) {
dev_info(pool->kbdev->dev, "%s : Process %s/%d exiting",
__func__, page_owner->comm, task_pid_nr(page_owner));
return false;
}
return true;
}
static size_t kbase_mem_pool_capacity(struct kbase_mem_pool *pool)
{
ssize_t max_size = kbase_mem_pool_max_size(pool);
@@ -342,10 +388,12 @@ static size_t kbase_mem_pool_shrink(struct kbase_mem_pool *pool,
return nr_freed;
}
int kbase_mem_pool_grow(struct kbase_mem_pool *pool, size_t nr_to_grow)
int kbase_mem_pool_grow(struct kbase_mem_pool *pool, size_t nr_to_grow,
struct task_struct *page_owner)
{
struct page *p;
size_t i;
const bool alloc_from_kthread = !!(current->flags & PF_KTHREAD);
kbase_mem_pool_lock(pool);
@@ -360,6 +408,9 @@ int kbase_mem_pool_grow(struct kbase_mem_pool *pool, size_t nr_to_grow)
}
kbase_mem_pool_unlock(pool);
if (unlikely(!can_alloc_page(pool, page_owner, alloc_from_kthread)))
return -ENOMEM;
p = kbase_mem_alloc_page(pool);
if (!p) {
kbase_mem_pool_lock(pool);
@@ -392,7 +443,7 @@ void kbase_mem_pool_trim(struct kbase_mem_pool *pool, size_t new_size)
if (new_size < cur_size)
kbase_mem_pool_shrink(pool, cur_size - new_size);
else if (new_size > cur_size)
err = kbase_mem_pool_grow(pool, new_size - cur_size);
err = kbase_mem_pool_grow(pool, new_size - cur_size, NULL);
if (err) {
size_t grown_size = kbase_mem_pool_size(pool);
@@ -656,13 +707,15 @@ void kbase_mem_pool_free_locked(struct kbase_mem_pool *pool, struct page *p,
}
int kbase_mem_pool_alloc_pages(struct kbase_mem_pool *pool, size_t nr_4k_pages,
struct tagged_addr *pages, bool partial_allowed)
struct tagged_addr *pages, bool partial_allowed,
struct task_struct *page_owner)
{
struct page *p;
size_t nr_from_pool;
size_t i = 0;
int err = -ENOMEM;
size_t nr_pages_internal;
const bool alloc_from_kthread = !!(current->flags & PF_KTHREAD);
nr_pages_internal = nr_4k_pages / (1u << (pool->order));
@@ -697,7 +750,7 @@ int kbase_mem_pool_alloc_pages(struct kbase_mem_pool *pool, size_t nr_4k_pages,
if (i != nr_4k_pages && pool->next_pool) {
/* Allocate via next pool */
err = kbase_mem_pool_alloc_pages(pool->next_pool, nr_4k_pages - i, pages + i,
partial_allowed);
partial_allowed, page_owner);
if (err < 0)
goto err_rollback;
@@ -706,6 +759,9 @@ int kbase_mem_pool_alloc_pages(struct kbase_mem_pool *pool, size_t nr_4k_pages,
} else {
/* Get any remaining pages from kernel */
while (i != nr_4k_pages) {
if (unlikely(!can_alloc_page(pool, page_owner, alloc_from_kthread)))
goto err_rollback;
p = kbase_mem_alloc_page(pool);
if (!p) {
if (partial_allowed)

View File

@@ -0,0 +1,57 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
* (C) COPYRIGHT 2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
* Foundation, and any use by you of this program is subject to the terms
* of such GNU license.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, you can access it online at
* http://www.gnu.org/licenses/gpl-2.0.html.
*
*/
#ifndef _KBASE_REFCOUNT_DEFS_H_
#define _KBASE_REFCOUNT_DEFS_H_
/*
* The Refcount API is available from 4.11 onwards
* This file hides the compatibility issues with this for the rest the driver
*/
#include <linux/version.h>
#include <linux/types.h>
#if (KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE)
#define kbase_refcount_t atomic_t
#define kbase_refcount_read(x) atomic_read(x)
#define kbase_refcount_set(x, v) atomic_set(x, v)
#define kbase_refcount_dec_and_test(x) atomic_dec_and_test(x)
#define kbase_refcount_dec(x) atomic_dec(x)
#define kbase_refcount_inc_not_zero(x) atomic_inc_not_zero(x)
#define kbase_refcount_inc(x) atomic_inc(x)
#else
#include <linux/refcount.h>
#define kbase_refcount_t refcount_t
#define kbase_refcount_read(x) refcount_read(x)
#define kbase_refcount_set(x, v) refcount_set(x, v)
#define kbase_refcount_dec_and_test(x) refcount_dec_and_test(x)
#define kbase_refcount_dec(x) refcount_dec(x)
#define kbase_refcount_inc_not_zero(x) refcount_inc_not_zero(x)
#define kbase_refcount_inc(x) refcount_inc(x)
#endif /* (KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE) */
#endif /* _KBASE_REFCOUNT_DEFS_H_ */

View File

@@ -943,6 +943,13 @@ static int kbase_jit_allocate_prepare(struct kbase_jd_atom *katom)
int ret;
u32 i;
if (!kbase_mem_allow_alloc(kctx)) {
dev_dbg(kbdev->dev, "Invalid attempt to allocate JIT memory by %s/%d for ctx %d_%d",
current->comm, current->pid, kctx->tgid, kctx->id);
ret = -EINVAL;
goto fail;
}
/* For backwards compatibility, and to prevent reading more than 1 jit
* info struct on jit version 1
*/

View File

@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
* (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -150,17 +150,18 @@ void kbase_gpu_report_bus_fault_and_kill(struct kbase_context *kctx,
"true" : "false";
int as_no = as->number;
unsigned long flags;
const uintptr_t fault_addr = fault->addr;
/* terminal fault, print info about the fault */
dev_err(kbdev->dev,
"GPU bus fault in AS%d at PA 0x%016llX\n"
"GPU bus fault in AS%d at PA %pK\n"
"PA_VALID: %s\n"
"raw fault status: 0x%X\n"
"exception type 0x%X: %s\n"
"access type 0x%X: %s\n"
"source id 0x%X\n"
"pid: %d\n",
as_no, fault->addr,
as_no, (void *)fault_addr,
addr_valid,
status,
exception_type, kbase_gpu_exception_name(exception_type),
@@ -557,6 +558,7 @@ int kbase_mmu_as_init(struct kbase_device *kbdev, unsigned int i)
kbdev->as[i].bf_data.addr = 0ULL;
kbdev->as[i].pf_data.addr = 0ULL;
kbdev->as[i].gf_data.addr = 0ULL;
kbdev->as[i].is_unresponsive = false;
kbdev->as[i].pf_wq = alloc_workqueue("mali_mmu%d", WQ_UNBOUND, 1, i);
if (!kbdev->as[i].pf_wq)

View File

@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
* (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -63,15 +63,16 @@ void kbase_gpu_report_bus_fault_and_kill(struct kbase_context *kctx,
u32 const exception_data = (status >> 8) & 0xFFFFFF;
int const as_no = as->number;
unsigned long flags;
const uintptr_t fault_addr = fault->addr;
/* terminal fault, print info about the fault */
dev_err(kbdev->dev,
"GPU bus fault in AS%d at PA 0x%016llX\n"
"GPU bus fault in AS%d at PA %pK\n"
"raw fault status: 0x%X\n"
"exception type 0x%X: %s\n"
"exception data 0x%X\n"
"pid: %d\n",
as_no, fault->addr,
as_no, (void *)fault_addr,
status,
exception_type, kbase_gpu_exception_name(exception_type),
exception_data,
@@ -428,6 +429,7 @@ int kbase_mmu_as_init(struct kbase_device *kbdev, unsigned int i)
kbdev->as[i].number = i;
kbdev->as[i].bf_data.addr = 0ULL;
kbdev->as[i].pf_data.addr = 0ULL;
kbdev->as[i].is_unresponsive = false;
kbdev->as[i].pf_wq = alloc_workqueue("mali_mmu%u", 0, 1, i);
if (!kbdev->as[i].pf_wq)

File diff suppressed because it is too large Load Diff

View File

@@ -152,21 +152,71 @@ u64 kbase_mmu_create_ate(struct kbase_device *kbdev,
struct tagged_addr phy, unsigned long flags, int level, int group_id);
int kbase_mmu_insert_pages_no_flush(struct kbase_device *kbdev, struct kbase_mmu_table *mmut,
const u64 start_vpfn, struct tagged_addr *phys, size_t nr,
u64 vpfn, struct tagged_addr *phys, size_t nr,
unsigned long flags, int group_id, u64 *dirty_pgds,
struct kbase_va_region *reg, bool ignore_page_migration);
int kbase_mmu_insert_pages(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, u64 vpfn,
struct tagged_addr *phys, size_t nr, unsigned long flags, int as_nr,
int group_id, enum kbase_caller_mmu_sync_info mmu_sync_info,
struct kbase_va_region *reg, bool ignore_page_migration);
int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn,
struct tagged_addr phys, size_t nr,
unsigned long flags, int group_id,
enum kbase_caller_mmu_sync_info mmu_sync_info);
int kbase_mmu_insert_imported_pages(struct kbase_device *kbdev, struct kbase_mmu_table *mmut,
u64 vpfn, struct tagged_addr *phys, size_t nr,
unsigned long flags, int as_nr, int group_id,
enum kbase_caller_mmu_sync_info mmu_sync_info,
struct kbase_va_region *reg);
int kbase_mmu_insert_aliased_pages(struct kbase_device *kbdev, struct kbase_mmu_table *mmut,
u64 vpfn, struct tagged_addr *phys, size_t nr,
unsigned long flags, int as_nr, int group_id,
enum kbase_caller_mmu_sync_info mmu_sync_info,
struct kbase_va_region *reg);
int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn, struct tagged_addr phys,
size_t nr, unsigned long flags, int group_id,
enum kbase_caller_mmu_sync_info mmu_sync_info,
bool ignore_page_migration);
int kbase_mmu_insert_single_imported_page(struct kbase_context *kctx, u64 vpfn,
struct tagged_addr phys, size_t nr, unsigned long flags,
int group_id,
enum kbase_caller_mmu_sync_info mmu_sync_info);
int kbase_mmu_insert_single_aliased_page(struct kbase_context *kctx, u64 vpfn,
struct tagged_addr phys, size_t nr, unsigned long flags,
int group_id,
enum kbase_caller_mmu_sync_info mmu_sync_info);
/**
* kbase_mmu_teardown_pages - Remove GPU virtual addresses from the MMU page table
*
* @kbdev: Pointer to kbase device.
* @mmut: Pointer to GPU MMU page table.
* @vpfn: Start page frame number of the GPU virtual pages to unmap.
* @phys: Array of physical pages currently mapped to the virtual
* pages to unmap, or NULL. This is used for GPU cache maintenance
* and page migration support.
* @nr_phys_pages: Number of physical pages to flush.
* @nr_virt_pages: Number of virtual pages whose PTEs should be destroyed.
* @as_nr: Address space number, for GPU cache maintenance operations
* that happen outside a specific kbase context.
* @ignore_page_migration: Whether page migration metadata should be ignored.
*
* We actually discard the ATE and free the page table pages if no valid entries
* exist in PGD.
*
* IMPORTANT: This uses kbasep_js_runpool_release_ctx() when the context is
* currently scheduled into the runpool, and so potentially uses a lot of locks.
* These locks must be taken in the correct order with respect to others
* already held by the caller. Refer to kbasep_js_runpool_release_ctx() for more
* information.
*
* The @p phys pointer to physical pages is not necessary for unmapping virtual memory,
* but it is used for fine-grained GPU cache maintenance. If @p phys is NULL,
* GPU cache maintenance will be done as usual, that is invalidating the whole GPU caches
* instead of specific physical address ranges.
*
* Return: 0 on success, otherwise an error code.
*/
int kbase_mmu_teardown_pages(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, u64 vpfn,
struct tagged_addr *phys, size_t nr, int as_nr,
bool ignore_page_migration);
struct tagged_addr *phys, size_t nr_phys_pages, size_t nr_virt_pages,
int as_nr, bool ignore_page_migration);
int kbase_mmu_update_pages(struct kbase_context *kctx, u64 vpfn,
struct tagged_addr *phys, size_t nr,
unsigned long flags, int const group_id);

View File

@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
* (C) COPYRIGHT 2014-2022 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2014-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -24,6 +24,7 @@
#include <mali_kbase.h>
#include <mali_kbase_ctx_sched.h>
#include <mali_kbase_mem.h>
#include <mali_kbase_reset_gpu.h>
#include <mmu/mali_kbase_mmu_hw.h>
#include <tl/mali_kbase_tracepoints.h>
#include <linux/delay.h>
@@ -156,37 +157,60 @@ static int lock_region(struct kbase_gpu_props const *gpu_props, u64 *lockaddr,
return 0;
}
static int wait_ready(struct kbase_device *kbdev,
unsigned int as_nr)
/**
* wait_ready() - Wait for previously issued MMU command to complete.
*
* @kbdev: Kbase device to wait for a MMU command to complete.
* @as_nr: Address space to wait for a MMU command to complete.
*
* Reset GPU if the wait for previously issued command fails.
*
* Return: 0 on successful completion. negative error on failure.
*/
static int wait_ready(struct kbase_device *kbdev, unsigned int as_nr)
{
u32 max_loops = KBASE_AS_INACTIVE_MAX_LOOPS;
const ktime_t wait_loop_start = ktime_get_raw();
const u32 mmu_as_inactive_wait_time_ms = kbdev->mmu_as_inactive_wait_time_ms;
s64 diff;
/* Wait for the MMU status to indicate there is no active command. */
while (--max_loops &&
kbase_reg_read(kbdev, MMU_AS_REG(as_nr, AS_STATUS)) &
AS_STATUS_AS_ACTIVE) {
;
}
if (unlikely(kbdev->as[as_nr].is_unresponsive))
return -EBUSY;
if (WARN_ON_ONCE(max_loops == 0)) {
dev_err(kbdev->dev,
"AS_ACTIVE bit stuck for as %u, might be caused by slow/unstable GPU clock or possible faulty FPGA connector",
as_nr);
return -1;
}
do {
unsigned int i;
return 0;
for (i = 0; i < 1000; i++) {
/* Wait for the MMU status to indicate there is no active command */
if (!(kbase_reg_read(kbdev, MMU_AS_REG(as_nr, AS_STATUS)) &
AS_STATUS_AS_ACTIVE))
return 0;
}
diff = ktime_to_ms(ktime_sub(ktime_get_raw(), wait_loop_start));
} while (diff < mmu_as_inactive_wait_time_ms);
dev_err(kbdev->dev,
"AS_ACTIVE bit stuck for as %u. Might be caused by unstable GPU clk/pwr or faulty system",
as_nr);
kbdev->as[as_nr].is_unresponsive = true;
if (kbase_prepare_to_reset_gpu_locked(kbdev, RESET_FLAGS_HWC_UNRECOVERABLE_ERROR))
kbase_reset_gpu_locked(kbdev);
return -ETIMEDOUT;
}
static int write_cmd(struct kbase_device *kbdev, int as_nr, u32 cmd)
{
int status;
/* write AS_COMMAND when MMU is ready to accept another command */
status = wait_ready(kbdev, as_nr);
if (status == 0)
const int status = wait_ready(kbdev, as_nr);
if (likely(status == 0))
kbase_reg_write(kbdev, MMU_AS_REG(as_nr, AS_COMMAND), cmd);
else {
else if (status == -EBUSY) {
dev_dbg(kbdev->dev,
"Skipped the wait for AS_ACTIVE bit for as %u, before sending MMU command %u",
as_nr, cmd);
} else {
dev_err(kbdev->dev,
"Wait for AS_ACTIVE bit failed for as %u, before sending MMU command %u",
as_nr, cmd);
@@ -259,17 +283,21 @@ static int apply_hw_issue_GPU2019_3901_wa(struct kbase_device *kbdev, u32 *mmu_c
/* Wait for the LOCK MMU command to complete, issued by the caller */
ret = wait_ready(kbdev, as_nr);
if (ret)
if (unlikely(ret))
return ret;
ret = kbase_gpu_cache_flush_and_busy_wait(kbdev,
GPU_COMMAND_CACHE_CLN_INV_LSC);
if (ret)
if (unlikely(ret))
return ret;
ret = wait_cores_power_trans_complete(kbdev);
if (ret)
if (unlikely(ret)) {
if (kbase_prepare_to_reset_gpu_locked(kbdev,
RESET_FLAGS_HWC_UNRECOVERABLE_ERROR))
kbase_reset_gpu_locked(kbdev);
return ret;
}
/* As LSC is guaranteed to have been flushed we can use FLUSH_PT
* MMU command to only flush the L2.
@@ -397,12 +425,21 @@ static int mmu_hw_do_lock_no_wait(struct kbase_device *kbdev, struct kbase_as *a
ret = mmu_hw_set_lock_addr(kbdev, as->number, lock_addr, op_param);
if (!ret)
write_cmd(kbdev, as->number, AS_COMMAND_LOCK);
if (likely(!ret))
ret = write_cmd(kbdev, as->number, AS_COMMAND_LOCK);
return ret;
}
/**
* mmu_hw_do_lock - Issue LOCK command to the MMU and wait for its completion.
*
* @kbdev: Kbase device to issue the MMU operation on.
* @as: Address space to issue the MMU operation on.
* @op_param: Pointer to a struct containing information about the MMU operation.
*
* Return: 0 if issuing the LOCK command was successful, otherwise an error code.
*/
static int mmu_hw_do_lock(struct kbase_device *kbdev, struct kbase_as *as,
const struct kbase_mmu_hw_op_param *op_param)
{
@@ -443,10 +480,10 @@ int kbase_mmu_hw_do_unlock_no_addr(struct kbase_device *kbdev, struct kbase_as *
ret = write_cmd(kbdev, as->number, AS_COMMAND_UNLOCK);
/* Wait for UNLOCK command to complete */
if (!ret)
if (likely(!ret))
ret = wait_ready(kbdev, as->number);
if (!ret) {
if (likely(!ret)) {
u64 lock_addr = 0x0;
/* read MMU_AS_CONTROL.LOCKADDR register */
lock_addr |= (u64)kbase_reg_read(kbdev, MMU_AS_REG(as->number, AS_LOCKADDR_HI))
@@ -478,6 +515,16 @@ int kbase_mmu_hw_do_unlock(struct kbase_device *kbdev, struct kbase_as *as,
return ret;
}
/**
* mmu_hw_do_flush - Flush MMU and wait for its completion.
*
* @kbdev: Kbase device to issue the MMU operation on.
* @as: Address space to issue the MMU operation on.
* @op_param: Pointer to a struct containing information about the MMU operation.
* @hwaccess_locked: Flag to indicate if the lock has been held.
*
* Return: 0 if flushing MMU was successful, otherwise an error code.
*/
static int mmu_hw_do_flush(struct kbase_device *kbdev, struct kbase_as *as,
const struct kbase_mmu_hw_op_param *op_param, bool hwaccess_locked)
{
@@ -508,12 +555,9 @@ static int mmu_hw_do_flush(struct kbase_device *kbdev, struct kbase_as *as,
return ret;
#if MALI_USE_CSF && !IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI)
/* WA for the BASE_HW_ISSUE_GPU2019_3901. No runtime check is used here
* as the WA is applicable to all CSF GPUs where FLUSH_MEM/PT command is
* supported, and this function doesn't gets called for the GPUs where
* FLUSH_MEM/PT command is deprecated.
*/
if (mmu_cmd == AS_COMMAND_FLUSH_MEM) {
/* WA for the BASE_HW_ISSUE_GPU2019_3901. */
if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_GPU2019_3901) &&
mmu_cmd == AS_COMMAND_FLUSH_MEM) {
if (!hwaccess_locked) {
unsigned long flags = 0;
@@ -529,12 +573,13 @@ static int mmu_hw_do_flush(struct kbase_device *kbdev, struct kbase_as *as,
}
#endif
write_cmd(kbdev, as->number, mmu_cmd);
ret = write_cmd(kbdev, as->number, mmu_cmd);
/* Wait for the command to complete */
ret = wait_ready(kbdev, as->number);
if (likely(!ret))
ret = wait_ready(kbdev, as->number);
if (!ret)
if (likely(!ret))
mmu_command_instr(kbdev, op_param->kctx_id, mmu_cmd, lock_addr,
op_param->mmu_sync_info);

View File

@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
* (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2021-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -38,3 +38,9 @@ bob_defaults {
kbuild_options: ["CONFIG_UNIT_TEST_KERNEL_MODULES=y"],
},
}
bob_defaults {
name: "kernel_unit_tests",
add_to_alias: ["unit_tests"],
srcs: [".*_unit_test/"],
}

View File

@@ -10,6 +10,7 @@
*/
#include "linux/mman.h"
#include <linux/version_compat_defs.h>
#include <mali_kbase.h>
/* mali_kbase_mmap.c
@@ -90,7 +91,6 @@ static bool align_and_check(unsigned long *gap_end, unsigned long gap_start,
if ((*gap_end < info->low_limit) || (*gap_end < gap_start))
return false;
return true;
}
@@ -132,6 +132,7 @@ static bool align_and_check(unsigned long *gap_end, unsigned long gap_start,
static unsigned long kbase_unmapped_area_topdown(struct vm_unmapped_area_info
*info, bool is_shader_code, bool is_same_4gb_page)
{
#if (KERNEL_VERSION(6, 1, 0) > LINUX_VERSION_CODE)
struct mm_struct *mm = current->mm;
struct vm_area_struct *vma;
unsigned long length, low_limit, high_limit, gap_start, gap_end;
@@ -225,7 +226,37 @@ check_current:
}
}
}
#else
unsigned long length, high_limit, gap_start, gap_end;
MA_STATE(mas, &current->mm->mm_mt, 0, 0);
/* Adjust search length to account for worst case alignment overhead */
length = info->length + info->align_mask;
if (length < info->length)
return -ENOMEM;
/*
* Adjust search limits by the desired length.
* See implementation comment at top of unmapped_area().
*/
gap_end = info->high_limit;
if (gap_end < length)
return -ENOMEM;
high_limit = gap_end - length;
if (info->low_limit > high_limit)
return -ENOMEM;
while (true) {
if (mas_empty_area_rev(&mas, info->low_limit, info->high_limit - 1, length))
return -ENOMEM;
gap_end = mas.last + 1;
gap_start = mas.min;
if (align_and_check(&gap_end, gap_start, info, is_shader_code, is_same_4gb_page))
return gap_end;
}
#endif
return -ENOMEM;
}
@@ -242,8 +273,13 @@ unsigned long kbase_context_get_unmapped_area(struct kbase_context *const kctx,
struct vm_unmapped_area_info info;
unsigned long align_offset = 0;
unsigned long align_mask = 0;
#if (KERNEL_VERSION(6, 1, 0) <= LINUX_VERSION_CODE)
unsigned long high_limit = arch_get_mmap_base(addr, mm->mmap_base);
unsigned long low_limit = max_t(unsigned long, PAGE_SIZE, kbase_mmap_min_addr);
#else
unsigned long high_limit = mm->mmap_base;
unsigned long low_limit = PAGE_SIZE;
#endif
int cpu_va_bits = BITS_PER_LONG;
int gpu_pc_bits =
kctx->kbdev->gpu_props.props.core_props.log2_program_counter_size;
@@ -270,6 +306,13 @@ unsigned long kbase_context_get_unmapped_area(struct kbase_context *const kctx,
struct kbase_reg_zone *zone =
kbase_ctx_reg_zone_get_nolock(kctx, KBASE_REG_ZONE_SAME_VA);
u64 same_va_end_addr = kbase_reg_zone_end_pfn(zone) << PAGE_SHIFT;
#if (KERNEL_VERSION(6, 1, 0) <= LINUX_VERSION_CODE)
const unsigned long mmap_end = arch_get_mmap_end(addr, len, flags);
/* requested length too big for entire address space */
if (len > mmap_end - kbase_mmap_min_addr)
return -ENOMEM;
#endif
/* err on fixed address */
if ((flags & MAP_FIXED) || addr)
@@ -282,7 +325,7 @@ unsigned long kbase_context_get_unmapped_area(struct kbase_context *const kctx,
if (!kbase_ctx_flag(kctx, KCTX_COMPAT)) {
high_limit =
min_t(unsigned long, mm->mmap_base, same_va_end_addr);
min_t(unsigned long, high_limit, same_va_end_addr);
/* If there's enough (> 33 bits) of GPU VA space, align
* to 2MB boundaries.
@@ -359,9 +402,15 @@ unsigned long kbase_context_get_unmapped_area(struct kbase_context *const kctx,
if (IS_ERR_VALUE(ret) && high_limit == mm->mmap_base &&
high_limit < same_va_end_addr) {
#if (KERNEL_VERSION(6, 1, 0) <= LINUX_VERSION_CODE)
/* Retry above TASK_UNMAPPED_BASE */
info.low_limit = TASK_UNMAPPED_BASE;
info.high_limit = min_t(u64, mmap_end, same_va_end_addr);
#else
/* Retry above mmap_base */
info.low_limit = mm->mmap_base;
info.high_limit = min_t(u64, TASK_SIZE, same_va_end_addr);
#endif
ret = kbase_unmapped_area_topdown(&info, is_shader_code,
is_same_4gb_page);

View File

@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
* (C) COPYRIGHT 2015-2022 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2015-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -24,8 +24,6 @@
#include "mali_kbase_tracepoints.h"
#include <mali_kbase.h>
#include <mali_kbase_jm.h>
#include <linux/atomic.h>
#include <linux/file.h>
#include <linux/mutex.h>

View File

@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
* (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -35,6 +35,47 @@
#include <uapi/linux/eventpoll.h>
#endif
static int kbase_unprivileged_global_profiling;
/**
* kbase_unprivileged_global_profiling_set - set permissions for unprivileged processes
*
* @val: String containing value to set. Only strings representing positive
* integers are accepted as valid; any non-positive integer (including 0)
* is rejected.
* @kp: Module parameter associated with this method.
*
* This method can only be used to enable permissions for unprivileged processes,
* if they are disabled: for this reason, the only values which are accepted are
* strings representing positive integers. Since it's impossible to disable
* permissions once they're set, any integer which is non-positive is rejected,
* including 0.
*
* Return: 0 if success, otherwise error code.
*/
static int kbase_unprivileged_global_profiling_set(const char *val, const struct kernel_param *kp)
{
int new_val;
int ret = kstrtoint(val, 0, &new_val);
if (ret == 0) {
if (new_val < 1)
return -EINVAL;
kbase_unprivileged_global_profiling = 1;
}
return ret;
}
static const struct kernel_param_ops kbase_global_unprivileged_profiling_ops = {
.get = param_get_int,
.set = kbase_unprivileged_global_profiling_set,
};
module_param_cb(kbase_unprivileged_global_profiling, &kbase_global_unprivileged_profiling_ops,
&kbase_unprivileged_global_profiling, 0600);
/* The timeline stream file operations functions. */
static ssize_t kbasep_timeline_io_read(struct file *filp, char __user *buffer,
size_t size, loff_t *f_pos);
@@ -43,6 +84,15 @@ static int kbasep_timeline_io_release(struct inode *inode, struct file *filp);
static int kbasep_timeline_io_fsync(struct file *filp, loff_t start, loff_t end,
int datasync);
static bool timeline_is_permitted(void)
{
#if KERNEL_VERSION(5, 8, 0) <= LINUX_VERSION_CODE
return kbase_unprivileged_global_profiling || perfmon_capable();
#else
return kbase_unprivileged_global_profiling || capable(CAP_SYS_ADMIN);
#endif
}
/**
* kbasep_timeline_io_packet_pending - check timeline streams for pending
* packets
@@ -328,6 +378,9 @@ int kbase_timeline_io_acquire(struct kbase_device *kbdev, u32 flags)
};
int err;
if (!timeline_is_permitted())
return -EPERM;
if (WARN_ON(!kbdev) || (flags & ~BASE_TLSTREAM_FLAGS_MASK))
return -EINVAL;
@@ -371,7 +424,7 @@ void kbase_timeline_io_debugfs_init(struct kbase_device *const kbdev)
if (WARN_ON(!kbdev) || WARN_ON(IS_ERR_OR_NULL(kbdev->mali_debugfs_directory)))
return;
file = debugfs_create_file("tlstream", 0444, kbdev->mali_debugfs_directory, kbdev,
file = debugfs_create_file("tlstream", 0400, kbdev->mali_debugfs_directory, kbdev,
&kbasep_tlstream_debugfs_fops);
if (IS_ERR_OR_NULL(file))

View File

@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
* (C) COPYRIGHT 2010-2022 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -87,7 +87,9 @@ enum tl_msg_id_obj {
KBASE_TL_KBASE_GPUCMDQUEUE_KICK,
KBASE_TL_KBASE_DEVICE_PROGRAM_CSG,
KBASE_TL_KBASE_DEVICE_DEPROGRAM_CSG,
KBASE_TL_KBASE_DEVICE_HALT_CSG,
KBASE_TL_KBASE_DEVICE_HALTING_CSG,
KBASE_TL_KBASE_DEVICE_SUSPEND_CSG,
KBASE_TL_KBASE_DEVICE_CSG_IDLE,
KBASE_TL_KBASE_NEW_CTX,
KBASE_TL_KBASE_DEL_CTX,
KBASE_TL_KBASE_CTX_ASSIGN_AS,
@@ -98,6 +100,8 @@ enum tl_msg_id_obj {
KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_FENCE_WAIT,
KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_WAIT,
KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_SET,
KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_WAIT_OPERATION,
KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_SET_OPERATION,
KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_MAP_IMPORT,
KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_UNMAP_IMPORT,
KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_UNMAP_IMPORT_FORCE,
@@ -116,6 +120,9 @@ enum tl_msg_id_obj {
KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_START,
KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_END,
KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_SET,
KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_OPERATION_START,
KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_OPERATION_END,
KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_SET_OPERATION,
KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_MAP_IMPORT_START,
KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_MAP_IMPORT_END,
KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_START,
@@ -360,13 +367,21 @@ enum tl_msg_id_obj {
TRACEPOINT_DESC(KBASE_TL_KBASE_DEVICE_PROGRAM_CSG, \
"CSG is programmed to a slot", \
"@IIIII", \
"kbase_device_id,kernel_ctx_id,gpu_cmdq_grp_handle,kbase_device_csg_slot_index,kbase_device_csg_slot_resumed") \
"kbase_device_id,kernel_ctx_id,gpu_cmdq_grp_handle,kbase_device_csg_slot_index,kbase_device_csg_slot_resuming") \
TRACEPOINT_DESC(KBASE_TL_KBASE_DEVICE_DEPROGRAM_CSG, \
"CSG is deprogrammed from a slot", \
"@II", \
"kbase_device_id,kbase_device_csg_slot_index") \
TRACEPOINT_DESC(KBASE_TL_KBASE_DEVICE_HALT_CSG, \
"CSG is halted", \
TRACEPOINT_DESC(KBASE_TL_KBASE_DEVICE_HALTING_CSG, \
"CSG is halting", \
"@III", \
"kbase_device_id,kbase_device_csg_slot_index,kbase_device_csg_slot_suspending") \
TRACEPOINT_DESC(KBASE_TL_KBASE_DEVICE_SUSPEND_CSG, \
"CSG is suspended", \
"@II", \
"kbase_device_id,kbase_device_csg_slot_index") \
TRACEPOINT_DESC(KBASE_TL_KBASE_DEVICE_CSG_IDLE, \
"KBase device is notified that CSG is idle.", \
"@II", \
"kbase_device_id,kbase_device_csg_slot_index") \
TRACEPOINT_DESC(KBASE_TL_KBASE_NEW_CTX, \
@@ -404,11 +419,19 @@ enum tl_msg_id_obj {
TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_WAIT, \
"KCPU Queue enqueues Wait on Cross Queue Sync Object", \
"@pLII", \
"kcpu_queue,cqs_obj_gpu_addr,cqs_obj_compare_value,cqs_obj_inherit_error") \
"kcpu_queue,cqs_obj_gpu_addr,compare_value,inherit_error") \
TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_SET, \
"KCPU Queue enqueues Set on Cross Queue Sync Object", \
"@pL", \
"kcpu_queue,cqs_obj_gpu_addr") \
TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_WAIT_OPERATION, \
"KCPU Queue enqueues Wait Operation on Cross Queue Sync Object", \
"@pLLIII", \
"kcpu_queue,cqs_obj_gpu_addr,compare_value,condition,data_type,inherit_error") \
TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_SET_OPERATION, \
"KCPU Queue enqueues Set Operation on Cross Queue Sync Object", \
"@pLLII", \
"kcpu_queue,cqs_obj_gpu_addr,value,operation,data_type") \
TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_MAP_IMPORT, \
"KCPU Queue enqueues Map Import", \
"@pL", \
@@ -481,6 +504,18 @@ enum tl_msg_id_obj {
"KCPU Queue executes a Set on Cross Queue Sync Object", \
"@pI", \
"kcpu_queue,execute_error") \
TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_OPERATION_START, \
"KCPU Queue starts a Wait Operation on Cross Queue Sync Object", \
"@p", \
"kcpu_queue") \
TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_OPERATION_END, \
"KCPU Queue ends a Wait Operation on Cross Queue Sync Object", \
"@pI", \
"kcpu_queue,execute_error") \
TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_SET_OPERATION, \
"KCPU Queue executes a Set Operation on Cross Queue Sync Object", \
"@pI", \
"kcpu_queue,execute_error") \
TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_MAP_IMPORT_START, \
"KCPU Queue starts a Map Import", \
"@p", \
@@ -2130,7 +2165,7 @@ void __kbase_tlstream_tl_kbase_device_program_csg(
u32 kernel_ctx_id,
u32 gpu_cmdq_grp_handle,
u32 kbase_device_csg_slot_index,
u32 kbase_device_csg_slot_resumed
u32 kbase_device_csg_slot_resuming
)
{
const u32 msg_id = KBASE_TL_KBASE_DEVICE_PROGRAM_CSG;
@@ -2139,7 +2174,7 @@ void __kbase_tlstream_tl_kbase_device_program_csg(
+ sizeof(kernel_ctx_id)
+ sizeof(gpu_cmdq_grp_handle)
+ sizeof(kbase_device_csg_slot_index)
+ sizeof(kbase_device_csg_slot_resumed)
+ sizeof(kbase_device_csg_slot_resuming)
;
char *buffer;
unsigned long acq_flags;
@@ -2158,7 +2193,7 @@ void __kbase_tlstream_tl_kbase_device_program_csg(
pos = kbasep_serialize_bytes(buffer,
pos, &kbase_device_csg_slot_index, sizeof(kbase_device_csg_slot_index));
pos = kbasep_serialize_bytes(buffer,
pos, &kbase_device_csg_slot_resumed, sizeof(kbase_device_csg_slot_resumed));
pos, &kbase_device_csg_slot_resuming, sizeof(kbase_device_csg_slot_resuming));
kbase_tlstream_msgbuf_release(stream, acq_flags);
}
@@ -2190,13 +2225,71 @@ void __kbase_tlstream_tl_kbase_device_deprogram_csg(
kbase_tlstream_msgbuf_release(stream, acq_flags);
}
void __kbase_tlstream_tl_kbase_device_halt_csg(
void __kbase_tlstream_tl_kbase_device_halting_csg(
struct kbase_tlstream *stream,
u32 kbase_device_id,
u32 kbase_device_csg_slot_index,
u32 kbase_device_csg_slot_suspending
)
{
const u32 msg_id = KBASE_TL_KBASE_DEVICE_HALTING_CSG;
const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+ sizeof(kbase_device_id)
+ sizeof(kbase_device_csg_slot_index)
+ sizeof(kbase_device_csg_slot_suspending)
;
char *buffer;
unsigned long acq_flags;
size_t pos = 0;
buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
pos = kbasep_serialize_timestamp(buffer, pos);
pos = kbasep_serialize_bytes(buffer,
pos, &kbase_device_id, sizeof(kbase_device_id));
pos = kbasep_serialize_bytes(buffer,
pos, &kbase_device_csg_slot_index, sizeof(kbase_device_csg_slot_index));
pos = kbasep_serialize_bytes(buffer,
pos, &kbase_device_csg_slot_suspending, sizeof(kbase_device_csg_slot_suspending));
kbase_tlstream_msgbuf_release(stream, acq_flags);
}
void __kbase_tlstream_tl_kbase_device_suspend_csg(
struct kbase_tlstream *stream,
u32 kbase_device_id,
u32 kbase_device_csg_slot_index
)
{
const u32 msg_id = KBASE_TL_KBASE_DEVICE_HALT_CSG;
const u32 msg_id = KBASE_TL_KBASE_DEVICE_SUSPEND_CSG;
const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+ sizeof(kbase_device_id)
+ sizeof(kbase_device_csg_slot_index)
;
char *buffer;
unsigned long acq_flags;
size_t pos = 0;
buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
pos = kbasep_serialize_timestamp(buffer, pos);
pos = kbasep_serialize_bytes(buffer,
pos, &kbase_device_id, sizeof(kbase_device_id));
pos = kbasep_serialize_bytes(buffer,
pos, &kbase_device_csg_slot_index, sizeof(kbase_device_csg_slot_index));
kbase_tlstream_msgbuf_release(stream, acq_flags);
}
void __kbase_tlstream_tl_kbase_device_csg_idle(
struct kbase_tlstream *stream,
u32 kbase_device_id,
u32 kbase_device_csg_slot_index
)
{
const u32 msg_id = KBASE_TL_KBASE_DEVICE_CSG_IDLE;
const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+ sizeof(kbase_device_id)
+ sizeof(kbase_device_csg_slot_index)
@@ -2433,16 +2526,16 @@ void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_cqs_wait(
struct kbase_tlstream *stream,
const void *kcpu_queue,
u64 cqs_obj_gpu_addr,
u32 cqs_obj_compare_value,
u32 cqs_obj_inherit_error
u32 compare_value,
u32 inherit_error
)
{
const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_WAIT;
const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+ sizeof(kcpu_queue)
+ sizeof(cqs_obj_gpu_addr)
+ sizeof(cqs_obj_compare_value)
+ sizeof(cqs_obj_inherit_error)
+ sizeof(compare_value)
+ sizeof(inherit_error)
;
char *buffer;
unsigned long acq_flags;
@@ -2457,9 +2550,9 @@ void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_cqs_wait(
pos = kbasep_serialize_bytes(buffer,
pos, &cqs_obj_gpu_addr, sizeof(cqs_obj_gpu_addr));
pos = kbasep_serialize_bytes(buffer,
pos, &cqs_obj_compare_value, sizeof(cqs_obj_compare_value));
pos, &compare_value, sizeof(compare_value));
pos = kbasep_serialize_bytes(buffer,
pos, &cqs_obj_inherit_error, sizeof(cqs_obj_inherit_error));
pos, &inherit_error, sizeof(inherit_error));
kbase_tlstream_msgbuf_release(stream, acq_flags);
}
@@ -2491,6 +2584,88 @@ void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_cqs_set(
kbase_tlstream_msgbuf_release(stream, acq_flags);
}
void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_cqs_wait_operation(
struct kbase_tlstream *stream,
const void *kcpu_queue,
u64 cqs_obj_gpu_addr,
u64 compare_value,
u32 condition,
u32 data_type,
u32 inherit_error
)
{
const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_WAIT_OPERATION;
const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+ sizeof(kcpu_queue)
+ sizeof(cqs_obj_gpu_addr)
+ sizeof(compare_value)
+ sizeof(condition)
+ sizeof(data_type)
+ sizeof(inherit_error)
;
char *buffer;
unsigned long acq_flags;
size_t pos = 0;
buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
pos = kbasep_serialize_timestamp(buffer, pos);
pos = kbasep_serialize_bytes(buffer,
pos, &kcpu_queue, sizeof(kcpu_queue));
pos = kbasep_serialize_bytes(buffer,
pos, &cqs_obj_gpu_addr, sizeof(cqs_obj_gpu_addr));
pos = kbasep_serialize_bytes(buffer,
pos, &compare_value, sizeof(compare_value));
pos = kbasep_serialize_bytes(buffer,
pos, &condition, sizeof(condition));
pos = kbasep_serialize_bytes(buffer,
pos, &data_type, sizeof(data_type));
pos = kbasep_serialize_bytes(buffer,
pos, &inherit_error, sizeof(inherit_error));
kbase_tlstream_msgbuf_release(stream, acq_flags);
}
void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_cqs_set_operation(
struct kbase_tlstream *stream,
const void *kcpu_queue,
u64 cqs_obj_gpu_addr,
u64 value,
u32 operation,
u32 data_type
)
{
const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_SET_OPERATION;
const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+ sizeof(kcpu_queue)
+ sizeof(cqs_obj_gpu_addr)
+ sizeof(value)
+ sizeof(operation)
+ sizeof(data_type)
;
char *buffer;
unsigned long acq_flags;
size_t pos = 0;
buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
pos = kbasep_serialize_timestamp(buffer, pos);
pos = kbasep_serialize_bytes(buffer,
pos, &kcpu_queue, sizeof(kcpu_queue));
pos = kbasep_serialize_bytes(buffer,
pos, &cqs_obj_gpu_addr, sizeof(cqs_obj_gpu_addr));
pos = kbasep_serialize_bytes(buffer,
pos, &value, sizeof(value));
pos = kbasep_serialize_bytes(buffer,
pos, &operation, sizeof(operation));
pos = kbasep_serialize_bytes(buffer,
pos, &data_type, sizeof(data_type));
kbase_tlstream_msgbuf_release(stream, acq_flags);
}
void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_map_import(
struct kbase_tlstream *stream,
const void *kcpu_queue,
@@ -2981,6 +3156,83 @@ void __kbase_tlstream_tl_kbase_kcpuqueue_execute_cqs_set(
kbase_tlstream_msgbuf_release(stream, acq_flags);
}
void __kbase_tlstream_tl_kbase_kcpuqueue_execute_cqs_wait_operation_start(
struct kbase_tlstream *stream,
const void *kcpu_queue
)
{
const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_OPERATION_START;
const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+ sizeof(kcpu_queue)
;
char *buffer;
unsigned long acq_flags;
size_t pos = 0;
buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
pos = kbasep_serialize_timestamp(buffer, pos);
pos = kbasep_serialize_bytes(buffer,
pos, &kcpu_queue, sizeof(kcpu_queue));
kbase_tlstream_msgbuf_release(stream, acq_flags);
}
void __kbase_tlstream_tl_kbase_kcpuqueue_execute_cqs_wait_operation_end(
struct kbase_tlstream *stream,
const void *kcpu_queue,
u32 execute_error
)
{
const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_OPERATION_END;
const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+ sizeof(kcpu_queue)
+ sizeof(execute_error)
;
char *buffer;
unsigned long acq_flags;
size_t pos = 0;
buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
pos = kbasep_serialize_timestamp(buffer, pos);
pos = kbasep_serialize_bytes(buffer,
pos, &kcpu_queue, sizeof(kcpu_queue));
pos = kbasep_serialize_bytes(buffer,
pos, &execute_error, sizeof(execute_error));
kbase_tlstream_msgbuf_release(stream, acq_flags);
}
/*
 * Emit the KCPUQUEUE_EXECUTE_CQS_SET_OPERATION tracepoint.
 *
 * Serializes the message ID, a timestamp, the KCPU queue pointer and
 * the command's completion error code into a buffer acquired from the
 * timeline stream, then releases the buffer.
 */
void __kbase_tlstream_tl_kbase_kcpuqueue_execute_cqs_set_operation(
	struct kbase_tlstream *stream,
	const void *kcpu_queue,
	u32 execute_error
)
{
	const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_SET_OPERATION;
	/* Payload: message ID + timestamp (u64) + queue pointer + error code. */
	const size_t msg_size =
		sizeof(msg_id) + sizeof(u64) + sizeof(kcpu_queue) +
		sizeof(execute_error);
	unsigned long irq_flags;
	size_t offset = 0;
	char *msgbuf;

	msgbuf = kbase_tlstream_msgbuf_acquire(stream, msg_size, &irq_flags);
	offset = kbasep_serialize_bytes(msgbuf, offset, &msg_id, sizeof(msg_id));
	offset = kbasep_serialize_timestamp(msgbuf, offset);
	offset = kbasep_serialize_bytes(msgbuf,
		offset, &kcpu_queue, sizeof(kcpu_queue));
	offset = kbasep_serialize_bytes(msgbuf,
		offset, &execute_error, sizeof(execute_error));
	kbase_tlstream_msgbuf_release(stream, irq_flags);
}
void __kbase_tlstream_tl_kbase_kcpuqueue_execute_map_import_start(
struct kbase_tlstream *stream,
const void *kcpu_queue

View File

@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
* (C) COPYRIGHT 2010-2022 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -408,7 +408,7 @@ void __kbase_tlstream_tl_kbase_device_program_csg(
u32 kernel_ctx_id,
u32 gpu_cmdq_grp_handle,
u32 kbase_device_csg_slot_index,
u32 kbase_device_csg_slot_resumed
u32 kbase_device_csg_slot_resuming
);
void __kbase_tlstream_tl_kbase_device_deprogram_csg(
@@ -417,7 +417,20 @@ void __kbase_tlstream_tl_kbase_device_deprogram_csg(
u32 kbase_device_csg_slot_index
);
void __kbase_tlstream_tl_kbase_device_halt_csg(
void __kbase_tlstream_tl_kbase_device_halting_csg(
struct kbase_tlstream *stream,
u32 kbase_device_id,
u32 kbase_device_csg_slot_index,
u32 kbase_device_csg_slot_suspending
);
void __kbase_tlstream_tl_kbase_device_suspend_csg(
struct kbase_tlstream *stream,
u32 kbase_device_id,
u32 kbase_device_csg_slot_index
);
void __kbase_tlstream_tl_kbase_device_csg_idle(
struct kbase_tlstream *stream,
u32 kbase_device_id,
u32 kbase_device_csg_slot_index
@@ -474,8 +487,8 @@ void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_cqs_wait(
struct kbase_tlstream *stream,
const void *kcpu_queue,
u64 cqs_obj_gpu_addr,
u32 cqs_obj_compare_value,
u32 cqs_obj_inherit_error
u32 compare_value,
u32 inherit_error
);
void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_cqs_set(
@@ -484,6 +497,25 @@ void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_cqs_set(
u64 cqs_obj_gpu_addr
);
void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_cqs_wait_operation(
struct kbase_tlstream *stream,
const void *kcpu_queue,
u64 cqs_obj_gpu_addr,
u64 compare_value,
u32 condition,
u32 data_type,
u32 inherit_error
);
void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_cqs_set_operation(
struct kbase_tlstream *stream,
const void *kcpu_queue,
u64 cqs_obj_gpu_addr,
u64 value,
u32 operation,
u32 data_type
);
void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_map_import(
struct kbase_tlstream *stream,
const void *kcpu_queue,
@@ -593,6 +625,23 @@ void __kbase_tlstream_tl_kbase_kcpuqueue_execute_cqs_set(
u32 execute_error
);
void __kbase_tlstream_tl_kbase_kcpuqueue_execute_cqs_wait_operation_start(
struct kbase_tlstream *stream,
const void *kcpu_queue
);
void __kbase_tlstream_tl_kbase_kcpuqueue_execute_cqs_wait_operation_end(
struct kbase_tlstream *stream,
const void *kcpu_queue,
u32 execute_error
);
void __kbase_tlstream_tl_kbase_kcpuqueue_execute_cqs_set_operation(
struct kbase_tlstream *stream,
const void *kcpu_queue,
u32 execute_error
);
void __kbase_tlstream_tl_kbase_kcpuqueue_execute_map_import_start(
struct kbase_tlstream *stream,
const void *kcpu_queue
@@ -2026,7 +2075,7 @@ struct kbase_tlstream;
* @kernel_ctx_id: Unique ID for the KBase Context
* @gpu_cmdq_grp_handle: GPU Command Queue Group handle which will match userspace
* @kbase_device_csg_slot_index: The index of the slot in the scheduler being programmed
* @kbase_device_csg_slot_resumed: Whether the csg is being resumed
* @kbase_device_csg_slot_resuming: Whether the csg is being resumed
*/
#if MALI_USE_CSF
#define KBASE_TLSTREAM_TL_KBASE_DEVICE_PROGRAM_CSG( \
@@ -2035,7 +2084,7 @@ struct kbase_tlstream;
kernel_ctx_id, \
gpu_cmdq_grp_handle, \
kbase_device_csg_slot_index, \
kbase_device_csg_slot_resumed \
kbase_device_csg_slot_resuming \
) \
do { \
int enabled = atomic_read(&kbdev->timeline_flags); \
@@ -2046,7 +2095,7 @@ struct kbase_tlstream;
kernel_ctx_id, \
gpu_cmdq_grp_handle, \
kbase_device_csg_slot_index, \
kbase_device_csg_slot_resumed \
kbase_device_csg_slot_resuming \
); \
} while (0)
#else
@@ -2056,7 +2105,7 @@ struct kbase_tlstream;
kernel_ctx_id, \
gpu_cmdq_grp_handle, \
kbase_device_csg_slot_index, \
kbase_device_csg_slot_resumed \
kbase_device_csg_slot_resuming \
) \
do { } while (0)
#endif /* MALI_USE_CSF */
@@ -2066,7 +2115,7 @@ struct kbase_tlstream;
*
* @kbdev: Kbase device
* @kbase_device_id: The ID of the physical hardware
* @kbase_device_csg_slot_index: The index of the slot in the scheduler being programmed
* @kbase_device_csg_slot_index: The index of the slot in the scheduler whose CSG is being deprogrammed
*/
#if MALI_USE_CSF
#define KBASE_TLSTREAM_TL_KBASE_DEVICE_DEPROGRAM_CSG( \
@@ -2093,14 +2142,49 @@ struct kbase_tlstream;
#endif /* MALI_USE_CSF */
/**
* KBASE_TLSTREAM_TL_KBASE_DEVICE_HALT_CSG - CSG is halted
* KBASE_TLSTREAM_TL_KBASE_DEVICE_HALTING_CSG - CSG is halting
*
* @kbdev: Kbase device
* @kbase_device_id: The ID of the physical hardware
* @kbase_device_csg_slot_index: The index of the slot in the scheduler being programmed
* @kbase_device_csg_slot_index: The index of the slot in the scheduler whose CSG is being halted
* @kbase_device_csg_slot_suspending: Whether the csg is being suspended
*/
#if MALI_USE_CSF
#define KBASE_TLSTREAM_TL_KBASE_DEVICE_HALT_CSG( \
#define KBASE_TLSTREAM_TL_KBASE_DEVICE_HALTING_CSG( \
kbdev, \
kbase_device_id, \
kbase_device_csg_slot_index, \
kbase_device_csg_slot_suspending \
) \
do { \
int enabled = atomic_read(&kbdev->timeline_flags); \
if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \
__kbase_tlstream_tl_kbase_device_halting_csg( \
__TL_DISPATCH_STREAM(kbdev, obj), \
kbase_device_id, \
kbase_device_csg_slot_index, \
kbase_device_csg_slot_suspending \
); \
} while (0)
#else
#define KBASE_TLSTREAM_TL_KBASE_DEVICE_HALTING_CSG( \
kbdev, \
kbase_device_id, \
kbase_device_csg_slot_index, \
kbase_device_csg_slot_suspending \
) \
do { } while (0)
#endif /* MALI_USE_CSF */
/**
* KBASE_TLSTREAM_TL_KBASE_DEVICE_SUSPEND_CSG - CSG is suspended
*
* @kbdev: Kbase device
* @kbase_device_id: The ID of the physical hardware
* @kbase_device_csg_slot_index: The index of the slot in the scheduler whose CSG is being suspended
*/
#if MALI_USE_CSF
#define KBASE_TLSTREAM_TL_KBASE_DEVICE_SUSPEND_CSG( \
kbdev, \
kbase_device_id, \
kbase_device_csg_slot_index \
@@ -2108,14 +2192,45 @@ struct kbase_tlstream;
do { \
int enabled = atomic_read(&kbdev->timeline_flags); \
if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \
__kbase_tlstream_tl_kbase_device_halt_csg( \
__kbase_tlstream_tl_kbase_device_suspend_csg( \
__TL_DISPATCH_STREAM(kbdev, obj), \
kbase_device_id, \
kbase_device_csg_slot_index \
); \
} while (0)
#else
#define KBASE_TLSTREAM_TL_KBASE_DEVICE_HALT_CSG( \
#define KBASE_TLSTREAM_TL_KBASE_DEVICE_SUSPEND_CSG( \
kbdev, \
kbase_device_id, \
kbase_device_csg_slot_index \
) \
do { } while (0)
#endif /* MALI_USE_CSF */
/**
* KBASE_TLSTREAM_TL_KBASE_DEVICE_CSG_IDLE - KBase device is notified that CSG is idle.
*
* @kbdev: Kbase device
* @kbase_device_id: The ID of the physical hardware
 * @kbase_device_csg_slot_index: The index of the slot in the scheduler whose CSG we are receiving an idle notification for
*/
#if MALI_USE_CSF
#define KBASE_TLSTREAM_TL_KBASE_DEVICE_CSG_IDLE( \
kbdev, \
kbase_device_id, \
kbase_device_csg_slot_index \
) \
do { \
int enabled = atomic_read(&kbdev->timeline_flags); \
if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \
__kbase_tlstream_tl_kbase_device_csg_idle( \
__TL_DISPATCH_STREAM(kbdev, obj), \
kbase_device_id, \
kbase_device_csg_slot_index \
); \
} while (0)
#else
#define KBASE_TLSTREAM_TL_KBASE_DEVICE_CSG_IDLE( \
kbdev, \
kbase_device_id, \
kbase_device_csg_slot_index \
@@ -2373,16 +2488,16 @@ struct kbase_tlstream;
* @kbdev: Kbase device
* @kcpu_queue: KCPU queue
* @cqs_obj_gpu_addr: CQS Object GPU pointer
* @cqs_obj_compare_value: Semaphore value that should be exceeded for the WAIT to pass
* @cqs_obj_inherit_error: Flag which indicates if the CQS object error state should be inherited by the queue
* @compare_value: Semaphore value that should be exceeded for the WAIT to pass
* @inherit_error: Flag which indicates if the CQS object error state should be inherited by the queue
*/
#if MALI_USE_CSF
#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_WAIT( \
kbdev, \
kcpu_queue, \
cqs_obj_gpu_addr, \
cqs_obj_compare_value, \
cqs_obj_inherit_error \
compare_value, \
inherit_error \
) \
do { \
int enabled = atomic_read(&kbdev->timeline_flags); \
@@ -2391,8 +2506,8 @@ struct kbase_tlstream;
__TL_DISPATCH_STREAM(kbdev, obj), \
kcpu_queue, \
cqs_obj_gpu_addr, \
cqs_obj_compare_value, \
cqs_obj_inherit_error \
compare_value, \
inherit_error \
); \
} while (0)
#else
@@ -2400,8 +2515,8 @@ struct kbase_tlstream;
kbdev, \
kcpu_queue, \
cqs_obj_gpu_addr, \
cqs_obj_compare_value, \
cqs_obj_inherit_error \
compare_value, \
inherit_error \
) \
do { } while (0)
#endif /* MALI_USE_CSF */
@@ -2437,6 +2552,96 @@ struct kbase_tlstream;
do { } while (0)
#endif /* MALI_USE_CSF */
/**
* KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_WAIT_OPERATION - KCPU Queue enqueues Wait Operation on Cross Queue Sync Object
*
* @kbdev: Kbase device
* @kcpu_queue: KCPU queue
* @cqs_obj_gpu_addr: CQS Object GPU pointer
* @compare_value: Value that should be compared to semaphore value for the WAIT to pass
* @condition: Condition for unblocking WAITs on Timeline Cross Queue Sync Object (e.g. greater than, less or equal)
* @data_type: Data type of a CQS Object's value
* @inherit_error: Flag which indicates if the CQS object error state should be inherited by the queue
*/
#if MALI_USE_CSF
#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_WAIT_OPERATION( \
kbdev, \
kcpu_queue, \
cqs_obj_gpu_addr, \
compare_value, \
condition, \
data_type, \
inherit_error \
) \
do { \
int enabled = atomic_read(&kbdev->timeline_flags); \
if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \
__kbase_tlstream_tl_kbase_kcpuqueue_enqueue_cqs_wait_operation( \
__TL_DISPATCH_STREAM(kbdev, obj), \
kcpu_queue, \
cqs_obj_gpu_addr, \
compare_value, \
condition, \
data_type, \
inherit_error \
); \
} while (0)
#else
#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_WAIT_OPERATION( \
kbdev, \
kcpu_queue, \
cqs_obj_gpu_addr, \
compare_value, \
condition, \
data_type, \
inherit_error \
) \
do { } while (0)
#endif /* MALI_USE_CSF */
/**
* KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_SET_OPERATION - KCPU Queue enqueues Set Operation on Cross Queue Sync Object
*
* @kbdev: Kbase device
* @kcpu_queue: KCPU queue
* @cqs_obj_gpu_addr: CQS Object GPU pointer
* @value: Value that will be set or added to semaphore
* @operation: Operation type performed on semaphore value (SET or ADD)
* @data_type: Data type of a CQS Object's value
*/
#if MALI_USE_CSF
#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_SET_OPERATION( \
kbdev, \
kcpu_queue, \
cqs_obj_gpu_addr, \
value, \
operation, \
data_type \
) \
do { \
int enabled = atomic_read(&kbdev->timeline_flags); \
if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \
__kbase_tlstream_tl_kbase_kcpuqueue_enqueue_cqs_set_operation( \
__TL_DISPATCH_STREAM(kbdev, obj), \
kcpu_queue, \
cqs_obj_gpu_addr, \
value, \
operation, \
data_type \
); \
} while (0)
#else
#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_SET_OPERATION( \
kbdev, \
kcpu_queue, \
cqs_obj_gpu_addr, \
value, \
operation, \
data_type \
) \
do { } while (0)
#endif /* MALI_USE_CSF */
/**
* KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_MAP_IMPORT - KCPU Queue enqueues Map Import
*
@@ -2999,6 +3204,95 @@ struct kbase_tlstream;
do { } while (0)
#endif /* MALI_USE_CSF */
/**
* KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_OPERATION_START - KCPU Queue starts a Wait Operation on Cross Queue Sync Object
*
* @kbdev: Kbase device
* @kcpu_queue: KCPU queue
*/
#if MALI_USE_CSF
#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_OPERATION_START( \
kbdev, \
kcpu_queue \
) \
do { \
int enabled = atomic_read(&kbdev->timeline_flags); \
if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \
__kbase_tlstream_tl_kbase_kcpuqueue_execute_cqs_wait_operation_start( \
__TL_DISPATCH_STREAM(kbdev, obj), \
kcpu_queue \
); \
} while (0)
#else
#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_OPERATION_START( \
kbdev, \
kcpu_queue \
) \
do { } while (0)
#endif /* MALI_USE_CSF */
/**
* KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_OPERATION_END - KCPU Queue ends a Wait Operation on Cross Queue Sync Object
*
* @kbdev: Kbase device
* @kcpu_queue: KCPU queue
* @execute_error: Non-zero error code if KCPU Queue item completed with error, else zero
*/
#if MALI_USE_CSF
#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_OPERATION_END( \
kbdev, \
kcpu_queue, \
execute_error \
) \
do { \
int enabled = atomic_read(&kbdev->timeline_flags); \
if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \
__kbase_tlstream_tl_kbase_kcpuqueue_execute_cqs_wait_operation_end( \
__TL_DISPATCH_STREAM(kbdev, obj), \
kcpu_queue, \
execute_error \
); \
} while (0)
#else
#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_OPERATION_END( \
kbdev, \
kcpu_queue, \
execute_error \
) \
do { } while (0)
#endif /* MALI_USE_CSF */
/**
* KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_SET_OPERATION - KCPU Queue executes a Set Operation on Cross Queue Sync Object
*
* @kbdev: Kbase device
* @kcpu_queue: KCPU queue
* @execute_error: Non-zero error code if KCPU Queue item completed with error, else zero
*/
#if MALI_USE_CSF
#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_SET_OPERATION( \
kbdev, \
kcpu_queue, \
execute_error \
) \
do { \
int enabled = atomic_read(&kbdev->timeline_flags); \
if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \
__kbase_tlstream_tl_kbase_kcpuqueue_execute_cqs_set_operation( \
__TL_DISPATCH_STREAM(kbdev, obj), \
kcpu_queue, \
execute_error \
); \
} while (0)
#else
#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_SET_OPERATION( \
kbdev, \
kcpu_queue, \
execute_error \
) \
do { } while (0)
#endif /* MALI_USE_CSF */
/**
* KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_MAP_IMPORT_START - KCPU Queue starts a Map Import
*

View File

@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
* (C) COPYRIGHT 2022 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2022-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -48,4 +48,32 @@ typedef unsigned int __poll_t;
#endif
#if KERNEL_VERSION(6, 1, 0) <= LINUX_VERSION_CODE
/* This is defined inside kbase for matching the default to kernel's
* mmap_min_addr, used inside file mali_kbase_mmap.c.
* Note: the value is set at compile time, matching a kernel's configuration
* value. It would not be able to track any runtime update of mmap_min_addr.
*/
#ifdef CONFIG_MMU
#define kbase_mmap_min_addr CONFIG_DEFAULT_MMAP_MIN_ADDR
#ifdef CONFIG_LSM_MMAP_MIN_ADDR
#if (CONFIG_LSM_MMAP_MIN_ADDR > CONFIG_DEFAULT_MMAP_MIN_ADDR)
/* Replace the default definition with CONFIG_LSM_MMAP_MIN_ADDR */
#undef kbase_mmap_min_addr
#define kbase_mmap_min_addr CONFIG_LSM_MMAP_MIN_ADDR
#pragma message "kbase_mmap_min_addr compiled to CONFIG_LSM_MMAP_MIN_ADDR, no runtime update!"
#endif /* (CONFIG_LSM_MMAP_MIN_ADDR > CONFIG_DEFAULT_MMAP_MIN_ADDR) */
#endif /* CONFIG_LSM_MMAP_MIN_ADDR */
#if (kbase_mmap_min_addr == CONFIG_DEFAULT_MMAP_MIN_ADDR)
#pragma message "kbase_mmap_min_addr compiled to CONFIG_DEFAULT_MMAP_MIN_ADDR, no runtime update!"
#endif
#else /* CONFIG_MMU */
#define kbase_mmap_min_addr (0UL)
#pragma message "kbase_mmap_min_addr compiled to (0UL), no runtime update!"
#endif /* CONFIG_MMU */
#endif /* KERNEL_VERSION(6, 1, 0) <= LINUX_VERSION_CODE */
#endif /* _VERSION_COMPAT_DEFS_H_ */

View File

@@ -27,12 +27,10 @@
#define _UAPI_KBASE_MODEL_LINUX_H_
/* Generic model IRQs */
enum model_linux_irqs {
MODEL_LINUX_JOB_IRQ,
MODEL_LINUX_GPU_IRQ,
MODEL_LINUX_MMU_IRQ,
MODEL_LINUX_NONE_IRQ,
MODEL_LINUX_NUM_TYPE_IRQ
};
#define MODEL_LINUX_JOB_IRQ (0x1 << 0)
#define MODEL_LINUX_GPU_IRQ (0x1 << 1)
#define MODEL_LINUX_MMU_IRQ (0x1 << 2)
#define MODEL_LINUX_IRQ_MASK (MODEL_LINUX_JOB_IRQ | MODEL_LINUX_GPU_IRQ | MODEL_LINUX_MMU_IRQ)
#endif /* _UAPI_KBASE_MODEL_LINUX_H_ */

View File

@@ -145,6 +145,9 @@
#define BASE_CSF_TILER_OOM_EXCEPTION_FLAG (1u << 0)
#define BASE_CSF_EXCEPTION_HANDLER_FLAGS_MASK (BASE_CSF_TILER_OOM_EXCEPTION_FLAG)
/* Initial value for LATEST_FLUSH register */
#define POWER_DOWN_LATEST_FLUSH_VALUE ((uint32_t)1)
/**
* enum base_kcpu_command_type - Kernel CPU queue command type.
* @BASE_KCPU_COMMAND_TYPE_FENCE_SIGNAL: fence_signal,

View File

@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
* (C) COPYRIGHT 2020-2022 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2020-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -79,11 +79,13 @@
* - prfcnt_block_metadata::block_idx gaps.
* - PRFCNT_CONTROL_CMD_SAMPLE_ASYNC is removed.
* 1.18:
* - Relax the requirement to create a mapping with BASE_MEM_MAP_TRACKING_HANDLE
* before allocating GPU memory for the context.
* - CPU mappings of USER_BUFFER imported memory handles must be cached.
*/
#define BASE_UK_VERSION_MAJOR 1
#define BASE_UK_VERSION_MINOR 17
#define BASE_UK_VERSION_MINOR 18
/**
* struct kbase_ioctl_version_check - Check version compatibility between

View File

@@ -27,4 +27,15 @@
#define IPA_CONTROL_REG(r) (IPA_CONTROL_BASE + (r))
#define STATUS 0x004 /* (RO) Status register */
/* USER base address */
#define USER_BASE 0x0010000
#define USER_REG(r) (USER_BASE + (r))
/* USER register offsets */
#define LATEST_FLUSH 0x0000 /* () Flush ID of latest clean-and-invalidate operation */
/* DOORBELLS base address */
#define DOORBELLS_BASE 0x0080000
#define DOORBELLS_REG(r) (DOORBELLS_BASE + (r))
#endif /* _UAPI_KBASE_GPU_REGMAP_CSF_H_ */

View File

@@ -43,4 +43,8 @@
#define JS_CONFIG_NEXT 0x58 /* (RW) Next configuration settings for job slot n */
#define JS_COMMAND_NEXT 0x60 /* (RW) Next command register for job slot n */
#define JOB_SLOT0 0x800 /* Configuration registers for job slot 0 */
#define JOB_SLOT_REG(n, r) (JOB_CONTROL_REG(JOB_SLOT0 + ((n) << 7)) + (r))
#endif /* _UAPI_KBASE_GPU_REGMAP_JM_H_ */

View File

@@ -36,6 +36,9 @@
#define GPU_ID 0x000 /* (RO) GPU and revision identifier */
#define GPU_IRQ_CLEAR 0x024 /* (WO) */
#define GPU_IRQ_STATUS 0x02C /* (RO) */
#define SHADER_READY_LO 0x140 /* (RO) Shader core ready bitmap, low word */
#define SHADER_READY_HI 0x144 /* (RO) Shader core ready bitmap, high word */
@@ -62,6 +65,7 @@
#define JOB_IRQ_CLEAR 0x004 /* Interrupt clear register */
#define JOB_IRQ_MASK 0x008 /* Interrupt mask register */
#define JOB_IRQ_STATUS 0x00C /* Interrupt status register */
/* MMU control registers */
@@ -70,6 +74,9 @@
#define MMU_REG(r) (MEMORY_MANAGEMENT_BASE + (r))
#define MMU_IRQ_RAWSTAT 0x000 /* (RW) Raw interrupt status register */
#define MMU_IRQ_CLEAR 0x004 /* (WO) Interrupt clear register */
#define MMU_IRQ_MASK 0x008 /* (RW) Interrupt mask register */
#define MMU_IRQ_STATUS 0x00C /* (RO) Interrupt status register */
#define MMU_AS0 0x400 /* Configuration registers for address space 0 */

View File

@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
* (C) COPYRIGHT 2020-2022 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2020-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -140,10 +140,12 @@
* - prfcnt_block_metadata::block_idx gaps.
* - PRFCNT_CONTROL_CMD_SAMPLE_ASYNC is removed.
* 11.38:
* - Relax the requirement to create a mapping with BASE_MEM_MAP_TRACKING_HANDLE
* before allocating GPU memory for the context.
* - CPU mappings of USER_BUFFER imported memory handles must be cached.
*/
#define BASE_UK_VERSION_MAJOR 11
#define BASE_UK_VERSION_MINOR 37
#define BASE_UK_VERSION_MINOR 38
/**
* struct kbase_ioctl_version_check - Check version compatibility between