MALI: rockchip: upgrade midgard DDK to r9p0-05rel0

Conflicts:

	drivers/gpu/arm/midgard/Kconfig

Change-Id: Ib7975ebe959624bedd92f126768987f2e2f0f84b
Signed-off-by: chenzhen <chenzhen@rock-chips.com>
Author:       chenzhen <chenzhen@rock-chips.com>
Date:         2016-02-19 16:58:15 +08:00
Committed by: Huang, Tao
Parent:       c9a873e3ed
Commit:       206f372ede
34 changed files with 494 additions and 271 deletions

View File

@@ -15,7 +15,7 @@
# Driver version string which is returned to userspace via an ioctl
MALI_RELEASE_NAME ?= "r8p0-02rel0"
MALI_RELEASE_NAME ?= "r9p0-05rel0"
# Paths required for build
KBASE_PATH = $(src)

View File

@@ -63,14 +63,6 @@ config MALI_MIDGARD_DVFS
help
Choose this option to enable legacy DVFS in the Mali Midgard DDK.
config MALI_MIDGARD_RT_PM
bool "Enable Runtime power management"
depends on MALI_MIDGARD
depends on PM
default y
help
Choose this option to enable runtime power management in the Mali Midgard DDK.
config MALI_MIDGARD_ENABLE_TRACE
bool "Enable kbase tracing"
depends on MALI_MIDGARD
@@ -79,13 +71,6 @@ config MALI_MIDGARD_ENABLE_TRACE
Enables tracing in kbase. Trace log available through
the "mali_trace" debugfs file, when the CONFIG_DEBUG_FS is enabled
config MALI_MIDGARD_DEBUG_SYS
bool "Enable sysfs for the Mali Midgard DDK "
depends on MALI_MIDGARD && SYSFS
default n
help
Enables sysfs for the Mali Midgard DDK. Set/Monitor the Mali Midgard DDK
config MALI_DEVFREQ
bool "devfreq support for Mali"
depends on MALI_MIDGARD && PM_DEVFREQ
@@ -109,7 +94,7 @@ menuconfig MALI_EXPERT
config MALI_DEBUG_SHADER_SPLIT_FS
bool "Allow mapping of shader cores via sysfs"
depends on MALI_MIDGARD && MALI_MIDGARD_DEBUG_SYS && MALI_EXPERT
depends on MALI_MIDGARD && MALI_EXPERT
default n
help
Select this option to provide a sysfs entry for runtime configuration of shader

View File

@@ -63,6 +63,9 @@ struct slot_rb {
* @scheduling_timer: The timer tick used for rescheduling jobs
* @timer_running: Is the timer running? The runpool_mutex must be
* held whilst modifying this.
* @suspend_timer: Is the timer suspended? Set when a suspend
* occurs and cleared on resume. The runpool_mutex
* must be held whilst modifying this.
* @reset_gpu: Set to a KBASE_RESET_xxx value (see comments)
* @reset_workq: Work queue for performing the reset
* @reset_work: Work item for performing the reset
@@ -80,6 +83,7 @@ struct kbase_backend_data {
struct hrtimer scheduling_timer;
bool timer_running;
bool suspend_timer;
atomic_t reset_gpu;

View File

@@ -144,14 +144,14 @@ void kbase_job_hw_submit(struct kbase_device *kbdev,
"ctx_nr,atom_nr");
#endif
#ifdef CONFIG_GPU_TRACEPOINTS
if (kbase_backend_nr_atoms_submitted(kbdev, js) == 1) {
if (!kbase_backend_nr_atoms_submitted(kbdev, js)) {
/* If this is the only job on the slot, trace it as starting */
char js_string[16];
trace_gpu_sched_switch(
kbasep_make_job_slot_string(js, js_string),
ktime_to_ns(katom->start_timestamp),
(u32)katom->kctx, 0, katom->work_id);
(u32)katom->kctx->id, 0, katom->work_id);
kbdev->hwaccess.backend.slot_rb[js].last_context = katom->kctx;
}
#endif
@@ -413,6 +413,8 @@ void kbase_job_done(struct kbase_device *kbdev, u32 done)
failed = done >> 16;
finished = (done & 0xFFFF) | failed;
if (done)
end_timestamp = ktime_get();
} while (finished & (1 << i));
kbasep_job_slot_update_head_start_timestamp(kbdev, i,
@@ -819,6 +821,14 @@ void kbase_jm_wait_for_zero_jobs(struct kbase_context *kctx)
mutex_unlock(&kctx->jctx.lock);
}
u32 kbase_backend_get_current_flush_id(struct kbase_device *kbdev)
{
u32 flush_id = 0;
return flush_id;
}
int kbase_job_slot_init(struct kbase_device *kbdev)
{
#if KBASE_GPU_RESET_EN

View File

@@ -1060,7 +1060,7 @@ void kbase_gpu_complete_hw(struct kbase_device *kbdev, int js,
trace_gpu_sched_switch(kbasep_make_job_slot_string(js,
js_string),
ktime_to_ns(*end_timestamp),
(u32)next_katom->kctx, 0,
(u32)next_katom->kctx->id, 0,
next_katom->work_id);
kbdev->hwaccess.backend.slot_rb[js].last_context =
next_katom->kctx;

View File

@@ -38,10 +38,15 @@
*/
static inline bool timer_callback_should_run(struct kbase_device *kbdev)
{
struct kbase_backend_data *backend = &kbdev->hwaccess.backend;
s8 nr_running_ctxs;
lockdep_assert_held(&kbdev->js_data.runpool_mutex);
/* Timer must stop if we are suspending */
if (backend->suspend_timer)
return false;
/* nr_contexts_pullable is updated with the runpool_mutex. However, the
* locking in the caller gives us a barrier that ensures
* nr_contexts_pullable is up-to-date for reading */
@@ -270,7 +275,6 @@ void kbase_backend_ctx_count_changed(struct kbase_device *kbdev)
spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
backend->timer_running = false;
spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
/* From now on, return value of timer_callback_should_run() will
* also cause the timer to not requeue itself. Its return value
* cannot change, because it depends on variables updated with
@@ -284,7 +288,6 @@ void kbase_backend_ctx_count_changed(struct kbase_device *kbdev)
spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
backend->timer_running = true;
spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
hrtimer_start(&backend->scheduling_timer,
HR_TIMER_DELAY_NSEC(js_devdata->scheduling_period_ns),
HRTIMER_MODE_REL);
@@ -314,3 +317,21 @@ void kbase_backend_timer_term(struct kbase_device *kbdev)
hrtimer_cancel(&backend->scheduling_timer);
}
void kbase_backend_timer_suspend(struct kbase_device *kbdev)
{
struct kbase_backend_data *backend = &kbdev->hwaccess.backend;
backend->suspend_timer = true;
kbase_backend_ctx_count_changed(kbdev);
}
void kbase_backend_timer_resume(struct kbase_device *kbdev)
{
struct kbase_backend_data *backend = &kbdev->hwaccess.backend;
backend->suspend_timer = false;
kbase_backend_ctx_count_changed(kbdev);
}

View File

@@ -41,4 +41,29 @@ int kbase_backend_timer_init(struct kbase_device *kbdev);
*/
void kbase_backend_timer_term(struct kbase_device *kbdev);
/**
* kbase_backend_timer_suspend - Suspend is happening, stop the JS scheduling
* timer
* @kbdev: Device pointer
*
* This function should be called on suspend, after the active count has reached
* zero. This is required as the timer may have been started on job submission
* to the job scheduler, but before jobs are submitted to the GPU.
*
* Caller must hold runpool_mutex.
*/
void kbase_backend_timer_suspend(struct kbase_device *kbdev);
/**
* kbase_backend_timer_resume - Resume is happening, re-evaluate the JS
* scheduling timer
* @kbdev: Device pointer
*
* This function should be called on resume. Note that it is not guaranteed to
* re-start the timer, only evaluate whether it should be re-started.
*
* Caller must hold runpool_mutex.
*/
void kbase_backend_timer_resume(struct kbase_device *kbdev);
#endif /* _KBASE_JS_BACKEND_H_ */
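
As a usage note (not part of the patch): both calls above assume runpool_mutex is held and that suspend happens only after the PM active count has reached zero; resume merely re-evaluates whether the scheduling tick should run. A minimal caller sketch under those assumptions; the real call sites are in the PM backend hunk further down:

	/* Illustrative only; mirrors the documented contract above. */
	mutex_lock(&kbdev->js_data.runpool_mutex);
	kbase_backend_timer_suspend(kbdev);	/* PM active count already zero */
	mutex_unlock(&kbdev->js_data.runpool_mutex);

	/* ... later, on resume ... */
	mutex_lock(&kbdev->js_data.runpool_mutex);
	kbase_backend_timer_resume(kbdev);	/* may or may not restart the tick */
	mutex_unlock(&kbdev->js_data.runpool_mutex);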

View File

@@ -29,6 +29,7 @@
#include <mali_kbase_pm.h>
#include <backend/gpu/mali_kbase_jm_internal.h>
#include <backend/gpu/mali_kbase_js_internal.h>
#include <backend/gpu/mali_kbase_pm_internal.h>
void kbase_pm_register_access_enable(struct kbase_device *kbdev)
@@ -363,6 +364,8 @@ void kbase_hwaccess_pm_suspend(struct kbase_device *kbdev)
WARN_ON(!kbase_pm_do_poweroff(kbdev, false));
}
kbase_backend_timer_suspend(kbdev);
mutex_unlock(&kbdev->pm.lock);
mutex_unlock(&js_devdata->runpool_mutex);
}
@@ -373,8 +376,12 @@ void kbase_hwaccess_pm_resume(struct kbase_device *kbdev)
mutex_lock(&js_devdata->runpool_mutex);
mutex_lock(&kbdev->pm.lock);
kbdev->pm.suspending = false;
kbase_pm_do_poweron(kbdev, true);
kbase_backend_timer_resume(kbdev);
mutex_unlock(&kbdev->pm.lock);
mutex_unlock(&js_devdata->runpool_mutex);
}

View File

@@ -1066,6 +1066,13 @@ static void kbase_pm_hw_issues_detect(struct kbase_device *kbdev)
if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_10797))
kbdev->hw_quirks_sc |= SC_ENABLE_TEXGRD_FLAGS;
if (!kbase_hw_has_issue(kbdev, GPUCORE_1619)) {
if (prod_id < 0x760 || prod_id == 0x6956) /* T60x, T62x, T72x */
kbdev->hw_quirks_sc |= SC_LS_ATTR_CHECK_DISABLE;
else if (prod_id >= 0x760 && prod_id <= 0x880) /* T76x, T8xx */
kbdev->hw_quirks_sc |= SC_LS_ALLOW_ATTR_TYPES;
}
kbdev->hw_quirks_tiler = kbase_reg_read(kbdev,
GPU_CONTROL_REG(TILER_CONFIG), NULL);
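
As an illustrative aside (not from the patch), the product-ID test above splits GPUs into two shader-core quirk groups; a self-contained sketch of that classification, with the bit values taken from the SHADER_CONFIG register hunk later in this commit:

#include <linux/types.h>

#define SC_LS_ALLOW_ATTR_TYPES    (1ul << 16)
#define SC_LS_ATTR_CHECK_DISABLE  (1ul << 18)

/* Same ranges as the hunk above; the GPUCORE_1619 gating is omitted. */
static u32 sc_quirks_for_prod_id(u32 prod_id)
{
	u32 sc = 0;

	if (prod_id < 0x760 || prod_id == 0x6956)	/* T60x, T62x, T72x */
		sc |= SC_LS_ATTR_CHECK_DISABLE;
	else if (prod_id >= 0x760 && prod_id <= 0x880)	/* T76x, T8xx */
		sc |= SC_LS_ALLOW_ATTR_TYPES;

	return sc;	/* e.g. 0x0620 sets bit 18, 0x0760 sets bit 16 */
}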
@@ -1230,10 +1237,10 @@ int kbase_pm_init_hw(struct kbase_device *kbdev, unsigned int flags)
RESET_COMPLETED) {
/* The interrupt is set in the RAWSTAT; this suggests that the
* interrupts are not getting to the CPU */
dev_warn(kbdev->dev, "Reset interrupt didn't reach CPU. Check interrupt assignments.\n");
dev_err(kbdev->dev, "Reset interrupt didn't reach CPU. Check interrupt assignments.\n");
/* If interrupts aren't working we can't continue. */
destroy_hrtimer_on_stack(&rtdata.timer);
goto out;
return -EINVAL;
}
/* The GPU doesn't seem to be responding to the reset so try a hard

View File

@@ -57,7 +57,6 @@ enum base_hw_issue {
BASE_HW_ISSUE_8986,
BASE_HW_ISSUE_8987,
BASE_HW_ISSUE_9010,
BASE_HW_ISSUE_9275,
BASE_HW_ISSUE_9418,
BASE_HW_ISSUE_9423,
BASE_HW_ISSUE_9435,
@@ -102,6 +101,7 @@ enum base_hw_issue {
BASE_HW_ISSUE_T76X_3953,
BASE_HW_ISSUE_T76X_3960,
BASE_HW_ISSUE_T76X_3966,
GPUCORE_1619,
BASE_HW_ISSUE_END
};
@@ -141,7 +141,6 @@ static const enum base_hw_issue base_hw_issues_t60x_r0p0_15dev0[] = {
BASE_HW_ISSUE_8986,
BASE_HW_ISSUE_8987,
BASE_HW_ISSUE_9010,
BASE_HW_ISSUE_9275,
BASE_HW_ISSUE_9418,
BASE_HW_ISSUE_9423,
BASE_HW_ISSUE_9435,
@@ -168,6 +167,7 @@ static const enum base_hw_issue base_hw_issues_t60x_r0p0_15dev0[] = {
BASE_HW_ISSUE_11035,
BASE_HW_ISSUE_11051,
BASE_HW_ISSUE_T76X_1909,
GPUCORE_1619,
BASE_HW_ISSUE_END
};
@@ -182,7 +182,6 @@ static const enum base_hw_issue base_hw_issues_t60x_r0p0_eac[] = {
BASE_HW_ISSUE_8778,
BASE_HW_ISSUE_8975,
BASE_HW_ISSUE_9010,
BASE_HW_ISSUE_9275,
BASE_HW_ISSUE_9418,
BASE_HW_ISSUE_9423,
BASE_HW_ISSUE_9435,
@@ -219,7 +218,6 @@ static const enum base_hw_issue base_hw_issues_t60x_r0p1[] = {
BASE_HW_ISSUE_8778,
BASE_HW_ISSUE_8975,
BASE_HW_ISSUE_9010,
BASE_HW_ISSUE_9275,
BASE_HW_ISSUE_9435,
BASE_HW_ISSUE_9510,
BASE_HW_ISSUE_10410,
@@ -494,7 +492,6 @@ static const enum base_hw_issue base_hw_issues_t72x_r1p1[] = {
static const enum base_hw_issue base_hw_issues_model_t72x[] = {
BASE_HW_ISSUE_5736,
BASE_HW_ISSUE_6402,
BASE_HW_ISSUE_9275,
BASE_HW_ISSUE_9435,
BASE_HW_ISSUE_10471,
BASE_HW_ISSUE_10797,
@@ -502,12 +499,12 @@ static const enum base_hw_issue base_hw_issues_model_t72x[] = {
BASE_HW_ISSUE_11051,
BASE_HW_ISSUE_T76X_1909,
BASE_HW_ISSUE_T76X_1963,
GPUCORE_1619,
BASE_HW_ISSUE_END
};
static const enum base_hw_issue base_hw_issues_model_t76x[] = {
BASE_HW_ISSUE_5736,
BASE_HW_ISSUE_9275,
BASE_HW_ISSUE_9435,
BASE_HW_ISSUE_11020,
BASE_HW_ISSUE_11024,
@@ -518,6 +515,7 @@ static const enum base_hw_issue base_hw_issues_model_t76x[] = {
BASE_HW_ISSUE_T76X_3086,
BASE_HW_ISSUE_T76X_3700,
BASE_HW_ISSUE_T76X_3793,
GPUCORE_1619,
BASE_HW_ISSUE_END
};
@@ -525,7 +523,6 @@ static const enum base_hw_issue base_hw_issues_model_t60x[] = {
BASE_HW_ISSUE_5736,
BASE_HW_ISSUE_6402,
BASE_HW_ISSUE_8778,
BASE_HW_ISSUE_9275,
BASE_HW_ISSUE_9435,
BASE_HW_ISSUE_10472,
BASE_HW_ISSUE_10931,
@@ -535,6 +532,7 @@ static const enum base_hw_issue base_hw_issues_model_t60x[] = {
BASE_HW_ISSUE_11051,
BASE_HW_ISSUE_T76X_1909,
BASE_HW_ISSUE_T76X_1963,
GPUCORE_1619,
BASE_HW_ISSUE_END
};
@@ -551,6 +549,7 @@ static const enum base_hw_issue base_hw_issues_model_t62x[] = {
BASE_HW_ISSUE_11051,
BASE_HW_ISSUE_T76X_1909,
BASE_HW_ISSUE_T76X_1963,
GPUCORE_1619,
BASE_HW_ISSUE_END
};
@@ -619,13 +618,13 @@ static const enum base_hw_issue base_hw_issues_tFRx_r2p0[] = {
static const enum base_hw_issue base_hw_issues_model_tFRx[] = {
BASE_HW_ISSUE_5736,
BASE_HW_ISSUE_9275,
BASE_HW_ISSUE_9435,
BASE_HW_ISSUE_11051,
BASE_HW_ISSUE_T76X_1963,
BASE_HW_ISSUE_T76X_3086,
BASE_HW_ISSUE_T76X_3700,
BASE_HW_ISSUE_T76X_3793,
GPUCORE_1619,
BASE_HW_ISSUE_END
};
@@ -677,13 +676,13 @@ static const enum base_hw_issue base_hw_issues_t86x_r2p0[] = {
static const enum base_hw_issue base_hw_issues_model_t86x[] = {
BASE_HW_ISSUE_5736,
BASE_HW_ISSUE_9275,
BASE_HW_ISSUE_9435,
BASE_HW_ISSUE_11051,
BASE_HW_ISSUE_T76X_1963,
BASE_HW_ISSUE_T76X_3086,
BASE_HW_ISSUE_T76X_3700,
BASE_HW_ISSUE_T76X_3793,
GPUCORE_1619,
BASE_HW_ISSUE_END
};
@@ -720,7 +719,6 @@ static const enum base_hw_issue base_hw_issues_t83x_r1p0[] = {
static const enum base_hw_issue base_hw_issues_model_t83x[] = {
BASE_HW_ISSUE_5736,
BASE_HW_ISSUE_9275,
BASE_HW_ISSUE_9435,
BASE_HW_ISSUE_11051,
BASE_HW_ISSUE_T76X_1909,
@@ -728,6 +726,7 @@ static const enum base_hw_issue base_hw_issues_model_t83x[] = {
BASE_HW_ISSUE_T76X_3086,
BASE_HW_ISSUE_T76X_3700,
BASE_HW_ISSUE_T76X_3793,
GPUCORE_1619,
BASE_HW_ISSUE_END
};
@@ -780,7 +779,6 @@ static const enum base_hw_issue base_hw_issues_t82x_r1p0[] = {
static const enum base_hw_issue base_hw_issues_model_t82x[] = {
BASE_HW_ISSUE_5736,
BASE_HW_ISSUE_9275,
BASE_HW_ISSUE_9435,
BASE_HW_ISSUE_11051,
BASE_HW_ISSUE_T76X_1909,
@@ -788,6 +786,7 @@ static const enum base_hw_issue base_hw_issues_model_t82x[] = {
BASE_HW_ISSUE_T76X_3086,
BASE_HW_ISSUE_T76X_3700,
BASE_HW_ISSUE_T76X_3793,
GPUCORE_1619,
BASE_HW_ISSUE_END
};

View File

@@ -772,11 +772,12 @@ static inline void base_jd_fence_wait_setup_v2(struct base_jd_atom_v2 *atom, str
/**
* @brief External resource info initialization.
*
* Sets up a external resource object to reference
* Sets up an external resource object to reference
* a memory allocation and the type of access requested.
*
* @param[in] res The resource object to initialize
* @param handle The handle to the imported memory object
* @param handle The handle to the imported memory object, must be
* obtained by calling @ref base_mem_as_import_handle().
* @param access The type of access requested
*/
static inline void base_external_resource_init(struct base_external_resource *res, struct base_import_handle handle, base_external_resource_access access)

View File

@@ -196,7 +196,8 @@ bool kbase_replay_process(struct kbase_jd_atom *katom);
/* api used internally for register access. Contains validation and tracing */
void kbase_device_trace_register_access(struct kbase_context *kctx, enum kbase_reg_access_type type, u16 reg_offset, u32 reg_value);
void kbase_device_trace_buffer_install(struct kbase_context *kctx, u32 *tb, size_t size);
int kbase_device_trace_buffer_install(
struct kbase_context *kctx, u32 *tb, size_t size);
void kbase_device_trace_buffer_uninstall(struct kbase_context *kctx);
/* api to be ported per OS, only need to do the raw register access */

View File

@@ -66,7 +66,9 @@
#include <linux/io.h>
#include <linux/mm.h>
#include <linux/compat.h> /* is_compat_task */
#include <linux/mman.h>
#include <linux/version.h>
#include <linux/security.h>
#ifdef CONFIG_MALI_PLATFORM_DEVICETREE
#include <linux/pm_runtime.h>
#endif /* CONFIG_MALI_PLATFORM_DEVICETREE */
@@ -1483,99 +1485,154 @@ static int kbase_check_flags(int flags)
return 0;
}
#ifdef CONFIG_64BIT
/* The following function is taken from the kernel and just
* renamed. As it's not exported to modules we must copy-paste it here.
*/
static unsigned long kbase_unmapped_area_topdown(struct vm_unmapped_area_info
*info)
{
struct mm_struct *mm = current->mm;
struct vm_area_struct *vma;
unsigned long length, low_limit, high_limit, gap_start, gap_end;
/* Adjust search length to account for worst case alignment overhead */
length = info->length + info->align_mask;
if (length < info->length)
return -ENOMEM;
/*
* Adjust search limits by the desired length.
* See implementation comment at top of unmapped_area().
*/
gap_end = info->high_limit;
if (gap_end < length)
return -ENOMEM;
high_limit = gap_end - length;
if (info->low_limit > high_limit)
return -ENOMEM;
low_limit = info->low_limit + length;
/* Check highest gap, which does not precede any rbtree node */
gap_start = mm->highest_vm_end;
if (gap_start <= high_limit)
goto found_highest;
/* Check if rbtree root looks promising */
if (RB_EMPTY_ROOT(&mm->mm_rb))
return -ENOMEM;
vma = rb_entry(mm->mm_rb.rb_node, struct vm_area_struct, vm_rb);
if (vma->rb_subtree_gap < length)
return -ENOMEM;
while (true) {
/* Visit right subtree if it looks promising */
gap_start = vma->vm_prev ? vma->vm_prev->vm_end : 0;
if (gap_start <= high_limit && vma->vm_rb.rb_right) {
struct vm_area_struct *right =
rb_entry(vma->vm_rb.rb_right,
struct vm_area_struct, vm_rb);
if (right->rb_subtree_gap >= length) {
vma = right;
continue;
}
}
check_current:
/* Check if current node has a suitable gap */
gap_end = vma->vm_start;
if (gap_end < low_limit)
return -ENOMEM;
if (gap_start <= high_limit && gap_end - gap_start >= length)
goto found;
/* Visit left subtree if it looks promising */
if (vma->vm_rb.rb_left) {
struct vm_area_struct *left =
rb_entry(vma->vm_rb.rb_left,
struct vm_area_struct, vm_rb);
if (left->rb_subtree_gap >= length) {
vma = left;
continue;
}
}
/* Go back up the rbtree to find next candidate node */
while (true) {
struct rb_node *prev = &vma->vm_rb;
if (!rb_parent(prev))
return -ENOMEM;
vma = rb_entry(rb_parent(prev),
struct vm_area_struct, vm_rb);
if (prev == vma->vm_rb.rb_right) {
gap_start = vma->vm_prev ?
vma->vm_prev->vm_end : 0;
goto check_current;
}
}
}
found:
/* We found a suitable gap. Clip it with the original high_limit. */
if (gap_end > info->high_limit)
gap_end = info->high_limit;
found_highest:
/* Compute highest gap address at the desired alignment */
gap_end -= info->length;
gap_end -= (gap_end - info->align_offset) & info->align_mask;
VM_BUG_ON(gap_end < info->low_limit);
VM_BUG_ON(gap_end < gap_start);
return gap_end;
}
static unsigned long kbase_get_unmapped_area(struct file *filp,
const unsigned long addr, const unsigned long len,
const unsigned long pgoff, const unsigned long flags)
{
#ifdef CONFIG_64BIT
/* based on get_unmapped_area, but simplified slightly due to that some
* values are known in advance */
struct kbase_context *kctx = filp->private_data;
struct mm_struct *mm = current->mm;
struct vm_unmapped_area_info info;
if (!kctx->is_compat && !addr &&
kbase_hw_has_feature(kctx->kbdev, BASE_HW_FEATURE_33BIT_VA)) {
struct mm_struct *mm = current->mm;
struct vm_area_struct *vma;
unsigned long low_limit, high_limit, gap_start, gap_end;
/* err on fixed address */
if ((flags & MAP_FIXED) || addr)
return -EINVAL;
/* Hardware has smaller VA than userspace, ensure the page
* comes from a VA which can be used on the GPU */
/* too big? */
if (len > TASK_SIZE - SZ_2M)
return -ENOMEM;
gap_end = (1UL<<33);
if (gap_end < len)
return -ENOMEM;
high_limit = gap_end - len;
low_limit = PAGE_SIZE + len;
if (kctx->is_compat)
return current->mm->get_unmapped_area(filp, addr, len, pgoff,
flags);
gap_start = mm->highest_vm_end;
if (gap_start <= high_limit)
goto found_highest;
if (RB_EMPTY_ROOT(&mm->mm_rb))
return -ENOMEM;
vma = rb_entry(mm->mm_rb.rb_node, struct vm_area_struct, vm_rb);
if (vma->rb_subtree_gap < len)
return -ENOMEM;
while (true) {
gap_start = vma->vm_prev ? vma->vm_prev->vm_end : 0;
if (gap_start <= high_limit && vma->vm_rb.rb_right) {
struct vm_area_struct *right =
rb_entry(vma->vm_rb.rb_right,
struct vm_area_struct, vm_rb);
if (right->rb_subtree_gap >= len) {
vma = right;
continue;
}
}
check_current:
gap_end = vma->vm_start;
if (gap_end < low_limit)
return -ENOMEM;
if (gap_start <= high_limit &&
gap_end - gap_start >= len)
goto found;
if (vma->vm_rb.rb_left) {
struct vm_area_struct *left =
rb_entry(vma->vm_rb.rb_left,
struct vm_area_struct, vm_rb);
if (left->rb_subtree_gap >= len) {
vma = left;
continue;
}
}
while (true) {
struct rb_node *prev = &vma->vm_rb;
if (!rb_parent(prev))
return -ENOMEM;
vma = rb_entry(rb_parent(prev),
struct vm_area_struct, vm_rb);
if (prev == vma->vm_rb.rb_right) {
gap_start = vma->vm_prev ?
vma->vm_prev->vm_end : 0;
goto check_current;
}
}
if (kbase_hw_has_feature(kctx->kbdev, BASE_HW_FEATURE_33BIT_VA)) {
info.high_limit = 1ul << 33;
info.align_mask = 0;
info.align_offset = 0;
} else {
info.high_limit = mm->mmap_base;
if (len >= SZ_2M) {
info.align_offset = SZ_2M;
info.align_mask = SZ_2M - 1;
} else {
info.align_mask = 0;
info.align_offset = 0;
}
found:
if (gap_end > (1UL<<33))
gap_end = (1UL<<33);
found_highest:
gap_end -= len;
VM_BUG_ON(gap_end < PAGE_SIZE);
VM_BUG_ON(gap_end < gap_start);
return gap_end;
}
#endif
/* No special requirements - fallback to the default version */
return current->mm->get_unmapped_area(filp, addr, len, pgoff, flags);
info.flags = 0;
info.length = len;
info.low_limit = SZ_2M;
return kbase_unmapped_area_topdown(&info);
}
#endif
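
For the non-33-bit-VA case, mappings of 2 MiB or more request 2 MiB alignment through align_offset/align_mask. A small standalone sketch (illustrative values, not from the patch) of how the final adjustment in kbase_unmapped_area_topdown then lands the start address on a 2 MiB boundary:

#include <stdio.h>

#define SZ_2M (2UL * 1024 * 1024)

int main(void)
{
	unsigned long gap_end = 0x7f5a3c9000UL;	/* hypothetical top of a free gap */
	unsigned long len = 4 * SZ_2M;		/* hypothetical mapping length */
	unsigned long align_offset = SZ_2M;	/* as set for len >= SZ_2M above */
	unsigned long align_mask = SZ_2M - 1;

	gap_end -= len;					/* highest possible start */
	gap_end -= (gap_end - align_offset) & align_mask;	/* round down to 2 MiB */

	printf("start = 0x%lx\n", gap_end);	/* prints a 2 MiB aligned address */
	return 0;
}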
static const struct file_operations kbase_fops = {
.owner = THIS_MODULE,
@@ -1587,7 +1644,9 @@ static const struct file_operations kbase_fops = {
.compat_ioctl = kbase_ioctl,
.mmap = kbase_mmap,
.check_flags = kbase_check_flags,
#ifdef CONFIG_64BIT
.get_unmapped_area = kbase_get_unmapped_area,
#endif
};
#ifndef CONFIG_MALI_NO_MALI
@@ -3876,7 +3935,7 @@ static int kbase_device_runtime_suspend(struct device *dev)
*/
#ifdef KBASE_PM_RUNTIME
int kbase_device_runtime_resume(struct device *dev)
static int kbase_device_runtime_resume(struct device *dev)
{
int ret = 0;
struct kbase_device *kbdev = to_kbase_device(dev);

View File

@@ -433,12 +433,12 @@ void kbase_debug_job_fault_context_init(struct kbase_context *kctx)
/* We need allocate double size register range
* Because this memory will keep the register address and value
*/
kctx->reg_dump = kmalloc(0x4000 * 2, GFP_KERNEL);
kctx->reg_dump = vmalloc(0x4000 * 2);
if (kctx->reg_dump == NULL)
return;
if (kbase_debug_job_fault_reg_snapshot_init(kctx, 0x4000) == false) {
kfree(kctx->reg_dump);
vfree(kctx->reg_dump);
kctx->reg_dump = NULL;
}
INIT_LIST_HEAD(&kctx->job_fault_resume_event_list);
@@ -451,7 +451,7 @@ void kbase_debug_job_fault_context_init(struct kbase_context *kctx)
*/
void kbase_debug_job_fault_context_term(struct kbase_context *kctx)
{
kfree(kctx->reg_dump);
vfree(kctx->reg_dump);
}
#else /* CONFIG_DEBUG_FS */

View File

@@ -144,6 +144,8 @@
#define MIDGARD_MMU_TOPLEVEL 1
#endif
#define MIDGARD_MMU_BOTTOMLEVEL 3
#define GROWABLE_FLAGS_REQUIRED (KBASE_REG_PF_GROW | KBASE_REG_GPU_WR)
/** setting in kbase_context::as_nr that indicates it's invalid */
@@ -386,6 +388,9 @@ struct kbase_jd_atom {
/* Pointer to atom that has cross-slot dependency on this atom */
struct kbase_jd_atom *x_post_dep;
/* The GPU's flush count recorded at the time of submission, used for
* the cache flush optimisation */
u32 flush_id;
struct kbase_jd_atom_backend backend;
#ifdef CONFIG_DEBUG_FS
@@ -934,10 +939,6 @@ struct kbase_device {
struct list_head kctx_list;
struct mutex kctx_list_lock;
#ifdef CONFIG_MALI_MIDGARD_RT_PM
struct delayed_work runtime_pm_workqueue;
#endif
#ifdef CONFIG_PM_DEVFREQ
struct devfreq_dev_profile devfreq_profile;
struct devfreq *devfreq;
@@ -1216,6 +1217,9 @@ struct kbase_context {
struct list_head completed_jobs;
/* Number of work items currently pending on job_done_wq */
atomic_t work_count;
/* true if context is counted in kbdev->js_data.nr_contexts_runnable */
bool ctx_runnable_ref;
};
enum kbase_reg_access_type {

View File

@@ -257,13 +257,20 @@ void kbase_device_free(struct kbase_device *kbdev)
kfree(kbdev);
}
void kbase_device_trace_buffer_install(struct kbase_context *kctx, u32 *tb, size_t size)
int kbase_device_trace_buffer_install(
struct kbase_context *kctx, u32 *tb, size_t size)
{
unsigned long flags;
KBASE_DEBUG_ASSERT(kctx);
KBASE_DEBUG_ASSERT(tb);
/* Interface uses 16-bit value to track last accessed entry. Each entry
* is composed of two 32-bit words.
* This limits the size that can be handled without an overflow. */
if (0xFFFF * (2 * sizeof(u32)) < size)
return -EINVAL;
/* set up the header */
/* magic number in the first 4 bytes */
tb[0] = TRACE_BUFFER_HEADER_SPECIAL;
@@ -278,6 +285,8 @@ void kbase_device_trace_buffer_install(struct kbase_context *kctx, u32 *tb, size
kctx->jctx.tb_wrap_offset = size / 8;
kctx->jctx.tb = tb;
spin_unlock_irqrestore(&kctx->jctx.tb_lock, flags);
return 0;
}
void kbase_device_trace_buffer_uninstall(struct kbase_context *kctx)
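
The new bound follows directly from the comment above: a 16-bit index over entries of two 32-bit words caps the buffer at 0xFFFF * 8 bytes, just under 512 KiB, and larger buffers are rejected with -EINVAL. A hypothetical caller sketch (not from the patch; the mmap path later in this commit does the equivalent, freeing with vfree on failure):

#include <linux/vmalloc.h>
#include <mali_kbase.h>

/* Hypothetical helper, illustrative only: allocate a trace buffer and
 * honour the new int return value of kbase_device_trace_buffer_install(). */
static int example_install_trace_buffer(struct kbase_context *kctx, size_t size)
{
	u32 *tb = vmalloc_user(size);
	int err;

	if (!tb)
		return -ENOMEM;

	/* -EINVAL once size > 0xFFFF * 2 * sizeof(u32) */
	err = kbase_device_trace_buffer_install(kctx, tb, size);
	if (err)
		vfree(tb);

	return err;
}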

View File

@@ -221,7 +221,7 @@ static const char * const hardware_counters_mali_t60x[] = {
"T60x_LSC_DIRTY_LINE",
"T60x_LSC_SNOOPS",
"T60x_AXI_TLB_STALL",
"T60x_AXI_TLB_MIESS",
"T60x_AXI_TLB_MISS",
"T60x_AXI_TLB_TRANSACTION",
"T60x_LS_TLB_MISS",
"T60x_LS_TLB_HIT",
@@ -486,7 +486,7 @@ static const char * const hardware_counters_mali_t62x[] = {
"T62x_LSC_DIRTY_LINE",
"T62x_LSC_SNOOPS",
"T62x_AXI_TLB_STALL",
"T62x_AXI_TLB_MIESS",
"T62x_AXI_TLB_MISS",
"T62x_AXI_TLB_TRANSACTION",
"T62x_LS_TLB_MISS",
"T62x_LS_TLB_HIT",
@@ -1018,7 +1018,7 @@ static const char * const hardware_counters_mali_t76x[] = {
"T76x_LSC_DIRTY_LINE",
"T76x_LSC_SNOOPS",
"T76x_AXI_TLB_STALL",
"T76x_AXI_TLB_MIESS",
"T76x_AXI_TLB_MISS",
"T76x_AXI_TLB_TRANSACTION",
"T76x_LS_TLB_MISS",
"T76x_LS_TLB_HIT",
@@ -1284,7 +1284,7 @@ static const char * const hardware_counters_mali_t82x[] = {
"T82x_LSC_DIRTY_LINE",
"T82x_LSC_SNOOPS",
"T82x_AXI_TLB_STALL",
"T82x_AXI_TLB_MIESS",
"T82x_AXI_TLB_MISS",
"T82x_AXI_TLB_TRANSACTION",
"T82x_LS_TLB_MISS",
"T82x_LS_TLB_HIT",
@@ -1550,7 +1550,7 @@ static const char * const hardware_counters_mali_t83x[] = {
"T83x_LSC_DIRTY_LINE",
"T83x_LSC_SNOOPS",
"T83x_AXI_TLB_STALL",
"T83x_AXI_TLB_MIESS",
"T83x_AXI_TLB_MISS",
"T83x_AXI_TLB_TRANSACTION",
"T83x_LS_TLB_MISS",
"T83x_LS_TLB_HIT",
@@ -1816,7 +1816,7 @@ static const char * const hardware_counters_mali_t86x[] = {
"T86x_LSC_DIRTY_LINE",
"T86x_LSC_SNOOPS",
"T86x_AXI_TLB_STALL",
"T86x_AXI_TLB_MIESS",
"T86x_AXI_TLB_MISS",
"T86x_AXI_TLB_TRANSACTION",
"T86x_LS_TLB_MISS",
"T86x_LS_TLB_HIT",
@@ -2082,7 +2082,7 @@ static const char * const hardware_counters_mali_t88x[] = {
"T88x_LSC_DIRTY_LINE",
"T88x_LSC_SNOOPS",
"T88x_AXI_TLB_STALL",
"T88x_AXI_TLB_MIESS",
"T88x_AXI_TLB_MISS",
"T88x_AXI_TLB_TRANSACTION",
"T88x_LS_TLB_MISS",
"T88x_LS_TLB_HIT",

View File

@@ -32,7 +32,6 @@
static int kbasep_gpu_memory_seq_show(struct seq_file *sfile, void *data)
{
ssize_t ret = 0;
struct list_head *entry;
const struct list_head *kbdev_list;
@@ -58,7 +57,7 @@ static int kbasep_gpu_memory_seq_show(struct seq_file *sfile, void *data)
mutex_unlock(&kbdev->kctx_list_lock);
}
kbase_dev_list_put(kbdev_list);
return ret;
return 0;
}
/*

View File

@@ -254,6 +254,15 @@ void kbase_backend_jm_kill_jobs_from_kctx(struct kbase_context *kctx);
*/
void kbase_jm_wait_for_zero_jobs(struct kbase_context *kctx);
/**
* kbase_backend_get_current_flush_id - Return the current flush ID
*
* @kbdev: Device pointer
*
* Return: the current flush ID to be recorded for each job chain
*/
u32 kbase_backend_get_current_flush_id(struct kbase_device *kbdev);
#if KBASE_GPU_RESET_EN
/**
* kbase_prepare_to_reset_gpu - Prepare for resetting the GPU.

View File

@@ -248,11 +248,6 @@ static int kbase_jd_user_buf_map(struct kbase_context *kctx,
dma_addr_t dma_addr;
unsigned long min;
/* if page already is private, we can't store our
* private data. */
if (PagePrivate(pages[i]))
goto unwind;
min = MIN(PAGE_SIZE - offset, local_size);
dma_addr = dma_map_page(dev, pages[i],
offset, min,
@@ -260,7 +255,7 @@ static int kbase_jd_user_buf_map(struct kbase_context *kctx,
if (dma_mapping_error(dev, dma_addr))
goto unwind;
kbase_set_dma_addr(pages[i], dma_addr);
alloc->imported.user_buf.dma_addrs[i] = dma_addr;
pa[i] = page_to_phys(pages[i]);
local_size -= min;
@@ -279,7 +274,8 @@ static int kbase_jd_user_buf_map(struct kbase_context *kctx,
/* fall down */
unwind:
while (i--) {
dma_unmap_page(kctx->kbdev->dev, kbase_dma_addr(pages[i]),
dma_unmap_page(kctx->kbdev->dev,
alloc->imported.user_buf.dma_addrs[i],
PAGE_SIZE, DMA_BIDIRECTIONAL);
put_page(pages[i]);
pages[i] = NULL;
@@ -299,12 +295,11 @@ static void kbase_jd_user_buf_unmap(struct kbase_context *kctx,
pages = alloc->imported.user_buf.pages;
for (i = 0; i < alloc->imported.user_buf.nr_pages; i++) {
unsigned long local_size;
dma_addr_t dma_addr = kbase_dma_addr(pages[i]);
dma_addr_t dma_addr = alloc->imported.user_buf.dma_addrs[i];
local_size = MIN(size, PAGE_SIZE - (dma_addr & ~PAGE_MASK));
dma_unmap_page(kctx->kbdev->dev, dma_addr, local_size,
DMA_BIDIRECTIONAL);
ClearPagePrivate(pages[i]);
if (writeable)
set_page_dirty_lock(pages[i]);
put_page(pages[i]);
@@ -1258,7 +1253,8 @@ bool jd_submit_atom(struct kbase_context *kctx,
#ifdef CONFIG_GPU_TRACEPOINTS
katom->work_id = atomic_inc_return(&jctx->work_id);
trace_gpu_job_enqueue((u32)kctx, katom->work_id, kbasep_map_core_reqs_to_string(katom->core_req));
trace_gpu_job_enqueue((u32)kctx->id, katom->work_id,
kbasep_map_core_reqs_to_string(katom->core_req));
#endif
if (queued && !IS_GPU_ATOM(katom)) {
@@ -1320,6 +1316,7 @@ int kbase_jd_submit(struct kbase_context *kctx,
bool need_to_try_schedule_context = false;
struct kbase_device *kbdev;
void __user *user_addr;
u32 latest_flush;
/*
* kbase_jd_submit isn't expected to fail and so all errors with the jobs
@@ -1349,6 +1346,9 @@ int kbase_jd_submit(struct kbase_context *kctx,
KBASE_TIMELINE_ATOMS_IN_FLIGHT(kctx, atomic_add_return(submit_data->nr_atoms, &kctx->timeline.jd_atoms_in_flight));
/* All atoms submitted in this call have the same flush ID */
latest_flush = kbase_backend_get_current_flush_id(kbdev);
for (i = 0; i < submit_data->nr_atoms; i++) {
struct base_jd_atom_v2 user_atom;
struct kbase_jd_atom *katom;
@@ -1424,6 +1424,9 @@ while (false)
#endif
katom = &jctx->atoms[user_atom.atom_number];
/* Record the flush ID for the cache flush optimisation */
katom->flush_id = latest_flush;
while (katom->status != KBASE_JD_ATOM_STATE_UNUSED) {
/* Atom number is already in use, wait for the atom to
* complete

View File

@@ -1,6 +1,6 @@
/*
*
* (C) COPYRIGHT 2014 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -17,6 +17,8 @@
#include <linux/seq_file.h>
#include <mali_kbase.h>
#include <mali_kbase_jd_debugfs.h>
#ifdef CONFIG_DEBUG_FS
@@ -41,6 +43,13 @@ static int kbasep_jd_debugfs_atoms_show(struct seq_file *sfile, void *data)
KBASE_DEBUG_ASSERT(kctx != NULL);
/* Print version */
seq_printf(sfile, "v%u\n", MALI_JD_DEBUGFS_VERSION);
/* Print U/K API version */
seq_printf(sfile, "ukv%u.%u\n", BASE_UK_VERSION_MAJOR,
BASE_UK_VERSION_MINOR);
/* Print table heading */
seq_puts(sfile, "atom id,core reqs,status,coreref status,predeps,start time,time on gpu\n");

View File

@@ -1,6 +1,6 @@
/*
*
* (C) COPYRIGHT 2014 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -27,6 +27,8 @@
#include <mali_kbase.h>
#define MALI_JD_DEBUGFS_VERSION 1
/**
* kbasep_jd_debugfs_ctx_add() - Add debugfs entries for JD system
*

View File

@@ -921,6 +921,7 @@ void kbasep_js_kctx_term(struct kbase_context *kctx)
struct kbasep_js_kctx_info *js_kctx_info;
union kbasep_js_policy *js_policy;
int js;
bool update_ctx_count = false;
KBASE_DEBUG_ASSERT(kctx != NULL);
@@ -937,14 +938,31 @@ void kbasep_js_kctx_term(struct kbase_context *kctx)
}
mutex_lock(&kbdev->js_data.queue_mutex);
mutex_lock(&kctx->jctx.sched_info.ctx.jsctx_mutex);
for (js = 0; js < kbdev->gpu_props.num_job_slots; js++)
list_del_init(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]);
if (kctx->ctx_runnable_ref) {
WARN_ON(atomic_read(&kbdev->js_data.nr_contexts_runnable) <= 0);
atomic_dec(&kbdev->js_data.nr_contexts_runnable);
update_ctx_count = true;
kctx->ctx_runnable_ref = false;
}
mutex_unlock(&kctx->jctx.sched_info.ctx.jsctx_mutex);
mutex_unlock(&kbdev->js_data.queue_mutex);
if ((js_kctx_info->init_status & JS_KCTX_INIT_POLICY))
kbasep_js_policy_term_ctx(js_policy, kctx);
js_kctx_info->init_status = JS_KCTX_INIT_NONE;
if (update_ctx_count) {
mutex_lock(&kbdev->js_data.runpool_mutex);
kbase_backend_ctx_count_changed(kbdev);
mutex_unlock(&kbdev->js_data.runpool_mutex);
}
}
/**
@@ -982,8 +1000,11 @@ static bool kbase_js_ctx_list_add_pullable(struct kbase_device *kbdev,
if (!kctx->slots_pullable) {
kbdev->js_data.nr_contexts_pullable++;
ret = true;
if (!atomic_read(&kctx->atoms_pulled))
if (!atomic_read(&kctx->atoms_pulled)) {
WARN_ON(kctx->ctx_runnable_ref);
kctx->ctx_runnable_ref = true;
atomic_inc(&kbdev->js_data.nr_contexts_runnable);
}
}
kctx->slots_pullable |= (1 << js);
@@ -1025,8 +1046,11 @@ static bool kbase_js_ctx_list_add_pullable_head(struct kbase_device *kbdev,
if (!kctx->slots_pullable) {
kbdev->js_data.nr_contexts_pullable++;
ret = true;
if (!atomic_read(&kctx->atoms_pulled))
if (!atomic_read(&kctx->atoms_pulled)) {
WARN_ON(kctx->ctx_runnable_ref);
kctx->ctx_runnable_ref = true;
atomic_inc(&kbdev->js_data.nr_contexts_runnable);
}
}
kctx->slots_pullable |= (1 << js);
@@ -1065,8 +1089,11 @@ static bool kbase_js_ctx_list_add_unpullable(struct kbase_device *kbdev,
if (kctx->slots_pullable == (1 << js)) {
kbdev->js_data.nr_contexts_pullable--;
ret = true;
if (!atomic_read(&kctx->atoms_pulled))
if (!atomic_read(&kctx->atoms_pulled)) {
WARN_ON(!kctx->ctx_runnable_ref);
kctx->ctx_runnable_ref = false;
atomic_dec(&kbdev->js_data.nr_contexts_runnable);
}
}
kctx->slots_pullable &= ~(1 << js);
@@ -1105,8 +1132,11 @@ static bool kbase_js_ctx_list_remove(struct kbase_device *kbdev,
if (kctx->slots_pullable == (1 << js)) {
kbdev->js_data.nr_contexts_pullable--;
ret = true;
if (!atomic_read(&kctx->atoms_pulled))
if (!atomic_read(&kctx->atoms_pulled)) {
WARN_ON(!kctx->ctx_runnable_ref);
kctx->ctx_runnable_ref = false;
atomic_dec(&kbdev->js_data.nr_contexts_runnable);
}
}
kctx->slots_pullable &= ~(1 << js);
@@ -1368,6 +1398,10 @@ bool kbasep_js_add_job(struct kbase_context *kctx,
/* Dependencies could not be represented */
--(js_kctx_info->ctx.nr_jobs);
/* Setting atom status back to queued as it still has unresolved
* dependencies */
atom->status = KBASE_JD_ATOM_STATE_QUEUED;
spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
mutex_unlock(&js_devdata->runpool_mutex);
@@ -2442,8 +2476,11 @@ struct kbase_jd_atom *kbase_js_pull(struct kbase_context *kctx, int js)
kctx->pulled = true;
pulled = atomic_inc_return(&kctx->atoms_pulled);
if (pulled == 1 && !kctx->slots_pullable)
if (pulled == 1 && !kctx->slots_pullable) {
WARN_ON(kctx->ctx_runnable_ref);
kctx->ctx_runnable_ref = true;
atomic_inc(&kctx->kbdev->js_data.nr_contexts_runnable);
}
atomic_inc(&kctx->atoms_pulled_slot[katom->slot_nr]);
jsctx_rb_pull(kctx, katom);
@@ -2495,8 +2532,11 @@ static void js_return_worker(struct work_struct *data)
timer_sync |= kbase_js_ctx_list_remove(kbdev, kctx, js);
if (!atomic_read(&kctx->atoms_pulled)) {
if (!kctx->slots_pullable)
if (!kctx->slots_pullable) {
WARN_ON(!kctx->ctx_runnable_ref);
kctx->ctx_runnable_ref = false;
atomic_dec(&kbdev->js_data.nr_contexts_runnable);
}
if (kctx->as_nr != KBASEP_AS_NR_INVALID &&
!js_kctx_info->ctx.is_dying) {
@@ -2698,8 +2738,12 @@ bool kbase_js_complete_atom_wq(struct kbase_context *kctx,
context_idle = !atomic_dec_return(&kctx->atoms_pulled);
atomic_dec(&kctx->atoms_pulled_slot[atom_slot]);
if (!atomic_read(&kctx->atoms_pulled) && !kctx->slots_pullable)
if (!atomic_read(&kctx->atoms_pulled) &&
!kctx->slots_pullable) {
WARN_ON(!kctx->ctx_runnable_ref);
kctx->ctx_runnable_ref = false;
atomic_dec(&kbdev->js_data.nr_contexts_runnable);
}
if (katom->event_code != BASE_JD_EVENT_DONE)
kbase_js_compact(kctx);
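
Across all of the sites above, the new ctx_runnable_ref flag makes the nr_contexts_runnable accounting idempotent per context: no matter how many slots a context becomes pullable on, it contributes exactly one reference, and the added WARN_ONs catch double increments or decrements. A condensed sketch of that discipline (hypothetical helpers; the patch open-codes this at each site):

/* Illustrative only, assuming the same fields used above. */
static void example_ctx_mark_runnable(struct kbase_device *kbdev,
				      struct kbase_context *kctx)
{
	WARN_ON(kctx->ctx_runnable_ref);		/* already counted */
	kctx->ctx_runnable_ref = true;
	atomic_inc(&kbdev->js_data.nr_contexts_runnable);
}

static void example_ctx_mark_not_runnable(struct kbase_device *kbdev,
					  struct kbase_context *kctx)
{
	WARN_ON(!kctx->ctx_runnable_ref);		/* not currently counted */
	kctx->ctx_runnable_ref = false;
	atomic_dec(&kbdev->js_data.nr_contexts_runnable);
}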

View File

@@ -708,7 +708,7 @@ void kbasep_js_policy_deregister_job(union kbasep_js_policy *js_policy, struct k
bool kbasep_js_policy_dequeue_job(struct kbase_device *kbdev, int job_slot_idx, struct kbase_jd_atom ** const katom_ptr);
/**
* @brief Requeue a Job back into the the Job Scheduler Policy Run Pool
* @brief Requeue a Job back into the Job Scheduler Policy Run Pool
*
* This will be used to enqueue a job after its creation and also to requeue
* a job into the Run Pool that was previously dequeued (running). It notifies

View File

@@ -1124,6 +1124,8 @@ int kbase_alloc_phy_pages_helper(
struct kbase_mem_phy_alloc *alloc,
size_t nr_pages_requested)
{
int new_page_count __maybe_unused;
KBASE_DEBUG_ASSERT(alloc);
KBASE_DEBUG_ASSERT(alloc->type == KBASE_MEM_TYPE_NATIVE);
KBASE_DEBUG_ASSERT(alloc->imported.kctx);
@@ -1131,7 +1133,8 @@ int kbase_alloc_phy_pages_helper(
if (nr_pages_requested == 0)
goto done; /*nothing to do*/
kbase_atomic_add_pages(nr_pages_requested, &alloc->imported.kctx->used_pages);
new_page_count = kbase_atomic_add_pages(
nr_pages_requested, &alloc->imported.kctx->used_pages);
kbase_atomic_add_pages(nr_pages_requested, &alloc->imported.kctx->kbdev->memdev.used_pages);
/* Increase mm counters before we allocate pages so that this
@@ -1143,7 +1146,9 @@ int kbase_alloc_phy_pages_helper(
goto no_alloc;
#if defined(CONFIG_MALI_MIPE_ENABLED)
kbase_tlstream_aux_pagesalloc((s64)nr_pages_requested);
kbase_tlstream_aux_pagesalloc(
(u32)alloc->imported.kctx->id,
(u64)new_page_count);
#endif
alloc->nents += nr_pages_requested;
@@ -1164,6 +1169,7 @@ int kbase_free_phy_pages_helper(
{
bool syncback;
phys_addr_t *start_free;
int new_page_count __maybe_unused;
KBASE_DEBUG_ASSERT(alloc);
KBASE_DEBUG_ASSERT(alloc->type == KBASE_MEM_TYPE_NATIVE);
@@ -1185,11 +1191,14 @@ int kbase_free_phy_pages_helper(
alloc->nents -= nr_pages_to_free;
kbase_process_page_usage_dec(alloc->imported.kctx, nr_pages_to_free);
kbase_atomic_sub_pages(nr_pages_to_free, &alloc->imported.kctx->used_pages);
new_page_count = kbase_atomic_sub_pages(
nr_pages_to_free, &alloc->imported.kctx->used_pages);
kbase_atomic_sub_pages(nr_pages_to_free, &alloc->imported.kctx->kbdev->memdev.used_pages);
#if defined(CONFIG_MALI_MIPE_ENABLED)
kbase_tlstream_aux_pagesalloc(-(s64)nr_pages_to_free);
kbase_tlstream_aux_pagesalloc(
(u32)alloc->imported.kctx->id,
(u64)new_page_count);
#endif
return 0;

View File

@@ -144,6 +144,7 @@ struct kbase_mem_phy_alloc {
struct page **pages;
unsigned int current_mapping_usage_count;
struct task_struct *owner;
dma_addr_t *dma_addrs;
} user_buf;
} imported;
};
@@ -317,12 +318,22 @@ static inline size_t kbase_reg_current_backed_size(struct kbase_va_region *reg)
static inline struct kbase_mem_phy_alloc *kbase_alloc_create(size_t nr_pages, enum kbase_memory_type type)
{
struct kbase_mem_phy_alloc *alloc;
const size_t alloc_size =
sizeof(*alloc) + sizeof(*alloc->pages) * nr_pages;
size_t alloc_size = sizeof(*alloc) + sizeof(*alloc->pages) * nr_pages;
size_t per_page_size = sizeof(*alloc->pages);
/* Prevent nr_pages*sizeof + sizeof(*alloc) from wrapping around. */
/* Imported pages may have page private data already in use */
if (type == KBASE_MEM_TYPE_IMPORTED_USER_BUF) {
alloc_size += nr_pages *
sizeof(*alloc->imported.user_buf.dma_addrs);
per_page_size += sizeof(*alloc->imported.user_buf.dma_addrs);
}
/*
* Prevent nr_pages*per_page_size + sizeof(*alloc) from
* wrapping around.
*/
if (nr_pages > ((((size_t) -1) - sizeof(*alloc))
/ sizeof(*alloc->pages)))
/ per_page_size))
return ERR_PTR(-ENOMEM);
/* Allocate based on the size to reduce internal fragmentation of vmem */
@@ -345,6 +356,10 @@ static inline struct kbase_mem_phy_alloc *kbase_alloc_create(size_t nr_pages, en
INIT_LIST_HEAD(&alloc->mappings);
alloc->type = type;
if (type == KBASE_MEM_TYPE_IMPORTED_USER_BUF)
alloc->imported.user_buf.dma_addrs =
(void *) (alloc->pages + nr_pages);
return alloc;
}
@@ -757,7 +772,7 @@ static inline void kbase_set_dma_addr(struct page *p, dma_addr_t dma_addr)
SetPagePrivate(p);
if (sizeof(dma_addr_t) > sizeof(p->private)) {
/* on 32-bit ARM with LPAE dma_addr_t becomes larger, but the
* private filed stays the same. So we have to be clever and
* private field stays the same. So we have to be clever and
* use the fact that we only store DMA addresses of whole pages,
* so the low bits should be zero */
KBASE_DEBUG_ASSERT(!(dma_addr & (PAGE_SIZE - 1)));
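
Worth spelling out for the kbase_alloc_create() change above: for KBASE_MEM_TYPE_IMPORTED_USER_BUF the per-page dma_addr_t array lives in the same allocation as pages[], immediately after it, so the wrap-around check must account for both per-page costs. An illustrative sizing sketch (hypothetical helper mirroring that logic):

#include <linux/mm_types.h>
#include <linux/types.h>

/* Illustrative only: combined size for the header plus pages[] plus, for
 * USER_BUF imports, the parallel dma_addrs[]. Returns 0 if nr_pages would
 * make the multiplication wrap. */
static size_t example_phy_alloc_size(size_t header_size, size_t nr_pages,
				     bool is_user_buf)
{
	size_t per_page = sizeof(struct page *);

	if (is_user_buf)
		per_page += sizeof(dma_addr_t);

	if (nr_pages > (((size_t)-1) - header_size) / per_page)
		return 0;

	return header_size + nr_pages * per_page;
}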

View File

@@ -145,7 +145,7 @@ struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx, u64 va_pages
unsigned long prot = PROT_NONE;
unsigned long va_size = va_pages << PAGE_SHIFT;
unsigned long va_map = va_size;
unsigned long cookie;
unsigned long cookie, cookie_nr;
unsigned long cpu_addr;
/* Bind to a cookie */
@@ -155,15 +155,15 @@ struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx, u64 va_pages
goto no_cookie;
}
/* return a cookie */
cookie = __ffs(kctx->cookies);
kctx->cookies &= ~(1UL << cookie);
BUG_ON(kctx->pending_regions[cookie]);
kctx->pending_regions[cookie] = reg;
cookie_nr = __ffs(kctx->cookies);
kctx->cookies &= ~(1UL << cookie_nr);
BUG_ON(kctx->pending_regions[cookie_nr]);
kctx->pending_regions[cookie_nr] = reg;
kbase_gpu_vm_unlock(kctx);
/* relocate to correct base */
cookie += PFN_DOWN(BASE_MEM_COOKIE_BASE);
cookie = cookie_nr + PFN_DOWN(BASE_MEM_COOKIE_BASE);
cookie <<= PAGE_SHIFT;
/* See if we must align memory due to GPU PC bits vs CPU VA */
@@ -197,8 +197,11 @@ struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx, u64 va_pages
cpu_addr = vm_mmap(kctx->filp, 0, va_map, prot, MAP_SHARED,
cookie);
if (IS_ERR_VALUE(cpu_addr))
if (IS_ERR_VALUE(cpu_addr)) {
kctx->pending_regions[cookie_nr] = NULL;
kctx->cookies |= (1UL << cookie_nr);
goto no_mmap;
}
/*
* If we had to allocate extra VA space to force the
@@ -1440,7 +1443,11 @@ static int kbase_trace_buffer_mmap(struct kbase_context *kctx, struct vm_area_st
goto out;
}
kbase_device_trace_buffer_install(kctx, tb, size);
err = kbase_device_trace_buffer_install(kctx, tb, size);
if (err) {
vfree(tb);
goto out;
}
} else {
err = -EINVAL;
goto out;

View File

@@ -32,18 +32,16 @@
static int kbasep_mem_profile_seq_show(struct seq_file *sfile, void *data)
{
struct kbase_context *kctx = sfile->private;
int err = 0;
mutex_lock(&kctx->mem_profile_lock);
err = seq_write(sfile, kctx->mem_profile_data, kctx->mem_profile_size);
seq_write(sfile, kctx->mem_profile_data, kctx->mem_profile_size);
if (!err)
seq_putc(sfile, '\n');
seq_putc(sfile, '\n');
mutex_unlock(&kctx->mem_profile_lock);
return err;
return 0;
}
/*

View File

@@ -46,18 +46,18 @@
/**
* kbase_mmu_sync_pgd - sync page directory to memory
* @dev: Device pointer.
* @kbdev: Device pointer.
* @handle: Address of DMA region.
* @size: Size of the region to sync.
*
* This should be called after each page directory update.
*/
static void kbase_mmu_sync_pgd(struct device *dev,
static void kbase_mmu_sync_pgd(struct kbase_device *kbdev,
dma_addr_t handle, size_t size)
{
dma_sync_single_for_device(dev, handle, size, DMA_TO_DEVICE);
dma_sync_single_for_device(kbdev->dev, handle, size, DMA_TO_DEVICE);
}
/*
@@ -260,9 +260,7 @@ void page_fault_worker(struct work_struct *data)
kbase_trace_mali_page_fault_insert_pages(as_no, new_pages);
#endif
#if defined(CONFIG_MALI_MIPE_ENABLED)
kbase_tlstream_aux_pagefault(
kctx->id,
atomic_read(&kctx->used_pages));
kbase_tlstream_aux_pagefault(kctx->id, (u64)new_pages);
#endif
/* flush L2 and unlock the VA (resumes the MMU) */
@@ -316,15 +314,22 @@ phys_addr_t kbase_mmu_alloc_pgd(struct kbase_context *kctx)
u64 *page;
int i;
struct page *p;
int new_page_count __maybe_unused;
KBASE_DEBUG_ASSERT(NULL != kctx);
kbase_atomic_add_pages(1, &kctx->used_pages);
new_page_count = kbase_atomic_add_pages(1, &kctx->used_pages);
kbase_atomic_add_pages(1, &kctx->kbdev->memdev.used_pages);
p = kbase_mem_pool_alloc(&kctx->mem_pool);
if (!p)
goto sub_pages;
#if defined(CONFIG_MALI_MIPE_ENABLED)
kbase_tlstream_aux_pagesalloc(
(u32)kctx->id,
(u64)new_page_count);
#endif
page = kmap(p);
if (NULL == page)
goto alloc_free;
@@ -334,7 +339,7 @@ phys_addr_t kbase_mmu_alloc_pgd(struct kbase_context *kctx)
for (i = 0; i < KBASE_MMU_PAGE_ENTRIES; i++)
kctx->kbdev->mmu_mode->entry_invalidate(&page[i]);
kbase_mmu_sync_pgd(kctx->kbdev->dev, kbase_dma_addr(p), PAGE_SIZE);
kbase_mmu_sync_pgd(kctx->kbdev, kbase_dma_addr(p), PAGE_SIZE);
kunmap(p);
return page_to_phys(p);
@@ -388,8 +393,7 @@ static phys_addr_t mmu_get_next_pgd(struct kbase_context *kctx, phys_addr_t pgd,
kctx->kbdev->mmu_mode->entry_set_pte(&page[vpfn], target_pgd);
kbase_mmu_sync_pgd(kctx->kbdev->dev,
kbase_dma_addr(p), PAGE_SIZE);
kbase_mmu_sync_pgd(kctx->kbdev, kbase_dma_addr(p), PAGE_SIZE);
/* Rely on the caller to update the address space flags. */
}
@@ -404,7 +408,7 @@ static phys_addr_t mmu_get_bottom_pgd(struct kbase_context *kctx, u64 vpfn)
pgd = kctx->pgd;
for (l = MIDGARD_MMU_TOPLEVEL; l < 3; l++) {
for (l = MIDGARD_MMU_TOPLEVEL; l < MIDGARD_MMU_BOTTOMLEVEL; l++) {
pgd = mmu_get_next_pgd(kctx, pgd, vpfn, l);
/* Handle failure condition */
if (!pgd) {
@@ -451,7 +455,7 @@ static phys_addr_t mmu_insert_pages_recover_get_bottom_pgd(struct kbase_context
pgd = kctx->pgd;
for (l = MIDGARD_MMU_TOPLEVEL; l < 3; l++) {
for (l = MIDGARD_MMU_TOPLEVEL; l < MIDGARD_MMU_BOTTOMLEVEL; l++) {
pgd = mmu_insert_pages_recover_get_next_pgd(kctx, pgd, vpfn, l);
/* Should never fail */
KBASE_DEBUG_ASSERT(0 != pgd);
@@ -500,9 +504,7 @@ static void mmu_insert_pages_failure_recovery(struct kbase_context *kctx, u64 vp
vpfn += count;
nr -= count;
kbase_mmu_sync_pgd(kctx->kbdev->dev,
kbase_dma_addr(p),
PAGE_SIZE);
kbase_mmu_sync_pgd(kctx->kbdev, kbase_dma_addr(p), PAGE_SIZE);
kunmap_atomic(pgd_page);
}
@@ -584,10 +586,9 @@ int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn,
vpfn += count;
nr -= count;
kbase_mmu_sync_pgd(kctx->kbdev->dev,
kbase_dma_addr(p) +
(index * sizeof(u64)),
count * sizeof(u64));
kbase_mmu_sync_pgd(kctx->kbdev,
kbase_dma_addr(p) + (index * sizeof(u64)),
count * sizeof(u64));
kunmap(p);
/* We have started modifying the page table.
@@ -676,10 +677,9 @@ int kbase_mmu_insert_pages(struct kbase_context *kctx, u64 vpfn,
vpfn += count;
nr -= count;
kbase_mmu_sync_pgd(kctx->kbdev->dev,
kbase_dma_addr(p) +
(index * sizeof(u64)),
count * sizeof(u64));
kbase_mmu_sync_pgd(kctx->kbdev,
kbase_dma_addr(p) + (index * sizeof(u64)),
count * sizeof(u64));
kunmap(p);
/* We have started modifying the page table. If further pages
@@ -824,10 +824,9 @@ int kbase_mmu_teardown_pages(struct kbase_context *kctx, u64 vpfn, size_t nr)
vpfn += count;
nr -= count;
kbase_mmu_sync_pgd(kctx->kbdev->dev,
kbase_dma_addr(p) +
(index * sizeof(u64)),
count * sizeof(u64));
kbase_mmu_sync_pgd(kctx->kbdev,
kbase_dma_addr(p) + (index * sizeof(u64)),
count * sizeof(u64));
kunmap(p);
}
@@ -898,10 +897,9 @@ int kbase_mmu_update_pages(struct kbase_context *kctx, u64 vpfn, phys_addr_t *ph
vpfn += count;
nr -= count;
kbase_mmu_sync_pgd(kctx->kbdev->dev,
kbase_dma_addr(p) +
(index * sizeof(u64)),
count * sizeof(u64));
kbase_mmu_sync_pgd(kctx->kbdev,
kbase_dma_addr(p) + (index * sizeof(u64)),
count * sizeof(u64));
kunmap(pfn_to_page(PFN_DOWN(pgd)));
}
@@ -952,7 +950,7 @@ static void mmu_teardown_level(struct kbase_context *kctx, phys_addr_t pgd, int
target_pgd = mmu_mode->pte_to_phy_addr(pgd_page[i]);
if (target_pgd) {
if (level < 2) {
if (level < (MIDGARD_MMU_BOTTOMLEVEL - 1)) {
mmu_teardown_level(kctx, target_pgd, level + 1, zap, pgd_page_buffer + (PAGE_SIZE / sizeof(u64)));
} else {
/*
@@ -1001,6 +999,8 @@ void kbase_mmu_term(struct kbase_context *kctx)
void kbase_mmu_free_pgd(struct kbase_context *kctx)
{
int new_page_count __maybe_unused;
KBASE_DEBUG_ASSERT(NULL != kctx);
KBASE_DEBUG_ASSERT(NULL != kctx->mmu_teardown_pages);
@@ -1011,8 +1011,14 @@ void kbase_mmu_free_pgd(struct kbase_context *kctx)
beenthere(kctx, "pgd %lx", (unsigned long)kctx->pgd);
kbase_mem_pool_free(&kctx->mem_pool, phys_to_page(kctx->pgd), true);
kbase_process_page_usage_dec(kctx, 1);
kbase_atomic_sub_pages(1, &kctx->used_pages);
new_page_count = kbase_atomic_sub_pages(1, &kctx->used_pages);
kbase_atomic_sub_pages(1, &kctx->kbdev->memdev.used_pages);
#if defined(CONFIG_MALI_MIPE_ENABLED)
kbase_tlstream_aux_pagesalloc(
(u32)kctx->id,
(u64)new_page_count);
#endif
}
KBASE_EXPORT_TEST_API(kbase_mmu_free_pgd);
@@ -1052,16 +1058,21 @@ static size_t kbasep_mmu_dump_level(struct kbase_context *kctx, phys_addr_t pgd,
*size_left -= size;
}
for (i = 0; i < KBASE_MMU_PAGE_ENTRIES; i++) {
if (mmu_mode->pte_is_valid(pgd_page[i])) {
target_pgd = mmu_mode->pte_to_phy_addr(pgd_page[i]);
if (level < MIDGARD_MMU_BOTTOMLEVEL) {
for (i = 0; i < KBASE_MMU_PAGE_ENTRIES; i++) {
if (mmu_mode->pte_is_valid(pgd_page[i])) {
target_pgd = mmu_mode->pte_to_phy_addr(
pgd_page[i]);
dump_size = kbasep_mmu_dump_level(kctx, target_pgd, level + 1, buffer, size_left);
if (!dump_size) {
kunmap(pfn_to_page(PFN_DOWN(pgd)));
return 0;
dump_size = kbasep_mmu_dump_level(kctx,
target_pgd, level + 1,
buffer, size_left);
if (!dump_size) {
kunmap(pfn_to_page(PFN_DOWN(pgd)));
return 0;
}
size += dump_size;
}
size += dump_size;
}
}

View File

@@ -448,15 +448,15 @@ static const struct tp_desc tp_desc_aux[] = {
KBASE_AUX_PAGEFAULT,
__stringify(KBASE_AUX_PAGEFAULT),
"Page fault",
"@II",
"ctx_nr,page_cnt"
"@IL",
"ctx_nr,page_cnt_change"
},
{
KBASE_AUX_PAGESALLOC,
__stringify(KBASE_AUX_PAGESALLOC),
"Total alloc pages change",
"@l",
"page_cnt_change"
"@IL",
"ctx_nr,page_cnt"
}
};
@@ -1998,9 +1998,34 @@ void kbase_tlstream_aux_job_softstop(u32 js_id)
kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_AUX, flags);
}
void kbase_tlstream_aux_pagefault(u32 ctx_nr, u32 page_count)
void kbase_tlstream_aux_pagefault(u32 ctx_nr, u64 page_count_change)
{
const u32 msg_id = KBASE_AUX_PAGEFAULT;
const size_t msg_size =
sizeof(msg_id) + sizeof(u64) + sizeof(ctx_nr) +
sizeof(page_count_change);
unsigned long flags;
char *buffer;
size_t pos = 0;
buffer = kbasep_tlstream_msgbuf_acquire(
TL_STREAM_TYPE_AUX, msg_size, &flags);
KBASE_DEBUG_ASSERT(buffer);
pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
pos = kbasep_tlstream_write_timestamp(buffer, pos);
pos = kbasep_tlstream_write_bytes(buffer, pos, &ctx_nr, sizeof(ctx_nr));
pos = kbasep_tlstream_write_bytes(
buffer, pos,
&page_count_change, sizeof(page_count_change));
KBASE_DEBUG_ASSERT(msg_size == pos);
kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_AUX, flags);
}
void kbase_tlstream_aux_pagesalloc(u32 ctx_nr, u64 page_count)
{
const u32 msg_id = KBASE_AUX_PAGESALLOC;
const size_t msg_size =
sizeof(msg_id) + sizeof(u64) + sizeof(ctx_nr) +
sizeof(page_count);
@@ -2022,26 +2047,3 @@ void kbase_tlstream_aux_pagefault(u32 ctx_nr, u32 page_count)
kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_AUX, flags);
}
void kbase_tlstream_aux_pagesalloc(s64 page_count_change)
{
const u32 msg_id = KBASE_AUX_PAGESALLOC;
const size_t msg_size =
sizeof(msg_id) + sizeof(u64) + sizeof(page_count_change);
unsigned long flags;
char *buffer;
size_t pos = 0;
buffer = kbasep_tlstream_msgbuf_acquire(
TL_STREAM_TYPE_AUX, msg_size, &flags);
KBASE_DEBUG_ASSERT(buffer);
pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
pos = kbasep_tlstream_write_timestamp(buffer, pos);
pos = kbasep_tlstream_write_bytes(
buffer, pos,
&page_count_change, sizeof(page_count_change));
KBASE_DEBUG_ASSERT(msg_size == pos);
kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_AUX, flags);
}
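
Net effect on the trace format, as an illustrative note (not from the patch): both AUX messages now put the context number first and a 64-bit count second, matching the "@IL" descriptors above.

/* Serialization order used by both functions above:
 *     u32 msg_id | u64 timestamp | u32 ctx_nr | u64 count
 * so msg_size = 4 + 8 + 4 + 8 = 24 bytes. PAGESALLOC now carries the
 * context's running page total; PAGEFAULT carries the pages just mapped. */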

View File

@@ -375,18 +375,18 @@ void kbase_tlstream_aux_job_softstop(u32 js_id);
/**
* kbase_tlstream_aux_pagefault - timeline message: MMU page fault event
* resulting in new pages being mapped
* @ctx_nr: kernel context number
* @page_count: number of currently used pages
* @ctx_nr: kernel context number
* @page_count_change: number of pages to be added
*/
void kbase_tlstream_aux_pagefault(u32 ctx_nr, u32 page_count);
void kbase_tlstream_aux_pagefault(u32 ctx_nr, u64 page_count_change);
/**
* kbase_tlstream_aux_pagesalloc - timeline message: total number of allocated
* pages is changed
* @page_count_change: number of pages to be added or subtracted (according to
* the sign)
* @ctx_nr: kernel context number
* @page_count: number of pages used by the context
*/
void kbase_tlstream_aux_pagesalloc(s64 page_count_change);
void kbase_tlstream_aux_pagesalloc(u32 ctx_nr, u64 page_count);
#endif /* _KBASE_TLSTREAM_H */

View File

@@ -493,7 +493,9 @@
#define SC_ALT_COUNTERS (1ul << 3)
#define SC_OVERRIDE_FWD_PIXEL_KILL (1ul << 4)
#define SC_SDC_DISABLE_OQ_DISCARD (1ul << 6)
#define SC_LS_ALLOW_ATTR_TYPES (1ul << 16)
#define SC_LS_PAUSEBUFFER_DISABLE (1ul << 16)
#define SC_LS_ATTR_CHECK_DISABLE (1ul << 18)
#define SC_ENABLE_TEXGRD_FLAGS (1ul << 25)
/* End SHADER_CONFIG register */

View File

@@ -1,8 +0,0 @@
# This confidential and proprietary software may be used only as
# authorised by a licensing agreement from ARM Limited
# (C) COPYRIGHT 2010-2011, 2013 ARM Limited
# ALL RIGHTS RESERVED
# The entire notice above must be reproduced on all authorised
# copies and copies may only be made to the extent permitted
# by a licensing agreement from ARM Limited.

View File

@@ -1,11 +0,0 @@
# This confidential and proprietary software may be used only as
# authorised by a licensing agreement from ARM Limited
# (C) COPYRIGHT 2010-2011, 2013 ARM Limited
# ALL RIGHTS RESERVED
# The entire notice above must be reproduced on all authorised
# copies and copies may only be made to the extent permitted
# by a licensing agreement from ARM Limited.
SConscript( 'customer/sconscript' )
if Glob('internal/sconscript'):
SConscript( 'internal/sconscript' )