MALI: rockchip: upgrade bifrost DDK to g11p0-01eac0, from g10p0-01eac0

Change-Id: I0642ec37f151711b8b19c3206488d3301422971d Signed-off-by: Zhen Chen <chenzhen@rock-chips.com>
2026-06-05 18:41:58 +09:00 · 2022-02-16 17:26:04 +08:00
parent 451a9752b5
commit 643f7908a0
201 changed files with 10229 additions and 6812 deletions
--- a/drivers/base/arm/dma_buf_lock/src/dma_buf_lock.c
+++ b/drivers/base/arm/dma_buf_lock/src/dma_buf_lock.c
@@ -83,7 +83,7 @@
 static dev_t dma_buf_lock_dev;
 static struct cdev dma_buf_lock_cdev;
 static struct class *dma_buf_lock_class;
-static char dma_buf_lock_dev_name[] = "dma_buf_lock";
+static const char dma_buf_lock_dev_name[] = "dma_buf_lock";

 #if defined(HAVE_UNLOCKED_IOCTL) || defined(HAVE_COMPAT_IOCTL) || ((KERNEL_VERSION(5, 9, 0) <= LINUX_VERSION_CODE))
 static long dma_buf_lock_ioctl(struct file *filp, unsigned int cmd, unsigned long arg);
@@ -91,8 +91,7 @@ static long dma_buf_lock_ioctl(struct file *filp, unsigned int cmd, unsigned lon
 static int dma_buf_lock_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, unsigned long arg);
 #endif

-static struct file_operations dma_buf_lock_fops =
-{
+static const struct file_operations dma_buf_lock_fops = {
 	.owner   = THIS_MODULE,
 #if defined(HAVE_UNLOCKED_IOCTL) || ((KERNEL_VERSION(5, 9, 0) <= LINUX_VERSION_CODE))
 	.unlocked_ioctl   = dma_buf_lock_ioctl,
@@ -105,8 +104,7 @@ static struct file_operations dma_buf_lock_fops =
 #endif
 };

-typedef struct dma_buf_lock_resource
-{
+struct dma_buf_lock_resource {
 #if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE)
 	struct fence fence;
 #else
@@ -123,7 +121,7 @@ typedef struct dma_buf_lock_resource
 	struct list_head link;
 	struct work_struct work;
 	int count;
-} dma_buf_lock_resource;
+};

 /**
 * struct dma_buf_lock_fence_cb - Callback data struct for dma-fence
@@ -199,7 +197,7 @@ const struct dma_fence_ops dma_buf_lock_fence_ops = {
 };

 static void
-dma_buf_lock_fence_init(dma_buf_lock_resource *resource)
+dma_buf_lock_fence_init(struct dma_buf_lock_resource *resource)
 {
 	dma_fence_init(&resource->fence,
 		       &dma_buf_lock_fence_ops,
@@ -209,7 +207,7 @@ dma_buf_lock_fence_init(dma_buf_lock_resource *resource)
 }

 static void
-dma_buf_lock_fence_free_callbacks(dma_buf_lock_resource *resource)
+dma_buf_lock_fence_free_callbacks(struct dma_buf_lock_resource *resource)
 {
 	struct dma_buf_lock_fence_cb *cb, *tmp;

@@ -228,8 +226,8 @@ dma_buf_lock_fence_free_callbacks(dma_buf_lock_resource *resource)
 static void
 dma_buf_lock_fence_work(struct work_struct *pwork)
 {
-	dma_buf_lock_resource *resource =
-		container_of(pwork, dma_buf_lock_resource, work);
+	struct dma_buf_lock_resource *resource =
+		container_of(pwork, struct dma_buf_lock_resource, work);

 	WARN_ON(atomic_read(&resource->fence_dep_count));
 	WARN_ON(!atomic_read(&resource->locked));
@@ -250,10 +248,10 @@ dma_buf_lock_fence_callback(struct dma_fence *fence, struct dma_fence_cb *cb)
 	struct dma_buf_lock_fence_cb *dma_buf_lock_cb = container_of(cb,
 				struct dma_buf_lock_fence_cb,
 				fence_cb);
-	dma_buf_lock_resource *resource = dma_buf_lock_cb->res;
+	struct dma_buf_lock_resource *resource = dma_buf_lock_cb->res;

 #if DMA_BUF_LOCK_DEBUG
-	printk(KERN_DEBUG "dma_buf_lock_fence_callback\n");
+	pr_debug("%s\n", __func__);
 #endif

 	/* Callback function will be invoked in atomic context. */
@@ -270,12 +268,12 @@ dma_buf_lock_fence_callback(struct dma_fence *fence, struct dma_fence_cb *cb)

 #if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE)
 static int
-dma_buf_lock_fence_add_callback(dma_buf_lock_resource *resource,
+dma_buf_lock_fence_add_callback(struct dma_buf_lock_resource *resource,
 				struct fence *fence,
 				fence_func_t callback)
 #else
 static int
-dma_buf_lock_fence_add_callback(dma_buf_lock_resource *resource,
+dma_buf_lock_fence_add_callback(struct dma_buf_lock_resource *resource,
 				struct dma_fence *fence,
 				dma_fence_func_t callback)
 #endif
@@ -324,12 +322,12 @@ dma_buf_lock_fence_add_callback(dma_buf_lock_resource *resource,

 #if (KERNEL_VERSION(5, 4, 0) > LINUX_VERSION_CODE)
 static int
-dma_buf_lock_add_fence_reservation_callback(dma_buf_lock_resource *resource,
+dma_buf_lock_add_fence_reservation_callback(struct dma_buf_lock_resource *resource,
 					    struct reservation_object *resv,
 					    bool exclusive)
 #else
 static int
-dma_buf_lock_add_fence_reservation_callback(dma_buf_lock_resource *resource,
+dma_buf_lock_add_fence_reservation_callback(struct dma_buf_lock_resource *resource,
 					    struct dma_resv *resv,
 					    bool exclusive)
 #endif
@@ -398,7 +396,7 @@ out:
 }

 static void
-dma_buf_lock_release_fence_reservation(dma_buf_lock_resource *resource,
+dma_buf_lock_release_fence_reservation(struct dma_buf_lock_resource *resource,
 				       struct ww_acquire_ctx *ctx)
 {
 	unsigned int r;
@@ -409,7 +407,7 @@ dma_buf_lock_release_fence_reservation(dma_buf_lock_resource *resource,
 }

 static int
-dma_buf_lock_acquire_fence_reservation(dma_buf_lock_resource *resource,
+dma_buf_lock_acquire_fence_reservation(struct dma_buf_lock_resource *resource,
 				       struct ww_acquire_ctx *ctx)
 {
 #if (KERNEL_VERSION(5, 4, 0) > LINUX_VERSION_CODE)
@@ -451,7 +449,7 @@ error:
 	/* If we deadlock try with lock_slow and retry */
 	if (err == -EDEADLK) {
 #if DMA_BUF_LOCK_DEBUG
-		printk(KERN_DEBUG "deadlock at dma_buf fd %i\n",
+		pr_debug("deadlock at dma_buf fd %i\n",
 		       resource->list_of_dma_buf_fds[content_resv_idx]);
 #endif
 		content_resv = resource->dma_bufs[content_resv_idx]->resv;
@@ -466,14 +464,14 @@ error:

 static int dma_buf_lock_handle_release(struct inode *inode, struct file *file)
 {
-	dma_buf_lock_resource *resource;
+	struct dma_buf_lock_resource *resource;

 	if (!is_dma_buf_lock_file(file))
 		return -EINVAL;

 	resource = file->private_data;
 #if DMA_BUF_LOCK_DEBUG
-	printk("dma_buf_lock_handle_release\n");
+	pr_debug("%s\n", __func__);
 #endif
 	mutex_lock(&dma_buf_lock_mutex);
 	kref_put(&resource->refcount, dma_buf_lock_dounlock);
@@ -482,10 +480,11 @@ static int dma_buf_lock_handle_release(struct inode *inode, struct file *file)
 	return 0;
 }

-static unsigned int dma_buf_lock_handle_poll(struct file *file,
-                                             struct poll_table_struct *wait)
+static unsigned int dma_buf_lock_handle_poll(
+	struct file *file,
+	struct poll_table_struct *wait)
 {
-	dma_buf_lock_resource *resource;
+	struct dma_buf_lock_resource *resource;
 	unsigned int ret = 0;

 	if (!is_dma_buf_lock_file(file))
@@ -493,21 +492,19 @@ static unsigned int dma_buf_lock_handle_poll(struct file *file,

 	resource = file->private_data;
 #if DMA_BUF_LOCK_DEBUG
-	printk("dma_buf_lock_handle_poll\n");
+	pr_debug("%s\n", __func__);
 #endif
 	if (atomic_read(&resource->locked) == 1) {
 		/* Resources have been locked */
 		ret = POLLIN | POLLRDNORM;
 		if (resource->exclusive)
 			ret |=  POLLOUT | POLLWRNORM;
-	}
-	else
-	{
+	} else {
 		if (!poll_does_not_wait(wait))
 			poll_wait(file, &resource->wait, wait);
 	}
 #if DMA_BUF_LOCK_DEBUG
-	printk("dma_buf_lock_handle_poll : return %i\n", ret);
+	pr_debug("%s : return %i\n", __func__, ret);
 #endif
 	return ret;
 }
@@ -526,17 +523,15 @@ static inline int is_dma_buf_lock_file(struct file *file)
 	return file->f_op == &dma_buf_lock_handle_fops;
 }

-
-
 /*
 * Start requested lock.
 *
 * Allocates required memory, copies dma_buf_fd list from userspace,
 * acquires related reservation objects, and starts the lock.
 */
-static int dma_buf_lock_dolock(dma_buf_lock_k_request *request)
+static int dma_buf_lock_dolock(struct dma_buf_lock_k_request *request)
 {
-	dma_buf_lock_resource *resource;
+	struct dma_buf_lock_resource *resource;
 	struct ww_acquire_ctx ww_ctx;
 	int size;
 	int fd;
@@ -553,7 +548,7 @@ static int dma_buf_lock_dolock(dma_buf_lock_k_request *request)
 	    request->exclusive != DMA_BUF_LOCK_EXCLUSIVE)
 		return -EINVAL;

-	resource = kzalloc(sizeof(dma_buf_lock_resource), GFP_KERNEL);
+	resource = kzalloc(sizeof(*resource), GFP_KERNEL);
 	if (resource == NULL)
 		return -ENOMEM;

@@ -594,7 +589,7 @@ static int dma_buf_lock_dolock(dma_buf_lock_k_request *request)
 	}
 #if DMA_BUF_LOCK_DEBUG
 	for (i = 0; i < request->count; i++)
-		printk("dma_buf %i = %X\n", i, resource->list_of_dma_buf_fds[i]);
+		pr_debug("dma_buf %i = %X\n", i, resource->list_of_dma_buf_fds[i]);
 #endif

 	/* Initialize the fence associated with dma_buf_lock resource */
@@ -611,13 +606,11 @@ static int dma_buf_lock_dolock(dma_buf_lock_k_request *request)

 	mutex_unlock(&dma_buf_lock_mutex);

-	for (i = 0; i < request->count; i++)
-	{
+	for (i = 0; i < request->count; i++) {
 		/* Convert fd into dma_buf structure */
 		resource->dma_bufs[i] = dma_buf_get(resource->list_of_dma_buf_fds[i]);

-		if (IS_ERR_VALUE(PTR_ERR(resource->dma_bufs[i])))
-		{
+		if (IS_ERR_VALUE(PTR_ERR(resource->dma_bufs[i]))) {
 			mutex_lock(&dma_buf_lock_mutex);
 			kref_put(&resource->refcount, dma_buf_lock_dounlock);
 			mutex_unlock(&dma_buf_lock_mutex);
@@ -632,8 +625,8 @@ static int dma_buf_lock_dolock(dma_buf_lock_k_request *request)
 			return -EINVAL;
 		}
 #if DMA_BUF_LOCK_DEBUG
-		printk(KERN_DEBUG "dma_buf_lock_dolock : dma_buf_fd %i dma_buf %p dma_fence reservation %p\n",
-		       resource->list_of_dma_buf_fds[i], resource->dma_bufs[i], resource->dma_bufs[i]->resv);
+		pr_debug("%s : dma_buf_fd %i dma_buf %p dma_fence reservation %p\n",
+		       __func__, resource->list_of_dma_buf_fds[i], resource->dma_bufs[i], resource->dma_bufs[i]->resv);
 #endif
 	}

@@ -643,9 +636,8 @@ static int dma_buf_lock_dolock(dma_buf_lock_k_request *request)

 	/* Create file descriptor associated with lock request */
 	fd = anon_inode_getfd("dma_buf_lock", &dma_buf_lock_handle_fops,
-	                      (void *)resource, 0);
-	if (fd < 0)
-	{
+		(void *)resource, 0);
+	if (fd < 0) {
 		mutex_lock(&dma_buf_lock_mutex);
 		kref_put(&resource->refcount, dma_buf_lock_dounlock);
 		kref_put(&resource->refcount, dma_buf_lock_dounlock);
@@ -659,7 +651,7 @@ static int dma_buf_lock_dolock(dma_buf_lock_k_request *request)
 	ret = dma_buf_lock_acquire_fence_reservation(resource, &ww_ctx);
 	if (ret) {
 #if DMA_BUF_LOCK_DEBUG
-		printk(KERN_DEBUG "dma_buf_lock_dolock : Error %d locking reservations.\n", ret);
+		pr_debug("%s : Error %d locking reservations.\n", __func__, ret);
 #endif
 		put_unused_fd(fd);
 		mutex_lock(&dma_buf_lock_mutex);
@@ -698,7 +690,7 @@ static int dma_buf_lock_dolock(dma_buf_lock_k_request *request)
 #endif
 			if (ret) {
 #if DMA_BUF_LOCK_DEBUG
-				printk(KERN_DEBUG "dma_buf_lock_dolock : Error %d reserving space for shared fence.\n", ret);
+				pr_debug("%s : Error %d reserving space for shared fence.\n", __func__, ret);
 #endif
 				break;
 			}
@@ -708,7 +700,7 @@ static int dma_buf_lock_dolock(dma_buf_lock_k_request *request)
 									  false);
 			if (ret) {
 #if DMA_BUF_LOCK_DEBUG
-				printk(KERN_DEBUG "dma_buf_lock_dolock : Error %d adding reservation to callback.\n", ret);
+				pr_debug("%s : Error %d adding reservation to callback.\n", __func__, ret);
 #endif
 				break;
 			}
@@ -724,7 +716,7 @@ static int dma_buf_lock_dolock(dma_buf_lock_k_request *request)
 									  true);
 			if (ret) {
 #if DMA_BUF_LOCK_DEBUG
-				printk(KERN_DEBUG "dma_buf_lock_dolock : Error %d adding reservation to callback.\n", ret);
+				pr_debug("%s : Error %d adding reservation to callback.\n", __func__, ret);
 #endif
 				break;
 			}
@@ -748,8 +740,7 @@ static int dma_buf_lock_dolock(dma_buf_lock_k_request *request)
 			dma_buf_lock_fence_work(&resource->work);
 	}

-	if (IS_ERR_VALUE((unsigned long)ret))
-	{
+	if (IS_ERR_VALUE((unsigned long)ret)) {
 		put_unused_fd(fd);

 		mutex_lock(&dma_buf_lock_mutex);
@@ -761,7 +752,7 @@ static int dma_buf_lock_dolock(dma_buf_lock_k_request *request)
 	}

 #if DMA_BUF_LOCK_DEBUG
-	printk("dma_buf_lock_dolock : complete\n");
+	pr_debug("%s : complete\n", __func__);
 #endif
 	mutex_lock(&dma_buf_lock_mutex);
 	kref_put(&resource->refcount, dma_buf_lock_dounlock);
@@ -773,7 +764,7 @@ static int dma_buf_lock_dolock(dma_buf_lock_k_request *request)
 static void dma_buf_lock_dounlock(struct kref *ref)
 {
 	int i;
-	dma_buf_lock_resource *resource = container_of(ref, dma_buf_lock_resource, refcount);
+	struct dma_buf_lock_resource *resource = container_of(ref, struct dma_buf_lock_resource, refcount);

 	atomic_set(&resource->locked, 0);

@@ -784,8 +775,7 @@ static void dma_buf_lock_dounlock(struct kref *ref)

 	list_del(&resource->link);

-	for (i = 0; i < resource->count; i++)
-	{
+	for (i = 0; i < resource->count; i++) {
 		if (resource->dma_bufs[i])
 			dma_buf_put(resource->dma_bufs[i]);
 	}
@@ -799,7 +789,7 @@ static int __init dma_buf_lock_init(void)
 {
 	int err;
 #if DMA_BUF_LOCK_DEBUG
-	printk("dma_buf_lock_init\n");
+	pr_debug("%s\n", __func__);
 #endif
 	err = alloc_chrdev_region(&dma_buf_lock_dev, 0, 1, dma_buf_lock_dev_name);

@@ -812,10 +802,8 @@ static int __init dma_buf_lock_init(void)
 			dma_buf_lock_class = class_create(THIS_MODULE, dma_buf_lock_dev_name);
 			if (IS_ERR(dma_buf_lock_class))
 				err = PTR_ERR(dma_buf_lock_class);
-			else
-			{
-				struct device *mdev;
-				mdev = device_create(
+			else {
+				struct device *mdev = device_create(
 					dma_buf_lock_class, NULL, dma_buf_lock_dev,
 					NULL, "%s", dma_buf_lock_dev_name);
 				if (!IS_ERR(mdev))
@@ -830,7 +818,7 @@ static int __init dma_buf_lock_init(void)
 		unregister_chrdev_region(dma_buf_lock_dev, 1);
 	}
 #if DMA_BUF_LOCK_DEBUG
-	printk("dma_buf_lock_init failed\n");
+	pr_debug("%s failed\n", __func__);
 #endif
 	return err;
 }
@@ -838,25 +826,24 @@ static int __init dma_buf_lock_init(void)
 static void __exit dma_buf_lock_exit(void)
 {
 #if DMA_BUF_LOCK_DEBUG
-	printk("dma_buf_lock_exit\n");
+	pr_debug("%s\n", __func__);
 #endif

 	/* Unlock all outstanding references */
-	while (1)
-	{
+	while (1) {
+		struct dma_buf_lock_resource *resource;
+
 		mutex_lock(&dma_buf_lock_mutex);
-		if (list_empty(&dma_buf_lock_resource_list))
-		{
+		if (list_empty(&dma_buf_lock_resource_list)) {
 			mutex_unlock(&dma_buf_lock_mutex);
 			break;
 		}
-		else
-		{
-			dma_buf_lock_resource *resource = list_entry(dma_buf_lock_resource_list.next,
-			                                             dma_buf_lock_resource, link);
-			kref_put(&resource->refcount, dma_buf_lock_dounlock);
-			mutex_unlock(&dma_buf_lock_mutex);
-		}
+
+		resource = list_entry(dma_buf_lock_resource_list.next,
+			struct dma_buf_lock_resource, link);
+
+		kref_put(&resource->refcount, dma_buf_lock_dounlock);
+		mutex_unlock(&dma_buf_lock_mutex);
 	}

 	device_destroy(dma_buf_lock_class, dma_buf_lock_dev);
@@ -874,7 +861,7 @@ static long dma_buf_lock_ioctl(struct file *filp, unsigned int cmd, unsigned lon
 static int dma_buf_lock_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, unsigned long arg)
 #endif
 {
-	dma_buf_lock_k_request request;
+	struct dma_buf_lock_k_request request;
 	int size = _IOC_SIZE(cmd);

 	if (_IOC_TYPE(cmd) != DMA_BUF_LOCK_IOC_MAGIC)
@@ -882,17 +869,16 @@ static int dma_buf_lock_ioctl(struct inode *inode, struct file *filp, unsigned i
 	if ((_IOC_NR(cmd) < DMA_BUF_LOCK_IOC_MINNR) || (_IOC_NR(cmd) > DMA_BUF_LOCK_IOC_MAXNR))
 		return -ENOTTY;

-	switch (cmd)
-	{
-		case DMA_BUF_LOCK_FUNC_LOCK_ASYNC:
-			if (size != sizeof(dma_buf_lock_k_request))
-				return -ENOTTY;
-			if (copy_from_user(&request, (void __user *)arg, size))
-				return -EFAULT;
+	switch (cmd) {
+	case DMA_BUF_LOCK_FUNC_LOCK_ASYNC:
+		if (size != sizeof(request))
+			return -ENOTTY;
+		if (copy_from_user(&request, (void __user *)arg, size))
+			return -EFAULT;
 #if DMA_BUF_LOCK_DEBUG
-			printk("DMA_BUF_LOCK_FUNC_LOCK_ASYNC - %i\n", request.count);
+		pr_debug("DMA_BUF_LOCK_FUNC_LOCK_ASYNC - %i\n", request.count);
 #endif
-			return dma_buf_lock_dolock(&request);
+		return dma_buf_lock_dolock(&request);
 	}

 	return -ENOTTY;
--- a/drivers/base/arm/dma_buf_lock/src/dma_buf_lock.h
+++ b/drivers/base/arm/dma_buf_lock/src/dma_buf_lock.h
@@ -22,23 +22,21 @@
 #ifndef _DMA_BUF_LOCK_H
 #define _DMA_BUF_LOCK_H

-typedef enum dma_buf_lock_exclusive
-{
+enum dma_buf_lock_exclusive {
 	DMA_BUF_LOCK_NONEXCLUSIVE = 0,
 	DMA_BUF_LOCK_EXCLUSIVE = -1
-} dma_buf_lock_exclusive;
+};

-typedef struct dma_buf_lock_k_request
-{
+struct dma_buf_lock_k_request {
 	int count;
 	int *list_of_dma_buf_fds;
 	int timeout;
-	dma_buf_lock_exclusive exclusive;
-} dma_buf_lock_k_request;
+	enum dma_buf_lock_exclusive exclusive;
+};

 #define DMA_BUF_LOCK_IOC_MAGIC '~'

-#define DMA_BUF_LOCK_FUNC_LOCK_ASYNC       _IOW(DMA_BUF_LOCK_IOC_MAGIC, 11, dma_buf_lock_k_request)
+#define DMA_BUF_LOCK_FUNC_LOCK_ASYNC       _IOW(DMA_BUF_LOCK_IOC_MAGIC, 11, struct dma_buf_lock_k_request)

 #define DMA_BUF_LOCK_IOC_MINNR 11
 #define DMA_BUF_LOCK_IOC_MAXNR 11
--- a/drivers/base/arm/dma_buf_test_exporter/dma-buf-test-exporter.c
+++ b/drivers/base/arm/dma_buf_test_exporter/dma-buf-test-exporter.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
 /*
 *
- * (C) COPYRIGHT 2012-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2012-2022 ARM Limited. All rights reserved.
 *
 * This program is free software and is provided to you under the terms of the
 * GNU General Public License version 2 as published by the Free Software
@@ -81,6 +81,7 @@ static int dma_buf_te_attach(struct dma_buf *buf, struct dma_buf_attachment *att
 #endif
 {
 	struct dma_buf_te_alloc	*alloc;
+
 	alloc = buf->priv;

 	if (alloc->fail_attach)
@@ -95,6 +96,12 @@ static int dma_buf_te_attach(struct dma_buf *buf, struct dma_buf_attachment *att
 	return 0;
 }

+/**
+ * dma_buf_te_detach - The detach callback function to release &attachment
+ *
+ * @buf: buffer for the &attachment
+ * @attachment: attachment data to be released
+ */
 static void dma_buf_te_detach(struct dma_buf *buf, struct dma_buf_attachment *attachment)
 {
 	struct dma_buf_te_alloc *alloc = buf->priv;
@@ -199,6 +206,7 @@ static void dma_buf_te_release(struct dma_buf *buf)
 {
 	size_t i;
 	struct dma_buf_te_alloc *alloc;
+
 	alloc = buf->priv;
 	/* no need for locking */

@@ -240,6 +248,7 @@ static int dma_buf_te_sync(struct dma_buf *dmabuf,
 	list_for_each_entry(attachment, &dmabuf->attachments, node) {
 		struct dma_buf_te_attachment *pa = attachment->priv;
 		struct sg_table *sg = pa->sg;
+
 		if (!sg) {
 			dev_dbg(te_device.this_device, "no mapping for device %s\n", dev_name(attachment->dev));
 			continue;
@@ -291,6 +300,7 @@ static void dma_buf_te_mmap_open(struct vm_area_struct *vma)
 {
 	struct dma_buf *dma_buf;
 	struct dma_buf_te_alloc *alloc;
+
 	dma_buf = vma->vm_private_data;
 	alloc = dma_buf->priv;

@@ -303,6 +313,7 @@ static void dma_buf_te_mmap_close(struct vm_area_struct *vma)
 {
 	struct dma_buf *dma_buf;
 	struct dma_buf_te_alloc *alloc;
+
 	dma_buf = vma->vm_private_data;
 	alloc = dma_buf->priv;

@@ -344,7 +355,7 @@ static vm_fault_t dma_buf_te_mmap_fault(struct vm_fault *vmf)
 	return 0;
 }

-struct vm_operations_struct dma_buf_te_vm_ops = {
+static const struct vm_operations_struct dma_buf_te_vm_ops = {
 	.open = dma_buf_te_mmap_open,
 	.close = dma_buf_te_mmap_close,
 	.fault = dma_buf_te_mmap_fault
@@ -353,6 +364,7 @@ struct vm_operations_struct dma_buf_te_vm_ops = {
 static int dma_buf_te_mmap(struct dma_buf *dmabuf, struct vm_area_struct *vma)
 {
 	struct dma_buf_te_alloc *alloc;
+
 	alloc = dmabuf->priv;

 	if (alloc->fail_mmap)
@@ -398,7 +410,6 @@ static void dma_buf_te_kunmap(struct dma_buf *buf,
 		return;

 	kunmap(alloc->pages[page_num]);
-	return;
 }

 static struct dma_buf_ops dma_buf_te_ops = {
@@ -798,13 +809,14 @@ static const struct file_operations dma_buf_te_fops = {
 static int __init dma_buf_te_init(void)
 {
 	int res;
+
 	te_device.minor = MISC_DYNAMIC_MINOR;
 	te_device.name = "dma_buf_te";
 	te_device.fops = &dma_buf_te_fops;

 	res = misc_register(&te_device);
 	if (res) {
-		printk(KERN_WARNING"Misc device registration failed of 'dma_buf_te'\n");
+		pr_warn("Misc device registration failed of 'dma_buf_te'\n");
 		return res;
 	}
 	te_device.this_device->coherent_dma_mask = DMA_BIT_MASK(32);
--- a/drivers/base/arm/memory_group_manager/memory_group_manager.c
+++ b/drivers/base/arm/memory_group_manager/memory_group_manager.c
@@ -367,7 +367,7 @@ static vm_fault_t example_mgm_vmf_insert_pfn_prot(
 	dev_dbg(data->dev,
 		"%s(mgm_dev=%p, group_id=%d, vma=%p, addr=0x%lx, pfn=0x%lx, prot=0x%llx)\n",
 		__func__, (void *)mgm_dev, group_id, (void *)vma, addr, pfn,
-		(unsigned long long int) pgprot_val(prot));
+		(unsigned long long) pgprot_val(prot));

 	if (WARN_ON(group_id < 0) ||
 		WARN_ON(group_id >= MEMORY_GROUP_MANAGER_NR_GROUPS))
--- a/drivers/base/arm/protected_memory_allocator/protected_memory_allocator.c
+++ b/drivers/base/arm/protected_memory_allocator/protected_memory_allocator.c
@@ -107,20 +107,20 @@ static void small_granularity_alloc(struct simple_pma_device *const epma_dev,
 	alloc_pages_bitfield_size = ALLOC_PAGES_BITFIELD_ARR_SIZE(epma_dev->rmem_size);

 	WARN(alloc_bitfield_idx >= alloc_pages_bitfield_size,
-	     "%s: idx>bf_size: %zu %zu", __FUNCTION__,
+	     "%s: idx>bf_size: %zu %zu", __func__,
 	     alloc_bitfield_idx, alloc_pages_bitfield_size);

 	WARN((start_bit + (1 << order)) > PAGES_PER_BITFIELD_ELEM,
 	     "%s: start=%zu order=%zu ppbe=%zu",
-	     __FUNCTION__, start_bit, order, PAGES_PER_BITFIELD_ELEM);
+	     __func__, start_bit, order, PAGES_PER_BITFIELD_ELEM);

 	bitfield = &epma_dev->allocated_pages_bitfield_arr[alloc_bitfield_idx];

 	for (i = 0; i < (1 << order); i++) {
 		/* Check the pages represented by this bit are actually free */
-		WARN (*bitfield & (1ULL << (start_bit + i)),
+		WARN(*bitfield & (1ULL << (start_bit + i)),
 		      "in %s: page not free: %zu %zu %.16llx %zu\n",
-		      __FUNCTION__, i, order, *bitfield, alloc_pages_bitfield_size);
+		      __func__, i, order, *bitfield, alloc_pages_bitfield_size);

 		/* Mark the pages as now allocated */
 		*bitfield |= (1ULL << (start_bit + i));
@@ -172,7 +172,7 @@ static void large_granularity_alloc(struct simple_pma_device *const epma_dev,
 	 */
 	WARN((start_alloc_bitfield_idx + order) >= ALLOC_PAGES_BITFIELD_ARR_SIZE(epma_dev->rmem_size),
 	     "%s: start=%zu order=%zu ms=%zu",
-	     __FUNCTION__, start_alloc_bitfield_idx, order, epma_dev->rmem_size);
+	     __func__, start_alloc_bitfield_idx, order, epma_dev->rmem_size);

 	for (i = 0; i < num_bitfield_elements_needed; i++) {
 		u64 *bitfield = &epma_dev->allocated_pages_bitfield_arr[start_alloc_bitfield_idx + i];
@@ -180,7 +180,7 @@ static void large_granularity_alloc(struct simple_pma_device *const epma_dev,
 		/* We expect all pages that relate to this bitfield element to be free */
 		WARN((*bitfield != 0),
 		     "in %s: pages not free: i=%zu o=%zu bf=%.16llx\n",
-		     __FUNCTION__, i, order, *bitfield);
+		     __func__, i, order, *bitfield);

 		/* Mark all the pages for this element as not free */
 		*bitfield = ~0ULL;
@@ -318,9 +318,7 @@ static struct protected_memory_allocation *simple_pma_alloc_page(
 					spin_unlock(&epma_dev->rmem_lock);
 					return pma;
 				}
-			}
-			else
-			{
+			} else {
 				count = 0;
 			}
 		}
@@ -402,11 +400,10 @@ static void simple_pma_free_page(

 		/* Clear the bits for the pages we're now freeing */
 		*bitfield &= ~(((1ULL << num_pages_in_allocation) - 1) << bitfield_start_bit);
-	}
-	else {
+	} else {
 		WARN(page_num % PAGES_PER_BITFIELD_ELEM,
 		     "%s: Expecting allocs of order >= %d to be %zu-page aligned\n",
-		     __FUNCTION__, ORDER_OF_PAGES_PER_BITFIELD_ELEM, PAGES_PER_BITFIELD_ELEM);
+		     __func__, ORDER_OF_PAGES_PER_BITFIELD_ELEM, PAGES_PER_BITFIELD_ELEM);

 		for (i = 0; i < num_bitfield_elems_used_by_alloc; i++) {
 			bitfield = &epma_dev->allocated_pages_bitfield_arr[bitfield_idx + i];
@@ -414,7 +411,7 @@ static void simple_pma_free_page(
 			/* We expect all bits to be set (all pages allocated) */
 			WARN((*bitfield != ~0),
 			     "%s: alloc being freed is not fully allocated: of=%zu np=%zu bf=%.16llx\n",
-			     __FUNCTION__, offset, num_pages_in_allocation, *bitfield);
+			     __func__, offset, num_pages_in_allocation, *bitfield);

 			/*
 			 * Now clear all the bits in the bitfield element to mark all the pages
--- a/drivers/gpu/arm/bifrost/Kbuild
+++ b/drivers/gpu/arm/bifrost/Kbuild
@@ -71,7 +71,7 @@ endif
 #

 # Driver version string which is returned to userspace via an ioctl
-MALI_RELEASE_NAME ?= '"g10p0-01eac0"'
+MALI_RELEASE_NAME ?= '"g11p0-01eac0"'
 # Set up defaults if not defined by build system
 ifeq ($(CONFIG_MALI_BIFROST_DEBUG), y)
    MALI_UNIT_TEST = 1
@@ -164,6 +164,7 @@ bifrost_kbase-y := \
    mali_kbase_hwcnt_gpu_narrow.o \
    mali_kbase_hwcnt_types.o \
    mali_kbase_hwcnt_virtualizer.o \
+    mali_kbase_hwcnt_watchdog_if_timer.o \
    mali_kbase_softjobs.o \
    mali_kbase_hw.o \
    mali_kbase_debug.o \
@@ -201,12 +202,12 @@ bifrost_kbase-$(CONFIG_SYNC_FILE) += \
 ifeq ($(CONFIG_MALI_CSF_SUPPORT),y)
    bifrost_kbase-y += \
        mali_kbase_hwcnt_backend_csf.o \
-        mali_kbase_hwcnt_watchdog_if_timer.o \
        mali_kbase_hwcnt_backend_csf_if_fw.o
 else
    bifrost_kbase-y += \
        mali_kbase_jm.o \
        mali_kbase_hwcnt_backend_jm.o \
+        mali_kbase_hwcnt_backend_jm_watchdog.o \
        mali_kbase_dummy_job_wa.o \
        mali_kbase_debug_job_fault.o \
        mali_kbase_event.o \
--- a/drivers/gpu/arm/bifrost/Mconfig
+++ b/drivers/gpu/arm/bifrost/Mconfig
@@ -47,6 +47,14 @@ config MALI_REAL_HW
 	default y
 	default n if NO_MALI

+config MALI_PLATFORM_DT_PIN_RST
+	bool "Enable Juno GPU Pin reset"
+	depends on MALI_BIFROST
+	default n
+	default y if BUSLOG
+	help
+	  Enables support for GPU pin reset on Juno platforms.
+
 config MALI_CSF_SUPPORT
 	bool "Enable Mali CSF based GPU support"
 	depends on MALI_BIFROST
--- a/drivers/gpu/arm/bifrost/arbiter/mali_kbase_arbiter_defs.h
+++ b/drivers/gpu/arm/bifrost/arbiter/mali_kbase_arbiter_defs.h
@@ -1,7 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
 /*
 *
- * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
 *
 * This program is free software and is provided to you under the terms of the
 * GNU General Public License version 2 as published by the Free Software
@@ -20,7 +20,7 @@
 */

 /**
- * Mali structures define to support arbitration feature
+ * DOC: Mali structure definitions to support the arbitration feature
 */

 #ifndef _MALI_KBASE_ARBITER_DEFS_H_
--- a/drivers/gpu/arm/bifrost/arbiter/mali_kbase_arbiter_interface.h
+++ b/drivers/gpu/arm/bifrost/arbiter/mali_kbase_arbiter_interface.h
@@ -1,7 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
 /*
 *
- * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
 *
 * This program is free software and is provided to you under the terms of the
 * GNU General Public License version 2 as published by the Free Software
@@ -20,14 +20,14 @@
 */

 /**
- * Defines the Mali arbiter interface
+ * DOC: Defines the Mali arbiter interface
 */

 #ifndef _MALI_KBASE_ARBITER_INTERFACE_H_
 #define _MALI_KBASE_ARBITER_INTERFACE_H_

 /**
- *  Mali arbiter interface version
+ * DOC: Mali arbiter interface version
 *
 * This specifies the current version of the configuration interface. Whenever
 * the arbiter interface changes, so that integration effort is required, the
@@ -44,7 +44,7 @@
 #define MALI_KBASE_ARBITER_INTERFACE_VERSION 5

 /**
- * NO_FREQ is used in case platform doesn't support reporting frequency
+ * DOC: NO_FREQ is used in case platform doesn't support reporting frequency
 */
 #define NO_FREQ 0

@@ -53,14 +53,6 @@ struct arbiter_if_dev;
 /**
 * struct arbiter_if_arb_vm_ops - Interface to communicate messages to VM
 *
- * This struct contains callbacks used to deliver messages
- * from the arbiter to the corresponding VM.
- *
- * Note that calls into these callbacks may have synchronous calls back into
- * the arbiter arbiter_if_vm_arb_ops callbacks below.
- * For example vm_arb_gpu_stopped() may be called as a side effect of
- * arb_vm_gpu_stop() being called here.
- *
 * @arb_vm_gpu_stop: Callback to ask VM to stop using GPU.
 *                   dev: The arbif kernel module device.
 *
@@ -94,6 +86,13 @@ struct arbiter_if_dev;
 *                      freq: GPU clock frequency value reported from arbiter
 *
 *                      Informs KBase that the GPU clock frequency has been updated.
+ *
+ * This struct contains callbacks used to deliver messages
+ * from the arbiter to the corresponding VM.
+ * Note that calls into these callbacks may have synchronous calls back into
+ * the arbiter arbiter_if_vm_arb_ops callbacks below.
+ * For example vm_arb_gpu_stopped() may be called as a side effect of
+ * arb_vm_gpu_stop() being called here.
 */
 struct arbiter_if_arb_vm_ops {
 	void (*arb_vm_gpu_stop)(struct device *dev);
@@ -107,12 +106,6 @@ struct arbiter_if_arb_vm_ops {
 /**
 * struct arbiter_if_vm_arb_ops - Interface to communicate messages to arbiter
 *
- * This struct contains callbacks used to request operations
- * from the VM to the arbiter
- *
- * Note that we must not make any synchronous calls back in to the VM
- * (via arbiter_if_arb_vm_ops above) in the context of these callbacks.
- *
 * @vm_arb_register_dev: Callback to register VM device driver callbacks.
 *                       arbif_dev: The arbiter interface to register
 *                                  with for device callbacks
@@ -142,6 +135,11 @@ struct arbiter_if_arb_vm_ops {
 *                      using the GPU
 *                      arbif_dev: The arbiter interface device to notify.
 *                      gpu_required: The GPU is still needed to do more work.
+ *
+ * This struct contains callbacks used to request operations
+ * from the VM to the arbiter.
+ * Note that we must not make any synchronous calls back in to the VM
+ * (via arbiter_if_arb_vm_ops above) in the context of these callbacks.
 */
 struct arbiter_if_vm_arb_ops {
 	int (*vm_arb_register_dev)(struct arbiter_if_dev *arbif_dev,
--- a/drivers/gpu/arm/bifrost/arbiter/mali_kbase_arbiter_pm.c
+++ b/drivers/gpu/arm/bifrost/arbiter/mali_kbase_arbiter_pm.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
 /*
 *
- * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
 *
 * This program is free software and is provided to you under the terms of the
 * GNU General Public License version 2 as published by the Free Software
@@ -20,7 +20,7 @@
 */

 /**
- * Mali arbiter power manager state machine and APIs
+ * DOC: Mali arbiter power manager state machine and APIs
 */

 #include <mali_kbase.h>
@@ -394,6 +394,8 @@ void kbase_arbiter_pm_release_interrupts(struct kbase_device *kbdev)
 * @kbdev: The kbase device structure for the device (must be a valid pointer)
 *
 * Install interrupts and set the interrupt_install flag to true.
+ *
+ * Return: 0 on success, or a Linux error code
 */
 int kbase_arbiter_pm_install_interrupts(struct kbase_device *kbdev)
 {
@@ -619,18 +621,6 @@ static void kbase_arbiter_pm_vm_gpu_stop(struct kbase_device *kbdev)
 	case KBASE_VM_STATE_SUSPEND_PENDING:
 		/* Suspend finishes with a stop so nothing else to do */
 		break;
-	case KBASE_VM_STATE_INITIALIZING:
-	case KBASE_VM_STATE_STOPPED_GPU_REQUESTED:
-		/*
-		 * Case stop() is received when in a GPU REQUESTED state, it
-		 * means that the granted() was missed so the GPU needs to be
-		 * requested again.
-		 */
-		dev_dbg(kbdev->dev,
-			"GPU stop while already stopped with GPU requested");
-		kbase_arbif_gpu_stopped(kbdev, true);
-		start_request_timer(kbdev);
-		break;
 	default:
 		dev_warn(kbdev->dev, "GPU_STOP when not expected - state %s\n",
 			kbase_arbiter_pm_vm_state_str(arb_vm_state->vm_state));
@@ -668,19 +658,8 @@ static void kbase_gpu_lost(struct kbase_device *kbdev)
 		break;
 	case KBASE_VM_STATE_SUSPENDED:
 	case KBASE_VM_STATE_STOPPED:
-		dev_dbg(kbdev->dev, "GPU lost while already stopped");
-		break;
-	case KBASE_VM_STATE_INITIALIZING:
 	case KBASE_VM_STATE_STOPPED_GPU_REQUESTED:
-		/*
-		 * Case lost() is received when in a GPU REQUESTED state, it
-		 * means that the granted() and stop() were missed so the GPU
-		 * needs to be requested again. Very unlikely to happen.
-		 */
-		dev_dbg(kbdev->dev,
-			"GPU lost while already stopped with GPU requested");
-		kbase_arbif_gpu_request(kbdev);
-		start_request_timer(kbdev);
+		dev_dbg(kbdev->dev, "GPU lost while already stopped");
 		break;
 	case KBASE_VM_STATE_SUSPEND_WAIT_FOR_GRANT:
 		dev_dbg(kbdev->dev, "GPU lost while waiting to suspend");
@@ -947,6 +926,8 @@ static void kbase_arbiter_pm_vm_wait_gpu_assignment(struct kbase_device *kbdev)
 * @kbdev: The kbase device structure for the device (must be a valid pointer)
 *
 * Checks if the virtual machine holds VM state lock.
+ *
+ * Return: true if GPU is assigned, else false.
 */
 static inline bool kbase_arbiter_pm_vm_gpu_assigned_lockheld(
 	struct kbase_device *kbdev)
@@ -1067,14 +1048,14 @@ void kbase_arbiter_pm_update_gpu_freq(struct kbase_arbiter_freq *arb_freq,
 }

 /**
- * enumerate_arb_gpu_clk() - Enumerate a GPU clock on the given index
+ * get_arb_gpu_clk() - Enumerate a GPU clock on the given index
 * @kbdev: kbase_device pointer
 * @index: GPU clock index
 *
- * Returns pointer to structure holding GPU clock frequency data reported from
+ * Return: Pointer to structure holding GPU clock frequency data reported from
 * arbiter, only index 0 is valid.
 */
-static void *enumerate_arb_gpu_clk(struct kbase_device *kbdev,
+static void *get_arb_gpu_clk(struct kbase_device *kbdev,
 		unsigned int index)
 {
 	if (index == 0)
@@ -1084,10 +1065,10 @@ static void *enumerate_arb_gpu_clk(struct kbase_device *kbdev,

 /**
 * get_arb_gpu_clk_rate() - Get the current rate of GPU clock frequency value
- * @kbdev: kbase_device pointer
- * @index: GPU clock index
+ * @kbdev:          kbase_device pointer
+ * @gpu_clk_handle: Handle unique to the enumerated GPU clock
 *
- * Returns the GPU clock frequency value saved when gpu is granted from arbiter
+ * Return: The GPU clock frequency value saved when gpu is granted from arbiter
 */
 static unsigned long get_arb_gpu_clk_rate(struct kbase_device *kbdev,
 		void *gpu_clk_handle)
@@ -1109,10 +1090,10 @@ static unsigned long get_arb_gpu_clk_rate(struct kbase_device *kbdev,
 * @gpu_clk_handle:  Handle unique to the enumerated GPU clock
 * @nb:              notifier block containing the callback function pointer
 *
- * Returns 0 on success, negative error code otherwise.
- *
 * This function registers a callback function that is invoked whenever the
 * frequency of the clock corresponding to @gpu_clk_handle changes.
+ *
+ * Return: 0 on success, negative error code otherwise.
 */
 static int arb_gpu_clk_notifier_register(struct kbase_device *kbdev,
 	void *gpu_clk_handle, struct notifier_block *nb)
@@ -1154,7 +1135,7 @@ static void arb_gpu_clk_notifier_unregister(struct kbase_device *kbdev,

 struct kbase_clk_rate_trace_op_conf arb_clk_rate_trace_ops = {
 	.get_gpu_clk_rate = get_arb_gpu_clk_rate,
-	.enumerate_gpu_clk = enumerate_arb_gpu_clk,
+	.enumerate_gpu_clk = get_arb_gpu_clk,
 	.gpu_clk_notifier_register = arb_gpu_clk_notifier_register,
 	.gpu_clk_notifier_unregister = arb_gpu_clk_notifier_unregister
 };
--- a/drivers/gpu/arm/bifrost/arbiter/mali_kbase_arbiter_pm.h
+++ b/drivers/gpu/arm/bifrost/arbiter/mali_kbase_arbiter_pm.h
@@ -1,7 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
 /*
 *
- * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
 *
 * This program is free software and is provided to you under the terms of the
 * GNU General Public License version 2 as published by the Free Software
@@ -20,7 +20,7 @@
 */

 /**
- * Mali arbiter power manager state machine and APIs
+ * DOC: Mali arbiter power manager state machine and APIs
 */

 #ifndef _MALI_KBASE_ARBITER_PM_H_
@@ -101,6 +101,8 @@ void kbase_arbiter_pm_release_interrupts(struct kbase_device *kbdev);
 * @kbdev: The kbase device structure for the device (must be a valid pointer)
 *
 * Install interrupts and set the interrupt_install flag to true.
+ *
+ * Return: 0 if success, or a Linux error code
 */
 int kbase_arbiter_pm_install_interrupts(struct kbase_device *kbdev);

--- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_gpuprops_backend.c
+++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_gpuprops_backend.c
@@ -33,7 +33,7 @@ int kbase_backend_gpuprops_get(struct kbase_device *kbdev,
 					struct kbase_gpuprops_regdump *regdump)
 {
 	int i;
-	struct kbase_gpuprops_regdump registers;
+	struct kbase_gpuprops_regdump registers = { 0 };

 	/* Fill regdump with the content of the relevant registers */
 	registers.gpu_id = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_ID));
--- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_instr_backend.c
+++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_instr_backend.c
@@ -421,12 +421,12 @@ int kbase_instr_backend_init(struct kbase_device *kbdev)
 #ifdef CONFIG_MALI_PRFCNT_SET_SELECT_VIA_DEBUG_FS
 /* Use the build time option for the override default. */
 #if defined(CONFIG_MALI_BIFROST_PRFCNT_SET_SECONDARY)
-	kbdev->hwcnt.backend.override_counter_set = KBASE_HWCNT_SET_SECONDARY;
+	kbdev->hwcnt.backend.override_counter_set = KBASE_HWCNT_PHYSICAL_SET_SECONDARY;
 #elif defined(CONFIG_MALI_PRFCNT_SET_TERTIARY)
-	kbdev->hwcnt.backend.override_counter_set = KBASE_HWCNT_SET_TERTIARY;
+	kbdev->hwcnt.backend.override_counter_set = KBASE_HWCNT_PHYSICAL_SET_TERTIARY;
 #else
 	/* Default to primary */
-	kbdev->hwcnt.backend.override_counter_set = KBASE_HWCNT_SET_PRIMARY;
+	kbdev->hwcnt.backend.override_counter_set = KBASE_HWCNT_PHYSICAL_SET_PRIMARY;
 #endif
 #endif
 	return 0;
@@ -446,8 +446,8 @@ void kbase_instr_backend_debugfs_init(struct kbase_device *kbdev)
 	 *
 	 * Valid inputs are the values accepted bythe SET_SELECT bits of the
 	 * PRFCNT_CONFIG register as defined in the architecture specification.
-	*/
-	debugfs_create_u8("hwcnt_set_select", S_IRUGO | S_IWUSR,
+	 */
+	debugfs_create_u8("hwcnt_set_select", 0644,
 			  kbdev->mali_debugfs_directory,
 			  (u8 *)&kbdev->hwcnt.backend.override_counter_set);
 }
--- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_defs.h
+++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_defs.h
@@ -1,7 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
 /*
 *
- * (C) COPYRIGHT 2014-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2022 ARM Limited. All rights reserved.
 *
 * This program is free software and is provided to you under the terms of the
 * GNU General Public License version 2 as published by the Free Software
@@ -48,6 +48,7 @@ struct rb_entry {
 /**
 * SLOT_RB_TAG_KCTX() - a function-like macro for converting a pointer to a
 *			u64 for serving as tagged value.
+ * @kctx: Pointer to kbase context.
 */
 #define SLOT_RB_TAG_KCTX(kctx) (u64)((uintptr_t)(kctx))
 /**
--- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_hw.c
+++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_hw.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
 /*
 *
- * (C) COPYRIGHT 2010-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2022 ARM Limited. All rights reserved.
 *
 * This program is free software and is provided to you under the terms of the
 * GNU General Public License version 2 as published by the Free Software
@@ -425,6 +425,8 @@ void kbase_job_done(struct kbase_device *kbdev, u32 done)
 					JOB_SLOT_REG(i, JS_STATUS));

 				if (completion_code == BASE_JD_EVENT_STOPPED) {
+					u64 job_head;
+
 					KBASE_TLSTREAM_AUX_EVENT_JOB_SLOT(
 						kbdev, NULL,
 						i, 0, TL_JS_EVENT_SOFT_STOP);
@@ -441,6 +443,21 @@ void kbase_job_done(struct kbase_device *kbdev, u32 done)
 						((u64)kbase_reg_read(kbdev,
 						JOB_SLOT_REG(i, JS_TAIL_HI))
 						 << 32);
+					job_head = (u64)kbase_reg_read(kbdev,
+						JOB_SLOT_REG(i, JS_HEAD_LO)) |
+						((u64)kbase_reg_read(kbdev,
+						JOB_SLOT_REG(i, JS_HEAD_HI))
+						 << 32);
+					/* For a soft-stopped job chain js_tail should be
+					 * the same as the js_head, but if not then the
+					 * job chain was incorrectly marked as
+					 * soft-stopped. In such case we should not
+					 * be resuming the job chain from js_tail and
+					 * report the completion_code as UNKNOWN.
+					 */
+					if (job_tail != job_head)
+						completion_code = BASE_JD_EVENT_UNKNOWN;
+
 				} else if (completion_code ==
 						BASE_JD_EVENT_NOT_STARTED) {
 					/* PRLAM-10673 can cause a TERMINATED
@@ -922,33 +939,12 @@ void kbase_job_slot_softstop_swflags(struct kbase_device *kbdev, int js,
 			JS_COMMAND_SOFT_STOP | sw_flags);
 }

-/**
- * kbase_job_slot_softstop - Soft-stop the specified job slot
- * @kbdev:         The kbase device
- * @js:            The job slot to soft-stop
- * @target_katom:  The job that should be soft-stopped (or NULL for any job)
- * Context:
- *   The job slot lock must be held when calling this function.
- *   The job slot must not already be in the process of being soft-stopped.
- *
- * Where possible any job in the next register is evicted before the soft-stop.
- */
 void kbase_job_slot_softstop(struct kbase_device *kbdev, int js,
 				struct kbase_jd_atom *target_katom)
 {
 	kbase_job_slot_softstop_swflags(kbdev, js, target_katom, 0u);
 }

-/**
- * kbase_job_slot_hardstop - Hard-stop the specified job slot
- * @kctx:         The kbase context that contains the job(s) that should
- *                be hard-stopped
- * @js:           The job slot to hard-stop
- * @target_katom: The job that should be hard-stopped (or NULL for all
- *                jobs from the context)
- * Context:
- *   The job slot lock must be held when calling this function.
- */
 void kbase_job_slot_hardstop(struct kbase_context *kctx, int js,
 				struct kbase_jd_atom *target_katom)
 {
@@ -961,26 +957,6 @@ void kbase_job_slot_hardstop(struct kbase_context *kctx, int js,
 	CSTD_UNUSED(stopped);
 }

-/**
- * kbase_job_check_enter_disjoint - potentiall enter disjoint mode
- * @kbdev: kbase device
- * @action: the event which has occurred
- * @core_reqs: core requirements of the atom
- * @target_katom: the atom which is being affected
- *
- * For a certain soft-stop action, work out whether to enter disjoint
- * state.
- *
- * This does not register multiple disjoint events if the atom has already
- * started a disjoint period
- *
- * @core_reqs can be supplied as 0 if the atom had not started on the hardware
- * (and so a 'real' soft/hard-stop was not required, but it still interrupted
- * flow, perhaps on another context)
- *
- * kbase_job_check_leave_disjoint() should be used to end the disjoint
- * state when the soft/hard-stop action is complete
- */
 void kbase_job_check_enter_disjoint(struct kbase_device *kbdev, u32 action,
 		base_jd_core_req core_reqs, struct kbase_jd_atom *target_katom)
 {
@@ -1002,14 +978,6 @@ void kbase_job_check_enter_disjoint(struct kbase_device *kbdev, u32 action,
 	kbase_disjoint_state_up(kbdev);
 }

-/**
- * kbase_job_check_enter_disjoint - potentially leave disjoint state
- * @kbdev: kbase device
- * @target_katom: atom which is finishing
- *
- * Work out whether to leave disjoint state when finishing an atom that was
- * originated by kbase_job_check_enter_disjoint().
- */
 void kbase_job_check_leave_disjoint(struct kbase_device *kbdev,
 		struct kbase_jd_atom *target_katom)
 {
@@ -1340,8 +1308,7 @@ static void kbasep_try_reset_gpu_early(struct kbase_device *kbdev)
 * This function soft-stops all the slots to ensure that as many jobs as
 * possible are saved.
 *
- * Return:
- *   The function returns a boolean which should be interpreted as follows:
+ * Return: boolean which should be interpreted as follows:
 *   true - Prepared for reset, kbase_reset_gpu_locked should be called.
 *   false - Another thread is performing a reset, kbase_reset_gpu should
 *   not be called.
@@ -1518,9 +1485,9 @@ static u64 kbasep_apply_limited_core_mask(const struct kbase_device *kbdev,
 #ifdef CONFIG_MALI_BIFROST_DEBUG
 	dev_dbg(kbdev->dev,
 				"Limiting affinity due to BASE_JD_REQ_LIMITED_CORE_MASK from 0x%lx to 0x%lx (mask is 0x%lx)\n",
-				(unsigned long int)affinity,
-				(unsigned long int)result,
-				(unsigned long int)limited_core_mask);
+				(unsigned long)affinity,
+				(unsigned long)result,
+				(unsigned long)limited_core_mask);
 #else
 	CSTD_UNUSED(kbdev);
 #endif
--- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_rb.c
+++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_rb.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
 /*
 *
- * (C) COPYRIGHT 2014-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2022 ARM Limited. All rights reserved.
 *
 * This program is free software and is provided to you under the terms of the
 * GNU General Public License version 2 as published by the Free Software
@@ -37,14 +37,23 @@
 #include <backend/gpu/mali_kbase_jm_internal.h>
 #include <backend/gpu/mali_kbase_pm_internal.h>

-/* Return whether the specified ringbuffer is empty. HW access lock must be
- * held
+/**
+ * SLOT_RB_EMPTY - Return whether the specified ringbuffer is empty.
+ *
+ * @rb: ring buffer
+ *
+ * Note: HW access lock must be held
 */
 #define SLOT_RB_EMPTY(rb)   (rb->write_idx == rb->read_idx)
-/* Return number of atoms currently in the specified ringbuffer. HW access lock
- * must be held
+
+/**
+ * SLOT_RB_ENTRIES - Return number of atoms currently in the specified ringbuffer.
+ *
+ * @rb: ring buffer
+ *
+ * Note: HW access lock must be held
 */
-#define SLOT_RB_ENTRIES(rb) (int)(s8)(rb->write_idx - rb->read_idx)
+#define SLOT_RB_ENTRIES(rb) ((int)(s8)(rb->write_idx - rb->read_idx))

 static void kbase_gpu_release_atom(struct kbase_device *kbdev,
 					struct kbase_jd_atom *katom,
@@ -304,10 +313,10 @@ static void kbase_gpu_release_atom(struct kbase_device *kbdev,
 				[katom->slot_nr]);

 		/* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */
-
+		fallthrough;
 	case KBASE_ATOM_GPU_RB_READY:
 		/* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */
-
+		fallthrough;
 	case KBASE_ATOM_GPU_RB_WAITING_FOR_CORE_AVAILABLE:
 		break;

@@ -367,13 +376,13 @@ static void kbase_gpu_release_atom(struct kbase_device *kbdev,
 		}

 		/* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */
-
+		fallthrough;
 	case KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_PREV:
 		/* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */
-
+		fallthrough;
 	case KBASE_ATOM_GPU_RB_WAITING_BLOCKED:
 		/* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */
-
+		fallthrough;
 	case KBASE_ATOM_GPU_RB_RETURN_TO_JS:
 		break;
 	}
@@ -1813,7 +1822,7 @@ void kbase_gpu_dump_slots(struct kbase_device *kbdev)

 	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);

-	dev_info(kbdev->dev, "kbase_gpu_dump_slots:\n");
+	dev_info(kbdev->dev, "%s:\n", __func__);

 	for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) {
 		int idx;
--- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_js_internal.h
+++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_js_internal.h
@@ -62,7 +62,7 @@ void kbase_backend_timer_suspend(struct kbase_device *kbdev);
 *                              scheduling timer
 * @kbdev: Device pointer
 *
- * This function should be called on resume. Note that is is not guaranteed to
+ * This function should be called on resume. Note that it is not guaranteed to
 * re-start the timer, only evalute whether it should be re-started.
 *
 * Caller must hold runpool_mutex.
--- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_l2_mmu_config.c
+++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_l2_mmu_config.c
@@ -121,9 +121,9 @@ int kbase_set_mmu_quirks(struct kbase_device *kbdev)

 	if (kbdev->system_coherency == COHERENCY_ACE) {
 		/* Allow memory configuration disparity to be ignored,
-		* we optimize the use of shared memory and thus we
-		* expect some disparity in the memory configuration.
-		*/
+		 * we optimize the use of shared memory and thus we
+		 * expect some disparity in the memory configuration.
+		 */
 		kbdev->hw_quirks_mmu |= L2_MMU_CONFIG_ALLOW_SNOOP_DISPARITY;
 	}

--- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_model_dummy.c
+++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_model_dummy.c
@@ -1470,9 +1470,8 @@ u8 midgard_model_read_reg(void *h, u32 addr, u32 * const value)
 		pr_debug("JS_IRQ_MASK being read %x", *value);
 	}
 #else /* !MALI_USE_CSF */
-	else if (addr == JOB_CONTROL_REG(JOB_IRQ_MASK)) {
-		/* ignore JOB_IRQ_MASK as it is handled by CSFFW */
-	}
+	else if (addr == JOB_CONTROL_REG(JOB_IRQ_MASK))
+		; /* ignore JOB_IRQ_MASK as it is handled by CSFFW */
 #endif /* !MALI_USE_CSF */
 	else if (addr == GPU_CONTROL_REG(GPU_IRQ_MASK)) {
 		*value = (dummy->reset_completed_mask << 8) |
--- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_model_dummy.h
+++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_model_dummy.h
@@ -1,7 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
 /*
 *
- * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2015, 2017-2022 ARM Limited. All rights reserved.
 *
 * This program is free software and is provided to you under the terms of the
 * GNU General Public License version 2 as published by the Free Software
@@ -143,7 +143,6 @@ void midgard_model_destroy(void *h);
 u8 midgard_model_write_reg(void *h, u32 addr, u32 value);
 u8 midgard_model_read_reg(void *h, u32 addr,
 							u32 * const value);
-void gpu_generate_error(void);
 void midgard_set_error(int job_slot);
 int job_atom_inject_error(struct kbase_error_params *params);
 int gpu_model_control(void *h,
--- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_model_error_generator.c
+++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_model_error_generator.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
 /*
 *
- * (C) COPYRIGHT 2018-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2015, 2018-2022 ARM Limited. All rights reserved.
 *
 * This program is free software and is provided to you under the terms of the
 * GNU General Public License version 2 as published by the Free Software
@@ -39,7 +39,11 @@ unsigned int error_probability = 50;	/* to be set between 0 and 100 */
 /* probability to have multiple error give that there is an error */
 unsigned int multiple_error_probability = 50;

-void gpu_generate_error(void)
+#ifdef CONFIG_MALI_ERROR_INJECT_RANDOM
+/**
+ * gpu_generate_error - Generate GPU error
+ */
+static void gpu_generate_error(void)
 {
 	unsigned int errors_num = 0;

@@ -94,6 +98,7 @@ void gpu_generate_error(void)
 		}
 	}
 }
+#endif

 int job_atom_inject_error(struct kbase_error_params *params)
 {
--- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_model_linux.c
+++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_model_linux.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
 /*
 *
- * (C) COPYRIGHT 2010, 2012-2015, 2017-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010, 2012-2015, 2017-2022 ARM Limited. All rights reserved.
 *
 * This program is free software and is provided to you under the terms of the
 * GNU General Public License version 2 as published by the Free Software
@@ -135,8 +135,12 @@ void gpu_device_raise_irq(void *model,
 	default:
 		dev_warn(kbdev->dev, "Unknown IRQ");
 		kmem_cache_free(kbdev->irq_slab, data);
+		data = NULL;
+		break;
 	}
-	queue_work(kbdev->irq_workq, &data->work);
+
+	if (data != NULL)
+		queue_work(kbdev->irq_workq, &data->work);
 }

 void kbase_reg_write(struct kbase_device *kbdev, u32 offset, u32 value)
@@ -248,6 +252,11 @@ int kbase_gpu_device_create(struct kbase_device *kbdev)
 	return 0;
 }

+/**
+ * kbase_gpu_device_destroy - Destroy GPU device
+ *
+ * @kbdev: kbase device
+ */
 void kbase_gpu_device_destroy(struct kbase_device *kbdev)
 {
 	midgard_model_destroy(kbdev->model);
--- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_always_on.c
+++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_always_on.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
 /*
 *
- * (C) COPYRIGHT 2010-2015, 2018-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2015, 2018-2022 ARM Limited. All rights reserved.
 *
 * This program is free software and is provided to you under the terms of the
 * GNU General Public License version 2 as published by the Free Software
@@ -41,6 +41,11 @@ static void always_on_init(struct kbase_device *kbdev)
 	CSTD_UNUSED(kbdev);
 }

+/**
+ * always_on_term - Term callback function for always-on power policy
+ *
+ * @kbdev: kbase device
+ */
 static void always_on_term(struct kbase_device *kbdev)
 {
 	CSTD_UNUSED(kbdev);
--- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_backend.c
+++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_backend.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
 /*
 *
- * (C) COPYRIGHT 2010-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2022 ARM Limited. All rights reserved.
 *
 * This program is free software and is provided to you under the terms of the
 * GNU General Public License version 2 as published by the Free Software
@@ -101,9 +101,8 @@ int kbase_pm_runtime_init(struct kbase_device *kbdev)

 void kbase_pm_runtime_term(struct kbase_device *kbdev)
 {
-	if (kbdev->pm.callback_power_runtime_term) {
+	if (kbdev->pm.callback_power_runtime_term)
 		kbdev->pm.callback_power_runtime_term(kbdev);
-	}
 }

 void kbase_pm_register_access_enable(struct kbase_device *kbdev)
@@ -202,6 +201,13 @@ int kbase_hwaccess_pm_init(struct kbase_device *kbdev)
 		kbase_pm_hwcnt_disable_worker);
 	kbase_hwcnt_context_disable(kbdev->hwcnt_gpu_ctx);

+#if MALI_USE_CSF && defined(KBASE_PM_RUNTIME)
+	kbdev->pm.backend.gpu_sleep_supported =
+		kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_GPU_SLEEP) &&
+		!kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TURSEHW_1997) &&
+		kbdev->pm.backend.callback_power_runtime_gpu_active &&
+		kbdev->pm.backend.callback_power_runtime_gpu_idle;
+#endif

 	if (IS_ENABLED(CONFIG_MALI_HW_ERRATA_1485982_NOT_AFFECTED)) {
 		kbdev->pm.backend.l2_always_on = false;
@@ -288,7 +294,7 @@ static void pm_handle_power_off(struct kbase_device *kbdev)

 	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
 #if MALI_USE_CSF && defined(KBASE_PM_RUNTIME)
-	if (kbdev->pm.backend.gpu_wakeup_override ) {
+	if (kbdev->pm.backend.gpu_wakeup_override) {
 		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
 		return;
 	}
@@ -362,11 +368,6 @@ static void kbase_pm_gpu_poweroff_wait_wq(struct work_struct *data)

 	kbase_pm_lock(kbdev);

-#ifdef CONFIG_MALI_ARBITER_SUPPORT
-	if (kbase_pm_is_gpu_lost(kbdev))
-		backend->poweron_required = false;
-#endif
-
 	pm_handle_power_off(kbdev);

 	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
@@ -683,6 +684,13 @@ void kbase_pm_wait_for_poweroff_work_complete(struct kbase_device *kbdev)
 }
 KBASE_EXPORT_TEST_API(kbase_pm_wait_for_poweroff_work_complete);

+/**
+ * is_gpu_powered_down - Check whether GPU is powered down
+ *
+ * @kbdev: kbase device
+ *
+ * Return: true if GPU is powered down, false otherwise
+ */
 static bool is_gpu_powered_down(struct kbase_device *kbdev)
 {
 	bool ret;
@@ -882,7 +890,7 @@ void kbase_pm_set_debug_core_mask(struct kbase_device *kbdev,
 	lockdep_assert_held(&kbdev->pm.lock);

 	if (kbase_dummy_job_wa_enabled(kbdev)) {
-		dev_warn(kbdev->dev, "Change of core mask not supported for slot 0 as dummy job WA is enabled");
+		dev_warn_once(kbdev->dev, "Change of core mask not supported for slot 0 as dummy job WA is enabled");
 		new_core_mask_js0 = kbdev->pm.debug_core_mask[0];
 	}

--- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_ca.c
+++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_ca.c
@@ -55,6 +55,9 @@ void kbase_devfreq_set_core_mask(struct kbase_device *kbdev, u64 core_mask)
 {
 	struct kbase_pm_backend_data *pm_backend = &kbdev->pm.backend;
 	unsigned long flags;
+#if MALI_USE_CSF
+	u64 old_core_mask = 0;
+#endif

 	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);

@@ -65,6 +68,8 @@ void kbase_devfreq_set_core_mask(struct kbase_device *kbdev, u64 core_mask)
 			core_mask, kbdev->pm.debug_core_mask);
 		goto unlock;
 	}
+
+	old_core_mask = pm_backend->ca_cores_enabled;
 #else
 	if (!(core_mask & kbdev->pm.debug_core_mask_all)) {
 		dev_err(kbdev->dev, "OPP core mask 0x%llX does not intersect with debug mask 0x%llX\n",
@@ -73,20 +78,53 @@ void kbase_devfreq_set_core_mask(struct kbase_device *kbdev, u64 core_mask)
 	}

 	if (kbase_dummy_job_wa_enabled(kbdev)) {
-		dev_err(kbdev->dev, "Dynamic core scaling not supported as dummy job WA is enabled");
+		dev_err_once(kbdev->dev, "Dynamic core scaling not supported as dummy job WA is enabled");
 		goto unlock;
 	}
 #endif /* MALI_USE_CSF */
-
 	pm_backend->ca_cores_enabled = core_mask;

 	kbase_pm_update_state(kbdev);
-
-unlock:
 	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);

+#if MALI_USE_CSF
+	/* Check if old_core_mask contained the undesired cores and wait
+	 * for those cores to get powered down
+	 */
+	if ((core_mask & old_core_mask) != old_core_mask) {
+		bool can_wait;
+
+		spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+		can_wait = kbdev->pm.backend.gpu_ready && kbase_pm_is_mcu_desired(kbdev);
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+		/* This check is ideally not required, the wait function can
+		 * deal with the GPU power down. But it has been added to
+		 * address the scenario where down-scaling request comes from
+		 * the platform specific code soon after the GPU power down
+		 * and at the same time an application thread tries to
+		 * power up the GPU (on the flush of GPU queue).
+		 * The platform specific @ref callback_power_on that gets
+		 * invoked on power up does not return until down-scaling
+		 * request is complete. The check mitigates the race caused by
+		 * the problem in platform specific code.
+		 */
+		if (likely(can_wait)) {
+			if (kbase_pm_wait_for_desired_state(kbdev)) {
+				dev_warn(kbdev->dev,
+					 "Wait for update of core_mask from %llx to %llx failed",
+					 old_core_mask, core_mask);
+			}
+		}
+	}
+#endif
+
 	dev_dbg(kbdev->dev, "Devfreq policy : new core mask=%llX\n",
 			pm_backend->ca_cores_enabled);
+
+	return;
+unlock:
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
 }
 KBASE_EXPORT_TEST_API(kbase_devfreq_set_core_mask);
 #endif
--- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_driver.c
+++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_driver.c
@@ -101,6 +101,8 @@ static u64 kbase_pm_get_state(
 		enum kbase_pm_core_type core_type,
 		enum kbasep_pm_action action);

+static void kbase_pm_hw_issues_apply(struct kbase_device *kbdev);
+
 #if MALI_USE_CSF
 bool kbase_pm_is_mcu_desired(struct kbase_device *kbdev)
 {
@@ -655,6 +657,35 @@ static void kbase_pm_enable_mcu_db_notification(struct kbase_device *kbdev)
 }
 #endif

+
+/**
+ * kbasep_pm_toggle_power_interrupt - Toggles the IRQ mask for power interrupts
+ *                                    from the firmware
+ *
+ * @kbdev:  Pointer to the device
+ * @enable: boolean indicating to enable interrupts or not
+ *
+ * The POWER_CHANGED_ALL and POWER_CHANGED_SINGLE interrupts can be disabled
+ * after L2 has been turned on when FW is controlling the power for the shader
+ * cores. Correspondingly, the interrupts can be re-enabled after the MCU has
+ * been disabled before the power down of L2.
+ */
+static void kbasep_pm_toggle_power_interrupt(struct kbase_device *kbdev, bool enable)
+{
+	u32 irq_mask;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK));
+
+	if (enable)
+		irq_mask |= POWER_CHANGED_ALL | POWER_CHANGED_SINGLE;
+	else
+		irq_mask &= ~(POWER_CHANGED_ALL | POWER_CHANGED_SINGLE);
+
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), irq_mask);
+}
+
 static int kbase_pm_mcu_update_state(struct kbase_device *kbdev)
 {
 	struct kbase_pm_backend_data *backend = &kbdev->pm.backend;
@@ -698,6 +729,8 @@ static int kbase_pm_mcu_update_state(struct kbase_device *kbdev)
 					kbase_pm_ca_get_core_mask(kbdev);
 				kbase_csf_firmware_global_reinit(kbdev,
 					backend->shaders_desired_mask);
+				if (!kbdev->csf.firmware_hctl_core_pwr)
+					kbasep_pm_toggle_power_interrupt(kbdev, false);
 				backend->mcu_state =
 					KBASE_MCU_ON_GLB_REINIT_PEND;
 			}
@@ -906,6 +939,8 @@ static int kbase_pm_mcu_update_state(struct kbase_device *kbdev)
 		case KBASE_MCU_PEND_OFF:
 			/* wait synchronously for the MCU to get disabled */
 			kbase_csf_firmware_disable_mcu_wait(kbdev);
+			if (!kbdev->csf.firmware_hctl_core_pwr)
+				kbasep_pm_toggle_power_interrupt(kbdev, true);
 			backend->mcu_state = KBASE_MCU_OFF;
 			break;
 #ifdef KBASE_PM_RUNTIME
@@ -924,6 +959,11 @@ static int kbase_pm_mcu_update_state(struct kbase_device *kbdev)
 				backend->mcu_state = KBASE_MCU_IN_SLEEP;
 				kbase_pm_enable_db_mirror_interrupt(kbdev);
 				kbase_csf_scheduler_reval_idleness_post_sleep(kbdev);
+				/* Enable PM interrupt, after MCU has been put
+				 * to sleep, for the power down of L2.
+				 */
+				if (!kbdev->csf.firmware_hctl_core_pwr)
+					kbasep_pm_toggle_power_interrupt(kbdev, true);
 			}
 			break;

@@ -934,6 +974,11 @@ static int kbase_pm_mcu_update_state(struct kbase_device *kbdev)
 					kbdev, kbase_backend_get_cycle_cnt(kbdev));
 				kbase_pm_enable_mcu_db_notification(kbdev);
 				kbase_pm_disable_db_mirror_interrupt(kbdev);
+				/* Disable PM interrupt after L2 has been
+				 * powered up for the wakeup of MCU.
+				 */
+				if (!kbdev->csf.firmware_hctl_core_pwr)
+					kbasep_pm_toggle_power_interrupt(kbdev, false);
 				backend->mcu_state = KBASE_MCU_ON_HWCNT_ENABLE;
 			}
 			break;
@@ -1017,6 +1062,18 @@ static void kbase_pm_l2_clear_backend_slot_submit_kctx(struct kbase_device *kbde
 }
 #endif

+static bool can_power_down_l2(struct kbase_device *kbdev)
+{
+#if MALI_USE_CSF
+	/* Due to the HW issue GPU2019-3878, need to prevent L2 power off
+	 * whilst MMU command is in progress.
+	 */
+	return !kbdev->mmu_hw_operation_in_progress;
+#else
+	return true;
+#endif
+}
+
 static int kbase_pm_l2_update_state(struct kbase_device *kbdev)
 {
 	struct kbase_pm_backend_data *backend = &kbdev->pm.backend;
@@ -1258,9 +1315,8 @@ static int kbase_pm_l2_update_state(struct kbase_device *kbdev)
 			}

 			backend->hwcnt_desired = false;
-			if (!backend->hwcnt_disabled) {
+			if (!backend->hwcnt_disabled)
 				kbase_pm_trigger_hwcnt_disable(kbdev);
-			}
 #endif

 			if (backend->hwcnt_disabled) {
@@ -1297,27 +1353,31 @@ static int kbase_pm_l2_update_state(struct kbase_device *kbdev)
 			break;

 		case KBASE_L2_POWER_DOWN:
-			if (!backend->l2_always_on)
-				/* Powering off the L2 will also power off the
-				 * tiler.
-				 */
-				kbase_pm_invoke(kbdev, KBASE_PM_CORE_L2,
-						l2_present,
-						ACTION_PWROFF);
-			else
-				/* If L2 cache is powered then we must flush it
-				 * before we power off the GPU. Normally this
-				 * would have been handled when the L2 was
-				 * powered off.
-				 */
-				kbase_gpu_start_cache_clean_nolock(
-					kbdev, GPU_COMMAND_CACHE_CLN_INV_L2);
+			if (kbase_pm_is_l2_desired(kbdev))
+				backend->l2_state = KBASE_L2_PEND_ON;
+			else if (can_power_down_l2(kbdev)) {
+				if (!backend->l2_always_on)
+					/* Powering off the L2 will also power off the
+					 * tiler.
+					 */
+					kbase_pm_invoke(kbdev, KBASE_PM_CORE_L2,
+							l2_present,
+							ACTION_PWROFF);
+				else
+					/* If L2 cache is powered then we must flush it
+					 * before we power off the GPU. Normally this
+					 * would have been handled when the L2 was
+					 * powered off.
+					 */
+					kbase_gpu_start_cache_clean_nolock(
+						kbdev, GPU_COMMAND_CACHE_CLN_INV_L2);
 #if !MALI_USE_CSF
-			KBASE_KTRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE_TILER, NULL, 0u);
+				KBASE_KTRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE_TILER, NULL, 0u);
 #else
-			KBASE_KTRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE_L2, NULL, 0u);
+				KBASE_KTRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE_L2, NULL, 0u);
 #endif
-			backend->l2_state = KBASE_L2_PEND_OFF;
+				backend->l2_state = KBASE_L2_PEND_OFF;
+			}
 			break;

 		case KBASE_L2_PEND_OFF:
@@ -1803,12 +1863,7 @@ static bool kbase_pm_is_in_desired_state_nolock(struct kbase_device *kbdev)

 	lockdep_assert_held(&kbdev->hwaccess_lock);

-	if (kbase_pm_is_l2_desired(kbdev) &&
-			kbdev->pm.backend.l2_state != KBASE_L2_ON)
-		in_desired_state = false;
-	else if (!kbase_pm_is_l2_desired(kbdev) &&
-			kbdev->pm.backend.l2_state != KBASE_L2_OFF)
-		in_desired_state = false;
+	in_desired_state = kbase_pm_l2_is_in_desired_state(kbdev);

 #if !MALI_USE_CSF
 	if (kbdev->pm.backend.shaders_desired &&
@@ -1818,13 +1873,7 @@ static bool kbase_pm_is_in_desired_state_nolock(struct kbase_device *kbdev)
 			kbdev->pm.backend.shaders_state != KBASE_SHADERS_OFF_CORESTACK_OFF)
 		in_desired_state = false;
 #else
-	if (kbase_pm_is_mcu_desired(kbdev) &&
-	    kbdev->pm.backend.mcu_state != KBASE_MCU_ON)
-		in_desired_state = false;
-	else if (!kbase_pm_is_mcu_desired(kbdev) &&
-		 (kbdev->pm.backend.mcu_state != KBASE_MCU_OFF) &&
-		 (kbdev->pm.backend.mcu_state != KBASE_MCU_IN_SLEEP))
-		in_desired_state = false;
+	in_desired_state &= kbase_pm_mcu_is_in_desired_state(kbdev); /* keep L2 result */
 #endif

 	return in_desired_state;
@@ -2077,11 +2126,13 @@ void kbase_pm_reset_complete(struct kbase_device *kbdev)
 	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
 }

-/* Timeout for kbase_pm_wait_for_desired_state when wait_event_killable has
- * aborted due to a fatal signal. If the time spent waiting has exceeded this
- * threshold then there is most likely a hardware issue.
+#if !MALI_USE_CSF
+/* Timeout in milliseconds for GPU Power Management to reach the desired
+ * Shader and L2 state. If the time spent waiting has exceeded this threshold
+ * then there is most likely a hardware issue.
 */
 #define PM_TIMEOUT_MS (5000) /* 5s */
+#endif

 static void kbase_pm_timed_out(struct kbase_device *kbdev)
 {
@@ -2156,7 +2207,7 @@ int kbase_pm_wait_for_l2_powered(struct kbase_device *kbdev)
 	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);

 #if MALI_USE_CSF
-	timeout = kbase_csf_timeout_in_jiffies(PM_TIMEOUT_MS);
+	timeout = kbase_csf_timeout_in_jiffies(kbase_get_timeout_ms(kbdev, CSF_PM_TIMEOUT));
 #else
 	timeout = msecs_to_jiffies(PM_TIMEOUT_MS);
 #endif
@@ -2188,7 +2239,7 @@ int kbase_pm_wait_for_desired_state(struct kbase_device *kbdev)
 	unsigned long flags;
 	long remaining;
 #if MALI_USE_CSF
-	long timeout = kbase_csf_timeout_in_jiffies(PM_TIMEOUT_MS);
+	long timeout = kbase_csf_timeout_in_jiffies(kbase_get_timeout_ms(kbdev, CSF_PM_TIMEOUT));
 #else
 	long timeout = msecs_to_jiffies(PM_TIMEOUT_MS);
 #endif
@@ -2285,6 +2336,7 @@ static void update_user_reg_page_mapping(struct kbase_device *kbdev)
 {
 	lockdep_assert_held(&kbdev->pm.lock);

+	mutex_lock(&kbdev->csf.reg_lock);
 	if (kbdev->csf.mali_file_inode) {
 		/* This would zap the pte corresponding to the mapping of User
 		 * register page for all the Kbase contexts.
@@ -2293,6 +2345,7 @@ static void update_user_reg_page_mapping(struct kbase_device *kbdev)
 				    BASEP_MEM_CSF_USER_REG_PAGE_HANDLE,
 				    PAGE_SIZE, 1);
 	}
+	mutex_unlock(&kbdev->csf.reg_lock);
 }
 #endif

@@ -2358,6 +2411,7 @@ void kbase_pm_clock_on(struct kbase_device *kbdev, bool is_resume)
 	update_user_reg_page_mapping(kbdev);
 #endif

+
 	if (reset_required) {
 		/* GPU state was lost, reset GPU to ensure it is in a
 		 * consistent state
@@ -2659,8 +2713,8 @@ static int kbase_pm_hw_issues_detect(struct kbase_device *kbdev)
 {
 	struct device_node *np = kbdev->dev->of_node;
 	const u32 gpu_id = kbdev->gpu_props.props.raw_props.gpu_id;
-	const u32 prod_id = (gpu_id & GPU_ID_VERSION_PRODUCT_ID) >>
-				GPU_ID_VERSION_PRODUCT_ID_SHIFT;
+	const u32 prod_id =
+		(gpu_id & GPU_ID_VERSION_PRODUCT_ID) >> KBASE_GPU_ID_VERSION_PRODUCT_ID_SHIFT;
 	int error = 0;

 	kbdev->hw_quirks_gpu = 0;
--- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_internal.h
+++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_internal.h
@@ -1,7 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
 /*
 *
- * (C) COPYRIGHT 2010-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2022 ARM Limited. All rights reserved.
 *
 * This program is free software and is provided to you under the terms of the
 * GNU General Public License version 2 as published by the Free Software
@@ -301,6 +301,8 @@ void kbase_pm_update_state(struct kbase_device *kbdev);
 * kbase_pm_state_machine_init - Initialize the state machines, primarily the
 *                               shader poweroff timer
 * @kbdev: Device pointer
+ *
+ * Return: 0 on success, error code on error
 */
 int kbase_pm_state_machine_init(struct kbase_device *kbdev);

@@ -453,6 +455,8 @@ void kbase_pm_wait_for_gpu_power_down(struct kbase_device *kbdev);
 * Setup the power management callbacks and initialize/enable the runtime-pm
 * for the Mali GPU platform device, using the callback function. This must be
 * called before the kbase_pm_register_access_enable() function.
+ *
+ * Return: 0 on success, error code on error
 */
 int kbase_pm_runtime_init(struct kbase_device *kbdev);

@@ -810,8 +814,49 @@ static inline bool kbase_pm_no_mcu_core_pwroff(struct kbase_device *kbdev)
 	return kbdev->pm.backend.csf_pm_sched_flags &
 		CSF_DYNAMIC_PM_CORE_KEEP_ON;
 }
+
+/**
+ * kbase_pm_mcu_is_in_desired_state - Check if MCU is in stable ON/OFF state.
+ *
+ * @kbdev: Device pointer
+ *
+ * When the MCU is desired it must have reached KBASE_MCU_ON; when it is not
+ * desired, both KBASE_MCU_OFF and KBASE_MCU_IN_SLEEP count as settled.
+ *
+ * Return: true if MCU is in stable ON/OFF state.
+ */
+static inline bool kbase_pm_mcu_is_in_desired_state(struct kbase_device *kbdev)
+{
+	/* Wanted on: only a fully powered-up MCU qualifies. */
+	if (kbase_pm_is_mcu_desired(kbdev))
+		return kbdev->pm.backend.mcu_state == KBASE_MCU_ON;
+
+	/* Wanted off: either powered down or asleep is acceptable. */
+	return (kbdev->pm.backend.mcu_state == KBASE_MCU_OFF) ||
+	       (kbdev->pm.backend.mcu_state == KBASE_MCU_IN_SLEEP);
+}
+
 #endif

+/**
+ * kbase_pm_l2_is_in_desired_state - Check if L2 is in stable ON/OFF state.
+ *
+ * @kbdev: Device pointer
+ *
+ * The L2 counts as settled only in KBASE_L2_ON when it is desired, and only
+ * in KBASE_L2_OFF when it is not; any other state means it has not settled.
+ *
+ * Return: true if L2 is in stable ON/OFF state.
+ */
+static inline bool kbase_pm_l2_is_in_desired_state(struct kbase_device *kbdev)
+{
+	/* Compare against the single resting state matching the request. */
+	if (kbase_pm_is_l2_desired(kbdev))
+		return kbdev->pm.backend.l2_state == KBASE_L2_ON;
+
+	return kbdev->pm.backend.l2_state == KBASE_L2_OFF;
+}
+
 /**
 * kbase_pm_lock - Lock all necessary mutexes to perform PM actions
 *
--- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_metrics.c
+++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_metrics.c
@@ -491,8 +491,7 @@ static void kbase_pm_metrics_active_calc(struct kbase_device *kbdev)
 					BASE_JD_REQ_SPECIFIC_COHERENT_GROUP)
 						? katom->device_nr : 0;
 				if (!WARN_ON(device_nr >= 2))
-					kbdev->pm.backend.metrics.
-						active_cl_ctx[device_nr] = 1;
+					kbdev->pm.backend.metrics.active_cl_ctx[device_nr] = 1;
 			} else {
 				kbdev->pm.backend.metrics.active_gl_ctx[js] = 1;
 				trace_sysgraph(SGR_ACTIVE, 0, js);
--- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_policy.c
+++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_policy.c
@@ -180,9 +180,8 @@ void kbase_pm_update_dynamic_cores_onoff(struct kbase_device *kbdev)

 	shaders_desired = kbdev->pm.backend.pm_current_policy->shaders_needed(kbdev);

-	if (shaders_desired && kbase_pm_is_l2_desired(kbdev)) {
+	if (shaders_desired && kbase_pm_is_l2_desired(kbdev))
 		kbase_pm_update_state(kbdev);
-	}
 #endif
 }

@@ -249,9 +248,8 @@ KBASE_EXPORT_TEST_API(kbase_pm_get_policy);
 #if MALI_USE_CSF
 static int policy_change_wait_for_L2_off(struct kbase_device *kbdev)
 {
-#define WAIT_DURATION_MS (3000)
 	long remaining;
-	long timeout = kbase_csf_timeout_in_jiffies(WAIT_DURATION_MS);
+	long timeout = kbase_csf_timeout_in_jiffies(kbase_get_timeout_ms(kbdev, CSF_PM_TIMEOUT));
 	int err = 0;

 	/* Wait for L2 becoming off, by which the MCU is also implicitly off
--- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_time.c
+++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_time.c
@@ -113,39 +113,60 @@ unsigned int kbase_get_timeout_ms(struct kbase_device *kbdev,
 	 */

 	u64 timeout, nr_cycles = 0;
+	/* Default value to mean 'no cap' */
+	u64 timeout_cap = U64_MAX;
 	u64 freq_khz = kbdev->lowest_gpu_freq_khz;
+	/* Only for debug messages, safe default in case it's mis-maintained */
+	const char *selector_str = "(unknown)";

 	WARN_ON(!freq_khz);

 	switch (selector) {
-	/* use Firmware timeout if invalid selection */
+	case KBASE_TIMEOUT_SELECTOR_COUNT:
 	default:
 #if !MALI_USE_CSF
 		WARN(1, "Invalid timeout selector used! Using default value");
-		timeout = JM_DEFAULT_TIMEOUT_CYCLES;
-		CSTD_UNUSED(nr_cycles);
+		nr_cycles = JM_DEFAULT_TIMEOUT_CYCLES;
+		break;
 #else
+		/* Use Firmware timeout if invalid selection */
 		WARN(1,
 		     "Invalid timeout selector used! Using CSF Firmware timeout");
 		fallthrough;
 	case CSF_FIRMWARE_TIMEOUT:
+		selector_str = "CSF_FIRMWARE_TIMEOUT";
 		nr_cycles = CSF_FIRMWARE_TIMEOUT_CYCLES;
-		timeout = div_u64(nr_cycles, freq_khz);
-		/* cap CSF FW timeout to FIRMWARE_PING_INTERVAL_MS
-		 * if calculated timeout exceeds it. This should be adapted to a
-		 * direct timeout comparison once the FIRMWARE_PING_INTERVAL_MS
-		 * option is added to this timeout function. A compile-time check
-		 * such as BUILD_BUG_ON can also be done once the firmware ping
-		 * interval in cycles becomes available as a macro.
+		/* Setup a cap on CSF FW timeout to FIRMWARE_PING_INTERVAL_MS,
+		 * if calculated timeout exceeds it. This should be adapted to
+		 * a direct timeout comparison once the
+		 * FIRMWARE_PING_INTERVAL_MS option is added to this timeout
+		 * function. A compile-time check such as BUILD_BUG_ON can also
+		 * be done once the firmware ping interval in cycles becomes
+		 * available as a macro.
 		 */
-		if (timeout > FIRMWARE_PING_INTERVAL_MS) {
-			dev_dbg(kbdev->dev, "Capped CSF_FIRMWARE_TIMEOUT %llu to %d",
-				timeout, FIRMWARE_PING_INTERVAL_MS);
-			timeout = FIRMWARE_PING_INTERVAL_MS;
-		}
-#endif
+		timeout_cap = FIRMWARE_PING_INTERVAL_MS;
 		break;
+	case CSF_PM_TIMEOUT:
+		selector_str = "CSF_PM_TIMEOUT";
+		nr_cycles = CSF_PM_TIMEOUT_CYCLES;
+		break;
+	case CSF_GPU_RESET_TIMEOUT:
+		selector_str = "CSF_GPU_RESET_TIMEOUT";
+		nr_cycles = CSF_GPU_RESET_TIMEOUT_CYCLES;
+		break;
+#endif
 	}
+
+	timeout = div_u64(nr_cycles, freq_khz);
+	if (timeout > timeout_cap) {
+		dev_dbg(kbdev->dev, "Capped %s %llu to %llu", selector_str,
+			(unsigned long long)timeout, (unsigned long long)timeout_cap);
+		timeout = timeout_cap;
+	}
+	if (WARN(timeout > UINT_MAX,
+		 "Capping excessive timeout %llums for %s at freq %llukHz to UINT_MAX ms",
+		 (unsigned long long)timeout, selector_str, (unsigned long long)freq_khz))
+		timeout = UINT_MAX;
 	return (unsigned int)timeout;
 }

--- a/drivers/gpu/arm/bifrost/build.bp
+++ b/drivers/gpu/arm/bifrost/build.bp
@@ -34,6 +34,9 @@ bob_defaults {
            "CONFIG_MALI_NO_MALI_DEFAULT_GPU={{.gpu}}",
        ],
    },
+    mali_platform_dt_pin_rst: {
+        kbuild_options: ["CONFIG_MALI_PLATFORM_DT_PIN_RST=y"],
+    },
    gpu_has_csf: {
        kbuild_options: ["CONFIG_MALI_CSF_SUPPORT=y"],
    },
--- a/drivers/gpu/arm/bifrost/context/backend/mali_kbase_context_jm.c
+++ b/drivers/gpu/arm/bifrost/context/backend/mali_kbase_context_jm.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
 /*
 *
- * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
 *
 * This program is free software and is provided to you under the terms of the
 * GNU General Public License version 2 as published by the Free Software
@@ -110,6 +110,11 @@ static void kbase_context_flush_jobs(struct kbase_context *kctx)
 	flush_workqueue(kctx->jctx.job_done_wq);
 }

+/**
+ * kbase_context_free - Free kcontext at its destruction
+ *
+ * @kctx: kcontext to be freed
+ */
 static void kbase_context_free(struct kbase_context *kctx)
 {
 	kbase_timeline_post_kbase_context_destroy(kctx);
--- a/drivers/gpu/arm/bifrost/context/mali_kbase_context.c
+++ b/drivers/gpu/arm/bifrost/context/mali_kbase_context.c
@@ -152,6 +152,7 @@ int kbase_context_common_init(struct kbase_context *kctx)

 	init_waitqueue_head(&kctx->event_queue);
 	atomic_set(&kctx->event_count, 0);
+
 #if !MALI_USE_CSF
 	atomic_set(&kctx->event_closed, false);
 #if IS_ENABLED(CONFIG_GPU_TRACEPOINTS)
@@ -159,6 +160,11 @@ int kbase_context_common_init(struct kbase_context *kctx)
 #endif
 #endif

+#if MALI_USE_CSF
+	atomic64_set(&kctx->num_fixable_allocs, 0);
+	atomic64_set(&kctx->num_fixed_allocs, 0);
+#endif
+
 	bitmap_copy(kctx->cookies, &cookies_mask, BITS_PER_LONG);

 	kctx->id = atomic_add_return(1, &(kctx->kbdev->ctx_num)) - 1;
--- a/drivers/gpu/arm/bifrost/csf/ipa_control/mali_kbase_csf_ipa_control.c
+++ b/drivers/gpu/arm/bifrost/csf/ipa_control/mali_kbase_csf_ipa_control.c
@@ -52,7 +52,7 @@
 /*
 * Maximum number of loops polling the GPU before we assume the GPU has hung.
 */
-#define IPA_INACTIVE_MAX_LOOPS ((unsigned int)8000000)
+#define IPA_INACTIVE_MAX_LOOPS (8000000U)

 /*
 * Number of bits used to configure a performance counter in SELECT registers.
@@ -347,9 +347,8 @@ void kbase_ipa_control_init(struct kbase_device *kbdev)

 	spin_lock_init(&ipa_ctrl->lock);
 	ipa_ctrl->num_active_sessions = 0;
-	for (i = 0; i < KBASE_IPA_CONTROL_MAX_SESSIONS; i++) {
+	for (i = 0; i < KBASE_IPA_CONTROL_MAX_SESSIONS; i++)
 		ipa_ctrl->sessions[i].active = false;
-	}

 	listener_data = kmalloc(sizeof(struct kbase_ipa_control_listener_data),
 				GFP_KERNEL);
@@ -514,8 +513,10 @@ int kbase_ipa_control_register(
 	struct kbase_ipa_control_session *session = NULL;
 	unsigned long flags;

-	if (WARN_ON(kbdev == NULL) || WARN_ON(perf_counters == NULL) ||
-	    WARN_ON(client == NULL) ||
+	if (WARN_ON(unlikely(kbdev == NULL)))
+		return -ENODEV;
+
+	if (WARN_ON(perf_counters == NULL) || WARN_ON(client == NULL) ||
 	    WARN_ON(num_counters > KBASE_IPA_CONTROL_MAX_COUNTERS)) {
 		dev_err(kbdev->dev, "%s: wrong input arguments", __func__);
 		return -EINVAL;
@@ -697,7 +698,10 @@ int kbase_ipa_control_unregister(struct kbase_device *kbdev, const void *client)
 	unsigned long flags;
 	bool new_config = false, valid_session = false;

-	if (WARN_ON(kbdev == NULL) || WARN_ON(client == NULL)) {
+	if (WARN_ON(unlikely(kbdev == NULL)))
+		return -ENODEV;
+
+	if (WARN_ON(client == NULL)) {
 		dev_err(kbdev->dev, "%s: wrong input arguments", __func__);
 		return -EINVAL;
 	}
@@ -779,8 +783,10 @@ int kbase_ipa_control_query(struct kbase_device *kbdev, const void *client,
 	unsigned long flags;
 	bool gpu_ready;

-	if (WARN_ON(kbdev == NULL) || WARN_ON(client == NULL) ||
-	    WARN_ON(values == NULL)) {
+	if (WARN_ON(unlikely(kbdev == NULL)))
+		return -ENODEV;
+
+	if (WARN_ON(client == NULL) || WARN_ON(values == NULL)) {
 		dev_err(kbdev->dev, "%s: wrong input arguments", __func__);
 		return -EINVAL;
 	}
--- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf.c
+++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf.c
@@ -27,7 +27,7 @@
 #include <linux/export.h>
 #include <linux/priority_control_manager.h>
 #include <linux/shmem_fs.h>
-#include <uapi/gpu/arm/bifrost/csf/mali_gpu_csf_registers.h>
+#include <csf/mali_kbase_csf_registers.h>
 #include "mali_kbase_csf_tiler_heap.h"
 #include <mmu/mali_kbase_mmu.h>
 #include "mali_kbase_csf_timeout.h"
@@ -561,6 +561,10 @@ static int csf_queue_register_internal(struct kbase_context *kctx,
 	queue->sync_ptr = 0;
 	queue->sync_value = 0;

+#if IS_ENABLED(CONFIG_DEBUG_FS)
+	queue->saved_cmd_ptr = 0;
+#endif
+
 	queue->sb_status = 0;
 	queue->blocked_reason = CS_STATUS_BLOCKED_REASON_REASON_UNBLOCKED;

@@ -572,6 +576,8 @@ static int csf_queue_register_internal(struct kbase_context *kctx,
 	INIT_WORK(&queue->fatal_event_work, fatal_event_worker);
 	list_add(&queue->link, &kctx->csf.queue_list);

+	queue->extract_ofs = 0;
+
 	region->flags |= KBASE_REG_NO_USER_FREE;
 	region->user_data = queue;

@@ -621,13 +627,13 @@ int kbase_csf_queue_register_ex(struct kbase_context *kctx,
 		return -EINVAL;

 	/* Validate the cs_trace configuration parameters */
-        if (reg->ex_buffer_size &&
-            ((reg->ex_event_size > max_size) ||
-             (reg->ex_buffer_size & (reg->ex_buffer_size - 1)) ||
-             (reg->ex_buffer_size < min_buf_size)))
-          return -EINVAL;
+	if (reg->ex_buffer_size &&
+		((reg->ex_event_size > max_size) ||
+			(reg->ex_buffer_size & (reg->ex_buffer_size - 1)) ||
+			(reg->ex_buffer_size < min_buf_size)))
+		return -EINVAL;

-        return csf_queue_register_internal(kctx, NULL, reg);
+	return csf_queue_register_internal(kctx, NULL, reg);
 }

 static void unbind_queue(struct kbase_context *kctx,
@@ -1195,7 +1201,7 @@ static int create_protected_suspend_buffer(struct kbase_device *const kbdev,
 	}

 	s_buf->pma = kbase_csf_protected_memory_alloc(kbdev, phys,
-			nr_pages);
+			nr_pages, true);
 	if (s_buf->pma == NULL) {
 		err = -ENOMEM;
 		goto pma_alloc_failed;
@@ -1229,7 +1235,7 @@ mmu_insert_failed:
 	mutex_unlock(&kbdev->csf.reg_lock);

 add_va_region_failed:
-	kbase_csf_protected_memory_free(kbdev, s_buf->pma, nr_pages);
+	kbase_csf_protected_memory_free(kbdev, s_buf->pma, nr_pages, true);
 pma_alloc_failed:
 	kfree(phys);
 phy_alloc_failed:
@@ -1479,7 +1485,7 @@ static void term_protected_suspend_buffer(struct kbase_device *const kbdev,
 	kbase_remove_va_region(kbdev, s_buf->reg);
 	mutex_unlock(&kbdev->csf.reg_lock);

-	kbase_csf_protected_memory_free(kbdev, s_buf->pma, nr_pages);
+	kbase_csf_protected_memory_free(kbdev, s_buf->pma, nr_pages, true);
 	s_buf->pma = NULL;
 	kfree(s_buf->reg);
 	s_buf->reg = NULL;
@@ -1925,7 +1931,7 @@ void kbase_csf_ctx_term(struct kbase_context *kctx)
 * This function will handle the OoM event request from the firmware for the
 * CS. It will retrieve the address of heap context and heap's
 * statistics (like number of render passes in-flight) from the CS's kernel
- * kernel output page and pass them to the tiler heap function to allocate a
+ * output page and pass them to the tiler heap function to allocate a
 * new chunk.
 * It will also update the CS's kernel input page with the address
 * of a new chunk that was allocated.
@@ -2521,8 +2527,24 @@ static void process_cs_interrupts(struct kbase_queue_group *const group,
 		}
 	}

-	if (protm_pend)
-		queue_work(group->kctx->csf.wq, &group->protm_event_work);
+	if (protm_pend) {
+		struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
+		u32 current_protm_pending_seq =
+			scheduler->tick_protm_pending_seq;
+
+		if (current_protm_pending_seq > group->scan_seq_num) {
+			scheduler->tick_protm_pending_seq = group->scan_seq_num;
+			queue_work(group->kctx->csf.wq, &group->protm_event_work);
+		}
+
+		if (test_bit(group->csg_nr, scheduler->csg_slots_idle_mask)) {
+			clear_bit(group->csg_nr,
+				  scheduler->csg_slots_idle_mask);
+			dev_dbg(kbdev->dev,
+				"Group-%d on slot %d de-idled by protm request",
+				group->handle, group->csg_nr);
+		}
+	}
 }

 /**
@@ -2593,6 +2615,10 @@ static void process_csg_interrupts(struct kbase_device *const kbdev,
 			CSG_REQ, ack, CSG_REQ_SYNC_UPDATE_MASK);

 		KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SYNC_UPDATE_INTERRUPT, group, req ^ ack);
+
+		/* SYNC_UPDATE events shall invalidate GPU idle event */
+		atomic_set(&kbdev->csf.scheduler.gpu_no_longer_idle, true);
+
 		kbase_csf_event_signal_cpu_only(group->kctx);
 	}

@@ -2609,15 +2635,25 @@ static void process_csg_interrupts(struct kbase_device *const kbdev,
 		dev_dbg(kbdev->dev, "Idle notification received for Group %u on slot %d\n",
 			 group->handle, csg_nr);

-		/* Check if the scheduling tick can be advanced */
-		if (kbase_csf_scheduler_all_csgs_idle(kbdev)) {
-			if (!scheduler->gpu_idle_fw_timer_enabled)
-				kbase_csf_scheduler_advance_tick_nolock(kbdev);
-		} else if (atomic_read(&scheduler->non_idle_offslot_grps)) {
+		if (atomic_read(&scheduler->non_idle_offslot_grps)) {
 			/* If there are non-idle CSGs waiting for a slot, fire
 			 * a tock for a replacement.
 			 */
 			mod_delayed_work(scheduler->wq, &scheduler->tock_work, 0);
+		} else {
+			u32 current_protm_pending_seq =
+				scheduler->tick_protm_pending_seq;
+
+			if ((current_protm_pending_seq !=
+				KBASEP_TICK_PROTM_PEND_SCAN_SEQ_NR_INVALID) &&
+			    (group->scan_seq_num < current_protm_pending_seq)) {
+				/* If the protm enter was prevented due to groups
+				 * priority, then fire a tock for the scheduler
+				 * to re-examine the case.
+				 */
+				mod_delayed_work(scheduler->wq,
+						 &scheduler->tock_work, 0);
+			}
 		}
 	}

@@ -2803,20 +2839,29 @@ static inline void process_protm_exit(struct kbase_device *kbdev, u32 glb_ack)
 void kbase_csf_interrupt(struct kbase_device *kbdev, u32 val)
 {
 	unsigned long flags;
-	u32 remaining = val;
+	u32 csg_interrupts = val & ~JOB_IRQ_GLOBAL_IF;

 	lockdep_assert_held(&kbdev->hwaccess_lock);

 	KBASE_KTRACE_ADD(kbdev, CSF_INTERRUPT, NULL, val);
 	kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR), val);

+	if (csg_interrupts != 0) {
+		kbase_csf_scheduler_spin_lock(kbdev, &flags);
+		while (csg_interrupts != 0) {
+			int const csg_nr = ffs(csg_interrupts) - 1;
+
+			process_csg_interrupts(kbdev, csg_nr);
+			csg_interrupts &= ~(1 << csg_nr);
+		}
+		kbase_csf_scheduler_spin_unlock(kbdev, flags);
+	}
+
 	if (val & JOB_IRQ_GLOBAL_IF) {
 		const struct kbase_csf_global_iface *const global_iface =
 			&kbdev->csf.global_iface;
-		struct kbase_csf_scheduler *scheduler =	&kbdev->csf.scheduler;

 		kbdev->csf.interrupt_received = true;
-		remaining &= ~JOB_IRQ_GLOBAL_IF;

 		if (!kbdev->csf.firmware_reloaded)
 			kbase_csf_firmware_reload_completed(kbdev);
@@ -2837,31 +2882,12 @@ void kbase_csf_interrupt(struct kbase_device *kbdev, u32 val)

 			/* Handle IDLE Hysteresis notification event */
 			if ((glb_req ^ glb_ack) & GLB_REQ_IDLE_EVENT_MASK) {
-				int non_idle_offslot_grps;
-				bool can_suspend_on_idle;
-
 				dev_dbg(kbdev->dev, "Idle-hysteresis event flagged");
 				kbase_csf_firmware_global_input_mask(
 						global_iface, GLB_REQ, glb_ack,
 						GLB_REQ_IDLE_EVENT_MASK);

-				non_idle_offslot_grps = atomic_read(&scheduler->non_idle_offslot_grps);
-				can_suspend_on_idle = kbase_pm_idle_groups_sched_suspendable(kbdev);
-				KBASE_KTRACE_ADD(kbdev, SCHEDULER_CAN_IDLE, NULL,
-					((u64)(u32)non_idle_offslot_grps) | (((u64)can_suspend_on_idle) << 32));
-
-				if (!non_idle_offslot_grps) {
-					if (can_suspend_on_idle)
-						queue_work(system_highpri_wq,
-							   &scheduler->gpu_idle_work);
-				} else {
-					/* Advance the scheduling tick to get
-					 * the non-idle suspended groups loaded
-					 * soon.
-					 */
-					kbase_csf_scheduler_advance_tick_nolock(
-						kbdev);
-				}
+				kbase_csf_scheduler_process_gpu_idle_event(kbdev);
 			}

 			process_prfcnt_interrupts(kbdev, glb_req, glb_ack);
@@ -2873,23 +2899,8 @@ void kbase_csf_interrupt(struct kbase_device *kbdev, u32 val)
 			 */
 			kbase_pm_update_state(kbdev);
 		}
-
-		if (!remaining) {
-			wake_up_all(&kbdev->csf.event_wait);
-			KBASE_KTRACE_ADD(kbdev, CSF_INTERRUPT_END, NULL, val);
-			return;
-		}
 	}

-	kbase_csf_scheduler_spin_lock(kbdev, &flags);
-	while (remaining != 0) {
-		int const csg_nr = ffs(remaining) - 1;
-
-		process_csg_interrupts(kbdev, csg_nr);
-		remaining &= ~(1 << csg_nr);
-	}
-	kbase_csf_scheduler_spin_unlock(kbdev, flags);
-
 	wake_up_all(&kbdev->csf.event_wait);
 	KBASE_KTRACE_ADD(kbdev, CSF_INTERRUPT_END, NULL, val);
 }
--- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf.h
+++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf.h
@@ -40,7 +40,12 @@
 */
 #define KBASEP_USER_DB_NR_INVALID ((s8)-1)

-#define FIRMWARE_PING_INTERVAL_MS (8000) /* 8 seconds */
+/* Indicates an invalid value for the scan out sequence number, used to
+ * signify there is no group that has protected mode execution pending.
+ */
+#define KBASEP_TICK_PROTM_PEND_SCAN_SEQ_NR_INVALID (U32_MAX)
+
+#define FIRMWARE_PING_INTERVAL_MS (12000) /* 12 seconds */

 #define FIRMWARE_IDLE_HYSTERESIS_TIME_MS (10) /* Default 10 milliseconds */

@@ -312,7 +317,7 @@ int kbase_csf_setup_dummy_user_reg_page(struct kbase_device *kbdev);

 /**
 * kbase_csf_free_dummy_user_reg_page - Free the dummy page that was used
- *                                 used to replace the User register page
+ *                                      to replace the User register page
 *
 * @kbdev: Instance of a GPU platform device that implements a CSF interface.
 */
--- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_cpu_queue_debugfs.c
+++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_cpu_queue_debugfs.c
@@ -54,7 +54,7 @@ static int kbasep_csf_cpu_queue_debugfs_show(struct seq_file *file, void *data)
 	mutex_lock(&kctx->csf.lock);
 	if (atomic_read(&kctx->csf.cpu_queue.dump_req_status) !=
 				BASE_CSF_CPU_QUEUE_DUMP_COMPLETE) {
-		seq_printf(file, "Dump request already started! (try again)\n");
+		seq_puts(file, "Dump request already started! (try again)\n");
 		mutex_unlock(&kctx->csf.lock);
 		return -EBUSY;
 	}
@@ -64,7 +64,8 @@ static int kbasep_csf_cpu_queue_debugfs_show(struct seq_file *file, void *data)
 	kbase_event_wakeup(kctx);
 	mutex_unlock(&kctx->csf.lock);

-	seq_printf(file, "CPU Queues table (version:v%u):\n", MALI_CSF_CPU_QUEUE_DEBUGFS_VERSION);
+	seq_puts(file,
+		"CPU Queues table (version:v" __stringify(MALI_CSF_CPU_QUEUE_DEBUGFS_VERSION) "):\n");

 	wait_for_completion_timeout(&kctx->csf.cpu_queue.dump_cmp,
 			msecs_to_jiffies(3000));
@@ -79,9 +80,8 @@ static int kbasep_csf_cpu_queue_debugfs_show(struct seq_file *file, void *data)
 		kfree(kctx->csf.cpu_queue.buffer);
 		kctx->csf.cpu_queue.buffer = NULL;
 		kctx->csf.cpu_queue.buffer_size = 0;
-	}
-	else
-		seq_printf(file, "Dump error! (time out)\n");
+	} else
+		seq_puts(file, "Dump error! (time out)\n");

 	atomic_set(&kctx->csf.cpu_queue.dump_req_status,
 			BASE_CSF_CPU_QUEUE_DUMP_COMPLETE);
--- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_csg_debugfs.c
+++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_csg_debugfs.c
@@ -172,16 +172,18 @@ static void kbasep_csf_scheduler_dump_active_queue(struct seq_file *file,
 	cs_active = addr[CS_ACTIVE/4];

 #define KBASEP_CSF_DEBUGFS_CS_HEADER_USER_IO \
-	"Bind Idx,     Ringbuf addr, Prio,    Insert offset,   Extract offset, Active, Doorbell\n"
+	"Bind Idx,     Ringbuf addr,     Size, Prio,    Insert offset,   Extract offset, Active, Doorbell\n"

-	seq_printf(file, KBASEP_CSF_DEBUGFS_CS_HEADER_USER_IO "%8d, %16llx, %4u, %16llx, %16llx, %6u, %8d\n",
-			queue->csi_index, queue->base_addr, queue->priority,
-			cs_insert, cs_extract, cs_active, queue->doorbell_nr);
+	seq_printf(file, KBASEP_CSF_DEBUGFS_CS_HEADER_USER_IO "%8d, %16llx, %8x, %4u, %16llx, %16llx, %6u, %8d\n",
+			queue->csi_index, queue->base_addr,
+			queue->size,
+			queue->priority, cs_insert, cs_extract, cs_active, queue->doorbell_nr);

 	/* Print status information for blocked group waiting for sync object. For on-slot queues,
 	 * if cs_trace is enabled, dump the interface's cs_trace configuration.
 	 */
 	if (kbase_csf_scheduler_group_get_slot(queue->group) < 0) {
+		seq_printf(file, "SAVED_CMD_PTR: 0x%llx\n", queue->saved_cmd_ptr);
 		if (CS_STATUS_WAIT_SYNC_WAIT_GET(queue->status_wait)) {
 			wait_status = queue->status_wait;
 			wait_sync_value = queue->sync_value;
@@ -268,17 +270,13 @@ static void kbasep_csf_scheduler_dump_active_queue(struct seq_file *file,
 	seq_puts(file, "\n");
 }

-/* Waiting timeout for STATUS_UPDATE acknowledgment, in milliseconds */
-#define CSF_STATUS_UPDATE_TO_MS (100)
-
 static void update_active_group_status(struct seq_file *file,
 		struct kbase_queue_group *const group)
 {
 	struct kbase_device *const kbdev = group->kctx->kbdev;
 	struct kbase_csf_cmd_stream_group_info const *const ginfo =
 		&kbdev->csf.global_iface.groups[group->csg_nr];
-	long remaining =
-		kbase_csf_timeout_in_jiffies(CSF_STATUS_UPDATE_TO_MS);
+	long remaining = kbase_csf_timeout_in_jiffies(kbdev->csf.fw_timeout_ms);
 	unsigned long flags;

 	/* Global doorbell ring for CSG STATUS_UPDATE request or User doorbell
@@ -327,6 +325,7 @@ static void kbasep_csf_scheduler_dump_active_group(struct seq_file *file,
 		struct kbase_device *const kbdev = group->kctx->kbdev;
 		u32 ep_c, ep_r;
 		char exclusive;
+		char idle = 'N';
 		struct kbase_csf_cmd_stream_group_info const *const ginfo =
 			&kbdev->csf.global_iface.groups[group->csg_nr];
 		u8 slot_priority =
@@ -345,8 +344,12 @@ static void kbasep_csf_scheduler_dump_active_group(struct seq_file *file,
 		else
 			exclusive = '0';

-		seq_puts(file, "GroupID, CSG NR, CSG Prio, Run State, Priority, C_EP(Alloc/Req), F_EP(Alloc/Req), T_EP(Alloc/Req), Exclusive\n");
-		seq_printf(file, "%7d, %6d, %8d, %9d, %8d, %11d/%3d, %11d/%3d, %11d/%3d, %9c\n",
+		if (kbase_csf_firmware_csg_output(ginfo, CSG_STATUS_STATE) &
+				CSG_STATUS_STATE_IDLE_MASK)
+			idle = 'Y';
+
+		seq_puts(file, "GroupID, CSG NR, CSG Prio, Run State, Priority, C_EP(Alloc/Req), F_EP(Alloc/Req), T_EP(Alloc/Req), Exclusive, Idle\n");
+		seq_printf(file, "%7d, %6d, %8d, %9d, %8d, %11d/%3d, %11d/%3d, %11d/%3d, %9c, %4c\n",
 			group->handle,
 			group->csg_nr,
 			slot_priority,
@@ -358,7 +361,8 @@ static void kbasep_csf_scheduler_dump_active_group(struct seq_file *file,
 			CSG_STATUS_EP_REQ_FRAGMENT_EP_GET(ep_r),
 			CSG_STATUS_EP_CURRENT_TILER_EP_GET(ep_c),
 			CSG_STATUS_EP_REQ_TILER_EP_GET(ep_r),
-			exclusive);
+			exclusive,
+			idle);

 		/* Wait for the User doobell ring to take effect */
 		if (kbdev->csf.scheduler.state != SCHED_SLEEPING)
--- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_defs.h
+++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_defs.h
@@ -1,7 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
 /*
 *
- * (C) COPYRIGHT 2018-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2018-2022 ARM Limited. All rights reserved.
 *
 * This program is free software and is provided to you under the terms of the
 * GNU General Public License version 2 as published by the Free Software
@@ -252,6 +252,24 @@ enum kbase_queue_group_priority {
 	KBASE_QUEUE_GROUP_PRIORITY_COUNT
 };

+/**
+ * enum kbase_timeout_selector - The choice of which timeout to get scaled
+ *                               using the lowest GPU frequency.
+ * @CSF_FIRMWARE_TIMEOUT: Response timeout from CSF firmware.
+ * @CSF_PM_TIMEOUT: Timeout for GPU Power Management to reach the desired
+ *                  Shader, L2 and MCU state.
+ * @CSF_GPU_RESET_TIMEOUT: Waiting timeout for GPU reset to complete.
+ * @KBASE_TIMEOUT_SELECTOR_COUNT: Number of timeout selectors, not a usable
+ *                                selector itself. Must be last in the enum.
+ */
+enum kbase_timeout_selector {
+	CSF_FIRMWARE_TIMEOUT,
+	CSF_PM_TIMEOUT,
+	CSF_GPU_RESET_TIMEOUT,
+
+	/* Must stay the last entry so that it equals the number of selectors */
+	KBASE_TIMEOUT_SELECTOR_COUNT
+};

 /**
 * struct kbase_csf_notification - Event or error generated as part of command
@@ -333,6 +351,13 @@ struct kbase_csf_notification {
 * @cs_fatal_info:    Records additional information about the CS fatal event.
 * @cs_fatal:         Records information about the CS fatal event.
 * @pending:          Indicating whether the queue has new submitted work.
+ * @extract_ofs: The current EXTRACT offset, this is updated during certain
+ *               events such as GPU idle IRQ in order to help detect a
+ *               queue's true idle status.
+ * @saved_cmd_ptr: The command pointer value for the GPU queue, saved when the
+ *                 group to which queue is bound is suspended.
+ *                 This can be useful in certain cases to know up to which
+ *                 point execution had reached in the linear command buffer.
 */
 struct kbase_queue {
 	struct kbase_context *kctx;
@@ -367,6 +392,10 @@ struct kbase_queue {
 	u64 cs_fatal_info;
 	u32 cs_fatal;
 	atomic_t pending;
+	u64 extract_ofs;
+#if IS_ENABLED(CONFIG_DEBUG_FS)
+	u64 saved_cmd_ptr;
+#endif
 };

 /**
@@ -851,11 +880,14 @@ struct kbase_csf_csg_slot {
 *                          This pointer being set doesn't necessarily indicates
 *                          that GPU is in protected mode, kbdev->protected_mode
 *                          needs to be checked for that.
- * @gpu_idle_fw_timer_enabled: Whether the CSF scheduler has activiated the
- *                            firmware idle hysteresis timer for preparing a
- *                            GPU suspend on idle.
+ * @idle_wq:                Workqueue for executing GPU idle notification
+ *                          handler.
 * @gpu_idle_work:          Work item for facilitating the scheduler to bring
 *                          the GPU to a low-power mode on becoming idle.
+ * @gpu_no_longer_idle:     Effective only when the GPU idle worker has been
+ *                          queued for execution, this indicates whether the
+ *                          GPU has become non-idle since the last time the
+ *                          idle notification was received.
 * @non_idle_offslot_grps:  Count of off-slot non-idle groups. Reset during
 *                          the scheduler active phase in a tick. It then
 *                          tracks the count of non-idle groups across all the
@@ -876,6 +908,12 @@ struct kbase_csf_csg_slot {
 *                          when scheduling tick needs to be advanced from
 *                          interrupt context, without actually deactivating
 *                          the @tick_timer first and then enqueing @tick_work.
+ * @tick_protm_pending_seq: Scan out sequence number of the group that has
+ *                          protected mode execution pending for the queue(s)
+ *                          bound to it and will be considered first for the
+ *                          protected mode execution compared to other such
+ *                          groups. It is updated on every tick/tock.
+ *                          @interrupt_lock is used to serialize the access.
 */
 struct kbase_csf_scheduler {
 	struct mutex lock;
@@ -907,13 +945,15 @@ struct kbase_csf_scheduler {
 	struct kbase_queue_group *top_grp;
 	bool tock_pending_request;
 	struct kbase_queue_group *active_protm_grp;
-	bool gpu_idle_fw_timer_enabled;
+	struct workqueue_struct *idle_wq;
 	struct work_struct gpu_idle_work;
+	atomic_t gpu_no_longer_idle;
 	atomic_t non_idle_offslot_grps;
 	u32 non_idle_scanout_grps;
 	u32 pm_active_count;
 	unsigned int csg_scheduling_period_ms;
 	bool tick_timer_active;
+	u32 tick_protm_pending_seq;
 };

 /*
@@ -1050,8 +1090,7 @@ struct kbase_ipa_control_prfcnt_config {
 *
 */
 struct kbase_ipa_control_prfcnt_block {
-	struct kbase_ipa_control_prfcnt_config
-		select[KBASE_IPA_CONTROL_NUM_BLOCK_COUNTERS];
+	struct kbase_ipa_control_prfcnt_config select[KBASE_IPA_CONTROL_NUM_BLOCK_COUNTERS];
 	size_t num_available_counters;
 };

@@ -1074,8 +1113,7 @@ struct kbase_ipa_control_prfcnt_block {
 */
 struct kbase_ipa_control {
 	struct kbase_ipa_control_prfcnt_block blocks[KBASE_IPA_CORE_TYPE_NUM];
-	struct kbase_ipa_control_session
-		sessions[KBASE_IPA_CONTROL_MAX_SESSIONS];
+	struct kbase_ipa_control_session sessions[KBASE_IPA_CONTROL_MAX_SESSIONS];
 	spinlock_t lock;
 	void *rtm_listener_data;
 	size_t num_active_sessions;
@@ -1089,8 +1127,15 @@ struct kbase_ipa_control {
 * @node:  Interface objects are on the kbase_device:csf.firmware_interfaces
 *         list using this list_head to link them
 * @phys:  Array of the physical (tagged) addresses making up this interface
+ * @reuse_pages: Flag used to identify if the FW interface entry reuses
+ *               physical pages allocated for another FW interface entry.
+ * @is_small_page: Flag used to identify if small pages are used for
+ *                 the FW interface entry.
 * @name:  NULL-terminated string naming the interface
 * @num_pages: Number of entries in @phys and @pma (and length of the interface)
+ * @num_pages_aligned: Same as @num_pages except for the case when @is_small_page
+ *                     is false and @reuse_pages is false and therefore will be
+ *                     aligned to NUM_4K_PAGES_IN_2MB_PAGE.
 * @virtual: Starting GPU virtual address this interface is mapped at
 * @flags: bitmask of CSF_FIRMWARE_ENTRY_* conveying the interface attributes
 * @data_start: Offset into firmware image at which the interface data starts
@@ -1102,8 +1147,11 @@ struct kbase_ipa_control {
 struct kbase_csf_firmware_interface {
 	struct list_head node;
 	struct tagged_addr *phys;
+	bool reuse_pages;
+	bool is_small_page;
 	char *name;
 	u32 num_pages;
+	u32 num_pages_aligned;
 	u32 virtual;
 	u32 flags;
 	u32 data_start;
@@ -1177,7 +1225,7 @@ struct kbase_csf_hwcnt {
 * @reg_lock:               Lock to serialize the MCU firmware related actions
 *                          that affect all contexts such as allocation of
 *                          regions from shared interface area, assignment of
- *                          of hardware doorbell pages, assignment of CSGs,
+ *                          hardware doorbell pages, assignment of CSGs,
 *                          sending global requests.
 * @event_wait:             Wait queue to wait for receiving csf events, i.e.
 *                          the interrupt from CSF firmware, or scheduler state
@@ -1200,6 +1248,10 @@ struct kbase_csf_hwcnt {
 *                          in GPU reset has completed.
 * @firmware_reload_needed: Flag for indicating that the firmware needs to be
 *                          reloaded as part of the GPU reset action.
+ * @firmware_full_reload_needed: Flag for indicating that the firmware needs to
+ *                               be fully re-loaded. This may be set when the
+ *                               boot or re-init of MCU fails after a successful
+ *                               soft reset.
 * @firmware_hctl_core_pwr: Flag for indicating that the host diver is in
 *                          charge of the shader core's power transitions, and
 *                          the mcu_core_pwroff timeout feature is disabled
@@ -1259,6 +1311,7 @@ struct kbase_csf_device {
 	bool firmware_inited;
 	bool firmware_reloaded;
 	bool firmware_reload_needed;
+	bool firmware_full_reload_needed;
 	bool firmware_hctl_core_pwr;
 	struct work_struct firmware_reload_work;
 	bool glb_init_request_pending;
--- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_event.h
+++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_event.h
@@ -1,7 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
 /*
 *
- * (C) COPYRIGHT 2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved.
 *
 * This program is free software and is provided to you under the terms of the
 * GNU General Public License version 2 as published by the Free Software
@@ -110,9 +110,9 @@ static inline void kbase_csf_event_signal_cpu_only(struct kbase_context *kctx)
 /**
 * kbase_csf_event_init - Initialize event object
 *
- * This function initializes the event object.
- *
 * @kctx: The kbase context whose event object will be initialized.
+ *
+ * This function initializes the event object.
 */
 void kbase_csf_event_init(struct kbase_context *const kctx);

--- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware.c
+++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
 /*
 *
- * (C) COPYRIGHT 2018-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2018-2022 ARM Limited. All rights reserved.
 *
 * This program is free software and is provided to you under the terms of the
 * GNU General Public License version 2 as published by the Free Software
@@ -24,6 +24,7 @@
 #include "mali_kbase_csf_trace_buffer.h"
 #include "mali_kbase_csf_timeout.h"
 #include "mali_kbase_mem.h"
+#include "mali_kbase_mem_pool_group.h"
 #include "mali_kbase_reset_gpu.h"
 #include "mali_kbase_ctx_sched.h"
 #include "mali_kbase_csf_scheduler.h"
@@ -35,7 +36,7 @@
 #include "mali_kbase_csf_tl_reader.h"
 #include "backend/gpu/mali_kbase_clk_rate_trace_mgr.h"
 #include <csf/ipa_control/mali_kbase_csf_ipa_control.h>
-#include <uapi/gpu/arm/bifrost/csf/mali_gpu_csf_registers.h>
+#include <csf/mali_kbase_csf_registers.h>

 #include <linux/list.h>
 #include <linux/slab.h>
@@ -50,7 +51,6 @@
 #include <asm/arch_timer.h>

 #define MALI_MAX_FIRMWARE_NAME_LEN ((size_t)20)
-#define ACK_TIMEOUT_MILLISECONDS 1000

 static char fw_name[MALI_MAX_FIRMWARE_NAME_LEN] = "mali_csffw.bin";
 module_param_string(fw_name, fw_name, sizeof(fw_name), 0644);
@@ -105,9 +105,9 @@ MODULE_PARM_DESC(fw_debug,

 #define CSF_MAX_FW_STOP_LOOPS            (100000)

-#define CSF_GLB_REQ_CFG_MASK                                                   \
-	(GLB_REQ_CFG_ALLOC_EN_MASK | GLB_REQ_CFG_PROGRESS_TIMER_MASK |         \
-	 GLB_REQ_CFG_PWROFF_TIMER_MASK)
+#define CSF_GLB_REQ_CFG_MASK                                                                       \
+	(GLB_REQ_CFG_ALLOC_EN_MASK | GLB_REQ_CFG_PROGRESS_TIMER_MASK |                             \
+	 GLB_REQ_CFG_PWROFF_TIMER_MASK | GLB_REQ_IDLE_ENABLE_MASK)

 static inline u32 input_page_read(const u32 *const input, const u32 offset)
 {
@@ -190,11 +190,11 @@ static int setup_shared_iface_static_region(struct kbase_device *kbdev)
 		return -EINVAL;

 	reg = kbase_alloc_free_region(&kbdev->csf.shared_reg_rbtree, 0,
-			interface->num_pages, KBASE_REG_ZONE_MCU_SHARED);
+			interface->num_pages_aligned, KBASE_REG_ZONE_MCU_SHARED);
 	if (reg) {
 		mutex_lock(&kbdev->csf.reg_lock);
 		ret = kbase_add_va_region_rbtree(kbdev, reg,
-				interface->virtual, interface->num_pages, 1);
+				interface->virtual, interface->num_pages_aligned, 1);
 		mutex_unlock(&kbdev->csf.reg_lock);
 		if (ret)
 			kfree(reg);
@@ -423,7 +423,7 @@ static void load_fw_image_section(struct kbase_device *kbdev, const u8 *data,
 	}
 }

-static int reload_fw_data_sections(struct kbase_device *kbdev)
+static int reload_fw_image(struct kbase_device *kbdev)
 {
 	const u32 magic = FIRMWARE_HEADER_MAGIC;
 	struct kbase_csf_firmware_interface *interface;
@@ -451,23 +451,78 @@ static int reload_fw_data_sections(struct kbase_device *kbdev)
 	}

 	list_for_each_entry(interface, &kbdev->csf.firmware_interfaces, node) {
-		/* Skip reload of text & read only data sections */
-		if ((interface->flags & CSF_FIRMWARE_ENTRY_EXECUTE) ||
-		    !(interface->flags & CSF_FIRMWARE_ENTRY_WRITE))
-			continue;
+		/* Don't skip re-loading any section if full reload was requested */
+		if (!kbdev->csf.firmware_full_reload_needed) {
+			/* Skip reload of text & read only data sections */
+			if ((interface->flags & CSF_FIRMWARE_ENTRY_EXECUTE) ||
+			    !(interface->flags & CSF_FIRMWARE_ENTRY_WRITE))
+				continue;
+		}

 		load_fw_image_section(kbdev, firmware->data, interface->phys,
 			interface->num_pages, interface->flags,
 			interface->data_start, interface->data_end);
 	}

-	kbase_csf_firmware_reload_trace_buffers_data(kbdev);
+	kbdev->csf.firmware_full_reload_needed = false;

+	kbase_csf_firmware_reload_trace_buffers_data(kbdev);
 out:
 	release_firmware(firmware);
 	return ret;
 }

+/**
+ * entry_find_large_page_to_reuse() - Find if the large page of previously parsed
+ *                                    FW interface entry can be reused to store
+ *                                    the contents of new FW interface entry.
+ *
+ * @kbdev: Kbase device structure
+ * @virtual_start: Start of the virtual address range required for an entry allocation
+ * @virtual_end: End of the virtual address range required for an entry allocation
+ * @phys: Pointer to the array of physical (tagged) addresses making up the new
+ *        FW interface entry. It is an output parameter which would be made to
+ *        point to an already existing array allocated for the previously parsed
+ *        FW interface entry using large page(s). If no appropriate entry is
+ *        found it is set to NULL.
+ * @pma:  Pointer to a protected memory allocation. It is an output parameter
+ *        which would be made to point to the protected memory allocation of a
+ *        previously parsed FW interface entry using large page(s) from protected
+ *        memory. If no appropriate entry is found it is set to NULL.
+ * @num_pages: Number of pages requested.
+ * @num_pages_aligned: This is an output parameter used to carry the number of 4KB pages
+ *                     within the 2MB pages aligned allocation.
+ * @is_small_page: This is an output flag used to select between the small and large page
+ *                 to be used for the FW entry allocation.
+ *
+ * Go through all the already initialized interfaces and find if a previously
+ * allocated large page can be used to store contents of new FW interface entry.
+ *
+ * Return: true if a large page can be reused, false otherwise.
+ */
+static inline bool entry_find_large_page_to_reuse(
+	struct kbase_device *kbdev, const u32 virtual_start, const u32 virtual_end,
+	struct tagged_addr **phys, struct protected_memory_allocation ***pma,
+	u32 num_pages, u32 *num_pages_aligned, bool *is_small_page)
+{
+	struct kbase_csf_firmware_interface *interface = NULL;
+	struct kbase_csf_firmware_interface *target_interface = NULL;
+	u32 virtual_diff_min = U32_MAX;
+	bool reuse_large_page = false;
+
+	CSTD_UNUSED(interface);
+	CSTD_UNUSED(target_interface);
+	CSTD_UNUSED(virtual_diff_min);
+
+	*num_pages_aligned = num_pages;
+	*is_small_page = true;
+	*phys = NULL;
+	*pma = NULL;
+
+	/* NOTE(review): no lookup is done in this body; the defaults above are returned. */
+	return reuse_large_page;
+}
+
 /**
 * parse_memory_setup_entry() - Process an "interface memory setup" section
 *
@@ -493,6 +548,7 @@ static int parse_memory_setup_entry(struct kbase_device *kbdev,
 	const u32 data_start = entry[3];
 	const u32 data_end = entry[4];
 	u32 num_pages;
+	u32 num_pages_aligned;
 	char *name;
 	struct tagged_addr *phys = NULL;
 	struct kbase_csf_firmware_interface *interface = NULL;
@@ -500,6 +556,8 @@ static int parse_memory_setup_entry(struct kbase_device *kbdev,
 	unsigned long mem_flags = 0;
 	u32 cache_mode = 0;
 	struct protected_memory_allocation **pma = NULL;
+	bool reuse_pages = false;
+	bool is_small_page = true;

 	if (data_end < data_start) {
 		dev_err(kbdev->dev, "Firmware corrupt, data_end < data_start (0x%x<0x%x)\n",
@@ -542,23 +600,37 @@ static int parse_memory_setup_entry(struct kbase_device *kbdev,
 	num_pages = (virtual_end - virtual_start)
 		>> PAGE_SHIFT;

-	phys = kmalloc_array(num_pages, sizeof(*phys), GFP_KERNEL);
+	reuse_pages = entry_find_large_page_to_reuse(
+		kbdev, virtual_start, virtual_end, &phys, &pma,
+		num_pages, &num_pages_aligned, &is_small_page);
+	if (!reuse_pages)
+		phys = kmalloc_array(num_pages_aligned, sizeof(*phys), GFP_KERNEL);
+
 	if (!phys)
 		return -ENOMEM;

 	if (protected_mode) {
-		pma = kbase_csf_protected_memory_alloc(kbdev, phys, num_pages);
-
-		if (pma == NULL) {
-			ret = -ENOMEM;
-			goto out;
+		if (!reuse_pages) {
+			pma = kbase_csf_protected_memory_alloc(
+				kbdev, phys, num_pages_aligned, is_small_page);
 		}
+
+		if (!pma)
+			ret = -ENOMEM;
 	} else {
-		ret = kbase_mem_pool_alloc_pages(
-			&kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW],
-			num_pages, phys, false);
-		if (ret < 0)
-			goto out;
+		if (!reuse_pages) {
+			ret = kbase_mem_pool_alloc_pages(
+				kbase_mem_pool_group_select(
+					kbdev, KBASE_MEM_GROUP_CSF_FW, is_small_page),
+				num_pages_aligned, phys, false);
+		}
+	}
+
+	if (ret < 0) {
+		dev_err(kbdev->dev,
+			"Failed to allocate %u physical pages for the firmware interface entry at VA 0x%x\n",
+			num_pages_aligned, virtual_start);
+		goto out;
 	}

 	allocated_pages = true;
@@ -584,7 +656,10 @@ static int parse_memory_setup_entry(struct kbase_device *kbdev,

 	interface->name = name;
 	interface->phys = phys;
+	interface->reuse_pages = reuse_pages;
+	interface->is_small_page = is_small_page;
 	interface->num_pages = num_pages;
+	interface->num_pages_aligned = num_pages_aligned;
 	interface->virtual = virtual_start;
 	interface->kernel_map = NULL;
 	interface->flags = flags;
@@ -645,15 +720,17 @@ static int parse_memory_setup_entry(struct kbase_device *kbdev,

 	list_add(&interface->node, &kbdev->csf.firmware_interfaces);

-	ret = kbase_mmu_insert_pages_no_flush(kbdev, &kbdev->csf.mcu_mmu,
-			virtual_start >> PAGE_SHIFT, phys, num_pages, mem_flags,
-			KBASE_MEM_GROUP_CSF_FW);
+	if (!reuse_pages) {
+		ret = kbase_mmu_insert_pages_no_flush(kbdev, &kbdev->csf.mcu_mmu,
+				virtual_start >> PAGE_SHIFT, phys, num_pages_aligned, mem_flags,
+				KBASE_MEM_GROUP_CSF_FW);

-	if (ret != 0) {
-		dev_err(kbdev->dev, "Failed to insert firmware pages\n");
-		/* The interface has been added to the list, so cleanup will
-		 * be handled by firmware unloading
-		 */
+		if (ret != 0) {
+			dev_err(kbdev->dev, "Failed to insert firmware pages\n");
+			/* The interface has been added to the list, so cleanup will
+			 * be handled by firmware unloading
+			 */
+		}
 	}

 	dev_dbg(kbdev->dev, "Processed section '%s'", name);
@@ -662,16 +739,22 @@ static int parse_memory_setup_entry(struct kbase_device *kbdev,

 out:
 	if (allocated_pages) {
-		if (protected_mode) {
-			kbase_csf_protected_memory_free(kbdev, pma, num_pages);
-		} else {
-			kbase_mem_pool_free_pages(
-				&kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW],
-				num_pages, phys, false, false);
+		if (!reuse_pages) {
+			if (protected_mode) {
+				kbase_csf_protected_memory_free(
+					kbdev, pma, num_pages_aligned, is_small_page);
+			} else {
+				kbase_mem_pool_free_pages(
+					kbase_mem_pool_group_select(
+						kbdev, KBASE_MEM_GROUP_CSF_FW, is_small_page),
+					num_pages_aligned, phys, false, false);
+			}
 		}
 	}

-	kfree(phys);
+	if (!reuse_pages)
+		kfree(phys);
+
 	kfree(interface);
 	return ret;
 }
@@ -994,11 +1077,10 @@ static int parse_capabilities(struct kbase_device *kbdev)
 	iface->group_stride = shared_info[GLB_GROUP_STRIDE/4];
 	iface->prfcnt_size = shared_info[GLB_PRFCNT_SIZE/4];

-	if (iface->version >= kbase_csf_interface_version(1, 1, 0)) {
+	if (iface->version >= kbase_csf_interface_version(1, 1, 0))
 		iface->instr_features = shared_info[GLB_INSTR_FEATURES / 4];
-	} else {
+	else
 		iface->instr_features = 0;
-	}

 	if ((GROUP_CONTROL_0 +
 		(unsigned long)iface->group_num * iface->group_stride) >
@@ -1378,16 +1460,28 @@ static void set_timeout_global(
 	set_global_request(global_iface, GLB_REQ_CFG_PROGRESS_TIMER_MASK);
 }

+static void enable_gpu_idle_timer(struct kbase_device *const kbdev)
+{
+	struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface;
+
+	kbase_csf_scheduler_spin_lock_assert_held(kbdev);
+	/* Write the idle timer count, then request IDLE_ENABLE through GLB_REQ. */
+	kbase_csf_firmware_global_input(global_iface, GLB_IDLE_TIMER,
+					kbdev->csf.gpu_idle_dur_count);
+	kbase_csf_firmware_global_input_mask(global_iface, GLB_REQ, GLB_REQ_REQ_IDLE_ENABLE,
+					     GLB_REQ_IDLE_ENABLE_MASK);
+	dev_dbg(kbdev->dev, "Enabling GPU idle timer with count-value: 0x%.8x",
+		kbdev->csf.gpu_idle_dur_count);
+}
+
 static void global_init(struct kbase_device *const kbdev, u64 core_mask)
 {
-	u32 const ack_irq_mask = GLB_ACK_IRQ_MASK_CFG_ALLOC_EN_MASK |
-				 GLB_ACK_IRQ_MASK_PING_MASK |
-				 GLB_ACK_IRQ_MASK_CFG_PROGRESS_TIMER_MASK |
-				 GLB_ACK_IRQ_MASK_PROTM_ENTER_MASK |
-				 GLB_ACK_IRQ_MASK_PROTM_EXIT_MASK |
-				 GLB_ACK_IRQ_MASK_FIRMWARE_CONFIG_UPDATE_MASK |
-				 GLB_ACK_IRQ_MASK_CFG_PWROFF_TIMER_MASK |
-				 GLB_ACK_IRQ_MASK_IDLE_EVENT_MASK;
+	u32 const ack_irq_mask =
+		GLB_ACK_IRQ_MASK_CFG_ALLOC_EN_MASK | GLB_ACK_IRQ_MASK_PING_MASK |
+		GLB_ACK_IRQ_MASK_CFG_PROGRESS_TIMER_MASK | GLB_ACK_IRQ_MASK_PROTM_ENTER_MASK |
+		GLB_ACK_IRQ_MASK_PROTM_EXIT_MASK | GLB_ACK_IRQ_MASK_FIRMWARE_CONFIG_UPDATE_MASK |
+		GLB_ACK_IRQ_MASK_CFG_PWROFF_TIMER_MASK | GLB_ACK_IRQ_MASK_IDLE_EVENT_MASK |
+		GLB_ACK_IRQ_MASK_IDLE_ENABLE_MASK;

 	const struct kbase_csf_global_iface *const global_iface =
 		&kbdev->csf.global_iface;
@@ -1401,6 +1495,12 @@ static void global_init(struct kbase_device *const kbdev, u64 core_mask)

 	set_timeout_global(global_iface, kbase_csf_timeout_get(kbdev));

+	/* The GPU idle timer is always enabled for simplicity. Checks will be
+	 * done before scheduling the GPU idle worker to see if it is
+	 * appropriate for the current power policy.
+	 */
+	enable_gpu_idle_timer(kbdev);
+
 	/* Unmask the interrupts */
 	kbase_csf_firmware_global_input(global_iface,
 		GLB_ACK_IRQ_MASK, ack_irq_mask);
@@ -1507,7 +1607,7 @@ static void kbase_csf_firmware_reload_worker(struct work_struct *work)
 	KBASE_TLSTREAM_TL_KBASE_CSFFW_FW_RELOADING(kbdev, kbase_backend_get_cycle_cnt(kbdev));

 	/* Reload just the data sections from firmware binary image */
-	err = reload_fw_data_sections(kbdev);
+	err = reload_fw_image(kbdev);
 	if (err)
 		return;

@@ -1598,7 +1698,14 @@ static u32 convert_dur_to_idle_count(struct kbase_device *kbdev, const u32 dur_m

 u32 kbase_csf_firmware_get_gpu_idle_hysteresis_time(struct kbase_device *kbdev)
 {
-	return kbdev->csf.gpu_idle_hysteresis_ms;
+	unsigned long flags;
+	u32 dur;
+
+	kbase_csf_scheduler_spin_lock(kbdev, &flags);
+	dur = kbdev->csf.gpu_idle_hysteresis_ms;
+	kbase_csf_scheduler_spin_unlock(kbdev, flags);
+
+	return dur;
 }

 u32 kbase_csf_firmware_set_gpu_idle_hysteresis_time(struct kbase_device *kbdev, u32 dur)
@@ -1606,11 +1713,53 @@ u32 kbase_csf_firmware_set_gpu_idle_hysteresis_time(struct kbase_device *kbdev,
 	unsigned long flags;
 	const u32 hysteresis_val = convert_dur_to_idle_count(kbdev, dur);

+	/* The 'fw_load_lock' is taken to synchronize against the deferred
+	 * loading of FW, where the idle timer will be enabled.
+	 */
+	mutex_lock(&kbdev->fw_load_lock);
+	if (unlikely(!kbdev->csf.firmware_inited)) {
+		kbase_csf_scheduler_spin_lock(kbdev, &flags);
+		kbdev->csf.gpu_idle_hysteresis_ms = dur;
+		kbdev->csf.gpu_idle_dur_count = hysteresis_val;
+		kbase_csf_scheduler_spin_unlock(kbdev, flags);
+		mutex_unlock(&kbdev->fw_load_lock);
+		goto end;
+	}
+	mutex_unlock(&kbdev->fw_load_lock);
+
+	kbase_csf_scheduler_pm_active(kbdev);
+	if (kbase_csf_scheduler_wait_mcu_active(kbdev)) {
+		dev_err(kbdev->dev,
+			"Unable to activate the MCU, the idle hysteresis value shall remain unchanged");
+		kbase_csf_scheduler_pm_idle(kbdev);
+		return kbdev->csf.gpu_idle_dur_count;
+	}
+
+	/* The 'reg_lock' is also taken and is held until the update is
+	 * complete, to ensure that updates of the idle timer value by multiple
+	 * users get serialized.
+	 */
+	mutex_lock(&kbdev->csf.reg_lock);
+	/* The firmware only reads the new idle timer value when the timer is
+	 * disabled.
+	 */
+	kbase_csf_scheduler_spin_lock(kbdev, &flags);
+	kbase_csf_firmware_disable_gpu_idle_timer(kbdev);
+	kbase_csf_scheduler_spin_unlock(kbdev, flags);
+	/* Ensure that the request has taken effect */
+	wait_for_global_request(kbdev, GLB_REQ_IDLE_DISABLE_MASK);
+
 	kbase_csf_scheduler_spin_lock(kbdev, &flags);
 	kbdev->csf.gpu_idle_hysteresis_ms = dur;
 	kbdev->csf.gpu_idle_dur_count = hysteresis_val;
+	kbase_csf_firmware_enable_gpu_idle_timer(kbdev);
 	kbase_csf_scheduler_spin_unlock(kbdev, flags);
+	wait_for_global_request(kbdev, GLB_REQ_IDLE_ENABLE_MASK);
+	mutex_unlock(&kbdev->csf.reg_lock);

+	kbase_csf_scheduler_pm_idle(kbdev);
+
+end:
 	dev_dbg(kbdev->dev, "CSF set firmware idle hysteresis count-value: 0x%.8x",
 		hysteresis_val);

@@ -1711,7 +1860,7 @@ static int kbase_device_csf_iterator_trace_init(struct kbase_device *kbdev)
 				long ack_timeout;

 				ack_timeout = kbase_csf_timeout_in_jiffies(
-						ACK_TIMEOUT_MILLISECONDS);
+					kbase_get_timeout_ms(kbdev, CSF_FIRMWARE_TIMEOUT));

 				/* write enable request to global input */
 				kbase_csf_firmware_global_input_mask(
@@ -1748,6 +1897,20 @@ int kbase_csf_firmware_early_init(struct kbase_device *kbdev)
 	kbdev->csf.fw_timeout_ms =
 		kbase_get_timeout_ms(kbdev, CSF_FIRMWARE_TIMEOUT);

+	kbdev->csf.gpu_idle_hysteresis_ms = FIRMWARE_IDLE_HYSTERESIS_TIME_MS;
+#ifdef KBASE_PM_RUNTIME
+	if (kbase_pm_gpu_sleep_allowed(kbdev))
+		kbdev->csf.gpu_idle_hysteresis_ms /=
+			FIRMWARE_IDLE_HYSTERESIS_GPU_SLEEP_SCALER;
+#endif
+	WARN_ON(!kbdev->csf.gpu_idle_hysteresis_ms);
+	kbdev->csf.gpu_idle_dur_count = convert_dur_to_idle_count(
+		kbdev, kbdev->csf.gpu_idle_hysteresis_ms);
+
+	kbdev->csf.mcu_core_pwroff_dur_us = DEFAULT_GLB_PWROFF_TIMEOUT_US;
+	kbdev->csf.mcu_core_pwroff_dur_count = convert_dur_to_core_pwroff_count(
+		kbdev, DEFAULT_GLB_PWROFF_TIMEOUT_US);
+
 	INIT_LIST_HEAD(&kbdev->csf.firmware_interfaces);
 	INIT_LIST_HEAD(&kbdev->csf.firmware_config);
 	INIT_LIST_HEAD(&kbdev->csf.firmware_timeline_metadata);
@@ -1786,20 +1949,6 @@ int kbase_csf_firmware_init(struct kbase_device *kbdev)
 		return ret;
 	}

-	kbdev->csf.gpu_idle_hysteresis_ms = FIRMWARE_IDLE_HYSTERESIS_TIME_MS;
-#ifdef KBASE_PM_RUNTIME
-	if (kbase_pm_gpu_sleep_allowed(kbdev))
-		kbdev->csf.gpu_idle_hysteresis_ms /=
-			FIRMWARE_IDLE_HYSTERESIS_GPU_SLEEP_SCALER;
-#endif
-	WARN_ON(!kbdev->csf.gpu_idle_hysteresis_ms);
-	kbdev->csf.gpu_idle_dur_count = convert_dur_to_idle_count(
-		kbdev, kbdev->csf.gpu_idle_hysteresis_ms);
-
-	kbdev->csf.mcu_core_pwroff_dur_us = DEFAULT_GLB_PWROFF_TIMEOUT_US;
-	kbdev->csf.mcu_core_pwroff_dur_count = convert_dur_to_core_pwroff_count(
-		kbdev, DEFAULT_GLB_PWROFF_TIMEOUT_US);
-
 	ret = kbase_mcu_shared_interface_region_tracker_init(kbdev);
 	if (ret != 0) {
 		dev_err(kbdev->dev,
@@ -1992,17 +2141,25 @@ void kbase_csf_firmware_term(struct kbase_device *kbdev)
 		list_del(&interface->node);

 		vunmap(interface->kernel_map);
-		if (interface->flags & CSF_FIRMWARE_ENTRY_PROTECTED) {
-			kbase_csf_protected_memory_free(kbdev, interface->pma,
-				interface->num_pages);
-		} else {
-			kbase_mem_pool_free_pages(
-				&kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW],
-				interface->num_pages, interface->phys,
-				true, false);
+
+		if (!interface->reuse_pages) {
+			if (interface->flags & CSF_FIRMWARE_ENTRY_PROTECTED) {
+				kbase_csf_protected_memory_free(
+					kbdev, interface->pma, interface->num_pages_aligned,
+					interface->is_small_page);
+			} else {
+				kbase_mem_pool_free_pages(
+					kbase_mem_pool_group_select(
+						kbdev, KBASE_MEM_GROUP_CSF_FW,
+						interface->is_small_page),
+					interface->num_pages_aligned,
+					interface->phys,
+					true, false);
+			}
+
+			kfree(interface->phys);
 		}

-		kfree(interface->phys);
 		kfree(interface);
 	}

@@ -2034,29 +2191,19 @@ void kbase_csf_firmware_term(struct kbase_device *kbdev)
 void kbase_csf_firmware_enable_gpu_idle_timer(struct kbase_device *kbdev)
 {
 	struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface;
-	const u32 glb_req =
-		kbase_csf_firmware_global_input_read(global_iface, GLB_REQ);
+	const u32 glb_req = kbase_csf_firmware_global_input_read(global_iface, GLB_REQ);

 	kbase_csf_scheduler_spin_lock_assert_held(kbdev);
-
 	/* The scheduler is assumed to only call the enable when its internal
 	 * state indicates that the idle timer has previously been disabled. So
 	 * on entry the expected field values are:
 	 *   1. GLOBAL_INPUT_BLOCK.GLB_REQ.IDLE_ENABLE: 0
 	 *   2. GLOBAL_OUTPUT_BLOCK.GLB_ACK.IDLE_ENABLE: 0, or, on 1 -> 0
 	 */
-
 	if (glb_req & GLB_REQ_IDLE_ENABLE_MASK)
 		dev_err(kbdev->dev, "Incoherent scheduler state on REQ_IDLE_ENABLE!");

-	kbase_csf_firmware_global_input(global_iface, GLB_IDLE_TIMER,
-					kbdev->csf.gpu_idle_dur_count);
-
-	kbase_csf_firmware_global_input_mask(global_iface, GLB_REQ,
-				GLB_REQ_REQ_IDLE_ENABLE, GLB_REQ_IDLE_ENABLE_MASK);
-
-	dev_dbg(kbdev->dev, "Enabling GPU idle timer with count-value: 0x%.8x",
-		kbdev->csf.gpu_idle_dur_count);
+	enable_gpu_idle_timer(kbdev);
 	kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR);
 }

@@ -2120,6 +2267,8 @@ void kbase_csf_enter_protected_mode(struct kbase_device *kbdev)
 {
 	struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface;

+	KBASE_TLSTREAM_AUX_PROTECTED_ENTER_START(kbdev, kbdev);
+
 	kbase_csf_scheduler_spin_lock_assert_held(kbdev);
 	set_global_request(global_iface, GLB_REQ_PROTM_ENTER_MASK);
 	dev_dbg(kbdev->dev, "Sending request to enter protected mode");
@@ -2134,6 +2283,8 @@ void kbase_csf_wait_protected_mode_enter(struct kbase_device *kbdev)
 		if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_NONE))
 			kbase_reset_gpu(kbdev);
 	}
+
+	KBASE_TLSTREAM_AUX_PROTECTED_ENTER_END(kbdev, kbdev);
 }

 void kbase_csf_firmware_trigger_mcu_halt(struct kbase_device *kbdev)
--- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware.h
+++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware.h
@@ -1,7 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
 /*
 *
- * (C) COPYRIGHT 2018-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2018-2022 ARM Limited. All rights reserved.
 *
 * This program is free software and is provided to you under the terms of the
 * GNU General Public License version 2 as published by the Free Software
@@ -23,7 +23,7 @@
 #define _KBASE_CSF_FIRMWARE_H_

 #include "device/mali_kbase_device.h"
-#include <uapi/gpu/arm/bifrost/csf/mali_gpu_csf_registers.h>
+#include <csf/mali_kbase_csf_registers.h>

 /*
 * PAGE_KERNEL_RO was only defined on 32bit ARM in 4.19 in:
@@ -75,7 +75,7 @@
 #define MAX_SUPPORTED_CSGS 31
 /* GROUP_STREAM_NUM: At least 8 CSs per CSG, but no more than 32 */
 #define MIN_SUPPORTED_STREAMS_PER_GROUP 8
-/* Maximum CSs per csg. */
+/* MAX_SUPPORTED_STREAMS_PER_GROUP: Maximum CSs per csg. */
 #define MAX_SUPPORTED_STREAMS_PER_GROUP 32

 struct kbase_device;
@@ -777,7 +777,7 @@ u32 kbase_csf_firmware_set_mcu_core_pwroff_time(struct kbase_device *kbdev, u32
 /**
 * kbase_csf_interface_version - Helper function to build the full firmware
 *                               interface version in a format compatible with
- *                               with GLB_VERSION register
+ *                               GLB_VERSION register
 *
 * @major:     major version of csf interface
 * @minor:     minor version of csf interface
--- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware_cfg.c
+++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware_cfg.c
@@ -67,9 +67,9 @@ struct firmware_config {
 			.mode = VERIFY_OCTAL_PERMISSIONS(_mode),	\
 	}

-static FW_CFG_ATTR(min, S_IRUGO);
-static FW_CFG_ATTR(max, S_IRUGO);
-static FW_CFG_ATTR(cur, S_IRUGO | S_IWUSR);
+static FW_CFG_ATTR(min, 0444);
+static FW_CFG_ATTR(max, 0444);
+static FW_CFG_ATTR(cur, 0644);

 static void fw_cfg_kobj_release(struct kobject *kobj)
 {
--- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware_no_mali.c
+++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware_no_mali.c
@@ -101,7 +101,7 @@ struct dummy_firmware_interface {

 #define CSF_GLB_REQ_CFG_MASK                                                   \
 	(GLB_REQ_CFG_ALLOC_EN_MASK | GLB_REQ_CFG_PROGRESS_TIMER_MASK |         \
-	 GLB_REQ_CFG_PWROFF_TIMER_MASK)
+	 GLB_REQ_CFG_PWROFF_TIMER_MASK | GLB_REQ_IDLE_ENABLE_MASK)

 static inline u32 input_page_read(const u32 *const input, const u32 offset)
 {
@@ -193,9 +193,8 @@ static int invent_cmd_stream_group_info(struct kbase_device *kbdev,
 	ginfo->stream_stride = 0;

 	ginfo->streams = kcalloc(ginfo->stream_num, sizeof(*ginfo->streams), GFP_KERNEL);
-	if (ginfo->streams == NULL) {
+	if (ginfo->streams == NULL)
 		return -ENOMEM;
-	}

 	for (sid = 0; sid < ginfo->stream_num; ++sid) {
 		struct kbase_csf_cmd_stream_info *stream = &ginfo->streams[sid];
@@ -241,9 +240,8 @@ static int invent_capabilities(struct kbase_device *kbdev)
 	iface->group_stride = 0;

 	iface->groups = kcalloc(iface->group_num, sizeof(*iface->groups), GFP_KERNEL);
-	if (iface->groups == NULL) {
+	if (iface->groups == NULL)
 		return -ENOMEM;
-	}

 	for (gid = 0; gid < iface->group_num; ++gid) {
 		int err;
@@ -619,6 +617,20 @@ static void set_timeout_global(
 	set_global_request(global_iface, GLB_REQ_CFG_PROGRESS_TIMER_MASK);
 }

+static void enable_gpu_idle_timer(struct kbase_device *const kbdev)
+{
+	struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface;
+
+	kbase_csf_scheduler_spin_lock_assert_held(kbdev);
+	/* Write the idle timer count, then request IDLE_ENABLE through GLB_REQ. */
+	kbase_csf_firmware_global_input(global_iface, GLB_IDLE_TIMER,
+					kbdev->csf.gpu_idle_dur_count);
+	kbase_csf_firmware_global_input_mask(global_iface, GLB_REQ, GLB_REQ_REQ_IDLE_ENABLE,
+					     GLB_REQ_IDLE_ENABLE_MASK);
+	dev_dbg(kbdev->dev, "Enabling GPU idle timer with count-value: 0x%.8x",
+		kbdev->csf.gpu_idle_dur_count);
+}
+
 static void global_init(struct kbase_device *const kbdev, u64 core_mask)
 {
 	u32 const ack_irq_mask = GLB_ACK_IRQ_MASK_CFG_ALLOC_EN_MASK |
@@ -628,7 +640,8 @@ static void global_init(struct kbase_device *const kbdev, u64 core_mask)
 				 GLB_ACK_IRQ_MASK_FIRMWARE_CONFIG_UPDATE_MASK |
 				 GLB_ACK_IRQ_MASK_PROTM_EXIT_MASK |
 				 GLB_ACK_IRQ_MASK_CFG_PWROFF_TIMER_MASK |
-				 GLB_ACK_IRQ_MASK_IDLE_EVENT_MASK;
+				 GLB_ACK_IRQ_MASK_IDLE_EVENT_MASK |
+				 GLB_ACK_IRQ_MASK_IDLE_ENABLE_MASK;

 	const struct kbase_csf_global_iface *const global_iface =
 		&kbdev->csf.global_iface;
@@ -642,6 +655,12 @@ static void global_init(struct kbase_device *const kbdev, u64 core_mask)

 	set_timeout_global(global_iface, kbase_csf_timeout_get(kbdev));

+	/* The GPU idle timer is always enabled for simplicity. Checks will be
+	 * done before scheduling the GPU idle worker to see if it is
+	 * appropriate for the current power policy.
+	 */
+	enable_gpu_idle_timer(kbdev);
+
 	/* Unmask the interrupts */
 	kbase_csf_firmware_global_input(global_iface,
 		GLB_ACK_IRQ_MASK, ack_irq_mask);
@@ -809,7 +828,14 @@ static u32 convert_dur_to_idle_count(struct kbase_device *kbdev, const u32 dur_m

 u32 kbase_csf_firmware_get_gpu_idle_hysteresis_time(struct kbase_device *kbdev)
 {
-	return kbdev->csf.gpu_idle_hysteresis_ms;
+	unsigned long irq_flags;
+	u32 hysteresis_ms;
+
+	kbase_csf_scheduler_spin_lock(kbdev, &irq_flags);
+	hysteresis_ms = kbdev->csf.gpu_idle_hysteresis_ms;
+	kbase_csf_scheduler_spin_unlock(kbdev, irq_flags);
+
+	return hysteresis_ms;
 }

 u32 kbase_csf_firmware_set_gpu_idle_hysteresis_time(struct kbase_device *kbdev, u32 dur)
@@ -817,11 +843,53 @@ u32 kbase_csf_firmware_set_gpu_idle_hysteresis_time(struct kbase_device *kbdev,
 	unsigned long flags;
 	const u32 hysteresis_val = convert_dur_to_idle_count(kbdev, dur);

+	/* The 'fw_load_lock' is taken to synchronize against the deferred
+	 * loading of FW, where the idle timer will be enabled.
+	 */
+	mutex_lock(&kbdev->fw_load_lock);
+	if (unlikely(!kbdev->csf.firmware_inited)) {
+		kbase_csf_scheduler_spin_lock(kbdev, &flags);
+		kbdev->csf.gpu_idle_hysteresis_ms = dur;
+		kbdev->csf.gpu_idle_dur_count = hysteresis_val;
+		kbase_csf_scheduler_spin_unlock(kbdev, flags);
+		mutex_unlock(&kbdev->fw_load_lock);
+		goto end;
+	}
+	mutex_unlock(&kbdev->fw_load_lock);
+
+	kbase_csf_scheduler_pm_active(kbdev);
+	if (kbase_csf_scheduler_wait_mcu_active(kbdev)) {
+		dev_err(kbdev->dev,
+			"Unable to activate the MCU, the idle hysteresis value shall remain unchanged");
+		kbase_csf_scheduler_pm_idle(kbdev);
+		return kbdev->csf.gpu_idle_dur_count;
+	}
+
+	/* The 'reg_lock' is also taken and held until the update is
+	 * complete, to ensure that updates of the idle timer value by
+	 * multiple users are serialized.
+	 */
+	mutex_lock(&kbdev->csf.reg_lock);
+	/* The firmware only reads the new idle timer value when the timer is
+	 * disabled.
+	 */
+	kbase_csf_scheduler_spin_lock(kbdev, &flags);
+	kbase_csf_firmware_disable_gpu_idle_timer(kbdev);
+	kbase_csf_scheduler_spin_unlock(kbdev, flags);
+	/* Ensure that the request has taken effect */
+	wait_for_global_request(kbdev, GLB_REQ_IDLE_DISABLE_MASK);
+
 	kbase_csf_scheduler_spin_lock(kbdev, &flags);
 	kbdev->csf.gpu_idle_hysteresis_ms = dur;
 	kbdev->csf.gpu_idle_dur_count = hysteresis_val;
+	kbase_csf_firmware_enable_gpu_idle_timer(kbdev);
 	kbase_csf_scheduler_spin_unlock(kbdev, flags);
+	wait_for_global_request(kbdev, GLB_REQ_IDLE_ENABLE_MASK);
+	mutex_unlock(&kbdev->csf.reg_lock);

+	kbase_csf_scheduler_pm_idle(kbdev);
+
+end:
 	dev_dbg(kbdev->dev, "CSF set firmware idle hysteresis count-value: 0x%.8x",
 		hysteresis_val);

@@ -897,6 +965,16 @@ int kbase_csf_firmware_early_init(struct kbase_device *kbdev)
 	kbdev->csf.fw_timeout_ms =
 		kbase_get_timeout_ms(kbdev, CSF_FIRMWARE_TIMEOUT);

+	kbdev->csf.gpu_idle_hysteresis_ms = FIRMWARE_IDLE_HYSTERESIS_TIME_MS;
+#ifdef KBASE_PM_RUNTIME
+	if (kbase_pm_gpu_sleep_allowed(kbdev))
+		kbdev->csf.gpu_idle_hysteresis_ms /=
+			FIRMWARE_IDLE_HYSTERESIS_GPU_SLEEP_SCALER;
+#endif
+	WARN_ON(!kbdev->csf.gpu_idle_hysteresis_ms);
+	kbdev->csf.gpu_idle_dur_count = convert_dur_to_idle_count(
+		kbdev, kbdev->csf.gpu_idle_hysteresis_ms);
+
 	INIT_LIST_HEAD(&kbdev->csf.firmware_interfaces);
 	INIT_LIST_HEAD(&kbdev->csf.firmware_config);
 	INIT_LIST_HEAD(&kbdev->csf.firmware_trace_buffers.list);
@@ -928,16 +1006,6 @@ int kbase_csf_firmware_init(struct kbase_device *kbdev)
 		return ret;
 	}

-	kbdev->csf.gpu_idle_hysteresis_ms = FIRMWARE_IDLE_HYSTERESIS_TIME_MS;
-#ifdef KBASE_PM_RUNTIME
-	if (kbase_pm_gpu_sleep_allowed(kbdev))
-		kbdev->csf.gpu_idle_hysteresis_ms /=
-			FIRMWARE_IDLE_HYSTERESIS_GPU_SLEEP_SCALER;
-#endif
-	WARN_ON(!kbdev->csf.gpu_idle_hysteresis_ms);
-	kbdev->csf.gpu_idle_dur_count = convert_dur_to_idle_count(
-		kbdev, kbdev->csf.gpu_idle_hysteresis_ms);
-
 	ret = kbase_mcu_shared_interface_region_tracker_init(kbdev);
 	if (ret != 0) {
 		dev_err(kbdev->dev,
@@ -1035,29 +1103,19 @@ void kbase_csf_firmware_term(struct kbase_device *kbdev)
 void kbase_csf_firmware_enable_gpu_idle_timer(struct kbase_device *kbdev)
 {
 	struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface;
-	u32 glb_req;
+	const u32 glb_req = kbase_csf_firmware_global_input_read(global_iface, GLB_REQ);

 	kbase_csf_scheduler_spin_lock_assert_held(kbdev);
-
 	/* The scheduler is assumed to only call the enable when its internal
 	 * state indicates that the idle timer has previously been disabled. So
 	 * on entry the expected field values are:
 	 *   1. GLOBAL_INPUT_BLOCK.GLB_REQ.IDLE_ENABLE: 0
 	 *   2. GLOBAL_OUTPUT_BLOCK.GLB_ACK.IDLE_ENABLE: 0, or, on 1 -> 0
 	 */
-
-	glb_req = kbase_csf_firmware_global_input_read(global_iface, GLB_REQ);
 	if (glb_req & GLB_REQ_IDLE_ENABLE_MASK)
 		dev_err(kbdev->dev, "Incoherent scheduler state on REQ_IDLE_ENABLE!");

-	kbase_csf_firmware_global_input(global_iface, GLB_IDLE_TIMER,
-					kbdev->csf.gpu_idle_dur_count);
-
-	kbase_csf_firmware_global_input_mask(global_iface, GLB_REQ,
-				GLB_REQ_REQ_IDLE_ENABLE, GLB_REQ_IDLE_ENABLE_MASK);
-
-	dev_dbg(kbdev->dev, "Enabling GPU idle timer with count-value: 0x%.8x",
-		kbdev->csf.gpu_idle_dur_count);
+	enable_gpu_idle_timer(kbdev);
 	kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR);
 }

--- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_heap_context_alloc.c
+++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_heap_context_alloc.c
@@ -174,17 +174,15 @@ u64 kbase_csf_heap_context_allocator_alloc(
 	 * allocate it.
 	 */
 	if (!ctx_alloc->region) {
-		ctx_alloc->region =
-			kbase_mem_alloc(kctx, nr_pages, nr_pages, 0, &flags,
-					&ctx_alloc->gpu_va, mmu_sync_info);
+		ctx_alloc->region = kbase_mem_alloc(kctx, nr_pages, nr_pages, 0, &flags,
+						    &ctx_alloc->gpu_va, mmu_sync_info);
 	}

 	/* If the pool still isn't allocated then an error occurred. */
-	if (unlikely(!ctx_alloc->region)) {
+	if (unlikely(!ctx_alloc->region))
 		dev_dbg(kctx->kbdev->dev, "Failed to allocate a pool of tiler heap contexts");
-	} else {
+	else
 		heap_gpu_va = sub_alloc(ctx_alloc);
-	}

 	mutex_unlock(&ctx_alloc->lock);

--- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_kcpu.c
+++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_kcpu.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
 /*
 *
- * (C) COPYRIGHT 2018-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2018-2022 ARM Limited. All rights reserved.
 *
 * This program is free software and is provided to you under the terms of the
 * GNU General Public License version 2 as published by the Free Software
@@ -45,6 +45,10 @@ static int kbase_kcpu_map_import_prepare(
 {
 	struct kbase_context *const kctx = kcpu_queue->kctx;
 	struct kbase_va_region *reg;
+	struct kbase_mem_phy_alloc *alloc;
+	struct page **pages;
+	struct tagged_addr *pa;
+	long i;
 	int ret = 0;

 	lockdep_assert_held(&kctx->csf.kcpu_queues.lock);
@@ -76,6 +80,13 @@ static int kbase_kcpu_map_import_prepare(
 		ret = kbase_jd_user_buf_pin_pages(kctx, reg);
 		if (ret)
 			goto out;
+
+		alloc = reg->gpu_alloc;
+		pa = kbase_get_gpu_phy_pages(reg);
+		pages = alloc->imported.user_buf.pages;
+
+		for (i = 0; i < alloc->nents; i++)
+			pa[i] = as_tagged(page_to_phys(pages[i]));
 	}

 	current_command->type = BASE_KCPU_COMMAND_TYPE_MAP_IMPORT;
@@ -172,8 +183,8 @@ static void kbase_jit_add_to_pending_alloc_list(
 	list_for_each_entry(blocked_queue,
 			&kctx->csf.kcpu_queues.jit_blocked_queues,
 			jit_blocked) {
-		struct kbase_kcpu_command const*const jit_alloc_cmd =
-				&blocked_queue->commands[blocked_queue->start_offset];
+		struct kbase_kcpu_command const *const jit_alloc_cmd =
+			&blocked_queue->commands[blocked_queue->start_offset];

 		WARN_ON(jit_alloc_cmd->type != BASE_KCPU_COMMAND_TYPE_JIT_ALLOC);
 		if (cmd->enqueue_ts < jit_alloc_cmd->enqueue_ts) {
@@ -244,7 +255,7 @@ static int kbase_kcpu_jit_allocate_process(
 					break;

 				if (jit_cmd->type == BASE_KCPU_COMMAND_TYPE_JIT_FREE) {
-					u8 const*const free_ids = jit_cmd->info.jit_free.ids;
+					u8 const *const free_ids = jit_cmd->info.jit_free.ids;

 					if (free_ids && *free_ids && kctx->jit_alloc[*free_ids]) {
 						/*
@@ -456,8 +467,8 @@ static int kbase_kcpu_jit_free_process(struct kbase_kcpu_command_queue *queue,

 	lockdep_assert_held(&kctx->csf.kcpu_queues.lock);

-	KBASE_TLSTREAM_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_EXECUTE_JIT_FREE_END(
-		queue->kctx->kbdev, queue);
+	KBASE_TLSTREAM_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_EXECUTE_JIT_FREE_END(queue->kctx->kbdev,
+									   queue);

 	for (i = 0; i < count; i++) {
 		u64 pages_used = 0;
@@ -636,7 +647,7 @@ static int kbase_csf_queue_group_suspend_prepare(
 		struct tagged_addr *page_array;
 		u64 start, end, i;

-		if (!(reg->flags & BASE_MEM_SAME_VA) ||
+		if (((reg->flags & KBASE_REG_ZONE_MASK) != KBASE_REG_ZONE_SAME_VA) ||
 				reg->nr_pages < nr_pages ||
 				kbase_reg_current_backed_size(reg) !=
 					reg->nr_pages) {
@@ -734,8 +745,8 @@ static int kbase_kcpu_cqs_wait_process(struct kbase_device *kbdev,
 						cqs_wait->objs[i].addr, &mapping);

 			if (!queue->command_started) {
-				KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_START(
-					kbdev, queue);
+				KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_START(kbdev,
+											 queue);
 				queue->command_started = true;
 				KBASE_KTRACE_ADD_CSF_KCPU(kbdev, CQS_WAIT_START,
 						   queue, cqs_wait->nr_objs, 0);
@@ -764,8 +775,7 @@ static int kbase_kcpu_cqs_wait_process(struct kbase_device *kbdev,
 						error);

 				KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_END(
-					kbdev, queue,
-					evt[BASEP_EVENT_ERR_INDEX]);
+					kbdev, queue, evt[BASEP_EVENT_ERR_INDEX]);
 				queue->command_started = false;
 			}

@@ -855,8 +865,7 @@ static void kbase_kcpu_cqs_set_process(struct kbase_device *kbdev,
 		evt = (u32 *)kbase_phy_alloc_mapping_get(
 			queue->kctx, cqs_set->objs[i].addr, &mapping);

-		KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_SET(kbdev, queue,
-								  evt ? 0 : 1);
+		KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_SET(kbdev, queue, evt ? 0 : 1);

 		if (!evt) {
 			dev_warn(kbdev->dev,
@@ -1490,8 +1499,7 @@ static void KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_ALLOC_INFO(
 {
 	u8 i;

-	KBASE_TLSTREAM_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_EXECUTE_JIT_ALLOC_END(
-		kbdev, queue);
+	KBASE_TLSTREAM_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_EXECUTE_JIT_ALLOC_END(kbdev, queue);
 	for (i = 0; i < jit_alloc->count; i++) {
 		const u8 id = jit_alloc->info[i].id;
 		const struct kbase_va_region *reg = queue->kctx->jit_alloc[id];
@@ -1521,16 +1529,14 @@ static void KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_ALLOC_END(
 	struct kbase_device *kbdev,
 	const struct kbase_kcpu_command_queue *queue)
 {
-	KBASE_TLSTREAM_TL_KBASE_ARRAY_END_KCPUQUEUE_EXECUTE_JIT_ALLOC_END(
-		kbdev, queue);
+	KBASE_TLSTREAM_TL_KBASE_ARRAY_END_KCPUQUEUE_EXECUTE_JIT_ALLOC_END(kbdev, queue);
 }

 static void KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_FREE_END(
 	struct kbase_device *kbdev,
 	const struct kbase_kcpu_command_queue *queue)
 {
-	KBASE_TLSTREAM_TL_KBASE_ARRAY_END_KCPUQUEUE_EXECUTE_JIT_FREE_END(
-		kbdev, queue);
+	KBASE_TLSTREAM_TL_KBASE_ARRAY_END_KCPUQUEUE_EXECUTE_JIT_FREE_END(kbdev, queue);
 }

 static void kcpu_queue_process(struct kbase_kcpu_command_queue *queue,
@@ -1550,8 +1556,8 @@ static void kcpu_queue_process(struct kbase_kcpu_command_queue *queue,
 		switch (cmd->type) {
 		case BASE_KCPU_COMMAND_TYPE_FENCE_WAIT:
 			if (!queue->command_started) {
-				KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_WAIT_START(
-					kbdev, queue);
+				KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_WAIT_START(kbdev,
+											   queue);
 				queue->command_started = true;
 			}

@@ -1584,8 +1590,7 @@ static void kcpu_queue_process(struct kbase_kcpu_command_queue *queue,
 			}
 			break;
 		case BASE_KCPU_COMMAND_TYPE_FENCE_SIGNAL:
-			KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_SIGNAL_START(
-				kbdev, queue);
+			KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_SIGNAL_START(kbdev, queue);

 			status = 0;

@@ -1603,8 +1608,8 @@ static void kcpu_queue_process(struct kbase_kcpu_command_queue *queue,
 			queue->has_error = true;
 #endif

-			KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_SIGNAL_END(
-				kbdev, queue, status);
+			KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_SIGNAL_END(kbdev, queue,
+										   status);
 			break;
 		case BASE_KCPU_COMMAND_TYPE_CQS_WAIT:
 			status = kbase_kcpu_cqs_wait_process(kbdev, queue,
@@ -1654,15 +1659,14 @@ static void kcpu_queue_process(struct kbase_kcpu_command_queue *queue,
 			/* Clear the queue's error state */
 			queue->has_error = false;

-			KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_ERROR_BARRIER(
-				kbdev, queue);
+			KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_ERROR_BARRIER(kbdev, queue);
 			break;
 		case BASE_KCPU_COMMAND_TYPE_MAP_IMPORT: {
 			struct kbase_ctx_ext_res_meta *meta = NULL;

 			if (!drain_queue) {
-				KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_MAP_IMPORT_START(
-					kbdev, queue);
+				KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_MAP_IMPORT_START(kbdev,
+											   queue);

 				kbase_gpu_vm_lock(queue->kctx);
 				meta = kbase_sticky_resource_acquire(
@@ -1684,8 +1688,7 @@ static void kcpu_queue_process(struct kbase_kcpu_command_queue *queue,
 		case BASE_KCPU_COMMAND_TYPE_UNMAP_IMPORT: {
 			bool ret;

-			KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_START(
-				kbdev, queue);
+			KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_START(kbdev, queue);

 			kbase_gpu_vm_lock(queue->kctx);
 			ret = kbase_sticky_resource_release(
@@ -1698,15 +1701,15 @@ static void kcpu_queue_process(struct kbase_kcpu_command_queue *queue,
 						"failed to release the reference. resource not found");
 			}

-			KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_END(
-				kbdev, queue, ret ? 0 : 1);
+			KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_END(kbdev, queue,
+										   ret ? 0 : 1);
 			break;
 		}
 		case BASE_KCPU_COMMAND_TYPE_UNMAP_IMPORT_FORCE: {
 			bool ret;

-			KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_FORCE_START(
-					kbdev, queue);
+			KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_FORCE_START(kbdev,
+											   queue);

 			kbase_gpu_vm_lock(queue->kctx);
 			ret = kbase_sticky_resource_release_force(
@@ -1729,8 +1732,8 @@ static void kcpu_queue_process(struct kbase_kcpu_command_queue *queue,
 				/* We still need to call this function to clean the JIT alloc info up */
 				kbase_kcpu_jit_allocate_finish(queue, cmd);
 			} else {
-				KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_ALLOC_START(
-					kbdev, queue);
+				KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_ALLOC_START(kbdev,
+											  queue);

 				status = kbase_kcpu_jit_allocate_process(queue,
 									 cmd);
@@ -1754,8 +1757,7 @@ static void kcpu_queue_process(struct kbase_kcpu_command_queue *queue,
 			break;
 		}
 		case BASE_KCPU_COMMAND_TYPE_JIT_FREE:
-			KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_FREE_START(
-				kbdev, queue);
+			KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_FREE_START(kbdev, queue);

 			status = kbase_kcpu_jit_free_process(queue, cmd);
 			if (status)
@@ -1838,12 +1840,12 @@ static void KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_COMMAND(

 	switch (cmd->type) {
 	case BASE_KCPU_COMMAND_TYPE_FENCE_WAIT:
-		KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_FENCE_WAIT(
-			kbdev, queue, cmd->info.fence.fence);
+		KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_FENCE_WAIT(kbdev, queue,
+								     cmd->info.fence.fence);
 		break;
 	case BASE_KCPU_COMMAND_TYPE_FENCE_SIGNAL:
-		KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_FENCE_SIGNAL(
-			kbdev, queue, cmd->info.fence.fence);
+		KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_FENCE_SIGNAL(kbdev, queue,
+								       cmd->info.fence.fence);
 		break;
 	case BASE_KCPU_COMMAND_TYPE_CQS_WAIT:
 	{
@@ -1865,8 +1867,8 @@ static void KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_COMMAND(
 		unsigned int i;

 		for (i = 0; i < cmd->info.cqs_set.nr_objs; i++) {
-			KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_SET(
-				kbdev, queue, sets[i].addr);
+			KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_SET(kbdev, queue,
+									  sets[i].addr);
 		}
 		break;
 	}
@@ -1881,16 +1883,15 @@ static void KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_COMMAND(
 		break;
 	}
 	case BASE_KCPU_COMMAND_TYPE_ERROR_BARRIER:
-		KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_ERROR_BARRIER(kbdev,
-									queue);
+		KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_ERROR_BARRIER(kbdev, queue);
 		break;
 	case BASE_KCPU_COMMAND_TYPE_MAP_IMPORT:
-		KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_MAP_IMPORT(
-			kbdev, queue, cmd->info.import.gpu_va);
+		KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_MAP_IMPORT(kbdev, queue,
+								     cmd->info.import.gpu_va);
 		break;
 	case BASE_KCPU_COMMAND_TYPE_UNMAP_IMPORT:
-		KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_UNMAP_IMPORT(
-			kbdev, queue, cmd->info.import.gpu_va);
+		KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_UNMAP_IMPORT(kbdev, queue,
+								       cmd->info.import.gpu_va);
 		break;
 	case BASE_KCPU_COMMAND_TYPE_UNMAP_IMPORT_FORCE:
 		KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_UNMAP_IMPORT_FORCE(
@@ -1900,35 +1901,29 @@ static void KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_COMMAND(
 	{
 		u8 i;

-		KBASE_TLSTREAM_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_JIT_ALLOC(
-			kbdev, queue);
+		KBASE_TLSTREAM_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_JIT_ALLOC(kbdev, queue);
 		for (i = 0; i < cmd->info.jit_alloc.count; i++) {
 			const struct base_jit_alloc_info *info =
 				&cmd->info.jit_alloc.info[i];

 			KBASE_TLSTREAM_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_JIT_ALLOC(
-				kbdev, queue, info->gpu_alloc_addr,
-				info->va_pages, info->commit_pages,
-				info->extension, info->id, info->bin_id,
-				info->max_allocations, info->flags,
-				info->usage_id);
+				kbdev, queue, info->gpu_alloc_addr, info->va_pages,
+				info->commit_pages, info->extension, info->id, info->bin_id,
+				info->max_allocations, info->flags, info->usage_id);
 		}
-		KBASE_TLSTREAM_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_JIT_ALLOC(
-			kbdev, queue);
+		KBASE_TLSTREAM_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_JIT_ALLOC(kbdev, queue);
 		break;
 	}
 	case BASE_KCPU_COMMAND_TYPE_JIT_FREE:
 	{
 		u8 i;

-		KBASE_TLSTREAM_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_JIT_FREE(
-			kbdev, queue);
+		KBASE_TLSTREAM_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_JIT_FREE(kbdev, queue);
 		for (i = 0; i < cmd->info.jit_free.count; i++) {
 			KBASE_TLSTREAM_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_JIT_FREE(
 				kbdev, queue, cmd->info.jit_free.ids[i]);
 		}
-		KBASE_TLSTREAM_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_JIT_FREE(
-			kbdev, queue);
+		KBASE_TLSTREAM_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_JIT_FREE(kbdev, queue);
 		break;
 	}
 	case BASE_KCPU_COMMAND_TYPE_GROUP_SUSPEND:
@@ -1936,6 +1931,9 @@ static void KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_COMMAND(
 			kbdev, queue, cmd->info.suspend_buf_copy.sus_buf,
 			cmd->info.suspend_buf_copy.group_handle);
 		break;
+	default:
+		dev_dbg(kbdev->dev, "Unknown command type %u", cmd->type);
+		break;
 	}
 }

@@ -2210,8 +2208,8 @@ int kbase_csf_kcpu_queue_new(struct kbase_context *kctx,
 	/* Fire the tracepoint with the mutex held to enforce correct ordering
 	 * with the summary stream.
 	 */
-	KBASE_TLSTREAM_TL_KBASE_NEW_KCPUQUEUE(
-		kctx->kbdev, queue, kctx->id, queue->num_pending_cmds);
+	KBASE_TLSTREAM_TL_KBASE_NEW_KCPUQUEUE(kctx->kbdev, queue, queue->id, kctx->id,
+					      queue->num_pending_cmds);

 	KBASE_KTRACE_ADD_CSF_KCPU(kctx->kbdev, KCPU_QUEUE_NEW, queue,
 		queue->fence_context, 0);
--- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_kcpu.h
+++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_kcpu.h
@@ -1,7 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
 /*
 *
- * (C) COPYRIGHT 2018-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2018-2022 ARM Limited. All rights reserved.
 *
 * This program is free software and is provided to you under the terms of the
 * GNU General Public License version 2 as published by the Free Software
@@ -206,14 +206,16 @@ struct kbase_kcpu_command_group_suspend_info {
 *		indicates that it has been enqueued earlier.
 * @info:	Structure which holds information about the command
 *		dependent on the command type.
- * @info.fence:            Fence
- * @info.cqs_wait:         CQS wait
- * @info.cqs_set:          CQS set
- * @info.import:           import
- * @info.jit_alloc:        jit allocation
- * @info.jit_free:         jit deallocation
- * @info.suspend_buf_copy: suspend buffer copy
- * @info.sample_time:      sample time
+ * @info.fence:              Fence
+ * @info.cqs_wait:           CQS wait
+ * @info.cqs_set:            CQS set
+ * @info.cqs_wait_operation: CQS wait operation
+ * @info.cqs_set_operation:  CQS set operation
+ * @info.import:             import
+ * @info.jit_alloc:          JIT allocation
+ * @info.jit_free:           JIT deallocation
+ * @info.suspend_buf_copy:   suspend buffer copy
+ * @info.sample_time:        sample time
 */
 struct kbase_kcpu_command {
 	enum base_kcpu_command_type type;
@@ -303,8 +305,6 @@ int kbase_csf_kcpu_queue_new(struct kbase_context *kctx,
 /**
 * kbase_csf_kcpu_queue_delete - Delete KCPU command queue.
 *
- * Return: 0 if successful, -EINVAL if the queue ID is invalid.
- *
 * @kctx:	Pointer to the kbase context from which the KCPU command
 *		queue is to be deleted.
 * @del:	Pointer to the structure which specifies the KCPU command
--- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_protected_memory.c
+++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_protected_memory.c
@@ -71,29 +71,60 @@ struct protected_memory_allocation **
 		kbase_csf_protected_memory_alloc(
 		struct kbase_device *const kbdev,
 		struct tagged_addr *phys,
-		size_t num_pages)
+		size_t num_pages,
+		bool is_small_page)
 {
 	size_t i;
 	struct protected_memory_allocator_device *pma_dev =
 		kbdev->csf.pma_dev;
-	struct protected_memory_allocation **pma =
-		kmalloc_array(num_pages, sizeof(*pma), GFP_KERNEL);
+	struct protected_memory_allocation **pma = NULL;
+	unsigned int order = KBASE_MEM_POOL_2MB_PAGE_TABLE_ORDER;
+	unsigned int num_pages_order;
+
+	if (is_small_page)
+		order = KBASE_MEM_POOL_4KB_PAGE_TABLE_ORDER;
+
+	num_pages_order = (1u << order);
+
+	/* Convert num_pages from a count of PAGE_SIZE pages into the
+	 * number of allocations of the selected order, rounding up.
+	 *
+	 * pma_alloc_page() will then handle the granularity
+	 * of the allocation based on order.
+	 */
+	num_pages = div64_u64(num_pages + num_pages_order - 1, num_pages_order);
+
+	pma = kmalloc_array(num_pages, sizeof(*pma), GFP_KERNEL);

 	if (WARN_ON(!pma_dev) || WARN_ON(!phys) || !pma)
 		return NULL;

 	for (i = 0; i < num_pages; i++) {
-		pma[i] = pma_dev->ops.pma_alloc_page(pma_dev,
-				KBASE_MEM_POOL_4KB_PAGE_TABLE_ORDER);
+		phys_addr_t phys_addr;
+
+		pma[i] = pma_dev->ops.pma_alloc_page(pma_dev, order);
 		if (!pma[i])
 			break;

-		phys[i] = as_tagged(pma_dev->ops.pma_get_phys_addr(pma_dev,
-					pma[i]));
+		phys_addr = pma_dev->ops.pma_get_phys_addr(pma_dev, pma[i]);
+
+		if (order) {
+			size_t j;
+
+			*phys++ = as_tagged_tag(phys_addr, HUGE_HEAD | HUGE_PAGE);
+
+			for (j = 1; j < num_pages_order; j++) {
+				*phys++ = as_tagged_tag(phys_addr +
+							PAGE_SIZE * j,
+							HUGE_PAGE);
+			}
+		} else {
+			phys[i] = as_tagged(phys_addr);
+		}
 	}

 	if (i != num_pages) {
-		kbase_csf_protected_memory_free(kbdev, pma, i);
+		kbase_csf_protected_memory_free(kbdev, pma, i * num_pages_order, is_small_page);
 		return NULL;
 	}

@@ -103,15 +134,28 @@ struct protected_memory_allocation **
 void kbase_csf_protected_memory_free(
 		struct kbase_device *const kbdev,
 		struct protected_memory_allocation **pma,
-		size_t num_pages)
+		size_t num_pages,
+		bool is_small_page)
 {
 	size_t i;
 	struct protected_memory_allocator_device *pma_dev =
 		kbdev->csf.pma_dev;
+	unsigned int num_pages_order = (1u << KBASE_MEM_POOL_2MB_PAGE_TABLE_ORDER);
+
+	if (is_small_page)
+		num_pages_order = (1u << KBASE_MEM_POOL_4KB_PAGE_TABLE_ORDER);

 	if (WARN_ON(!pma_dev) || WARN_ON(!pma))
 		return;

+	/* Ensure the requested num_pages is aligned with
+	 * the order type passed as argument.
+	 *
+	 * pma_alloc_page() will then handle the granularity
+	 * of the allocation based on order.
+	 */
+	num_pages = div64_u64(num_pages + num_pages_order - 1, num_pages_order);
+
 	for (i = 0; i < num_pages; i++)
 		pma_dev->ops.pma_free_page(pma_dev, pma[i]);

--- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_protected_memory.h
+++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_protected_memory.h
@@ -1,7 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
 /*
 *
- * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
 *
 * This program is free software and is provided to you under the terms of the
 * GNU General Public License version 2 as published by the Free Software
@@ -46,6 +46,7 @@ void kbase_csf_protected_memory_term(struct kbase_device *const kbdev);
 * @phys:	Array of physical addresses to be filled in by the protected
 *		memory allocator.
 * @num_pages:	Number of pages requested to be allocated.
+ * @is_small_page: Flag used to select the order of protected memory page.
 *
 * Return: Pointer to an array of protected memory allocations on success,
 *		or NULL on failure.
@@ -54,7 +55,8 @@ struct protected_memory_allocation **
 	kbase_csf_protected_memory_alloc(
 		struct kbase_device *const kbdev,
 		struct tagged_addr *phys,
-		size_t num_pages);
+		size_t num_pages,
+		bool is_small_page);

 /**
 * kbase_csf_protected_memory_free - Free the allocated
@@ -63,9 +65,11 @@ struct protected_memory_allocation **
 * @kbdev:	Device pointer.
 * @pma:	Array of pointer to protected memory allocations.
 * @num_pages:	Number of pages to be freed.
+ * @is_small_page: Flag used to select the order of protected memory page.
 */
 void kbase_csf_protected_memory_free(
 		struct kbase_device *const kbdev,
 		struct protected_memory_allocation **pma,
-		size_t num_pages);
+		size_t num_pages,
+		bool is_small_page);
 #endif
--- a/include/uapi/gpu/arm/bifrost/csf/mali_gpu_csf_registers.h
+++ b/include/uapi/gpu/arm/bifrost/csf/mali_gpu_csf_registers.h
@@ -24,8 +24,8 @@
 * expected) to have to add to it.
 */

-#ifndef _UAPI_GPU_CSF_REGISTERS_H_
-#define _UAPI_GPU_CSF_REGISTERS_H_
+#ifndef _KBASE_CSF_REGISTERS_H_
+#define _KBASE_CSF_REGISTERS_H_

 /*
 * Begin register sets
@@ -480,7 +480,7 @@
 /* CS_INSTR_BUFFER_OFFSET_POINTER register */
 #define CS_INSTR_BUFFER_OFFSET_POINTER_POINTER_SHIFT (0)
 #define CS_INSTR_BUFFER_OFFSET_POINTER_POINTER_MASK \
-	((u64)0xFFFFFFFFFFFFFFFF) << CS_INSTR_BUFFER_OFFSET_POINTER_POINTER_SHIFT)
+	(((u64)0xFFFFFFFFFFFFFFFF) << CS_INSTR_BUFFER_OFFSET_POINTER_POINTER_SHIFT)
 #define CS_INSTR_BUFFER_OFFSET_POINTER_POINTER_GET(reg_val) \
 	(((reg_val)&CS_INSTR_BUFFER_OFFSET_POINTER_POINTER_MASK) >> CS_INSTR_BUFFER_OFFSET_POINTER_POINTER_SHIFT)
 #define CS_INSTR_BUFFER_OFFSET_POINTER_POINTER_SET(reg_val, value) \
@@ -1448,6 +1448,9 @@
 #define GLB_ACK_IRQ_MASK_IDLE_EVENT_SHIFT (26)
 #define GLB_ACK_IRQ_MASK_IDLE_EVENT_MASK (0x1 << GLB_ACK_IRQ_MASK_IDLE_EVENT_SHIFT)

+#define GLB_ACK_IRQ_MASK_IDLE_ENABLE_SHIFT GPU_U(10)
+#define GLB_ACK_IRQ_MASK_IDLE_ENABLE_MASK (GPU_U(0x1) << GLB_ACK_IRQ_MASK_IDLE_ENABLE_SHIFT)
+
 #define GLB_IDLE_TIMER (0x0080)
 /* GLB_IDLE_TIMER register */
 #define GLB_IDLE_TIMER_TIMEOUT_SHIFT (0)
@@ -1518,4 +1521,4 @@
 	 (((value) << GLB_REQ_ITER_TRACE_ENABLE_SHIFT) &                       \
 	  GLB_REQ_ITER_TRACE_ENABLE_MASK))

-#endif /* _UAPI_GPU_CSF_REGISTERS_H_ */
+#endif /* _KBASE_CSF_REGISTERS_H_ */
--- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_reset_gpu.c
+++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_reset_gpu.c
@@ -29,14 +29,14 @@
 #include <csf/mali_kbase_csf_trace_buffer.h>
 #include <csf/ipa_control/mali_kbase_csf_ipa_control.h>
 #include <mali_kbase_reset_gpu.h>
+#include <linux/string.h>

-/* Waiting timeout for GPU reset to complete */
-#define GPU_RESET_TIMEOUT_MS (5000) /* 5 seconds */
-#define DUMP_DWORDS_PER_LINE (4)
-/* 16 characters needed for a 8 byte value in hex & 1 character for space */
-#define DUMP_HEX_CHARS_PER_DWORD ((2 * 8) + 1)
-#define DUMP_HEX_CHARS_PER_LINE  \
-	(DUMP_DWORDS_PER_LINE * DUMP_HEX_CHARS_PER_DWORD)
+enum kbasep_soft_reset_status {
+	RESET_SUCCESS = 0,
+	SOFT_RESET_FAILED = 1,
+	L2_ON_FAILED = 2,
+	MCU_REINIT_FAILED = 3,
+};

 static inline bool
 kbase_csf_reset_state_is_silent(enum kbase_csf_reset_gpu_state state)
@@ -259,8 +259,8 @@ static void kbase_csf_debug_dump_registers(struct kbase_device *kbdev)

 static void kbase_csf_dump_firmware_trace_buffer(struct kbase_device *kbdev)
 {
-	u8 *buf, *line_str;
-	unsigned int read_size;
+	u8 *buf, *p, *pnewline, *pend, *pendbuf;
+	unsigned int read_size, remaining_size;
 	struct firmware_trace_buffer *tb =
 		kbase_csf_firmware_get_trace_buffer(kbdev, FW_TRACE_BUF_NAME);

@@ -269,41 +269,53 @@ static void kbase_csf_dump_firmware_trace_buffer(struct kbase_device *kbdev)
 		return;
 	}

-	buf = kmalloc(PAGE_SIZE + DUMP_HEX_CHARS_PER_LINE + 1, GFP_KERNEL);
+	buf = kmalloc(PAGE_SIZE + 1, GFP_KERNEL);
 	if (buf == NULL) {
 		dev_err(kbdev->dev, "Short of memory, firmware trace dump skipped");
 		return;
 	}
-	line_str = &buf[PAGE_SIZE];
+
+	buf[PAGE_SIZE] = 0;
+
+	p = buf;
+	pendbuf = &buf[PAGE_SIZE];

 	dev_err(kbdev->dev, "Firmware trace buffer dump:");
-	while ((read_size = kbase_csf_firmware_trace_buffer_read_data(tb, buf,
-								PAGE_SIZE))) {
-		u64 *ptr = (u64 *)buf;
-		u32 num_dwords;
+	while ((read_size = kbase_csf_firmware_trace_buffer_read_data(tb, p,
+								pendbuf - p))) {
+		pend = p + read_size;
+		p = buf;

-		for (num_dwords = read_size / sizeof(u64);
-		     num_dwords >= DUMP_DWORDS_PER_LINE;
-		     num_dwords -= DUMP_DWORDS_PER_LINE) {
-			dev_err(kbdev->dev, "%016llx %016llx %016llx %016llx",
-				ptr[0], ptr[1], ptr[2], ptr[3]);
-			ptr += DUMP_DWORDS_PER_LINE;
+		while (p < pend && (pnewline = memchr(p, '\n', pend - p))) {
+			/* Null-terminate the string */
+			*pnewline = 0;
+
+			dev_err(kbdev->dev, "FW> %s", p);
+
+			p = pnewline + 1;
 		}

-		if (num_dwords) {
-			int pos = 0;
+		remaining_size = pend - p;

-			while (num_dwords--) {
-				pos += snprintf(line_str + pos,
-						DUMP_HEX_CHARS_PER_DWORD + 1,
-						"%016llx ", ptr[0]);
-				ptr++;
-			}
-
-			dev_err(kbdev->dev, "%s", line_str);
+		if (!remaining_size) {
+			p = buf;
+		} else if (remaining_size < PAGE_SIZE) {
+			/* Copy unfinished string to the start of the buffer */
+			memmove(buf, p, remaining_size);
+			p = &buf[remaining_size];
+		} else {
+			/* Print abnormal page-long string without newlines */
+			dev_err(kbdev->dev, "FW> %s", buf);
+			p = buf;
 		}
 	}

+	if (p != buf) {
+		/* Null-terminate and print last unfinished string */
+		*p = 0;
+		dev_err(kbdev->dev, "FW> %s", buf);
+	}
+
 	kfree(buf);
 }

@@ -332,36 +344,12 @@ static void kbase_csf_hwcnt_on_reset_error(struct kbase_device *kbdev)
 	kbase_csf_scheduler_spin_unlock(kbdev, flags);
 }

-static int kbase_csf_reset_gpu_now(struct kbase_device *kbdev,
-				   bool firmware_inited, bool silent)
+static enum kbasep_soft_reset_status kbase_csf_reset_gpu_once(struct kbase_device *kbdev,
+							      bool firmware_inited, bool silent)
 {
 	unsigned long flags;
 	int err;
-
-	WARN_ON(kbdev->irq_reset_flush);
-	/* The reset must now be happening otherwise other threads will not
-	 * have been synchronized with to stop their access to the HW
-	 */
-#if KERNEL_VERSION(5, 3, 0) <= LINUX_VERSION_CODE
-	lockdep_assert_held_write(&kbdev->csf.reset.sem);
-#elif KERNEL_VERSION(4, 10, 0) <= LINUX_VERSION_CODE
-	lockdep_assert_held_exclusive(&kbdev->csf.reset.sem);
-#else
-	lockdep_assert_held(&kbdev->csf.reset.sem);
-#endif
-	WARN_ON(!kbase_reset_gpu_is_active(kbdev));
-
-	/* Reset the scheduler state before disabling the interrupts as suspend
-	 * of active CSG slots would also be done as a part of reset.
-	 */
-	if (likely(firmware_inited))
-		kbase_csf_scheduler_reset(kbdev);
-	cancel_work_sync(&kbdev->csf.firmware_reload_work);
-
-	dev_dbg(kbdev->dev, "Disable GPU hardware counters.\n");
-	/* This call will block until counters are disabled.
-	 */
-	kbase_hwcnt_context_disable(kbdev->hwcnt_gpu_ctx);
+	enum kbasep_soft_reset_status ret = RESET_SUCCESS;

 	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
 	spin_lock(&kbdev->mmu_mask_change);
@@ -380,8 +368,7 @@ static int kbase_csf_reset_gpu_now(struct kbase_device *kbdev,
 	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);

 	dev_dbg(kbdev->dev, "Ensure that any IRQ handlers have finished\n");
-	/* Must be done without any locks IRQ handlers will take.
-	 */
+	/* Must be done without any locks IRQ handlers will take. */
 	kbase_synchronize_irqs(kbdev);

 	dev_dbg(kbdev->dev, "Flush out any in-flight work items\n");
@@ -421,10 +408,8 @@ static int kbase_csf_reset_gpu_now(struct kbase_device *kbdev,

 	mutex_unlock(&kbdev->pm.lock);

-	if (WARN_ON(err)) {
-		kbase_csf_hwcnt_on_reset_error(kbdev);
-		return err;
-	}
+	if (WARN_ON(err))
+		return SOFT_RESET_FAILED;

 	mutex_lock(&kbdev->mmu_hw_mutex);
 	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
@@ -441,20 +426,78 @@ static int kbase_csf_reset_gpu_now(struct kbase_device *kbdev,
 	err = kbase_pm_wait_for_desired_state(kbdev);
 	mutex_unlock(&kbdev->pm.lock);

-	if (WARN_ON(err)) {
-		kbase_csf_hwcnt_on_reset_error(kbdev);
-		return err;
+	if (err) {
+		if (!kbase_pm_l2_is_in_desired_state(kbdev))
+			ret = L2_ON_FAILED;
+		else if (!kbase_pm_mcu_is_in_desired_state(kbdev))
+			ret = MCU_REINIT_FAILED;
+	}
+
+	return ret;
+}
+
+static int kbase_csf_reset_gpu_now(struct kbase_device *kbdev, bool firmware_inited, bool silent)
+{
+	unsigned long flags;
+	enum kbasep_soft_reset_status ret;
+
+	WARN_ON(kbdev->irq_reset_flush);
+	/* The reset must now be happening otherwise other threads will not
+	 * have been synchronized with to stop their access to the HW
+	 */
+#if KERNEL_VERSION(5, 3, 0) <= LINUX_VERSION_CODE
+	lockdep_assert_held_write(&kbdev->csf.reset.sem);
+#elif KERNEL_VERSION(4, 10, 0) <= LINUX_VERSION_CODE
+	lockdep_assert_held_exclusive(&kbdev->csf.reset.sem);
+#else
+	lockdep_assert_held(&kbdev->csf.reset.sem);
+#endif
+	WARN_ON(!kbase_reset_gpu_is_active(kbdev));
+
+	/* Reset the scheduler state before disabling the interrupts as suspend
+	 * of active CSG slots would also be done as a part of reset.
+	 */
+	if (likely(firmware_inited))
+		kbase_csf_scheduler_reset(kbdev);
+	cancel_work_sync(&kbdev->csf.firmware_reload_work);
+
+	dev_dbg(kbdev->dev, "Disable GPU hardware counters.\n");
+	/* This call will block until counters are disabled. */
+	kbase_hwcnt_context_disable(kbdev->hwcnt_gpu_ctx);
+
+	ret = kbase_csf_reset_gpu_once(kbdev, firmware_inited, silent);
+	if (ret == SOFT_RESET_FAILED) {
+		dev_err(kbdev->dev, "Soft-reset failed");
+		goto err;
+	} else if (ret == L2_ON_FAILED) {
+		dev_err(kbdev->dev, "L2 power up failed after the soft-reset");
+		goto err;
+	} else if (ret == MCU_REINIT_FAILED) {
+		dev_err(kbdev->dev, "MCU re-init failed trying full firmware reload");
+		/* Since MCU reinit failed despite successful soft reset, we can try
+		 * the firmware full reload.
+		 */
+		kbdev->csf.firmware_full_reload_needed = true;
+		ret = kbase_csf_reset_gpu_once(kbdev, firmware_inited, true);
+		if (ret != RESET_SUCCESS) {
+			dev_err(kbdev->dev,
+				"MCU Re-init failed even after trying full firmware reload, ret = [%d]",
+				ret);
+			goto err;
+		}
 	}

 	/* Re-enable GPU hardware counters */
 	kbase_csf_scheduler_spin_lock(kbdev, &flags);
 	kbase_hwcnt_context_enable(kbdev->hwcnt_gpu_ctx);
 	kbase_csf_scheduler_spin_unlock(kbdev, flags);
-
 	if (!silent)
 		dev_err(kbdev->dev, "Reset complete");
-
 	return 0;
+err:
+
+	kbase_csf_hwcnt_on_reset_error(kbdev);
+	return -1;
 }

 static void kbase_csf_reset_gpu_worker(struct work_struct *data)
@@ -593,7 +636,7 @@ bool kbase_reset_gpu_is_active(struct kbase_device *kbdev)
 int kbase_reset_gpu_wait(struct kbase_device *kbdev)
 {
 	const long wait_timeout =
-		kbase_csf_timeout_in_jiffies(GPU_RESET_TIMEOUT_MS);
+		kbase_csf_timeout_in_jiffies(kbase_get_timeout_ms(kbdev, CSF_GPU_RESET_TIMEOUT));
 	long remaining;

 	/* Inform lockdep we might be trying to wait on a reset (as
--- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_scheduler.c
+++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_scheduler.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
 /*
 *
- * (C) COPYRIGHT 2018-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2018-2022 ARM Limited. All rights reserved.
 *
 * This program is free software and is provided to you under the terms of the
 * GNU General Public License version 2 as published by the Free Software
@@ -28,7 +28,7 @@
 #include <tl/mali_kbase_tracepoints.h>
 #include <backend/gpu/mali_kbase_pm_internal.h>
 #include <linux/export.h>
-#include <uapi/gpu/arm/bifrost/csf/mali_gpu_csf_registers.h>
+#include <csf/mali_kbase_csf_registers.h>
 #include <uapi/gpu/arm/bifrost/mali_base_kernel.h>
 #include <mali_kbase_hwaccess_time.h>

@@ -246,7 +246,7 @@ static enum hrtimer_restart tick_timer_callback(struct hrtimer *timer)
 *
 * This function will start the scheduling tick hrtimer and is supposed to
 * be called only from the tick work item function. The tick hrtimer should
- * should not be active already.
+ * not be active already.
 */
 static void start_tick_timer(struct kbase_device *kbdev)
 {
@@ -372,7 +372,7 @@ static void assign_user_doorbell_to_queue(struct kbase_device *kbdev,
 	mutex_lock(&kbdev->csf.reg_lock);

 	/* If bind operation for the queue hasn't completed yet, then the
-	 * the CSI can't be programmed for the queue
+	 * CSI can't be programmed for the queue
 	 * (even in stopped state) and so the doorbell also can't be assigned
 	 * to it.
 	 */
@@ -406,6 +406,85 @@ static void scheduler_doorbell_init(struct kbase_device *kbdev)
 	WARN_ON(doorbell_nr != CSF_KERNEL_DOORBELL_NR);
 }

+/**
+ * update_on_slot_queues_offsets - Update active queues' EXTRACT offsets
+ *
+ * @kbdev: Instance of a GPU platform device that implements a CSF interface.
+ *
+ * This function updates the EXTRACT offset for all queues whose groups have
+ * been assigned a physical slot. These values could be used to detect a
+ * queue's true idleness status. This is intended to be an additional check
+ * on top of the GPU idle notification to account for race conditions.
+ * This function is supposed to be called only when GPU idle notification
+ * interrupt is received.
+ */
+static void update_on_slot_queues_offsets(struct kbase_device *kbdev)
+{
+	struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
+	/* All CSGs have the same number of CSs */
+	size_t const max_streams = kbdev->csf.global_iface.groups[0].stream_num;
+	size_t i;
+
+	lockdep_assert_held(&scheduler->interrupt_lock);
+
+	/* csg_slots_idle_mask is not used here for the looping, as it could get
+	 * updated concurrently when Scheduler re-evaluates the idle status of
+	 * the CSGs for which idle notification was received previously.
+	 */
+	for_each_set_bit(i, scheduler->csg_inuse_bitmap, kbdev->csf.global_iface.group_num) {
+		struct kbase_queue_group *const group = scheduler->csg_slots[i].resident_group;
+		size_t j;
+
+		if (WARN_ON(!group))
+			continue;
+
+		for (j = 0; j < max_streams; ++j) {
+			struct kbase_queue *const queue = group->bound_queues[j];
+
+			if (queue) {
+				u64 const *const output_addr =
+					(u64 const *)(queue->user_io_addr + PAGE_SIZE);
+
+				queue->extract_ofs = output_addr[CS_EXTRACT_LO / sizeof(u64)];
+			}
+		}
+	}
+}
+
+static void enqueue_gpu_idle_work(struct kbase_csf_scheduler *const scheduler)
+{
+	atomic_set(&scheduler->gpu_no_longer_idle, false);
+	queue_work(scheduler->idle_wq, &scheduler->gpu_idle_work);
+}
+
+void kbase_csf_scheduler_process_gpu_idle_event(struct kbase_device *kbdev)
+{
+	struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
+	int non_idle_offslot_grps;
+	bool can_suspend_on_idle;
+
+	lockdep_assert_held(&scheduler->interrupt_lock);
+
+	non_idle_offslot_grps = atomic_read(&scheduler->non_idle_offslot_grps);
+	can_suspend_on_idle = kbase_pm_idle_groups_sched_suspendable(kbdev);
+	KBASE_KTRACE_ADD(kbdev, SCHEDULER_CAN_IDLE, NULL,
+			 ((u64)(u32)non_idle_offslot_grps) | (((u64)can_suspend_on_idle) << 32));
+
+	if (!non_idle_offslot_grps) {
+		if (can_suspend_on_idle) {
+			/* The GPU idle worker relies on update_on_slot_queues_offsets() to have
+			 * finished. It's queued before to reduce the time it takes till execution
+			 * but it'll eventually be blocked by the scheduler->interrupt_lock.
+			 */
+			enqueue_gpu_idle_work(scheduler);
+			update_on_slot_queues_offsets(kbdev);
+		}
+	} else {
+		/* Advance the scheduling tick to get the non-idle suspended groups loaded soon */
+		kbase_csf_scheduler_advance_tick_nolock(kbdev);
+	}
+}
+
 u32 kbase_csf_scheduler_get_nr_active_csgs_locked(struct kbase_device *kbdev)
 {
 	u32 nr_active_csgs;
@@ -551,54 +630,6 @@ static bool scheduler_timer_is_enabled_nolock(struct kbase_device *kbdev)
 	return kbdev->csf.scheduler.timer_enabled;
 }

-static void enable_gpu_idle_fw_timer(struct kbase_device *kbdev)
-{
-	struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
-	unsigned long flags;
-
-	lockdep_assert_held(&scheduler->lock);
-
-	if (scheduler->gpu_idle_fw_timer_enabled)
-		return;
-
-	spin_lock_irqsave(&scheduler->interrupt_lock, flags);
-
-	/* Update the timer_enabled flag requires holding interrupt_lock */
-	scheduler->gpu_idle_fw_timer_enabled = true;
-	kbase_csf_firmware_enable_gpu_idle_timer(kbdev);
-
-	spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
-}
-
-static void disable_gpu_idle_fw_timer_locked(struct kbase_device *kbdev)
-{
-	struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
-
-	lockdep_assert_held(&scheduler->lock);
-	lockdep_assert_held(&scheduler->interrupt_lock);
-
-	/* Update of the timer_enabled flag requires holding interrupt_lock */
-	if (scheduler->gpu_idle_fw_timer_enabled) {
-		scheduler->gpu_idle_fw_timer_enabled = false;
-		kbase_csf_firmware_disable_gpu_idle_timer(kbdev);
-	}
-}
-
-static void disable_gpu_idle_fw_timer(struct kbase_device *kbdev)
-{
-	struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
-	unsigned long flags;
-
-	lockdep_assert_held(&scheduler->lock);
-
-	if (!scheduler->gpu_idle_fw_timer_enabled)
-		return;
-
-	spin_lock_irqsave(&scheduler->interrupt_lock, flags);
-	disable_gpu_idle_fw_timer_locked(kbdev);
-	spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
-}
-
 /**
 * scheduler_pm_active_handle_suspend() - Acquire the PM reference count for
 *                                        Scheduler
@@ -631,12 +662,15 @@ static int scheduler_pm_active_handle_suspend(struct kbase_device *kbdev,
 	if (!prev_count) {
 		ret = kbase_pm_context_active_handle_suspend(kbdev,
 							suspend_handler);
-		if (ret) {
-			spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+		/* Invoke the PM state machines again as the change in MCU
+		 * desired status, due to the update of scheduler.pm_active_count,
+		 * may be missed by the thread that called pm_wait_for_desired_state()
+		 */
+		spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+		if (ret)
 			kbdev->csf.scheduler.pm_active_count--;
-			kbase_pm_update_state(kbdev);
-			spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
-		}
+		kbase_pm_update_state(kbdev);
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
 	}

 	return ret;
@@ -716,8 +750,16 @@ static void scheduler_pm_idle(struct kbase_device *kbdev)
 		kbdev->csf.scheduler.pm_active_count--;
 	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);

-	if (prev_count == 1)
+	if (prev_count == 1) {
 		kbase_pm_context_idle(kbdev);
+		/* Invoke the PM state machines again as the change in MCU
+		 * desired status, due to the update of scheduler.pm_active_count,
+		 * may be missed by the thread that called pm_wait_for_desired_state()
+		 */
+		spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+		kbase_pm_update_state(kbdev);
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+	}
 }

 #ifdef KBASE_PM_RUNTIME
@@ -746,8 +788,16 @@ static void scheduler_pm_idle_before_sleep(struct kbase_device *kbdev)
 	kbdev->pm.backend.exit_gpu_sleep_mode = false;
 	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);

-	if (prev_count == 1)
+	if (prev_count == 1) {
 		kbase_pm_context_idle(kbdev);
+		/* Invoke the PM state machines again as the change in MCU
+		 * desired status, due to the update of scheduler.pm_active_count,
+		 * may be missed by the thread that called pm_wait_for_desired_state()
+		 */
+		spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+		kbase_pm_update_state(kbdev);
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+	}
 }
 #endif

@@ -1735,6 +1785,13 @@ bool save_slot_cs(struct kbase_csf_cmd_stream_group_info const *const ginfo,
 	u32 status = kbase_csf_firmware_cs_output(stream, CS_STATUS_WAIT);
 	bool is_waiting = false;

+#if IS_ENABLED(CONFIG_DEBUG_FS)
+	u64 cmd_ptr = kbase_csf_firmware_cs_output(stream, CS_STATUS_CMD_PTR_LO);
+
+	cmd_ptr |= (u64)kbase_csf_firmware_cs_output(stream, CS_STATUS_CMD_PTR_HI) << 32;
+	queue->saved_cmd_ptr = cmd_ptr;
+#endif
+
 	KBASE_KTRACE_ADD_CSF_GRP_Q(stream->kbdev, QUEUE_SYNC_STATUS_WAIT,
 				   queue->group, queue, status);

@@ -1948,7 +2005,7 @@ void remove_group_from_runnable(struct kbase_csf_scheduler *const scheduler,
 		cancel_tick_timer(kctx->kbdev);
 		WARN_ON(atomic_read(&scheduler->non_idle_offslot_grps));
 		if (scheduler->state != SCHED_SUSPENDED)
-			queue_work(system_wq, &scheduler->gpu_idle_work);
+			enqueue_gpu_idle_work(scheduler);
 	}
 	KBASE_KTRACE_ADD_CSF_GRP(kctx->kbdev, SCHEDULER_TOP_GRP, scheduler->top_grp,
 			scheduler->num_active_address_spaces |
@@ -2078,7 +2135,7 @@ static void update_offslot_non_idle_cnt_on_grp_suspend(
 	}
 }

-static bool confirm_cmd_buf_empty(struct kbase_queue *queue)
+static bool confirm_cmd_buf_empty(struct kbase_queue const *queue)
 {
 	bool cs_empty;
 	bool cs_idle;
@@ -2090,8 +2147,8 @@ static bool confirm_cmd_buf_empty(struct kbase_queue *queue)

 	u32 glb_version = iface->version;

-	u64 *input_addr = (u64 *)queue->user_io_addr;
-	u64 *output_addr = (u64 *)(queue->user_io_addr + PAGE_SIZE);
+	u64 const *input_addr = (u64 const *)queue->user_io_addr;
+	u64 const *output_addr = (u64 const *)(queue->user_io_addr + PAGE_SIZE);

 	if (glb_version >= kbase_csf_interface_version(1, 0, 0)) {
 		/* CS_STATUS_SCOREBOARD supported from CSF 1.0 */
@@ -2605,7 +2662,7 @@ void kbase_csf_scheduler_group_deschedule(struct kbase_queue_group *group)
 			if (kbase_csf_scheduler_wait_mcu_active(kbdev))
 				dev_warn(
 					kbdev->dev,
-					"[%llu] Wait for MCU active failed when when terminating group %d of context %d_%d on slot %d",
+					"[%llu] Wait for MCU active failed when terminating group %d of context %d_%d on slot %d",
 					kbase_backend_get_cycle_cnt(kbdev),
 					group->handle, group->kctx->tgid,
 					group->kctx->id, group->csg_nr);
@@ -2704,6 +2761,7 @@ static int scheduler_group_schedule(struct kbase_queue_group *group)
 		}
 	} else if (!queue_group_scheduled_locked(group)) {
 		int new_val;
+
 		insert_group_to_runnable(&kbdev->csf.scheduler, group,
 			KBASE_CSF_GROUP_RUNNABLE);
 		/* A new group into the scheduler */
@@ -3033,9 +3091,9 @@ static void program_suspending_csg_slots(struct kbase_device *kbdev)
 				struct kbase_queue_group *group =
 					scheduler->csg_slots[i].resident_group;

-				if (WARN_ON(!csg_slot_stopped_locked(kbdev, (s8)i))) {
+				if (WARN_ON(!csg_slot_stopped_locked(kbdev, (s8)i)))
 					continue;
-				}
+
 				/* The on slot csg is now stopped */
 				clear_bit(i, slot_mask);

@@ -3533,13 +3591,13 @@ static void scheduler_group_check_protm_enter(struct kbase_device *const kbdev,
 				 * GPUCORE-21394.
 				 */

-				/* Disable the idle timer */
-				disable_gpu_idle_fw_timer_locked(kbdev);
-
 				/* Switch to protected mode */
 				scheduler->active_protm_grp = input_grp;
 				KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_ENTER_PROTM,
 							 input_grp, 0u);
+				/* Reset the tick's pending protm seq number */
+				scheduler->tick_protm_pending_seq =
+					KBASEP_TICK_PROTM_PEND_SCAN_SEQ_NR_INVALID;

 				kbase_csf_enter_protected_mode(kbdev);
 				spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
@@ -3637,6 +3695,7 @@ static void scheduler_ctx_scan_groups(struct kbase_device *kbdev,
 	struct kbase_queue_group *group;

 	lockdep_assert_held(&scheduler->lock);
+	lockdep_assert_held(&scheduler->interrupt_lock);
 	if (WARN_ON(priority < 0) ||
 	    WARN_ON(priority >= KBASE_QUEUE_GROUP_PRIORITY_COUNT))
 		return;
@@ -3656,6 +3715,14 @@ static void scheduler_ctx_scan_groups(struct kbase_device *kbdev,
 		/* Set the scanout sequence number, starting from 0 */
 		group->scan_seq_num = scheduler->csg_scan_count_for_tick++;

+		if (scheduler->tick_protm_pending_seq ==
+				KBASEP_TICK_PROTM_PEND_SCAN_SEQ_NR_INVALID) {
+			if (!bitmap_empty(group->protm_pending_bitmap,
+			     kbdev->csf.global_iface.groups[0].stream_num))
+				scheduler->tick_protm_pending_seq =
+					group->scan_seq_num;
+		}
+
 		if (queue_group_idle_locked(group)) {
 			if (on_slot_group_idle_locked(group))
 				list_add_tail(&group->link_to_schedule,
@@ -3738,6 +3805,7 @@ static void scheduler_rotate_groups(struct kbase_device *kbdev)
 		WARN_ON(top_grp->kctx != top_ctx);
 		if (!WARN_ON(list_empty(list))) {
 			struct kbase_queue_group *new_head_grp;
+
 			list_move_tail(&top_grp->link, list);
 			new_head_grp = (!list_empty(list)) ?
 						list_first_entry(list, struct kbase_queue_group, link) :
@@ -3774,6 +3842,7 @@ static void scheduler_rotate_ctxs(struct kbase_device *kbdev)

 			if (!WARN_ON(!found)) {
 				struct kbase_context *new_head_kctx;
+
 				list_move_tail(&pos->csf.link, list);
 				KBASE_KTRACE_ADD(kbdev, SCHEDULER_ROTATE_RUNNABLE, pos,
 						 0u);
@@ -4042,6 +4111,59 @@ static int suspend_active_groups_on_powerdown(struct kbase_device *kbdev,
 	return 0;
 }

+/**
+ * all_on_slot_groups_remained_idle - Live check for all groups' idleness
+ *
+ * @kbdev: Pointer to the device.
+ *
+ * Returns false if any of the queues inside any of the groups that have been
+ * assigned a physical CSG slot have work to execute, or have executed work
+ * since having received a GPU idle notification. This function is used to
+ * handle a race condition between firmware reporting GPU idle and userspace
+ * submitting more work by directly ringing a doorbell.
+ *
+ * Return: false if any queue inside any resident group has work to be processed
+ *         or has processed work since GPU idle event, true otherwise.
+ */
+static bool all_on_slot_groups_remained_idle(struct kbase_device *kbdev)
+{
+	struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
+	/* All CSGs have the same number of CSs */
+	size_t const max_streams = kbdev->csf.global_iface.groups[0].stream_num;
+	size_t i;
+
+	lockdep_assert_held(&scheduler->lock);
+	lockdep_assert_held(&scheduler->interrupt_lock);
+
+	for_each_set_bit(i, scheduler->csg_slots_idle_mask,
+			  kbdev->csf.global_iface.group_num) {
+		struct kbase_queue_group *const group =
+			scheduler->csg_slots[i].resident_group;
+		size_t j;
+
+		for (j = 0; j < max_streams; ++j) {
+			struct kbase_queue const *const queue =
+				group->bound_queues[j];
+			u64 const *output_addr;
+			u64 cur_extract_ofs;
+
+			if (!queue)
+				continue;
+
+			output_addr = (u64 const *)(queue->user_io_addr + PAGE_SIZE);
+			cur_extract_ofs = output_addr[CS_EXTRACT_LO / sizeof(u64)];
+			if (cur_extract_ofs != queue->extract_ofs) {
+				/* More work has been executed since the idle
+				 * notification.
+				 */
+				return false;
+			}
+		}
+	}
+
+	return true;
+}
+
 static bool scheduler_idle_suspendable(struct kbase_device *kbdev)
 {
 	bool suspend;
@@ -4055,18 +4177,28 @@ static bool scheduler_idle_suspendable(struct kbase_device *kbdev)
 		return false;

 	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	spin_lock(&scheduler->interrupt_lock);
 	if (scheduler->total_runnable_grps) {
-		spin_lock(&scheduler->interrupt_lock);

 		/* Check both on-slots and off-slots groups idle status */
 		suspend = kbase_csf_scheduler_all_csgs_idle(kbdev) &&
 			  !atomic_read(&scheduler->non_idle_offslot_grps) &&
 			  kbase_pm_idle_groups_sched_suspendable(kbdev);
-
-		spin_unlock(&scheduler->interrupt_lock);
 	} else
 		suspend = kbase_pm_no_runnables_sched_suspendable(kbdev);

+	/* Confirm that all groups are actually idle before proceeding with
+	 * suspension as groups might potentially become active again without
+	 * informing the scheduler in case userspace rings a doorbell directly.
+	 */
+	if (suspend && (unlikely(atomic_read(&scheduler->gpu_no_longer_idle)) ||
+			unlikely(!all_on_slot_groups_remained_idle(kbdev)))) {
+		dev_info(kbdev->dev,
+			 "GPU suspension skipped due to active CSGs");
+		suspend = false;
+	}
+
+	spin_unlock(&scheduler->interrupt_lock);
 	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);

 	return suspend;
@@ -4150,8 +4282,6 @@ static void gpu_idle_worker(struct work_struct *work)
 	}
 	mutex_lock(&scheduler->lock);

-	/* Cycle completed, disable the firmware idle timer */
-	disable_gpu_idle_fw_timer(kbdev);
 	scheduler_is_idle_suspendable = scheduler_idle_suspendable(kbdev);
 	if (scheduler_is_idle_suspendable) {
 		KBASE_KTRACE_ADD(kbdev, GPU_IDLE_HANDLING_START, NULL,
@@ -4177,6 +4307,7 @@ static void gpu_idle_worker(struct work_struct *work)
 static int scheduler_prepare(struct kbase_device *kbdev)
 {
 	struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
+	unsigned long flags;
 	int i;

 	lockdep_assert_held(&scheduler->lock);
@@ -4202,6 +4333,9 @@ static int scheduler_prepare(struct kbase_device *kbdev)
 	scheduler->num_csg_slots_for_tick = 0;
 	bitmap_zero(scheduler->csg_slots_prio_update, MAX_SUPPORTED_CSGS);

+	spin_lock_irqsave(&scheduler->interrupt_lock, flags);
+	scheduler->tick_protm_pending_seq =
+		KBASEP_TICK_PROTM_PEND_SCAN_SEQ_NR_INVALID;
 	/* Scan out to run groups */
 	for (i = 0; i < KBASE_QUEUE_GROUP_PRIORITY_COUNT; ++i) {
 		struct kbase_context *kctx;
@@ -4209,6 +4343,7 @@ static int scheduler_prepare(struct kbase_device *kbdev)
 		list_for_each_entry(kctx, &scheduler->runnable_kctxs, csf.link)
 			scheduler_ctx_scan_groups(kbdev, kctx, i);
 	}
+	spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);

 	/* Update this tick's non-idle groups */
 	scheduler->non_idle_scanout_grps = scheduler->ngrp_to_schedule;
@@ -4237,42 +4372,17 @@ static int scheduler_prepare(struct kbase_device *kbdev)
 	return 0;
 }

-static void scheduler_handle_idle_timer_onoff(struct kbase_device *kbdev)
-{
-	struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
-
-	lockdep_assert_held(&scheduler->lock);
-
-	/* After the scheduler apply operation, the internal variable
-	 * scheduler->non_idle_offslot_grps reflects the end-point view
-	 * of the count at the end of the active phase.
-	 *
-	 * Any changes that follow (after the scheduler has dropped the
-	 * scheduler->lock), reflects async operations to the scheduler,
-	 * such as a group gets killed (evicted) or a new group inserted,
-	 * cqs wait-sync triggered state transtion etc.
-	 *
-	 * The condition for enable the idle timer is that there is no
-	 * non-idle groups off-slots. If there is non-idle group off-slot,
-	 * the timer should be disabled.
-	 */
-	if (atomic_read(&scheduler->non_idle_offslot_grps))
-		disable_gpu_idle_fw_timer(kbdev);
-	else
-		enable_gpu_idle_fw_timer(kbdev);
-}
-
 /**
 * keep_lru_on_slots() - Check the condition for LRU is met.
 *
+ * @kbdev: Pointer to the device.
+ *
 * This function tries to maintain the Last-Recent-Use case on slots, when
 * the scheduler has no non-idle off-slot CSGs for a replacement
 * consideration. This effectively extends the previous scheduling results
 * for the new one. That is, the last recent used CSGs are retained on slots
 * for the new tick/tock action.
 *
- * @kbdev: Pointer to the device.
- *
 * Return: true for avoiding on-slot CSGs changes (i.e. keep existing LRU),
 *         otherwise false.
 */
@@ -4294,10 +4404,6 @@ static bool keep_lru_on_slots(struct kbase_device *kbdev)
 		 */
 		keep_lru = kbase_csf_scheduler_all_csgs_idle(kbdev);

-		if (keep_lru && !scheduler->gpu_idle_fw_timer_enabled) {
-			scheduler->gpu_idle_fw_timer_enabled = true;
-			kbase_csf_firmware_enable_gpu_idle_timer(kbdev);
-		}
 		spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);

 		dev_dbg(kbdev->dev, "Keep_LRU: %d, CSGs on-slots: %d\n",
@@ -4311,6 +4417,8 @@ static bool keep_lru_on_slots(struct kbase_device *kbdev)
 * prepare_fast_local_tock() - making preparation arrangement for exercizing
 *                             a fast local tock inside scheduling-actions.
 *
+ * @kbdev:  Pointer to the GPU device.
+ *
 * The function assumes that a scheduling action of firing a fast local tock
 * call (i.e. an equivalent tock action without dropping the lock) is desired
 * if there are idle onslot CSGs. The function updates those affected CSGs'
@@ -4320,8 +4428,6 @@ static bool keep_lru_on_slots(struct kbase_device *kbdev)
 * plus some potential newly idle CSGs in the scheduling action committing
 * steps.
 *
- * @kbdev:  Pointer to the GPU device.
- *
 * Return: number of on-slots CSGs that can be considered for replacing.
 */
 static int prepare_fast_local_tock(struct kbase_device *kbdev)
@@ -4408,6 +4514,17 @@ static void schedule_actions(struct kbase_device *kbdev, bool is_tick)

 redo_local_tock:
 	scheduler_prepare(kbdev);
+	/* Need to specifically enqueue the GPU idle work if there are no groups
+	 * to schedule despite the runnable groups. This scenario will happen
+	 * if System suspend is done when all groups are idle and no work
+	 * is submitted for the groups after the System resume.
+	 */
+	if (unlikely(!scheduler->ngrp_to_schedule &&
+		     scheduler->total_runnable_grps)) {
+		dev_dbg(kbdev->dev, "No groups to schedule in the tick");
+		enqueue_gpu_idle_work(scheduler);
+		return;
+	}
 	spin_lock_irqsave(&scheduler->interrupt_lock, flags);
 	protm_grp = scheduler->active_protm_grp;

@@ -4423,6 +4540,7 @@ redo_local_tock:
 	 */
 	if (protm_grp && scheduler->top_grp == protm_grp) {
 		int new_val;
+
 		dev_dbg(kbdev->dev, "Scheduler keep protm exec: group-%d",
 			protm_grp->handle);
 		new_val = atomic_dec_return(&scheduler->non_idle_offslot_grps);
@@ -4452,11 +4570,6 @@ redo_local_tock:

 		scheduler_apply(kbdev);

-		/* Post-apply, all the committed groups in this tick are on
-		 * slots, time to arrange the idle timer on/off decision.
-		 */
-		scheduler_handle_idle_timer_onoff(kbdev);
-
 		/* Scheduler is dropping the exec of the previous protm_grp,
 		 * Until the protm quit completes, the GPU is effectively
 		 * locked in the secure mode.
@@ -4491,7 +4604,6 @@ redo_local_tock:
 	}

 	spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
-	return;
 }

 /**
@@ -4576,7 +4688,7 @@ static void schedule_on_tock(struct work_struct *work)

 	scheduler->state = SCHED_INACTIVE;
 	if (!scheduler->total_runnable_grps)
-		queue_work(system_wq, &scheduler->gpu_idle_work);
+		enqueue_gpu_idle_work(scheduler);
 	mutex_unlock(&scheduler->lock);
 	kbase_reset_gpu_allow(kbdev);

@@ -4627,8 +4739,9 @@ static void schedule_on_tick(struct work_struct *work)
 		dev_dbg(kbdev->dev,
 			"scheduling for next tick, num_runnable_groups:%u\n",
 			scheduler->total_runnable_grps);
-	} else if (!scheduler->total_runnable_grps)
-		queue_work(system_wq, &scheduler->gpu_idle_work);
+	} else if (!scheduler->total_runnable_grps) {
+		enqueue_gpu_idle_work(scheduler);
+	}

 	scheduler->state = SCHED_INACTIVE;
 	mutex_unlock(&scheduler->lock);
@@ -5044,7 +5157,6 @@ static void firmware_aliveness_monitor(struct work_struct *work)
 exit:
 	mutex_unlock(&kbdev->csf.scheduler.lock);
 	kbase_reset_gpu_allow(kbdev);
-	return;
 }

 int kbase_csf_scheduler_group_copy_suspend_buf(struct kbase_queue_group *group,
@@ -5289,6 +5401,8 @@ void kbase_csf_scheduler_group_protm_enter(struct kbase_queue_group *group)

 	mutex_lock(&scheduler->lock);

+	if (group->run_state == KBASE_CSF_GROUP_IDLE)
+		group->run_state = KBASE_CSF_GROUP_RUNNABLE;
 	/* Check if the group is now eligible for execution in protected mode. */
 	if (scheduler_get_protm_enter_async_group(kbdev, group))
 		scheduler_group_check_protm_enter(kbdev, group);
@@ -5457,6 +5571,11 @@ static void check_sync_update_in_sleep_mode(struct kbase_device *kbdev)
 			continue;

 		if (check_sync_update_for_on_slot_group(group)) {
+			/* As sync update has been performed for an on-slot
+			 * group, when MCU is in sleep state, ring the doorbell
+			 * so that FW can re-evaluate the SYNC_WAIT on wakeup.
+			 */
+			kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR);
 			scheduler_wakeup(kbdev, true);
 			return;
 		}
@@ -5529,6 +5648,7 @@ enum kbase_csf_event_callback_action check_group_sync_update_cb(void *param)
 	struct kbase_context *const kctx = param;

 	KBASE_KTRACE_ADD(kctx->kbdev, SYNC_UPDATE_EVENT, kctx, 0u);
+
 	queue_work(kctx->csf.sched.sync_update_wq,
 		&kctx->csf.sched.sync_update_work);

@@ -5610,6 +5730,14 @@ int kbase_csf_scheduler_early_init(struct kbase_device *kbdev)
 		dev_err(kbdev->dev, "Failed to allocate scheduler workqueue\n");
 		return -ENOMEM;
 	}
+	scheduler->idle_wq = alloc_ordered_workqueue(
+		"csf_scheduler_gpu_idle_wq", WQ_HIGHPRI);
+	if (!scheduler->idle_wq) {
+		dev_err(kbdev->dev,
+			"Failed to allocate GPU idle scheduler workqueue\n");
+		destroy_workqueue(kbdev->csf.scheduler.wq);
+		return -ENOMEM;
+	}

 	INIT_WORK(&scheduler->tick_work, schedule_on_tick);
 	INIT_DEFERRABLE_WORK(&scheduler->tock_work, schedule_on_tock);
@@ -5636,11 +5764,11 @@ int kbase_csf_scheduler_early_init(struct kbase_device *kbdev)
 	scheduler->last_schedule = 0;
 	scheduler->tock_pending_request = false;
 	scheduler->active_protm_grp = NULL;
-	scheduler->gpu_idle_fw_timer_enabled = false;
 	scheduler->csg_scheduling_period_ms = CSF_SCHEDULER_TIME_TICK_MS;
 	scheduler_doorbell_init(kbdev);

 	INIT_WORK(&scheduler->gpu_idle_work, gpu_idle_worker);
+	atomic_set(&scheduler->gpu_no_longer_idle, false);
 	atomic_set(&scheduler->non_idle_offslot_grps, 0);

 	hrtimer_init(&scheduler->tick_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
@@ -5684,6 +5812,8 @@ void kbase_csf_scheduler_term(struct kbase_device *kbdev)

 void kbase_csf_scheduler_early_term(struct kbase_device *kbdev)
 {
+	if (kbdev->csf.scheduler.idle_wq)
+		destroy_workqueue(kbdev->csf.scheduler.idle_wq);
 	if (kbdev->csf.scheduler.wq)
 		destroy_workqueue(kbdev->csf.scheduler.wq);
 }
@@ -5715,7 +5845,7 @@ static void scheduler_enable_tick_timer_nolock(struct kbase_device *kbdev)
 		enqueue_tick_work(kbdev);
 		dev_dbg(kbdev->dev, "Re-enabling the scheduler timer\n");
 	} else if (scheduler->state != SCHED_SUSPENDED) {
-		queue_work(system_wq, &scheduler->gpu_idle_work);
+		enqueue_gpu_idle_work(scheduler);
 	}
 }

@@ -5805,8 +5935,6 @@ int kbase_csf_scheduler_pm_suspend(struct kbase_device *kbdev)

 	mutex_lock(&scheduler->lock);

-	disable_gpu_idle_fw_timer(kbdev);
-
 #ifdef KBASE_PM_RUNTIME
 	/* If scheduler is in sleeping state, then MCU needs to be activated
 	 * to suspend CSGs.
@@ -5959,7 +6087,7 @@ void kbase_csf_scheduler_reval_idleness_post_sleep(struct kbase_device *kbdev)
 			&kbdev->csf.global_iface.groups[csg_nr];
 		bool csg_idle;

-		 if (!kbdev->csf.scheduler.csg_slots[csg_nr].resident_group)
+		if (!kbdev->csf.scheduler.csg_slots[csg_nr].resident_group)
 			continue;

 		csg_idle =
--- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_scheduler.h
+++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_scheduler.h
@@ -569,6 +569,15 @@ void kbase_csf_scheduler_reval_idleness_post_sleep(struct kbase_device *kbdev);
 int kbase_csf_scheduler_handle_runtime_suspend(struct kbase_device *kbdev);
 #endif

+/**
+ * kbase_csf_scheduler_process_gpu_idle_event() - Process GPU idle IRQ
+ *
+ * @kbdev: Pointer to the device
+ *
+ * This function is called when a GPU idle IRQ has been raised.
+ */
+void kbase_csf_scheduler_process_gpu_idle_event(struct kbase_device *kbdev);
+
 /**
 * kbase_csf_scheduler_get_nr_active_csgs() - Get the number of active CSGs
 *
--- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tiler_heap.c
+++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tiler_heap.c
@@ -82,7 +82,7 @@ static struct kbase_csf_tiler_heap_chunk *get_last_chunk(
 * Unless the @chunk is the first in the kernel's list of chunks belonging to
 * a given tiler heap, this function stores the size and address of the @chunk
 * in the header of the preceding chunk. This requires the GPU memory region
- * containing the header to be be mapped temporarily, which can fail.
+ * containing the header to be mapped temporarily, which can fail.
 *
 * Return: 0 if successful or a negative error code on failure.
 */
@@ -204,8 +204,8 @@ static int create_chunk(struct kbase_csf_tiler_heap *const heap,

 	/* Allocate GPU memory for the new chunk. */
 	INIT_LIST_HEAD(&chunk->link);
-	chunk->region = kbase_mem_alloc(kctx, nr_pages, nr_pages, 0, &flags,
-					&chunk->gpu_va, mmu_sync_info);
+	chunk->region =
+		kbase_mem_alloc(kctx, nr_pages, nr_pages, 0, &flags, &chunk->gpu_va, mmu_sync_info);

 	if (unlikely(!chunk->region)) {
 		dev_err(kctx->kbdev->dev,
@@ -464,21 +464,18 @@ int kbase_csf_tiler_heap_init(struct kbase_context *const kctx,
 		err = -ENOMEM;
 	} else {
 		err = create_initial_chunks(heap, initial_chunks);
-		if (unlikely(err)) {
-			kbase_csf_heap_context_allocator_free(ctx_alloc,
-				heap->gpu_va);
-		}
+		if (unlikely(err))
+			kbase_csf_heap_context_allocator_free(ctx_alloc, heap->gpu_va);
 	}

 	if (unlikely(err)) {
 		kfree(heap);
 	} else {
-		struct kbase_csf_tiler_heap_chunk const *first_chunk =
-			list_first_entry(&heap->chunks_list,
-				struct kbase_csf_tiler_heap_chunk, link);
+		struct kbase_csf_tiler_heap_chunk const *chunk = list_first_entry(
+			&heap->chunks_list, struct kbase_csf_tiler_heap_chunk, link);

 		*heap_gpu_va = heap->gpu_va;
-		*first_chunk_va = first_chunk->gpu_va;
+		*first_chunk_va = chunk->gpu_va;

 		mutex_lock(&kctx->csf.tiler_heaps.lock);
 		kctx->csf.tiler_heaps.nr_of_heaps++;
@@ -488,17 +485,25 @@ int kbase_csf_tiler_heap_init(struct kbase_context *const kctx,
 		KBASE_TLSTREAM_AUX_TILER_HEAP_STATS(
 			kctx->kbdev, kctx->id, heap->heap_id,
 			PFN_UP(heap->chunk_size * heap->max_chunks),
-			PFN_UP(heap->chunk_size * heap->chunk_count),
-			heap->max_chunks, heap->chunk_size, heap->chunk_count,
-			heap->target_in_flight, 0);
+			PFN_UP(heap->chunk_size * heap->chunk_count), heap->max_chunks,
+			heap->chunk_size, heap->chunk_count, heap->target_in_flight, 0);

-		dev_dbg(kctx->kbdev->dev, "Created tiler heap 0x%llX\n",
-			heap->gpu_va);
+#if defined(CONFIG_MALI_VECTOR_DUMP)
+		list_for_each_entry(chunk, &heap->chunks_list, link) {
+			KBASE_TLSTREAM_JD_TILER_HEAP_CHUNK_ALLOC(
+				kctx->kbdev, kctx->id, heap->heap_id, chunk->gpu_va);
+		}
+#endif
+
+		dev_dbg(kctx->kbdev->dev, "Created tiler heap 0x%llX\n", heap->gpu_va);
 		mutex_unlock(&kctx->csf.tiler_heaps.lock);
 		kctx->running_total_tiler_heap_nr_chunks += heap->chunk_count;
-		kctx->running_total_tiler_heap_memory += heap->chunk_size * heap->chunk_count;
-		if (kctx->running_total_tiler_heap_memory > kctx->peak_total_tiler_heap_memory)
-			kctx->peak_total_tiler_heap_memory = kctx->running_total_tiler_heap_memory;
+		kctx->running_total_tiler_heap_memory +=
+			heap->chunk_size * heap->chunk_count;
+		if (kctx->running_total_tiler_heap_memory >
+		    kctx->peak_total_tiler_heap_memory)
+			kctx->peak_total_tiler_heap_memory =
+				kctx->running_total_tiler_heap_memory;
 	}
 	return err;
 }
@@ -609,6 +614,16 @@ int kbase_csf_tiler_heap_alloc_new_chunk(struct kbase_context *kctx,
 	if (likely(heap)) {
 		err = alloc_new_chunk(heap, nr_in_flight, pending_frag_count,
 			new_chunk_ptr);
+		if (likely(!err)) {
+			/* Update the running chunk count, running memory total and peak memory */
+			kctx->running_total_tiler_heap_nr_chunks++;
+			kctx->running_total_tiler_heap_memory += heap->chunk_size;
+
+			if (kctx->running_total_tiler_heap_memory >
+			    kctx->peak_total_tiler_heap_memory)
+				kctx->peak_total_tiler_heap_memory =
+					kctx->running_total_tiler_heap_memory;
+		}

 		KBASE_TLSTREAM_AUX_TILER_HEAP_STATS(
 			kctx->kbdev, kctx->id, heap->heap_id,
--- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tiler_heap_debugfs.h
+++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tiler_heap_debugfs.h
@@ -1,7 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
 /*
 *
- * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2020-2022 ARM Limited. All rights reserved.
 *
 * This program is free software and is provided to you under the terms of the
 * GNU General Public License version 2 as published by the Free Software
@@ -28,14 +28,14 @@ struct kbase_context;
 #define MALI_CSF_TILER_HEAP_DEBUGFS_VERSION 0

 /**
- * kbase_csf_tiler_heap_debugfs_init() - Create a debugfs entry for per context tiler heap
+ * kbase_csf_tiler_heap_debugfs_init - Create a debugfs entry for per context tiler heap
 *
 * @kctx: The kbase_context for which to create the debugfs entry
 */
 void kbase_csf_tiler_heap_debugfs_init(struct kbase_context *kctx);

 /**
- * kbase_csf_tiler_heap_total_debugfs_init() - Create a debugfs entry for per context tiler heap
+ * kbase_csf_tiler_heap_total_debugfs_init - Create a debugfs entry for per context tiler heap
 *
 * @kctx: The kbase_context for which to create the debugfs entry
 */
--- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tiler_heap_def.h
+++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tiler_heap_def.h
@@ -1,7 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
 /*
 *
- * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2020-2022 ARM Limited. All rights reserved.
 *
 * This program is free software and is provided to you under the terms of the
 * GNU General Public License version 2 as published by the Free Software
@@ -59,18 +59,18 @@
 /**
 * struct kbase_csf_tiler_heap_chunk - A tiler heap chunk managed by the kernel
 *
- * Chunks are allocated upon initialization of a tiler heap or in response to
- * out-of-memory events from the firmware. Chunks are always fully backed by
- * physical memory to avoid the overhead of processing GPU page faults. The
- * allocated GPU memory regions are linked together independent of the list of
- * kernel objects of this type.
- *
 * @link:   Link to this chunk in a list of chunks belonging to a
 *          @kbase_csf_tiler_heap.
 * @region: Pointer to the GPU memory region allocated for the chunk.
 * @gpu_va: GPU virtual address of the start of the memory region.
 *          This points to the header of the chunk and not to the low address
 *          of free memory within it.
+ *
+ * Chunks are allocated upon initialization of a tiler heap or in response to
+ * out-of-memory events from the firmware. Chunks are always fully backed by
+ * physical memory to avoid the overhead of processing GPU page faults. The
+ * allocated GPU memory regions are linked together independent of the list of
+ * kernel objects of this type.
 */
 struct kbase_csf_tiler_heap_chunk {
 	struct list_head link;
--- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_timeout.c
+++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_timeout.c
@@ -139,8 +139,7 @@ static ssize_t progress_timeout_show(struct device * const dev,

 }

-static DEVICE_ATTR(progress_timeout, 0644, progress_timeout_show,
-	progress_timeout_store);
+static DEVICE_ATTR_RW(progress_timeout);

 int kbase_csf_timeout_init(struct kbase_device *const kbdev)
 {
--- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tl_reader.c
+++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tl_reader.c
@@ -80,9 +80,8 @@ static int kbase_csf_tl_debugfs_poll_interval_write(void *data, u64 val)
 	struct kbase_device *kbdev = (struct kbase_device *)data;
 	struct kbase_csf_tl_reader *self = &kbdev->timeline->csf_tl_reader;

-	if (val > KBASE_CSF_TL_READ_INTERVAL_MAX || val < KBASE_CSF_TL_READ_INTERVAL_MIN) {
+	if (val > KBASE_CSF_TL_READ_INTERVAL_MAX || val < KBASE_CSF_TL_READ_INTERVAL_MIN)
 		return -EINVAL;
-	}

 	self->timer_interval = (u32)val;

@@ -96,7 +95,7 @@ DEFINE_DEBUGFS_ATTRIBUTE(kbase_csf_tl_poll_interval_fops,

 void kbase_csf_tl_reader_debugfs_init(struct kbase_device *kbdev)
 {
-	debugfs_create_file("csf_tl_poll_interval_in_ms", S_IRUGO | S_IWUSR,
+	debugfs_create_file("csf_tl_poll_interval_in_ms", 0644,
 		kbdev->debugfs_instr_directory, kbdev,
 		&kbase_csf_tl_poll_interval_fops);

@@ -406,9 +405,8 @@ static int tl_reader_init_late(
 		return -1;
 	}

-	if (kbase_ts_converter_init(&self->ts_converter, kbdev)) {
+	if (kbase_ts_converter_init(&self->ts_converter, kbdev))
 		return -1;
-	}

 	self->kbdev = kbdev;
 	self->trace_buffer = tb;
--- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tl_reader.h
+++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tl_reader.h
@@ -1,7 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
 /*
 *
- * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
 *
 * This program is free software and is provided to you under the terms of the
 * GNU General Public License version 2 as published by the Free Software
@@ -133,14 +133,12 @@ void kbase_csf_tl_reader_init(struct kbase_csf_tl_reader *self,
 void kbase_csf_tl_reader_term(struct kbase_csf_tl_reader *self);

 /**
- *  kbase_csf_tl_reader_flush_buffer() -
- *   Flush trace from buffer into CSFFW timeline stream.
+ *  kbase_csf_tl_reader_flush_buffer() - Flush trace from buffer into CSFFW timeline stream.
 *
 * @self:    CSFFW TL Reader instance.
 *
 * Return: Zero on success, negative error code (EBUSY) otherwise
 */
-
 int kbase_csf_tl_reader_flush_buffer(struct kbase_csf_tl_reader *self);

 /**
--- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_trace_buffer.c
+++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_trace_buffer.c
@@ -179,13 +179,13 @@ int kbase_csf_firmware_trace_buffers_init(struct kbase_device *kbdev)
 		extract_gpu_va =
 			(kbdev->csf.firmware_trace_buffers.mcu_rw.va_reg->start_pfn << PAGE_SHIFT) +
 			mcu_rw_offset;
-		extract_cpu_va = (u32*)(
+		extract_cpu_va = (u32 *)(
 			kbdev->csf.firmware_trace_buffers.mcu_rw.cpu_addr +
 			mcu_rw_offset);
 		insert_gpu_va =
 			(kbdev->csf.firmware_trace_buffers.mcu_write.va_reg->start_pfn << PAGE_SHIFT) +
 			mcu_write_offset;
-		insert_cpu_va = (u32*)(
+		insert_cpu_va = (u32 *)(
 			kbdev->csf.firmware_trace_buffers.mcu_write.cpu_addr +
 			mcu_write_offset);
 		data_buffer_gpu_va =
@@ -323,13 +323,13 @@ void kbase_csf_firmware_reload_trace_buffers_data(struct kbase_device *kbdev)
 		extract_gpu_va =
 			(kbdev->csf.firmware_trace_buffers.mcu_rw.va_reg->start_pfn << PAGE_SHIFT) +
 			mcu_rw_offset;
-		extract_cpu_va = (u32*)(
+		extract_cpu_va = (u32 *)(
 			kbdev->csf.firmware_trace_buffers.mcu_rw.cpu_addr +
 			mcu_rw_offset);
 		insert_gpu_va =
 			(kbdev->csf.firmware_trace_buffers.mcu_write.va_reg->start_pfn << PAGE_SHIFT) +
 			mcu_write_offset;
-		insert_cpu_va = (u32*)(
+		insert_cpu_va = (u32 *)(
 			kbdev->csf.firmware_trace_buffers.mcu_write.cpu_addr +
 			mcu_write_offset);
 		data_buffer_gpu_va =
--- a/drivers/gpu/arm/bifrost/device/backend/mali_kbase_device_csf.c
+++ b/drivers/gpu/arm/bifrost/device/backend/mali_kbase_device_csf.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
 /*
 *
- * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
 *
 * This program is free software and is provided to you under the terms of the
 * GNU General Public License version 2 as published by the Free Software
@@ -203,6 +203,8 @@ static void kbase_csf_early_term(struct kbase_device *kbdev)
 * kbase_device_hwcnt_watchdog_if_init - Create hardware counter watchdog
 *                                       interface.
 * @kbdev:	Device pointer
+ *
+ * Return: 0 if successful or a negative error code on failure.
 */
 static int kbase_device_hwcnt_watchdog_if_init(struct kbase_device *kbdev)
 {
@@ -245,8 +247,9 @@ static void kbase_device_hwcnt_backend_csf_if_term(struct kbase_device *kbdev)
 /**
 * kbase_device_hwcnt_backend_csf_init - Create hardware counter backend.
 * @kbdev:	Device pointer
+ *
+ * Return: 0 if successful or a negative error code on failure.
 */
-
 static int kbase_device_hwcnt_backend_csf_init(struct kbase_device *kbdev)
 {
 	return kbase_hwcnt_backend_csf_create(
@@ -390,7 +393,7 @@ int kbase_device_init(struct kbase_device *kbdev)
 * Hardware counter components depending on firmware are initialized after CSF
 * firmware is loaded.
 *
- * @return 0 on success. An error code on failure.
+ * Return: 0 on success. An error code on failure.
 */
 static int kbase_device_hwcnt_csf_deferred_init(struct kbase_device *kbdev)
 {
@@ -457,7 +460,7 @@ virt_fail:
 * To meet Android GKI vendor guideline, firmware load is deferred at
 * the time when @ref kbase_open is called for the first time.
 *
- * @return 0 on success. An error code on failure.
+ * Return: 0 on success. An error code on failure.
 */
 static int kbase_csf_firmware_deferred_init(struct kbase_device *kbdev)
 {
--- a/drivers/gpu/arm/bifrost/device/backend/mali_kbase_device_hw_csf.c
+++ b/drivers/gpu/arm/bifrost/device/backend/mali_kbase_device_hw_csf.c
@@ -133,8 +133,12 @@ void kbase_gpu_interrupt(struct kbase_device *kbdev, u32 val)
 	if (val & RESET_COMPLETED)
 		kbase_pm_reset_done(kbdev);

-	KBASE_KTRACE_ADD(kbdev, CORE_GPU_IRQ_CLEAR, NULL, val);
-	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), val);
+	/* Defer clearing CLEAN_CACHES_COMPLETED to kbase_clean_caches_done,
+	 * which clears it while holding hwaccess_lock. This avoids a race
+	 * condition with kbase_gpu_cache_flush_and_busy_wait.
+	 */
+	KBASE_KTRACE_ADD(kbdev, CORE_GPU_IRQ_CLEAR, NULL, val & ~CLEAN_CACHES_COMPLETED);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), val & ~CLEAN_CACHES_COMPLETED);

 #ifdef KBASE_PM_RUNTIME
 	if (val & DOORBELL_MIRROR) {
--- a/drivers/gpu/arm/bifrost/device/backend/mali_kbase_device_hw_jm.c
+++ b/drivers/gpu/arm/bifrost/device/backend/mali_kbase_device_hw_jm.c
@@ -66,8 +66,12 @@ void kbase_gpu_interrupt(struct kbase_device *kbdev, u32 val)
 	if (val & PRFCNT_SAMPLE_COMPLETED)
 		kbase_instr_hwcnt_sample_done(kbdev);

-	KBASE_KTRACE_ADD(kbdev, CORE_GPU_IRQ_CLEAR, NULL, val);
-	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), val);
+	/* Defer clearing CLEAN_CACHES_COMPLETED to kbase_clean_caches_done,
+	 * which clears it while holding hwaccess_lock. This avoids a race
+	 * condition with kbase_gpu_cache_flush_and_busy_wait.
+	 */
+	KBASE_KTRACE_ADD(kbdev, CORE_GPU_IRQ_CLEAR, NULL, val & ~CLEAN_CACHES_COMPLETED);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), val & ~CLEAN_CACHES_COMPLETED);

 	/* kbase_pm_check_transitions (called by kbase_pm_power_changed) must
 	 * be called after the IRQ has been cleared. This is because it might
--- a/drivers/gpu/arm/bifrost/device/backend/mali_kbase_device_jm.c
+++ b/drivers/gpu/arm/bifrost/device/backend/mali_kbase_device_jm.c
@@ -27,6 +27,9 @@
 #include <mali_kbase_hwaccess_backend.h>
 #include <mali_kbase_ctx_sched.h>
 #include <mali_kbase_reset_gpu.h>
+#include <mali_kbase_hwcnt_watchdog_if_timer.h>
+#include <mali_kbase_hwcnt_backend_jm.h>
+#include <mali_kbase_hwcnt_backend_jm_watchdog.h>

 #if IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI)
 #include <backend/gpu/mali_kbase_model_linux.h>
@@ -148,73 +151,115 @@ static void kbase_backend_late_term(struct kbase_device *kbdev)
 	kbase_hwaccess_pm_term(kbdev);
 }

-static int kbase_device_hwcnt_backend_jm_init(struct kbase_device *kbdev)
+/**
+ * kbase_device_hwcnt_watchdog_if_init - Create hardware counter watchdog
+ *                                       interface.
+ * @kbdev:	Device pointer
+ * Return: 0 on success, or an error code on failure.
+ */
+static int kbase_device_hwcnt_watchdog_if_init(struct kbase_device *kbdev)
 {
-	return kbase_hwcnt_backend_jm_create(kbdev, &kbdev->hwcnt_gpu_iface);
+	return kbase_hwcnt_watchdog_if_timer_create(&kbdev->hwcnt_watchdog_timer);
 }

+/**
+ * kbase_device_hwcnt_watchdog_if_term - Terminate hardware counter watchdog
+ *                                       interface.
+ * @kbdev:	Device pointer
+ */
+static void kbase_device_hwcnt_watchdog_if_term(struct kbase_device *kbdev)
+{
+	kbase_hwcnt_watchdog_if_timer_destroy(&kbdev->hwcnt_watchdog_timer);
+}
+
+/**
+ * kbase_device_hwcnt_backend_jm_init - Create hardware counter backend.
+ * @kbdev:	Device pointer
+ * Return: 0 on success, or an error code on failure.
+ */
+static int kbase_device_hwcnt_backend_jm_init(struct kbase_device *kbdev)
+{
+	return kbase_hwcnt_backend_jm_create(kbdev, &kbdev->hwcnt_gpu_jm_backend);
+}
+
+/**
+ * kbase_device_hwcnt_backend_jm_term - Terminate hardware counter backend.
+ * @kbdev:	Device pointer
+ */
 static void kbase_device_hwcnt_backend_jm_term(struct kbase_device *kbdev)
 {
-	kbase_hwcnt_backend_jm_destroy(&kbdev->hwcnt_gpu_iface);
+	kbase_hwcnt_backend_jm_destroy(&kbdev->hwcnt_gpu_jm_backend);
+}
+
+/**
+ * kbase_device_hwcnt_backend_jm_watchdog_init - Create hardware counter watchdog backend.
+ * @kbdev:	Device pointer
+ * Return: 0 on success, or an error code on failure.
+ */
+static int kbase_device_hwcnt_backend_jm_watchdog_init(struct kbase_device *kbdev)
+{
+	return kbase_hwcnt_backend_jm_watchdog_create(&kbdev->hwcnt_gpu_jm_backend,
+						      &kbdev->hwcnt_watchdog_timer,
+						      &kbdev->hwcnt_gpu_iface);
+}
+
+/**
+ * kbase_device_hwcnt_backend_jm_watchdog_term - Terminate hardware counter watchdog backend.
+ * @kbdev:	Device pointer
+ */
+static void kbase_device_hwcnt_backend_jm_watchdog_term(struct kbase_device *kbdev)
+{
+	kbase_hwcnt_backend_jm_watchdog_destroy(&kbdev->hwcnt_gpu_iface);
 }

 static const struct kbase_device_init dev_init[] = {
 #if IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI)
-	{ kbase_gpu_device_create, kbase_gpu_device_destroy,
-	  "Dummy model initialization failed" },
+	{ kbase_gpu_device_create, kbase_gpu_device_destroy, "Dummy model initialization failed" },
 #else
 	{ assign_irqs, NULL, "IRQ search failed" },
 	{ registers_map, registers_unmap, "Register map failed" },
 #endif
 	{ kbase_device_io_history_init, kbase_device_io_history_term,
 	  "Register access history initialization failed" },
-	{ kbase_device_pm_init, kbase_device_pm_term,
-	  "Power management initialization failed" },
-	{ kbase_device_early_init, kbase_device_early_term,
-	  "Early device initialization failed" },
-	{ kbase_device_populate_max_freq, NULL,
-	  "Populating max frequency failed" },
+	{ kbase_device_pm_init, kbase_device_pm_term, "Power management initialization failed" },
+	{ kbase_device_early_init, kbase_device_early_term, "Early device initialization failed" },
+	{ kbase_device_populate_max_freq, NULL, "Populating max frequency failed" },
 	{ kbase_device_misc_init, kbase_device_misc_term,
 	  "Miscellaneous device initialization failed" },
 	{ kbase_device_pcm_dev_init, kbase_device_pcm_dev_term,
 	  "Priority control manager initialization failed" },
-	{ kbase_ctx_sched_init, kbase_ctx_sched_term,
-	  "Context scheduler initialization failed" },
-	{ kbase_mem_init, kbase_mem_term,
-	  "Memory subsystem initialization failed" },
+	{ kbase_ctx_sched_init, kbase_ctx_sched_term, "Context scheduler initialization failed" },
+	{ kbase_mem_init, kbase_mem_term, "Memory subsystem initialization failed" },
 	{ kbase_device_coherency_init, NULL, "Device coherency init failed" },
 	{ kbase_protected_mode_init, kbase_protected_mode_term,
 	  "Protected mode subsystem initialization failed" },
-	{ kbase_device_list_init, kbase_device_list_term,
-	  "Device list setup failed" },
-	{ kbasep_js_devdata_init, kbasep_js_devdata_term,
-	  "Job JS devdata initialization failed" },
+	{ kbase_device_list_init, kbase_device_list_term, "Device list setup failed" },
+	{ kbasep_js_devdata_init, kbasep_js_devdata_term, "Job JS devdata initialization failed" },
 	{ kbase_device_timeline_init, kbase_device_timeline_term,
 	  "Timeline stream initialization failed" },
 	{ kbase_clk_rate_trace_manager_init, kbase_clk_rate_trace_manager_term,
 	  "Clock rate trace manager initialization failed" },
-	{ kbase_lowest_gpu_freq_init, NULL,
-	  "Lowest freq initialization failed" },
+	{ kbase_lowest_gpu_freq_init, NULL, "Lowest freq initialization failed" },
 	{ kbase_instr_backend_init, kbase_instr_backend_term,
 	  "Instrumentation backend initialization failed" },
-	{ kbase_device_hwcnt_backend_jm_init,
-	  kbase_device_hwcnt_backend_jm_term,
+	{ kbase_device_hwcnt_watchdog_if_init, kbase_device_hwcnt_watchdog_if_term,
+	  "GPU hwcnt backend watchdog interface creation failed" },
+	{ kbase_device_hwcnt_backend_jm_init, kbase_device_hwcnt_backend_jm_term,
 	  "GPU hwcnt backend creation failed" },
+	{ kbase_device_hwcnt_backend_jm_watchdog_init, kbase_device_hwcnt_backend_jm_watchdog_term,
+	  "GPU hwcnt watchdog backend creation failed" },
 	{ kbase_device_hwcnt_context_init, kbase_device_hwcnt_context_term,
 	  "GPU hwcnt context initialization failed" },
-	{ kbase_device_hwcnt_virtualizer_init,
-	  kbase_device_hwcnt_virtualizer_term,
+	{ kbase_device_hwcnt_virtualizer_init, kbase_device_hwcnt_virtualizer_term,
 	  "GPU hwcnt virtualizer initialization failed" },
 	{ kbase_device_vinstr_init, kbase_device_vinstr_term,
 	  "Virtual instrumentation initialization failed" },
 	{ kbase_device_kinstr_prfcnt_init, kbase_device_kinstr_prfcnt_term,
 	  "Performance counter instrumentation initialization failed" },
-	{ kbase_backend_late_init, kbase_backend_late_term,
-	  "Late backend initialization failed" },
+	{ kbase_backend_late_init, kbase_backend_late_term, "Late backend initialization failed" },
 	{ kbase_debug_job_fault_dev_init, kbase_debug_job_fault_dev_term,
 	  "Job fault debug initialization failed" },
-	{ kbase_device_debugfs_init, kbase_device_debugfs_term,
-	  "DebugFS initialization failed" },
+	{ kbase_device_debugfs_init, kbase_device_debugfs_term, "DebugFS initialization failed" },
 	/* Sysfs init needs to happen before registering the device with
 	 * misc_register(), otherwise it causes a race condition between
 	 * registering the device and a uevent event being generated for
@@ -233,8 +278,7 @@ static const struct kbase_device_init dev_init[] = {
 	{ kbase_gpuprops_populate_user_buffer, kbase_gpuprops_free_user_buffer,
 	  "GPU property population failed" },
 	{ NULL, kbase_dummy_job_wa_cleanup, NULL },
-	{ kbase_device_late_init, kbase_device_late_term,
-	  "Late device initialization failed" },
+	{ kbase_device_late_init, kbase_device_late_term, "Late device initialization failed" },
 };

 static void kbase_device_term_partial(struct kbase_device *kbdev,
--- a/drivers/gpu/arm/bifrost/device/mali_kbase_device.c
+++ b/drivers/gpu/arm/bifrost/device/mali_kbase_device.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
 /*
 *
- * (C) COPYRIGHT 2010-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2022 ARM Limited. All rights reserved.
 *
 * This program is free software and is provided to you under the terms of the
 * GNU General Public License version 2 as published by the Free Software
@@ -166,8 +166,11 @@ void kbase_device_pcm_dev_term(struct kbase_device *const kbdev)
 * @nb: notifier block - used to retrieve kbdev pointer
 * @action: action (unused)
 * @data: data pointer (unused)
+ *
 * This function simply lists memory usage by the Mali driver, per GPU device,
 * for diagnostic purposes.
+ *
+ * Return: NOTIFY_OK on success, NOTIFY_BAD otherwise.
 */
 static int mali_oom_notifier_handler(struct notifier_block *nb,
 				     unsigned long action, void *data)
@@ -189,7 +192,7 @@ static int mali_oom_notifier_handler(struct notifier_block *nb,

 	mutex_lock(&kbdev->kctx_list_lock);

-	list_for_each_entry (kctx, &kbdev->kctx_list, kctx_list_link) {
+	list_for_each_entry(kctx, &kbdev->kctx_list, kctx_list_link) {
 		struct pid *pid_struct;
 		struct task_struct *task;
 		unsigned long task_alloc_total =
@@ -483,6 +486,7 @@ int kbase_device_early_init(struct kbase_device *kbdev)
 {
 	int err;

+
 	err = kbasep_platform_device_init(kbdev);
 	if (err)
 		return err;
--- a/drivers/gpu/arm/bifrost/device/mali_kbase_device.h
+++ b/drivers/gpu/arm/bifrost/device/mali_kbase_device.h
@@ -1,7 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
 /*
 *
- * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
 *
 * This program is free software and is provided to you under the terms of the
 * GNU General Public License version 2 as published by the Free Software
@@ -23,7 +23,6 @@

 /**
 * kbase_device_get_list - get device list.
- *
 * Get access to device list.
 *
 * Return: Pointer to the linked list head.
@@ -55,18 +54,18 @@ void kbase_increment_device_id(void);
 * When a device file is opened for the first time,
 * load firmware and initialize hardware counter components.
 *
- * @return 0 on success. An error code on failure.
+ * Return: 0 on success. An error code on failure.
 */
 int kbase_device_firmware_init_once(struct kbase_device *kbdev);

 /**
 * kbase_device_init - Device initialisation.
 *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
 * This is called from device probe to initialise various other
 * components needed.
 *
- * @kbdev: The kbase device structure for the device (must be a valid pointer)
- *
 * Return: 0 on success and non-zero value on failure.
 */
 int kbase_device_init(struct kbase_device *kbdev);
@@ -74,11 +73,10 @@ int kbase_device_init(struct kbase_device *kbdev);
 /**
 * kbase_device_term - Device termination.
 *
- * This is called from device remove to terminate various components that
- * were initialised during kbase_device_init.
- *
 * @kbdev: The kbase device structure for the device (must be a valid pointer)
 *
+ * This is called from device remove to terminate various components that
+ * were initialised during kbase_device_init.
 */
 void kbase_device_term(struct kbase_device *kbdev);

--- a/drivers/gpu/arm/bifrost/device/mali_kbase_device_hw.c
+++ b/drivers/gpu/arm/bifrost/device/mali_kbase_device_hw.c
@@ -63,6 +63,7 @@ static int busy_wait_cache_clean_irq(struct kbase_device *kbdev)
 	}

 	/* Clear the interrupt CLEAN_CACHES_COMPLETED bit. */
+	KBASE_KTRACE_ADD(kbdev, CORE_GPU_IRQ_CLEAR, NULL, CLEAN_CACHES_COMPLETED);
 	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR),
 			CLEAN_CACHES_COMPLETED);

@@ -72,7 +73,6 @@ static int busy_wait_cache_clean_irq(struct kbase_device *kbdev)
 int kbase_gpu_cache_flush_and_busy_wait(struct kbase_device *kbdev,
 					u32 flush_op)
 {
-	u32 irq_mask;
 	int need_to_wake_up = 0;
 	int ret = 0;

@@ -81,17 +81,18 @@ int kbase_gpu_cache_flush_and_busy_wait(struct kbase_device *kbdev,
 	 */
 	lockdep_assert_held(&kbdev->hwaccess_lock);

-	/* 1. Check if CLEAN_CACHES_COMPLETED irq mask bit is set.
+	/* 1. Check if kbdev->cache_clean_in_progress is set.
 	 *    If it is set, it means there are threads waiting for
-	 *    CLEAN_CACHES_COMPLETED irq to be raised.
+	 *    CLEAN_CACHES_COMPLETED irq to be raised and that the
+	 *    corresponding irq mask bit is set.
 	 *    We'll clear the irq mask bit and busy-wait for the cache
 	 *    clean operation to complete before submitting the cache
 	 *    clean command required after the GPU page table update.
 	 *    Pended flush commands will be merged to requested command.
 	 */
-	irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK));
-	if (irq_mask & CLEAN_CACHES_COMPLETED) {
+	if (kbdev->cache_clean_in_progress) {
 		/* disable irq first */
+		u32 irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK));
 		kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK),
 				irq_mask & ~CLEAN_CACHES_COMPLETED);

@@ -182,22 +183,28 @@ void kbase_clean_caches_done(struct kbase_device *kbdev)

 	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);

-	if (kbdev->cache_clean_queued) {
-		u32 pended_flush_op = kbdev->cache_clean_queued;
+	if (kbdev->cache_clean_in_progress) {
+		/* Clear the interrupt CLEAN_CACHES_COMPLETED bit if set.
+		 * It might have already been done by kbase_gpu_cache_flush_and_busy_wait.
+		 */
+		KBASE_KTRACE_ADD(kbdev, CORE_GPU_IRQ_CLEAR, NULL, CLEAN_CACHES_COMPLETED);
+		kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), CLEAN_CACHES_COMPLETED);

-		kbdev->cache_clean_queued = 0;
+		if (kbdev->cache_clean_queued) {
+			u32 pended_flush_op = kbdev->cache_clean_queued;

-		KBASE_KTRACE_ADD(kbdev, CORE_GPU_CLEAN_INV_CACHES, NULL,
-				 pended_flush_op);
-		kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
-				pended_flush_op);
-	} else {
-		/* Disable interrupt */
-		irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK));
-		kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK),
-				irq_mask & ~CLEAN_CACHES_COMPLETED);
+			kbdev->cache_clean_queued = 0;

-		kbase_gpu_cache_clean_wait_complete(kbdev);
+			KBASE_KTRACE_ADD(kbdev, CORE_GPU_CLEAN_INV_CACHES, NULL, pended_flush_op);
+			kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), pended_flush_op);
+		} else {
+			/* Disable interrupt */
+			irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK));
+			kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK),
+					irq_mask & ~CLEAN_CACHES_COMPLETED);
+
+			kbase_gpu_cache_clean_wait_complete(kbdev);
+		}
 	}

 	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
--- a/drivers/gpu/arm/bifrost/gpu/backend/mali_kbase_gpu_fault_csf.c
+++ b/drivers/gpu/arm/bifrost/gpu/backend/mali_kbase_gpu_fault_csf.c
@@ -20,7 +20,7 @@
 */

 #include <mali_kbase.h>
-#include <uapi/gpu/arm/bifrost/csf/mali_gpu_csf_registers.h>
+#include <csf/mali_kbase_csf_registers.h>
 #include <gpu/mali_kbase_gpu_fault.h>

 const char *kbase_gpu_exception_name(u32 const exception_code)
--- a/include/uapi/gpu/arm/bifrost/gpu/backend/mali_kbase_gpu_regmap_csf.h
+++ b/include/uapi/gpu/arm/bifrost/gpu/backend/mali_kbase_gpu_regmap_csf.h
@@ -19,8 +19,8 @@
 *
 */

-#ifndef _UAPI_KBASE_GPU_REGMAP_CSF_H_
-#define _UAPI_KBASE_GPU_REGMAP_CSF_H_
+#ifndef _KBASE_GPU_REGMAP_CSF_H_
+#define _KBASE_GPU_REGMAP_CSF_H_

 #include <linux/types.h>

@@ -365,4 +365,4 @@
 /* GPU_CONTROL_MCU.GPU_IRQ_RAWSTAT */
 #define PRFCNT_SAMPLE_COMPLETED (1 << 16)   /* Set when performance count sample has completed */

-#endif /* _UAPI_KBASE_GPU_REGMAP_CSF_H_ */
+#endif /* _KBASE_GPU_REGMAP_CSF_H_ */
--- a/drivers/gpu/arm/bifrost/gpu/backend/mali_kbase_gpu_regmap_jm.h
+++ b/drivers/gpu/arm/bifrost/gpu/backend/mali_kbase_gpu_regmap_jm.h
@@ -0,0 +1,293 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ *
+ * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+#ifndef _KBASE_GPU_REGMAP_JM_H_
+#define _KBASE_GPU_REGMAP_JM_H_
+
+#if MALI_USE_CSF && defined(__KERNEL__)
+#error "Cannot be compiled with CSF"
+#endif
+
+/* Set to implementation defined, outer caching */
+#define AS_MEMATTR_AARCH64_OUTER_IMPL_DEF 0x88ull
+/* Set to write back memory, outer caching */
+#define AS_MEMATTR_AARCH64_OUTER_WA       0x8Dull
+/* Set to inner non-cacheable, outer non-cacheable
+ * Setting defined by the alloc bits is ignored, but set to a valid encoding:
+ * - no-alloc on read
+ * - no-alloc on write
+ */
+#define AS_MEMATTR_AARCH64_NON_CACHEABLE  0x4Cull
+
+/* Symbols for default MEMATTR to use
+ * Default is - HW implementation defined caching
+ */
+#define AS_MEMATTR_INDEX_DEFAULT               0
+#define AS_MEMATTR_INDEX_DEFAULT_ACE           3
+
+/* HW implementation defined caching */
+#define AS_MEMATTR_INDEX_IMPL_DEF_CACHE_POLICY 0
+/* Force cache on */
+#define AS_MEMATTR_INDEX_FORCE_TO_CACHE_ALL    1
+/* Write-alloc */
+#define AS_MEMATTR_INDEX_WRITE_ALLOC           2
+/* Outer coherent, inner implementation defined policy */
+#define AS_MEMATTR_INDEX_OUTER_IMPL_DEF        3
+/* Outer coherent, write alloc inner */
+#define AS_MEMATTR_INDEX_OUTER_WA              4
+/* Normal memory, inner non-cacheable, outer non-cacheable (ARMv8 mode only) */
+#define AS_MEMATTR_INDEX_NON_CACHEABLE         5
+
+/* GPU control registers */
+
+#define CORE_FEATURES           0x008   /* (RO) Shader Core Features */
+#define JS_PRESENT              0x01C   /* (RO) Job slots present */
+
+#define PRFCNT_BASE_LO   0x060  /* (RW) Performance counter memory
+				 * region base address, low word
+				 */
+#define PRFCNT_BASE_HI   0x064  /* (RW) Performance counter memory
+				 * region base address, high word
+				 */
+#define PRFCNT_CONFIG    0x068  /* (RW) Performance counter
+				 * configuration
+				 */
+#define PRFCNT_JM_EN     0x06C  /* (RW) Performance counter enable
+				 * flags for Job Manager
+				 */
+#define PRFCNT_SHADER_EN 0x070  /* (RW) Performance counter enable
+				 * flags for shader cores
+				 */
+#define PRFCNT_TILER_EN  0x074  /* (RW) Performance counter enable
+				 * flags for tiler
+				 */
+#define PRFCNT_MMU_L2_EN 0x07C  /* (RW) Performance counter enable
+				 * flags for MMU/L2 cache
+				 */
+
+#define JS0_FEATURES            0x0C0   /* (RO) Features of job slot 0 */
+#define JS1_FEATURES            0x0C4   /* (RO) Features of job slot 1 */
+#define JS2_FEATURES            0x0C8   /* (RO) Features of job slot 2 */
+#define JS3_FEATURES            0x0CC   /* (RO) Features of job slot 3 */
+#define JS4_FEATURES            0x0D0   /* (RO) Features of job slot 4 */
+#define JS5_FEATURES            0x0D4   /* (RO) Features of job slot 5 */
+#define JS6_FEATURES            0x0D8   /* (RO) Features of job slot 6 */
+#define JS7_FEATURES            0x0DC   /* (RO) Features of job slot 7 */
+#define JS8_FEATURES            0x0E0   /* (RO) Features of job slot 8 */
+#define JS9_FEATURES            0x0E4   /* (RO) Features of job slot 9 */
+#define JS10_FEATURES           0x0E8   /* (RO) Features of job slot 10 */
+#define JS11_FEATURES           0x0EC   /* (RO) Features of job slot 11 */
+#define JS12_FEATURES           0x0F0   /* (RO) Features of job slot 12 */
+#define JS13_FEATURES           0x0F4   /* (RO) Features of job slot 13 */
+#define JS14_FEATURES           0x0F8   /* (RO) Features of job slot 14 */
+#define JS15_FEATURES           0x0FC   /* (RO) Features of job slot 15 */
+
+#define JS_FEATURES_REG(n)      GPU_CONTROL_REG(JS0_FEATURES + ((n) << 2))
+
+#define JM_CONFIG               0xF00   /* (RW) Job manager configuration (implementation-specific) */
+
+/* Job control registers */
+
+#define JOB_IRQ_JS_STATE        0x010   /* status==active and _next == busy snapshot from last JOB_IRQ_CLEAR */
+#define JOB_IRQ_THROTTLE        0x014   /* cycles to delay delivering an interrupt externally. The JOB_IRQ_STATUS is NOT affected by this, just the delivery of the interrupt.  */
+
+#define JOB_SLOT0               0x800   /* Configuration registers for job slot 0 */
+#define JOB_SLOT1               0x880   /* Configuration registers for job slot 1 */
+#define JOB_SLOT2               0x900   /* Configuration registers for job slot 2 */
+#define JOB_SLOT3               0x980   /* Configuration registers for job slot 3 */
+#define JOB_SLOT4               0xA00   /* Configuration registers for job slot 4 */
+#define JOB_SLOT5               0xA80   /* Configuration registers for job slot 5 */
+#define JOB_SLOT6               0xB00   /* Configuration registers for job slot 6 */
+#define JOB_SLOT7               0xB80   /* Configuration registers for job slot 7 */
+#define JOB_SLOT8               0xC00   /* Configuration registers for job slot 8 */
+#define JOB_SLOT9               0xC80   /* Configuration registers for job slot 9 */
+#define JOB_SLOT10              0xD00   /* Configuration registers for job slot 10 */
+#define JOB_SLOT11              0xD80   /* Configuration registers for job slot 11 */
+#define JOB_SLOT12              0xE00   /* Configuration registers for job slot 12 */
+#define JOB_SLOT13              0xE80   /* Configuration registers for job slot 13 */
+#define JOB_SLOT14              0xF00   /* Configuration registers for job slot 14 */
+#define JOB_SLOT15              0xF80   /* Configuration registers for job slot 15 */
+
+#define JOB_SLOT_REG(n, r)      (JOB_CONTROL_REG(JOB_SLOT0 + ((n) << 7)) + (r))
+
+#define JS_HEAD_LO             0x00	/* (RO) Job queue head pointer for job slot n, low word */
+#define JS_HEAD_HI             0x04	/* (RO) Job queue head pointer for job slot n, high word */
+#define JS_TAIL_LO             0x08	/* (RO) Job queue tail pointer for job slot n, low word */
+#define JS_TAIL_HI             0x0C	/* (RO) Job queue tail pointer for job slot n, high word */
+#define JS_AFFINITY_LO         0x10	/* (RO) Core affinity mask for job slot n, low word */
+#define JS_AFFINITY_HI         0x14	/* (RO) Core affinity mask for job slot n, high word */
+#define JS_CONFIG              0x18	/* (RO) Configuration settings for job slot n */
+/* (RO) Extended affinity mask for job slot n */
+#define JS_XAFFINITY           0x1C
+
+#define JS_COMMAND             0x20	/* (WO) Command register for job slot n */
+#define JS_STATUS              0x24	/* (RO) Status register for job slot n */
+
+#define JS_HEAD_NEXT_LO        0x40	/* (RW) Next job queue head pointer for job slot n, low word */
+#define JS_HEAD_NEXT_HI        0x44	/* (RW) Next job queue head pointer for job slot n, high word */
+
+#define JS_AFFINITY_NEXT_LO    0x50	/* (RW) Next core affinity mask for job slot n, low word */
+#define JS_AFFINITY_NEXT_HI    0x54	/* (RW) Next core affinity mask for job slot n, high word */
+#define JS_CONFIG_NEXT         0x58	/* (RW) Next configuration settings for job slot n */
+/* (RW) Next extended affinity mask for job slot n */
+#define JS_XAFFINITY_NEXT      0x5C
+
+#define JS_COMMAND_NEXT        0x60	/* (RW) Next command register for job slot n */
+
+#define JS_FLUSH_ID_NEXT       0x70	/* (RW) Next job slot n cache flush ID */
+
+/* No JM-specific MMU control registers */
+/* No JM-specific MMU address space control registers */
+
+/* JS_COMMAND register commands */
+#define JS_COMMAND_NOP         0x00	/* NOP Operation. Writing this value is ignored */
+#define JS_COMMAND_START       0x01	/* Start processing a job chain. Writing this value is ignored */
+#define JS_COMMAND_SOFT_STOP   0x02	/* Gently stop processing a job chain */
+#define JS_COMMAND_HARD_STOP   0x03	/* Rudely stop processing a job chain */
+#define JS_COMMAND_SOFT_STOP_0 0x04	/* Execute SOFT_STOP if JOB_CHAIN_FLAG is 0 */
+#define JS_COMMAND_HARD_STOP_0 0x05	/* Execute HARD_STOP if JOB_CHAIN_FLAG is 0 */
+#define JS_COMMAND_SOFT_STOP_1 0x06	/* Execute SOFT_STOP if JOB_CHAIN_FLAG is 1 */
+#define JS_COMMAND_HARD_STOP_1 0x07	/* Execute HARD_STOP if JOB_CHAIN_FLAG is 1 */
+
+#define JS_COMMAND_MASK        0x07    /* Mask of bits currently in use by the HW */
+
+/* Possible values of JS_CONFIG and JS_CONFIG_NEXT registers */
+#define JS_CONFIG_START_FLUSH_NO_ACTION        (0u << 0)
+#define JS_CONFIG_START_FLUSH_CLEAN            (1u << 8)
+#define JS_CONFIG_START_FLUSH_INV_SHADER_OTHER (2u << 8)
+#define JS_CONFIG_START_FLUSH_CLEAN_INVALIDATE (3u << 8)
+#define JS_CONFIG_START_MMU                    (1u << 10)
+#define JS_CONFIG_JOB_CHAIN_FLAG               (1u << 11)
+#define JS_CONFIG_END_FLUSH_NO_ACTION          JS_CONFIG_START_FLUSH_NO_ACTION
+#define JS_CONFIG_END_FLUSH_CLEAN              (1u << 12)
+#define JS_CONFIG_END_FLUSH_CLEAN_INVALIDATE   (3u << 12)
+#define JS_CONFIG_ENABLE_FLUSH_REDUCTION       (1u << 14)
+#define JS_CONFIG_DISABLE_DESCRIPTOR_WR_BK     (1u << 15)
+#define JS_CONFIG_THREAD_PRI(n)                ((n) << 16)
+
+/* JS_XAFFINITY register values */
+#define JS_XAFFINITY_XAFFINITY_ENABLE (1u << 0)
+#define JS_XAFFINITY_TILER_ENABLE     (1u << 8)
+#define JS_XAFFINITY_CACHE_ENABLE     (1u << 16)
+
+/* JS_STATUS register values */
+
+/* NOTE: Please keep these values in sync with enum base_jd_event_code in mali_base_kernel.h.
+ * The values are kept separate to avoid a dependency between userspace and kernel code.
+ */
+
+/* Group of values representing the job status instead of a particular fault */
+#define JS_STATUS_NO_EXCEPTION_BASE   0x00
+#define JS_STATUS_INTERRUPTED         (JS_STATUS_NO_EXCEPTION_BASE + 0x02)	/* 0x02 means INTERRUPTED */
+#define JS_STATUS_STOPPED             (JS_STATUS_NO_EXCEPTION_BASE + 0x03)	/* 0x03 means STOPPED */
+#define JS_STATUS_TERMINATED          (JS_STATUS_NO_EXCEPTION_BASE + 0x04)	/* 0x04 means TERMINATED */
+
+/* General fault values */
+#define JS_STATUS_FAULT_BASE          0x40
+#define JS_STATUS_CONFIG_FAULT        (JS_STATUS_FAULT_BASE)	/* 0x40 means CONFIG FAULT */
+#define JS_STATUS_POWER_FAULT         (JS_STATUS_FAULT_BASE + 0x01)	/* 0x41 means POWER FAULT */
+#define JS_STATUS_READ_FAULT          (JS_STATUS_FAULT_BASE + 0x02)	/* 0x42 means READ FAULT */
+#define JS_STATUS_WRITE_FAULT         (JS_STATUS_FAULT_BASE + 0x03)	/* 0x43 means WRITE FAULT */
+#define JS_STATUS_AFFINITY_FAULT      (JS_STATUS_FAULT_BASE + 0x04)	/* 0x44 means AFFINITY FAULT */
+#define JS_STATUS_BUS_FAULT           (JS_STATUS_FAULT_BASE + 0x08)	/* 0x48 means BUS FAULT */
+
+/* Instruction or data faults */
+#define JS_STATUS_INSTRUCTION_FAULT_BASE  0x50
+#define JS_STATUS_INSTR_INVALID_PC        (JS_STATUS_INSTRUCTION_FAULT_BASE)	/* 0x50 means INSTR INVALID PC */
+#define JS_STATUS_INSTR_INVALID_ENC       (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x01)	/* 0x51 means INSTR INVALID ENC */
+#define JS_STATUS_INSTR_TYPE_MISMATCH     (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x02)	/* 0x52 means INSTR TYPE MISMATCH */
+#define JS_STATUS_INSTR_OPERAND_FAULT     (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x03)	/* 0x53 means INSTR OPERAND FAULT */
+#define JS_STATUS_INSTR_TLS_FAULT         (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x04)	/* 0x54 means INSTR TLS FAULT */
+#define JS_STATUS_INSTR_BARRIER_FAULT     (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x05)	/* 0x55 means INSTR BARRIER FAULT */
+#define JS_STATUS_INSTR_ALIGN_FAULT       (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x06)	/* 0x56 means INSTR ALIGN FAULT */
+/* NOTE: No fault with 0x57 code defined in spec. */
+#define JS_STATUS_DATA_INVALID_FAULT      (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x08)	/* 0x58 means DATA INVALID FAULT */
+#define JS_STATUS_TILE_RANGE_FAULT        (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x09)	/* 0x59 means TILE RANGE FAULT */
+#define JS_STATUS_ADDRESS_RANGE_FAULT     (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x0A)	/* 0x5A means ADDRESS RANGE FAULT */
+
+/* Other faults */
+#define JS_STATUS_MEMORY_FAULT_BASE   0x60
+#define JS_STATUS_OUT_OF_MEMORY       (JS_STATUS_MEMORY_FAULT_BASE)	/* 0x60 means OUT OF MEMORY */
+#define JS_STATUS_UNKNOWN             0x7F	/* 0x7F means UNKNOWN */
+
+/* JS<n>_FEATURES register */
+#define JS_FEATURE_NULL_JOB              (1u << 1)
+#define JS_FEATURE_SET_VALUE_JOB         (1u << 2)
+#define JS_FEATURE_CACHE_FLUSH_JOB       (1u << 3)
+#define JS_FEATURE_COMPUTE_JOB           (1u << 4)
+#define JS_FEATURE_VERTEX_JOB            (1u << 5)
+#define JS_FEATURE_GEOMETRY_JOB          (1u << 6)
+#define JS_FEATURE_TILER_JOB             (1u << 7)
+#define JS_FEATURE_FUSED_JOB             (1u << 8)
+#define JS_FEATURE_FRAGMENT_JOB          (1u << 9)
+
+/* JM_CONFIG register */
+#define JM_TIMESTAMP_OVERRIDE  (1ul << 0)
+#define JM_CLOCK_GATE_OVERRIDE (1ul << 1)
+#define JM_JOB_THROTTLE_ENABLE (1ul << 2)
+#define JM_JOB_THROTTLE_LIMIT_SHIFT (3)
+#define JM_MAX_JOB_THROTTLE_LIMIT (0x3F)
+#define JM_FORCE_COHERENCY_FEATURES_SHIFT (2)
+
+/* GPU_COMMAND values */
+#define GPU_COMMAND_NOP                0x00 /* No operation, nothing happens */
+#define GPU_COMMAND_SOFT_RESET         0x01 /* Stop all external bus interfaces, and then reset the entire GPU. */
+#define GPU_COMMAND_HARD_RESET         0x02 /* Immediately reset the entire GPU. */
+#define GPU_COMMAND_PRFCNT_CLEAR       0x03 /* Clear all performance counters, setting them all to zero. */
+#define GPU_COMMAND_PRFCNT_SAMPLE      0x04 /* Sample all performance counters, writing them out to memory */
+#define GPU_COMMAND_CYCLE_COUNT_START  0x05 /* Starts the cycle counter, and system timestamp propagation */
+#define GPU_COMMAND_CYCLE_COUNT_STOP   0x06 /* Stops the cycle counter, and system timestamp propagation */
+#define GPU_COMMAND_CLEAN_CACHES       0x07 /* Clean all caches */
+#define GPU_COMMAND_CLEAN_INV_CACHES   0x08 /* Clean and invalidate all caches */
+#define GPU_COMMAND_SET_PROTECTED_MODE 0x09 /* Places the GPU in protected mode */
+
+/* GPU_COMMAND cache flush alias to CSF command payload */
+#define GPU_COMMAND_CACHE_CLN_INV_L2 GPU_COMMAND_CLEAN_INV_CACHES
+#define GPU_COMMAND_CACHE_CLN_INV_L2_LSC GPU_COMMAND_CLEAN_INV_CACHES
+#define GPU_COMMAND_CACHE_CLN_INV_FULL GPU_COMMAND_CLEAN_INV_CACHES
+
+/* Merge cache flush commands */
+#define GPU_COMMAND_FLUSH_CACHE_MERGE(cmd1, cmd2)                              \
+	((cmd1) > (cmd2) ? (cmd1) : (cmd2))
+
+/* IRQ flags */
+#define GPU_FAULT               (1 << 0)    /* A GPU Fault has occurred */
+#define MULTIPLE_GPU_FAULTS     (1 << 7)    /* More than one GPU Fault occurred.  */
+#define RESET_COMPLETED         (1 << 8)    /* Set when a reset has completed.  */
+#define POWER_CHANGED_SINGLE    (1 << 9)    /* Set when a single core has finished powering up or down. */
+#define POWER_CHANGED_ALL       (1 << 10)   /* Set when all cores have finished powering up or down. */
+#define PRFCNT_SAMPLE_COMPLETED (1 << 16)   /* Set when a performance count sample has completed. */
+#define CLEAN_CACHES_COMPLETED  (1 << 17)   /* Set when a cache clean operation has completed. */
+
+/*
+ * In Debug build,
+ * GPU_IRQ_REG_COMMON | POWER_CHANGED_SINGLE is used to clear and enable interrupt sources of GPU_IRQ
+ * by writing it onto GPU_IRQ_CLEAR/MASK registers.
+ *
+ * In Release build,
+ * GPU_IRQ_REG_COMMON is used.
+ *
+ * Note:
+ * CLEAN_CACHES_COMPLETED - Used separately for cache operation.
+ */
+#define GPU_IRQ_REG_COMMON (GPU_FAULT | MULTIPLE_GPU_FAULTS | RESET_COMPLETED \
+		| POWER_CHANGED_ALL | PRFCNT_SAMPLE_COMPLETED)
+
+#endif /* _KBASE_GPU_REGMAP_JM_H_ */
--- a/drivers/gpu/arm/bifrost/gpu/mali_kbase_gpu_fault.h
+++ b/drivers/gpu/arm/bifrost/gpu/mali_kbase_gpu_fault.h
@@ -1,7 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
 /*
 *
- * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
 *
 * This program is free software and is provided to you under the terms of the
 * GNU General Public License version 2 as published by the Free Software
@@ -23,8 +23,8 @@
 #define _KBASE_GPU_FAULT_H_

 /**
- * kbase_gpu_exception_name() -
- * Returns the name associated with a Mali exception code
+ * kbase_gpu_exception_name() - Returns associated string of the exception code
+ *
 * @exception_code: exception code
 *
 * This function is called from the interrupt handler when a GPU fault occurs.
--- a/drivers/gpu/arm/bifrost/gpu/mali_kbase_gpu_regmap.h
+++ b/drivers/gpu/arm/bifrost/gpu/mali_kbase_gpu_regmap.h
@@ -23,6 +23,565 @@
 #define _KBASE_GPU_REGMAP_H_

 #include <uapi/gpu/arm/bifrost/gpu/mali_kbase_gpu_regmap.h>
+#include <uapi/gpu/arm/bifrost/gpu/mali_kbase_gpu_coherency.h>
+#include <uapi/gpu/arm/bifrost/gpu/mali_kbase_gpu_id.h>
+#if MALI_USE_CSF
+#include "backend/mali_kbase_gpu_regmap_csf.h"
+#else
+#include "backend/mali_kbase_gpu_regmap_jm.h"
+#endif
+
+/* GPU_U definition */
+#ifdef __ASSEMBLER__
+#define GPU_U(x) x
+#else
+#define GPU_U(x) x##u
+#endif /* __ASSEMBLER__ */
+
+/* Begin Register Offsets */
+/* GPU control registers */
+
+#define GPU_CONTROL_BASE        0x0000
+#define GPU_CONTROL_REG(r)      (GPU_CONTROL_BASE + (r))
+#define GPU_ID                  0x000   /* (RO) GPU and revision identifier */
+#define L2_FEATURES             0x004   /* (RO) Level 2 cache features */
+#define TILER_FEATURES          0x00C   /* (RO) Tiler Features */
+#define MEM_FEATURES            0x010   /* (RO) Memory system features */
+#define MMU_FEATURES            0x014   /* (RO) MMU features */
+#define AS_PRESENT              0x018   /* (RO) Address space slots present */
+#define GPU_IRQ_RAWSTAT         0x020   /* (RW) */
+#define GPU_IRQ_CLEAR           0x024   /* (WO) */
+#define GPU_IRQ_MASK            0x028   /* (RW) */
+#define GPU_IRQ_STATUS          0x02C   /* (RO) */
+
+#define GPU_COMMAND             0x030   /* (WO) */
+#define GPU_STATUS              0x034   /* (RO) */
+
+#define GPU_DBGEN               (1 << 8)    /* DBGEN wire status */
+
+#define GPU_FAULTSTATUS         0x03C   /* (RO) GPU exception type and fault status */
+#define GPU_FAULTADDRESS_LO     0x040   /* (RO) GPU exception fault address, low word */
+#define GPU_FAULTADDRESS_HI     0x044   /* (RO) GPU exception fault address, high word */
+
+#define L2_CONFIG               0x048   /* (RW) Level 2 cache configuration */
+
+#define GROUPS_L2_COHERENT      (1 << 0) /* Core groups are L2 coherent */
+#define SUPER_L2_COHERENT       (1 << 1) /* Shader cores within a core
+					  * supergroup are L2 coherent
+					  */
+
+#define PWR_KEY                 0x050   /* (WO) Power manager key register */
+#define PWR_OVERRIDE0           0x054   /* (RW) Power manager override settings */
+#define PWR_OVERRIDE1           0x058   /* (RW) Power manager override settings */
+#define GPU_FEATURES_LO         0x060   /* (RO) GPU features, low word */
+#define GPU_FEATURES_HI         0x064   /* (RO) GPU features, high word */
+#define PRFCNT_FEATURES         0x068   /* (RO) Performance counter features */
+#define TIMESTAMP_OFFSET_LO     0x088   /* (RW) Global time stamp offset, low word */
+#define TIMESTAMP_OFFSET_HI     0x08C   /* (RW) Global time stamp offset, high word */
+#define CYCLE_COUNT_LO          0x090   /* (RO) Cycle counter, low word */
+#define CYCLE_COUNT_HI          0x094   /* (RO) Cycle counter, high word */
+#define TIMESTAMP_LO            0x098   /* (RO) Global time stamp counter, low word */
+#define TIMESTAMP_HI            0x09C   /* (RO) Global time stamp counter, high word */
+
+#define THREAD_MAX_THREADS      0x0A0   /* (RO) Maximum number of threads per core */
+#define THREAD_MAX_WORKGROUP_SIZE 0x0A4 /* (RO) Maximum workgroup size */
+#define THREAD_MAX_BARRIER_SIZE 0x0A8   /* (RO) Maximum threads waiting at a barrier */
+#define THREAD_FEATURES         0x0AC   /* (RO) Thread features */
+#define THREAD_TLS_ALLOC        0x310   /* (RO) Number of threads per core that TLS must be allocated for */
+
+#define TEXTURE_FEATURES_0      0x0B0   /* (RO) Support flags for indexed texture formats 0..31 */
+#define TEXTURE_FEATURES_1      0x0B4   /* (RO) Support flags for indexed texture formats 32..63 */
+#define TEXTURE_FEATURES_2      0x0B8   /* (RO) Support flags for indexed texture formats 64..95 */
+#define TEXTURE_FEATURES_3      0x0BC   /* (RO) Support flags for texture order */
+
+#define TEXTURE_FEATURES_REG(n) GPU_CONTROL_REG(TEXTURE_FEATURES_0 + ((n) << 2))
+
+#define SHADER_PRESENT_LO       0x100   /* (RO) Shader core present bitmap, low word */
+#define SHADER_PRESENT_HI       0x104   /* (RO) Shader core present bitmap, high word */
+
+#define TILER_PRESENT_LO        0x110   /* (RO) Tiler core present bitmap, low word */
+#define TILER_PRESENT_HI        0x114   /* (RO) Tiler core present bitmap, high word */
+
+#define L2_PRESENT_LO           0x120   /* (RO) Level 2 cache present bitmap, low word */
+#define L2_PRESENT_HI           0x124   /* (RO) Level 2 cache present bitmap, high word */
+
+#define STACK_PRESENT_LO        0xE00   /* (RO) Core stack present bitmap, low word */
+#define STACK_PRESENT_HI        0xE04   /* (RO) Core stack present bitmap, high word */
+
+#define SHADER_READY_LO         0x140   /* (RO) Shader core ready bitmap, low word */
+#define SHADER_READY_HI         0x144   /* (RO) Shader core ready bitmap, high word */
+
+#define TILER_READY_LO          0x150   /* (RO) Tiler core ready bitmap, low word */
+#define TILER_READY_HI          0x154   /* (RO) Tiler core ready bitmap, high word */
+
+#define L2_READY_LO             0x160   /* (RO) Level 2 cache ready bitmap, low word */
+#define L2_READY_HI             0x164   /* (RO) Level 2 cache ready bitmap, high word */
+
+#define STACK_READY_LO          0xE10   /* (RO) Core stack ready bitmap, low word */
+#define STACK_READY_HI          0xE14   /* (RO) Core stack ready bitmap, high word */
+
+#define SHADER_PWRON_LO         0x180   /* (WO) Shader core power on bitmap, low word */
+#define SHADER_PWRON_HI         0x184   /* (WO) Shader core power on bitmap, high word */
+
+#define TILER_PWRON_LO          0x190   /* (WO) Tiler core power on bitmap, low word */
+#define TILER_PWRON_HI          0x194   /* (WO) Tiler core power on bitmap, high word */
+
+#define L2_PWRON_LO             0x1A0   /* (WO) Level 2 cache power on bitmap, low word */
+#define L2_PWRON_HI             0x1A4   /* (WO) Level 2 cache power on bitmap, high word */
+
+#define STACK_PWRON_LO          0xE20   /* (RO) Core stack power on bitmap, low word */
+#define STACK_PWRON_HI          0xE24   /* (RO) Core stack power on bitmap, high word */
+
+#define SHADER_PWROFF_LO        0x1C0   /* (WO) Shader core power off bitmap, low word */
+#define SHADER_PWROFF_HI        0x1C4   /* (WO) Shader core power off bitmap, high word */
+
+#define TILER_PWROFF_LO         0x1D0   /* (WO) Tiler core power off bitmap, low word */
+#define TILER_PWROFF_HI         0x1D4   /* (WO) Tiler core power off bitmap, high word */
+
+#define L2_PWROFF_LO            0x1E0   /* (WO) Level 2 cache power off bitmap, low word */
+#define L2_PWROFF_HI            0x1E4   /* (WO) Level 2 cache power off bitmap, high word */
+
+#define STACK_PWROFF_LO         0xE30   /* (RO) Core stack power off bitmap, low word */
+#define STACK_PWROFF_HI         0xE34   /* (RO) Core stack power off bitmap, high word */
+
+#define SHADER_PWRTRANS_LO      0x200   /* (RO) Shader core power transition bitmap, low word */
+#define SHADER_PWRTRANS_HI      0x204   /* (RO) Shader core power transition bitmap, high word */
+
+#define TILER_PWRTRANS_LO       0x210   /* (RO) Tiler core power transition bitmap, low word */
+#define TILER_PWRTRANS_HI       0x214   /* (RO) Tiler core power transition bitmap, high word */
+
+#define L2_PWRTRANS_LO          0x220   /* (RO) Level 2 cache power transition bitmap, low word */
+#define L2_PWRTRANS_HI          0x224   /* (RO) Level 2 cache power transition bitmap, high word */
+
+#define ASN_HASH_0              0x02C0 /* (RW) ASN hash function argument 0 */
+#define ASN_HASH(n)             (ASN_HASH_0 + (n)*4)
+#define ASN_HASH_COUNT          3
+
+#define SYSC_ALLOC0             0x0340 /* (RW) System cache allocation hint from source ID */
+#define SYSC_ALLOC(n) (SYSC_ALLOC0 + (n)*4)
+#define SYSC_ALLOC_COUNT 8
+
+#define STACK_PWRTRANS_LO       0xE40   /* (RO) Core stack power transition bitmap, low word */
+#define STACK_PWRTRANS_HI       0xE44   /* (RO) Core stack power transition bitmap, high word */
+
+#define SHADER_PWRACTIVE_LO     0x240   /* (RO) Shader core active bitmap, low word */
+#define SHADER_PWRACTIVE_HI     0x244   /* (RO) Shader core active bitmap, high word */
+
+#define TILER_PWRACTIVE_LO      0x250   /* (RO) Tiler core active bitmap, low word */
+#define TILER_PWRACTIVE_HI      0x254   /* (RO) Tiler core active bitmap, high word */
+
+#define L2_PWRACTIVE_LO         0x260   /* (RO) Level 2 cache active bitmap, low word */
+#define L2_PWRACTIVE_HI         0x264   /* (RO) Level 2 cache active bitmap, high word */
+
+#define COHERENCY_FEATURES      0x300   /* (RO) Coherency features present */
+#define COHERENCY_ENABLE        0x304   /* (RW) Coherency enable */
+
+
+#define SHADER_CONFIG           0xF04   /* (RW) Shader core configuration (implementation-specific) */
+#define TILER_CONFIG            0xF08   /* (RW) Tiler core configuration (implementation-specific) */
+#define L2_MMU_CONFIG           0xF0C   /* (RW) L2 cache and MMU configuration (implementation-specific) */
+
+/* Job control registers */
+
+#define JOB_CONTROL_BASE        0x1000
+
+#define JOB_CONTROL_REG(r)      (JOB_CONTROL_BASE + (r))
+
+#define JOB_IRQ_RAWSTAT         0x000   /* Raw interrupt status register */
+#define JOB_IRQ_CLEAR           0x004   /* Interrupt clear register */
+#define JOB_IRQ_MASK            0x008   /* Interrupt mask register */
+#define JOB_IRQ_STATUS          0x00C   /* Interrupt status register */
+
+/* MMU control registers */
+
+#define MMU_IRQ_CLEAR           0x004   /* (WO) Interrupt clear register */
+#define MMU_IRQ_MASK            0x008   /* (RW) Interrupt mask register */
+#define MMU_IRQ_STATUS          0x00C   /* (RO) Interrupt status register */
+
+#define MMU_AS0                 0x400   /* Configuration registers for address space 0 */
+#define MMU_AS1                 0x440   /* Configuration registers for address space 1 */
+#define MMU_AS2                 0x480   /* Configuration registers for address space 2 */
+#define MMU_AS3                 0x4C0   /* Configuration registers for address space 3 */
+#define MMU_AS4                 0x500   /* Configuration registers for address space 4 */
+#define MMU_AS5                 0x540   /* Configuration registers for address space 5 */
+#define MMU_AS6                 0x580   /* Configuration registers for address space 6 */
+#define MMU_AS7                 0x5C0   /* Configuration registers for address space 7 */
+#define MMU_AS8                 0x600   /* Configuration registers for address space 8 */
+#define MMU_AS9                 0x640   /* Configuration registers for address space 9 */
+#define MMU_AS10                0x680   /* Configuration registers for address space 10 */
+#define MMU_AS11                0x6C0   /* Configuration registers for address space 11 */
+#define MMU_AS12                0x700   /* Configuration registers for address space 12 */
+#define MMU_AS13                0x740   /* Configuration registers for address space 13 */
+#define MMU_AS14                0x780   /* Configuration registers for address space 14 */
+#define MMU_AS15                0x7C0   /* Configuration registers for address space 15 */
+
+/* MMU address space control registers */
+
+#define MMU_AS_REG(n, r)        (MMU_REG(MMU_AS0 + ((n) << 6)) + (r))
+
+#define AS_TRANSTAB_LO         0x00	/* (RW) Translation Table Base Address for address space n, low word */
+#define AS_TRANSTAB_HI         0x04	/* (RW) Translation Table Base Address for address space n, high word */
+#define AS_MEMATTR_LO          0x08	/* (RW) Memory attributes for address space n, low word. */
+#define AS_MEMATTR_HI          0x0C	/* (RW) Memory attributes for address space n, high word. */
+#define AS_LOCKADDR_LO         0x10	/* (RW) Lock region address for address space n, low word */
+#define AS_LOCKADDR_HI         0x14	/* (RW) Lock region address for address space n, high word */
+#define AS_COMMAND             0x18	/* (WO) MMU command register for address space n */
+#define AS_FAULTSTATUS         0x1C	/* (RO) MMU fault status register for address space n */
+#define AS_FAULTADDRESS_LO     0x20	/* (RO) Fault Address for address space n, low word */
+#define AS_FAULTADDRESS_HI     0x24	/* (RO) Fault Address for address space n, high word */
+#define AS_STATUS              0x28	/* (RO) Status flags for address space n */
+
+/* (RW) Translation table configuration for address space n, low word */
+#define AS_TRANSCFG_LO         0x30
+/* (RW) Translation table configuration for address space n, high word */
+#define AS_TRANSCFG_HI         0x34
+/* (RO) Secondary fault address for address space n, low word */
+#define AS_FAULTEXTRA_LO       0x38
+/* (RO) Secondary fault address for address space n, high word */
+#define AS_FAULTEXTRA_HI       0x3C
+
+/* End Register Offsets */
+
+#define GPU_IRQ_REG_ALL (GPU_IRQ_REG_COMMON)
+
+/*
+ * MMU_IRQ_RAWSTAT register values. Values are valid also for
+ * MMU_IRQ_CLEAR, MMU_IRQ_MASK, MMU_IRQ_STATUS registers.
+ */
+
+#define MMU_PAGE_FAULT_FLAGS    16
+
+/* Macros returning a bitmask to retrieve page fault or bus error flags from
+ * MMU registers
+ */
+#define MMU_PAGE_FAULT(n)       (1UL << (n))
+#define MMU_BUS_ERROR(n)        (1UL << ((n) + MMU_PAGE_FAULT_FLAGS))
+
+/*
+ * Begin AARCH64 MMU TRANSTAB register values
+ */
+#define MMU_HW_OUTA_BITS 40
+#define AS_TRANSTAB_BASE_MASK ((1ULL << MMU_HW_OUTA_BITS) - (1ULL << 4))
+
+/*
+ * Begin MMU STATUS register values
+ */
+#define AS_STATUS_AS_ACTIVE 0x01
+
+#define AS_FAULTSTATUS_EXCEPTION_CODE_MASK                      (0x7<<3)
+#define AS_FAULTSTATUS_EXCEPTION_CODE_TRANSLATION_FAULT         (0x0<<3)
+#define AS_FAULTSTATUS_EXCEPTION_CODE_PERMISSION_FAULT          (0x1<<3)
+#define AS_FAULTSTATUS_EXCEPTION_CODE_TRANSTAB_BUS_FAULT        (0x2<<3)
+#define AS_FAULTSTATUS_EXCEPTION_CODE_ACCESS_FLAG               (0x3<<3)
+#define AS_FAULTSTATUS_EXCEPTION_CODE_ADDRESS_SIZE_FAULT        (0x4<<3)
+#define AS_FAULTSTATUS_EXCEPTION_CODE_MEMORY_ATTRIBUTES_FAULT   (0x5<<3)
+
+#define AS_FAULTSTATUS_EXCEPTION_TYPE_SHIFT 0
+#define AS_FAULTSTATUS_EXCEPTION_TYPE_MASK (0xFF << AS_FAULTSTATUS_EXCEPTION_TYPE_SHIFT)
+#define AS_FAULTSTATUS_EXCEPTION_TYPE_GET(reg_val) \
+	(((reg_val)&AS_FAULTSTATUS_EXCEPTION_TYPE_MASK) >> AS_FAULTSTATUS_EXCEPTION_TYPE_SHIFT)
+#define AS_FAULTSTATUS_EXCEPTION_TYPE_TRANSLATION_FAULT_0 0xC0
+
+#define AS_FAULTSTATUS_ACCESS_TYPE_SHIFT 8
+#define AS_FAULTSTATUS_ACCESS_TYPE_MASK (0x3 << AS_FAULTSTATUS_ACCESS_TYPE_SHIFT)
+#define AS_FAULTSTATUS_ACCESS_TYPE_GET(reg_val) \
+	(((reg_val)&AS_FAULTSTATUS_ACCESS_TYPE_MASK) >> AS_FAULTSTATUS_ACCESS_TYPE_SHIFT)
+
+#define AS_FAULTSTATUS_ACCESS_TYPE_ATOMIC       (0x0)
+#define AS_FAULTSTATUS_ACCESS_TYPE_EX           (0x1)
+#define AS_FAULTSTATUS_ACCESS_TYPE_READ         (0x2)
+#define AS_FAULTSTATUS_ACCESS_TYPE_WRITE        (0x3)
+
+#define AS_FAULTSTATUS_SOURCE_ID_SHIFT 16
+#define AS_FAULTSTATUS_SOURCE_ID_MASK (GPU_U(0xFFFF) << AS_FAULTSTATUS_SOURCE_ID_SHIFT) /* unsigned: 0xFFFF << 16 overflows int */
+#define AS_FAULTSTATUS_SOURCE_ID_GET(reg_val) \
+	(((reg_val)&AS_FAULTSTATUS_SOURCE_ID_MASK) >> AS_FAULTSTATUS_SOURCE_ID_SHIFT)
+
+#define PRFCNT_FEATURES_COUNTER_BLOCK_SIZE_SHIFT (0)
+#define PRFCNT_FEATURES_COUNTER_BLOCK_SIZE_MASK                                \
+	((0xFF) << PRFCNT_FEATURES_COUNTER_BLOCK_SIZE_SHIFT)
+#define PRFCNT_FEATURES_COUNTER_BLOCK_SIZE_GET(reg_val)                        \
+	(((reg_val)&PRFCNT_FEATURES_COUNTER_BLOCK_SIZE_MASK) >>                \
+	 PRFCNT_FEATURES_COUNTER_BLOCK_SIZE_SHIFT)
+
+/*
+ * Begin MMU TRANSCFG register values
+ */
+#define AS_TRANSCFG_ADRMODE_LEGACY      0
+#define AS_TRANSCFG_ADRMODE_UNMAPPED    1
+#define AS_TRANSCFG_ADRMODE_IDENTITY    2
+#define AS_TRANSCFG_ADRMODE_AARCH64_4K  6
+#define AS_TRANSCFG_ADRMODE_AARCH64_64K 8
+
+#define AS_TRANSCFG_ADRMODE_MASK        0xF
+
+/*
+ * Begin TRANSCFG register values
+ */
+#define AS_TRANSCFG_PTW_MEMATTR_MASK (3ull << 24)
+#define AS_TRANSCFG_PTW_MEMATTR_NON_CACHEABLE (1ull << 24)
+#define AS_TRANSCFG_PTW_MEMATTR_WRITE_BACK (2ull << 24)
+
+#define AS_TRANSCFG_PTW_SH_MASK ((3ull << 28))
+#define AS_TRANSCFG_PTW_SH_OS (2ull << 28)
+#define AS_TRANSCFG_PTW_SH_IS (3ull << 28)
+#define AS_TRANSCFG_R_ALLOCATE (1ull << 30)
+
+/*
+ * Begin Command Values
+ */
+
+/* AS_COMMAND register commands */
+#define AS_COMMAND_NOP         0x00	/* NOP Operation */
+#define AS_COMMAND_UPDATE      0x01	/* Broadcasts the values in AS_TRANSTAB and ASn_MEMATTR to all MMUs */
+#define AS_COMMAND_LOCK        0x02	/* Issue a lock region command to all MMUs */
+#define AS_COMMAND_UNLOCK      0x03	/* Issue a flush region command to all MMUs */
+/* Flush all L2 caches then issue a flush region command to all MMUs */
+#define AS_COMMAND_FLUSH_PT 0x04
+/* Wait for memory accesses to complete, flush all the L1 caches, then flush
+ * all L2 caches, then issue a flush region command to all MMUs
+ */
+#define AS_COMMAND_FLUSH_MEM 0x05
+
+/* AS_LOCKADDR register (64-bit: BASE spans bits 63:12). SIZE_SET masks at the width of reg_val. */
+#define AS_LOCKADDR_LOCKADDR_SIZE_SHIFT GPU_U(0)
+#define AS_LOCKADDR_LOCKADDR_SIZE_MASK                                         \
+	(GPU_U(0x3F) << AS_LOCKADDR_LOCKADDR_SIZE_SHIFT)
+#define AS_LOCKADDR_LOCKADDR_SIZE_GET(reg_val)                                 \
+	(((reg_val)&AS_LOCKADDR_LOCKADDR_SIZE_MASK) >>                               \
+	 AS_LOCKADDR_LOCKADDR_SIZE_SHIFT)
+#define AS_LOCKADDR_LOCKADDR_SIZE_SET(reg_val, value)                          \
+	(((reg_val) & ~(__typeof__(reg_val))AS_LOCKADDR_LOCKADDR_SIZE_MASK) |        \
+	 (((value) << AS_LOCKADDR_LOCKADDR_SIZE_SHIFT) &                             \
+	 AS_LOCKADDR_LOCKADDR_SIZE_MASK))
+#define AS_LOCKADDR_LOCKADDR_BASE_SHIFT GPU_U(12)
+#define AS_LOCKADDR_LOCKADDR_BASE_MASK                                         \
+	(GPU_U(0xFFFFFFFFFFFFF) << AS_LOCKADDR_LOCKADDR_BASE_SHIFT)
+#define AS_LOCKADDR_LOCKADDR_BASE_GET(reg_val)                                 \
+	(((reg_val)&AS_LOCKADDR_LOCKADDR_BASE_MASK) >>                               \
+	 AS_LOCKADDR_LOCKADDR_BASE_SHIFT)
+#define AS_LOCKADDR_LOCKADDR_BASE_SET(reg_val, value)                          \
+	(((reg_val) & ~AS_LOCKADDR_LOCKADDR_BASE_MASK) |                             \
+	 (((value) << AS_LOCKADDR_LOCKADDR_BASE_SHIFT) &                             \
+	 AS_LOCKADDR_LOCKADDR_BASE_MASK))
+
+/* GPU_STATUS values */
+#define GPU_STATUS_PRFCNT_ACTIVE            (1 << 2)    /* Set if the performance counters are active. */
+#define GPU_STATUS_CYCLE_COUNT_ACTIVE       (1 << 6)    /* Set if the cycle counter is active. */
+#define GPU_STATUS_PROTECTED_MODE_ACTIVE    (1 << 7)    /* Set if protected mode is active */
+
+/* PRFCNT_CONFIG register values */
+#define PRFCNT_CONFIG_MODE_SHIFT        0 /* Counter mode position. */
+#define PRFCNT_CONFIG_AS_SHIFT          4 /* Address space bitmap position. */
+#define PRFCNT_CONFIG_SETSELECT_SHIFT   8 /* Set select position. */
+
+/* The performance counters are disabled. */
+#define PRFCNT_CONFIG_MODE_OFF          0
+/* The performance counters are enabled, but are only written out when a
+ * PRFCNT_SAMPLE command is issued using the GPU_COMMAND register.
+ */
+#define PRFCNT_CONFIG_MODE_MANUAL       1
+/* The performance counters are enabled, and are written out each time a tile
+ * finishes rendering.
+ */
+#define PRFCNT_CONFIG_MODE_TILE         2
+
+/* AS<n>_MEMATTR values from MMU_MEMATTR_STAGE1: */
+/* Use GPU implementation-defined caching policy. */
+#define AS_MEMATTR_IMPL_DEF_CACHE_POLICY 0x88ull
+/* The attribute set to force all resources to be cached. */
+#define AS_MEMATTR_FORCE_TO_CACHE_ALL    0x8Full
+/* Inner write-alloc cache setup, no outer caching */
+#define AS_MEMATTR_WRITE_ALLOC           0x8Dull
+
+/* Use GPU implementation-defined caching policy. */
+#define AS_MEMATTR_LPAE_IMPL_DEF_CACHE_POLICY 0x48ull
+/* The attribute set to force all resources to be cached. */
+#define AS_MEMATTR_LPAE_FORCE_TO_CACHE_ALL    0x4Full
+/* Inner write-alloc cache setup, no outer caching */
+#define AS_MEMATTR_LPAE_WRITE_ALLOC           0x4Dull
+/* Set to implementation defined, outer caching */
+#define AS_MEMATTR_LPAE_OUTER_IMPL_DEF        0x88ull
+/* Set to write back memory, outer caching */
+#define AS_MEMATTR_LPAE_OUTER_WA              0x8Dull
+/* There is no LPAE support for non-cacheable, since the memory type is always
+ * write-back.
+ * Marking this setting as reserved for LPAE
+ */
+#define AS_MEMATTR_LPAE_NON_CACHEABLE_RESERVED
+
+/* L2_MMU_CONFIG register */
+#define L2_MMU_CONFIG_ALLOW_SNOOP_DISPARITY_SHIFT       (23)
+#define L2_MMU_CONFIG_ALLOW_SNOOP_DISPARITY             (0x1 << L2_MMU_CONFIG_ALLOW_SNOOP_DISPARITY_SHIFT)
+
+/* End L2_MMU_CONFIG register */
+
+/* THREAD_* registers */
+
+/* THREAD_FEATURES IMPLEMENTATION_TECHNOLOGY values */
+#define IMPLEMENTATION_UNSPECIFIED  0
+#define IMPLEMENTATION_SILICON      1
+#define IMPLEMENTATION_FPGA         2
+#define IMPLEMENTATION_MODEL        3
+
+/* Default values when registers are not supported by the implemented hardware */
+#define THREAD_MT_DEFAULT     256
+#define THREAD_MWS_DEFAULT    256
+#define THREAD_MBS_DEFAULT    256
+#define THREAD_MR_DEFAULT     1024
+#define THREAD_MTQ_DEFAULT    4
+#define THREAD_MTGS_DEFAULT   10
+
+/* End THREAD_* registers */
+
+/* SHADER_CONFIG register */
+#define SC_LS_ALLOW_ATTR_TYPES      (1ul << 16)
+#define SC_TLS_HASH_ENABLE          (1ul << 17)
+#define SC_LS_ATTR_CHECK_DISABLE    (1ul << 18)
+#define SC_VAR_ALGORITHM            (1ul << 29)
+/* End SHADER_CONFIG register */
+
+/* TILER_CONFIG register */
+#define TC_CLOCK_GATE_OVERRIDE      (1ul << 0)
+/* End TILER_CONFIG register */
+
+/* L2_CONFIG register */
+#define L2_CONFIG_SIZE_SHIFT        16
+#define L2_CONFIG_SIZE_MASK         (0xFFul << L2_CONFIG_SIZE_SHIFT)
+#define L2_CONFIG_HASH_SHIFT        24
+#define L2_CONFIG_HASH_MASK         (0xFFul << L2_CONFIG_HASH_SHIFT)
+#define L2_CONFIG_ASN_HASH_ENABLE_SHIFT        24
+#define L2_CONFIG_ASN_HASH_ENABLE_MASK         (1ul << L2_CONFIG_ASN_HASH_ENABLE_SHIFT)
+/* End L2_CONFIG register */
+
+
+/* IDVS_GROUP register */
+#define IDVS_GROUP_SIZE_SHIFT (16)
+#define IDVS_GROUP_MAX_SIZE (0x3F)
+
+/* SYSC_ALLOC read IDs */
+#define SYSC_ALLOC_ID_R_OTHER       0x00
+#define SYSC_ALLOC_ID_R_CSF         0x02
+#define SYSC_ALLOC_ID_R_MMU         0x04
+#define SYSC_ALLOC_ID_R_TILER_VERT  0x08
+#define SYSC_ALLOC_ID_R_TILER_PTR   0x09
+#define SYSC_ALLOC_ID_R_TILER_INDEX 0x0A
+#define SYSC_ALLOC_ID_R_TILER_OTHER 0x0B
+#define SYSC_ALLOC_ID_R_IC          0x10
+#define SYSC_ALLOC_ID_R_ATTR        0x11
+#define SYSC_ALLOC_ID_R_SCM         0x12
+#define SYSC_ALLOC_ID_R_FSDC        0x13
+#define SYSC_ALLOC_ID_R_VL          0x14
+#define SYSC_ALLOC_ID_R_PLR         0x15
+#define SYSC_ALLOC_ID_R_TEX         0x18
+#define SYSC_ALLOC_ID_R_LSC         0x1c
+
+/* SYSC_ALLOC write IDs */
+#define SYSC_ALLOC_ID_W_OTHER            0x00
+#define SYSC_ALLOC_ID_W_CSF              0x02
+#define SYSC_ALLOC_ID_W_PCB              0x07
+#define SYSC_ALLOC_ID_W_TILER_PTR        0x09
+#define SYSC_ALLOC_ID_W_TILER_VERT_PLIST 0x0A
+#define SYSC_ALLOC_ID_W_TILER_OTHER      0x0B
+#define SYSC_ALLOC_ID_W_L2_EVICT         0x0C
+#define SYSC_ALLOC_ID_W_L2_FLUSH         0x0D
+#define SYSC_ALLOC_ID_W_TIB_COLOR        0x10
+#define SYSC_ALLOC_ID_W_TIB_COLOR_AFBCH  0x11
+#define SYSC_ALLOC_ID_W_TIB_COLOR_AFBCB  0x12
+#define SYSC_ALLOC_ID_W_TIB_CRC          0x13
+#define SYSC_ALLOC_ID_W_TIB_DS           0x14
+#define SYSC_ALLOC_ID_W_TIB_DS_AFBCH     0x15
+#define SYSC_ALLOC_ID_W_TIB_DS_AFBCB     0x16
+#define SYSC_ALLOC_ID_W_LSC              0x1C
+
+/* SYSC_ALLOC values */
+#define SYSC_ALLOC_L2_ALLOC 0x0
+#define SYSC_ALLOC_NEVER_ALLOC 0x2
+#define SYSC_ALLOC_ALWAYS_ALLOC 0x3
+#define SYSC_ALLOC_PTL_ALLOC 0x4
+#define SYSC_ALLOC_L2_PTL_ALLOC 0x5
+
+/* SYSC_ALLOC register */
+#define SYSC_ALLOC_R_SYSC_ALLOC0_SHIFT (0)
+#define SYSC_ALLOC_R_SYSC_ALLOC0_MASK ((0xF) << SYSC_ALLOC_R_SYSC_ALLOC0_SHIFT)
+#define SYSC_ALLOC_R_SYSC_ALLOC0_GET(reg_val)                                  \
+	(((reg_val)&SYSC_ALLOC_R_SYSC_ALLOC0_MASK) >>                          \
+	 SYSC_ALLOC_R_SYSC_ALLOC0_SHIFT)
+#define SYSC_ALLOC_R_SYSC_ALLOC0_SET(reg_val, value)                           \
+	(((reg_val) & ~SYSC_ALLOC_R_SYSC_ALLOC0_MASK) |                        \
+	 (((value) << SYSC_ALLOC_R_SYSC_ALLOC0_SHIFT) &                        \
+	  SYSC_ALLOC_R_SYSC_ALLOC0_MASK))
+/* End of SYSC_ALLOC_R_SYSC_ALLOC0 values */
+#define SYSC_ALLOC_W_SYSC_ALLOC0_SHIFT (4)
+#define SYSC_ALLOC_W_SYSC_ALLOC0_MASK ((0xF) << SYSC_ALLOC_W_SYSC_ALLOC0_SHIFT)
+#define SYSC_ALLOC_W_SYSC_ALLOC0_GET(reg_val)                                  \
+	(((reg_val)&SYSC_ALLOC_W_SYSC_ALLOC0_MASK) >>                          \
+	 SYSC_ALLOC_W_SYSC_ALLOC0_SHIFT)
+#define SYSC_ALLOC_W_SYSC_ALLOC0_SET(reg_val, value)                           \
+	(((reg_val) & ~SYSC_ALLOC_W_SYSC_ALLOC0_MASK) |                        \
+	 (((value) << SYSC_ALLOC_W_SYSC_ALLOC0_SHIFT) &                        \
+	  SYSC_ALLOC_W_SYSC_ALLOC0_MASK))
+/* End of SYSC_ALLOC_W_SYSC_ALLOC0 values */
+#define SYSC_ALLOC_R_SYSC_ALLOC1_SHIFT (8)
+#define SYSC_ALLOC_R_SYSC_ALLOC1_MASK ((0xF) << SYSC_ALLOC_R_SYSC_ALLOC1_SHIFT)
+#define SYSC_ALLOC_R_SYSC_ALLOC1_GET(reg_val)                                  \
+	(((reg_val)&SYSC_ALLOC_R_SYSC_ALLOC1_MASK) >>                          \
+	 SYSC_ALLOC_R_SYSC_ALLOC1_SHIFT)
+#define SYSC_ALLOC_R_SYSC_ALLOC1_SET(reg_val, value)                           \
+	(((reg_val) & ~SYSC_ALLOC_R_SYSC_ALLOC1_MASK) |                        \
+	 (((value) << SYSC_ALLOC_R_SYSC_ALLOC1_SHIFT) &                        \
+	  SYSC_ALLOC_R_SYSC_ALLOC1_MASK))
+/* End of SYSC_ALLOC_R_SYSC_ALLOC1 values */
+#define SYSC_ALLOC_W_SYSC_ALLOC1_SHIFT (12)
+#define SYSC_ALLOC_W_SYSC_ALLOC1_MASK ((0xF) << SYSC_ALLOC_W_SYSC_ALLOC1_SHIFT)
+#define SYSC_ALLOC_W_SYSC_ALLOC1_GET(reg_val)                                  \
+	(((reg_val)&SYSC_ALLOC_W_SYSC_ALLOC1_MASK) >>                          \
+	 SYSC_ALLOC_W_SYSC_ALLOC1_SHIFT)
+#define SYSC_ALLOC_W_SYSC_ALLOC1_SET(reg_val, value)                           \
+	(((reg_val) & ~SYSC_ALLOC_W_SYSC_ALLOC1_MASK) |                        \
+	 (((value) << SYSC_ALLOC_W_SYSC_ALLOC1_SHIFT) &                        \
+	  SYSC_ALLOC_W_SYSC_ALLOC1_MASK))
+/* End of SYSC_ALLOC_W_SYSC_ALLOC1 values */
+#define SYSC_ALLOC_R_SYSC_ALLOC2_SHIFT (16)
+#define SYSC_ALLOC_R_SYSC_ALLOC2_MASK ((0xF) << SYSC_ALLOC_R_SYSC_ALLOC2_SHIFT)
+#define SYSC_ALLOC_R_SYSC_ALLOC2_GET(reg_val)                                  \
+	(((reg_val)&SYSC_ALLOC_R_SYSC_ALLOC2_MASK) >>                          \
+	 SYSC_ALLOC_R_SYSC_ALLOC2_SHIFT)
+#define SYSC_ALLOC_R_SYSC_ALLOC2_SET(reg_val, value)                           \
+	(((reg_val) & ~SYSC_ALLOC_R_SYSC_ALLOC2_MASK) |                        \
+	 (((value) << SYSC_ALLOC_R_SYSC_ALLOC2_SHIFT) &                        \
+	  SYSC_ALLOC_R_SYSC_ALLOC2_MASK))
+/* End of SYSC_ALLOC_R_SYSC_ALLOC2 values */
+#define SYSC_ALLOC_W_SYSC_ALLOC2_SHIFT (20)
+#define SYSC_ALLOC_W_SYSC_ALLOC2_MASK ((0xF) << SYSC_ALLOC_W_SYSC_ALLOC2_SHIFT)
+#define SYSC_ALLOC_W_SYSC_ALLOC2_GET(reg_val)                                  \
+	(((reg_val)&SYSC_ALLOC_W_SYSC_ALLOC2_MASK) >>                          \
+	 SYSC_ALLOC_W_SYSC_ALLOC2_SHIFT)
+#define SYSC_ALLOC_W_SYSC_ALLOC2_SET(reg_val, value)                           \
+	(((reg_val) & ~SYSC_ALLOC_W_SYSC_ALLOC2_MASK) |                        \
+	 (((value) << SYSC_ALLOC_W_SYSC_ALLOC2_SHIFT) &                        \
+	  SYSC_ALLOC_W_SYSC_ALLOC2_MASK))
+/* End of SYSC_ALLOC_W_SYSC_ALLOC2 values */
+#define SYSC_ALLOC_R_SYSC_ALLOC3_SHIFT (24)
+#define SYSC_ALLOC_R_SYSC_ALLOC3_MASK ((0xF) << SYSC_ALLOC_R_SYSC_ALLOC3_SHIFT)
+#define SYSC_ALLOC_R_SYSC_ALLOC3_GET(reg_val)                                  \
+	(((reg_val)&SYSC_ALLOC_R_SYSC_ALLOC3_MASK) >>                          \
+	 SYSC_ALLOC_R_SYSC_ALLOC3_SHIFT)
+#define SYSC_ALLOC_R_SYSC_ALLOC3_SET(reg_val, value)                           \
+	(((reg_val) & ~SYSC_ALLOC_R_SYSC_ALLOC3_MASK) |                        \
+	 (((value) << SYSC_ALLOC_R_SYSC_ALLOC3_SHIFT) &                        \
+	  SYSC_ALLOC_R_SYSC_ALLOC3_MASK))
+/* End of SYSC_ALLOC_R_SYSC_ALLOC3 values */
+#define SYSC_ALLOC_W_SYSC_ALLOC3_SHIFT (28)
+#define SYSC_ALLOC_W_SYSC_ALLOC3_MASK ((0xFU) << SYSC_ALLOC_W_SYSC_ALLOC3_SHIFT) /* unsigned: reaches bit 31 */
+#define SYSC_ALLOC_W_SYSC_ALLOC3_GET(reg_val)                                  \
+	(((reg_val)&SYSC_ALLOC_W_SYSC_ALLOC3_MASK) >>                          \
+	 SYSC_ALLOC_W_SYSC_ALLOC3_SHIFT)
+#define SYSC_ALLOC_W_SYSC_ALLOC3_SET(reg_val, value)                           \
+	(((reg_val) & ~SYSC_ALLOC_W_SYSC_ALLOC3_MASK) |                        \
+	 (((value) << SYSC_ALLOC_W_SYSC_ALLOC3_SHIFT) &                        \
+	  SYSC_ALLOC_W_SYSC_ALLOC3_MASK))
+/* End of SYSC_ALLOC_W_SYSC_ALLOC3 values */

 /* Include POWER_CHANGED_SINGLE in debug builds for use in irq latency test. */
 #ifdef CONFIG_MALI_BIFROST_DEBUG
--- a/drivers/gpu/arm/bifrost/ipa/backend/mali_kbase_ipa_counter_common_jm.h
+++ b/drivers/gpu/arm/bifrost/ipa/backend/mali_kbase_ipa_counter_common_jm.h
@@ -94,7 +94,10 @@ struct kbase_ipa_model_vinstr_data {
 struct kbase_ipa_group {
 	const char *name;
 	s32 default_value;
-	s64 (*op)(struct kbase_ipa_model_vinstr_data *, s32, u32);
+	s64 (*op)(
+		struct kbase_ipa_model_vinstr_data *model_data,
+		s32 coeff,
+		u32 counter_block_offset);
 	u32 counter_block_offset;
 };

--- a/drivers/gpu/arm/bifrost/ipa/backend/mali_kbase_ipa_counter_csf.c
+++ b/drivers/gpu/arm/bifrost/ipa/backend/mali_kbase_ipa_counter_csf.c
@@ -115,8 +115,8 @@ static const struct kbase_ipa_counter ipa_top_level_cntrs_def_ttux[] = {
 };

 /* These tables provide a description of each performance counter
-  * used by the shader cores counter model for energy estimation.
-  */
+ * used by the shader cores counter model for energy estimation.
+ */
 static const struct kbase_ipa_counter ipa_shader_core_cntrs_def_todx[] = {
 	SC_COUNTER_DEF("exec_instr_fma", 505449, EXEC_INSTR_FMA),
 	SC_COUNTER_DEF("tex_filt_num_operations", 574869, TEX_FILT_NUM_OPS),
@@ -150,7 +150,7 @@ static const struct kbase_ipa_counter ipa_shader_core_cntrs_def_ttux[] = {
 	SC_COUNTER_DEF("ls_mem_read_short", 322525, LS_MEM_READ_SHORT),
 	SC_COUNTER_DEF("full_quad_warps", 844124, FULL_QUAD_WARPS),
 	SC_COUNTER_DEF("exec_instr_cvt", 226411, EXEC_INSTR_CVT),
-	SC_COUNTER_DEF("frag_quads_ezs_update",372032, FRAG_QUADS_EZS_UPDATE),
+	SC_COUNTER_DEF("frag_quads_ezs_update", 372032, FRAG_QUADS_EZS_UPDATE),
 };

 #define IPA_POWER_MODEL_OPS(gpu, init_token) \
@@ -224,8 +224,8 @@ const struct kbase_ipa_model_ops *kbase_ipa_counter_model_ops_find(

 const char *kbase_ipa_counter_model_name_from_id(u32 gpu_id)
 {
-	const u32 prod_id = (gpu_id & GPU_ID_VERSION_PRODUCT_ID) >>
-			GPU_ID_VERSION_PRODUCT_ID_SHIFT;
+	const u32 prod_id =
+		(gpu_id & GPU_ID_VERSION_PRODUCT_ID) >> KBASE_GPU_ID_VERSION_PRODUCT_ID_SHIFT;

 	switch (GPU_ID2_MODEL_MATCH_VALUE(prod_id)) {
 	case GPU_ID2_PRODUCT_TODX:
--- a/drivers/gpu/arm/bifrost/ipa/backend/mali_kbase_ipa_counter_jm.c
+++ b/drivers/gpu/arm/bifrost/ipa/backend/mali_kbase_ipa_counter_jm.c
@@ -111,20 +111,21 @@ static u32 kbase_g7x_power_model_get_sc_counter(struct kbase_ipa_model_vinstr_da

 /**
 * memsys_single_counter() - calculate energy for a single Memory System performance counter.
- * @model_data:   pointer to GPU model data.
- * @coeff:        default value of coefficient for IPA group.
- * @offset:       offset in bytes of the counter inside the block it belongs to.
+ * @model_data:            pointer to GPU model data.
+ * @coeff:                 default value of coefficient for IPA group.
+ * @counter_block_offset:  offset in bytes of the counter inside the block it belongs to.
 *
 * Return: Energy estimation for a single Memory System performance counter.
 */
 static s64 kbase_g7x_sum_all_memsys_blocks(
 		struct kbase_ipa_model_vinstr_data *model_data,
 		s32 coeff,
-		u32 offset)
+		u32 counter_block_offset)
 {
 	u32 counter;

-	counter = kbase_g7x_power_model_get_memsys_counter(model_data, offset);
+	counter = kbase_g7x_power_model_get_memsys_counter(model_data,
+						     counter_block_offset);
 	return kbase_ipa_sum_all_memsys_blocks(model_data, coeff, counter);
 }

@@ -531,8 +532,8 @@ const struct kbase_ipa_model_ops *kbase_ipa_counter_model_ops_find(

 const char *kbase_ipa_counter_model_name_from_id(u32 gpu_id)
 {
-	const u32 prod_id = (gpu_id & GPU_ID_VERSION_PRODUCT_ID) >>
-			GPU_ID_VERSION_PRODUCT_ID_SHIFT;
+	const u32 prod_id =
+		(gpu_id & GPU_ID_VERSION_PRODUCT_ID) >> KBASE_GPU_ID_VERSION_PRODUCT_ID_SHIFT;

 	switch (GPU_ID2_MODEL_MATCH_VALUE(prod_id)) {
 	case GPU_ID2_PRODUCT_TMIX:
--- a/drivers/gpu/arm/bifrost/ipa/mali_kbase_ipa.c
+++ b/drivers/gpu/arm/bifrost/ipa/mali_kbase_ipa.c
@@ -71,7 +71,7 @@ KBASE_EXPORT_TEST_API(kbase_ipa_model_ops_find);

 const char *kbase_ipa_model_name_from_id(u32 gpu_id)
 {
-	const char* model_name =
+	const char *model_name =
 		kbase_ipa_counter_model_name_from_id(gpu_id);

 	if (!model_name)
@@ -610,7 +610,7 @@ static unsigned long kbase_get_dynamic_power(unsigned long freq,

 		/* Here unlike kbase_get_real_power(), shader core frequency is
 		 * used for the scaling as simple power model is used to obtain
-		 * the value of dynamic coefficient (which is is a fixed value
+		 * the value of dynamic coefficient (which is a fixed value
 		 * retrieved from the device tree).
 		 */
 		power += kbase_scale_dynamic_power(
--- a/drivers/gpu/arm/bifrost/ipa/mali_kbase_ipa_debugfs.c
+++ b/drivers/gpu/arm/bifrost/ipa/mali_kbase_ipa_debugfs.c
@@ -128,8 +128,14 @@ static ssize_t param_string_set(struct file *file, const char __user *user_buf,

 	err = kbase_ipa_model_recalculate(model);
 	if (err < 0) {
+		u32 string_len = strscpy(param->addr.str, old_str, param->size);
+
+		string_len += sizeof(char);
+		/* Make sure that the source string fits into the buffer. */
+		KBASE_DEBUG_ASSERT(string_len <= param->size);
+		CSTD_UNUSED(string_len);
+
 		ret = err;
-		strlcpy(param->addr.str, old_str, param->size);
 	}

 end:
@@ -275,7 +281,7 @@ static void kbase_ipa_model_debugfs_init(struct kbase_ipa_model *model)
 				"Type not set for %s parameter %s\n",
 				model->ops->name, param->name);
 		} else {
-			debugfs_create_file(param->name, S_IRUGO | S_IWUSR,
+			debugfs_create_file(param->name, 0644,
 					    dir, param, fops);
 		}
 	}
--- a/drivers/gpu/arm/bifrost/ipa/mali_kbase_ipa_simple.c
+++ b/drivers/gpu/arm/bifrost/ipa/mali_kbase_ipa_simple.c
@@ -307,8 +307,12 @@ static int kbase_simple_power_model_recalculate(struct kbase_ipa_model *model)
 		model_data->gpu_tz = NULL;
 	} else {
 		char tz_name[THERMAL_NAME_LENGTH];
+		u32 string_len = strscpy(tz_name, model_data->tz_name, sizeof(tz_name));

-		strlcpy(tz_name, model_data->tz_name, sizeof(tz_name));
+		string_len += sizeof(char);
+		/* Make sure that the source string fits into the buffer. */
+		KBASE_DEBUG_ASSERT(string_len <= sizeof(tz_name));
+		CSTD_UNUSED(string_len);

 		/* Release ipa.lock so that thermal_list_lock is not acquired
 		 * with ipa.lock held, thereby avoid lock ordering violation
--- a/drivers/gpu/arm/bifrost/jm/mali_kbase_jm_defs.h
+++ b/drivers/gpu/arm/bifrost/jm/mali_kbase_jm_defs.h
@@ -1,7 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
 /*
 *
- * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
 *
 * This program is free software and is provided to you under the terms of the
 * GNU General Public License version 2 as published by the Free Software
@@ -124,6 +124,18 @@
 /* Reset the GPU after each atom completion */
 #define KBASE_SERIALIZE_RESET (1 << 2)

+/**
+ * enum kbase_timeout_selector - The choice of which timeout to get scaled
+ *                               using the lowest GPU frequency.
+ * @KBASE_TIMEOUT_SELECTOR_COUNT: Number of timeout selectors. Must be last in
+ *                                the enum.
+ */
+enum kbase_timeout_selector {
+
+	/* Must be the last in the enum */
+	KBASE_TIMEOUT_SELECTOR_COUNT
+};
+
 #if IS_ENABLED(CONFIG_DEBUG_FS)
 /**
 * struct base_job_fault_event - keeps track of the atom which faulted or which
@@ -653,11 +665,12 @@ static inline bool kbase_jd_katom_is_protected(

 /**
 * kbase_atom_is_younger - query if one atom is younger by age than another
- * @katom_a: the first atom
- * @katom_a: the second atom
 *
- * Return: true if the first atom is strictly younger than the second, false
- * otherwise.
+ * @katom_a: the first atom
+ * @katom_b: the second atom
+ *
+ * Return: true if the first atom is strictly younger than the second,
+ *         false otherwise.
 */
 static inline bool kbase_jd_atom_is_younger(const struct kbase_jd_atom *katom_a,
 					    const struct kbase_jd_atom *katom_b)
@@ -666,7 +679,9 @@ static inline bool kbase_jd_atom_is_younger(const struct kbase_jd_atom *katom_a,
 }

 /**
- * kbase_jd_atom_is_earlier
+ * kbase_jd_atom_is_earlier - Check whether the first atom has been submitted
+ *                            earlier than the second one
+ *
 * @katom_a: the first atom
 * @katom_b: the second atom
 *
@@ -730,17 +745,13 @@ static inline bool kbase_jd_atom_is_earlier(const struct kbase_jd_atom *katom_a,
 * A state machine is used to control incremental rendering.
 */
 enum kbase_jd_renderpass_state {
-	KBASE_JD_RP_COMPLETE,       /* COMPLETE => START */
-	KBASE_JD_RP_START,          /* START => PEND_OOM or COMPLETE */
-	KBASE_JD_RP_PEND_OOM,       /* PEND_OOM => OOM or COMPLETE */
-	KBASE_JD_RP_OOM,            /* OOM => RETRY */
-	KBASE_JD_RP_RETRY,          /* RETRY => RETRY_PEND_OOM or
-				     *          COMPLETE
-				     */
-	KBASE_JD_RP_RETRY_PEND_OOM, /* RETRY_PEND_OOM => RETRY_OOM or
-				     *                   COMPLETE
-				     */
-	KBASE_JD_RP_RETRY_OOM,      /* RETRY_OOM => RETRY */
+	KBASE_JD_RP_COMPLETE, /* COMPLETE => START */
+	KBASE_JD_RP_START, /* START => PEND_OOM or COMPLETE */
+	KBASE_JD_RP_PEND_OOM, /* PEND_OOM => OOM or COMPLETE */
+	KBASE_JD_RP_OOM, /* OOM => RETRY */
+	KBASE_JD_RP_RETRY, /* RETRY => RETRY_PEND_OOM or COMPLETE */
+	KBASE_JD_RP_RETRY_PEND_OOM, /* RETRY_PEND_OOM => RETRY_OOM or COMPLETE */
+	KBASE_JD_RP_RETRY_OOM /* RETRY_OOM => RETRY */
 };

 /**
@@ -813,7 +824,7 @@ struct kbase_jd_renderpass {
 *                            atom completes
 *                            execution on GPU or the input fence get signaled.
 * @tb_lock:                  Lock to serialize the write access made to @tb to
- *                            to store the register access trace messages.
+ *                            store the register access trace messages.
 * @tb:                       Pointer to the Userspace accessible buffer storing
 *                            the trace messages for register read/write
 *                            accesses made by the Kbase. The buffer is filled
--- a/drivers/gpu/arm/bifrost/jm/mali_kbase_jm_js.h
+++ b/drivers/gpu/arm/bifrost/jm/mali_kbase_jm_js.h
@@ -1,7 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
 /*
 *
- * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2020-2022 ARM Limited. All rights reserved.
 *
 * This program is free software and is provided to you under the terms of the
 * GNU General Public License version 2 as published by the Free Software
@@ -36,6 +36,8 @@
 * The struct kbasep_js_device_data sub-structure of kbdev must be zero
 * initialized before passing to the kbasep_js_devdata_init() function. This is
 * to give efficient error path code.
+ *
+ * Return: 0 on success, error code otherwise.
 */
 int kbasep_js_devdata_init(struct kbase_device * const kbdev);

@@ -86,6 +88,8 @@ void kbasep_js_devdata_term(struct kbase_device *kbdev);
 *
 * The struct kbase_context must be zero initialized before passing to the
 * kbase_js_init() function. This is to give efficient error path code.
+ *
+ * Return: 0 on success, error code otherwise.
 */
 int kbasep_js_kctx_init(struct kbase_context *const kctx);

@@ -206,7 +210,7 @@ bool kbasep_js_add_job(struct kbase_context *kctx, struct kbase_jd_atom *atom);
 * @kbdev: The kbase_device to operate on
 * @kctx:  The kbase_context to operate on
 * @atom: Atom to remove
-*
+ *
 * Completely removing a job requires several calls:
 * * kbasep_js_copy_atom_retained_state(), to capture the 'retained state' of
 *   the atom
@@ -356,9 +360,10 @@ void kbasep_js_runpool_release_ctx(struct kbase_device *kbdev,
 		struct kbase_context *kctx);

 /**
- * kbasep_js_runpool_release_ctx_and_katom_retained_state -  Variant of
+ * kbasep_js_runpool_release_ctx_and_katom_retained_state - Variant of
 * kbasep_js_runpool_release_ctx() that handles additional
 * actions from completing an atom.
+ *
 * @kbdev:                KBase device
 * @kctx:                 KBase context
 * @katom_retained_state: Retained state from the atom
@@ -381,8 +386,8 @@ void kbasep_js_runpool_release_ctx_and_katom_retained_state(
 		struct kbasep_js_atom_retained_state *katom_retained_state);

 /**
- * kbasep_js_runpool_release_ctx_nolock -
- * Variant of kbase_js_runpool_release_ctx() w/out locks
+ * kbasep_js_runpool_release_ctx_nolock - Variant of kbasep_js_runpool_release_ctx()
+ *                                        without locks
 * @kbdev: KBase device
 * @kctx:  KBase context
 *
@@ -396,6 +401,7 @@ void kbasep_js_runpool_release_ctx_nolock(struct kbase_device *kbdev,

 /**
 * kbasep_js_schedule_privileged_ctx -  Schedule in a privileged context
+ *
 * @kbdev: KBase device
 * @kctx:  KBase context
 *
@@ -459,7 +465,7 @@ void kbase_js_try_run_jobs(struct kbase_device *kbdev);
 * contexts from (re)entering the runpool.
 *
 * This does not handle suspending the one privileged context: the caller must
- * instead do this by by suspending the GPU HW Counter Instrumentation.
+ * instead do this by suspending the GPU HW Counter Instrumentation.
 *
 * This will eventually cause all Power Management active references held by
 * contexts on the runpool to be released, without running any more atoms.
@@ -688,6 +694,8 @@ void kbase_js_update_ctx_priority(struct kbase_context *kctx);
 * As with any bool, never test the return value with true.
 *
 * The caller must hold hwaccess_lock.
+ *
+ * Return: true if the context is allowed to submit jobs, false otherwise.
 */
 static inline bool kbasep_js_is_submit_allowed(
 		struct kbasep_js_device_data *js_devdata,
@@ -768,8 +776,9 @@ static inline void kbasep_js_clear_submit_allowed(
 }

 /**
- * kbasep_js_atom_retained_state_init_invalid -
- * Create an initial 'invalid' atom retained state
+ * kbasep_js_atom_retained_state_init_invalid - Create an initial 'invalid'
+ *                                              atom retained state
+ *
 * @retained_state: pointer where to create and initialize the state
 *
 * Create an initial 'invalid' atom retained state, that requires no
--- a/drivers/gpu/arm/bifrost/jm/mali_kbase_js_defs.h
+++ b/drivers/gpu/arm/bifrost/jm/mali_kbase_js_defs.h
@@ -55,10 +55,11 @@ typedef void kbasep_js_ctx_job_cb(struct kbase_device *kbdev,
 * @KBASEP_JS_CTX_ATTR_COMPUTE: Attribute indicating a context that contains
 *                              Compute jobs.
 * @KBASEP_JS_CTX_ATTR_NON_COMPUTE: Attribute indicating a context that contains
- * 	Non-Compute jobs.
+ *                                  Non-Compute jobs.
 * @KBASEP_JS_CTX_ATTR_COMPUTE_ALL_CORES: Attribute indicating that a context
- * 	contains compute-job atoms that aren't restricted to a coherent group,
- * 	and can run on all cores.
+ *                                        contains compute-job atoms that aren't
+ *                                        restricted to a coherent group,
+ *                                        and can run on all cores.
 * @KBASEP_JS_CTX_ATTR_COUNT: Must be the last in the enum
 *
 * Each context attribute can be thought of as a boolean value that caches some
@@ -115,7 +116,6 @@ typedef void kbasep_js_ctx_job_cb(struct kbase_device *kbdev,
 * BASE_JD_REQ_COHERENT_GROUP set. This is an unlikely case, but it's easy
 * enough to handle anyway.
 *
- *
 */
 enum kbasep_js_ctx_attr {
 	KBASEP_JS_CTX_ATTR_COMPUTE,
@@ -217,44 +217,46 @@ typedef u32 kbase_atom_ordering_flag_t;
 /**
 * struct kbasep_js_device_data - KBase Device Data Job Scheduler sub-structure
 * @runpool_irq: Sub-structure to collect together Job Scheduling data used in
- *	IRQ context. The hwaccess_lock must be held when accessing.
+ *               IRQ context. The hwaccess_lock must be held when accessing.
 * @runpool_irq.submit_allowed: Bitvector indicating whether a currently
- * 	scheduled context is allowed to submit jobs. When bit 'N' is set in
- * 	this, it indicates whether the context bound to address space 'N' is
- * 	allowed to submit jobs.
+ *                              scheduled context is allowed to submit jobs.
+ *                              When bit 'N' is set in this, it indicates whether
+ *                              the context bound to address space 'N' is
+ *                              allowed to submit jobs.
 * @runpool_irq.ctx_attr_ref_count: Array of Context Attributes Ref_counters:
- * 	  Each is large enough to hold a refcount of the number of contexts
- * 	that can fit into the runpool. This is currently BASE_MAX_NR_AS.
- * 	  Note that when BASE_MAX_NR_AS==16 we need 5 bits (not 4) to store
- * 	the refcount. Hence, it's not worthwhile reducing this to
- * 	bit-manipulation on u32s to save space (where in contrast, 4 bit
- * 	sub-fields would be easy to do and would save space).
- * 	  Whilst this must not become negative, the sign bit is used for:
- * 	- error detection in debug builds
- * 	- Optimization: it is undefined for a signed int to overflow, and so
- * 	the compiler can optimize for that never happening (thus, no masking
- * 	is required on updating the variable)
+ *     Each is large enough to hold a refcount of the number of contexts
+ *     that can fit into the runpool. This is currently BASE_MAX_NR_AS.
+ *     Note that when BASE_MAX_NR_AS==16 we need 5 bits (not 4) to store
+ *     the refcount. Hence, it's not worthwhile reducing this to
+ *     bit-manipulation on u32s to save space (where in contrast, 4 bit
+ *     sub-fields would be easy to do and would save space).
+ *     Whilst this must not become negative, the sign bit is used for:
+ *       - error detection in debug builds
+ *       - Optimization: it is undefined for a signed int to overflow, and so
+ *         the compiler can optimize for that never happening (thus, no masking
+ *         is required on updating the variable)
 * @runpool_irq.slot_affinities: Affinity management and tracking. Bitvector
- *	to aid affinity checking. Element 'n' bit 'i' indicates that slot 'n'
- *	is using core i (i.e. slot_affinity_refcount[n][i] > 0)
+ *                               to aid affinity checking.
+ *                               Element 'n' bit 'i' indicates that slot 'n'
+ *                               is using core i (i.e. slot_affinity_refcount[n][i] > 0)
 * @runpool_irq.slot_affinity_refcount: Array of fefcount for each core owned
- *	by each slot. Used to generate the slot_affinities array of bitvectors.
- *	  The value of the refcount will not exceed BASE_JM_SUBMIT_SLOTS,
- *	because it is refcounted only when a job is definitely about to be
- *	submitted to a slot, and is de-refcounted immediately after a job
- *	finishes
+ *     by each slot. Used to generate the slot_affinities array of bitvectors.
+ *     The value of the refcount will not exceed BASE_JM_SUBMIT_SLOTS,
+ *     because it is refcounted only when a job is definitely about to be
+ *     submitted to a slot, and is de-refcounted immediately after a job
+ *     finishes
 * @schedule_sem: Scheduling semaphore. This must be held when calling
- *	kbase_jm_kick()
+ *                kbase_jm_kick()
 * @ctx_list_pullable: List of contexts that can currently be pulled from
 * @ctx_list_unpullable: List of contexts that can not currently be pulled
- *	from, but have jobs currently running.
+ *                       from, but have jobs currently running.
 * @nr_user_contexts_running: Number of currently scheduled user contexts
- *	(excluding ones that are not submitting jobs)
+ *                            (excluding ones that are not submitting jobs)
 * @nr_all_contexts_running: Number of currently scheduled contexts (including
- *	ones that are not submitting jobs)
+ *                           ones that are not submitting jobs)
 * @js_reqs: Core Requirements to match up with base_js_atom's core_req memeber
- *	@note This is a write-once member, and so no locking is required to
- *	read
+ *           @note This is a write-once member, and so no locking is required to
+ *           read
 * @scheduling_period_ns:	Value for JS_SCHEDULING_PERIOD_NS
 * @soft_stop_ticks:		Value for JS_SOFT_STOP_TICKS
 * @soft_stop_ticks_cl:		Value for JS_SOFT_STOP_TICKS_CL
@@ -268,16 +270,16 @@ typedef u32 kbase_atom_ordering_flag_t;
 * @suspended_soft_jobs_list:	List of suspended soft jobs
 * @softstop_always:		Support soft-stop on a single context
 * @init_status:The initialized-flag is placed at the end, to avoid
- * 	cache-pollution (we should only be using this during init/term paths).
- * 	@note This is a write-once member, and so no locking is required to
- * 	read
+ *              cache-pollution (we should only be using this during init/term paths).
+ *              @note This is a write-once member, and so no locking is required to
+ *              read
 * @nr_contexts_pullable:Number of contexts that can currently be pulled from
 * @nr_contexts_runnable:Number of contexts that can either be pulled from or
- * 	arecurrently running
+ *                       are currently running
 * @soft_job_timeout_ms:Value for JS_SOFT_JOB_TIMEOUT
 * @queue_mutex: Queue Lock, used to access the Policy's queue of contexts
- * 	independently of the Run Pool.
- *	Of course, you don't need the Run Pool lock to access this.
+ *               independently of the Run Pool.
+ *               Of course, you don't need the Run Pool lock to access this.
 * @runpool_mutex: Run Pool mutex, for managing contexts within the runpool.
 *
 * This encapsulates the current context of the Job Scheduler on a particular
--- a/drivers/gpu/arm/bifrost/mali_base_hwconfig_features.h
+++ b/drivers/gpu/arm/bifrost/mali_base_hwconfig_features.h
@@ -168,6 +168,7 @@ __attribute__((unused)) static const enum base_hw_feature base_hw_features_tTUx[
 	BASE_HW_FEATURE_L2_CONFIG,
 	BASE_HW_FEATURE_CLEAN_ONLY_SAFE,
 	BASE_HW_FEATURE_ASN_HASH,
+	BASE_HW_FEATURE_GPU_SLEEP,
 	BASE_HW_FEATURE_END
 };

--- a/drivers/gpu/arm/bifrost/mali_base_hwconfig_issues.h
+++ b/drivers/gpu/arm/bifrost/mali_base_hwconfig_issues.h
@@ -60,6 +60,7 @@ enum base_hw_issue {
 	BASE_HW_ISSUE_TTRX_3485,
 	BASE_HW_ISSUE_GPU2019_3212,
 	BASE_HW_ISSUE_TURSEHW_1997,
+	BASE_HW_ISSUE_GPU2019_3878,
 	BASE_HW_ISSUE_END
 };

@@ -596,6 +597,7 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tODx_r0p0
 	BASE_HW_ISSUE_TSIX_2033,
 	BASE_HW_ISSUE_TTRX_1337,
 	BASE_HW_ISSUE_GPU2019_3212,
+	BASE_HW_ISSUE_GPU2019_3878,
 	BASE_HW_ISSUE_END
 };

@@ -605,6 +607,7 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tOD
 	BASE_HW_ISSUE_TSIX_2033,
 	BASE_HW_ISSUE_TTRX_1337,
 	BASE_HW_ISSUE_GPU2019_3212,
+	BASE_HW_ISSUE_GPU2019_3878,
 	BASE_HW_ISSUE_END
 };

@@ -612,6 +615,7 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tGRx_r0p0
 	BASE_HW_ISSUE_9435,
 	BASE_HW_ISSUE_TSIX_2033,
 	BASE_HW_ISSUE_TTRX_1337,
+	BASE_HW_ISSUE_GPU2019_3878,
 	BASE_HW_ISSUE_END
 };

@@ -620,6 +624,7 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tGR
 	BASE_HW_ISSUE_9435,
 	BASE_HW_ISSUE_TSIX_2033,
 	BASE_HW_ISSUE_TTRX_1337,
+	BASE_HW_ISSUE_GPU2019_3878,
 	BASE_HW_ISSUE_END
 };

@@ -627,6 +632,7 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tVAx_r0p0
 	BASE_HW_ISSUE_9435,
 	BASE_HW_ISSUE_TSIX_2033,
 	BASE_HW_ISSUE_TTRX_1337,
+	BASE_HW_ISSUE_GPU2019_3878,
 	BASE_HW_ISSUE_END
 };

@@ -635,6 +641,7 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tVA
 	BASE_HW_ISSUE_9435,
 	BASE_HW_ISSUE_TSIX_2033,
 	BASE_HW_ISSUE_TTRX_1337,
+	BASE_HW_ISSUE_GPU2019_3878,
 	BASE_HW_ISSUE_END
 };

@@ -643,6 +650,7 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tTU
 	BASE_HW_ISSUE_9435,
 	BASE_HW_ISSUE_TSIX_2033,
 	BASE_HW_ISSUE_TTRX_1337,
+	BASE_HW_ISSUE_GPU2019_3878,
 	BASE_HW_ISSUE_END
 };

@@ -651,6 +659,15 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tTUx_r0p0
 	BASE_HW_ISSUE_TSIX_2033,
 	BASE_HW_ISSUE_TTRX_1337,
 	BASE_HW_ISSUE_TURSEHW_1997,
+	BASE_HW_ISSUE_GPU2019_3878,
+	BASE_HW_ISSUE_END
+};
+
+__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tTUx_r1p0[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_TSIX_2033,
+	BASE_HW_ISSUE_TTRX_1337,
+	BASE_HW_ISSUE_GPU2019_3878,
 	BASE_HW_ISSUE_END
 };

--- a/drivers/gpu/arm/bifrost/mali_kbase.h
+++ b/drivers/gpu/arm/bifrost/mali_kbase.h
@@ -1,7 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
 /*
 *
- * (C) COPYRIGHT 2010-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2022 ARM Limited. All rights reserved.
 *
 * This program is free software and is provided to you under the terms of the
 * GNU General Public License version 2 as published by the Free Software
@@ -109,9 +109,9 @@

 struct kbase_device *kbase_device_alloc(void);
 /*
-* note: configuration attributes member of kbdev needs to have
-* been setup before calling kbase_device_init
-*/
+ * note: configuration attributes member of kbdev needs to have
+ * been setup before calling kbase_device_init
+ */

 int kbase_device_misc_init(struct kbase_device *kbdev);
 void kbase_device_misc_term(struct kbase_device *kbdev);
@@ -256,8 +256,26 @@ void kbase_jd_done(struct kbase_jd_atom *katom, int slot_nr, ktime_t *end_timest
 		kbasep_js_atom_done_code done_code);
 void kbase_jd_cancel(struct kbase_device *kbdev, struct kbase_jd_atom *katom);
 void kbase_jd_zap_context(struct kbase_context *kctx);
-bool jd_done_nolock(struct kbase_jd_atom *katom,
-		struct list_head *completed_jobs_ctx);
+
+/*
+ * jd_done_nolock - Perform the necessary handling of an atom that has completed
+ *                  the execution.
+ *
+ * @katom: Pointer to the atom that completed the execution
+ * @post_immediately: Flag indicating that completion event can be posted
+ *                    immediately for @katom and the other atoms dependent
+ *                    on @katom which also completed execution. The flag is
+ *                    false only for the case where the function is called by
+ *                    kbase_jd_done_worker() on the completion of atom running
+ *                    on the GPU.
+ *
+ * Note that if this is a soft-job that has had kbase_prepare_soft_job called on it then the caller
+ * is responsible for calling kbase_finish_soft_job *before* calling this function.
+ *
+ * The caller must hold the kbase_jd_context.lock.
+ */
+bool jd_done_nolock(struct kbase_jd_atom *katom, bool post_immediately);
+
 void kbase_jd_free_external_resources(struct kbase_jd_atom *katom);
 void kbase_jd_dep_clear_locked(struct kbase_jd_atom *katom);

@@ -299,19 +317,73 @@ void kbase_job_slot_ctx_priority_check_locked(struct kbase_context *kctx,
 * virtual address space in a growable memory region and the atom currently
 * executing on a job slot is the tiler job chain at the start of a renderpass.
 *
- * Return 0 if successful, otherwise a negative error code.
+ * Return: 0 if successful, otherwise a negative error code.
 */
 int kbase_job_slot_softstop_start_rp(struct kbase_context *kctx,
 		struct kbase_va_region *reg);

+/**
+ * kbase_job_slot_softstop - Soft-stop the specified job slot
+ *
+ * @kbdev:         The kbase device
+ * @js:            The job slot to soft-stop
+ * @target_katom:  The job that should be soft-stopped (or NULL for any job)
+ * Context:
+ *   The job slot lock must be held when calling this function.
+ *   The job slot must not already be in the process of being soft-stopped.
+ *
+ * Where possible any job in the next register is evicted before the soft-stop.
+ */
 void kbase_job_slot_softstop(struct kbase_device *kbdev, int js,
 		struct kbase_jd_atom *target_katom);
+
 void kbase_job_slot_softstop_swflags(struct kbase_device *kbdev, int js,
 		struct kbase_jd_atom *target_katom, u32 sw_flags);
+
+/**
+ * kbase_job_slot_hardstop - Hard-stop the specified job slot
+ * @kctx:         The kbase context that contains the job(s) that should
+ *                be hard-stopped
+ * @js:           The job slot to hard-stop
+ * @target_katom: The job that should be hard-stopped (or NULL for all
+ *                jobs from the context)
+ * Context:
+ *   The job slot lock must be held when calling this function.
+ */
 void kbase_job_slot_hardstop(struct kbase_context *kctx, int js,
 		struct kbase_jd_atom *target_katom);
+
+/**
+ * kbase_job_check_enter_disjoint - potentially enter disjoint mode
+ * @kbdev: kbase device
+ * @action: the event which has occurred
+ * @core_reqs: core requirements of the atom
+ * @target_katom: the atom which is being affected
+ *
+ * For a certain soft-stop action, work out whether to enter disjoint
+ * state.
+ *
+ * This does not register multiple disjoint events if the atom has already
+ * started a disjoint period
+ *
+ * @core_reqs can be supplied as 0 if the atom had not started on the hardware
+ * (and so a 'real' soft/hard-stop was not required, but it still interrupted
+ * flow, perhaps on another context)
+ *
+ * kbase_job_check_leave_disjoint() should be used to end the disjoint
+ * state when the soft/hard-stop action is complete
+ */
 void kbase_job_check_enter_disjoint(struct kbase_device *kbdev, u32 action,
 		base_jd_core_req core_reqs, struct kbase_jd_atom *target_katom);
+
+/**
+ * kbase_job_check_leave_disjoint - potentially leave disjoint state
+ * @kbdev: kbase device
+ * @target_katom: atom which is finishing
+ *
+ * Work out whether to leave disjoint state when finishing an atom that was
+ * originated by kbase_job_check_enter_disjoint().
+ */
 void kbase_job_check_leave_disjoint(struct kbase_device *kbdev,
 		struct kbase_jd_atom *target_katom);

@@ -334,7 +406,7 @@ void kbase_event_wakeup(struct kbase_context *kctx);
 *		allocation is to be validated.
 * @info:	Pointer to struct @base_jit_alloc_info
 *			which is to be validated.
- * @return: 0 if jit allocation is valid; negative error code otherwise
+ * Return: 0 if jit allocation is valid; negative error code otherwise
 */
 int kbasep_jit_alloc_validate(struct kbase_context *kctx,
 					struct base_jit_alloc_info *info);
@@ -381,9 +453,12 @@ static inline void kbase_free_user_buffer(
 * @buf_data:	Pointer to the information about external resources:
 *		pages pertaining to the external resource, number of
 *		pages to copy.
+ *
+ * Return:      0 on success, error code otherwise.
 */
 int kbase_mem_copy_from_extres(struct kbase_context *kctx,
 		struct kbase_debug_copy_buffer *buf_data);
+
 #if !MALI_USE_CSF
 int kbase_process_soft_job(struct kbase_jd_atom *katom);
 int kbase_prepare_soft_job(struct kbase_jd_atom *katom);
@@ -405,7 +480,9 @@ void kbasep_complete_triggered_soft_events(struct kbase_context *kctx, u64 evt);
 void kbasep_as_do_poke(struct work_struct *work);

 /**
- * Check whether a system suspend is in progress, or has already been suspended
+ * kbase_pm_is_suspending - Check whether a system suspend is in progress,
+ * or has already been suspended
+ *
 * @kbdev: The kbase device structure for the device
 *
 * The caller should ensure that either kbdev->pm.active_count_lock is held, or
@@ -533,10 +610,12 @@ int kbase_pm_force_mcu_wakeup_after_sleep(struct kbase_device *kbdev);

 #if !MALI_USE_CSF
 /**
- * Return the atom's ID, as was originally supplied by userspace in
+ * kbase_jd_atom_id - Return the atom's ID, as was originally supplied by userspace in
 * base_jd_atom::atom_number
 * @kctx:  KBase context pointer
 * @katom: Atome for which to return ID
+ *
+ * Return: the atom's ID.
 */
 static inline int kbase_jd_atom_id(struct kbase_context *kctx,
 				   const struct kbase_jd_atom *katom)
@@ -567,7 +646,9 @@ static inline struct kbase_jd_atom *kbase_jd_atom_from_id(
 #endif /* !MALI_USE_CSF */

 /**
- * Initialize the disjoint state
+ * kbase_disjoint_init - Initialize the disjoint state
+ *
+ * @kbdev: The kbase device
 *
 * The disjoint event count and state are both set to zero.
 *
@@ -589,14 +670,12 @@ static inline struct kbase_jd_atom *kbase_jd_atom_from_id(
 * The disjoint event counter is also incremented immediately whenever a job is soft stopped
 * and during context creation.
 *
- * @kbdev: The kbase device
- *
 * Return: 0 on success and non-zero value on failure.
 */
 void kbase_disjoint_init(struct kbase_device *kbdev);

 /**
- * Increase the count of disjoint events
+ * kbase_disjoint_event - Increase the count of disjoint events
 * called when a disjoint event has happened
 *
 * @kbdev: The kbase device
@@ -604,42 +683,44 @@ void kbase_disjoint_init(struct kbase_device *kbdev);
 void kbase_disjoint_event(struct kbase_device *kbdev);

 /**
- * Increase the count of disjoint events only if the GPU is in a disjoint state
+ * kbase_disjoint_event_potential - Increase the count of disjoint events
+ * only if the GPU is in a disjoint state
+ *
+ * @kbdev: The kbase device
 *
 * This should be called when something happens which could be disjoint if the GPU
 * is in a disjoint state. The state refcount keeps track of this.
- *
- * @kbdev: The kbase device
 */
 void kbase_disjoint_event_potential(struct kbase_device *kbdev);

 /**
- * Returns the count of disjoint events
+ * kbase_disjoint_event_get - Returns the count of disjoint events
 *
 * @kbdev: The kbase device
- * @return the count of disjoint events
+ * Return: the count of disjoint events
 */
 u32 kbase_disjoint_event_get(struct kbase_device *kbdev);

 /**
- * Increment the refcount state indicating that the GPU is in a disjoint state.
+ * kbase_disjoint_state_up - Increment the refcount state indicating that
+ * the GPU is in a disjoint state.
+ *
+ * @kbdev: The kbase device
 *
 * Also Increment the disjoint event count (calls @ref kbase_disjoint_event)
 * eventually after the disjoint state has completed @ref kbase_disjoint_state_down
 * should be called
- *
- * @kbdev: The kbase device
 */
 void kbase_disjoint_state_up(struct kbase_device *kbdev);

 /**
- * Decrement the refcount state
+ * kbase_disjoint_state_down - Decrement the refcount state
+ *
+ * @kbdev: The kbase device
 *
 * Also Increment the disjoint event count (calls @ref kbase_disjoint_event)
 *
 * Called after @ref kbase_disjoint_state_up once the disjoint state is over
- *
- * @kbdev: The kbase device
 */
 void kbase_disjoint_state_down(struct kbase_device *kbdev);

@@ -668,8 +749,8 @@ int kbase_device_pcm_dev_init(struct kbase_device *const kbdev);
 void kbase_device_pcm_dev_term(struct kbase_device *const kbdev);

 /**
- * If a job is soft stopped and the number of contexts is >= this value
- * it is reported as a disjoint event
+ * KBASE_DISJOINT_STATE_INTERLEAVED_CONTEXT_COUNT_THRESHOLD - If a job is soft stopped
+ * and the number of contexts is >= this value it is reported as a disjoint event
 */
 #define KBASE_DISJOINT_STATE_INTERLEAVED_CONTEXT_COUNT_THRESHOLD 2

--- a/drivers/gpu/arm/bifrost/mali_kbase_as_fault_debugfs.c
+++ b/drivers/gpu/arm/bifrost/mali_kbase_as_fault_debugfs.c
@@ -99,7 +99,7 @@ void kbase_as_fault_debugfs_init(struct kbase_device *kbdev)
 	} else {
 		for (i = 0; i < kbdev->nr_hw_address_spaces; i++) {
 			snprintf(as_name, ARRAY_SIZE(as_name), "as%u", i);
-			debugfs_create_file(as_name, S_IRUGO,
+			debugfs_create_file(as_name, 0444,
 					    debugfs_directory,
 					    (void *)(uintptr_t)i,
 					    &as_fault_fops);
@@ -108,5 +108,4 @@ void kbase_as_fault_debugfs_init(struct kbase_device *kbdev)

 #endif /* CONFIG_MALI_BIFROST_DEBUG */
 #endif /* CONFIG_DEBUG_FS */
-	return;
 }
--- a/drivers/gpu/arm/bifrost/mali_kbase_as_fault_debugfs.h
+++ b/drivers/gpu/arm/bifrost/mali_kbase_as_fault_debugfs.h
@@ -43,7 +43,6 @@ kbase_as_fault_debugfs_new(struct kbase_device *kbdev, int as_no)
 	kbdev->debugfs_as_read_bitmap |= (1ULL << as_no);
 #endif /* CONFIG_DEBUG_FS */
 #endif /* CONFIG_MALI_BIFROST_DEBUG */
-	return;
 }

 #endif  /*_KBASE_AS_FAULT_DEBUG_FS_H*/
--- a/drivers/gpu/arm/bifrost/mali_kbase_caps.h
+++ b/drivers/gpu/arm/bifrost/mali_kbase_caps.h
@@ -1,7 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
 /*
 *
- * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2020-2022 ARM Limited. All rights reserved.
 *
 * This program is free software and is provided to you under the terms of the
 * GNU General Public License version 2 as published by the Free Software
@@ -28,15 +28,24 @@

 #include <linux/types.h>

-typedef enum mali_kbase_cap {
+/**
+ * enum mali_kbase_cap - Enumeration for kbase capability
+ *
+ * @MALI_KBASE_CAP_SYSTEM_MONITOR: System Monitor
+ * @MALI_KBASE_CAP_JIT_PRESSURE_LIMIT: JIT Pressure limit
+ * @MALI_KBASE_CAP_MEM_GROW_ON_GPF: Memory grow on page fault
+ * @MALI_KBASE_CAP_MEM_PROTECTED: Protected memory
+ * @MALI_KBASE_NUM_CAPS: Delimiter
+ */
+enum mali_kbase_cap {
 	MALI_KBASE_CAP_SYSTEM_MONITOR = 0,
 	MALI_KBASE_CAP_JIT_PRESSURE_LIMIT,
 	MALI_KBASE_CAP_MEM_GROW_ON_GPF,
 	MALI_KBASE_CAP_MEM_PROTECTED,
 	MALI_KBASE_NUM_CAPS
-} mali_kbase_cap;
+};

-extern bool mali_kbase_supports_cap(unsigned long api_version, mali_kbase_cap cap);
+extern bool mali_kbase_supports_cap(unsigned long api_version, enum mali_kbase_cap cap);

 static inline bool mali_kbase_supports_system_monitor(unsigned long api_version)
 {
--- a/drivers/gpu/arm/bifrost/mali_kbase_ccswe.h
+++ b/drivers/gpu/arm/bifrost/mali_kbase_ccswe.h
@@ -1,7 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
 /*
 *
- * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2020-2022 ARM Limited. All rights reserved.
 *
 * This program is free software and is provided to you under the terms of the
 * GNU General Public License version 2 as published by the Free Software
@@ -51,7 +51,6 @@ struct kbase_ccswe {
 */
 void kbase_ccswe_init(struct kbase_ccswe *self);

-
 /**
 * kbase_ccswe_cycle_at() - Estimate cycle count at given timestamp.
 *
@@ -68,7 +67,7 @@ void kbase_ccswe_init(struct kbase_ccswe *self);
 *     u64 ts = ktime_get_raw_ns();
 *     u64 cycle = kbase_ccswe_cycle_at(&ccswe, ts)
 *
- * Returns: estimated value of cycle count at a given time.
+ * Return: estimated value of cycle count at a given time.
 */
 u64 kbase_ccswe_cycle_at(struct kbase_ccswe *self, u64 timestamp_ns);

--- a/drivers/gpu/arm/bifrost/mali_kbase_config.h
+++ b/drivers/gpu/arm/bifrost/mali_kbase_config.h
@@ -1,7 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
 /*
 *
- * (C) COPYRIGHT 2010-2017, 2019-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2017, 2019-2022 ARM Limited. All rights reserved.
 *
 * This program is free software and is provided to you under the terms of the
 * GNU General Public License version 2 as published by the Free Software
@@ -246,8 +246,6 @@ struct kbase_pm_callback_conf {
 	 *
 	 * For linux this callback will be called by the kernel runtime_suspend callback.
 	 * Note: for linux the kernel must have CONFIG_PM_RUNTIME enabled to use this feature.
-	 *
-	 * @return 0 on success, else OS error code.
 	 */
 	void (*power_runtime_off_callback)(struct kbase_device *kbdev);

@@ -255,6 +253,8 @@ struct kbase_pm_callback_conf {
 	 *
 	 * For linux this callback will be called by the kernel runtime_resume callback.
 	 * Note: for linux the kernel must have CONFIG_PM_RUNTIME enabled to use this feature.
+	 *
+	 * @return 0 on success, else OS error code.
 	 */
 	int (*power_runtime_on_callback)(struct kbase_device *kbdev);

@@ -455,7 +455,7 @@ struct kbase_platform_config {
 /**
 * kbase_get_platform_config - Gets the pointer to platform config.
 *
- * @return Pointer to the platform config
+ * Return: Pointer to the platform config
 */
 struct kbase_platform_config *kbase_get_platform_config(void);

@@ -564,7 +564,6 @@ void kbasep_platform_event_atom_complete(struct kbase_jd_atom *katom);
 #ifndef CONFIG_OF
 /**
 * kbase_platform_register - Register a platform device for the GPU
- *
 * This can be used to register a platform device on systems where device tree
 * is not enabled and the platform initialisation code in the kernel doesn't
 * create the GPU device. Where possible device tree should be used instead.
--- a/drivers/gpu/arm/bifrost/mali_kbase_config_defaults.h
+++ b/drivers/gpu/arm/bifrost/mali_kbase_config_defaults.h
@@ -1,7 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
 /*
 *
- * (C) COPYRIGHT 2013-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2013-2022 ARM Limited. All rights reserved.
 *
 * This program is free software and is provided to you under the terms of the
 * GNU General Public License version 2 as published by the Free Software
@@ -31,33 +31,27 @@
 #include <mali_kbase_config_platform.h>

 enum {
-	/**
-	 * Use unrestricted Address ID width on the AXI bus.
-	 */
+	/* Use unrestricted Address ID width on the AXI bus. */
 	KBASE_AID_32 = 0x0,

-	/**
-	 * Restrict GPU to a half of maximum Address ID count.
+	/* Restrict GPU to a half of maximum Address ID count.
 	 * This will reduce performance, but reduce bus load due to GPU.
 	 */
 	KBASE_AID_16 = 0x3,

-	/**
-	 * Restrict GPU to a quarter of maximum Address ID count.
+	/* Restrict GPU to a quarter of maximum Address ID count.
 	 * This will reduce performance, but reduce bus load due to GPU.
 	 */
-	KBASE_AID_8  = 0x2,
+	KBASE_AID_8 = 0x2,

-	/**
-	 * Restrict GPU to an eighth of maximum Address ID count.
+	/* Restrict GPU to an eighth of maximum Address ID count.
 	 * This will reduce performance, but reduce bus load due to GPU.
 	 */
-	KBASE_AID_4  = 0x1
+	KBASE_AID_4 = 0x1
 };

 enum {
-	/**
-	 * Use unrestricted Address ID width on the AXI bus.
+	/* Use unrestricted Address ID width on the AXI bus.
 	 * Restricting ID width will reduce performance & bus load due to GPU.
 	 */
 	KBASE_3BIT_AID_32 = 0x0,
@@ -78,10 +72,10 @@ enum {
 	KBASE_3BIT_AID_12 = 0x5,

 	/* Restrict GPU to 1/4 of maximum Address ID count. */
-	KBASE_3BIT_AID_8  = 0x6,
+	KBASE_3BIT_AID_8 = 0x6,

 	/* Restrict GPU to 1/8 of maximum Address ID count. */
-	KBASE_3BIT_AID_4  = 0x7
+	KBASE_3BIT_AID_4 = 0x7
 };

 #if MALI_USE_CSF
@@ -103,8 +97,7 @@ enum {
 #define DEFAULT_PM_DVFS_PERIOD 100 /* 100ms */
 #endif

-/**
- * Power Management poweroff tick granuality. This is in nanoseconds to
+/* Power Management poweroff tick granularity. This is in nanoseconds to
 * allow HR timer support (can be overridden by platform header).
 *
 * On each scheduling tick, the power manager core may decide to:
@@ -115,95 +108,106 @@ enum {
 #define DEFAULT_PM_GPU_POWEROFF_TICK_NS (400000) /* 400us */
 #endif

-/**
- * Power Manager number of ticks before shader cores are powered off
+/* Power Manager number of ticks before shader cores are powered off
 * (can be overridden by platform header).
 */
 #ifndef DEFAULT_PM_POWEROFF_TICK_SHADER
 #define DEFAULT_PM_POWEROFF_TICK_SHADER (2) /* 400-800us */
 #endif

-/**
- * Default scheduling tick granuality (can be overridden by platform header)
- */
+/* Default scheduling tick granularity (can be overridden by platform header) */
 #ifndef DEFAULT_JS_SCHEDULING_PERIOD_NS
 #define DEFAULT_JS_SCHEDULING_PERIOD_NS    (100000000u) /* 100ms */
 #endif

-/**
- * Default minimum number of scheduling ticks before jobs are soft-stopped.
+/* Default minimum number of scheduling ticks before jobs are soft-stopped.
 *
 * This defines the time-slice for a job (which may be different from that of a
 * context)
 */
 #define DEFAULT_JS_SOFT_STOP_TICKS       (1) /* 100ms-200ms */

-/**
- * Default minimum number of scheduling ticks before CL jobs are soft-stopped.
- */
+/* Default minimum number of scheduling ticks before CL jobs are soft-stopped. */
 #define DEFAULT_JS_SOFT_STOP_TICKS_CL    (1) /* 100ms-200ms */

-/**
- * Default minimum number of scheduling ticks before jobs are hard-stopped
- */
+/* Default minimum number of scheduling ticks before jobs are hard-stopped */
 #define DEFAULT_JS_HARD_STOP_TICKS_SS    (50) /* 5s */

-/**
- * Default minimum number of scheduling ticks before CL jobs are hard-stopped.
- */
+/* Default minimum number of scheduling ticks before CL jobs are hard-stopped. */
 #define DEFAULT_JS_HARD_STOP_TICKS_CL    (50) /* 5s */

-/**
- * Default minimum number of scheduling ticks before jobs are hard-stopped
+/* Default minimum number of scheduling ticks before jobs are hard-stopped
 * during dumping
 */
 #define DEFAULT_JS_HARD_STOP_TICKS_DUMPING   (15000) /* 1500s */

-/**
- * Default timeout for some software jobs, after which the software event wait
+/* Default timeout for some software jobs, after which the software event wait
 * jobs will be cancelled.
 */
 #define DEFAULT_JS_SOFT_JOB_TIMEOUT (3000) /* 3s */

-/**
- * Default minimum number of scheduling ticks before the GPU is reset to clear a
+/* Default minimum number of scheduling ticks before the GPU is reset to clear a
 * "stuck" job
 */
 #define DEFAULT_JS_RESET_TICKS_SS           (55) /* 5.5s */

-/**
- * Default minimum number of scheduling ticks before the GPU is reset to clear a
+/* Default minimum number of scheduling ticks before the GPU is reset to clear a
 * "stuck" CL job.
 */
 #define DEFAULT_JS_RESET_TICKS_CL        (55) /* 5.5s */

-/**
- * Default minimum number of scheduling ticks before the GPU is reset to clear a
+/* Default minimum number of scheduling ticks before the GPU is reset to clear a
 * "stuck" job during dumping.
 */
 #define DEFAULT_JS_RESET_TICKS_DUMPING   (15020) /* 1502s */

-/**
- * Default number of milliseconds given for other jobs on the GPU to be
+/* Default number of milliseconds given for other jobs on the GPU to be
 * soft-stopped when the GPU needs to be reset.
 */
 #define DEFAULT_RESET_TIMEOUT_MS (3000) /* 3s */

-/* Waiting timeout for status change acknowledgment, in clock cycles
- * Based on 3000ms timeout at nominal 100MHz, as is required for Android - based
- * on scaling from a 50MHz GPU system.
+/* Nominal reference frequency that was used to obtain all following
+ * <...>_TIMEOUT_CYCLES macros, in kHz.
+ *
+ * Timeouts are scaled based on the relation between this value and the lowest
+ * GPU clock frequency.
 */
 #define DEFAULT_REF_TIMEOUT_FREQ_KHZ (100000)
-#define CSF_FIRMWARE_TIMEOUT_CYCLES (300000000)

-/* A default timeout to be used when an invalid timeout selector is
- * used to retrieve the timeout, on JM GPUs. CSF GPUs use the Firmware
- * timeout as the default.
+#if MALI_USE_CSF
+/* Waiting timeout for status change acknowledgment, in clock cycles.
+ *
+ * This is also the default timeout to be used when an invalid timeout
+ * selector is used to retrieve the timeout on CSF GPUs.
+ *
+ * Based on 75000ms timeout at nominal 100MHz, as is required for Android - based
+ * on scaling from a 50MHz GPU system.
+ */
+#define CSF_FIRMWARE_TIMEOUT_CYCLES (7500000000)
+
+/* Timeout in clock cycles for GPU Power Management to reach the desired
+ * Shader, L2 and MCU state.
+ *
+ * Based on 2500ms timeout at nominal 100MHz, scaled from a 50MHz GPU system.
+ */
+#define CSF_PM_TIMEOUT_CYCLES (250000000)
+
+/* Waiting timeout in clock cycles for GPU reset to complete.
+ *
+ * Based on 2500ms timeout at 100MHz, scaled from a 50MHz GPU system.
+ */
+#define CSF_GPU_RESET_TIMEOUT_CYCLES (250000000)
+
+#else /* MALI_USE_CSF */
+
+/* A default timeout in clock cycles to be used when an invalid timeout
+ * selector is used to retrieve the timeout, on JM GPUs.
 */
 #define JM_DEFAULT_TIMEOUT_CYCLES (150000000)

-/**
- * Default timeslice that a context is scheduled in for, in nanoseconds.
+#endif /* MALI_USE_CSF */
+
+/* Default timeslice that a context is scheduled in for, in nanoseconds.
 *
 * When a context has used up this amount of time across its jobs, it is
 * scheduled out to let another run.
@@ -213,16 +217,14 @@ enum {
 */
 #define DEFAULT_JS_CTX_TIMESLICE_NS (50000000) /* 50ms */

-/**
- * Maximum frequency (in kHz) that the GPU can be clocked. For some platforms
+/* Maximum frequency (in kHz) that the GPU can be clocked. For some platforms
 * this isn't available, so we simply define a dummy value here. If devfreq
 * is enabled the value will be read from there, otherwise this should be
 * overridden by defining GPU_FREQ_KHZ_MAX in the platform file.
 */
 #define DEFAULT_GPU_FREQ_KHZ_MAX (5000)

-/**
- * Default timeout for task execution on an endpoint
+/* Default timeout for task execution on an endpoint
 *
 * Number of GPU clock cycles before the driver terminates a task that is
 * making no forward progress on an endpoint (e.g. shader core).
@@ -231,8 +233,7 @@ enum {
 */
 #define DEFAULT_PROGRESS_TIMEOUT ((u64)5 * 500 * 1024 * 1024)

-/**
- * Default threshold at which to switch to incremental rendering
+/* Default threshold at which to switch to incremental rendering
 *
 * Fraction of the maximum size of an allocation that grows on GPU page fault
 * that can be used up before the driver switches to incremental rendering,
--- a/drivers/gpu/arm/bifrost/mali_kbase_core_linux.c
+++ b/drivers/gpu/arm/bifrost/mali_kbase_core_linux.c
--- a/drivers/gpu/arm/bifrost/mali_kbase_ctx_sched.h
+++ b/drivers/gpu/arm/bifrost/mali_kbase_ctx_sched.h
@@ -1,7 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
 /*
 *
- * (C) COPYRIGHT 2017-2018, 2020-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2017-2018, 2020-2022 ARM Limited. All rights reserved.
 *
 * This program is free software and is provided to you under the terms of the
 * GNU General Public License version 2 as published by the Free Software
@@ -79,7 +79,7 @@ void kbase_ctx_sched_term(struct kbase_device *kbdev);
 int kbase_ctx_sched_retain_ctx(struct kbase_context *kctx);

 /**
- * kbase_ctx_sched_retain_ctx_refcount
+ * kbase_ctx_sched_retain_ctx_refcount - Retain a reference to the @ref kbase_context
 * @kctx: The context to which to retain a reference
 *
 * This function only retains a reference to the context. It must be called
@@ -187,8 +187,8 @@ struct kbase_context *kbase_ctx_sched_as_to_ctx_nolock(
 * @kctx: Context to be refcounted
 *
 * The following locks must be held by the caller:
- * * kbase_device::mmu_hw_mutex
- * * kbase_device::hwaccess_lock
+ * &kbase_device.mmu_hw_mutex
+ * &kbase_device.hwaccess_lock
 *
 * Return: true if refcount succeeded, and the context will not be scheduled
 * out, false if the refcount failed (because the context is being/has been
--- a/drivers/gpu/arm/bifrost/mali_kbase_debug_job_fault.h
+++ b/drivers/gpu/arm/bifrost/mali_kbase_debug_job_fault.h
@@ -1,7 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
 /*
 *
- * (C) COPYRIGHT 2012-2016, 2018, 2020-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2012-2016, 2018, 2020-2022 ARM Limited. All rights reserved.
 *
 * This program is free software and is provided to you under the terms of the
 * GNU General Public License version 2 as published by the Free Software
@@ -53,7 +53,7 @@ void kbase_debug_job_fault_dev_term(struct kbase_device *kbdev);
 * kbase_debug_job_fault_context_init - Initialize the relevant
 *		data structure per context
 * @kctx: KBase context pointer
- * @return 0 on success
+ * Return: 0 on success
 */
 int kbase_debug_job_fault_context_init(struct kbase_context *kctx);

@@ -68,39 +68,42 @@ void kbase_debug_job_fault_context_term(struct kbase_context *kctx);
 * kbase_debug_job_fault_kctx_unblock - Unblock the atoms blocked on job fault
 *					dumping on context termination.
 *
+ * @kctx: KBase context pointer
+ *
 * This function is called during context termination to unblock the atom for
 * which the job fault occurred and also the atoms following it. This is needed
 * otherwise the wait for zero jobs could timeout (leading to an assertion
 * failure, kernel panic in debug builds) in the pathological case where
 * although the thread/daemon capturing the job fault events is running,
 * but for some reasons has stopped consuming the events.
- *
- * @kctx: KBase context pointer
 */
 void kbase_debug_job_fault_kctx_unblock(struct kbase_context *kctx);

 /**
 * kbase_debug_job_fault_process - Process the failed job.
- *      It will send a event and wake up the job fault waiting queue
- *      Then create a work queue to wait for job dump finish
- *      This function should be called in the interrupt handler and before
- *      jd_done that make sure the jd_done_worker will be delayed until the
- *      job dump finish
+ *
 * @katom: The failed atom pointer
 * @completion_code: the job status
- * @return true if dump is going on
+ *
+ * It will send an event and wake up the job fault waiting queue,
+ * then create a work queue to wait for the job dump to finish.
+ * This function should be called in the interrupt handler and before
+ * jd_done, which makes sure the jd_done_worker will be delayed until the
+ * job dump finishes.
+ *
+ * Return: true if dump is going on
 */
 bool kbase_debug_job_fault_process(struct kbase_jd_atom *katom,
 		u32 completion_code);

-
 /**
 * kbase_debug_job_fault_reg_snapshot_init - Set the interested registers
 *      address during the job fault process, the relevant registers will
 *      be saved when a job fault happen
 * @kctx: KBase context pointer
 * @reg_range: Maximum register address space
- * @return true if initializing successfully
+ *
+ * Return: true if initializing successfully
 */
 bool kbase_debug_job_fault_reg_snapshot_init(struct kbase_context *kctx,
 		int reg_range);
@@ -108,8 +111,10 @@ bool kbase_debug_job_fault_reg_snapshot_init(struct kbase_context *kctx,
 /**
 * kbase_job_fault_get_reg_snapshot - Read the interested registers for
 *      failed job dump
+ *
 * @kctx: KBase context pointer
- * @return true if getting registers successfully
+ *
+ * Return: true if getting registers successfully
 */
 bool kbase_job_fault_get_reg_snapshot(struct kbase_context *kctx);

--- a/drivers/gpu/arm/bifrost/mali_kbase_debug_mem_view.c
+++ b/drivers/gpu/arm/bifrost/mali_kbase_debug_mem_view.c
@@ -31,6 +31,22 @@

 #if IS_ENABLED(CONFIG_DEBUG_FS)

+/* Print one page at cpu_addr as rows of 'type'; uses the caller's m, cpu_addr and gpu_addr. */
+#define SHOW_GPU_MEM_DATA(type, format)                                         \
+do {                                                                            \
+	unsigned int i, j;                                                      \
+	const type *ptr = (const type *)cpu_addr;                               \
+	const unsigned int row_width = (sizeof(type) == sizeof(u64)) ? 32 : 16; \
+	const unsigned int num_cols = row_width / sizeof(type);                 \
+	for (i = 0; i < PAGE_SIZE; i += row_width) {                            \
+		seq_printf(m, "%016llx:", gpu_addr + i);                        \
+		for (j = 0; j < num_cols; j++)                                  \
+			seq_printf(m, format, ptr[j]);                          \
+		ptr += num_cols;                                                \
+		seq_putc(m, '\n');                                              \
+	}                                                                       \
+} while (0)
+
 struct debug_mem_mapping {
 	struct list_head node;

@@ -44,6 +60,7 @@ struct debug_mem_mapping {
 struct debug_mem_data {
 	struct list_head mapping_list;
 	struct kbase_context *kctx;
+	unsigned int column_width; /* bytes per value shown; set in debug_mem_open() */
 };

 struct debug_mem_seq_off {
@@ -111,9 +128,9 @@ static int debug_mem_show(struct seq_file *m, void *v)
 	struct debug_mem_data *mem_data = m->private;
 	struct debug_mem_seq_off *data = v;
 	struct debug_mem_mapping *map;
-	int i, j;
+	unsigned long long gpu_addr;
 	struct page *page;
-	uint32_t *mapping;
+	void *cpu_addr;
 	pgprot_t prot = PAGE_KERNEL;

 	map = list_entry(data->lh, struct debug_mem_mapping, node);
@@ -130,20 +147,33 @@ static int debug_mem_show(struct seq_file *m, void *v)
 		prot = pgprot_writecombine(prot);

 	page = as_page(map->alloc->pages[data->offset]);
-	mapping = vmap(&page, 1, VM_MAP, prot);
-	if (!mapping)
+	cpu_addr = vmap(&page, 1, VM_MAP, prot);
+	if (!cpu_addr)
 		goto out;

-	for (i = 0; i < PAGE_SIZE; i += 4*sizeof(*mapping)) {
-		seq_printf(m, "%016llx:", i + ((map->start_pfn +
-				data->offset) << PAGE_SHIFT));
+	gpu_addr = (map->start_pfn + data->offset) << PAGE_SHIFT;

-		for (j = 0; j < 4*sizeof(*mapping); j += sizeof(*mapping))
-			seq_printf(m, " %08x", mapping[(i+j)/sizeof(*mapping)]);
-		seq_putc(m, '\n');
+	/* Cases for 4 supported values of column_width for showing
+	 * the GPU memory contents.
+	 */
+	switch (mem_data->column_width) {
+	case 1:
+		SHOW_GPU_MEM_DATA(u8, " %02hhx");
+		break;
+	case 2:
+		SHOW_GPU_MEM_DATA(u16, " %04hx");
+		break;
+	case 4:
+		SHOW_GPU_MEM_DATA(u32, " %08x");
+		break;
+	case 8:
+		SHOW_GPU_MEM_DATA(u64, " %016llx");
+		break;
+	default:
+		dev_warn(mem_data->kctx->kbdev->dev, "Unexpected column width");
 	}

-	vunmap(mapping);
+	vunmap(cpu_addr);

 	seq_putc(m, '\n');

@@ -207,6 +237,14 @@ static int debug_mem_open(struct inode *i, struct file *file)
 	if (get_file_rcu(kctx->filp) == 0)
 		return -ENOENT;

+	/* Check if file was opened in write mode. GPU memory contents
+	 * are returned only when the file is not opened in write mode.
+	 */
+	if (file->f_mode & FMODE_WRITE) {
+		file->private_data = kctx;
+		return 0;
+	}
+
 	ret = seq_open(file, &ops);
 	if (ret)
 		goto open_fail;
@@ -223,6 +261,8 @@ static int debug_mem_open(struct inode *i, struct file *file)

 	kbase_gpu_vm_lock(kctx);

+	mem_data->column_width = kctx->mem_view_column_width;
+
 	ret = debug_mem_zone_open(&kctx->reg_rbtree_same, mem_data);
 	if (ret != 0) {
 		kbase_gpu_vm_unlock(kctx);
@@ -241,6 +281,20 @@ static int debug_mem_open(struct inode *i, struct file *file)
 		goto out;
 	}

+#if MALI_USE_CSF
+	ret = debug_mem_zone_open(&kctx->reg_rbtree_exec_fixed, mem_data);
+	if (ret != 0) {
+		kbase_gpu_vm_unlock(kctx);
+		goto out;
+	}
+
+	ret = debug_mem_zone_open(&kctx->reg_rbtree_fixed, mem_data);
+	if (ret != 0) {
+		kbase_gpu_vm_unlock(kctx);
+		goto out;
+	}
+#endif
+
 	kbase_gpu_vm_unlock(kctx);

 	((struct seq_file *)file->private_data)->private = mem_data;
@@ -270,32 +324,70 @@ open_fail:
 static int debug_mem_release(struct inode *inode, struct file *file)
 {
 	struct kbase_context *const kctx = inode->i_private;
-	struct seq_file *sfile = file->private_data;
-	struct debug_mem_data *mem_data = sfile->private;
-	struct debug_mem_mapping *mapping;

-	seq_release(inode, file);
+	/* Only opens without write mode go through seq_open() and build the
+	 * mapping list (see debug_mem_open()), so only those need teardown.
+	 */
+	if (!(file->f_mode & FMODE_WRITE)) {
+		struct seq_file *sfile = file->private_data;
+		struct debug_mem_data *mem_data = sfile->private;
+		struct debug_mem_mapping *mapping;

-	while (!list_empty(&mem_data->mapping_list)) {
-		mapping = list_first_entry(&mem_data->mapping_list,
+		seq_release(inode, file);
+
+		while (!list_empty(&mem_data->mapping_list)) {
+			mapping = list_first_entry(&mem_data->mapping_list,
 				struct debug_mem_mapping, node);
-		kbase_mem_phy_alloc_put(mapping->alloc);
-		list_del(&mapping->node);
-		kfree(mapping);
-	}
+			kbase_mem_phy_alloc_put(mapping->alloc);
+			list_del(&mapping->node);
+			kfree(mapping);
+		}

-	kfree(mem_data);
+		kfree(mem_data);
+	}

 	fput(kctx->filp);

 	return 0;
 }

+/* Store a new mem_view column width (1, 2, 4 or 8); returns count or a negative errno. */
+static ssize_t debug_mem_write(struct file *file, const char __user *ubuf,
+			       size_t count, loff_t *ppos)
+{
+	struct kbase_context *const kctx = file->private_data;
+	unsigned int column_width = 0;
+	int ret;
+
+	CSTD_UNUSED(ppos);
+
+	ret = kstrtouint_from_user(ubuf, count, 0, &column_width);
+	if (ret)
+		return ret;
+	if (!is_power_of_2(column_width)) {
+		dev_dbg(kctx->kbdev->dev,
+			"Column width %u is not a power of 2", column_width);
+		return -EINVAL;
+	}
+	if (column_width > 8) {
+		dev_dbg(kctx->kbdev->dev,
+			"Column width %u greater than 8 not supported", column_width);
+		return -EINVAL;
+	}
+
+	kbase_gpu_vm_lock(kctx);
+	kctx->mem_view_column_width = column_width;
+	kbase_gpu_vm_unlock(kctx);
+
+	return count;
+}
+
 static const struct file_operations kbase_debug_mem_view_fops = {
 	.owner = THIS_MODULE,
 	.open = debug_mem_open,
 	.release = debug_mem_release,
 	.read = seq_read,
+	.write = debug_mem_write, /* NOTE(review): O_RDWR open skips seq_open(); is .read safe? */
 	.llseek = seq_lseek
 };

@@ -308,6 +400,9 @@ void kbase_debug_mem_view_init(struct kbase_context *const kctx)
 		WARN_ON(IS_ERR_OR_NULL(kctx->kctx_dentry)))
 		return;

+	/* Default column width is 4 */
+	kctx->mem_view_column_width = sizeof(u32);
+
 	debugfs_create_file("mem_view", 0400, kctx->kctx_dentry, kctx,
 			&kbase_debug_mem_view_fops);
 }
--- a/drivers/gpu/arm/bifrost/mali_kbase_debugfs_helper.c
+++ b/drivers/gpu/arm/bifrost/mali_kbase_debugfs_helper.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
 /*
 *
- * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
 *
 * This program is free software and is provided to you under the terms of the
 * GNU General Public License version 2 as published by the Free Software
@@ -34,20 +34,20 @@
 /**
 * set_attr_from_string - Parse a string to set elements of an array
 *
- * This is the core of the implementation of
- * kbase_debugfs_helper_set_attr_from_string. The only difference between the
- * two functions is that this one requires the input string to be writable.
- *
 * @buf:         Input string to parse. Must be nul-terminated!
 * @array:       Address of an object that can be accessed like an array.
 * @nelems:      Number of elements in the array.
 * @set_attr_fn: Function to be called back for each array element.
 *
+ * This is the core of the implementation of
+ * kbase_debugfs_helper_set_attr_from_string. The only difference between the
+ * two functions is that this one requires the input string to be writable.
+ *
 * Return: 0 if success, negative error code otherwise.
 */
 static int
 set_attr_from_string(char *const buf, void *const array, size_t const nelems,
-		     kbase_debugfs_helper_set_attr_fn *const set_attr_fn)
+		     kbase_debugfs_helper_set_attr_fn * const set_attr_fn)
 {
 	size_t index, err = 0;
 	char *ptr = buf;
@@ -143,7 +143,7 @@ int kbase_debugfs_string_validator(char *const buf)

 int kbase_debugfs_helper_set_attr_from_string(
 	const char *const buf, void *const array, size_t const nelems,
-	kbase_debugfs_helper_set_attr_fn *const set_attr_fn)
+	kbase_debugfs_helper_set_attr_fn * const set_attr_fn)
 {
 	char *const wbuf = kstrdup(buf, GFP_KERNEL);
 	int err = 0;
@@ -168,7 +168,7 @@ int kbase_debugfs_helper_set_attr_from_string(
 ssize_t kbase_debugfs_helper_get_attr_to_string(
 	char *const buf, size_t const size, void *const array,
 	size_t const nelems,
-	kbase_debugfs_helper_get_attr_fn *const get_attr_fn)
+	kbase_debugfs_helper_get_attr_fn * const get_attr_fn)
 {
 	ssize_t total = 0;
 	size_t index;
@@ -189,7 +189,7 @@ ssize_t kbase_debugfs_helper_get_attr_to_string(
 int kbase_debugfs_helper_seq_write(
 	struct file *const file, const char __user *const ubuf,
 	size_t const count, size_t const nelems,
-	kbase_debugfs_helper_set_attr_fn *const set_attr_fn)
+	kbase_debugfs_helper_set_attr_fn * const set_attr_fn)
 {
 	const struct seq_file *const sfile = file->private_data;
 	void *const array = sfile->private;
@@ -228,8 +228,8 @@ int kbase_debugfs_helper_seq_write(
 }

 int kbase_debugfs_helper_seq_read(
-	struct seq_file *const sfile, size_t const nelems,
-	kbase_debugfs_helper_get_attr_fn *const get_attr_fn)
+	struct seq_file * const sfile, size_t const nelems,
+	kbase_debugfs_helper_get_attr_fn * const get_attr_fn)
 {
 	void *const array = sfile->private;
 	size_t index;
--- a/drivers/gpu/arm/bifrost/mali_kbase_debugfs_helper.h
+++ b/drivers/gpu/arm/bifrost/mali_kbase_debugfs_helper.h
@@ -1,7 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
 /*
 *
- * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
 *
 * This program is free software and is provided to you under the terms of the
 * GNU General Public License version 2 as published by the Free Software
@@ -37,6 +37,11 @@ typedef void kbase_debugfs_helper_set_attr_fn(void *array, size_t index,
 * kbase_debugfs_helper_set_attr_from_string - Parse a string to reconfigure an
 *                                             array
 *
+ * @buf:         Input string to parse. Must be nul-terminated!
+ * @array:       Address of an object that can be accessed like an array.
+ * @nelems:      Number of elements in the array.
+ * @set_attr_fn: Function to be called back for each array element.
+ *
 * The given function is called once for each attribute value found in the
 * input string. It is not an error if the string specifies fewer attribute
 * values than the specified number of array elements.
@@ -46,11 +51,6 @@ typedef void kbase_debugfs_helper_set_attr_fn(void *array, size_t index,
 * Attribute values are separated by one or more space characters.
 * Additional leading and trailing spaces are ignored.
 *
- * @buf:         Input string to parse. Must be nul-terminated!
- * @array:       Address of an object that can be accessed like an array.
- * @nelems:      Number of elements in the array.
- * @set_attr_fn: Function to be called back for each array element.
- *
 * Return: 0 if success, negative error code otherwise.
 */
 int kbase_debugfs_helper_set_attr_from_string(
@@ -62,6 +62,8 @@ int kbase_debugfs_helper_set_attr_from_string(
 *                                  debugfs file for any incorrect formats
 *                                  or wrong values.
 *
+ * @buf: Null-terminated string to validate.
+ *
 * This function is to be used before any writes to debugfs values are done
 * such that any strings with erroneous values (such as octal 09 or
 * hexadecimal 0xGH are fully ignored) - without this validation, any correct
@@ -73,8 +75,6 @@ int kbase_debugfs_helper_set_attr_from_string(
 * of the input string. This function also requires the input string to be
 * writable.
 *
- * @buf: Null-terminated string to validate.
- *
 * Return: 0 with no error, else -22 (the invalid return value of kstrtoul) if
 *         any value in the string was wrong or with an incorrect format.
 */
@@ -95,17 +95,17 @@ typedef size_t kbase_debugfs_helper_get_attr_fn(void *array, size_t index);
 * kbase_debugfs_helper_get_attr_to_string - Construct a formatted string
 *                                           from elements in an array
 *
- * The given function is called once for each array element to get the
- * value of the attribute to be inspected. The attribute values are
- * written to the buffer as a formatted string of decimal numbers
- * separated by spaces and terminated by a linefeed.
- *
 * @buf:         Buffer in which to store the formatted output string.
 * @size:        The size of the buffer, in bytes.
 * @array:       Address of an object that can be accessed like an array.
 * @nelems:      Number of elements in the array.
 * @get_attr_fn: Function to be called back for each array element.
 *
+ * The given function is called once for each array element to get the
+ * value of the attribute to be inspected. The attribute values are
+ * written to the buffer as a formatted string of decimal numbers
+ * separated by spaces and terminated by a linefeed.
+ *
 * Return: Number of characters written excluding the nul terminator.
 */
 ssize_t kbase_debugfs_helper_get_attr_to_string(
@@ -116,6 +116,10 @@ ssize_t kbase_debugfs_helper_get_attr_to_string(
 * kbase_debugfs_helper_seq_read - Implements reads from a virtual file for an
 *                                 array
 *
+ * @sfile:       A virtual file previously opened by calling single_open.
+ * @nelems:      Number of elements in the array.
+ * @get_attr_fn: Function to be called back for each array element.
+ *
 * The virtual file must have been opened by calling single_open and passing
 * the address of an object that can be accessed like an array.
 *
@@ -124,10 +128,6 @@ ssize_t kbase_debugfs_helper_get_attr_to_string(
 * written to the buffer as a formatted string of decimal numbers
 * separated by spaces and terminated by a linefeed.
 *
- * @sfile:       A virtual file previously opened by calling single_open.
- * @nelems:      Number of elements in the array.
- * @get_attr_fn: Function to be called back for each array element.
- *
 * Return: 0 if success, negative error code otherwise.
 */
 int kbase_debugfs_helper_seq_read(
@@ -138,6 +138,12 @@ int kbase_debugfs_helper_seq_read(
 * kbase_debugfs_helper_seq_write - Implements writes to a virtual file for an
 *                                  array
 *
+ * @file:        A virtual file previously opened by calling single_open.
+ * @ubuf:        Source address in user space.
+ * @count:       Number of bytes written to the virtual file.
+ * @nelems:      Number of elements in the array.
+ * @set_attr_fn: Function to be called back for each array element.
+ *
 * The virtual file must have been opened by calling single_open and passing
 * the address of an object that can be accessed like an array.
 *
@@ -145,12 +151,6 @@ int kbase_debugfs_helper_seq_read(
 * data written to the virtual file. For further details, refer to the
 * description of set_attr_from_string.
 *
- * @file:        A virtual file previously opened by calling single_open.
- * @ubuf:        Source address in user space.
- * @count:       Number of bytes written to the virtual file.
- * @nelems:      Number of elements in the array.
- * @set_attr_fn: Function to be called back for each array element.
- *
 * Return: 0 if success, negative error code otherwise.
 */
 int kbase_debugfs_helper_seq_write(struct file *file,
--- a/drivers/gpu/arm/bifrost/mali_kbase_defs.h
+++ b/drivers/gpu/arm/bifrost/mali_kbase_defs.h
@@ -1,7 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
 /*
 *
- * (C) COPYRIGHT 2011-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2011-2022 ARM Limited. All rights reserved.
 *
 * This program is free software and is provided to you under the terms of the
 * GNU General Public License version 2 as published by the Free Software
@@ -35,11 +35,15 @@
 #include <backend/gpu/mali_kbase_instr_defs.h>
 #include <mali_kbase_pm.h>
 #include <mali_kbase_gpuprops_types.h>
+#include <mali_kbase_hwcnt_watchdog_if.h>
+
 #if MALI_USE_CSF
 #include <mali_kbase_hwcnt_backend_csf.h>
 #else
 #include <mali_kbase_hwcnt_backend_jm.h>
+#include <mali_kbase_hwcnt_backend_jm_watchdog.h>
 #endif
+
 #include <protected_mode_switcher.h>

 #include <linux/atomic.h>
@@ -82,7 +86,7 @@
 #define RESET_TIMEOUT           500

 /**
- * The maximum number of Job Slots to support in the Hardware.
+ * BASE_JM_MAX_NR_SLOTS - The maximum number of Job Slots to support in the Hardware.
 *
 * You can optimize this down if your target devices will only ever support a
 * small number of job slots.
@@ -90,7 +94,7 @@
 #define BASE_JM_MAX_NR_SLOTS        3

 /**
- * The maximum number of Address Spaces to support in the Hardware.
+ * BASE_MAX_NR_AS - The maximum number of Address Spaces to support in the Hardware.
 *
 * You can optimize this down if your target devices will only ever support a
 * small number of Address Spaces
@@ -110,19 +114,19 @@
 #define KBASEP_AS_NR_INVALID     (-1)

 /**
- * Maximum size in bytes of a MMU lock region, as a logarithm
+ * KBASE_LOCK_REGION_MAX_SIZE_LOG2 - Maximum size in bytes of a MMU lock region,
+ *                                   as a logarithm
 */
 #define KBASE_LOCK_REGION_MAX_SIZE_LOG2 (48) /*  256 TB */

 /**
- * Minimum size in bytes of a MMU lock region, as a logarithm
- */
-#define KBASE_LOCK_REGION_MIN_SIZE_LOG2 (15) /* 32 kB */
-
-/**
- * Maximum number of GPU memory region zones
+ * KBASE_REG_ZONE_MAX - Maximum number of GPU memory region zones
 */
+#if MALI_USE_CSF
+#define KBASE_REG_ZONE_MAX 6ul
+#else
 #define KBASE_REG_ZONE_MAX 4ul
+#endif

 #include "mali_kbase_hwaccess_defs.h"

@@ -248,9 +252,10 @@ struct kbase_fault {

 /**
 * struct kbase_mmu_table  - object representing a set of GPU page tables
- * @mmu_teardown_pages:   Buffer of 4 Pages in size, used to cache the entries
- *                        of top & intermediate level page tables to avoid
- *                        repeated calls to kmap_atomic during the MMU teardown.
+ * @mmu_teardown_pages:   Array containing pointers to 3 separate pages, used
+ *                        to cache the entries of top (L0) & intermediate level
+ *                        page tables (L1 & L2) to avoid repeated calls to
+ *                        kmap_atomic() during the MMU teardown.
 * @mmu_lock:             Lock to serialize the accesses made to multi level GPU
 *                        page tables
 * @pgd:                  Physical address of the page allocated for the top
@@ -265,7 +270,7 @@ struct kbase_fault {
 *                        it is NULL
 */
 struct kbase_mmu_table {
-	u64 *mmu_teardown_pages;
+	u64 *mmu_teardown_pages[MIDGARD_MMU_BOTTOMLEVEL];
 	struct mutex mmu_lock;
 	phys_addr_t pgd;
 	u8 group_id;
@@ -357,8 +362,6 @@ struct kbase_clk_rate_listener {
 *                      enumerated GPU clock.
 * @clk_rate_trace_ops: Pointer to the platform specific GPU clock rate trace
 *                      operations.
- * @gpu_clk_rate_trace_write: Pointer to the function that would emit the
- *                            tracepoint for the clock rate change.
 * @listeners:          List of listener attached.
 * @lock:               Lock to serialize the actions of GPU clock rate trace
 *                      manager.
@@ -373,13 +376,14 @@ struct kbase_clk_rate_trace_manager {

 /**
 * struct kbase_pm_device_data - Data stored per device for power management.
- * @lock: The lock protecting Power Management structures accessed outside of
- * IRQ.
- * This lock must also be held whenever the GPU is being powered on or
- * off.
- * @active_count: The reference count of active contexts on this device. Note
- * 	that some code paths keep shaders/the tiler powered whilst this is 0.
- * 	Use kbase_pm_is_active() instead to check for such cases.
+ * @lock: The lock protecting Power Management structures accessed
+ *        outside of IRQ.
+ *        This lock must also be held whenever the GPU is being
+ *        powered on or off.
+ * @active_count: The reference count of active contexts on this device.
+ *                Note that some code paths keep shaders/the tiler
+ *                powered whilst this is 0.
+ *                Use kbase_pm_is_active() instead to check for such cases.
 * @suspending: Flag indicating suspending/suspended
 * @runtime_active: Flag to track if the GPU is in runtime suspended or active
 *                  state. This ensures that runtime_put and runtime_get
@@ -388,24 +392,24 @@ struct kbase_clk_rate_trace_manager {
 *                  the call to it from runtime_gpu_active callback can be
 *                  skipped.
 * @gpu_lost: Flag indicating gpu lost
- * 	This structure contains data for the power management framework. There
- * 	is one instance of this structure per device in the system.
+ *            This structure contains data for the power management framework.
+ *            There is one instance of this structure per device in the system.
 * @zero_active_count_wait: Wait queue set when active_count == 0
 * @resume_wait: system resume of GPU device.
 * @debug_core_mask: Bit masks identifying the available shader cores that are
- * 	specified via sysfs. One mask per job slot.
+ *                   specified via sysfs. One mask per job slot.
 * @debug_core_mask_all: Bit masks identifying the available shader cores that
- * 	are specified via sysfs.
+ *                       are specified via sysfs.
 * @callback_power_runtime_init: Callback for initializing the runtime power
- * 	management. Return 0 on success, else error code
+ *                               management. Return 0 on success, else error code
 * @callback_power_runtime_term: Callback for terminating the runtime power
- * 	management.
+ *                               management.
 * @dvfs_period: Time in milliseconds between each dvfs sample
 * @backend: KBase PM backend data
 * @arb_vm_state: The state of the arbiter VM machine
 * @gpu_users_waiting: Used by virtualization to notify the arbiter that there
- * 	are users waiting for the GPU so that it can request and resume the
- * 	driver.
+ *                     are users waiting for the GPU so that it can request
+ *                     and resume the driver.
 * @clk_rtm: The state of the GPU clock rate trace manager
 */
 struct kbase_pm_device_data {
@@ -482,16 +486,16 @@ struct kbase_mem_pool {
 /**
 * struct kbase_mem_pool_group - a complete set of physical memory pools.
 *
+ * @small: Array of objects containing the state for pools of 4 KiB size
+ *         physical pages.
+ * @large: Array of objects containing the state for pools of 2 MiB size
+ *         physical pages.
+ *
 * Memory pools are used to allow efficient reallocation of previously-freed
 * physical pages. A pair of memory pools is initialized for each physical
 * memory group: one for 4 KiB pages and one for 2 MiB pages. These arrays
 * should be indexed by physical memory group ID, the meaning of which is
 * defined by the systems integrator.
- *
- * @small: Array of objects containing the state for pools of 4 KiB size
- *         physical pages.
- * @large: Array of objects containing the state for pools of 2 MiB size
- *         physical pages.
 */
 struct kbase_mem_pool_group {
 	struct kbase_mem_pool small[MEMORY_GROUP_MANAGER_NR_GROUPS];
@@ -512,11 +516,11 @@ struct kbase_mem_pool_config {
 * struct kbase_mem_pool_group_config - Initial configuration for a complete
 *                                      set of physical memory pools
 *
- * This array should be indexed by physical memory group ID, the meaning
- * of which is defined by the systems integrator.
- *
 * @small: Array of initial configuration for pools of 4 KiB pages.
 * @large: Array of initial configuration for pools of 2 MiB pages.
+ *
+ * This array should be indexed by physical memory group ID, the meaning
+ * of which is defined by the systems integrator.
 */
 struct kbase_mem_pool_group_config {
 	struct kbase_mem_pool_config small[MEMORY_GROUP_MANAGER_NR_GROUPS];
@@ -750,8 +754,13 @@ struct kbase_process {
 * @hwcnt.addr:            HW counter address
 * @hwcnt.addr_bytes:      HW counter size in bytes
 * @hwcnt.backend:         Kbase instrumentation backend
- * @hwcnt_watchdog_timer:  Hardware counter watchdog interface.
+ * @hwcnt_gpu_jm_backend:  Job manager GPU backend interface, used as superclass reference
+ *                         pointer by hwcnt_gpu_iface, which wraps this implementation in
+ *                         order to extend it with periodic dumping functionality.
 * @hwcnt_gpu_iface:       Backend interface for GPU hardware counter access.
+ * @hwcnt_watchdog_timer:  Watchdog interface, used by the GPU backend hwcnt_gpu_iface to
+ *                         perform periodic dumps in order to prevent hardware counter value
+ *                         overflow or saturation.
 * @hwcnt_gpu_ctx:         Context for GPU hardware counter access.
 *                         @hwaccess_lock must be held when calling
 *                         kbase_hwcnt_context_enable() with @hwcnt_gpu_ctx.
@@ -763,14 +772,6 @@ struct kbase_process {
 *                         therefore timeline is disabled.
 * @timeline:              Timeline context created per device.
 * @ktrace:                kbase device's ktrace
- * @trace_lock:            Lock to serialize the access to trace buffer.
- * @trace_first_out:       Index/offset in the trace buffer at which the first
- *                         unread message is present.
- * @trace_next_in:         Index/offset in the trace buffer at which the new
- *                         message will be written.
- * @trace_rbuf:            Pointer to the buffer storing debug messages/prints
- *                         tracing the various events in Driver.
- *                         The buffer is filled in circular fashion.
 * @reset_timeout_ms:      Number of milliseconds to wait for the soft stop to
 *                         complete for the GPU jobs before proceeding with the
 *                         GPU reset.
@@ -875,6 +876,13 @@ struct kbase_process {
 *                         backend specific data for HW access layer.
 * @faults_pending:        Count of page/bus faults waiting for bottom half processing
 *                         via workqueues.
+ * @mmu_hw_operation_in_progress: Set before sending the MMU command and is
+ *                         cleared after the command is complete. Whilst this
+ *                         flag is set, the write to L2_PWROFF register will be
+ *                         skipped which is needed to work around the HW issue
+ *                         GPU2019-3878. PM state machine is invoked after
+ *                         clearing this flag and @hwaccess_lock is used to
+ *                         serialize the access.
 * @poweroff_pending:      Set when power off operation for GPU is started, reset when
 *                         power on for GPU is started.
 * @infinite_cache_active_default: Set to enable using infinite cache for all the
@@ -904,9 +912,6 @@ struct kbase_process {
 *                         enabled.
 * @protected_mode_hwcnt_disable_work: Work item to disable GPU hardware
 *                         counters, used if atomic disable is not possible.
- * @buslogger:              Pointer to the structure required for interfacing
- *                          with the bus logger module to set the size of buffer
- *                          used by the module for capturing bus logs.
 * @irq_reset_flush:        Flag to indicate that GPU reset is in-flight and flush of
 *                          IRQ + bottom half is being done, to prevent the writes
 *                          to MMU_IRQ_CLEAR & MMU_IRQ_MASK registers.
@@ -1007,7 +1012,7 @@ struct kbase_device {
 	struct memory_group_manager_device *mgm_dev;

 	struct kbase_as as[BASE_MAX_NR_AS];
-	u16 as_free; /* Bitpattern of free Address Spaces */
+	u16 as_free;
 	struct kbase_context *as_to_kctx[BASE_MAX_NR_AS];

 	spinlock_t mmu_mask_change;
@@ -1027,7 +1032,6 @@ struct kbase_device {

 #if MALI_USE_CSF
 	struct kbase_hwcnt_backend_csf_if hwcnt_backend_csf_if_fw;
-	struct kbase_hwcnt_watchdog_interface hwcnt_watchdog_timer;
 #else
 	struct kbase_hwcnt {
 		spinlock_t lock;
@@ -1038,9 +1042,13 @@ struct kbase_device {

 		struct kbase_instr_backend backend;
 	} hwcnt;
+
+	struct kbase_hwcnt_backend_interface hwcnt_gpu_jm_backend;
 #endif

 	struct kbase_hwcnt_backend_interface hwcnt_gpu_iface;
+	struct kbase_hwcnt_watchdog_interface hwcnt_watchdog_timer;
+
 	struct kbase_hwcnt_context *hwcnt_gpu_ctx;
 	struct kbase_hwcnt_virtualizer *hwcnt_gpu_virt;
 	struct kbase_vinstr_context *vinstr_ctx;
@@ -1141,6 +1149,9 @@ struct kbase_device {

 	atomic_t faults_pending;

+#if MALI_USE_CSF
+	bool mmu_hw_operation_in_progress;
+#endif
 	bool poweroff_pending;

 #if (KERNEL_VERSION(4, 4, 0) <= LINUX_VERSION_CODE)
@@ -1492,8 +1503,8 @@ struct kbase_sub_alloc {
 * @mem_partials_lock:    Lock for protecting the operations done on the elements
 *                        added to @mem_partials list.
 * @mem_partials:         List head for the list of large pages, 2MB in size, which
- *                        which have been split into 4 KB pages and are used
- *                        partially for the allocations >= 2 MB in size.
+ *                        have been split into 4 KB pages and are used partially
+ *                        for the allocations >= 2 MB in size.
 * @reg_lock:             Lock used for GPU virtual address space management operations,
 *                        like adding/freeing a memory region in the address space.
 *                        Can be converted to a rwlock ?.
@@ -1505,6 +1516,17 @@ struct kbase_sub_alloc {
 * @reg_rbtree_exec:      RB tree of the memory regions allocated from the EXEC_VA
 *                        zone of the GPU virtual address space. Used for GPU-executable
 *                        allocations which don't need the SAME_VA property.
+ * @reg_rbtree_exec_fixed: RB tree of the memory regions allocated from the
+ *                         EXEC_FIXED_VA zone of the GPU virtual address space. Used for
+ *                         GPU-executable allocations with FIXED/FIXABLE GPU virtual
+ *                         addresses.
+ * @reg_rbtree_fixed:     RB tree of the memory regions allocated from the FIXED_VA zone
+ *                        of the GPU virtual address space. Used for allocations with
+ *                        FIXED/FIXABLE GPU virtual addresses.
+ * @num_fixable_allocs:   A count for the number of memory allocations with the
+ *                        BASE_MEM_FIXABLE property.
+ * @num_fixed_allocs:     A count for the number of memory allocations with the
+ *                        BASE_MEM_FIXED property.
 * @reg_zone:             Zone information for the reg_rbtree_<...> members.
 * @cookies:              Bitmask containing of BITS_PER_LONG bits, used mainly for
 *                        SAME_VA allocations to defer the reservation of memory region
@@ -1608,6 +1630,8 @@ struct kbase_sub_alloc {
 *                        dumping of its debug info is in progress.
 * @job_fault_resume_event_list: List containing atoms completed after the faulty
 *                        atom but before the debug data for faulty atom was dumped.
+ * @mem_view_column_width: Controls the number of bytes shown in every column of the
+ *                         output of "mem_view" debugfs file.
 * @jsctx_queue:          Per slot & priority arrays of object containing the root
 *                        of RB-tree holding currently runnable atoms on the job slot
 *                        and the head item of the linked list of atoms blocked on
@@ -1748,6 +1772,12 @@ struct kbase_context {
 	struct rb_root reg_rbtree_same;
 	struct rb_root reg_rbtree_custom;
 	struct rb_root reg_rbtree_exec;
+#if MALI_USE_CSF
+	struct rb_root reg_rbtree_exec_fixed;
+	struct rb_root reg_rbtree_fixed;
+	atomic64_t num_fixable_allocs;
+	atomic64_t num_fixed_allocs;
+#endif
 	struct kbase_reg_zone reg_zone[KBASE_REG_ZONE_MAX];

 #if MALI_USE_CSF
@@ -1817,6 +1847,7 @@ struct kbase_context {
 	unsigned int *reg_dump;
 	atomic_t job_fault_count;
 	struct list_head job_fault_resume_event_list;
+	unsigned int mem_view_column_width;

 #endif /* CONFIG_DEBUG_FS */
 	struct kbase_va_region *jit_alloc[1 + BASE_JIT_ALLOC_COUNT];
@@ -1924,13 +1955,6 @@ enum kbase_share_attr_bits {
 	SHARE_INNER_BITS = (3ULL << 8)	/* inner shareable coherency */
 };

-/**
- * enum kbase_timeout_selector - The choice of which timeout to get scaled
- *                               using current GPU frequency.
- * @CSF_FIRMWARE_TIMEOUT: Response timeout from CSF firmware.
- */
-enum kbase_timeout_selector { CSF_FIRMWARE_TIMEOUT };
-
 /**
 * kbase_device_is_cpu_coherent - Returns if the device is CPU coherent.
 * @kbdev: kbase device
@@ -1946,6 +1970,24 @@ static inline bool kbase_device_is_cpu_coherent(struct kbase_device *kbdev)
 	return false;
 }

+/**
+ * kbase_get_lock_region_min_size_log2 - Returns the minimum size of the MMU lock
+ * region, as a base-2 logarithm
+ *
+ * @gpu_props:   GPU properties; only props.core_props.product_id is read
+ *
+ * Return: log2 of the minimum MMU lock region size per the arch spec: 12 (4 kB)
+ * if the model value is >= GPU_ID2_MODEL_MAKE(12, 0), otherwise 15 (32 kB).
+ */
+static inline u64 kbase_get_lock_region_min_size_log2(struct kbase_gpu_props const *gpu_props)
+{
+	if (GPU_ID2_MODEL_MATCH_VALUE(gpu_props->props.core_props.product_id) >=
+	    GPU_ID2_MODEL_MAKE(12, 0))
+		return 12; /* 4 kB */
+
+	return 15; /* 32 kB */
+}
+
 /* Conversion helpers for setting up high resolution timers */
 #define HR_TIMER_DELAY_MSEC(x) (ns_to_ktime(((u64)(x))*1000000U))
 #define HR_TIMER_DELAY_NSEC(x) (ns_to_ktime(x))
@@ -1955,4 +1997,4 @@ static inline bool kbase_device_is_cpu_coherent(struct kbase_device *kbdev)
 /* Maximum number of loops polling the GPU for an AS command to complete before we assume the GPU has hung */
 #define KBASE_AS_INACTIVE_MAX_LOOPS     100000000

-#endif				/* _KBASE_DEFS_H_ */
+#endif /* _KBASE_DEFS_H_ */
--- a/drivers/gpu/arm/bifrost/mali_kbase_dma_fence.c
+++ b/drivers/gpu/arm/bifrost/mali_kbase_dma_fence.c
@@ -161,7 +161,7 @@ kbase_dma_fence_cancel_atom(struct kbase_jd_atom *katom)
 	if (katom->status == KBASE_JD_ATOM_STATE_QUEUED) {
 		/* Wait was cancelled - zap the atom */
 		katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
-		if (jd_done_nolock(katom, NULL))
+		if (jd_done_nolock(katom, true))
 			kbase_js_sched_all(katom->kctx->kbdev);
 	}
 }
@@ -196,7 +196,7 @@ kbase_dma_fence_work(struct work_struct *pwork)
 	 * dependency. Run jd_done_nolock() on the katom if it is completed.
 	 */
 	if (unlikely(katom->status == KBASE_JD_ATOM_STATE_COMPLETED))
-		jd_done_nolock(katom, NULL);
+		jd_done_nolock(katom, true);
 	else
 		kbase_jd_dep_clear_locked(katom);

--- a/drivers/gpu/arm/bifrost/mali_kbase_dma_fence.h
+++ b/drivers/gpu/arm/bifrost/mali_kbase_dma_fence.h
@@ -1,7 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
 /*
 *
- * (C) COPYRIGHT 2010-2016, 2020-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2016, 2020-2022 ARM Limited. All rights reserved.
 *
 * This program is free software and is provided to you under the terms of the
 * GNU General Public License version 2 as published by the Free Software
@@ -132,6 +132,8 @@ void kbase_dma_fence_term(struct kbase_context *kctx);
 /**
 * kbase_dma_fence_init() - Initialize Mali dma-fence context
 * @kctx: kbase context to initialize
+ *
+ * Return: 0 on success, error code otherwise.
 */
 int kbase_dma_fence_init(struct kbase_context *kctx);

--- a/drivers/gpu/arm/bifrost/mali_kbase_dummy_job_wa.c
+++ b/drivers/gpu/arm/bifrost/mali_kbase_dummy_job_wa.c
@@ -239,7 +239,7 @@ int kbase_dummy_job_wa_execute(struct kbase_device *kbdev, u64 cores)
 	return failed ? -EFAULT : 0;
 }

-static ssize_t show_dummy_job_wa_info(struct device * const dev,
+static ssize_t dummy_job_wa_info_show(struct device * const dev,
 		struct device_attribute * const attr, char * const buf)
 {
 	struct kbase_device *const kbdev = dev_get_drvdata(dev);
@@ -254,7 +254,7 @@ static ssize_t show_dummy_job_wa_info(struct device * const dev,
 	return err;
 }

-static DEVICE_ATTR(dummy_job_wa_info, 0444, show_dummy_job_wa_info, NULL);
+static DEVICE_ATTR_RO(dummy_job_wa_info);

 static bool wa_blob_load_needed(struct kbase_device *kbdev)
 {
--- a/Show More
+++ b/Show More