MALI: rockchip: upgrade bifrost DDK to g11p0-01eac0, from g10p0-01eac0

Change-Id: I0642ec37f151711b8b19c3206488d3301422971d
Signed-off-by: Zhen Chen <chenzhen@rock-chips.com>
This commit is contained in:
Zhen Chen
2022-02-16 17:26:04 +08:00
committed by Tao Huang
parent 451a9752b5
commit 643f7908a0
201 changed files with 10229 additions and 6812 deletions

View File

@@ -83,7 +83,7 @@
static dev_t dma_buf_lock_dev;
static struct cdev dma_buf_lock_cdev;
static struct class *dma_buf_lock_class;
static char dma_buf_lock_dev_name[] = "dma_buf_lock";
static const char dma_buf_lock_dev_name[] = "dma_buf_lock";
#if defined(HAVE_UNLOCKED_IOCTL) || defined(HAVE_COMPAT_IOCTL) || ((KERNEL_VERSION(5, 9, 0) <= LINUX_VERSION_CODE))
static long dma_buf_lock_ioctl(struct file *filp, unsigned int cmd, unsigned long arg);
@@ -91,8 +91,7 @@ static long dma_buf_lock_ioctl(struct file *filp, unsigned int cmd, unsigned lon
static int dma_buf_lock_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, unsigned long arg);
#endif
static struct file_operations dma_buf_lock_fops =
{
static const struct file_operations dma_buf_lock_fops = {
.owner = THIS_MODULE,
#if defined(HAVE_UNLOCKED_IOCTL) || ((KERNEL_VERSION(5, 9, 0) <= LINUX_VERSION_CODE))
.unlocked_ioctl = dma_buf_lock_ioctl,
@@ -105,8 +104,7 @@ static struct file_operations dma_buf_lock_fops =
#endif
};
typedef struct dma_buf_lock_resource
{
struct dma_buf_lock_resource {
#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE)
struct fence fence;
#else
@@ -123,7 +121,7 @@ typedef struct dma_buf_lock_resource
struct list_head link;
struct work_struct work;
int count;
} dma_buf_lock_resource;
};
/**
* struct dma_buf_lock_fence_cb - Callback data struct for dma-fence
@@ -199,7 +197,7 @@ const struct dma_fence_ops dma_buf_lock_fence_ops = {
};
static void
dma_buf_lock_fence_init(dma_buf_lock_resource *resource)
dma_buf_lock_fence_init(struct dma_buf_lock_resource *resource)
{
dma_fence_init(&resource->fence,
&dma_buf_lock_fence_ops,
@@ -209,7 +207,7 @@ dma_buf_lock_fence_init(dma_buf_lock_resource *resource)
}
static void
dma_buf_lock_fence_free_callbacks(dma_buf_lock_resource *resource)
dma_buf_lock_fence_free_callbacks(struct dma_buf_lock_resource *resource)
{
struct dma_buf_lock_fence_cb *cb, *tmp;
@@ -228,8 +226,8 @@ dma_buf_lock_fence_free_callbacks(dma_buf_lock_resource *resource)
static void
dma_buf_lock_fence_work(struct work_struct *pwork)
{
dma_buf_lock_resource *resource =
container_of(pwork, dma_buf_lock_resource, work);
struct dma_buf_lock_resource *resource =
container_of(pwork, struct dma_buf_lock_resource, work);
WARN_ON(atomic_read(&resource->fence_dep_count));
WARN_ON(!atomic_read(&resource->locked));
@@ -250,10 +248,10 @@ dma_buf_lock_fence_callback(struct dma_fence *fence, struct dma_fence_cb *cb)
struct dma_buf_lock_fence_cb *dma_buf_lock_cb = container_of(cb,
struct dma_buf_lock_fence_cb,
fence_cb);
dma_buf_lock_resource *resource = dma_buf_lock_cb->res;
struct dma_buf_lock_resource *resource = dma_buf_lock_cb->res;
#if DMA_BUF_LOCK_DEBUG
printk(KERN_DEBUG "dma_buf_lock_fence_callback\n");
pr_debug("%s\n", __func__);
#endif
/* Callback function will be invoked in atomic context. */
@@ -270,12 +268,12 @@ dma_buf_lock_fence_callback(struct dma_fence *fence, struct dma_fence_cb *cb)
#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE)
static int
dma_buf_lock_fence_add_callback(dma_buf_lock_resource *resource,
dma_buf_lock_fence_add_callback(struct dma_buf_lock_resource *resource,
struct fence *fence,
fence_func_t callback)
#else
static int
dma_buf_lock_fence_add_callback(dma_buf_lock_resource *resource,
dma_buf_lock_fence_add_callback(struct dma_buf_lock_resource *resource,
struct dma_fence *fence,
dma_fence_func_t callback)
#endif
@@ -324,12 +322,12 @@ dma_buf_lock_fence_add_callback(dma_buf_lock_resource *resource,
#if (KERNEL_VERSION(5, 4, 0) > LINUX_VERSION_CODE)
static int
dma_buf_lock_add_fence_reservation_callback(dma_buf_lock_resource *resource,
dma_buf_lock_add_fence_reservation_callback(struct dma_buf_lock_resource *resource,
struct reservation_object *resv,
bool exclusive)
#else
static int
dma_buf_lock_add_fence_reservation_callback(dma_buf_lock_resource *resource,
dma_buf_lock_add_fence_reservation_callback(struct dma_buf_lock_resource *resource,
struct dma_resv *resv,
bool exclusive)
#endif
@@ -398,7 +396,7 @@ out:
}
static void
dma_buf_lock_release_fence_reservation(dma_buf_lock_resource *resource,
dma_buf_lock_release_fence_reservation(struct dma_buf_lock_resource *resource,
struct ww_acquire_ctx *ctx)
{
unsigned int r;
@@ -409,7 +407,7 @@ dma_buf_lock_release_fence_reservation(dma_buf_lock_resource *resource,
}
static int
dma_buf_lock_acquire_fence_reservation(dma_buf_lock_resource *resource,
dma_buf_lock_acquire_fence_reservation(struct dma_buf_lock_resource *resource,
struct ww_acquire_ctx *ctx)
{
#if (KERNEL_VERSION(5, 4, 0) > LINUX_VERSION_CODE)
@@ -451,7 +449,7 @@ error:
/* If we deadlock try with lock_slow and retry */
if (err == -EDEADLK) {
#if DMA_BUF_LOCK_DEBUG
printk(KERN_DEBUG "deadlock at dma_buf fd %i\n",
pr_debug("deadlock at dma_buf fd %i\n",
resource->list_of_dma_buf_fds[content_resv_idx]);
#endif
content_resv = resource->dma_bufs[content_resv_idx]->resv;
@@ -466,14 +464,14 @@ error:
static int dma_buf_lock_handle_release(struct inode *inode, struct file *file)
{
dma_buf_lock_resource *resource;
struct dma_buf_lock_resource *resource;
if (!is_dma_buf_lock_file(file))
return -EINVAL;
resource = file->private_data;
#if DMA_BUF_LOCK_DEBUG
printk("dma_buf_lock_handle_release\n");
pr_debug("%s\n", __func__);
#endif
mutex_lock(&dma_buf_lock_mutex);
kref_put(&resource->refcount, dma_buf_lock_dounlock);
@@ -482,10 +480,11 @@ static int dma_buf_lock_handle_release(struct inode *inode, struct file *file)
return 0;
}
static unsigned int dma_buf_lock_handle_poll(struct file *file,
struct poll_table_struct *wait)
static unsigned int dma_buf_lock_handle_poll(
struct file *file,
struct poll_table_struct *wait)
{
dma_buf_lock_resource *resource;
struct dma_buf_lock_resource *resource;
unsigned int ret = 0;
if (!is_dma_buf_lock_file(file))
@@ -493,21 +492,19 @@ static unsigned int dma_buf_lock_handle_poll(struct file *file,
resource = file->private_data;
#if DMA_BUF_LOCK_DEBUG
printk("dma_buf_lock_handle_poll\n");
pr_debug("%s\n", __func__);
#endif
if (atomic_read(&resource->locked) == 1) {
/* Resources have been locked */
ret = POLLIN | POLLRDNORM;
if (resource->exclusive)
ret |= POLLOUT | POLLWRNORM;
}
else
{
} else {
if (!poll_does_not_wait(wait))
poll_wait(file, &resource->wait, wait);
}
#if DMA_BUF_LOCK_DEBUG
printk("dma_buf_lock_handle_poll : return %i\n", ret);
pr_debug("%s : return %i\n", __func__, ret);
#endif
return ret;
}
@@ -526,17 +523,15 @@ static inline int is_dma_buf_lock_file(struct file *file)
return file->f_op == &dma_buf_lock_handle_fops;
}
/*
* Start requested lock.
*
* Allocates required memory, copies dma_buf_fd list from userspace,
* acquires related reservation objects, and starts the lock.
*/
static int dma_buf_lock_dolock(dma_buf_lock_k_request *request)
static int dma_buf_lock_dolock(struct dma_buf_lock_k_request *request)
{
dma_buf_lock_resource *resource;
struct dma_buf_lock_resource *resource;
struct ww_acquire_ctx ww_ctx;
int size;
int fd;
@@ -553,7 +548,7 @@ static int dma_buf_lock_dolock(dma_buf_lock_k_request *request)
request->exclusive != DMA_BUF_LOCK_EXCLUSIVE)
return -EINVAL;
resource = kzalloc(sizeof(dma_buf_lock_resource), GFP_KERNEL);
resource = kzalloc(sizeof(*resource), GFP_KERNEL);
if (resource == NULL)
return -ENOMEM;
@@ -594,7 +589,7 @@ static int dma_buf_lock_dolock(dma_buf_lock_k_request *request)
}
#if DMA_BUF_LOCK_DEBUG
for (i = 0; i < request->count; i++)
printk("dma_buf %i = %X\n", i, resource->list_of_dma_buf_fds[i]);
pr_debug("dma_buf %i = %X\n", i, resource->list_of_dma_buf_fds[i]);
#endif
/* Initialize the fence associated with dma_buf_lock resource */
@@ -611,13 +606,11 @@ static int dma_buf_lock_dolock(dma_buf_lock_k_request *request)
mutex_unlock(&dma_buf_lock_mutex);
for (i = 0; i < request->count; i++)
{
for (i = 0; i < request->count; i++) {
/* Convert fd into dma_buf structure */
resource->dma_bufs[i] = dma_buf_get(resource->list_of_dma_buf_fds[i]);
if (IS_ERR_VALUE(PTR_ERR(resource->dma_bufs[i])))
{
if (IS_ERR_VALUE(PTR_ERR(resource->dma_bufs[i]))) {
mutex_lock(&dma_buf_lock_mutex);
kref_put(&resource->refcount, dma_buf_lock_dounlock);
mutex_unlock(&dma_buf_lock_mutex);
@@ -632,8 +625,8 @@ static int dma_buf_lock_dolock(dma_buf_lock_k_request *request)
return -EINVAL;
}
#if DMA_BUF_LOCK_DEBUG
printk(KERN_DEBUG "dma_buf_lock_dolock : dma_buf_fd %i dma_buf %p dma_fence reservation %p\n",
resource->list_of_dma_buf_fds[i], resource->dma_bufs[i], resource->dma_bufs[i]->resv);
pr_debug("%s : dma_buf_fd %i dma_buf %p dma_fence reservation %p\n",
__func__, resource->list_of_dma_buf_fds[i], resource->dma_bufs[i], resource->dma_bufs[i]->resv);
#endif
}
@@ -643,9 +636,8 @@ static int dma_buf_lock_dolock(dma_buf_lock_k_request *request)
/* Create file descriptor associated with lock request */
fd = anon_inode_getfd("dma_buf_lock", &dma_buf_lock_handle_fops,
(void *)resource, 0);
if (fd < 0)
{
(void *)resource, 0);
if (fd < 0) {
mutex_lock(&dma_buf_lock_mutex);
kref_put(&resource->refcount, dma_buf_lock_dounlock);
kref_put(&resource->refcount, dma_buf_lock_dounlock);
@@ -659,7 +651,7 @@ static int dma_buf_lock_dolock(dma_buf_lock_k_request *request)
ret = dma_buf_lock_acquire_fence_reservation(resource, &ww_ctx);
if (ret) {
#if DMA_BUF_LOCK_DEBUG
printk(KERN_DEBUG "dma_buf_lock_dolock : Error %d locking reservations.\n", ret);
pr_debug("%s : Error %d locking reservations.\n", __func__, ret);
#endif
put_unused_fd(fd);
mutex_lock(&dma_buf_lock_mutex);
@@ -698,7 +690,7 @@ static int dma_buf_lock_dolock(dma_buf_lock_k_request *request)
#endif
if (ret) {
#if DMA_BUF_LOCK_DEBUG
printk(KERN_DEBUG "dma_buf_lock_dolock : Error %d reserving space for shared fence.\n", ret);
pr_debug("%s : Error %d reserving space for shared fence.\n", __func__, ret);
#endif
break;
}
@@ -708,7 +700,7 @@ static int dma_buf_lock_dolock(dma_buf_lock_k_request *request)
false);
if (ret) {
#if DMA_BUF_LOCK_DEBUG
printk(KERN_DEBUG "dma_buf_lock_dolock : Error %d adding reservation to callback.\n", ret);
pr_debug("%s : Error %d adding reservation to callback.\n", __func__, ret);
#endif
break;
}
@@ -724,7 +716,7 @@ static int dma_buf_lock_dolock(dma_buf_lock_k_request *request)
true);
if (ret) {
#if DMA_BUF_LOCK_DEBUG
printk(KERN_DEBUG "dma_buf_lock_dolock : Error %d adding reservation to callback.\n", ret);
pr_debug("%s : Error %d adding reservation to callback.\n", __func__, ret);
#endif
break;
}
@@ -748,8 +740,7 @@ static int dma_buf_lock_dolock(dma_buf_lock_k_request *request)
dma_buf_lock_fence_work(&resource->work);
}
if (IS_ERR_VALUE((unsigned long)ret))
{
if (IS_ERR_VALUE((unsigned long)ret)) {
put_unused_fd(fd);
mutex_lock(&dma_buf_lock_mutex);
@@ -761,7 +752,7 @@ static int dma_buf_lock_dolock(dma_buf_lock_k_request *request)
}
#if DMA_BUF_LOCK_DEBUG
printk("dma_buf_lock_dolock : complete\n");
pr_debug("%s : complete\n", __func__);
#endif
mutex_lock(&dma_buf_lock_mutex);
kref_put(&resource->refcount, dma_buf_lock_dounlock);
@@ -773,7 +764,7 @@ static int dma_buf_lock_dolock(dma_buf_lock_k_request *request)
static void dma_buf_lock_dounlock(struct kref *ref)
{
int i;
dma_buf_lock_resource *resource = container_of(ref, dma_buf_lock_resource, refcount);
struct dma_buf_lock_resource *resource = container_of(ref, struct dma_buf_lock_resource, refcount);
atomic_set(&resource->locked, 0);
@@ -784,8 +775,7 @@ static void dma_buf_lock_dounlock(struct kref *ref)
list_del(&resource->link);
for (i = 0; i < resource->count; i++)
{
for (i = 0; i < resource->count; i++) {
if (resource->dma_bufs[i])
dma_buf_put(resource->dma_bufs[i]);
}
@@ -799,7 +789,7 @@ static int __init dma_buf_lock_init(void)
{
int err;
#if DMA_BUF_LOCK_DEBUG
printk("dma_buf_lock_init\n");
pr_debug("%s\n", __func__);
#endif
err = alloc_chrdev_region(&dma_buf_lock_dev, 0, 1, dma_buf_lock_dev_name);
@@ -812,10 +802,8 @@ static int __init dma_buf_lock_init(void)
dma_buf_lock_class = class_create(THIS_MODULE, dma_buf_lock_dev_name);
if (IS_ERR(dma_buf_lock_class))
err = PTR_ERR(dma_buf_lock_class);
else
{
struct device *mdev;
mdev = device_create(
else {
struct device *mdev = device_create(
dma_buf_lock_class, NULL, dma_buf_lock_dev,
NULL, "%s", dma_buf_lock_dev_name);
if (!IS_ERR(mdev))
@@ -830,7 +818,7 @@ static int __init dma_buf_lock_init(void)
unregister_chrdev_region(dma_buf_lock_dev, 1);
}
#if DMA_BUF_LOCK_DEBUG
printk("dma_buf_lock_init failed\n");
pr_debug("%s failed\n", __func__);
#endif
return err;
}
@@ -838,25 +826,24 @@ static int __init dma_buf_lock_init(void)
static void __exit dma_buf_lock_exit(void)
{
#if DMA_BUF_LOCK_DEBUG
printk("dma_buf_lock_exit\n");
pr_debug("%s\n", __func__);
#endif
/* Unlock all outstanding references */
while (1)
{
while (1) {
struct dma_buf_lock_resource *resource;
mutex_lock(&dma_buf_lock_mutex);
if (list_empty(&dma_buf_lock_resource_list))
{
if (list_empty(&dma_buf_lock_resource_list)) {
mutex_unlock(&dma_buf_lock_mutex);
break;
}
else
{
dma_buf_lock_resource *resource = list_entry(dma_buf_lock_resource_list.next,
dma_buf_lock_resource, link);
kref_put(&resource->refcount, dma_buf_lock_dounlock);
mutex_unlock(&dma_buf_lock_mutex);
}
resource = list_entry(dma_buf_lock_resource_list.next,
struct dma_buf_lock_resource, link);
kref_put(&resource->refcount, dma_buf_lock_dounlock);
mutex_unlock(&dma_buf_lock_mutex);
}
device_destroy(dma_buf_lock_class, dma_buf_lock_dev);
@@ -874,7 +861,7 @@ static long dma_buf_lock_ioctl(struct file *filp, unsigned int cmd, unsigned lon
static int dma_buf_lock_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, unsigned long arg)
#endif
{
dma_buf_lock_k_request request;
struct dma_buf_lock_k_request request;
int size = _IOC_SIZE(cmd);
if (_IOC_TYPE(cmd) != DMA_BUF_LOCK_IOC_MAGIC)
@@ -882,17 +869,16 @@ static int dma_buf_lock_ioctl(struct inode *inode, struct file *filp, unsigned i
if ((_IOC_NR(cmd) < DMA_BUF_LOCK_IOC_MINNR) || (_IOC_NR(cmd) > DMA_BUF_LOCK_IOC_MAXNR))
return -ENOTTY;
switch (cmd)
{
case DMA_BUF_LOCK_FUNC_LOCK_ASYNC:
if (size != sizeof(dma_buf_lock_k_request))
return -ENOTTY;
if (copy_from_user(&request, (void __user *)arg, size))
return -EFAULT;
switch (cmd) {
case DMA_BUF_LOCK_FUNC_LOCK_ASYNC:
if (size != sizeof(request))
return -ENOTTY;
if (copy_from_user(&request, (void __user *)arg, size))
return -EFAULT;
#if DMA_BUF_LOCK_DEBUG
printk("DMA_BUF_LOCK_FUNC_LOCK_ASYNC - %i\n", request.count);
pr_debug("DMA_BUF_LOCK_FUNC_LOCK_ASYNC - %i\n", request.count);
#endif
return dma_buf_lock_dolock(&request);
return dma_buf_lock_dolock(&request);
}
return -ENOTTY;

View File

@@ -22,23 +22,21 @@
#ifndef _DMA_BUF_LOCK_H
#define _DMA_BUF_LOCK_H
typedef enum dma_buf_lock_exclusive
{
enum dma_buf_lock_exclusive {
DMA_BUF_LOCK_NONEXCLUSIVE = 0,
DMA_BUF_LOCK_EXCLUSIVE = -1
} dma_buf_lock_exclusive;
};
typedef struct dma_buf_lock_k_request
{
struct dma_buf_lock_k_request {
int count;
int *list_of_dma_buf_fds;
int timeout;
dma_buf_lock_exclusive exclusive;
} dma_buf_lock_k_request;
enum dma_buf_lock_exclusive exclusive;
};
#define DMA_BUF_LOCK_IOC_MAGIC '~'
#define DMA_BUF_LOCK_FUNC_LOCK_ASYNC _IOW(DMA_BUF_LOCK_IOC_MAGIC, 11, dma_buf_lock_k_request)
#define DMA_BUF_LOCK_FUNC_LOCK_ASYNC _IOW(DMA_BUF_LOCK_IOC_MAGIC, 11, struct dma_buf_lock_k_request)
#define DMA_BUF_LOCK_IOC_MINNR 11
#define DMA_BUF_LOCK_IOC_MAXNR 11

View File

@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
* (C) COPYRIGHT 2012-2021 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2012-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -81,6 +81,7 @@ static int dma_buf_te_attach(struct dma_buf *buf, struct dma_buf_attachment *att
#endif
{
struct dma_buf_te_alloc *alloc;
alloc = buf->priv;
if (alloc->fail_attach)
@@ -95,6 +96,12 @@ static int dma_buf_te_attach(struct dma_buf *buf, struct dma_buf_attachment *att
return 0;
}
/**
* dma_buf_te_detach - The detach callback function to release &attachment
*
* @buf: buffer for the &attachment
* @attachment: attachment data to be released
*/
static void dma_buf_te_detach(struct dma_buf *buf, struct dma_buf_attachment *attachment)
{
struct dma_buf_te_alloc *alloc = buf->priv;
@@ -199,6 +206,7 @@ static void dma_buf_te_release(struct dma_buf *buf)
{
size_t i;
struct dma_buf_te_alloc *alloc;
alloc = buf->priv;
/* no need for locking */
@@ -240,6 +248,7 @@ static int dma_buf_te_sync(struct dma_buf *dmabuf,
list_for_each_entry(attachment, &dmabuf->attachments, node) {
struct dma_buf_te_attachment *pa = attachment->priv;
struct sg_table *sg = pa->sg;
if (!sg) {
dev_dbg(te_device.this_device, "no mapping for device %s\n", dev_name(attachment->dev));
continue;
@@ -291,6 +300,7 @@ static void dma_buf_te_mmap_open(struct vm_area_struct *vma)
{
struct dma_buf *dma_buf;
struct dma_buf_te_alloc *alloc;
dma_buf = vma->vm_private_data;
alloc = dma_buf->priv;
@@ -303,6 +313,7 @@ static void dma_buf_te_mmap_close(struct vm_area_struct *vma)
{
struct dma_buf *dma_buf;
struct dma_buf_te_alloc *alloc;
dma_buf = vma->vm_private_data;
alloc = dma_buf->priv;
@@ -344,7 +355,7 @@ static vm_fault_t dma_buf_te_mmap_fault(struct vm_fault *vmf)
return 0;
}
struct vm_operations_struct dma_buf_te_vm_ops = {
static const struct vm_operations_struct dma_buf_te_vm_ops = {
.open = dma_buf_te_mmap_open,
.close = dma_buf_te_mmap_close,
.fault = dma_buf_te_mmap_fault
@@ -353,6 +364,7 @@ struct vm_operations_struct dma_buf_te_vm_ops = {
static int dma_buf_te_mmap(struct dma_buf *dmabuf, struct vm_area_struct *vma)
{
struct dma_buf_te_alloc *alloc;
alloc = dmabuf->priv;
if (alloc->fail_mmap)
@@ -398,7 +410,6 @@ static void dma_buf_te_kunmap(struct dma_buf *buf,
return;
kunmap(alloc->pages[page_num]);
return;
}
static struct dma_buf_ops dma_buf_te_ops = {
@@ -798,13 +809,14 @@ static const struct file_operations dma_buf_te_fops = {
static int __init dma_buf_te_init(void)
{
int res;
te_device.minor = MISC_DYNAMIC_MINOR;
te_device.name = "dma_buf_te";
te_device.fops = &dma_buf_te_fops;
res = misc_register(&te_device);
if (res) {
printk(KERN_WARNING"Misc device registration failed of 'dma_buf_te'\n");
pr_warn("Misc device registration failed of 'dma_buf_te'\n");
return res;
}
te_device.this_device->coherent_dma_mask = DMA_BIT_MASK(32);

View File

@@ -367,7 +367,7 @@ static vm_fault_t example_mgm_vmf_insert_pfn_prot(
dev_dbg(data->dev,
"%s(mgm_dev=%p, group_id=%d, vma=%p, addr=0x%lx, pfn=0x%lx, prot=0x%llx)\n",
__func__, (void *)mgm_dev, group_id, (void *)vma, addr, pfn,
(unsigned long long int) pgprot_val(prot));
(unsigned long long) pgprot_val(prot));
if (WARN_ON(group_id < 0) ||
WARN_ON(group_id >= MEMORY_GROUP_MANAGER_NR_GROUPS))

View File

@@ -107,20 +107,20 @@ static void small_granularity_alloc(struct simple_pma_device *const epma_dev,
alloc_pages_bitfield_size = ALLOC_PAGES_BITFIELD_ARR_SIZE(epma_dev->rmem_size);
WARN(alloc_bitfield_idx >= alloc_pages_bitfield_size,
"%s: idx>bf_size: %zu %zu", __FUNCTION__,
"%s: idx>bf_size: %zu %zu", __func__,
alloc_bitfield_idx, alloc_pages_bitfield_size);
WARN((start_bit + (1 << order)) > PAGES_PER_BITFIELD_ELEM,
"%s: start=%zu order=%zu ppbe=%zu",
__FUNCTION__, start_bit, order, PAGES_PER_BITFIELD_ELEM);
__func__, start_bit, order, PAGES_PER_BITFIELD_ELEM);
bitfield = &epma_dev->allocated_pages_bitfield_arr[alloc_bitfield_idx];
for (i = 0; i < (1 << order); i++) {
/* Check the pages represented by this bit are actually free */
WARN (*bitfield & (1ULL << (start_bit + i)),
WARN(*bitfield & (1ULL << (start_bit + i)),
"in %s: page not free: %zu %zu %.16llx %zu\n",
__FUNCTION__, i, order, *bitfield, alloc_pages_bitfield_size);
__func__, i, order, *bitfield, alloc_pages_bitfield_size);
/* Mark the pages as now allocated */
*bitfield |= (1ULL << (start_bit + i));
@@ -172,7 +172,7 @@ static void large_granularity_alloc(struct simple_pma_device *const epma_dev,
*/
WARN((start_alloc_bitfield_idx + order) >= ALLOC_PAGES_BITFIELD_ARR_SIZE(epma_dev->rmem_size),
"%s: start=%zu order=%zu ms=%zu",
__FUNCTION__, start_alloc_bitfield_idx, order, epma_dev->rmem_size);
__func__, start_alloc_bitfield_idx, order, epma_dev->rmem_size);
for (i = 0; i < num_bitfield_elements_needed; i++) {
u64 *bitfield = &epma_dev->allocated_pages_bitfield_arr[start_alloc_bitfield_idx + i];
@@ -180,7 +180,7 @@ static void large_granularity_alloc(struct simple_pma_device *const epma_dev,
/* We expect all pages that relate to this bitfield element to be free */
WARN((*bitfield != 0),
"in %s: pages not free: i=%zu o=%zu bf=%.16llx\n",
__FUNCTION__, i, order, *bitfield);
__func__, i, order, *bitfield);
/* Mark all the pages for this element as not free */
*bitfield = ~0ULL;
@@ -318,9 +318,7 @@ static struct protected_memory_allocation *simple_pma_alloc_page(
spin_unlock(&epma_dev->rmem_lock);
return pma;
}
}
else
{
} else {
count = 0;
}
}
@@ -402,11 +400,10 @@ static void simple_pma_free_page(
/* Clear the bits for the pages we're now freeing */
*bitfield &= ~(((1ULL << num_pages_in_allocation) - 1) << bitfield_start_bit);
}
else {
} else {
WARN(page_num % PAGES_PER_BITFIELD_ELEM,
"%s: Expecting allocs of order >= %d to be %zu-page aligned\n",
__FUNCTION__, ORDER_OF_PAGES_PER_BITFIELD_ELEM, PAGES_PER_BITFIELD_ELEM);
__func__, ORDER_OF_PAGES_PER_BITFIELD_ELEM, PAGES_PER_BITFIELD_ELEM);
for (i = 0; i < num_bitfield_elems_used_by_alloc; i++) {
bitfield = &epma_dev->allocated_pages_bitfield_arr[bitfield_idx + i];
@@ -414,7 +411,7 @@ static void simple_pma_free_page(
/* We expect all bits to be set (all pages allocated) */
WARN((*bitfield != ~0),
"%s: alloc being freed is not fully allocated: of=%zu np=%zu bf=%.16llx\n",
__FUNCTION__, offset, num_pages_in_allocation, *bitfield);
__func__, offset, num_pages_in_allocation, *bitfield);
/*
* Now clear all the bits in the bitfield element to mark all the pages

View File

@@ -71,7 +71,7 @@ endif
#
# Driver version string which is returned to userspace via an ioctl
MALI_RELEASE_NAME ?= '"g10p0-01eac0"'
MALI_RELEASE_NAME ?= '"g11p0-01eac0"'
# Set up defaults if not defined by build system
ifeq ($(CONFIG_MALI_BIFROST_DEBUG), y)
MALI_UNIT_TEST = 1
@@ -164,6 +164,7 @@ bifrost_kbase-y := \
mali_kbase_hwcnt_gpu_narrow.o \
mali_kbase_hwcnt_types.o \
mali_kbase_hwcnt_virtualizer.o \
mali_kbase_hwcnt_watchdog_if_timer.o \
mali_kbase_softjobs.o \
mali_kbase_hw.o \
mali_kbase_debug.o \
@@ -201,12 +202,12 @@ bifrost_kbase-$(CONFIG_SYNC_FILE) += \
ifeq ($(CONFIG_MALI_CSF_SUPPORT),y)
bifrost_kbase-y += \
mali_kbase_hwcnt_backend_csf.o \
mali_kbase_hwcnt_watchdog_if_timer.o \
mali_kbase_hwcnt_backend_csf_if_fw.o
else
bifrost_kbase-y += \
mali_kbase_jm.o \
mali_kbase_hwcnt_backend_jm.o \
mali_kbase_hwcnt_backend_jm_watchdog.o \
mali_kbase_dummy_job_wa.o \
mali_kbase_debug_job_fault.o \
mali_kbase_event.o \

View File

@@ -47,6 +47,14 @@ config MALI_REAL_HW
default y
default n if NO_MALI
config MALI_PLATFORM_DT_PIN_RST
	bool "Enable Juno GPU Pin reset"
	depends on MALI_BIFROST
	# Kconfig activates only the first visible default, so the
	# conditional default must precede the unconditional fallback;
	# otherwise "default y if BUSLOG" can never take effect.
	default y if BUSLOG
	default n
	help
	  Enables support for GPUs pin reset on Juno platforms.
config MALI_CSF_SUPPORT
bool "Enable Mali CSF based GPU support"
depends on MALI_BIFROST

View File

@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
* (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -20,7 +20,7 @@
*/
/**
* Mali structures define to support arbitration feature
* DOC: Mali structure definitions to support the arbitration feature
*/
#ifndef _MALI_KBASE_ARBITER_DEFS_H_

View File

@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
* (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -20,14 +20,14 @@
*/
/**
* Defines the Mali arbiter interface
* DOC: Defines the Mali arbiter interface
*/
#ifndef _MALI_KBASE_ARBITER_INTERFACE_H_
#define _MALI_KBASE_ARBITER_INTERFACE_H_
/**
* Mali arbiter interface version
* DOC: Mali arbiter interface version
*
* This specifies the current version of the configuration interface. Whenever
* the arbiter interface changes, so that integration effort is required, the
@@ -44,7 +44,7 @@
#define MALI_KBASE_ARBITER_INTERFACE_VERSION 5
/**
* NO_FREQ is used in case platform doesn't support reporting frequency
* DOC: NO_FREQ is used in case platform doesn't support reporting frequency
*/
#define NO_FREQ 0
@@ -53,14 +53,6 @@ struct arbiter_if_dev;
/**
* struct arbiter_if_arb_vm_ops - Interface to communicate messages to VM
*
* This struct contains callbacks used to deliver messages
* from the arbiter to the corresponding VM.
*
* Note that calls into these callbacks may have synchronous calls back into
* the arbiter arbiter_if_vm_arb_ops callbacks below.
* For example vm_arb_gpu_stopped() may be called as a side effect of
* arb_vm_gpu_stop() being called here.
*
* @arb_vm_gpu_stop: Callback to ask VM to stop using GPU.
* dev: The arbif kernel module device.
*
@@ -94,6 +86,13 @@ struct arbiter_if_dev;
* freq: GPU clock frequency value reported from arbiter
*
* Informs KBase that the GPU clock frequency has been updated.
*
* This struct contains callbacks used to deliver messages
* from the arbiter to the corresponding VM.
* Note that calls into these callbacks may have synchronous calls back into
* the arbiter arbiter_if_vm_arb_ops callbacks below.
* For example vm_arb_gpu_stopped() may be called as a side effect of
* arb_vm_gpu_stop() being called here.
*/
struct arbiter_if_arb_vm_ops {
void (*arb_vm_gpu_stop)(struct device *dev);
@@ -107,12 +106,6 @@ struct arbiter_if_arb_vm_ops {
/**
* struct arbiter_if_vm_arb_ops - Interface to communicate messages to arbiter
*
* This struct contains callbacks used to request operations
* from the VM to the arbiter
*
* Note that we must not make any synchronous calls back in to the VM
* (via arbiter_if_arb_vm_ops above) in the context of these callbacks.
*
* @vm_arb_register_dev: Callback to register VM device driver callbacks.
* arbif_dev: The arbiter interface to register
* with for device callbacks
@@ -142,6 +135,11 @@ struct arbiter_if_arb_vm_ops {
* using the GPU
* arbif_dev: The arbiter interface device to notify.
* gpu_required: The GPU is still needed to do more work.
*
* This struct contains callbacks used to request operations
* from the VM to the arbiter.
* Note that we must not make any synchronous calls back in to the VM
* (via arbiter_if_arb_vm_ops above) in the context of these callbacks.
*/
struct arbiter_if_vm_arb_ops {
int (*vm_arb_register_dev)(struct arbiter_if_dev *arbif_dev,

View File

@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
* (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -20,7 +20,7 @@
*/
/**
* Mali arbiter power manager state machine and APIs
* DOC: Mali arbiter power manager state machine and APIs
*/
#include <mali_kbase.h>
@@ -394,6 +394,8 @@ void kbase_arbiter_pm_release_interrupts(struct kbase_device *kbdev)
* @kbdev: The kbase device structure for the device (must be a valid pointer)
*
* Install interrupts and set the interrupt_install flag to true.
*
* Return: 0 if success, or a Linux error code
*/
int kbase_arbiter_pm_install_interrupts(struct kbase_device *kbdev)
{
@@ -619,18 +621,6 @@ static void kbase_arbiter_pm_vm_gpu_stop(struct kbase_device *kbdev)
case KBASE_VM_STATE_SUSPEND_PENDING:
/* Suspend finishes with a stop so nothing else to do */
break;
case KBASE_VM_STATE_INITIALIZING:
case KBASE_VM_STATE_STOPPED_GPU_REQUESTED:
/*
* Case stop() is received when in a GPU REQUESTED state, it
* means that the granted() was missed so the GPU needs to be
* requested again.
*/
dev_dbg(kbdev->dev,
"GPU stop while already stopped with GPU requested");
kbase_arbif_gpu_stopped(kbdev, true);
start_request_timer(kbdev);
break;
default:
dev_warn(kbdev->dev, "GPU_STOP when not expected - state %s\n",
kbase_arbiter_pm_vm_state_str(arb_vm_state->vm_state));
@@ -668,19 +658,8 @@ static void kbase_gpu_lost(struct kbase_device *kbdev)
break;
case KBASE_VM_STATE_SUSPENDED:
case KBASE_VM_STATE_STOPPED:
dev_dbg(kbdev->dev, "GPU lost while already stopped");
break;
case KBASE_VM_STATE_INITIALIZING:
case KBASE_VM_STATE_STOPPED_GPU_REQUESTED:
/*
* Case lost() is received when in a GPU REQUESTED state, it
* means that the granted() and stop() were missed so the GPU
* needs to be requested again. Very unlikely to happen.
*/
dev_dbg(kbdev->dev,
"GPU lost while already stopped with GPU requested");
kbase_arbif_gpu_request(kbdev);
start_request_timer(kbdev);
dev_dbg(kbdev->dev, "GPU lost while already stopped");
break;
case KBASE_VM_STATE_SUSPEND_WAIT_FOR_GRANT:
dev_dbg(kbdev->dev, "GPU lost while waiting to suspend");
@@ -947,6 +926,8 @@ static void kbase_arbiter_pm_vm_wait_gpu_assignment(struct kbase_device *kbdev)
* @kbdev: The kbase device structure for the device (must be a valid pointer)
*
* Checks if the virtual machine holds VM state lock.
*
* Return: true if GPU is assigned, else false.
*/
static inline bool kbase_arbiter_pm_vm_gpu_assigned_lockheld(
struct kbase_device *kbdev)
@@ -1067,14 +1048,14 @@ void kbase_arbiter_pm_update_gpu_freq(struct kbase_arbiter_freq *arb_freq,
}
/**
* enumerate_arb_gpu_clk() - Enumerate a GPU clock on the given index
* get_arb_gpu_clk() - Enumerate a GPU clock on the given index
* @kbdev: kbase_device pointer
* @index: GPU clock index
*
* Returns pointer to structure holding GPU clock frequency data reported from
* Return: Pointer to structure holding GPU clock frequency data reported from
* arbiter, only index 0 is valid.
*/
static void *enumerate_arb_gpu_clk(struct kbase_device *kbdev,
static void *get_arb_gpu_clk(struct kbase_device *kbdev,
unsigned int index)
{
if (index == 0)
@@ -1084,10 +1065,10 @@ static void *enumerate_arb_gpu_clk(struct kbase_device *kbdev,
/**
* get_arb_gpu_clk_rate() - Get the current rate of GPU clock frequency value
* @kbdev: kbase_device pointer
* @index: GPU clock index
* @kbdev: kbase_device pointer
* @gpu_clk_handle: Handle unique to the enumerated GPU clock
*
* Returns the GPU clock frequency value saved when gpu is granted from arbiter
* Return: The GPU clock frequency value saved when gpu is granted from arbiter
*/
static unsigned long get_arb_gpu_clk_rate(struct kbase_device *kbdev,
void *gpu_clk_handle)
@@ -1109,10 +1090,10 @@ static unsigned long get_arb_gpu_clk_rate(struct kbase_device *kbdev,
* @gpu_clk_handle: Handle unique to the enumerated GPU clock
* @nb: notifier block containing the callback function pointer
*
* Returns 0 on success, negative error code otherwise.
*
* This function registers a callback function that is invoked whenever the
* frequency of the clock corresponding to @gpu_clk_handle changes.
*
* Return: 0 on success, negative error code otherwise.
*/
static int arb_gpu_clk_notifier_register(struct kbase_device *kbdev,
void *gpu_clk_handle, struct notifier_block *nb)
@@ -1154,7 +1135,7 @@ static void arb_gpu_clk_notifier_unregister(struct kbase_device *kbdev,
struct kbase_clk_rate_trace_op_conf arb_clk_rate_trace_ops = {
.get_gpu_clk_rate = get_arb_gpu_clk_rate,
.enumerate_gpu_clk = enumerate_arb_gpu_clk,
.enumerate_gpu_clk = get_arb_gpu_clk,
.gpu_clk_notifier_register = arb_gpu_clk_notifier_register,
.gpu_clk_notifier_unregister = arb_gpu_clk_notifier_unregister
};

View File

@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
* (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -20,7 +20,7 @@
*/
/**
* Mali arbiter power manager state machine and APIs
* DOC: Mali arbiter power manager state machine and APIs
*/
#ifndef _MALI_KBASE_ARBITER_PM_H_
@@ -101,6 +101,8 @@ void kbase_arbiter_pm_release_interrupts(struct kbase_device *kbdev);
* @kbdev: The kbase device structure for the device (must be a valid pointer)
*
* Install interrupts and set the interrupt_install flag to true.
*
* Return: 0 if success, or a Linux error code
*/
int kbase_arbiter_pm_install_interrupts(struct kbase_device *kbdev);

View File

@@ -33,7 +33,7 @@ int kbase_backend_gpuprops_get(struct kbase_device *kbdev,
struct kbase_gpuprops_regdump *regdump)
{
int i;
struct kbase_gpuprops_regdump registers;
struct kbase_gpuprops_regdump registers = { 0 };
/* Fill regdump with the content of the relevant registers */
registers.gpu_id = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_ID));

View File

@@ -421,12 +421,12 @@ int kbase_instr_backend_init(struct kbase_device *kbdev)
#ifdef CONFIG_MALI_PRFCNT_SET_SELECT_VIA_DEBUG_FS
/* Use the build time option for the override default. */
#if defined(CONFIG_MALI_BIFROST_PRFCNT_SET_SECONDARY)
kbdev->hwcnt.backend.override_counter_set = KBASE_HWCNT_SET_SECONDARY;
kbdev->hwcnt.backend.override_counter_set = KBASE_HWCNT_PHYSICAL_SET_SECONDARY;
#elif defined(CONFIG_MALI_PRFCNT_SET_TERTIARY)
kbdev->hwcnt.backend.override_counter_set = KBASE_HWCNT_SET_TERTIARY;
kbdev->hwcnt.backend.override_counter_set = KBASE_HWCNT_PHYSICAL_SET_TERTIARY;
#else
/* Default to primary */
kbdev->hwcnt.backend.override_counter_set = KBASE_HWCNT_SET_PRIMARY;
kbdev->hwcnt.backend.override_counter_set = KBASE_HWCNT_PHYSICAL_SET_PRIMARY;
#endif
#endif
return 0;
@@ -446,8 +446,8 @@ void kbase_instr_backend_debugfs_init(struct kbase_device *kbdev)
*
* Valid inputs are the values accepted by the SET_SELECT bits of the
* PRFCNT_CONFIG register as defined in the architecture specification.
*/
debugfs_create_u8("hwcnt_set_select", S_IRUGO | S_IWUSR,
*/
debugfs_create_u8("hwcnt_set_select", 0644,
kbdev->mali_debugfs_directory,
(u8 *)&kbdev->hwcnt.backend.override_counter_set);
}

View File

@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
* (C) COPYRIGHT 2014-2021 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2014-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -48,6 +48,7 @@ struct rb_entry {
/**
 * SLOT_RB_TAG_KCTX() - a function-like macro for converting a pointer to a
 *                      u64 for serving as tagged value.
 * @kctx: Pointer to kbase context.
 *
 * The pointer is first converted to uintptr_t and then widened to u64. The
 * whole expansion is parenthesized so that the result behaves as a single
 * primary expression wherever the macro is used.
 */
#define SLOT_RB_TAG_KCTX(kctx) ((u64)((uintptr_t)(kctx)))
/**

View File

@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
* (C) COPYRIGHT 2010-2021 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2010-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -425,6 +425,8 @@ void kbase_job_done(struct kbase_device *kbdev, u32 done)
JOB_SLOT_REG(i, JS_STATUS));
if (completion_code == BASE_JD_EVENT_STOPPED) {
u64 job_head;
KBASE_TLSTREAM_AUX_EVENT_JOB_SLOT(
kbdev, NULL,
i, 0, TL_JS_EVENT_SOFT_STOP);
@@ -441,6 +443,21 @@ void kbase_job_done(struct kbase_device *kbdev, u32 done)
((u64)kbase_reg_read(kbdev,
JOB_SLOT_REG(i, JS_TAIL_HI))
<< 32);
job_head = (u64)kbase_reg_read(kbdev,
JOB_SLOT_REG(i, JS_HEAD_LO)) |
((u64)kbase_reg_read(kbdev,
JOB_SLOT_REG(i, JS_HEAD_HI))
<< 32);
/* For a soft-stopped job chain js_tail should be the
* same as the js_head, but if not then the
* job chain was incorrectly marked as
* soft-stopped. In such case we should not
* be resuming the job chain from js_tail and
* report the completion_code as UNKNOWN.
*/
if (job_tail != job_head)
completion_code = BASE_JD_EVENT_UNKNOWN;
} else if (completion_code ==
BASE_JD_EVENT_NOT_STARTED) {
/* PRLAM-10673 can cause a TERMINATED
@@ -922,33 +939,12 @@ void kbase_job_slot_softstop_swflags(struct kbase_device *kbdev, int js,
JS_COMMAND_SOFT_STOP | sw_flags);
}
/**
* kbase_job_slot_softstop - Soft-stop the specified job slot
* @kbdev: The kbase device
* @js: The job slot to soft-stop
* @target_katom: The job that should be soft-stopped (or NULL for any job)
* Context:
* The job slot lock must be held when calling this function.
* The job slot must not already be in the process of being soft-stopped.
*
* Where possible any job in the next register is evicted before the soft-stop.
*/
void kbase_job_slot_softstop(struct kbase_device *kbdev, int js,
struct kbase_jd_atom *target_katom)
{
kbase_job_slot_softstop_swflags(kbdev, js, target_katom, 0u);
}
/**
* kbase_job_slot_hardstop - Hard-stop the specified job slot
* @kctx: The kbase context that contains the job(s) that should
* be hard-stopped
* @js: The job slot to hard-stop
* @target_katom: The job that should be hard-stopped (or NULL for all
* jobs from the context)
* Context:
* The job slot lock must be held when calling this function.
*/
void kbase_job_slot_hardstop(struct kbase_context *kctx, int js,
struct kbase_jd_atom *target_katom)
{
@@ -961,26 +957,6 @@ void kbase_job_slot_hardstop(struct kbase_context *kctx, int js,
CSTD_UNUSED(stopped);
}
/**
* kbase_job_check_enter_disjoint - potentially enter disjoint mode
* @kbdev: kbase device
* @action: the event which has occurred
* @core_reqs: core requirements of the atom
* @target_katom: the atom which is being affected
*
* For a certain soft-stop action, work out whether to enter disjoint
* state.
*
* This does not register multiple disjoint events if the atom has already
* started a disjoint period
*
* @core_reqs can be supplied as 0 if the atom had not started on the hardware
* (and so a 'real' soft/hard-stop was not required, but it still interrupted
* flow, perhaps on another context)
*
* kbase_job_check_leave_disjoint() should be used to end the disjoint
* state when the soft/hard-stop action is complete
*/
void kbase_job_check_enter_disjoint(struct kbase_device *kbdev, u32 action,
base_jd_core_req core_reqs, struct kbase_jd_atom *target_katom)
{
@@ -1002,14 +978,6 @@ void kbase_job_check_enter_disjoint(struct kbase_device *kbdev, u32 action,
kbase_disjoint_state_up(kbdev);
}
/**
* kbase_job_check_leave_disjoint - potentially leave disjoint state
* @kbdev: kbase device
* @target_katom: atom which is finishing
*
* Work out whether to leave disjoint state when finishing an atom that was
* originated by kbase_job_check_enter_disjoint().
*/
void kbase_job_check_leave_disjoint(struct kbase_device *kbdev,
struct kbase_jd_atom *target_katom)
{
@@ -1340,8 +1308,7 @@ static void kbasep_try_reset_gpu_early(struct kbase_device *kbdev)
* This function soft-stops all the slots to ensure that as many jobs as
* possible are saved.
*
* Return:
* The function returns a boolean which should be interpreted as follows:
* Return: boolean which should be interpreted as follows:
* true - Prepared for reset, kbase_reset_gpu_locked should be called.
* false - Another thread is performing a reset, kbase_reset_gpu should
* not be called.
@@ -1518,9 +1485,9 @@ static u64 kbasep_apply_limited_core_mask(const struct kbase_device *kbdev,
#ifdef CONFIG_MALI_BIFROST_DEBUG
dev_dbg(kbdev->dev,
"Limiting affinity due to BASE_JD_REQ_LIMITED_CORE_MASK from 0x%lx to 0x%lx (mask is 0x%lx)\n",
(unsigned long int)affinity,
(unsigned long int)result,
(unsigned long int)limited_core_mask);
(unsigned long)affinity,
(unsigned long)result,
(unsigned long)limited_core_mask);
#else
CSTD_UNUSED(kbdev);
#endif

View File

@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
* (C) COPYRIGHT 2014-2021 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2014-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -37,14 +37,23 @@
#include <backend/gpu/mali_kbase_jm_internal.h>
#include <backend/gpu/mali_kbase_pm_internal.h>
/* Return whether the specified ringbuffer is empty. HW access lock must be
* held
/**
 * SLOT_RB_EMPTY - Return whether the specified ringbuffer is empty.
 *
 * @rb: ring buffer
 *
 * The ring buffer is empty when its write index equals its read index.
 * @rb is parenthesized in the expansion so that pointer expressions (for
 * example ``base + i``) can be passed safely; note that it is still
 * evaluated twice, so it must not have side effects.
 *
 * Note: HW access lock must be held
 */
#define SLOT_RB_EMPTY(rb) ((rb)->write_idx == (rb)->read_idx)
/* Return number of atoms currently in the specified ringbuffer. HW access lock
* must be held
/**
* SLOT_RB_ENTRIES - Return number of atoms currently in the specified ringbuffer.
*
* @rb: ring buffer
*
* Note: HW access lock must be held
*/
#define SLOT_RB_ENTRIES(rb) (int)(s8)(rb->write_idx - rb->read_idx)
/* @rb is parenthesized so pointer expressions expand correctly; it is evaluated twice. */
#define SLOT_RB_ENTRIES(rb) ((int)(s8)((rb)->write_idx - (rb)->read_idx))
static void kbase_gpu_release_atom(struct kbase_device *kbdev,
struct kbase_jd_atom *katom,
@@ -304,10 +313,10 @@ static void kbase_gpu_release_atom(struct kbase_device *kbdev,
[katom->slot_nr]);
/* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */
fallthrough;
case KBASE_ATOM_GPU_RB_READY:
/* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */
fallthrough;
case KBASE_ATOM_GPU_RB_WAITING_FOR_CORE_AVAILABLE:
break;
@@ -367,13 +376,13 @@ static void kbase_gpu_release_atom(struct kbase_device *kbdev,
}
/* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */
fallthrough;
case KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_PREV:
/* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */
fallthrough;
case KBASE_ATOM_GPU_RB_WAITING_BLOCKED:
/* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */
fallthrough;
case KBASE_ATOM_GPU_RB_RETURN_TO_JS:
break;
}
@@ -1813,7 +1822,7 @@ void kbase_gpu_dump_slots(struct kbase_device *kbdev)
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
dev_info(kbdev->dev, "kbase_gpu_dump_slots:\n");
dev_info(kbdev->dev, "%s:\n", __func__);
for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) {
int idx;

View File

@@ -62,7 +62,7 @@ void kbase_backend_timer_suspend(struct kbase_device *kbdev);
* scheduling timer
* @kbdev: Device pointer
*
* This function should be called on resume. Note that is is not guaranteed to
* This function should be called on resume. Note that it is not guaranteed to
* re-start the timer, only evaluate whether it should be re-started.
*
* Caller must hold runpool_mutex.

View File

@@ -121,9 +121,9 @@ int kbase_set_mmu_quirks(struct kbase_device *kbdev)
if (kbdev->system_coherency == COHERENCY_ACE) {
/* Allow memory configuration disparity to be ignored,
* we optimize the use of shared memory and thus we
* expect some disparity in the memory configuration.
*/
* we optimize the use of shared memory and thus we
* expect some disparity in the memory configuration.
*/
kbdev->hw_quirks_mmu |= L2_MMU_CONFIG_ALLOW_SNOOP_DISPARITY;
}

View File

@@ -1470,9 +1470,8 @@ u8 midgard_model_read_reg(void *h, u32 addr, u32 * const value)
pr_debug("JS_IRQ_MASK being read %x", *value);
}
#else /* !MALI_USE_CSF */
else if (addr == JOB_CONTROL_REG(JOB_IRQ_MASK)) {
/* ignore JOB_IRQ_MASK as it is handled by CSFFW */
}
else if (addr == JOB_CONTROL_REG(JOB_IRQ_MASK))
; /* ignore JOB_IRQ_MASK as it is handled by CSFFW */
#endif /* !MALI_USE_CSF */
else if (addr == GPU_CONTROL_REG(GPU_IRQ_MASK)) {
*value = (dummy->reset_completed_mask << 8) |

View File

@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
* (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2014-2015, 2017-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -143,7 +143,6 @@ void midgard_model_destroy(void *h);
u8 midgard_model_write_reg(void *h, u32 addr, u32 value);
u8 midgard_model_read_reg(void *h, u32 addr,
u32 * const value);
void gpu_generate_error(void);
void midgard_set_error(int job_slot);
int job_atom_inject_error(struct kbase_error_params *params);
int gpu_model_control(void *h,

View File

@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
* (C) COPYRIGHT 2018-2021 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2014-2015, 2018-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -39,7 +39,11 @@ unsigned int error_probability = 50; /* to be set between 0 and 100 */
/* probability of having multiple errors given that there is an error */
unsigned int multiple_error_probability = 50;
void gpu_generate_error(void)
#ifdef CONFIG_MALI_ERROR_INJECT_RANDOM
/**
* gpu_generate_error - Generate GPU error
*/
static void gpu_generate_error(void)
{
unsigned int errors_num = 0;
@@ -94,6 +98,7 @@ void gpu_generate_error(void)
}
}
}
#endif
int job_atom_inject_error(struct kbase_error_params *params)
{

View File

@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
* (C) COPYRIGHT 2010, 2012-2015, 2017-2021 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2010, 2012-2015, 2017-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -135,8 +135,12 @@ void gpu_device_raise_irq(void *model,
default:
dev_warn(kbdev->dev, "Unknown IRQ");
kmem_cache_free(kbdev->irq_slab, data);
data = NULL;
break;
}
queue_work(kbdev->irq_workq, &data->work);
if (data != NULL)
queue_work(kbdev->irq_workq, &data->work);
}
void kbase_reg_write(struct kbase_device *kbdev, u32 offset, u32 value)
@@ -248,6 +252,11 @@ int kbase_gpu_device_create(struct kbase_device *kbdev)
return 0;
}
/**
* kbase_gpu_device_destroy - Destroy GPU device
*
* @kbdev: kbase device
*/
void kbase_gpu_device_destroy(struct kbase_device *kbdev)
{
midgard_model_destroy(kbdev->model);

View File

@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
* (C) COPYRIGHT 2010-2015, 2018-2021 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2010-2015, 2018-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -41,6 +41,11 @@ static void always_on_init(struct kbase_device *kbdev)
CSTD_UNUSED(kbdev);
}
/**
* always_on_term - Term callback function for always-on power policy
*
* @kbdev: kbase device
*/
static void always_on_term(struct kbase_device *kbdev)
{
CSTD_UNUSED(kbdev);

View File

@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
* (C) COPYRIGHT 2010-2021 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2010-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -101,9 +101,8 @@ int kbase_pm_runtime_init(struct kbase_device *kbdev)
void kbase_pm_runtime_term(struct kbase_device *kbdev)
{
if (kbdev->pm.callback_power_runtime_term) {
if (kbdev->pm.callback_power_runtime_term)
kbdev->pm.callback_power_runtime_term(kbdev);
}
}
void kbase_pm_register_access_enable(struct kbase_device *kbdev)
@@ -202,6 +201,13 @@ int kbase_hwaccess_pm_init(struct kbase_device *kbdev)
kbase_pm_hwcnt_disable_worker);
kbase_hwcnt_context_disable(kbdev->hwcnt_gpu_ctx);
#if MALI_USE_CSF && defined(KBASE_PM_RUNTIME)
kbdev->pm.backend.gpu_sleep_supported =
kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_GPU_SLEEP) &&
!kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TURSEHW_1997) &&
kbdev->pm.backend.callback_power_runtime_gpu_active &&
kbdev->pm.backend.callback_power_runtime_gpu_idle;
#endif
if (IS_ENABLED(CONFIG_MALI_HW_ERRATA_1485982_NOT_AFFECTED)) {
kbdev->pm.backend.l2_always_on = false;
@@ -288,7 +294,7 @@ static void pm_handle_power_off(struct kbase_device *kbdev)
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
#if MALI_USE_CSF && defined(KBASE_PM_RUNTIME)
if (kbdev->pm.backend.gpu_wakeup_override ) {
if (kbdev->pm.backend.gpu_wakeup_override) {
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
return;
}
@@ -362,11 +368,6 @@ static void kbase_pm_gpu_poweroff_wait_wq(struct work_struct *data)
kbase_pm_lock(kbdev);
#ifdef CONFIG_MALI_ARBITER_SUPPORT
if (kbase_pm_is_gpu_lost(kbdev))
backend->poweron_required = false;
#endif
pm_handle_power_off(kbdev);
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
@@ -683,6 +684,13 @@ void kbase_pm_wait_for_poweroff_work_complete(struct kbase_device *kbdev)
}
KBASE_EXPORT_TEST_API(kbase_pm_wait_for_poweroff_work_complete);
/**
* is_gpu_powered_down - Check whether GPU is powered down
*
* @kbdev: kbase device
*
* Return: true if GPU is powered down, false otherwise
*/
static bool is_gpu_powered_down(struct kbase_device *kbdev)
{
bool ret;
@@ -882,7 +890,7 @@ void kbase_pm_set_debug_core_mask(struct kbase_device *kbdev,
lockdep_assert_held(&kbdev->pm.lock);
if (kbase_dummy_job_wa_enabled(kbdev)) {
dev_warn(kbdev->dev, "Change of core mask not supported for slot 0 as dummy job WA is enabled");
dev_warn_once(kbdev->dev, "Change of core mask not supported for slot 0 as dummy job WA is enabled");
new_core_mask_js0 = kbdev->pm.debug_core_mask[0];
}

View File

@@ -55,6 +55,9 @@ void kbase_devfreq_set_core_mask(struct kbase_device *kbdev, u64 core_mask)
{
struct kbase_pm_backend_data *pm_backend = &kbdev->pm.backend;
unsigned long flags;
#if MALI_USE_CSF
u64 old_core_mask = 0;
#endif
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
@@ -65,6 +68,8 @@ void kbase_devfreq_set_core_mask(struct kbase_device *kbdev, u64 core_mask)
core_mask, kbdev->pm.debug_core_mask);
goto unlock;
}
old_core_mask = pm_backend->ca_cores_enabled;
#else
if (!(core_mask & kbdev->pm.debug_core_mask_all)) {
dev_err(kbdev->dev, "OPP core mask 0x%llX does not intersect with debug mask 0x%llX\n",
@@ -73,20 +78,53 @@ void kbase_devfreq_set_core_mask(struct kbase_device *kbdev, u64 core_mask)
}
if (kbase_dummy_job_wa_enabled(kbdev)) {
dev_err(kbdev->dev, "Dynamic core scaling not supported as dummy job WA is enabled");
dev_err_once(kbdev->dev, "Dynamic core scaling not supported as dummy job WA is enabled");
goto unlock;
}
#endif /* MALI_USE_CSF */
pm_backend->ca_cores_enabled = core_mask;
kbase_pm_update_state(kbdev);
unlock:
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
#if MALI_USE_CSF
/* Check if old_core_mask contained the undesired cores and wait
* for those cores to get powered down
*/
if ((core_mask & old_core_mask) != old_core_mask) {
bool can_wait;
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
can_wait = kbdev->pm.backend.gpu_ready && kbase_pm_is_mcu_desired(kbdev);
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
/* This check is ideally not required, the wait function can
* deal with the GPU power down. But it has been added to
* address the scenario where down-scaling request comes from
* the platform specific code soon after the GPU power down
* and at the same time an application thread tries to
* power up the GPU (on the flush of GPU queue).
* The platform specific @ref callback_power_on that gets
* invoked on power up does not return until down-scaling
* request is complete. The check mitigates the race caused by
* the problem in platform specific code.
*/
if (likely(can_wait)) {
if (kbase_pm_wait_for_desired_state(kbdev)) {
dev_warn(kbdev->dev,
"Wait for update of core_mask from %llx to %llx failed",
old_core_mask, core_mask);
}
}
}
#endif
dev_dbg(kbdev->dev, "Devfreq policy : new core mask=%llX\n",
pm_backend->ca_cores_enabled);
return;
unlock:
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
}
KBASE_EXPORT_TEST_API(kbase_devfreq_set_core_mask);
#endif

View File

@@ -101,6 +101,8 @@ static u64 kbase_pm_get_state(
enum kbase_pm_core_type core_type,
enum kbasep_pm_action action);
static void kbase_pm_hw_issues_apply(struct kbase_device *kbdev);
#if MALI_USE_CSF
bool kbase_pm_is_mcu_desired(struct kbase_device *kbdev)
{
@@ -655,6 +657,35 @@ static void kbase_pm_enable_mcu_db_notification(struct kbase_device *kbdev)
}
#endif
/**
 * kbasep_pm_toggle_power_interrupt - Toggles the IRQ mask for power interrupts
 *                                    from the firmware
 *
 * @kbdev: Pointer to the device
 * @enable: true to unmask the power interrupts, false to mask them
 *
 * Performs a read-modify-write of GPU_IRQ_MASK that sets or clears the
 * POWER_CHANGED_ALL and POWER_CHANGED_SINGLE bits, leaving all other bits
 * as they were read.
 *
 * The two interrupts can be disabled after L2 has been turned on when FW is
 * controlling the power for the shader cores. Correspondingly, they can be
 * re-enabled after the MCU has been disabled before the power down of L2.
 *
 * The caller must hold the hwaccess_lock.
 */
static void kbasep_pm_toggle_power_interrupt(struct kbase_device *kbdev, bool enable)
{
	const u32 pwr_irq_bits = POWER_CHANGED_ALL | POWER_CHANGED_SINGLE;
	u32 mask;

	lockdep_assert_held(&kbdev->hwaccess_lock);

	mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK));
	mask = enable ? (mask | pwr_irq_bits) : (mask & ~pwr_irq_bits);
	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), mask);
}
static int kbase_pm_mcu_update_state(struct kbase_device *kbdev)
{
struct kbase_pm_backend_data *backend = &kbdev->pm.backend;
@@ -698,6 +729,8 @@ static int kbase_pm_mcu_update_state(struct kbase_device *kbdev)
kbase_pm_ca_get_core_mask(kbdev);
kbase_csf_firmware_global_reinit(kbdev,
backend->shaders_desired_mask);
if (!kbdev->csf.firmware_hctl_core_pwr)
kbasep_pm_toggle_power_interrupt(kbdev, false);
backend->mcu_state =
KBASE_MCU_ON_GLB_REINIT_PEND;
}
@@ -906,6 +939,8 @@ static int kbase_pm_mcu_update_state(struct kbase_device *kbdev)
case KBASE_MCU_PEND_OFF:
/* wait synchronously for the MCU to get disabled */
kbase_csf_firmware_disable_mcu_wait(kbdev);
if (!kbdev->csf.firmware_hctl_core_pwr)
kbasep_pm_toggle_power_interrupt(kbdev, true);
backend->mcu_state = KBASE_MCU_OFF;
break;
#ifdef KBASE_PM_RUNTIME
@@ -924,6 +959,11 @@ static int kbase_pm_mcu_update_state(struct kbase_device *kbdev)
backend->mcu_state = KBASE_MCU_IN_SLEEP;
kbase_pm_enable_db_mirror_interrupt(kbdev);
kbase_csf_scheduler_reval_idleness_post_sleep(kbdev);
/* Enable PM interrupt, after MCU has been put
* to sleep, for the power down of L2.
*/
if (!kbdev->csf.firmware_hctl_core_pwr)
kbasep_pm_toggle_power_interrupt(kbdev, true);
}
break;
@@ -934,6 +974,11 @@ static int kbase_pm_mcu_update_state(struct kbase_device *kbdev)
kbdev, kbase_backend_get_cycle_cnt(kbdev));
kbase_pm_enable_mcu_db_notification(kbdev);
kbase_pm_disable_db_mirror_interrupt(kbdev);
/* Disable PM interrupt after L2 has been
* powered up for the wakeup of MCU.
*/
if (!kbdev->csf.firmware_hctl_core_pwr)
kbasep_pm_toggle_power_interrupt(kbdev, false);
backend->mcu_state = KBASE_MCU_ON_HWCNT_ENABLE;
}
break;
@@ -1017,6 +1062,18 @@ static void kbase_pm_l2_clear_backend_slot_submit_kctx(struct kbase_device *kbde
}
#endif
/**
 * can_power_down_l2 - Check whether the L2 may be powered down right now
 *
 * @kbdev: Device pointer
 *
 * Return: On CSF builds, false while an MMU command is in progress and true
 *         otherwise; on non-CSF builds, always true.
 */
static bool can_power_down_l2(struct kbase_device *kbdev)
{
#if !MALI_USE_CSF
	return true;
#else
	/* Due to the HW issue GPU2019-3878, L2 power off must be prevented
	 * whilst an MMU command is in progress.
	 */
	return !kbdev->mmu_hw_operation_in_progress;
#endif
}
static int kbase_pm_l2_update_state(struct kbase_device *kbdev)
{
struct kbase_pm_backend_data *backend = &kbdev->pm.backend;
@@ -1258,9 +1315,8 @@ static int kbase_pm_l2_update_state(struct kbase_device *kbdev)
}
backend->hwcnt_desired = false;
if (!backend->hwcnt_disabled) {
if (!backend->hwcnt_disabled)
kbase_pm_trigger_hwcnt_disable(kbdev);
}
#endif
if (backend->hwcnt_disabled) {
@@ -1297,27 +1353,31 @@ static int kbase_pm_l2_update_state(struct kbase_device *kbdev)
break;
case KBASE_L2_POWER_DOWN:
if (!backend->l2_always_on)
/* Powering off the L2 will also power off the
* tiler.
*/
kbase_pm_invoke(kbdev, KBASE_PM_CORE_L2,
l2_present,
ACTION_PWROFF);
else
/* If L2 cache is powered then we must flush it
* before we power off the GPU. Normally this
* would have been handled when the L2 was
* powered off.
*/
kbase_gpu_start_cache_clean_nolock(
kbdev, GPU_COMMAND_CACHE_CLN_INV_L2);
if (kbase_pm_is_l2_desired(kbdev))
backend->l2_state = KBASE_L2_PEND_ON;
else if (can_power_down_l2(kbdev)) {
if (!backend->l2_always_on)
/* Powering off the L2 will also power off the
* tiler.
*/
kbase_pm_invoke(kbdev, KBASE_PM_CORE_L2,
l2_present,
ACTION_PWROFF);
else
/* If L2 cache is powered then we must flush it
* before we power off the GPU. Normally this
* would have been handled when the L2 was
* powered off.
*/
kbase_gpu_start_cache_clean_nolock(
kbdev, GPU_COMMAND_CACHE_CLN_INV_L2);
#if !MALI_USE_CSF
KBASE_KTRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE_TILER, NULL, 0u);
KBASE_KTRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE_TILER, NULL, 0u);
#else
KBASE_KTRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE_L2, NULL, 0u);
KBASE_KTRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE_L2, NULL, 0u);
#endif
backend->l2_state = KBASE_L2_PEND_OFF;
backend->l2_state = KBASE_L2_PEND_OFF;
}
break;
case KBASE_L2_PEND_OFF:
@@ -1803,12 +1863,7 @@ static bool kbase_pm_is_in_desired_state_nolock(struct kbase_device *kbdev)
lockdep_assert_held(&kbdev->hwaccess_lock);
if (kbase_pm_is_l2_desired(kbdev) &&
kbdev->pm.backend.l2_state != KBASE_L2_ON)
in_desired_state = false;
else if (!kbase_pm_is_l2_desired(kbdev) &&
kbdev->pm.backend.l2_state != KBASE_L2_OFF)
in_desired_state = false;
in_desired_state = kbase_pm_l2_is_in_desired_state(kbdev);
#if !MALI_USE_CSF
if (kbdev->pm.backend.shaders_desired &&
@@ -1818,13 +1873,7 @@ static bool kbase_pm_is_in_desired_state_nolock(struct kbase_device *kbdev)
kbdev->pm.backend.shaders_state != KBASE_SHADERS_OFF_CORESTACK_OFF)
in_desired_state = false;
#else
if (kbase_pm_is_mcu_desired(kbdev) &&
kbdev->pm.backend.mcu_state != KBASE_MCU_ON)
in_desired_state = false;
else if (!kbase_pm_is_mcu_desired(kbdev) &&
(kbdev->pm.backend.mcu_state != KBASE_MCU_OFF) &&
(kbdev->pm.backend.mcu_state != KBASE_MCU_IN_SLEEP))
in_desired_state = false;
in_desired_state = kbase_pm_mcu_is_in_desired_state(kbdev);
#endif
return in_desired_state;
@@ -2077,11 +2126,13 @@ void kbase_pm_reset_complete(struct kbase_device *kbdev)
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
}
/* Timeout for kbase_pm_wait_for_desired_state when wait_event_killable has
* aborted due to a fatal signal. If the time spent waiting has exceeded this
* threshold then there is most likely a hardware issue.
#if !MALI_USE_CSF
/* Timeout in milliseconds for GPU Power Management to reach the desired
* Shader and L2 state. If the time spent waiting has exceeded this threshold
* then there is most likely a hardware issue.
*/
#define PM_TIMEOUT_MS (5000) /* 5s */
#endif
static void kbase_pm_timed_out(struct kbase_device *kbdev)
{
@@ -2156,7 +2207,7 @@ int kbase_pm_wait_for_l2_powered(struct kbase_device *kbdev)
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
#if MALI_USE_CSF
timeout = kbase_csf_timeout_in_jiffies(PM_TIMEOUT_MS);
timeout = kbase_csf_timeout_in_jiffies(kbase_get_timeout_ms(kbdev, CSF_PM_TIMEOUT));
#else
timeout = msecs_to_jiffies(PM_TIMEOUT_MS);
#endif
@@ -2188,7 +2239,7 @@ int kbase_pm_wait_for_desired_state(struct kbase_device *kbdev)
unsigned long flags;
long remaining;
#if MALI_USE_CSF
long timeout = kbase_csf_timeout_in_jiffies(PM_TIMEOUT_MS);
long timeout = kbase_csf_timeout_in_jiffies(kbase_get_timeout_ms(kbdev, CSF_PM_TIMEOUT));
#else
long timeout = msecs_to_jiffies(PM_TIMEOUT_MS);
#endif
@@ -2285,6 +2336,7 @@ static void update_user_reg_page_mapping(struct kbase_device *kbdev)
{
lockdep_assert_held(&kbdev->pm.lock);
mutex_lock(&kbdev->csf.reg_lock);
if (kbdev->csf.mali_file_inode) {
/* This would zap the pte corresponding to the mapping of User
* register page for all the Kbase contexts.
@@ -2293,6 +2345,7 @@ static void update_user_reg_page_mapping(struct kbase_device *kbdev)
BASEP_MEM_CSF_USER_REG_PAGE_HANDLE,
PAGE_SIZE, 1);
}
mutex_unlock(&kbdev->csf.reg_lock);
}
#endif
@@ -2358,6 +2411,7 @@ void kbase_pm_clock_on(struct kbase_device *kbdev, bool is_resume)
update_user_reg_page_mapping(kbdev);
#endif
if (reset_required) {
/* GPU state was lost, reset GPU to ensure it is in a
* consistent state
@@ -2659,8 +2713,8 @@ static int kbase_pm_hw_issues_detect(struct kbase_device *kbdev)
{
struct device_node *np = kbdev->dev->of_node;
const u32 gpu_id = kbdev->gpu_props.props.raw_props.gpu_id;
const u32 prod_id = (gpu_id & GPU_ID_VERSION_PRODUCT_ID) >>
GPU_ID_VERSION_PRODUCT_ID_SHIFT;
const u32 prod_id =
(gpu_id & GPU_ID_VERSION_PRODUCT_ID) >> KBASE_GPU_ID_VERSION_PRODUCT_ID_SHIFT;
int error = 0;
kbdev->hw_quirks_gpu = 0;

View File

@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
* (C) COPYRIGHT 2010-2021 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2010-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -301,6 +301,8 @@ void kbase_pm_update_state(struct kbase_device *kbdev);
* kbase_pm_state_machine_init - Initialize the state machines, primarily the
* shader poweroff timer
* @kbdev: Device pointer
*
* Return: 0 on success, error code on error
*/
int kbase_pm_state_machine_init(struct kbase_device *kbdev);
@@ -453,6 +455,8 @@ void kbase_pm_wait_for_gpu_power_down(struct kbase_device *kbdev);
* Setup the power management callbacks and initialize/enable the runtime-pm
* for the Mali GPU platform device, using the callback function. This must be
* called before the kbase_pm_register_access_enable() function.
*
* Return: 0 on success, error code on error
*/
int kbase_pm_runtime_init(struct kbase_device *kbdev);
@@ -810,8 +814,49 @@ static inline bool kbase_pm_no_mcu_core_pwroff(struct kbase_device *kbdev)
return kbdev->pm.backend.csf_pm_sched_flags &
CSF_DYNAMIC_PM_CORE_KEEP_ON;
}
/**
 * kbase_pm_mcu_is_in_desired_state - Check if MCU is in stable ON/OFF state.
 *
 * @kbdev: Device pointer
 *
 * When the MCU is desired, only KBASE_MCU_ON counts as the desired state.
 * When it is not desired, both KBASE_MCU_OFF and KBASE_MCU_IN_SLEEP count.
 *
 * Return: true if MCU is in stable ON/OFF state.
 */
static inline bool kbase_pm_mcu_is_in_desired_state(struct kbase_device *kbdev)
{
	/* NOTE(review): kbase_pm_is_mcu_desired() is evaluated once here;
	 * presumed to be a side-effect-free predicate - confirm.
	 */
	if (kbase_pm_is_mcu_desired(kbdev))
		return kbdev->pm.backend.mcu_state == KBASE_MCU_ON;

	/* MCU not wanted: being off or asleep are both acceptable. */
	switch (kbdev->pm.backend.mcu_state) {
	case KBASE_MCU_OFF:
	case KBASE_MCU_IN_SLEEP:
		return true;
	default:
		return false;
	}
}
#endif
/**
 * kbase_pm_l2_is_in_desired_state - Check if L2 is in stable ON/OFF state.
 *
 * @kbdev: Device pointer
 *
 * When the L2 is desired, the desired state is KBASE_L2_ON; otherwise it is
 * KBASE_L2_OFF. Any other L2 state is treated as not yet settled.
 *
 * Return: true if L2 is in stable ON/OFF state.
 */
static inline bool kbase_pm_l2_is_in_desired_state(struct kbase_device *kbdev)
{
	/* NOTE(review): kbase_pm_is_l2_desired() is evaluated once here;
	 * presumed to be a side-effect-free predicate - confirm.
	 */
	if (kbase_pm_is_l2_desired(kbdev))
		return kbdev->pm.backend.l2_state == KBASE_L2_ON;

	return kbdev->pm.backend.l2_state == KBASE_L2_OFF;
}
/**
* kbase_pm_lock - Lock all necessary mutexes to perform PM actions
*

View File

@@ -491,8 +491,7 @@ static void kbase_pm_metrics_active_calc(struct kbase_device *kbdev)
BASE_JD_REQ_SPECIFIC_COHERENT_GROUP)
? katom->device_nr : 0;
if (!WARN_ON(device_nr >= 2))
kbdev->pm.backend.metrics.
active_cl_ctx[device_nr] = 1;
kbdev->pm.backend.metrics.active_cl_ctx[device_nr] = 1;
} else {
kbdev->pm.backend.metrics.active_gl_ctx[js] = 1;
trace_sysgraph(SGR_ACTIVE, 0, js);

View File

@@ -180,9 +180,8 @@ void kbase_pm_update_dynamic_cores_onoff(struct kbase_device *kbdev)
shaders_desired = kbdev->pm.backend.pm_current_policy->shaders_needed(kbdev);
if (shaders_desired && kbase_pm_is_l2_desired(kbdev)) {
if (shaders_desired && kbase_pm_is_l2_desired(kbdev))
kbase_pm_update_state(kbdev);
}
#endif
}
@@ -249,9 +248,8 @@ KBASE_EXPORT_TEST_API(kbase_pm_get_policy);
#if MALI_USE_CSF
static int policy_change_wait_for_L2_off(struct kbase_device *kbdev)
{
#define WAIT_DURATION_MS (3000)
long remaining;
long timeout = kbase_csf_timeout_in_jiffies(WAIT_DURATION_MS);
long timeout = kbase_csf_timeout_in_jiffies(kbase_get_timeout_ms(kbdev, CSF_PM_TIMEOUT));
int err = 0;
/* Wait for L2 becoming off, by which the MCU is also implicitly off

View File

@@ -113,39 +113,60 @@ unsigned int kbase_get_timeout_ms(struct kbase_device *kbdev,
*/
u64 timeout, nr_cycles = 0;
/* Default value to mean 'no cap' */
u64 timeout_cap = U64_MAX;
u64 freq_khz = kbdev->lowest_gpu_freq_khz;
/* Only for debug messages, safe default in case it's mis-maintained */
const char *selector_str = "(unknown)";
WARN_ON(!freq_khz);
switch (selector) {
/* use Firmware timeout if invalid selection */
case KBASE_TIMEOUT_SELECTOR_COUNT:
default:
#if !MALI_USE_CSF
WARN(1, "Invalid timeout selector used! Using default value");
timeout = JM_DEFAULT_TIMEOUT_CYCLES;
CSTD_UNUSED(nr_cycles);
nr_cycles = JM_DEFAULT_TIMEOUT_CYCLES;
break;
#else
/* Use Firmware timeout if invalid selection */
WARN(1,
"Invalid timeout selector used! Using CSF Firmware timeout");
fallthrough;
case CSF_FIRMWARE_TIMEOUT:
selector_str = "CSF_FIRMWARE_TIMEOUT";
nr_cycles = CSF_FIRMWARE_TIMEOUT_CYCLES;
timeout = div_u64(nr_cycles, freq_khz);
/* cap CSF FW timeout to FIRMWARE_PING_INTERVAL_MS
* if calculated timeout exceeds it. This should be adapted to a
* direct timeout comparison once the FIRMWARE_PING_INTERVAL_MS
* option is added to this timeout function. A compile-time check
* such as BUILD_BUG_ON can also be done once the firmware ping
* interval in cycles becomes available as a macro.
/* Setup a cap on CSF FW timeout to FIRMWARE_PING_INTERVAL_MS,
* if calculated timeout exceeds it. This should be adapted to
* a direct timeout comparison once the
* FIRMWARE_PING_INTERVAL_MS option is added to this timeout
* function. A compile-time check such as BUILD_BUG_ON can also
* be done once the firmware ping interval in cycles becomes
* available as a macro.
*/
if (timeout > FIRMWARE_PING_INTERVAL_MS) {
dev_dbg(kbdev->dev, "Capped CSF_FIRMWARE_TIMEOUT %llu to %d",
timeout, FIRMWARE_PING_INTERVAL_MS);
timeout = FIRMWARE_PING_INTERVAL_MS;
}
#endif
timeout_cap = FIRMWARE_PING_INTERVAL_MS;
break;
case CSF_PM_TIMEOUT:
selector_str = "CSF_PM_TIMEOUT";
nr_cycles = CSF_PM_TIMEOUT_CYCLES;
break;
case CSF_GPU_RESET_TIMEOUT:
selector_str = "CSF_GPU_RESET_TIMEOUT";
nr_cycles = CSF_GPU_RESET_TIMEOUT_CYCLES;
break;
#endif
}
timeout = div_u64(nr_cycles, freq_khz);
if (timeout > timeout_cap) {
dev_dbg(kbdev->dev, "Capped %s %llu to %llu", selector_str,
(unsigned long long)timeout, (unsigned long long)timeout_cap);
timeout = timeout_cap;
}
if (WARN(timeout > UINT_MAX,
"Capping excessive timeout %llums for %s at freq %llukHz to UINT_MAX ms",
(unsigned long long)timeout, selector_str, (unsigned long long)freq_khz))
timeout = UINT_MAX;
return (unsigned int)timeout;
}

View File

@@ -34,6 +34,9 @@ bob_defaults {
"CONFIG_MALI_NO_MALI_DEFAULT_GPU={{.gpu}}",
],
},
mali_platform_dt_pin_rst: {
kbuild_options: ["CONFIG_MALI_PLATFORM_DT_PIN_RST=y"],
},
gpu_has_csf: {
kbuild_options: ["CONFIG_MALI_CSF_SUPPORT=y"],
},

View File

@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
* (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -110,6 +110,11 @@ static void kbase_context_flush_jobs(struct kbase_context *kctx)
flush_workqueue(kctx->jctx.job_done_wq);
}
/**
* kbase_context_free - Free kcontext at its destruction
*
* @kctx: kcontext to be freed
*/
static void kbase_context_free(struct kbase_context *kctx)
{
kbase_timeline_post_kbase_context_destroy(kctx);

View File

@@ -152,6 +152,7 @@ int kbase_context_common_init(struct kbase_context *kctx)
init_waitqueue_head(&kctx->event_queue);
atomic_set(&kctx->event_count, 0);
#if !MALI_USE_CSF
atomic_set(&kctx->event_closed, false);
#if IS_ENABLED(CONFIG_GPU_TRACEPOINTS)
@@ -159,6 +160,11 @@ int kbase_context_common_init(struct kbase_context *kctx)
#endif
#endif
#if MALI_USE_CSF
atomic64_set(&kctx->num_fixable_allocs, 0);
atomic64_set(&kctx->num_fixed_allocs, 0);
#endif
bitmap_copy(kctx->cookies, &cookies_mask, BITS_PER_LONG);
kctx->id = atomic_add_return(1, &(kctx->kbdev->ctx_num)) - 1;

View File

@@ -52,7 +52,7 @@
/*
* Maximum number of loops polling the GPU before we assume the GPU has hung.
*/
#define IPA_INACTIVE_MAX_LOOPS ((unsigned int)8000000)
#define IPA_INACTIVE_MAX_LOOPS (8000000U)
/*
* Number of bits used to configure a performance counter in SELECT registers.
@@ -347,9 +347,8 @@ void kbase_ipa_control_init(struct kbase_device *kbdev)
spin_lock_init(&ipa_ctrl->lock);
ipa_ctrl->num_active_sessions = 0;
for (i = 0; i < KBASE_IPA_CONTROL_MAX_SESSIONS; i++) {
for (i = 0; i < KBASE_IPA_CONTROL_MAX_SESSIONS; i++)
ipa_ctrl->sessions[i].active = false;
}
listener_data = kmalloc(sizeof(struct kbase_ipa_control_listener_data),
GFP_KERNEL);
@@ -514,8 +513,10 @@ int kbase_ipa_control_register(
struct kbase_ipa_control_session *session = NULL;
unsigned long flags;
if (WARN_ON(kbdev == NULL) || WARN_ON(perf_counters == NULL) ||
WARN_ON(client == NULL) ||
if (WARN_ON(unlikely(kbdev == NULL)))
return -ENODEV;
if (WARN_ON(perf_counters == NULL) || WARN_ON(client == NULL) ||
WARN_ON(num_counters > KBASE_IPA_CONTROL_MAX_COUNTERS)) {
dev_err(kbdev->dev, "%s: wrong input arguments", __func__);
return -EINVAL;
@@ -697,7 +698,10 @@ int kbase_ipa_control_unregister(struct kbase_device *kbdev, const void *client)
unsigned long flags;
bool new_config = false, valid_session = false;
if (WARN_ON(kbdev == NULL) || WARN_ON(client == NULL)) {
if (WARN_ON(unlikely(kbdev == NULL)))
return -ENODEV;
if (WARN_ON(client == NULL)) {
dev_err(kbdev->dev, "%s: wrong input arguments", __func__);
return -EINVAL;
}
@@ -779,8 +783,10 @@ int kbase_ipa_control_query(struct kbase_device *kbdev, const void *client,
unsigned long flags;
bool gpu_ready;
if (WARN_ON(kbdev == NULL) || WARN_ON(client == NULL) ||
WARN_ON(values == NULL)) {
if (WARN_ON(unlikely(kbdev == NULL)))
return -ENODEV;
if (WARN_ON(client == NULL) || WARN_ON(values == NULL)) {
dev_err(kbdev->dev, "%s: wrong input arguments", __func__);
return -EINVAL;
}

View File

@@ -27,7 +27,7 @@
#include <linux/export.h>
#include <linux/priority_control_manager.h>
#include <linux/shmem_fs.h>
#include <uapi/gpu/arm/bifrost/csf/mali_gpu_csf_registers.h>
#include <csf/mali_kbase_csf_registers.h>
#include "mali_kbase_csf_tiler_heap.h"
#include <mmu/mali_kbase_mmu.h>
#include "mali_kbase_csf_timeout.h"
@@ -561,6 +561,10 @@ static int csf_queue_register_internal(struct kbase_context *kctx,
queue->sync_ptr = 0;
queue->sync_value = 0;
#if IS_ENABLED(CONFIG_DEBUG_FS)
queue->saved_cmd_ptr = 0;
#endif
queue->sb_status = 0;
queue->blocked_reason = CS_STATUS_BLOCKED_REASON_REASON_UNBLOCKED;
@@ -572,6 +576,8 @@ static int csf_queue_register_internal(struct kbase_context *kctx,
INIT_WORK(&queue->fatal_event_work, fatal_event_worker);
list_add(&queue->link, &kctx->csf.queue_list);
queue->extract_ofs = 0;
region->flags |= KBASE_REG_NO_USER_FREE;
region->user_data = queue;
@@ -621,13 +627,13 @@ int kbase_csf_queue_register_ex(struct kbase_context *kctx,
return -EINVAL;
/* Validate the cs_trace configuration parameters */
if (reg->ex_buffer_size &&
((reg->ex_event_size > max_size) ||
(reg->ex_buffer_size & (reg->ex_buffer_size - 1)) ||
(reg->ex_buffer_size < min_buf_size)))
return -EINVAL;
if (reg->ex_buffer_size &&
((reg->ex_event_size > max_size) ||
(reg->ex_buffer_size & (reg->ex_buffer_size - 1)) ||
(reg->ex_buffer_size < min_buf_size)))
return -EINVAL;
return csf_queue_register_internal(kctx, NULL, reg);
return csf_queue_register_internal(kctx, NULL, reg);
}
static void unbind_queue(struct kbase_context *kctx,
@@ -1195,7 +1201,7 @@ static int create_protected_suspend_buffer(struct kbase_device *const kbdev,
}
s_buf->pma = kbase_csf_protected_memory_alloc(kbdev, phys,
nr_pages);
nr_pages, true);
if (s_buf->pma == NULL) {
err = -ENOMEM;
goto pma_alloc_failed;
@@ -1229,7 +1235,7 @@ mmu_insert_failed:
mutex_unlock(&kbdev->csf.reg_lock);
add_va_region_failed:
kbase_csf_protected_memory_free(kbdev, s_buf->pma, nr_pages);
kbase_csf_protected_memory_free(kbdev, s_buf->pma, nr_pages, true);
pma_alloc_failed:
kfree(phys);
phy_alloc_failed:
@@ -1479,7 +1485,7 @@ static void term_protected_suspend_buffer(struct kbase_device *const kbdev,
kbase_remove_va_region(kbdev, s_buf->reg);
mutex_unlock(&kbdev->csf.reg_lock);
kbase_csf_protected_memory_free(kbdev, s_buf->pma, nr_pages);
kbase_csf_protected_memory_free(kbdev, s_buf->pma, nr_pages, true);
s_buf->pma = NULL;
kfree(s_buf->reg);
s_buf->reg = NULL;
@@ -1925,7 +1931,7 @@ void kbase_csf_ctx_term(struct kbase_context *kctx)
* This function will handle the OoM event request from the firmware for the
* CS. It will retrieve the address of heap context and heap's
* statistics (like number of render passes in-flight) from the CS's kernel
* kernel output page and pass them to the tiler heap function to allocate a
* output page and pass them to the tiler heap function to allocate a
* new chunk.
* It will also update the CS's kernel input page with the address
* of a new chunk that was allocated.
@@ -2521,8 +2527,24 @@ static void process_cs_interrupts(struct kbase_queue_group *const group,
}
}
if (protm_pend)
queue_work(group->kctx->csf.wq, &group->protm_event_work);
if (protm_pend) {
struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
u32 current_protm_pending_seq =
scheduler->tick_protm_pending_seq;
if (current_protm_pending_seq > group->scan_seq_num) {
scheduler->tick_protm_pending_seq = group->scan_seq_num;
queue_work(group->kctx->csf.wq, &group->protm_event_work);
}
if (test_bit(group->csg_nr, scheduler->csg_slots_idle_mask)) {
clear_bit(group->csg_nr,
scheduler->csg_slots_idle_mask);
dev_dbg(kbdev->dev,
"Group-%d on slot %d de-idled by protm request",
group->handle, group->csg_nr);
}
}
}
/**
@@ -2593,6 +2615,10 @@ static void process_csg_interrupts(struct kbase_device *const kbdev,
CSG_REQ, ack, CSG_REQ_SYNC_UPDATE_MASK);
KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SYNC_UPDATE_INTERRUPT, group, req ^ ack);
/* SYNC_UPDATE events shall invalidate GPU idle event */
atomic_set(&kbdev->csf.scheduler.gpu_no_longer_idle, true);
kbase_csf_event_signal_cpu_only(group->kctx);
}
@@ -2609,15 +2635,25 @@ static void process_csg_interrupts(struct kbase_device *const kbdev,
dev_dbg(kbdev->dev, "Idle notification received for Group %u on slot %d\n",
group->handle, csg_nr);
/* Check if the scheduling tick can be advanced */
if (kbase_csf_scheduler_all_csgs_idle(kbdev)) {
if (!scheduler->gpu_idle_fw_timer_enabled)
kbase_csf_scheduler_advance_tick_nolock(kbdev);
} else if (atomic_read(&scheduler->non_idle_offslot_grps)) {
if (atomic_read(&scheduler->non_idle_offslot_grps)) {
/* If there are non-idle CSGs waiting for a slot, fire
* a tock for a replacement.
*/
mod_delayed_work(scheduler->wq, &scheduler->tock_work, 0);
} else {
u32 current_protm_pending_seq =
scheduler->tick_protm_pending_seq;
if ((current_protm_pending_seq !=
KBASEP_TICK_PROTM_PEND_SCAN_SEQ_NR_INVALID) &&
(group->scan_seq_num < current_protm_pending_seq)) {
/* If the protm enter was prevented due to groups
* priority, then fire a tock for the scheduler
* to re-examine the case.
*/
mod_delayed_work(scheduler->wq,
&scheduler->tock_work, 0);
}
}
}
@@ -2803,20 +2839,29 @@ static inline void process_protm_exit(struct kbase_device *kbdev, u32 glb_ack)
void kbase_csf_interrupt(struct kbase_device *kbdev, u32 val)
{
unsigned long flags;
u32 remaining = val;
u32 csg_interrupts = val & ~JOB_IRQ_GLOBAL_IF;
lockdep_assert_held(&kbdev->hwaccess_lock);
KBASE_KTRACE_ADD(kbdev, CSF_INTERRUPT, NULL, val);
kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR), val);
if (csg_interrupts != 0) {
kbase_csf_scheduler_spin_lock(kbdev, &flags);
while (csg_interrupts != 0) {
int const csg_nr = ffs(csg_interrupts) - 1;
process_csg_interrupts(kbdev, csg_nr);
csg_interrupts &= ~(1 << csg_nr);
}
kbase_csf_scheduler_spin_unlock(kbdev, flags);
}
if (val & JOB_IRQ_GLOBAL_IF) {
const struct kbase_csf_global_iface *const global_iface =
&kbdev->csf.global_iface;
struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
kbdev->csf.interrupt_received = true;
remaining &= ~JOB_IRQ_GLOBAL_IF;
if (!kbdev->csf.firmware_reloaded)
kbase_csf_firmware_reload_completed(kbdev);
@@ -2837,31 +2882,12 @@ void kbase_csf_interrupt(struct kbase_device *kbdev, u32 val)
/* Handle IDLE Hysteresis notification event */
if ((glb_req ^ glb_ack) & GLB_REQ_IDLE_EVENT_MASK) {
int non_idle_offslot_grps;
bool can_suspend_on_idle;
dev_dbg(kbdev->dev, "Idle-hysteresis event flagged");
kbase_csf_firmware_global_input_mask(
global_iface, GLB_REQ, glb_ack,
GLB_REQ_IDLE_EVENT_MASK);
non_idle_offslot_grps = atomic_read(&scheduler->non_idle_offslot_grps);
can_suspend_on_idle = kbase_pm_idle_groups_sched_suspendable(kbdev);
KBASE_KTRACE_ADD(kbdev, SCHEDULER_CAN_IDLE, NULL,
((u64)(u32)non_idle_offslot_grps) | (((u64)can_suspend_on_idle) << 32));
if (!non_idle_offslot_grps) {
if (can_suspend_on_idle)
queue_work(system_highpri_wq,
&scheduler->gpu_idle_work);
} else {
/* Advance the scheduling tick to get
* the non-idle suspended groups loaded
* soon.
*/
kbase_csf_scheduler_advance_tick_nolock(
kbdev);
}
kbase_csf_scheduler_process_gpu_idle_event(kbdev);
}
process_prfcnt_interrupts(kbdev, glb_req, glb_ack);
@@ -2873,23 +2899,8 @@ void kbase_csf_interrupt(struct kbase_device *kbdev, u32 val)
*/
kbase_pm_update_state(kbdev);
}
if (!remaining) {
wake_up_all(&kbdev->csf.event_wait);
KBASE_KTRACE_ADD(kbdev, CSF_INTERRUPT_END, NULL, val);
return;
}
}
kbase_csf_scheduler_spin_lock(kbdev, &flags);
while (remaining != 0) {
int const csg_nr = ffs(remaining) - 1;
process_csg_interrupts(kbdev, csg_nr);
remaining &= ~(1 << csg_nr);
}
kbase_csf_scheduler_spin_unlock(kbdev, flags);
wake_up_all(&kbdev->csf.event_wait);
KBASE_KTRACE_ADD(kbdev, CSF_INTERRUPT_END, NULL, val);
}

View File

@@ -40,7 +40,12 @@
*/
#define KBASEP_USER_DB_NR_INVALID ((s8)-1)
#define FIRMWARE_PING_INTERVAL_MS (8000) /* 8 seconds */
/* Indicates an invalid value for the scan out sequence number, used to
* signify there is no group that has protected mode execution pending.
*/
#define KBASEP_TICK_PROTM_PEND_SCAN_SEQ_NR_INVALID (U32_MAX)
#define FIRMWARE_PING_INTERVAL_MS (12000) /* 12 seconds */
#define FIRMWARE_IDLE_HYSTERESIS_TIME_MS (10) /* Default 10 milliseconds */
@@ -312,7 +317,7 @@ int kbase_csf_setup_dummy_user_reg_page(struct kbase_device *kbdev);
/**
* kbase_csf_free_dummy_user_reg_page - Free the dummy page that was used
* used to replace the User register page
* to replace the User register page
*
* @kbdev: Instance of a GPU platform device that implements a CSF interface.
*/

View File

@@ -54,7 +54,7 @@ static int kbasep_csf_cpu_queue_debugfs_show(struct seq_file *file, void *data)
mutex_lock(&kctx->csf.lock);
if (atomic_read(&kctx->csf.cpu_queue.dump_req_status) !=
BASE_CSF_CPU_QUEUE_DUMP_COMPLETE) {
seq_printf(file, "Dump request already started! (try again)\n");
seq_puts(file, "Dump request already started! (try again)\n");
mutex_unlock(&kctx->csf.lock);
return -EBUSY;
}
@@ -64,7 +64,8 @@ static int kbasep_csf_cpu_queue_debugfs_show(struct seq_file *file, void *data)
kbase_event_wakeup(kctx);
mutex_unlock(&kctx->csf.lock);
seq_printf(file, "CPU Queues table (version:v%u):\n", MALI_CSF_CPU_QUEUE_DEBUGFS_VERSION);
seq_puts(file,
"CPU Queues table (version:v" __stringify(MALI_CSF_CPU_QUEUE_DEBUGFS_VERSION) "):\n");
wait_for_completion_timeout(&kctx->csf.cpu_queue.dump_cmp,
msecs_to_jiffies(3000));
@@ -79,9 +80,8 @@ static int kbasep_csf_cpu_queue_debugfs_show(struct seq_file *file, void *data)
kfree(kctx->csf.cpu_queue.buffer);
kctx->csf.cpu_queue.buffer = NULL;
kctx->csf.cpu_queue.buffer_size = 0;
}
else
seq_printf(file, "Dump error! (time out)\n");
} else
seq_puts(file, "Dump error! (time out)\n");
atomic_set(&kctx->csf.cpu_queue.dump_req_status,
BASE_CSF_CPU_QUEUE_DUMP_COMPLETE);

View File

@@ -172,16 +172,18 @@ static void kbasep_csf_scheduler_dump_active_queue(struct seq_file *file,
cs_active = addr[CS_ACTIVE/4];
#define KBASEP_CSF_DEBUGFS_CS_HEADER_USER_IO \
"Bind Idx, Ringbuf addr, Prio, Insert offset, Extract offset, Active, Doorbell\n"
"Bind Idx, Ringbuf addr, Size, Prio, Insert offset, Extract offset, Active, Doorbell\n"
seq_printf(file, KBASEP_CSF_DEBUGFS_CS_HEADER_USER_IO "%8d, %16llx, %4u, %16llx, %16llx, %6u, %8d\n",
queue->csi_index, queue->base_addr, queue->priority,
cs_insert, cs_extract, cs_active, queue->doorbell_nr);
seq_printf(file, KBASEP_CSF_DEBUGFS_CS_HEADER_USER_IO "%8d, %16llx, %8x, %4u, %16llx, %16llx, %6u, %8d\n",
queue->csi_index, queue->base_addr,
queue->size,
queue->priority, cs_insert, cs_extract, cs_active, queue->doorbell_nr);
/* Print status information for blocked group waiting for sync object. For on-slot queues,
* if cs_trace is enabled, dump the interface's cs_trace configuration.
*/
if (kbase_csf_scheduler_group_get_slot(queue->group) < 0) {
seq_printf(file, "SAVED_CMD_PTR: 0x%llx\n", queue->saved_cmd_ptr);
if (CS_STATUS_WAIT_SYNC_WAIT_GET(queue->status_wait)) {
wait_status = queue->status_wait;
wait_sync_value = queue->sync_value;
@@ -268,17 +270,13 @@ static void kbasep_csf_scheduler_dump_active_queue(struct seq_file *file,
seq_puts(file, "\n");
}
/* Waiting timeout for STATUS_UPDATE acknowledgment, in milliseconds */
#define CSF_STATUS_UPDATE_TO_MS (100)
static void update_active_group_status(struct seq_file *file,
struct kbase_queue_group *const group)
{
struct kbase_device *const kbdev = group->kctx->kbdev;
struct kbase_csf_cmd_stream_group_info const *const ginfo =
&kbdev->csf.global_iface.groups[group->csg_nr];
long remaining =
kbase_csf_timeout_in_jiffies(CSF_STATUS_UPDATE_TO_MS);
long remaining = kbase_csf_timeout_in_jiffies(kbdev->csf.fw_timeout_ms);
unsigned long flags;
/* Global doorbell ring for CSG STATUS_UPDATE request or User doorbell
@@ -327,6 +325,7 @@ static void kbasep_csf_scheduler_dump_active_group(struct seq_file *file,
struct kbase_device *const kbdev = group->kctx->kbdev;
u32 ep_c, ep_r;
char exclusive;
char idle = 'N';
struct kbase_csf_cmd_stream_group_info const *const ginfo =
&kbdev->csf.global_iface.groups[group->csg_nr];
u8 slot_priority =
@@ -345,8 +344,12 @@ static void kbasep_csf_scheduler_dump_active_group(struct seq_file *file,
else
exclusive = '0';
seq_puts(file, "GroupID, CSG NR, CSG Prio, Run State, Priority, C_EP(Alloc/Req), F_EP(Alloc/Req), T_EP(Alloc/Req), Exclusive\n");
seq_printf(file, "%7d, %6d, %8d, %9d, %8d, %11d/%3d, %11d/%3d, %11d/%3d, %9c\n",
if (kbase_csf_firmware_csg_output(ginfo, CSG_STATUS_STATE) &
CSG_STATUS_STATE_IDLE_MASK)
idle = 'Y';
seq_puts(file, "GroupID, CSG NR, CSG Prio, Run State, Priority, C_EP(Alloc/Req), F_EP(Alloc/Req), T_EP(Alloc/Req), Exclusive, Idle\n");
seq_printf(file, "%7d, %6d, %8d, %9d, %8d, %11d/%3d, %11d/%3d, %11d/%3d, %9c, %4c\n",
group->handle,
group->csg_nr,
slot_priority,
@@ -358,7 +361,8 @@ static void kbasep_csf_scheduler_dump_active_group(struct seq_file *file,
CSG_STATUS_EP_REQ_FRAGMENT_EP_GET(ep_r),
CSG_STATUS_EP_CURRENT_TILER_EP_GET(ep_c),
CSG_STATUS_EP_REQ_TILER_EP_GET(ep_r),
exclusive);
exclusive,
idle);
/* Wait for the User doorbell ring to take effect */
if (kbdev->csf.scheduler.state != SCHED_SLEEPING)

View File

@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
* (C) COPYRIGHT 2018-2021 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2018-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -252,6 +252,24 @@ enum kbase_queue_group_priority {
KBASE_QUEUE_GROUP_PRIORITY_COUNT
};
/**
 * enum kbase_timeout_selector - Identifies which timeout is to be scaled
 *                               using the lowest GPU frequency.
 * @CSF_FIRMWARE_TIMEOUT: Timeout for a response from the CSF firmware.
 * @CSF_PM_TIMEOUT: Timeout for GPU Power Management to bring the Shader, L2
 *                  and MCU to their desired state.
 * @CSF_GPU_RESET_TIMEOUT: Timeout for a GPU reset to complete.
 * @KBASE_TIMEOUT_SELECTOR_COUNT: Number of timeout selectors. Must remain the
 *                                final enumerator.
 */
enum kbase_timeout_selector {
	CSF_FIRMWARE_TIMEOUT = 0,
	CSF_PM_TIMEOUT,
	CSF_GPU_RESET_TIMEOUT,

	/* Keep this entry last: it is the count of the selectors above */
	KBASE_TIMEOUT_SELECTOR_COUNT
};
/**
* struct kbase_csf_notification - Event or error generated as part of command
@@ -333,6 +351,13 @@ struct kbase_csf_notification {
* @cs_fatal_info: Records additional information about the CS fatal event.
* @cs_fatal: Records information about the CS fatal event.
* @pending: Indicating whether the queue has new submitted work.
* @extract_ofs: The current EXTRACT offset, this is updated during certain
* events such as GPU idle IRQ in order to help detect a
* queue's true idle status.
* @saved_cmd_ptr: The command pointer value for the GPU queue, saved when the
* group to which the queue is bound is suspended.
* This can be useful in certain cases to know up to which
* point execution reached in the linear command buffer.
*/
struct kbase_queue {
struct kbase_context *kctx;
@@ -367,6 +392,10 @@ struct kbase_queue {
u64 cs_fatal_info;
u32 cs_fatal;
atomic_t pending;
u64 extract_ofs;
#if IS_ENABLED(CONFIG_DEBUG_FS)
u64 saved_cmd_ptr;
#endif
};
/**
@@ -851,11 +880,14 @@ struct kbase_csf_csg_slot {
* This pointer being set doesn't necessarily indicate
* that GPU is in protected mode, kbdev->protected_mode
* needs to be checked for that.
* @gpu_idle_fw_timer_enabled: Whether the CSF scheduler has activiated the
* firmware idle hysteresis timer for preparing a
* GPU suspend on idle.
* @idle_wq: Workqueue for executing GPU idle notification
* handler.
* @gpu_idle_work: Work item for facilitating the scheduler to bring
* the GPU to a low-power mode on becoming idle.
* @gpu_no_longer_idle: Effective only when the GPU idle worker has been
* queued for execution, this indicates whether the
* GPU has become non-idle since the last time the
* idle notification was received.
* @non_idle_offslot_grps: Count of off-slot non-idle groups. Reset during
* the scheduler active phase in a tick. It then
* tracks the count of non-idle groups across all the
@@ -876,6 +908,12 @@ struct kbase_csf_csg_slot {
* when scheduling tick needs to be advanced from
* interrupt context, without actually deactivating
* the @tick_timer first and then enqueuing @tick_work.
* @tick_protm_pending_seq: Scan out sequence number of the group that has
* protected mode execution pending for the queue(s)
* bound to it and will be considered first for the
* protected mode execution compared to other such
* groups. It is updated on every tick/tock.
* @interrupt_lock is used to serialize the access.
*/
struct kbase_csf_scheduler {
struct mutex lock;
@@ -907,13 +945,15 @@ struct kbase_csf_scheduler {
struct kbase_queue_group *top_grp;
bool tock_pending_request;
struct kbase_queue_group *active_protm_grp;
bool gpu_idle_fw_timer_enabled;
struct workqueue_struct *idle_wq;
struct work_struct gpu_idle_work;
atomic_t gpu_no_longer_idle;
atomic_t non_idle_offslot_grps;
u32 non_idle_scanout_grps;
u32 pm_active_count;
unsigned int csg_scheduling_period_ms;
bool tick_timer_active;
u32 tick_protm_pending_seq;
};
/*
@@ -1050,8 +1090,7 @@ struct kbase_ipa_control_prfcnt_config {
*
*/
struct kbase_ipa_control_prfcnt_block {
struct kbase_ipa_control_prfcnt_config
select[KBASE_IPA_CONTROL_NUM_BLOCK_COUNTERS];
struct kbase_ipa_control_prfcnt_config select[KBASE_IPA_CONTROL_NUM_BLOCK_COUNTERS];
size_t num_available_counters;
};
@@ -1074,8 +1113,7 @@ struct kbase_ipa_control_prfcnt_block {
*/
struct kbase_ipa_control {
struct kbase_ipa_control_prfcnt_block blocks[KBASE_IPA_CORE_TYPE_NUM];
struct kbase_ipa_control_session
sessions[KBASE_IPA_CONTROL_MAX_SESSIONS];
struct kbase_ipa_control_session sessions[KBASE_IPA_CONTROL_MAX_SESSIONS];
spinlock_t lock;
void *rtm_listener_data;
size_t num_active_sessions;
@@ -1089,8 +1127,15 @@ struct kbase_ipa_control {
* @node: Interface objects are on the kbase_device:csf.firmware_interfaces
* list using this list_head to link them
* @phys: Array of the physical (tagged) addresses making up this interface
* @reuse_pages: Flag used to identify if the FW interface entry reuses
* physical pages allocated for another FW interface entry.
* @is_small_page: Flag used to identify if small pages are used for
* the FW interface entry.
* @name: NULL-terminated string naming the interface
* @num_pages: Number of entries in @phys and @pma (and length of the interface)
* @num_pages_aligned: Same as @num_pages except for the case when @is_small_page
* is false and @reuse_pages is false and therefore will be
* aligned to NUM_4K_PAGES_IN_2MB_PAGE.
* @virtual: Starting GPU virtual address this interface is mapped at
* @flags: bitmask of CSF_FIRMWARE_ENTRY_* conveying the interface attributes
* @data_start: Offset into firmware image at which the interface data starts
@@ -1102,8 +1147,11 @@ struct kbase_ipa_control {
struct kbase_csf_firmware_interface {
struct list_head node;
struct tagged_addr *phys;
bool reuse_pages;
bool is_small_page;
char *name;
u32 num_pages;
u32 num_pages_aligned;
u32 virtual;
u32 flags;
u32 data_start;
@@ -1177,7 +1225,7 @@ struct kbase_csf_hwcnt {
* @reg_lock: Lock to serialize the MCU firmware related actions
* that affect all contexts such as allocation of
* regions from shared interface area, assignment of
* of hardware doorbell pages, assignment of CSGs,
* hardware doorbell pages, assignment of CSGs,
* sending global requests.
* @event_wait: Wait queue to wait for receiving csf events, i.e.
* the interrupt from CSF firmware, or scheduler state
@@ -1200,6 +1248,10 @@ struct kbase_csf_hwcnt {
* in GPU reset has completed.
* @firmware_reload_needed: Flag for indicating that the firmware needs to be
* reloaded as part of the GPU reset action.
* @firmware_full_reload_needed: Flag for indicating that the firmware needs to
* be fully re-loaded. This may be set when the
* boot or re-init of MCU fails after a successful
* soft reset.
* @firmware_hctl_core_pwr: Flag for indicating that the host driver is in
* charge of the shader core's power transitions, and
* the mcu_core_pwroff timeout feature is disabled
@@ -1259,6 +1311,7 @@ struct kbase_csf_device {
bool firmware_inited;
bool firmware_reloaded;
bool firmware_reload_needed;
bool firmware_full_reload_needed;
bool firmware_hctl_core_pwr;
struct work_struct firmware_reload_work;
bool glb_init_request_pending;

View File

@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
* (C) COPYRIGHT 2021 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -110,9 +110,9 @@ static inline void kbase_csf_event_signal_cpu_only(struct kbase_context *kctx)
/**
* kbase_csf_event_init - Initialize event object
*
* This function initializes the event object.
*
* @kctx: The kbase context whose event object will be initialized.
*
* This function initializes the event object.
*/
void kbase_csf_event_init(struct kbase_context *const kctx);

View File

@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
* (C) COPYRIGHT 2018-2021 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2018-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -24,6 +24,7 @@
#include "mali_kbase_csf_trace_buffer.h"
#include "mali_kbase_csf_timeout.h"
#include "mali_kbase_mem.h"
#include "mali_kbase_mem_pool_group.h"
#include "mali_kbase_reset_gpu.h"
#include "mali_kbase_ctx_sched.h"
#include "mali_kbase_csf_scheduler.h"
@@ -35,7 +36,7 @@
#include "mali_kbase_csf_tl_reader.h"
#include "backend/gpu/mali_kbase_clk_rate_trace_mgr.h"
#include <csf/ipa_control/mali_kbase_csf_ipa_control.h>
#include <uapi/gpu/arm/bifrost/csf/mali_gpu_csf_registers.h>
#include <csf/mali_kbase_csf_registers.h>
#include <linux/list.h>
#include <linux/slab.h>
@@ -50,7 +51,6 @@
#include <asm/arch_timer.h>
#define MALI_MAX_FIRMWARE_NAME_LEN ((size_t)20)
#define ACK_TIMEOUT_MILLISECONDS 1000
static char fw_name[MALI_MAX_FIRMWARE_NAME_LEN] = "mali_csffw.bin";
module_param_string(fw_name, fw_name, sizeof(fw_name), 0644);
@@ -105,9 +105,9 @@ MODULE_PARM_DESC(fw_debug,
#define CSF_MAX_FW_STOP_LOOPS (100000)
#define CSF_GLB_REQ_CFG_MASK \
(GLB_REQ_CFG_ALLOC_EN_MASK | GLB_REQ_CFG_PROGRESS_TIMER_MASK | \
GLB_REQ_CFG_PWROFF_TIMER_MASK)
#define CSF_GLB_REQ_CFG_MASK \
(GLB_REQ_CFG_ALLOC_EN_MASK | GLB_REQ_CFG_PROGRESS_TIMER_MASK | \
GLB_REQ_CFG_PWROFF_TIMER_MASK | GLB_REQ_IDLE_ENABLE_MASK)
static inline u32 input_page_read(const u32 *const input, const u32 offset)
{
@@ -190,11 +190,11 @@ static int setup_shared_iface_static_region(struct kbase_device *kbdev)
return -EINVAL;
reg = kbase_alloc_free_region(&kbdev->csf.shared_reg_rbtree, 0,
interface->num_pages, KBASE_REG_ZONE_MCU_SHARED);
interface->num_pages_aligned, KBASE_REG_ZONE_MCU_SHARED);
if (reg) {
mutex_lock(&kbdev->csf.reg_lock);
ret = kbase_add_va_region_rbtree(kbdev, reg,
interface->virtual, interface->num_pages, 1);
interface->virtual, interface->num_pages_aligned, 1);
mutex_unlock(&kbdev->csf.reg_lock);
if (ret)
kfree(reg);
@@ -423,7 +423,7 @@ static void load_fw_image_section(struct kbase_device *kbdev, const u8 *data,
}
}
static int reload_fw_data_sections(struct kbase_device *kbdev)
static int reload_fw_image(struct kbase_device *kbdev)
{
const u32 magic = FIRMWARE_HEADER_MAGIC;
struct kbase_csf_firmware_interface *interface;
@@ -451,23 +451,78 @@ static int reload_fw_data_sections(struct kbase_device *kbdev)
}
list_for_each_entry(interface, &kbdev->csf.firmware_interfaces, node) {
/* Skip reload of text & read only data sections */
if ((interface->flags & CSF_FIRMWARE_ENTRY_EXECUTE) ||
!(interface->flags & CSF_FIRMWARE_ENTRY_WRITE))
continue;
/* Don't skip re-loading any section if full reload was requested */
if (!kbdev->csf.firmware_full_reload_needed) {
/* Skip reload of text & read only data sections */
if ((interface->flags & CSF_FIRMWARE_ENTRY_EXECUTE) ||
!(interface->flags & CSF_FIRMWARE_ENTRY_WRITE))
continue;
}
load_fw_image_section(kbdev, firmware->data, interface->phys,
interface->num_pages, interface->flags,
interface->data_start, interface->data_end);
}
kbase_csf_firmware_reload_trace_buffers_data(kbdev);
kbdev->csf.firmware_full_reload_needed = false;
kbase_csf_firmware_reload_trace_buffers_data(kbdev);
out:
release_firmware(firmware);
return ret;
}
/**
 * entry_find_large_page_to_reuse() - Find if the large page of a previously
 *                                    parsed FW interface entry can be reused
 *                                    to store the contents of a new FW
 *                                    interface entry.
 *
 * @kbdev: Kbase device structure
 * @virtual_start: Start of the virtual address range required for an entry allocation
 * @virtual_end: End of the virtual address range required for an entry allocation
 * @phys: Output parameter. Pointer to the array of physical (tagged) addresses
 *        making up the new FW interface entry; meant to be pointed at the array
 *        already allocated for a previously parsed FW interface entry that uses
 *        large page(s). Set to NULL if no appropriate entry is found.
 * @pma: Output parameter. Pointer to a protected memory allocation; meant to be
 *       pointed at the protected memory allocation of a previously parsed FW
 *       interface entry that uses large page(s) from protected memory.
 *       Set to NULL if no appropriate entry is found.
 * @num_pages: Number of pages requested.
 * @num_pages_aligned: Output parameter carrying the number of 4KB pages within
 *                     the 2MB pages aligned allocation.
 * @is_small_page: Output flag used to select between the small and large page
 *                 to be used for the FW entry allocation.
 *
 * This implementation does not search the already initialized interfaces: it
 * reports that nothing can be reused, selects small pages, leaves the page
 * count unaligned (equal to @num_pages) and sets @phys and @pma to NULL.
 *
 * Return: true if a large page can be reused, false otherwise (always false
 * in this implementation).
 */
static inline bool entry_find_large_page_to_reuse(
	struct kbase_device *kbdev, const u32 virtual_start, const u32 virtual_end,
	struct tagged_addr **phys, struct protected_memory_allocation ***pma,
	u32 num_pages, u32 *num_pages_aligned, bool *is_small_page)
{
	/* Without large page support the request is passed through as-is */
	*num_pages_aligned = num_pages;
	*is_small_page = true;

	/* There is no earlier allocation to hand back to the caller */
	*phys = NULL;
	*pma = NULL;

	return false;
}
/**
* parse_memory_setup_entry() - Process an "interface memory setup" section
*
@@ -493,6 +548,7 @@ static int parse_memory_setup_entry(struct kbase_device *kbdev,
const u32 data_start = entry[3];
const u32 data_end = entry[4];
u32 num_pages;
u32 num_pages_aligned;
char *name;
struct tagged_addr *phys = NULL;
struct kbase_csf_firmware_interface *interface = NULL;
@@ -500,6 +556,8 @@ static int parse_memory_setup_entry(struct kbase_device *kbdev,
unsigned long mem_flags = 0;
u32 cache_mode = 0;
struct protected_memory_allocation **pma = NULL;
bool reuse_pages = false;
bool is_small_page = true;
if (data_end < data_start) {
dev_err(kbdev->dev, "Firmware corrupt, data_end < data_start (0x%x<0x%x)\n",
@@ -542,23 +600,37 @@ static int parse_memory_setup_entry(struct kbase_device *kbdev,
num_pages = (virtual_end - virtual_start)
>> PAGE_SHIFT;
phys = kmalloc_array(num_pages, sizeof(*phys), GFP_KERNEL);
reuse_pages = entry_find_large_page_to_reuse(
kbdev, virtual_start, virtual_end, &phys, &pma,
num_pages, &num_pages_aligned, &is_small_page);
if (!reuse_pages)
phys = kmalloc_array(num_pages_aligned, sizeof(*phys), GFP_KERNEL);
if (!phys)
return -ENOMEM;
if (protected_mode) {
pma = kbase_csf_protected_memory_alloc(kbdev, phys, num_pages);
if (pma == NULL) {
ret = -ENOMEM;
goto out;
if (!reuse_pages) {
pma = kbase_csf_protected_memory_alloc(
kbdev, phys, num_pages_aligned, is_small_page);
}
if (!pma)
ret = -ENOMEM;
} else {
ret = kbase_mem_pool_alloc_pages(
&kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW],
num_pages, phys, false);
if (ret < 0)
goto out;
if (!reuse_pages) {
ret = kbase_mem_pool_alloc_pages(
kbase_mem_pool_group_select(
kbdev, KBASE_MEM_GROUP_CSF_FW, is_small_page),
num_pages_aligned, phys, false);
}
}
if (ret < 0) {
dev_err(kbdev->dev,
"Failed to allocate %u physical pages for the firmware interface entry at VA 0x%x\n",
num_pages_aligned, virtual_start);
goto out;
}
allocated_pages = true;
@@ -584,7 +656,10 @@ static int parse_memory_setup_entry(struct kbase_device *kbdev,
interface->name = name;
interface->phys = phys;
interface->reuse_pages = reuse_pages;
interface->is_small_page = is_small_page;
interface->num_pages = num_pages;
interface->num_pages_aligned = num_pages_aligned;
interface->virtual = virtual_start;
interface->kernel_map = NULL;
interface->flags = flags;
@@ -645,15 +720,17 @@ static int parse_memory_setup_entry(struct kbase_device *kbdev,
list_add(&interface->node, &kbdev->csf.firmware_interfaces);
ret = kbase_mmu_insert_pages_no_flush(kbdev, &kbdev->csf.mcu_mmu,
virtual_start >> PAGE_SHIFT, phys, num_pages, mem_flags,
KBASE_MEM_GROUP_CSF_FW);
if (!reuse_pages) {
ret = kbase_mmu_insert_pages_no_flush(kbdev, &kbdev->csf.mcu_mmu,
virtual_start >> PAGE_SHIFT, phys, num_pages_aligned, mem_flags,
KBASE_MEM_GROUP_CSF_FW);
if (ret != 0) {
dev_err(kbdev->dev, "Failed to insert firmware pages\n");
/* The interface has been added to the list, so cleanup will
* be handled by firmware unloading
*/
if (ret != 0) {
dev_err(kbdev->dev, "Failed to insert firmware pages\n");
/* The interface has been added to the list, so cleanup will
* be handled by firmware unloading
*/
}
}
dev_dbg(kbdev->dev, "Processed section '%s'", name);
@@ -662,16 +739,22 @@ static int parse_memory_setup_entry(struct kbase_device *kbdev,
out:
if (allocated_pages) {
if (protected_mode) {
kbase_csf_protected_memory_free(kbdev, pma, num_pages);
} else {
kbase_mem_pool_free_pages(
&kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW],
num_pages, phys, false, false);
if (!reuse_pages) {
if (protected_mode) {
kbase_csf_protected_memory_free(
kbdev, pma, num_pages_aligned, is_small_page);
} else {
kbase_mem_pool_free_pages(
kbase_mem_pool_group_select(
kbdev, KBASE_MEM_GROUP_CSF_FW, is_small_page),
num_pages_aligned, phys, false, false);
}
}
}
kfree(phys);
if (!reuse_pages)
kfree(phys);
kfree(interface);
return ret;
}
@@ -994,11 +1077,10 @@ static int parse_capabilities(struct kbase_device *kbdev)
iface->group_stride = shared_info[GLB_GROUP_STRIDE/4];
iface->prfcnt_size = shared_info[GLB_PRFCNT_SIZE/4];
if (iface->version >= kbase_csf_interface_version(1, 1, 0)) {
if (iface->version >= kbase_csf_interface_version(1, 1, 0))
iface->instr_features = shared_info[GLB_INSTR_FEATURES / 4];
} else {
else
iface->instr_features = 0;
}
if ((GROUP_CONTROL_0 +
(unsigned long)iface->group_num * iface->group_stride) >
@@ -1378,16 +1460,28 @@ static void set_timeout_global(
set_global_request(global_iface, GLB_REQ_CFG_PROGRESS_TIMER_MASK);
}
static void enable_gpu_idle_timer(struct kbase_device *const kbdev)
{
struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface;
kbase_csf_scheduler_spin_lock_assert_held(kbdev);
kbase_csf_firmware_global_input(global_iface, GLB_IDLE_TIMER,
kbdev->csf.gpu_idle_dur_count);
kbase_csf_firmware_global_input_mask(global_iface, GLB_REQ, GLB_REQ_REQ_IDLE_ENABLE,
GLB_REQ_IDLE_ENABLE_MASK);
dev_dbg(kbdev->dev, "Enabling GPU idle timer with count-value: 0x%.8x",
kbdev->csf.gpu_idle_dur_count);
}
static void global_init(struct kbase_device *const kbdev, u64 core_mask)
{
u32 const ack_irq_mask = GLB_ACK_IRQ_MASK_CFG_ALLOC_EN_MASK |
GLB_ACK_IRQ_MASK_PING_MASK |
GLB_ACK_IRQ_MASK_CFG_PROGRESS_TIMER_MASK |
GLB_ACK_IRQ_MASK_PROTM_ENTER_MASK |
GLB_ACK_IRQ_MASK_PROTM_EXIT_MASK |
GLB_ACK_IRQ_MASK_FIRMWARE_CONFIG_UPDATE_MASK |
GLB_ACK_IRQ_MASK_CFG_PWROFF_TIMER_MASK |
GLB_ACK_IRQ_MASK_IDLE_EVENT_MASK;
u32 const ack_irq_mask =
GLB_ACK_IRQ_MASK_CFG_ALLOC_EN_MASK | GLB_ACK_IRQ_MASK_PING_MASK |
GLB_ACK_IRQ_MASK_CFG_PROGRESS_TIMER_MASK | GLB_ACK_IRQ_MASK_PROTM_ENTER_MASK |
GLB_ACK_IRQ_MASK_PROTM_EXIT_MASK | GLB_ACK_IRQ_MASK_FIRMWARE_CONFIG_UPDATE_MASK |
GLB_ACK_IRQ_MASK_CFG_PWROFF_TIMER_MASK | GLB_ACK_IRQ_MASK_IDLE_EVENT_MASK |
GLB_ACK_IRQ_MASK_IDLE_ENABLE_MASK;
const struct kbase_csf_global_iface *const global_iface =
&kbdev->csf.global_iface;
@@ -1401,6 +1495,12 @@ static void global_init(struct kbase_device *const kbdev, u64 core_mask)
set_timeout_global(global_iface, kbase_csf_timeout_get(kbdev));
/* The GPU idle timer is always enabled for simplicity. Checks will be
* done before scheduling the GPU idle worker to see if it is
* appropriate for the current power policy.
*/
enable_gpu_idle_timer(kbdev);
/* Unmask the interrupts */
kbase_csf_firmware_global_input(global_iface,
GLB_ACK_IRQ_MASK, ack_irq_mask);
@@ -1507,7 +1607,7 @@ static void kbase_csf_firmware_reload_worker(struct work_struct *work)
KBASE_TLSTREAM_TL_KBASE_CSFFW_FW_RELOADING(kbdev, kbase_backend_get_cycle_cnt(kbdev));
/* Reload just the data sections from firmware binary image */
err = reload_fw_data_sections(kbdev);
err = reload_fw_image(kbdev);
if (err)
return;
@@ -1598,7 +1698,14 @@ static u32 convert_dur_to_idle_count(struct kbase_device *kbdev, const u32 dur_m
u32 kbase_csf_firmware_get_gpu_idle_hysteresis_time(struct kbase_device *kbdev)
{
return kbdev->csf.gpu_idle_hysteresis_ms;
unsigned long flags;
u32 dur;
kbase_csf_scheduler_spin_lock(kbdev, &flags);
dur = kbdev->csf.gpu_idle_hysteresis_ms;
kbase_csf_scheduler_spin_unlock(kbdev, flags);
return dur;
}
u32 kbase_csf_firmware_set_gpu_idle_hysteresis_time(struct kbase_device *kbdev, u32 dur)
@@ -1606,11 +1713,53 @@ u32 kbase_csf_firmware_set_gpu_idle_hysteresis_time(struct kbase_device *kbdev,
unsigned long flags;
const u32 hysteresis_val = convert_dur_to_idle_count(kbdev, dur);
/* The 'fw_load_lock' is taken to synchronize against the deferred
* loading of FW, where the idle timer will be enabled.
*/
mutex_lock(&kbdev->fw_load_lock);
if (unlikely(!kbdev->csf.firmware_inited)) {
kbase_csf_scheduler_spin_lock(kbdev, &flags);
kbdev->csf.gpu_idle_hysteresis_ms = dur;
kbdev->csf.gpu_idle_dur_count = hysteresis_val;
kbase_csf_scheduler_spin_unlock(kbdev, flags);
mutex_unlock(&kbdev->fw_load_lock);
goto end;
}
mutex_unlock(&kbdev->fw_load_lock);
kbase_csf_scheduler_pm_active(kbdev);
if (kbase_csf_scheduler_wait_mcu_active(kbdev)) {
dev_err(kbdev->dev,
"Unable to activate the MCU, the idle hysteresis value shall remain unchanged");
kbase_csf_scheduler_pm_idle(kbdev);
return kbdev->csf.gpu_idle_dur_count;
}
/* The 'reg_lock' is also taken and is held until the update is
* complete, to ensure that updates of the idle timer value by multiple
* users are serialized.
*/
mutex_lock(&kbdev->csf.reg_lock);
/* The firmware only reads the new idle timer value when the timer is
* disabled.
*/
kbase_csf_scheduler_spin_lock(kbdev, &flags);
kbase_csf_firmware_disable_gpu_idle_timer(kbdev);
kbase_csf_scheduler_spin_unlock(kbdev, flags);
/* Ensure that the request has taken effect */
wait_for_global_request(kbdev, GLB_REQ_IDLE_DISABLE_MASK);
kbase_csf_scheduler_spin_lock(kbdev, &flags);
kbdev->csf.gpu_idle_hysteresis_ms = dur;
kbdev->csf.gpu_idle_dur_count = hysteresis_val;
kbase_csf_firmware_enable_gpu_idle_timer(kbdev);
kbase_csf_scheduler_spin_unlock(kbdev, flags);
wait_for_global_request(kbdev, GLB_REQ_IDLE_ENABLE_MASK);
mutex_unlock(&kbdev->csf.reg_lock);
kbase_csf_scheduler_pm_idle(kbdev);
end:
dev_dbg(kbdev->dev, "CSF set firmware idle hysteresis count-value: 0x%.8x",
hysteresis_val);
@@ -1711,7 +1860,7 @@ static int kbase_device_csf_iterator_trace_init(struct kbase_device *kbdev)
long ack_timeout;
ack_timeout = kbase_csf_timeout_in_jiffies(
ACK_TIMEOUT_MILLISECONDS);
kbase_get_timeout_ms(kbdev, CSF_FIRMWARE_TIMEOUT));
/* write enable request to global input */
kbase_csf_firmware_global_input_mask(
@@ -1748,6 +1897,20 @@ int kbase_csf_firmware_early_init(struct kbase_device *kbdev)
kbdev->csf.fw_timeout_ms =
kbase_get_timeout_ms(kbdev, CSF_FIRMWARE_TIMEOUT);
kbdev->csf.gpu_idle_hysteresis_ms = FIRMWARE_IDLE_HYSTERESIS_TIME_MS;
#ifdef KBASE_PM_RUNTIME
if (kbase_pm_gpu_sleep_allowed(kbdev))
kbdev->csf.gpu_idle_hysteresis_ms /=
FIRMWARE_IDLE_HYSTERESIS_GPU_SLEEP_SCALER;
#endif
WARN_ON(!kbdev->csf.gpu_idle_hysteresis_ms);
kbdev->csf.gpu_idle_dur_count = convert_dur_to_idle_count(
kbdev, kbdev->csf.gpu_idle_hysteresis_ms);
kbdev->csf.mcu_core_pwroff_dur_us = DEFAULT_GLB_PWROFF_TIMEOUT_US;
kbdev->csf.mcu_core_pwroff_dur_count = convert_dur_to_core_pwroff_count(
kbdev, DEFAULT_GLB_PWROFF_TIMEOUT_US);
INIT_LIST_HEAD(&kbdev->csf.firmware_interfaces);
INIT_LIST_HEAD(&kbdev->csf.firmware_config);
INIT_LIST_HEAD(&kbdev->csf.firmware_timeline_metadata);
@@ -1786,20 +1949,6 @@ int kbase_csf_firmware_init(struct kbase_device *kbdev)
return ret;
}
kbdev->csf.gpu_idle_hysteresis_ms = FIRMWARE_IDLE_HYSTERESIS_TIME_MS;
#ifdef KBASE_PM_RUNTIME
if (kbase_pm_gpu_sleep_allowed(kbdev))
kbdev->csf.gpu_idle_hysteresis_ms /=
FIRMWARE_IDLE_HYSTERESIS_GPU_SLEEP_SCALER;
#endif
WARN_ON(!kbdev->csf.gpu_idle_hysteresis_ms);
kbdev->csf.gpu_idle_dur_count = convert_dur_to_idle_count(
kbdev, kbdev->csf.gpu_idle_hysteresis_ms);
kbdev->csf.mcu_core_pwroff_dur_us = DEFAULT_GLB_PWROFF_TIMEOUT_US;
kbdev->csf.mcu_core_pwroff_dur_count = convert_dur_to_core_pwroff_count(
kbdev, DEFAULT_GLB_PWROFF_TIMEOUT_US);
ret = kbase_mcu_shared_interface_region_tracker_init(kbdev);
if (ret != 0) {
dev_err(kbdev->dev,
@@ -1992,17 +2141,25 @@ void kbase_csf_firmware_term(struct kbase_device *kbdev)
list_del(&interface->node);
vunmap(interface->kernel_map);
if (interface->flags & CSF_FIRMWARE_ENTRY_PROTECTED) {
kbase_csf_protected_memory_free(kbdev, interface->pma,
interface->num_pages);
} else {
kbase_mem_pool_free_pages(
&kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW],
interface->num_pages, interface->phys,
true, false);
if (!interface->reuse_pages) {
if (interface->flags & CSF_FIRMWARE_ENTRY_PROTECTED) {
kbase_csf_protected_memory_free(
kbdev, interface->pma, interface->num_pages_aligned,
interface->is_small_page);
} else {
kbase_mem_pool_free_pages(
kbase_mem_pool_group_select(
kbdev, KBASE_MEM_GROUP_CSF_FW,
interface->is_small_page),
interface->num_pages_aligned,
interface->phys,
true, false);
}
kfree(interface->phys);
}
kfree(interface->phys);
kfree(interface);
}
@@ -2034,29 +2191,19 @@ void kbase_csf_firmware_term(struct kbase_device *kbdev)
void kbase_csf_firmware_enable_gpu_idle_timer(struct kbase_device *kbdev)
{
struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface;
const u32 glb_req =
kbase_csf_firmware_global_input_read(global_iface, GLB_REQ);
const u32 glb_req = kbase_csf_firmware_global_input_read(global_iface, GLB_REQ);
kbase_csf_scheduler_spin_lock_assert_held(kbdev);
/* The scheduler is assumed to only call the enable when its internal
* state indicates that the idle timer has previously been disabled. So
* on entry the expected field values are:
* 1. GLOBAL_INPUT_BLOCK.GLB_REQ.IDLE_ENABLE: 0
* 2. GLOBAL_OUTPUT_BLOCK.GLB_ACK.IDLE_ENABLE: 0, or, on 1 -> 0
*/
if (glb_req & GLB_REQ_IDLE_ENABLE_MASK)
dev_err(kbdev->dev, "Incoherent scheduler state on REQ_IDLE_ENABLE!");
kbase_csf_firmware_global_input(global_iface, GLB_IDLE_TIMER,
kbdev->csf.gpu_idle_dur_count);
kbase_csf_firmware_global_input_mask(global_iface, GLB_REQ,
GLB_REQ_REQ_IDLE_ENABLE, GLB_REQ_IDLE_ENABLE_MASK);
dev_dbg(kbdev->dev, "Enabling GPU idle timer with count-value: 0x%.8x",
kbdev->csf.gpu_idle_dur_count);
enable_gpu_idle_timer(kbdev);
kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR);
}
@@ -2120,6 +2267,8 @@ void kbase_csf_enter_protected_mode(struct kbase_device *kbdev)
{
struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface;
KBASE_TLSTREAM_AUX_PROTECTED_ENTER_START(kbdev, kbdev);
kbase_csf_scheduler_spin_lock_assert_held(kbdev);
set_global_request(global_iface, GLB_REQ_PROTM_ENTER_MASK);
dev_dbg(kbdev->dev, "Sending request to enter protected mode");
@@ -2134,6 +2283,8 @@ void kbase_csf_wait_protected_mode_enter(struct kbase_device *kbdev)
if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_NONE))
kbase_reset_gpu(kbdev);
}
KBASE_TLSTREAM_AUX_PROTECTED_ENTER_END(kbdev, kbdev);
}
void kbase_csf_firmware_trigger_mcu_halt(struct kbase_device *kbdev)

View File

@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
* (C) COPYRIGHT 2018-2021 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2018-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -23,7 +23,7 @@
#define _KBASE_CSF_FIRMWARE_H_
#include "device/mali_kbase_device.h"
#include <uapi/gpu/arm/bifrost/csf/mali_gpu_csf_registers.h>
#include <csf/mali_kbase_csf_registers.h>
/*
* PAGE_KERNEL_RO was only defined on 32bit ARM in 4.19 in:
@@ -75,7 +75,7 @@
#define MAX_SUPPORTED_CSGS 31
/* GROUP_STREAM_NUM: At least 8 CSs per CSG, but no more than 32 */
#define MIN_SUPPORTED_STREAMS_PER_GROUP 8
/* Maximum CSs per csg. */
/* MAX_SUPPORTED_STREAMS_PER_GROUP: Maximum CSs per csg. */
#define MAX_SUPPORTED_STREAMS_PER_GROUP 32
struct kbase_device;
@@ -777,7 +777,7 @@ u32 kbase_csf_firmware_set_mcu_core_pwroff_time(struct kbase_device *kbdev, u32
/**
* kbase_csf_interface_version - Helper function to build the full firmware
* interface version in a format compatible with
* with GLB_VERSION register
* GLB_VERSION register
*
* @major: major version of csf interface
* @minor: minor version of csf interface

View File

@@ -67,9 +67,9 @@ struct firmware_config {
.mode = VERIFY_OCTAL_PERMISSIONS(_mode), \
}
static FW_CFG_ATTR(min, S_IRUGO);
static FW_CFG_ATTR(max, S_IRUGO);
static FW_CFG_ATTR(cur, S_IRUGO | S_IWUSR);
static FW_CFG_ATTR(min, 0444);
static FW_CFG_ATTR(max, 0444);
static FW_CFG_ATTR(cur, 0644);
static void fw_cfg_kobj_release(struct kobject *kobj)
{

View File

@@ -101,7 +101,7 @@ struct dummy_firmware_interface {
#define CSF_GLB_REQ_CFG_MASK \
(GLB_REQ_CFG_ALLOC_EN_MASK | GLB_REQ_CFG_PROGRESS_TIMER_MASK | \
GLB_REQ_CFG_PWROFF_TIMER_MASK)
GLB_REQ_CFG_PWROFF_TIMER_MASK | GLB_REQ_IDLE_ENABLE_MASK)
static inline u32 input_page_read(const u32 *const input, const u32 offset)
{
@@ -193,9 +193,8 @@ static int invent_cmd_stream_group_info(struct kbase_device *kbdev,
ginfo->stream_stride = 0;
ginfo->streams = kcalloc(ginfo->stream_num, sizeof(*ginfo->streams), GFP_KERNEL);
if (ginfo->streams == NULL) {
if (ginfo->streams == NULL)
return -ENOMEM;
}
for (sid = 0; sid < ginfo->stream_num; ++sid) {
struct kbase_csf_cmd_stream_info *stream = &ginfo->streams[sid];
@@ -241,9 +240,8 @@ static int invent_capabilities(struct kbase_device *kbdev)
iface->group_stride = 0;
iface->groups = kcalloc(iface->group_num, sizeof(*iface->groups), GFP_KERNEL);
if (iface->groups == NULL) {
if (iface->groups == NULL)
return -ENOMEM;
}
for (gid = 0; gid < iface->group_num; ++gid) {
int err;
@@ -619,6 +617,20 @@ static void set_timeout_global(
set_global_request(global_iface, GLB_REQ_CFG_PROGRESS_TIMER_MASK);
}
static void enable_gpu_idle_timer(struct kbase_device *const kbdev)
{
struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface;
kbase_csf_scheduler_spin_lock_assert_held(kbdev);
kbase_csf_firmware_global_input(global_iface, GLB_IDLE_TIMER,
kbdev->csf.gpu_idle_dur_count);
kbase_csf_firmware_global_input_mask(global_iface, GLB_REQ, GLB_REQ_REQ_IDLE_ENABLE,
GLB_REQ_IDLE_ENABLE_MASK);
dev_dbg(kbdev->dev, "Enabling GPU idle timer with count-value: 0x%.8x",
kbdev->csf.gpu_idle_dur_count);
}
static void global_init(struct kbase_device *const kbdev, u64 core_mask)
{
u32 const ack_irq_mask = GLB_ACK_IRQ_MASK_CFG_ALLOC_EN_MASK |
@@ -628,7 +640,8 @@ static void global_init(struct kbase_device *const kbdev, u64 core_mask)
GLB_ACK_IRQ_MASK_FIRMWARE_CONFIG_UPDATE_MASK |
GLB_ACK_IRQ_MASK_PROTM_EXIT_MASK |
GLB_ACK_IRQ_MASK_CFG_PWROFF_TIMER_MASK |
GLB_ACK_IRQ_MASK_IDLE_EVENT_MASK;
GLB_ACK_IRQ_MASK_IDLE_EVENT_MASK |
GLB_ACK_IRQ_MASK_IDLE_ENABLE_MASK;
const struct kbase_csf_global_iface *const global_iface =
&kbdev->csf.global_iface;
@@ -642,6 +655,12 @@ static void global_init(struct kbase_device *const kbdev, u64 core_mask)
set_timeout_global(global_iface, kbase_csf_timeout_get(kbdev));
/* The GPU idle timer is always enabled for simplicity. Checks will be
* done before scheduling the GPU idle worker to see if it is
* appropriate for the current power policy.
*/
enable_gpu_idle_timer(kbdev);
/* Unmask the interrupts */
kbase_csf_firmware_global_input(global_iface,
GLB_ACK_IRQ_MASK, ack_irq_mask);
@@ -809,7 +828,14 @@ static u32 convert_dur_to_idle_count(struct kbase_device *kbdev, const u32 dur_m
u32 kbase_csf_firmware_get_gpu_idle_hysteresis_time(struct kbase_device *kbdev)
{
return kbdev->csf.gpu_idle_hysteresis_ms;
unsigned long flags;
u32 dur;
kbase_csf_scheduler_spin_lock(kbdev, &flags);
dur = kbdev->csf.gpu_idle_hysteresis_ms;
kbase_csf_scheduler_spin_unlock(kbdev, flags);
return dur;
}
u32 kbase_csf_firmware_set_gpu_idle_hysteresis_time(struct kbase_device *kbdev, u32 dur)
@@ -817,11 +843,53 @@ u32 kbase_csf_firmware_set_gpu_idle_hysteresis_time(struct kbase_device *kbdev,
unsigned long flags;
const u32 hysteresis_val = convert_dur_to_idle_count(kbdev, dur);
/* The 'fw_load_lock' is taken to synchronize against the deferred
* loading of FW, where the idle timer will be enabled.
*/
mutex_lock(&kbdev->fw_load_lock);
if (unlikely(!kbdev->csf.firmware_inited)) {
kbase_csf_scheduler_spin_lock(kbdev, &flags);
kbdev->csf.gpu_idle_hysteresis_ms = dur;
kbdev->csf.gpu_idle_dur_count = hysteresis_val;
kbase_csf_scheduler_spin_unlock(kbdev, flags);
mutex_unlock(&kbdev->fw_load_lock);
goto end;
}
mutex_unlock(&kbdev->fw_load_lock);
kbase_csf_scheduler_pm_active(kbdev);
if (kbase_csf_scheduler_wait_mcu_active(kbdev)) {
dev_err(kbdev->dev,
"Unable to activate the MCU, the idle hysteresis value shall remain unchanged");
kbase_csf_scheduler_pm_idle(kbdev);
return kbdev->csf.gpu_idle_dur_count;
}
/* The 'reg_lock' is also taken and is held until the update is
* complete, to ensure that updates of the idle timer value by multiple
* users are serialized.
*/
mutex_lock(&kbdev->csf.reg_lock);
/* The firmware only reads the new idle timer value when the timer is
* disabled.
*/
kbase_csf_scheduler_spin_lock(kbdev, &flags);
kbase_csf_firmware_disable_gpu_idle_timer(kbdev);
kbase_csf_scheduler_spin_unlock(kbdev, flags);
/* Ensure that the request has taken effect */
wait_for_global_request(kbdev, GLB_REQ_IDLE_DISABLE_MASK);
kbase_csf_scheduler_spin_lock(kbdev, &flags);
kbdev->csf.gpu_idle_hysteresis_ms = dur;
kbdev->csf.gpu_idle_dur_count = hysteresis_val;
kbase_csf_firmware_enable_gpu_idle_timer(kbdev);
kbase_csf_scheduler_spin_unlock(kbdev, flags);
wait_for_global_request(kbdev, GLB_REQ_IDLE_ENABLE_MASK);
mutex_unlock(&kbdev->csf.reg_lock);
kbase_csf_scheduler_pm_idle(kbdev);
end:
dev_dbg(kbdev->dev, "CSF set firmware idle hysteresis count-value: 0x%.8x",
hysteresis_val);
@@ -897,6 +965,16 @@ int kbase_csf_firmware_early_init(struct kbase_device *kbdev)
kbdev->csf.fw_timeout_ms =
kbase_get_timeout_ms(kbdev, CSF_FIRMWARE_TIMEOUT);
kbdev->csf.gpu_idle_hysteresis_ms = FIRMWARE_IDLE_HYSTERESIS_TIME_MS;
#ifdef KBASE_PM_RUNTIME
if (kbase_pm_gpu_sleep_allowed(kbdev))
kbdev->csf.gpu_idle_hysteresis_ms /=
FIRMWARE_IDLE_HYSTERESIS_GPU_SLEEP_SCALER;
#endif
WARN_ON(!kbdev->csf.gpu_idle_hysteresis_ms);
kbdev->csf.gpu_idle_dur_count = convert_dur_to_idle_count(
kbdev, kbdev->csf.gpu_idle_hysteresis_ms);
INIT_LIST_HEAD(&kbdev->csf.firmware_interfaces);
INIT_LIST_HEAD(&kbdev->csf.firmware_config);
INIT_LIST_HEAD(&kbdev->csf.firmware_trace_buffers.list);
@@ -928,16 +1006,6 @@ int kbase_csf_firmware_init(struct kbase_device *kbdev)
return ret;
}
kbdev->csf.gpu_idle_hysteresis_ms = FIRMWARE_IDLE_HYSTERESIS_TIME_MS;
#ifdef KBASE_PM_RUNTIME
if (kbase_pm_gpu_sleep_allowed(kbdev))
kbdev->csf.gpu_idle_hysteresis_ms /=
FIRMWARE_IDLE_HYSTERESIS_GPU_SLEEP_SCALER;
#endif
WARN_ON(!kbdev->csf.gpu_idle_hysteresis_ms);
kbdev->csf.gpu_idle_dur_count = convert_dur_to_idle_count(
kbdev, kbdev->csf.gpu_idle_hysteresis_ms);
ret = kbase_mcu_shared_interface_region_tracker_init(kbdev);
if (ret != 0) {
dev_err(kbdev->dev,
@@ -1035,29 +1103,19 @@ void kbase_csf_firmware_term(struct kbase_device *kbdev)
void kbase_csf_firmware_enable_gpu_idle_timer(struct kbase_device *kbdev)
{
struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface;
u32 glb_req;
const u32 glb_req = kbase_csf_firmware_global_input_read(global_iface, GLB_REQ);
kbase_csf_scheduler_spin_lock_assert_held(kbdev);
/* The scheduler is assumed to only call the enable when its internal
* state indicates that the idle timer has previously been disabled. So
* on entry the expected field values are:
* 1. GLOBAL_INPUT_BLOCK.GLB_REQ.IDLE_ENABLE: 0
* 2. GLOBAL_OUTPUT_BLOCK.GLB_ACK.IDLE_ENABLE: 0, or, on 1 -> 0
*/
glb_req = kbase_csf_firmware_global_input_read(global_iface, GLB_REQ);
if (glb_req & GLB_REQ_IDLE_ENABLE_MASK)
dev_err(kbdev->dev, "Incoherent scheduler state on REQ_IDLE_ENABLE!");
kbase_csf_firmware_global_input(global_iface, GLB_IDLE_TIMER,
kbdev->csf.gpu_idle_dur_count);
kbase_csf_firmware_global_input_mask(global_iface, GLB_REQ,
GLB_REQ_REQ_IDLE_ENABLE, GLB_REQ_IDLE_ENABLE_MASK);
dev_dbg(kbdev->dev, "Enabling GPU idle timer with count-value: 0x%.8x",
kbdev->csf.gpu_idle_dur_count);
enable_gpu_idle_timer(kbdev);
kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR);
}

View File

@@ -174,17 +174,15 @@ u64 kbase_csf_heap_context_allocator_alloc(
* allocate it.
*/
if (!ctx_alloc->region) {
ctx_alloc->region =
kbase_mem_alloc(kctx, nr_pages, nr_pages, 0, &flags,
&ctx_alloc->gpu_va, mmu_sync_info);
ctx_alloc->region = kbase_mem_alloc(kctx, nr_pages, nr_pages, 0, &flags,
&ctx_alloc->gpu_va, mmu_sync_info);
}
/* If the pool still isn't allocated then an error occurred. */
if (unlikely(!ctx_alloc->region)) {
if (unlikely(!ctx_alloc->region))
dev_dbg(kctx->kbdev->dev, "Failed to allocate a pool of tiler heap contexts");
} else {
else
heap_gpu_va = sub_alloc(ctx_alloc);
}
mutex_unlock(&ctx_alloc->lock);

View File

@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
* (C) COPYRIGHT 2018-2021 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2018-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -45,6 +45,10 @@ static int kbase_kcpu_map_import_prepare(
{
struct kbase_context *const kctx = kcpu_queue->kctx;
struct kbase_va_region *reg;
struct kbase_mem_phy_alloc *alloc;
struct page **pages;
struct tagged_addr *pa;
long i;
int ret = 0;
lockdep_assert_held(&kctx->csf.kcpu_queues.lock);
@@ -76,6 +80,13 @@ static int kbase_kcpu_map_import_prepare(
ret = kbase_jd_user_buf_pin_pages(kctx, reg);
if (ret)
goto out;
alloc = reg->gpu_alloc;
pa = kbase_get_gpu_phy_pages(reg);
pages = alloc->imported.user_buf.pages;
for (i = 0; i < alloc->nents; i++)
pa[i] = as_tagged(page_to_phys(pages[i]));
}
current_command->type = BASE_KCPU_COMMAND_TYPE_MAP_IMPORT;
@@ -172,8 +183,8 @@ static void kbase_jit_add_to_pending_alloc_list(
list_for_each_entry(blocked_queue,
&kctx->csf.kcpu_queues.jit_blocked_queues,
jit_blocked) {
struct kbase_kcpu_command const*const jit_alloc_cmd =
&blocked_queue->commands[blocked_queue->start_offset];
struct kbase_kcpu_command const *const jit_alloc_cmd =
&blocked_queue->commands[blocked_queue->start_offset];
WARN_ON(jit_alloc_cmd->type != BASE_KCPU_COMMAND_TYPE_JIT_ALLOC);
if (cmd->enqueue_ts < jit_alloc_cmd->enqueue_ts) {
@@ -244,7 +255,7 @@ static int kbase_kcpu_jit_allocate_process(
break;
if (jit_cmd->type == BASE_KCPU_COMMAND_TYPE_JIT_FREE) {
u8 const*const free_ids = jit_cmd->info.jit_free.ids;
u8 const *const free_ids = jit_cmd->info.jit_free.ids;
if (free_ids && *free_ids && kctx->jit_alloc[*free_ids]) {
/*
@@ -456,8 +467,8 @@ static int kbase_kcpu_jit_free_process(struct kbase_kcpu_command_queue *queue,
lockdep_assert_held(&kctx->csf.kcpu_queues.lock);
KBASE_TLSTREAM_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_EXECUTE_JIT_FREE_END(
queue->kctx->kbdev, queue);
KBASE_TLSTREAM_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_EXECUTE_JIT_FREE_END(queue->kctx->kbdev,
queue);
for (i = 0; i < count; i++) {
u64 pages_used = 0;
@@ -636,7 +647,7 @@ static int kbase_csf_queue_group_suspend_prepare(
struct tagged_addr *page_array;
u64 start, end, i;
if (!(reg->flags & BASE_MEM_SAME_VA) ||
if (((reg->flags & KBASE_REG_ZONE_MASK) != KBASE_REG_ZONE_SAME_VA) ||
reg->nr_pages < nr_pages ||
kbase_reg_current_backed_size(reg) !=
reg->nr_pages) {
@@ -734,8 +745,8 @@ static int kbase_kcpu_cqs_wait_process(struct kbase_device *kbdev,
cqs_wait->objs[i].addr, &mapping);
if (!queue->command_started) {
KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_START(
kbdev, queue);
KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_START(kbdev,
queue);
queue->command_started = true;
KBASE_KTRACE_ADD_CSF_KCPU(kbdev, CQS_WAIT_START,
queue, cqs_wait->nr_objs, 0);
@@ -764,8 +775,7 @@ static int kbase_kcpu_cqs_wait_process(struct kbase_device *kbdev,
error);
KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_END(
kbdev, queue,
evt[BASEP_EVENT_ERR_INDEX]);
kbdev, queue, evt[BASEP_EVENT_ERR_INDEX]);
queue->command_started = false;
}
@@ -855,8 +865,7 @@ static void kbase_kcpu_cqs_set_process(struct kbase_device *kbdev,
evt = (u32 *)kbase_phy_alloc_mapping_get(
queue->kctx, cqs_set->objs[i].addr, &mapping);
KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_SET(kbdev, queue,
evt ? 0 : 1);
KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_SET(kbdev, queue, evt ? 0 : 1);
if (!evt) {
dev_warn(kbdev->dev,
@@ -1490,8 +1499,7 @@ static void KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_ALLOC_INFO(
{
u8 i;
KBASE_TLSTREAM_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_EXECUTE_JIT_ALLOC_END(
kbdev, queue);
KBASE_TLSTREAM_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_EXECUTE_JIT_ALLOC_END(kbdev, queue);
for (i = 0; i < jit_alloc->count; i++) {
const u8 id = jit_alloc->info[i].id;
const struct kbase_va_region *reg = queue->kctx->jit_alloc[id];
@@ -1521,16 +1529,14 @@ static void KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_ALLOC_END(
struct kbase_device *kbdev,
const struct kbase_kcpu_command_queue *queue)
{
KBASE_TLSTREAM_TL_KBASE_ARRAY_END_KCPUQUEUE_EXECUTE_JIT_ALLOC_END(
kbdev, queue);
KBASE_TLSTREAM_TL_KBASE_ARRAY_END_KCPUQUEUE_EXECUTE_JIT_ALLOC_END(kbdev, queue);
}
static void KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_FREE_END(
struct kbase_device *kbdev,
const struct kbase_kcpu_command_queue *queue)
{
KBASE_TLSTREAM_TL_KBASE_ARRAY_END_KCPUQUEUE_EXECUTE_JIT_FREE_END(
kbdev, queue);
KBASE_TLSTREAM_TL_KBASE_ARRAY_END_KCPUQUEUE_EXECUTE_JIT_FREE_END(kbdev, queue);
}
static void kcpu_queue_process(struct kbase_kcpu_command_queue *queue,
@@ -1550,8 +1556,8 @@ static void kcpu_queue_process(struct kbase_kcpu_command_queue *queue,
switch (cmd->type) {
case BASE_KCPU_COMMAND_TYPE_FENCE_WAIT:
if (!queue->command_started) {
KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_WAIT_START(
kbdev, queue);
KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_WAIT_START(kbdev,
queue);
queue->command_started = true;
}
@@ -1584,8 +1590,7 @@ static void kcpu_queue_process(struct kbase_kcpu_command_queue *queue,
}
break;
case BASE_KCPU_COMMAND_TYPE_FENCE_SIGNAL:
KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_SIGNAL_START(
kbdev, queue);
KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_SIGNAL_START(kbdev, queue);
status = 0;
@@ -1603,8 +1608,8 @@ static void kcpu_queue_process(struct kbase_kcpu_command_queue *queue,
queue->has_error = true;
#endif
KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_SIGNAL_END(
kbdev, queue, status);
KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_SIGNAL_END(kbdev, queue,
status);
break;
case BASE_KCPU_COMMAND_TYPE_CQS_WAIT:
status = kbase_kcpu_cqs_wait_process(kbdev, queue,
@@ -1654,15 +1659,14 @@ static void kcpu_queue_process(struct kbase_kcpu_command_queue *queue,
/* Clear the queue's error state */
queue->has_error = false;
KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_ERROR_BARRIER(
kbdev, queue);
KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_ERROR_BARRIER(kbdev, queue);
break;
case BASE_KCPU_COMMAND_TYPE_MAP_IMPORT: {
struct kbase_ctx_ext_res_meta *meta = NULL;
if (!drain_queue) {
KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_MAP_IMPORT_START(
kbdev, queue);
KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_MAP_IMPORT_START(kbdev,
queue);
kbase_gpu_vm_lock(queue->kctx);
meta = kbase_sticky_resource_acquire(
@@ -1684,8 +1688,7 @@ static void kcpu_queue_process(struct kbase_kcpu_command_queue *queue,
case BASE_KCPU_COMMAND_TYPE_UNMAP_IMPORT: {
bool ret;
KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_START(
kbdev, queue);
KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_START(kbdev, queue);
kbase_gpu_vm_lock(queue->kctx);
ret = kbase_sticky_resource_release(
@@ -1698,15 +1701,15 @@ static void kcpu_queue_process(struct kbase_kcpu_command_queue *queue,
"failed to release the reference. resource not found");
}
KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_END(
kbdev, queue, ret ? 0 : 1);
KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_END(kbdev, queue,
ret ? 0 : 1);
break;
}
case BASE_KCPU_COMMAND_TYPE_UNMAP_IMPORT_FORCE: {
bool ret;
KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_FORCE_START(
kbdev, queue);
KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_FORCE_START(kbdev,
queue);
kbase_gpu_vm_lock(queue->kctx);
ret = kbase_sticky_resource_release_force(
@@ -1729,8 +1732,8 @@ static void kcpu_queue_process(struct kbase_kcpu_command_queue *queue,
/* We still need to call this function to clean the JIT alloc info up */
kbase_kcpu_jit_allocate_finish(queue, cmd);
} else {
KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_ALLOC_START(
kbdev, queue);
KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_ALLOC_START(kbdev,
queue);
status = kbase_kcpu_jit_allocate_process(queue,
cmd);
@@ -1754,8 +1757,7 @@ static void kcpu_queue_process(struct kbase_kcpu_command_queue *queue,
break;
}
case BASE_KCPU_COMMAND_TYPE_JIT_FREE:
KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_FREE_START(
kbdev, queue);
KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_FREE_START(kbdev, queue);
status = kbase_kcpu_jit_free_process(queue, cmd);
if (status)
@@ -1838,12 +1840,12 @@ static void KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_COMMAND(
switch (cmd->type) {
case BASE_KCPU_COMMAND_TYPE_FENCE_WAIT:
KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_FENCE_WAIT(
kbdev, queue, cmd->info.fence.fence);
KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_FENCE_WAIT(kbdev, queue,
cmd->info.fence.fence);
break;
case BASE_KCPU_COMMAND_TYPE_FENCE_SIGNAL:
KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_FENCE_SIGNAL(
kbdev, queue, cmd->info.fence.fence);
KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_FENCE_SIGNAL(kbdev, queue,
cmd->info.fence.fence);
break;
case BASE_KCPU_COMMAND_TYPE_CQS_WAIT:
{
@@ -1865,8 +1867,8 @@ static void KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_COMMAND(
unsigned int i;
for (i = 0; i < cmd->info.cqs_set.nr_objs; i++) {
KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_SET(
kbdev, queue, sets[i].addr);
KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_SET(kbdev, queue,
sets[i].addr);
}
break;
}
@@ -1881,16 +1883,15 @@ static void KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_COMMAND(
break;
}
case BASE_KCPU_COMMAND_TYPE_ERROR_BARRIER:
KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_ERROR_BARRIER(kbdev,
queue);
KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_ERROR_BARRIER(kbdev, queue);
break;
case BASE_KCPU_COMMAND_TYPE_MAP_IMPORT:
KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_MAP_IMPORT(
kbdev, queue, cmd->info.import.gpu_va);
KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_MAP_IMPORT(kbdev, queue,
cmd->info.import.gpu_va);
break;
case BASE_KCPU_COMMAND_TYPE_UNMAP_IMPORT:
KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_UNMAP_IMPORT(
kbdev, queue, cmd->info.import.gpu_va);
KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_UNMAP_IMPORT(kbdev, queue,
cmd->info.import.gpu_va);
break;
case BASE_KCPU_COMMAND_TYPE_UNMAP_IMPORT_FORCE:
KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_UNMAP_IMPORT_FORCE(
@@ -1900,35 +1901,29 @@ static void KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_COMMAND(
{
u8 i;
KBASE_TLSTREAM_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_JIT_ALLOC(
kbdev, queue);
KBASE_TLSTREAM_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_JIT_ALLOC(kbdev, queue);
for (i = 0; i < cmd->info.jit_alloc.count; i++) {
const struct base_jit_alloc_info *info =
&cmd->info.jit_alloc.info[i];
KBASE_TLSTREAM_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_JIT_ALLOC(
kbdev, queue, info->gpu_alloc_addr,
info->va_pages, info->commit_pages,
info->extension, info->id, info->bin_id,
info->max_allocations, info->flags,
info->usage_id);
kbdev, queue, info->gpu_alloc_addr, info->va_pages,
info->commit_pages, info->extension, info->id, info->bin_id,
info->max_allocations, info->flags, info->usage_id);
}
KBASE_TLSTREAM_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_JIT_ALLOC(
kbdev, queue);
KBASE_TLSTREAM_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_JIT_ALLOC(kbdev, queue);
break;
}
case BASE_KCPU_COMMAND_TYPE_JIT_FREE:
{
u8 i;
KBASE_TLSTREAM_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_JIT_FREE(
kbdev, queue);
KBASE_TLSTREAM_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_JIT_FREE(kbdev, queue);
for (i = 0; i < cmd->info.jit_free.count; i++) {
KBASE_TLSTREAM_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_JIT_FREE(
kbdev, queue, cmd->info.jit_free.ids[i]);
}
KBASE_TLSTREAM_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_JIT_FREE(
kbdev, queue);
KBASE_TLSTREAM_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_JIT_FREE(kbdev, queue);
break;
}
case BASE_KCPU_COMMAND_TYPE_GROUP_SUSPEND:
@@ -1936,6 +1931,9 @@ static void KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_COMMAND(
kbdev, queue, cmd->info.suspend_buf_copy.sus_buf,
cmd->info.suspend_buf_copy.group_handle);
break;
default:
dev_dbg(kbdev->dev, "Unknown command type %u", cmd->type);
break;
}
}
@@ -2210,8 +2208,8 @@ int kbase_csf_kcpu_queue_new(struct kbase_context *kctx,
/* Fire the tracepoint with the mutex held to enforce correct ordering
* with the summary stream.
*/
KBASE_TLSTREAM_TL_KBASE_NEW_KCPUQUEUE(
kctx->kbdev, queue, kctx->id, queue->num_pending_cmds);
KBASE_TLSTREAM_TL_KBASE_NEW_KCPUQUEUE(kctx->kbdev, queue, queue->id, kctx->id,
queue->num_pending_cmds);
KBASE_KTRACE_ADD_CSF_KCPU(kctx->kbdev, KCPU_QUEUE_NEW, queue,
queue->fence_context, 0);

View File

@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
* (C) COPYRIGHT 2018-2021 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2018-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -206,14 +206,16 @@ struct kbase_kcpu_command_group_suspend_info {
* indicates that it has been enqueued earlier.
* @info: Structure which holds information about the command
* dependent on the command type.
* @info.fence: Fence
* @info.cqs_wait: CQS wait
* @info.cqs_set: CQS set
* @info.import: import
* @info.jit_alloc: jit allocation
* @info.jit_free: jit deallocation
* @info.suspend_buf_copy: suspend buffer copy
* @info.sample_time: sample time
* @info.fence: Fence
* @info.cqs_wait: CQS wait
* @info.cqs_set: CQS set
* @info.cqs_wait_operation: CQS wait operation
* @info.cqs_set_operation: CQS set operation
* @info.import: import
* @info.jit_alloc: JIT allocation
* @info.jit_free: JIT deallocation
* @info.suspend_buf_copy: suspend buffer copy
* @info.sample_time: sample time
*/
struct kbase_kcpu_command {
enum base_kcpu_command_type type;
@@ -303,8 +305,6 @@ int kbase_csf_kcpu_queue_new(struct kbase_context *kctx,
/**
* kbase_csf_kcpu_queue_delete - Delete KCPU command queue.
*
* Return: 0 if successful, -EINVAL if the queue ID is invalid.
*
* @kctx: Pointer to the kbase context from which the KCPU command
* queue is to be deleted.
* @del: Pointer to the structure which specifies the KCPU command

View File

@@ -71,29 +71,60 @@ struct protected_memory_allocation **
kbase_csf_protected_memory_alloc(
struct kbase_device *const kbdev,
struct tagged_addr *phys,
size_t num_pages)
size_t num_pages,
bool is_small_page)
{
size_t i;
struct protected_memory_allocator_device *pma_dev =
kbdev->csf.pma_dev;
struct protected_memory_allocation **pma =
kmalloc_array(num_pages, sizeof(*pma), GFP_KERNEL);
struct protected_memory_allocation **pma = NULL;
unsigned int order = KBASE_MEM_POOL_2MB_PAGE_TABLE_ORDER;
unsigned int num_pages_order;
if (is_small_page)
order = KBASE_MEM_POOL_4KB_PAGE_TABLE_ORDER;
num_pages_order = (1u << order);
/* Ensure the requested num_pages is aligned with
* the order type passed as argument.
*
* pma_alloc_page() will then handle the granularity
* of the allocation based on order.
*/
num_pages = div64_u64(num_pages + num_pages_order - 1, num_pages_order);
pma = kmalloc_array(num_pages, sizeof(*pma), GFP_KERNEL);
if (WARN_ON(!pma_dev) || WARN_ON(!phys) || !pma)
return NULL;
for (i = 0; i < num_pages; i++) {
pma[i] = pma_dev->ops.pma_alloc_page(pma_dev,
KBASE_MEM_POOL_4KB_PAGE_TABLE_ORDER);
phys_addr_t phys_addr;
pma[i] = pma_dev->ops.pma_alloc_page(pma_dev, order);
if (!pma[i])
break;
phys[i] = as_tagged(pma_dev->ops.pma_get_phys_addr(pma_dev,
pma[i]));
phys_addr = pma_dev->ops.pma_get_phys_addr(pma_dev, pma[i]);
if (order) {
size_t j;
*phys++ = as_tagged_tag(phys_addr, HUGE_HEAD | HUGE_PAGE);
for (j = 1; j < num_pages_order; j++) {
*phys++ = as_tagged_tag(phys_addr +
PAGE_SIZE * j,
HUGE_PAGE);
}
} else {
phys[i] = as_tagged(phys_addr);
}
}
if (i != num_pages) {
kbase_csf_protected_memory_free(kbdev, pma, i);
kbase_csf_protected_memory_free(kbdev, pma, i * num_pages_order, is_small_page);
return NULL;
}
@@ -103,15 +134,28 @@ struct protected_memory_allocation **
void kbase_csf_protected_memory_free(
struct kbase_device *const kbdev,
struct protected_memory_allocation **pma,
size_t num_pages)
size_t num_pages,
bool is_small_page)
{
size_t i;
struct protected_memory_allocator_device *pma_dev =
kbdev->csf.pma_dev;
unsigned int num_pages_order = (1u << KBASE_MEM_POOL_2MB_PAGE_TABLE_ORDER);
if (is_small_page)
num_pages_order = (1u << KBASE_MEM_POOL_4KB_PAGE_TABLE_ORDER);
if (WARN_ON(!pma_dev) || WARN_ON(!pma))
return;
/* Ensure the requested num_pages is aligned with
* the order type passed as argument.
*
* pma_alloc_page() will then handle the granularity
* of the allocation based on order.
*/
num_pages = div64_u64(num_pages + num_pages_order - 1, num_pages_order);
for (i = 0; i < num_pages; i++)
pma_dev->ops.pma_free_page(pma_dev, pma[i]);

View File

@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
* (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -46,6 +46,7 @@ void kbase_csf_protected_memory_term(struct kbase_device *const kbdev);
* @phys: Array of physical addresses to be filled in by the protected
* memory allocator.
* @num_pages: Number of pages requested to be allocated.
* @is_small_page: Flag used to select the order of protected memory page.
*
* Return: Pointer to an array of protected memory allocations on success,
* or NULL on failure.
@@ -54,7 +55,8 @@ struct protected_memory_allocation **
kbase_csf_protected_memory_alloc(
struct kbase_device *const kbdev,
struct tagged_addr *phys,
size_t num_pages);
size_t num_pages,
bool is_small_page);
/**
* kbase_csf_protected_memory_free - Free the allocated
@@ -63,9 +65,11 @@ struct protected_memory_allocation **
* @kbdev: Device pointer.
* @pma: Array of pointer to protected memory allocations.
* @num_pages: Number of pages to be freed.
* @is_small_page: Flag used to select the order of protected memory page.
*/
void kbase_csf_protected_memory_free(
struct kbase_device *const kbdev,
struct protected_memory_allocation **pma,
size_t num_pages);
size_t num_pages,
bool is_small_page);
#endif

View File

@@ -24,8 +24,8 @@
* expected) to have to add to it.
*/
#ifndef _UAPI_GPU_CSF_REGISTERS_H_
#define _UAPI_GPU_CSF_REGISTERS_H_
#ifndef _KBASE_CSF_REGISTERS_H_
#define _KBASE_CSF_REGISTERS_H_
/*
* Begin register sets
@@ -480,7 +480,7 @@
/* CS_INSTR_BUFFER_OFFSET_POINTER register */
#define CS_INSTR_BUFFER_OFFSET_POINTER_POINTER_SHIFT (0)
#define CS_INSTR_BUFFER_OFFSET_POINTER_POINTER_MASK \
((u64)0xFFFFFFFFFFFFFFFF) << CS_INSTR_BUFFER_OFFSET_POINTER_POINTER_SHIFT)
(((u64)0xFFFFFFFFFFFFFFFF) << CS_INSTR_BUFFER_OFFSET_POINTER_POINTER_SHIFT)
#define CS_INSTR_BUFFER_OFFSET_POINTER_POINTER_GET(reg_val) \
(((reg_val)&CS_INSTR_BUFFER_OFFSET_POINTER_POINTER_MASK) >> CS_INSTR_BUFFER_OFFSET_POINTER_POINTER_SHIFT)
#define CS_INSTR_BUFFER_OFFSET_POINTER_POINTER_SET(reg_val, value) \
@@ -1448,6 +1448,9 @@
#define GLB_ACK_IRQ_MASK_IDLE_EVENT_SHIFT (26)
#define GLB_ACK_IRQ_MASK_IDLE_EVENT_MASK (0x1 << GLB_ACK_IRQ_MASK_IDLE_EVENT_SHIFT)
#define GLB_ACK_IRQ_MASK_IDLE_ENABLE_SHIFT GPU_U(10)
#define GLB_ACK_IRQ_MASK_IDLE_ENABLE_MASK (GPU_U(0x1) << GLB_ACK_IRQ_MASK_IDLE_ENABLE_SHIFT)
#define GLB_IDLE_TIMER (0x0080)
/* GLB_IDLE_TIMER register */
#define GLB_IDLE_TIMER_TIMEOUT_SHIFT (0)
@@ -1518,4 +1521,4 @@
(((value) << GLB_REQ_ITER_TRACE_ENABLE_SHIFT) & \
GLB_REQ_ITER_TRACE_ENABLE_MASK))
#endif /* _UAPI_GPU_CSF_REGISTERS_H_ */
#endif /* _KBASE_CSF_REGISTERS_H_ */

View File

@@ -29,14 +29,14 @@
#include <csf/mali_kbase_csf_trace_buffer.h>
#include <csf/ipa_control/mali_kbase_csf_ipa_control.h>
#include <mali_kbase_reset_gpu.h>
#include <linux/string.h>
/* Waiting timeout for GPU reset to complete */
#define GPU_RESET_TIMEOUT_MS (5000) /* 5 seconds */
#define DUMP_DWORDS_PER_LINE (4)
/* 16 characters needed for a 8 byte value in hex & 1 character for space */
#define DUMP_HEX_CHARS_PER_DWORD ((2 * 8) + 1)
#define DUMP_HEX_CHARS_PER_LINE \
(DUMP_DWORDS_PER_LINE * DUMP_HEX_CHARS_PER_DWORD)
/**
 * enum kbasep_soft_reset_status - Outcome of one GPU reset attempt, as
 *                                 returned by kbase_csf_reset_gpu_once()
 *
 * @RESET_SUCCESS:     The attempt finished without reporting a failure.
 * @SOFT_RESET_FAILED: An error was returned during the attempt before the
 *                     wait for the desired PM state; logged by the caller
 *                     as a soft-reset failure.
 * @L2_ON_FAILED:      The wait for the desired PM state failed and the L2 was
 *                     not in its desired state.
 * @MCU_REINIT_FAILED: The wait for the desired PM state failed with the L2 in
 *                     its desired state but the MCU not in its desired state.
 *                     The caller retries once with a full firmware reload.
 */
enum kbasep_soft_reset_status {
RESET_SUCCESS = 0,
SOFT_RESET_FAILED,
L2_ON_FAILED,
MCU_REINIT_FAILED
};
static inline bool
kbase_csf_reset_state_is_silent(enum kbase_csf_reset_gpu_state state)
@@ -259,8 +259,8 @@ static void kbase_csf_debug_dump_registers(struct kbase_device *kbdev)
static void kbase_csf_dump_firmware_trace_buffer(struct kbase_device *kbdev)
{
u8 *buf, *line_str;
unsigned int read_size;
u8 *buf, *p, *pnewline, *pend, *pendbuf;
unsigned int read_size, remaining_size;
struct firmware_trace_buffer *tb =
kbase_csf_firmware_get_trace_buffer(kbdev, FW_TRACE_BUF_NAME);
@@ -269,41 +269,53 @@ static void kbase_csf_dump_firmware_trace_buffer(struct kbase_device *kbdev)
return;
}
buf = kmalloc(PAGE_SIZE + DUMP_HEX_CHARS_PER_LINE + 1, GFP_KERNEL);
buf = kmalloc(PAGE_SIZE + 1, GFP_KERNEL);
if (buf == NULL) {
dev_err(kbdev->dev, "Short of memory, firmware trace dump skipped");
return;
}
line_str = &buf[PAGE_SIZE];
buf[PAGE_SIZE] = 0;
p = buf;
pendbuf = &buf[PAGE_SIZE];
dev_err(kbdev->dev, "Firmware trace buffer dump:");
while ((read_size = kbase_csf_firmware_trace_buffer_read_data(tb, buf,
PAGE_SIZE))) {
u64 *ptr = (u64 *)buf;
u32 num_dwords;
while ((read_size = kbase_csf_firmware_trace_buffer_read_data(tb, p,
pendbuf - p))) {
pend = p + read_size;
p = buf;
for (num_dwords = read_size / sizeof(u64);
num_dwords >= DUMP_DWORDS_PER_LINE;
num_dwords -= DUMP_DWORDS_PER_LINE) {
dev_err(kbdev->dev, "%016llx %016llx %016llx %016llx",
ptr[0], ptr[1], ptr[2], ptr[3]);
ptr += DUMP_DWORDS_PER_LINE;
while (p < pend && (pnewline = memchr(p, '\n', pend - p))) {
/* Null-terminate the string */
*pnewline = 0;
dev_err(kbdev->dev, "FW> %s", p);
p = pnewline + 1;
}
if (num_dwords) {
int pos = 0;
remaining_size = pend - p;
while (num_dwords--) {
pos += snprintf(line_str + pos,
DUMP_HEX_CHARS_PER_DWORD + 1,
"%016llx ", ptr[0]);
ptr++;
}
dev_err(kbdev->dev, "%s", line_str);
if (!remaining_size) {
p = buf;
} else if (remaining_size < PAGE_SIZE) {
/* Copy unfinished string to the start of the buffer */
memmove(buf, p, remaining_size);
p = &buf[remaining_size];
} else {
/* Print abnormal page-long string without newlines */
dev_err(kbdev->dev, "FW> %s", buf);
p = buf;
}
}
if (p != buf) {
/* Null-terminate and print last unfinished string */
*p = 0;
dev_err(kbdev->dev, "FW> %s", buf);
}
kfree(buf);
}
@@ -332,36 +344,12 @@ static void kbase_csf_hwcnt_on_reset_error(struct kbase_device *kbdev)
kbase_csf_scheduler_spin_unlock(kbdev, flags);
}
static int kbase_csf_reset_gpu_now(struct kbase_device *kbdev,
bool firmware_inited, bool silent)
static enum kbasep_soft_reset_status kbase_csf_reset_gpu_once(struct kbase_device *kbdev,
bool firmware_inited, bool silent)
{
unsigned long flags;
int err;
WARN_ON(kbdev->irq_reset_flush);
/* The reset must now be happening otherwise other threads will not
* have been synchronized with to stop their access to the HW
*/
#if KERNEL_VERSION(5, 3, 0) <= LINUX_VERSION_CODE
lockdep_assert_held_write(&kbdev->csf.reset.sem);
#elif KERNEL_VERSION(4, 10, 0) <= LINUX_VERSION_CODE
lockdep_assert_held_exclusive(&kbdev->csf.reset.sem);
#else
lockdep_assert_held(&kbdev->csf.reset.sem);
#endif
WARN_ON(!kbase_reset_gpu_is_active(kbdev));
/* Reset the scheduler state before disabling the interrupts as suspend
* of active CSG slots would also be done as a part of reset.
*/
if (likely(firmware_inited))
kbase_csf_scheduler_reset(kbdev);
cancel_work_sync(&kbdev->csf.firmware_reload_work);
dev_dbg(kbdev->dev, "Disable GPU hardware counters.\n");
/* This call will block until counters are disabled.
*/
kbase_hwcnt_context_disable(kbdev->hwcnt_gpu_ctx);
enum kbasep_soft_reset_status ret = RESET_SUCCESS;
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
spin_lock(&kbdev->mmu_mask_change);
@@ -380,8 +368,7 @@ static int kbase_csf_reset_gpu_now(struct kbase_device *kbdev,
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
dev_dbg(kbdev->dev, "Ensure that any IRQ handlers have finished\n");
/* Must be done without any locks IRQ handlers will take.
*/
/* Must be done without any locks IRQ handlers will take. */
kbase_synchronize_irqs(kbdev);
dev_dbg(kbdev->dev, "Flush out any in-flight work items\n");
@@ -421,10 +408,8 @@ static int kbase_csf_reset_gpu_now(struct kbase_device *kbdev,
mutex_unlock(&kbdev->pm.lock);
if (WARN_ON(err)) {
kbase_csf_hwcnt_on_reset_error(kbdev);
return err;
}
if (WARN_ON(err))
return SOFT_RESET_FAILED;
mutex_lock(&kbdev->mmu_hw_mutex);
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
@@ -441,20 +426,78 @@ static int kbase_csf_reset_gpu_now(struct kbase_device *kbdev,
err = kbase_pm_wait_for_desired_state(kbdev);
mutex_unlock(&kbdev->pm.lock);
if (WARN_ON(err)) {
kbase_csf_hwcnt_on_reset_error(kbdev);
return err;
if (err) {
if (!kbase_pm_l2_is_in_desired_state(kbdev))
ret = L2_ON_FAILED;
else if (!kbase_pm_mcu_is_in_desired_state(kbdev))
ret = MCU_REINIT_FAILED;
}
return ret;
}
/**
 * kbase_csf_reset_gpu_now - Run the GPU reset sequence, retrying once with a
 *                           full firmware reload if the MCU fails to re-init
 *
 * @kbdev:           Instance of a GPU platform device.
 * @firmware_inited: When true the scheduler state is reset first; the value
 *                   is also forwarded to every reset attempt.
 * @silent:          Forwarded to the first reset attempt; when false,
 *                   "Reset complete" is logged on success.
 *
 * The caller is expected to hold csf.reset.sem (asserted via lockdep) and the
 * reset is expected to be active. GPU hardware counters are disabled before
 * the first attempt and are re-enabled only when the reset succeeds; on
 * failure kbase_csf_hwcnt_on_reset_error() is invoked instead.
 *
 * Return: 0 on success, -1 if the reset could not be completed.
 */
static int kbase_csf_reset_gpu_now(struct kbase_device *kbdev, bool firmware_inited, bool silent)
{
	enum kbasep_soft_reset_status status;
	unsigned long irq_flags;

	WARN_ON(kbdev->irq_reset_flush);

	/* The reset must already be in progress, otherwise other threads will
	 * not have been synchronized with to stop their access to the HW.
	 */
#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 3, 0)
	lockdep_assert_held_write(&kbdev->csf.reset.sem);
#elif LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0)
	lockdep_assert_held_exclusive(&kbdev->csf.reset.sem);
#else
	lockdep_assert_held(&kbdev->csf.reset.sem);
#endif
	WARN_ON(!kbase_reset_gpu_is_active(kbdev));

	/* The scheduler state is reset ahead of the interrupts being disabled,
	 * because suspending the active CSG slots is part of that reset.
	 */
	if (likely(firmware_inited))
		kbase_csf_scheduler_reset(kbdev);

	cancel_work_sync(&kbdev->csf.firmware_reload_work);

	dev_dbg(kbdev->dev, "Disable GPU hardware counters.\n");
	/* Blocks until the counters are disabled. */
	kbase_hwcnt_context_disable(kbdev->hwcnt_gpu_ctx);

	status = kbase_csf_reset_gpu_once(kbdev, firmware_inited, silent);

	switch (status) {
	case SOFT_RESET_FAILED:
		dev_err(kbdev->dev, "Soft-reset failed");
		goto reset_failed;
	case L2_ON_FAILED:
		dev_err(kbdev->dev, "L2 power up failed after the soft-reset");
		goto reset_failed;
	case MCU_REINIT_FAILED:
		dev_err(kbdev->dev, "MCU re-init failed trying full firmware reload");
		/* The soft reset itself succeeded, so a full firmware reload
		 * is worth one more attempt.
		 */
		kbdev->csf.firmware_full_reload_needed = true;
		status = kbase_csf_reset_gpu_once(kbdev, firmware_inited, true);
		if (status != RESET_SUCCESS) {
			dev_err(kbdev->dev,
				"MCU Re-init failed even after trying full firmware reload, ret = [%d]",
				status);
			goto reset_failed;
		}
		break;
	default:
		/* RESET_SUCCESS: nothing to report. */
		break;
	}

	/* Re-enable GPU hardware counters */
	kbase_csf_scheduler_spin_lock(kbdev, &irq_flags);
	kbase_hwcnt_context_enable(kbdev->hwcnt_gpu_ctx);
	kbase_csf_scheduler_spin_unlock(kbdev, irq_flags);

	if (!silent)
		dev_err(kbdev->dev, "Reset complete");

	return 0;

reset_failed:
	kbase_csf_hwcnt_on_reset_error(kbdev);
	return -1;
}
static void kbase_csf_reset_gpu_worker(struct work_struct *data)
@@ -593,7 +636,7 @@ bool kbase_reset_gpu_is_active(struct kbase_device *kbdev)
int kbase_reset_gpu_wait(struct kbase_device *kbdev)
{
const long wait_timeout =
kbase_csf_timeout_in_jiffies(GPU_RESET_TIMEOUT_MS);
kbase_csf_timeout_in_jiffies(kbase_get_timeout_ms(kbdev, CSF_GPU_RESET_TIMEOUT));
long remaining;
/* Inform lockdep we might be trying to wait on a reset (as

View File

@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
* (C) COPYRIGHT 2018-2021 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2018-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -28,7 +28,7 @@
#include <tl/mali_kbase_tracepoints.h>
#include <backend/gpu/mali_kbase_pm_internal.h>
#include <linux/export.h>
#include <uapi/gpu/arm/bifrost/csf/mali_gpu_csf_registers.h>
#include <csf/mali_kbase_csf_registers.h>
#include <uapi/gpu/arm/bifrost/mali_base_kernel.h>
#include <mali_kbase_hwaccess_time.h>
@@ -246,7 +246,7 @@ static enum hrtimer_restart tick_timer_callback(struct hrtimer *timer)
*
* This function will start the scheduling tick hrtimer and is supposed to
* be called only from the tick work item function. The tick hrtimer should
* should not be active already.
* not be active already.
*/
static void start_tick_timer(struct kbase_device *kbdev)
{
@@ -372,7 +372,7 @@ static void assign_user_doorbell_to_queue(struct kbase_device *kbdev,
mutex_lock(&kbdev->csf.reg_lock);
/* If bind operation for the queue hasn't completed yet, then the
* the CSI can't be programmed for the queue
* CSI can't be programmed for the queue
* (even in stopped state) and so the doorbell also can't be assigned
* to it.
*/
@@ -406,6 +406,85 @@ static void scheduler_doorbell_init(struct kbase_device *kbdev)
WARN_ON(doorbell_nr != CSF_KERNEL_DOORBELL_NR);
}
/**
* update_on_slot_queues_offsets - Update active queues' INSERT & EXTRACT ofs
*
* @kbdev: Instance of a GPU platform device that implements a CSF interface.
*
* This function updates the EXTRACT offset for all queues which groups have
* been assigned a physical slot. These values could be used to detect a
* queue's true idleness status. This is intended to be an additional check
* on top of the GPU idle notification to account for race conditions.
* This function is supposed to be called only when GPU idle notification
* interrupt is received.
*/
static void update_on_slot_queues_offsets(struct kbase_device *kbdev)
{
struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
/* All CSGs have the same number of CSs */
size_t const max_streams = kbdev->csf.global_iface.groups[0].stream_num;
size_t i;
lockdep_assert_held(&scheduler->interrupt_lock);
/* csg_slots_idle_mask is not used here for the looping, as it could get
* updated concurrently when Scheduler re-evaluates the idle status of
* the CSGs for which idle notification was received previously.
*/
for_each_set_bit(i, scheduler->csg_inuse_bitmap, kbdev->csf.global_iface.group_num) {
struct kbase_queue_group *const group = scheduler->csg_slots[i].resident_group;
size_t j;
if (WARN_ON(!group))
continue;
for (j = 0; j < max_streams; ++j) {
struct kbase_queue *const queue = group->bound_queues[j];
if (queue) {
u64 const *const output_addr =
(u64 const *)(queue->user_io_addr + PAGE_SIZE);
queue->extract_ofs = output_addr[CS_EXTRACT_LO / sizeof(u64)];
}
}
}
}
/**
 * enqueue_gpu_idle_work - Queue the scheduler's GPU idle work item
 *
 * @scheduler: Pointer to the CSF scheduler.
 *
 * Clears the gpu_no_longer_idle flag and then queues gpu_idle_work on the
 * scheduler's idle_wq workqueue.
 */
static void enqueue_gpu_idle_work(struct kbase_csf_scheduler *const scheduler)
{
/* The flag is reset before the work item is queued. */
atomic_set(&scheduler->gpu_no_longer_idle, false);
queue_work(scheduler->idle_wq, &scheduler->gpu_idle_work);
}
void kbase_csf_scheduler_process_gpu_idle_event(struct kbase_device *kbdev)
{
struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
int non_idle_offslot_grps;
bool can_suspend_on_idle;
lockdep_assert_held(&scheduler->interrupt_lock);
non_idle_offslot_grps = atomic_read(&scheduler->non_idle_offslot_grps);
can_suspend_on_idle = kbase_pm_idle_groups_sched_suspendable(kbdev);
KBASE_KTRACE_ADD(kbdev, SCHEDULER_CAN_IDLE, NULL,
((u64)(u32)non_idle_offslot_grps) | (((u64)can_suspend_on_idle) << 32));
if (!non_idle_offslot_grps) {
if (can_suspend_on_idle) {
/* The GPU idle worker relies on update_on_slot_queues_offsets() to have
* finished. It's queued before to reduce the time it takes till execution
* but it'll eventually be blocked by the scheduler->interrupt_lock.
*/
enqueue_gpu_idle_work(scheduler);
update_on_slot_queues_offsets(kbdev);
}
} else {
/* Advance the scheduling tick to get the non-idle suspended groups loaded soon */
kbase_csf_scheduler_advance_tick_nolock(kbdev);
}
}
u32 kbase_csf_scheduler_get_nr_active_csgs_locked(struct kbase_device *kbdev)
{
u32 nr_active_csgs;
@@ -551,54 +630,6 @@ static bool scheduler_timer_is_enabled_nolock(struct kbase_device *kbdev)
return kbdev->csf.scheduler.timer_enabled;
}
/**
 * enable_gpu_idle_fw_timer - Enable the firmware GPU idle timer if needed
 *
 * @kbdev: Pointer to the device.
 *
 * Must be called with scheduler->lock held. Does nothing when
 * gpu_idle_fw_timer_enabled is already set; otherwise the flag is set and
 * kbase_csf_firmware_enable_gpu_idle_timer() is called, both under
 * interrupt_lock.
 */
static void enable_gpu_idle_fw_timer(struct kbase_device *kbdev)
{
struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
unsigned long flags;
lockdep_assert_held(&scheduler->lock);
/* Early-out check is made before interrupt_lock is taken. */
if (scheduler->gpu_idle_fw_timer_enabled)
return;
spin_lock_irqsave(&scheduler->interrupt_lock, flags);
/* Updating the timer_enabled flag requires holding interrupt_lock */
scheduler->gpu_idle_fw_timer_enabled = true;
kbase_csf_firmware_enable_gpu_idle_timer(kbdev);
spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
}
/**
 * disable_gpu_idle_fw_timer_locked - Disable the firmware GPU idle timer,
 *                                    with interrupt_lock already held
 *
 * @kbdev: Pointer to the device.
 *
 * Must be called with both scheduler->lock and scheduler->interrupt_lock
 * held. If gpu_idle_fw_timer_enabled is set, the flag is cleared and
 * kbase_csf_firmware_disable_gpu_idle_timer() is called; otherwise nothing
 * is done.
 */
static void disable_gpu_idle_fw_timer_locked(struct kbase_device *kbdev)
{
struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
lockdep_assert_held(&scheduler->lock);
lockdep_assert_held(&scheduler->interrupt_lock);
/* Update of the timer_enabled flag requires holding interrupt_lock */
if (scheduler->gpu_idle_fw_timer_enabled) {
scheduler->gpu_idle_fw_timer_enabled = false;
kbase_csf_firmware_disable_gpu_idle_timer(kbdev);
}
}
/**
 * disable_gpu_idle_fw_timer - Disable the firmware GPU idle timer if enabled
 *
 * @kbdev: Pointer to the device.
 *
 * Must be called with scheduler->lock held. Does nothing when
 * gpu_idle_fw_timer_enabled is not set; otherwise takes interrupt_lock and
 * calls disable_gpu_idle_fw_timer_locked().
 */
static void disable_gpu_idle_fw_timer(struct kbase_device *kbdev)
{
struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
unsigned long flags;
lockdep_assert_held(&scheduler->lock);
/* Early-out check is made before interrupt_lock is taken. */
if (!scheduler->gpu_idle_fw_timer_enabled)
return;
spin_lock_irqsave(&scheduler->interrupt_lock, flags);
disable_gpu_idle_fw_timer_locked(kbdev);
spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
}
/**
* scheduler_pm_active_handle_suspend() - Acquire the PM reference count for
* Scheduler
@@ -631,12 +662,15 @@ static int scheduler_pm_active_handle_suspend(struct kbase_device *kbdev,
if (!prev_count) {
ret = kbase_pm_context_active_handle_suspend(kbdev,
suspend_handler);
if (ret) {
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
/* Invoke the PM state machines again as the change in MCU
* desired status, due to the update of scheduler.pm_active_count,
* may be missed by the thread that called pm_wait_for_desired_state()
*/
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
if (ret)
kbdev->csf.scheduler.pm_active_count--;
kbase_pm_update_state(kbdev);
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
}
kbase_pm_update_state(kbdev);
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
}
return ret;
@@ -716,8 +750,16 @@ static void scheduler_pm_idle(struct kbase_device *kbdev)
kbdev->csf.scheduler.pm_active_count--;
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
if (prev_count == 1)
if (prev_count == 1) {
kbase_pm_context_idle(kbdev);
/* Invoke the PM state machines again as the change in MCU
* desired status, due to the update of scheduler.pm_active_count,
* may be missed by the thread that called pm_wait_for_desired_state()
*/
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
kbase_pm_update_state(kbdev);
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
}
}
#ifdef KBASE_PM_RUNTIME
@@ -746,8 +788,16 @@ static void scheduler_pm_idle_before_sleep(struct kbase_device *kbdev)
kbdev->pm.backend.exit_gpu_sleep_mode = false;
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
if (prev_count == 1)
if (prev_count == 1) {
kbase_pm_context_idle(kbdev);
/* Invoke the PM state machines again as the change in MCU
* desired status, due to the update of scheduler.pm_active_count,
* may be missed by the thread that called pm_wait_for_desired_state()
*/
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
kbase_pm_update_state(kbdev);
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
}
}
#endif
@@ -1735,6 +1785,13 @@ bool save_slot_cs(struct kbase_csf_cmd_stream_group_info const *const ginfo,
u32 status = kbase_csf_firmware_cs_output(stream, CS_STATUS_WAIT);
bool is_waiting = false;
#if IS_ENABLED(CONFIG_DEBUG_FS)
u64 cmd_ptr = kbase_csf_firmware_cs_output(stream, CS_STATUS_CMD_PTR_LO);
cmd_ptr |= (u64)kbase_csf_firmware_cs_output(stream, CS_STATUS_CMD_PTR_HI) << 32;
queue->saved_cmd_ptr = cmd_ptr;
#endif
KBASE_KTRACE_ADD_CSF_GRP_Q(stream->kbdev, QUEUE_SYNC_STATUS_WAIT,
queue->group, queue, status);
@@ -1948,7 +2005,7 @@ void remove_group_from_runnable(struct kbase_csf_scheduler *const scheduler,
cancel_tick_timer(kctx->kbdev);
WARN_ON(atomic_read(&scheduler->non_idle_offslot_grps));
if (scheduler->state != SCHED_SUSPENDED)
queue_work(system_wq, &scheduler->gpu_idle_work);
enqueue_gpu_idle_work(scheduler);
}
KBASE_KTRACE_ADD_CSF_GRP(kctx->kbdev, SCHEDULER_TOP_GRP, scheduler->top_grp,
scheduler->num_active_address_spaces |
@@ -2078,7 +2135,7 @@ static void update_offslot_non_idle_cnt_on_grp_suspend(
}
}
static bool confirm_cmd_buf_empty(struct kbase_queue *queue)
static bool confirm_cmd_buf_empty(struct kbase_queue const *queue)
{
bool cs_empty;
bool cs_idle;
@@ -2090,8 +2147,8 @@ static bool confirm_cmd_buf_empty(struct kbase_queue *queue)
u32 glb_version = iface->version;
u64 *input_addr = (u64 *)queue->user_io_addr;
u64 *output_addr = (u64 *)(queue->user_io_addr + PAGE_SIZE);
u64 const *input_addr = (u64 const *)queue->user_io_addr;
u64 const *output_addr = (u64 const *)(queue->user_io_addr + PAGE_SIZE);
if (glb_version >= kbase_csf_interface_version(1, 0, 0)) {
/* CS_STATUS_SCOREBOARD supported from CSF 1.0 */
@@ -2605,7 +2662,7 @@ void kbase_csf_scheduler_group_deschedule(struct kbase_queue_group *group)
if (kbase_csf_scheduler_wait_mcu_active(kbdev))
dev_warn(
kbdev->dev,
"[%llu] Wait for MCU active failed when when terminating group %d of context %d_%d on slot %d",
"[%llu] Wait for MCU active failed when terminating group %d of context %d_%d on slot %d",
kbase_backend_get_cycle_cnt(kbdev),
group->handle, group->kctx->tgid,
group->kctx->id, group->csg_nr);
@@ -2704,6 +2761,7 @@ static int scheduler_group_schedule(struct kbase_queue_group *group)
}
} else if (!queue_group_scheduled_locked(group)) {
int new_val;
insert_group_to_runnable(&kbdev->csf.scheduler, group,
KBASE_CSF_GROUP_RUNNABLE);
/* A new group into the scheduler */
@@ -3033,9 +3091,9 @@ static void program_suspending_csg_slots(struct kbase_device *kbdev)
struct kbase_queue_group *group =
scheduler->csg_slots[i].resident_group;
if (WARN_ON(!csg_slot_stopped_locked(kbdev, (s8)i))) {
if (WARN_ON(!csg_slot_stopped_locked(kbdev, (s8)i)))
continue;
}
/* The on slot csg is now stopped */
clear_bit(i, slot_mask);
@@ -3533,13 +3591,13 @@ static void scheduler_group_check_protm_enter(struct kbase_device *const kbdev,
* GPUCORE-21394.
*/
/* Disable the idle timer */
disable_gpu_idle_fw_timer_locked(kbdev);
/* Switch to protected mode */
scheduler->active_protm_grp = input_grp;
KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_ENTER_PROTM,
input_grp, 0u);
/* Reset the tick's pending protm seq number */
scheduler->tick_protm_pending_seq =
KBASEP_TICK_PROTM_PEND_SCAN_SEQ_NR_INVALID;
kbase_csf_enter_protected_mode(kbdev);
spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
@@ -3637,6 +3695,7 @@ static void scheduler_ctx_scan_groups(struct kbase_device *kbdev,
struct kbase_queue_group *group;
lockdep_assert_held(&scheduler->lock);
lockdep_assert_held(&scheduler->interrupt_lock);
if (WARN_ON(priority < 0) ||
WARN_ON(priority >= KBASE_QUEUE_GROUP_PRIORITY_COUNT))
return;
@@ -3656,6 +3715,14 @@ static void scheduler_ctx_scan_groups(struct kbase_device *kbdev,
/* Set the scanout sequence number, starting from 0 */
group->scan_seq_num = scheduler->csg_scan_count_for_tick++;
if (scheduler->tick_protm_pending_seq ==
KBASEP_TICK_PROTM_PEND_SCAN_SEQ_NR_INVALID) {
if (!bitmap_empty(group->protm_pending_bitmap,
kbdev->csf.global_iface.groups[0].stream_num))
scheduler->tick_protm_pending_seq =
group->scan_seq_num;
}
if (queue_group_idle_locked(group)) {
if (on_slot_group_idle_locked(group))
list_add_tail(&group->link_to_schedule,
@@ -3738,6 +3805,7 @@ static void scheduler_rotate_groups(struct kbase_device *kbdev)
WARN_ON(top_grp->kctx != top_ctx);
if (!WARN_ON(list_empty(list))) {
struct kbase_queue_group *new_head_grp;
list_move_tail(&top_grp->link, list);
new_head_grp = (!list_empty(list)) ?
list_first_entry(list, struct kbase_queue_group, link) :
@@ -3774,6 +3842,7 @@ static void scheduler_rotate_ctxs(struct kbase_device *kbdev)
if (!WARN_ON(!found)) {
struct kbase_context *new_head_kctx;
list_move_tail(&pos->csf.link, list);
KBASE_KTRACE_ADD(kbdev, SCHEDULER_ROTATE_RUNNABLE, pos,
0u);
@@ -4042,6 +4111,59 @@ static int suspend_active_groups_on_powerdown(struct kbase_device *kbdev,
return 0;
}
/**
 * all_on_slot_groups_remained_idle - Live check for all groups' idleness
 *
 * @kbdev: Pointer to the device.
 *
 * For every CSG slot flagged in the scheduler's csg_slots_idle_mask, walk the
 * queues bound to the resident group and compare the extract offset currently
 * present in the queue's output page against the value cached in
 * queue->extract_ofs. A mismatch means work has been executed since the
 * cached value was recorded. This function is used to handle a race condition
 * between firmware reporting GPU idle and userspace submitting more work by
 * directly ringing a doorbell.
 *
 * Both scheduler->lock and scheduler->interrupt_lock must be held by the
 * caller.
 *
 * Return: false if any queue inside any resident idle-flagged group has
 * processed work since its extract offset was cached, true otherwise.
 */
static bool all_on_slot_groups_remained_idle(struct kbase_device *kbdev)
{
	struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
	/* All CSGs have the same number of CSs */
	size_t const max_streams = kbdev->csf.global_iface.groups[0].stream_num;
	size_t i;

	lockdep_assert_held(&scheduler->lock);
	lockdep_assert_held(&scheduler->interrupt_lock);

	for_each_set_bit(i, scheduler->csg_slots_idle_mask,
			 kbdev->csf.global_iface.group_num) {
		struct kbase_queue_group *const group =
			scheduler->csg_slots[i].resident_group;
		size_t j;

		for (j = 0; j < max_streams; ++j) {
			struct kbase_queue const *const queue =
				group->bound_queues[j];
			u64 const *output_addr;
			u64 cur_extract_ofs;

			/* Skip unbound CS indices, and queues that have no
			 * kernel mapping of their user I/O pages: there is no
			 * output page to read, so dereferencing
			 * user_io_addr + PAGE_SIZE would fault.
			 * NOTE(review): presumably this covers queues that are
			 * bound but whose I/O pages are not mapped yet -
			 * confirm against the bind/mmap path.
			 */
			if (!queue || !queue->user_io_addr)
				continue;

			/* The output area is read one page past user_io_addr */
			output_addr = (u64 const *)(queue->user_io_addr + PAGE_SIZE);
			cur_extract_ofs = output_addr[CS_EXTRACT_LO / sizeof(u64)];
			if (cur_extract_ofs != queue->extract_ofs) {
				/* More work has been executed since the idle
				 * notification.
				 */
				return false;
			}
		}
	}

	return true;
}
static bool scheduler_idle_suspendable(struct kbase_device *kbdev)
{
bool suspend;
@@ -4055,18 +4177,28 @@ static bool scheduler_idle_suspendable(struct kbase_device *kbdev)
return false;
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
spin_lock(&scheduler->interrupt_lock);
if (scheduler->total_runnable_grps) {
spin_lock(&scheduler->interrupt_lock);
/* Check both on-slots and off-slots groups idle status */
suspend = kbase_csf_scheduler_all_csgs_idle(kbdev) &&
!atomic_read(&scheduler->non_idle_offslot_grps) &&
kbase_pm_idle_groups_sched_suspendable(kbdev);
spin_unlock(&scheduler->interrupt_lock);
} else
suspend = kbase_pm_no_runnables_sched_suspendable(kbdev);
/* Confirm that all groups are actually idle before proceeding with
* suspension as groups might potentially become active again without
* informing the scheduler in case userspace rings a doorbell directly.
*/
if (suspend && (unlikely(atomic_read(&scheduler->gpu_no_longer_idle)) ||
unlikely(!all_on_slot_groups_remained_idle(kbdev)))) {
dev_info(kbdev->dev,
"GPU suspension skipped due to active CSGs");
suspend = false;
}
spin_unlock(&scheduler->interrupt_lock);
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
return suspend;
@@ -4150,8 +4282,6 @@ static void gpu_idle_worker(struct work_struct *work)
}
mutex_lock(&scheduler->lock);
/* Cycle completed, disable the firmware idle timer */
disable_gpu_idle_fw_timer(kbdev);
scheduler_is_idle_suspendable = scheduler_idle_suspendable(kbdev);
if (scheduler_is_idle_suspendable) {
KBASE_KTRACE_ADD(kbdev, GPU_IDLE_HANDLING_START, NULL,
@@ -4177,6 +4307,7 @@ static void gpu_idle_worker(struct work_struct *work)
static int scheduler_prepare(struct kbase_device *kbdev)
{
struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
unsigned long flags;
int i;
lockdep_assert_held(&scheduler->lock);
@@ -4202,6 +4333,9 @@ static int scheduler_prepare(struct kbase_device *kbdev)
scheduler->num_csg_slots_for_tick = 0;
bitmap_zero(scheduler->csg_slots_prio_update, MAX_SUPPORTED_CSGS);
spin_lock_irqsave(&scheduler->interrupt_lock, flags);
scheduler->tick_protm_pending_seq =
KBASEP_TICK_PROTM_PEND_SCAN_SEQ_NR_INVALID;
/* Scan out to run groups */
for (i = 0; i < KBASE_QUEUE_GROUP_PRIORITY_COUNT; ++i) {
struct kbase_context *kctx;
@@ -4209,6 +4343,7 @@ static int scheduler_prepare(struct kbase_device *kbdev)
list_for_each_entry(kctx, &scheduler->runnable_kctxs, csf.link)
scheduler_ctx_scan_groups(kbdev, kctx, i);
}
spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
/* Update this tick's non-idle groups */
scheduler->non_idle_scanout_grps = scheduler->ngrp_to_schedule;
@@ -4237,42 +4372,17 @@ static int scheduler_prepare(struct kbase_device *kbdev)
return 0;
}
static void scheduler_handle_idle_timer_onoff(struct kbase_device *kbdev)
{
	struct kbase_csf_scheduler *const sched = &kbdev->csf.scheduler;

	lockdep_assert_held(&sched->lock);

	/* Once the scheduler apply step has run, non_idle_offslot_grps holds
	 * the end-point view of the count at the end of the active phase.
	 *
	 * Later changes (after the scheduler has dropped scheduler->lock)
	 * come from operations that are asynchronous to the scheduler, such
	 * as a group being killed (evicted), a new group being inserted, or a
	 * state transition triggered by a CQS wait-sync.
	 *
	 * The firmware idle timer is enabled only when there are no non-idle
	 * groups off-slot; if any non-idle group is off-slot the timer is
	 * disabled.
	 */
	if (!atomic_read(&sched->non_idle_offslot_grps))
		enable_gpu_idle_fw_timer(kbdev);
	else
		disable_gpu_idle_fw_timer(kbdev);
}
/**
* keep_lru_on_slots() - Check the condition for LRU is met.
*
* @kbdev: Pointer to the device.
*
* This function tries to maintain the Last-Recent-Use case on slots, when
* the scheduler has no non-idle off-slot CSGs for a replacement
* consideration. This effectively extends the previous scheduling results
* for the new one. That is, the last recent used CSGs are retained on slots
* for the new tick/tock action.
*
* @kbdev: Pointer to the device.
*
* Return: true for avoiding on-slot CSGs changes (i.e. keep existing LRU),
* otherwise false.
*/
@@ -4294,10 +4404,6 @@ static bool keep_lru_on_slots(struct kbase_device *kbdev)
*/
keep_lru = kbase_csf_scheduler_all_csgs_idle(kbdev);
if (keep_lru && !scheduler->gpu_idle_fw_timer_enabled) {
scheduler->gpu_idle_fw_timer_enabled = true;
kbase_csf_firmware_enable_gpu_idle_timer(kbdev);
}
spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
dev_dbg(kbdev->dev, "Keep_LRU: %d, CSGs on-slots: %d\n",
@@ -4311,6 +4417,8 @@ static bool keep_lru_on_slots(struct kbase_device *kbdev)
* prepare_fast_local_tock() - making preparation arrangement for exercising
* a fast local tock inside scheduling-actions.
*
* @kbdev: Pointer to the GPU device.
*
* The function assumes that a scheduling action of firing a fast local tock
* call (i.e. an equivalent tock action without dropping the lock) is desired
* if there are idle onslot CSGs. The function updates those affected CSGs'
@@ -4320,8 +4428,6 @@ static bool keep_lru_on_slots(struct kbase_device *kbdev)
* plus some potential newly idle CSGs in the scheduling action committing
* steps.
*
* @kbdev: Pointer to the GPU device.
*
* Return: number of on-slots CSGs that can be considered for replacing.
*/
static int prepare_fast_local_tock(struct kbase_device *kbdev)
@@ -4408,6 +4514,17 @@ static void schedule_actions(struct kbase_device *kbdev, bool is_tick)
redo_local_tock:
scheduler_prepare(kbdev);
/* Need to specifically enqueue the GPU idle work if there are no groups
* to schedule despite the runnable groups. This scenario will happen
* if System suspend is done when all groups are idle and no work
* is submitted for the groups after the System resume.
*/
if (unlikely(!scheduler->ngrp_to_schedule &&
scheduler->total_runnable_grps)) {
dev_dbg(kbdev->dev, "No groups to schedule in the tick");
enqueue_gpu_idle_work(scheduler);
return;
}
spin_lock_irqsave(&scheduler->interrupt_lock, flags);
protm_grp = scheduler->active_protm_grp;
@@ -4423,6 +4540,7 @@ redo_local_tock:
*/
if (protm_grp && scheduler->top_grp == protm_grp) {
int new_val;
dev_dbg(kbdev->dev, "Scheduler keep protm exec: group-%d",
protm_grp->handle);
new_val = atomic_dec_return(&scheduler->non_idle_offslot_grps);
@@ -4452,11 +4570,6 @@ redo_local_tock:
scheduler_apply(kbdev);
/* Post-apply, all the committed groups in this tick are on
* slots, time to arrange the idle timer on/off decision.
*/
scheduler_handle_idle_timer_onoff(kbdev);
/* Scheduler is dropping the exec of the previous protm_grp,
* Until the protm quit completes, the GPU is effectively
* locked in the secure mode.
@@ -4491,7 +4604,6 @@ redo_local_tock:
}
spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
return;
}
/**
@@ -4576,7 +4688,7 @@ static void schedule_on_tock(struct work_struct *work)
scheduler->state = SCHED_INACTIVE;
if (!scheduler->total_runnable_grps)
queue_work(system_wq, &scheduler->gpu_idle_work);
enqueue_gpu_idle_work(scheduler);
mutex_unlock(&scheduler->lock);
kbase_reset_gpu_allow(kbdev);
@@ -4627,8 +4739,9 @@ static void schedule_on_tick(struct work_struct *work)
dev_dbg(kbdev->dev,
"scheduling for next tick, num_runnable_groups:%u\n",
scheduler->total_runnable_grps);
} else if (!scheduler->total_runnable_grps)
queue_work(system_wq, &scheduler->gpu_idle_work);
} else if (!scheduler->total_runnable_grps) {
enqueue_gpu_idle_work(scheduler);
}
scheduler->state = SCHED_INACTIVE;
mutex_unlock(&scheduler->lock);
@@ -5044,7 +5157,6 @@ static void firmware_aliveness_monitor(struct work_struct *work)
exit:
mutex_unlock(&kbdev->csf.scheduler.lock);
kbase_reset_gpu_allow(kbdev);
return;
}
int kbase_csf_scheduler_group_copy_suspend_buf(struct kbase_queue_group *group,
@@ -5289,6 +5401,8 @@ void kbase_csf_scheduler_group_protm_enter(struct kbase_queue_group *group)
mutex_lock(&scheduler->lock);
if (group->run_state == KBASE_CSF_GROUP_IDLE)
group->run_state = KBASE_CSF_GROUP_RUNNABLE;
/* Check if the group is now eligible for execution in protected mode. */
if (scheduler_get_protm_enter_async_group(kbdev, group))
scheduler_group_check_protm_enter(kbdev, group);
@@ -5457,6 +5571,11 @@ static void check_sync_update_in_sleep_mode(struct kbase_device *kbdev)
continue;
if (check_sync_update_for_on_slot_group(group)) {
/* As sync update has been performed for an on-slot
* group, when MCU is in sleep state, ring the doorbell
* so that FW can re-evaluate the SYNC_WAIT on wakeup.
*/
kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR);
scheduler_wakeup(kbdev, true);
return;
}
@@ -5529,6 +5648,7 @@ enum kbase_csf_event_callback_action check_group_sync_update_cb(void *param)
struct kbase_context *const kctx = param;
KBASE_KTRACE_ADD(kctx->kbdev, SYNC_UPDATE_EVENT, kctx, 0u);
queue_work(kctx->csf.sched.sync_update_wq,
&kctx->csf.sched.sync_update_work);
@@ -5610,6 +5730,14 @@ int kbase_csf_scheduler_early_init(struct kbase_device *kbdev)
dev_err(kbdev->dev, "Failed to allocate scheduler workqueue\n");
return -ENOMEM;
}
scheduler->idle_wq = alloc_ordered_workqueue(
"csf_scheduler_gpu_idle_wq", WQ_HIGHPRI);
if (!scheduler->idle_wq) {
dev_err(kbdev->dev,
"Failed to allocate GPU idle scheduler workqueue\n");
destroy_workqueue(kbdev->csf.scheduler.wq);
return -ENOMEM;
}
INIT_WORK(&scheduler->tick_work, schedule_on_tick);
INIT_DEFERRABLE_WORK(&scheduler->tock_work, schedule_on_tock);
@@ -5636,11 +5764,11 @@ int kbase_csf_scheduler_early_init(struct kbase_device *kbdev)
scheduler->last_schedule = 0;
scheduler->tock_pending_request = false;
scheduler->active_protm_grp = NULL;
scheduler->gpu_idle_fw_timer_enabled = false;
scheduler->csg_scheduling_period_ms = CSF_SCHEDULER_TIME_TICK_MS;
scheduler_doorbell_init(kbdev);
INIT_WORK(&scheduler->gpu_idle_work, gpu_idle_worker);
atomic_set(&scheduler->gpu_no_longer_idle, false);
atomic_set(&scheduler->non_idle_offslot_grps, 0);
hrtimer_init(&scheduler->tick_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
@@ -5684,6 +5812,8 @@ void kbase_csf_scheduler_term(struct kbase_device *kbdev)
void kbase_csf_scheduler_early_term(struct kbase_device *kbdev)
{
if (kbdev->csf.scheduler.idle_wq)
destroy_workqueue(kbdev->csf.scheduler.idle_wq);
if (kbdev->csf.scheduler.wq)
destroy_workqueue(kbdev->csf.scheduler.wq);
}
@@ -5715,7 +5845,7 @@ static void scheduler_enable_tick_timer_nolock(struct kbase_device *kbdev)
enqueue_tick_work(kbdev);
dev_dbg(kbdev->dev, "Re-enabling the scheduler timer\n");
} else if (scheduler->state != SCHED_SUSPENDED) {
queue_work(system_wq, &scheduler->gpu_idle_work);
enqueue_gpu_idle_work(scheduler);
}
}
@@ -5805,8 +5935,6 @@ int kbase_csf_scheduler_pm_suspend(struct kbase_device *kbdev)
mutex_lock(&scheduler->lock);
disable_gpu_idle_fw_timer(kbdev);
#ifdef KBASE_PM_RUNTIME
/* If scheduler is in sleeping state, then MCU needs to be activated
* to suspend CSGs.
@@ -5959,7 +6087,7 @@ void kbase_csf_scheduler_reval_idleness_post_sleep(struct kbase_device *kbdev)
&kbdev->csf.global_iface.groups[csg_nr];
bool csg_idle;
if (!kbdev->csf.scheduler.csg_slots[csg_nr].resident_group)
if (!kbdev->csf.scheduler.csg_slots[csg_nr].resident_group)
continue;
csg_idle =

View File

@@ -569,6 +569,15 @@ void kbase_csf_scheduler_reval_idleness_post_sleep(struct kbase_device *kbdev);
int kbase_csf_scheduler_handle_runtime_suspend(struct kbase_device *kbdev);
#endif
/**
* kbase_csf_scheduler_process_gpu_idle_event() - Process GPU idle IRQ
*
* @kbdev: Pointer to the device
*
* This function is called when a GPU idle IRQ has been raised.
*/
void kbase_csf_scheduler_process_gpu_idle_event(struct kbase_device *kbdev);
/**
* kbase_csf_scheduler_get_nr_active_csgs() - Get the number of active CSGs
*

View File

@@ -82,7 +82,7 @@ static struct kbase_csf_tiler_heap_chunk *get_last_chunk(
* Unless the @chunk is the first in the kernel's list of chunks belonging to
* a given tiler heap, this function stores the size and address of the @chunk
* in the header of the preceding chunk. This requires the GPU memory region
* containing the header to be be mapped temporarily, which can fail.
* containing the header to be mapped temporarily, which can fail.
*
* Return: 0 if successful or a negative error code on failure.
*/
@@ -204,8 +204,8 @@ static int create_chunk(struct kbase_csf_tiler_heap *const heap,
/* Allocate GPU memory for the new chunk. */
INIT_LIST_HEAD(&chunk->link);
chunk->region = kbase_mem_alloc(kctx, nr_pages, nr_pages, 0, &flags,
&chunk->gpu_va, mmu_sync_info);
chunk->region =
kbase_mem_alloc(kctx, nr_pages, nr_pages, 0, &flags, &chunk->gpu_va, mmu_sync_info);
if (unlikely(!chunk->region)) {
dev_err(kctx->kbdev->dev,
@@ -464,21 +464,18 @@ int kbase_csf_tiler_heap_init(struct kbase_context *const kctx,
err = -ENOMEM;
} else {
err = create_initial_chunks(heap, initial_chunks);
if (unlikely(err)) {
kbase_csf_heap_context_allocator_free(ctx_alloc,
heap->gpu_va);
}
if (unlikely(err))
kbase_csf_heap_context_allocator_free(ctx_alloc, heap->gpu_va);
}
if (unlikely(err)) {
kfree(heap);
} else {
struct kbase_csf_tiler_heap_chunk const *first_chunk =
list_first_entry(&heap->chunks_list,
struct kbase_csf_tiler_heap_chunk, link);
struct kbase_csf_tiler_heap_chunk const *chunk = list_first_entry(
&heap->chunks_list, struct kbase_csf_tiler_heap_chunk, link);
*heap_gpu_va = heap->gpu_va;
*first_chunk_va = first_chunk->gpu_va;
*first_chunk_va = chunk->gpu_va;
mutex_lock(&kctx->csf.tiler_heaps.lock);
kctx->csf.tiler_heaps.nr_of_heaps++;
@@ -488,17 +485,25 @@ int kbase_csf_tiler_heap_init(struct kbase_context *const kctx,
KBASE_TLSTREAM_AUX_TILER_HEAP_STATS(
kctx->kbdev, kctx->id, heap->heap_id,
PFN_UP(heap->chunk_size * heap->max_chunks),
PFN_UP(heap->chunk_size * heap->chunk_count),
heap->max_chunks, heap->chunk_size, heap->chunk_count,
heap->target_in_flight, 0);
PFN_UP(heap->chunk_size * heap->chunk_count), heap->max_chunks,
heap->chunk_size, heap->chunk_count, heap->target_in_flight, 0);
dev_dbg(kctx->kbdev->dev, "Created tiler heap 0x%llX\n",
heap->gpu_va);
#if defined(CONFIG_MALI_VECTOR_DUMP)
list_for_each_entry(chunk, &heap->chunks_list, link) {
KBASE_TLSTREAM_JD_TILER_HEAP_CHUNK_ALLOC(
kctx->kbdev, kctx->id, heap->heap_id, chunk->gpu_va);
}
#endif
dev_dbg(kctx->kbdev->dev, "Created tiler heap 0x%llX\n", heap->gpu_va);
mutex_unlock(&kctx->csf.tiler_heaps.lock);
kctx->running_total_tiler_heap_nr_chunks += heap->chunk_count;
kctx->running_total_tiler_heap_memory += heap->chunk_size * heap->chunk_count;
if (kctx->running_total_tiler_heap_memory > kctx->peak_total_tiler_heap_memory)
kctx->peak_total_tiler_heap_memory = kctx->running_total_tiler_heap_memory;
kctx->running_total_tiler_heap_memory +=
heap->chunk_size * heap->chunk_count;
if (kctx->running_total_tiler_heap_memory >
kctx->peak_total_tiler_heap_memory)
kctx->peak_total_tiler_heap_memory =
kctx->running_total_tiler_heap_memory;
}
return err;
}
@@ -609,6 +614,16 @@ int kbase_csf_tiler_heap_alloc_new_chunk(struct kbase_context *kctx,
if (likely(heap)) {
err = alloc_new_chunk(heap, nr_in_flight, pending_frag_count,
new_chunk_ptr);
if (likely(!err)) {
/* update total and peak tiler heap memory record */
kctx->running_total_tiler_heap_nr_chunks++;
kctx->running_total_tiler_heap_memory += heap->chunk_size;
if (kctx->running_total_tiler_heap_memory >
kctx->peak_total_tiler_heap_memory)
kctx->peak_total_tiler_heap_memory =
kctx->running_total_tiler_heap_memory;
}
KBASE_TLSTREAM_AUX_TILER_HEAP_STATS(
kctx->kbdev, kctx->id, heap->heap_id,

View File

@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
* (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2020-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -28,14 +28,14 @@ struct kbase_context;
#define MALI_CSF_TILER_HEAP_DEBUGFS_VERSION 0
/**
* kbase_csf_tiler_heap_debugfs_init() - Create a debugfs entry for per context tiler heap
* kbase_csf_tiler_heap_debugfs_init - Create a debugfs entry for per context tiler heap
*
* @kctx: The kbase_context for which to create the debugfs entry
*/
void kbase_csf_tiler_heap_debugfs_init(struct kbase_context *kctx);
/**
* kbase_csf_tiler_heap_total_debugfs_init() - Create a debugfs entry for per context tiler heap
* kbase_csf_tiler_heap_total_debugfs_init - Create a debugfs entry for per context tiler heap
*
* @kctx: The kbase_context for which to create the debugfs entry
*/

View File

@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
* (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2020-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -59,18 +59,18 @@
/**
* struct kbase_csf_tiler_heap_chunk - A tiler heap chunk managed by the kernel
*
* Chunks are allocated upon initialization of a tiler heap or in response to
* out-of-memory events from the firmware. Chunks are always fully backed by
* physical memory to avoid the overhead of processing GPU page faults. The
* allocated GPU memory regions are linked together independent of the list of
* kernel objects of this type.
*
* @link: Link to this chunk in a list of chunks belonging to a
* @kbase_csf_tiler_heap.
* @region: Pointer to the GPU memory region allocated for the chunk.
* @gpu_va: GPU virtual address of the start of the memory region.
* This points to the header of the chunk and not to the low address
* of free memory within it.
*
* Chunks are allocated upon initialization of a tiler heap or in response to
* out-of-memory events from the firmware. Chunks are always fully backed by
* physical memory to avoid the overhead of processing GPU page faults. The
* allocated GPU memory regions are linked together independent of the list of
* kernel objects of this type.
*/
struct kbase_csf_tiler_heap_chunk {
struct list_head link;

View File

@@ -139,8 +139,7 @@ static ssize_t progress_timeout_show(struct device * const dev,
}
static DEVICE_ATTR(progress_timeout, 0644, progress_timeout_show,
progress_timeout_store);
static DEVICE_ATTR_RW(progress_timeout);
int kbase_csf_timeout_init(struct kbase_device *const kbdev)
{

View File

@@ -80,9 +80,8 @@ static int kbase_csf_tl_debugfs_poll_interval_write(void *data, u64 val)
struct kbase_device *kbdev = (struct kbase_device *)data;
struct kbase_csf_tl_reader *self = &kbdev->timeline->csf_tl_reader;
if (val > KBASE_CSF_TL_READ_INTERVAL_MAX || val < KBASE_CSF_TL_READ_INTERVAL_MIN) {
if (val > KBASE_CSF_TL_READ_INTERVAL_MAX || val < KBASE_CSF_TL_READ_INTERVAL_MIN)
return -EINVAL;
}
self->timer_interval = (u32)val;
@@ -96,7 +95,7 @@ DEFINE_DEBUGFS_ATTRIBUTE(kbase_csf_tl_poll_interval_fops,
void kbase_csf_tl_reader_debugfs_init(struct kbase_device *kbdev)
{
debugfs_create_file("csf_tl_poll_interval_in_ms", S_IRUGO | S_IWUSR,
debugfs_create_file("csf_tl_poll_interval_in_ms", 0644,
kbdev->debugfs_instr_directory, kbdev,
&kbase_csf_tl_poll_interval_fops);
@@ -406,9 +405,8 @@ static int tl_reader_init_late(
return -1;
}
if (kbase_ts_converter_init(&self->ts_converter, kbdev)) {
if (kbase_ts_converter_init(&self->ts_converter, kbdev))
return -1;
}
self->kbdev = kbdev;
self->trace_buffer = tb;

View File

@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
* (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -133,14 +133,12 @@ void kbase_csf_tl_reader_init(struct kbase_csf_tl_reader *self,
void kbase_csf_tl_reader_term(struct kbase_csf_tl_reader *self);
/**
* kbase_csf_tl_reader_flush_buffer() -
* Flush trace from buffer into CSFFW timeline stream.
* kbase_csf_tl_reader_flush_buffer() - Flush trace from buffer into CSFFW timeline stream.
*
* @self: CSFFW TL Reader instance.
*
* Return: Zero on success, negative error code (EBUSY) otherwise
*/
int kbase_csf_tl_reader_flush_buffer(struct kbase_csf_tl_reader *self);
/**

View File

@@ -179,13 +179,13 @@ int kbase_csf_firmware_trace_buffers_init(struct kbase_device *kbdev)
extract_gpu_va =
(kbdev->csf.firmware_trace_buffers.mcu_rw.va_reg->start_pfn << PAGE_SHIFT) +
mcu_rw_offset;
extract_cpu_va = (u32*)(
extract_cpu_va = (u32 *)(
kbdev->csf.firmware_trace_buffers.mcu_rw.cpu_addr +
mcu_rw_offset);
insert_gpu_va =
(kbdev->csf.firmware_trace_buffers.mcu_write.va_reg->start_pfn << PAGE_SHIFT) +
mcu_write_offset;
insert_cpu_va = (u32*)(
insert_cpu_va = (u32 *)(
kbdev->csf.firmware_trace_buffers.mcu_write.cpu_addr +
mcu_write_offset);
data_buffer_gpu_va =
@@ -323,13 +323,13 @@ void kbase_csf_firmware_reload_trace_buffers_data(struct kbase_device *kbdev)
extract_gpu_va =
(kbdev->csf.firmware_trace_buffers.mcu_rw.va_reg->start_pfn << PAGE_SHIFT) +
mcu_rw_offset;
extract_cpu_va = (u32*)(
extract_cpu_va = (u32 *)(
kbdev->csf.firmware_trace_buffers.mcu_rw.cpu_addr +
mcu_rw_offset);
insert_gpu_va =
(kbdev->csf.firmware_trace_buffers.mcu_write.va_reg->start_pfn << PAGE_SHIFT) +
mcu_write_offset;
insert_cpu_va = (u32*)(
insert_cpu_va = (u32 *)(
kbdev->csf.firmware_trace_buffers.mcu_write.cpu_addr +
mcu_write_offset);
data_buffer_gpu_va =

View File

@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
* (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -203,6 +203,8 @@ static void kbase_csf_early_term(struct kbase_device *kbdev)
* kbase_device_hwcnt_watchdog_if_init - Create hardware counter watchdog
* interface.
* @kbdev: Device pointer
*
* Return: 0 if successful or a negative error code on failure.
*/
static int kbase_device_hwcnt_watchdog_if_init(struct kbase_device *kbdev)
{
@@ -245,8 +247,9 @@ static void kbase_device_hwcnt_backend_csf_if_term(struct kbase_device *kbdev)
/**
* kbase_device_hwcnt_backend_csf_init - Create hardware counter backend.
* @kbdev: Device pointer
*
* Return: 0 if successful or a negative error code on failure.
*/
static int kbase_device_hwcnt_backend_csf_init(struct kbase_device *kbdev)
{
return kbase_hwcnt_backend_csf_create(
@@ -390,7 +393,7 @@ int kbase_device_init(struct kbase_device *kbdev)
* Hardware counter components depending on firmware are initialized after CSF
* firmware is loaded.
*
* @return 0 on success. An error code on failure.
* Return: 0 on success. An error code on failure.
*/
static int kbase_device_hwcnt_csf_deferred_init(struct kbase_device *kbdev)
{
@@ -457,7 +460,7 @@ virt_fail:
* To meet Android GKI vendor guideline, firmware load is deferred at
* the time when @ref kbase_open is called for the first time.
*
* @return 0 on success. An error code on failure.
* Return: 0 on success. An error code on failure.
*/
static int kbase_csf_firmware_deferred_init(struct kbase_device *kbdev)
{

View File

@@ -133,8 +133,12 @@ void kbase_gpu_interrupt(struct kbase_device *kbdev, u32 val)
if (val & RESET_COMPLETED)
kbase_pm_reset_done(kbdev);
KBASE_KTRACE_ADD(kbdev, CORE_GPU_IRQ_CLEAR, NULL, val);
kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), val);
/* Defer clearing CLEAN_CACHES_COMPLETED to kbase_clean_caches_done.
* We need to acquire hwaccess_lock to avoid a race condition with
* kbase_gpu_cache_flush_and_busy_wait
*/
KBASE_KTRACE_ADD(kbdev, CORE_GPU_IRQ_CLEAR, NULL, val & ~CLEAN_CACHES_COMPLETED);
kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), val & ~CLEAN_CACHES_COMPLETED);
#ifdef KBASE_PM_RUNTIME
if (val & DOORBELL_MIRROR) {

View File

@@ -66,8 +66,12 @@ void kbase_gpu_interrupt(struct kbase_device *kbdev, u32 val)
if (val & PRFCNT_SAMPLE_COMPLETED)
kbase_instr_hwcnt_sample_done(kbdev);
KBASE_KTRACE_ADD(kbdev, CORE_GPU_IRQ_CLEAR, NULL, val);
kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), val);
/* Defer clearing CLEAN_CACHES_COMPLETED to kbase_clean_caches_done.
* We need to acquire hwaccess_lock to avoid a race condition with
* kbase_gpu_cache_flush_and_busy_wait
*/
KBASE_KTRACE_ADD(kbdev, CORE_GPU_IRQ_CLEAR, NULL, val & ~CLEAN_CACHES_COMPLETED);
kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), val & ~CLEAN_CACHES_COMPLETED);
/* kbase_pm_check_transitions (called by kbase_pm_power_changed) must
* be called after the IRQ has been cleared. This is because it might

View File

@@ -27,6 +27,9 @@
#include <mali_kbase_hwaccess_backend.h>
#include <mali_kbase_ctx_sched.h>
#include <mali_kbase_reset_gpu.h>
#include <mali_kbase_hwcnt_watchdog_if_timer.h>
#include <mali_kbase_hwcnt_backend_jm.h>
#include <mali_kbase_hwcnt_backend_jm_watchdog.h>
#if IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI)
#include <backend/gpu/mali_kbase_model_linux.h>
@@ -148,73 +151,115 @@ static void kbase_backend_late_term(struct kbase_device *kbdev)
kbase_hwaccess_pm_term(kbdev);
}
static int kbase_device_hwcnt_backend_jm_init(struct kbase_device *kbdev)
/**
* kbase_device_hwcnt_watchdog_if_init - Create hardware counter watchdog
* interface.
* @kbdev: Device pointer
* Return: 0 on success, or an error code on failure.
*/
static int kbase_device_hwcnt_watchdog_if_init(struct kbase_device *kbdev)
{
return kbase_hwcnt_backend_jm_create(kbdev, &kbdev->hwcnt_gpu_iface);
return kbase_hwcnt_watchdog_if_timer_create(&kbdev->hwcnt_watchdog_timer);
}
/**
* kbase_device_hwcnt_watchdog_if_term - Terminate hardware counter watchdog
* interface.
* @kbdev: Device pointer
*
* Thin wrapper that hands the device's hwcnt_watchdog_timer member to
* kbase_hwcnt_watchdog_if_timer_destroy(). It is the termination counterpart
* of kbase_device_hwcnt_watchdog_if_init() in the dev_init[] table.
*/
static void kbase_device_hwcnt_watchdog_if_term(struct kbase_device *kbdev)
{
kbase_hwcnt_watchdog_if_timer_destroy(&kbdev->hwcnt_watchdog_timer);
}
/**
* kbase_device_hwcnt_backend_jm_init - Create hardware counter backend.
* @kbdev: Device pointer
*
* Calls kbase_hwcnt_backend_jm_create() with the device's
* hwcnt_gpu_jm_backend member as the interface argument (presumably filled in
* by the create call - confirm against its definition).
*
* Return: 0 on success, or an error code on failure.
*/
static int kbase_device_hwcnt_backend_jm_init(struct kbase_device *kbdev)
{
return kbase_hwcnt_backend_jm_create(kbdev, &kbdev->hwcnt_gpu_jm_backend);
}
/**
* kbase_device_hwcnt_backend_jm_term - Terminate hardware counter backend.
* @kbdev: Device pointer
*/
static void kbase_device_hwcnt_backend_jm_term(struct kbase_device *kbdev)
{
kbase_hwcnt_backend_jm_destroy(&kbdev->hwcnt_gpu_iface);
kbase_hwcnt_backend_jm_destroy(&kbdev->hwcnt_gpu_jm_backend);
}
/**
* kbase_device_hwcnt_backend_jm_watchdog_init - Create hardware counter watchdog backend.
* @kbdev: Device pointer
*
* Passes the JM backend interface (hwcnt_gpu_jm_backend), the watchdog timer
* interface (hwcnt_watchdog_timer) and hwcnt_gpu_iface to
* kbase_hwcnt_backend_jm_watchdog_create(). hwcnt_gpu_iface is presumably the
* output interface populated by that call - confirm against its definition.
*
* Return: 0 on success, or an error code on failure.
*/
static int kbase_device_hwcnt_backend_jm_watchdog_init(struct kbase_device *kbdev)
{
return kbase_hwcnt_backend_jm_watchdog_create(&kbdev->hwcnt_gpu_jm_backend,
&kbdev->hwcnt_watchdog_timer,
&kbdev->hwcnt_gpu_iface);
}
/**
* kbase_device_hwcnt_backend_jm_watchdog_term - Terminate hardware counter watchdog backend.
* @kbdev: Device pointer
*
* Thin wrapper that hands the device's hwcnt_gpu_iface member to
* kbase_hwcnt_backend_jm_watchdog_destroy(). It is the termination counterpart
* of kbase_device_hwcnt_backend_jm_watchdog_init() in the dev_init[] table.
*/
static void kbase_device_hwcnt_backend_jm_watchdog_term(struct kbase_device *kbdev)
{
kbase_hwcnt_backend_jm_watchdog_destroy(&kbdev->hwcnt_gpu_iface);
}
static const struct kbase_device_init dev_init[] = {
#if IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI)
{ kbase_gpu_device_create, kbase_gpu_device_destroy,
"Dummy model initialization failed" },
{ kbase_gpu_device_create, kbase_gpu_device_destroy, "Dummy model initialization failed" },
#else
{ assign_irqs, NULL, "IRQ search failed" },
{ registers_map, registers_unmap, "Register map failed" },
#endif
{ kbase_device_io_history_init, kbase_device_io_history_term,
"Register access history initialization failed" },
{ kbase_device_pm_init, kbase_device_pm_term,
"Power management initialization failed" },
{ kbase_device_early_init, kbase_device_early_term,
"Early device initialization failed" },
{ kbase_device_populate_max_freq, NULL,
"Populating max frequency failed" },
{ kbase_device_pm_init, kbase_device_pm_term, "Power management initialization failed" },
{ kbase_device_early_init, kbase_device_early_term, "Early device initialization failed" },
{ kbase_device_populate_max_freq, NULL, "Populating max frequency failed" },
{ kbase_device_misc_init, kbase_device_misc_term,
"Miscellaneous device initialization failed" },
{ kbase_device_pcm_dev_init, kbase_device_pcm_dev_term,
"Priority control manager initialization failed" },
{ kbase_ctx_sched_init, kbase_ctx_sched_term,
"Context scheduler initialization failed" },
{ kbase_mem_init, kbase_mem_term,
"Memory subsystem initialization failed" },
{ kbase_ctx_sched_init, kbase_ctx_sched_term, "Context scheduler initialization failed" },
{ kbase_mem_init, kbase_mem_term, "Memory subsystem initialization failed" },
{ kbase_device_coherency_init, NULL, "Device coherency init failed" },
{ kbase_protected_mode_init, kbase_protected_mode_term,
"Protected mode subsystem initialization failed" },
{ kbase_device_list_init, kbase_device_list_term,
"Device list setup failed" },
{ kbasep_js_devdata_init, kbasep_js_devdata_term,
"Job JS devdata initialization failed" },
{ kbase_device_list_init, kbase_device_list_term, "Device list setup failed" },
{ kbasep_js_devdata_init, kbasep_js_devdata_term, "Job JS devdata initialization failed" },
{ kbase_device_timeline_init, kbase_device_timeline_term,
"Timeline stream initialization failed" },
{ kbase_clk_rate_trace_manager_init, kbase_clk_rate_trace_manager_term,
"Clock rate trace manager initialization failed" },
{ kbase_lowest_gpu_freq_init, NULL,
"Lowest freq initialization failed" },
{ kbase_lowest_gpu_freq_init, NULL, "Lowest freq initialization failed" },
{ kbase_instr_backend_init, kbase_instr_backend_term,
"Instrumentation backend initialization failed" },
{ kbase_device_hwcnt_backend_jm_init,
kbase_device_hwcnt_backend_jm_term,
{ kbase_device_hwcnt_watchdog_if_init, kbase_device_hwcnt_watchdog_if_term,
"GPU hwcnt backend watchdog interface creation failed" },
{ kbase_device_hwcnt_backend_jm_init, kbase_device_hwcnt_backend_jm_term,
"GPU hwcnt backend creation failed" },
{ kbase_device_hwcnt_backend_jm_watchdog_init, kbase_device_hwcnt_backend_jm_watchdog_term,
"GPU hwcnt watchdog backend creation failed" },
{ kbase_device_hwcnt_context_init, kbase_device_hwcnt_context_term,
"GPU hwcnt context initialization failed" },
{ kbase_device_hwcnt_virtualizer_init,
kbase_device_hwcnt_virtualizer_term,
{ kbase_device_hwcnt_virtualizer_init, kbase_device_hwcnt_virtualizer_term,
"GPU hwcnt virtualizer initialization failed" },
{ kbase_device_vinstr_init, kbase_device_vinstr_term,
"Virtual instrumentation initialization failed" },
{ kbase_device_kinstr_prfcnt_init, kbase_device_kinstr_prfcnt_term,
"Performance counter instrumentation initialization failed" },
{ kbase_backend_late_init, kbase_backend_late_term,
"Late backend initialization failed" },
{ kbase_backend_late_init, kbase_backend_late_term, "Late backend initialization failed" },
{ kbase_debug_job_fault_dev_init, kbase_debug_job_fault_dev_term,
"Job fault debug initialization failed" },
{ kbase_device_debugfs_init, kbase_device_debugfs_term,
"DebugFS initialization failed" },
{ kbase_device_debugfs_init, kbase_device_debugfs_term, "DebugFS initialization failed" },
/* Sysfs init needs to happen before registering the device with
* misc_register(), otherwise it causes a race condition between
* registering the device and a uevent event being generated for
@@ -233,8 +278,7 @@ static const struct kbase_device_init dev_init[] = {
{ kbase_gpuprops_populate_user_buffer, kbase_gpuprops_free_user_buffer,
"GPU property population failed" },
{ NULL, kbase_dummy_job_wa_cleanup, NULL },
{ kbase_device_late_init, kbase_device_late_term,
"Late device initialization failed" },
{ kbase_device_late_init, kbase_device_late_term, "Late device initialization failed" },
};
static void kbase_device_term_partial(struct kbase_device *kbdev,

View File

@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
* (C) COPYRIGHT 2010-2021 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2010-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -166,8 +166,11 @@ void kbase_device_pcm_dev_term(struct kbase_device *const kbdev)
* @nb: notifier block - used to retrieve kbdev pointer
* @action: action (unused)
* @data: data pointer (unused)
*
* This function simply lists memory usage by the Mali driver, per GPU device,
* for diagnostic purposes.
*
* Return: NOTIFY_OK on success, NOTIFY_BAD otherwise.
*/
static int mali_oom_notifier_handler(struct notifier_block *nb,
unsigned long action, void *data)
@@ -189,7 +192,7 @@ static int mali_oom_notifier_handler(struct notifier_block *nb,
mutex_lock(&kbdev->kctx_list_lock);
list_for_each_entry (kctx, &kbdev->kctx_list, kctx_list_link) {
list_for_each_entry(kctx, &kbdev->kctx_list, kctx_list_link) {
struct pid *pid_struct;
struct task_struct *task;
unsigned long task_alloc_total =
@@ -483,6 +486,7 @@ int kbase_device_early_init(struct kbase_device *kbdev)
{
int err;
err = kbasep_platform_device_init(kbdev);
if (err)
return err;

View File

@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
* (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -23,7 +23,6 @@
/**
* kbase_device_get_list - get device list.
*
* Get access to device list.
*
* Return: Pointer to the linked list head.
@@ -55,18 +54,18 @@ void kbase_increment_device_id(void);
* When a device file is opened for the first time,
* load firmware and initialize hardware counter components.
*
* @return 0 on success. An error code on failure.
* Return: 0 on success. An error code on failure.
*/
int kbase_device_firmware_init_once(struct kbase_device *kbdev);
/**
* kbase_device_init - Device initialisation.
*
* @kbdev: The kbase device structure for the device (must be a valid pointer)
*
* This is called from device probe to initialise various other
* components needed.
*
* @kbdev: The kbase device structure for the device (must be a valid pointer)
*
* Return: 0 on success and non-zero value on failure.
*/
int kbase_device_init(struct kbase_device *kbdev);
@@ -74,11 +73,10 @@ int kbase_device_init(struct kbase_device *kbdev);
/**
* kbase_device_term - Device termination.
*
* This is called from device remove to terminate various components that
* were initialised during kbase_device_init.
*
* @kbdev: The kbase device structure for the device (must be a valid pointer)
*
* This is called from device remove to terminate various components that
* were initialised during kbase_device_init.
*/
void kbase_device_term(struct kbase_device *kbdev);

View File

@@ -63,6 +63,7 @@ static int busy_wait_cache_clean_irq(struct kbase_device *kbdev)
}
/* Clear the interrupt CLEAN_CACHES_COMPLETED bit. */
KBASE_KTRACE_ADD(kbdev, CORE_GPU_IRQ_CLEAR, NULL, CLEAN_CACHES_COMPLETED);
kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR),
CLEAN_CACHES_COMPLETED);
@@ -72,7 +73,6 @@ static int busy_wait_cache_clean_irq(struct kbase_device *kbdev)
int kbase_gpu_cache_flush_and_busy_wait(struct kbase_device *kbdev,
u32 flush_op)
{
u32 irq_mask;
int need_to_wake_up = 0;
int ret = 0;
@@ -81,17 +81,18 @@ int kbase_gpu_cache_flush_and_busy_wait(struct kbase_device *kbdev,
*/
lockdep_assert_held(&kbdev->hwaccess_lock);
/* 1. Check if CLEAN_CACHES_COMPLETED irq mask bit is set.
/* 1. Check if kbdev->cache_clean_in_progress is set.
* If it is set, it means there are threads waiting for
* CLEAN_CACHES_COMPLETED irq to be raised.
* CLEAN_CACHES_COMPLETED irq to be raised and that the
* corresponding irq mask bit is set.
* We'll clear the irq mask bit and busy-wait for the cache
* clean operation to complete before submitting the cache
* clean command required after the GPU page table update.
* Pended flush commands will be merged to requested command.
*/
irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK));
if (irq_mask & CLEAN_CACHES_COMPLETED) {
if (kbdev->cache_clean_in_progress) {
/* disable irq first */
u32 irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK));
kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK),
irq_mask & ~CLEAN_CACHES_COMPLETED);
@@ -182,22 +183,28 @@ void kbase_clean_caches_done(struct kbase_device *kbdev)
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
if (kbdev->cache_clean_queued) {
u32 pended_flush_op = kbdev->cache_clean_queued;
if (kbdev->cache_clean_in_progress) {
/* Clear the interrupt CLEAN_CACHES_COMPLETED bit if set.
* It might have already been done by kbase_gpu_cache_flush_and_busy_wait.
*/
KBASE_KTRACE_ADD(kbdev, CORE_GPU_IRQ_CLEAR, NULL, CLEAN_CACHES_COMPLETED);
kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), CLEAN_CACHES_COMPLETED);
kbdev->cache_clean_queued = 0;
if (kbdev->cache_clean_queued) {
u32 pended_flush_op = kbdev->cache_clean_queued;
KBASE_KTRACE_ADD(kbdev, CORE_GPU_CLEAN_INV_CACHES, NULL,
pended_flush_op);
kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
pended_flush_op);
} else {
/* Disable interrupt */
irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK));
kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK),
irq_mask & ~CLEAN_CACHES_COMPLETED);
kbdev->cache_clean_queued = 0;
kbase_gpu_cache_clean_wait_complete(kbdev);
KBASE_KTRACE_ADD(kbdev, CORE_GPU_CLEAN_INV_CACHES, NULL, pended_flush_op);
kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), pended_flush_op);
} else {
/* Disable interrupt */
irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK));
kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK),
irq_mask & ~CLEAN_CACHES_COMPLETED);
kbase_gpu_cache_clean_wait_complete(kbdev);
}
}
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);

View File

@@ -20,7 +20,7 @@
*/
#include <mali_kbase.h>
#include <uapi/gpu/arm/bifrost/csf/mali_gpu_csf_registers.h>
#include <csf/mali_kbase_csf_registers.h>
#include <gpu/mali_kbase_gpu_fault.h>
const char *kbase_gpu_exception_name(u32 const exception_code)

View File

@@ -19,8 +19,8 @@
*
*/
#ifndef _UAPI_KBASE_GPU_REGMAP_CSF_H_
#define _UAPI_KBASE_GPU_REGMAP_CSF_H_
#ifndef _KBASE_GPU_REGMAP_CSF_H_
#define _KBASE_GPU_REGMAP_CSF_H_
#include <linux/types.h>
@@ -365,4 +365,4 @@
/* GPU_CONTROL_MCU.GPU_IRQ_RAWSTAT */
#define PRFCNT_SAMPLE_COMPLETED (1 << 16) /* Set when performance count sample has completed */
#endif /* _UAPI_KBASE_GPU_REGMAP_CSF_H_ */
#endif /* _KBASE_GPU_REGMAP_CSF_H_ */

View File

@@ -0,0 +1,293 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
* (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
* Foundation, and any use by you of this program is subject to the terms
* of such GNU license.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, you can access it online at
* http://www.gnu.org/licenses/gpl-2.0.html.
*
*/
#ifndef _KBASE_GPU_REGMAP_JM_H_
#define _KBASE_GPU_REGMAP_JM_H_
#if MALI_USE_CSF && defined(__KERNEL__)
#error "Cannot be compiled with CSF"
#endif
/* Set to implementation defined, outer caching */
#define AS_MEMATTR_AARCH64_OUTER_IMPL_DEF 0x88ull
/* Set to write back memory, outer caching */
#define AS_MEMATTR_AARCH64_OUTER_WA 0x8Dull
/* Set to inner non-cacheable, outer-non-cacheable
* Setting defined by the alloc bits is ignored, but set to a valid encoding:
* - no-alloc on read
* - no alloc on write
*/
#define AS_MEMATTR_AARCH64_NON_CACHEABLE 0x4Cull
/* Symbols for default MEMATTR to use
* Default is - HW implementation defined caching
*/
#define AS_MEMATTR_INDEX_DEFAULT 0
#define AS_MEMATTR_INDEX_DEFAULT_ACE 3
/* HW implementation defined caching */
#define AS_MEMATTR_INDEX_IMPL_DEF_CACHE_POLICY 0
/* Force cache on */
#define AS_MEMATTR_INDEX_FORCE_TO_CACHE_ALL 1
/* Write-alloc */
#define AS_MEMATTR_INDEX_WRITE_ALLOC 2
/* Outer coherent, inner implementation defined policy */
#define AS_MEMATTR_INDEX_OUTER_IMPL_DEF 3
/* Outer coherent, write alloc inner */
#define AS_MEMATTR_INDEX_OUTER_WA 4
/* Normal memory, inner non-cacheable, outer non-cacheable (ARMv8 mode only) */
#define AS_MEMATTR_INDEX_NON_CACHEABLE 5
/* GPU control registers */
#define CORE_FEATURES 0x008 /* (RO) Shader Core Features */
#define JS_PRESENT 0x01C /* (RO) Job slots present */
#define PRFCNT_BASE_LO 0x060 /* (RW) Performance counter memory
* region base address, low word
*/
#define PRFCNT_BASE_HI 0x064 /* (RW) Performance counter memory
* region base address, high word
*/
#define PRFCNT_CONFIG 0x068 /* (RW) Performance counter
* configuration
*/
#define PRFCNT_JM_EN 0x06C /* (RW) Performance counter enable
* flags for Job Manager
*/
#define PRFCNT_SHADER_EN 0x070 /* (RW) Performance counter enable
* flags for shader cores
*/
#define PRFCNT_TILER_EN 0x074 /* (RW) Performance counter enable
* flags for tiler
*/
#define PRFCNT_MMU_L2_EN 0x07C /* (RW) Performance counter enable
* flags for MMU/L2 cache
*/
#define JS0_FEATURES 0x0C0 /* (RO) Features of job slot 0 */
#define JS1_FEATURES 0x0C4 /* (RO) Features of job slot 1 */
#define JS2_FEATURES 0x0C8 /* (RO) Features of job slot 2 */
#define JS3_FEATURES 0x0CC /* (RO) Features of job slot 3 */
#define JS4_FEATURES 0x0D0 /* (RO) Features of job slot 4 */
#define JS5_FEATURES 0x0D4 /* (RO) Features of job slot 5 */
#define JS6_FEATURES 0x0D8 /* (RO) Features of job slot 6 */
#define JS7_FEATURES 0x0DC /* (RO) Features of job slot 7 */
#define JS8_FEATURES 0x0E0 /* (RO) Features of job slot 8 */
#define JS9_FEATURES 0x0E4 /* (RO) Features of job slot 9 */
#define JS10_FEATURES 0x0E8 /* (RO) Features of job slot 10 */
#define JS11_FEATURES 0x0EC /* (RO) Features of job slot 11 */
#define JS12_FEATURES 0x0F0 /* (RO) Features of job slot 12 */
#define JS13_FEATURES 0x0F4 /* (RO) Features of job slot 13 */
#define JS14_FEATURES 0x0F8 /* (RO) Features of job slot 14 */
#define JS15_FEATURES 0x0FC /* (RO) Features of job slot 15 */
/* Register offset of JS<n>_FEATURES: JS0_FEATURES plus 4 bytes per slot
* ((n) << 2), passed through GPU_CONTROL_REG(). This matches the
* JS0_FEATURES..JS15_FEATURES values listed above.
*/
#define JS_FEATURES_REG(n) GPU_CONTROL_REG(JS0_FEATURES + ((n) << 2))
#define JM_CONFIG 0xF00 /* (RW) Job manager configuration (implementation-specific) */
/* Job control registers */
#define JOB_IRQ_JS_STATE 0x010 /* status==active and _next == busy snapshot from last JOB_IRQ_CLEAR */
#define JOB_IRQ_THROTTLE 0x014 /* cycles to delay delivering an interrupt externally. The JOB_IRQ_STATUS is NOT affected by this, just the delivery of the interrupt. */
#define JOB_SLOT0 0x800 /* Configuration registers for job slot 0 */
#define JOB_SLOT1 0x880 /* Configuration registers for job slot 1 */
#define JOB_SLOT2 0x900 /* Configuration registers for job slot 2 */
#define JOB_SLOT3 0x980 /* Configuration registers for job slot 3 */
#define JOB_SLOT4 0xA00 /* Configuration registers for job slot 4 */
#define JOB_SLOT5 0xA80 /* Configuration registers for job slot 5 */
#define JOB_SLOT6 0xB00 /* Configuration registers for job slot 6 */
#define JOB_SLOT7 0xB80 /* Configuration registers for job slot 7 */
#define JOB_SLOT8 0xC00 /* Configuration registers for job slot 8 */
#define JOB_SLOT9 0xC80 /* Configuration registers for job slot 9 */
#define JOB_SLOT10 0xD00 /* Configuration registers for job slot 10 */
#define JOB_SLOT11 0xD80 /* Configuration registers for job slot 11 */
#define JOB_SLOT12 0xE00 /* Configuration registers for job slot 12 */
#define JOB_SLOT13 0xE80 /* Configuration registers for job slot 13 */
#define JOB_SLOT14 0xF00 /* Configuration registers for job slot 14 */
#define JOB_SLOT15 0xF80 /* Configuration registers for job slot 15 */
/* Offset of register r within job slot n. Slots are spaced 0x80 apart
* ((n) << 7) starting at JOB_SLOT0, which matches the JOB_SLOT0..JOB_SLOT15
* values listed above; r is one of the JS_* per-slot offsets below.
*/
#define JOB_SLOT_REG(n, r) (JOB_CONTROL_REG(JOB_SLOT0 + ((n) << 7)) + (r))
#define JS_HEAD_LO 0x00 /* (RO) Job queue head pointer for job slot n, low word */
#define JS_HEAD_HI 0x04 /* (RO) Job queue head pointer for job slot n, high word */
#define JS_TAIL_LO 0x08 /* (RO) Job queue tail pointer for job slot n, low word */
#define JS_TAIL_HI 0x0C /* (RO) Job queue tail pointer for job slot n, high word */
#define JS_AFFINITY_LO 0x10 /* (RO) Core affinity mask for job slot n, low word */
#define JS_AFFINITY_HI 0x14 /* (RO) Core affinity mask for job slot n, high word */
#define JS_CONFIG 0x18 /* (RO) Configuration settings for job slot n */
/* (RO) Extended affinity mask for job slot n*/
#define JS_XAFFINITY 0x1C
#define JS_COMMAND 0x20 /* (WO) Command register for job slot n */
#define JS_STATUS 0x24 /* (RO) Status register for job slot n */
#define JS_HEAD_NEXT_LO 0x40 /* (RW) Next job queue head pointer for job slot n, low word */
#define JS_HEAD_NEXT_HI 0x44 /* (RW) Next job queue head pointer for job slot n, high word */
#define JS_AFFINITY_NEXT_LO 0x50 /* (RW) Next core affinity mask for job slot n, low word */
#define JS_AFFINITY_NEXT_HI 0x54 /* (RW) Next core affinity mask for job slot n, high word */
#define JS_CONFIG_NEXT 0x58 /* (RW) Next configuration settings for job slot n */
/* (RW) Next extended affinity mask for job slot n */
#define JS_XAFFINITY_NEXT 0x5C
#define JS_COMMAND_NEXT 0x60 /* (RW) Next command register for job slot n */
#define JS_FLUSH_ID_NEXT 0x70 /* (RW) Next job slot n cache flush ID */
/* No JM-specific MMU control registers */
/* No JM-specific MMU address space control registers */
/* JS_COMMAND register commands */
#define JS_COMMAND_NOP 0x00 /* NOP Operation. Writing this value is ignored */
#define JS_COMMAND_START 0x01 /* Start processing a job chain. Writing this value is ignored */
#define JS_COMMAND_SOFT_STOP 0x02 /* Gently stop processing a job chain */
#define JS_COMMAND_HARD_STOP 0x03 /* Rudely stop processing a job chain */
#define JS_COMMAND_SOFT_STOP_0 0x04 /* Execute SOFT_STOP if JOB_CHAIN_FLAG is 0 */
#define JS_COMMAND_HARD_STOP_0 0x05 /* Execute HARD_STOP if JOB_CHAIN_FLAG is 0 */
#define JS_COMMAND_SOFT_STOP_1 0x06 /* Execute SOFT_STOP if JOB_CHAIN_FLAG is 1 */
#define JS_COMMAND_HARD_STOP_1 0x07 /* Execute HARD_STOP if JOB_CHAIN_FLAG is 1 */
#define JS_COMMAND_MASK 0x07 /* Mask of bits currently in use by the HW */
/* Possible values of JS_CONFIG and JS_CONFIG_NEXT registers */
#define JS_CONFIG_START_FLUSH_NO_ACTION (0u << 0)
#define JS_CONFIG_START_FLUSH_CLEAN (1u << 8)
#define JS_CONFIG_START_FLUSH_INV_SHADER_OTHER (2u << 8)
#define JS_CONFIG_START_FLUSH_CLEAN_INVALIDATE (3u << 8)
#define JS_CONFIG_START_MMU (1u << 10)
#define JS_CONFIG_JOB_CHAIN_FLAG (1u << 11)
#define JS_CONFIG_END_FLUSH_NO_ACTION JS_CONFIG_START_FLUSH_NO_ACTION
#define JS_CONFIG_END_FLUSH_CLEAN (1u << 12)
#define JS_CONFIG_END_FLUSH_CLEAN_INVALIDATE (3u << 12)
#define JS_CONFIG_ENABLE_FLUSH_REDUCTION (1u << 14)
#define JS_CONFIG_DISABLE_DESCRIPTOR_WR_BK (1u << 15)
#define JS_CONFIG_THREAD_PRI(n) ((n) << 16)
/* JS_XAFFINITY register values */
#define JS_XAFFINITY_XAFFINITY_ENABLE (1u << 0)
#define JS_XAFFINITY_TILER_ENABLE (1u << 8)
#define JS_XAFFINITY_CACHE_ENABLE (1u << 16)
/* JS_STATUS register values */
/* NOTE: Please keep these values in sync with enum base_jd_event_code in mali_base_kernel.h.
* The values are separated to avoid dependency of userspace and kernel code.
*/
/* Group of values representing the job status instead of a particular fault */
#define JS_STATUS_NO_EXCEPTION_BASE 0x00
#define JS_STATUS_INTERRUPTED (JS_STATUS_NO_EXCEPTION_BASE + 0x02) /* 0x02 means INTERRUPTED */
#define JS_STATUS_STOPPED (JS_STATUS_NO_EXCEPTION_BASE + 0x03) /* 0x03 means STOPPED */
#define JS_STATUS_TERMINATED (JS_STATUS_NO_EXCEPTION_BASE + 0x04) /* 0x04 means TERMINATED */
/* General fault values */
#define JS_STATUS_FAULT_BASE 0x40
#define JS_STATUS_CONFIG_FAULT (JS_STATUS_FAULT_BASE) /* 0x40 means CONFIG FAULT */
#define JS_STATUS_POWER_FAULT (JS_STATUS_FAULT_BASE + 0x01) /* 0x41 means POWER FAULT */
#define JS_STATUS_READ_FAULT (JS_STATUS_FAULT_BASE + 0x02) /* 0x42 means READ FAULT */
#define JS_STATUS_WRITE_FAULT (JS_STATUS_FAULT_BASE + 0x03) /* 0x43 means WRITE FAULT */
#define JS_STATUS_AFFINITY_FAULT (JS_STATUS_FAULT_BASE + 0x04) /* 0x44 means AFFINITY FAULT */
#define JS_STATUS_BUS_FAULT (JS_STATUS_FAULT_BASE + 0x08) /* 0x48 means BUS FAULT */
/* Instruction or data faults */
#define JS_STATUS_INSTRUCTION_FAULT_BASE 0x50
#define JS_STATUS_INSTR_INVALID_PC (JS_STATUS_INSTRUCTION_FAULT_BASE) /* 0x50 means INSTR INVALID PC */
#define JS_STATUS_INSTR_INVALID_ENC (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x01) /* 0x51 means INSTR INVALID ENC */
#define JS_STATUS_INSTR_TYPE_MISMATCH (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x02) /* 0x52 means INSTR TYPE MISMATCH */
#define JS_STATUS_INSTR_OPERAND_FAULT (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x03) /* 0x53 means INSTR OPERAND FAULT */
#define JS_STATUS_INSTR_TLS_FAULT (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x04) /* 0x54 means INSTR TLS FAULT */
#define JS_STATUS_INSTR_BARRIER_FAULT (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x05) /* 0x55 means INSTR BARRIER FAULT */
#define JS_STATUS_INSTR_ALIGN_FAULT (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x06) /* 0x56 means INSTR ALIGN FAULT */
/* NOTE: No fault with 0x57 code defined in spec. */
#define JS_STATUS_DATA_INVALID_FAULT (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x08) /* 0x58 means DATA INVALID FAULT */
#define JS_STATUS_TILE_RANGE_FAULT (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x09) /* 0x59 means TILE RANGE FAULT */
#define JS_STATUS_ADDRESS_RANGE_FAULT (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x0A) /* 0x5A means ADDRESS RANGE FAULT */
/* Other faults */
#define JS_STATUS_MEMORY_FAULT_BASE 0x60
#define JS_STATUS_OUT_OF_MEMORY (JS_STATUS_MEMORY_FAULT_BASE) /* 0x60 means OUT OF MEMORY */
#define JS_STATUS_UNKNOWN 0x7F /* 0x7F means UNKNOWN */
/* JS<n>_FEATURES register */
#define JS_FEATURE_NULL_JOB (1u << 1)
#define JS_FEATURE_SET_VALUE_JOB (1u << 2)
#define JS_FEATURE_CACHE_FLUSH_JOB (1u << 3)
#define JS_FEATURE_COMPUTE_JOB (1u << 4)
#define JS_FEATURE_VERTEX_JOB (1u << 5)
#define JS_FEATURE_GEOMETRY_JOB (1u << 6)
#define JS_FEATURE_TILER_JOB (1u << 7)
#define JS_FEATURE_FUSED_JOB (1u << 8)
#define JS_FEATURE_FRAGMENT_JOB (1u << 9)
/* JM_CONFIG register */
#define JM_TIMESTAMP_OVERRIDE (1ul << 0)
#define JM_CLOCK_GATE_OVERRIDE (1ul << 1)
#define JM_JOB_THROTTLE_ENABLE (1ul << 2)
#define JM_JOB_THROTTLE_LIMIT_SHIFT (3)
#define JM_MAX_JOB_THROTTLE_LIMIT (0x3F)
#define JM_FORCE_COHERENCY_FEATURES_SHIFT (2)
/* GPU_COMMAND values */
#define GPU_COMMAND_NOP 0x00 /* No operation, nothing happens */
#define GPU_COMMAND_SOFT_RESET 0x01 /* Stop all external bus interfaces, and then reset the entire GPU. */
#define GPU_COMMAND_HARD_RESET 0x02 /* Immediately reset the entire GPU. */
#define GPU_COMMAND_PRFCNT_CLEAR 0x03 /* Clear all performance counters, setting them all to zero. */
#define GPU_COMMAND_PRFCNT_SAMPLE 0x04 /* Sample all performance counters, writing them out to memory */
#define GPU_COMMAND_CYCLE_COUNT_START 0x05 /* Starts the cycle counter, and system timestamp propagation */
#define GPU_COMMAND_CYCLE_COUNT_STOP 0x06 /* Stops the cycle counter, and system timestamp propagation */
#define GPU_COMMAND_CLEAN_CACHES 0x07 /* Clean all caches */
#define GPU_COMMAND_CLEAN_INV_CACHES 0x08 /* Clean and invalidate all caches */
#define GPU_COMMAND_SET_PROTECTED_MODE 0x09 /* Places the GPU in protected mode */
/* GPU_COMMAND cache flush alias to CSF command payload */
#define GPU_COMMAND_CACHE_CLN_INV_L2 GPU_COMMAND_CLEAN_INV_CACHES
#define GPU_COMMAND_CACHE_CLN_INV_L2_LSC GPU_COMMAND_CLEAN_INV_CACHES
#define GPU_COMMAND_CACHE_CLN_INV_FULL GPU_COMMAND_CLEAN_INV_CACHES
/* Merge cache flush commands.
* Expands to the numerically larger of the two command values, so with the
* encodings above GPU_COMMAND_CLEAN_INV_CACHES (0x08) is chosen over
* GPU_COMMAND_CLEAN_CACHES (0x07).
* The arguments appear more than once in the expansion, so they must not
* have side effects.
*/
#define GPU_COMMAND_FLUSH_CACHE_MERGE(cmd1, cmd2) \
((cmd1) > (cmd2) ? (cmd1) : (cmd2))
/* IRQ flags */
#define GPU_FAULT (1 << 0) /* A GPU Fault has occurred */
#define MULTIPLE_GPU_FAULTS (1 << 7) /* More than one GPU Fault occurred. */
#define RESET_COMPLETED (1 << 8) /* Set when a reset has completed. */
#define POWER_CHANGED_SINGLE (1 << 9) /* Set when a single core has finished powering up or down. */
#define POWER_CHANGED_ALL (1 << 10) /* Set when all cores have finished powering up or down. */
#define PRFCNT_SAMPLE_COMPLETED (1 << 16) /* Set when a performance count sample has completed. */
#define CLEAN_CACHES_COMPLETED (1 << 17) /* Set when a cache clean operation has completed. */
/*
* In a Debug build,
* GPU_IRQ_REG_COMMON | POWER_CHANGED_SINGLE is used to clear and enable the interrupt sources of GPU_IRQ
* by writing it to the GPU_IRQ_CLEAR/MASK registers.
*
* In a Release build,
* GPU_IRQ_REG_COMMON is used.
*
* Note:
* CLEAN_CACHES_COMPLETED - Used separately for cache operation, so it is
* not part of this mask.
*/
#define GPU_IRQ_REG_COMMON (GPU_FAULT | MULTIPLE_GPU_FAULTS | RESET_COMPLETED \
| POWER_CHANGED_ALL | PRFCNT_SAMPLE_COMPLETED)
#endif /* _KBASE_GPU_REGMAP_JM_H_ */

View File

@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
* (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -23,8 +23,8 @@
#define _KBASE_GPU_FAULT_H_
/**
* kbase_gpu_exception_name() -
* Returns the name associated with a Mali exception code
* kbase_gpu_exception_name() - Returns associated string of the exception code
*
* @exception_code: exception code
*
* This function is called from the interrupt handler when a GPU fault occurs.

View File

@@ -23,6 +23,565 @@
#define _KBASE_GPU_REGMAP_H_
#include <uapi/gpu/arm/bifrost/gpu/mali_kbase_gpu_regmap.h>
#include <uapi/gpu/arm/bifrost/gpu/mali_kbase_gpu_coherency.h>
#include <uapi/gpu/arm/bifrost/gpu/mali_kbase_gpu_id.h>
#if MALI_USE_CSF
#include "backend/mali_kbase_gpu_regmap_csf.h"
#else
#include "backend/mali_kbase_gpu_regmap_jm.h"
#endif
/* GPU_U definition
* In C code GPU_U(x) pastes the unsigned suffix 'u' onto the literal x.
* When __ASSEMBLER__ is defined the literal is left unsuffixed.
*/
#ifdef __ASSEMBLER__
#define GPU_U(x) x
#else
#define GPU_U(x) x##u
#endif /* __ASSEMBLER__ */
/* Begin Register Offsets */
/* GPU control registers */
#define GPU_CONTROL_BASE 0x0000
#define GPU_CONTROL_REG(r) (GPU_CONTROL_BASE + (r))
#define GPU_ID 0x000 /* (RO) GPU and revision identifier */
#define L2_FEATURES 0x004 /* (RO) Level 2 cache features */
#define TILER_FEATURES 0x00C /* (RO) Tiler Features */
#define MEM_FEATURES 0x010 /* (RO) Memory system features */
#define MMU_FEATURES 0x014 /* (RO) MMU features */
#define AS_PRESENT 0x018 /* (RO) Address space slots present */
#define GPU_IRQ_RAWSTAT 0x020 /* (RW) */
#define GPU_IRQ_CLEAR 0x024 /* (WO) */
#define GPU_IRQ_MASK 0x028 /* (RW) */
#define GPU_IRQ_STATUS 0x02C /* (RO) */
#define GPU_COMMAND 0x030 /* (WO) */
#define GPU_STATUS 0x034 /* (RO) */
#define GPU_DBGEN (1 << 8) /* DBGEN wire status */
#define GPU_FAULTSTATUS 0x03C /* (RO) GPU exception type and fault status */
#define GPU_FAULTADDRESS_LO 0x040 /* (RO) GPU exception fault address, low word */
#define GPU_FAULTADDRESS_HI 0x044 /* (RO) GPU exception fault address, high word */
#define L2_CONFIG 0x048 /* (RW) Level 2 cache configuration */
#define GROUPS_L2_COHERENT (1 << 0) /* Cores groups are l2 coherent */
#define SUPER_L2_COHERENT (1 << 1) /* Shader cores within a core
* supergroup are l2 coherent
*/
#define PWR_KEY 0x050 /* (WO) Power manager key register */
#define PWR_OVERRIDE0 0x054 /* (RW) Power manager override settings */
#define PWR_OVERRIDE1 0x058 /* (RW) Power manager override settings */
#define GPU_FEATURES_LO 0x060 /* (RO) GPU features, low word */
#define GPU_FEATURES_HI 0x064 /* (RO) GPU features, high word */
#define PRFCNT_FEATURES 0x068 /* (RO) Performance counter features */
#define TIMESTAMP_OFFSET_LO 0x088 /* (RW) Global time stamp offset, low word */
#define TIMESTAMP_OFFSET_HI 0x08C /* (RW) Global time stamp offset, high word */
#define CYCLE_COUNT_LO 0x090 /* (RO) Cycle counter, low word */
#define CYCLE_COUNT_HI 0x094 /* (RO) Cycle counter, high word */
#define TIMESTAMP_LO 0x098 /* (RO) Global time stamp counter, low word */
#define TIMESTAMP_HI 0x09C /* (RO) Global time stamp counter, high word */
#define THREAD_MAX_THREADS 0x0A0 /* (RO) Maximum number of threads per core */
#define THREAD_MAX_WORKGROUP_SIZE 0x0A4 /* (RO) Maximum workgroup size */
#define THREAD_MAX_BARRIER_SIZE 0x0A8 /* (RO) Maximum threads waiting at a barrier */
#define THREAD_FEATURES 0x0AC /* (RO) Thread features */
#define THREAD_TLS_ALLOC 0x310 /* (RO) Number of threads per core that TLS must be allocated for */
#define TEXTURE_FEATURES_0 0x0B0 /* (RO) Support flags for indexed texture formats 0..31 */
#define TEXTURE_FEATURES_1 0x0B4 /* (RO) Support flags for indexed texture formats 32..63 */
#define TEXTURE_FEATURES_2 0x0B8 /* (RO) Support flags for indexed texture formats 64..95 */
#define TEXTURE_FEATURES_3 0x0BC /* (RO) Support flags for texture order */
#define TEXTURE_FEATURES_REG(n) GPU_CONTROL_REG(TEXTURE_FEATURES_0 + ((n) << 2))
#define SHADER_PRESENT_LO 0x100 /* (RO) Shader core present bitmap, low word */
#define SHADER_PRESENT_HI 0x104 /* (RO) Shader core present bitmap, high word */
#define TILER_PRESENT_LO 0x110 /* (RO) Tiler core present bitmap, low word */
#define TILER_PRESENT_HI 0x114 /* (RO) Tiler core present bitmap, high word */
#define L2_PRESENT_LO 0x120 /* (RO) Level 2 cache present bitmap, low word */
#define L2_PRESENT_HI 0x124 /* (RO) Level 2 cache present bitmap, high word */
#define STACK_PRESENT_LO 0xE00 /* (RO) Core stack present bitmap, low word */
#define STACK_PRESENT_HI 0xE04 /* (RO) Core stack present bitmap, high word */
#define SHADER_READY_LO 0x140 /* (RO) Shader core ready bitmap, low word */
#define SHADER_READY_HI 0x144 /* (RO) Shader core ready bitmap, high word */
#define TILER_READY_LO 0x150 /* (RO) Tiler core ready bitmap, low word */
#define TILER_READY_HI 0x154 /* (RO) Tiler core ready bitmap, high word */
#define L2_READY_LO 0x160 /* (RO) Level 2 cache ready bitmap, low word */
#define L2_READY_HI 0x164 /* (RO) Level 2 cache ready bitmap, high word */
#define STACK_READY_LO 0xE10 /* (RO) Core stack ready bitmap, low word */
#define STACK_READY_HI 0xE14 /* (RO) Core stack ready bitmap, high word */
#define SHADER_PWRON_LO 0x180 /* (WO) Shader core power on bitmap, low word */
#define SHADER_PWRON_HI 0x184 /* (WO) Shader core power on bitmap, high word */
#define TILER_PWRON_LO 0x190 /* (WO) Tiler core power on bitmap, low word */
#define TILER_PWRON_HI 0x194 /* (WO) Tiler core power on bitmap, high word */
#define L2_PWRON_LO 0x1A0 /* (WO) Level 2 cache power on bitmap, low word */
#define L2_PWRON_HI 0x1A4 /* (WO) Level 2 cache power on bitmap, high word */
#define STACK_PWRON_LO 0xE20 /* (RO) Core stack power on bitmap, low word */
#define STACK_PWRON_HI 0xE24 /* (RO) Core stack power on bitmap, high word */
#define SHADER_PWROFF_LO 0x1C0 /* (WO) Shader core power off bitmap, low word */
#define SHADER_PWROFF_HI 0x1C4 /* (WO) Shader core power off bitmap, high word */
#define TILER_PWROFF_LO 0x1D0 /* (WO) Tiler core power off bitmap, low word */
#define TILER_PWROFF_HI 0x1D4 /* (WO) Tiler core power off bitmap, high word */
#define L2_PWROFF_LO 0x1E0 /* (WO) Level 2 cache power off bitmap, low word */
#define L2_PWROFF_HI 0x1E4 /* (WO) Level 2 cache power off bitmap, high word */
#define STACK_PWROFF_LO 0xE30 /* (RO) Core stack power off bitmap, low word */
#define STACK_PWROFF_HI 0xE34 /* (RO) Core stack power off bitmap, high word */
#define SHADER_PWRTRANS_LO 0x200 /* (RO) Shader core power transition bitmap, low word */
#define SHADER_PWRTRANS_HI 0x204 /* (RO) Shader core power transition bitmap, high word */
#define TILER_PWRTRANS_LO 0x210 /* (RO) Tiler core power transition bitmap, low word */
#define TILER_PWRTRANS_HI 0x214 /* (RO) Tiler core power transition bitmap, high word */
#define L2_PWRTRANS_LO 0x220 /* (RO) Level 2 cache power transition bitmap, low word */
#define L2_PWRTRANS_HI 0x224 /* (RO) Level 2 cache power transition bitmap, high word */
#define ASN_HASH_0 0x02C0 /* (RW) ASN hash function argument 0 */
#define ASN_HASH(n) (ASN_HASH_0 + (n)*4)
#define ASN_HASH_COUNT 3
#define SYSC_ALLOC0 0x0340 /* (RW) System cache allocation hint from source ID */
#define SYSC_ALLOC(n) (SYSC_ALLOC0 + (n)*4)
#define SYSC_ALLOC_COUNT 8
#define STACK_PWRTRANS_LO 0xE40 /* (RO) Core stack power transition bitmap, low word */
#define STACK_PWRTRANS_HI 0xE44 /* (RO) Core stack power transition bitmap, high word */
#define SHADER_PWRACTIVE_LO 0x240 /* (RO) Shader core active bitmap, low word */
#define SHADER_PWRACTIVE_HI 0x244 /* (RO) Shader core active bitmap, high word */
#define TILER_PWRACTIVE_LO 0x250 /* (RO) Tiler core active bitmap, low word */
#define TILER_PWRACTIVE_HI 0x254 /* (RO) Tiler core active bitmap, high word */
#define L2_PWRACTIVE_LO 0x260 /* (RO) Level 2 cache active bitmap, low word */
#define L2_PWRACTIVE_HI 0x264 /* (RO) Level 2 cache active bitmap, high word */
#define COHERENCY_FEATURES 0x300 /* (RO) Coherency features present */
#define COHERENCY_ENABLE 0x304 /* (RW) Coherency enable */
#define SHADER_CONFIG 0xF04 /* (RW) Shader core configuration (implementation-specific) */
#define TILER_CONFIG 0xF08 /* (RW) Tiler core configuration (implementation-specific) */
#define L2_MMU_CONFIG 0xF0C /* (RW) L2 cache and MMU configuration (implementation-specific) */
/* Job control registers */
#define JOB_CONTROL_BASE 0x1000
#define JOB_CONTROL_REG(r) (JOB_CONTROL_BASE + (r))
#define JOB_IRQ_RAWSTAT 0x000 /* Raw interrupt status register */
#define JOB_IRQ_CLEAR 0x004 /* Interrupt clear register */
#define JOB_IRQ_MASK 0x008 /* Interrupt mask register */
#define JOB_IRQ_STATUS 0x00C /* Interrupt status register */
/* MMU control registers */
#define MMU_IRQ_CLEAR 0x004 /* (WO) Interrupt clear register */
#define MMU_IRQ_MASK 0x008 /* (RW) Interrupt mask register */
#define MMU_IRQ_STATUS 0x00C /* (RO) Interrupt status register */
#define MMU_AS0 0x400 /* Configuration registers for address space 0 */
#define MMU_AS1 0x440 /* Configuration registers for address space 1 */
#define MMU_AS2 0x480 /* Configuration registers for address space 2 */
#define MMU_AS3 0x4C0 /* Configuration registers for address space 3 */
#define MMU_AS4 0x500 /* Configuration registers for address space 4 */
#define MMU_AS5 0x540 /* Configuration registers for address space 5 */
#define MMU_AS6 0x580 /* Configuration registers for address space 6 */
#define MMU_AS7 0x5C0 /* Configuration registers for address space 7 */
#define MMU_AS8 0x600 /* Configuration registers for address space 8 */
#define MMU_AS9 0x640 /* Configuration registers for address space 9 */
#define MMU_AS10 0x680 /* Configuration registers for address space 10 */
#define MMU_AS11 0x6C0 /* Configuration registers for address space 11 */
#define MMU_AS12 0x700 /* Configuration registers for address space 12 */
#define MMU_AS13 0x740 /* Configuration registers for address space 13 */
#define MMU_AS14 0x780 /* Configuration registers for address space 14 */
#define MMU_AS15 0x7C0 /* Configuration registers for address space 15 */
/* MMU address space control registers */
/* Offset of register r within address space n. Address spaces are spaced
* 0x40 apart ((n) << 6) starting at MMU_AS0, which matches the
* MMU_AS0..MMU_AS15 values listed above; r is one of the AS_* offsets below.
*/
#define MMU_AS_REG(n, r) (MMU_REG(MMU_AS0 + ((n) << 6)) + (r))
#define AS_TRANSTAB_LO 0x00 /* (RW) Translation Table Base Address for address space n, low word */
#define AS_TRANSTAB_HI 0x04 /* (RW) Translation Table Base Address for address space n, high word */
#define AS_MEMATTR_LO 0x08 /* (RW) Memory attributes for address space n, low word. */
#define AS_MEMATTR_HI 0x0C /* (RW) Memory attributes for address space n, high word. */
#define AS_LOCKADDR_LO 0x10 /* (RW) Lock region address for address space n, low word */
#define AS_LOCKADDR_HI 0x14 /* (RW) Lock region address for address space n, high word */
#define AS_COMMAND 0x18 /* (WO) MMU command register for address space n */
#define AS_FAULTSTATUS 0x1C /* (RO) MMU fault status register for address space n */
#define AS_FAULTADDRESS_LO 0x20 /* (RO) Fault Address for address space n, low word */
#define AS_FAULTADDRESS_HI 0x24 /* (RO) Fault Address for address space n, high word */
#define AS_STATUS 0x28 /* (RO) Status flags for address space n */
/* (RW) Translation table configuration for address space n, low word */
#define AS_TRANSCFG_LO 0x30
/* (RW) Translation table configuration for address space n, high word */
#define AS_TRANSCFG_HI 0x34
/* (RO) Secondary fault address for address space n, low word */
#define AS_FAULTEXTRA_LO 0x38
/* (RO) Secondary fault address for address space n, high word */
#define AS_FAULTEXTRA_HI 0x3C
/* End Register Offsets */
/* All GPU interrupt sources: here just the common set. */
#define GPU_IRQ_REG_ALL         (GPU_IRQ_REG_COMMON)

/*
 * MMU_IRQ_RAWSTAT register values. The same values are valid for the
 * MMU_IRQ_CLEAR, MMU_IRQ_MASK and MMU_IRQ_STATUS registers.
 */
#define MMU_PAGE_FAULT_FLAGS    16

/*
 * Bitmasks used to pick the page fault or bus error flag of entry n out
 * of the MMU registers. The bus error flags sit MMU_PAGE_FAULT_FLAGS bits
 * above the page fault flags.
 */
#define MMU_PAGE_FAULT(n)       (1UL << (n))
#define MMU_BUS_ERROR(n)        (1UL << ((n) + MMU_PAGE_FAULT_FLAGS))

/*
 * Begin AARCH64 MMU TRANSTAB register values
 *
 * AS_TRANSTAB_BASE_MASK covers bits 4 up to (MMU_HW_OUTA_BITS - 1).
 */
#define MMU_HW_OUTA_BITS        40
#define AS_TRANSTAB_BASE_MASK   ((1ULL << MMU_HW_OUTA_BITS) - (1ULL << 4))
/*
 * Begin MMU STATUS register values
 */
#define AS_STATUS_AS_ACTIVE 0x01

/*
 * AS_FAULTSTATUS exception code: 3-bit field at bits 5:3, with the
 * individual codes already shifted into position.
 */
#define AS_FAULTSTATUS_EXCEPTION_CODE_MASK (0x7<<3)
#define AS_FAULTSTATUS_EXCEPTION_CODE_TRANSLATION_FAULT (0x0<<3)
#define AS_FAULTSTATUS_EXCEPTION_CODE_PERMISSION_FAULT (0x1<<3)
#define AS_FAULTSTATUS_EXCEPTION_CODE_TRANSTAB_BUS_FAULT (0x2<<3)
#define AS_FAULTSTATUS_EXCEPTION_CODE_ACCESS_FLAG (0x3<<3)
#define AS_FAULTSTATUS_EXCEPTION_CODE_ADDRESS_SIZE_FAULT (0x4<<3)
#define AS_FAULTSTATUS_EXCEPTION_CODE_MEMORY_ATTRIBUTES_FAULT (0x5<<3)

/* AS_FAULTSTATUS exception type: 8-bit field at bits 7:0. */
#define AS_FAULTSTATUS_EXCEPTION_TYPE_SHIFT 0
#define AS_FAULTSTATUS_EXCEPTION_TYPE_MASK (0xFF << AS_FAULTSTATUS_EXCEPTION_TYPE_SHIFT)
#define AS_FAULTSTATUS_EXCEPTION_TYPE_GET(reg_val) \
	(((reg_val)&AS_FAULTSTATUS_EXCEPTION_TYPE_MASK) >> AS_FAULTSTATUS_EXCEPTION_TYPE_SHIFT)
#define AS_FAULTSTATUS_EXCEPTION_TYPE_TRANSLATION_FAULT_0 0xC0

/*
 * AS_FAULTSTATUS access type: 2-bit field at bits 9:8. The ACCESS_TYPE_*
 * values are unshifted, i.e. what AS_FAULTSTATUS_ACCESS_TYPE_GET() returns.
 */
#define AS_FAULTSTATUS_ACCESS_TYPE_SHIFT 8
#define AS_FAULTSTATUS_ACCESS_TYPE_MASK (0x3 << AS_FAULTSTATUS_ACCESS_TYPE_SHIFT)
#define AS_FAULTSTATUS_ACCESS_TYPE_GET(reg_val) \
	(((reg_val)&AS_FAULTSTATUS_ACCESS_TYPE_MASK) >> AS_FAULTSTATUS_ACCESS_TYPE_SHIFT)
#define AS_FAULTSTATUS_ACCESS_TYPE_ATOMIC (0x0)
#define AS_FAULTSTATUS_ACCESS_TYPE_EX (0x1)
#define AS_FAULTSTATUS_ACCESS_TYPE_READ (0x2)
#define AS_FAULTSTATUS_ACCESS_TYPE_WRITE (0x3)

/*
 * AS_FAULTSTATUS source ID: 16-bit field at bits 31:16.
 *
 * The mask literal is unsigned on purpose: the field reaches bit 31, and
 * shifting the signed constant 0xFFFF left by 16 overflows int (undefined
 * behaviour) and in practice produces a negative mask that sign-extends
 * when combined with a 64-bit value.
 */
#define AS_FAULTSTATUS_SOURCE_ID_SHIFT 16
#define AS_FAULTSTATUS_SOURCE_ID_MASK (0xFFFFu << AS_FAULTSTATUS_SOURCE_ID_SHIFT)
#define AS_FAULTSTATUS_SOURCE_ID_GET(reg_val) \
	(((reg_val)&AS_FAULTSTATUS_SOURCE_ID_MASK) >> AS_FAULTSTATUS_SOURCE_ID_SHIFT)
/*
 * PRFCNT_FEATURES counter block size: 8-bit field at bits 7:0.
 * PRFCNT_FEATURES_COUNTER_BLOCK_SIZE_GET() extracts it from a register
 * value.
 */
#define PRFCNT_FEATURES_COUNTER_BLOCK_SIZE_SHIFT (0)
#define PRFCNT_FEATURES_COUNTER_BLOCK_SIZE_MASK ((0xFF) << PRFCNT_FEATURES_COUNTER_BLOCK_SIZE_SHIFT)
#define PRFCNT_FEATURES_COUNTER_BLOCK_SIZE_GET(reg_val)                        \
	(((reg_val) & PRFCNT_FEATURES_COUNTER_BLOCK_SIZE_MASK) >>              \
	 PRFCNT_FEATURES_COUNTER_BLOCK_SIZE_SHIFT)
/*
 * Begin MMU TRANSCFG register values
 *
 * Address mode: 4-bit field selected by AS_TRANSCFG_ADRMODE_MASK.
 */
#define AS_TRANSCFG_ADRMODE_LEGACY      0
#define AS_TRANSCFG_ADRMODE_UNMAPPED    1
#define AS_TRANSCFG_ADRMODE_IDENTITY    2
#define AS_TRANSCFG_ADRMODE_AARCH64_4K  6
#define AS_TRANSCFG_ADRMODE_AARCH64_64K 8
#define AS_TRANSCFG_ADRMODE_MASK        0xF

/*
 * Begin TRANSCFG register values
 *
 * PTW_MEMATTR is the 2-bit field at bits 25:24, PTW_SH the 2-bit field at
 * bits 29:28 and R_ALLOCATE is bit 30. All values are already shifted
 * into position.
 */
#define AS_TRANSCFG_PTW_MEMATTR_MASK            (3ull << 24)
#define AS_TRANSCFG_PTW_MEMATTR_NON_CACHEABLE   (1ull << 24)
#define AS_TRANSCFG_PTW_MEMATTR_WRITE_BACK      (2ull << 24)

#define AS_TRANSCFG_PTW_SH_MASK                 ((3ull << 28))
#define AS_TRANSCFG_PTW_SH_OS                   (2ull << 28)
#define AS_TRANSCFG_PTW_SH_IS                   (3ull << 28)

#define AS_TRANSCFG_R_ALLOCATE                  (1ull << 30)
/*
 * Begin Command Values
 */

/* AS_COMMAND register commands */

/* NOP Operation */
#define AS_COMMAND_NOP          0x00
/* Broadcasts the values in AS_TRANSTAB and ASn_MEMATTR to all MMUs */
#define AS_COMMAND_UPDATE       0x01
/* Issue a lock region command to all MMUs */
#define AS_COMMAND_LOCK         0x02
/* Issue a flush region command to all MMUs */
#define AS_COMMAND_UNLOCK       0x03
/* Flush all L2 caches then issue a flush region command to all MMUs */
#define AS_COMMAND_FLUSH_PT     0x04
/* Wait for memory accesses to complete, flush all the L1 caches, then flush
 * all L2 caches, then issue a flush region command to all MMUs
 */
#define AS_COMMAND_FLUSH_MEM    0x05
/*
 * AS_LOCKADDR register
 *
 * LOCKADDR_SIZE is the field selected by 0x3F at bit 0, LOCKADDR_BASE the
 * field selected by 0xFFFFFFFFFFFFF at bit 12. For each field:
 *   *_GET(reg_val)        extracts the field from reg_val;
 *   *_SET(reg_val, value) yields reg_val with the field replaced by value
 *                         (bits of value outside the field are dropped).
 * GPU_U() is defined outside this block.
 */

/* LOCKADDR_SIZE field */
#define AS_LOCKADDR_LOCKADDR_SIZE_SHIFT GPU_U(0)
#define AS_LOCKADDR_LOCKADDR_SIZE_MASK (GPU_U(0x3F) << AS_LOCKADDR_LOCKADDR_SIZE_SHIFT)
#define AS_LOCKADDR_LOCKADDR_SIZE_GET(reg_val)                                 \
	(((reg_val) & AS_LOCKADDR_LOCKADDR_SIZE_MASK) >>                       \
	 AS_LOCKADDR_LOCKADDR_SIZE_SHIFT)
#define AS_LOCKADDR_LOCKADDR_SIZE_SET(reg_val, value)                          \
	(((reg_val) & ~AS_LOCKADDR_LOCKADDR_SIZE_MASK) |                       \
	 (((value) << AS_LOCKADDR_LOCKADDR_SIZE_SHIFT) &                       \
	  AS_LOCKADDR_LOCKADDR_SIZE_MASK))

/* LOCKADDR_BASE field */
#define AS_LOCKADDR_LOCKADDR_BASE_SHIFT GPU_U(12)
#define AS_LOCKADDR_LOCKADDR_BASE_MASK (GPU_U(0xFFFFFFFFFFFFF) << AS_LOCKADDR_LOCKADDR_BASE_SHIFT)
#define AS_LOCKADDR_LOCKADDR_BASE_GET(reg_val)                                 \
	(((reg_val) & AS_LOCKADDR_LOCKADDR_BASE_MASK) >>                       \
	 AS_LOCKADDR_LOCKADDR_BASE_SHIFT)
#define AS_LOCKADDR_LOCKADDR_BASE_SET(reg_val, value)                          \
	(((reg_val) & ~AS_LOCKADDR_LOCKADDR_BASE_MASK) |                       \
	 (((value) << AS_LOCKADDR_LOCKADDR_BASE_SHIFT) &                       \
	  AS_LOCKADDR_LOCKADDR_BASE_MASK))
/* GPU_STATUS values */

/* Set if the performance counters are active. */
#define GPU_STATUS_PRFCNT_ACTIVE                (1 << 2)
/* Set if the cycle counter is active. */
#define GPU_STATUS_CYCLE_COUNT_ACTIVE           (1 << 6)
/* Set if protected mode is active */
#define GPU_STATUS_PROTECTED_MODE_ACTIVE        (1 << 7)

/* PRFCNT_CONFIG register values */

/* Counter mode position. */
#define PRFCNT_CONFIG_MODE_SHIFT        0
/* Address space bitmap position. */
#define PRFCNT_CONFIG_AS_SHIFT          4
/* Set select position. */
#define PRFCNT_CONFIG_SETSELECT_SHIFT   8

/* The performance counters are disabled. */
#define PRFCNT_CONFIG_MODE_OFF          0
/* The performance counters are enabled, but are only written out when a
 * PRFCNT_SAMPLE command is issued using the GPU_COMMAND register.
 */
#define PRFCNT_CONFIG_MODE_MANUAL       1
/* The performance counters are enabled, and are written out each time a tile
 * finishes rendering.
 */
#define PRFCNT_CONFIG_MODE_TILE         2
/* AS<n>_MEMATTR values from MMU_MEMATTR_STAGE1: */

/* Use GPU implementation-defined caching policy. */
#define AS_MEMATTR_IMPL_DEF_CACHE_POLICY        0x88ull
/* The attribute set to force all resources to be cached. */
#define AS_MEMATTR_FORCE_TO_CACHE_ALL           0x8Full
/* Inner write-alloc cache setup, no outer caching */
#define AS_MEMATTR_WRITE_ALLOC                  0x8Dull

/* LPAE variants of the attributes. */

/* Use GPU implementation-defined caching policy. */
#define AS_MEMATTR_LPAE_IMPL_DEF_CACHE_POLICY   0x48ull
/* The attribute set to force all resources to be cached. */
#define AS_MEMATTR_LPAE_FORCE_TO_CACHE_ALL      0x4Full
/* Inner write-alloc cache setup, no outer caching */
#define AS_MEMATTR_LPAE_WRITE_ALLOC             0x4Dull
/* Set to implementation defined, outer caching */
#define AS_MEMATTR_LPAE_OUTER_IMPL_DEF          0x88ull
/* Set to write back memory, outer caching */
#define AS_MEMATTR_LPAE_OUTER_WA                0x8Dull
/* There is no LPAE support for non-cacheable, since the memory type is always
 * write-back.
 * Marking this setting as reserved for LPAE (the macro is deliberately
 * defined with no value).
 */
#define AS_MEMATTR_LPAE_NON_CACHEABLE_RESERVED
/* L2_MMU_CONFIG register */

/* ALLOW_SNOOP_DISPARITY is a single flag at bit 23. */
#define L2_MMU_CONFIG_ALLOW_SNOOP_DISPARITY_SHIFT       (23)
#define L2_MMU_CONFIG_ALLOW_SNOOP_DISPARITY     (0x1 << L2_MMU_CONFIG_ALLOW_SNOOP_DISPARITY_SHIFT)

/* End L2_MMU_CONFIG register */

/* THREAD_* registers */

/* THREAD_FEATURES IMPLEMENTATION_TECHNOLOGY values */
#define IMPLEMENTATION_UNSPECIFIED      0
#define IMPLEMENTATION_SILICON          1
#define IMPLEMENTATION_FPGA             2
#define IMPLEMENTATION_MODEL            3

/* Default values when registers are not supported by the implemented hardware */
#define THREAD_MT_DEFAULT       256
#define THREAD_MWS_DEFAULT      256
#define THREAD_MBS_DEFAULT      256
#define THREAD_MR_DEFAULT       1024
#define THREAD_MTQ_DEFAULT      4
#define THREAD_MTGS_DEFAULT     10

/* End THREAD_* registers */
/* SHADER_CONFIG register */
#define SC_LS_ALLOW_ATTR_TYPES          (1ul << 16)
#define SC_TLS_HASH_ENABLE              (1ul << 17)
#define SC_LS_ATTR_CHECK_DISABLE        (1ul << 18)
#define SC_VAR_ALGORITHM                (1ul << 29)
/* End SHADER_CONFIG register */

/* TILER_CONFIG register */
#define TC_CLOCK_GATE_OVERRIDE          (1ul << 0)
/* End TILER_CONFIG register */

/*
 * L2_CONFIG register
 *
 * SIZE is the 8-bit field at bits 23:16 and HASH the 8-bit field at bits
 * 31:24. ASN_HASH_ENABLE is a single flag at bit 24, i.e. it shares its
 * position with the lowest bit of HASH.
 */
#define L2_CONFIG_SIZE_SHIFT            16
#define L2_CONFIG_SIZE_MASK             (0xFFul << L2_CONFIG_SIZE_SHIFT)
#define L2_CONFIG_HASH_SHIFT            24
#define L2_CONFIG_HASH_MASK             (0xFFul << L2_CONFIG_HASH_SHIFT)
#define L2_CONFIG_ASN_HASH_ENABLE_SHIFT 24
#define L2_CONFIG_ASN_HASH_ENABLE_MASK  (1ul << L2_CONFIG_ASN_HASH_ENABLE_SHIFT)
/* End L2_CONFIG register */

/* IDVS_GROUP register */
#define IDVS_GROUP_SIZE_SHIFT           (16)
#define IDVS_GROUP_MAX_SIZE             (0x3F)
/* SYSC_ALLOC read IDs */
#define SYSC_ALLOC_ID_R_OTHER                   0x00
#define SYSC_ALLOC_ID_R_CSF                     0x02
#define SYSC_ALLOC_ID_R_MMU                     0x04
#define SYSC_ALLOC_ID_R_TILER_VERT              0x08
#define SYSC_ALLOC_ID_R_TILER_PTR               0x09
#define SYSC_ALLOC_ID_R_TILER_INDEX             0x0A
#define SYSC_ALLOC_ID_R_TILER_OTHER             0x0B
#define SYSC_ALLOC_ID_R_IC                      0x10
#define SYSC_ALLOC_ID_R_ATTR                    0x11
#define SYSC_ALLOC_ID_R_SCM                     0x12
#define SYSC_ALLOC_ID_R_FSDC                    0x13
#define SYSC_ALLOC_ID_R_VL                      0x14
#define SYSC_ALLOC_ID_R_PLR                     0x15
#define SYSC_ALLOC_ID_R_TEX                     0x18
#define SYSC_ALLOC_ID_R_LSC                     0x1c

/* SYSC_ALLOC write IDs */
#define SYSC_ALLOC_ID_W_OTHER                   0x00
#define SYSC_ALLOC_ID_W_CSF                     0x02
#define SYSC_ALLOC_ID_W_PCB                     0x07
#define SYSC_ALLOC_ID_W_TILER_PTR               0x09
#define SYSC_ALLOC_ID_W_TILER_VERT_PLIST        0x0A
#define SYSC_ALLOC_ID_W_TILER_OTHER             0x0B
#define SYSC_ALLOC_ID_W_L2_EVICT                0x0C
#define SYSC_ALLOC_ID_W_L2_FLUSH                0x0D
#define SYSC_ALLOC_ID_W_TIB_COLOR               0x10
#define SYSC_ALLOC_ID_W_TIB_COLOR_AFBCH         0x11
#define SYSC_ALLOC_ID_W_TIB_COLOR_AFBCB         0x12
#define SYSC_ALLOC_ID_W_TIB_CRC                 0x13
#define SYSC_ALLOC_ID_W_TIB_DS                  0x14
#define SYSC_ALLOC_ID_W_TIB_DS_AFBCH            0x15
#define SYSC_ALLOC_ID_W_TIB_DS_AFBCB            0x16
#define SYSC_ALLOC_ID_W_LSC                     0x1C

/* SYSC_ALLOC values */
#define SYSC_ALLOC_L2_ALLOC                     0x0
#define SYSC_ALLOC_NEVER_ALLOC                  0x2
#define SYSC_ALLOC_ALWAYS_ALLOC                 0x3
#define SYSC_ALLOC_PTL_ALLOC                    0x4
#define SYSC_ALLOC_L2_PTL_ALLOC                 0x5
/*
 * SYSC_ALLOC register
 *
 * The register is split into 4-bit fields, alternating read (R) and write
 * (W) allocation settings: R0 at bit 0, W0 at bit 4, R1 at bit 8, W1 at
 * bit 12, R2 at bit 16, W2 at bit 20 and R3 at bit 24. For every field:
 *   *_GET(reg_val)        extracts the field from reg_val;
 *   *_SET(reg_val, value) yields reg_val with the field replaced by value
 *                         (bits of value outside the field are dropped).
 */

/* SYSC_ALLOC_R_SYSC_ALLOC0: bits 3:0 */
#define SYSC_ALLOC_R_SYSC_ALLOC0_SHIFT  (0)
#define SYSC_ALLOC_R_SYSC_ALLOC0_MASK   ((0xF) << SYSC_ALLOC_R_SYSC_ALLOC0_SHIFT)
#define SYSC_ALLOC_R_SYSC_ALLOC0_GET(reg_val)                                  \
	(((reg_val) & SYSC_ALLOC_R_SYSC_ALLOC0_MASK) >>                        \
	 SYSC_ALLOC_R_SYSC_ALLOC0_SHIFT)
#define SYSC_ALLOC_R_SYSC_ALLOC0_SET(reg_val, value)                           \
	(((reg_val) & ~SYSC_ALLOC_R_SYSC_ALLOC0_MASK) |                        \
	 (((value) << SYSC_ALLOC_R_SYSC_ALLOC0_SHIFT) &                        \
	  SYSC_ALLOC_R_SYSC_ALLOC0_MASK))
/* End of SYSC_ALLOC_R_SYSC_ALLOC0 values */

/* SYSC_ALLOC_W_SYSC_ALLOC0: bits 7:4 */
#define SYSC_ALLOC_W_SYSC_ALLOC0_SHIFT  (4)
#define SYSC_ALLOC_W_SYSC_ALLOC0_MASK   ((0xF) << SYSC_ALLOC_W_SYSC_ALLOC0_SHIFT)
#define SYSC_ALLOC_W_SYSC_ALLOC0_GET(reg_val)                                  \
	(((reg_val) & SYSC_ALLOC_W_SYSC_ALLOC0_MASK) >>                        \
	 SYSC_ALLOC_W_SYSC_ALLOC0_SHIFT)
#define SYSC_ALLOC_W_SYSC_ALLOC0_SET(reg_val, value)                           \
	(((reg_val) & ~SYSC_ALLOC_W_SYSC_ALLOC0_MASK) |                        \
	 (((value) << SYSC_ALLOC_W_SYSC_ALLOC0_SHIFT) &                        \
	  SYSC_ALLOC_W_SYSC_ALLOC0_MASK))
/* End of SYSC_ALLOC_W_SYSC_ALLOC0 values */

/* SYSC_ALLOC_R_SYSC_ALLOC1: bits 11:8 */
#define SYSC_ALLOC_R_SYSC_ALLOC1_SHIFT  (8)
#define SYSC_ALLOC_R_SYSC_ALLOC1_MASK   ((0xF) << SYSC_ALLOC_R_SYSC_ALLOC1_SHIFT)
#define SYSC_ALLOC_R_SYSC_ALLOC1_GET(reg_val)                                  \
	(((reg_val) & SYSC_ALLOC_R_SYSC_ALLOC1_MASK) >>                        \
	 SYSC_ALLOC_R_SYSC_ALLOC1_SHIFT)
#define SYSC_ALLOC_R_SYSC_ALLOC1_SET(reg_val, value)                           \
	(((reg_val) & ~SYSC_ALLOC_R_SYSC_ALLOC1_MASK) |                        \
	 (((value) << SYSC_ALLOC_R_SYSC_ALLOC1_SHIFT) &                        \
	  SYSC_ALLOC_R_SYSC_ALLOC1_MASK))
/* End of SYSC_ALLOC_R_SYSC_ALLOC1 values */

/* SYSC_ALLOC_W_SYSC_ALLOC1: bits 15:12 */
#define SYSC_ALLOC_W_SYSC_ALLOC1_SHIFT  (12)
#define SYSC_ALLOC_W_SYSC_ALLOC1_MASK   ((0xF) << SYSC_ALLOC_W_SYSC_ALLOC1_SHIFT)
#define SYSC_ALLOC_W_SYSC_ALLOC1_GET(reg_val)                                  \
	(((reg_val) & SYSC_ALLOC_W_SYSC_ALLOC1_MASK) >>                        \
	 SYSC_ALLOC_W_SYSC_ALLOC1_SHIFT)
#define SYSC_ALLOC_W_SYSC_ALLOC1_SET(reg_val, value)                           \
	(((reg_val) & ~SYSC_ALLOC_W_SYSC_ALLOC1_MASK) |                        \
	 (((value) << SYSC_ALLOC_W_SYSC_ALLOC1_SHIFT) &                        \
	  SYSC_ALLOC_W_SYSC_ALLOC1_MASK))
/* End of SYSC_ALLOC_W_SYSC_ALLOC1 values */

/* SYSC_ALLOC_R_SYSC_ALLOC2: bits 19:16 */
#define SYSC_ALLOC_R_SYSC_ALLOC2_SHIFT  (16)
#define SYSC_ALLOC_R_SYSC_ALLOC2_MASK   ((0xF) << SYSC_ALLOC_R_SYSC_ALLOC2_SHIFT)
#define SYSC_ALLOC_R_SYSC_ALLOC2_GET(reg_val)                                  \
	(((reg_val) & SYSC_ALLOC_R_SYSC_ALLOC2_MASK) >>                        \
	 SYSC_ALLOC_R_SYSC_ALLOC2_SHIFT)
#define SYSC_ALLOC_R_SYSC_ALLOC2_SET(reg_val, value)                           \
	(((reg_val) & ~SYSC_ALLOC_R_SYSC_ALLOC2_MASK) |                        \
	 (((value) << SYSC_ALLOC_R_SYSC_ALLOC2_SHIFT) &                        \
	  SYSC_ALLOC_R_SYSC_ALLOC2_MASK))
/* End of SYSC_ALLOC_R_SYSC_ALLOC2 values */

/* SYSC_ALLOC_W_SYSC_ALLOC2: bits 23:20 */
#define SYSC_ALLOC_W_SYSC_ALLOC2_SHIFT  (20)
#define SYSC_ALLOC_W_SYSC_ALLOC2_MASK   ((0xF) << SYSC_ALLOC_W_SYSC_ALLOC2_SHIFT)
#define SYSC_ALLOC_W_SYSC_ALLOC2_GET(reg_val)                                  \
	(((reg_val) & SYSC_ALLOC_W_SYSC_ALLOC2_MASK) >>                        \
	 SYSC_ALLOC_W_SYSC_ALLOC2_SHIFT)
#define SYSC_ALLOC_W_SYSC_ALLOC2_SET(reg_val, value)                           \
	(((reg_val) & ~SYSC_ALLOC_W_SYSC_ALLOC2_MASK) |                        \
	 (((value) << SYSC_ALLOC_W_SYSC_ALLOC2_SHIFT) &                        \
	  SYSC_ALLOC_W_SYSC_ALLOC2_MASK))
/* End of SYSC_ALLOC_W_SYSC_ALLOC2 values */

/* SYSC_ALLOC_R_SYSC_ALLOC3: bits 27:24 */
#define SYSC_ALLOC_R_SYSC_ALLOC3_SHIFT  (24)
#define SYSC_ALLOC_R_SYSC_ALLOC3_MASK   ((0xF) << SYSC_ALLOC_R_SYSC_ALLOC3_SHIFT)
#define SYSC_ALLOC_R_SYSC_ALLOC3_GET(reg_val)                                  \
	(((reg_val) & SYSC_ALLOC_R_SYSC_ALLOC3_MASK) >>                        \
	 SYSC_ALLOC_R_SYSC_ALLOC3_SHIFT)
#define SYSC_ALLOC_R_SYSC_ALLOC3_SET(reg_val, value)                           \
	(((reg_val) & ~SYSC_ALLOC_R_SYSC_ALLOC3_MASK) |                        \
	 (((value) << SYSC_ALLOC_R_SYSC_ALLOC3_SHIFT) &                        \
	  SYSC_ALLOC_R_SYSC_ALLOC3_MASK))
/* End of SYSC_ALLOC_R_SYSC_ALLOC3 values */
/*
 * SYSC_ALLOC_W_SYSC_ALLOC3: 4-bit field at bits 31:28.
 *
 * SYSC_ALLOC_W_SYSC_ALLOC3_GET(reg_val) extracts the field from reg_val;
 * SYSC_ALLOC_W_SYSC_ALLOC3_SET(reg_val, value) yields reg_val with the
 * field replaced by value (bits of value outside the field are dropped).
 *
 * The mask literal is unsigned on purpose: this field reaches bit 31, and
 * shifting the signed constant 0xF left by 28 overflows int (undefined
 * behaviour) and in practice produces a negative mask.
 */
#define SYSC_ALLOC_W_SYSC_ALLOC3_SHIFT (28)
#define SYSC_ALLOC_W_SYSC_ALLOC3_MASK ((0xFu) << SYSC_ALLOC_W_SYSC_ALLOC3_SHIFT)
#define SYSC_ALLOC_W_SYSC_ALLOC3_GET(reg_val)                                  \
	(((reg_val)&SYSC_ALLOC_W_SYSC_ALLOC3_MASK) >>                          \
	 SYSC_ALLOC_W_SYSC_ALLOC3_SHIFT)
#define SYSC_ALLOC_W_SYSC_ALLOC3_SET(reg_val, value)                           \
	(((reg_val) & ~SYSC_ALLOC_W_SYSC_ALLOC3_MASK) |                        \
	 (((value) << SYSC_ALLOC_W_SYSC_ALLOC3_SHIFT) &                        \
	  SYSC_ALLOC_W_SYSC_ALLOC3_MASK))
/* End of SYSC_ALLOC_W_SYSC_ALLOC3 values */
/* Include POWER_CHANGED_SINGLE in debug builds for use in irq latency test. */
#ifdef CONFIG_MALI_BIFROST_DEBUG

View File

@@ -94,7 +94,10 @@ struct kbase_ipa_model_vinstr_data {
struct kbase_ipa_group {
const char *name;
s32 default_value;
s64 (*op)(struct kbase_ipa_model_vinstr_data *, s32, u32);
s64 (*op)(
struct kbase_ipa_model_vinstr_data *model_data,
s32 coeff,
u32 counter_block_offset);
u32 counter_block_offset;
};

View File

@@ -115,8 +115,8 @@ static const struct kbase_ipa_counter ipa_top_level_cntrs_def_ttux[] = {
};
/* These tables provide a description of each performance counter
* used by the shader cores counter model for energy estimation.
*/
* used by the shader cores counter model for energy estimation.
*/
static const struct kbase_ipa_counter ipa_shader_core_cntrs_def_todx[] = {
SC_COUNTER_DEF("exec_instr_fma", 505449, EXEC_INSTR_FMA),
SC_COUNTER_DEF("tex_filt_num_operations", 574869, TEX_FILT_NUM_OPS),
@@ -150,7 +150,7 @@ static const struct kbase_ipa_counter ipa_shader_core_cntrs_def_ttux[] = {
SC_COUNTER_DEF("ls_mem_read_short", 322525, LS_MEM_READ_SHORT),
SC_COUNTER_DEF("full_quad_warps", 844124, FULL_QUAD_WARPS),
SC_COUNTER_DEF("exec_instr_cvt", 226411, EXEC_INSTR_CVT),
SC_COUNTER_DEF("frag_quads_ezs_update",372032, FRAG_QUADS_EZS_UPDATE),
SC_COUNTER_DEF("frag_quads_ezs_update", 372032, FRAG_QUADS_EZS_UPDATE),
};
#define IPA_POWER_MODEL_OPS(gpu, init_token) \
@@ -224,8 +224,8 @@ const struct kbase_ipa_model_ops *kbase_ipa_counter_model_ops_find(
const char *kbase_ipa_counter_model_name_from_id(u32 gpu_id)
{
const u32 prod_id = (gpu_id & GPU_ID_VERSION_PRODUCT_ID) >>
GPU_ID_VERSION_PRODUCT_ID_SHIFT;
const u32 prod_id =
(gpu_id & GPU_ID_VERSION_PRODUCT_ID) >> KBASE_GPU_ID_VERSION_PRODUCT_ID_SHIFT;
switch (GPU_ID2_MODEL_MATCH_VALUE(prod_id)) {
case GPU_ID2_PRODUCT_TODX:

View File

@@ -111,20 +111,21 @@ static u32 kbase_g7x_power_model_get_sc_counter(struct kbase_ipa_model_vinstr_da
/**
* kbase_g7x_sum_all_memsys_blocks() - calculate energy for a single Memory System performance counter.
* @model_data: pointer to GPU model data.
* @coeff: default value of coefficient for IPA group.
* @offset: offset in bytes of the counter inside the block it belongs to.
* @model_data: pointer to GPU model data.
* @coeff: default value of coefficient for IPA group.
* @counter_block_offset: offset in bytes of the counter inside the block it belongs to.
*
* Return: Energy estimation for a single Memory System performance counter.
*/
static s64 kbase_g7x_sum_all_memsys_blocks(
struct kbase_ipa_model_vinstr_data *model_data,
s32 coeff,
u32 offset)
u32 counter_block_offset)
{
u32 counter;
counter = kbase_g7x_power_model_get_memsys_counter(model_data, offset);
counter = kbase_g7x_power_model_get_memsys_counter(model_data,
counter_block_offset);
return kbase_ipa_sum_all_memsys_blocks(model_data, coeff, counter);
}
@@ -531,8 +532,8 @@ const struct kbase_ipa_model_ops *kbase_ipa_counter_model_ops_find(
const char *kbase_ipa_counter_model_name_from_id(u32 gpu_id)
{
const u32 prod_id = (gpu_id & GPU_ID_VERSION_PRODUCT_ID) >>
GPU_ID_VERSION_PRODUCT_ID_SHIFT;
const u32 prod_id =
(gpu_id & GPU_ID_VERSION_PRODUCT_ID) >> KBASE_GPU_ID_VERSION_PRODUCT_ID_SHIFT;
switch (GPU_ID2_MODEL_MATCH_VALUE(prod_id)) {
case GPU_ID2_PRODUCT_TMIX:

View File

@@ -71,7 +71,7 @@ KBASE_EXPORT_TEST_API(kbase_ipa_model_ops_find);
const char *kbase_ipa_model_name_from_id(u32 gpu_id)
{
const char* model_name =
const char *model_name =
kbase_ipa_counter_model_name_from_id(gpu_id);
if (!model_name)
@@ -610,7 +610,7 @@ static unsigned long kbase_get_dynamic_power(unsigned long freq,
/* Here unlike kbase_get_real_power(), shader core frequency is
* used for the scaling as simple power model is used to obtain
* the value of dynamic coefficient (which is is a fixed value
* the value of dynamic coefficient (which is a fixed value
* retrieved from the device tree).
*/
power += kbase_scale_dynamic_power(

View File

@@ -128,8 +128,14 @@ static ssize_t param_string_set(struct file *file, const char __user *user_buf,
err = kbase_ipa_model_recalculate(model);
if (err < 0) {
u32 string_len = strscpy(param->addr.str, old_str, param->size);
string_len += sizeof(char);
/* Make sure that the source string fits into the buffer. */
KBASE_DEBUG_ASSERT(string_len <= param->size);
CSTD_UNUSED(string_len);
ret = err;
strlcpy(param->addr.str, old_str, param->size);
}
end:
@@ -275,7 +281,7 @@ static void kbase_ipa_model_debugfs_init(struct kbase_ipa_model *model)
"Type not set for %s parameter %s\n",
model->ops->name, param->name);
} else {
debugfs_create_file(param->name, S_IRUGO | S_IWUSR,
debugfs_create_file(param->name, 0644,
dir, param, fops);
}
}

View File

@@ -307,8 +307,12 @@ static int kbase_simple_power_model_recalculate(struct kbase_ipa_model *model)
model_data->gpu_tz = NULL;
} else {
char tz_name[THERMAL_NAME_LENGTH];
u32 string_len = strscpy(tz_name, model_data->tz_name, sizeof(tz_name));
strlcpy(tz_name, model_data->tz_name, sizeof(tz_name));
string_len += sizeof(char);
/* Make sure that the source string fits into the buffer. */
KBASE_DEBUG_ASSERT(string_len <= sizeof(tz_name));
CSTD_UNUSED(string_len);
/* Release ipa.lock so that thermal_list_lock is not acquired
* with ipa.lock held, thereby avoid lock ordering violation

View File

@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
* (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -124,6 +124,18 @@
/* Reset the GPU after each atom completion */
#define KBASE_SERIALIZE_RESET (1 << 2)
/**
* enum kbase_timeout_selector - The choice of which timeout to get scaled
* using the lowest GPU frequency.
* @KBASE_TIMEOUT_SELECTOR_COUNT: Number of timeout selectors. Must be last in
* the enum.
*/
enum kbase_timeout_selector {
/* Must be the last in the enum */
KBASE_TIMEOUT_SELECTOR_COUNT
};
#if IS_ENABLED(CONFIG_DEBUG_FS)
/**
* struct base_job_fault_event - keeps track of the atom which faulted or which
@@ -653,11 +665,12 @@ static inline bool kbase_jd_katom_is_protected(
/**
* kbase_jd_atom_is_younger - query if one atom is younger by age than another
* @katom_a: the first atom
* @katom_a: the second atom
*
* Return: true if the first atom is strictly younger than the second, false
* otherwise.
* @katom_a: the first atom
* @katom_b: the second atom
*
* Return: true if the first atom is strictly younger than the second,
* false otherwise.
*/
static inline bool kbase_jd_atom_is_younger(const struct kbase_jd_atom *katom_a,
const struct kbase_jd_atom *katom_b)
@@ -666,7 +679,9 @@ static inline bool kbase_jd_atom_is_younger(const struct kbase_jd_atom *katom_a,
}
/**
* kbase_jd_atom_is_earlier
* kbase_jd_atom_is_earlier - Check whether the first atom has been submitted
* earlier than the second one
*
* @katom_a: the first atom
* @katom_b: the second atom
*
@@ -730,17 +745,13 @@ static inline bool kbase_jd_atom_is_earlier(const struct kbase_jd_atom *katom_a,
* A state machine is used to control incremental rendering.
*/
enum kbase_jd_renderpass_state {
KBASE_JD_RP_COMPLETE, /* COMPLETE => START */
KBASE_JD_RP_START, /* START => PEND_OOM or COMPLETE */
KBASE_JD_RP_PEND_OOM, /* PEND_OOM => OOM or COMPLETE */
KBASE_JD_RP_OOM, /* OOM => RETRY */
KBASE_JD_RP_RETRY, /* RETRY => RETRY_PEND_OOM or
* COMPLETE
*/
KBASE_JD_RP_RETRY_PEND_OOM, /* RETRY_PEND_OOM => RETRY_OOM or
* COMPLETE
*/
KBASE_JD_RP_RETRY_OOM, /* RETRY_OOM => RETRY */
KBASE_JD_RP_COMPLETE, /* COMPLETE => START */
KBASE_JD_RP_START, /* START => PEND_OOM or COMPLETE */
KBASE_JD_RP_PEND_OOM, /* PEND_OOM => OOM or COMPLETE */
KBASE_JD_RP_OOM, /* OOM => RETRY */
KBASE_JD_RP_RETRY, /* RETRY => RETRY_PEND_OOM or COMPLETE */
KBASE_JD_RP_RETRY_PEND_OOM, /* RETRY_PEND_OOM => RETRY_OOM or COMPLETE */
KBASE_JD_RP_RETRY_OOM /* RETRY_OOM => RETRY */
};
/**
@@ -813,7 +824,7 @@ struct kbase_jd_renderpass {
* atom completes
* execution on GPU or the input fence get signaled.
* @tb_lock: Lock to serialize the write access made to @tb to
* to store the register access trace messages.
* store the register access trace messages.
* @tb: Pointer to the Userspace accessible buffer storing
* the trace messages for register read/write
* accesses made by the Kbase. The buffer is filled

View File

@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
* (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2020-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -36,6 +36,8 @@
* The struct kbasep_js_device_data sub-structure of kbdev must be zero
* initialized before passing to the kbasep_js_devdata_init() function. This is
* to give efficient error path code.
*
* Return: 0 on success, error code otherwise.
*/
int kbasep_js_devdata_init(struct kbase_device * const kbdev);
@@ -86,6 +88,8 @@ void kbasep_js_devdata_term(struct kbase_device *kbdev);
*
* The struct kbase_context must be zero initialized before passing to the
* kbase_js_init() function. This is to give efficient error path code.
*
* Return: 0 on success, error code otherwise.
*/
int kbasep_js_kctx_init(struct kbase_context *const kctx);
@@ -206,7 +210,7 @@ bool kbasep_js_add_job(struct kbase_context *kctx, struct kbase_jd_atom *atom);
* @kbdev: The kbase_device to operate on
* @kctx: The kbase_context to operate on
* @atom: Atom to remove
*
*
* Completely removing a job requires several calls:
* * kbasep_js_copy_atom_retained_state(), to capture the 'retained state' of
* the atom
@@ -356,9 +360,10 @@ void kbasep_js_runpool_release_ctx(struct kbase_device *kbdev,
struct kbase_context *kctx);
/**
* kbasep_js_runpool_release_ctx_and_katom_retained_state - Variant of
* kbasep_js_runpool_release_ctx_and_katom_retained_state - Variant of
* kbasep_js_runpool_release_ctx() that handles additional
* actions from completing an atom.
*
* @kbdev: KBase device
* @kctx: KBase context
* @katom_retained_state: Retained state from the atom
@@ -381,8 +386,8 @@ void kbasep_js_runpool_release_ctx_and_katom_retained_state(
struct kbasep_js_atom_retained_state *katom_retained_state);
/**
* kbasep_js_runpool_release_ctx_nolock -
* Variant of kbase_js_runpool_release_ctx() w/out locks
* kbasep_js_runpool_release_ctx_nolock - Variant of kbasep_js_runpool_release_ctx()
* without locks
* @kbdev: KBase device
* @kctx: KBase context
*
@@ -396,6 +401,7 @@ void kbasep_js_runpool_release_ctx_nolock(struct kbase_device *kbdev,
/**
* kbasep_js_schedule_privileged_ctx - Schedule in a privileged context
*
* @kbdev: KBase device
* @kctx: KBase context
*
@@ -459,7 +465,7 @@ void kbase_js_try_run_jobs(struct kbase_device *kbdev);
* contexts from (re)entering the runpool.
*
* This does not handle suspending the one privileged context: the caller must
* instead do this by by suspending the GPU HW Counter Instrumentation.
* instead do this by suspending the GPU HW Counter Instrumentation.
*
* This will eventually cause all Power Management active references held by
* contexts on the runpool to be released, without running any more atoms.
@@ -688,6 +694,8 @@ void kbase_js_update_ctx_priority(struct kbase_context *kctx);
* As with any bool, never test the return value with true.
*
* The caller must hold hwaccess_lock.
*
* Return: true if the context is allowed to submit jobs, false otherwise.
*/
static inline bool kbasep_js_is_submit_allowed(
struct kbasep_js_device_data *js_devdata,
@@ -768,8 +776,9 @@ static inline void kbasep_js_clear_submit_allowed(
}
/**
* kbasep_js_atom_retained_state_init_invalid -
* Create an initial 'invalid' atom retained state
* kbasep_js_atom_retained_state_init_invalid - Create an initial 'invalid'
* atom retained state
*
* @retained_state: pointer where to create and initialize the state
*
* Create an initial 'invalid' atom retained state, that requires no

View File

@@ -55,10 +55,11 @@ typedef void kbasep_js_ctx_job_cb(struct kbase_device *kbdev,
* @KBASEP_JS_CTX_ATTR_COMPUTE: Attribute indicating a context that contains
* Compute jobs.
* @KBASEP_JS_CTX_ATTR_NON_COMPUTE: Attribute indicating a context that contains
* Non-Compute jobs.
* Non-Compute jobs.
* @KBASEP_JS_CTX_ATTR_COMPUTE_ALL_CORES: Attribute indicating that a context
* contains compute-job atoms that aren't restricted to a coherent group,
* and can run on all cores.
* contains compute-job atoms that aren't
* restricted to a coherent group,
* and can run on all cores.
* @KBASEP_JS_CTX_ATTR_COUNT: Must be the last in the enum
*
* Each context attribute can be thought of as a boolean value that caches some
@@ -115,7 +116,6 @@ typedef void kbasep_js_ctx_job_cb(struct kbase_device *kbdev,
* BASE_JD_REQ_COHERENT_GROUP set. This is an unlikely case, but it's easy
* enough to handle anyway.
*
*
*/
enum kbasep_js_ctx_attr {
KBASEP_JS_CTX_ATTR_COMPUTE,
@@ -217,44 +217,46 @@ typedef u32 kbase_atom_ordering_flag_t;
/**
* struct kbasep_js_device_data - KBase Device Data Job Scheduler sub-structure
* @runpool_irq: Sub-structure to collect together Job Scheduling data used in
* IRQ context. The hwaccess_lock must be held when accessing.
* IRQ context. The hwaccess_lock must be held when accessing.
* @runpool_irq.submit_allowed: Bitvector indicating whether a currently
* scheduled context is allowed to submit jobs. When bit 'N' is set in
* this, it indicates whether the context bound to address space 'N' is
* allowed to submit jobs.
* scheduled context is allowed to submit jobs.
* When bit 'N' is set in this, it indicates whether
* the context bound to address space 'N' is
* allowed to submit jobs.
* @runpool_irq.ctx_attr_ref_count: Array of Context Attributes Ref_counters:
* Each is large enough to hold a refcount of the number of contexts
* that can fit into the runpool. This is currently BASE_MAX_NR_AS.
* Note that when BASE_MAX_NR_AS==16 we need 5 bits (not 4) to store
* the refcount. Hence, it's not worthwhile reducing this to
* bit-manipulation on u32s to save space (where in contrast, 4 bit
* sub-fields would be easy to do and would save space).
* Whilst this must not become negative, the sign bit is used for:
* - error detection in debug builds
* - Optimization: it is undefined for a signed int to overflow, and so
* the compiler can optimize for that never happening (thus, no masking
* is required on updating the variable)
* Each is large enough to hold a refcount of the number of contexts
* that can fit into the runpool. This is currently BASE_MAX_NR_AS.
* Note that when BASE_MAX_NR_AS==16 we need 5 bits (not 4) to store
* the refcount. Hence, it's not worthwhile reducing this to
* bit-manipulation on u32s to save space (where in contrast, 4 bit
* sub-fields would be easy to do and would save space).
* Whilst this must not become negative, the sign bit is used for:
* - error detection in debug builds
* - Optimization: it is undefined for a signed int to overflow, and so
* the compiler can optimize for that never happening (thus, no masking
* is required on updating the variable)
* @runpool_irq.slot_affinities: Affinity management and tracking. Bitvector
* to aid affinity checking. Element 'n' bit 'i' indicates that slot 'n'
* is using core i (i.e. slot_affinity_refcount[n][i] > 0)
* to aid affinity checking.
* Element 'n' bit 'i' indicates that slot 'n'
* is using core i (i.e. slot_affinity_refcount[n][i] > 0)
* @runpool_irq.slot_affinity_refcount: Array of refcounts for each core owned
* by each slot. Used to generate the slot_affinities array of bitvectors.
* The value of the refcount will not exceed BASE_JM_SUBMIT_SLOTS,
* because it is refcounted only when a job is definitely about to be
* submitted to a slot, and is de-refcounted immediately after a job
* finishes
* by each slot. Used to generate the slot_affinities array of bitvectors.
* The value of the refcount will not exceed BASE_JM_SUBMIT_SLOTS,
* because it is refcounted only when a job is definitely about to be
* submitted to a slot, and is de-refcounted immediately after a job
* finishes
* @schedule_sem: Scheduling semaphore. This must be held when calling
* kbase_jm_kick()
* kbase_jm_kick()
* @ctx_list_pullable: List of contexts that can currently be pulled from
* @ctx_list_unpullable: List of contexts that can not currently be pulled
* from, but have jobs currently running.
* from, but have jobs currently running.
* @nr_user_contexts_running: Number of currently scheduled user contexts
* (excluding ones that are not submitting jobs)
* (excluding ones that are not submitting jobs)
* @nr_all_contexts_running: Number of currently scheduled contexts (including
* ones that are not submitting jobs)
* ones that are not submitting jobs)
* @js_reqs: Core Requirements to match up with base_js_atom's core_req member
* @note This is a write-once member, and so no locking is required to
* read
* @note This is a write-once member, and so no locking is required to
* read
* @scheduling_period_ns: Value for JS_SCHEDULING_PERIOD_NS
* @soft_stop_ticks: Value for JS_SOFT_STOP_TICKS
* @soft_stop_ticks_cl: Value for JS_SOFT_STOP_TICKS_CL
@@ -268,16 +270,16 @@ typedef u32 kbase_atom_ordering_flag_t;
* @suspended_soft_jobs_list: List of suspended soft jobs
* @softstop_always: Support soft-stop on a single context
* @init_status:The initialized-flag is placed at the end, to avoid
* cache-pollution (we should only be using this during init/term paths).
* @note This is a write-once member, and so no locking is required to
* read
* cache-pollution (we should only be using this during init/term paths).
* @note This is a write-once member, and so no locking is required to
* read
* @nr_contexts_pullable:Number of contexts that can currently be pulled from
* @nr_contexts_runnable:Number of contexts that can either be pulled from or
* are currently running
* are currently running
* @soft_job_timeout_ms:Value for JS_SOFT_JOB_TIMEOUT
* @queue_mutex: Queue Lock, used to access the Policy's queue of contexts
* independently of the Run Pool.
* Of course, you don't need the Run Pool lock to access this.
* independently of the Run Pool.
* Of course, you don't need the Run Pool lock to access this.
* @runpool_mutex: Run Pool mutex, for managing contexts within the runpool.
*
* This encapsulates the current context of the Job Scheduler on a particular

View File

@@ -168,6 +168,7 @@ __attribute__((unused)) static const enum base_hw_feature base_hw_features_tTUx[
BASE_HW_FEATURE_L2_CONFIG,
BASE_HW_FEATURE_CLEAN_ONLY_SAFE,
BASE_HW_FEATURE_ASN_HASH,
BASE_HW_FEATURE_GPU_SLEEP,
BASE_HW_FEATURE_END
};

View File

@@ -60,6 +60,7 @@ enum base_hw_issue {
BASE_HW_ISSUE_TTRX_3485,
BASE_HW_ISSUE_GPU2019_3212,
BASE_HW_ISSUE_TURSEHW_1997,
BASE_HW_ISSUE_GPU2019_3878,
BASE_HW_ISSUE_END
};
@@ -596,6 +597,7 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tODx_r0p0
BASE_HW_ISSUE_TSIX_2033,
BASE_HW_ISSUE_TTRX_1337,
BASE_HW_ISSUE_GPU2019_3212,
BASE_HW_ISSUE_GPU2019_3878,
BASE_HW_ISSUE_END
};
@@ -605,6 +607,7 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tOD
BASE_HW_ISSUE_TSIX_2033,
BASE_HW_ISSUE_TTRX_1337,
BASE_HW_ISSUE_GPU2019_3212,
BASE_HW_ISSUE_GPU2019_3878,
BASE_HW_ISSUE_END
};
@@ -612,6 +615,7 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tGRx_r0p0
BASE_HW_ISSUE_9435,
BASE_HW_ISSUE_TSIX_2033,
BASE_HW_ISSUE_TTRX_1337,
BASE_HW_ISSUE_GPU2019_3878,
BASE_HW_ISSUE_END
};
@@ -620,6 +624,7 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tGR
BASE_HW_ISSUE_9435,
BASE_HW_ISSUE_TSIX_2033,
BASE_HW_ISSUE_TTRX_1337,
BASE_HW_ISSUE_GPU2019_3878,
BASE_HW_ISSUE_END
};
@@ -627,6 +632,7 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tVAx_r0p0
BASE_HW_ISSUE_9435,
BASE_HW_ISSUE_TSIX_2033,
BASE_HW_ISSUE_TTRX_1337,
BASE_HW_ISSUE_GPU2019_3878,
BASE_HW_ISSUE_END
};
@@ -635,6 +641,7 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tVA
BASE_HW_ISSUE_9435,
BASE_HW_ISSUE_TSIX_2033,
BASE_HW_ISSUE_TTRX_1337,
BASE_HW_ISSUE_GPU2019_3878,
BASE_HW_ISSUE_END
};
@@ -643,6 +650,7 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tTU
BASE_HW_ISSUE_9435,
BASE_HW_ISSUE_TSIX_2033,
BASE_HW_ISSUE_TTRX_1337,
BASE_HW_ISSUE_GPU2019_3878,
BASE_HW_ISSUE_END
};
@@ -651,6 +659,15 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tTUx_r0p0
BASE_HW_ISSUE_TSIX_2033,
BASE_HW_ISSUE_TTRX_1337,
BASE_HW_ISSUE_TURSEHW_1997,
BASE_HW_ISSUE_GPU2019_3878,
BASE_HW_ISSUE_END
};
/* Hardware issue list which, going by its name, applies to tTUx r1p0 GPUs
 * (NOTE(review): confirm against the product/revision lookup that selects it).
 * The list is terminated by BASE_HW_ISSUE_END. The unused attribute silences
 * the compiler warning when this array is not referenced.
 */
__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tTUx_r1p0[] = {
BASE_HW_ISSUE_9435,
BASE_HW_ISSUE_TSIX_2033,
BASE_HW_ISSUE_TTRX_1337,
BASE_HW_ISSUE_GPU2019_3878,
BASE_HW_ISSUE_END
};

View File

@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
* (C) COPYRIGHT 2010-2021 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2010-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -109,9 +109,9 @@
struct kbase_device *kbase_device_alloc(void);
/*
* note: configuration attributes member of kbdev needs to have
* been setup before calling kbase_device_init
*/
* note: configuration attributes member of kbdev needs to have
* been setup before calling kbase_device_init
*/
int kbase_device_misc_init(struct kbase_device *kbdev);
void kbase_device_misc_term(struct kbase_device *kbdev);
@@ -256,8 +256,26 @@ void kbase_jd_done(struct kbase_jd_atom *katom, int slot_nr, ktime_t *end_timest
kbasep_js_atom_done_code done_code);
void kbase_jd_cancel(struct kbase_device *kbdev, struct kbase_jd_atom *katom);
void kbase_jd_zap_context(struct kbase_context *kctx);
bool jd_done_nolock(struct kbase_jd_atom *katom,
struct list_head *completed_jobs_ctx);
/*
* jd_done_nolock - Perform the necessary handling of an atom that has completed
* the execution.
*
* @katom: Pointer to the atom that completed the execution
* @post_immediately: Flag indicating that completion event can be posted
* immediately for @katom and the other atoms dependent
* on @katom which also completed execution. The flag is
* false only for the case where the function is called by
* kbase_jd_done_worker() on the completion of atom running
* on the GPU.
*
* Note that if this is a soft-job that has had kbase_prepare_soft_job called on it then the caller
* is responsible for calling kbase_finish_soft_job *before* calling this function.
*
* The caller must hold the kbase_jd_context.lock.
*/
bool jd_done_nolock(struct kbase_jd_atom *katom, bool post_immediately);
void kbase_jd_free_external_resources(struct kbase_jd_atom *katom);
void kbase_jd_dep_clear_locked(struct kbase_jd_atom *katom);
@@ -299,19 +317,73 @@ void kbase_job_slot_ctx_priority_check_locked(struct kbase_context *kctx,
* virtual address space in a growable memory region and the atom currently
* executing on a job slot is the tiler job chain at the start of a renderpass.
*
* Return 0 if successful, otherwise a negative error code.
* Return: 0 if successful, otherwise a negative error code.
*/
int kbase_job_slot_softstop_start_rp(struct kbase_context *kctx,
struct kbase_va_region *reg);
/**
* kbase_job_slot_softstop - Soft-stop the specified job slot
*
* @kbdev: The kbase device
* @js: The job slot to soft-stop
* @target_katom: The job that should be soft-stopped (or NULL for any job)
* Context:
* The job slot lock must be held when calling this function.
* The job slot must not already be in the process of being soft-stopped.
*
* Where possible any job in the next register is evicted before the soft-stop.
*/
void kbase_job_slot_softstop(struct kbase_device *kbdev, int js,
struct kbase_jd_atom *target_katom);
void kbase_job_slot_softstop_swflags(struct kbase_device *kbdev, int js,
struct kbase_jd_atom *target_katom, u32 sw_flags);
/**
* kbase_job_slot_hardstop - Hard-stop the specified job slot
* @kctx: The kbase context that contains the job(s) that should
* be hard-stopped
* @js: The job slot to hard-stop
* @target_katom: The job that should be hard-stopped (or NULL for all
* jobs from the context)
* Context:
* The job slot lock must be held when calling this function.
*/
void kbase_job_slot_hardstop(struct kbase_context *kctx, int js,
struct kbase_jd_atom *target_katom);
/**
* kbase_job_check_enter_disjoint - potentially enter disjoint mode
* @kbdev: kbase device
* @action: the event which has occurred
* @core_reqs: core requirements of the atom
* @target_katom: the atom which is being affected
*
* For a certain soft-stop action, work out whether to enter disjoint
* state.
*
* This does not register multiple disjoint events if the atom has already
* started a disjoint period
*
* @core_reqs can be supplied as 0 if the atom had not started on the hardware
* (and so a 'real' soft/hard-stop was not required, but it still interrupted
* flow, perhaps on another context)
*
* kbase_job_check_leave_disjoint() should be used to end the disjoint
* state when the soft/hard-stop action is complete
*/
void kbase_job_check_enter_disjoint(struct kbase_device *kbdev, u32 action,
base_jd_core_req core_reqs, struct kbase_jd_atom *target_katom);
/**
* kbase_job_check_leave_disjoint - potentially leave disjoint state
* @kbdev: kbase device
* @target_katom: atom which is finishing
*
* Work out whether to leave disjoint state when finishing an atom that was
* originated by kbase_job_check_enter_disjoint().
*/
void kbase_job_check_leave_disjoint(struct kbase_device *kbdev,
struct kbase_jd_atom *target_katom);
@@ -334,7 +406,7 @@ void kbase_event_wakeup(struct kbase_context *kctx);
* allocation is to be validated.
* @info: Pointer to struct @base_jit_alloc_info
* which is to be validated.
* @return: 0 if jit allocation is valid; negative error code otherwise
* Return: 0 if jit allocation is valid; negative error code otherwise
*/
int kbasep_jit_alloc_validate(struct kbase_context *kctx,
struct base_jit_alloc_info *info);
@@ -381,9 +453,12 @@ static inline void kbase_free_user_buffer(
* @buf_data: Pointer to the information about external resources:
* pages pertaining to the external resource, number of
* pages to copy.
*
* Return: 0 on success, error code otherwise.
*/
int kbase_mem_copy_from_extres(struct kbase_context *kctx,
struct kbase_debug_copy_buffer *buf_data);
#if !MALI_USE_CSF
int kbase_process_soft_job(struct kbase_jd_atom *katom);
int kbase_prepare_soft_job(struct kbase_jd_atom *katom);
@@ -405,7 +480,9 @@ void kbasep_complete_triggered_soft_events(struct kbase_context *kctx, u64 evt);
void kbasep_as_do_poke(struct work_struct *work);
/**
* Check whether a system suspend is in progress, or has already been suspended
* kbase_pm_is_suspending - Check whether a system suspend is in progress,
* or has already been suspended
*
* @kbdev: The kbase device structure for the device
*
* The caller should ensure that either kbdev->pm.active_count_lock is held, or
@@ -533,10 +610,12 @@ int kbase_pm_force_mcu_wakeup_after_sleep(struct kbase_device *kbdev);
#if !MALI_USE_CSF
/**
* Return the atom's ID, as was originally supplied by userspace in
* kbase_jd_atom_id - Return the atom's ID, as was originally supplied by userspace in
* base_jd_atom::atom_number
* @kctx: KBase context pointer
* @katom: Atom for which to return ID
*
* Return: the atom's ID.
*/
static inline int kbase_jd_atom_id(struct kbase_context *kctx,
const struct kbase_jd_atom *katom)
@@ -567,7 +646,9 @@ static inline struct kbase_jd_atom *kbase_jd_atom_from_id(
#endif /* !MALI_USE_CSF */
/**
* Initialize the disjoint state
* kbase_disjoint_init - Initialize the disjoint state
*
* @kbdev: The kbase device
*
* The disjoint event count and state are both set to zero.
*
@@ -589,14 +670,12 @@ static inline struct kbase_jd_atom *kbase_jd_atom_from_id(
* The disjoint event counter is also incremented immediately whenever a job is soft stopped
* and during context creation.
*
* @kbdev: The kbase device
*
* Return: 0 on success and non-zero value on failure.
*/
void kbase_disjoint_init(struct kbase_device *kbdev);
/**
* Increase the count of disjoint events
* kbase_disjoint_event - Increase the count of disjoint events
* called when a disjoint event has happened
*
* @kbdev: The kbase device
@@ -604,42 +683,44 @@ void kbase_disjoint_init(struct kbase_device *kbdev);
void kbase_disjoint_event(struct kbase_device *kbdev);
/**
* Increase the count of disjoint events only if the GPU is in a disjoint state
* kbase_disjoint_event_potential - Increase the count of disjoint events
* only if the GPU is in a disjoint state
*
* @kbdev: The kbase device
*
* This should be called when something happens which could be disjoint if the GPU
* is in a disjoint state. The state refcount keeps track of this.
*
* @kbdev: The kbase device
*/
void kbase_disjoint_event_potential(struct kbase_device *kbdev);
/**
* Returns the count of disjoint events
* kbase_disjoint_event_get - Returns the count of disjoint events
*
* @kbdev: The kbase device
* @return the count of disjoint events
* Return: the count of disjoint events
*/
u32 kbase_disjoint_event_get(struct kbase_device *kbdev);
/**
* Increment the refcount state indicating that the GPU is in a disjoint state.
* kbase_disjoint_state_up - Increment the refcount state indicating that
* the GPU is in a disjoint state.
*
* @kbdev: The kbase device
*
* Also Increment the disjoint event count (calls @ref kbase_disjoint_event)
* eventually after the disjoint state has completed @ref kbase_disjoint_state_down
* should be called
*
* @kbdev: The kbase device
*/
void kbase_disjoint_state_up(struct kbase_device *kbdev);
/**
* Decrement the refcount state
* kbase_disjoint_state_down - Decrement the refcount state
*
* @kbdev: The kbase device
*
* Also Increment the disjoint event count (calls @ref kbase_disjoint_event)
*
* Called after @ref kbase_disjoint_state_up once the disjoint state is over
*
* @kbdev: The kbase device
*/
void kbase_disjoint_state_down(struct kbase_device *kbdev);
@@ -668,8 +749,8 @@ int kbase_device_pcm_dev_init(struct kbase_device *const kbdev);
void kbase_device_pcm_dev_term(struct kbase_device *const kbdev);
/**
* If a job is soft stopped and the number of contexts is >= this value
* it is reported as a disjoint event
* KBASE_DISJOINT_STATE_INTERLEAVED_CONTEXT_COUNT_THRESHOLD - If a job is soft stopped
* and the number of contexts is >= this value it is reported as a disjoint event
*/
#define KBASE_DISJOINT_STATE_INTERLEAVED_CONTEXT_COUNT_THRESHOLD 2

View File

@@ -99,7 +99,7 @@ void kbase_as_fault_debugfs_init(struct kbase_device *kbdev)
} else {
for (i = 0; i < kbdev->nr_hw_address_spaces; i++) {
snprintf(as_name, ARRAY_SIZE(as_name), "as%u", i);
debugfs_create_file(as_name, S_IRUGO,
debugfs_create_file(as_name, 0444,
debugfs_directory,
(void *)(uintptr_t)i,
&as_fault_fops);
@@ -108,5 +108,4 @@ void kbase_as_fault_debugfs_init(struct kbase_device *kbdev)
#endif /* CONFIG_MALI_BIFROST_DEBUG */
#endif /* CONFIG_DEBUG_FS */
return;
}

View File

@@ -43,7 +43,6 @@ kbase_as_fault_debugfs_new(struct kbase_device *kbdev, int as_no)
kbdev->debugfs_as_read_bitmap |= (1ULL << as_no);
#endif /* CONFIG_DEBUG_FS */
#endif /* CONFIG_MALI_BIFROST_DEBUG */
return;
}
#endif /*_KBASE_AS_FAULT_DEBUG_FS_H*/

View File

@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
* (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2020-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -28,15 +28,24 @@
#include <linux/types.h>
typedef enum mali_kbase_cap {
/**
* enum mali_kbase_cap - Enumeration for kbase capability
*
* @MALI_KBASE_CAP_SYSTEM_MONITOR: System Monitor
* @MALI_KBASE_CAP_JIT_PRESSURE_LIMIT: JIT Pressure limit
* @MALI_KBASE_CAP_MEM_GROW_ON_GPF: Memory grow on page fault
* @MALI_KBASE_CAP_MEM_PROTECTED: Protected memory
* @MALI_KBASE_NUM_CAPS: Delimiter
*/
enum mali_kbase_cap {
MALI_KBASE_CAP_SYSTEM_MONITOR = 0,
MALI_KBASE_CAP_JIT_PRESSURE_LIMIT,
MALI_KBASE_CAP_MEM_GROW_ON_GPF,
MALI_KBASE_CAP_MEM_PROTECTED,
MALI_KBASE_NUM_CAPS
} mali_kbase_cap;
};
extern bool mali_kbase_supports_cap(unsigned long api_version, mali_kbase_cap cap);
extern bool mali_kbase_supports_cap(unsigned long api_version, enum mali_kbase_cap cap);
static inline bool mali_kbase_supports_system_monitor(unsigned long api_version)
{

View File

@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
* (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2020-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -51,7 +51,6 @@ struct kbase_ccswe {
*/
void kbase_ccswe_init(struct kbase_ccswe *self);
/**
* kbase_ccswe_cycle_at() - Estimate cycle count at given timestamp.
*
@@ -68,7 +67,7 @@ void kbase_ccswe_init(struct kbase_ccswe *self);
* u64 ts = ktime_get_raw_ns();
* u64 cycle = kbase_ccswe_cycle_at(&ccswe, ts)
*
* Returns: estimated value of cycle count at a given time.
* Return: estimated value of cycle count at a given time.
*/
u64 kbase_ccswe_cycle_at(struct kbase_ccswe *self, u64 timestamp_ns);

View File

@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
* (C) COPYRIGHT 2010-2017, 2019-2021 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2010-2017, 2019-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -246,8 +246,6 @@ struct kbase_pm_callback_conf {
*
* For linux this callback will be called by the kernel runtime_suspend callback.
* Note: for linux the kernel must have CONFIG_PM_RUNTIME enabled to use this feature.
*
* @return 0 on success, else OS error code.
*/
void (*power_runtime_off_callback)(struct kbase_device *kbdev);
@@ -255,6 +253,8 @@ struct kbase_pm_callback_conf {
*
* For linux this callback will be called by the kernel runtime_resume callback.
* Note: for linux the kernel must have CONFIG_PM_RUNTIME enabled to use this feature.
*
* @return 0 on success, else OS error code.
*/
int (*power_runtime_on_callback)(struct kbase_device *kbdev);
@@ -455,7 +455,7 @@ struct kbase_platform_config {
/**
* kbase_get_platform_config - Gets the pointer to platform config.
*
* @return Pointer to the platform config
* Return: Pointer to the platform config
*/
struct kbase_platform_config *kbase_get_platform_config(void);
@@ -564,7 +564,6 @@ void kbasep_platform_event_atom_complete(struct kbase_jd_atom *katom);
#ifndef CONFIG_OF
/**
* kbase_platform_register - Register a platform device for the GPU
*
* This can be used to register a platform device on systems where device tree
* is not enabled and the platform initialisation code in the kernel doesn't
* create the GPU device. Where possible device tree should be used instead.

View File

@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
* (C) COPYRIGHT 2013-2021 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2013-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -31,33 +31,27 @@
#include <mali_kbase_config_platform.h>
enum {
/**
* Use unrestricted Address ID width on the AXI bus.
*/
/* Use unrestricted Address ID width on the AXI bus. */
KBASE_AID_32 = 0x0,
/**
* Restrict GPU to a half of maximum Address ID count.
/* Restrict GPU to a half of maximum Address ID count.
* This will reduce performance, but reduce bus load due to GPU.
*/
KBASE_AID_16 = 0x3,
/**
* Restrict GPU to a quarter of maximum Address ID count.
/* Restrict GPU to a quarter of maximum Address ID count.
* This will reduce performance, but reduce bus load due to GPU.
*/
KBASE_AID_8 = 0x2,
KBASE_AID_8 = 0x2,
/**
* Restrict GPU to an eighth of maximum Address ID count.
/* Restrict GPU to an eighth of maximum Address ID count.
* This will reduce performance, but reduce bus load due to GPU.
*/
KBASE_AID_4 = 0x1
KBASE_AID_4 = 0x1
};
enum {
/**
* Use unrestricted Address ID width on the AXI bus.
/* Use unrestricted Address ID width on the AXI bus.
* Restricting ID width will reduce performance & bus load due to GPU.
*/
KBASE_3BIT_AID_32 = 0x0,
@@ -78,10 +72,10 @@ enum {
KBASE_3BIT_AID_12 = 0x5,
/* Restrict GPU to 1/4 of maximum Address ID count. */
KBASE_3BIT_AID_8 = 0x6,
KBASE_3BIT_AID_8 = 0x6,
/* Restrict GPU to 1/8 of maximum Address ID count. */
KBASE_3BIT_AID_4 = 0x7
KBASE_3BIT_AID_4 = 0x7
};
#if MALI_USE_CSF
@@ -103,8 +97,7 @@ enum {
#define DEFAULT_PM_DVFS_PERIOD 100 /* 100ms */
#endif
/**
* Power Management poweroff tick granularity. This is in nanoseconds to
/* Power Management poweroff tick granularity. This is in nanoseconds to
* allow HR timer support (can be overridden by platform header).
*
* On each scheduling tick, the power manager core may decide to:
@@ -115,95 +108,106 @@ enum {
#define DEFAULT_PM_GPU_POWEROFF_TICK_NS (400000) /* 400us */
#endif
/**
* Power Manager number of ticks before shader cores are powered off
/* Power Manager number of ticks before shader cores are powered off
* (can be overridden by platform header).
*/
#ifndef DEFAULT_PM_POWEROFF_TICK_SHADER
#define DEFAULT_PM_POWEROFF_TICK_SHADER (2) /* 400-800us */
#endif
/**
* Default scheduling tick granularity (can be overridden by platform header)
*/
/* Default scheduling tick granularity (can be overridden by platform header) */
#ifndef DEFAULT_JS_SCHEDULING_PERIOD_NS
#define DEFAULT_JS_SCHEDULING_PERIOD_NS (100000000u) /* 100ms */
#endif
/**
* Default minimum number of scheduling ticks before jobs are soft-stopped.
/* Default minimum number of scheduling ticks before jobs are soft-stopped.
*
* This defines the time-slice for a job (which may be different from that of a
* context)
*/
#define DEFAULT_JS_SOFT_STOP_TICKS (1) /* 100ms-200ms */
/**
* Default minimum number of scheduling ticks before CL jobs are soft-stopped.
*/
/* Default minimum number of scheduling ticks before CL jobs are soft-stopped. */
#define DEFAULT_JS_SOFT_STOP_TICKS_CL (1) /* 100ms-200ms */
/**
* Default minimum number of scheduling ticks before jobs are hard-stopped
*/
/* Default minimum number of scheduling ticks before jobs are hard-stopped */
#define DEFAULT_JS_HARD_STOP_TICKS_SS (50) /* 5s */
/**
* Default minimum number of scheduling ticks before CL jobs are hard-stopped.
*/
/* Default minimum number of scheduling ticks before CL jobs are hard-stopped. */
#define DEFAULT_JS_HARD_STOP_TICKS_CL (50) /* 5s */
/**
* Default minimum number of scheduling ticks before jobs are hard-stopped
/* Default minimum number of scheduling ticks before jobs are hard-stopped
* during dumping
*/
#define DEFAULT_JS_HARD_STOP_TICKS_DUMPING (15000) /* 1500s */
/**
* Default timeout for some software jobs, after which the software event wait
/* Default timeout for some software jobs, after which the software event wait
* jobs will be cancelled.
*/
#define DEFAULT_JS_SOFT_JOB_TIMEOUT (3000) /* 3s */
/**
* Default minimum number of scheduling ticks before the GPU is reset to clear a
/* Default minimum number of scheduling ticks before the GPU is reset to clear a
* "stuck" job
*/
#define DEFAULT_JS_RESET_TICKS_SS (55) /* 5.5s */
/**
* Default minimum number of scheduling ticks before the GPU is reset to clear a
/* Default minimum number of scheduling ticks before the GPU is reset to clear a
* "stuck" CL job.
*/
#define DEFAULT_JS_RESET_TICKS_CL (55) /* 5.5s */
/**
* Default minimum number of scheduling ticks before the GPU is reset to clear a
/* Default minimum number of scheduling ticks before the GPU is reset to clear a
* "stuck" job during dumping.
*/
#define DEFAULT_JS_RESET_TICKS_DUMPING (15020) /* 1502s */
/**
* Default number of milliseconds given for other jobs on the GPU to be
/* Default number of milliseconds given for other jobs on the GPU to be
* soft-stopped when the GPU needs to be reset.
*/
#define DEFAULT_RESET_TIMEOUT_MS (3000) /* 3s */
/* Waiting timeout for status change acknowledgment, in clock cycles
* Based on 3000ms timeout at nominal 100MHz, as is required for Android - based
* on scaling from a 50MHz GPU system.
/* Nominal reference frequency that was used to obtain all following
* <...>_TIMEOUT_CYCLES macros, in kHz.
*
* Timeouts are scaled based on the relation between this value and the lowest
* GPU clock frequency.
*/
#define DEFAULT_REF_TIMEOUT_FREQ_KHZ (100000)
#define CSF_FIRMWARE_TIMEOUT_CYCLES (300000000)
/* A default timeout to be used when an invalid timeout selector is
* used to retrieve the timeout, on JM GPUs. CSF GPUs use the Firmware
* timeout as the default.
#if MALI_USE_CSF
/* Waiting timeout for status change acknowledgment, in clock cycles.
*
* This is also the default timeout to be used when an invalid timeout
* selector is used to retrieve the timeout on CSF GPUs.
*
* Based on 75000ms timeout at nominal 100MHz, as is required for Android - based
* on scaling from a 50MHz GPU system.
*/
#define CSF_FIRMWARE_TIMEOUT_CYCLES (7500000000)
/* Timeout in clock cycles for GPU Power Management to reach the desired
* Shader, L2 and MCU state.
*
* Based on 2500ms timeout at nominal 100MHz, scaled from a 50MHz GPU system.
*/
#define CSF_PM_TIMEOUT_CYCLES (250000000)
/* Waiting timeout in clock cycles for GPU reset to complete.
*
* Based on 2500ms timeout at 100MHz, scaled from a 50MHz GPU system.
*/
#define CSF_GPU_RESET_TIMEOUT_CYCLES (250000000)
#else /* MALI_USE_CSF */
/* A default timeout in clock cycles to be used when an invalid timeout
* selector is used to retrieve the timeout, on JM GPUs.
*/
#define JM_DEFAULT_TIMEOUT_CYCLES (150000000)
/**
* Default timeslice that a context is scheduled in for, in nanoseconds.
#endif /* MALI_USE_CSF */
/* Default timeslice that a context is scheduled in for, in nanoseconds.
*
* When a context has used up this amount of time across its jobs, it is
* scheduled out to let another run.
@@ -213,16 +217,14 @@ enum {
*/
#define DEFAULT_JS_CTX_TIMESLICE_NS (50000000) /* 50ms */
/**
* Maximum frequency (in kHz) that the GPU can be clocked. For some platforms
/* Maximum frequency (in kHz) that the GPU can be clocked. For some platforms
* this isn't available, so we simply define a dummy value here. If devfreq
* is enabled the value will be read from there, otherwise this should be
* overridden by defining GPU_FREQ_KHZ_MAX in the platform file.
*/
#define DEFAULT_GPU_FREQ_KHZ_MAX (5000)
/**
* Default timeout for task execution on an endpoint
/* Default timeout for task execution on an endpoint
*
* Number of GPU clock cycles before the driver terminates a task that is
* making no forward progress on an endpoint (e.g. shader core).
@@ -231,8 +233,7 @@ enum {
*/
#define DEFAULT_PROGRESS_TIMEOUT ((u64)5 * 500 * 1024 * 1024)
/**
* Default threshold at which to switch to incremental rendering
/* Default threshold at which to switch to incremental rendering
*
* Fraction of the maximum size of an allocation that grows on GPU page fault
* that can be used up before the driver switches to incremental rendering,

File diff suppressed because it is too large Load Diff

View File

@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
* (C) COPYRIGHT 2017-2018, 2020-2021 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2017-2018, 2020-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -79,7 +79,7 @@ void kbase_ctx_sched_term(struct kbase_device *kbdev);
int kbase_ctx_sched_retain_ctx(struct kbase_context *kctx);
/**
* kbase_ctx_sched_retain_ctx_refcount
* kbase_ctx_sched_retain_ctx_refcount - Retain a reference to the @ref kbase_context
* @kctx: The context to which to retain a reference
*
* This function only retains a reference to the context. It must be called
@@ -187,8 +187,8 @@ struct kbase_context *kbase_ctx_sched_as_to_ctx_nolock(
* @kctx: Context to be refcounted
*
* The following locks must be held by the caller:
* * kbase_device::mmu_hw_mutex
* * kbase_device::hwaccess_lock
* &kbase_device.mmu_hw_mutex
* &kbase_device.hwaccess_lock
*
* Return: true if refcount succeeded, and the context will not be scheduled
* out, false if the refcount failed (because the context is being/has been

View File

@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
* (C) COPYRIGHT 2012-2016, 2018, 2020-2021 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2012-2016, 2018, 2020-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -53,7 +53,7 @@ void kbase_debug_job_fault_dev_term(struct kbase_device *kbdev);
* kbase_debug_job_fault_context_init - Initialize the relevant
* data structure per context
* @kctx: KBase context pointer
* @return 0 on success
* Return: 0 on success
*/
int kbase_debug_job_fault_context_init(struct kbase_context *kctx);
@@ -68,39 +68,42 @@ void kbase_debug_job_fault_context_term(struct kbase_context *kctx);
* kbase_debug_job_fault_kctx_unblock - Unblock the atoms blocked on job fault
* dumping on context termination.
*
* @kctx: KBase context pointer
*
* This function is called during context termination to unblock the atom for
* which the job fault occurred and also the atoms following it. This is needed
* otherwise the wait for zero jobs could timeout (leading to an assertion
* failure, kernel panic in debug builds) in the pathological case where
* although the thread/daemon capturing the job fault events is running,
* but for some reasons has stopped consuming the events.
*
* @kctx: KBase context pointer
*/
void kbase_debug_job_fault_kctx_unblock(struct kbase_context *kctx);
/**
* kbase_debug_job_fault_process - Process the failed job.
* It will send an event and wake up the job fault waiting queue
* Then create a work queue to wait for job dump finish
* This function should be called in the interrupt handler and before
* jd_done that make sure the jd_done_worker will be delayed until the
* job dump finish
*
* @katom: The failed atom pointer
* @completion_code: the job status
* @return true if dump is going on
*
* It will send an event and wake up the job fault waiting queue
* Then create a work queue to wait for job dump finish
* This function should be called in the interrupt handler and before
* jd_done that make sure the jd_done_worker will be delayed until the
* job dump finish
*
* Return: true if dump is going on
*/
bool kbase_debug_job_fault_process(struct kbase_jd_atom *katom,
u32 completion_code);
/**
* kbase_debug_job_fault_reg_snapshot_init - Set the interested registers
* address during the job fault process, the relevant registers will
* be saved when a job fault happen
* @kctx: KBase context pointer
* @reg_range: Maximum register address space
* @return true if initializing successfully
*
* Return: true if initializing successfully
*/
bool kbase_debug_job_fault_reg_snapshot_init(struct kbase_context *kctx,
int reg_range);
@@ -108,8 +111,10 @@ bool kbase_debug_job_fault_reg_snapshot_init(struct kbase_context *kctx,
/**
* kbase_job_fault_get_reg_snapshot - Read the interested registers for
* failed job dump
*
* @kctx: KBase context pointer
* @return true if getting registers successfully
*
* Return: true if getting registers successfully
*/
bool kbase_job_fault_get_reg_snapshot(struct kbase_context *kctx);

View File

@@ -31,6 +31,22 @@
#if IS_ENABLED(CONFIG_DEBUG_FS)
/*
 * SHOW_GPU_MEM_DATA - Dump PAGE_SIZE bytes as rows of fixed-width hex columns.
 * @type:   Integer type of one column; sizeof(type) is the column width.
 * @format: seq_printf() format string used to print one column value.
 *
 * The expansion relies on three names from the enclosing scope:
 *   m        - passed as first argument to seq_printf()/seq_putc()
 *   cpu_addr - pointer to the PAGE_SIZE bytes that are printed
 *   gpu_addr - address printed (plus the row offset) at the start of each row
 *
 * A row covers 32 bytes when the column is as wide as a u64 and 16 bytes
 * otherwise, and every row is terminated with a newline.
 *
 * The body is wrapped in do { } while (0) so that the expansion is exactly one
 * statement: "SHOW_GPU_MEM_DATA(...);" is then safe in any context, including
 * an unbraced if/else, which a bare { } block followed by ';' is not.
 */
#define SHOW_GPU_MEM_DATA(type, format) \
	do { \
		unsigned int i, j; \
		const type *ptr = (const type *)cpu_addr; \
		const unsigned int col_width = sizeof(type); \
		const unsigned int row_width = (col_width == sizeof(u64)) ? 32 : 16; \
		const unsigned int num_cols = row_width / col_width; \
		for (i = 0; i < PAGE_SIZE; i += row_width) { \
			seq_printf(m, "%016llx:", gpu_addr + i); \
			for (j = 0; j < num_cols; j++) \
				seq_printf(m, format, ptr[j]); \
			ptr += num_cols; \
			seq_putc(m, '\n'); \
		} \
	} while (0)
struct debug_mem_mapping {
struct list_head node;
@@ -44,6 +60,7 @@ struct debug_mem_mapping {
struct debug_mem_data {
struct list_head mapping_list;
struct kbase_context *kctx;
unsigned int column_width;
};
struct debug_mem_seq_off {
@@ -111,9 +128,9 @@ static int debug_mem_show(struct seq_file *m, void *v)
struct debug_mem_data *mem_data = m->private;
struct debug_mem_seq_off *data = v;
struct debug_mem_mapping *map;
int i, j;
unsigned long long gpu_addr;
struct page *page;
uint32_t *mapping;
void *cpu_addr;
pgprot_t prot = PAGE_KERNEL;
map = list_entry(data->lh, struct debug_mem_mapping, node);
@@ -130,20 +147,33 @@ static int debug_mem_show(struct seq_file *m, void *v)
prot = pgprot_writecombine(prot);
page = as_page(map->alloc->pages[data->offset]);
mapping = vmap(&page, 1, VM_MAP, prot);
if (!mapping)
cpu_addr = vmap(&page, 1, VM_MAP, prot);
if (!cpu_addr)
goto out;
for (i = 0; i < PAGE_SIZE; i += 4*sizeof(*mapping)) {
seq_printf(m, "%016llx:", i + ((map->start_pfn +
data->offset) << PAGE_SHIFT));
gpu_addr = (map->start_pfn + data->offset) << PAGE_SHIFT;
for (j = 0; j < 4*sizeof(*mapping); j += sizeof(*mapping))
seq_printf(m, " %08x", mapping[(i+j)/sizeof(*mapping)]);
seq_putc(m, '\n');
/* Cases for 4 supported values of column_width for showing
* the GPU memory contents.
*/
switch (mem_data->column_width) {
case 1:
SHOW_GPU_MEM_DATA(u8, " %02hhx");
break;
case 2:
SHOW_GPU_MEM_DATA(u16, " %04hx");
break;
case 4:
SHOW_GPU_MEM_DATA(u32, " %08x");
break;
case 8:
SHOW_GPU_MEM_DATA(u64, " %016llx");
break;
default:
dev_warn(mem_data->kctx->kbdev->dev, "Unexpected column width");
}
vunmap(mapping);
vunmap(cpu_addr);
seq_putc(m, '\n');
@@ -207,6 +237,14 @@ static int debug_mem_open(struct inode *i, struct file *file)
if (get_file_rcu(kctx->filp) == 0)
return -ENOENT;
/* Check if file was opened in write mode. GPU memory contents
* are returned only when the file is not opened in write mode.
*/
if (file->f_mode & FMODE_WRITE) {
file->private_data = kctx;
return 0;
}
ret = seq_open(file, &ops);
if (ret)
goto open_fail;
@@ -223,6 +261,8 @@ static int debug_mem_open(struct inode *i, struct file *file)
kbase_gpu_vm_lock(kctx);
mem_data->column_width = kctx->mem_view_column_width;
ret = debug_mem_zone_open(&kctx->reg_rbtree_same, mem_data);
if (ret != 0) {
kbase_gpu_vm_unlock(kctx);
@@ -241,6 +281,20 @@ static int debug_mem_open(struct inode *i, struct file *file)
goto out;
}
#if MALI_USE_CSF
ret = debug_mem_zone_open(&kctx->reg_rbtree_exec_fixed, mem_data);
if (ret != 0) {
kbase_gpu_vm_unlock(kctx);
goto out;
}
ret = debug_mem_zone_open(&kctx->reg_rbtree_fixed, mem_data);
if (ret != 0) {
kbase_gpu_vm_unlock(kctx);
goto out;
}
#endif
kbase_gpu_vm_unlock(kctx);
((struct seq_file *)file->private_data)->private = mem_data;
@@ -270,32 +324,70 @@ open_fail:
static int debug_mem_release(struct inode *inode, struct file *file)
{
struct kbase_context *const kctx = inode->i_private;
struct seq_file *sfile = file->private_data;
struct debug_mem_data *mem_data = sfile->private;
struct debug_mem_mapping *mapping;
seq_release(inode, file);
/* If the file wasn't opened in write mode, then release the
* memory allocated to show the GPU memory contents.
*/
if (!(file->f_mode & FMODE_WRITE)) {
struct seq_file *sfile = file->private_data;
struct debug_mem_data *mem_data = sfile->private;
struct debug_mem_mapping *mapping;
while (!list_empty(&mem_data->mapping_list)) {
mapping = list_first_entry(&mem_data->mapping_list,
seq_release(inode, file);
while (!list_empty(&mem_data->mapping_list)) {
mapping = list_first_entry(&mem_data->mapping_list,
struct debug_mem_mapping, node);
kbase_mem_phy_alloc_put(mapping->alloc);
list_del(&mapping->node);
kfree(mapping);
}
kbase_mem_phy_alloc_put(mapping->alloc);
list_del(&mapping->node);
kfree(mapping);
}
kfree(mem_data);
kfree(mem_data);
}
fput(kctx->filp);
return 0;
}
/**
 * debug_mem_write - Set the column width used by the "mem_view" debugfs file
 *
 * @file:  File being written; its private_data is read as the kbase context.
 * @ubuf:  User buffer holding the requested column width as a number.
 * @count: Number of bytes available in @ubuf.
 * @ppos:  Unused.
 *
 * The value is parsed with kstrtouint_from_user() using base 0. Only powers
 * of two no larger than 8 (i.e. 1, 2, 4 and 8) are accepted. An accepted
 * value is stored in the context's mem_view_column_width while holding the
 * GPU VM lock.
 *
 * Return: @count on success, the error returned by kstrtouint_from_user() if
 *         the input cannot be parsed, or -EINVAL if the width is unsupported.
 */
static ssize_t debug_mem_write(struct file *file, const char __user *ubuf,
			       size_t count, loff_t *ppos)
{
	struct kbase_context *const kctx = file->private_data;
	unsigned int column_width = 0;
	int ret;

	CSTD_UNUSED(ppos);

	ret = kstrtouint_from_user(ubuf, count, 0, &column_width);
	if (ret)
		return ret;

	/* is_power_of_2() is false for 0, so a zero width is rejected here too */
	if (!is_power_of_2(column_width)) {
		dev_dbg(kctx->kbdev->dev,
			"Column width %u is not a power of 2", column_width);
		return -EINVAL;
	}

	if (column_width > 8) {
		dev_dbg(kctx->kbdev->dev,
			"Column width %u greater than 8 not supported", column_width);
		return -EINVAL;
	}

	kbase_gpu_vm_lock(kctx);
	kctx->mem_view_column_width = column_width;
	kbase_gpu_vm_unlock(kctx);

	return count;
}
/*
 * File operations for the "mem_view" debugfs file: reads go through the
 * seq_file helpers, writes are handled by debug_mem_write().
 */
static const struct file_operations kbase_debug_mem_view_fops = {
	.owner = THIS_MODULE,

	/* Lifetime */
	.open = debug_mem_open,
	.release = debug_mem_release,

	/* Data access */
	.read = seq_read,
	.llseek = seq_lseek,
	.write = debug_mem_write,
};
@@ -308,6 +400,9 @@ void kbase_debug_mem_view_init(struct kbase_context *const kctx)
WARN_ON(IS_ERR_OR_NULL(kctx->kctx_dentry)))
return;
/* Default column width is 4 */
kctx->mem_view_column_width = sizeof(u32);
debugfs_create_file("mem_view", 0400, kctx->kctx_dentry, kctx,
&kbase_debug_mem_view_fops);
}

View File

@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
* (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -34,20 +34,20 @@
/**
* set_attr_from_string - Parse a string to set elements of an array
*
* This is the core of the implementation of
* kbase_debugfs_helper_set_attr_from_string. The only difference between the
* two functions is that this one requires the input string to be writable.
*
* @buf: Input string to parse. Must be nul-terminated!
* @array: Address of an object that can be accessed like an array.
* @nelems: Number of elements in the array.
* @set_attr_fn: Function to be called back for each array element.
*
* This is the core of the implementation of
* kbase_debugfs_helper_set_attr_from_string. The only difference between the
* two functions is that this one requires the input string to be writable.
*
* Return: 0 if success, negative error code otherwise.
*/
static int
set_attr_from_string(char *const buf, void *const array, size_t const nelems,
kbase_debugfs_helper_set_attr_fn *const set_attr_fn)
kbase_debugfs_helper_set_attr_fn * const set_attr_fn)
{
size_t index, err = 0;
char *ptr = buf;
@@ -143,7 +143,7 @@ int kbase_debugfs_string_validator(char *const buf)
int kbase_debugfs_helper_set_attr_from_string(
const char *const buf, void *const array, size_t const nelems,
kbase_debugfs_helper_set_attr_fn *const set_attr_fn)
kbase_debugfs_helper_set_attr_fn * const set_attr_fn)
{
char *const wbuf = kstrdup(buf, GFP_KERNEL);
int err = 0;
@@ -168,7 +168,7 @@ int kbase_debugfs_helper_set_attr_from_string(
ssize_t kbase_debugfs_helper_get_attr_to_string(
char *const buf, size_t const size, void *const array,
size_t const nelems,
kbase_debugfs_helper_get_attr_fn *const get_attr_fn)
kbase_debugfs_helper_get_attr_fn * const get_attr_fn)
{
ssize_t total = 0;
size_t index;
@@ -189,7 +189,7 @@ ssize_t kbase_debugfs_helper_get_attr_to_string(
int kbase_debugfs_helper_seq_write(
struct file *const file, const char __user *const ubuf,
size_t const count, size_t const nelems,
kbase_debugfs_helper_set_attr_fn *const set_attr_fn)
kbase_debugfs_helper_set_attr_fn * const set_attr_fn)
{
const struct seq_file *const sfile = file->private_data;
void *const array = sfile->private;
@@ -228,8 +228,8 @@ int kbase_debugfs_helper_seq_write(
}
int kbase_debugfs_helper_seq_read(
struct seq_file *const sfile, size_t const nelems,
kbase_debugfs_helper_get_attr_fn *const get_attr_fn)
struct seq_file * const sfile, size_t const nelems,
kbase_debugfs_helper_get_attr_fn * const get_attr_fn)
{
void *const array = sfile->private;
size_t index;

View File

@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
* (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -37,6 +37,11 @@ typedef void kbase_debugfs_helper_set_attr_fn(void *array, size_t index,
* kbase_debugfs_helper_set_attr_from_string - Parse a string to reconfigure an
* array
*
* @buf: Input string to parse. Must be nul-terminated!
* @array: Address of an object that can be accessed like an array.
* @nelems: Number of elements in the array.
* @set_attr_fn: Function to be called back for each array element.
*
* The given function is called once for each attribute value found in the
* input string. It is not an error if the string specifies fewer attribute
* values than the specified number of array elements.
@@ -46,11 +51,6 @@ typedef void kbase_debugfs_helper_set_attr_fn(void *array, size_t index,
* Attribute values are separated by one or more space characters.
* Additional leading and trailing spaces are ignored.
*
* @buf: Input string to parse. Must be nul-terminated!
* @array: Address of an object that can be accessed like an array.
* @nelems: Number of elements in the array.
* @set_attr_fn: Function to be called back for each array element.
*
* Return: 0 if success, negative error code otherwise.
*/
int kbase_debugfs_helper_set_attr_from_string(
@@ -62,6 +62,8 @@ int kbase_debugfs_helper_set_attr_from_string(
* debugfs file for any incorrect formats
* or wrong values.
*
* @buf: Null-terminated string to validate.
*
* This function is to be used before any writes to debugfs values are done
* such that any strings with erroneous values (such as octal 09 or
* hexadecimal 0xGH are fully ignored) - without this validation, any correct
@@ -73,8 +75,6 @@ int kbase_debugfs_helper_set_attr_from_string(
* of the input string. This function also requires the input string to be
* writable.
*
* @buf: Null-terminated string to validate.
*
* Return: 0 with no error, else -22 (the invalid return value of kstrtoul) if
* any value in the string was wrong or with an incorrect format.
*/
@@ -95,17 +95,17 @@ typedef size_t kbase_debugfs_helper_get_attr_fn(void *array, size_t index);
* kbase_debugfs_helper_get_attr_to_string - Construct a formatted string
* from elements in an array
*
* The given function is called once for each array element to get the
* value of the attribute to be inspected. The attribute values are
* written to the buffer as a formatted string of decimal numbers
* separated by spaces and terminated by a linefeed.
*
* @buf: Buffer in which to store the formatted output string.
* @size: The size of the buffer, in bytes.
* @array: Address of an object that can be accessed like an array.
* @nelems: Number of elements in the array.
* @get_attr_fn: Function to be called back for each array element.
*
* The given function is called once for each array element to get the
* value of the attribute to be inspected. The attribute values are
* written to the buffer as a formatted string of decimal numbers
* separated by spaces and terminated by a linefeed.
*
* Return: Number of characters written excluding the nul terminator.
*/
ssize_t kbase_debugfs_helper_get_attr_to_string(
@@ -116,6 +116,10 @@ ssize_t kbase_debugfs_helper_get_attr_to_string(
* kbase_debugfs_helper_seq_read - Implements reads from a virtual file for an
* array
*
* @sfile: A virtual file previously opened by calling single_open.
* @nelems: Number of elements in the array.
* @get_attr_fn: Function to be called back for each array element.
*
* The virtual file must have been opened by calling single_open and passing
* the address of an object that can be accessed like an array.
*
@@ -124,10 +128,6 @@ ssize_t kbase_debugfs_helper_get_attr_to_string(
* written to the buffer as a formatted string of decimal numbers
* separated by spaces and terminated by a linefeed.
*
* @sfile: A virtual file previously opened by calling single_open.
* @nelems: Number of elements in the array.
* @get_attr_fn: Function to be called back for each array element.
*
* Return: 0 if success, negative error code otherwise.
*/
int kbase_debugfs_helper_seq_read(
@@ -138,6 +138,12 @@ int kbase_debugfs_helper_seq_read(
* kbase_debugfs_helper_seq_write - Implements writes to a virtual file for an
* array
*
* @file: A virtual file previously opened by calling single_open.
* @ubuf: Source address in user space.
* @count: Number of bytes written to the virtual file.
* @nelems: Number of elements in the array.
* @set_attr_fn: Function to be called back for each array element.
*
* The virtual file must have been opened by calling single_open and passing
* the address of an object that can be accessed like an array.
*
@@ -145,12 +151,6 @@ int kbase_debugfs_helper_seq_read(
* data written to the virtual file. For further details, refer to the
* description of set_attr_from_string.
*
* @file: A virtual file previously opened by calling single_open.
* @ubuf: Source address in user space.
* @count: Number of bytes written to the virtual file.
* @nelems: Number of elements in the array.
* @set_attr_fn: Function to be called back for each array element.
*
* Return: 0 if success, negative error code otherwise.
*/
int kbase_debugfs_helper_seq_write(struct file *file,

View File

@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
* (C) COPYRIGHT 2011-2021 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2011-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -35,11 +35,15 @@
#include <backend/gpu/mali_kbase_instr_defs.h>
#include <mali_kbase_pm.h>
#include <mali_kbase_gpuprops_types.h>
#include <mali_kbase_hwcnt_watchdog_if.h>
#if MALI_USE_CSF
#include <mali_kbase_hwcnt_backend_csf.h>
#else
#include <mali_kbase_hwcnt_backend_jm.h>
#include <mali_kbase_hwcnt_backend_jm_watchdog.h>
#endif
#include <protected_mode_switcher.h>
#include <linux/atomic.h>
@@ -82,7 +86,7 @@
#define RESET_TIMEOUT 500
/**
* The maximum number of Job Slots to support in the Hardware.
* BASE_JM_MAX_NR_SLOTS - The maximum number of Job Slots to support in the Hardware.
*
* You can optimize this down if your target devices will only ever support a
* small number of job slots.
@@ -90,7 +94,7 @@
#define BASE_JM_MAX_NR_SLOTS 3
/**
* The maximum number of Address Spaces to support in the Hardware.
* BASE_MAX_NR_AS - The maximum number of Address Spaces to support in the Hardware.
*
* You can optimize this down if your target devices will only ever support a
* small number of Address Spaces
@@ -110,19 +114,19 @@
#define KBASEP_AS_NR_INVALID (-1)
/**
* Maximum size in bytes of a MMU lock region, as a logarithm
* KBASE_LOCK_REGION_MAX_SIZE_LOG2 - Maximum size in bytes of a MMU lock region,
* as a logarithm
*/
#define KBASE_LOCK_REGION_MAX_SIZE_LOG2 (48) /* 256 TB */
/**
* Minimum size in bytes of a MMU lock region, as a logarithm
*/
#define KBASE_LOCK_REGION_MIN_SIZE_LOG2 (15) /* 32 kB */
/**
* Maximum number of GPU memory region zones
* KBASE_REG_ZONE_MAX - Maximum number of GPU memory region zones
*/
#if MALI_USE_CSF
#define KBASE_REG_ZONE_MAX 6ul
#else
#define KBASE_REG_ZONE_MAX 4ul
#endif
#include "mali_kbase_hwaccess_defs.h"
@@ -248,9 +252,10 @@ struct kbase_fault {
/**
* struct kbase_mmu_table - object representing a set of GPU page tables
* @mmu_teardown_pages: Buffer of 4 Pages in size, used to cache the entries
* of top & intermediate level page tables to avoid
* repeated calls to kmap_atomic during the MMU teardown.
* @mmu_teardown_pages: Array containing pointers to 3 separate pages, used
* to cache the entries of top (L0) & intermediate level
* page tables (L1 & L2) to avoid repeated calls to
* kmap_atomic() during the MMU teardown.
* @mmu_lock: Lock to serialize the accesses made to multi level GPU
* page tables
* @pgd: Physical address of the page allocated for the top
@@ -265,7 +270,7 @@ struct kbase_fault {
* it is NULL
*/
struct kbase_mmu_table {
u64 *mmu_teardown_pages;
u64 *mmu_teardown_pages[MIDGARD_MMU_BOTTOMLEVEL];
struct mutex mmu_lock;
phys_addr_t pgd;
u8 group_id;
@@ -357,8 +362,6 @@ struct kbase_clk_rate_listener {
* enumerated GPU clock.
* @clk_rate_trace_ops: Pointer to the platform specific GPU clock rate trace
* operations.
* @gpu_clk_rate_trace_write: Pointer to the function that would emit the
* tracepoint for the clock rate change.
* @listeners: List of listener attached.
* @lock: Lock to serialize the actions of GPU clock rate trace
* manager.
@@ -373,13 +376,14 @@ struct kbase_clk_rate_trace_manager {
/**
* struct kbase_pm_device_data - Data stored per device for power management.
* @lock: The lock protecting Power Management structures accessed outside of
* IRQ.
* This lock must also be held whenever the GPU is being powered on or
* off.
* @active_count: The reference count of active contexts on this device. Note
* that some code paths keep shaders/the tiler powered whilst this is 0.
* Use kbase_pm_is_active() instead to check for such cases.
* @lock: The lock protecting Power Management structures accessed
* outside of IRQ.
* This lock must also be held whenever the GPU is being
* powered on or off.
* @active_count: The reference count of active contexts on this device.
* Note that some code paths keep shaders/the tiler
* powered whilst this is 0.
* Use kbase_pm_is_active() instead to check for such cases.
* @suspending: Flag indicating suspending/suspended
* @runtime_active: Flag to track if the GPU is in runtime suspended or active
* state. This ensures that runtime_put and runtime_get
@@ -388,24 +392,24 @@ struct kbase_clk_rate_trace_manager {
* the call to it from runtime_gpu_active callback can be
* skipped.
* @gpu_lost: Flag indicating gpu lost
* This structure contains data for the power management framework. There
* is one instance of this structure per device in the system.
* This structure contains data for the power management framework.
* There is one instance of this structure per device in the system.
* @zero_active_count_wait: Wait queue set when active_count == 0
* @resume_wait: system resume of GPU device.
* @debug_core_mask: Bit masks identifying the available shader cores that are
* specified via sysfs. One mask per job slot.
* specified via sysfs. One mask per job slot.
* @debug_core_mask_all: Bit masks identifying the available shader cores that
* are specified via sysfs.
* are specified via sysfs.
* @callback_power_runtime_init: Callback for initializing the runtime power
* management. Return 0 on success, else error code
* management. Return 0 on success, else error code
* @callback_power_runtime_term: Callback for terminating the runtime power
* management.
* management.
* @dvfs_period: Time in milliseconds between each dvfs sample
* @backend: KBase PM backend data
* @arb_vm_state: The state of the arbiter VM machine
* @gpu_users_waiting: Used by virtualization to notify the arbiter that there
* are users waiting for the GPU so that it can request and resume the
* driver.
* are users waiting for the GPU so that it can request
* and resume the driver.
* @clk_rtm: The state of the GPU clock rate trace manager
*/
struct kbase_pm_device_data {
@@ -482,16 +486,16 @@ struct kbase_mem_pool {
/**
* struct kbase_mem_pool_group - a complete set of physical memory pools.
*
* @small: Array of objects containing the state for pools of 4 KiB size
* physical pages.
* @large: Array of objects containing the state for pools of 2 MiB size
* physical pages.
*
* Memory pools are used to allow efficient reallocation of previously-freed
* physical pages. A pair of memory pools is initialized for each physical
* memory group: one for 4 KiB pages and one for 2 MiB pages. These arrays
* should be indexed by physical memory group ID, the meaning of which is
* defined by the systems integrator.
*
* @small: Array of objects containing the state for pools of 4 KiB size
* physical pages.
* @large: Array of objects containing the state for pools of 2 MiB size
* physical pages.
*/
struct kbase_mem_pool_group {
struct kbase_mem_pool small[MEMORY_GROUP_MANAGER_NR_GROUPS];
@@ -512,11 +516,11 @@ struct kbase_mem_pool_config {
* struct kbase_mem_pool_group_config - Initial configuration for a complete
* set of physical memory pools
*
* This array should be indexed by physical memory group ID, the meaning
* of which is defined by the systems integrator.
*
* @small: Array of initial configuration for pools of 4 KiB pages.
* @large: Array of initial configuration for pools of 2 MiB pages.
*
* This array should be indexed by physical memory group ID, the meaning
* of which is defined by the systems integrator.
*/
struct kbase_mem_pool_group_config {
struct kbase_mem_pool_config small[MEMORY_GROUP_MANAGER_NR_GROUPS];
@@ -750,8 +754,13 @@ struct kbase_process {
* @hwcnt.addr: HW counter address
* @hwcnt.addr_bytes: HW counter size in bytes
* @hwcnt.backend: Kbase instrumentation backend
* @hwcnt_watchdog_timer: Hardware counter watchdog interface.
* @hwcnt_gpu_jm_backend: Job manager GPU backend interface, used as superclass reference
* pointer by hwcnt_gpu_iface, which wraps this implementation in
* order to extend it with periodic dumping functionality.
* @hwcnt_gpu_iface: Backend interface for GPU hardware counter access.
* @hwcnt_watchdog_timer: Watchdog interface, used by the GPU backend hwcnt_gpu_iface to
* perform periodic dumps in order to prevent hardware counter value
* overflow or saturation.
* @hwcnt_gpu_ctx: Context for GPU hardware counter access.
* @hwaccess_lock must be held when calling
* kbase_hwcnt_context_enable() with @hwcnt_gpu_ctx.
@@ -763,14 +772,6 @@ struct kbase_process {
* therefore timeline is disabled.
* @timeline: Timeline context created per device.
* @ktrace: kbase device's ktrace
* @trace_lock: Lock to serialize the access to trace buffer.
* @trace_first_out: Index/offset in the trace buffer at which the first
* unread message is present.
* @trace_next_in: Index/offset in the trace buffer at which the new
* message will be written.
* @trace_rbuf: Pointer to the buffer storing debug messages/prints
* tracing the various events in Driver.
* The buffer is filled in circular fashion.
* @reset_timeout_ms: Number of milliseconds to wait for the soft stop to
* complete for the GPU jobs before proceeding with the
* GPU reset.
@@ -875,6 +876,13 @@ struct kbase_process {
* backend specific data for HW access layer.
* @faults_pending: Count of page/bus faults waiting for bottom half processing
* via workqueues.
* @mmu_hw_operation_in_progress: Set before sending the MMU command and is
* cleared after the command is complete. Whilst this
* flag is set, the write to L2_PWROFF register will be
* skipped which is needed to workaround the HW issue
* GPU2019-3878. PM state machine is invoked after
* clearing this flag and @hwaccess_lock is used to
* serialize the access.
* @poweroff_pending: Set when power off operation for GPU is started, reset when
* power on for GPU is started.
* @infinite_cache_active_default: Set to enable using infinite cache for all the
@@ -904,9 +912,6 @@ struct kbase_process {
* enabled.
* @protected_mode_hwcnt_disable_work: Work item to disable GPU hardware
* counters, used if atomic disable is not possible.
* @buslogger: Pointer to the structure required for interfacing
* with the bus logger module to set the size of buffer
* used by the module for capturing bus logs.
* @irq_reset_flush: Flag to indicate that GPU reset is in-flight and flush of
* IRQ + bottom half is being done, to prevent the writes
* to MMU_IRQ_CLEAR & MMU_IRQ_MASK registers.
@@ -1007,7 +1012,7 @@ struct kbase_device {
struct memory_group_manager_device *mgm_dev;
struct kbase_as as[BASE_MAX_NR_AS];
u16 as_free; /* Bitpattern of free Address Spaces */
u16 as_free;
struct kbase_context *as_to_kctx[BASE_MAX_NR_AS];
spinlock_t mmu_mask_change;
@@ -1027,7 +1032,6 @@ struct kbase_device {
#if MALI_USE_CSF
struct kbase_hwcnt_backend_csf_if hwcnt_backend_csf_if_fw;
struct kbase_hwcnt_watchdog_interface hwcnt_watchdog_timer;
#else
struct kbase_hwcnt {
spinlock_t lock;
@@ -1038,9 +1042,13 @@ struct kbase_device {
struct kbase_instr_backend backend;
} hwcnt;
struct kbase_hwcnt_backend_interface hwcnt_gpu_jm_backend;
#endif
struct kbase_hwcnt_backend_interface hwcnt_gpu_iface;
struct kbase_hwcnt_watchdog_interface hwcnt_watchdog_timer;
struct kbase_hwcnt_context *hwcnt_gpu_ctx;
struct kbase_hwcnt_virtualizer *hwcnt_gpu_virt;
struct kbase_vinstr_context *vinstr_ctx;
@@ -1141,6 +1149,9 @@ struct kbase_device {
atomic_t faults_pending;
#if MALI_USE_CSF
bool mmu_hw_operation_in_progress;
#endif
bool poweroff_pending;
#if (KERNEL_VERSION(4, 4, 0) <= LINUX_VERSION_CODE)
@@ -1492,8 +1503,8 @@ struct kbase_sub_alloc {
* @mem_partials_lock: Lock for protecting the operations done on the elements
* added to @mem_partials list.
* @mem_partials: List head for the list of large pages, 2MB in size, which
* which have been split into 4 KB pages and are used
* partially for the allocations >= 2 MB in size.
* have been split into 4 KB pages and are used partially
* for the allocations >= 2 MB in size.
* @reg_lock: Lock used for GPU virtual address space management operations,
* like adding/freeing a memory region in the address space.
* Can be converted to a rwlock ?.
@@ -1505,6 +1516,17 @@ struct kbase_sub_alloc {
* @reg_rbtree_exec: RB tree of the memory regions allocated from the EXEC_VA
* zone of the GPU virtual address space. Used for GPU-executable
* allocations which don't need the SAME_VA property.
* @reg_rbtree_exec_fixed: RB tree of the memory regions allocated from the
* EXEC_FIXED_VA zone of the GPU virtual address space. Used for
* GPU-executable allocations with FIXED/FIXABLE GPU virtual
* addresses.
* @reg_rbtree_fixed: RB tree of the memory regions allocated from the FIXED_VA zone
* of the GPU virtual address space. Used for allocations with
* FIXED/FIXABLE GPU virtual addresses.
* @num_fixable_allocs: A count for the number of memory allocations with the
* BASE_MEM_FIXABLE property.
* @num_fixed_allocs: A count for the number of memory allocations with the
* BASE_MEM_FIXED property.
* @reg_zone: Zone information for the reg_rbtree_<...> members.
* @cookies: Bitmask consisting of BITS_PER_LONG bits, used mainly for
* SAME_VA allocations to defer the reservation of memory region
@@ -1608,6 +1630,8 @@ struct kbase_sub_alloc {
* dumping of its debug info is in progress.
* @job_fault_resume_event_list: List containing atoms completed after the faulty
* atom but before the debug data for faulty atom was dumped.
* @mem_view_column_width: Controls the number of bytes shown in every column of the
* output of "mem_view" debugfs file.
* @jsctx_queue: Per slot & priority arrays of object containing the root
* of RB-tree holding currently runnable atoms on the job slot
* and the head item of the linked list of atoms blocked on
@@ -1748,6 +1772,12 @@ struct kbase_context {
struct rb_root reg_rbtree_same;
struct rb_root reg_rbtree_custom;
struct rb_root reg_rbtree_exec;
#if MALI_USE_CSF
struct rb_root reg_rbtree_exec_fixed;
struct rb_root reg_rbtree_fixed;
atomic64_t num_fixable_allocs;
atomic64_t num_fixed_allocs;
#endif
struct kbase_reg_zone reg_zone[KBASE_REG_ZONE_MAX];
#if MALI_USE_CSF
@@ -1817,6 +1847,7 @@ struct kbase_context {
unsigned int *reg_dump;
atomic_t job_fault_count;
struct list_head job_fault_resume_event_list;
unsigned int mem_view_column_width;
#endif /* CONFIG_DEBUG_FS */
struct kbase_va_region *jit_alloc[1 + BASE_JIT_ALLOC_COUNT];
@@ -1924,13 +1955,6 @@ enum kbase_share_attr_bits {
SHARE_INNER_BITS = (3ULL << 8) /* inner shareable coherency */
};
/**
* enum kbase_timeout_selector - The choice of which timeout to get scaled
* using current GPU frequency.
* @CSF_FIRMWARE_TIMEOUT: Response timeout from CSF firmware.
*/
enum kbase_timeout_selector { CSF_FIRMWARE_TIMEOUT };
/**
* kbase_device_is_cpu_coherent - Returns if the device is CPU coherent.
* @kbdev: kbase device
@@ -1946,6 +1970,24 @@ static inline bool kbase_device_is_cpu_coherent(struct kbase_device *kbdev)
return false;
}
/**
* kbase_get_lock_region_min_size_log2 - Returns the minimum size of the MMU lock
* region, as a logarithm
*
* @gpu_props: GPU properties
*
* Return: the minimum size of the MMU lock region as dictated by the corresponding
* arch spec.
*/
static inline u64 kbase_get_lock_region_min_size_log2(struct kbase_gpu_props const *gpu_props)
{
if (GPU_ID2_MODEL_MATCH_VALUE(gpu_props->props.core_props.product_id) >=
GPU_ID2_MODEL_MAKE(12, 0))
return 12; /* 4 kB */
return 15; /* 32 kB */
}
/* Conversion helpers for setting up high resolution timers */
#define HR_TIMER_DELAY_MSEC(x) (ns_to_ktime(((u64)(x))*1000000U))
#define HR_TIMER_DELAY_NSEC(x) (ns_to_ktime(x))
@@ -1955,4 +1997,4 @@ static inline bool kbase_device_is_cpu_coherent(struct kbase_device *kbdev)
/* Maximum number of loops polling the GPU for an AS command to complete before we assume the GPU has hung */
#define KBASE_AS_INACTIVE_MAX_LOOPS 100000000
#endif /* _KBASE_DEFS_H_ */
#endif /* _KBASE_DEFS_H_ */

View File

@@ -161,7 +161,7 @@ kbase_dma_fence_cancel_atom(struct kbase_jd_atom *katom)
if (katom->status == KBASE_JD_ATOM_STATE_QUEUED) {
/* Wait was cancelled - zap the atom */
katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
if (jd_done_nolock(katom, NULL))
if (jd_done_nolock(katom, true))
kbase_js_sched_all(katom->kctx->kbdev);
}
}
@@ -196,7 +196,7 @@ kbase_dma_fence_work(struct work_struct *pwork)
* dependency. Run jd_done_nolock() on the katom if it is completed.
*/
if (unlikely(katom->status == KBASE_JD_ATOM_STATE_COMPLETED))
jd_done_nolock(katom, NULL);
jd_done_nolock(katom, true);
else
kbase_jd_dep_clear_locked(katom);

View File

@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
* (C) COPYRIGHT 2010-2016, 2020-2021 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2010-2016, 2020-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -132,6 +132,8 @@ void kbase_dma_fence_term(struct kbase_context *kctx);
/**
* kbase_dma_fence_init() - Initialize Mali dma-fence context
* @kctx: kbase context to initialize
*
* Return: 0 on success, error code otherwise.
*/
int kbase_dma_fence_init(struct kbase_context *kctx);

View File

@@ -239,7 +239,7 @@ int kbase_dummy_job_wa_execute(struct kbase_device *kbdev, u64 cores)
return failed ? -EFAULT : 0;
}
static ssize_t show_dummy_job_wa_info(struct device * const dev,
static ssize_t dummy_job_wa_info_show(struct device * const dev,
struct device_attribute * const attr, char * const buf)
{
struct kbase_device *const kbdev = dev_get_drvdata(dev);
@@ -254,7 +254,7 @@ static ssize_t show_dummy_job_wa_info(struct device * const dev,
return err;
}
static DEVICE_ATTR(dummy_job_wa_info, 0444, show_dummy_job_wa_info, NULL);
static DEVICE_ATTR_RO(dummy_job_wa_info);
static bool wa_blob_load_needed(struct kbase_device *kbdev)
{

Some files were not shown because too many files have changed in this diff Show More