mirror of
https://github.com/hardkernel/linux.git
synced 2026-06-05 18:41:58 +09:00
MALI: rockchip: upgrade bifrost DDK to g11p0-01eac0, from g10p0-01eac0
Change-Id: I0642ec37f151711b8b19c3206488d3301422971d Signed-off-by: Zhen Chen <chenzhen@rock-chips.com>
This commit is contained in:
@@ -83,7 +83,7 @@
|
||||
static dev_t dma_buf_lock_dev;
|
||||
static struct cdev dma_buf_lock_cdev;
|
||||
static struct class *dma_buf_lock_class;
|
||||
static char dma_buf_lock_dev_name[] = "dma_buf_lock";
|
||||
static const char dma_buf_lock_dev_name[] = "dma_buf_lock";
|
||||
|
||||
#if defined(HAVE_UNLOCKED_IOCTL) || defined(HAVE_COMPAT_IOCTL) || ((KERNEL_VERSION(5, 9, 0) <= LINUX_VERSION_CODE))
|
||||
static long dma_buf_lock_ioctl(struct file *filp, unsigned int cmd, unsigned long arg);
|
||||
@@ -91,8 +91,7 @@ static long dma_buf_lock_ioctl(struct file *filp, unsigned int cmd, unsigned lon
|
||||
static int dma_buf_lock_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, unsigned long arg);
|
||||
#endif
|
||||
|
||||
static struct file_operations dma_buf_lock_fops =
|
||||
{
|
||||
static const struct file_operations dma_buf_lock_fops = {
|
||||
.owner = THIS_MODULE,
|
||||
#if defined(HAVE_UNLOCKED_IOCTL) || ((KERNEL_VERSION(5, 9, 0) <= LINUX_VERSION_CODE))
|
||||
.unlocked_ioctl = dma_buf_lock_ioctl,
|
||||
@@ -105,8 +104,7 @@ static struct file_operations dma_buf_lock_fops =
|
||||
#endif
|
||||
};
|
||||
|
||||
typedef struct dma_buf_lock_resource
|
||||
{
|
||||
struct dma_buf_lock_resource {
|
||||
#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE)
|
||||
struct fence fence;
|
||||
#else
|
||||
@@ -123,7 +121,7 @@ typedef struct dma_buf_lock_resource
|
||||
struct list_head link;
|
||||
struct work_struct work;
|
||||
int count;
|
||||
} dma_buf_lock_resource;
|
||||
};
|
||||
|
||||
/**
|
||||
* struct dma_buf_lock_fence_cb - Callback data struct for dma-fence
|
||||
@@ -199,7 +197,7 @@ const struct dma_fence_ops dma_buf_lock_fence_ops = {
|
||||
};
|
||||
|
||||
static void
|
||||
dma_buf_lock_fence_init(dma_buf_lock_resource *resource)
|
||||
dma_buf_lock_fence_init(struct dma_buf_lock_resource *resource)
|
||||
{
|
||||
dma_fence_init(&resource->fence,
|
||||
&dma_buf_lock_fence_ops,
|
||||
@@ -209,7 +207,7 @@ dma_buf_lock_fence_init(dma_buf_lock_resource *resource)
|
||||
}
|
||||
|
||||
static void
|
||||
dma_buf_lock_fence_free_callbacks(dma_buf_lock_resource *resource)
|
||||
dma_buf_lock_fence_free_callbacks(struct dma_buf_lock_resource *resource)
|
||||
{
|
||||
struct dma_buf_lock_fence_cb *cb, *tmp;
|
||||
|
||||
@@ -228,8 +226,8 @@ dma_buf_lock_fence_free_callbacks(dma_buf_lock_resource *resource)
|
||||
static void
|
||||
dma_buf_lock_fence_work(struct work_struct *pwork)
|
||||
{
|
||||
dma_buf_lock_resource *resource =
|
||||
container_of(pwork, dma_buf_lock_resource, work);
|
||||
struct dma_buf_lock_resource *resource =
|
||||
container_of(pwork, struct dma_buf_lock_resource, work);
|
||||
|
||||
WARN_ON(atomic_read(&resource->fence_dep_count));
|
||||
WARN_ON(!atomic_read(&resource->locked));
|
||||
@@ -250,10 +248,10 @@ dma_buf_lock_fence_callback(struct dma_fence *fence, struct dma_fence_cb *cb)
|
||||
struct dma_buf_lock_fence_cb *dma_buf_lock_cb = container_of(cb,
|
||||
struct dma_buf_lock_fence_cb,
|
||||
fence_cb);
|
||||
dma_buf_lock_resource *resource = dma_buf_lock_cb->res;
|
||||
struct dma_buf_lock_resource *resource = dma_buf_lock_cb->res;
|
||||
|
||||
#if DMA_BUF_LOCK_DEBUG
|
||||
printk(KERN_DEBUG "dma_buf_lock_fence_callback\n");
|
||||
pr_debug("%s\n", __func__);
|
||||
#endif
|
||||
|
||||
/* Callback function will be invoked in atomic context. */
|
||||
@@ -270,12 +268,12 @@ dma_buf_lock_fence_callback(struct dma_fence *fence, struct dma_fence_cb *cb)
|
||||
|
||||
#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE)
|
||||
static int
|
||||
dma_buf_lock_fence_add_callback(dma_buf_lock_resource *resource,
|
||||
dma_buf_lock_fence_add_callback(struct dma_buf_lock_resource *resource,
|
||||
struct fence *fence,
|
||||
fence_func_t callback)
|
||||
#else
|
||||
static int
|
||||
dma_buf_lock_fence_add_callback(dma_buf_lock_resource *resource,
|
||||
dma_buf_lock_fence_add_callback(struct dma_buf_lock_resource *resource,
|
||||
struct dma_fence *fence,
|
||||
dma_fence_func_t callback)
|
||||
#endif
|
||||
@@ -324,12 +322,12 @@ dma_buf_lock_fence_add_callback(dma_buf_lock_resource *resource,
|
||||
|
||||
#if (KERNEL_VERSION(5, 4, 0) > LINUX_VERSION_CODE)
|
||||
static int
|
||||
dma_buf_lock_add_fence_reservation_callback(dma_buf_lock_resource *resource,
|
||||
dma_buf_lock_add_fence_reservation_callback(struct dma_buf_lock_resource *resource,
|
||||
struct reservation_object *resv,
|
||||
bool exclusive)
|
||||
#else
|
||||
static int
|
||||
dma_buf_lock_add_fence_reservation_callback(dma_buf_lock_resource *resource,
|
||||
dma_buf_lock_add_fence_reservation_callback(struct dma_buf_lock_resource *resource,
|
||||
struct dma_resv *resv,
|
||||
bool exclusive)
|
||||
#endif
|
||||
@@ -398,7 +396,7 @@ out:
|
||||
}
|
||||
|
||||
static void
|
||||
dma_buf_lock_release_fence_reservation(dma_buf_lock_resource *resource,
|
||||
dma_buf_lock_release_fence_reservation(struct dma_buf_lock_resource *resource,
|
||||
struct ww_acquire_ctx *ctx)
|
||||
{
|
||||
unsigned int r;
|
||||
@@ -409,7 +407,7 @@ dma_buf_lock_release_fence_reservation(dma_buf_lock_resource *resource,
|
||||
}
|
||||
|
||||
static int
|
||||
dma_buf_lock_acquire_fence_reservation(dma_buf_lock_resource *resource,
|
||||
dma_buf_lock_acquire_fence_reservation(struct dma_buf_lock_resource *resource,
|
||||
struct ww_acquire_ctx *ctx)
|
||||
{
|
||||
#if (KERNEL_VERSION(5, 4, 0) > LINUX_VERSION_CODE)
|
||||
@@ -451,7 +449,7 @@ error:
|
||||
/* If we deadlock try with lock_slow and retry */
|
||||
if (err == -EDEADLK) {
|
||||
#if DMA_BUF_LOCK_DEBUG
|
||||
printk(KERN_DEBUG "deadlock at dma_buf fd %i\n",
|
||||
pr_debug("deadlock at dma_buf fd %i\n",
|
||||
resource->list_of_dma_buf_fds[content_resv_idx]);
|
||||
#endif
|
||||
content_resv = resource->dma_bufs[content_resv_idx]->resv;
|
||||
@@ -466,14 +464,14 @@ error:
|
||||
|
||||
static int dma_buf_lock_handle_release(struct inode *inode, struct file *file)
|
||||
{
|
||||
dma_buf_lock_resource *resource;
|
||||
struct dma_buf_lock_resource *resource;
|
||||
|
||||
if (!is_dma_buf_lock_file(file))
|
||||
return -EINVAL;
|
||||
|
||||
resource = file->private_data;
|
||||
#if DMA_BUF_LOCK_DEBUG
|
||||
printk("dma_buf_lock_handle_release\n");
|
||||
pr_debug("%s\n", __func__);
|
||||
#endif
|
||||
mutex_lock(&dma_buf_lock_mutex);
|
||||
kref_put(&resource->refcount, dma_buf_lock_dounlock);
|
||||
@@ -482,10 +480,11 @@ static int dma_buf_lock_handle_release(struct inode *inode, struct file *file)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static unsigned int dma_buf_lock_handle_poll(struct file *file,
|
||||
struct poll_table_struct *wait)
|
||||
static unsigned int dma_buf_lock_handle_poll(
|
||||
struct file *file,
|
||||
struct poll_table_struct *wait)
|
||||
{
|
||||
dma_buf_lock_resource *resource;
|
||||
struct dma_buf_lock_resource *resource;
|
||||
unsigned int ret = 0;
|
||||
|
||||
if (!is_dma_buf_lock_file(file))
|
||||
@@ -493,21 +492,19 @@ static unsigned int dma_buf_lock_handle_poll(struct file *file,
|
||||
|
||||
resource = file->private_data;
|
||||
#if DMA_BUF_LOCK_DEBUG
|
||||
printk("dma_buf_lock_handle_poll\n");
|
||||
pr_debug("%s\n", __func__);
|
||||
#endif
|
||||
if (atomic_read(&resource->locked) == 1) {
|
||||
/* Resources have been locked */
|
||||
ret = POLLIN | POLLRDNORM;
|
||||
if (resource->exclusive)
|
||||
ret |= POLLOUT | POLLWRNORM;
|
||||
}
|
||||
else
|
||||
{
|
||||
} else {
|
||||
if (!poll_does_not_wait(wait))
|
||||
poll_wait(file, &resource->wait, wait);
|
||||
}
|
||||
#if DMA_BUF_LOCK_DEBUG
|
||||
printk("dma_buf_lock_handle_poll : return %i\n", ret);
|
||||
pr_debug("%s : return %i\n", __func__, ret);
|
||||
#endif
|
||||
return ret;
|
||||
}
|
||||
@@ -526,17 +523,15 @@ static inline int is_dma_buf_lock_file(struct file *file)
|
||||
return file->f_op == &dma_buf_lock_handle_fops;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*
|
||||
* Start requested lock.
|
||||
*
|
||||
* Allocates required memory, copies dma_buf_fd list from userspace,
|
||||
* acquires related reservation objects, and starts the lock.
|
||||
*/
|
||||
static int dma_buf_lock_dolock(dma_buf_lock_k_request *request)
|
||||
static int dma_buf_lock_dolock(struct dma_buf_lock_k_request *request)
|
||||
{
|
||||
dma_buf_lock_resource *resource;
|
||||
struct dma_buf_lock_resource *resource;
|
||||
struct ww_acquire_ctx ww_ctx;
|
||||
int size;
|
||||
int fd;
|
||||
@@ -553,7 +548,7 @@ static int dma_buf_lock_dolock(dma_buf_lock_k_request *request)
|
||||
request->exclusive != DMA_BUF_LOCK_EXCLUSIVE)
|
||||
return -EINVAL;
|
||||
|
||||
resource = kzalloc(sizeof(dma_buf_lock_resource), GFP_KERNEL);
|
||||
resource = kzalloc(sizeof(*resource), GFP_KERNEL);
|
||||
if (resource == NULL)
|
||||
return -ENOMEM;
|
||||
|
||||
@@ -594,7 +589,7 @@ static int dma_buf_lock_dolock(dma_buf_lock_k_request *request)
|
||||
}
|
||||
#if DMA_BUF_LOCK_DEBUG
|
||||
for (i = 0; i < request->count; i++)
|
||||
printk("dma_buf %i = %X\n", i, resource->list_of_dma_buf_fds[i]);
|
||||
pr_debug("dma_buf %i = %X\n", i, resource->list_of_dma_buf_fds[i]);
|
||||
#endif
|
||||
|
||||
/* Initialize the fence associated with dma_buf_lock resource */
|
||||
@@ -611,13 +606,11 @@ static int dma_buf_lock_dolock(dma_buf_lock_k_request *request)
|
||||
|
||||
mutex_unlock(&dma_buf_lock_mutex);
|
||||
|
||||
for (i = 0; i < request->count; i++)
|
||||
{
|
||||
for (i = 0; i < request->count; i++) {
|
||||
/* Convert fd into dma_buf structure */
|
||||
resource->dma_bufs[i] = dma_buf_get(resource->list_of_dma_buf_fds[i]);
|
||||
|
||||
if (IS_ERR_VALUE(PTR_ERR(resource->dma_bufs[i])))
|
||||
{
|
||||
if (IS_ERR_VALUE(PTR_ERR(resource->dma_bufs[i]))) {
|
||||
mutex_lock(&dma_buf_lock_mutex);
|
||||
kref_put(&resource->refcount, dma_buf_lock_dounlock);
|
||||
mutex_unlock(&dma_buf_lock_mutex);
|
||||
@@ -632,8 +625,8 @@ static int dma_buf_lock_dolock(dma_buf_lock_k_request *request)
|
||||
return -EINVAL;
|
||||
}
|
||||
#if DMA_BUF_LOCK_DEBUG
|
||||
printk(KERN_DEBUG "dma_buf_lock_dolock : dma_buf_fd %i dma_buf %p dma_fence reservation %p\n",
|
||||
resource->list_of_dma_buf_fds[i], resource->dma_bufs[i], resource->dma_bufs[i]->resv);
|
||||
pr_debug("%s : dma_buf_fd %i dma_buf %p dma_fence reservation %p\n",
|
||||
__func__, resource->list_of_dma_buf_fds[i], resource->dma_bufs[i], resource->dma_bufs[i]->resv);
|
||||
#endif
|
||||
}
|
||||
|
||||
@@ -643,9 +636,8 @@ static int dma_buf_lock_dolock(dma_buf_lock_k_request *request)
|
||||
|
||||
/* Create file descriptor associated with lock request */
|
||||
fd = anon_inode_getfd("dma_buf_lock", &dma_buf_lock_handle_fops,
|
||||
(void *)resource, 0);
|
||||
if (fd < 0)
|
||||
{
|
||||
(void *)resource, 0);
|
||||
if (fd < 0) {
|
||||
mutex_lock(&dma_buf_lock_mutex);
|
||||
kref_put(&resource->refcount, dma_buf_lock_dounlock);
|
||||
kref_put(&resource->refcount, dma_buf_lock_dounlock);
|
||||
@@ -659,7 +651,7 @@ static int dma_buf_lock_dolock(dma_buf_lock_k_request *request)
|
||||
ret = dma_buf_lock_acquire_fence_reservation(resource, &ww_ctx);
|
||||
if (ret) {
|
||||
#if DMA_BUF_LOCK_DEBUG
|
||||
printk(KERN_DEBUG "dma_buf_lock_dolock : Error %d locking reservations.\n", ret);
|
||||
pr_debug("%s : Error %d locking reservations.\n", __func__, ret);
|
||||
#endif
|
||||
put_unused_fd(fd);
|
||||
mutex_lock(&dma_buf_lock_mutex);
|
||||
@@ -698,7 +690,7 @@ static int dma_buf_lock_dolock(dma_buf_lock_k_request *request)
|
||||
#endif
|
||||
if (ret) {
|
||||
#if DMA_BUF_LOCK_DEBUG
|
||||
printk(KERN_DEBUG "dma_buf_lock_dolock : Error %d reserving space for shared fence.\n", ret);
|
||||
pr_debug("%s : Error %d reserving space for shared fence.\n", __func__, ret);
|
||||
#endif
|
||||
break;
|
||||
}
|
||||
@@ -708,7 +700,7 @@ static int dma_buf_lock_dolock(dma_buf_lock_k_request *request)
|
||||
false);
|
||||
if (ret) {
|
||||
#if DMA_BUF_LOCK_DEBUG
|
||||
printk(KERN_DEBUG "dma_buf_lock_dolock : Error %d adding reservation to callback.\n", ret);
|
||||
pr_debug("%s : Error %d adding reservation to callback.\n", __func__, ret);
|
||||
#endif
|
||||
break;
|
||||
}
|
||||
@@ -724,7 +716,7 @@ static int dma_buf_lock_dolock(dma_buf_lock_k_request *request)
|
||||
true);
|
||||
if (ret) {
|
||||
#if DMA_BUF_LOCK_DEBUG
|
||||
printk(KERN_DEBUG "dma_buf_lock_dolock : Error %d adding reservation to callback.\n", ret);
|
||||
pr_debug("%s : Error %d adding reservation to callback.\n", __func__, ret);
|
||||
#endif
|
||||
break;
|
||||
}
|
||||
@@ -748,8 +740,7 @@ static int dma_buf_lock_dolock(dma_buf_lock_k_request *request)
|
||||
dma_buf_lock_fence_work(&resource->work);
|
||||
}
|
||||
|
||||
if (IS_ERR_VALUE((unsigned long)ret))
|
||||
{
|
||||
if (IS_ERR_VALUE((unsigned long)ret)) {
|
||||
put_unused_fd(fd);
|
||||
|
||||
mutex_lock(&dma_buf_lock_mutex);
|
||||
@@ -761,7 +752,7 @@ static int dma_buf_lock_dolock(dma_buf_lock_k_request *request)
|
||||
}
|
||||
|
||||
#if DMA_BUF_LOCK_DEBUG
|
||||
printk("dma_buf_lock_dolock : complete\n");
|
||||
pr_debug("%s : complete\n", __func__);
|
||||
#endif
|
||||
mutex_lock(&dma_buf_lock_mutex);
|
||||
kref_put(&resource->refcount, dma_buf_lock_dounlock);
|
||||
@@ -773,7 +764,7 @@ static int dma_buf_lock_dolock(dma_buf_lock_k_request *request)
|
||||
static void dma_buf_lock_dounlock(struct kref *ref)
|
||||
{
|
||||
int i;
|
||||
dma_buf_lock_resource *resource = container_of(ref, dma_buf_lock_resource, refcount);
|
||||
struct dma_buf_lock_resource *resource = container_of(ref, struct dma_buf_lock_resource, refcount);
|
||||
|
||||
atomic_set(&resource->locked, 0);
|
||||
|
||||
@@ -784,8 +775,7 @@ static void dma_buf_lock_dounlock(struct kref *ref)
|
||||
|
||||
list_del(&resource->link);
|
||||
|
||||
for (i = 0; i < resource->count; i++)
|
||||
{
|
||||
for (i = 0; i < resource->count; i++) {
|
||||
if (resource->dma_bufs[i])
|
||||
dma_buf_put(resource->dma_bufs[i]);
|
||||
}
|
||||
@@ -799,7 +789,7 @@ static int __init dma_buf_lock_init(void)
|
||||
{
|
||||
int err;
|
||||
#if DMA_BUF_LOCK_DEBUG
|
||||
printk("dma_buf_lock_init\n");
|
||||
pr_debug("%s\n", __func__);
|
||||
#endif
|
||||
err = alloc_chrdev_region(&dma_buf_lock_dev, 0, 1, dma_buf_lock_dev_name);
|
||||
|
||||
@@ -812,10 +802,8 @@ static int __init dma_buf_lock_init(void)
|
||||
dma_buf_lock_class = class_create(THIS_MODULE, dma_buf_lock_dev_name);
|
||||
if (IS_ERR(dma_buf_lock_class))
|
||||
err = PTR_ERR(dma_buf_lock_class);
|
||||
else
|
||||
{
|
||||
struct device *mdev;
|
||||
mdev = device_create(
|
||||
else {
|
||||
struct device *mdev = device_create(
|
||||
dma_buf_lock_class, NULL, dma_buf_lock_dev,
|
||||
NULL, "%s", dma_buf_lock_dev_name);
|
||||
if (!IS_ERR(mdev))
|
||||
@@ -830,7 +818,7 @@ static int __init dma_buf_lock_init(void)
|
||||
unregister_chrdev_region(dma_buf_lock_dev, 1);
|
||||
}
|
||||
#if DMA_BUF_LOCK_DEBUG
|
||||
printk("dma_buf_lock_init failed\n");
|
||||
pr_debug("%s failed\n", __func__);
|
||||
#endif
|
||||
return err;
|
||||
}
|
||||
@@ -838,25 +826,24 @@ static int __init dma_buf_lock_init(void)
|
||||
static void __exit dma_buf_lock_exit(void)
|
||||
{
|
||||
#if DMA_BUF_LOCK_DEBUG
|
||||
printk("dma_buf_lock_exit\n");
|
||||
pr_debug("%s\n", __func__);
|
||||
#endif
|
||||
|
||||
/* Unlock all outstanding references */
|
||||
while (1)
|
||||
{
|
||||
while (1) {
|
||||
struct dma_buf_lock_resource *resource;
|
||||
|
||||
mutex_lock(&dma_buf_lock_mutex);
|
||||
if (list_empty(&dma_buf_lock_resource_list))
|
||||
{
|
||||
if (list_empty(&dma_buf_lock_resource_list)) {
|
||||
mutex_unlock(&dma_buf_lock_mutex);
|
||||
break;
|
||||
}
|
||||
else
|
||||
{
|
||||
dma_buf_lock_resource *resource = list_entry(dma_buf_lock_resource_list.next,
|
||||
dma_buf_lock_resource, link);
|
||||
kref_put(&resource->refcount, dma_buf_lock_dounlock);
|
||||
mutex_unlock(&dma_buf_lock_mutex);
|
||||
}
|
||||
|
||||
resource = list_entry(dma_buf_lock_resource_list.next,
|
||||
struct dma_buf_lock_resource, link);
|
||||
|
||||
kref_put(&resource->refcount, dma_buf_lock_dounlock);
|
||||
mutex_unlock(&dma_buf_lock_mutex);
|
||||
}
|
||||
|
||||
device_destroy(dma_buf_lock_class, dma_buf_lock_dev);
|
||||
@@ -874,7 +861,7 @@ static long dma_buf_lock_ioctl(struct file *filp, unsigned int cmd, unsigned lon
|
||||
static int dma_buf_lock_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, unsigned long arg)
|
||||
#endif
|
||||
{
|
||||
dma_buf_lock_k_request request;
|
||||
struct dma_buf_lock_k_request request;
|
||||
int size = _IOC_SIZE(cmd);
|
||||
|
||||
if (_IOC_TYPE(cmd) != DMA_BUF_LOCK_IOC_MAGIC)
|
||||
@@ -882,17 +869,16 @@ static int dma_buf_lock_ioctl(struct inode *inode, struct file *filp, unsigned i
|
||||
if ((_IOC_NR(cmd) < DMA_BUF_LOCK_IOC_MINNR) || (_IOC_NR(cmd) > DMA_BUF_LOCK_IOC_MAXNR))
|
||||
return -ENOTTY;
|
||||
|
||||
switch (cmd)
|
||||
{
|
||||
case DMA_BUF_LOCK_FUNC_LOCK_ASYNC:
|
||||
if (size != sizeof(dma_buf_lock_k_request))
|
||||
return -ENOTTY;
|
||||
if (copy_from_user(&request, (void __user *)arg, size))
|
||||
return -EFAULT;
|
||||
switch (cmd) {
|
||||
case DMA_BUF_LOCK_FUNC_LOCK_ASYNC:
|
||||
if (size != sizeof(request))
|
||||
return -ENOTTY;
|
||||
if (copy_from_user(&request, (void __user *)arg, size))
|
||||
return -EFAULT;
|
||||
#if DMA_BUF_LOCK_DEBUG
|
||||
printk("DMA_BUF_LOCK_FUNC_LOCK_ASYNC - %i\n", request.count);
|
||||
pr_debug("DMA_BUF_LOCK_FUNC_LOCK_ASYNC - %i\n", request.count);
|
||||
#endif
|
||||
return dma_buf_lock_dolock(&request);
|
||||
return dma_buf_lock_dolock(&request);
|
||||
}
|
||||
|
||||
return -ENOTTY;
|
||||
|
||||
@@ -22,23 +22,21 @@
|
||||
#ifndef _DMA_BUF_LOCK_H
|
||||
#define _DMA_BUF_LOCK_H
|
||||
|
||||
typedef enum dma_buf_lock_exclusive
|
||||
{
|
||||
enum dma_buf_lock_exclusive {
|
||||
DMA_BUF_LOCK_NONEXCLUSIVE = 0,
|
||||
DMA_BUF_LOCK_EXCLUSIVE = -1
|
||||
} dma_buf_lock_exclusive;
|
||||
};
|
||||
|
||||
typedef struct dma_buf_lock_k_request
|
||||
{
|
||||
struct dma_buf_lock_k_request {
|
||||
int count;
|
||||
int *list_of_dma_buf_fds;
|
||||
int timeout;
|
||||
dma_buf_lock_exclusive exclusive;
|
||||
} dma_buf_lock_k_request;
|
||||
enum dma_buf_lock_exclusive exclusive;
|
||||
};
|
||||
|
||||
#define DMA_BUF_LOCK_IOC_MAGIC '~'
|
||||
|
||||
#define DMA_BUF_LOCK_FUNC_LOCK_ASYNC _IOW(DMA_BUF_LOCK_IOC_MAGIC, 11, dma_buf_lock_k_request)
|
||||
#define DMA_BUF_LOCK_FUNC_LOCK_ASYNC _IOW(DMA_BUF_LOCK_IOC_MAGIC, 11, struct dma_buf_lock_k_request)
|
||||
|
||||
#define DMA_BUF_LOCK_IOC_MINNR 11
|
||||
#define DMA_BUF_LOCK_IOC_MAXNR 11
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
|
||||
/*
|
||||
*
|
||||
* (C) COPYRIGHT 2012-2021 ARM Limited. All rights reserved.
|
||||
* (C) COPYRIGHT 2012-2022 ARM Limited. All rights reserved.
|
||||
*
|
||||
* This program is free software and is provided to you under the terms of the
|
||||
* GNU General Public License version 2 as published by the Free Software
|
||||
@@ -81,6 +81,7 @@ static int dma_buf_te_attach(struct dma_buf *buf, struct dma_buf_attachment *att
|
||||
#endif
|
||||
{
|
||||
struct dma_buf_te_alloc *alloc;
|
||||
|
||||
alloc = buf->priv;
|
||||
|
||||
if (alloc->fail_attach)
|
||||
@@ -95,6 +96,12 @@ static int dma_buf_te_attach(struct dma_buf *buf, struct dma_buf_attachment *att
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* dma_buf_te_detach - The detach callback function to release &attachment
|
||||
*
|
||||
* @buf: buffer for the &attachment
|
||||
* @attachment: attachment data to be released
|
||||
*/
|
||||
static void dma_buf_te_detach(struct dma_buf *buf, struct dma_buf_attachment *attachment)
|
||||
{
|
||||
struct dma_buf_te_alloc *alloc = buf->priv;
|
||||
@@ -199,6 +206,7 @@ static void dma_buf_te_release(struct dma_buf *buf)
|
||||
{
|
||||
size_t i;
|
||||
struct dma_buf_te_alloc *alloc;
|
||||
|
||||
alloc = buf->priv;
|
||||
/* no need for locking */
|
||||
|
||||
@@ -240,6 +248,7 @@ static int dma_buf_te_sync(struct dma_buf *dmabuf,
|
||||
list_for_each_entry(attachment, &dmabuf->attachments, node) {
|
||||
struct dma_buf_te_attachment *pa = attachment->priv;
|
||||
struct sg_table *sg = pa->sg;
|
||||
|
||||
if (!sg) {
|
||||
dev_dbg(te_device.this_device, "no mapping for device %s\n", dev_name(attachment->dev));
|
||||
continue;
|
||||
@@ -291,6 +300,7 @@ static void dma_buf_te_mmap_open(struct vm_area_struct *vma)
|
||||
{
|
||||
struct dma_buf *dma_buf;
|
||||
struct dma_buf_te_alloc *alloc;
|
||||
|
||||
dma_buf = vma->vm_private_data;
|
||||
alloc = dma_buf->priv;
|
||||
|
||||
@@ -303,6 +313,7 @@ static void dma_buf_te_mmap_close(struct vm_area_struct *vma)
|
||||
{
|
||||
struct dma_buf *dma_buf;
|
||||
struct dma_buf_te_alloc *alloc;
|
||||
|
||||
dma_buf = vma->vm_private_data;
|
||||
alloc = dma_buf->priv;
|
||||
|
||||
@@ -344,7 +355,7 @@ static vm_fault_t dma_buf_te_mmap_fault(struct vm_fault *vmf)
|
||||
return 0;
|
||||
}
|
||||
|
||||
struct vm_operations_struct dma_buf_te_vm_ops = {
|
||||
static const struct vm_operations_struct dma_buf_te_vm_ops = {
|
||||
.open = dma_buf_te_mmap_open,
|
||||
.close = dma_buf_te_mmap_close,
|
||||
.fault = dma_buf_te_mmap_fault
|
||||
@@ -353,6 +364,7 @@ struct vm_operations_struct dma_buf_te_vm_ops = {
|
||||
static int dma_buf_te_mmap(struct dma_buf *dmabuf, struct vm_area_struct *vma)
|
||||
{
|
||||
struct dma_buf_te_alloc *alloc;
|
||||
|
||||
alloc = dmabuf->priv;
|
||||
|
||||
if (alloc->fail_mmap)
|
||||
@@ -398,7 +410,6 @@ static void dma_buf_te_kunmap(struct dma_buf *buf,
|
||||
return;
|
||||
|
||||
kunmap(alloc->pages[page_num]);
|
||||
return;
|
||||
}
|
||||
|
||||
static struct dma_buf_ops dma_buf_te_ops = {
|
||||
@@ -798,13 +809,14 @@ static const struct file_operations dma_buf_te_fops = {
|
||||
static int __init dma_buf_te_init(void)
|
||||
{
|
||||
int res;
|
||||
|
||||
te_device.minor = MISC_DYNAMIC_MINOR;
|
||||
te_device.name = "dma_buf_te";
|
||||
te_device.fops = &dma_buf_te_fops;
|
||||
|
||||
res = misc_register(&te_device);
|
||||
if (res) {
|
||||
printk(KERN_WARNING"Misc device registration failed of 'dma_buf_te'\n");
|
||||
pr_warn("Misc device registration failed of 'dma_buf_te'\n");
|
||||
return res;
|
||||
}
|
||||
te_device.this_device->coherent_dma_mask = DMA_BIT_MASK(32);
|
||||
|
||||
@@ -367,7 +367,7 @@ static vm_fault_t example_mgm_vmf_insert_pfn_prot(
|
||||
dev_dbg(data->dev,
|
||||
"%s(mgm_dev=%p, group_id=%d, vma=%p, addr=0x%lx, pfn=0x%lx, prot=0x%llx)\n",
|
||||
__func__, (void *)mgm_dev, group_id, (void *)vma, addr, pfn,
|
||||
(unsigned long long int) pgprot_val(prot));
|
||||
(unsigned long long) pgprot_val(prot));
|
||||
|
||||
if (WARN_ON(group_id < 0) ||
|
||||
WARN_ON(group_id >= MEMORY_GROUP_MANAGER_NR_GROUPS))
|
||||
|
||||
@@ -107,20 +107,20 @@ static void small_granularity_alloc(struct simple_pma_device *const epma_dev,
|
||||
alloc_pages_bitfield_size = ALLOC_PAGES_BITFIELD_ARR_SIZE(epma_dev->rmem_size);
|
||||
|
||||
WARN(alloc_bitfield_idx >= alloc_pages_bitfield_size,
|
||||
"%s: idx>bf_size: %zu %zu", __FUNCTION__,
|
||||
"%s: idx>bf_size: %zu %zu", __func__,
|
||||
alloc_bitfield_idx, alloc_pages_bitfield_size);
|
||||
|
||||
WARN((start_bit + (1 << order)) > PAGES_PER_BITFIELD_ELEM,
|
||||
"%s: start=%zu order=%zu ppbe=%zu",
|
||||
__FUNCTION__, start_bit, order, PAGES_PER_BITFIELD_ELEM);
|
||||
__func__, start_bit, order, PAGES_PER_BITFIELD_ELEM);
|
||||
|
||||
bitfield = &epma_dev->allocated_pages_bitfield_arr[alloc_bitfield_idx];
|
||||
|
||||
for (i = 0; i < (1 << order); i++) {
|
||||
/* Check the pages represented by this bit are actually free */
|
||||
WARN (*bitfield & (1ULL << (start_bit + i)),
|
||||
WARN(*bitfield & (1ULL << (start_bit + i)),
|
||||
"in %s: page not free: %zu %zu %.16llx %zu\n",
|
||||
__FUNCTION__, i, order, *bitfield, alloc_pages_bitfield_size);
|
||||
__func__, i, order, *bitfield, alloc_pages_bitfield_size);
|
||||
|
||||
/* Mark the pages as now allocated */
|
||||
*bitfield |= (1ULL << (start_bit + i));
|
||||
@@ -172,7 +172,7 @@ static void large_granularity_alloc(struct simple_pma_device *const epma_dev,
|
||||
*/
|
||||
WARN((start_alloc_bitfield_idx + order) >= ALLOC_PAGES_BITFIELD_ARR_SIZE(epma_dev->rmem_size),
|
||||
"%s: start=%zu order=%zu ms=%zu",
|
||||
__FUNCTION__, start_alloc_bitfield_idx, order, epma_dev->rmem_size);
|
||||
__func__, start_alloc_bitfield_idx, order, epma_dev->rmem_size);
|
||||
|
||||
for (i = 0; i < num_bitfield_elements_needed; i++) {
|
||||
u64 *bitfield = &epma_dev->allocated_pages_bitfield_arr[start_alloc_bitfield_idx + i];
|
||||
@@ -180,7 +180,7 @@ static void large_granularity_alloc(struct simple_pma_device *const epma_dev,
|
||||
/* We expect all pages that relate to this bitfield element to be free */
|
||||
WARN((*bitfield != 0),
|
||||
"in %s: pages not free: i=%zu o=%zu bf=%.16llx\n",
|
||||
__FUNCTION__, i, order, *bitfield);
|
||||
__func__, i, order, *bitfield);
|
||||
|
||||
/* Mark all the pages for this element as not free */
|
||||
*bitfield = ~0ULL;
|
||||
@@ -318,9 +318,7 @@ static struct protected_memory_allocation *simple_pma_alloc_page(
|
||||
spin_unlock(&epma_dev->rmem_lock);
|
||||
return pma;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
} else {
|
||||
count = 0;
|
||||
}
|
||||
}
|
||||
@@ -402,11 +400,10 @@ static void simple_pma_free_page(
|
||||
|
||||
/* Clear the bits for the pages we're now freeing */
|
||||
*bitfield &= ~(((1ULL << num_pages_in_allocation) - 1) << bitfield_start_bit);
|
||||
}
|
||||
else {
|
||||
} else {
|
||||
WARN(page_num % PAGES_PER_BITFIELD_ELEM,
|
||||
"%s: Expecting allocs of order >= %d to be %zu-page aligned\n",
|
||||
__FUNCTION__, ORDER_OF_PAGES_PER_BITFIELD_ELEM, PAGES_PER_BITFIELD_ELEM);
|
||||
__func__, ORDER_OF_PAGES_PER_BITFIELD_ELEM, PAGES_PER_BITFIELD_ELEM);
|
||||
|
||||
for (i = 0; i < num_bitfield_elems_used_by_alloc; i++) {
|
||||
bitfield = &epma_dev->allocated_pages_bitfield_arr[bitfield_idx + i];
|
||||
@@ -414,7 +411,7 @@ static void simple_pma_free_page(
|
||||
/* We expect all bits to be set (all pages allocated) */
|
||||
WARN((*bitfield != ~0),
|
||||
"%s: alloc being freed is not fully allocated: of=%zu np=%zu bf=%.16llx\n",
|
||||
__FUNCTION__, offset, num_pages_in_allocation, *bitfield);
|
||||
__func__, offset, num_pages_in_allocation, *bitfield);
|
||||
|
||||
/*
|
||||
* Now clear all the bits in the bitfield element to mark all the pages
|
||||
|
||||
@@ -71,7 +71,7 @@ endif
|
||||
#
|
||||
|
||||
# Driver version string which is returned to userspace via an ioctl
|
||||
MALI_RELEASE_NAME ?= '"g10p0-01eac0"'
|
||||
MALI_RELEASE_NAME ?= '"g11p0-01eac0"'
|
||||
# Set up defaults if not defined by build system
|
||||
ifeq ($(CONFIG_MALI_BIFROST_DEBUG), y)
|
||||
MALI_UNIT_TEST = 1
|
||||
@@ -164,6 +164,7 @@ bifrost_kbase-y := \
|
||||
mali_kbase_hwcnt_gpu_narrow.o \
|
||||
mali_kbase_hwcnt_types.o \
|
||||
mali_kbase_hwcnt_virtualizer.o \
|
||||
mali_kbase_hwcnt_watchdog_if_timer.o \
|
||||
mali_kbase_softjobs.o \
|
||||
mali_kbase_hw.o \
|
||||
mali_kbase_debug.o \
|
||||
@@ -201,12 +202,12 @@ bifrost_kbase-$(CONFIG_SYNC_FILE) += \
|
||||
ifeq ($(CONFIG_MALI_CSF_SUPPORT),y)
|
||||
bifrost_kbase-y += \
|
||||
mali_kbase_hwcnt_backend_csf.o \
|
||||
mali_kbase_hwcnt_watchdog_if_timer.o \
|
||||
mali_kbase_hwcnt_backend_csf_if_fw.o
|
||||
else
|
||||
bifrost_kbase-y += \
|
||||
mali_kbase_jm.o \
|
||||
mali_kbase_hwcnt_backend_jm.o \
|
||||
mali_kbase_hwcnt_backend_jm_watchdog.o \
|
||||
mali_kbase_dummy_job_wa.o \
|
||||
mali_kbase_debug_job_fault.o \
|
||||
mali_kbase_event.o \
|
||||
|
||||
@@ -47,6 +47,14 @@ config MALI_REAL_HW
|
||||
default y
|
||||
default n if NO_MALI
|
||||
|
||||
config MALI_PLATFORM_DT_PIN_RST
|
||||
bool "Enable Juno GPU Pin reset"
|
||||
depends on MALI_BIFROST
|
||||
default n
|
||||
default y if BUSLOG
|
||||
help
|
||||
Enables support for GPUs pin reset on Juno platforms.
|
||||
|
||||
config MALI_CSF_SUPPORT
|
||||
bool "Enable Mali CSF based GPU support"
|
||||
depends on MALI_BIFROST
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
|
||||
/*
|
||||
*
|
||||
* (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
|
||||
* (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
|
||||
*
|
||||
* This program is free software and is provided to you under the terms of the
|
||||
* GNU General Public License version 2 as published by the Free Software
|
||||
@@ -20,7 +20,7 @@
|
||||
*/
|
||||
|
||||
/**
|
||||
* Mali structures define to support arbitration feature
|
||||
* DOC: Mali structures define to support arbitration feature
|
||||
*/
|
||||
|
||||
#ifndef _MALI_KBASE_ARBITER_DEFS_H_
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
|
||||
/*
|
||||
*
|
||||
* (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
|
||||
* (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
|
||||
*
|
||||
* This program is free software and is provided to you under the terms of the
|
||||
* GNU General Public License version 2 as published by the Free Software
|
||||
@@ -20,14 +20,14 @@
|
||||
*/
|
||||
|
||||
/**
|
||||
* Defines the Mali arbiter interface
|
||||
* DOC: Defines the Mali arbiter interface
|
||||
*/
|
||||
|
||||
#ifndef _MALI_KBASE_ARBITER_INTERFACE_H_
|
||||
#define _MALI_KBASE_ARBITER_INTERFACE_H_
|
||||
|
||||
/**
|
||||
* Mali arbiter interface version
|
||||
* DOC: Mali arbiter interface version
|
||||
*
|
||||
* This specifies the current version of the configuration interface. Whenever
|
||||
* the arbiter interface changes, so that integration effort is required, the
|
||||
@@ -44,7 +44,7 @@
|
||||
#define MALI_KBASE_ARBITER_INTERFACE_VERSION 5
|
||||
|
||||
/**
|
||||
* NO_FREQ is used in case platform doesn't support reporting frequency
|
||||
* DOC: NO_FREQ is used in case platform doesn't support reporting frequency
|
||||
*/
|
||||
#define NO_FREQ 0
|
||||
|
||||
@@ -53,14 +53,6 @@ struct arbiter_if_dev;
|
||||
/**
|
||||
* struct arbiter_if_arb_vm_ops - Interface to communicate messages to VM
|
||||
*
|
||||
* This struct contains callbacks used to deliver messages
|
||||
* from the arbiter to the corresponding VM.
|
||||
*
|
||||
* Note that calls into these callbacks may have synchronous calls back into
|
||||
* the arbiter arbiter_if_vm_arb_ops callbacks below.
|
||||
* For example vm_arb_gpu_stopped() may be called as a side effect of
|
||||
* arb_vm_gpu_stop() being called here.
|
||||
*
|
||||
* @arb_vm_gpu_stop: Callback to ask VM to stop using GPU.
|
||||
* dev: The arbif kernel module device.
|
||||
*
|
||||
@@ -94,6 +86,13 @@ struct arbiter_if_dev;
|
||||
* freq: GPU clock frequency value reported from arbiter
|
||||
*
|
||||
* Informs KBase that the GPU clock frequency has been updated.
|
||||
*
|
||||
* This struct contains callbacks used to deliver messages
|
||||
* from the arbiter to the corresponding VM.
|
||||
* Note that calls into these callbacks may have synchronous calls back into
|
||||
* the arbiter arbiter_if_vm_arb_ops callbacks below.
|
||||
* For example vm_arb_gpu_stopped() may be called as a side effect of
|
||||
* arb_vm_gpu_stop() being called here.
|
||||
*/
|
||||
struct arbiter_if_arb_vm_ops {
|
||||
void (*arb_vm_gpu_stop)(struct device *dev);
|
||||
@@ -107,12 +106,6 @@ struct arbiter_if_arb_vm_ops {
|
||||
/**
|
||||
* struct arbiter_if_vm_arb_ops - Interface to communicate messages to arbiter
|
||||
*
|
||||
* This struct contains callbacks used to request operations
|
||||
* from the VM to the arbiter
|
||||
*
|
||||
* Note that we must not make any synchronous calls back in to the VM
|
||||
* (via arbiter_if_arb_vm_ops above) in the context of these callbacks.
|
||||
*
|
||||
* @vm_arb_register_dev: Callback to register VM device driver callbacks.
|
||||
* arbif_dev: The arbiter interface to register
|
||||
* with for device callbacks
|
||||
@@ -142,6 +135,11 @@ struct arbiter_if_arb_vm_ops {
|
||||
* using the GPU
|
||||
* arbif_dev: The arbiter interface device to notify.
|
||||
* gpu_required: The GPU is still needed to do more work.
|
||||
*
|
||||
* This struct contains callbacks used to request operations
|
||||
* from the VM to the arbiter.
|
||||
* Note that we must not make any synchronous calls back in to the VM
|
||||
* (via arbiter_if_arb_vm_ops above) in the context of these callbacks.
|
||||
*/
|
||||
struct arbiter_if_vm_arb_ops {
|
||||
int (*vm_arb_register_dev)(struct arbiter_if_dev *arbif_dev,
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
|
||||
/*
|
||||
*
|
||||
* (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
|
||||
* (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
|
||||
*
|
||||
* This program is free software and is provided to you under the terms of the
|
||||
* GNU General Public License version 2 as published by the Free Software
|
||||
@@ -20,7 +20,7 @@
|
||||
*/
|
||||
|
||||
/**
|
||||
* Mali arbiter power manager state machine and APIs
|
||||
* DOC: Mali arbiter power manager state machine and APIs
|
||||
*/
|
||||
|
||||
#include <mali_kbase.h>
|
||||
@@ -394,6 +394,8 @@ void kbase_arbiter_pm_release_interrupts(struct kbase_device *kbdev)
|
||||
* @kbdev: The kbase device structure for the device (must be a valid pointer)
|
||||
*
|
||||
* Install interrupts and set the interrupt_install flag to true.
|
||||
*
|
||||
* Return: 0 if success, or a Linux error code
|
||||
*/
|
||||
int kbase_arbiter_pm_install_interrupts(struct kbase_device *kbdev)
|
||||
{
|
||||
@@ -619,18 +621,6 @@ static void kbase_arbiter_pm_vm_gpu_stop(struct kbase_device *kbdev)
|
||||
case KBASE_VM_STATE_SUSPEND_PENDING:
|
||||
/* Suspend finishes with a stop so nothing else to do */
|
||||
break;
|
||||
case KBASE_VM_STATE_INITIALIZING:
|
||||
case KBASE_VM_STATE_STOPPED_GPU_REQUESTED:
|
||||
/*
|
||||
* Case stop() is received when in a GPU REQUESTED state, it
|
||||
* means that the granted() was missed so the GPU needs to be
|
||||
* requested again.
|
||||
*/
|
||||
dev_dbg(kbdev->dev,
|
||||
"GPU stop while already stopped with GPU requested");
|
||||
kbase_arbif_gpu_stopped(kbdev, true);
|
||||
start_request_timer(kbdev);
|
||||
break;
|
||||
default:
|
||||
dev_warn(kbdev->dev, "GPU_STOP when not expected - state %s\n",
|
||||
kbase_arbiter_pm_vm_state_str(arb_vm_state->vm_state));
|
||||
@@ -668,19 +658,8 @@ static void kbase_gpu_lost(struct kbase_device *kbdev)
|
||||
break;
|
||||
case KBASE_VM_STATE_SUSPENDED:
|
||||
case KBASE_VM_STATE_STOPPED:
|
||||
dev_dbg(kbdev->dev, "GPU lost while already stopped");
|
||||
break;
|
||||
case KBASE_VM_STATE_INITIALIZING:
|
||||
case KBASE_VM_STATE_STOPPED_GPU_REQUESTED:
|
||||
/*
|
||||
* Case lost() is received when in a GPU REQUESTED state, it
|
||||
* means that the granted() and stop() were missed so the GPU
|
||||
* needs to be requested again. Very unlikely to happen.
|
||||
*/
|
||||
dev_dbg(kbdev->dev,
|
||||
"GPU lost while already stopped with GPU requested");
|
||||
kbase_arbif_gpu_request(kbdev);
|
||||
start_request_timer(kbdev);
|
||||
dev_dbg(kbdev->dev, "GPU lost while already stopped");
|
||||
break;
|
||||
case KBASE_VM_STATE_SUSPEND_WAIT_FOR_GRANT:
|
||||
dev_dbg(kbdev->dev, "GPU lost while waiting to suspend");
|
||||
@@ -947,6 +926,8 @@ static void kbase_arbiter_pm_vm_wait_gpu_assignment(struct kbase_device *kbdev)
|
||||
* @kbdev: The kbase device structure for the device (must be a valid pointer)
|
||||
*
|
||||
* Checks if the virtual machine holds VM state lock.
|
||||
*
|
||||
* Return: true if GPU is assigned, else false.
|
||||
*/
|
||||
static inline bool kbase_arbiter_pm_vm_gpu_assigned_lockheld(
|
||||
struct kbase_device *kbdev)
|
||||
@@ -1067,14 +1048,14 @@ void kbase_arbiter_pm_update_gpu_freq(struct kbase_arbiter_freq *arb_freq,
|
||||
}
|
||||
|
||||
/**
|
||||
* enumerate_arb_gpu_clk() - Enumerate a GPU clock on the given index
|
||||
* get_arb_gpu_clk() - Enumerate a GPU clock on the given index
|
||||
* @kbdev: kbase_device pointer
|
||||
* @index: GPU clock index
|
||||
*
|
||||
* Returns pointer to structure holding GPU clock frequency data reported from
|
||||
* Return: Pointer to structure holding GPU clock frequency data reported from
|
||||
* arbiter, only index 0 is valid.
|
||||
*/
|
||||
static void *enumerate_arb_gpu_clk(struct kbase_device *kbdev,
|
||||
static void *get_arb_gpu_clk(struct kbase_device *kbdev,
|
||||
unsigned int index)
|
||||
{
|
||||
if (index == 0)
|
||||
@@ -1084,10 +1065,10 @@ static void *enumerate_arb_gpu_clk(struct kbase_device *kbdev,
|
||||
|
||||
/**
|
||||
* get_arb_gpu_clk_rate() - Get the current rate of GPU clock frequency value
|
||||
* @kbdev: kbase_device pointer
|
||||
* @index: GPU clock index
|
||||
* @kbdev: kbase_device pointer
|
||||
* @gpu_clk_handle: Handle unique to the enumerated GPU clock
|
||||
*
|
||||
* Returns the GPU clock frequency value saved when gpu is granted from arbiter
|
||||
* Return: The GPU clock frequency value saved when gpu is granted from arbiter
|
||||
*/
|
||||
static unsigned long get_arb_gpu_clk_rate(struct kbase_device *kbdev,
|
||||
void *gpu_clk_handle)
|
||||
@@ -1109,10 +1090,10 @@ static unsigned long get_arb_gpu_clk_rate(struct kbase_device *kbdev,
|
||||
* @gpu_clk_handle: Handle unique to the enumerated GPU clock
|
||||
* @nb: notifier block containing the callback function pointer
|
||||
*
|
||||
* Returns 0 on success, negative error code otherwise.
|
||||
*
|
||||
* This function registers a callback function that is invoked whenever the
|
||||
* frequency of the clock corresponding to @gpu_clk_handle changes.
|
||||
*
|
||||
* Return: 0 on success, negative error code otherwise.
|
||||
*/
|
||||
static int arb_gpu_clk_notifier_register(struct kbase_device *kbdev,
|
||||
void *gpu_clk_handle, struct notifier_block *nb)
|
||||
@@ -1154,7 +1135,7 @@ static void arb_gpu_clk_notifier_unregister(struct kbase_device *kbdev,
|
||||
|
||||
struct kbase_clk_rate_trace_op_conf arb_clk_rate_trace_ops = {
|
||||
.get_gpu_clk_rate = get_arb_gpu_clk_rate,
|
||||
.enumerate_gpu_clk = enumerate_arb_gpu_clk,
|
||||
.enumerate_gpu_clk = get_arb_gpu_clk,
|
||||
.gpu_clk_notifier_register = arb_gpu_clk_notifier_register,
|
||||
.gpu_clk_notifier_unregister = arb_gpu_clk_notifier_unregister
|
||||
};
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
|
||||
/*
|
||||
*
|
||||
* (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
|
||||
* (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
|
||||
*
|
||||
* This program is free software and is provided to you under the terms of the
|
||||
* GNU General Public License version 2 as published by the Free Software
|
||||
@@ -20,7 +20,7 @@
|
||||
*/
|
||||
|
||||
/**
|
||||
* Mali arbiter power manager state machine and APIs
|
||||
* DOC: Mali arbiter power manager state machine and APIs
|
||||
*/
|
||||
|
||||
#ifndef _MALI_KBASE_ARBITER_PM_H_
|
||||
@@ -101,6 +101,8 @@ void kbase_arbiter_pm_release_interrupts(struct kbase_device *kbdev);
|
||||
* @kbdev: The kbase device structure for the device (must be a valid pointer)
|
||||
*
|
||||
* Install interrupts and set the interrupt_install flag to true.
|
||||
*
|
||||
* Return: 0 if success, or a Linux error code
|
||||
*/
|
||||
int kbase_arbiter_pm_install_interrupts(struct kbase_device *kbdev);
|
||||
|
||||
|
||||
@@ -33,7 +33,7 @@ int kbase_backend_gpuprops_get(struct kbase_device *kbdev,
|
||||
struct kbase_gpuprops_regdump *regdump)
|
||||
{
|
||||
int i;
|
||||
struct kbase_gpuprops_regdump registers;
|
||||
struct kbase_gpuprops_regdump registers = { 0 };
|
||||
|
||||
/* Fill regdump with the content of the relevant registers */
|
||||
registers.gpu_id = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_ID));
|
||||
|
||||
@@ -421,12 +421,12 @@ int kbase_instr_backend_init(struct kbase_device *kbdev)
|
||||
#ifdef CONFIG_MALI_PRFCNT_SET_SELECT_VIA_DEBUG_FS
|
||||
/* Use the build time option for the override default. */
|
||||
#if defined(CONFIG_MALI_BIFROST_PRFCNT_SET_SECONDARY)
|
||||
kbdev->hwcnt.backend.override_counter_set = KBASE_HWCNT_SET_SECONDARY;
|
||||
kbdev->hwcnt.backend.override_counter_set = KBASE_HWCNT_PHYSICAL_SET_SECONDARY;
|
||||
#elif defined(CONFIG_MALI_PRFCNT_SET_TERTIARY)
|
||||
kbdev->hwcnt.backend.override_counter_set = KBASE_HWCNT_SET_TERTIARY;
|
||||
kbdev->hwcnt.backend.override_counter_set = KBASE_HWCNT_PHYSICAL_SET_TERTIARY;
|
||||
#else
|
||||
/* Default to primary */
|
||||
kbdev->hwcnt.backend.override_counter_set = KBASE_HWCNT_SET_PRIMARY;
|
||||
kbdev->hwcnt.backend.override_counter_set = KBASE_HWCNT_PHYSICAL_SET_PRIMARY;
|
||||
#endif
|
||||
#endif
|
||||
return 0;
|
||||
@@ -446,8 +446,8 @@ void kbase_instr_backend_debugfs_init(struct kbase_device *kbdev)
|
||||
*
|
||||
* Valid inputs are the values accepted by the SET_SELECT bits of the
|
||||
* PRFCNT_CONFIG register as defined in the architecture specification.
|
||||
*/
|
||||
debugfs_create_u8("hwcnt_set_select", S_IRUGO | S_IWUSR,
|
||||
*/
|
||||
debugfs_create_u8("hwcnt_set_select", 0644,
|
||||
kbdev->mali_debugfs_directory,
|
||||
(u8 *)&kbdev->hwcnt.backend.override_counter_set);
|
||||
}
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
|
||||
/*
|
||||
*
|
||||
* (C) COPYRIGHT 2014-2021 ARM Limited. All rights reserved.
|
||||
* (C) COPYRIGHT 2014-2022 ARM Limited. All rights reserved.
|
||||
*
|
||||
* This program is free software and is provided to you under the terms of the
|
||||
* GNU General Public License version 2 as published by the Free Software
|
||||
@@ -48,6 +48,7 @@ struct rb_entry {
|
||||
/**
|
||||
* SLOT_RB_TAG_KCTX() - a function-like macro for converting a pointer to a
|
||||
* u64 for serving as tagged value.
|
||||
* @kctx: Pointer to kbase context.
|
||||
*/
|
||||
#define SLOT_RB_TAG_KCTX(kctx) (u64)((uintptr_t)(kctx))
|
||||
/**
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
|
||||
/*
|
||||
*
|
||||
* (C) COPYRIGHT 2010-2021 ARM Limited. All rights reserved.
|
||||
* (C) COPYRIGHT 2010-2022 ARM Limited. All rights reserved.
|
||||
*
|
||||
* This program is free software and is provided to you under the terms of the
|
||||
* GNU General Public License version 2 as published by the Free Software
|
||||
@@ -425,6 +425,8 @@ void kbase_job_done(struct kbase_device *kbdev, u32 done)
|
||||
JOB_SLOT_REG(i, JS_STATUS));
|
||||
|
||||
if (completion_code == BASE_JD_EVENT_STOPPED) {
|
||||
u64 job_head;
|
||||
|
||||
KBASE_TLSTREAM_AUX_EVENT_JOB_SLOT(
|
||||
kbdev, NULL,
|
||||
i, 0, TL_JS_EVENT_SOFT_STOP);
|
||||
@@ -441,6 +443,21 @@ void kbase_job_done(struct kbase_device *kbdev, u32 done)
|
||||
((u64)kbase_reg_read(kbdev,
|
||||
JOB_SLOT_REG(i, JS_TAIL_HI))
|
||||
<< 32);
|
||||
job_head = (u64)kbase_reg_read(kbdev,
|
||||
JOB_SLOT_REG(i, JS_HEAD_LO)) |
|
||||
((u64)kbase_reg_read(kbdev,
|
||||
JOB_SLOT_REG(i, JS_HEAD_HI))
|
||||
<< 32);
|
||||
/* For a soft-stopped job chain js_tail should
|
||||
* be the same as the js_head, but if not then the
|
||||
* job chain was incorrectly marked as
|
||||
* soft-stopped. In such case we should not
|
||||
* be resuming the job chain from js_tail and
|
||||
* report the completion_code as UNKNOWN.
|
||||
*/
|
||||
if (job_tail != job_head)
|
||||
completion_code = BASE_JD_EVENT_UNKNOWN;
|
||||
|
||||
} else if (completion_code ==
|
||||
BASE_JD_EVENT_NOT_STARTED) {
|
||||
/* PRLAM-10673 can cause a TERMINATED
|
||||
@@ -922,33 +939,12 @@ void kbase_job_slot_softstop_swflags(struct kbase_device *kbdev, int js,
|
||||
JS_COMMAND_SOFT_STOP | sw_flags);
|
||||
}
|
||||
|
||||
/**
|
||||
* kbase_job_slot_softstop - Soft-stop the specified job slot
|
||||
* @kbdev: The kbase device
|
||||
* @js: The job slot to soft-stop
|
||||
* @target_katom: The job that should be soft-stopped (or NULL for any job)
|
||||
* Context:
|
||||
* The job slot lock must be held when calling this function.
|
||||
* The job slot must not already be in the process of being soft-stopped.
|
||||
*
|
||||
* Where possible any job in the next register is evicted before the soft-stop.
|
||||
*/
|
||||
void kbase_job_slot_softstop(struct kbase_device *kbdev, int js,
|
||||
struct kbase_jd_atom *target_katom)
|
||||
{
|
||||
kbase_job_slot_softstop_swflags(kbdev, js, target_katom, 0u);
|
||||
}
|
||||
|
||||
/**
|
||||
* kbase_job_slot_hardstop - Hard-stop the specified job slot
|
||||
* @kctx: The kbase context that contains the job(s) that should
|
||||
* be hard-stopped
|
||||
* @js: The job slot to hard-stop
|
||||
* @target_katom: The job that should be hard-stopped (or NULL for all
|
||||
* jobs from the context)
|
||||
* Context:
|
||||
* The job slot lock must be held when calling this function.
|
||||
*/
|
||||
void kbase_job_slot_hardstop(struct kbase_context *kctx, int js,
|
||||
struct kbase_jd_atom *target_katom)
|
||||
{
|
||||
@@ -961,26 +957,6 @@ void kbase_job_slot_hardstop(struct kbase_context *kctx, int js,
|
||||
CSTD_UNUSED(stopped);
|
||||
}
|
||||
|
||||
/**
|
||||
* kbase_job_check_enter_disjoint - potentiall enter disjoint mode
|
||||
* @kbdev: kbase device
|
||||
* @action: the event which has occurred
|
||||
* @core_reqs: core requirements of the atom
|
||||
* @target_katom: the atom which is being affected
|
||||
*
|
||||
* For a certain soft-stop action, work out whether to enter disjoint
|
||||
* state.
|
||||
*
|
||||
* This does not register multiple disjoint events if the atom has already
|
||||
* started a disjoint period
|
||||
*
|
||||
* @core_reqs can be supplied as 0 if the atom had not started on the hardware
|
||||
* (and so a 'real' soft/hard-stop was not required, but it still interrupted
|
||||
* flow, perhaps on another context)
|
||||
*
|
||||
* kbase_job_check_leave_disjoint() should be used to end the disjoint
|
||||
* state when the soft/hard-stop action is complete
|
||||
*/
|
||||
void kbase_job_check_enter_disjoint(struct kbase_device *kbdev, u32 action,
|
||||
base_jd_core_req core_reqs, struct kbase_jd_atom *target_katom)
|
||||
{
|
||||
@@ -1002,14 +978,6 @@ void kbase_job_check_enter_disjoint(struct kbase_device *kbdev, u32 action,
|
||||
kbase_disjoint_state_up(kbdev);
|
||||
}
|
||||
|
||||
/**
|
||||
* kbase_job_check_enter_disjoint - potentially leave disjoint state
|
||||
* @kbdev: kbase device
|
||||
* @target_katom: atom which is finishing
|
||||
*
|
||||
* Work out whether to leave disjoint state when finishing an atom that was
|
||||
* originated by kbase_job_check_enter_disjoint().
|
||||
*/
|
||||
void kbase_job_check_leave_disjoint(struct kbase_device *kbdev,
|
||||
struct kbase_jd_atom *target_katom)
|
||||
{
|
||||
@@ -1340,8 +1308,7 @@ static void kbasep_try_reset_gpu_early(struct kbase_device *kbdev)
|
||||
* This function soft-stops all the slots to ensure that as many jobs as
|
||||
* possible are saved.
|
||||
*
|
||||
* Return:
|
||||
* The function returns a boolean which should be interpreted as follows:
|
||||
* Return: boolean which should be interpreted as follows:
|
||||
* true - Prepared for reset, kbase_reset_gpu_locked should be called.
|
||||
* false - Another thread is performing a reset, kbase_reset_gpu should
|
||||
* not be called.
|
||||
@@ -1518,9 +1485,9 @@ static u64 kbasep_apply_limited_core_mask(const struct kbase_device *kbdev,
|
||||
#ifdef CONFIG_MALI_BIFROST_DEBUG
|
||||
dev_dbg(kbdev->dev,
|
||||
"Limiting affinity due to BASE_JD_REQ_LIMITED_CORE_MASK from 0x%lx to 0x%lx (mask is 0x%lx)\n",
|
||||
(unsigned long int)affinity,
|
||||
(unsigned long int)result,
|
||||
(unsigned long int)limited_core_mask);
|
||||
(unsigned long)affinity,
|
||||
(unsigned long)result,
|
||||
(unsigned long)limited_core_mask);
|
||||
#else
|
||||
CSTD_UNUSED(kbdev);
|
||||
#endif
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
|
||||
/*
|
||||
*
|
||||
* (C) COPYRIGHT 2014-2021 ARM Limited. All rights reserved.
|
||||
* (C) COPYRIGHT 2014-2022 ARM Limited. All rights reserved.
|
||||
*
|
||||
* This program is free software and is provided to you under the terms of the
|
||||
* GNU General Public License version 2 as published by the Free Software
|
||||
@@ -37,14 +37,23 @@
|
||||
#include <backend/gpu/mali_kbase_jm_internal.h>
|
||||
#include <backend/gpu/mali_kbase_pm_internal.h>
|
||||
|
||||
/* Return whether the specified ringbuffer is empty. HW access lock must be
|
||||
* held
|
||||
/**
|
||||
* SLOT_RB_EMPTY - Return whether the specified ringbuffer is empty.
|
||||
*
|
||||
* @rb: ring buffer
|
||||
*
|
||||
* Note: HW access lock must be held
|
||||
*/
|
||||
#define SLOT_RB_EMPTY(rb) (rb->write_idx == rb->read_idx)
|
||||
/* Return number of atoms currently in the specified ringbuffer. HW access lock
|
||||
* must be held
|
||||
|
||||
/**
|
||||
* SLOT_RB_ENTRIES - Return number of atoms currently in the specified ringbuffer.
|
||||
*
|
||||
* @rb: ring buffer
|
||||
*
|
||||
* Note: HW access lock must be held
|
||||
*/
|
||||
#define SLOT_RB_ENTRIES(rb) (int)(s8)(rb->write_idx - rb->read_idx)
|
||||
#define SLOT_RB_ENTRIES(rb) ((int)(s8)(rb->write_idx - rb->read_idx))
|
||||
|
||||
static void kbase_gpu_release_atom(struct kbase_device *kbdev,
|
||||
struct kbase_jd_atom *katom,
|
||||
@@ -304,10 +313,10 @@ static void kbase_gpu_release_atom(struct kbase_device *kbdev,
|
||||
[katom->slot_nr]);
|
||||
|
||||
/* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */
|
||||
|
||||
fallthrough;
|
||||
case KBASE_ATOM_GPU_RB_READY:
|
||||
/* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */
|
||||
|
||||
fallthrough;
|
||||
case KBASE_ATOM_GPU_RB_WAITING_FOR_CORE_AVAILABLE:
|
||||
break;
|
||||
|
||||
@@ -367,13 +376,13 @@ static void kbase_gpu_release_atom(struct kbase_device *kbdev,
|
||||
}
|
||||
|
||||
/* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */
|
||||
|
||||
fallthrough;
|
||||
case KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_PREV:
|
||||
/* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */
|
||||
|
||||
fallthrough;
|
||||
case KBASE_ATOM_GPU_RB_WAITING_BLOCKED:
|
||||
/* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */
|
||||
|
||||
fallthrough;
|
||||
case KBASE_ATOM_GPU_RB_RETURN_TO_JS:
|
||||
break;
|
||||
}
|
||||
@@ -1813,7 +1822,7 @@ void kbase_gpu_dump_slots(struct kbase_device *kbdev)
|
||||
|
||||
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
|
||||
|
||||
dev_info(kbdev->dev, "kbase_gpu_dump_slots:\n");
|
||||
dev_info(kbdev->dev, "%s:\n", __func__);
|
||||
|
||||
for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) {
|
||||
int idx;
|
||||
|
||||
@@ -62,7 +62,7 @@ void kbase_backend_timer_suspend(struct kbase_device *kbdev);
|
||||
* scheduling timer
|
||||
* @kbdev: Device pointer
|
||||
*
|
||||
* This function should be called on resume. Note that is is not guaranteed to
|
||||
* This function should be called on resume. Note that it is not guaranteed to
|
||||
* re-start the timer, only evaluate whether it should be re-started.
|
||||
*
|
||||
* Caller must hold runpool_mutex.
|
||||
|
||||
@@ -121,9 +121,9 @@ int kbase_set_mmu_quirks(struct kbase_device *kbdev)
|
||||
|
||||
if (kbdev->system_coherency == COHERENCY_ACE) {
|
||||
/* Allow memory configuration disparity to be ignored,
|
||||
* we optimize the use of shared memory and thus we
|
||||
* expect some disparity in the memory configuration.
|
||||
*/
|
||||
* we optimize the use of shared memory and thus we
|
||||
* expect some disparity in the memory configuration.
|
||||
*/
|
||||
kbdev->hw_quirks_mmu |= L2_MMU_CONFIG_ALLOW_SNOOP_DISPARITY;
|
||||
}
|
||||
|
||||
|
||||
@@ -1470,9 +1470,8 @@ u8 midgard_model_read_reg(void *h, u32 addr, u32 * const value)
|
||||
pr_debug("JS_IRQ_MASK being read %x", *value);
|
||||
}
|
||||
#else /* !MALI_USE_CSF */
|
||||
else if (addr == JOB_CONTROL_REG(JOB_IRQ_MASK)) {
|
||||
/* ignore JOB_IRQ_MASK as it is handled by CSFFW */
|
||||
}
|
||||
else if (addr == JOB_CONTROL_REG(JOB_IRQ_MASK))
|
||||
; /* ignore JOB_IRQ_MASK as it is handled by CSFFW */
|
||||
#endif /* !MALI_USE_CSF */
|
||||
else if (addr == GPU_CONTROL_REG(GPU_IRQ_MASK)) {
|
||||
*value = (dummy->reset_completed_mask << 8) |
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
|
||||
/*
|
||||
*
|
||||
* (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
|
||||
* (C) COPYRIGHT 2014-2015, 2017-2022 ARM Limited. All rights reserved.
|
||||
*
|
||||
* This program is free software and is provided to you under the terms of the
|
||||
* GNU General Public License version 2 as published by the Free Software
|
||||
@@ -143,7 +143,6 @@ void midgard_model_destroy(void *h);
|
||||
u8 midgard_model_write_reg(void *h, u32 addr, u32 value);
|
||||
u8 midgard_model_read_reg(void *h, u32 addr,
|
||||
u32 * const value);
|
||||
void gpu_generate_error(void);
|
||||
void midgard_set_error(int job_slot);
|
||||
int job_atom_inject_error(struct kbase_error_params *params);
|
||||
int gpu_model_control(void *h,
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
|
||||
/*
|
||||
*
|
||||
* (C) COPYRIGHT 2018-2021 ARM Limited. All rights reserved.
|
||||
* (C) COPYRIGHT 2014-2015, 2018-2022 ARM Limited. All rights reserved.
|
||||
*
|
||||
* This program is free software and is provided to you under the terms of the
|
||||
* GNU General Public License version 2 as published by the Free Software
|
||||
@@ -39,7 +39,11 @@ unsigned int error_probability = 50; /* to be set between 0 and 100 */
|
||||
/* probability of having multiple errors given that there is an error */
|
||||
unsigned int multiple_error_probability = 50;
|
||||
|
||||
void gpu_generate_error(void)
|
||||
#ifdef CONFIG_MALI_ERROR_INJECT_RANDOM
|
||||
/**
|
||||
* gpu_generate_error - Generate GPU error
|
||||
*/
|
||||
static void gpu_generate_error(void)
|
||||
{
|
||||
unsigned int errors_num = 0;
|
||||
|
||||
@@ -94,6 +98,7 @@ void gpu_generate_error(void)
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
int job_atom_inject_error(struct kbase_error_params *params)
|
||||
{
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
|
||||
/*
|
||||
*
|
||||
* (C) COPYRIGHT 2010, 2012-2015, 2017-2021 ARM Limited. All rights reserved.
|
||||
* (C) COPYRIGHT 2010, 2012-2015, 2017-2022 ARM Limited. All rights reserved.
|
||||
*
|
||||
* This program is free software and is provided to you under the terms of the
|
||||
* GNU General Public License version 2 as published by the Free Software
|
||||
@@ -135,8 +135,12 @@ void gpu_device_raise_irq(void *model,
|
||||
default:
|
||||
dev_warn(kbdev->dev, "Unknown IRQ");
|
||||
kmem_cache_free(kbdev->irq_slab, data);
|
||||
data = NULL;
|
||||
break;
|
||||
}
|
||||
queue_work(kbdev->irq_workq, &data->work);
|
||||
|
||||
if (data != NULL)
|
||||
queue_work(kbdev->irq_workq, &data->work);
|
||||
}
|
||||
|
||||
void kbase_reg_write(struct kbase_device *kbdev, u32 offset, u32 value)
|
||||
@@ -248,6 +252,11 @@ int kbase_gpu_device_create(struct kbase_device *kbdev)
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* kbase_gpu_device_destroy - Destroy GPU device
|
||||
*
|
||||
* @kbdev: kbase device
|
||||
*/
|
||||
void kbase_gpu_device_destroy(struct kbase_device *kbdev)
|
||||
{
|
||||
midgard_model_destroy(kbdev->model);
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
|
||||
/*
|
||||
*
|
||||
* (C) COPYRIGHT 2010-2015, 2018-2021 ARM Limited. All rights reserved.
|
||||
* (C) COPYRIGHT 2010-2015, 2018-2022 ARM Limited. All rights reserved.
|
||||
*
|
||||
* This program is free software and is provided to you under the terms of the
|
||||
* GNU General Public License version 2 as published by the Free Software
|
||||
@@ -41,6 +41,11 @@ static void always_on_init(struct kbase_device *kbdev)
|
||||
CSTD_UNUSED(kbdev);
|
||||
}
|
||||
|
||||
/**
|
||||
* always_on_term - Term callback function for always-on power policy
|
||||
*
|
||||
* @kbdev: kbase device
|
||||
*/
|
||||
static void always_on_term(struct kbase_device *kbdev)
|
||||
{
|
||||
CSTD_UNUSED(kbdev);
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
|
||||
/*
|
||||
*
|
||||
* (C) COPYRIGHT 2010-2021 ARM Limited. All rights reserved.
|
||||
* (C) COPYRIGHT 2010-2022 ARM Limited. All rights reserved.
|
||||
*
|
||||
* This program is free software and is provided to you under the terms of the
|
||||
* GNU General Public License version 2 as published by the Free Software
|
||||
@@ -101,9 +101,8 @@ int kbase_pm_runtime_init(struct kbase_device *kbdev)
|
||||
|
||||
void kbase_pm_runtime_term(struct kbase_device *kbdev)
|
||||
{
|
||||
if (kbdev->pm.callback_power_runtime_term) {
|
||||
if (kbdev->pm.callback_power_runtime_term)
|
||||
kbdev->pm.callback_power_runtime_term(kbdev);
|
||||
}
|
||||
}
|
||||
|
||||
void kbase_pm_register_access_enable(struct kbase_device *kbdev)
|
||||
@@ -202,6 +201,13 @@ int kbase_hwaccess_pm_init(struct kbase_device *kbdev)
|
||||
kbase_pm_hwcnt_disable_worker);
|
||||
kbase_hwcnt_context_disable(kbdev->hwcnt_gpu_ctx);
|
||||
|
||||
#if MALI_USE_CSF && defined(KBASE_PM_RUNTIME)
|
||||
kbdev->pm.backend.gpu_sleep_supported =
|
||||
kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_GPU_SLEEP) &&
|
||||
!kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TURSEHW_1997) &&
|
||||
kbdev->pm.backend.callback_power_runtime_gpu_active &&
|
||||
kbdev->pm.backend.callback_power_runtime_gpu_idle;
|
||||
#endif
|
||||
|
||||
if (IS_ENABLED(CONFIG_MALI_HW_ERRATA_1485982_NOT_AFFECTED)) {
|
||||
kbdev->pm.backend.l2_always_on = false;
|
||||
@@ -288,7 +294,7 @@ static void pm_handle_power_off(struct kbase_device *kbdev)
|
||||
|
||||
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
|
||||
#if MALI_USE_CSF && defined(KBASE_PM_RUNTIME)
|
||||
if (kbdev->pm.backend.gpu_wakeup_override ) {
|
||||
if (kbdev->pm.backend.gpu_wakeup_override) {
|
||||
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
|
||||
return;
|
||||
}
|
||||
@@ -362,11 +368,6 @@ static void kbase_pm_gpu_poweroff_wait_wq(struct work_struct *data)
|
||||
|
||||
kbase_pm_lock(kbdev);
|
||||
|
||||
#ifdef CONFIG_MALI_ARBITER_SUPPORT
|
||||
if (kbase_pm_is_gpu_lost(kbdev))
|
||||
backend->poweron_required = false;
|
||||
#endif
|
||||
|
||||
pm_handle_power_off(kbdev);
|
||||
|
||||
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
|
||||
@@ -683,6 +684,13 @@ void kbase_pm_wait_for_poweroff_work_complete(struct kbase_device *kbdev)
|
||||
}
|
||||
KBASE_EXPORT_TEST_API(kbase_pm_wait_for_poweroff_work_complete);
|
||||
|
||||
/**
|
||||
* is_gpu_powered_down - Check whether GPU is powered down
|
||||
*
|
||||
* @kbdev: kbase device
|
||||
*
|
||||
* Return: true if GPU is powered down, false otherwise
|
||||
*/
|
||||
static bool is_gpu_powered_down(struct kbase_device *kbdev)
|
||||
{
|
||||
bool ret;
|
||||
@@ -882,7 +890,7 @@ void kbase_pm_set_debug_core_mask(struct kbase_device *kbdev,
|
||||
lockdep_assert_held(&kbdev->pm.lock);
|
||||
|
||||
if (kbase_dummy_job_wa_enabled(kbdev)) {
|
||||
dev_warn(kbdev->dev, "Change of core mask not supported for slot 0 as dummy job WA is enabled");
|
||||
dev_warn_once(kbdev->dev, "Change of core mask not supported for slot 0 as dummy job WA is enabled");
|
||||
new_core_mask_js0 = kbdev->pm.debug_core_mask[0];
|
||||
}
|
||||
|
||||
|
||||
@@ -55,6 +55,9 @@ void kbase_devfreq_set_core_mask(struct kbase_device *kbdev, u64 core_mask)
|
||||
{
|
||||
struct kbase_pm_backend_data *pm_backend = &kbdev->pm.backend;
|
||||
unsigned long flags;
|
||||
#if MALI_USE_CSF
|
||||
u64 old_core_mask = 0;
|
||||
#endif
|
||||
|
||||
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
|
||||
|
||||
@@ -65,6 +68,8 @@ void kbase_devfreq_set_core_mask(struct kbase_device *kbdev, u64 core_mask)
|
||||
core_mask, kbdev->pm.debug_core_mask);
|
||||
goto unlock;
|
||||
}
|
||||
|
||||
old_core_mask = pm_backend->ca_cores_enabled;
|
||||
#else
|
||||
if (!(core_mask & kbdev->pm.debug_core_mask_all)) {
|
||||
dev_err(kbdev->dev, "OPP core mask 0x%llX does not intersect with debug mask 0x%llX\n",
|
||||
@@ -73,20 +78,53 @@ void kbase_devfreq_set_core_mask(struct kbase_device *kbdev, u64 core_mask)
|
||||
}
|
||||
|
||||
if (kbase_dummy_job_wa_enabled(kbdev)) {
|
||||
dev_err(kbdev->dev, "Dynamic core scaling not supported as dummy job WA is enabled");
|
||||
dev_err_once(kbdev->dev, "Dynamic core scaling not supported as dummy job WA is enabled");
|
||||
goto unlock;
|
||||
}
|
||||
#endif /* MALI_USE_CSF */
|
||||
|
||||
pm_backend->ca_cores_enabled = core_mask;
|
||||
|
||||
kbase_pm_update_state(kbdev);
|
||||
|
||||
unlock:
|
||||
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
|
||||
|
||||
#if MALI_USE_CSF
|
||||
/* Check if old_core_mask contained the undesired cores and wait
|
||||
* for those cores to get powered down
|
||||
*/
|
||||
if ((core_mask & old_core_mask) != old_core_mask) {
|
||||
bool can_wait;
|
||||
|
||||
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
|
||||
can_wait = kbdev->pm.backend.gpu_ready && kbase_pm_is_mcu_desired(kbdev);
|
||||
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
|
||||
|
||||
/* This check is ideally not required, the wait function can
|
||||
* deal with the GPU power down. But it has been added to
|
||||
* address the scenario where down-scaling request comes from
|
||||
* the platform specific code soon after the GPU power down
|
||||
* and at the same time an application thread tries to
|
||||
* power up the GPU (on the flush of GPU queue).
|
||||
* The platform specific @ref callback_power_on that gets
|
||||
* invoked on power up does not return until down-scaling
|
||||
* request is complete. The check mitigates the race caused by
|
||||
* the problem in platform specific code.
|
||||
*/
|
||||
if (likely(can_wait)) {
|
||||
if (kbase_pm_wait_for_desired_state(kbdev)) {
|
||||
dev_warn(kbdev->dev,
|
||||
"Wait for update of core_mask from %llx to %llx failed",
|
||||
old_core_mask, core_mask);
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
dev_dbg(kbdev->dev, "Devfreq policy : new core mask=%llX\n",
|
||||
pm_backend->ca_cores_enabled);
|
||||
|
||||
return;
|
||||
unlock:
|
||||
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
|
||||
}
|
||||
KBASE_EXPORT_TEST_API(kbase_devfreq_set_core_mask);
|
||||
#endif
|
||||
|
||||
@@ -101,6 +101,8 @@ static u64 kbase_pm_get_state(
|
||||
enum kbase_pm_core_type core_type,
|
||||
enum kbasep_pm_action action);
|
||||
|
||||
static void kbase_pm_hw_issues_apply(struct kbase_device *kbdev);
|
||||
|
||||
#if MALI_USE_CSF
|
||||
bool kbase_pm_is_mcu_desired(struct kbase_device *kbdev)
|
||||
{
|
||||
@@ -655,6 +657,35 @@ static void kbase_pm_enable_mcu_db_notification(struct kbase_device *kbdev)
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
/**
|
||||
* kbasep_pm_toggle_power_interrupt - Toggles the IRQ mask for power interrupts
|
||||
* from the firmware
|
||||
*
|
||||
* @kbdev: Pointer to the device
|
||||
* @enable: boolean indicating to enable interrupts or not
|
||||
*
|
||||
* The POWER_CHANGED_ALL and POWER_CHANGED_SINGLE interrupts can be disabled
|
||||
* after L2 has been turned on when FW is controlling the power for the shader
|
||||
* cores. Correspondingly, the interrupts can be re-enabled after the MCU has
|
||||
* been disabled before the power down of L2.
|
||||
*/
|
||||
static void kbasep_pm_toggle_power_interrupt(struct kbase_device *kbdev, bool enable)
|
||||
{
|
||||
u32 irq_mask;
|
||||
|
||||
lockdep_assert_held(&kbdev->hwaccess_lock);
|
||||
|
||||
irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK));
|
||||
|
||||
if (enable)
|
||||
irq_mask |= POWER_CHANGED_ALL | POWER_CHANGED_SINGLE;
|
||||
else
|
||||
irq_mask &= ~(POWER_CHANGED_ALL | POWER_CHANGED_SINGLE);
|
||||
|
||||
kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), irq_mask);
|
||||
}
|
||||
|
||||
static int kbase_pm_mcu_update_state(struct kbase_device *kbdev)
|
||||
{
|
||||
struct kbase_pm_backend_data *backend = &kbdev->pm.backend;
|
||||
@@ -698,6 +729,8 @@ static int kbase_pm_mcu_update_state(struct kbase_device *kbdev)
|
||||
kbase_pm_ca_get_core_mask(kbdev);
|
||||
kbase_csf_firmware_global_reinit(kbdev,
|
||||
backend->shaders_desired_mask);
|
||||
if (!kbdev->csf.firmware_hctl_core_pwr)
|
||||
kbasep_pm_toggle_power_interrupt(kbdev, false);
|
||||
backend->mcu_state =
|
||||
KBASE_MCU_ON_GLB_REINIT_PEND;
|
||||
}
|
||||
@@ -906,6 +939,8 @@ static int kbase_pm_mcu_update_state(struct kbase_device *kbdev)
|
||||
case KBASE_MCU_PEND_OFF:
|
||||
/* wait synchronously for the MCU to get disabled */
|
||||
kbase_csf_firmware_disable_mcu_wait(kbdev);
|
||||
if (!kbdev->csf.firmware_hctl_core_pwr)
|
||||
kbasep_pm_toggle_power_interrupt(kbdev, true);
|
||||
backend->mcu_state = KBASE_MCU_OFF;
|
||||
break;
|
||||
#ifdef KBASE_PM_RUNTIME
|
||||
@@ -924,6 +959,11 @@ static int kbase_pm_mcu_update_state(struct kbase_device *kbdev)
|
||||
backend->mcu_state = KBASE_MCU_IN_SLEEP;
|
||||
kbase_pm_enable_db_mirror_interrupt(kbdev);
|
||||
kbase_csf_scheduler_reval_idleness_post_sleep(kbdev);
|
||||
/* Enable PM interrupt, after MCU has been put
|
||||
* to sleep, for the power down of L2.
|
||||
*/
|
||||
if (!kbdev->csf.firmware_hctl_core_pwr)
|
||||
kbasep_pm_toggle_power_interrupt(kbdev, true);
|
||||
}
|
||||
break;
|
||||
|
||||
@@ -934,6 +974,11 @@ static int kbase_pm_mcu_update_state(struct kbase_device *kbdev)
|
||||
kbdev, kbase_backend_get_cycle_cnt(kbdev));
|
||||
kbase_pm_enable_mcu_db_notification(kbdev);
|
||||
kbase_pm_disable_db_mirror_interrupt(kbdev);
|
||||
/* Disable PM interrupt after L2 has been
|
||||
* powered up for the wakeup of MCU.
|
||||
*/
|
||||
if (!kbdev->csf.firmware_hctl_core_pwr)
|
||||
kbasep_pm_toggle_power_interrupt(kbdev, false);
|
||||
backend->mcu_state = KBASE_MCU_ON_HWCNT_ENABLE;
|
||||
}
|
||||
break;
|
||||
@@ -1017,6 +1062,18 @@ static void kbase_pm_l2_clear_backend_slot_submit_kctx(struct kbase_device *kbde
|
||||
}
|
||||
#endif
|
||||
|
||||
static bool can_power_down_l2(struct kbase_device *kbdev)
|
||||
{
|
||||
#if MALI_USE_CSF
|
||||
/* Due to the HW issue GPU2019-3878, need to prevent L2 power off
|
||||
* whilst MMU command is in progress.
|
||||
*/
|
||||
return !kbdev->mmu_hw_operation_in_progress;
|
||||
#else
|
||||
return true;
|
||||
#endif
|
||||
}
|
||||
|
||||
static int kbase_pm_l2_update_state(struct kbase_device *kbdev)
|
||||
{
|
||||
struct kbase_pm_backend_data *backend = &kbdev->pm.backend;
|
||||
@@ -1258,9 +1315,8 @@ static int kbase_pm_l2_update_state(struct kbase_device *kbdev)
|
||||
}
|
||||
|
||||
backend->hwcnt_desired = false;
|
||||
if (!backend->hwcnt_disabled) {
|
||||
if (!backend->hwcnt_disabled)
|
||||
kbase_pm_trigger_hwcnt_disable(kbdev);
|
||||
}
|
||||
#endif
|
||||
|
||||
if (backend->hwcnt_disabled) {
|
||||
@@ -1297,27 +1353,31 @@ static int kbase_pm_l2_update_state(struct kbase_device *kbdev)
|
||||
break;
|
||||
|
||||
case KBASE_L2_POWER_DOWN:
|
||||
if (!backend->l2_always_on)
|
||||
/* Powering off the L2 will also power off the
|
||||
* tiler.
|
||||
*/
|
||||
kbase_pm_invoke(kbdev, KBASE_PM_CORE_L2,
|
||||
l2_present,
|
||||
ACTION_PWROFF);
|
||||
else
|
||||
/* If L2 cache is powered then we must flush it
|
||||
* before we power off the GPU. Normally this
|
||||
* would have been handled when the L2 was
|
||||
* powered off.
|
||||
*/
|
||||
kbase_gpu_start_cache_clean_nolock(
|
||||
kbdev, GPU_COMMAND_CACHE_CLN_INV_L2);
|
||||
if (kbase_pm_is_l2_desired(kbdev))
|
||||
backend->l2_state = KBASE_L2_PEND_ON;
|
||||
else if (can_power_down_l2(kbdev)) {
|
||||
if (!backend->l2_always_on)
|
||||
/* Powering off the L2 will also power off the
|
||||
* tiler.
|
||||
*/
|
||||
kbase_pm_invoke(kbdev, KBASE_PM_CORE_L2,
|
||||
l2_present,
|
||||
ACTION_PWROFF);
|
||||
else
|
||||
/* If L2 cache is powered then we must flush it
|
||||
* before we power off the GPU. Normally this
|
||||
* would have been handled when the L2 was
|
||||
* powered off.
|
||||
*/
|
||||
kbase_gpu_start_cache_clean_nolock(
|
||||
kbdev, GPU_COMMAND_CACHE_CLN_INV_L2);
|
||||
#if !MALI_USE_CSF
|
||||
KBASE_KTRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE_TILER, NULL, 0u);
|
||||
KBASE_KTRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE_TILER, NULL, 0u);
|
||||
#else
|
||||
KBASE_KTRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE_L2, NULL, 0u);
|
||||
KBASE_KTRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE_L2, NULL, 0u);
|
||||
#endif
|
||||
backend->l2_state = KBASE_L2_PEND_OFF;
|
||||
backend->l2_state = KBASE_L2_PEND_OFF;
|
||||
}
|
||||
break;
|
||||
|
||||
case KBASE_L2_PEND_OFF:
|
||||
@@ -1803,12 +1863,7 @@ static bool kbase_pm_is_in_desired_state_nolock(struct kbase_device *kbdev)
|
||||
|
||||
lockdep_assert_held(&kbdev->hwaccess_lock);
|
||||
|
||||
if (kbase_pm_is_l2_desired(kbdev) &&
|
||||
kbdev->pm.backend.l2_state != KBASE_L2_ON)
|
||||
in_desired_state = false;
|
||||
else if (!kbase_pm_is_l2_desired(kbdev) &&
|
||||
kbdev->pm.backend.l2_state != KBASE_L2_OFF)
|
||||
in_desired_state = false;
|
||||
in_desired_state = kbase_pm_l2_is_in_desired_state(kbdev);
|
||||
|
||||
#if !MALI_USE_CSF
|
||||
if (kbdev->pm.backend.shaders_desired &&
|
||||
@@ -1818,13 +1873,7 @@ static bool kbase_pm_is_in_desired_state_nolock(struct kbase_device *kbdev)
|
||||
kbdev->pm.backend.shaders_state != KBASE_SHADERS_OFF_CORESTACK_OFF)
|
||||
in_desired_state = false;
|
||||
#else
|
||||
if (kbase_pm_is_mcu_desired(kbdev) &&
|
||||
kbdev->pm.backend.mcu_state != KBASE_MCU_ON)
|
||||
in_desired_state = false;
|
||||
else if (!kbase_pm_is_mcu_desired(kbdev) &&
|
||||
(kbdev->pm.backend.mcu_state != KBASE_MCU_OFF) &&
|
||||
(kbdev->pm.backend.mcu_state != KBASE_MCU_IN_SLEEP))
|
||||
in_desired_state = false;
|
||||
in_desired_state = kbase_pm_mcu_is_in_desired_state(kbdev);
|
||||
#endif
|
||||
|
||||
return in_desired_state;
|
||||
@@ -2077,11 +2126,13 @@ void kbase_pm_reset_complete(struct kbase_device *kbdev)
|
||||
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
|
||||
}
|
||||
|
||||
/* Timeout for kbase_pm_wait_for_desired_state when wait_event_killable has
|
||||
* aborted due to a fatal signal. If the time spent waiting has exceeded this
|
||||
* threshold then there is most likely a hardware issue.
|
||||
#if !MALI_USE_CSF
|
||||
/* Timeout in milliseconds for GPU Power Management to reach the desired
|
||||
* Shader and L2 state. If the time spent waiting has exceeded this threshold
|
||||
* then there is most likely a hardware issue.
|
||||
*/
|
||||
#define PM_TIMEOUT_MS (5000) /* 5s */
|
||||
#endif
|
||||
|
||||
static void kbase_pm_timed_out(struct kbase_device *kbdev)
|
||||
{
|
||||
@@ -2156,7 +2207,7 @@ int kbase_pm_wait_for_l2_powered(struct kbase_device *kbdev)
|
||||
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
|
||||
|
||||
#if MALI_USE_CSF
|
||||
timeout = kbase_csf_timeout_in_jiffies(PM_TIMEOUT_MS);
|
||||
timeout = kbase_csf_timeout_in_jiffies(kbase_get_timeout_ms(kbdev, CSF_PM_TIMEOUT));
|
||||
#else
|
||||
timeout = msecs_to_jiffies(PM_TIMEOUT_MS);
|
||||
#endif
|
||||
@@ -2188,7 +2239,7 @@ int kbase_pm_wait_for_desired_state(struct kbase_device *kbdev)
|
||||
unsigned long flags;
|
||||
long remaining;
|
||||
#if MALI_USE_CSF
|
||||
long timeout = kbase_csf_timeout_in_jiffies(PM_TIMEOUT_MS);
|
||||
long timeout = kbase_csf_timeout_in_jiffies(kbase_get_timeout_ms(kbdev, CSF_PM_TIMEOUT));
|
||||
#else
|
||||
long timeout = msecs_to_jiffies(PM_TIMEOUT_MS);
|
||||
#endif
|
||||
@@ -2285,6 +2336,7 @@ static void update_user_reg_page_mapping(struct kbase_device *kbdev)
|
||||
{
|
||||
lockdep_assert_held(&kbdev->pm.lock);
|
||||
|
||||
mutex_lock(&kbdev->csf.reg_lock);
|
||||
if (kbdev->csf.mali_file_inode) {
|
||||
/* This would zap the pte corresponding to the mapping of User
|
||||
* register page for all the Kbase contexts.
|
||||
@@ -2293,6 +2345,7 @@ static void update_user_reg_page_mapping(struct kbase_device *kbdev)
|
||||
BASEP_MEM_CSF_USER_REG_PAGE_HANDLE,
|
||||
PAGE_SIZE, 1);
|
||||
}
|
||||
mutex_unlock(&kbdev->csf.reg_lock);
|
||||
}
|
||||
#endif
|
||||
|
||||
@@ -2358,6 +2411,7 @@ void kbase_pm_clock_on(struct kbase_device *kbdev, bool is_resume)
|
||||
update_user_reg_page_mapping(kbdev);
|
||||
#endif
|
||||
|
||||
|
||||
if (reset_required) {
|
||||
/* GPU state was lost, reset GPU to ensure it is in a
|
||||
* consistent state
|
||||
@@ -2659,8 +2713,8 @@ static int kbase_pm_hw_issues_detect(struct kbase_device *kbdev)
|
||||
{
|
||||
struct device_node *np = kbdev->dev->of_node;
|
||||
const u32 gpu_id = kbdev->gpu_props.props.raw_props.gpu_id;
|
||||
const u32 prod_id = (gpu_id & GPU_ID_VERSION_PRODUCT_ID) >>
|
||||
GPU_ID_VERSION_PRODUCT_ID_SHIFT;
|
||||
const u32 prod_id =
|
||||
(gpu_id & GPU_ID_VERSION_PRODUCT_ID) >> KBASE_GPU_ID_VERSION_PRODUCT_ID_SHIFT;
|
||||
int error = 0;
|
||||
|
||||
kbdev->hw_quirks_gpu = 0;
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
|
||||
/*
|
||||
*
|
||||
* (C) COPYRIGHT 2010-2021 ARM Limited. All rights reserved.
|
||||
* (C) COPYRIGHT 2010-2022 ARM Limited. All rights reserved.
|
||||
*
|
||||
* This program is free software and is provided to you under the terms of the
|
||||
* GNU General Public License version 2 as published by the Free Software
|
||||
@@ -301,6 +301,8 @@ void kbase_pm_update_state(struct kbase_device *kbdev);
|
||||
* kbase_pm_state_machine_init - Initialize the state machines, primarily the
|
||||
* shader poweroff timer
|
||||
* @kbdev: Device pointer
|
||||
*
|
||||
* Return: 0 on success, error code on error
|
||||
*/
|
||||
int kbase_pm_state_machine_init(struct kbase_device *kbdev);
|
||||
|
||||
@@ -453,6 +455,8 @@ void kbase_pm_wait_for_gpu_power_down(struct kbase_device *kbdev);
|
||||
* Setup the power management callbacks and initialize/enable the runtime-pm
|
||||
* for the Mali GPU platform device, using the callback function. This must be
|
||||
* called before the kbase_pm_register_access_enable() function.
|
||||
*
|
||||
* Return: 0 on success, error code on error
|
||||
*/
|
||||
int kbase_pm_runtime_init(struct kbase_device *kbdev);
|
||||
|
||||
@@ -810,8 +814,49 @@ static inline bool kbase_pm_no_mcu_core_pwroff(struct kbase_device *kbdev)
|
||||
return kbdev->pm.backend.csf_pm_sched_flags &
|
||||
CSF_DYNAMIC_PM_CORE_KEEP_ON;
|
||||
}
|
||||
|
||||
/**
|
||||
* kbase_pm_mcu_is_in_desired_state - Check if MCU is in stable ON/OFF state.
|
||||
*
|
||||
* @kbdev: Device pointer
|
||||
*
|
||||
* Return: true if MCU is in stable ON/OFF state.
|
||||
*/
|
||||
static inline bool kbase_pm_mcu_is_in_desired_state(struct kbase_device *kbdev)
|
||||
{
|
||||
bool in_desired_state = true;
|
||||
|
||||
if (kbase_pm_is_mcu_desired(kbdev) && kbdev->pm.backend.mcu_state != KBASE_MCU_ON)
|
||||
in_desired_state = false;
|
||||
else if (!kbase_pm_is_mcu_desired(kbdev) &&
|
||||
(kbdev->pm.backend.mcu_state != KBASE_MCU_OFF) &&
|
||||
(kbdev->pm.backend.mcu_state != KBASE_MCU_IN_SLEEP))
|
||||
in_desired_state = false;
|
||||
|
||||
return in_desired_state;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
/**
|
||||
* kbase_pm_l2_is_in_desired_state - Check if L2 is in stable ON/OFF state.
|
||||
*
|
||||
* @kbdev: Device pointer
|
||||
*
|
||||
* Return: true if L2 is in stable ON/OFF state.
|
||||
*/
|
||||
static inline bool kbase_pm_l2_is_in_desired_state(struct kbase_device *kbdev)
|
||||
{
|
||||
bool in_desired_state = true;
|
||||
|
||||
if (kbase_pm_is_l2_desired(kbdev) && kbdev->pm.backend.l2_state != KBASE_L2_ON)
|
||||
in_desired_state = false;
|
||||
else if (!kbase_pm_is_l2_desired(kbdev) && kbdev->pm.backend.l2_state != KBASE_L2_OFF)
|
||||
in_desired_state = false;
|
||||
|
||||
return in_desired_state;
|
||||
}
|
||||
|
||||
/**
|
||||
* kbase_pm_lock - Lock all necessary mutexes to perform PM actions
|
||||
*
|
||||
|
||||
@@ -491,8 +491,7 @@ static void kbase_pm_metrics_active_calc(struct kbase_device *kbdev)
|
||||
BASE_JD_REQ_SPECIFIC_COHERENT_GROUP)
|
||||
? katom->device_nr : 0;
|
||||
if (!WARN_ON(device_nr >= 2))
|
||||
kbdev->pm.backend.metrics.
|
||||
active_cl_ctx[device_nr] = 1;
|
||||
kbdev->pm.backend.metrics.active_cl_ctx[device_nr] = 1;
|
||||
} else {
|
||||
kbdev->pm.backend.metrics.active_gl_ctx[js] = 1;
|
||||
trace_sysgraph(SGR_ACTIVE, 0, js);
|
||||
|
||||
@@ -180,9 +180,8 @@ void kbase_pm_update_dynamic_cores_onoff(struct kbase_device *kbdev)
|
||||
|
||||
shaders_desired = kbdev->pm.backend.pm_current_policy->shaders_needed(kbdev);
|
||||
|
||||
if (shaders_desired && kbase_pm_is_l2_desired(kbdev)) {
|
||||
if (shaders_desired && kbase_pm_is_l2_desired(kbdev))
|
||||
kbase_pm_update_state(kbdev);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
@@ -249,9 +248,8 @@ KBASE_EXPORT_TEST_API(kbase_pm_get_policy);
|
||||
#if MALI_USE_CSF
|
||||
static int policy_change_wait_for_L2_off(struct kbase_device *kbdev)
|
||||
{
|
||||
#define WAIT_DURATION_MS (3000)
|
||||
long remaining;
|
||||
long timeout = kbase_csf_timeout_in_jiffies(WAIT_DURATION_MS);
|
||||
long timeout = kbase_csf_timeout_in_jiffies(kbase_get_timeout_ms(kbdev, CSF_PM_TIMEOUT));
|
||||
int err = 0;
|
||||
|
||||
/* Wait for L2 becoming off, by which the MCU is also implicitly off
|
||||
|
||||
@@ -113,39 +113,60 @@ unsigned int kbase_get_timeout_ms(struct kbase_device *kbdev,
|
||||
*/
|
||||
|
||||
u64 timeout, nr_cycles = 0;
|
||||
/* Default value to mean 'no cap' */
|
||||
u64 timeout_cap = U64_MAX;
|
||||
u64 freq_khz = kbdev->lowest_gpu_freq_khz;
|
||||
/* Only for debug messages, safe default in case it's mis-maintained */
|
||||
const char *selector_str = "(unknown)";
|
||||
|
||||
WARN_ON(!freq_khz);
|
||||
|
||||
switch (selector) {
|
||||
/* use Firmware timeout if invalid selection */
|
||||
case KBASE_TIMEOUT_SELECTOR_COUNT:
|
||||
default:
|
||||
#if !MALI_USE_CSF
|
||||
WARN(1, "Invalid timeout selector used! Using default value");
|
||||
timeout = JM_DEFAULT_TIMEOUT_CYCLES;
|
||||
CSTD_UNUSED(nr_cycles);
|
||||
nr_cycles = JM_DEFAULT_TIMEOUT_CYCLES;
|
||||
break;
|
||||
#else
|
||||
/* Use Firmware timeout if invalid selection */
|
||||
WARN(1,
|
||||
"Invalid timeout selector used! Using CSF Firmware timeout");
|
||||
fallthrough;
|
||||
case CSF_FIRMWARE_TIMEOUT:
|
||||
selector_str = "CSF_FIRMWARE_TIMEOUT";
|
||||
nr_cycles = CSF_FIRMWARE_TIMEOUT_CYCLES;
|
||||
timeout = div_u64(nr_cycles, freq_khz);
|
||||
/* cap CSF FW timeout to FIRMWARE_PING_INTERVAL_MS
|
||||
* if calculated timeout exceeds it. This should be adapted to a
|
||||
* direct timeout comparison once the FIRMWARE_PING_INTERVAL_MS
|
||||
* option is added to this timeout function. A compile-time check
|
||||
* such as BUILD_BUG_ON can also be done once the firmware ping
|
||||
* interval in cycles becomes available as a macro.
|
||||
/* Setup a cap on CSF FW timeout to FIRMWARE_PING_INTERVAL_MS,
|
||||
* if calculated timeout exceeds it. This should be adapted to
|
||||
* a direct timeout comparison once the
|
||||
* FIRMWARE_PING_INTERVAL_MS option is added to this timeout
|
||||
* function. A compile-time check such as BUILD_BUG_ON can also
|
||||
* be done once the firmware ping interval in cycles becomes
|
||||
* available as a macro.
|
||||
*/
|
||||
if (timeout > FIRMWARE_PING_INTERVAL_MS) {
|
||||
dev_dbg(kbdev->dev, "Capped CSF_FIRMWARE_TIMEOUT %llu to %d",
|
||||
timeout, FIRMWARE_PING_INTERVAL_MS);
|
||||
timeout = FIRMWARE_PING_INTERVAL_MS;
|
||||
}
|
||||
#endif
|
||||
timeout_cap = FIRMWARE_PING_INTERVAL_MS;
|
||||
break;
|
||||
case CSF_PM_TIMEOUT:
|
||||
selector_str = "CSF_PM_TIMEOUT";
|
||||
nr_cycles = CSF_PM_TIMEOUT_CYCLES;
|
||||
break;
|
||||
case CSF_GPU_RESET_TIMEOUT:
|
||||
selector_str = "CSF_GPU_RESET_TIMEOUT";
|
||||
nr_cycles = CSF_GPU_RESET_TIMEOUT_CYCLES;
|
||||
break;
|
||||
#endif
|
||||
}
|
||||
|
||||
timeout = div_u64(nr_cycles, freq_khz);
|
||||
if (timeout > timeout_cap) {
|
||||
dev_dbg(kbdev->dev, "Capped %s %llu to %llu", selector_str,
|
||||
(unsigned long long)timeout, (unsigned long long)timeout_cap);
|
||||
timeout = timeout_cap;
|
||||
}
|
||||
if (WARN(timeout > UINT_MAX,
|
||||
"Capping excessive timeout %llums for %s at freq %llukHz to UINT_MAX ms",
|
||||
(unsigned long long)timeout, selector_str, (unsigned long long)freq_khz))
|
||||
timeout = UINT_MAX;
|
||||
return (unsigned int)timeout;
|
||||
}
|
||||
|
||||
|
||||
@@ -34,6 +34,9 @@ bob_defaults {
|
||||
"CONFIG_MALI_NO_MALI_DEFAULT_GPU={{.gpu}}",
|
||||
],
|
||||
},
|
||||
mali_platform_dt_pin_rst: {
|
||||
kbuild_options: ["CONFIG_MALI_PLATFORM_DT_PIN_RST=y"],
|
||||
},
|
||||
gpu_has_csf: {
|
||||
kbuild_options: ["CONFIG_MALI_CSF_SUPPORT=y"],
|
||||
},
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
|
||||
/*
|
||||
*
|
||||
* (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
|
||||
* (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
|
||||
*
|
||||
* This program is free software and is provided to you under the terms of the
|
||||
* GNU General Public License version 2 as published by the Free Software
|
||||
@@ -110,6 +110,11 @@ static void kbase_context_flush_jobs(struct kbase_context *kctx)
|
||||
flush_workqueue(kctx->jctx.job_done_wq);
|
||||
}
|
||||
|
||||
/**
|
||||
* kbase_context_free - Free kcontext at its destruction
|
||||
*
|
||||
* @kctx: kcontext to be freed
|
||||
*/
|
||||
static void kbase_context_free(struct kbase_context *kctx)
|
||||
{
|
||||
kbase_timeline_post_kbase_context_destroy(kctx);
|
||||
|
||||
@@ -152,6 +152,7 @@ int kbase_context_common_init(struct kbase_context *kctx)
|
||||
|
||||
init_waitqueue_head(&kctx->event_queue);
|
||||
atomic_set(&kctx->event_count, 0);
|
||||
|
||||
#if !MALI_USE_CSF
|
||||
atomic_set(&kctx->event_closed, false);
|
||||
#if IS_ENABLED(CONFIG_GPU_TRACEPOINTS)
|
||||
@@ -159,6 +160,11 @@ int kbase_context_common_init(struct kbase_context *kctx)
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if MALI_USE_CSF
|
||||
atomic64_set(&kctx->num_fixable_allocs, 0);
|
||||
atomic64_set(&kctx->num_fixed_allocs, 0);
|
||||
#endif
|
||||
|
||||
bitmap_copy(kctx->cookies, &cookies_mask, BITS_PER_LONG);
|
||||
|
||||
kctx->id = atomic_add_return(1, &(kctx->kbdev->ctx_num)) - 1;
|
||||
|
||||
@@ -52,7 +52,7 @@
|
||||
/*
|
||||
* Maximum number of loops polling the GPU before we assume the GPU has hung.
|
||||
*/
|
||||
#define IPA_INACTIVE_MAX_LOOPS ((unsigned int)8000000)
|
||||
#define IPA_INACTIVE_MAX_LOOPS (8000000U)
|
||||
|
||||
/*
|
||||
* Number of bits used to configure a performance counter in SELECT registers.
|
||||
@@ -347,9 +347,8 @@ void kbase_ipa_control_init(struct kbase_device *kbdev)
|
||||
|
||||
spin_lock_init(&ipa_ctrl->lock);
|
||||
ipa_ctrl->num_active_sessions = 0;
|
||||
for (i = 0; i < KBASE_IPA_CONTROL_MAX_SESSIONS; i++) {
|
||||
for (i = 0; i < KBASE_IPA_CONTROL_MAX_SESSIONS; i++)
|
||||
ipa_ctrl->sessions[i].active = false;
|
||||
}
|
||||
|
||||
listener_data = kmalloc(sizeof(struct kbase_ipa_control_listener_data),
|
||||
GFP_KERNEL);
|
||||
@@ -514,8 +513,10 @@ int kbase_ipa_control_register(
|
||||
struct kbase_ipa_control_session *session = NULL;
|
||||
unsigned long flags;
|
||||
|
||||
if (WARN_ON(kbdev == NULL) || WARN_ON(perf_counters == NULL) ||
|
||||
WARN_ON(client == NULL) ||
|
||||
if (WARN_ON(unlikely(kbdev == NULL)))
|
||||
return -ENODEV;
|
||||
|
||||
if (WARN_ON(perf_counters == NULL) || WARN_ON(client == NULL) ||
|
||||
WARN_ON(num_counters > KBASE_IPA_CONTROL_MAX_COUNTERS)) {
|
||||
dev_err(kbdev->dev, "%s: wrong input arguments", __func__);
|
||||
return -EINVAL;
|
||||
@@ -697,7 +698,10 @@ int kbase_ipa_control_unregister(struct kbase_device *kbdev, const void *client)
|
||||
unsigned long flags;
|
||||
bool new_config = false, valid_session = false;
|
||||
|
||||
if (WARN_ON(kbdev == NULL) || WARN_ON(client == NULL)) {
|
||||
if (WARN_ON(unlikely(kbdev == NULL)))
|
||||
return -ENODEV;
|
||||
|
||||
if (WARN_ON(client == NULL)) {
|
||||
dev_err(kbdev->dev, "%s: wrong input arguments", __func__);
|
||||
return -EINVAL;
|
||||
}
|
||||
@@ -779,8 +783,10 @@ int kbase_ipa_control_query(struct kbase_device *kbdev, const void *client,
|
||||
unsigned long flags;
|
||||
bool gpu_ready;
|
||||
|
||||
if (WARN_ON(kbdev == NULL) || WARN_ON(client == NULL) ||
|
||||
WARN_ON(values == NULL)) {
|
||||
if (WARN_ON(unlikely(kbdev == NULL)))
|
||||
return -ENODEV;
|
||||
|
||||
if (WARN_ON(client == NULL) || WARN_ON(values == NULL)) {
|
||||
dev_err(kbdev->dev, "%s: wrong input arguments", __func__);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
@@ -27,7 +27,7 @@
|
||||
#include <linux/export.h>
|
||||
#include <linux/priority_control_manager.h>
|
||||
#include <linux/shmem_fs.h>
|
||||
#include <uapi/gpu/arm/bifrost/csf/mali_gpu_csf_registers.h>
|
||||
#include <csf/mali_kbase_csf_registers.h>
|
||||
#include "mali_kbase_csf_tiler_heap.h"
|
||||
#include <mmu/mali_kbase_mmu.h>
|
||||
#include "mali_kbase_csf_timeout.h"
|
||||
@@ -561,6 +561,10 @@ static int csf_queue_register_internal(struct kbase_context *kctx,
|
||||
queue->sync_ptr = 0;
|
||||
queue->sync_value = 0;
|
||||
|
||||
#if IS_ENABLED(CONFIG_DEBUG_FS)
|
||||
queue->saved_cmd_ptr = 0;
|
||||
#endif
|
||||
|
||||
queue->sb_status = 0;
|
||||
queue->blocked_reason = CS_STATUS_BLOCKED_REASON_REASON_UNBLOCKED;
|
||||
|
||||
@@ -572,6 +576,8 @@ static int csf_queue_register_internal(struct kbase_context *kctx,
|
||||
INIT_WORK(&queue->fatal_event_work, fatal_event_worker);
|
||||
list_add(&queue->link, &kctx->csf.queue_list);
|
||||
|
||||
queue->extract_ofs = 0;
|
||||
|
||||
region->flags |= KBASE_REG_NO_USER_FREE;
|
||||
region->user_data = queue;
|
||||
|
||||
@@ -621,13 +627,13 @@ int kbase_csf_queue_register_ex(struct kbase_context *kctx,
|
||||
return -EINVAL;
|
||||
|
||||
/* Validate the cs_trace configuration parameters */
|
||||
if (reg->ex_buffer_size &&
|
||||
((reg->ex_event_size > max_size) ||
|
||||
(reg->ex_buffer_size & (reg->ex_buffer_size - 1)) ||
|
||||
(reg->ex_buffer_size < min_buf_size)))
|
||||
return -EINVAL;
|
||||
if (reg->ex_buffer_size &&
|
||||
((reg->ex_event_size > max_size) ||
|
||||
(reg->ex_buffer_size & (reg->ex_buffer_size - 1)) ||
|
||||
(reg->ex_buffer_size < min_buf_size)))
|
||||
return -EINVAL;
|
||||
|
||||
return csf_queue_register_internal(kctx, NULL, reg);
|
||||
return csf_queue_register_internal(kctx, NULL, reg);
|
||||
}
|
||||
|
||||
static void unbind_queue(struct kbase_context *kctx,
|
||||
@@ -1195,7 +1201,7 @@ static int create_protected_suspend_buffer(struct kbase_device *const kbdev,
|
||||
}
|
||||
|
||||
s_buf->pma = kbase_csf_protected_memory_alloc(kbdev, phys,
|
||||
nr_pages);
|
||||
nr_pages, true);
|
||||
if (s_buf->pma == NULL) {
|
||||
err = -ENOMEM;
|
||||
goto pma_alloc_failed;
|
||||
@@ -1229,7 +1235,7 @@ mmu_insert_failed:
|
||||
mutex_unlock(&kbdev->csf.reg_lock);
|
||||
|
||||
add_va_region_failed:
|
||||
kbase_csf_protected_memory_free(kbdev, s_buf->pma, nr_pages);
|
||||
kbase_csf_protected_memory_free(kbdev, s_buf->pma, nr_pages, true);
|
||||
pma_alloc_failed:
|
||||
kfree(phys);
|
||||
phy_alloc_failed:
|
||||
@@ -1479,7 +1485,7 @@ static void term_protected_suspend_buffer(struct kbase_device *const kbdev,
|
||||
kbase_remove_va_region(kbdev, s_buf->reg);
|
||||
mutex_unlock(&kbdev->csf.reg_lock);
|
||||
|
||||
kbase_csf_protected_memory_free(kbdev, s_buf->pma, nr_pages);
|
||||
kbase_csf_protected_memory_free(kbdev, s_buf->pma, nr_pages, true);
|
||||
s_buf->pma = NULL;
|
||||
kfree(s_buf->reg);
|
||||
s_buf->reg = NULL;
|
||||
@@ -1925,7 +1931,7 @@ void kbase_csf_ctx_term(struct kbase_context *kctx)
|
||||
* This function will handle the OoM event request from the firmware for the
|
||||
* CS. It will retrieve the address of heap context and heap's
|
||||
* statistics (like number of render passes in-flight) from the CS's kernel
|
||||
* kernel output page and pass them to the tiler heap function to allocate a
|
||||
* output page and pass them to the tiler heap function to allocate a
|
||||
* new chunk.
|
||||
* It will also update the CS's kernel input page with the address
|
||||
* of a new chunk that was allocated.
|
||||
@@ -2521,8 +2527,24 @@ static void process_cs_interrupts(struct kbase_queue_group *const group,
|
||||
}
|
||||
}
|
||||
|
||||
if (protm_pend)
|
||||
queue_work(group->kctx->csf.wq, &group->protm_event_work);
|
||||
if (protm_pend) {
|
||||
struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
|
||||
u32 current_protm_pending_seq =
|
||||
scheduler->tick_protm_pending_seq;
|
||||
|
||||
if (current_protm_pending_seq > group->scan_seq_num) {
|
||||
scheduler->tick_protm_pending_seq = group->scan_seq_num;
|
||||
queue_work(group->kctx->csf.wq, &group->protm_event_work);
|
||||
}
|
||||
|
||||
if (test_bit(group->csg_nr, scheduler->csg_slots_idle_mask)) {
|
||||
clear_bit(group->csg_nr,
|
||||
scheduler->csg_slots_idle_mask);
|
||||
dev_dbg(kbdev->dev,
|
||||
"Group-%d on slot %d de-idled by protm request",
|
||||
group->handle, group->csg_nr);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -2593,6 +2615,10 @@ static void process_csg_interrupts(struct kbase_device *const kbdev,
|
||||
CSG_REQ, ack, CSG_REQ_SYNC_UPDATE_MASK);
|
||||
|
||||
KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SYNC_UPDATE_INTERRUPT, group, req ^ ack);
|
||||
|
||||
/* SYNC_UPDATE events shall invalidate GPU idle event */
|
||||
atomic_set(&kbdev->csf.scheduler.gpu_no_longer_idle, true);
|
||||
|
||||
kbase_csf_event_signal_cpu_only(group->kctx);
|
||||
}
|
||||
|
||||
@@ -2609,15 +2635,25 @@ static void process_csg_interrupts(struct kbase_device *const kbdev,
|
||||
dev_dbg(kbdev->dev, "Idle notification received for Group %u on slot %d\n",
|
||||
group->handle, csg_nr);
|
||||
|
||||
/* Check if the scheduling tick can be advanced */
|
||||
if (kbase_csf_scheduler_all_csgs_idle(kbdev)) {
|
||||
if (!scheduler->gpu_idle_fw_timer_enabled)
|
||||
kbase_csf_scheduler_advance_tick_nolock(kbdev);
|
||||
} else if (atomic_read(&scheduler->non_idle_offslot_grps)) {
|
||||
if (atomic_read(&scheduler->non_idle_offslot_grps)) {
|
||||
/* If there are non-idle CSGs waiting for a slot, fire
|
||||
* a tock for a replacement.
|
||||
*/
|
||||
mod_delayed_work(scheduler->wq, &scheduler->tock_work, 0);
|
||||
} else {
|
||||
u32 current_protm_pending_seq =
|
||||
scheduler->tick_protm_pending_seq;
|
||||
|
||||
if ((current_protm_pending_seq !=
|
||||
KBASEP_TICK_PROTM_PEND_SCAN_SEQ_NR_INVALID) &&
|
||||
(group->scan_seq_num < current_protm_pending_seq)) {
|
||||
/* If the protm enter was prevented due to groups
|
||||
* priority, then fire a tock for the scheduler
|
||||
* to re-examine the case.
|
||||
*/
|
||||
mod_delayed_work(scheduler->wq,
|
||||
&scheduler->tock_work, 0);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2803,20 +2839,29 @@ static inline void process_protm_exit(struct kbase_device *kbdev, u32 glb_ack)
|
||||
void kbase_csf_interrupt(struct kbase_device *kbdev, u32 val)
|
||||
{
|
||||
unsigned long flags;
|
||||
u32 remaining = val;
|
||||
u32 csg_interrupts = val & ~JOB_IRQ_GLOBAL_IF;
|
||||
|
||||
lockdep_assert_held(&kbdev->hwaccess_lock);
|
||||
|
||||
KBASE_KTRACE_ADD(kbdev, CSF_INTERRUPT, NULL, val);
|
||||
kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR), val);
|
||||
|
||||
if (csg_interrupts != 0) {
|
||||
kbase_csf_scheduler_spin_lock(kbdev, &flags);
|
||||
while (csg_interrupts != 0) {
|
||||
int const csg_nr = ffs(csg_interrupts) - 1;
|
||||
|
||||
process_csg_interrupts(kbdev, csg_nr);
|
||||
csg_interrupts &= ~(1 << csg_nr);
|
||||
}
|
||||
kbase_csf_scheduler_spin_unlock(kbdev, flags);
|
||||
}
|
||||
|
||||
if (val & JOB_IRQ_GLOBAL_IF) {
|
||||
const struct kbase_csf_global_iface *const global_iface =
|
||||
&kbdev->csf.global_iface;
|
||||
struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
|
||||
|
||||
kbdev->csf.interrupt_received = true;
|
||||
remaining &= ~JOB_IRQ_GLOBAL_IF;
|
||||
|
||||
if (!kbdev->csf.firmware_reloaded)
|
||||
kbase_csf_firmware_reload_completed(kbdev);
|
||||
@@ -2837,31 +2882,12 @@ void kbase_csf_interrupt(struct kbase_device *kbdev, u32 val)
|
||||
|
||||
/* Handle IDLE Hysteresis notification event */
|
||||
if ((glb_req ^ glb_ack) & GLB_REQ_IDLE_EVENT_MASK) {
|
||||
int non_idle_offslot_grps;
|
||||
bool can_suspend_on_idle;
|
||||
|
||||
dev_dbg(kbdev->dev, "Idle-hysteresis event flagged");
|
||||
kbase_csf_firmware_global_input_mask(
|
||||
global_iface, GLB_REQ, glb_ack,
|
||||
GLB_REQ_IDLE_EVENT_MASK);
|
||||
|
||||
non_idle_offslot_grps = atomic_read(&scheduler->non_idle_offslot_grps);
|
||||
can_suspend_on_idle = kbase_pm_idle_groups_sched_suspendable(kbdev);
|
||||
KBASE_KTRACE_ADD(kbdev, SCHEDULER_CAN_IDLE, NULL,
|
||||
((u64)(u32)non_idle_offslot_grps) | (((u64)can_suspend_on_idle) << 32));
|
||||
|
||||
if (!non_idle_offslot_grps) {
|
||||
if (can_suspend_on_idle)
|
||||
queue_work(system_highpri_wq,
|
||||
&scheduler->gpu_idle_work);
|
||||
} else {
|
||||
/* Advance the scheduling tick to get
|
||||
* the non-idle suspended groups loaded
|
||||
* soon.
|
||||
*/
|
||||
kbase_csf_scheduler_advance_tick_nolock(
|
||||
kbdev);
|
||||
}
|
||||
kbase_csf_scheduler_process_gpu_idle_event(kbdev);
|
||||
}
|
||||
|
||||
process_prfcnt_interrupts(kbdev, glb_req, glb_ack);
|
||||
@@ -2873,23 +2899,8 @@ void kbase_csf_interrupt(struct kbase_device *kbdev, u32 val)
|
||||
*/
|
||||
kbase_pm_update_state(kbdev);
|
||||
}
|
||||
|
||||
if (!remaining) {
|
||||
wake_up_all(&kbdev->csf.event_wait);
|
||||
KBASE_KTRACE_ADD(kbdev, CSF_INTERRUPT_END, NULL, val);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
kbase_csf_scheduler_spin_lock(kbdev, &flags);
|
||||
while (remaining != 0) {
|
||||
int const csg_nr = ffs(remaining) - 1;
|
||||
|
||||
process_csg_interrupts(kbdev, csg_nr);
|
||||
remaining &= ~(1 << csg_nr);
|
||||
}
|
||||
kbase_csf_scheduler_spin_unlock(kbdev, flags);
|
||||
|
||||
wake_up_all(&kbdev->csf.event_wait);
|
||||
KBASE_KTRACE_ADD(kbdev, CSF_INTERRUPT_END, NULL, val);
|
||||
}
|
||||
|
||||
@@ -40,7 +40,12 @@
|
||||
*/
|
||||
#define KBASEP_USER_DB_NR_INVALID ((s8)-1)
|
||||
|
||||
#define FIRMWARE_PING_INTERVAL_MS (8000) /* 8 seconds */
|
||||
/* Indicates an invalid value for the scan out sequence number, used to
|
||||
* signify there is no group that has protected mode execution pending.
|
||||
*/
|
||||
#define KBASEP_TICK_PROTM_PEND_SCAN_SEQ_NR_INVALID (U32_MAX)
|
||||
|
||||
#define FIRMWARE_PING_INTERVAL_MS (12000) /* 12 seconds */
|
||||
|
||||
#define FIRMWARE_IDLE_HYSTERESIS_TIME_MS (10) /* Default 10 milliseconds */
|
||||
|
||||
@@ -312,7 +317,7 @@ int kbase_csf_setup_dummy_user_reg_page(struct kbase_device *kbdev);
|
||||
|
||||
/**
|
||||
* kbase_csf_free_dummy_user_reg_page - Free the dummy page that was used
|
||||
* used to replace the User register page
|
||||
* to replace the User register page
|
||||
*
|
||||
* @kbdev: Instance of a GPU platform device that implements a CSF interface.
|
||||
*/
|
||||
|
||||
@@ -54,7 +54,7 @@ static int kbasep_csf_cpu_queue_debugfs_show(struct seq_file *file, void *data)
|
||||
mutex_lock(&kctx->csf.lock);
|
||||
if (atomic_read(&kctx->csf.cpu_queue.dump_req_status) !=
|
||||
BASE_CSF_CPU_QUEUE_DUMP_COMPLETE) {
|
||||
seq_printf(file, "Dump request already started! (try again)\n");
|
||||
seq_puts(file, "Dump request already started! (try again)\n");
|
||||
mutex_unlock(&kctx->csf.lock);
|
||||
return -EBUSY;
|
||||
}
|
||||
@@ -64,7 +64,8 @@ static int kbasep_csf_cpu_queue_debugfs_show(struct seq_file *file, void *data)
|
||||
kbase_event_wakeup(kctx);
|
||||
mutex_unlock(&kctx->csf.lock);
|
||||
|
||||
seq_printf(file, "CPU Queues table (version:v%u):\n", MALI_CSF_CPU_QUEUE_DEBUGFS_VERSION);
|
||||
seq_puts(file,
|
||||
"CPU Queues table (version:v" __stringify(MALI_CSF_CPU_QUEUE_DEBUGFS_VERSION) "):\n");
|
||||
|
||||
wait_for_completion_timeout(&kctx->csf.cpu_queue.dump_cmp,
|
||||
msecs_to_jiffies(3000));
|
||||
@@ -79,9 +80,8 @@ static int kbasep_csf_cpu_queue_debugfs_show(struct seq_file *file, void *data)
|
||||
kfree(kctx->csf.cpu_queue.buffer);
|
||||
kctx->csf.cpu_queue.buffer = NULL;
|
||||
kctx->csf.cpu_queue.buffer_size = 0;
|
||||
}
|
||||
else
|
||||
seq_printf(file, "Dump error! (time out)\n");
|
||||
} else
|
||||
seq_puts(file, "Dump error! (time out)\n");
|
||||
|
||||
atomic_set(&kctx->csf.cpu_queue.dump_req_status,
|
||||
BASE_CSF_CPU_QUEUE_DUMP_COMPLETE);
|
||||
|
||||
@@ -172,16 +172,18 @@ static void kbasep_csf_scheduler_dump_active_queue(struct seq_file *file,
|
||||
cs_active = addr[CS_ACTIVE/4];
|
||||
|
||||
#define KBASEP_CSF_DEBUGFS_CS_HEADER_USER_IO \
|
||||
"Bind Idx, Ringbuf addr, Prio, Insert offset, Extract offset, Active, Doorbell\n"
|
||||
"Bind Idx, Ringbuf addr, Size, Prio, Insert offset, Extract offset, Active, Doorbell\n"
|
||||
|
||||
seq_printf(file, KBASEP_CSF_DEBUGFS_CS_HEADER_USER_IO "%8d, %16llx, %4u, %16llx, %16llx, %6u, %8d\n",
|
||||
queue->csi_index, queue->base_addr, queue->priority,
|
||||
cs_insert, cs_extract, cs_active, queue->doorbell_nr);
|
||||
seq_printf(file, KBASEP_CSF_DEBUGFS_CS_HEADER_USER_IO "%8d, %16llx, %8x, %4u, %16llx, %16llx, %6u, %8d\n",
|
||||
queue->csi_index, queue->base_addr,
|
||||
queue->size,
|
||||
queue->priority, cs_insert, cs_extract, cs_active, queue->doorbell_nr);
|
||||
|
||||
/* Print status information for blocked group waiting for sync object. For on-slot queues,
|
||||
* if cs_trace is enabled, dump the interface's cs_trace configuration.
|
||||
*/
|
||||
if (kbase_csf_scheduler_group_get_slot(queue->group) < 0) {
|
||||
seq_printf(file, "SAVED_CMD_PTR: 0x%llx\n", queue->saved_cmd_ptr);
|
||||
if (CS_STATUS_WAIT_SYNC_WAIT_GET(queue->status_wait)) {
|
||||
wait_status = queue->status_wait;
|
||||
wait_sync_value = queue->sync_value;
|
||||
@@ -268,17 +270,13 @@ static void kbasep_csf_scheduler_dump_active_queue(struct seq_file *file,
|
||||
seq_puts(file, "\n");
|
||||
}
|
||||
|
||||
/* Waiting timeout for STATUS_UPDATE acknowledgment, in milliseconds */
|
||||
#define CSF_STATUS_UPDATE_TO_MS (100)
|
||||
|
||||
static void update_active_group_status(struct seq_file *file,
|
||||
struct kbase_queue_group *const group)
|
||||
{
|
||||
struct kbase_device *const kbdev = group->kctx->kbdev;
|
||||
struct kbase_csf_cmd_stream_group_info const *const ginfo =
|
||||
&kbdev->csf.global_iface.groups[group->csg_nr];
|
||||
long remaining =
|
||||
kbase_csf_timeout_in_jiffies(CSF_STATUS_UPDATE_TO_MS);
|
||||
long remaining = kbase_csf_timeout_in_jiffies(kbdev->csf.fw_timeout_ms);
|
||||
unsigned long flags;
|
||||
|
||||
/* Global doorbell ring for CSG STATUS_UPDATE request or User doorbell
|
||||
@@ -327,6 +325,7 @@ static void kbasep_csf_scheduler_dump_active_group(struct seq_file *file,
|
||||
struct kbase_device *const kbdev = group->kctx->kbdev;
|
||||
u32 ep_c, ep_r;
|
||||
char exclusive;
|
||||
char idle = 'N';
|
||||
struct kbase_csf_cmd_stream_group_info const *const ginfo =
|
||||
&kbdev->csf.global_iface.groups[group->csg_nr];
|
||||
u8 slot_priority =
|
||||
@@ -345,8 +344,12 @@ static void kbasep_csf_scheduler_dump_active_group(struct seq_file *file,
|
||||
else
|
||||
exclusive = '0';
|
||||
|
||||
seq_puts(file, "GroupID, CSG NR, CSG Prio, Run State, Priority, C_EP(Alloc/Req), F_EP(Alloc/Req), T_EP(Alloc/Req), Exclusive\n");
|
||||
seq_printf(file, "%7d, %6d, %8d, %9d, %8d, %11d/%3d, %11d/%3d, %11d/%3d, %9c\n",
|
||||
if (kbase_csf_firmware_csg_output(ginfo, CSG_STATUS_STATE) &
|
||||
CSG_STATUS_STATE_IDLE_MASK)
|
||||
idle = 'Y';
|
||||
|
||||
seq_puts(file, "GroupID, CSG NR, CSG Prio, Run State, Priority, C_EP(Alloc/Req), F_EP(Alloc/Req), T_EP(Alloc/Req), Exclusive, Idle\n");
|
||||
seq_printf(file, "%7d, %6d, %8d, %9d, %8d, %11d/%3d, %11d/%3d, %11d/%3d, %9c, %4c\n",
|
||||
group->handle,
|
||||
group->csg_nr,
|
||||
slot_priority,
|
||||
@@ -358,7 +361,8 @@ static void kbasep_csf_scheduler_dump_active_group(struct seq_file *file,
|
||||
CSG_STATUS_EP_REQ_FRAGMENT_EP_GET(ep_r),
|
||||
CSG_STATUS_EP_CURRENT_TILER_EP_GET(ep_c),
|
||||
CSG_STATUS_EP_REQ_TILER_EP_GET(ep_r),
|
||||
exclusive);
|
||||
exclusive,
|
||||
idle);
|
||||
|
||||
/* Wait for the User doorbell ring to take effect */
|
||||
if (kbdev->csf.scheduler.state != SCHED_SLEEPING)
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
|
||||
/*
|
||||
*
|
||||
* (C) COPYRIGHT 2018-2021 ARM Limited. All rights reserved.
|
||||
* (C) COPYRIGHT 2018-2022 ARM Limited. All rights reserved.
|
||||
*
|
||||
* This program is free software and is provided to you under the terms of the
|
||||
* GNU General Public License version 2 as published by the Free Software
|
||||
@@ -252,6 +252,24 @@ enum kbase_queue_group_priority {
|
||||
KBASE_QUEUE_GROUP_PRIORITY_COUNT
|
||||
};
|
||||
|
||||
/**
 * enum kbase_timeout_selector - The choice of which timeout to get scaled
 *                               using the lowest GPU frequency.
 * @CSF_FIRMWARE_TIMEOUT: Response timeout from CSF firmware.
 * @CSF_PM_TIMEOUT: Timeout for GPU Power Management to reach the desired
 *                  Shader, L2 and MCU state.
 * @CSF_GPU_RESET_TIMEOUT: Waiting timeout for GPU reset to complete.
 * @KBASE_TIMEOUT_SELECTOR_COUNT: Number of timeout selectors. Must be last in
 *                                the enum.
 */
enum kbase_timeout_selector {
	CSF_FIRMWARE_TIMEOUT,
	CSF_PM_TIMEOUT,
	CSF_GPU_RESET_TIMEOUT,

	/* Must be the last in the enum */
	KBASE_TIMEOUT_SELECTOR_COUNT
};
|
||||
|
||||
/**
|
||||
* struct kbase_csf_notification - Event or error generated as part of command
|
||||
@@ -333,6 +351,13 @@ struct kbase_csf_notification {
|
||||
* @cs_fatal_info: Records additional information about the CS fatal event.
|
||||
* @cs_fatal: Records information about the CS fatal event.
|
||||
* @pending: Indicating whether the queue has new submitted work.
|
||||
* @extract_ofs: The current EXTRACT offset, this is updated during certain
|
||||
* events such as GPU idle IRQ in order to help detect a
|
||||
* queue's true idle status.
|
||||
* @saved_cmd_ptr: The command pointer value for the GPU queue, saved when the
|
||||
* group to which queue is bound is suspended.
|
||||
* This can be useful in certain cases to know up to which
|
||||
* point execution reached in the Linear command buffer.
|
||||
*/
|
||||
struct kbase_queue {
|
||||
struct kbase_context *kctx;
|
||||
@@ -367,6 +392,10 @@ struct kbase_queue {
|
||||
u64 cs_fatal_info;
|
||||
u32 cs_fatal;
|
||||
atomic_t pending;
|
||||
u64 extract_ofs;
|
||||
#if IS_ENABLED(CONFIG_DEBUG_FS)
|
||||
u64 saved_cmd_ptr;
|
||||
#endif
|
||||
};
|
||||
|
||||
/**
|
||||
@@ -851,11 +880,14 @@ struct kbase_csf_csg_slot {
|
||||
* This pointer being set doesn't necessarily indicates
|
||||
* that GPU is in protected mode, kbdev->protected_mode
|
||||
* needs to be checked for that.
|
||||
* @gpu_idle_fw_timer_enabled: Whether the CSF scheduler has activated the
|
||||
* firmware idle hysteresis timer for preparing a
|
||||
* GPU suspend on idle.
|
||||
* @idle_wq: Workqueue for executing GPU idle notification
|
||||
* handler.
|
||||
* @gpu_idle_work: Work item for facilitating the scheduler to bring
|
||||
* the GPU to a low-power mode on becoming idle.
|
||||
* @gpu_no_longer_idle: Effective only when the GPU idle worker has been
|
||||
* queued for execution, this indicates whether the
|
||||
* GPU has become non-idle since the last time the
|
||||
* idle notification was received.
|
||||
* @non_idle_offslot_grps: Count of off-slot non-idle groups. Reset during
|
||||
* the scheduler active phase in a tick. It then
|
||||
* tracks the count of non-idle groups across all the
|
||||
@@ -876,6 +908,12 @@ struct kbase_csf_csg_slot {
|
||||
* when scheduling tick needs to be advanced from
|
||||
* interrupt context, without actually deactivating
|
||||
* the @tick_timer first and then enqueuing @tick_work.
|
||||
* @tick_protm_pending_seq: Scan out sequence number of the group that has
|
||||
* protected mode execution pending for the queue(s)
|
||||
* bound to it and will be considered first for the
|
||||
* protected mode execution compared to other such
|
||||
* groups. It is updated on every tick/tock.
|
||||
* @interrupt_lock is used to serialize the access.
|
||||
*/
|
||||
struct kbase_csf_scheduler {
|
||||
struct mutex lock;
|
||||
@@ -907,13 +945,15 @@ struct kbase_csf_scheduler {
|
||||
struct kbase_queue_group *top_grp;
|
||||
bool tock_pending_request;
|
||||
struct kbase_queue_group *active_protm_grp;
|
||||
bool gpu_idle_fw_timer_enabled;
|
||||
struct workqueue_struct *idle_wq;
|
||||
struct work_struct gpu_idle_work;
|
||||
atomic_t gpu_no_longer_idle;
|
||||
atomic_t non_idle_offslot_grps;
|
||||
u32 non_idle_scanout_grps;
|
||||
u32 pm_active_count;
|
||||
unsigned int csg_scheduling_period_ms;
|
||||
bool tick_timer_active;
|
||||
u32 tick_protm_pending_seq;
|
||||
};
|
||||
|
||||
/*
|
||||
@@ -1050,8 +1090,7 @@ struct kbase_ipa_control_prfcnt_config {
|
||||
*
|
||||
*/
|
||||
struct kbase_ipa_control_prfcnt_block {
|
||||
struct kbase_ipa_control_prfcnt_config
|
||||
select[KBASE_IPA_CONTROL_NUM_BLOCK_COUNTERS];
|
||||
struct kbase_ipa_control_prfcnt_config select[KBASE_IPA_CONTROL_NUM_BLOCK_COUNTERS];
|
||||
size_t num_available_counters;
|
||||
};
|
||||
|
||||
@@ -1074,8 +1113,7 @@ struct kbase_ipa_control_prfcnt_block {
|
||||
*/
|
||||
struct kbase_ipa_control {
|
||||
struct kbase_ipa_control_prfcnt_block blocks[KBASE_IPA_CORE_TYPE_NUM];
|
||||
struct kbase_ipa_control_session
|
||||
sessions[KBASE_IPA_CONTROL_MAX_SESSIONS];
|
||||
struct kbase_ipa_control_session sessions[KBASE_IPA_CONTROL_MAX_SESSIONS];
|
||||
spinlock_t lock;
|
||||
void *rtm_listener_data;
|
||||
size_t num_active_sessions;
|
||||
@@ -1089,8 +1127,15 @@ struct kbase_ipa_control {
|
||||
* @node: Interface objects are on the kbase_device:csf.firmware_interfaces
|
||||
* list using this list_head to link them
|
||||
* @phys: Array of the physical (tagged) addresses making up this interface
|
||||
* @reuse_pages: Flag used to identify if the FW interface entry reuses
|
||||
* physical pages allocated for another FW interface entry.
|
||||
* @is_small_page: Flag used to identify if small pages are used for
|
||||
* the FW interface entry.
|
||||
* @name: NULL-terminated string naming the interface
|
||||
* @num_pages: Number of entries in @phys and @pma (and length of the interface)
|
||||
* @num_pages_aligned: Same as @num_pages except for the case when @is_small_page
|
||||
* is false and @reuse_pages is false and therefore will be
|
||||
* aligned to NUM_4K_PAGES_IN_2MB_PAGE.
|
||||
* @virtual: Starting GPU virtual address this interface is mapped at
|
||||
* @flags: bitmask of CSF_FIRMWARE_ENTRY_* conveying the interface attributes
|
||||
* @data_start: Offset into firmware image at which the interface data starts
|
||||
@@ -1102,8 +1147,11 @@ struct kbase_ipa_control {
|
||||
struct kbase_csf_firmware_interface {
|
||||
struct list_head node;
|
||||
struct tagged_addr *phys;
|
||||
bool reuse_pages;
|
||||
bool is_small_page;
|
||||
char *name;
|
||||
u32 num_pages;
|
||||
u32 num_pages_aligned;
|
||||
u32 virtual;
|
||||
u32 flags;
|
||||
u32 data_start;
|
||||
@@ -1177,7 +1225,7 @@ struct kbase_csf_hwcnt {
|
||||
* @reg_lock: Lock to serialize the MCU firmware related actions
|
||||
* that affect all contexts such as allocation of
|
||||
* regions from shared interface area, assignment of
|
||||
* of hardware doorbell pages, assignment of CSGs,
|
||||
* hardware doorbell pages, assignment of CSGs,
|
||||
* sending global requests.
|
||||
* @event_wait: Wait queue to wait for receiving csf events, i.e.
|
||||
* the interrupt from CSF firmware, or scheduler state
|
||||
@@ -1200,6 +1248,10 @@ struct kbase_csf_hwcnt {
|
||||
* in GPU reset has completed.
|
||||
* @firmware_reload_needed: Flag for indicating that the firmware needs to be
|
||||
* reloaded as part of the GPU reset action.
|
||||
* @firmware_full_reload_needed: Flag for indicating that the firmware needs to
|
||||
* be fully re-loaded. This may be set when the
|
||||
* boot or re-init of MCU fails after a successful
|
||||
* soft reset.
|
||||
* @firmware_hctl_core_pwr: Flag for indicating that the host driver is in
|
||||
* charge of the shader core's power transitions, and
|
||||
* the mcu_core_pwroff timeout feature is disabled
|
||||
@@ -1259,6 +1311,7 @@ struct kbase_csf_device {
|
||||
bool firmware_inited;
|
||||
bool firmware_reloaded;
|
||||
bool firmware_reload_needed;
|
||||
bool firmware_full_reload_needed;
|
||||
bool firmware_hctl_core_pwr;
|
||||
struct work_struct firmware_reload_work;
|
||||
bool glb_init_request_pending;
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
|
||||
/*
|
||||
*
|
||||
* (C) COPYRIGHT 2021 ARM Limited. All rights reserved.
|
||||
* (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved.
|
||||
*
|
||||
* This program is free software and is provided to you under the terms of the
|
||||
* GNU General Public License version 2 as published by the Free Software
|
||||
@@ -110,9 +110,9 @@ static inline void kbase_csf_event_signal_cpu_only(struct kbase_context *kctx)
|
||||
/**
|
||||
* kbase_csf_event_init - Initialize event object
|
||||
*
|
||||
* This function initializes the event object.
|
||||
*
|
||||
* @kctx: The kbase context whose event object will be initialized.
|
||||
*
|
||||
* This function initializes the event object.
|
||||
*/
|
||||
void kbase_csf_event_init(struct kbase_context *const kctx);
|
||||
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
|
||||
/*
|
||||
*
|
||||
* (C) COPYRIGHT 2018-2021 ARM Limited. All rights reserved.
|
||||
* (C) COPYRIGHT 2018-2022 ARM Limited. All rights reserved.
|
||||
*
|
||||
* This program is free software and is provided to you under the terms of the
|
||||
* GNU General Public License version 2 as published by the Free Software
|
||||
@@ -24,6 +24,7 @@
|
||||
#include "mali_kbase_csf_trace_buffer.h"
|
||||
#include "mali_kbase_csf_timeout.h"
|
||||
#include "mali_kbase_mem.h"
|
||||
#include "mali_kbase_mem_pool_group.h"
|
||||
#include "mali_kbase_reset_gpu.h"
|
||||
#include "mali_kbase_ctx_sched.h"
|
||||
#include "mali_kbase_csf_scheduler.h"
|
||||
@@ -35,7 +36,7 @@
|
||||
#include "mali_kbase_csf_tl_reader.h"
|
||||
#include "backend/gpu/mali_kbase_clk_rate_trace_mgr.h"
|
||||
#include <csf/ipa_control/mali_kbase_csf_ipa_control.h>
|
||||
#include <uapi/gpu/arm/bifrost/csf/mali_gpu_csf_registers.h>
|
||||
#include <csf/mali_kbase_csf_registers.h>
|
||||
|
||||
#include <linux/list.h>
|
||||
#include <linux/slab.h>
|
||||
@@ -50,7 +51,6 @@
|
||||
#include <asm/arch_timer.h>
|
||||
|
||||
#define MALI_MAX_FIRMWARE_NAME_LEN ((size_t)20)
|
||||
#define ACK_TIMEOUT_MILLISECONDS 1000
|
||||
|
||||
static char fw_name[MALI_MAX_FIRMWARE_NAME_LEN] = "mali_csffw.bin";
|
||||
module_param_string(fw_name, fw_name, sizeof(fw_name), 0644);
|
||||
@@ -105,9 +105,9 @@ MODULE_PARM_DESC(fw_debug,
|
||||
|
||||
#define CSF_MAX_FW_STOP_LOOPS (100000)
|
||||
|
||||
/* Combined mask of the GLB_REQ bits named below. Defined exactly once: a
 * second #define with a different replacement list is an invalid
 * redefinition.
 */
#define CSF_GLB_REQ_CFG_MASK                                           \
	(GLB_REQ_CFG_ALLOC_EN_MASK | GLB_REQ_CFG_PROGRESS_TIMER_MASK | \
	 GLB_REQ_CFG_PWROFF_TIMER_MASK | GLB_REQ_IDLE_ENABLE_MASK)
|
||||
|
||||
static inline u32 input_page_read(const u32 *const input, const u32 offset)
|
||||
{
|
||||
@@ -190,11 +190,11 @@ static int setup_shared_iface_static_region(struct kbase_device *kbdev)
|
||||
return -EINVAL;
|
||||
|
||||
reg = kbase_alloc_free_region(&kbdev->csf.shared_reg_rbtree, 0,
|
||||
interface->num_pages, KBASE_REG_ZONE_MCU_SHARED);
|
||||
interface->num_pages_aligned, KBASE_REG_ZONE_MCU_SHARED);
|
||||
if (reg) {
|
||||
mutex_lock(&kbdev->csf.reg_lock);
|
||||
ret = kbase_add_va_region_rbtree(kbdev, reg,
|
||||
interface->virtual, interface->num_pages, 1);
|
||||
interface->virtual, interface->num_pages_aligned, 1);
|
||||
mutex_unlock(&kbdev->csf.reg_lock);
|
||||
if (ret)
|
||||
kfree(reg);
|
||||
@@ -423,7 +423,7 @@ static void load_fw_image_section(struct kbase_device *kbdev, const u8 *data,
|
||||
}
|
||||
}
|
||||
|
||||
static int reload_fw_data_sections(struct kbase_device *kbdev)
|
||||
static int reload_fw_image(struct kbase_device *kbdev)
|
||||
{
|
||||
const u32 magic = FIRMWARE_HEADER_MAGIC;
|
||||
struct kbase_csf_firmware_interface *interface;
|
||||
@@ -451,23 +451,78 @@ static int reload_fw_data_sections(struct kbase_device *kbdev)
|
||||
}
|
||||
|
||||
list_for_each_entry(interface, &kbdev->csf.firmware_interfaces, node) {
|
||||
/* Skip reload of text & read only data sections */
|
||||
if ((interface->flags & CSF_FIRMWARE_ENTRY_EXECUTE) ||
|
||||
!(interface->flags & CSF_FIRMWARE_ENTRY_WRITE))
|
||||
continue;
|
||||
/* Don't skip re-loading any section if full reload was requested */
|
||||
if (!kbdev->csf.firmware_full_reload_needed) {
|
||||
/* Skip reload of text & read only data sections */
|
||||
if ((interface->flags & CSF_FIRMWARE_ENTRY_EXECUTE) ||
|
||||
!(interface->flags & CSF_FIRMWARE_ENTRY_WRITE))
|
||||
continue;
|
||||
}
|
||||
|
||||
load_fw_image_section(kbdev, firmware->data, interface->phys,
|
||||
interface->num_pages, interface->flags,
|
||||
interface->data_start, interface->data_end);
|
||||
}
|
||||
|
||||
kbase_csf_firmware_reload_trace_buffers_data(kbdev);
|
||||
kbdev->csf.firmware_full_reload_needed = false;
|
||||
|
||||
kbase_csf_firmware_reload_trace_buffers_data(kbdev);
|
||||
out:
|
||||
release_firmware(firmware);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/**
|
||||
* entry_find_large_page_to_reuse() - Find if the large page of previously parsed
|
||||
* FW interface entry can be reused to store
|
||||
* the contents of new FW interface entry.
|
||||
*
|
||||
* @kbdev: Kbase device structure
|
||||
* @virtual_start: Start of the virtual address range required for an entry allocation
|
||||
* @virtual_end: End of the virtual address range required for an entry allocation
|
||||
* @phys: Pointer to the array of physical (tagged) addresses making up the new
|
||||
* FW interface entry. It is an output parameter which would be made to
|
||||
* point to an already existing array allocated for the previously parsed
|
||||
* FW interface entry using large page(s). If no appropriate entry is
|
||||
* found it is set to NULL.
|
||||
* @pma: Pointer to a protected memory allocation. It is an output parameter
|
||||
* which would be made to the protected memory allocation of a previously
|
||||
* parsed FW interface entry using large page(s) from protected memory.
|
||||
* If no appropriate entry is found it is set to NULL.
|
||||
* @num_pages: Number of pages requested.
|
||||
* @num_pages_aligned: This is an output parameter used to carry the number of 4KB pages
|
||||
* within the 2MB pages aligned allocation.
|
||||
* @is_small_page: This is an output flag used to select between the small and large page
|
||||
* to be used for the FW entry allocation.
|
||||
*
|
||||
* Go through all the already initialized interfaces and find if a previously
|
||||
* allocated large page can be used to store contents of new FW interface entry.
|
||||
*
|
||||
* Return: true if a large page can be reused, false otherwise.
|
||||
*/
|
||||
static inline bool entry_find_large_page_to_reuse(
|
||||
struct kbase_device *kbdev, const u32 virtual_start, const u32 virtual_end,
|
||||
struct tagged_addr **phys, struct protected_memory_allocation ***pma,
|
||||
u32 num_pages, u32 *num_pages_aligned, bool *is_small_page)
|
||||
{
|
||||
struct kbase_csf_firmware_interface *interface = NULL;
|
||||
struct kbase_csf_firmware_interface *target_interface = NULL;
|
||||
u32 virtual_diff_min = U32_MAX;
|
||||
bool reuse_large_page = false;
|
||||
|
||||
CSTD_UNUSED(interface);
|
||||
CSTD_UNUSED(target_interface);
|
||||
CSTD_UNUSED(virtual_diff_min);
|
||||
|
||||
*num_pages_aligned = num_pages;
|
||||
*is_small_page = true;
|
||||
*phys = NULL;
|
||||
*pma = NULL;
|
||||
|
||||
|
||||
return reuse_large_page;
|
||||
}
|
||||
|
||||
/**
|
||||
* parse_memory_setup_entry() - Process an "interface memory setup" section
|
||||
*
|
||||
@@ -493,6 +548,7 @@ static int parse_memory_setup_entry(struct kbase_device *kbdev,
|
||||
const u32 data_start = entry[3];
|
||||
const u32 data_end = entry[4];
|
||||
u32 num_pages;
|
||||
u32 num_pages_aligned;
|
||||
char *name;
|
||||
struct tagged_addr *phys = NULL;
|
||||
struct kbase_csf_firmware_interface *interface = NULL;
|
||||
@@ -500,6 +556,8 @@ static int parse_memory_setup_entry(struct kbase_device *kbdev,
|
||||
unsigned long mem_flags = 0;
|
||||
u32 cache_mode = 0;
|
||||
struct protected_memory_allocation **pma = NULL;
|
||||
bool reuse_pages = false;
|
||||
bool is_small_page = true;
|
||||
|
||||
if (data_end < data_start) {
|
||||
dev_err(kbdev->dev, "Firmware corrupt, data_end < data_start (0x%x<0x%x)\n",
|
||||
@@ -542,23 +600,37 @@ static int parse_memory_setup_entry(struct kbase_device *kbdev,
|
||||
num_pages = (virtual_end - virtual_start)
|
||||
>> PAGE_SHIFT;
|
||||
|
||||
phys = kmalloc_array(num_pages, sizeof(*phys), GFP_KERNEL);
|
||||
reuse_pages = entry_find_large_page_to_reuse(
|
||||
kbdev, virtual_start, virtual_end, &phys, &pma,
|
||||
num_pages, &num_pages_aligned, &is_small_page);
|
||||
if (!reuse_pages)
|
||||
phys = kmalloc_array(num_pages_aligned, sizeof(*phys), GFP_KERNEL);
|
||||
|
||||
if (!phys)
|
||||
return -ENOMEM;
|
||||
|
||||
if (protected_mode) {
|
||||
pma = kbase_csf_protected_memory_alloc(kbdev, phys, num_pages);
|
||||
|
||||
if (pma == NULL) {
|
||||
ret = -ENOMEM;
|
||||
goto out;
|
||||
if (!reuse_pages) {
|
||||
pma = kbase_csf_protected_memory_alloc(
|
||||
kbdev, phys, num_pages_aligned, is_small_page);
|
||||
}
|
||||
|
||||
if (!pma)
|
||||
ret = -ENOMEM;
|
||||
} else {
|
||||
ret = kbase_mem_pool_alloc_pages(
|
||||
&kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW],
|
||||
num_pages, phys, false);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
if (!reuse_pages) {
|
||||
ret = kbase_mem_pool_alloc_pages(
|
||||
kbase_mem_pool_group_select(
|
||||
kbdev, KBASE_MEM_GROUP_CSF_FW, is_small_page),
|
||||
num_pages_aligned, phys, false);
|
||||
}
|
||||
}
|
||||
|
||||
if (ret < 0) {
|
||||
dev_err(kbdev->dev,
|
||||
"Failed to allocate %u physical pages for the firmware interface entry at VA 0x%x\n",
|
||||
num_pages_aligned, virtual_start);
|
||||
goto out;
|
||||
}
|
||||
|
||||
allocated_pages = true;
|
||||
@@ -584,7 +656,10 @@ static int parse_memory_setup_entry(struct kbase_device *kbdev,
|
||||
|
||||
interface->name = name;
|
||||
interface->phys = phys;
|
||||
interface->reuse_pages = reuse_pages;
|
||||
interface->is_small_page = is_small_page;
|
||||
interface->num_pages = num_pages;
|
||||
interface->num_pages_aligned = num_pages_aligned;
|
||||
interface->virtual = virtual_start;
|
||||
interface->kernel_map = NULL;
|
||||
interface->flags = flags;
|
||||
@@ -645,15 +720,17 @@ static int parse_memory_setup_entry(struct kbase_device *kbdev,
|
||||
|
||||
list_add(&interface->node, &kbdev->csf.firmware_interfaces);
|
||||
|
||||
ret = kbase_mmu_insert_pages_no_flush(kbdev, &kbdev->csf.mcu_mmu,
|
||||
virtual_start >> PAGE_SHIFT, phys, num_pages, mem_flags,
|
||||
KBASE_MEM_GROUP_CSF_FW);
|
||||
if (!reuse_pages) {
|
||||
ret = kbase_mmu_insert_pages_no_flush(kbdev, &kbdev->csf.mcu_mmu,
|
||||
virtual_start >> PAGE_SHIFT, phys, num_pages_aligned, mem_flags,
|
||||
KBASE_MEM_GROUP_CSF_FW);
|
||||
|
||||
if (ret != 0) {
|
||||
dev_err(kbdev->dev, "Failed to insert firmware pages\n");
|
||||
/* The interface has been added to the list, so cleanup will
|
||||
* be handled by firmware unloading
|
||||
*/
|
||||
if (ret != 0) {
|
||||
dev_err(kbdev->dev, "Failed to insert firmware pages\n");
|
||||
/* The interface has been added to the list, so cleanup will
|
||||
* be handled by firmware unloading
|
||||
*/
|
||||
}
|
||||
}
|
||||
|
||||
dev_dbg(kbdev->dev, "Processed section '%s'", name);
|
||||
@@ -662,16 +739,22 @@ static int parse_memory_setup_entry(struct kbase_device *kbdev,
|
||||
|
||||
out:
|
||||
if (allocated_pages) {
|
||||
if (protected_mode) {
|
||||
kbase_csf_protected_memory_free(kbdev, pma, num_pages);
|
||||
} else {
|
||||
kbase_mem_pool_free_pages(
|
||||
&kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW],
|
||||
num_pages, phys, false, false);
|
||||
if (!reuse_pages) {
|
||||
if (protected_mode) {
|
||||
kbase_csf_protected_memory_free(
|
||||
kbdev, pma, num_pages_aligned, is_small_page);
|
||||
} else {
|
||||
kbase_mem_pool_free_pages(
|
||||
kbase_mem_pool_group_select(
|
||||
kbdev, KBASE_MEM_GROUP_CSF_FW, is_small_page),
|
||||
num_pages_aligned, phys, false, false);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
kfree(phys);
|
||||
if (!reuse_pages)
|
||||
kfree(phys);
|
||||
|
||||
kfree(interface);
|
||||
return ret;
|
||||
}
|
||||
@@ -994,11 +1077,10 @@ static int parse_capabilities(struct kbase_device *kbdev)
|
||||
iface->group_stride = shared_info[GLB_GROUP_STRIDE/4];
|
||||
iface->prfcnt_size = shared_info[GLB_PRFCNT_SIZE/4];
|
||||
|
||||
if (iface->version >= kbase_csf_interface_version(1, 1, 0)) {
|
||||
if (iface->version >= kbase_csf_interface_version(1, 1, 0))
|
||||
iface->instr_features = shared_info[GLB_INSTR_FEATURES / 4];
|
||||
} else {
|
||||
else
|
||||
iface->instr_features = 0;
|
||||
}
|
||||
|
||||
if ((GROUP_CONTROL_0 +
|
||||
(unsigned long)iface->group_num * iface->group_stride) >
|
||||
@@ -1378,16 +1460,28 @@ static void set_timeout_global(
|
||||
set_global_request(global_iface, GLB_REQ_CFG_PROGRESS_TIMER_MASK);
|
||||
}
|
||||
|
||||
static void enable_gpu_idle_timer(struct kbase_device *const kbdev)
|
||||
{
|
||||
struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface;
|
||||
|
||||
kbase_csf_scheduler_spin_lock_assert_held(kbdev);
|
||||
|
||||
kbase_csf_firmware_global_input(global_iface, GLB_IDLE_TIMER,
|
||||
kbdev->csf.gpu_idle_dur_count);
|
||||
kbase_csf_firmware_global_input_mask(global_iface, GLB_REQ, GLB_REQ_REQ_IDLE_ENABLE,
|
||||
GLB_REQ_IDLE_ENABLE_MASK);
|
||||
dev_dbg(kbdev->dev, "Enabling GPU idle timer with count-value: 0x%.8x",
|
||||
kbdev->csf.gpu_idle_dur_count);
|
||||
}
|
||||
|
||||
static void global_init(struct kbase_device *const kbdev, u64 core_mask)
|
||||
{
|
||||
u32 const ack_irq_mask = GLB_ACK_IRQ_MASK_CFG_ALLOC_EN_MASK |
|
||||
GLB_ACK_IRQ_MASK_PING_MASK |
|
||||
GLB_ACK_IRQ_MASK_CFG_PROGRESS_TIMER_MASK |
|
||||
GLB_ACK_IRQ_MASK_PROTM_ENTER_MASK |
|
||||
GLB_ACK_IRQ_MASK_PROTM_EXIT_MASK |
|
||||
GLB_ACK_IRQ_MASK_FIRMWARE_CONFIG_UPDATE_MASK |
|
||||
GLB_ACK_IRQ_MASK_CFG_PWROFF_TIMER_MASK |
|
||||
GLB_ACK_IRQ_MASK_IDLE_EVENT_MASK;
|
||||
u32 const ack_irq_mask =
|
||||
GLB_ACK_IRQ_MASK_CFG_ALLOC_EN_MASK | GLB_ACK_IRQ_MASK_PING_MASK |
|
||||
GLB_ACK_IRQ_MASK_CFG_PROGRESS_TIMER_MASK | GLB_ACK_IRQ_MASK_PROTM_ENTER_MASK |
|
||||
GLB_ACK_IRQ_MASK_PROTM_EXIT_MASK | GLB_ACK_IRQ_MASK_FIRMWARE_CONFIG_UPDATE_MASK |
|
||||
GLB_ACK_IRQ_MASK_CFG_PWROFF_TIMER_MASK | GLB_ACK_IRQ_MASK_IDLE_EVENT_MASK |
|
||||
GLB_ACK_IRQ_MASK_IDLE_ENABLE_MASK;
|
||||
|
||||
const struct kbase_csf_global_iface *const global_iface =
|
||||
&kbdev->csf.global_iface;
|
||||
@@ -1401,6 +1495,12 @@ static void global_init(struct kbase_device *const kbdev, u64 core_mask)
|
||||
|
||||
set_timeout_global(global_iface, kbase_csf_timeout_get(kbdev));
|
||||
|
||||
/* The GPU idle timer is always enabled for simplicity. Checks will be
|
||||
* done before scheduling the GPU idle worker to see if it is
|
||||
* appropriate for the current power policy.
|
||||
*/
|
||||
enable_gpu_idle_timer(kbdev);
|
||||
|
||||
/* Unmask the interrupts */
|
||||
kbase_csf_firmware_global_input(global_iface,
|
||||
GLB_ACK_IRQ_MASK, ack_irq_mask);
|
||||
@@ -1507,7 +1607,7 @@ static void kbase_csf_firmware_reload_worker(struct work_struct *work)
|
||||
KBASE_TLSTREAM_TL_KBASE_CSFFW_FW_RELOADING(kbdev, kbase_backend_get_cycle_cnt(kbdev));
|
||||
|
||||
/* Reload just the data sections from firmware binary image */
|
||||
err = reload_fw_data_sections(kbdev);
|
||||
err = reload_fw_image(kbdev);
|
||||
if (err)
|
||||
return;
|
||||
|
||||
@@ -1598,7 +1698,14 @@ static u32 convert_dur_to_idle_count(struct kbase_device *kbdev, const u32 dur_m
|
||||
|
||||
u32 kbase_csf_firmware_get_gpu_idle_hysteresis_time(struct kbase_device *kbdev)
|
||||
{
|
||||
return kbdev->csf.gpu_idle_hysteresis_ms;
|
||||
unsigned long flags;
|
||||
u32 dur;
|
||||
|
||||
kbase_csf_scheduler_spin_lock(kbdev, &flags);
|
||||
dur = kbdev->csf.gpu_idle_hysteresis_ms;
|
||||
kbase_csf_scheduler_spin_unlock(kbdev, flags);
|
||||
|
||||
return dur;
|
||||
}
|
||||
|
||||
u32 kbase_csf_firmware_set_gpu_idle_hysteresis_time(struct kbase_device *kbdev, u32 dur)
|
||||
@@ -1606,11 +1713,53 @@ u32 kbase_csf_firmware_set_gpu_idle_hysteresis_time(struct kbase_device *kbdev,
|
||||
unsigned long flags;
|
||||
const u32 hysteresis_val = convert_dur_to_idle_count(kbdev, dur);
|
||||
|
||||
/* The 'fw_load_lock' is taken to synchronize against the deferred
|
||||
* loading of FW, where the idle timer will be enabled.
|
||||
*/
|
||||
mutex_lock(&kbdev->fw_load_lock);
|
||||
if (unlikely(!kbdev->csf.firmware_inited)) {
|
||||
kbase_csf_scheduler_spin_lock(kbdev, &flags);
|
||||
kbdev->csf.gpu_idle_hysteresis_ms = dur;
|
||||
kbdev->csf.gpu_idle_dur_count = hysteresis_val;
|
||||
kbase_csf_scheduler_spin_unlock(kbdev, flags);
|
||||
mutex_unlock(&kbdev->fw_load_lock);
|
||||
goto end;
|
||||
}
|
||||
mutex_unlock(&kbdev->fw_load_lock);
|
||||
|
||||
kbase_csf_scheduler_pm_active(kbdev);
|
||||
if (kbase_csf_scheduler_wait_mcu_active(kbdev)) {
|
||||
dev_err(kbdev->dev,
|
||||
"Unable to activate the MCU, the idle hysteresis value shall remain unchanged");
|
||||
kbase_csf_scheduler_pm_idle(kbdev);
|
||||
return kbdev->csf.gpu_idle_dur_count;
|
||||
}
|
||||
|
||||
/* The 'reg_lock' is also taken and is held until the update is
|
||||
* complete, to ensure the update of idle timer value by multiple Users
|
||||
* gets serialized.
|
||||
*/
|
||||
mutex_lock(&kbdev->csf.reg_lock);
|
||||
/* The firmware only reads the new idle timer value when the timer is
|
||||
* disabled.
|
||||
*/
|
||||
kbase_csf_scheduler_spin_lock(kbdev, &flags);
|
||||
kbase_csf_firmware_disable_gpu_idle_timer(kbdev);
|
||||
kbase_csf_scheduler_spin_unlock(kbdev, flags);
|
||||
/* Ensure that the request has taken effect */
|
||||
wait_for_global_request(kbdev, GLB_REQ_IDLE_DISABLE_MASK);
|
||||
|
||||
kbase_csf_scheduler_spin_lock(kbdev, &flags);
|
||||
kbdev->csf.gpu_idle_hysteresis_ms = dur;
|
||||
kbdev->csf.gpu_idle_dur_count = hysteresis_val;
|
||||
kbase_csf_firmware_enable_gpu_idle_timer(kbdev);
|
||||
kbase_csf_scheduler_spin_unlock(kbdev, flags);
|
||||
wait_for_global_request(kbdev, GLB_REQ_IDLE_ENABLE_MASK);
|
||||
mutex_unlock(&kbdev->csf.reg_lock);
|
||||
|
||||
kbase_csf_scheduler_pm_idle(kbdev);
|
||||
|
||||
end:
|
||||
dev_dbg(kbdev->dev, "CSF set firmware idle hysteresis count-value: 0x%.8x",
|
||||
hysteresis_val);
|
||||
|
||||
@@ -1711,7 +1860,7 @@ static int kbase_device_csf_iterator_trace_init(struct kbase_device *kbdev)
|
||||
long ack_timeout;
|
||||
|
||||
ack_timeout = kbase_csf_timeout_in_jiffies(
|
||||
ACK_TIMEOUT_MILLISECONDS);
|
||||
kbase_get_timeout_ms(kbdev, CSF_FIRMWARE_TIMEOUT));
|
||||
|
||||
/* write enable request to global input */
|
||||
kbase_csf_firmware_global_input_mask(
|
||||
@@ -1748,6 +1897,20 @@ int kbase_csf_firmware_early_init(struct kbase_device *kbdev)
|
||||
kbdev->csf.fw_timeout_ms =
|
||||
kbase_get_timeout_ms(kbdev, CSF_FIRMWARE_TIMEOUT);
|
||||
|
||||
kbdev->csf.gpu_idle_hysteresis_ms = FIRMWARE_IDLE_HYSTERESIS_TIME_MS;
|
||||
#ifdef KBASE_PM_RUNTIME
|
||||
if (kbase_pm_gpu_sleep_allowed(kbdev))
|
||||
kbdev->csf.gpu_idle_hysteresis_ms /=
|
||||
FIRMWARE_IDLE_HYSTERESIS_GPU_SLEEP_SCALER;
|
||||
#endif
|
||||
WARN_ON(!kbdev->csf.gpu_idle_hysteresis_ms);
|
||||
kbdev->csf.gpu_idle_dur_count = convert_dur_to_idle_count(
|
||||
kbdev, kbdev->csf.gpu_idle_hysteresis_ms);
|
||||
|
||||
kbdev->csf.mcu_core_pwroff_dur_us = DEFAULT_GLB_PWROFF_TIMEOUT_US;
|
||||
kbdev->csf.mcu_core_pwroff_dur_count = convert_dur_to_core_pwroff_count(
|
||||
kbdev, DEFAULT_GLB_PWROFF_TIMEOUT_US);
|
||||
|
||||
INIT_LIST_HEAD(&kbdev->csf.firmware_interfaces);
|
||||
INIT_LIST_HEAD(&kbdev->csf.firmware_config);
|
||||
INIT_LIST_HEAD(&kbdev->csf.firmware_timeline_metadata);
|
||||
@@ -1786,20 +1949,6 @@ int kbase_csf_firmware_init(struct kbase_device *kbdev)
|
||||
return ret;
|
||||
}
|
||||
|
||||
kbdev->csf.gpu_idle_hysteresis_ms = FIRMWARE_IDLE_HYSTERESIS_TIME_MS;
|
||||
#ifdef KBASE_PM_RUNTIME
|
||||
if (kbase_pm_gpu_sleep_allowed(kbdev))
|
||||
kbdev->csf.gpu_idle_hysteresis_ms /=
|
||||
FIRMWARE_IDLE_HYSTERESIS_GPU_SLEEP_SCALER;
|
||||
#endif
|
||||
WARN_ON(!kbdev->csf.gpu_idle_hysteresis_ms);
|
||||
kbdev->csf.gpu_idle_dur_count = convert_dur_to_idle_count(
|
||||
kbdev, kbdev->csf.gpu_idle_hysteresis_ms);
|
||||
|
||||
kbdev->csf.mcu_core_pwroff_dur_us = DEFAULT_GLB_PWROFF_TIMEOUT_US;
|
||||
kbdev->csf.mcu_core_pwroff_dur_count = convert_dur_to_core_pwroff_count(
|
||||
kbdev, DEFAULT_GLB_PWROFF_TIMEOUT_US);
|
||||
|
||||
ret = kbase_mcu_shared_interface_region_tracker_init(kbdev);
|
||||
if (ret != 0) {
|
||||
dev_err(kbdev->dev,
|
||||
@@ -1992,17 +2141,25 @@ void kbase_csf_firmware_term(struct kbase_device *kbdev)
|
||||
list_del(&interface->node);
|
||||
|
||||
vunmap(interface->kernel_map);
|
||||
if (interface->flags & CSF_FIRMWARE_ENTRY_PROTECTED) {
|
||||
kbase_csf_protected_memory_free(kbdev, interface->pma,
|
||||
interface->num_pages);
|
||||
} else {
|
||||
kbase_mem_pool_free_pages(
|
||||
&kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW],
|
||||
interface->num_pages, interface->phys,
|
||||
true, false);
|
||||
|
||||
if (!interface->reuse_pages) {
|
||||
if (interface->flags & CSF_FIRMWARE_ENTRY_PROTECTED) {
|
||||
kbase_csf_protected_memory_free(
|
||||
kbdev, interface->pma, interface->num_pages_aligned,
|
||||
interface->is_small_page);
|
||||
} else {
|
||||
kbase_mem_pool_free_pages(
|
||||
kbase_mem_pool_group_select(
|
||||
kbdev, KBASE_MEM_GROUP_CSF_FW,
|
||||
interface->is_small_page),
|
||||
interface->num_pages_aligned,
|
||||
interface->phys,
|
||||
true, false);
|
||||
}
|
||||
|
||||
kfree(interface->phys);
|
||||
}
|
||||
|
||||
kfree(interface->phys);
|
||||
kfree(interface);
|
||||
}
|
||||
|
||||
@@ -2034,29 +2191,19 @@ void kbase_csf_firmware_term(struct kbase_device *kbdev)
|
||||
void kbase_csf_firmware_enable_gpu_idle_timer(struct kbase_device *kbdev)
|
||||
{
|
||||
struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface;
|
||||
const u32 glb_req =
|
||||
kbase_csf_firmware_global_input_read(global_iface, GLB_REQ);
|
||||
const u32 glb_req = kbase_csf_firmware_global_input_read(global_iface, GLB_REQ);
|
||||
|
||||
kbase_csf_scheduler_spin_lock_assert_held(kbdev);
|
||||
|
||||
/* The scheduler is assumed to only call the enable when its internal
|
||||
* state indicates that the idle timer has previously been disabled. So
|
||||
* on entry the expected field values are:
|
||||
* 1. GLOBAL_INPUT_BLOCK.GLB_REQ.IDLE_ENABLE: 0
|
||||
* 2. GLOBAL_OUTPUT_BLOCK.GLB_ACK.IDLE_ENABLE: 0, or, on 1 -> 0
|
||||
*/
|
||||
|
||||
if (glb_req & GLB_REQ_IDLE_ENABLE_MASK)
|
||||
dev_err(kbdev->dev, "Incoherent scheduler state on REQ_IDLE_ENABLE!");
|
||||
|
||||
kbase_csf_firmware_global_input(global_iface, GLB_IDLE_TIMER,
|
||||
kbdev->csf.gpu_idle_dur_count);
|
||||
|
||||
kbase_csf_firmware_global_input_mask(global_iface, GLB_REQ,
|
||||
GLB_REQ_REQ_IDLE_ENABLE, GLB_REQ_IDLE_ENABLE_MASK);
|
||||
|
||||
dev_dbg(kbdev->dev, "Enabling GPU idle timer with count-value: 0x%.8x",
|
||||
kbdev->csf.gpu_idle_dur_count);
|
||||
enable_gpu_idle_timer(kbdev);
|
||||
kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR);
|
||||
}
|
||||
|
||||
@@ -2120,6 +2267,8 @@ void kbase_csf_enter_protected_mode(struct kbase_device *kbdev)
|
||||
{
|
||||
struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface;
|
||||
|
||||
KBASE_TLSTREAM_AUX_PROTECTED_ENTER_START(kbdev, kbdev);
|
||||
|
||||
kbase_csf_scheduler_spin_lock_assert_held(kbdev);
|
||||
set_global_request(global_iface, GLB_REQ_PROTM_ENTER_MASK);
|
||||
dev_dbg(kbdev->dev, "Sending request to enter protected mode");
|
||||
@@ -2134,6 +2283,8 @@ void kbase_csf_wait_protected_mode_enter(struct kbase_device *kbdev)
|
||||
if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_NONE))
|
||||
kbase_reset_gpu(kbdev);
|
||||
}
|
||||
|
||||
KBASE_TLSTREAM_AUX_PROTECTED_ENTER_END(kbdev, kbdev);
|
||||
}
|
||||
|
||||
void kbase_csf_firmware_trigger_mcu_halt(struct kbase_device *kbdev)
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
|
||||
/*
|
||||
*
|
||||
* (C) COPYRIGHT 2018-2021 ARM Limited. All rights reserved.
|
||||
* (C) COPYRIGHT 2018-2022 ARM Limited. All rights reserved.
|
||||
*
|
||||
* This program is free software and is provided to you under the terms of the
|
||||
* GNU General Public License version 2 as published by the Free Software
|
||||
@@ -23,7 +23,7 @@
|
||||
#define _KBASE_CSF_FIRMWARE_H_
|
||||
|
||||
#include "device/mali_kbase_device.h"
|
||||
#include <uapi/gpu/arm/bifrost/csf/mali_gpu_csf_registers.h>
|
||||
#include <csf/mali_kbase_csf_registers.h>
|
||||
|
||||
/*
|
||||
* PAGE_KERNEL_RO was only defined on 32bit ARM in 4.19 in:
|
||||
@@ -75,7 +75,7 @@
|
||||
#define MAX_SUPPORTED_CSGS 31
|
||||
/* GROUP_STREAM_NUM: At least 8 CSs per CSG, but no more than 32 */
|
||||
#define MIN_SUPPORTED_STREAMS_PER_GROUP 8
|
||||
/* Maximum CSs per csg. */
|
||||
/* MAX_SUPPORTED_STREAMS_PER_GROUP: Maximum CSs per csg. */
|
||||
#define MAX_SUPPORTED_STREAMS_PER_GROUP 32
|
||||
|
||||
struct kbase_device;
|
||||
@@ -777,7 +777,7 @@ u32 kbase_csf_firmware_set_mcu_core_pwroff_time(struct kbase_device *kbdev, u32
|
||||
/**
|
||||
* kbase_csf_interface_version - Helper function to build the full firmware
|
||||
* interface version in a format compatible with
|
||||
* with GLB_VERSION register
|
||||
* GLB_VERSION register
|
||||
*
|
||||
* @major: major version of csf interface
|
||||
* @minor: minor version of csf interface
|
||||
|
||||
@@ -67,9 +67,9 @@ struct firmware_config {
|
||||
.mode = VERIFY_OCTAL_PERMISSIONS(_mode), \
|
||||
}
|
||||
|
||||
static FW_CFG_ATTR(min, S_IRUGO);
|
||||
static FW_CFG_ATTR(max, S_IRUGO);
|
||||
static FW_CFG_ATTR(cur, S_IRUGO | S_IWUSR);
|
||||
static FW_CFG_ATTR(min, 0444);
|
||||
static FW_CFG_ATTR(max, 0444);
|
||||
static FW_CFG_ATTR(cur, 0644);
|
||||
|
||||
static void fw_cfg_kobj_release(struct kobject *kobj)
|
||||
{
|
||||
|
||||
@@ -101,7 +101,7 @@ struct dummy_firmware_interface {
|
||||
|
||||
#define CSF_GLB_REQ_CFG_MASK \
|
||||
(GLB_REQ_CFG_ALLOC_EN_MASK | GLB_REQ_CFG_PROGRESS_TIMER_MASK | \
|
||||
GLB_REQ_CFG_PWROFF_TIMER_MASK)
|
||||
GLB_REQ_CFG_PWROFF_TIMER_MASK | GLB_REQ_IDLE_ENABLE_MASK)
|
||||
|
||||
static inline u32 input_page_read(const u32 *const input, const u32 offset)
|
||||
{
|
||||
@@ -193,9 +193,8 @@ static int invent_cmd_stream_group_info(struct kbase_device *kbdev,
|
||||
ginfo->stream_stride = 0;
|
||||
|
||||
ginfo->streams = kcalloc(ginfo->stream_num, sizeof(*ginfo->streams), GFP_KERNEL);
|
||||
if (ginfo->streams == NULL) {
|
||||
if (ginfo->streams == NULL)
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
for (sid = 0; sid < ginfo->stream_num; ++sid) {
|
||||
struct kbase_csf_cmd_stream_info *stream = &ginfo->streams[sid];
|
||||
@@ -241,9 +240,8 @@ static int invent_capabilities(struct kbase_device *kbdev)
|
||||
iface->group_stride = 0;
|
||||
|
||||
iface->groups = kcalloc(iface->group_num, sizeof(*iface->groups), GFP_KERNEL);
|
||||
if (iface->groups == NULL) {
|
||||
if (iface->groups == NULL)
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
for (gid = 0; gid < iface->group_num; ++gid) {
|
||||
int err;
|
||||
@@ -619,6 +617,20 @@ static void set_timeout_global(
|
||||
set_global_request(global_iface, GLB_REQ_CFG_PROGRESS_TIMER_MASK);
|
||||
}
|
||||
|
||||
static void enable_gpu_idle_timer(struct kbase_device *const kbdev)
|
||||
{
|
||||
struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface;
|
||||
|
||||
kbase_csf_scheduler_spin_lock_assert_held(kbdev);
|
||||
|
||||
kbase_csf_firmware_global_input(global_iface, GLB_IDLE_TIMER,
|
||||
kbdev->csf.gpu_idle_dur_count);
|
||||
kbase_csf_firmware_global_input_mask(global_iface, GLB_REQ, GLB_REQ_REQ_IDLE_ENABLE,
|
||||
GLB_REQ_IDLE_ENABLE_MASK);
|
||||
dev_dbg(kbdev->dev, "Enabling GPU idle timer with count-value: 0x%.8x",
|
||||
kbdev->csf.gpu_idle_dur_count);
|
||||
}
|
||||
|
||||
static void global_init(struct kbase_device *const kbdev, u64 core_mask)
|
||||
{
|
||||
u32 const ack_irq_mask = GLB_ACK_IRQ_MASK_CFG_ALLOC_EN_MASK |
|
||||
@@ -628,7 +640,8 @@ static void global_init(struct kbase_device *const kbdev, u64 core_mask)
|
||||
GLB_ACK_IRQ_MASK_FIRMWARE_CONFIG_UPDATE_MASK |
|
||||
GLB_ACK_IRQ_MASK_PROTM_EXIT_MASK |
|
||||
GLB_ACK_IRQ_MASK_CFG_PWROFF_TIMER_MASK |
|
||||
GLB_ACK_IRQ_MASK_IDLE_EVENT_MASK;
|
||||
GLB_ACK_IRQ_MASK_IDLE_EVENT_MASK |
|
||||
GLB_ACK_IRQ_MASK_IDLE_ENABLE_MASK;
|
||||
|
||||
const struct kbase_csf_global_iface *const global_iface =
|
||||
&kbdev->csf.global_iface;
|
||||
@@ -642,6 +655,12 @@ static void global_init(struct kbase_device *const kbdev, u64 core_mask)
|
||||
|
||||
set_timeout_global(global_iface, kbase_csf_timeout_get(kbdev));
|
||||
|
||||
/* The GPU idle timer is always enabled for simplicity. Checks will be
|
||||
* done before scheduling the GPU idle worker to see if it is
|
||||
* appropriate for the current power policy.
|
||||
*/
|
||||
enable_gpu_idle_timer(kbdev);
|
||||
|
||||
/* Unmask the interrupts */
|
||||
kbase_csf_firmware_global_input(global_iface,
|
||||
GLB_ACK_IRQ_MASK, ack_irq_mask);
|
||||
@@ -809,7 +828,14 @@ static u32 convert_dur_to_idle_count(struct kbase_device *kbdev, const u32 dur_m
|
||||
|
||||
u32 kbase_csf_firmware_get_gpu_idle_hysteresis_time(struct kbase_device *kbdev)
|
||||
{
|
||||
return kbdev->csf.gpu_idle_hysteresis_ms;
|
||||
unsigned long flags;
|
||||
u32 dur;
|
||||
|
||||
kbase_csf_scheduler_spin_lock(kbdev, &flags);
|
||||
dur = kbdev->csf.gpu_idle_hysteresis_ms;
|
||||
kbase_csf_scheduler_spin_unlock(kbdev, flags);
|
||||
|
||||
return dur;
|
||||
}
|
||||
|
||||
u32 kbase_csf_firmware_set_gpu_idle_hysteresis_time(struct kbase_device *kbdev, u32 dur)
|
||||
@@ -817,11 +843,53 @@ u32 kbase_csf_firmware_set_gpu_idle_hysteresis_time(struct kbase_device *kbdev,
|
||||
unsigned long flags;
|
||||
const u32 hysteresis_val = convert_dur_to_idle_count(kbdev, dur);
|
||||
|
||||
/* The 'fw_load_lock' is taken to synchronize against the deferred
|
||||
* loading of FW, where the idle timer will be enabled.
|
||||
*/
|
||||
mutex_lock(&kbdev->fw_load_lock);
|
||||
if (unlikely(!kbdev->csf.firmware_inited)) {
|
||||
kbase_csf_scheduler_spin_lock(kbdev, &flags);
|
||||
kbdev->csf.gpu_idle_hysteresis_ms = dur;
|
||||
kbdev->csf.gpu_idle_dur_count = hysteresis_val;
|
||||
kbase_csf_scheduler_spin_unlock(kbdev, flags);
|
||||
mutex_unlock(&kbdev->fw_load_lock);
|
||||
goto end;
|
||||
}
|
||||
mutex_unlock(&kbdev->fw_load_lock);
|
||||
|
||||
kbase_csf_scheduler_pm_active(kbdev);
|
||||
if (kbase_csf_scheduler_wait_mcu_active(kbdev)) {
|
||||
dev_err(kbdev->dev,
|
||||
"Unable to activate the MCU, the idle hysteresis value shall remain unchanged");
|
||||
kbase_csf_scheduler_pm_idle(kbdev);
|
||||
return kbdev->csf.gpu_idle_dur_count;
|
||||
}
|
||||
|
||||
/* The 'reg_lock' is also taken and is held until the update is
|
||||
* complete, to ensure that updates of the idle timer value by multiple
|
||||
* users are serialized.
|
||||
*/
|
||||
mutex_lock(&kbdev->csf.reg_lock);
|
||||
/* The firmware only reads the new idle timer value when the timer is
|
||||
* disabled.
|
||||
*/
|
||||
kbase_csf_scheduler_spin_lock(kbdev, &flags);
|
||||
kbase_csf_firmware_disable_gpu_idle_timer(kbdev);
|
||||
kbase_csf_scheduler_spin_unlock(kbdev, flags);
|
||||
/* Ensure that the request has taken effect */
|
||||
wait_for_global_request(kbdev, GLB_REQ_IDLE_DISABLE_MASK);
|
||||
|
||||
kbase_csf_scheduler_spin_lock(kbdev, &flags);
|
||||
kbdev->csf.gpu_idle_hysteresis_ms = dur;
|
||||
kbdev->csf.gpu_idle_dur_count = hysteresis_val;
|
||||
kbase_csf_firmware_enable_gpu_idle_timer(kbdev);
|
||||
kbase_csf_scheduler_spin_unlock(kbdev, flags);
|
||||
wait_for_global_request(kbdev, GLB_REQ_IDLE_ENABLE_MASK);
|
||||
mutex_unlock(&kbdev->csf.reg_lock);
|
||||
|
||||
kbase_csf_scheduler_pm_idle(kbdev);
|
||||
|
||||
end:
|
||||
dev_dbg(kbdev->dev, "CSF set firmware idle hysteresis count-value: 0x%.8x",
|
||||
hysteresis_val);
|
||||
|
||||
@@ -897,6 +965,16 @@ int kbase_csf_firmware_early_init(struct kbase_device *kbdev)
|
||||
kbdev->csf.fw_timeout_ms =
|
||||
kbase_get_timeout_ms(kbdev, CSF_FIRMWARE_TIMEOUT);
|
||||
|
||||
kbdev->csf.gpu_idle_hysteresis_ms = FIRMWARE_IDLE_HYSTERESIS_TIME_MS;
|
||||
#ifdef KBASE_PM_RUNTIME
|
||||
if (kbase_pm_gpu_sleep_allowed(kbdev))
|
||||
kbdev->csf.gpu_idle_hysteresis_ms /=
|
||||
FIRMWARE_IDLE_HYSTERESIS_GPU_SLEEP_SCALER;
|
||||
#endif
|
||||
WARN_ON(!kbdev->csf.gpu_idle_hysteresis_ms);
|
||||
kbdev->csf.gpu_idle_dur_count = convert_dur_to_idle_count(
|
||||
kbdev, kbdev->csf.gpu_idle_hysteresis_ms);
|
||||
|
||||
INIT_LIST_HEAD(&kbdev->csf.firmware_interfaces);
|
||||
INIT_LIST_HEAD(&kbdev->csf.firmware_config);
|
||||
INIT_LIST_HEAD(&kbdev->csf.firmware_trace_buffers.list);
|
||||
@@ -928,16 +1006,6 @@ int kbase_csf_firmware_init(struct kbase_device *kbdev)
|
||||
return ret;
|
||||
}
|
||||
|
||||
kbdev->csf.gpu_idle_hysteresis_ms = FIRMWARE_IDLE_HYSTERESIS_TIME_MS;
|
||||
#ifdef KBASE_PM_RUNTIME
|
||||
if (kbase_pm_gpu_sleep_allowed(kbdev))
|
||||
kbdev->csf.gpu_idle_hysteresis_ms /=
|
||||
FIRMWARE_IDLE_HYSTERESIS_GPU_SLEEP_SCALER;
|
||||
#endif
|
||||
WARN_ON(!kbdev->csf.gpu_idle_hysteresis_ms);
|
||||
kbdev->csf.gpu_idle_dur_count = convert_dur_to_idle_count(
|
||||
kbdev, kbdev->csf.gpu_idle_hysteresis_ms);
|
||||
|
||||
ret = kbase_mcu_shared_interface_region_tracker_init(kbdev);
|
||||
if (ret != 0) {
|
||||
dev_err(kbdev->dev,
|
||||
@@ -1035,29 +1103,19 @@ void kbase_csf_firmware_term(struct kbase_device *kbdev)
|
||||
void kbase_csf_firmware_enable_gpu_idle_timer(struct kbase_device *kbdev)
|
||||
{
|
||||
struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface;
|
||||
u32 glb_req;
|
||||
const u32 glb_req = kbase_csf_firmware_global_input_read(global_iface, GLB_REQ);
|
||||
|
||||
kbase_csf_scheduler_spin_lock_assert_held(kbdev);
|
||||
|
||||
/* The scheduler is assumed to only call the enable when its internal
|
||||
* state indicates that the idle timer has previously been disabled. So
|
||||
* on entry the expected field values are:
|
||||
* 1. GLOBAL_INPUT_BLOCK.GLB_REQ.IDLE_ENABLE: 0
|
||||
* 2. GLOBAL_OUTPUT_BLOCK.GLB_ACK.IDLE_ENABLE: 0, or, on 1 -> 0
|
||||
*/
|
||||
|
||||
glb_req = kbase_csf_firmware_global_input_read(global_iface, GLB_REQ);
|
||||
if (glb_req & GLB_REQ_IDLE_ENABLE_MASK)
|
||||
dev_err(kbdev->dev, "Incoherent scheduler state on REQ_IDLE_ENABLE!");
|
||||
|
||||
kbase_csf_firmware_global_input(global_iface, GLB_IDLE_TIMER,
|
||||
kbdev->csf.gpu_idle_dur_count);
|
||||
|
||||
kbase_csf_firmware_global_input_mask(global_iface, GLB_REQ,
|
||||
GLB_REQ_REQ_IDLE_ENABLE, GLB_REQ_IDLE_ENABLE_MASK);
|
||||
|
||||
dev_dbg(kbdev->dev, "Enabling GPU idle timer with count-value: 0x%.8x",
|
||||
kbdev->csf.gpu_idle_dur_count);
|
||||
enable_gpu_idle_timer(kbdev);
|
||||
kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR);
|
||||
}
|
||||
|
||||
|
||||
@@ -174,17 +174,15 @@ u64 kbase_csf_heap_context_allocator_alloc(
|
||||
* allocate it.
|
||||
*/
|
||||
if (!ctx_alloc->region) {
|
||||
ctx_alloc->region =
|
||||
kbase_mem_alloc(kctx, nr_pages, nr_pages, 0, &flags,
|
||||
&ctx_alloc->gpu_va, mmu_sync_info);
|
||||
ctx_alloc->region = kbase_mem_alloc(kctx, nr_pages, nr_pages, 0, &flags,
|
||||
&ctx_alloc->gpu_va, mmu_sync_info);
|
||||
}
|
||||
|
||||
/* If the pool still isn't allocated then an error occurred. */
|
||||
if (unlikely(!ctx_alloc->region)) {
|
||||
if (unlikely(!ctx_alloc->region))
|
||||
dev_dbg(kctx->kbdev->dev, "Failed to allocate a pool of tiler heap contexts");
|
||||
} else {
|
||||
else
|
||||
heap_gpu_va = sub_alloc(ctx_alloc);
|
||||
}
|
||||
|
||||
mutex_unlock(&ctx_alloc->lock);
|
||||
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
|
||||
/*
|
||||
*
|
||||
* (C) COPYRIGHT 2018-2021 ARM Limited. All rights reserved.
|
||||
* (C) COPYRIGHT 2018-2022 ARM Limited. All rights reserved.
|
||||
*
|
||||
* This program is free software and is provided to you under the terms of the
|
||||
* GNU General Public License version 2 as published by the Free Software
|
||||
@@ -45,6 +45,10 @@ static int kbase_kcpu_map_import_prepare(
|
||||
{
|
||||
struct kbase_context *const kctx = kcpu_queue->kctx;
|
||||
struct kbase_va_region *reg;
|
||||
struct kbase_mem_phy_alloc *alloc;
|
||||
struct page **pages;
|
||||
struct tagged_addr *pa;
|
||||
long i;
|
||||
int ret = 0;
|
||||
|
||||
lockdep_assert_held(&kctx->csf.kcpu_queues.lock);
|
||||
@@ -76,6 +80,13 @@ static int kbase_kcpu_map_import_prepare(
|
||||
ret = kbase_jd_user_buf_pin_pages(kctx, reg);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
alloc = reg->gpu_alloc;
|
||||
pa = kbase_get_gpu_phy_pages(reg);
|
||||
pages = alloc->imported.user_buf.pages;
|
||||
|
||||
for (i = 0; i < alloc->nents; i++)
|
||||
pa[i] = as_tagged(page_to_phys(pages[i]));
|
||||
}
|
||||
|
||||
current_command->type = BASE_KCPU_COMMAND_TYPE_MAP_IMPORT;
|
||||
@@ -172,8 +183,8 @@ static void kbase_jit_add_to_pending_alloc_list(
|
||||
list_for_each_entry(blocked_queue,
|
||||
&kctx->csf.kcpu_queues.jit_blocked_queues,
|
||||
jit_blocked) {
|
||||
struct kbase_kcpu_command const*const jit_alloc_cmd =
|
||||
&blocked_queue->commands[blocked_queue->start_offset];
|
||||
struct kbase_kcpu_command const *const jit_alloc_cmd =
|
||||
&blocked_queue->commands[blocked_queue->start_offset];
|
||||
|
||||
WARN_ON(jit_alloc_cmd->type != BASE_KCPU_COMMAND_TYPE_JIT_ALLOC);
|
||||
if (cmd->enqueue_ts < jit_alloc_cmd->enqueue_ts) {
|
||||
@@ -244,7 +255,7 @@ static int kbase_kcpu_jit_allocate_process(
|
||||
break;
|
||||
|
||||
if (jit_cmd->type == BASE_KCPU_COMMAND_TYPE_JIT_FREE) {
|
||||
u8 const*const free_ids = jit_cmd->info.jit_free.ids;
|
||||
u8 const *const free_ids = jit_cmd->info.jit_free.ids;
|
||||
|
||||
if (free_ids && *free_ids && kctx->jit_alloc[*free_ids]) {
|
||||
/*
|
||||
@@ -456,8 +467,8 @@ static int kbase_kcpu_jit_free_process(struct kbase_kcpu_command_queue *queue,
|
||||
|
||||
lockdep_assert_held(&kctx->csf.kcpu_queues.lock);
|
||||
|
||||
KBASE_TLSTREAM_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_EXECUTE_JIT_FREE_END(
|
||||
queue->kctx->kbdev, queue);
|
||||
KBASE_TLSTREAM_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_EXECUTE_JIT_FREE_END(queue->kctx->kbdev,
|
||||
queue);
|
||||
|
||||
for (i = 0; i < count; i++) {
|
||||
u64 pages_used = 0;
|
||||
@@ -636,7 +647,7 @@ static int kbase_csf_queue_group_suspend_prepare(
|
||||
struct tagged_addr *page_array;
|
||||
u64 start, end, i;
|
||||
|
||||
if (!(reg->flags & BASE_MEM_SAME_VA) ||
|
||||
if (((reg->flags & KBASE_REG_ZONE_MASK) != KBASE_REG_ZONE_SAME_VA) ||
|
||||
reg->nr_pages < nr_pages ||
|
||||
kbase_reg_current_backed_size(reg) !=
|
||||
reg->nr_pages) {
|
||||
@@ -734,8 +745,8 @@ static int kbase_kcpu_cqs_wait_process(struct kbase_device *kbdev,
|
||||
cqs_wait->objs[i].addr, &mapping);
|
||||
|
||||
if (!queue->command_started) {
|
||||
KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_START(
|
||||
kbdev, queue);
|
||||
KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_START(kbdev,
|
||||
queue);
|
||||
queue->command_started = true;
|
||||
KBASE_KTRACE_ADD_CSF_KCPU(kbdev, CQS_WAIT_START,
|
||||
queue, cqs_wait->nr_objs, 0);
|
||||
@@ -764,8 +775,7 @@ static int kbase_kcpu_cqs_wait_process(struct kbase_device *kbdev,
|
||||
error);
|
||||
|
||||
KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_END(
|
||||
kbdev, queue,
|
||||
evt[BASEP_EVENT_ERR_INDEX]);
|
||||
kbdev, queue, evt[BASEP_EVENT_ERR_INDEX]);
|
||||
queue->command_started = false;
|
||||
}
|
||||
|
||||
@@ -855,8 +865,7 @@ static void kbase_kcpu_cqs_set_process(struct kbase_device *kbdev,
|
||||
evt = (u32 *)kbase_phy_alloc_mapping_get(
|
||||
queue->kctx, cqs_set->objs[i].addr, &mapping);
|
||||
|
||||
KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_SET(kbdev, queue,
|
||||
evt ? 0 : 1);
|
||||
KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_SET(kbdev, queue, evt ? 0 : 1);
|
||||
|
||||
if (!evt) {
|
||||
dev_warn(kbdev->dev,
|
||||
@@ -1490,8 +1499,7 @@ static void KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_ALLOC_INFO(
|
||||
{
|
||||
u8 i;
|
||||
|
||||
KBASE_TLSTREAM_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_EXECUTE_JIT_ALLOC_END(
|
||||
kbdev, queue);
|
||||
KBASE_TLSTREAM_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_EXECUTE_JIT_ALLOC_END(kbdev, queue);
|
||||
for (i = 0; i < jit_alloc->count; i++) {
|
||||
const u8 id = jit_alloc->info[i].id;
|
||||
const struct kbase_va_region *reg = queue->kctx->jit_alloc[id];
|
||||
@@ -1521,16 +1529,14 @@ static void KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_ALLOC_END(
|
||||
struct kbase_device *kbdev,
|
||||
const struct kbase_kcpu_command_queue *queue)
|
||||
{
|
||||
KBASE_TLSTREAM_TL_KBASE_ARRAY_END_KCPUQUEUE_EXECUTE_JIT_ALLOC_END(
|
||||
kbdev, queue);
|
||||
KBASE_TLSTREAM_TL_KBASE_ARRAY_END_KCPUQUEUE_EXECUTE_JIT_ALLOC_END(kbdev, queue);
|
||||
}
|
||||
|
||||
static void KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_FREE_END(
|
||||
struct kbase_device *kbdev,
|
||||
const struct kbase_kcpu_command_queue *queue)
|
||||
{
|
||||
KBASE_TLSTREAM_TL_KBASE_ARRAY_END_KCPUQUEUE_EXECUTE_JIT_FREE_END(
|
||||
kbdev, queue);
|
||||
KBASE_TLSTREAM_TL_KBASE_ARRAY_END_KCPUQUEUE_EXECUTE_JIT_FREE_END(kbdev, queue);
|
||||
}
|
||||
|
||||
static void kcpu_queue_process(struct kbase_kcpu_command_queue *queue,
|
||||
@@ -1550,8 +1556,8 @@ static void kcpu_queue_process(struct kbase_kcpu_command_queue *queue,
|
||||
switch (cmd->type) {
|
||||
case BASE_KCPU_COMMAND_TYPE_FENCE_WAIT:
|
||||
if (!queue->command_started) {
|
||||
KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_WAIT_START(
|
||||
kbdev, queue);
|
||||
KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_WAIT_START(kbdev,
|
||||
queue);
|
||||
queue->command_started = true;
|
||||
}
|
||||
|
||||
@@ -1584,8 +1590,7 @@ static void kcpu_queue_process(struct kbase_kcpu_command_queue *queue,
|
||||
}
|
||||
break;
|
||||
case BASE_KCPU_COMMAND_TYPE_FENCE_SIGNAL:
|
||||
KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_SIGNAL_START(
|
||||
kbdev, queue);
|
||||
KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_SIGNAL_START(kbdev, queue);
|
||||
|
||||
status = 0;
|
||||
|
||||
@@ -1603,8 +1608,8 @@ static void kcpu_queue_process(struct kbase_kcpu_command_queue *queue,
|
||||
queue->has_error = true;
|
||||
#endif
|
||||
|
||||
KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_SIGNAL_END(
|
||||
kbdev, queue, status);
|
||||
KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_SIGNAL_END(kbdev, queue,
|
||||
status);
|
||||
break;
|
||||
case BASE_KCPU_COMMAND_TYPE_CQS_WAIT:
|
||||
status = kbase_kcpu_cqs_wait_process(kbdev, queue,
|
||||
@@ -1654,15 +1659,14 @@ static void kcpu_queue_process(struct kbase_kcpu_command_queue *queue,
|
||||
/* Clear the queue's error state */
|
||||
queue->has_error = false;
|
||||
|
||||
KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_ERROR_BARRIER(
|
||||
kbdev, queue);
|
||||
KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_ERROR_BARRIER(kbdev, queue);
|
||||
break;
|
||||
case BASE_KCPU_COMMAND_TYPE_MAP_IMPORT: {
|
||||
struct kbase_ctx_ext_res_meta *meta = NULL;
|
||||
|
||||
if (!drain_queue) {
|
||||
KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_MAP_IMPORT_START(
|
||||
kbdev, queue);
|
||||
KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_MAP_IMPORT_START(kbdev,
|
||||
queue);
|
||||
|
||||
kbase_gpu_vm_lock(queue->kctx);
|
||||
meta = kbase_sticky_resource_acquire(
|
||||
@@ -1684,8 +1688,7 @@ static void kcpu_queue_process(struct kbase_kcpu_command_queue *queue,
|
||||
case BASE_KCPU_COMMAND_TYPE_UNMAP_IMPORT: {
|
||||
bool ret;
|
||||
|
||||
KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_START(
|
||||
kbdev, queue);
|
||||
KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_START(kbdev, queue);
|
||||
|
||||
kbase_gpu_vm_lock(queue->kctx);
|
||||
ret = kbase_sticky_resource_release(
|
||||
@@ -1698,15 +1701,15 @@ static void kcpu_queue_process(struct kbase_kcpu_command_queue *queue,
|
||||
"failed to release the reference. resource not found");
|
||||
}
|
||||
|
||||
KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_END(
|
||||
kbdev, queue, ret ? 0 : 1);
|
||||
KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_END(kbdev, queue,
|
||||
ret ? 0 : 1);
|
||||
break;
|
||||
}
|
||||
case BASE_KCPU_COMMAND_TYPE_UNMAP_IMPORT_FORCE: {
|
||||
bool ret;
|
||||
|
||||
KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_FORCE_START(
|
||||
kbdev, queue);
|
||||
KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_FORCE_START(kbdev,
|
||||
queue);
|
||||
|
||||
kbase_gpu_vm_lock(queue->kctx);
|
||||
ret = kbase_sticky_resource_release_force(
|
||||
@@ -1729,8 +1732,8 @@ static void kcpu_queue_process(struct kbase_kcpu_command_queue *queue,
|
||||
/* We still need to call this function to clean the JIT alloc info up */
|
||||
kbase_kcpu_jit_allocate_finish(queue, cmd);
|
||||
} else {
|
||||
KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_ALLOC_START(
|
||||
kbdev, queue);
|
||||
KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_ALLOC_START(kbdev,
|
||||
queue);
|
||||
|
||||
status = kbase_kcpu_jit_allocate_process(queue,
|
||||
cmd);
|
||||
@@ -1754,8 +1757,7 @@ static void kcpu_queue_process(struct kbase_kcpu_command_queue *queue,
|
||||
break;
|
||||
}
|
||||
case BASE_KCPU_COMMAND_TYPE_JIT_FREE:
|
||||
KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_FREE_START(
|
||||
kbdev, queue);
|
||||
KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_FREE_START(kbdev, queue);
|
||||
|
||||
status = kbase_kcpu_jit_free_process(queue, cmd);
|
||||
if (status)
|
||||
@@ -1838,12 +1840,12 @@ static void KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_COMMAND(
|
||||
|
||||
switch (cmd->type) {
|
||||
case BASE_KCPU_COMMAND_TYPE_FENCE_WAIT:
|
||||
KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_FENCE_WAIT(
|
||||
kbdev, queue, cmd->info.fence.fence);
|
||||
KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_FENCE_WAIT(kbdev, queue,
|
||||
cmd->info.fence.fence);
|
||||
break;
|
||||
case BASE_KCPU_COMMAND_TYPE_FENCE_SIGNAL:
|
||||
KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_FENCE_SIGNAL(
|
||||
kbdev, queue, cmd->info.fence.fence);
|
||||
KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_FENCE_SIGNAL(kbdev, queue,
|
||||
cmd->info.fence.fence);
|
||||
break;
|
||||
case BASE_KCPU_COMMAND_TYPE_CQS_WAIT:
|
||||
{
|
||||
@@ -1865,8 +1867,8 @@ static void KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_COMMAND(
|
||||
unsigned int i;
|
||||
|
||||
for (i = 0; i < cmd->info.cqs_set.nr_objs; i++) {
|
||||
KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_SET(
|
||||
kbdev, queue, sets[i].addr);
|
||||
KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_SET(kbdev, queue,
|
||||
sets[i].addr);
|
||||
}
|
||||
break;
|
||||
}
|
||||
@@ -1881,16 +1883,15 @@ static void KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_COMMAND(
|
||||
break;
|
||||
}
|
||||
case BASE_KCPU_COMMAND_TYPE_ERROR_BARRIER:
|
||||
KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_ERROR_BARRIER(kbdev,
|
||||
queue);
|
||||
KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_ERROR_BARRIER(kbdev, queue);
|
||||
break;
|
||||
case BASE_KCPU_COMMAND_TYPE_MAP_IMPORT:
|
||||
KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_MAP_IMPORT(
|
||||
kbdev, queue, cmd->info.import.gpu_va);
|
||||
KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_MAP_IMPORT(kbdev, queue,
|
||||
cmd->info.import.gpu_va);
|
||||
break;
|
||||
case BASE_KCPU_COMMAND_TYPE_UNMAP_IMPORT:
|
||||
KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_UNMAP_IMPORT(
|
||||
kbdev, queue, cmd->info.import.gpu_va);
|
||||
KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_UNMAP_IMPORT(kbdev, queue,
|
||||
cmd->info.import.gpu_va);
|
||||
break;
|
||||
case BASE_KCPU_COMMAND_TYPE_UNMAP_IMPORT_FORCE:
|
||||
KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_UNMAP_IMPORT_FORCE(
|
||||
@@ -1900,35 +1901,29 @@ static void KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_COMMAND(
|
||||
{
|
||||
u8 i;
|
||||
|
||||
KBASE_TLSTREAM_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_JIT_ALLOC(
|
||||
kbdev, queue);
|
||||
KBASE_TLSTREAM_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_JIT_ALLOC(kbdev, queue);
|
||||
for (i = 0; i < cmd->info.jit_alloc.count; i++) {
|
||||
const struct base_jit_alloc_info *info =
|
||||
&cmd->info.jit_alloc.info[i];
|
||||
|
||||
KBASE_TLSTREAM_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_JIT_ALLOC(
|
||||
kbdev, queue, info->gpu_alloc_addr,
|
||||
info->va_pages, info->commit_pages,
|
||||
info->extension, info->id, info->bin_id,
|
||||
info->max_allocations, info->flags,
|
||||
info->usage_id);
|
||||
kbdev, queue, info->gpu_alloc_addr, info->va_pages,
|
||||
info->commit_pages, info->extension, info->id, info->bin_id,
|
||||
info->max_allocations, info->flags, info->usage_id);
|
||||
}
|
||||
KBASE_TLSTREAM_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_JIT_ALLOC(
|
||||
kbdev, queue);
|
||||
KBASE_TLSTREAM_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_JIT_ALLOC(kbdev, queue);
|
||||
break;
|
||||
}
|
||||
case BASE_KCPU_COMMAND_TYPE_JIT_FREE:
|
||||
{
|
||||
u8 i;
|
||||
|
||||
KBASE_TLSTREAM_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_JIT_FREE(
|
||||
kbdev, queue);
|
||||
KBASE_TLSTREAM_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_JIT_FREE(kbdev, queue);
|
||||
for (i = 0; i < cmd->info.jit_free.count; i++) {
|
||||
KBASE_TLSTREAM_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_JIT_FREE(
|
||||
kbdev, queue, cmd->info.jit_free.ids[i]);
|
||||
}
|
||||
KBASE_TLSTREAM_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_JIT_FREE(
|
||||
kbdev, queue);
|
||||
KBASE_TLSTREAM_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_JIT_FREE(kbdev, queue);
|
||||
break;
|
||||
}
|
||||
case BASE_KCPU_COMMAND_TYPE_GROUP_SUSPEND:
|
||||
@@ -1936,6 +1931,9 @@ static void KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_COMMAND(
|
||||
kbdev, queue, cmd->info.suspend_buf_copy.sus_buf,
|
||||
cmd->info.suspend_buf_copy.group_handle);
|
||||
break;
|
||||
default:
|
||||
dev_dbg(kbdev->dev, "Unknown command type %u", cmd->type);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2210,8 +2208,8 @@ int kbase_csf_kcpu_queue_new(struct kbase_context *kctx,
|
||||
/* Fire the tracepoint with the mutex held to enforce correct ordering
|
||||
* with the summary stream.
|
||||
*/
|
||||
KBASE_TLSTREAM_TL_KBASE_NEW_KCPUQUEUE(
|
||||
kctx->kbdev, queue, kctx->id, queue->num_pending_cmds);
|
||||
KBASE_TLSTREAM_TL_KBASE_NEW_KCPUQUEUE(kctx->kbdev, queue, queue->id, kctx->id,
|
||||
queue->num_pending_cmds);
|
||||
|
||||
KBASE_KTRACE_ADD_CSF_KCPU(kctx->kbdev, KCPU_QUEUE_NEW, queue,
|
||||
queue->fence_context, 0);
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
|
||||
/*
|
||||
*
|
||||
* (C) COPYRIGHT 2018-2021 ARM Limited. All rights reserved.
|
||||
* (C) COPYRIGHT 2018-2022 ARM Limited. All rights reserved.
|
||||
*
|
||||
* This program is free software and is provided to you under the terms of the
|
||||
* GNU General Public License version 2 as published by the Free Software
|
||||
@@ -206,14 +206,16 @@ struct kbase_kcpu_command_group_suspend_info {
|
||||
* indicates that it has been enqueued earlier.
|
||||
* @info: Structure which holds information about the command
|
||||
* dependent on the command type.
|
||||
* @info.fence: Fence
|
||||
* @info.cqs_wait: CQS wait
|
||||
* @info.cqs_set: CQS set
|
||||
* @info.import: import
|
||||
* @info.jit_alloc: jit allocation
|
||||
* @info.jit_free: jit deallocation
|
||||
* @info.suspend_buf_copy: suspend buffer copy
|
||||
* @info.sample_time: sample time
|
||||
* @info.fence: Fence
|
||||
* @info.cqs_wait: CQS wait
|
||||
* @info.cqs_set: CQS set
|
||||
* @info.cqs_wait_operation: CQS wait operation
|
||||
* @info.cqs_set_operation: CQS set operation
|
||||
* @info.import: import
|
||||
* @info.jit_alloc: JIT allocation
|
||||
* @info.jit_free: JIT deallocation
|
||||
* @info.suspend_buf_copy: suspend buffer copy
|
||||
* @info.sample_time: sample time
|
||||
*/
|
||||
struct kbase_kcpu_command {
|
||||
enum base_kcpu_command_type type;
|
||||
@@ -303,8 +305,6 @@ int kbase_csf_kcpu_queue_new(struct kbase_context *kctx,
|
||||
/**
|
||||
* kbase_csf_kcpu_queue_delete - Delete KCPU command queue.
|
||||
*
|
||||
* Return: 0 if successful, -EINVAL if the queue ID is invalid.
|
||||
*
|
||||
* @kctx: Pointer to the kbase context from which the KCPU command
|
||||
* queue is to be deleted.
|
||||
* @del: Pointer to the structure which specifies the KCPU command
|
||||
|
||||
@@ -71,29 +71,60 @@ struct protected_memory_allocation **
|
||||
kbase_csf_protected_memory_alloc(
|
||||
struct kbase_device *const kbdev,
|
||||
struct tagged_addr *phys,
|
||||
size_t num_pages)
|
||||
size_t num_pages,
|
||||
bool is_small_page)
|
||||
{
|
||||
size_t i;
|
||||
struct protected_memory_allocator_device *pma_dev =
|
||||
kbdev->csf.pma_dev;
|
||||
struct protected_memory_allocation **pma =
|
||||
kmalloc_array(num_pages, sizeof(*pma), GFP_KERNEL);
|
||||
struct protected_memory_allocation **pma = NULL;
|
||||
unsigned int order = KBASE_MEM_POOL_2MB_PAGE_TABLE_ORDER;
|
||||
unsigned int num_pages_order;
|
||||
|
||||
if (is_small_page)
|
||||
order = KBASE_MEM_POOL_4KB_PAGE_TABLE_ORDER;
|
||||
|
||||
num_pages_order = (1u << order);
|
||||
|
||||
/* Ensure the requested num_pages is aligned with
|
||||
* the order type passed as argument.
|
||||
*
|
||||
* pma_alloc_page() will then handle the granularity
|
||||
* of the allocation based on order.
|
||||
*/
|
||||
num_pages = div64_u64(num_pages + num_pages_order - 1, num_pages_order);
|
||||
|
||||
pma = kmalloc_array(num_pages, sizeof(*pma), GFP_KERNEL);
|
||||
|
||||
if (WARN_ON(!pma_dev) || WARN_ON(!phys) || !pma)
|
||||
return NULL;
|
||||
|
||||
for (i = 0; i < num_pages; i++) {
|
||||
pma[i] = pma_dev->ops.pma_alloc_page(pma_dev,
|
||||
KBASE_MEM_POOL_4KB_PAGE_TABLE_ORDER);
|
||||
phys_addr_t phys_addr;
|
||||
|
||||
pma[i] = pma_dev->ops.pma_alloc_page(pma_dev, order);
|
||||
if (!pma[i])
|
||||
break;
|
||||
|
||||
phys[i] = as_tagged(pma_dev->ops.pma_get_phys_addr(pma_dev,
|
||||
pma[i]));
|
||||
phys_addr = pma_dev->ops.pma_get_phys_addr(pma_dev, pma[i]);
|
||||
|
||||
if (order) {
|
||||
size_t j;
|
||||
|
||||
*phys++ = as_tagged_tag(phys_addr, HUGE_HEAD | HUGE_PAGE);
|
||||
|
||||
for (j = 1; j < num_pages_order; j++) {
|
||||
*phys++ = as_tagged_tag(phys_addr +
|
||||
PAGE_SIZE * j,
|
||||
HUGE_PAGE);
|
||||
}
|
||||
} else {
|
||||
phys[i] = as_tagged(phys_addr);
|
||||
}
|
||||
}
|
||||
|
||||
if (i != num_pages) {
|
||||
kbase_csf_protected_memory_free(kbdev, pma, i);
|
||||
kbase_csf_protected_memory_free(kbdev, pma, i * num_pages_order, is_small_page);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
@@ -103,15 +134,28 @@ struct protected_memory_allocation **
|
||||
void kbase_csf_protected_memory_free(
|
||||
struct kbase_device *const kbdev,
|
||||
struct protected_memory_allocation **pma,
|
||||
size_t num_pages)
|
||||
size_t num_pages,
|
||||
bool is_small_page)
|
||||
{
|
||||
size_t i;
|
||||
struct protected_memory_allocator_device *pma_dev =
|
||||
kbdev->csf.pma_dev;
|
||||
unsigned int num_pages_order = (1u << KBASE_MEM_POOL_2MB_PAGE_TABLE_ORDER);
|
||||
|
||||
if (is_small_page)
|
||||
num_pages_order = (1u << KBASE_MEM_POOL_4KB_PAGE_TABLE_ORDER);
|
||||
|
||||
if (WARN_ON(!pma_dev) || WARN_ON(!pma))
|
||||
return;
|
||||
|
||||
/* Ensure the requested num_pages is aligned with
|
||||
* the order type passed as argument.
|
||||
*
|
||||
* pma_alloc_page() will then handle the granularity
|
||||
* of the allocation based on order.
|
||||
*/
|
||||
num_pages = div64_u64(num_pages + num_pages_order - 1, num_pages_order);
|
||||
|
||||
for (i = 0; i < num_pages; i++)
|
||||
pma_dev->ops.pma_free_page(pma_dev, pma[i]);
|
||||
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
|
||||
/*
|
||||
*
|
||||
* (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
|
||||
* (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
|
||||
*
|
||||
* This program is free software and is provided to you under the terms of the
|
||||
* GNU General Public License version 2 as published by the Free Software
|
||||
@@ -46,6 +46,7 @@ void kbase_csf_protected_memory_term(struct kbase_device *const kbdev);
|
||||
* @phys: Array of physical addresses to be filled in by the protected
|
||||
* memory allocator.
|
||||
* @num_pages: Number of pages requested to be allocated.
|
||||
* @is_small_page: Flag used to select the order of protected memory page.
|
||||
*
|
||||
* Return: Pointer to an array of protected memory allocations on success,
|
||||
* or NULL on failure.
|
||||
@@ -54,7 +55,8 @@ struct protected_memory_allocation **
|
||||
kbase_csf_protected_memory_alloc(
|
||||
struct kbase_device *const kbdev,
|
||||
struct tagged_addr *phys,
|
||||
size_t num_pages);
|
||||
size_t num_pages,
|
||||
bool is_small_page);
|
||||
|
||||
/**
|
||||
* kbase_csf_protected_memory_free - Free the allocated
|
||||
@@ -63,9 +65,11 @@ struct protected_memory_allocation **
|
||||
* @kbdev: Device pointer.
|
||||
* @pma: Array of pointers to protected memory allocations.
|
||||
* @num_pages: Number of pages to be freed.
|
||||
* @is_small_page: Flag used to select the order of protected memory page.
|
||||
*/
|
||||
void kbase_csf_protected_memory_free(
|
||||
struct kbase_device *const kbdev,
|
||||
struct protected_memory_allocation **pma,
|
||||
size_t num_pages);
|
||||
size_t num_pages,
|
||||
bool is_small_page);
|
||||
#endif
|
||||
|
||||
@@ -24,8 +24,8 @@
|
||||
* expected) to have to add to it.
|
||||
*/
|
||||
|
||||
#ifndef _UAPI_GPU_CSF_REGISTERS_H_
|
||||
#define _UAPI_GPU_CSF_REGISTERS_H_
|
||||
#ifndef _KBASE_CSF_REGISTERS_H_
|
||||
#define _KBASE_CSF_REGISTERS_H_
|
||||
|
||||
/*
|
||||
* Begin register sets
|
||||
@@ -480,7 +480,7 @@
|
||||
/* CS_INSTR_BUFFER_OFFSET_POINTER register */
|
||||
#define CS_INSTR_BUFFER_OFFSET_POINTER_POINTER_SHIFT (0)
|
||||
#define CS_INSTR_BUFFER_OFFSET_POINTER_POINTER_MASK \
|
||||
((u64)0xFFFFFFFFFFFFFFFF) << CS_INSTR_BUFFER_OFFSET_POINTER_POINTER_SHIFT)
|
||||
(((u64)0xFFFFFFFFFFFFFFFF) << CS_INSTR_BUFFER_OFFSET_POINTER_POINTER_SHIFT)
|
||||
#define CS_INSTR_BUFFER_OFFSET_POINTER_POINTER_GET(reg_val) \
|
||||
(((reg_val)&CS_INSTR_BUFFER_OFFSET_POINTER_POINTER_MASK) >> CS_INSTR_BUFFER_OFFSET_POINTER_POINTER_SHIFT)
|
||||
#define CS_INSTR_BUFFER_OFFSET_POINTER_POINTER_SET(reg_val, value) \
|
||||
@@ -1448,6 +1448,9 @@
|
||||
#define GLB_ACK_IRQ_MASK_IDLE_EVENT_SHIFT (26)
|
||||
#define GLB_ACK_IRQ_MASK_IDLE_EVENT_MASK (0x1 << GLB_ACK_IRQ_MASK_IDLE_EVENT_SHIFT)
|
||||
|
||||
#define GLB_ACK_IRQ_MASK_IDLE_ENABLE_SHIFT GPU_U(10)
|
||||
#define GLB_ACK_IRQ_MASK_IDLE_ENABLE_MASK (GPU_U(0x1) << GLB_ACK_IRQ_MASK_IDLE_ENABLE_SHIFT)
|
||||
|
||||
#define GLB_IDLE_TIMER (0x0080)
|
||||
/* GLB_IDLE_TIMER register */
|
||||
#define GLB_IDLE_TIMER_TIMEOUT_SHIFT (0)
|
||||
@@ -1518,4 +1521,4 @@
|
||||
(((value) << GLB_REQ_ITER_TRACE_ENABLE_SHIFT) & \
|
||||
GLB_REQ_ITER_TRACE_ENABLE_MASK))
|
||||
|
||||
#endif /* _UAPI_GPU_CSF_REGISTERS_H_ */
|
||||
#endif /* _KBASE_CSF_REGISTERS_H_ */
|
||||
@@ -29,14 +29,14 @@
|
||||
#include <csf/mali_kbase_csf_trace_buffer.h>
|
||||
#include <csf/ipa_control/mali_kbase_csf_ipa_control.h>
|
||||
#include <mali_kbase_reset_gpu.h>
|
||||
#include <linux/string.h>
|
||||
|
||||
/* Waiting timeout for GPU reset to complete */
|
||||
#define GPU_RESET_TIMEOUT_MS (5000) /* 5 seconds */
|
||||
#define DUMP_DWORDS_PER_LINE (4)
|
||||
/* 16 characters needed for a 8 byte value in hex & 1 character for space */
|
||||
#define DUMP_HEX_CHARS_PER_DWORD ((2 * 8) + 1)
|
||||
#define DUMP_HEX_CHARS_PER_LINE \
|
||||
(DUMP_DWORDS_PER_LINE * DUMP_HEX_CHARS_PER_DWORD)
|
||||
enum kbasep_soft_reset_status {
|
||||
RESET_SUCCESS = 0,
|
||||
SOFT_RESET_FAILED,
|
||||
L2_ON_FAILED,
|
||||
MCU_REINIT_FAILED
|
||||
};
|
||||
|
||||
static inline bool
|
||||
kbase_csf_reset_state_is_silent(enum kbase_csf_reset_gpu_state state)
|
||||
@@ -259,8 +259,8 @@ static void kbase_csf_debug_dump_registers(struct kbase_device *kbdev)
|
||||
|
||||
static void kbase_csf_dump_firmware_trace_buffer(struct kbase_device *kbdev)
|
||||
{
|
||||
u8 *buf, *line_str;
|
||||
unsigned int read_size;
|
||||
u8 *buf, *p, *pnewline, *pend, *pendbuf;
|
||||
unsigned int read_size, remaining_size;
|
||||
struct firmware_trace_buffer *tb =
|
||||
kbase_csf_firmware_get_trace_buffer(kbdev, FW_TRACE_BUF_NAME);
|
||||
|
||||
@@ -269,41 +269,53 @@ static void kbase_csf_dump_firmware_trace_buffer(struct kbase_device *kbdev)
|
||||
return;
|
||||
}
|
||||
|
||||
buf = kmalloc(PAGE_SIZE + DUMP_HEX_CHARS_PER_LINE + 1, GFP_KERNEL);
|
||||
buf = kmalloc(PAGE_SIZE + 1, GFP_KERNEL);
|
||||
if (buf == NULL) {
|
||||
dev_err(kbdev->dev, "Short of memory, firmware trace dump skipped");
|
||||
return;
|
||||
}
|
||||
line_str = &buf[PAGE_SIZE];
|
||||
|
||||
buf[PAGE_SIZE] = 0;
|
||||
|
||||
p = buf;
|
||||
pendbuf = &buf[PAGE_SIZE];
|
||||
|
||||
dev_err(kbdev->dev, "Firmware trace buffer dump:");
|
||||
while ((read_size = kbase_csf_firmware_trace_buffer_read_data(tb, buf,
|
||||
PAGE_SIZE))) {
|
||||
u64 *ptr = (u64 *)buf;
|
||||
u32 num_dwords;
|
||||
while ((read_size = kbase_csf_firmware_trace_buffer_read_data(tb, p,
|
||||
pendbuf - p))) {
|
||||
pend = p + read_size;
|
||||
p = buf;
|
||||
|
||||
for (num_dwords = read_size / sizeof(u64);
|
||||
num_dwords >= DUMP_DWORDS_PER_LINE;
|
||||
num_dwords -= DUMP_DWORDS_PER_LINE) {
|
||||
dev_err(kbdev->dev, "%016llx %016llx %016llx %016llx",
|
||||
ptr[0], ptr[1], ptr[2], ptr[3]);
|
||||
ptr += DUMP_DWORDS_PER_LINE;
|
||||
while (p < pend && (pnewline = memchr(p, '\n', pend - p))) {
|
||||
/* Null-terminate the string */
|
||||
*pnewline = 0;
|
||||
|
||||
dev_err(kbdev->dev, "FW> %s", p);
|
||||
|
||||
p = pnewline + 1;
|
||||
}
|
||||
|
||||
if (num_dwords) {
|
||||
int pos = 0;
|
||||
remaining_size = pend - p;
|
||||
|
||||
while (num_dwords--) {
|
||||
pos += snprintf(line_str + pos,
|
||||
DUMP_HEX_CHARS_PER_DWORD + 1,
|
||||
"%016llx ", ptr[0]);
|
||||
ptr++;
|
||||
}
|
||||
|
||||
dev_err(kbdev->dev, "%s", line_str);
|
||||
if (!remaining_size) {
|
||||
p = buf;
|
||||
} else if (remaining_size < PAGE_SIZE) {
|
||||
/* Copy unfinished string to the start of the buffer */
|
||||
memmove(buf, p, remaining_size);
|
||||
p = &buf[remaining_size];
|
||||
} else {
|
||||
/* Print abnormal page-long string without newlines */
|
||||
dev_err(kbdev->dev, "FW> %s", buf);
|
||||
p = buf;
|
||||
}
|
||||
}
|
||||
|
||||
if (p != buf) {
|
||||
/* Null-terminate and print last unfinished string */
|
||||
*p = 0;
|
||||
dev_err(kbdev->dev, "FW> %s", buf);
|
||||
}
|
||||
|
||||
kfree(buf);
|
||||
}
|
||||
|
||||
@@ -332,36 +344,12 @@ static void kbase_csf_hwcnt_on_reset_error(struct kbase_device *kbdev)
|
||||
kbase_csf_scheduler_spin_unlock(kbdev, flags);
|
||||
}
|
||||
|
||||
static int kbase_csf_reset_gpu_now(struct kbase_device *kbdev,
|
||||
bool firmware_inited, bool silent)
|
||||
static enum kbasep_soft_reset_status kbase_csf_reset_gpu_once(struct kbase_device *kbdev,
|
||||
bool firmware_inited, bool silent)
|
||||
{
|
||||
unsigned long flags;
|
||||
int err;
|
||||
|
||||
WARN_ON(kbdev->irq_reset_flush);
|
||||
/* The reset must now be happening otherwise other threads will not
|
||||
* have been synchronized with to stop their access to the HW
|
||||
*/
|
||||
#if KERNEL_VERSION(5, 3, 0) <= LINUX_VERSION_CODE
|
||||
lockdep_assert_held_write(&kbdev->csf.reset.sem);
|
||||
#elif KERNEL_VERSION(4, 10, 0) <= LINUX_VERSION_CODE
|
||||
lockdep_assert_held_exclusive(&kbdev->csf.reset.sem);
|
||||
#else
|
||||
lockdep_assert_held(&kbdev->csf.reset.sem);
|
||||
#endif
|
||||
WARN_ON(!kbase_reset_gpu_is_active(kbdev));
|
||||
|
||||
/* Reset the scheduler state before disabling the interrupts as suspend
|
||||
* of active CSG slots would also be done as a part of reset.
|
||||
*/
|
||||
if (likely(firmware_inited))
|
||||
kbase_csf_scheduler_reset(kbdev);
|
||||
cancel_work_sync(&kbdev->csf.firmware_reload_work);
|
||||
|
||||
dev_dbg(kbdev->dev, "Disable GPU hardware counters.\n");
|
||||
/* This call will block until counters are disabled.
|
||||
*/
|
||||
kbase_hwcnt_context_disable(kbdev->hwcnt_gpu_ctx);
|
||||
enum kbasep_soft_reset_status ret = RESET_SUCCESS;
|
||||
|
||||
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
|
||||
spin_lock(&kbdev->mmu_mask_change);
|
||||
@@ -380,8 +368,7 @@ static int kbase_csf_reset_gpu_now(struct kbase_device *kbdev,
|
||||
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
|
||||
|
||||
dev_dbg(kbdev->dev, "Ensure that any IRQ handlers have finished\n");
|
||||
/* Must be done without any locks IRQ handlers will take.
|
||||
*/
|
||||
/* Must be done without any locks IRQ handlers will take. */
|
||||
kbase_synchronize_irqs(kbdev);
|
||||
|
||||
dev_dbg(kbdev->dev, "Flush out any in-flight work items\n");
|
||||
@@ -421,10 +408,8 @@ static int kbase_csf_reset_gpu_now(struct kbase_device *kbdev,
|
||||
|
||||
mutex_unlock(&kbdev->pm.lock);
|
||||
|
||||
if (WARN_ON(err)) {
|
||||
kbase_csf_hwcnt_on_reset_error(kbdev);
|
||||
return err;
|
||||
}
|
||||
if (WARN_ON(err))
|
||||
return SOFT_RESET_FAILED;
|
||||
|
||||
mutex_lock(&kbdev->mmu_hw_mutex);
|
||||
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
|
||||
@@ -441,20 +426,78 @@ static int kbase_csf_reset_gpu_now(struct kbase_device *kbdev,
|
||||
err = kbase_pm_wait_for_desired_state(kbdev);
|
||||
mutex_unlock(&kbdev->pm.lock);
|
||||
|
||||
if (WARN_ON(err)) {
|
||||
kbase_csf_hwcnt_on_reset_error(kbdev);
|
||||
return err;
|
||||
if (err) {
|
||||
if (!kbase_pm_l2_is_in_desired_state(kbdev))
|
||||
ret = L2_ON_FAILED;
|
||||
else if (!kbase_pm_mcu_is_in_desired_state(kbdev))
|
||||
ret = MCU_REINIT_FAILED;
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int kbase_csf_reset_gpu_now(struct kbase_device *kbdev, bool firmware_inited, bool silent)
|
||||
{
|
||||
unsigned long flags;
|
||||
enum kbasep_soft_reset_status ret;
|
||||
|
||||
WARN_ON(kbdev->irq_reset_flush);
|
||||
/* The reset must now be happening otherwise other threads will not
|
||||
* have been synchronized with to stop their access to the HW
|
||||
*/
|
||||
#if KERNEL_VERSION(5, 3, 0) <= LINUX_VERSION_CODE
|
||||
lockdep_assert_held_write(&kbdev->csf.reset.sem);
|
||||
#elif KERNEL_VERSION(4, 10, 0) <= LINUX_VERSION_CODE
|
||||
lockdep_assert_held_exclusive(&kbdev->csf.reset.sem);
|
||||
#else
|
||||
lockdep_assert_held(&kbdev->csf.reset.sem);
|
||||
#endif
|
||||
WARN_ON(!kbase_reset_gpu_is_active(kbdev));
|
||||
|
||||
/* Reset the scheduler state before disabling the interrupts as suspend
|
||||
* of active CSG slots would also be done as a part of reset.
|
||||
*/
|
||||
if (likely(firmware_inited))
|
||||
kbase_csf_scheduler_reset(kbdev);
|
||||
cancel_work_sync(&kbdev->csf.firmware_reload_work);
|
||||
|
||||
dev_dbg(kbdev->dev, "Disable GPU hardware counters.\n");
|
||||
/* This call will block until counters are disabled. */
|
||||
kbase_hwcnt_context_disable(kbdev->hwcnt_gpu_ctx);
|
||||
|
||||
ret = kbase_csf_reset_gpu_once(kbdev, firmware_inited, silent);
|
||||
if (ret == SOFT_RESET_FAILED) {
|
||||
dev_err(kbdev->dev, "Soft-reset failed");
|
||||
goto err;
|
||||
} else if (ret == L2_ON_FAILED) {
|
||||
dev_err(kbdev->dev, "L2 power up failed after the soft-reset");
|
||||
goto err;
|
||||
} else if (ret == MCU_REINIT_FAILED) {
|
||||
dev_err(kbdev->dev, "MCU re-init failed trying full firmware reload");
|
||||
/* Since MCU reinit failed despite successful soft reset, we can try
|
||||
* the firmware full reload.
|
||||
*/
|
||||
kbdev->csf.firmware_full_reload_needed = true;
|
||||
ret = kbase_csf_reset_gpu_once(kbdev, firmware_inited, true);
|
||||
if (ret != RESET_SUCCESS) {
|
||||
dev_err(kbdev->dev,
|
||||
"MCU Re-init failed even after trying full firmware reload, ret = [%d]",
|
||||
ret);
|
||||
goto err;
|
||||
}
|
||||
}
|
||||
|
||||
/* Re-enable GPU hardware counters */
|
||||
kbase_csf_scheduler_spin_lock(kbdev, &flags);
|
||||
kbase_hwcnt_context_enable(kbdev->hwcnt_gpu_ctx);
|
||||
kbase_csf_scheduler_spin_unlock(kbdev, flags);
|
||||
|
||||
if (!silent)
|
||||
dev_err(kbdev->dev, "Reset complete");
|
||||
|
||||
return 0;
|
||||
err:
|
||||
|
||||
kbase_csf_hwcnt_on_reset_error(kbdev);
|
||||
return -1;
|
||||
}
|
||||
|
||||
static void kbase_csf_reset_gpu_worker(struct work_struct *data)
|
||||
@@ -593,7 +636,7 @@ bool kbase_reset_gpu_is_active(struct kbase_device *kbdev)
|
||||
int kbase_reset_gpu_wait(struct kbase_device *kbdev)
|
||||
{
|
||||
const long wait_timeout =
|
||||
kbase_csf_timeout_in_jiffies(GPU_RESET_TIMEOUT_MS);
|
||||
kbase_csf_timeout_in_jiffies(kbase_get_timeout_ms(kbdev, CSF_GPU_RESET_TIMEOUT));
|
||||
long remaining;
|
||||
|
||||
/* Inform lockdep we might be trying to wait on a reset (as
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
|
||||
/*
|
||||
*
|
||||
* (C) COPYRIGHT 2018-2021 ARM Limited. All rights reserved.
|
||||
* (C) COPYRIGHT 2018-2022 ARM Limited. All rights reserved.
|
||||
*
|
||||
* This program is free software and is provided to you under the terms of the
|
||||
* GNU General Public License version 2 as published by the Free Software
|
||||
@@ -28,7 +28,7 @@
|
||||
#include <tl/mali_kbase_tracepoints.h>
|
||||
#include <backend/gpu/mali_kbase_pm_internal.h>
|
||||
#include <linux/export.h>
|
||||
#include <uapi/gpu/arm/bifrost/csf/mali_gpu_csf_registers.h>
|
||||
#include <csf/mali_kbase_csf_registers.h>
|
||||
#include <uapi/gpu/arm/bifrost/mali_base_kernel.h>
|
||||
#include <mali_kbase_hwaccess_time.h>
|
||||
|
||||
@@ -246,7 +246,7 @@ static enum hrtimer_restart tick_timer_callback(struct hrtimer *timer)
|
||||
*
|
||||
* This function will start the scheduling tick hrtimer and is supposed to
|
||||
* be called only from the tick work item function. The tick hrtimer should
|
||||
* should not be active already.
|
||||
* not be active already.
|
||||
*/
|
||||
static void start_tick_timer(struct kbase_device *kbdev)
|
||||
{
|
||||
@@ -372,7 +372,7 @@ static void assign_user_doorbell_to_queue(struct kbase_device *kbdev,
|
||||
mutex_lock(&kbdev->csf.reg_lock);
|
||||
|
||||
/* If bind operation for the queue hasn't completed yet, then the
|
||||
* the CSI can't be programmed for the queue
|
||||
* CSI can't be programmed for the queue
|
||||
* (even in stopped state) and so the doorbell also can't be assigned
|
||||
* to it.
|
||||
*/
|
||||
@@ -406,6 +406,85 @@ static void scheduler_doorbell_init(struct kbase_device *kbdev)
|
||||
WARN_ON(doorbell_nr != CSF_KERNEL_DOORBELL_NR);
|
||||
}
|
||||
|
||||
/**
|
||||
* update_on_slot_queues_offsets - Update active queues' EXTRACT offsets
|
||||
*
|
||||
* @kbdev: Instance of a GPU platform device that implements a CSF interface.
|
||||
*
|
||||
* This function updates the EXTRACT offset for all queues whose groups have
|
||||
* been assigned a physical slot. These values could be used to detect a
|
||||
* queue's true idleness status. This is intended to be an additional check
|
||||
* on top of the GPU idle notification to account for race conditions.
|
||||
* This function is supposed to be called only when GPU idle notification
|
||||
* interrupt is received.
|
||||
*/
|
||||
static void update_on_slot_queues_offsets(struct kbase_device *kbdev)
|
||||
{
|
||||
struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
|
||||
/* All CSGs have the same number of CSs */
|
||||
size_t const max_streams = kbdev->csf.global_iface.groups[0].stream_num;
|
||||
size_t i;
|
||||
|
||||
lockdep_assert_held(&scheduler->interrupt_lock);
|
||||
|
||||
/* csg_slots_idle_mask is not used here for the looping, as it could get
|
||||
* updated concurrently when Scheduler re-evaluates the idle status of
|
||||
* the CSGs for which idle notification was received previously.
|
||||
*/
|
||||
for_each_set_bit(i, scheduler->csg_inuse_bitmap, kbdev->csf.global_iface.group_num) {
|
||||
struct kbase_queue_group *const group = scheduler->csg_slots[i].resident_group;
|
||||
size_t j;
|
||||
|
||||
if (WARN_ON(!group))
|
||||
continue;
|
||||
|
||||
for (j = 0; j < max_streams; ++j) {
|
||||
struct kbase_queue *const queue = group->bound_queues[j];
|
||||
|
||||
if (queue) {
|
||||
u64 const *const output_addr =
|
||||
(u64 const *)(queue->user_io_addr + PAGE_SIZE);
|
||||
|
||||
queue->extract_ofs = output_addr[CS_EXTRACT_LO / sizeof(u64)];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void enqueue_gpu_idle_work(struct kbase_csf_scheduler *const scheduler)
|
||||
{
|
||||
atomic_set(&scheduler->gpu_no_longer_idle, false);
|
||||
queue_work(scheduler->idle_wq, &scheduler->gpu_idle_work);
|
||||
}
|
||||
|
||||
void kbase_csf_scheduler_process_gpu_idle_event(struct kbase_device *kbdev)
|
||||
{
|
||||
struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
|
||||
int non_idle_offslot_grps;
|
||||
bool can_suspend_on_idle;
|
||||
|
||||
lockdep_assert_held(&scheduler->interrupt_lock);
|
||||
|
||||
non_idle_offslot_grps = atomic_read(&scheduler->non_idle_offslot_grps);
|
||||
can_suspend_on_idle = kbase_pm_idle_groups_sched_suspendable(kbdev);
|
||||
KBASE_KTRACE_ADD(kbdev, SCHEDULER_CAN_IDLE, NULL,
|
||||
((u64)(u32)non_idle_offslot_grps) | (((u64)can_suspend_on_idle) << 32));
|
||||
|
||||
if (!non_idle_offslot_grps) {
|
||||
if (can_suspend_on_idle) {
|
||||
/* The GPU idle worker relies on update_on_slot_queues_offsets() to have
|
||||
* finished. It's queued before to reduce the time it takes till execution
|
||||
* but it'll eventually be blocked by the scheduler->interrupt_lock.
|
||||
*/
|
||||
enqueue_gpu_idle_work(scheduler);
|
||||
update_on_slot_queues_offsets(kbdev);
|
||||
}
|
||||
} else {
|
||||
/* Advance the scheduling tick to get the non-idle suspended groups loaded soon */
|
||||
kbase_csf_scheduler_advance_tick_nolock(kbdev);
|
||||
}
|
||||
}
|
||||
|
||||
u32 kbase_csf_scheduler_get_nr_active_csgs_locked(struct kbase_device *kbdev)
|
||||
{
|
||||
u32 nr_active_csgs;
|
||||
@@ -551,54 +630,6 @@ static bool scheduler_timer_is_enabled_nolock(struct kbase_device *kbdev)
|
||||
return kbdev->csf.scheduler.timer_enabled;
|
||||
}
|
||||
|
||||
static void enable_gpu_idle_fw_timer(struct kbase_device *kbdev)
|
||||
{
|
||||
struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
|
||||
unsigned long flags;
|
||||
|
||||
lockdep_assert_held(&scheduler->lock);
|
||||
|
||||
if (scheduler->gpu_idle_fw_timer_enabled)
|
||||
return;
|
||||
|
||||
spin_lock_irqsave(&scheduler->interrupt_lock, flags);
|
||||
|
||||
/* Update the timer_enabled flag requires holding interrupt_lock */
|
||||
scheduler->gpu_idle_fw_timer_enabled = true;
|
||||
kbase_csf_firmware_enable_gpu_idle_timer(kbdev);
|
||||
|
||||
spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
|
||||
}
|
||||
|
||||
static void disable_gpu_idle_fw_timer_locked(struct kbase_device *kbdev)
|
||||
{
|
||||
struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
|
||||
|
||||
lockdep_assert_held(&scheduler->lock);
|
||||
lockdep_assert_held(&scheduler->interrupt_lock);
|
||||
|
||||
/* Update of the timer_enabled flag requires holding interrupt_lock */
|
||||
if (scheduler->gpu_idle_fw_timer_enabled) {
|
||||
scheduler->gpu_idle_fw_timer_enabled = false;
|
||||
kbase_csf_firmware_disable_gpu_idle_timer(kbdev);
|
||||
}
|
||||
}
|
||||
|
||||
static void disable_gpu_idle_fw_timer(struct kbase_device *kbdev)
|
||||
{
|
||||
struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
|
||||
unsigned long flags;
|
||||
|
||||
lockdep_assert_held(&scheduler->lock);
|
||||
|
||||
if (!scheduler->gpu_idle_fw_timer_enabled)
|
||||
return;
|
||||
|
||||
spin_lock_irqsave(&scheduler->interrupt_lock, flags);
|
||||
disable_gpu_idle_fw_timer_locked(kbdev);
|
||||
spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
|
||||
}
|
||||
|
||||
/**
|
||||
* scheduler_pm_active_handle_suspend() - Acquire the PM reference count for
|
||||
* Scheduler
|
||||
@@ -631,12 +662,15 @@ static int scheduler_pm_active_handle_suspend(struct kbase_device *kbdev,
|
||||
if (!prev_count) {
|
||||
ret = kbase_pm_context_active_handle_suspend(kbdev,
|
||||
suspend_handler);
|
||||
if (ret) {
|
||||
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
|
||||
/* Invoke the PM state machines again as the change in MCU
|
||||
* desired status, due to the update of scheduler.pm_active_count,
|
||||
* may be missed by the thread that called pm_wait_for_desired_state()
|
||||
*/
|
||||
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
|
||||
if (ret)
|
||||
kbdev->csf.scheduler.pm_active_count--;
|
||||
kbase_pm_update_state(kbdev);
|
||||
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
|
||||
}
|
||||
kbase_pm_update_state(kbdev);
|
||||
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
|
||||
}
|
||||
|
||||
return ret;
|
||||
@@ -716,8 +750,16 @@ static void scheduler_pm_idle(struct kbase_device *kbdev)
|
||||
kbdev->csf.scheduler.pm_active_count--;
|
||||
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
|
||||
|
||||
if (prev_count == 1)
|
||||
if (prev_count == 1) {
|
||||
kbase_pm_context_idle(kbdev);
|
||||
/* Invoke the PM state machines again as the change in MCU
|
||||
* desired status, due to the update of scheduler.pm_active_count,
|
||||
* may be missed by the thread that called pm_wait_for_desired_state()
|
||||
*/
|
||||
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
|
||||
kbase_pm_update_state(kbdev);
|
||||
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef KBASE_PM_RUNTIME
|
||||
@@ -746,8 +788,16 @@ static void scheduler_pm_idle_before_sleep(struct kbase_device *kbdev)
|
||||
kbdev->pm.backend.exit_gpu_sleep_mode = false;
|
||||
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
|
||||
|
||||
if (prev_count == 1)
|
||||
if (prev_count == 1) {
|
||||
kbase_pm_context_idle(kbdev);
|
||||
/* Invoke the PM state machines again as the change in MCU
|
||||
* desired status, due to the update of scheduler.pm_active_count,
|
||||
* may be missed by the thread that called pm_wait_for_desired_state()
|
||||
*/
|
||||
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
|
||||
kbase_pm_update_state(kbdev);
|
||||
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
@@ -1735,6 +1785,13 @@ bool save_slot_cs(struct kbase_csf_cmd_stream_group_info const *const ginfo,
|
||||
u32 status = kbase_csf_firmware_cs_output(stream, CS_STATUS_WAIT);
|
||||
bool is_waiting = false;
|
||||
|
||||
#if IS_ENABLED(CONFIG_DEBUG_FS)
|
||||
u64 cmd_ptr = kbase_csf_firmware_cs_output(stream, CS_STATUS_CMD_PTR_LO);
|
||||
|
||||
cmd_ptr |= (u64)kbase_csf_firmware_cs_output(stream, CS_STATUS_CMD_PTR_HI) << 32;
|
||||
queue->saved_cmd_ptr = cmd_ptr;
|
||||
#endif
|
||||
|
||||
KBASE_KTRACE_ADD_CSF_GRP_Q(stream->kbdev, QUEUE_SYNC_STATUS_WAIT,
|
||||
queue->group, queue, status);
|
||||
|
||||
@@ -1948,7 +2005,7 @@ void remove_group_from_runnable(struct kbase_csf_scheduler *const scheduler,
|
||||
cancel_tick_timer(kctx->kbdev);
|
||||
WARN_ON(atomic_read(&scheduler->non_idle_offslot_grps));
|
||||
if (scheduler->state != SCHED_SUSPENDED)
|
||||
queue_work(system_wq, &scheduler->gpu_idle_work);
|
||||
enqueue_gpu_idle_work(scheduler);
|
||||
}
|
||||
KBASE_KTRACE_ADD_CSF_GRP(kctx->kbdev, SCHEDULER_TOP_GRP, scheduler->top_grp,
|
||||
scheduler->num_active_address_spaces |
|
||||
@@ -2078,7 +2135,7 @@ static void update_offslot_non_idle_cnt_on_grp_suspend(
|
||||
}
|
||||
}
|
||||
|
||||
static bool confirm_cmd_buf_empty(struct kbase_queue *queue)
|
||||
static bool confirm_cmd_buf_empty(struct kbase_queue const *queue)
|
||||
{
|
||||
bool cs_empty;
|
||||
bool cs_idle;
|
||||
@@ -2090,8 +2147,8 @@ static bool confirm_cmd_buf_empty(struct kbase_queue *queue)
|
||||
|
||||
u32 glb_version = iface->version;
|
||||
|
||||
u64 *input_addr = (u64 *)queue->user_io_addr;
|
||||
u64 *output_addr = (u64 *)(queue->user_io_addr + PAGE_SIZE);
|
||||
u64 const *input_addr = (u64 const *)queue->user_io_addr;
|
||||
u64 const *output_addr = (u64 const *)(queue->user_io_addr + PAGE_SIZE);
|
||||
|
||||
if (glb_version >= kbase_csf_interface_version(1, 0, 0)) {
|
||||
/* CS_STATUS_SCOREBOARD supported from CSF 1.0 */
|
||||
@@ -2605,7 +2662,7 @@ void kbase_csf_scheduler_group_deschedule(struct kbase_queue_group *group)
|
||||
if (kbase_csf_scheduler_wait_mcu_active(kbdev))
|
||||
dev_warn(
|
||||
kbdev->dev,
|
||||
"[%llu] Wait for MCU active failed when when terminating group %d of context %d_%d on slot %d",
|
||||
"[%llu] Wait for MCU active failed when terminating group %d of context %d_%d on slot %d",
|
||||
kbase_backend_get_cycle_cnt(kbdev),
|
||||
group->handle, group->kctx->tgid,
|
||||
group->kctx->id, group->csg_nr);
|
||||
@@ -2704,6 +2761,7 @@ static int scheduler_group_schedule(struct kbase_queue_group *group)
|
||||
}
|
||||
} else if (!queue_group_scheduled_locked(group)) {
|
||||
int new_val;
|
||||
|
||||
insert_group_to_runnable(&kbdev->csf.scheduler, group,
|
||||
KBASE_CSF_GROUP_RUNNABLE);
|
||||
/* A new group into the scheduler */
|
||||
@@ -3033,9 +3091,9 @@ static void program_suspending_csg_slots(struct kbase_device *kbdev)
|
||||
struct kbase_queue_group *group =
|
||||
scheduler->csg_slots[i].resident_group;
|
||||
|
||||
if (WARN_ON(!csg_slot_stopped_locked(kbdev, (s8)i))) {
|
||||
if (WARN_ON(!csg_slot_stopped_locked(kbdev, (s8)i)))
|
||||
continue;
|
||||
}
|
||||
|
||||
/* The on slot csg is now stopped */
|
||||
clear_bit(i, slot_mask);
|
||||
|
||||
@@ -3533,13 +3591,13 @@ static void scheduler_group_check_protm_enter(struct kbase_device *const kbdev,
|
||||
* GPUCORE-21394.
|
||||
*/
|
||||
|
||||
/* Disable the idle timer */
|
||||
disable_gpu_idle_fw_timer_locked(kbdev);
|
||||
|
||||
/* Switch to protected mode */
|
||||
scheduler->active_protm_grp = input_grp;
|
||||
KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_ENTER_PROTM,
|
||||
input_grp, 0u);
|
||||
/* Reset the tick's pending protm seq number */
|
||||
scheduler->tick_protm_pending_seq =
|
||||
KBASEP_TICK_PROTM_PEND_SCAN_SEQ_NR_INVALID;
|
||||
|
||||
kbase_csf_enter_protected_mode(kbdev);
|
||||
spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
|
||||
@@ -3637,6 +3695,7 @@ static void scheduler_ctx_scan_groups(struct kbase_device *kbdev,
|
||||
struct kbase_queue_group *group;
|
||||
|
||||
lockdep_assert_held(&scheduler->lock);
|
||||
lockdep_assert_held(&scheduler->interrupt_lock);
|
||||
if (WARN_ON(priority < 0) ||
|
||||
WARN_ON(priority >= KBASE_QUEUE_GROUP_PRIORITY_COUNT))
|
||||
return;
|
||||
@@ -3656,6 +3715,14 @@ static void scheduler_ctx_scan_groups(struct kbase_device *kbdev,
|
||||
/* Set the scanout sequence number, starting from 0 */
|
||||
group->scan_seq_num = scheduler->csg_scan_count_for_tick++;
|
||||
|
||||
if (scheduler->tick_protm_pending_seq ==
|
||||
KBASEP_TICK_PROTM_PEND_SCAN_SEQ_NR_INVALID) {
|
||||
if (!bitmap_empty(group->protm_pending_bitmap,
|
||||
kbdev->csf.global_iface.groups[0].stream_num))
|
||||
scheduler->tick_protm_pending_seq =
|
||||
group->scan_seq_num;
|
||||
}
|
||||
|
||||
if (queue_group_idle_locked(group)) {
|
||||
if (on_slot_group_idle_locked(group))
|
||||
list_add_tail(&group->link_to_schedule,
|
||||
@@ -3738,6 +3805,7 @@ static void scheduler_rotate_groups(struct kbase_device *kbdev)
|
||||
WARN_ON(top_grp->kctx != top_ctx);
|
||||
if (!WARN_ON(list_empty(list))) {
|
||||
struct kbase_queue_group *new_head_grp;
|
||||
|
||||
list_move_tail(&top_grp->link, list);
|
||||
new_head_grp = (!list_empty(list)) ?
|
||||
list_first_entry(list, struct kbase_queue_group, link) :
|
||||
@@ -3774,6 +3842,7 @@ static void scheduler_rotate_ctxs(struct kbase_device *kbdev)
|
||||
|
||||
if (!WARN_ON(!found)) {
|
||||
struct kbase_context *new_head_kctx;
|
||||
|
||||
list_move_tail(&pos->csf.link, list);
|
||||
KBASE_KTRACE_ADD(kbdev, SCHEDULER_ROTATE_RUNNABLE, pos,
|
||||
0u);
|
||||
@@ -4042,6 +4111,59 @@ static int suspend_active_groups_on_powerdown(struct kbase_device *kbdev,
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* all_on_slot_groups_remained_idle - Live check for all groups' idleness
|
||||
*
|
||||
* @kbdev: Pointer to the device.
|
||||
*
|
||||
* Returns false if any of the queues inside any of the groups that have been
|
||||
* assigned a physical CSG slot have work to execute, or have executed work
|
||||
* since having received a GPU idle notification. This function is used to
|
||||
* handle a race condition between firmware reporting GPU idle and userspace
|
||||
* submitting more work by directly ringing a doorbell.
|
||||
*
|
||||
* Return: false if any queue inside any resident group has work to be processed
|
||||
* or has processed work since GPU idle event, true otherwise.
|
||||
*/
|
||||
static bool all_on_slot_groups_remained_idle(struct kbase_device *kbdev)
|
||||
{
|
||||
struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
|
||||
/* All CSGs have the same number of CSs */
|
||||
size_t const max_streams = kbdev->csf.global_iface.groups[0].stream_num;
|
||||
size_t i;
|
||||
|
||||
lockdep_assert_held(&scheduler->lock);
|
||||
lockdep_assert_held(&scheduler->interrupt_lock);
|
||||
|
||||
for_each_set_bit(i, scheduler->csg_slots_idle_mask,
|
||||
kbdev->csf.global_iface.group_num) {
|
||||
struct kbase_queue_group *const group =
|
||||
scheduler->csg_slots[i].resident_group;
|
||||
size_t j;
|
||||
|
||||
for (j = 0; j < max_streams; ++j) {
|
||||
struct kbase_queue const *const queue =
|
||||
group->bound_queues[j];
|
||||
u64 const *output_addr;
|
||||
u64 cur_extract_ofs;
|
||||
|
||||
if (!queue)
|
||||
continue;
|
||||
|
||||
output_addr = (u64 const *)(queue->user_io_addr + PAGE_SIZE);
|
||||
cur_extract_ofs = output_addr[CS_EXTRACT_LO / sizeof(u64)];
|
||||
if (cur_extract_ofs != queue->extract_ofs) {
|
||||
/* More work has been executed since the idle
|
||||
* notification.
|
||||
*/
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool scheduler_idle_suspendable(struct kbase_device *kbdev)
|
||||
{
|
||||
bool suspend;
|
||||
@@ -4055,18 +4177,28 @@ static bool scheduler_idle_suspendable(struct kbase_device *kbdev)
|
||||
return false;
|
||||
|
||||
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
|
||||
spin_lock(&scheduler->interrupt_lock);
|
||||
if (scheduler->total_runnable_grps) {
|
||||
spin_lock(&scheduler->interrupt_lock);
|
||||
|
||||
/* Check both on-slots and off-slots groups idle status */
|
||||
suspend = kbase_csf_scheduler_all_csgs_idle(kbdev) &&
|
||||
!atomic_read(&scheduler->non_idle_offslot_grps) &&
|
||||
kbase_pm_idle_groups_sched_suspendable(kbdev);
|
||||
|
||||
spin_unlock(&scheduler->interrupt_lock);
|
||||
} else
|
||||
suspend = kbase_pm_no_runnables_sched_suspendable(kbdev);
|
||||
|
||||
/* Confirm that all groups are actually idle before proceeding with
|
||||
* suspension as groups might potentially become active again without
|
||||
* informing the scheduler in case userspace rings a doorbell directly.
|
||||
*/
|
||||
if (suspend && (unlikely(atomic_read(&scheduler->gpu_no_longer_idle)) ||
|
||||
unlikely(!all_on_slot_groups_remained_idle(kbdev)))) {
|
||||
dev_info(kbdev->dev,
|
||||
"GPU suspension skipped due to active CSGs");
|
||||
suspend = false;
|
||||
}
|
||||
|
||||
spin_unlock(&scheduler->interrupt_lock);
|
||||
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
|
||||
|
||||
return suspend;
|
||||
@@ -4150,8 +4282,6 @@ static void gpu_idle_worker(struct work_struct *work)
|
||||
}
|
||||
mutex_lock(&scheduler->lock);
|
||||
|
||||
/* Cycle completed, disable the firmware idle timer */
|
||||
disable_gpu_idle_fw_timer(kbdev);
|
||||
scheduler_is_idle_suspendable = scheduler_idle_suspendable(kbdev);
|
||||
if (scheduler_is_idle_suspendable) {
|
||||
KBASE_KTRACE_ADD(kbdev, GPU_IDLE_HANDLING_START, NULL,
|
||||
@@ -4177,6 +4307,7 @@ static void gpu_idle_worker(struct work_struct *work)
|
||||
static int scheduler_prepare(struct kbase_device *kbdev)
|
||||
{
|
||||
struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
|
||||
unsigned long flags;
|
||||
int i;
|
||||
|
||||
lockdep_assert_held(&scheduler->lock);
|
||||
@@ -4202,6 +4333,9 @@ static int scheduler_prepare(struct kbase_device *kbdev)
|
||||
scheduler->num_csg_slots_for_tick = 0;
|
||||
bitmap_zero(scheduler->csg_slots_prio_update, MAX_SUPPORTED_CSGS);
|
||||
|
||||
spin_lock_irqsave(&scheduler->interrupt_lock, flags);
|
||||
scheduler->tick_protm_pending_seq =
|
||||
KBASEP_TICK_PROTM_PEND_SCAN_SEQ_NR_INVALID;
|
||||
/* Scan out to run groups */
|
||||
for (i = 0; i < KBASE_QUEUE_GROUP_PRIORITY_COUNT; ++i) {
|
||||
struct kbase_context *kctx;
|
||||
@@ -4209,6 +4343,7 @@ static int scheduler_prepare(struct kbase_device *kbdev)
|
||||
list_for_each_entry(kctx, &scheduler->runnable_kctxs, csf.link)
|
||||
scheduler_ctx_scan_groups(kbdev, kctx, i);
|
||||
}
|
||||
spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
|
||||
|
||||
/* Update this tick's non-idle groups */
|
||||
scheduler->non_idle_scanout_grps = scheduler->ngrp_to_schedule;
|
||||
@@ -4237,42 +4372,17 @@ static int scheduler_prepare(struct kbase_device *kbdev)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void scheduler_handle_idle_timer_onoff(struct kbase_device *kbdev)
|
||||
{
|
||||
struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
|
||||
|
||||
lockdep_assert_held(&scheduler->lock);
|
||||
|
||||
/* After the scheduler apply operation, the internal variable
|
||||
* scheduler->non_idle_offslot_grps reflects the end-point view
|
||||
* of the count at the end of the active phase.
|
||||
*
|
||||
* Any changes that follow (after the scheduler has dropped the
|
||||
* scheduler->lock), reflects async operations to the scheduler,
|
||||
* such as a group gets killed (evicted) or a new group inserted,
|
||||
* cqs wait-sync triggered state transition etc.
|
||||
*
|
||||
* The condition for enabling the idle timer is that there is no
|
||||
* non-idle groups off-slots. If there is non-idle group off-slot,
|
||||
* the timer should be disabled.
|
||||
*/
|
||||
if (atomic_read(&scheduler->non_idle_offslot_grps))
|
||||
disable_gpu_idle_fw_timer(kbdev);
|
||||
else
|
||||
enable_gpu_idle_fw_timer(kbdev);
|
||||
}
|
||||
|
||||
/**
|
||||
* keep_lru_on_slots() - Check the condition for LRU is met.
|
||||
*
|
||||
* @kbdev: Pointer to the device.
|
||||
*
|
||||
* This function tries to maintain the Last-Recent-Use case on slots, when
|
||||
* the scheduler has no non-idle off-slot CSGs for a replacement
|
||||
* consideration. This effectively extends the previous scheduling results
|
||||
* for the new one. That is, the last recent used CSGs are retained on slots
|
||||
* for the new tick/tock action.
|
||||
*
|
||||
* @kbdev: Pointer to the device.
|
||||
*
|
||||
* Return: true for avoiding on-slot CSGs changes (i.e. keep existing LRU),
|
||||
* otherwise false.
|
||||
*/
|
||||
@@ -4294,10 +4404,6 @@ static bool keep_lru_on_slots(struct kbase_device *kbdev)
|
||||
*/
|
||||
keep_lru = kbase_csf_scheduler_all_csgs_idle(kbdev);
|
||||
|
||||
if (keep_lru && !scheduler->gpu_idle_fw_timer_enabled) {
|
||||
scheduler->gpu_idle_fw_timer_enabled = true;
|
||||
kbase_csf_firmware_enable_gpu_idle_timer(kbdev);
|
||||
}
|
||||
spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
|
||||
|
||||
dev_dbg(kbdev->dev, "Keep_LRU: %d, CSGs on-slots: %d\n",
|
||||
@@ -4311,6 +4417,8 @@ static bool keep_lru_on_slots(struct kbase_device *kbdev)
|
||||
* prepare_fast_local_tock() - making preparation arrangement for exercising
|
||||
* a fast local tock inside scheduling-actions.
|
||||
*
|
||||
* @kbdev: Pointer to the GPU device.
|
||||
*
|
||||
* The function assumes that a scheduling action of firing a fast local tock
|
||||
* call (i.e. an equivalent tock action without dropping the lock) is desired
|
||||
* if there are idle onslot CSGs. The function updates those affected CSGs'
|
||||
@@ -4320,8 +4428,6 @@ static bool keep_lru_on_slots(struct kbase_device *kbdev)
|
||||
* plus some potential newly idle CSGs in the scheduling action committing
|
||||
* steps.
|
||||
*
|
||||
* @kbdev: Pointer to the GPU device.
|
||||
*
|
||||
* Return: number of on-slots CSGs that can be considered for replacing.
|
||||
*/
|
||||
static int prepare_fast_local_tock(struct kbase_device *kbdev)
|
||||
@@ -4408,6 +4514,17 @@ static void schedule_actions(struct kbase_device *kbdev, bool is_tick)
|
||||
|
||||
redo_local_tock:
|
||||
scheduler_prepare(kbdev);
|
||||
/* Need to specifically enqueue the GPU idle work if there are no groups
|
||||
* to schedule despite the runnable groups. This scenario will happen
|
||||
* if System suspend is done when all groups are idle and no work
|
||||
* is submitted for the groups after the System resume.
|
||||
*/
|
||||
if (unlikely(!scheduler->ngrp_to_schedule &&
|
||||
scheduler->total_runnable_grps)) {
|
||||
dev_dbg(kbdev->dev, "No groups to schedule in the tick");
|
||||
enqueue_gpu_idle_work(scheduler);
|
||||
return;
|
||||
}
|
||||
spin_lock_irqsave(&scheduler->interrupt_lock, flags);
|
||||
protm_grp = scheduler->active_protm_grp;
|
||||
|
||||
@@ -4423,6 +4540,7 @@ redo_local_tock:
|
||||
*/
|
||||
if (protm_grp && scheduler->top_grp == protm_grp) {
|
||||
int new_val;
|
||||
|
||||
dev_dbg(kbdev->dev, "Scheduler keep protm exec: group-%d",
|
||||
protm_grp->handle);
|
||||
new_val = atomic_dec_return(&scheduler->non_idle_offslot_grps);
|
||||
@@ -4452,11 +4570,6 @@ redo_local_tock:
|
||||
|
||||
scheduler_apply(kbdev);
|
||||
|
||||
/* Post-apply, all the committed groups in this tick are on
|
||||
* slots, time to arrange the idle timer on/off decision.
|
||||
*/
|
||||
scheduler_handle_idle_timer_onoff(kbdev);
|
||||
|
||||
/* Scheduler is dropping the exec of the previous protm_grp,
|
||||
* Until the protm quit completes, the GPU is effectively
|
||||
* locked in the secure mode.
|
||||
@@ -4491,7 +4604,6 @@ redo_local_tock:
|
||||
}
|
||||
|
||||
spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
|
||||
return;
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -4576,7 +4688,7 @@ static void schedule_on_tock(struct work_struct *work)
|
||||
|
||||
scheduler->state = SCHED_INACTIVE;
|
||||
if (!scheduler->total_runnable_grps)
|
||||
queue_work(system_wq, &scheduler->gpu_idle_work);
|
||||
enqueue_gpu_idle_work(scheduler);
|
||||
mutex_unlock(&scheduler->lock);
|
||||
kbase_reset_gpu_allow(kbdev);
|
||||
|
||||
@@ -4627,8 +4739,9 @@ static void schedule_on_tick(struct work_struct *work)
|
||||
dev_dbg(kbdev->dev,
|
||||
"scheduling for next tick, num_runnable_groups:%u\n",
|
||||
scheduler->total_runnable_grps);
|
||||
} else if (!scheduler->total_runnable_grps)
|
||||
queue_work(system_wq, &scheduler->gpu_idle_work);
|
||||
} else if (!scheduler->total_runnable_grps) {
|
||||
enqueue_gpu_idle_work(scheduler);
|
||||
}
|
||||
|
||||
scheduler->state = SCHED_INACTIVE;
|
||||
mutex_unlock(&scheduler->lock);
|
||||
@@ -5044,7 +5157,6 @@ static void firmware_aliveness_monitor(struct work_struct *work)
|
||||
exit:
|
||||
mutex_unlock(&kbdev->csf.scheduler.lock);
|
||||
kbase_reset_gpu_allow(kbdev);
|
||||
return;
|
||||
}
|
||||
|
||||
int kbase_csf_scheduler_group_copy_suspend_buf(struct kbase_queue_group *group,
|
||||
@@ -5289,6 +5401,8 @@ void kbase_csf_scheduler_group_protm_enter(struct kbase_queue_group *group)
|
||||
|
||||
mutex_lock(&scheduler->lock);
|
||||
|
||||
if (group->run_state == KBASE_CSF_GROUP_IDLE)
|
||||
group->run_state = KBASE_CSF_GROUP_RUNNABLE;
|
||||
/* Check if the group is now eligible for execution in protected mode. */
|
||||
if (scheduler_get_protm_enter_async_group(kbdev, group))
|
||||
scheduler_group_check_protm_enter(kbdev, group);
|
||||
@@ -5457,6 +5571,11 @@ static void check_sync_update_in_sleep_mode(struct kbase_device *kbdev)
|
||||
continue;
|
||||
|
||||
if (check_sync_update_for_on_slot_group(group)) {
|
||||
/* As sync update has been performed for an on-slot
|
||||
* group, when MCU is in sleep state, ring the doorbell
|
||||
* so that FW can re-evaluate the SYNC_WAIT on wakeup.
|
||||
*/
|
||||
kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR);
|
||||
scheduler_wakeup(kbdev, true);
|
||||
return;
|
||||
}
|
||||
@@ -5529,6 +5648,7 @@ enum kbase_csf_event_callback_action check_group_sync_update_cb(void *param)
|
||||
struct kbase_context *const kctx = param;
|
||||
|
||||
KBASE_KTRACE_ADD(kctx->kbdev, SYNC_UPDATE_EVENT, kctx, 0u);
|
||||
|
||||
queue_work(kctx->csf.sched.sync_update_wq,
|
||||
&kctx->csf.sched.sync_update_work);
|
||||
|
||||
@@ -5610,6 +5730,14 @@ int kbase_csf_scheduler_early_init(struct kbase_device *kbdev)
|
||||
dev_err(kbdev->dev, "Failed to allocate scheduler workqueue\n");
|
||||
return -ENOMEM;
|
||||
}
|
||||
scheduler->idle_wq = alloc_ordered_workqueue(
|
||||
"csf_scheduler_gpu_idle_wq", WQ_HIGHPRI);
|
||||
if (!scheduler->idle_wq) {
|
||||
dev_err(kbdev->dev,
|
||||
"Failed to allocate GPU idle scheduler workqueue\n");
|
||||
destroy_workqueue(kbdev->csf.scheduler.wq);
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
INIT_WORK(&scheduler->tick_work, schedule_on_tick);
|
||||
INIT_DEFERRABLE_WORK(&scheduler->tock_work, schedule_on_tock);
|
||||
@@ -5636,11 +5764,11 @@ int kbase_csf_scheduler_early_init(struct kbase_device *kbdev)
|
||||
scheduler->last_schedule = 0;
|
||||
scheduler->tock_pending_request = false;
|
||||
scheduler->active_protm_grp = NULL;
|
||||
scheduler->gpu_idle_fw_timer_enabled = false;
|
||||
scheduler->csg_scheduling_period_ms = CSF_SCHEDULER_TIME_TICK_MS;
|
||||
scheduler_doorbell_init(kbdev);
|
||||
|
||||
INIT_WORK(&scheduler->gpu_idle_work, gpu_idle_worker);
|
||||
atomic_set(&scheduler->gpu_no_longer_idle, false);
|
||||
atomic_set(&scheduler->non_idle_offslot_grps, 0);
|
||||
|
||||
hrtimer_init(&scheduler->tick_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
|
||||
@@ -5684,6 +5812,8 @@ void kbase_csf_scheduler_term(struct kbase_device *kbdev)
|
||||
|
||||
void kbase_csf_scheduler_early_term(struct kbase_device *kbdev)
|
||||
{
|
||||
if (kbdev->csf.scheduler.idle_wq)
|
||||
destroy_workqueue(kbdev->csf.scheduler.idle_wq);
|
||||
if (kbdev->csf.scheduler.wq)
|
||||
destroy_workqueue(kbdev->csf.scheduler.wq);
|
||||
}
|
||||
@@ -5715,7 +5845,7 @@ static void scheduler_enable_tick_timer_nolock(struct kbase_device *kbdev)
|
||||
enqueue_tick_work(kbdev);
|
||||
dev_dbg(kbdev->dev, "Re-enabling the scheduler timer\n");
|
||||
} else if (scheduler->state != SCHED_SUSPENDED) {
|
||||
queue_work(system_wq, &scheduler->gpu_idle_work);
|
||||
enqueue_gpu_idle_work(scheduler);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -5805,8 +5935,6 @@ int kbase_csf_scheduler_pm_suspend(struct kbase_device *kbdev)
|
||||
|
||||
mutex_lock(&scheduler->lock);
|
||||
|
||||
disable_gpu_idle_fw_timer(kbdev);
|
||||
|
||||
#ifdef KBASE_PM_RUNTIME
|
||||
/* If scheduler is in sleeping state, then MCU needs to be activated
|
||||
* to suspend CSGs.
|
||||
@@ -5959,7 +6087,7 @@ void kbase_csf_scheduler_reval_idleness_post_sleep(struct kbase_device *kbdev)
|
||||
&kbdev->csf.global_iface.groups[csg_nr];
|
||||
bool csg_idle;
|
||||
|
||||
if (!kbdev->csf.scheduler.csg_slots[csg_nr].resident_group)
|
||||
if (!kbdev->csf.scheduler.csg_slots[csg_nr].resident_group)
|
||||
continue;
|
||||
|
||||
csg_idle =
|
||||
|
||||
@@ -569,6 +569,15 @@ void kbase_csf_scheduler_reval_idleness_post_sleep(struct kbase_device *kbdev);
|
||||
int kbase_csf_scheduler_handle_runtime_suspend(struct kbase_device *kbdev);
|
||||
#endif
|
||||
|
||||
/**
|
||||
* kbase_csf_scheduler_process_gpu_idle_event() - Process GPU idle IRQ
|
||||
*
|
||||
* @kbdev: Pointer to the device
|
||||
*
|
||||
* This function is called when a GPU idle IRQ has been raised.
|
||||
*/
|
||||
void kbase_csf_scheduler_process_gpu_idle_event(struct kbase_device *kbdev);
|
||||
|
||||
/**
|
||||
* kbase_csf_scheduler_get_nr_active_csgs() - Get the number of active CSGs
|
||||
*
|
||||
|
||||
@@ -82,7 +82,7 @@ static struct kbase_csf_tiler_heap_chunk *get_last_chunk(
|
||||
* Unless the @chunk is the first in the kernel's list of chunks belonging to
|
||||
* a given tiler heap, this function stores the size and address of the @chunk
|
||||
* in the header of the preceding chunk. This requires the GPU memory region
|
||||
* containing the header to be be mapped temporarily, which can fail.
|
||||
* containing the header to be mapped temporarily, which can fail.
|
||||
*
|
||||
* Return: 0 if successful or a negative error code on failure.
|
||||
*/
|
||||
@@ -204,8 +204,8 @@ static int create_chunk(struct kbase_csf_tiler_heap *const heap,
|
||||
|
||||
/* Allocate GPU memory for the new chunk. */
|
||||
INIT_LIST_HEAD(&chunk->link);
|
||||
chunk->region = kbase_mem_alloc(kctx, nr_pages, nr_pages, 0, &flags,
|
||||
&chunk->gpu_va, mmu_sync_info);
|
||||
chunk->region =
|
||||
kbase_mem_alloc(kctx, nr_pages, nr_pages, 0, &flags, &chunk->gpu_va, mmu_sync_info);
|
||||
|
||||
if (unlikely(!chunk->region)) {
|
||||
dev_err(kctx->kbdev->dev,
|
||||
@@ -464,21 +464,18 @@ int kbase_csf_tiler_heap_init(struct kbase_context *const kctx,
|
||||
err = -ENOMEM;
|
||||
} else {
|
||||
err = create_initial_chunks(heap, initial_chunks);
|
||||
if (unlikely(err)) {
|
||||
kbase_csf_heap_context_allocator_free(ctx_alloc,
|
||||
heap->gpu_va);
|
||||
}
|
||||
if (unlikely(err))
|
||||
kbase_csf_heap_context_allocator_free(ctx_alloc, heap->gpu_va);
|
||||
}
|
||||
|
||||
if (unlikely(err)) {
|
||||
kfree(heap);
|
||||
} else {
|
||||
struct kbase_csf_tiler_heap_chunk const *first_chunk =
|
||||
list_first_entry(&heap->chunks_list,
|
||||
struct kbase_csf_tiler_heap_chunk, link);
|
||||
struct kbase_csf_tiler_heap_chunk const *chunk = list_first_entry(
|
||||
&heap->chunks_list, struct kbase_csf_tiler_heap_chunk, link);
|
||||
|
||||
*heap_gpu_va = heap->gpu_va;
|
||||
*first_chunk_va = first_chunk->gpu_va;
|
||||
*first_chunk_va = chunk->gpu_va;
|
||||
|
||||
mutex_lock(&kctx->csf.tiler_heaps.lock);
|
||||
kctx->csf.tiler_heaps.nr_of_heaps++;
|
||||
@@ -488,17 +485,25 @@ int kbase_csf_tiler_heap_init(struct kbase_context *const kctx,
|
||||
KBASE_TLSTREAM_AUX_TILER_HEAP_STATS(
|
||||
kctx->kbdev, kctx->id, heap->heap_id,
|
||||
PFN_UP(heap->chunk_size * heap->max_chunks),
|
||||
PFN_UP(heap->chunk_size * heap->chunk_count),
|
||||
heap->max_chunks, heap->chunk_size, heap->chunk_count,
|
||||
heap->target_in_flight, 0);
|
||||
PFN_UP(heap->chunk_size * heap->chunk_count), heap->max_chunks,
|
||||
heap->chunk_size, heap->chunk_count, heap->target_in_flight, 0);
|
||||
|
||||
dev_dbg(kctx->kbdev->dev, "Created tiler heap 0x%llX\n",
|
||||
heap->gpu_va);
|
||||
#if defined(CONFIG_MALI_VECTOR_DUMP)
|
||||
list_for_each_entry(chunk, &heap->chunks_list, link) {
|
||||
KBASE_TLSTREAM_JD_TILER_HEAP_CHUNK_ALLOC(
|
||||
kctx->kbdev, kctx->id, heap->heap_id, chunk->gpu_va);
|
||||
}
|
||||
#endif
|
||||
|
||||
dev_dbg(kctx->kbdev->dev, "Created tiler heap 0x%llX\n", heap->gpu_va);
|
||||
mutex_unlock(&kctx->csf.tiler_heaps.lock);
|
||||
kctx->running_total_tiler_heap_nr_chunks += heap->chunk_count;
|
||||
kctx->running_total_tiler_heap_memory += heap->chunk_size * heap->chunk_count;
|
||||
if (kctx->running_total_tiler_heap_memory > kctx->peak_total_tiler_heap_memory)
|
||||
kctx->peak_total_tiler_heap_memory = kctx->running_total_tiler_heap_memory;
|
||||
kctx->running_total_tiler_heap_memory +=
|
||||
heap->chunk_size * heap->chunk_count;
|
||||
if (kctx->running_total_tiler_heap_memory >
|
||||
kctx->peak_total_tiler_heap_memory)
|
||||
kctx->peak_total_tiler_heap_memory =
|
||||
kctx->running_total_tiler_heap_memory;
|
||||
}
|
||||
return err;
|
||||
}
|
||||
@@ -609,6 +614,16 @@ int kbase_csf_tiler_heap_alloc_new_chunk(struct kbase_context *kctx,
|
||||
if (likely(heap)) {
|
||||
err = alloc_new_chunk(heap, nr_in_flight, pending_frag_count,
|
||||
new_chunk_ptr);
|
||||
if (likely(!err)) {
|
||||
/* update total and peak tiler heap memory record */
|
||||
kctx->running_total_tiler_heap_nr_chunks++;
|
||||
kctx->running_total_tiler_heap_memory += heap->chunk_size;
|
||||
|
||||
if (kctx->running_total_tiler_heap_memory >
|
||||
kctx->peak_total_tiler_heap_memory)
|
||||
kctx->peak_total_tiler_heap_memory =
|
||||
kctx->running_total_tiler_heap_memory;
|
||||
}
|
||||
|
||||
KBASE_TLSTREAM_AUX_TILER_HEAP_STATS(
|
||||
kctx->kbdev, kctx->id, heap->heap_id,
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
|
||||
/*
|
||||
*
|
||||
* (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved.
|
||||
* (C) COPYRIGHT 2020-2022 ARM Limited. All rights reserved.
|
||||
*
|
||||
* This program is free software and is provided to you under the terms of the
|
||||
* GNU General Public License version 2 as published by the Free Software
|
||||
@@ -28,14 +28,14 @@ struct kbase_context;
|
||||
#define MALI_CSF_TILER_HEAP_DEBUGFS_VERSION 0
|
||||
|
||||
/**
|
||||
* kbase_csf_tiler_heap_debugfs_init() - Create a debugfs entry for per context tiler heap
|
||||
* kbase_csf_tiler_heap_debugfs_init - Create a debugfs entry for per context tiler heap
|
||||
*
|
||||
* @kctx: The kbase_context for which to create the debugfs entry
|
||||
*/
|
||||
void kbase_csf_tiler_heap_debugfs_init(struct kbase_context *kctx);
|
||||
|
||||
/**
|
||||
* kbase_csf_tiler_heap_total_debugfs_init() - Create a debugfs entry for per context tiler heap
|
||||
* kbase_csf_tiler_heap_total_debugfs_init - Create a debugfs entry for per context tiler heap
|
||||
*
|
||||
* @kctx: The kbase_context for which to create the debugfs entry
|
||||
*/
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
|
||||
/*
|
||||
*
|
||||
* (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved.
|
||||
* (C) COPYRIGHT 2020-2022 ARM Limited. All rights reserved.
|
||||
*
|
||||
* This program is free software and is provided to you under the terms of the
|
||||
* GNU General Public License version 2 as published by the Free Software
|
||||
@@ -59,18 +59,18 @@
|
||||
/**
|
||||
* struct kbase_csf_tiler_heap_chunk - A tiler heap chunk managed by the kernel
|
||||
*
|
||||
* Chunks are allocated upon initialization of a tiler heap or in response to
|
||||
* out-of-memory events from the firmware. Chunks are always fully backed by
|
||||
* physical memory to avoid the overhead of processing GPU page faults. The
|
||||
* allocated GPU memory regions are linked together independent of the list of
|
||||
* kernel objects of this type.
|
||||
*
|
||||
* @link: Link to this chunk in a list of chunks belonging to a
|
||||
* @kbase_csf_tiler_heap.
|
||||
* @region: Pointer to the GPU memory region allocated for the chunk.
|
||||
* @gpu_va: GPU virtual address of the start of the memory region.
|
||||
* This points to the header of the chunk and not to the low address
|
||||
* of free memory within it.
|
||||
*
|
||||
* Chunks are allocated upon initialization of a tiler heap or in response to
|
||||
* out-of-memory events from the firmware. Chunks are always fully backed by
|
||||
* physical memory to avoid the overhead of processing GPU page faults. The
|
||||
* allocated GPU memory regions are linked together independent of the list of
|
||||
* kernel objects of this type.
|
||||
*/
|
||||
struct kbase_csf_tiler_heap_chunk {
|
||||
struct list_head link;
|
||||
|
||||
@@ -139,8 +139,7 @@ static ssize_t progress_timeout_show(struct device * const dev,
|
||||
|
||||
}
|
||||
|
||||
static DEVICE_ATTR(progress_timeout, 0644, progress_timeout_show,
|
||||
progress_timeout_store);
|
||||
static DEVICE_ATTR_RW(progress_timeout);
|
||||
|
||||
int kbase_csf_timeout_init(struct kbase_device *const kbdev)
|
||||
{
|
||||
|
||||
@@ -80,9 +80,8 @@ static int kbase_csf_tl_debugfs_poll_interval_write(void *data, u64 val)
|
||||
struct kbase_device *kbdev = (struct kbase_device *)data;
|
||||
struct kbase_csf_tl_reader *self = &kbdev->timeline->csf_tl_reader;
|
||||
|
||||
if (val > KBASE_CSF_TL_READ_INTERVAL_MAX || val < KBASE_CSF_TL_READ_INTERVAL_MIN) {
|
||||
if (val > KBASE_CSF_TL_READ_INTERVAL_MAX || val < KBASE_CSF_TL_READ_INTERVAL_MIN)
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
self->timer_interval = (u32)val;
|
||||
|
||||
@@ -96,7 +95,7 @@ DEFINE_DEBUGFS_ATTRIBUTE(kbase_csf_tl_poll_interval_fops,
|
||||
|
||||
void kbase_csf_tl_reader_debugfs_init(struct kbase_device *kbdev)
|
||||
{
|
||||
debugfs_create_file("csf_tl_poll_interval_in_ms", S_IRUGO | S_IWUSR,
|
||||
debugfs_create_file("csf_tl_poll_interval_in_ms", 0644,
|
||||
kbdev->debugfs_instr_directory, kbdev,
|
||||
&kbase_csf_tl_poll_interval_fops);
|
||||
|
||||
@@ -406,9 +405,8 @@ static int tl_reader_init_late(
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (kbase_ts_converter_init(&self->ts_converter, kbdev)) {
|
||||
if (kbase_ts_converter_init(&self->ts_converter, kbdev))
|
||||
return -1;
|
||||
}
|
||||
|
||||
self->kbdev = kbdev;
|
||||
self->trace_buffer = tb;
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
|
||||
/*
|
||||
*
|
||||
* (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
|
||||
* (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
|
||||
*
|
||||
* This program is free software and is provided to you under the terms of the
|
||||
* GNU General Public License version 2 as published by the Free Software
|
||||
@@ -133,14 +133,12 @@ void kbase_csf_tl_reader_init(struct kbase_csf_tl_reader *self,
|
||||
void kbase_csf_tl_reader_term(struct kbase_csf_tl_reader *self);
|
||||
|
||||
/**
|
||||
* kbase_csf_tl_reader_flush_buffer() -
|
||||
* Flush trace from buffer into CSFFW timeline stream.
|
||||
* kbase_csf_tl_reader_flush_buffer() - Flush trace from buffer into CSFFW timeline stream.
|
||||
*
|
||||
* @self: CSFFW TL Reader instance.
|
||||
*
|
||||
* Return: Zero on success, negative error code (EBUSY) otherwise
|
||||
*/
|
||||
|
||||
int kbase_csf_tl_reader_flush_buffer(struct kbase_csf_tl_reader *self);
|
||||
|
||||
/**
|
||||
|
||||
@@ -179,13 +179,13 @@ int kbase_csf_firmware_trace_buffers_init(struct kbase_device *kbdev)
|
||||
extract_gpu_va =
|
||||
(kbdev->csf.firmware_trace_buffers.mcu_rw.va_reg->start_pfn << PAGE_SHIFT) +
|
||||
mcu_rw_offset;
|
||||
extract_cpu_va = (u32*)(
|
||||
extract_cpu_va = (u32 *)(
|
||||
kbdev->csf.firmware_trace_buffers.mcu_rw.cpu_addr +
|
||||
mcu_rw_offset);
|
||||
insert_gpu_va =
|
||||
(kbdev->csf.firmware_trace_buffers.mcu_write.va_reg->start_pfn << PAGE_SHIFT) +
|
||||
mcu_write_offset;
|
||||
insert_cpu_va = (u32*)(
|
||||
insert_cpu_va = (u32 *)(
|
||||
kbdev->csf.firmware_trace_buffers.mcu_write.cpu_addr +
|
||||
mcu_write_offset);
|
||||
data_buffer_gpu_va =
|
||||
@@ -323,13 +323,13 @@ void kbase_csf_firmware_reload_trace_buffers_data(struct kbase_device *kbdev)
|
||||
extract_gpu_va =
|
||||
(kbdev->csf.firmware_trace_buffers.mcu_rw.va_reg->start_pfn << PAGE_SHIFT) +
|
||||
mcu_rw_offset;
|
||||
extract_cpu_va = (u32*)(
|
||||
extract_cpu_va = (u32 *)(
|
||||
kbdev->csf.firmware_trace_buffers.mcu_rw.cpu_addr +
|
||||
mcu_rw_offset);
|
||||
insert_gpu_va =
|
||||
(kbdev->csf.firmware_trace_buffers.mcu_write.va_reg->start_pfn << PAGE_SHIFT) +
|
||||
mcu_write_offset;
|
||||
insert_cpu_va = (u32*)(
|
||||
insert_cpu_va = (u32 *)(
|
||||
kbdev->csf.firmware_trace_buffers.mcu_write.cpu_addr +
|
||||
mcu_write_offset);
|
||||
data_buffer_gpu_va =
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
|
||||
/*
|
||||
*
|
||||
* (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
|
||||
* (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
|
||||
*
|
||||
* This program is free software and is provided to you under the terms of the
|
||||
* GNU General Public License version 2 as published by the Free Software
|
||||
@@ -203,6 +203,8 @@ static void kbase_csf_early_term(struct kbase_device *kbdev)
|
||||
* kbase_device_hwcnt_watchdog_if_init - Create hardware counter watchdog
|
||||
* interface.
|
||||
* @kbdev: Device pointer
|
||||
*
|
||||
* Return: 0 if successful or a negative error code on failure.
|
||||
*/
|
||||
static int kbase_device_hwcnt_watchdog_if_init(struct kbase_device *kbdev)
|
||||
{
|
||||
@@ -245,8 +247,9 @@ static void kbase_device_hwcnt_backend_csf_if_term(struct kbase_device *kbdev)
|
||||
/**
|
||||
* kbase_device_hwcnt_backend_csf_init - Create hardware counter backend.
|
||||
* @kbdev: Device pointer
|
||||
*
|
||||
* Return: 0 if successful or a negative error code on failure.
|
||||
*/
|
||||
|
||||
static int kbase_device_hwcnt_backend_csf_init(struct kbase_device *kbdev)
|
||||
{
|
||||
return kbase_hwcnt_backend_csf_create(
|
||||
@@ -390,7 +393,7 @@ int kbase_device_init(struct kbase_device *kbdev)
|
||||
* Hardware counter components depending on firmware are initialized after CSF
|
||||
* firmware is loaded.
|
||||
*
|
||||
* @return 0 on success. An error code on failure.
|
||||
* Return: 0 on success. An error code on failure.
|
||||
*/
|
||||
static int kbase_device_hwcnt_csf_deferred_init(struct kbase_device *kbdev)
|
||||
{
|
||||
@@ -457,7 +460,7 @@ virt_fail:
|
||||
* To meet Android GKI vendor guideline, firmware load is deferred at
|
||||
* the time when @ref kbase_open is called for the first time.
|
||||
*
|
||||
* @return 0 on success. An error code on failure.
|
||||
* Return: 0 on success. An error code on failure.
|
||||
*/
|
||||
static int kbase_csf_firmware_deferred_init(struct kbase_device *kbdev)
|
||||
{
|
||||
|
||||
@@ -133,8 +133,12 @@ void kbase_gpu_interrupt(struct kbase_device *kbdev, u32 val)
|
||||
if (val & RESET_COMPLETED)
|
||||
kbase_pm_reset_done(kbdev);
|
||||
|
||||
KBASE_KTRACE_ADD(kbdev, CORE_GPU_IRQ_CLEAR, NULL, val);
|
||||
kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), val);
|
||||
/* Defer clearing CLEAN_CACHES_COMPLETED to kbase_clean_caches_done.
|
||||
* We need to acquire hwaccess_lock to avoid a race condition with
|
||||
* kbase_gpu_cache_flush_and_busy_wait
|
||||
*/
|
||||
KBASE_KTRACE_ADD(kbdev, CORE_GPU_IRQ_CLEAR, NULL, val & ~CLEAN_CACHES_COMPLETED);
|
||||
kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), val & ~CLEAN_CACHES_COMPLETED);
|
||||
|
||||
#ifdef KBASE_PM_RUNTIME
|
||||
if (val & DOORBELL_MIRROR) {
|
||||
|
||||
@@ -66,8 +66,12 @@ void kbase_gpu_interrupt(struct kbase_device *kbdev, u32 val)
|
||||
if (val & PRFCNT_SAMPLE_COMPLETED)
|
||||
kbase_instr_hwcnt_sample_done(kbdev);
|
||||
|
||||
KBASE_KTRACE_ADD(kbdev, CORE_GPU_IRQ_CLEAR, NULL, val);
|
||||
kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), val);
|
||||
/* Defer clearing CLEAN_CACHES_COMPLETED to kbase_clean_caches_done.
|
||||
* We need to acquire hwaccess_lock to avoid a race condition with
|
||||
* kbase_gpu_cache_flush_and_busy_wait
|
||||
*/
|
||||
KBASE_KTRACE_ADD(kbdev, CORE_GPU_IRQ_CLEAR, NULL, val & ~CLEAN_CACHES_COMPLETED);
|
||||
kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), val & ~CLEAN_CACHES_COMPLETED);
|
||||
|
||||
/* kbase_pm_check_transitions (called by kbase_pm_power_changed) must
|
||||
* be called after the IRQ has been cleared. This is because it might
|
||||
|
||||
@@ -27,6 +27,9 @@
|
||||
#include <mali_kbase_hwaccess_backend.h>
|
||||
#include <mali_kbase_ctx_sched.h>
|
||||
#include <mali_kbase_reset_gpu.h>
|
||||
#include <mali_kbase_hwcnt_watchdog_if_timer.h>
|
||||
#include <mali_kbase_hwcnt_backend_jm.h>
|
||||
#include <mali_kbase_hwcnt_backend_jm_watchdog.h>
|
||||
|
||||
#if IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI)
|
||||
#include <backend/gpu/mali_kbase_model_linux.h>
|
||||
@@ -148,73 +151,115 @@ static void kbase_backend_late_term(struct kbase_device *kbdev)
|
||||
kbase_hwaccess_pm_term(kbdev);
|
||||
}
|
||||
|
||||
static int kbase_device_hwcnt_backend_jm_init(struct kbase_device *kbdev)
|
||||
/**
|
||||
* kbase_device_hwcnt_watchdog_if_init - Create hardware counter watchdog
|
||||
* interface.
|
||||
* @kbdev: Device pointer
|
||||
* Return: 0 on success, or an error code on failure.
|
||||
*/
|
||||
static int kbase_device_hwcnt_watchdog_if_init(struct kbase_device *kbdev)
|
||||
{
|
||||
return kbase_hwcnt_backend_jm_create(kbdev, &kbdev->hwcnt_gpu_iface);
|
||||
return kbase_hwcnt_watchdog_if_timer_create(&kbdev->hwcnt_watchdog_timer);
|
||||
}
|
||||
|
||||
/**
|
||||
* kbase_device_hwcnt_watchdog_if_term - Terminate hardware counter watchdog
|
||||
* interface.
|
||||
* @kbdev: Device pointer
|
||||
*/
|
||||
static void kbase_device_hwcnt_watchdog_if_term(struct kbase_device *kbdev)
|
||||
{
|
||||
kbase_hwcnt_watchdog_if_timer_destroy(&kbdev->hwcnt_watchdog_timer);
|
||||
}
|
||||
|
||||
/**
|
||||
* kbase_device_hwcnt_backend_jm_init - Create hardware counter backend.
|
||||
* @kbdev: Device pointer
|
||||
* Return: 0 on success, or an error code on failure.
|
||||
*/
|
||||
static int kbase_device_hwcnt_backend_jm_init(struct kbase_device *kbdev)
|
||||
{
|
||||
return kbase_hwcnt_backend_jm_create(kbdev, &kbdev->hwcnt_gpu_jm_backend);
|
||||
}
|
||||
|
||||
/**
|
||||
* kbase_device_hwcnt_backend_jm_term - Terminate hardware counter backend.
|
||||
* @kbdev: Device pointer
|
||||
*/
|
||||
static void kbase_device_hwcnt_backend_jm_term(struct kbase_device *kbdev)
|
||||
{
|
||||
kbase_hwcnt_backend_jm_destroy(&kbdev->hwcnt_gpu_iface);
|
||||
kbase_hwcnt_backend_jm_destroy(&kbdev->hwcnt_gpu_jm_backend);
|
||||
}
|
||||
|
||||
/**
|
||||
* kbase_device_hwcnt_backend_jm_watchdog_init - Create hardware counter watchdog backend.
|
||||
* @kbdev: Device pointer
|
||||
* Return: 0 on success, or an error code on failure.
|
||||
*/
|
||||
static int kbase_device_hwcnt_backend_jm_watchdog_init(struct kbase_device *kbdev)
|
||||
{
|
||||
return kbase_hwcnt_backend_jm_watchdog_create(&kbdev->hwcnt_gpu_jm_backend,
|
||||
&kbdev->hwcnt_watchdog_timer,
|
||||
&kbdev->hwcnt_gpu_iface);
|
||||
}
|
||||
|
||||
/**
|
||||
* kbase_device_hwcnt_backend_jm_watchdog_term - Terminate hardware counter watchdog backend.
|
||||
* @kbdev: Device pointer
|
||||
*/
|
||||
static void kbase_device_hwcnt_backend_jm_watchdog_term(struct kbase_device *kbdev)
|
||||
{
|
||||
kbase_hwcnt_backend_jm_watchdog_destroy(&kbdev->hwcnt_gpu_iface);
|
||||
}
|
||||
|
||||
static const struct kbase_device_init dev_init[] = {
|
||||
#if IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI)
|
||||
{ kbase_gpu_device_create, kbase_gpu_device_destroy,
|
||||
"Dummy model initialization failed" },
|
||||
{ kbase_gpu_device_create, kbase_gpu_device_destroy, "Dummy model initialization failed" },
|
||||
#else
|
||||
{ assign_irqs, NULL, "IRQ search failed" },
|
||||
{ registers_map, registers_unmap, "Register map failed" },
|
||||
#endif
|
||||
{ kbase_device_io_history_init, kbase_device_io_history_term,
|
||||
"Register access history initialization failed" },
|
||||
{ kbase_device_pm_init, kbase_device_pm_term,
|
||||
"Power management initialization failed" },
|
||||
{ kbase_device_early_init, kbase_device_early_term,
|
||||
"Early device initialization failed" },
|
||||
{ kbase_device_populate_max_freq, NULL,
|
||||
"Populating max frequency failed" },
|
||||
{ kbase_device_pm_init, kbase_device_pm_term, "Power management initialization failed" },
|
||||
{ kbase_device_early_init, kbase_device_early_term, "Early device initialization failed" },
|
||||
{ kbase_device_populate_max_freq, NULL, "Populating max frequency failed" },
|
||||
{ kbase_device_misc_init, kbase_device_misc_term,
|
||||
"Miscellaneous device initialization failed" },
|
||||
{ kbase_device_pcm_dev_init, kbase_device_pcm_dev_term,
|
||||
"Priority control manager initialization failed" },
|
||||
{ kbase_ctx_sched_init, kbase_ctx_sched_term,
|
||||
"Context scheduler initialization failed" },
|
||||
{ kbase_mem_init, kbase_mem_term,
|
||||
"Memory subsystem initialization failed" },
|
||||
{ kbase_ctx_sched_init, kbase_ctx_sched_term, "Context scheduler initialization failed" },
|
||||
{ kbase_mem_init, kbase_mem_term, "Memory subsystem initialization failed" },
|
||||
{ kbase_device_coherency_init, NULL, "Device coherency init failed" },
|
||||
{ kbase_protected_mode_init, kbase_protected_mode_term,
|
||||
"Protected mode subsystem initialization failed" },
|
||||
{ kbase_device_list_init, kbase_device_list_term,
|
||||
"Device list setup failed" },
|
||||
{ kbasep_js_devdata_init, kbasep_js_devdata_term,
|
||||
"Job JS devdata initialization failed" },
|
||||
{ kbase_device_list_init, kbase_device_list_term, "Device list setup failed" },
|
||||
{ kbasep_js_devdata_init, kbasep_js_devdata_term, "Job JS devdata initialization failed" },
|
||||
{ kbase_device_timeline_init, kbase_device_timeline_term,
|
||||
"Timeline stream initialization failed" },
|
||||
{ kbase_clk_rate_trace_manager_init, kbase_clk_rate_trace_manager_term,
|
||||
"Clock rate trace manager initialization failed" },
|
||||
{ kbase_lowest_gpu_freq_init, NULL,
|
||||
"Lowest freq initialization failed" },
|
||||
{ kbase_lowest_gpu_freq_init, NULL, "Lowest freq initialization failed" },
|
||||
{ kbase_instr_backend_init, kbase_instr_backend_term,
|
||||
"Instrumentation backend initialization failed" },
|
||||
{ kbase_device_hwcnt_backend_jm_init,
|
||||
kbase_device_hwcnt_backend_jm_term,
|
||||
{ kbase_device_hwcnt_watchdog_if_init, kbase_device_hwcnt_watchdog_if_term,
|
||||
"GPU hwcnt backend watchdog interface creation failed" },
|
||||
{ kbase_device_hwcnt_backend_jm_init, kbase_device_hwcnt_backend_jm_term,
|
||||
"GPU hwcnt backend creation failed" },
|
||||
{ kbase_device_hwcnt_backend_jm_watchdog_init, kbase_device_hwcnt_backend_jm_watchdog_term,
|
||||
"GPU hwcnt watchdog backend creation failed" },
|
||||
{ kbase_device_hwcnt_context_init, kbase_device_hwcnt_context_term,
|
||||
"GPU hwcnt context initialization failed" },
|
||||
{ kbase_device_hwcnt_virtualizer_init,
|
||||
kbase_device_hwcnt_virtualizer_term,
|
||||
{ kbase_device_hwcnt_virtualizer_init, kbase_device_hwcnt_virtualizer_term,
|
||||
"GPU hwcnt virtualizer initialization failed" },
|
||||
{ kbase_device_vinstr_init, kbase_device_vinstr_term,
|
||||
"Virtual instrumentation initialization failed" },
|
||||
{ kbase_device_kinstr_prfcnt_init, kbase_device_kinstr_prfcnt_term,
|
||||
"Performance counter instrumentation initialization failed" },
|
||||
{ kbase_backend_late_init, kbase_backend_late_term,
|
||||
"Late backend initialization failed" },
|
||||
{ kbase_backend_late_init, kbase_backend_late_term, "Late backend initialization failed" },
|
||||
{ kbase_debug_job_fault_dev_init, kbase_debug_job_fault_dev_term,
|
||||
"Job fault debug initialization failed" },
|
||||
{ kbase_device_debugfs_init, kbase_device_debugfs_term,
|
||||
"DebugFS initialization failed" },
|
||||
{ kbase_device_debugfs_init, kbase_device_debugfs_term, "DebugFS initialization failed" },
|
||||
/* Sysfs init needs to happen before registering the device with
|
||||
* misc_register(), otherwise it causes a race condition between
|
||||
* registering the device and a uevent event being generated for
|
||||
@@ -233,8 +278,7 @@ static const struct kbase_device_init dev_init[] = {
|
||||
{ kbase_gpuprops_populate_user_buffer, kbase_gpuprops_free_user_buffer,
|
||||
"GPU property population failed" },
|
||||
{ NULL, kbase_dummy_job_wa_cleanup, NULL },
|
||||
{ kbase_device_late_init, kbase_device_late_term,
|
||||
"Late device initialization failed" },
|
||||
{ kbase_device_late_init, kbase_device_late_term, "Late device initialization failed" },
|
||||
};
|
||||
|
||||
static void kbase_device_term_partial(struct kbase_device *kbdev,
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
|
||||
/*
|
||||
*
|
||||
* (C) COPYRIGHT 2010-2021 ARM Limited. All rights reserved.
|
||||
* (C) COPYRIGHT 2010-2022 ARM Limited. All rights reserved.
|
||||
*
|
||||
* This program is free software and is provided to you under the terms of the
|
||||
* GNU General Public License version 2 as published by the Free Software
|
||||
@@ -166,8 +166,11 @@ void kbase_device_pcm_dev_term(struct kbase_device *const kbdev)
|
||||
* @nb: notifier block - used to retrieve kbdev pointer
|
||||
* @action: action (unused)
|
||||
* @data: data pointer (unused)
|
||||
*
|
||||
* This function simply lists memory usage by the Mali driver, per GPU device,
|
||||
* for diagnostic purposes.
|
||||
*
|
||||
* Return: NOTIFY_OK on success, NOTIFY_BAD otherwise.
|
||||
*/
|
||||
static int mali_oom_notifier_handler(struct notifier_block *nb,
|
||||
unsigned long action, void *data)
|
||||
@@ -189,7 +192,7 @@ static int mali_oom_notifier_handler(struct notifier_block *nb,
|
||||
|
||||
mutex_lock(&kbdev->kctx_list_lock);
|
||||
|
||||
list_for_each_entry (kctx, &kbdev->kctx_list, kctx_list_link) {
|
||||
list_for_each_entry(kctx, &kbdev->kctx_list, kctx_list_link) {
|
||||
struct pid *pid_struct;
|
||||
struct task_struct *task;
|
||||
unsigned long task_alloc_total =
|
||||
@@ -483,6 +486,7 @@ int kbase_device_early_init(struct kbase_device *kbdev)
|
||||
{
|
||||
int err;
|
||||
|
||||
|
||||
err = kbasep_platform_device_init(kbdev);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
|
||||
/*
|
||||
*
|
||||
* (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
|
||||
* (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
|
||||
*
|
||||
* This program is free software and is provided to you under the terms of the
|
||||
* GNU General Public License version 2 as published by the Free Software
|
||||
@@ -23,7 +23,6 @@
|
||||
|
||||
/**
|
||||
* kbase_device_get_list - get device list.
|
||||
*
|
||||
* Get access to device list.
|
||||
*
|
||||
* Return: Pointer to the linked list head.
|
||||
@@ -55,18 +54,18 @@ void kbase_increment_device_id(void);
|
||||
* When a device file is opened for the first time,
|
||||
* load firmware and initialize hardware counter components.
|
||||
*
|
||||
* @return 0 on success. An error code on failure.
|
||||
* Return: 0 on success. An error code on failure.
|
||||
*/
|
||||
int kbase_device_firmware_init_once(struct kbase_device *kbdev);
|
||||
|
||||
/**
|
||||
* kbase_device_init - Device initialisation.
|
||||
*
|
||||
* @kbdev: The kbase device structure for the device (must be a valid pointer)
|
||||
*
|
||||
* This is called from device probe to initialise various other
|
||||
* components needed.
|
||||
*
|
||||
* @kbdev: The kbase device structure for the device (must be a valid pointer)
|
||||
*
|
||||
* Return: 0 on success and non-zero value on failure.
|
||||
*/
|
||||
int kbase_device_init(struct kbase_device *kbdev);
|
||||
@@ -74,11 +73,10 @@ int kbase_device_init(struct kbase_device *kbdev);
|
||||
/**
|
||||
* kbase_device_term - Device termination.
|
||||
*
|
||||
* This is called from device remove to terminate various components that
|
||||
* were initialised during kbase_device_init.
|
||||
*
|
||||
* @kbdev: The kbase device structure for the device (must be a valid pointer)
|
||||
*
|
||||
* This is called from device remove to terminate various components that
|
||||
* were initialised during kbase_device_init.
|
||||
*/
|
||||
void kbase_device_term(struct kbase_device *kbdev);
|
||||
|
||||
|
||||
@@ -63,6 +63,7 @@ static int busy_wait_cache_clean_irq(struct kbase_device *kbdev)
|
||||
}
|
||||
|
||||
/* Clear the interrupt CLEAN_CACHES_COMPLETED bit. */
|
||||
KBASE_KTRACE_ADD(kbdev, CORE_GPU_IRQ_CLEAR, NULL, CLEAN_CACHES_COMPLETED);
|
||||
kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR),
|
||||
CLEAN_CACHES_COMPLETED);
|
||||
|
||||
@@ -72,7 +73,6 @@ static int busy_wait_cache_clean_irq(struct kbase_device *kbdev)
|
||||
int kbase_gpu_cache_flush_and_busy_wait(struct kbase_device *kbdev,
|
||||
u32 flush_op)
|
||||
{
|
||||
u32 irq_mask;
|
||||
int need_to_wake_up = 0;
|
||||
int ret = 0;
|
||||
|
||||
@@ -81,17 +81,18 @@ int kbase_gpu_cache_flush_and_busy_wait(struct kbase_device *kbdev,
|
||||
*/
|
||||
lockdep_assert_held(&kbdev->hwaccess_lock);
|
||||
|
||||
/* 1. Check if CLEAN_CACHES_COMPLETED irq mask bit is set.
|
||||
/* 1. Check if kbdev->cache_clean_in_progress is set.
|
||||
* If it is set, it means there are threads waiting for
|
||||
* CLEAN_CACHES_COMPLETED irq to be raised.
|
||||
* CLEAN_CACHES_COMPLETED irq to be raised and that the
|
||||
* corresponding irq mask bit is set.
|
||||
* We'll clear the irq mask bit and busy-wait for the cache
|
||||
* clean operation to complete before submitting the cache
|
||||
* clean command required after the GPU page table update.
|
||||
* Pended flush commands will be merged to requested command.
|
||||
*/
|
||||
irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK));
|
||||
if (irq_mask & CLEAN_CACHES_COMPLETED) {
|
||||
if (kbdev->cache_clean_in_progress) {
|
||||
/* disable irq first */
|
||||
u32 irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK));
|
||||
kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK),
|
||||
irq_mask & ~CLEAN_CACHES_COMPLETED);
|
||||
|
||||
@@ -182,22 +183,28 @@ void kbase_clean_caches_done(struct kbase_device *kbdev)
|
||||
|
||||
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
|
||||
|
||||
if (kbdev->cache_clean_queued) {
|
||||
u32 pended_flush_op = kbdev->cache_clean_queued;
|
||||
if (kbdev->cache_clean_in_progress) {
|
||||
/* Clear the interrupt CLEAN_CACHES_COMPLETED bit if set.
|
||||
* It might have already been done by kbase_gpu_cache_flush_and_busy_wait.
|
||||
*/
|
||||
KBASE_KTRACE_ADD(kbdev, CORE_GPU_IRQ_CLEAR, NULL, CLEAN_CACHES_COMPLETED);
|
||||
kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), CLEAN_CACHES_COMPLETED);
|
||||
|
||||
kbdev->cache_clean_queued = 0;
|
||||
if (kbdev->cache_clean_queued) {
|
||||
u32 pended_flush_op = kbdev->cache_clean_queued;
|
||||
|
||||
KBASE_KTRACE_ADD(kbdev, CORE_GPU_CLEAN_INV_CACHES, NULL,
|
||||
pended_flush_op);
|
||||
kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
|
||||
pended_flush_op);
|
||||
} else {
|
||||
/* Disable interrupt */
|
||||
irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK));
|
||||
kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK),
|
||||
irq_mask & ~CLEAN_CACHES_COMPLETED);
|
||||
kbdev->cache_clean_queued = 0;
|
||||
|
||||
kbase_gpu_cache_clean_wait_complete(kbdev);
|
||||
KBASE_KTRACE_ADD(kbdev, CORE_GPU_CLEAN_INV_CACHES, NULL, pended_flush_op);
|
||||
kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), pended_flush_op);
|
||||
} else {
|
||||
/* Disable interrupt */
|
||||
irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK));
|
||||
kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK),
|
||||
irq_mask & ~CLEAN_CACHES_COMPLETED);
|
||||
|
||||
kbase_gpu_cache_clean_wait_complete(kbdev);
|
||||
}
|
||||
}
|
||||
|
||||
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
|
||||
|
||||
@@ -20,7 +20,7 @@
|
||||
*/
|
||||
|
||||
#include <mali_kbase.h>
|
||||
#include <uapi/gpu/arm/bifrost/csf/mali_gpu_csf_registers.h>
|
||||
#include <csf/mali_kbase_csf_registers.h>
|
||||
#include <gpu/mali_kbase_gpu_fault.h>
|
||||
|
||||
const char *kbase_gpu_exception_name(u32 const exception_code)
|
||||
|
||||
@@ -19,8 +19,8 @@
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef _UAPI_KBASE_GPU_REGMAP_CSF_H_
|
||||
#define _UAPI_KBASE_GPU_REGMAP_CSF_H_
|
||||
#ifndef _KBASE_GPU_REGMAP_CSF_H_
|
||||
#define _KBASE_GPU_REGMAP_CSF_H_
|
||||
|
||||
#include <linux/types.h>
|
||||
|
||||
@@ -365,4 +365,4 @@
|
||||
/* GPU_CONTROL_MCU.GPU_IRQ_RAWSTAT */
|
||||
#define PRFCNT_SAMPLE_COMPLETED (1 << 16) /* Set when performance count sample has completed */
|
||||
|
||||
#endif /* _UAPI_KBASE_GPU_REGMAP_CSF_H_ */
|
||||
#endif /* _KBASE_GPU_REGMAP_CSF_H_ */
|
||||
293
drivers/gpu/arm/bifrost/gpu/backend/mali_kbase_gpu_regmap_jm.h
Normal file
293
drivers/gpu/arm/bifrost/gpu/backend/mali_kbase_gpu_regmap_jm.h
Normal file
@@ -0,0 +1,293 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
|
||||
/*
|
||||
*
|
||||
* (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
|
||||
*
|
||||
* This program is free software and is provided to you under the terms of the
|
||||
* GNU General Public License version 2 as published by the Free Software
|
||||
* Foundation, and any use by you of this program is subject to the terms
|
||||
* of such GNU license.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, you can access it online at
|
||||
* http://www.gnu.org/licenses/gpl-2.0.html.
|
||||
*
|
||||
*/
|
||||
#ifndef _KBASE_GPU_REGMAP_JM_H_
|
||||
#define _KBASE_GPU_REGMAP_JM_H_
|
||||
|
||||
#if MALI_USE_CSF && defined(__KERNEL__)
|
||||
#error "Cannot be compiled with CSF"
|
||||
#endif
|
||||
|
||||
/* Set to implementation defined, outer caching */
|
||||
#define AS_MEMATTR_AARCH64_OUTER_IMPL_DEF 0x88ull
|
||||
/* Set to write back memory, outer caching */
|
||||
#define AS_MEMATTR_AARCH64_OUTER_WA 0x8Dull
|
||||
/* Set to inner non-cacheable, outer-non-cacheable
|
||||
* Setting defined by the alloc bits is ignored, but set to a valid encoding:
|
||||
* - no-alloc on read
|
||||
* - no alloc on write
|
||||
*/
|
||||
#define AS_MEMATTR_AARCH64_NON_CACHEABLE 0x4Cull
|
||||
|
||||
/* Symbols for default MEMATTR to use
|
||||
* Default is - HW implementation defined caching
|
||||
*/
|
||||
#define AS_MEMATTR_INDEX_DEFAULT 0
|
||||
#define AS_MEMATTR_INDEX_DEFAULT_ACE 3
|
||||
|
||||
/* HW implementation defined caching */
|
||||
#define AS_MEMATTR_INDEX_IMPL_DEF_CACHE_POLICY 0
|
||||
/* Force cache on */
|
||||
#define AS_MEMATTR_INDEX_FORCE_TO_CACHE_ALL 1
|
||||
/* Write-alloc */
|
||||
#define AS_MEMATTR_INDEX_WRITE_ALLOC 2
|
||||
/* Outer coherent, inner implementation defined policy */
|
||||
#define AS_MEMATTR_INDEX_OUTER_IMPL_DEF 3
|
||||
/* Outer coherent, write alloc inner */
|
||||
#define AS_MEMATTR_INDEX_OUTER_WA 4
|
||||
/* Normal memory, inner non-cacheable, outer non-cacheable (ARMv8 mode only) */
|
||||
#define AS_MEMATTR_INDEX_NON_CACHEABLE 5
|
||||
|
||||
/* GPU control registers */
|
||||
|
||||
#define CORE_FEATURES 0x008 /* (RO) Shader Core Features */
|
||||
#define JS_PRESENT 0x01C /* (RO) Job slots present */
|
||||
|
||||
#define PRFCNT_BASE_LO 0x060 /* (RW) Performance counter memory
|
||||
* region base address, low word
|
||||
*/
|
||||
#define PRFCNT_BASE_HI 0x064 /* (RW) Performance counter memory
|
||||
* region base address, high word
|
||||
*/
|
||||
#define PRFCNT_CONFIG 0x068 /* (RW) Performance counter
|
||||
* configuration
|
||||
*/
|
||||
#define PRFCNT_JM_EN 0x06C /* (RW) Performance counter enable
|
||||
* flags for Job Manager
|
||||
*/
|
||||
#define PRFCNT_SHADER_EN 0x070 /* (RW) Performance counter enable
|
||||
* flags for shader cores
|
||||
*/
|
||||
#define PRFCNT_TILER_EN 0x074 /* (RW) Performance counter enable
|
||||
* flags for tiler
|
||||
*/
|
||||
#define PRFCNT_MMU_L2_EN 0x07C /* (RW) Performance counter enable
|
||||
* flags for MMU/L2 cache
|
||||
*/
|
||||
|
||||
#define JS0_FEATURES 0x0C0 /* (RO) Features of job slot 0 */
|
||||
#define JS1_FEATURES 0x0C4 /* (RO) Features of job slot 1 */
|
||||
#define JS2_FEATURES 0x0C8 /* (RO) Features of job slot 2 */
|
||||
#define JS3_FEATURES 0x0CC /* (RO) Features of job slot 3 */
|
||||
#define JS4_FEATURES 0x0D0 /* (RO) Features of job slot 4 */
|
||||
#define JS5_FEATURES 0x0D4 /* (RO) Features of job slot 5 */
|
||||
#define JS6_FEATURES 0x0D8 /* (RO) Features of job slot 6 */
|
||||
#define JS7_FEATURES 0x0DC /* (RO) Features of job slot 7 */
|
||||
#define JS8_FEATURES 0x0E0 /* (RO) Features of job slot 8 */
|
||||
#define JS9_FEATURES 0x0E4 /* (RO) Features of job slot 9 */
|
||||
#define JS10_FEATURES 0x0E8 /* (RO) Features of job slot 10 */
|
||||
#define JS11_FEATURES 0x0EC /* (RO) Features of job slot 11 */
|
||||
#define JS12_FEATURES 0x0F0 /* (RO) Features of job slot 12 */
|
||||
#define JS13_FEATURES 0x0F4 /* (RO) Features of job slot 13 */
|
||||
#define JS14_FEATURES 0x0F8 /* (RO) Features of job slot 14 */
|
||||
#define JS15_FEATURES 0x0FC /* (RO) Features of job slot 15 */
|
||||
|
||||
#define JS_FEATURES_REG(n) GPU_CONTROL_REG(JS0_FEATURES + ((n) << 2))
|
||||
|
||||
#define JM_CONFIG 0xF00 /* (RW) Job manager configuration (implementation-specific) */
|
||||
|
||||
/* Job control registers */
|
||||
|
||||
#define JOB_IRQ_JS_STATE 0x010 /* status==active and _next == busy snapshot from last JOB_IRQ_CLEAR */
|
||||
#define JOB_IRQ_THROTTLE 0x014 /* cycles to delay delivering an interrupt externally. The JOB_IRQ_STATUS is NOT affected by this, just the delivery of the interrupt. */
|
||||
|
||||
#define JOB_SLOT0 0x800 /* Configuration registers for job slot 0 */
|
||||
#define JOB_SLOT1 0x880 /* Configuration registers for job slot 1 */
|
||||
#define JOB_SLOT2 0x900 /* Configuration registers for job slot 2 */
|
||||
#define JOB_SLOT3 0x980 /* Configuration registers for job slot 3 */
|
||||
#define JOB_SLOT4 0xA00 /* Configuration registers for job slot 4 */
|
||||
#define JOB_SLOT5 0xA80 /* Configuration registers for job slot 5 */
|
||||
#define JOB_SLOT6 0xB00 /* Configuration registers for job slot 6 */
|
||||
#define JOB_SLOT7 0xB80 /* Configuration registers for job slot 7 */
|
||||
#define JOB_SLOT8 0xC00 /* Configuration registers for job slot 8 */
|
||||
#define JOB_SLOT9 0xC80 /* Configuration registers for job slot 9 */
|
||||
#define JOB_SLOT10 0xD00 /* Configuration registers for job slot 10 */
|
||||
#define JOB_SLOT11 0xD80 /* Configuration registers for job slot 11 */
|
||||
#define JOB_SLOT12 0xE00 /* Configuration registers for job slot 12 */
|
||||
#define JOB_SLOT13 0xE80 /* Configuration registers for job slot 13 */
|
||||
#define JOB_SLOT14 0xF00 /* Configuration registers for job slot 14 */
|
||||
#define JOB_SLOT15 0xF80 /* Configuration registers for job slot 15 */
|
||||
|
||||
#define JOB_SLOT_REG(n, r) (JOB_CONTROL_REG(JOB_SLOT0 + ((n) << 7)) + (r))
|
||||
|
||||
#define JS_HEAD_LO 0x00 /* (RO) Job queue head pointer for job slot n, low word */
|
||||
#define JS_HEAD_HI 0x04 /* (RO) Job queue head pointer for job slot n, high word */
|
||||
#define JS_TAIL_LO 0x08 /* (RO) Job queue tail pointer for job slot n, low word */
|
||||
#define JS_TAIL_HI 0x0C /* (RO) Job queue tail pointer for job slot n, high word */
|
||||
#define JS_AFFINITY_LO 0x10 /* (RO) Core affinity mask for job slot n, low word */
|
||||
#define JS_AFFINITY_HI 0x14 /* (RO) Core affinity mask for job slot n, high word */
|
||||
#define JS_CONFIG 0x18 /* (RO) Configuration settings for job slot n */
|
||||
/* (RO) Extended affinity mask for job slot n */
|
||||
#define JS_XAFFINITY 0x1C
|
||||
|
||||
#define JS_COMMAND 0x20 /* (WO) Command register for job slot n */
|
||||
#define JS_STATUS 0x24 /* (RO) Status register for job slot n */
|
||||
|
||||
#define JS_HEAD_NEXT_LO 0x40 /* (RW) Next job queue head pointer for job slot n, low word */
|
||||
#define JS_HEAD_NEXT_HI 0x44 /* (RW) Next job queue head pointer for job slot n, high word */
|
||||
|
||||
#define JS_AFFINITY_NEXT_LO 0x50 /* (RW) Next core affinity mask for job slot n, low word */
|
||||
#define JS_AFFINITY_NEXT_HI 0x54 /* (RW) Next core affinity mask for job slot n, high word */
|
||||
#define JS_CONFIG_NEXT 0x58 /* (RW) Next configuration settings for job slot n */
|
||||
/* (RW) Next extended affinity mask for job slot n */
|
||||
#define JS_XAFFINITY_NEXT 0x5C
|
||||
|
||||
#define JS_COMMAND_NEXT 0x60 /* (RW) Next command register for job slot n */
|
||||
|
||||
#define JS_FLUSH_ID_NEXT 0x70 /* (RW) Next job slot n cache flush ID */
|
||||
|
||||
/* No JM-specific MMU control registers */
|
||||
/* No JM-specific MMU address space control registers */
|
||||
|
||||
/* JS_COMMAND register commands */
|
||||
#define JS_COMMAND_NOP 0x00 /* NOP Operation. Writing this value is ignored */
|
||||
#define JS_COMMAND_START 0x01 /* Start processing a job chain. Writing this value is ignored */
|
||||
#define JS_COMMAND_SOFT_STOP 0x02 /* Gently stop processing a job chain */
|
||||
#define JS_COMMAND_HARD_STOP 0x03 /* Rudely stop processing a job chain */
|
||||
#define JS_COMMAND_SOFT_STOP_0 0x04 /* Execute SOFT_STOP if JOB_CHAIN_FLAG is 0 */
|
||||
#define JS_COMMAND_HARD_STOP_0 0x05 /* Execute HARD_STOP if JOB_CHAIN_FLAG is 0 */
|
||||
#define JS_COMMAND_SOFT_STOP_1 0x06 /* Execute SOFT_STOP if JOB_CHAIN_FLAG is 1 */
|
||||
#define JS_COMMAND_HARD_STOP_1 0x07 /* Execute HARD_STOP if JOB_CHAIN_FLAG is 1 */
|
||||
|
||||
#define JS_COMMAND_MASK 0x07 /* Mask of bits currently in use by the HW */
|
||||
|
||||
/* Possible values of JS_CONFIG and JS_CONFIG_NEXT registers */
|
||||
#define JS_CONFIG_START_FLUSH_NO_ACTION (0u << 0)
|
||||
#define JS_CONFIG_START_FLUSH_CLEAN (1u << 8)
|
||||
#define JS_CONFIG_START_FLUSH_INV_SHADER_OTHER (2u << 8)
|
||||
#define JS_CONFIG_START_FLUSH_CLEAN_INVALIDATE (3u << 8)
|
||||
#define JS_CONFIG_START_MMU (1u << 10)
|
||||
#define JS_CONFIG_JOB_CHAIN_FLAG (1u << 11)
|
||||
#define JS_CONFIG_END_FLUSH_NO_ACTION JS_CONFIG_START_FLUSH_NO_ACTION
|
||||
#define JS_CONFIG_END_FLUSH_CLEAN (1u << 12)
|
||||
#define JS_CONFIG_END_FLUSH_CLEAN_INVALIDATE (3u << 12)
|
||||
#define JS_CONFIG_ENABLE_FLUSH_REDUCTION (1u << 14)
|
||||
#define JS_CONFIG_DISABLE_DESCRIPTOR_WR_BK (1u << 15)
|
||||
#define JS_CONFIG_THREAD_PRI(n) ((n) << 16)
|
||||
|
||||
/* JS_XAFFINITY register values */
|
||||
#define JS_XAFFINITY_XAFFINITY_ENABLE (1u << 0)
|
||||
#define JS_XAFFINITY_TILER_ENABLE (1u << 8)
|
||||
#define JS_XAFFINITY_CACHE_ENABLE (1u << 16)
|
||||
|
||||
/* JS_STATUS register values */
|
||||
|
||||
/* NOTE: Please keep these values in sync with enum base_jd_event_code in mali_base_kernel.h.
|
||||
* The values are separated to avoid dependency of userspace and kernel code.
|
||||
*/
|
||||
|
||||
/* Group of values representing the job status instead of a particular fault */
|
||||
#define JS_STATUS_NO_EXCEPTION_BASE 0x00
|
||||
#define JS_STATUS_INTERRUPTED (JS_STATUS_NO_EXCEPTION_BASE + 0x02) /* 0x02 means INTERRUPTED */
|
||||
#define JS_STATUS_STOPPED (JS_STATUS_NO_EXCEPTION_BASE + 0x03) /* 0x03 means STOPPED */
|
||||
#define JS_STATUS_TERMINATED (JS_STATUS_NO_EXCEPTION_BASE + 0x04) /* 0x04 means TERMINATED */
|
||||
|
||||
/* General fault values */
|
||||
#define JS_STATUS_FAULT_BASE 0x40
|
||||
#define JS_STATUS_CONFIG_FAULT (JS_STATUS_FAULT_BASE) /* 0x40 means CONFIG FAULT */
|
||||
#define JS_STATUS_POWER_FAULT (JS_STATUS_FAULT_BASE + 0x01) /* 0x41 means POWER FAULT */
|
||||
#define JS_STATUS_READ_FAULT (JS_STATUS_FAULT_BASE + 0x02) /* 0x42 means READ FAULT */
|
||||
#define JS_STATUS_WRITE_FAULT (JS_STATUS_FAULT_BASE + 0x03) /* 0x43 means WRITE FAULT */
|
||||
#define JS_STATUS_AFFINITY_FAULT (JS_STATUS_FAULT_BASE + 0x04) /* 0x44 means AFFINITY FAULT */
|
||||
#define JS_STATUS_BUS_FAULT (JS_STATUS_FAULT_BASE + 0x08) /* 0x48 means BUS FAULT */
|
||||
|
||||
/* Instruction or data faults */
|
||||
#define JS_STATUS_INSTRUCTION_FAULT_BASE 0x50
|
||||
#define JS_STATUS_INSTR_INVALID_PC (JS_STATUS_INSTRUCTION_FAULT_BASE) /* 0x50 means INSTR INVALID PC */
|
||||
#define JS_STATUS_INSTR_INVALID_ENC (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x01) /* 0x51 means INSTR INVALID ENC */
|
||||
#define JS_STATUS_INSTR_TYPE_MISMATCH (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x02) /* 0x52 means INSTR TYPE MISMATCH */
|
||||
#define JS_STATUS_INSTR_OPERAND_FAULT (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x03) /* 0x53 means INSTR OPERAND FAULT */
|
||||
#define JS_STATUS_INSTR_TLS_FAULT (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x04) /* 0x54 means INSTR TLS FAULT */
|
||||
#define JS_STATUS_INSTR_BARRIER_FAULT (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x05) /* 0x55 means INSTR BARRIER FAULT */
|
||||
#define JS_STATUS_INSTR_ALIGN_FAULT (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x06) /* 0x56 means INSTR ALIGN FAULT */
|
||||
/* NOTE: No fault with 0x57 code defined in spec. */
|
||||
#define JS_STATUS_DATA_INVALID_FAULT (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x08) /* 0x58 means DATA INVALID FAULT */
|
||||
#define JS_STATUS_TILE_RANGE_FAULT (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x09) /* 0x59 means TILE RANGE FAULT */
|
||||
#define JS_STATUS_ADDRESS_RANGE_FAULT (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x0A) /* 0x5A means ADDRESS RANGE FAULT */
|
||||
|
||||
/* Other faults */
|
||||
#define JS_STATUS_MEMORY_FAULT_BASE 0x60
|
||||
#define JS_STATUS_OUT_OF_MEMORY (JS_STATUS_MEMORY_FAULT_BASE) /* 0x60 means OUT OF MEMORY */
|
||||
#define JS_STATUS_UNKNOWN 0x7F /* 0x7F means UNKNOWN */
|
||||
|
||||
/* JS<n>_FEATURES register */
|
||||
#define JS_FEATURE_NULL_JOB (1u << 1)
|
||||
#define JS_FEATURE_SET_VALUE_JOB (1u << 2)
|
||||
#define JS_FEATURE_CACHE_FLUSH_JOB (1u << 3)
|
||||
#define JS_FEATURE_COMPUTE_JOB (1u << 4)
|
||||
#define JS_FEATURE_VERTEX_JOB (1u << 5)
|
||||
#define JS_FEATURE_GEOMETRY_JOB (1u << 6)
|
||||
#define JS_FEATURE_TILER_JOB (1u << 7)
|
||||
#define JS_FEATURE_FUSED_JOB (1u << 8)
|
||||
#define JS_FEATURE_FRAGMENT_JOB (1u << 9)
|
||||
|
||||
/* JM_CONFIG register */
|
||||
#define JM_TIMESTAMP_OVERRIDE (1ul << 0)
|
||||
#define JM_CLOCK_GATE_OVERRIDE (1ul << 1)
|
||||
#define JM_JOB_THROTTLE_ENABLE (1ul << 2)
|
||||
#define JM_JOB_THROTTLE_LIMIT_SHIFT (3)
|
||||
#define JM_MAX_JOB_THROTTLE_LIMIT (0x3F)
|
||||
#define JM_FORCE_COHERENCY_FEATURES_SHIFT (2)
|
||||
|
||||
/* GPU_COMMAND values */
|
||||
#define GPU_COMMAND_NOP 0x00 /* No operation, nothing happens */
|
||||
#define GPU_COMMAND_SOFT_RESET 0x01 /* Stop all external bus interfaces, and then reset the entire GPU. */
|
||||
#define GPU_COMMAND_HARD_RESET 0x02 /* Immediately reset the entire GPU. */
|
||||
#define GPU_COMMAND_PRFCNT_CLEAR 0x03 /* Clear all performance counters, setting them all to zero. */
|
||||
#define GPU_COMMAND_PRFCNT_SAMPLE 0x04 /* Sample all performance counters, writing them out to memory */
|
||||
#define GPU_COMMAND_CYCLE_COUNT_START 0x05 /* Starts the cycle counter, and system timestamp propagation */
|
||||
#define GPU_COMMAND_CYCLE_COUNT_STOP 0x06 /* Stops the cycle counter, and system timestamp propagation */
|
||||
#define GPU_COMMAND_CLEAN_CACHES 0x07 /* Clean all caches */
|
||||
#define GPU_COMMAND_CLEAN_INV_CACHES 0x08 /* Clean and invalidate all caches */
|
||||
#define GPU_COMMAND_SET_PROTECTED_MODE 0x09 /* Places the GPU in protected mode */
|
||||
|
||||
/* GPU_COMMAND cache flush alias to CSF command payload */
|
||||
#define GPU_COMMAND_CACHE_CLN_INV_L2 GPU_COMMAND_CLEAN_INV_CACHES
|
||||
#define GPU_COMMAND_CACHE_CLN_INV_L2_LSC GPU_COMMAND_CLEAN_INV_CACHES
|
||||
#define GPU_COMMAND_CACHE_CLN_INV_FULL GPU_COMMAND_CLEAN_INV_CACHES
|
||||
|
||||
/* Merge cache flush commands */
|
||||
#define GPU_COMMAND_FLUSH_CACHE_MERGE(cmd1, cmd2) \
|
||||
((cmd1) > (cmd2) ? (cmd1) : (cmd2))
|
||||
|
||||
/* IRQ flags */
|
||||
#define GPU_FAULT (1 << 0) /* A GPU Fault has occurred */
|
||||
#define MULTIPLE_GPU_FAULTS (1 << 7) /* More than one GPU Fault occurred. */
|
||||
#define RESET_COMPLETED (1 << 8) /* Set when a reset has completed. */
|
||||
#define POWER_CHANGED_SINGLE (1 << 9) /* Set when a single core has finished powering up or down. */
|
||||
#define POWER_CHANGED_ALL (1 << 10) /* Set when all cores have finished powering up or down. */
|
||||
#define PRFCNT_SAMPLE_COMPLETED (1 << 16) /* Set when a performance count sample has completed. */
|
||||
#define CLEAN_CACHES_COMPLETED (1 << 17) /* Set when a cache clean operation has completed. */
|
||||
|
||||
/*
|
||||
* In Debug build,
|
||||
* GPU_IRQ_REG_COMMON | POWER_CHANGED_SINGLE is used to clear and enable interrupt sources of GPU_IRQ
|
||||
* by writing it onto GPU_IRQ_CLEAR/MASK registers.
|
||||
*
|
||||
* In Release build,
|
||||
* GPU_IRQ_REG_COMMON is used.
|
||||
*
|
||||
* Note:
|
||||
* CLEAN_CACHES_COMPLETED - Used separately for cache operation.
|
||||
*/
|
||||
#define GPU_IRQ_REG_COMMON (GPU_FAULT | MULTIPLE_GPU_FAULTS | RESET_COMPLETED \
|
||||
| POWER_CHANGED_ALL | PRFCNT_SAMPLE_COMPLETED)
|
||||
|
||||
#endif /* _KBASE_GPU_REGMAP_JM_H_ */
|
||||
@@ -1,7 +1,7 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
|
||||
/*
|
||||
*
|
||||
* (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
|
||||
* (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
|
||||
*
|
||||
* This program is free software and is provided to you under the terms of the
|
||||
* GNU General Public License version 2 as published by the Free Software
|
||||
@@ -23,8 +23,8 @@
|
||||
#define _KBASE_GPU_FAULT_H_
|
||||
|
||||
/**
|
||||
* kbase_gpu_exception_name() -
|
||||
* Returns the name associated with a Mali exception code
|
||||
* kbase_gpu_exception_name() - Returns associated string of the exception code
|
||||
*
|
||||
* @exception_code: exception code
|
||||
*
|
||||
* This function is called from the interrupt handler when a GPU fault occurs.
|
||||
|
||||
@@ -23,6 +23,565 @@
|
||||
#define _KBASE_GPU_REGMAP_H_
|
||||
|
||||
#include <uapi/gpu/arm/bifrost/gpu/mali_kbase_gpu_regmap.h>
|
||||
#include <uapi/gpu/arm/bifrost/gpu/mali_kbase_gpu_coherency.h>
|
||||
#include <uapi/gpu/arm/bifrost/gpu/mali_kbase_gpu_id.h>
|
||||
#if MALI_USE_CSF
|
||||
#include "backend/mali_kbase_gpu_regmap_csf.h"
|
||||
#else
|
||||
#include "backend/mali_kbase_gpu_regmap_jm.h"
|
||||
#endif
|
||||
|
||||
/* GPU_U definition */
|
||||
#ifdef __ASSEMBLER__
|
||||
#define GPU_U(x) x
|
||||
#else
|
||||
#define GPU_U(x) x##u
|
||||
#endif /* __ASSEMBLER__ */
|
||||
|
||||
/* Begin Register Offsets */
|
||||
/* GPU control registers */
|
||||
|
||||
#define GPU_CONTROL_BASE 0x0000
|
||||
#define GPU_CONTROL_REG(r) (GPU_CONTROL_BASE + (r))
|
||||
#define GPU_ID 0x000 /* (RO) GPU and revision identifier */
|
||||
#define L2_FEATURES 0x004 /* (RO) Level 2 cache features */
|
||||
#define TILER_FEATURES 0x00C /* (RO) Tiler Features */
|
||||
#define MEM_FEATURES 0x010 /* (RO) Memory system features */
|
||||
#define MMU_FEATURES 0x014 /* (RO) MMU features */
|
||||
#define AS_PRESENT 0x018 /* (RO) Address space slots present */
|
||||
#define GPU_IRQ_RAWSTAT 0x020 /* (RW) */
|
||||
#define GPU_IRQ_CLEAR 0x024 /* (WO) */
|
||||
#define GPU_IRQ_MASK 0x028 /* (RW) */
|
||||
#define GPU_IRQ_STATUS 0x02C /* (RO) */
|
||||
|
||||
#define GPU_COMMAND 0x030 /* (WO) */
|
||||
#define GPU_STATUS 0x034 /* (RO) */
|
||||
|
||||
#define GPU_DBGEN (1 << 8) /* DBGEN wire status */
|
||||
|
||||
#define GPU_FAULTSTATUS 0x03C /* (RO) GPU exception type and fault status */
|
||||
#define GPU_FAULTADDRESS_LO 0x040 /* (RO) GPU exception fault address, low word */
|
||||
#define GPU_FAULTADDRESS_HI 0x044 /* (RO) GPU exception fault address, high word */
|
||||
|
||||
#define L2_CONFIG 0x048 /* (RW) Level 2 cache configuration */
|
||||
|
||||
#define GROUPS_L2_COHERENT (1 << 0) /* Core groups are L2 coherent */
|
||||
#define SUPER_L2_COHERENT (1 << 1) /* Shader cores within a core
|
||||
* supergroup are l2 coherent
|
||||
*/
|
||||
|
||||
#define PWR_KEY 0x050 /* (WO) Power manager key register */
|
||||
#define PWR_OVERRIDE0 0x054 /* (RW) Power manager override settings */
|
||||
#define PWR_OVERRIDE1 0x058 /* (RW) Power manager override settings */
|
||||
#define GPU_FEATURES_LO 0x060 /* (RO) GPU features, low word */
|
||||
#define GPU_FEATURES_HI 0x064 /* (RO) GPU features, high word */
|
||||
#define PRFCNT_FEATURES 0x068 /* (RO) Performance counter features */
|
||||
#define TIMESTAMP_OFFSET_LO 0x088 /* (RW) Global time stamp offset, low word */
|
||||
#define TIMESTAMP_OFFSET_HI 0x08C /* (RW) Global time stamp offset, high word */
|
||||
#define CYCLE_COUNT_LO 0x090 /* (RO) Cycle counter, low word */
|
||||
#define CYCLE_COUNT_HI 0x094 /* (RO) Cycle counter, high word */
|
||||
#define TIMESTAMP_LO 0x098 /* (RO) Global time stamp counter, low word */
|
||||
#define TIMESTAMP_HI 0x09C /* (RO) Global time stamp counter, high word */
|
||||
|
||||
#define THREAD_MAX_THREADS 0x0A0 /* (RO) Maximum number of threads per core */
|
||||
#define THREAD_MAX_WORKGROUP_SIZE 0x0A4 /* (RO) Maximum workgroup size */
|
||||
#define THREAD_MAX_BARRIER_SIZE 0x0A8 /* (RO) Maximum threads waiting at a barrier */
|
||||
#define THREAD_FEATURES 0x0AC /* (RO) Thread features */
|
||||
#define THREAD_TLS_ALLOC 0x310 /* (RO) Number of threads per core that TLS must be allocated for */
|
||||
|
||||
#define TEXTURE_FEATURES_0 0x0B0 /* (RO) Support flags for indexed texture formats 0..31 */
|
||||
#define TEXTURE_FEATURES_1 0x0B4 /* (RO) Support flags for indexed texture formats 32..63 */
|
||||
#define TEXTURE_FEATURES_2 0x0B8 /* (RO) Support flags for indexed texture formats 64..95 */
|
||||
#define TEXTURE_FEATURES_3 0x0BC /* (RO) Support flags for texture order */
|
||||
|
||||
#define TEXTURE_FEATURES_REG(n) GPU_CONTROL_REG(TEXTURE_FEATURES_0 + ((n) << 2))
|
||||
|
||||
#define SHADER_PRESENT_LO 0x100 /* (RO) Shader core present bitmap, low word */
|
||||
#define SHADER_PRESENT_HI 0x104 /* (RO) Shader core present bitmap, high word */
|
||||
|
||||
#define TILER_PRESENT_LO 0x110 /* (RO) Tiler core present bitmap, low word */
|
||||
#define TILER_PRESENT_HI 0x114 /* (RO) Tiler core present bitmap, high word */
|
||||
|
||||
#define L2_PRESENT_LO 0x120 /* (RO) Level 2 cache present bitmap, low word */
|
||||
#define L2_PRESENT_HI 0x124 /* (RO) Level 2 cache present bitmap, high word */
|
||||
|
||||
#define STACK_PRESENT_LO 0xE00 /* (RO) Core stack present bitmap, low word */
|
||||
#define STACK_PRESENT_HI 0xE04 /* (RO) Core stack present bitmap, high word */
|
||||
|
||||
#define SHADER_READY_LO 0x140 /* (RO) Shader core ready bitmap, low word */
|
||||
#define SHADER_READY_HI 0x144 /* (RO) Shader core ready bitmap, high word */
|
||||
|
||||
#define TILER_READY_LO 0x150 /* (RO) Tiler core ready bitmap, low word */
|
||||
#define TILER_READY_HI 0x154 /* (RO) Tiler core ready bitmap, high word */
|
||||
|
||||
#define L2_READY_LO 0x160 /* (RO) Level 2 cache ready bitmap, low word */
|
||||
#define L2_READY_HI 0x164 /* (RO) Level 2 cache ready bitmap, high word */
|
||||
|
||||
#define STACK_READY_LO 0xE10 /* (RO) Core stack ready bitmap, low word */
|
||||
#define STACK_READY_HI 0xE14 /* (RO) Core stack ready bitmap, high word */
|
||||
|
||||
#define SHADER_PWRON_LO 0x180 /* (WO) Shader core power on bitmap, low word */
|
||||
#define SHADER_PWRON_HI 0x184 /* (WO) Shader core power on bitmap, high word */
|
||||
|
||||
#define TILER_PWRON_LO 0x190 /* (WO) Tiler core power on bitmap, low word */
|
||||
#define TILER_PWRON_HI 0x194 /* (WO) Tiler core power on bitmap, high word */
|
||||
|
||||
#define L2_PWRON_LO 0x1A0 /* (WO) Level 2 cache power on bitmap, low word */
|
||||
#define L2_PWRON_HI 0x1A4 /* (WO) Level 2 cache power on bitmap, high word */
|
||||
|
||||
#define STACK_PWRON_LO 0xE20 /* (RO) Core stack power on bitmap, low word */
|
||||
#define STACK_PWRON_HI 0xE24 /* (RO) Core stack power on bitmap, high word */
|
||||
|
||||
#define SHADER_PWROFF_LO 0x1C0 /* (WO) Shader core power off bitmap, low word */
|
||||
#define SHADER_PWROFF_HI 0x1C4 /* (WO) Shader core power off bitmap, high word */
|
||||
|
||||
#define TILER_PWROFF_LO 0x1D0 /* (WO) Tiler core power off bitmap, low word */
|
||||
#define TILER_PWROFF_HI 0x1D4 /* (WO) Tiler core power off bitmap, high word */
|
||||
|
||||
#define L2_PWROFF_LO 0x1E0 /* (WO) Level 2 cache power off bitmap, low word */
|
||||
#define L2_PWROFF_HI 0x1E4 /* (WO) Level 2 cache power off bitmap, high word */
|
||||
|
||||
#define STACK_PWROFF_LO 0xE30 /* (RO) Core stack power off bitmap, low word */
|
||||
#define STACK_PWROFF_HI 0xE34 /* (RO) Core stack power off bitmap, high word */
|
||||
|
||||
#define SHADER_PWRTRANS_LO 0x200 /* (RO) Shader core power transition bitmap, low word */
|
||||
#define SHADER_PWRTRANS_HI 0x204 /* (RO) Shader core power transition bitmap, high word */
|
||||
|
||||
#define TILER_PWRTRANS_LO 0x210 /* (RO) Tiler core power transition bitmap, low word */
|
||||
#define TILER_PWRTRANS_HI 0x214 /* (RO) Tiler core power transition bitmap, high word */
|
||||
|
||||
#define L2_PWRTRANS_LO 0x220 /* (RO) Level 2 cache power transition bitmap, low word */
|
||||
#define L2_PWRTRANS_HI 0x224 /* (RO) Level 2 cache power transition bitmap, high word */
|
||||
|
||||
#define ASN_HASH_0 0x02C0 /* (RW) ASN hash function argument 0 */
|
||||
#define ASN_HASH(n) (ASN_HASH_0 + (n)*4)
|
||||
#define ASN_HASH_COUNT 3
|
||||
|
||||
#define SYSC_ALLOC0 0x0340 /* (RW) System cache allocation hint from source ID */
|
||||
#define SYSC_ALLOC(n) (SYSC_ALLOC0 + (n)*4)
|
||||
#define SYSC_ALLOC_COUNT 8
|
||||
|
||||
#define STACK_PWRTRANS_LO 0xE40 /* (RO) Core stack power transition bitmap, low word */
|
||||
#define STACK_PWRTRANS_HI 0xE44 /* (RO) Core stack power transition bitmap, high word */
|
||||
|
||||
#define SHADER_PWRACTIVE_LO 0x240 /* (RO) Shader core active bitmap, low word */
|
||||
#define SHADER_PWRACTIVE_HI 0x244 /* (RO) Shader core active bitmap, high word */
|
||||
|
||||
#define TILER_PWRACTIVE_LO 0x250 /* (RO) Tiler core active bitmap, low word */
|
||||
#define TILER_PWRACTIVE_HI 0x254 /* (RO) Tiler core active bitmap, high word */
|
||||
|
||||
#define L2_PWRACTIVE_LO 0x260 /* (RO) Level 2 cache active bitmap, low word */
|
||||
#define L2_PWRACTIVE_HI 0x264 /* (RO) Level 2 cache active bitmap, high word */
|
||||
|
||||
#define COHERENCY_FEATURES 0x300 /* (RO) Coherency features present */
|
||||
#define COHERENCY_ENABLE 0x304 /* (RW) Coherency enable */
|
||||
|
||||
|
||||
#define SHADER_CONFIG 0xF04 /* (RW) Shader core configuration (implementation-specific) */
|
||||
#define TILER_CONFIG 0xF08 /* (RW) Tiler core configuration (implementation-specific) */
|
||||
#define L2_MMU_CONFIG 0xF0C /* (RW) L2 cache and MMU configuration (implementation-specific) */
|
||||
|
||||
/* Job control registers */
|
||||
|
||||
#define JOB_CONTROL_BASE 0x1000
|
||||
|
||||
#define JOB_CONTROL_REG(r) (JOB_CONTROL_BASE + (r))
|
||||
|
||||
#define JOB_IRQ_RAWSTAT 0x000 /* Raw interrupt status register */
|
||||
#define JOB_IRQ_CLEAR 0x004 /* Interrupt clear register */
|
||||
#define JOB_IRQ_MASK 0x008 /* Interrupt mask register */
|
||||
#define JOB_IRQ_STATUS 0x00C /* Interrupt status register */
|
||||
|
||||
/* MMU control registers */
|
||||
|
||||
#define MMU_IRQ_CLEAR 0x004 /* (WO) Interrupt clear register */
|
||||
#define MMU_IRQ_MASK 0x008 /* (RW) Interrupt mask register */
|
||||
#define MMU_IRQ_STATUS 0x00C /* (RO) Interrupt status register */
|
||||
|
||||
#define MMU_AS0 0x400 /* Configuration registers for address space 0 */
|
||||
#define MMU_AS1 0x440 /* Configuration registers for address space 1 */
|
||||
#define MMU_AS2 0x480 /* Configuration registers for address space 2 */
|
||||
#define MMU_AS3 0x4C0 /* Configuration registers for address space 3 */
|
||||
#define MMU_AS4 0x500 /* Configuration registers for address space 4 */
|
||||
#define MMU_AS5 0x540 /* Configuration registers for address space 5 */
|
||||
#define MMU_AS6 0x580 /* Configuration registers for address space 6 */
|
||||
#define MMU_AS7 0x5C0 /* Configuration registers for address space 7 */
|
||||
#define MMU_AS8 0x600 /* Configuration registers for address space 8 */
|
||||
#define MMU_AS9 0x640 /* Configuration registers for address space 9 */
|
||||
#define MMU_AS10 0x680 /* Configuration registers for address space 10 */
|
||||
#define MMU_AS11 0x6C0 /* Configuration registers for address space 11 */
|
||||
#define MMU_AS12 0x700 /* Configuration registers for address space 12 */
|
||||
#define MMU_AS13 0x740 /* Configuration registers for address space 13 */
|
||||
#define MMU_AS14 0x780 /* Configuration registers for address space 14 */
|
||||
#define MMU_AS15 0x7C0 /* Configuration registers for address space 15 */
|
||||
|
||||
/* MMU address space control registers */
|
||||
|
||||
#define MMU_AS_REG(n, r) (MMU_REG(MMU_AS0 + ((n) << 6)) + (r))
|
||||
|
||||
#define AS_TRANSTAB_LO 0x00 /* (RW) Translation Table Base Address for address space n, low word */
|
||||
#define AS_TRANSTAB_HI 0x04 /* (RW) Translation Table Base Address for address space n, high word */
|
||||
#define AS_MEMATTR_LO 0x08 /* (RW) Memory attributes for address space n, low word. */
|
||||
#define AS_MEMATTR_HI 0x0C /* (RW) Memory attributes for address space n, high word. */
|
||||
#define AS_LOCKADDR_LO 0x10 /* (RW) Lock region address for address space n, low word */
|
||||
#define AS_LOCKADDR_HI 0x14 /* (RW) Lock region address for address space n, high word */
|
||||
#define AS_COMMAND 0x18 /* (WO) MMU command register for address space n */
|
||||
#define AS_FAULTSTATUS 0x1C /* (RO) MMU fault status register for address space n */
|
||||
#define AS_FAULTADDRESS_LO 0x20 /* (RO) Fault Address for address space n, low word */
|
||||
#define AS_FAULTADDRESS_HI 0x24 /* (RO) Fault Address for address space n, high word */
|
||||
#define AS_STATUS 0x28 /* (RO) Status flags for address space n */
|
||||
|
||||
/* (RW) Translation table configuration for address space n, low word */
|
||||
#define AS_TRANSCFG_LO 0x30
|
||||
/* (RW) Translation table configuration for address space n, high word */
|
||||
#define AS_TRANSCFG_HI 0x34
|
||||
/* (RO) Secondary fault address for address space n, low word */
|
||||
#define AS_FAULTEXTRA_LO 0x38
|
||||
/* (RO) Secondary fault address for address space n, high word */
|
||||
#define AS_FAULTEXTRA_HI 0x3C
|
||||
|
||||
/* End Register Offsets */
|
||||
|
||||
#define GPU_IRQ_REG_ALL (GPU_IRQ_REG_COMMON)
|
||||
|
||||
/*
|
||||
* MMU_IRQ_RAWSTAT register values. Values are valid also for
|
||||
* MMU_IRQ_CLEAR, MMU_IRQ_MASK, MMU_IRQ_STATUS registers.
|
||||
*/
|
||||
|
||||
#define MMU_PAGE_FAULT_FLAGS 16
|
||||
|
||||
/* Macros returning a bitmask to retrieve page fault or bus error flags from
|
||||
* MMU registers
|
||||
*/
|
||||
#define MMU_PAGE_FAULT(n) (1UL << (n))
|
||||
#define MMU_BUS_ERROR(n) (1UL << ((n) + MMU_PAGE_FAULT_FLAGS))
|
||||
|
||||
/*
|
||||
* Begin AARCH64 MMU TRANSTAB register values
|
||||
*/
|
||||
#define MMU_HW_OUTA_BITS 40
|
||||
#define AS_TRANSTAB_BASE_MASK ((1ULL << MMU_HW_OUTA_BITS) - (1ULL << 4))
|
||||
|
||||
/*
|
||||
* Begin MMU STATUS register values
|
||||
*/
|
||||
#define AS_STATUS_AS_ACTIVE 0x01
|
||||
|
||||
#define AS_FAULTSTATUS_EXCEPTION_CODE_MASK (0x7<<3)
|
||||
#define AS_FAULTSTATUS_EXCEPTION_CODE_TRANSLATION_FAULT (0x0<<3)
|
||||
#define AS_FAULTSTATUS_EXCEPTION_CODE_PERMISSION_FAULT (0x1<<3)
|
||||
#define AS_FAULTSTATUS_EXCEPTION_CODE_TRANSTAB_BUS_FAULT (0x2<<3)
|
||||
#define AS_FAULTSTATUS_EXCEPTION_CODE_ACCESS_FLAG (0x3<<3)
|
||||
#define AS_FAULTSTATUS_EXCEPTION_CODE_ADDRESS_SIZE_FAULT (0x4<<3)
|
||||
#define AS_FAULTSTATUS_EXCEPTION_CODE_MEMORY_ATTRIBUTES_FAULT (0x5<<3)
|
||||
|
||||
#define AS_FAULTSTATUS_EXCEPTION_TYPE_SHIFT 0
|
||||
#define AS_FAULTSTATUS_EXCEPTION_TYPE_MASK (0xFF << AS_FAULTSTATUS_EXCEPTION_TYPE_SHIFT)
|
||||
#define AS_FAULTSTATUS_EXCEPTION_TYPE_GET(reg_val) \
|
||||
(((reg_val)&AS_FAULTSTATUS_EXCEPTION_TYPE_MASK) >> AS_FAULTSTATUS_EXCEPTION_TYPE_SHIFT)
|
||||
#define AS_FAULTSTATUS_EXCEPTION_TYPE_TRANSLATION_FAULT_0 0xC0
|
||||
|
||||
#define AS_FAULTSTATUS_ACCESS_TYPE_SHIFT 8
|
||||
#define AS_FAULTSTATUS_ACCESS_TYPE_MASK (0x3 << AS_FAULTSTATUS_ACCESS_TYPE_SHIFT)
|
||||
#define AS_FAULTSTATUS_ACCESS_TYPE_GET(reg_val) \
|
||||
(((reg_val)&AS_FAULTSTATUS_ACCESS_TYPE_MASK) >> AS_FAULTSTATUS_ACCESS_TYPE_SHIFT)
|
||||
|
||||
#define AS_FAULTSTATUS_ACCESS_TYPE_ATOMIC (0x0)
|
||||
#define AS_FAULTSTATUS_ACCESS_TYPE_EX (0x1)
|
||||
#define AS_FAULTSTATUS_ACCESS_TYPE_READ (0x2)
|
||||
#define AS_FAULTSTATUS_ACCESS_TYPE_WRITE (0x3)
|
||||
|
||||
#define AS_FAULTSTATUS_SOURCE_ID_SHIFT 16
|
||||
/* Bitmask of the SOURCE_ID field of AS_FAULTSTATUS.
 * The literal is unsigned on purpose: 0xFFFF shifted up to bit 31 does not
 * fit in a signed int, so a signed shift would overflow and produce a
 * negative mask that sign-extends when combined with a 64-bit value.
 */
#define AS_FAULTSTATUS_SOURCE_ID_MASK (0xFFFFu << AS_FAULTSTATUS_SOURCE_ID_SHIFT)
|
||||
#define AS_FAULTSTATUS_SOURCE_ID_GET(reg_val) \
|
||||
(((reg_val)&AS_FAULTSTATUS_SOURCE_ID_MASK) >> AS_FAULTSTATUS_SOURCE_ID_SHIFT)
|
||||
|
||||
#define PRFCNT_FEATURES_COUNTER_BLOCK_SIZE_SHIFT (0)
|
||||
#define PRFCNT_FEATURES_COUNTER_BLOCK_SIZE_MASK \
|
||||
((0xFF) << PRFCNT_FEATURES_COUNTER_BLOCK_SIZE_SHIFT)
|
||||
#define PRFCNT_FEATURES_COUNTER_BLOCK_SIZE_GET(reg_val) \
|
||||
(((reg_val)&PRFCNT_FEATURES_COUNTER_BLOCK_SIZE_MASK) >> \
|
||||
PRFCNT_FEATURES_COUNTER_BLOCK_SIZE_SHIFT)
|
||||
|
||||
/*
|
||||
* Begin MMU TRANSCFG register values
|
||||
*/
|
||||
#define AS_TRANSCFG_ADRMODE_LEGACY 0
|
||||
#define AS_TRANSCFG_ADRMODE_UNMAPPED 1
|
||||
#define AS_TRANSCFG_ADRMODE_IDENTITY 2
|
||||
#define AS_TRANSCFG_ADRMODE_AARCH64_4K 6
|
||||
#define AS_TRANSCFG_ADRMODE_AARCH64_64K 8
|
||||
|
||||
#define AS_TRANSCFG_ADRMODE_MASK 0xF
|
||||
|
||||
/*
|
||||
* Begin TRANSCFG register values
|
||||
*/
|
||||
#define AS_TRANSCFG_PTW_MEMATTR_MASK (3ull << 24)
|
||||
#define AS_TRANSCFG_PTW_MEMATTR_NON_CACHEABLE (1ull << 24)
|
||||
#define AS_TRANSCFG_PTW_MEMATTR_WRITE_BACK (2ull << 24)
|
||||
|
||||
#define AS_TRANSCFG_PTW_SH_MASK ((3ull << 28))
|
||||
#define AS_TRANSCFG_PTW_SH_OS (2ull << 28)
|
||||
#define AS_TRANSCFG_PTW_SH_IS (3ull << 28)
|
||||
#define AS_TRANSCFG_R_ALLOCATE (1ull << 30)
|
||||
|
||||
/*
|
||||
* Begin Command Values
|
||||
*/
|
||||
|
||||
/* AS_COMMAND register commands */
|
||||
#define AS_COMMAND_NOP 0x00 /* NOP Operation */
|
||||
#define AS_COMMAND_UPDATE 0x01 /* Broadcasts the values in AS_TRANSTAB and ASn_MEMATTR to all MMUs */
|
||||
#define AS_COMMAND_LOCK 0x02 /* Issue a lock region command to all MMUs */
|
||||
#define AS_COMMAND_UNLOCK 0x03 /* Issue a flush region command to all MMUs */
|
||||
/* Flush all L2 caches then issue a flush region command to all MMUs */
|
||||
#define AS_COMMAND_FLUSH_PT 0x04
|
||||
/* Wait for memory accesses to complete, flush all the L1s cache then flush all
|
||||
* L2 caches then issue a flush region command to all MMUs
|
||||
*/
|
||||
#define AS_COMMAND_FLUSH_MEM 0x05
|
||||
|
||||
/* AS_LOCKADDR register */
|
||||
#define AS_LOCKADDR_LOCKADDR_SIZE_SHIFT GPU_U(0)
|
||||
#define AS_LOCKADDR_LOCKADDR_SIZE_MASK \
|
||||
(GPU_U(0x3F) << AS_LOCKADDR_LOCKADDR_SIZE_SHIFT)
|
||||
#define AS_LOCKADDR_LOCKADDR_SIZE_GET(reg_val) \
|
||||
(((reg_val)&AS_LOCKADDR_LOCKADDR_SIZE_MASK) >> \
|
||||
AS_LOCKADDR_LOCKADDR_SIZE_SHIFT)
|
||||
#define AS_LOCKADDR_LOCKADDR_SIZE_SET(reg_val, value) \
|
||||
(((reg_val) & ~AS_LOCKADDR_LOCKADDR_SIZE_MASK) | \
|
||||
(((value) << AS_LOCKADDR_LOCKADDR_SIZE_SHIFT) & \
|
||||
AS_LOCKADDR_LOCKADDR_SIZE_MASK))
|
||||
#define AS_LOCKADDR_LOCKADDR_BASE_SHIFT GPU_U(12)
|
||||
#define AS_LOCKADDR_LOCKADDR_BASE_MASK \
|
||||
(GPU_U(0xFFFFFFFFFFFFF) << AS_LOCKADDR_LOCKADDR_BASE_SHIFT)
|
||||
#define AS_LOCKADDR_LOCKADDR_BASE_GET(reg_val) \
|
||||
(((reg_val)&AS_LOCKADDR_LOCKADDR_BASE_MASK) >> \
|
||||
AS_LOCKADDR_LOCKADDR_BASE_SHIFT)
|
||||
#define AS_LOCKADDR_LOCKADDR_BASE_SET(reg_val, value) \
|
||||
(((reg_val) & ~AS_LOCKADDR_LOCKADDR_BASE_MASK) | \
|
||||
(((value) << AS_LOCKADDR_LOCKADDR_BASE_SHIFT) & \
|
||||
AS_LOCKADDR_LOCKADDR_BASE_MASK))
|
||||
|
||||
/* GPU_STATUS values */
|
||||
#define GPU_STATUS_PRFCNT_ACTIVE (1 << 2) /* Set if the performance counters are active. */
|
||||
#define GPU_STATUS_CYCLE_COUNT_ACTIVE (1 << 6) /* Set if the cycle counter is active. */
|
||||
#define GPU_STATUS_PROTECTED_MODE_ACTIVE (1 << 7) /* Set if protected mode is active */
|
||||
|
||||
/* PRFCNT_CONFIG register values */
|
||||
#define PRFCNT_CONFIG_MODE_SHIFT 0 /* Counter mode position. */
|
||||
#define PRFCNT_CONFIG_AS_SHIFT 4 /* Address space bitmap position. */
|
||||
#define PRFCNT_CONFIG_SETSELECT_SHIFT 8 /* Set select position. */
|
||||
|
||||
/* The performance counters are disabled. */
|
||||
#define PRFCNT_CONFIG_MODE_OFF 0
|
||||
/* The performance counters are enabled, but are only written out when a
|
||||
* PRFCNT_SAMPLE command is issued using the GPU_COMMAND register.
|
||||
*/
|
||||
#define PRFCNT_CONFIG_MODE_MANUAL 1
|
||||
/* The performance counters are enabled, and are written out each time a tile
|
||||
* finishes rendering.
|
||||
*/
|
||||
#define PRFCNT_CONFIG_MODE_TILE 2
|
||||
|
||||
/* AS<n>_MEMATTR values from MMU_MEMATTR_STAGE1: */
|
||||
/* Use GPU implementation-defined caching policy. */
|
||||
#define AS_MEMATTR_IMPL_DEF_CACHE_POLICY 0x88ull
|
||||
/* The attribute set to force all resources to be cached. */
|
||||
#define AS_MEMATTR_FORCE_TO_CACHE_ALL 0x8Full
|
||||
/* Inner write-alloc cache setup, no outer caching */
|
||||
#define AS_MEMATTR_WRITE_ALLOC 0x8Dull
|
||||
|
||||
/* Use GPU implementation-defined caching policy. */
|
||||
#define AS_MEMATTR_LPAE_IMPL_DEF_CACHE_POLICY 0x48ull
|
||||
/* The attribute set to force all resources to be cached. */
|
||||
#define AS_MEMATTR_LPAE_FORCE_TO_CACHE_ALL 0x4Full
|
||||
/* Inner write-alloc cache setup, no outer caching */
|
||||
#define AS_MEMATTR_LPAE_WRITE_ALLOC 0x4Dull
|
||||
/* Set to implementation defined, outer caching */
|
||||
#define AS_MEMATTR_LPAE_OUTER_IMPL_DEF 0x88ull
|
||||
/* Set to write back memory, outer caching */
|
||||
#define AS_MEMATTR_LPAE_OUTER_WA 0x8Dull
|
||||
/* There is no LPAE support for non-cacheable, since the memory type is always
|
||||
* write-back.
|
||||
* Marking this setting as reserved for LPAE
|
||||
*/
|
||||
#define AS_MEMATTR_LPAE_NON_CACHEABLE_RESERVED
|
||||
|
||||
/* L2_MMU_CONFIG register */
|
||||
#define L2_MMU_CONFIG_ALLOW_SNOOP_DISPARITY_SHIFT (23)
|
||||
#define L2_MMU_CONFIG_ALLOW_SNOOP_DISPARITY (0x1 << L2_MMU_CONFIG_ALLOW_SNOOP_DISPARITY_SHIFT)
|
||||
|
||||
/* End L2_MMU_CONFIG register */
|
||||
|
||||
/* THREAD_* registers */
|
||||
|
||||
/* THREAD_FEATURES IMPLEMENTATION_TECHNOLOGY values */
|
||||
#define IMPLEMENTATION_UNSPECIFIED 0
|
||||
#define IMPLEMENTATION_SILICON 1
|
||||
#define IMPLEMENTATION_FPGA 2
|
||||
#define IMPLEMENTATION_MODEL 3
|
||||
|
||||
/* Default values when registers are not supported by the implemented hardware */
|
||||
#define THREAD_MT_DEFAULT 256
|
||||
#define THREAD_MWS_DEFAULT 256
|
||||
#define THREAD_MBS_DEFAULT 256
|
||||
#define THREAD_MR_DEFAULT 1024
|
||||
#define THREAD_MTQ_DEFAULT 4
|
||||
#define THREAD_MTGS_DEFAULT 10
|
||||
|
||||
/* End THREAD_* registers */
|
||||
|
||||
/* SHADER_CONFIG register */
|
||||
#define SC_LS_ALLOW_ATTR_TYPES (1ul << 16)
|
||||
#define SC_TLS_HASH_ENABLE (1ul << 17)
|
||||
#define SC_LS_ATTR_CHECK_DISABLE (1ul << 18)
|
||||
#define SC_VAR_ALGORITHM (1ul << 29)
|
||||
/* End SHADER_CONFIG register */
|
||||
|
||||
/* TILER_CONFIG register */
|
||||
#define TC_CLOCK_GATE_OVERRIDE (1ul << 0)
|
||||
/* End TILER_CONFIG register */
|
||||
|
||||
/* L2_CONFIG register */
|
||||
#define L2_CONFIG_SIZE_SHIFT 16
|
||||
#define L2_CONFIG_SIZE_MASK (0xFFul << L2_CONFIG_SIZE_SHIFT)
|
||||
#define L2_CONFIG_HASH_SHIFT 24
|
||||
#define L2_CONFIG_HASH_MASK (0xFFul << L2_CONFIG_HASH_SHIFT)
|
||||
#define L2_CONFIG_ASN_HASH_ENABLE_SHIFT 24
|
||||
#define L2_CONFIG_ASN_HASH_ENABLE_MASK (1ul << L2_CONFIG_ASN_HASH_ENABLE_SHIFT)
|
||||
/* End L2_CONFIG register */
|
||||
|
||||
|
||||
/* IDVS_GROUP register */
|
||||
#define IDVS_GROUP_SIZE_SHIFT (16)
|
||||
#define IDVS_GROUP_MAX_SIZE (0x3F)
|
||||
|
||||
/* SYSC_ALLOC read IDs */
|
||||
#define SYSC_ALLOC_ID_R_OTHER 0x00
|
||||
#define SYSC_ALLOC_ID_R_CSF 0x02
|
||||
#define SYSC_ALLOC_ID_R_MMU 0x04
|
||||
#define SYSC_ALLOC_ID_R_TILER_VERT 0x08
|
||||
#define SYSC_ALLOC_ID_R_TILER_PTR 0x09
|
||||
#define SYSC_ALLOC_ID_R_TILER_INDEX 0x0A
|
||||
#define SYSC_ALLOC_ID_R_TILER_OTHER 0x0B
|
||||
#define SYSC_ALLOC_ID_R_IC 0x10
|
||||
#define SYSC_ALLOC_ID_R_ATTR 0x11
|
||||
#define SYSC_ALLOC_ID_R_SCM 0x12
|
||||
#define SYSC_ALLOC_ID_R_FSDC 0x13
|
||||
#define SYSC_ALLOC_ID_R_VL 0x14
|
||||
#define SYSC_ALLOC_ID_R_PLR 0x15
|
||||
#define SYSC_ALLOC_ID_R_TEX 0x18
|
||||
#define SYSC_ALLOC_ID_R_LSC 0x1c
|
||||
|
||||
/* SYSC_ALLOC write IDs */
|
||||
#define SYSC_ALLOC_ID_W_OTHER 0x00
|
||||
#define SYSC_ALLOC_ID_W_CSF 0x02
|
||||
#define SYSC_ALLOC_ID_W_PCB 0x07
|
||||
#define SYSC_ALLOC_ID_W_TILER_PTR 0x09
|
||||
#define SYSC_ALLOC_ID_W_TILER_VERT_PLIST 0x0A
|
||||
#define SYSC_ALLOC_ID_W_TILER_OTHER 0x0B
|
||||
#define SYSC_ALLOC_ID_W_L2_EVICT 0x0C
|
||||
#define SYSC_ALLOC_ID_W_L2_FLUSH 0x0D
|
||||
#define SYSC_ALLOC_ID_W_TIB_COLOR 0x10
|
||||
#define SYSC_ALLOC_ID_W_TIB_COLOR_AFBCH 0x11
|
||||
#define SYSC_ALLOC_ID_W_TIB_COLOR_AFBCB 0x12
|
||||
#define SYSC_ALLOC_ID_W_TIB_CRC 0x13
|
||||
#define SYSC_ALLOC_ID_W_TIB_DS 0x14
|
||||
#define SYSC_ALLOC_ID_W_TIB_DS_AFBCH 0x15
|
||||
#define SYSC_ALLOC_ID_W_TIB_DS_AFBCB 0x16
|
||||
#define SYSC_ALLOC_ID_W_LSC 0x1C
|
||||
|
||||
/* SYSC_ALLOC values */
|
||||
#define SYSC_ALLOC_L2_ALLOC 0x0
|
||||
#define SYSC_ALLOC_NEVER_ALLOC 0x2
|
||||
#define SYSC_ALLOC_ALWAYS_ALLOC 0x3
|
||||
#define SYSC_ALLOC_PTL_ALLOC 0x4
|
||||
#define SYSC_ALLOC_L2_PTL_ALLOC 0x5
|
||||
|
||||
/* SYSC_ALLOC register */
|
||||
#define SYSC_ALLOC_R_SYSC_ALLOC0_SHIFT (0)
|
||||
#define SYSC_ALLOC_R_SYSC_ALLOC0_MASK ((0xF) << SYSC_ALLOC_R_SYSC_ALLOC0_SHIFT)
|
||||
#define SYSC_ALLOC_R_SYSC_ALLOC0_GET(reg_val) \
|
||||
(((reg_val)&SYSC_ALLOC_R_SYSC_ALLOC0_MASK) >> \
|
||||
SYSC_ALLOC_R_SYSC_ALLOC0_SHIFT)
|
||||
#define SYSC_ALLOC_R_SYSC_ALLOC0_SET(reg_val, value) \
|
||||
(((reg_val) & ~SYSC_ALLOC_R_SYSC_ALLOC0_MASK) | \
|
||||
(((value) << SYSC_ALLOC_R_SYSC_ALLOC0_SHIFT) & \
|
||||
SYSC_ALLOC_R_SYSC_ALLOC0_MASK))
|
||||
/* End of SYSC_ALLOC_R_SYSC_ALLOC0 values */
|
||||
#define SYSC_ALLOC_W_SYSC_ALLOC0_SHIFT (4)
|
||||
#define SYSC_ALLOC_W_SYSC_ALLOC0_MASK ((0xF) << SYSC_ALLOC_W_SYSC_ALLOC0_SHIFT)
|
||||
#define SYSC_ALLOC_W_SYSC_ALLOC0_GET(reg_val) \
|
||||
(((reg_val)&SYSC_ALLOC_W_SYSC_ALLOC0_MASK) >> \
|
||||
SYSC_ALLOC_W_SYSC_ALLOC0_SHIFT)
|
||||
#define SYSC_ALLOC_W_SYSC_ALLOC0_SET(reg_val, value) \
|
||||
(((reg_val) & ~SYSC_ALLOC_W_SYSC_ALLOC0_MASK) | \
|
||||
(((value) << SYSC_ALLOC_W_SYSC_ALLOC0_SHIFT) & \
|
||||
SYSC_ALLOC_W_SYSC_ALLOC0_MASK))
|
||||
/* End of SYSC_ALLOC_W_SYSC_ALLOC0 values */
|
||||
#define SYSC_ALLOC_R_SYSC_ALLOC1_SHIFT (8)
|
||||
#define SYSC_ALLOC_R_SYSC_ALLOC1_MASK ((0xF) << SYSC_ALLOC_R_SYSC_ALLOC1_SHIFT)
|
||||
#define SYSC_ALLOC_R_SYSC_ALLOC1_GET(reg_val) \
|
||||
(((reg_val)&SYSC_ALLOC_R_SYSC_ALLOC1_MASK) >> \
|
||||
SYSC_ALLOC_R_SYSC_ALLOC1_SHIFT)
|
||||
#define SYSC_ALLOC_R_SYSC_ALLOC1_SET(reg_val, value) \
|
||||
(((reg_val) & ~SYSC_ALLOC_R_SYSC_ALLOC1_MASK) | \
|
||||
(((value) << SYSC_ALLOC_R_SYSC_ALLOC1_SHIFT) & \
|
||||
SYSC_ALLOC_R_SYSC_ALLOC1_MASK))
|
||||
/* End of SYSC_ALLOC_R_SYSC_ALLOC1 values */
|
||||
#define SYSC_ALLOC_W_SYSC_ALLOC1_SHIFT (12)
|
||||
#define SYSC_ALLOC_W_SYSC_ALLOC1_MASK ((0xF) << SYSC_ALLOC_W_SYSC_ALLOC1_SHIFT)
|
||||
#define SYSC_ALLOC_W_SYSC_ALLOC1_GET(reg_val) \
|
||||
(((reg_val)&SYSC_ALLOC_W_SYSC_ALLOC1_MASK) >> \
|
||||
SYSC_ALLOC_W_SYSC_ALLOC1_SHIFT)
|
||||
#define SYSC_ALLOC_W_SYSC_ALLOC1_SET(reg_val, value) \
|
||||
(((reg_val) & ~SYSC_ALLOC_W_SYSC_ALLOC1_MASK) | \
|
||||
(((value) << SYSC_ALLOC_W_SYSC_ALLOC1_SHIFT) & \
|
||||
SYSC_ALLOC_W_SYSC_ALLOC1_MASK))
|
||||
/* End of SYSC_ALLOC_W_SYSC_ALLOC1 values */
|
||||
#define SYSC_ALLOC_R_SYSC_ALLOC2_SHIFT (16)
|
||||
#define SYSC_ALLOC_R_SYSC_ALLOC2_MASK ((0xF) << SYSC_ALLOC_R_SYSC_ALLOC2_SHIFT)
|
||||
#define SYSC_ALLOC_R_SYSC_ALLOC2_GET(reg_val) \
|
||||
(((reg_val)&SYSC_ALLOC_R_SYSC_ALLOC2_MASK) >> \
|
||||
SYSC_ALLOC_R_SYSC_ALLOC2_SHIFT)
|
||||
#define SYSC_ALLOC_R_SYSC_ALLOC2_SET(reg_val, value) \
|
||||
(((reg_val) & ~SYSC_ALLOC_R_SYSC_ALLOC2_MASK) | \
|
||||
(((value) << SYSC_ALLOC_R_SYSC_ALLOC2_SHIFT) & \
|
||||
SYSC_ALLOC_R_SYSC_ALLOC2_MASK))
|
||||
/* End of SYSC_ALLOC_R_SYSC_ALLOC2 values */
|
||||
#define SYSC_ALLOC_W_SYSC_ALLOC2_SHIFT (20)
|
||||
#define SYSC_ALLOC_W_SYSC_ALLOC2_MASK ((0xF) << SYSC_ALLOC_W_SYSC_ALLOC2_SHIFT)
|
||||
#define SYSC_ALLOC_W_SYSC_ALLOC2_GET(reg_val) \
|
||||
(((reg_val)&SYSC_ALLOC_W_SYSC_ALLOC2_MASK) >> \
|
||||
SYSC_ALLOC_W_SYSC_ALLOC2_SHIFT)
|
||||
#define SYSC_ALLOC_W_SYSC_ALLOC2_SET(reg_val, value) \
|
||||
(((reg_val) & ~SYSC_ALLOC_W_SYSC_ALLOC2_MASK) | \
|
||||
(((value) << SYSC_ALLOC_W_SYSC_ALLOC2_SHIFT) & \
|
||||
SYSC_ALLOC_W_SYSC_ALLOC2_MASK))
|
||||
/* End of SYSC_ALLOC_W_SYSC_ALLOC2 values */
|
||||
#define SYSC_ALLOC_R_SYSC_ALLOC3_SHIFT (24)
|
||||
#define SYSC_ALLOC_R_SYSC_ALLOC3_MASK ((0xF) << SYSC_ALLOC_R_SYSC_ALLOC3_SHIFT)
|
||||
#define SYSC_ALLOC_R_SYSC_ALLOC3_GET(reg_val) \
|
||||
(((reg_val)&SYSC_ALLOC_R_SYSC_ALLOC3_MASK) >> \
|
||||
SYSC_ALLOC_R_SYSC_ALLOC3_SHIFT)
|
||||
#define SYSC_ALLOC_R_SYSC_ALLOC3_SET(reg_val, value) \
|
||||
(((reg_val) & ~SYSC_ALLOC_R_SYSC_ALLOC3_MASK) | \
|
||||
(((value) << SYSC_ALLOC_R_SYSC_ALLOC3_SHIFT) & \
|
||||
SYSC_ALLOC_R_SYSC_ALLOC3_MASK))
|
||||
/* End of SYSC_ALLOC_R_SYSC_ALLOC3 values */
|
||||
#define SYSC_ALLOC_W_SYSC_ALLOC3_SHIFT (28)
|
||||
/* Bitmask of the W_SYSC_ALLOC3 field of the SYSC_ALLOC register.
 * The literal is unsigned on purpose: 0xF shifted up to bit 31 overflows a
 * signed int, which would yield a negative, sign-extending mask.
 */
#define SYSC_ALLOC_W_SYSC_ALLOC3_MASK ((0xFu) << SYSC_ALLOC_W_SYSC_ALLOC3_SHIFT)
|
||||
#define SYSC_ALLOC_W_SYSC_ALLOC3_GET(reg_val) \
|
||||
(((reg_val)&SYSC_ALLOC_W_SYSC_ALLOC3_MASK) >> \
|
||||
SYSC_ALLOC_W_SYSC_ALLOC3_SHIFT)
|
||||
#define SYSC_ALLOC_W_SYSC_ALLOC3_SET(reg_val, value) \
|
||||
(((reg_val) & ~SYSC_ALLOC_W_SYSC_ALLOC3_MASK) | \
|
||||
(((value) << SYSC_ALLOC_W_SYSC_ALLOC3_SHIFT) & \
|
||||
SYSC_ALLOC_W_SYSC_ALLOC3_MASK))
|
||||
/* End of SYSC_ALLOC_W_SYSC_ALLOC3 values */
|
||||
|
||||
/* Include POWER_CHANGED_SINGLE in debug builds for use in irq latency test. */
|
||||
#ifdef CONFIG_MALI_BIFROST_DEBUG
|
||||
|
||||
@@ -94,7 +94,10 @@ struct kbase_ipa_model_vinstr_data {
|
||||
struct kbase_ipa_group {
|
||||
const char *name;
|
||||
s32 default_value;
|
||||
s64 (*op)(struct kbase_ipa_model_vinstr_data *, s32, u32);
|
||||
s64 (*op)(
|
||||
struct kbase_ipa_model_vinstr_data *model_data,
|
||||
s32 coeff,
|
||||
u32 counter_block_offset);
|
||||
u32 counter_block_offset;
|
||||
};
|
||||
|
||||
|
||||
@@ -115,8 +115,8 @@ static const struct kbase_ipa_counter ipa_top_level_cntrs_def_ttux[] = {
|
||||
};
|
||||
|
||||
/* These tables provide a description of each performance counter
|
||||
* used by the shader cores counter model for energy estimation.
|
||||
*/
|
||||
* used by the shader cores counter model for energy estimation.
|
||||
*/
|
||||
static const struct kbase_ipa_counter ipa_shader_core_cntrs_def_todx[] = {
|
||||
SC_COUNTER_DEF("exec_instr_fma", 505449, EXEC_INSTR_FMA),
|
||||
SC_COUNTER_DEF("tex_filt_num_operations", 574869, TEX_FILT_NUM_OPS),
|
||||
@@ -150,7 +150,7 @@ static const struct kbase_ipa_counter ipa_shader_core_cntrs_def_ttux[] = {
|
||||
SC_COUNTER_DEF("ls_mem_read_short", 322525, LS_MEM_READ_SHORT),
|
||||
SC_COUNTER_DEF("full_quad_warps", 844124, FULL_QUAD_WARPS),
|
||||
SC_COUNTER_DEF("exec_instr_cvt", 226411, EXEC_INSTR_CVT),
|
||||
SC_COUNTER_DEF("frag_quads_ezs_update",372032, FRAG_QUADS_EZS_UPDATE),
|
||||
SC_COUNTER_DEF("frag_quads_ezs_update", 372032, FRAG_QUADS_EZS_UPDATE),
|
||||
};
|
||||
|
||||
#define IPA_POWER_MODEL_OPS(gpu, init_token) \
|
||||
@@ -224,8 +224,8 @@ const struct kbase_ipa_model_ops *kbase_ipa_counter_model_ops_find(
|
||||
|
||||
const char *kbase_ipa_counter_model_name_from_id(u32 gpu_id)
|
||||
{
|
||||
const u32 prod_id = (gpu_id & GPU_ID_VERSION_PRODUCT_ID) >>
|
||||
GPU_ID_VERSION_PRODUCT_ID_SHIFT;
|
||||
const u32 prod_id =
|
||||
(gpu_id & GPU_ID_VERSION_PRODUCT_ID) >> KBASE_GPU_ID_VERSION_PRODUCT_ID_SHIFT;
|
||||
|
||||
switch (GPU_ID2_MODEL_MATCH_VALUE(prod_id)) {
|
||||
case GPU_ID2_PRODUCT_TODX:
|
||||
|
||||
@@ -111,20 +111,21 @@ static u32 kbase_g7x_power_model_get_sc_counter(struct kbase_ipa_model_vinstr_da
|
||||
|
||||
/**
|
||||
* kbase_g7x_sum_all_memsys_blocks() - calculate energy for a single Memory System performance counter.
|
||||
* @model_data: pointer to GPU model data.
|
||||
* @coeff: default value of coefficient for IPA group.
|
||||
* @offset: offset in bytes of the counter inside the block it belongs to.
|
||||
* @model_data: pointer to GPU model data.
|
||||
* @coeff: default value of coefficient for IPA group.
|
||||
* @counter_block_offset: offset in bytes of the counter inside the block it belongs to.
|
||||
*
|
||||
* Return: Energy estimation for a single Memory System performance counter.
|
||||
*/
|
||||
static s64 kbase_g7x_sum_all_memsys_blocks(
|
||||
struct kbase_ipa_model_vinstr_data *model_data,
|
||||
s32 coeff,
|
||||
u32 offset)
|
||||
u32 counter_block_offset)
|
||||
{
|
||||
u32 counter;
|
||||
|
||||
counter = kbase_g7x_power_model_get_memsys_counter(model_data, offset);
|
||||
counter = kbase_g7x_power_model_get_memsys_counter(model_data,
|
||||
counter_block_offset);
|
||||
return kbase_ipa_sum_all_memsys_blocks(model_data, coeff, counter);
|
||||
}
|
||||
|
||||
@@ -531,8 +532,8 @@ const struct kbase_ipa_model_ops *kbase_ipa_counter_model_ops_find(
|
||||
|
||||
const char *kbase_ipa_counter_model_name_from_id(u32 gpu_id)
|
||||
{
|
||||
const u32 prod_id = (gpu_id & GPU_ID_VERSION_PRODUCT_ID) >>
|
||||
GPU_ID_VERSION_PRODUCT_ID_SHIFT;
|
||||
const u32 prod_id =
|
||||
(gpu_id & GPU_ID_VERSION_PRODUCT_ID) >> KBASE_GPU_ID_VERSION_PRODUCT_ID_SHIFT;
|
||||
|
||||
switch (GPU_ID2_MODEL_MATCH_VALUE(prod_id)) {
|
||||
case GPU_ID2_PRODUCT_TMIX:
|
||||
|
||||
@@ -71,7 +71,7 @@ KBASE_EXPORT_TEST_API(kbase_ipa_model_ops_find);
|
||||
|
||||
const char *kbase_ipa_model_name_from_id(u32 gpu_id)
|
||||
{
|
||||
const char* model_name =
|
||||
const char *model_name =
|
||||
kbase_ipa_counter_model_name_from_id(gpu_id);
|
||||
|
||||
if (!model_name)
|
||||
@@ -610,7 +610,7 @@ static unsigned long kbase_get_dynamic_power(unsigned long freq,
|
||||
|
||||
/* Here unlike kbase_get_real_power(), shader core frequency is
|
||||
* used for the scaling as simple power model is used to obtain
|
||||
* the value of dynamic coefficient (which is is a fixed value
|
||||
* the value of dynamic coefficient (which is a fixed value
|
||||
* retrieved from the device tree).
|
||||
*/
|
||||
power += kbase_scale_dynamic_power(
|
||||
|
||||
@@ -128,8 +128,14 @@ static ssize_t param_string_set(struct file *file, const char __user *user_buf,
|
||||
|
||||
err = kbase_ipa_model_recalculate(model);
|
||||
if (err < 0) {
|
||||
u32 string_len = strscpy(param->addr.str, old_str, param->size);
|
||||
|
||||
string_len += sizeof(char);
|
||||
/* Make sure that the source string fit into the buffer. */
|
||||
KBASE_DEBUG_ASSERT(string_len <= param->size);
|
||||
CSTD_UNUSED(string_len);
|
||||
|
||||
ret = err;
|
||||
strlcpy(param->addr.str, old_str, param->size);
|
||||
}
|
||||
|
||||
end:
|
||||
@@ -275,7 +281,7 @@ static void kbase_ipa_model_debugfs_init(struct kbase_ipa_model *model)
|
||||
"Type not set for %s parameter %s\n",
|
||||
model->ops->name, param->name);
|
||||
} else {
|
||||
debugfs_create_file(param->name, S_IRUGO | S_IWUSR,
|
||||
debugfs_create_file(param->name, 0644,
|
||||
dir, param, fops);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -307,8 +307,12 @@ static int kbase_simple_power_model_recalculate(struct kbase_ipa_model *model)
|
||||
model_data->gpu_tz = NULL;
|
||||
} else {
|
||||
char tz_name[THERMAL_NAME_LENGTH];
|
||||
u32 string_len = strscpy(tz_name, model_data->tz_name, sizeof(tz_name));
|
||||
|
||||
strlcpy(tz_name, model_data->tz_name, sizeof(tz_name));
|
||||
string_len += sizeof(char);
|
||||
/* Make sure that the source string fit into the buffer. */
|
||||
KBASE_DEBUG_ASSERT(string_len <= sizeof(tz_name));
|
||||
CSTD_UNUSED(string_len);
|
||||
|
||||
/* Release ipa.lock so that thermal_list_lock is not acquired
|
||||
* with ipa.lock held, thereby avoid lock ordering violation
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
|
||||
/*
|
||||
*
|
||||
* (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
|
||||
* (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
|
||||
*
|
||||
* This program is free software and is provided to you under the terms of the
|
||||
* GNU General Public License version 2 as published by the Free Software
|
||||
@@ -124,6 +124,18 @@
|
||||
/* Reset the GPU after each atom completion */
|
||||
#define KBASE_SERIALIZE_RESET (1 << 2)
|
||||
|
||||
/**
|
||||
* enum kbase_timeout_selector - The choice of which timeout to get scaled
|
||||
* using the lowest GPU frequency.
|
||||
* @KBASE_TIMEOUT_SELECTOR_COUNT: Number of timeout selectors. Must be last in
|
||||
* the enum.
|
||||
*/
|
||||
enum kbase_timeout_selector {
|
||||
|
||||
/* Must be the last in the enum */
|
||||
KBASE_TIMEOUT_SELECTOR_COUNT
|
||||
};
|
||||
|
||||
#if IS_ENABLED(CONFIG_DEBUG_FS)
|
||||
/**
|
||||
* struct base_job_fault_event - keeps track of the atom which faulted or which
|
||||
@@ -653,11 +665,12 @@ static inline bool kbase_jd_katom_is_protected(
|
||||
|
||||
/**
|
||||
* kbase_jd_atom_is_younger - query if one atom is younger by age than another
|
||||
* @katom_a: the first atom
|
||||
* @katom_a: the second atom
|
||||
*
|
||||
* Return: true if the first atom is strictly younger than the second, false
|
||||
* otherwise.
|
||||
* @katom_a: the first atom
|
||||
* @katom_b: the second atom
|
||||
*
|
||||
* Return: true if the first atom is strictly younger than the second,
|
||||
* false otherwise.
|
||||
*/
|
||||
static inline bool kbase_jd_atom_is_younger(const struct kbase_jd_atom *katom_a,
|
||||
const struct kbase_jd_atom *katom_b)
|
||||
@@ -666,7 +679,9 @@ static inline bool kbase_jd_atom_is_younger(const struct kbase_jd_atom *katom_a,
|
||||
}
|
||||
|
||||
/**
|
||||
* kbase_jd_atom_is_earlier
|
||||
* kbase_jd_atom_is_earlier - Check whether the first atom has been submitted
|
||||
* earlier than the second one
|
||||
*
|
||||
* @katom_a: the first atom
|
||||
* @katom_b: the second atom
|
||||
*
|
||||
@@ -730,17 +745,13 @@ static inline bool kbase_jd_atom_is_earlier(const struct kbase_jd_atom *katom_a,
|
||||
* A state machine is used to control incremental rendering.
|
||||
*/
|
||||
enum kbase_jd_renderpass_state {
|
||||
KBASE_JD_RP_COMPLETE, /* COMPLETE => START */
|
||||
KBASE_JD_RP_START, /* START => PEND_OOM or COMPLETE */
|
||||
KBASE_JD_RP_PEND_OOM, /* PEND_OOM => OOM or COMPLETE */
|
||||
KBASE_JD_RP_OOM, /* OOM => RETRY */
|
||||
KBASE_JD_RP_RETRY, /* RETRY => RETRY_PEND_OOM or
|
||||
* COMPLETE
|
||||
*/
|
||||
KBASE_JD_RP_RETRY_PEND_OOM, /* RETRY_PEND_OOM => RETRY_OOM or
|
||||
* COMPLETE
|
||||
*/
|
||||
KBASE_JD_RP_RETRY_OOM, /* RETRY_OOM => RETRY */
|
||||
KBASE_JD_RP_COMPLETE, /* COMPLETE => START */
|
||||
KBASE_JD_RP_START, /* START => PEND_OOM or COMPLETE */
|
||||
KBASE_JD_RP_PEND_OOM, /* PEND_OOM => OOM or COMPLETE */
|
||||
KBASE_JD_RP_OOM, /* OOM => RETRY */
|
||||
KBASE_JD_RP_RETRY, /* RETRY => RETRY_PEND_OOM or COMPLETE */
|
||||
KBASE_JD_RP_RETRY_PEND_OOM, /* RETRY_PEND_OOM => RETRY_OOM or COMPLETE */
|
||||
KBASE_JD_RP_RETRY_OOM /* RETRY_OOM => RETRY */
|
||||
};
|
||||
|
||||
/**
|
||||
@@ -813,7 +824,7 @@ struct kbase_jd_renderpass {
|
||||
* atom completes
|
||||
* execution on GPU or the input fence get signaled.
|
||||
* @tb_lock: Lock to serialize the write access made to @tb to
|
||||
* to store the register access trace messages.
|
||||
* store the register access trace messages.
|
||||
* @tb: Pointer to the Userspace accessible buffer storing
|
||||
* the trace messages for register read/write
|
||||
* accesses made by the Kbase. The buffer is filled
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
|
||||
/*
|
||||
*
|
||||
* (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved.
|
||||
* (C) COPYRIGHT 2020-2022 ARM Limited. All rights reserved.
|
||||
*
|
||||
* This program is free software and is provided to you under the terms of the
|
||||
* GNU General Public License version 2 as published by the Free Software
|
||||
@@ -36,6 +36,8 @@
|
||||
* The struct kbasep_js_device_data sub-structure of kbdev must be zero
|
||||
* initialized before passing to the kbasep_js_devdata_init() function. This is
|
||||
* to give efficient error path code.
|
||||
*
|
||||
* Return: 0 on success, error code otherwise.
|
||||
*/
|
||||
int kbasep_js_devdata_init(struct kbase_device * const kbdev);
|
||||
|
||||
@@ -86,6 +88,8 @@ void kbasep_js_devdata_term(struct kbase_device *kbdev);
|
||||
*
|
||||
* The struct kbase_context must be zero initialized before passing to the
|
||||
* kbase_js_init() function. This is to give efficient error path code.
|
||||
*
|
||||
* Return: 0 on success, error code otherwise.
|
||||
*/
|
||||
int kbasep_js_kctx_init(struct kbase_context *const kctx);
|
||||
|
||||
@@ -206,7 +210,7 @@ bool kbasep_js_add_job(struct kbase_context *kctx, struct kbase_jd_atom *atom);
|
||||
* @kbdev: The kbase_device to operate on
|
||||
* @kctx: The kbase_context to operate on
|
||||
* @atom: Atom to remove
|
||||
*
|
||||
*
|
||||
* Completely removing a job requires several calls:
|
||||
* * kbasep_js_copy_atom_retained_state(), to capture the 'retained state' of
|
||||
* the atom
|
||||
@@ -356,9 +360,10 @@ void kbasep_js_runpool_release_ctx(struct kbase_device *kbdev,
|
||||
struct kbase_context *kctx);
|
||||
|
||||
/**
|
||||
* kbasep_js_runpool_release_ctx_and_katom_retained_state - Variant of
|
||||
* kbasep_js_runpool_release_ctx_and_katom_retained_state - Variant of
|
||||
* kbasep_js_runpool_release_ctx() that handles additional
|
||||
* actions from completing an atom.
|
||||
*
|
||||
* @kbdev: KBase device
|
||||
* @kctx: KBase context
|
||||
* @katom_retained_state: Retained state from the atom
|
||||
@@ -381,8 +386,8 @@ void kbasep_js_runpool_release_ctx_and_katom_retained_state(
|
||||
struct kbasep_js_atom_retained_state *katom_retained_state);
|
||||
|
||||
/**
|
||||
* kbasep_js_runpool_release_ctx_nolock -
|
||||
* Variant of kbase_js_runpool_release_ctx() w/out locks
|
||||
* kbasep_js_runpool_release_ctx_nolock - Variant of kbase_js_runpool_release_ctx()
|
||||
* without locks
|
||||
* @kbdev: KBase device
|
||||
* @kctx: KBase context
|
||||
*
|
||||
@@ -396,6 +401,7 @@ void kbasep_js_runpool_release_ctx_nolock(struct kbase_device *kbdev,
|
||||
|
||||
/**
|
||||
* kbasep_js_schedule_privileged_ctx - Schedule in a privileged context
|
||||
*
|
||||
* @kbdev: KBase device
|
||||
* @kctx: KBase context
|
||||
*
|
||||
@@ -459,7 +465,7 @@ void kbase_js_try_run_jobs(struct kbase_device *kbdev);
|
||||
* contexts from (re)entering the runpool.
|
||||
*
|
||||
* This does not handle suspending the one privileged context: the caller must
|
||||
* instead do this by by suspending the GPU HW Counter Instrumentation.
|
||||
* instead do this by suspending the GPU HW Counter Instrumentation.
|
||||
*
|
||||
* This will eventually cause all Power Management active references held by
|
||||
* contexts on the runpool to be released, without running any more atoms.
|
||||
@@ -688,6 +694,8 @@ void kbase_js_update_ctx_priority(struct kbase_context *kctx);
|
||||
* As with any bool, never test the return value with true.
|
||||
*
|
||||
* The caller must hold hwaccess_lock.
|
||||
*
|
||||
* Return: true if the context is allowed to submit jobs, false otherwise.
|
||||
*/
|
||||
static inline bool kbasep_js_is_submit_allowed(
|
||||
struct kbasep_js_device_data *js_devdata,
|
||||
@@ -768,8 +776,9 @@ static inline void kbasep_js_clear_submit_allowed(
|
||||
}
|
||||
|
||||
/**
|
||||
* kbasep_js_atom_retained_state_init_invalid -
|
||||
* Create an initial 'invalid' atom retained state
|
||||
* kbasep_js_atom_retained_state_init_invalid - Create an initial 'invalid'
|
||||
* atom retained state
|
||||
*
|
||||
* @retained_state: pointer where to create and initialize the state
|
||||
*
|
||||
* Create an initial 'invalid' atom retained state, that requires no
|
||||
|
||||
@@ -55,10 +55,11 @@ typedef void kbasep_js_ctx_job_cb(struct kbase_device *kbdev,
|
||||
* @KBASEP_JS_CTX_ATTR_COMPUTE: Attribute indicating a context that contains
|
||||
* Compute jobs.
|
||||
* @KBASEP_JS_CTX_ATTR_NON_COMPUTE: Attribute indicating a context that contains
|
||||
* Non-Compute jobs.
|
||||
* Non-Compute jobs.
|
||||
* @KBASEP_JS_CTX_ATTR_COMPUTE_ALL_CORES: Attribute indicating that a context
|
||||
* contains compute-job atoms that aren't restricted to a coherent group,
|
||||
* and can run on all cores.
|
||||
* contains compute-job atoms that aren't
|
||||
* restricted to a coherent group,
|
||||
* and can run on all cores.
|
||||
* @KBASEP_JS_CTX_ATTR_COUNT: Must be the last in the enum
|
||||
*
|
||||
* Each context attribute can be thought of as a boolean value that caches some
|
||||
@@ -115,7 +116,6 @@ typedef void kbasep_js_ctx_job_cb(struct kbase_device *kbdev,
|
||||
* BASE_JD_REQ_COHERENT_GROUP set. This is an unlikely case, but it's easy
|
||||
* enough to handle anyway.
|
||||
*
|
||||
*
|
||||
*/
|
||||
enum kbasep_js_ctx_attr {
|
||||
KBASEP_JS_CTX_ATTR_COMPUTE,
|
||||
@@ -217,44 +217,46 @@ typedef u32 kbase_atom_ordering_flag_t;
|
||||
/**
|
||||
* struct kbasep_js_device_data - KBase Device Data Job Scheduler sub-structure
|
||||
* @runpool_irq: Sub-structure to collect together Job Scheduling data used in
|
||||
* IRQ context. The hwaccess_lock must be held when accessing.
|
||||
* IRQ context. The hwaccess_lock must be held when accessing.
|
||||
* @runpool_irq.submit_allowed: Bitvector indicating whether a currently
|
||||
* scheduled context is allowed to submit jobs. When bit 'N' is set in
|
||||
* this, it indicates whether the context bound to address space 'N' is
|
||||
* allowed to submit jobs.
|
||||
* scheduled context is allowed to submit jobs.
|
||||
* When bit 'N' is set in this, it indicates whether
|
||||
* the context bound to address space 'N' is
|
||||
* allowed to submit jobs.
|
||||
* @runpool_irq.ctx_attr_ref_count: Array of Context Attributes Ref_counters:
|
||||
* Each is large enough to hold a refcount of the number of contexts
|
||||
* that can fit into the runpool. This is currently BASE_MAX_NR_AS.
|
||||
* Note that when BASE_MAX_NR_AS==16 we need 5 bits (not 4) to store
|
||||
* the refcount. Hence, it's not worthwhile reducing this to
|
||||
* bit-manipulation on u32s to save space (where in contrast, 4 bit
|
||||
* sub-fields would be easy to do and would save space).
|
||||
* Whilst this must not become negative, the sign bit is used for:
|
||||
* - error detection in debug builds
|
||||
* - Optimization: it is undefined for a signed int to overflow, and so
|
||||
* the compiler can optimize for that never happening (thus, no masking
|
||||
* is required on updating the variable)
|
||||
* Each is large enough to hold a refcount of the number of contexts
|
||||
* that can fit into the runpool. This is currently BASE_MAX_NR_AS.
|
||||
* Note that when BASE_MAX_NR_AS==16 we need 5 bits (not 4) to store
|
||||
* the refcount. Hence, it's not worthwhile reducing this to
|
||||
* bit-manipulation on u32s to save space (where in contrast, 4 bit
|
||||
* sub-fields would be easy to do and would save space).
|
||||
* Whilst this must not become negative, the sign bit is used for:
|
||||
* - error detection in debug builds
|
||||
* - Optimization: it is undefined for a signed int to overflow, and so
|
||||
* the compiler can optimize for that never happening (thus, no masking
|
||||
* is required on updating the variable)
|
||||
* @runpool_irq.slot_affinities: Affinity management and tracking. Bitvector
|
||||
* to aid affinity checking. Element 'n' bit 'i' indicates that slot 'n'
|
||||
* is using core i (i.e. slot_affinity_refcount[n][i] > 0)
|
||||
* to aid affinity checking.
|
||||
* Element 'n' bit 'i' indicates that slot 'n'
|
||||
* is using core i (i.e. slot_affinity_refcount[n][i] > 0)
|
||||
* @runpool_irq.slot_affinity_refcount: Array of refcount for each core owned
|
||||
* by each slot. Used to generate the slot_affinities array of bitvectors.
|
||||
* The value of the refcount will not exceed BASE_JM_SUBMIT_SLOTS,
|
||||
* because it is refcounted only when a job is definitely about to be
|
||||
* submitted to a slot, and is de-refcounted immediately after a job
|
||||
* finishes
|
||||
* by each slot. Used to generate the slot_affinities array of bitvectors.
|
||||
* The value of the refcount will not exceed BASE_JM_SUBMIT_SLOTS,
|
||||
* because it is refcounted only when a job is definitely about to be
|
||||
* submitted to a slot, and is de-refcounted immediately after a job
|
||||
* finishes
|
||||
* @schedule_sem: Scheduling semaphore. This must be held when calling
|
||||
* kbase_jm_kick()
|
||||
* kbase_jm_kick()
|
||||
* @ctx_list_pullable: List of contexts that can currently be pulled from
|
||||
* @ctx_list_unpullable: List of contexts that can not currently be pulled
|
||||
* from, but have jobs currently running.
|
||||
* from, but have jobs currently running.
|
||||
* @nr_user_contexts_running: Number of currently scheduled user contexts
|
||||
* (excluding ones that are not submitting jobs)
|
||||
* (excluding ones that are not submitting jobs)
|
||||
* @nr_all_contexts_running: Number of currently scheduled contexts (including
|
||||
* ones that are not submitting jobs)
|
||||
* ones that are not submitting jobs)
|
||||
* @js_reqs: Core Requirements to match up with base_js_atom's core_req member
|
||||
* @note This is a write-once member, and so no locking is required to
|
||||
* read
|
||||
* @note This is a write-once member, and so no locking is required to
|
||||
* read
|
||||
* @scheduling_period_ns: Value for JS_SCHEDULING_PERIOD_NS
|
||||
* @soft_stop_ticks: Value for JS_SOFT_STOP_TICKS
|
||||
* @soft_stop_ticks_cl: Value for JS_SOFT_STOP_TICKS_CL
|
||||
@@ -268,16 +270,16 @@ typedef u32 kbase_atom_ordering_flag_t;
|
||||
* @suspended_soft_jobs_list: List of suspended soft jobs
|
||||
* @softstop_always: Support soft-stop on a single context
|
||||
* @init_status:The initialized-flag is placed at the end, to avoid
|
||||
* cache-pollution (we should only be using this during init/term paths).
|
||||
* @note This is a write-once member, and so no locking is required to
|
||||
* read
|
||||
* cache-pollution (we should only be using this during init/term paths).
|
||||
* @note This is a write-once member, and so no locking is required to
|
||||
* read
|
||||
* @nr_contexts_pullable:Number of contexts that can currently be pulled from
|
||||
* @nr_contexts_runnable:Number of contexts that can either be pulled from or
|
||||
* arecurrently running
|
||||
* are currently running
|
||||
* @soft_job_timeout_ms:Value for JS_SOFT_JOB_TIMEOUT
|
||||
* @queue_mutex: Queue Lock, used to access the Policy's queue of contexts
|
||||
* independently of the Run Pool.
|
||||
* Of course, you don't need the Run Pool lock to access this.
|
||||
* independently of the Run Pool.
|
||||
* Of course, you don't need the Run Pool lock to access this.
|
||||
* @runpool_mutex: Run Pool mutex, for managing contexts within the runpool.
|
||||
*
|
||||
* This encapsulates the current context of the Job Scheduler on a particular
|
||||
|
||||
@@ -168,6 +168,7 @@ __attribute__((unused)) static const enum base_hw_feature base_hw_features_tTUx[
|
||||
BASE_HW_FEATURE_L2_CONFIG,
|
||||
BASE_HW_FEATURE_CLEAN_ONLY_SAFE,
|
||||
BASE_HW_FEATURE_ASN_HASH,
|
||||
BASE_HW_FEATURE_GPU_SLEEP,
|
||||
BASE_HW_FEATURE_END
|
||||
};
|
||||
|
||||
|
||||
@@ -60,6 +60,7 @@ enum base_hw_issue {
|
||||
BASE_HW_ISSUE_TTRX_3485,
|
||||
BASE_HW_ISSUE_GPU2019_3212,
|
||||
BASE_HW_ISSUE_TURSEHW_1997,
|
||||
BASE_HW_ISSUE_GPU2019_3878,
|
||||
BASE_HW_ISSUE_END
|
||||
};
|
||||
|
||||
@@ -596,6 +597,7 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tODx_r0p0
|
||||
BASE_HW_ISSUE_TSIX_2033,
|
||||
BASE_HW_ISSUE_TTRX_1337,
|
||||
BASE_HW_ISSUE_GPU2019_3212,
|
||||
BASE_HW_ISSUE_GPU2019_3878,
|
||||
BASE_HW_ISSUE_END
|
||||
};
|
||||
|
||||
@@ -605,6 +607,7 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tOD
|
||||
BASE_HW_ISSUE_TSIX_2033,
|
||||
BASE_HW_ISSUE_TTRX_1337,
|
||||
BASE_HW_ISSUE_GPU2019_3212,
|
||||
BASE_HW_ISSUE_GPU2019_3878,
|
||||
BASE_HW_ISSUE_END
|
||||
};
|
||||
|
||||
@@ -612,6 +615,7 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tGRx_r0p0
|
||||
BASE_HW_ISSUE_9435,
|
||||
BASE_HW_ISSUE_TSIX_2033,
|
||||
BASE_HW_ISSUE_TTRX_1337,
|
||||
BASE_HW_ISSUE_GPU2019_3878,
|
||||
BASE_HW_ISSUE_END
|
||||
};
|
||||
|
||||
@@ -620,6 +624,7 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tGR
|
||||
BASE_HW_ISSUE_9435,
|
||||
BASE_HW_ISSUE_TSIX_2033,
|
||||
BASE_HW_ISSUE_TTRX_1337,
|
||||
BASE_HW_ISSUE_GPU2019_3878,
|
||||
BASE_HW_ISSUE_END
|
||||
};
|
||||
|
||||
@@ -627,6 +632,7 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tVAx_r0p0
|
||||
BASE_HW_ISSUE_9435,
|
||||
BASE_HW_ISSUE_TSIX_2033,
|
||||
BASE_HW_ISSUE_TTRX_1337,
|
||||
BASE_HW_ISSUE_GPU2019_3878,
|
||||
BASE_HW_ISSUE_END
|
||||
};
|
||||
|
||||
@@ -635,6 +641,7 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tVA
|
||||
BASE_HW_ISSUE_9435,
|
||||
BASE_HW_ISSUE_TSIX_2033,
|
||||
BASE_HW_ISSUE_TTRX_1337,
|
||||
BASE_HW_ISSUE_GPU2019_3878,
|
||||
BASE_HW_ISSUE_END
|
||||
};
|
||||
|
||||
@@ -643,6 +650,7 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tTU
|
||||
BASE_HW_ISSUE_9435,
|
||||
BASE_HW_ISSUE_TSIX_2033,
|
||||
BASE_HW_ISSUE_TTRX_1337,
|
||||
BASE_HW_ISSUE_GPU2019_3878,
|
||||
BASE_HW_ISSUE_END
|
||||
};
|
||||
|
||||
@@ -651,6 +659,15 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tTUx_r0p0
|
||||
BASE_HW_ISSUE_TSIX_2033,
|
||||
BASE_HW_ISSUE_TTRX_1337,
|
||||
BASE_HW_ISSUE_TURSEHW_1997,
|
||||
BASE_HW_ISSUE_GPU2019_3878,
|
||||
BASE_HW_ISSUE_END
|
||||
};
|
||||
|
||||
__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tTUx_r1p0[] = {
|
||||
BASE_HW_ISSUE_9435,
|
||||
BASE_HW_ISSUE_TSIX_2033,
|
||||
BASE_HW_ISSUE_TTRX_1337,
|
||||
BASE_HW_ISSUE_GPU2019_3878,
|
||||
BASE_HW_ISSUE_END
|
||||
};
|
||||
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
|
||||
/*
|
||||
*
|
||||
* (C) COPYRIGHT 2010-2021 ARM Limited. All rights reserved.
|
||||
* (C) COPYRIGHT 2010-2022 ARM Limited. All rights reserved.
|
||||
*
|
||||
* This program is free software and is provided to you under the terms of the
|
||||
* GNU General Public License version 2 as published by the Free Software
|
||||
@@ -109,9 +109,9 @@
|
||||
|
||||
struct kbase_device *kbase_device_alloc(void);
|
||||
/*
|
||||
* note: configuration attributes member of kbdev needs to have
|
||||
* been setup before calling kbase_device_init
|
||||
*/
|
||||
* note: configuration attributes member of kbdev needs to have
|
||||
* been setup before calling kbase_device_init
|
||||
*/
|
||||
|
||||
int kbase_device_misc_init(struct kbase_device *kbdev);
|
||||
void kbase_device_misc_term(struct kbase_device *kbdev);
|
||||
@@ -256,8 +256,26 @@ void kbase_jd_done(struct kbase_jd_atom *katom, int slot_nr, ktime_t *end_timest
|
||||
kbasep_js_atom_done_code done_code);
|
||||
void kbase_jd_cancel(struct kbase_device *kbdev, struct kbase_jd_atom *katom);
|
||||
void kbase_jd_zap_context(struct kbase_context *kctx);
|
||||
bool jd_done_nolock(struct kbase_jd_atom *katom,
|
||||
struct list_head *completed_jobs_ctx);
|
||||
|
||||
/*
|
||||
* jd_done_nolock - Perform the necessary handling of an atom that has completed
|
||||
* the execution.
|
||||
*
|
||||
* @katom: Pointer to the atom that completed the execution
|
||||
* @post_immediately: Flag indicating that completion event can be posted
|
||||
* immediately for @katom and the other atoms dependent
|
||||
* on @katom which also completed execution. The flag is
|
||||
* false only for the case where the function is called by
|
||||
* kbase_jd_done_worker() on the completion of atom running
|
||||
* on the GPU.
|
||||
*
|
||||
* Note that if this is a soft-job that has had kbase_prepare_soft_job called on it then the caller
|
||||
* is responsible for calling kbase_finish_soft_job *before* calling this function.
|
||||
*
|
||||
* The caller must hold the kbase_jd_context.lock.
|
||||
*/
|
||||
bool jd_done_nolock(struct kbase_jd_atom *katom, bool post_immediately);
|
||||
|
||||
void kbase_jd_free_external_resources(struct kbase_jd_atom *katom);
|
||||
void kbase_jd_dep_clear_locked(struct kbase_jd_atom *katom);
|
||||
|
||||
@@ -299,19 +317,73 @@ void kbase_job_slot_ctx_priority_check_locked(struct kbase_context *kctx,
|
||||
* virtual address space in a growable memory region and the atom currently
|
||||
* executing on a job slot is the tiler job chain at the start of a renderpass.
|
||||
*
|
||||
* Return 0 if successful, otherwise a negative error code.
|
||||
* Return: 0 if successful, otherwise a negative error code.
|
||||
*/
|
||||
int kbase_job_slot_softstop_start_rp(struct kbase_context *kctx,
|
||||
struct kbase_va_region *reg);
|
||||
|
||||
/**
|
||||
* kbase_job_slot_softstop - Soft-stop the specified job slot
|
||||
*
|
||||
* @kbdev: The kbase device
|
||||
* @js: The job slot to soft-stop
|
||||
* @target_katom: The job that should be soft-stopped (or NULL for any job)
|
||||
* Context:
|
||||
* The job slot lock must be held when calling this function.
|
||||
* The job slot must not already be in the process of being soft-stopped.
|
||||
*
|
||||
* Where possible any job in the next register is evicted before the soft-stop.
|
||||
*/
|
||||
void kbase_job_slot_softstop(struct kbase_device *kbdev, int js,
|
||||
struct kbase_jd_atom *target_katom);
|
||||
|
||||
void kbase_job_slot_softstop_swflags(struct kbase_device *kbdev, int js,
|
||||
struct kbase_jd_atom *target_katom, u32 sw_flags);
|
||||
|
||||
/**
|
||||
* kbase_job_slot_hardstop - Hard-stop the specified job slot
|
||||
* @kctx: The kbase context that contains the job(s) that should
|
||||
* be hard-stopped
|
||||
* @js: The job slot to hard-stop
|
||||
* @target_katom: The job that should be hard-stopped (or NULL for all
|
||||
* jobs from the context)
|
||||
* Context:
|
||||
* The job slot lock must be held when calling this function.
|
||||
*/
|
||||
void kbase_job_slot_hardstop(struct kbase_context *kctx, int js,
|
||||
struct kbase_jd_atom *target_katom);
|
||||
|
||||
/**
|
||||
* kbase_job_check_enter_disjoint - potentially enter disjoint mode
|
||||
* @kbdev: kbase device
|
||||
* @action: the event which has occurred
|
||||
* @core_reqs: core requirements of the atom
|
||||
* @target_katom: the atom which is being affected
|
||||
*
|
||||
* For a certain soft-stop action, work out whether to enter disjoint
|
||||
* state.
|
||||
*
|
||||
* This does not register multiple disjoint events if the atom has already
|
||||
* started a disjoint period
|
||||
*
|
||||
* @core_reqs can be supplied as 0 if the atom had not started on the hardware
|
||||
* (and so a 'real' soft/hard-stop was not required, but it still interrupted
|
||||
* flow, perhaps on another context)
|
||||
*
|
||||
* kbase_job_check_leave_disjoint() should be used to end the disjoint
|
||||
* state when the soft/hard-stop action is complete
|
||||
*/
|
||||
void kbase_job_check_enter_disjoint(struct kbase_device *kbdev, u32 action,
|
||||
base_jd_core_req core_reqs, struct kbase_jd_atom *target_katom);
|
||||
|
||||
/**
|
||||
* kbase_job_check_leave_disjoint - potentially leave disjoint state
|
||||
* @kbdev: kbase device
|
||||
* @target_katom: atom which is finishing
|
||||
*
|
||||
* Work out whether to leave disjoint state when finishing an atom that was
|
||||
* originated by kbase_job_check_enter_disjoint().
|
||||
*/
|
||||
void kbase_job_check_leave_disjoint(struct kbase_device *kbdev,
|
||||
struct kbase_jd_atom *target_katom);
|
||||
|
||||
@@ -334,7 +406,7 @@ void kbase_event_wakeup(struct kbase_context *kctx);
|
||||
* allocation is to be validated.
|
||||
* @info: Pointer to struct @base_jit_alloc_info
|
||||
* which is to be validated.
|
||||
* @return: 0 if jit allocation is valid; negative error code otherwise
|
||||
* Return: 0 if jit allocation is valid; negative error code otherwise
|
||||
*/
|
||||
int kbasep_jit_alloc_validate(struct kbase_context *kctx,
|
||||
struct base_jit_alloc_info *info);
|
||||
@@ -381,9 +453,12 @@ static inline void kbase_free_user_buffer(
|
||||
* @buf_data: Pointer to the information about external resources:
|
||||
* pages pertaining to the external resource, number of
|
||||
* pages to copy.
|
||||
*
|
||||
* Return: 0 on success, error code otherwise.
|
||||
*/
|
||||
int kbase_mem_copy_from_extres(struct kbase_context *kctx,
|
||||
struct kbase_debug_copy_buffer *buf_data);
|
||||
|
||||
#if !MALI_USE_CSF
|
||||
int kbase_process_soft_job(struct kbase_jd_atom *katom);
|
||||
int kbase_prepare_soft_job(struct kbase_jd_atom *katom);
|
||||
@@ -405,7 +480,9 @@ void kbasep_complete_triggered_soft_events(struct kbase_context *kctx, u64 evt);
|
||||
void kbasep_as_do_poke(struct work_struct *work);
|
||||
|
||||
/**
|
||||
* Check whether a system suspend is in progress, or has already been suspended
|
||||
* kbase_pm_is_suspending - Check whether a system suspend is in progress,
|
||||
* or has already been suspended
|
||||
*
|
||||
* @kbdev: The kbase device structure for the device
|
||||
*
|
||||
* The caller should ensure that either kbdev->pm.active_count_lock is held, or
|
||||
@@ -533,10 +610,12 @@ int kbase_pm_force_mcu_wakeup_after_sleep(struct kbase_device *kbdev);
|
||||
|
||||
#if !MALI_USE_CSF
|
||||
/**
|
||||
* Return the atom's ID, as was originally supplied by userspace in
|
||||
* kbase_jd_atom_id - Return the atom's ID, as was originally supplied by userspace in
|
||||
* base_jd_atom::atom_number
|
||||
* @kctx: KBase context pointer
|
||||
* @katom: Atom for which to return ID
|
||||
*
|
||||
* Return: the atom's ID.
|
||||
*/
|
||||
static inline int kbase_jd_atom_id(struct kbase_context *kctx,
|
||||
const struct kbase_jd_atom *katom)
|
||||
@@ -567,7 +646,9 @@ static inline struct kbase_jd_atom *kbase_jd_atom_from_id(
|
||||
#endif /* !MALI_USE_CSF */
|
||||
|
||||
/**
|
||||
* Initialize the disjoint state
|
||||
* kbase_disjoint_init - Initialize the disjoint state
|
||||
*
|
||||
* @kbdev: The kbase device
|
||||
*
|
||||
* The disjoint event count and state are both set to zero.
|
||||
*
|
||||
@@ -589,14 +670,12 @@ static inline struct kbase_jd_atom *kbase_jd_atom_from_id(
|
||||
* The disjoint event counter is also incremented immediately whenever a job is soft stopped
|
||||
* and during context creation.
|
||||
*
|
||||
* @kbdev: The kbase device
|
||||
*
|
||||
* Return: 0 on success and non-zero value on failure.
|
||||
*/
|
||||
void kbase_disjoint_init(struct kbase_device *kbdev);
|
||||
|
||||
/**
|
||||
* Increase the count of disjoint events
|
||||
* kbase_disjoint_event - Increase the count of disjoint events
|
||||
* called when a disjoint event has happened
|
||||
*
|
||||
* @kbdev: The kbase device
|
||||
@@ -604,42 +683,44 @@ void kbase_disjoint_init(struct kbase_device *kbdev);
|
||||
void kbase_disjoint_event(struct kbase_device *kbdev);
|
||||
|
||||
/**
|
||||
* Increase the count of disjoint events only if the GPU is in a disjoint state
|
||||
* kbase_disjoint_event_potential - Increase the count of disjoint events
|
||||
* only if the GPU is in a disjoint state
|
||||
*
|
||||
* @kbdev: The kbase device
|
||||
*
|
||||
* This should be called when something happens which could be disjoint if the GPU
|
||||
* is in a disjoint state. The state refcount keeps track of this.
|
||||
*
|
||||
* @kbdev: The kbase device
|
||||
*/
|
||||
void kbase_disjoint_event_potential(struct kbase_device *kbdev);
|
||||
|
||||
/**
|
||||
* Returns the count of disjoint events
|
||||
* kbase_disjoint_event_get - Returns the count of disjoint events
|
||||
*
|
||||
* @kbdev: The kbase device
|
||||
* @return the count of disjoint events
|
||||
* Return: the count of disjoint events
|
||||
*/
|
||||
u32 kbase_disjoint_event_get(struct kbase_device *kbdev);
|
||||
|
||||
/**
|
||||
* Increment the refcount state indicating that the GPU is in a disjoint state.
|
||||
* kbase_disjoint_state_up - Increment the refcount state indicating that
|
||||
* the GPU is in a disjoint state.
|
||||
*
|
||||
* @kbdev: The kbase device
|
||||
*
|
||||
* Also increment the disjoint event count (calls @ref kbase_disjoint_event)
|
||||
* eventually after the disjoint state has completed @ref kbase_disjoint_state_down
|
||||
* should be called
|
||||
*
|
||||
* @kbdev: The kbase device
|
||||
*/
|
||||
void kbase_disjoint_state_up(struct kbase_device *kbdev);
|
||||
|
||||
/**
|
||||
* Decrement the refcount state
|
||||
* kbase_disjoint_state_down - Decrement the refcount state
|
||||
*
|
||||
* @kbdev: The kbase device
|
||||
*
|
||||
* Also Increment the disjoint event count (calls @ref kbase_disjoint_event)
|
||||
*
|
||||
* Called after @ref kbase_disjoint_state_up once the disjoint state is over
|
||||
*
|
||||
* @kbdev: The kbase device
|
||||
*/
|
||||
void kbase_disjoint_state_down(struct kbase_device *kbdev);
|
||||
|
||||
@@ -668,8 +749,8 @@ int kbase_device_pcm_dev_init(struct kbase_device *const kbdev);
|
||||
void kbase_device_pcm_dev_term(struct kbase_device *const kbdev);
|
||||
|
||||
/**
|
||||
* If a job is soft stopped and the number of contexts is >= this value
|
||||
* it is reported as a disjoint event
|
||||
* KBASE_DISJOINT_STATE_INTERLEAVED_CONTEXT_COUNT_THRESHOLD - If a job is soft stopped
|
||||
* and the number of contexts is >= this value it is reported as a disjoint event
|
||||
*/
|
||||
#define KBASE_DISJOINT_STATE_INTERLEAVED_CONTEXT_COUNT_THRESHOLD 2
|
||||
|
||||
|
||||
@@ -99,7 +99,7 @@ void kbase_as_fault_debugfs_init(struct kbase_device *kbdev)
|
||||
} else {
|
||||
for (i = 0; i < kbdev->nr_hw_address_spaces; i++) {
|
||||
snprintf(as_name, ARRAY_SIZE(as_name), "as%u", i);
|
||||
debugfs_create_file(as_name, S_IRUGO,
|
||||
debugfs_create_file(as_name, 0444,
|
||||
debugfs_directory,
|
||||
(void *)(uintptr_t)i,
|
||||
&as_fault_fops);
|
||||
@@ -108,5 +108,4 @@ void kbase_as_fault_debugfs_init(struct kbase_device *kbdev)
|
||||
|
||||
#endif /* CONFIG_MALI_BIFROST_DEBUG */
|
||||
#endif /* CONFIG_DEBUG_FS */
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -43,7 +43,6 @@ kbase_as_fault_debugfs_new(struct kbase_device *kbdev, int as_no)
|
||||
kbdev->debugfs_as_read_bitmap |= (1ULL << as_no);
|
||||
#endif /* CONFIG_DEBUG_FS */
|
||||
#endif /* CONFIG_MALI_BIFROST_DEBUG */
|
||||
return;
|
||||
}
|
||||
|
||||
#endif /*_KBASE_AS_FAULT_DEBUG_FS_H*/
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
|
||||
/*
|
||||
*
|
||||
* (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved.
|
||||
* (C) COPYRIGHT 2020-2022 ARM Limited. All rights reserved.
|
||||
*
|
||||
* This program is free software and is provided to you under the terms of the
|
||||
* GNU General Public License version 2 as published by the Free Software
|
||||
@@ -28,15 +28,24 @@
|
||||
|
||||
#include <linux/types.h>
|
||||
|
||||
typedef enum mali_kbase_cap {
|
||||
/**
|
||||
* enum mali_kbase_cap - Enumeration for kbase capability
|
||||
*
|
||||
* @MALI_KBASE_CAP_SYSTEM_MONITOR: System Monitor
|
||||
* @MALI_KBASE_CAP_JIT_PRESSURE_LIMIT: JIT Pressure limit
|
||||
* @MALI_KBASE_CAP_MEM_GROW_ON_GPF: Memory grow on page fault
|
||||
* @MALI_KBASE_CAP_MEM_PROTECTED: Protected memory
|
||||
* @MALI_KBASE_NUM_CAPS: Delimiter
|
||||
*/
|
||||
enum mali_kbase_cap {
|
||||
MALI_KBASE_CAP_SYSTEM_MONITOR = 0,
|
||||
MALI_KBASE_CAP_JIT_PRESSURE_LIMIT,
|
||||
MALI_KBASE_CAP_MEM_GROW_ON_GPF,
|
||||
MALI_KBASE_CAP_MEM_PROTECTED,
|
||||
MALI_KBASE_NUM_CAPS
|
||||
} mali_kbase_cap;
|
||||
};
|
||||
|
||||
extern bool mali_kbase_supports_cap(unsigned long api_version, mali_kbase_cap cap);
|
||||
extern bool mali_kbase_supports_cap(unsigned long api_version, enum mali_kbase_cap cap);
|
||||
|
||||
static inline bool mali_kbase_supports_system_monitor(unsigned long api_version)
|
||||
{
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
|
||||
/*
|
||||
*
|
||||
* (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved.
|
||||
* (C) COPYRIGHT 2020-2022 ARM Limited. All rights reserved.
|
||||
*
|
||||
* This program is free software and is provided to you under the terms of the
|
||||
* GNU General Public License version 2 as published by the Free Software
|
||||
@@ -51,7 +51,6 @@ struct kbase_ccswe {
|
||||
*/
|
||||
void kbase_ccswe_init(struct kbase_ccswe *self);
|
||||
|
||||
|
||||
/**
|
||||
* kbase_ccswe_cycle_at() - Estimate cycle count at given timestamp.
|
||||
*
|
||||
@@ -68,7 +67,7 @@ void kbase_ccswe_init(struct kbase_ccswe *self);
|
||||
* u64 ts = ktime_get_raw_ns();
|
||||
* u64 cycle = kbase_ccswe_cycle_at(&ccswe, ts)
|
||||
*
|
||||
* Returns: estimated value of cycle count at a given time.
|
||||
* Return: estimated value of cycle count at a given time.
|
||||
*/
|
||||
u64 kbase_ccswe_cycle_at(struct kbase_ccswe *self, u64 timestamp_ns);
|
||||
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
|
||||
/*
|
||||
*
|
||||
* (C) COPYRIGHT 2010-2017, 2019-2021 ARM Limited. All rights reserved.
|
||||
* (C) COPYRIGHT 2010-2017, 2019-2022 ARM Limited. All rights reserved.
|
||||
*
|
||||
* This program is free software and is provided to you under the terms of the
|
||||
* GNU General Public License version 2 as published by the Free Software
|
||||
@@ -246,8 +246,6 @@ struct kbase_pm_callback_conf {
|
||||
*
|
||||
* For linux this callback will be called by the kernel runtime_suspend callback.
|
||||
* Note: for linux the kernel must have CONFIG_PM_RUNTIME enabled to use this feature.
|
||||
*
|
||||
* @return 0 on success, else OS error code.
|
||||
*/
|
||||
void (*power_runtime_off_callback)(struct kbase_device *kbdev);
|
||||
|
||||
@@ -255,6 +253,8 @@ struct kbase_pm_callback_conf {
|
||||
*
|
||||
* For linux this callback will be called by the kernel runtime_resume callback.
|
||||
* Note: for linux the kernel must have CONFIG_PM_RUNTIME enabled to use this feature.
|
||||
*
|
||||
* @return 0 on success, else OS error code.
|
||||
*/
|
||||
int (*power_runtime_on_callback)(struct kbase_device *kbdev);
|
||||
|
||||
@@ -455,7 +455,7 @@ struct kbase_platform_config {
|
||||
/**
|
||||
* kbase_get_platform_config - Gets the pointer to platform config.
|
||||
*
|
||||
* @return Pointer to the platform config
|
||||
* Return: Pointer to the platform config
|
||||
*/
|
||||
struct kbase_platform_config *kbase_get_platform_config(void);
|
||||
|
||||
@@ -564,7 +564,6 @@ void kbasep_platform_event_atom_complete(struct kbase_jd_atom *katom);
|
||||
#ifndef CONFIG_OF
|
||||
/**
|
||||
* kbase_platform_register - Register a platform device for the GPU
|
||||
*
|
||||
* This can be used to register a platform device on systems where device tree
|
||||
* is not enabled and the platform initialisation code in the kernel doesn't
|
||||
* create the GPU device. Where possible device tree should be used instead.
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
|
||||
/*
|
||||
*
|
||||
* (C) COPYRIGHT 2013-2021 ARM Limited. All rights reserved.
|
||||
* (C) COPYRIGHT 2013-2022 ARM Limited. All rights reserved.
|
||||
*
|
||||
* This program is free software and is provided to you under the terms of the
|
||||
* GNU General Public License version 2 as published by the Free Software
|
||||
@@ -31,33 +31,27 @@
|
||||
#include <mali_kbase_config_platform.h>
|
||||
|
||||
enum {
|
||||
/**
|
||||
* Use unrestricted Address ID width on the AXI bus.
|
||||
*/
|
||||
/* Use unrestricted Address ID width on the AXI bus. */
|
||||
KBASE_AID_32 = 0x0,
|
||||
|
||||
/**
|
||||
* Restrict GPU to a half of maximum Address ID count.
|
||||
/* Restrict GPU to a half of maximum Address ID count.
|
||||
* This will reduce performance, but reduce bus load due to GPU.
|
||||
*/
|
||||
KBASE_AID_16 = 0x3,
|
||||
|
||||
/**
|
||||
* Restrict GPU to a quarter of maximum Address ID count.
|
||||
/* Restrict GPU to a quarter of maximum Address ID count.
|
||||
* This will reduce performance, but reduce bus load due to GPU.
|
||||
*/
|
||||
KBASE_AID_8 = 0x2,
|
||||
KBASE_AID_8 = 0x2,
|
||||
|
||||
/**
|
||||
* Restrict GPU to an eighth of maximum Address ID count.
|
||||
/* Restrict GPU to an eighth of maximum Address ID count.
|
||||
* This will reduce performance, but reduce bus load due to GPU.
|
||||
*/
|
||||
KBASE_AID_4 = 0x1
|
||||
KBASE_AID_4 = 0x1
|
||||
};
|
||||
|
||||
enum {
|
||||
/**
|
||||
* Use unrestricted Address ID width on the AXI bus.
|
||||
/* Use unrestricted Address ID width on the AXI bus.
|
||||
* Restricting ID width will reduce performance & bus load due to GPU.
|
||||
*/
|
||||
KBASE_3BIT_AID_32 = 0x0,
|
||||
@@ -78,10 +72,10 @@ enum {
|
||||
KBASE_3BIT_AID_12 = 0x5,
|
||||
|
||||
/* Restrict GPU to 1/4 of maximum Address ID count. */
|
||||
KBASE_3BIT_AID_8 = 0x6,
|
||||
KBASE_3BIT_AID_8 = 0x6,
|
||||
|
||||
/* Restrict GPU to 1/8 of maximum Address ID count. */
|
||||
KBASE_3BIT_AID_4 = 0x7
|
||||
KBASE_3BIT_AID_4 = 0x7
|
||||
};
|
||||
|
||||
#if MALI_USE_CSF
|
||||
@@ -103,8 +97,7 @@ enum {
|
||||
#define DEFAULT_PM_DVFS_PERIOD 100 /* 100ms */
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Power Management poweroff tick granuality. This is in nanoseconds to
|
||||
/* Power Management poweroff tick granularity. This is in nanoseconds to
|
||||
* allow HR timer support (can be overridden by platform header).
|
||||
*
|
||||
* On each scheduling tick, the power manager core may decide to:
|
||||
@@ -115,95 +108,106 @@ enum {
|
||||
#define DEFAULT_PM_GPU_POWEROFF_TICK_NS (400000) /* 400us */
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Power Manager number of ticks before shader cores are powered off
|
||||
/* Power Manager number of ticks before shader cores are powered off
|
||||
* (can be overridden by platform header).
|
||||
*/
|
||||
#ifndef DEFAULT_PM_POWEROFF_TICK_SHADER
|
||||
#define DEFAULT_PM_POWEROFF_TICK_SHADER (2) /* 400-800us */
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Default scheduling tick granuality (can be overridden by platform header)
|
||||
*/
|
||||
/* Default scheduling tick granularity (can be overridden by platform header) */
|
||||
#ifndef DEFAULT_JS_SCHEDULING_PERIOD_NS
|
||||
#define DEFAULT_JS_SCHEDULING_PERIOD_NS (100000000u) /* 100ms */
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Default minimum number of scheduling ticks before jobs are soft-stopped.
|
||||
/* Default minimum number of scheduling ticks before jobs are soft-stopped.
|
||||
*
|
||||
* This defines the time-slice for a job (which may be different from that of a
|
||||
* context)
|
||||
*/
|
||||
#define DEFAULT_JS_SOFT_STOP_TICKS (1) /* 100ms-200ms */
|
||||
|
||||
/**
|
||||
* Default minimum number of scheduling ticks before CL jobs are soft-stopped.
|
||||
*/
|
||||
/* Default minimum number of scheduling ticks before CL jobs are soft-stopped. */
|
||||
#define DEFAULT_JS_SOFT_STOP_TICKS_CL (1) /* 100ms-200ms */
|
||||
|
||||
/**
|
||||
* Default minimum number of scheduling ticks before jobs are hard-stopped
|
||||
*/
|
||||
/* Default minimum number of scheduling ticks before jobs are hard-stopped */
|
||||
#define DEFAULT_JS_HARD_STOP_TICKS_SS (50) /* 5s */
|
||||
|
||||
/**
|
||||
* Default minimum number of scheduling ticks before CL jobs are hard-stopped.
|
||||
*/
|
||||
/* Default minimum number of scheduling ticks before CL jobs are hard-stopped. */
|
||||
#define DEFAULT_JS_HARD_STOP_TICKS_CL (50) /* 5s */
|
||||
|
||||
/**
|
||||
* Default minimum number of scheduling ticks before jobs are hard-stopped
|
||||
/* Default minimum number of scheduling ticks before jobs are hard-stopped
|
||||
* during dumping
|
||||
*/
|
||||
#define DEFAULT_JS_HARD_STOP_TICKS_DUMPING (15000) /* 1500s */
|
||||
|
||||
/**
|
||||
* Default timeout for some software jobs, after which the software event wait
|
||||
/* Default timeout for some software jobs, after which the software event wait
|
||||
* jobs will be cancelled.
|
||||
*/
|
||||
#define DEFAULT_JS_SOFT_JOB_TIMEOUT (3000) /* 3s */
|
||||
|
||||
/**
|
||||
* Default minimum number of scheduling ticks before the GPU is reset to clear a
|
||||
/* Default minimum number of scheduling ticks before the GPU is reset to clear a
|
||||
* "stuck" job
|
||||
*/
|
||||
#define DEFAULT_JS_RESET_TICKS_SS (55) /* 5.5s */
|
||||
|
||||
/**
|
||||
* Default minimum number of scheduling ticks before the GPU is reset to clear a
|
||||
/* Default minimum number of scheduling ticks before the GPU is reset to clear a
|
||||
* "stuck" CL job.
|
||||
*/
|
||||
#define DEFAULT_JS_RESET_TICKS_CL (55) /* 5.5s */
|
||||
|
||||
/**
|
||||
* Default minimum number of scheduling ticks before the GPU is reset to clear a
|
||||
/* Default minimum number of scheduling ticks before the GPU is reset to clear a
|
||||
* "stuck" job during dumping.
|
||||
*/
|
||||
#define DEFAULT_JS_RESET_TICKS_DUMPING (15020) /* 1502s */
|
||||
|
||||
/**
|
||||
* Default number of milliseconds given for other jobs on the GPU to be
|
||||
/* Default number of milliseconds given for other jobs on the GPU to be
|
||||
* soft-stopped when the GPU needs to be reset.
|
||||
*/
|
||||
#define DEFAULT_RESET_TIMEOUT_MS (3000) /* 3s */
|
||||
|
||||
/* Waiting timeout for status change acknowledgment, in clock cycles
|
||||
* Based on 3000ms timeout at nominal 100MHz, as is required for Android - based
|
||||
* on scaling from a 50MHz GPU system.
|
||||
/* Nominal reference frequency that was used to obtain all following
|
||||
* <...>_TIMEOUT_CYCLES macros, in kHz.
|
||||
*
|
||||
* Timeouts are scaled based on the relation between this value and the lowest
|
||||
* GPU clock frequency.
|
||||
*/
|
||||
#define DEFAULT_REF_TIMEOUT_FREQ_KHZ (100000)
|
||||
#define CSF_FIRMWARE_TIMEOUT_CYCLES (300000000)
|
||||
|
||||
/* A default timeout to be used when an invalid timeout selector is
|
||||
* used to retrieve the timeout, on JM GPUs. CSF GPUs use the Firmware
|
||||
* timeout as the default.
|
||||
#if MALI_USE_CSF
|
||||
/* Waiting timeout for status change acknowledgment, in clock cycles.
|
||||
*
|
||||
* This is also the default timeout to be used when an invalid timeout
|
||||
* selector is used to retrieve the timeout on CSF GPUs.
|
||||
*
|
||||
* Based on 75000ms timeout at nominal 100MHz, as is required for Android - based
|
||||
* on scaling from a 50MHz GPU system.
|
||||
*/
|
||||
#define CSF_FIRMWARE_TIMEOUT_CYCLES (7500000000)
|
||||
|
||||
/* Timeout in clock cycles for GPU Power Management to reach the desired
|
||||
* Shader, L2 and MCU state.
|
||||
*
|
||||
* Based on 2500ms timeout at nominal 100MHz, scaled from a 50MHz GPU system.
|
||||
*/
|
||||
#define CSF_PM_TIMEOUT_CYCLES (250000000)
|
||||
|
||||
/* Waiting timeout in clock cycles for GPU reset to complete.
|
||||
*
|
||||
* Based on 2500ms timeout at 100MHz, scaled from a 50MHz GPU system.
|
||||
*/
|
||||
#define CSF_GPU_RESET_TIMEOUT_CYCLES (250000000)
|
||||
|
||||
#else /* MALI_USE_CSF */
|
||||
|
||||
/* A default timeout in clock cycles to be used when an invalid timeout
|
||||
* selector is used to retrieve the timeout, on JM GPUs.
|
||||
*/
|
||||
#define JM_DEFAULT_TIMEOUT_CYCLES (150000000)
|
||||
|
||||
/**
|
||||
* Default timeslice that a context is scheduled in for, in nanoseconds.
|
||||
#endif /* MALI_USE_CSF */
|
||||
|
||||
/* Default timeslice that a context is scheduled in for, in nanoseconds.
|
||||
*
|
||||
* When a context has used up this amount of time across its jobs, it is
|
||||
* scheduled out to let another run.
|
||||
@@ -213,16 +217,14 @@ enum {
|
||||
*/
|
||||
#define DEFAULT_JS_CTX_TIMESLICE_NS (50000000) /* 50ms */
|
||||
|
||||
/**
|
||||
* Maximum frequency (in kHz) that the GPU can be clocked. For some platforms
|
||||
/* Maximum frequency (in kHz) that the GPU can be clocked. For some platforms
|
||||
* this isn't available, so we simply define a dummy value here. If devfreq
|
||||
* is enabled the value will be read from there, otherwise this should be
|
||||
* overridden by defining GPU_FREQ_KHZ_MAX in the platform file.
|
||||
*/
|
||||
#define DEFAULT_GPU_FREQ_KHZ_MAX (5000)
|
||||
|
||||
/**
|
||||
* Default timeout for task execution on an endpoint
|
||||
/* Default timeout for task execution on an endpoint
|
||||
*
|
||||
* Number of GPU clock cycles before the driver terminates a task that is
|
||||
* making no forward progress on an endpoint (e.g. shader core).
|
||||
@@ -231,8 +233,7 @@ enum {
|
||||
*/
|
||||
#define DEFAULT_PROGRESS_TIMEOUT ((u64)5 * 500 * 1024 * 1024)
|
||||
|
||||
/**
|
||||
* Default threshold at which to switch to incremental rendering
|
||||
/* Default threshold at which to switch to incremental rendering
|
||||
*
|
||||
* Fraction of the maximum size of an allocation that grows on GPU page fault
|
||||
* that can be used up before the driver switches to incremental rendering,
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,7 +1,7 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
|
||||
/*
|
||||
*
|
||||
* (C) COPYRIGHT 2017-2018, 2020-2021 ARM Limited. All rights reserved.
|
||||
* (C) COPYRIGHT 2017-2018, 2020-2022 ARM Limited. All rights reserved.
|
||||
*
|
||||
* This program is free software and is provided to you under the terms of the
|
||||
* GNU General Public License version 2 as published by the Free Software
|
||||
@@ -79,7 +79,7 @@ void kbase_ctx_sched_term(struct kbase_device *kbdev);
|
||||
int kbase_ctx_sched_retain_ctx(struct kbase_context *kctx);
|
||||
|
||||
/**
|
||||
* kbase_ctx_sched_retain_ctx_refcount
|
||||
* kbase_ctx_sched_retain_ctx_refcount - Retain a reference to the @ref kbase_context
|
||||
* @kctx: The context to which to retain a reference
|
||||
*
|
||||
* This function only retains a reference to the context. It must be called
|
||||
@@ -187,8 +187,8 @@ struct kbase_context *kbase_ctx_sched_as_to_ctx_nolock(
|
||||
* @kctx: Context to be refcounted
|
||||
*
|
||||
* The following locks must be held by the caller:
|
||||
* * kbase_device::mmu_hw_mutex
|
||||
* * kbase_device::hwaccess_lock
|
||||
* &kbase_device.mmu_hw_mutex
|
||||
* &kbase_device.hwaccess_lock
|
||||
*
|
||||
* Return: true if refcount succeeded, and the context will not be scheduled
|
||||
* out, false if the refcount failed (because the context is being/has been
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
|
||||
/*
|
||||
*
|
||||
* (C) COPYRIGHT 2012-2016, 2018, 2020-2021 ARM Limited. All rights reserved.
|
||||
* (C) COPYRIGHT 2012-2016, 2018, 2020-2022 ARM Limited. All rights reserved.
|
||||
*
|
||||
* This program is free software and is provided to you under the terms of the
|
||||
* GNU General Public License version 2 as published by the Free Software
|
||||
@@ -53,7 +53,7 @@ void kbase_debug_job_fault_dev_term(struct kbase_device *kbdev);
|
||||
* kbase_debug_job_fault_context_init - Initialize the relevant
|
||||
* data structure per context
|
||||
* @kctx: KBase context pointer
|
||||
* @return 0 on success
|
||||
* Return: 0 on success
|
||||
*/
|
||||
int kbase_debug_job_fault_context_init(struct kbase_context *kctx);
|
||||
|
||||
@@ -68,39 +68,42 @@ void kbase_debug_job_fault_context_term(struct kbase_context *kctx);
|
||||
* kbase_debug_job_fault_kctx_unblock - Unblock the atoms blocked on job fault
|
||||
* dumping on context termination.
|
||||
*
|
||||
* @kctx: KBase context pointer
|
||||
*
|
||||
* This function is called during context termination to unblock the atom for
|
||||
* which the job fault occurred and also the atoms following it. This is needed
|
||||
* otherwise the wait for zero jobs could timeout (leading to an assertion
|
||||
* failure, kernel panic in debug builds) in the pathological case where
|
||||
* although the thread/daemon capturing the job fault events is running,
|
||||
* but for some reasons has stopped consuming the events.
|
||||
*
|
||||
* @kctx: KBase context pointer
|
||||
*/
|
||||
void kbase_debug_job_fault_kctx_unblock(struct kbase_context *kctx);
|
||||
|
||||
/**
|
||||
* kbase_debug_job_fault_process - Process the failed job.
|
||||
* It will send a event and wake up the job fault waiting queue
|
||||
* Then create a work queue to wait for job dump finish
|
||||
* This function should be called in the interrupt handler and before
|
||||
* jd_done that make sure the jd_done_worker will be delayed until the
|
||||
* job dump finish
|
||||
*
|
||||
* @katom: The failed atom pointer
|
||||
* @completion_code: the job status
|
||||
* @return true if dump is going on
|
||||
*
|
||||
* It will send an event and wake up the job fault waiting queue
|
||||
* Then create a work queue to wait for job dump finish
|
||||
* This function should be called in the interrupt handler and before
|
||||
* jd_done that make sure the jd_done_worker will be delayed until the
|
||||
* job dump finish
|
||||
*
|
||||
* Return: true if dump is going on
|
||||
*/
|
||||
bool kbase_debug_job_fault_process(struct kbase_jd_atom *katom,
|
||||
u32 completion_code);
|
||||
|
||||
|
||||
/**
|
||||
* kbase_debug_job_fault_reg_snapshot_init - Set the interested registers
|
||||
* address during the job fault process, the relevant registers will
|
||||
* be saved when a job fault happens
|
||||
* @kctx: KBase context pointer
|
||||
* @reg_range: Maximum register address space
|
||||
* @return true if initializing successfully
|
||||
*
|
||||
* Return: true if initializing successfully
|
||||
*/
|
||||
bool kbase_debug_job_fault_reg_snapshot_init(struct kbase_context *kctx,
|
||||
int reg_range);
|
||||
@@ -108,8 +111,10 @@ bool kbase_debug_job_fault_reg_snapshot_init(struct kbase_context *kctx,
|
||||
/**
|
||||
* kbase_job_fault_get_reg_snapshot - Read the interested registers for
|
||||
* failed job dump
|
||||
*
|
||||
* @kctx: KBase context pointer
|
||||
* @return true if getting registers successfully
|
||||
*
|
||||
* Return: true if getting registers successfully
|
||||
*/
|
||||
bool kbase_job_fault_get_reg_snapshot(struct kbase_context *kctx);
|
||||
|
||||
|
||||
@@ -31,6 +31,22 @@
|
||||
|
||||
#if IS_ENABLED(CONFIG_DEBUG_FS)
|
||||
|
||||
#define SHOW_GPU_MEM_DATA(type, format) \
|
||||
{ \
|
||||
unsigned int i, j; \
|
||||
const type *ptr = (type *)cpu_addr; \
|
||||
const unsigned int col_width = sizeof(type); \
|
||||
const unsigned int row_width = (col_width == sizeof(u64)) ? 32 : 16; \
|
||||
const unsigned int num_cols = row_width / col_width; \
|
||||
for (i = 0; i < PAGE_SIZE; i += row_width) { \
|
||||
seq_printf(m, "%016llx:", gpu_addr + i); \
|
||||
for (j = 0; j < num_cols; j++) \
|
||||
seq_printf(m, format, ptr[j]); \
|
||||
ptr += num_cols; \
|
||||
seq_putc(m, '\n'); \
|
||||
} \
|
||||
}
|
||||
|
||||
struct debug_mem_mapping {
|
||||
struct list_head node;
|
||||
|
||||
@@ -44,6 +60,7 @@ struct debug_mem_mapping {
|
||||
struct debug_mem_data {
|
||||
struct list_head mapping_list;
|
||||
struct kbase_context *kctx;
|
||||
unsigned int column_width;
|
||||
};
|
||||
|
||||
struct debug_mem_seq_off {
|
||||
@@ -111,9 +128,9 @@ static int debug_mem_show(struct seq_file *m, void *v)
|
||||
struct debug_mem_data *mem_data = m->private;
|
||||
struct debug_mem_seq_off *data = v;
|
||||
struct debug_mem_mapping *map;
|
||||
int i, j;
|
||||
unsigned long long gpu_addr;
|
||||
struct page *page;
|
||||
uint32_t *mapping;
|
||||
void *cpu_addr;
|
||||
pgprot_t prot = PAGE_KERNEL;
|
||||
|
||||
map = list_entry(data->lh, struct debug_mem_mapping, node);
|
||||
@@ -130,20 +147,33 @@ static int debug_mem_show(struct seq_file *m, void *v)
|
||||
prot = pgprot_writecombine(prot);
|
||||
|
||||
page = as_page(map->alloc->pages[data->offset]);
|
||||
mapping = vmap(&page, 1, VM_MAP, prot);
|
||||
if (!mapping)
|
||||
cpu_addr = vmap(&page, 1, VM_MAP, prot);
|
||||
if (!cpu_addr)
|
||||
goto out;
|
||||
|
||||
for (i = 0; i < PAGE_SIZE; i += 4*sizeof(*mapping)) {
|
||||
seq_printf(m, "%016llx:", i + ((map->start_pfn +
|
||||
data->offset) << PAGE_SHIFT));
|
||||
gpu_addr = (map->start_pfn + data->offset) << PAGE_SHIFT;
|
||||
|
||||
for (j = 0; j < 4*sizeof(*mapping); j += sizeof(*mapping))
|
||||
seq_printf(m, " %08x", mapping[(i+j)/sizeof(*mapping)]);
|
||||
seq_putc(m, '\n');
|
||||
/* Cases for 4 supported values of column_width for showing
|
||||
* the GPU memory contents.
|
||||
*/
|
||||
switch (mem_data->column_width) {
|
||||
case 1:
|
||||
SHOW_GPU_MEM_DATA(u8, " %02hhx");
|
||||
break;
|
||||
case 2:
|
||||
SHOW_GPU_MEM_DATA(u16, " %04hx");
|
||||
break;
|
||||
case 4:
|
||||
SHOW_GPU_MEM_DATA(u32, " %08x");
|
||||
break;
|
||||
case 8:
|
||||
SHOW_GPU_MEM_DATA(u64, " %016llx");
|
||||
break;
|
||||
default:
|
||||
dev_warn(mem_data->kctx->kbdev->dev, "Unexpected column width");
|
||||
}
|
||||
|
||||
vunmap(mapping);
|
||||
vunmap(cpu_addr);
|
||||
|
||||
seq_putc(m, '\n');
|
||||
|
||||
@@ -207,6 +237,14 @@ static int debug_mem_open(struct inode *i, struct file *file)
|
||||
if (get_file_rcu(kctx->filp) == 0)
|
||||
return -ENOENT;
|
||||
|
||||
/* Check if file was opened in write mode. GPU memory contents
|
||||
* are returned only when the file is not opened in write mode.
|
||||
*/
|
||||
if (file->f_mode & FMODE_WRITE) {
|
||||
file->private_data = kctx;
|
||||
return 0;
|
||||
}
|
||||
|
||||
ret = seq_open(file, &ops);
|
||||
if (ret)
|
||||
goto open_fail;
|
||||
@@ -223,6 +261,8 @@ static int debug_mem_open(struct inode *i, struct file *file)
|
||||
|
||||
kbase_gpu_vm_lock(kctx);
|
||||
|
||||
mem_data->column_width = kctx->mem_view_column_width;
|
||||
|
||||
ret = debug_mem_zone_open(&kctx->reg_rbtree_same, mem_data);
|
||||
if (ret != 0) {
|
||||
kbase_gpu_vm_unlock(kctx);
|
||||
@@ -241,6 +281,20 @@ static int debug_mem_open(struct inode *i, struct file *file)
|
||||
goto out;
|
||||
}
|
||||
|
||||
#if MALI_USE_CSF
|
||||
ret = debug_mem_zone_open(&kctx->reg_rbtree_exec_fixed, mem_data);
|
||||
if (ret != 0) {
|
||||
kbase_gpu_vm_unlock(kctx);
|
||||
goto out;
|
||||
}
|
||||
|
||||
ret = debug_mem_zone_open(&kctx->reg_rbtree_fixed, mem_data);
|
||||
if (ret != 0) {
|
||||
kbase_gpu_vm_unlock(kctx);
|
||||
goto out;
|
||||
}
|
||||
#endif
|
||||
|
||||
kbase_gpu_vm_unlock(kctx);
|
||||
|
||||
((struct seq_file *)file->private_data)->private = mem_data;
|
||||
@@ -270,32 +324,70 @@ open_fail:
|
||||
static int debug_mem_release(struct inode *inode, struct file *file)
|
||||
{
|
||||
struct kbase_context *const kctx = inode->i_private;
|
||||
struct seq_file *sfile = file->private_data;
|
||||
struct debug_mem_data *mem_data = sfile->private;
|
||||
struct debug_mem_mapping *mapping;
|
||||
|
||||
seq_release(inode, file);
|
||||
/* If the file wasn't opened in write mode, then release the
|
||||
* memory allocated to show the GPU memory contents.
|
||||
*/
|
||||
if (!(file->f_mode & FMODE_WRITE)) {
|
||||
struct seq_file *sfile = file->private_data;
|
||||
struct debug_mem_data *mem_data = sfile->private;
|
||||
struct debug_mem_mapping *mapping;
|
||||
|
||||
while (!list_empty(&mem_data->mapping_list)) {
|
||||
mapping = list_first_entry(&mem_data->mapping_list,
|
||||
seq_release(inode, file);
|
||||
|
||||
while (!list_empty(&mem_data->mapping_list)) {
|
||||
mapping = list_first_entry(&mem_data->mapping_list,
|
||||
struct debug_mem_mapping, node);
|
||||
kbase_mem_phy_alloc_put(mapping->alloc);
|
||||
list_del(&mapping->node);
|
||||
kfree(mapping);
|
||||
}
|
||||
kbase_mem_phy_alloc_put(mapping->alloc);
|
||||
list_del(&mapping->node);
|
||||
kfree(mapping);
|
||||
}
|
||||
|
||||
kfree(mem_data);
|
||||
kfree(mem_data);
|
||||
}
|
||||
|
||||
fput(kctx->filp);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static ssize_t debug_mem_write(struct file *file, const char __user *ubuf,
|
||||
size_t count, loff_t *ppos)
|
||||
{
|
||||
struct kbase_context *const kctx = file->private_data;
|
||||
unsigned int column_width = 0;
|
||||
int ret = 0;
|
||||
|
||||
CSTD_UNUSED(ppos);
|
||||
|
||||
ret = kstrtouint_from_user(ubuf, count, 0, &column_width);
|
||||
|
||||
if (ret)
|
||||
return ret;
|
||||
if (!is_power_of_2(column_width)) {
|
||||
dev_dbg(kctx->kbdev->dev,
|
||||
"Column width %u not a multiple of power of 2", column_width);
|
||||
return -EINVAL;
|
||||
}
|
||||
if (column_width > 8) {
|
||||
dev_dbg(kctx->kbdev->dev,
|
||||
"Column width %u greater than 8 not supported", column_width);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
kbase_gpu_vm_lock(kctx);
|
||||
kctx->mem_view_column_width = column_width;
|
||||
kbase_gpu_vm_unlock(kctx);
|
||||
|
||||
return count;
|
||||
}
|
||||
|
||||
static const struct file_operations kbase_debug_mem_view_fops = {
|
||||
.owner = THIS_MODULE,
|
||||
.open = debug_mem_open,
|
||||
.release = debug_mem_release,
|
||||
.read = seq_read,
|
||||
.write = debug_mem_write,
|
||||
.llseek = seq_lseek
|
||||
};
|
||||
|
||||
@@ -308,6 +400,9 @@ void kbase_debug_mem_view_init(struct kbase_context *const kctx)
|
||||
WARN_ON(IS_ERR_OR_NULL(kctx->kctx_dentry)))
|
||||
return;
|
||||
|
||||
/* Default column width is 4 */
|
||||
kctx->mem_view_column_width = sizeof(u32);
|
||||
|
||||
debugfs_create_file("mem_view", 0400, kctx->kctx_dentry, kctx,
|
||||
&kbase_debug_mem_view_fops);
|
||||
}
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
|
||||
/*
|
||||
*
|
||||
* (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
|
||||
* (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
|
||||
*
|
||||
* This program is free software and is provided to you under the terms of the
|
||||
* GNU General Public License version 2 as published by the Free Software
|
||||
@@ -34,20 +34,20 @@
|
||||
/**
|
||||
* set_attr_from_string - Parse a string to set elements of an array
|
||||
*
|
||||
* This is the core of the implementation of
|
||||
* kbase_debugfs_helper_set_attr_from_string. The only difference between the
|
||||
* two functions is that this one requires the input string to be writable.
|
||||
*
|
||||
* @buf: Input string to parse. Must be nul-terminated!
|
||||
* @array: Address of an object that can be accessed like an array.
|
||||
* @nelems: Number of elements in the array.
|
||||
* @set_attr_fn: Function to be called back for each array element.
|
||||
*
|
||||
* This is the core of the implementation of
|
||||
* kbase_debugfs_helper_set_attr_from_string. The only difference between the
|
||||
* two functions is that this one requires the input string to be writable.
|
||||
*
|
||||
* Return: 0 if success, negative error code otherwise.
|
||||
*/
|
||||
static int
|
||||
set_attr_from_string(char *const buf, void *const array, size_t const nelems,
|
||||
kbase_debugfs_helper_set_attr_fn *const set_attr_fn)
|
||||
kbase_debugfs_helper_set_attr_fn * const set_attr_fn)
|
||||
{
|
||||
size_t index, err = 0;
|
||||
char *ptr = buf;
|
||||
@@ -143,7 +143,7 @@ int kbase_debugfs_string_validator(char *const buf)
|
||||
|
||||
int kbase_debugfs_helper_set_attr_from_string(
|
||||
const char *const buf, void *const array, size_t const nelems,
|
||||
kbase_debugfs_helper_set_attr_fn *const set_attr_fn)
|
||||
kbase_debugfs_helper_set_attr_fn * const set_attr_fn)
|
||||
{
|
||||
char *const wbuf = kstrdup(buf, GFP_KERNEL);
|
||||
int err = 0;
|
||||
@@ -168,7 +168,7 @@ int kbase_debugfs_helper_set_attr_from_string(
|
||||
ssize_t kbase_debugfs_helper_get_attr_to_string(
|
||||
char *const buf, size_t const size, void *const array,
|
||||
size_t const nelems,
|
||||
kbase_debugfs_helper_get_attr_fn *const get_attr_fn)
|
||||
kbase_debugfs_helper_get_attr_fn * const get_attr_fn)
|
||||
{
|
||||
ssize_t total = 0;
|
||||
size_t index;
|
||||
@@ -189,7 +189,7 @@ ssize_t kbase_debugfs_helper_get_attr_to_string(
|
||||
int kbase_debugfs_helper_seq_write(
|
||||
struct file *const file, const char __user *const ubuf,
|
||||
size_t const count, size_t const nelems,
|
||||
kbase_debugfs_helper_set_attr_fn *const set_attr_fn)
|
||||
kbase_debugfs_helper_set_attr_fn * const set_attr_fn)
|
||||
{
|
||||
const struct seq_file *const sfile = file->private_data;
|
||||
void *const array = sfile->private;
|
||||
@@ -228,8 +228,8 @@ int kbase_debugfs_helper_seq_write(
|
||||
}
|
||||
|
||||
int kbase_debugfs_helper_seq_read(
|
||||
struct seq_file *const sfile, size_t const nelems,
|
||||
kbase_debugfs_helper_get_attr_fn *const get_attr_fn)
|
||||
struct seq_file * const sfile, size_t const nelems,
|
||||
kbase_debugfs_helper_get_attr_fn * const get_attr_fn)
|
||||
{
|
||||
void *const array = sfile->private;
|
||||
size_t index;
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
|
||||
/*
|
||||
*
|
||||
* (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
|
||||
* (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
|
||||
*
|
||||
* This program is free software and is provided to you under the terms of the
|
||||
* GNU General Public License version 2 as published by the Free Software
|
||||
@@ -37,6 +37,11 @@ typedef void kbase_debugfs_helper_set_attr_fn(void *array, size_t index,
|
||||
* kbase_debugfs_helper_set_attr_from_string - Parse a string to reconfigure an
|
||||
* array
|
||||
*
|
||||
* @buf: Input string to parse. Must be nul-terminated!
|
||||
* @array: Address of an object that can be accessed like an array.
|
||||
* @nelems: Number of elements in the array.
|
||||
* @set_attr_fn: Function to be called back for each array element.
|
||||
*
|
||||
* The given function is called once for each attribute value found in the
|
||||
* input string. It is not an error if the string specifies fewer attribute
|
||||
* values than the specified number of array elements.
|
||||
@@ -46,11 +51,6 @@ typedef void kbase_debugfs_helper_set_attr_fn(void *array, size_t index,
|
||||
* Attribute values are separated by one or more space characters.
|
||||
* Additional leading and trailing spaces are ignored.
|
||||
*
|
||||
* @buf: Input string to parse. Must be nul-terminated!
|
||||
* @array: Address of an object that can be accessed like an array.
|
||||
* @nelems: Number of elements in the array.
|
||||
* @set_attr_fn: Function to be called back for each array element.
|
||||
*
|
||||
* Return: 0 if success, negative error code otherwise.
|
||||
*/
|
||||
int kbase_debugfs_helper_set_attr_from_string(
|
||||
@@ -62,6 +62,8 @@ int kbase_debugfs_helper_set_attr_from_string(
|
||||
* debugfs file for any incorrect formats
|
||||
* or wrong values.
|
||||
*
|
||||
* @buf: Null-terminated string to validate.
|
||||
*
|
||||
* This function is to be used before any writes to debugfs values are done
|
||||
* such that any strings with erroneous values (such as octal 09 or
|
||||
* hexadecimal 0xGH are fully ignored) - without this validation, any correct
|
||||
@@ -73,8 +75,6 @@ int kbase_debugfs_helper_set_attr_from_string(
|
||||
* of the input string. This function also requires the input string to be
|
||||
* writable.
|
||||
*
|
||||
* @buf: Null-terminated string to validate.
|
||||
*
|
||||
* Return: 0 with no error, else -22 (the invalid return value of kstrtoul) if
|
||||
* any value in the string was wrong or with an incorrect format.
|
||||
*/
|
||||
@@ -95,17 +95,17 @@ typedef size_t kbase_debugfs_helper_get_attr_fn(void *array, size_t index);
|
||||
* kbase_debugfs_helper_get_attr_to_string - Construct a formatted string
|
||||
* from elements in an array
|
||||
*
|
||||
* The given function is called once for each array element to get the
|
||||
* value of the attribute to be inspected. The attribute values are
|
||||
* written to the buffer as a formatted string of decimal numbers
|
||||
* separated by spaces and terminated by a linefeed.
|
||||
*
|
||||
* @buf: Buffer in which to store the formatted output string.
|
||||
* @size: The size of the buffer, in bytes.
|
||||
* @array: Address of an object that can be accessed like an array.
|
||||
* @nelems: Number of elements in the array.
|
||||
* @get_attr_fn: Function to be called back for each array element.
|
||||
*
|
||||
* The given function is called once for each array element to get the
|
||||
* value of the attribute to be inspected. The attribute values are
|
||||
* written to the buffer as a formatted string of decimal numbers
|
||||
* separated by spaces and terminated by a linefeed.
|
||||
*
|
||||
* Return: Number of characters written excluding the nul terminator.
|
||||
*/
|
||||
ssize_t kbase_debugfs_helper_get_attr_to_string(
|
||||
@@ -116,6 +116,10 @@ ssize_t kbase_debugfs_helper_get_attr_to_string(
|
||||
* kbase_debugfs_helper_seq_read - Implements reads from a virtual file for an
|
||||
* array
|
||||
*
|
||||
* @sfile: A virtual file previously opened by calling single_open.
|
||||
* @nelems: Number of elements in the array.
|
||||
* @get_attr_fn: Function to be called back for each array element.
|
||||
*
|
||||
* The virtual file must have been opened by calling single_open and passing
|
||||
* the address of an object that can be accessed like an array.
|
||||
*
|
||||
@@ -124,10 +128,6 @@ ssize_t kbase_debugfs_helper_get_attr_to_string(
|
||||
* written to the buffer as a formatted string of decimal numbers
|
||||
* separated by spaces and terminated by a linefeed.
|
||||
*
|
||||
* @sfile: A virtual file previously opened by calling single_open.
|
||||
* @nelems: Number of elements in the array.
|
||||
* @get_attr_fn: Function to be called back for each array element.
|
||||
*
|
||||
* Return: 0 if success, negative error code otherwise.
|
||||
*/
|
||||
int kbase_debugfs_helper_seq_read(
|
||||
@@ -138,6 +138,12 @@ int kbase_debugfs_helper_seq_read(
|
||||
* kbase_debugfs_helper_seq_write - Implements writes to a virtual file for an
|
||||
* array
|
||||
*
|
||||
* @file: A virtual file previously opened by calling single_open.
|
||||
* @ubuf: Source address in user space.
|
||||
* @count: Number of bytes written to the virtual file.
|
||||
* @nelems: Number of elements in the array.
|
||||
* @set_attr_fn: Function to be called back for each array element.
|
||||
*
|
||||
* The virtual file must have been opened by calling single_open and passing
|
||||
* the address of an object that can be accessed like an array.
|
||||
*
|
||||
@@ -145,12 +151,6 @@ int kbase_debugfs_helper_seq_read(
|
||||
* data written to the virtual file. For further details, refer to the
|
||||
* description of set_attr_from_string.
|
||||
*
|
||||
* @file: A virtual file previously opened by calling single_open.
|
||||
* @ubuf: Source address in user space.
|
||||
* @count: Number of bytes written to the virtual file.
|
||||
* @nelems: Number of elements in the array.
|
||||
* @set_attr_fn: Function to be called back for each array element.
|
||||
*
|
||||
* Return: 0 if success, negative error code otherwise.
|
||||
*/
|
||||
int kbase_debugfs_helper_seq_write(struct file *file,
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
|
||||
/*
|
||||
*
|
||||
* (C) COPYRIGHT 2011-2021 ARM Limited. All rights reserved.
|
||||
* (C) COPYRIGHT 2011-2022 ARM Limited. All rights reserved.
|
||||
*
|
||||
* This program is free software and is provided to you under the terms of the
|
||||
* GNU General Public License version 2 as published by the Free Software
|
||||
@@ -35,11 +35,15 @@
|
||||
#include <backend/gpu/mali_kbase_instr_defs.h>
|
||||
#include <mali_kbase_pm.h>
|
||||
#include <mali_kbase_gpuprops_types.h>
|
||||
#include <mali_kbase_hwcnt_watchdog_if.h>
|
||||
|
||||
#if MALI_USE_CSF
|
||||
#include <mali_kbase_hwcnt_backend_csf.h>
|
||||
#else
|
||||
#include <mali_kbase_hwcnt_backend_jm.h>
|
||||
#include <mali_kbase_hwcnt_backend_jm_watchdog.h>
|
||||
#endif
|
||||
|
||||
#include <protected_mode_switcher.h>
|
||||
|
||||
#include <linux/atomic.h>
|
||||
@@ -82,7 +86,7 @@
|
||||
#define RESET_TIMEOUT 500
|
||||
|
||||
/**
|
||||
* The maximum number of Job Slots to support in the Hardware.
|
||||
* BASE_JM_MAX_NR_SLOTS - The maximum number of Job Slots to support in the Hardware.
|
||||
*
|
||||
* You can optimize this down if your target devices will only ever support a
|
||||
* small number of job slots.
|
||||
@@ -90,7 +94,7 @@
|
||||
#define BASE_JM_MAX_NR_SLOTS 3
|
||||
|
||||
/**
|
||||
* The maximum number of Address Spaces to support in the Hardware.
|
||||
* BASE_MAX_NR_AS - The maximum number of Address Spaces to support in the Hardware.
|
||||
*
|
||||
* You can optimize this down if your target devices will only ever support a
|
||||
* small number of Address Spaces
|
||||
@@ -110,19 +114,19 @@
|
||||
#define KBASEP_AS_NR_INVALID (-1)
|
||||
|
||||
/**
|
||||
* Maximum size in bytes of a MMU lock region, as a logarithm
|
||||
* KBASE_LOCK_REGION_MAX_SIZE_LOG2 - Maximum size in bytes of a MMU lock region,
|
||||
* as a logarithm
|
||||
*/
|
||||
#define KBASE_LOCK_REGION_MAX_SIZE_LOG2 (48) /* 256 TB */
|
||||
|
||||
/**
|
||||
* Minimum size in bytes of a MMU lock region, as a logarithm
|
||||
*/
|
||||
#define KBASE_LOCK_REGION_MIN_SIZE_LOG2 (15) /* 32 kB */
|
||||
|
||||
/**
|
||||
* Maximum number of GPU memory region zones
|
||||
* KBASE_REG_ZONE_MAX - Maximum number of GPU memory region zones
|
||||
*/
|
||||
#if MALI_USE_CSF
|
||||
#define KBASE_REG_ZONE_MAX 6ul
|
||||
#else
|
||||
#define KBASE_REG_ZONE_MAX 4ul
|
||||
#endif
|
||||
|
||||
#include "mali_kbase_hwaccess_defs.h"
|
||||
|
||||
@@ -248,9 +252,10 @@ struct kbase_fault {
|
||||
|
||||
/**
|
||||
* struct kbase_mmu_table - object representing a set of GPU page tables
|
||||
* @mmu_teardown_pages: Buffer of 4 Pages in size, used to cache the entries
|
||||
* of top & intermediate level page tables to avoid
|
||||
* repeated calls to kmap_atomic during the MMU teardown.
|
||||
* @mmu_teardown_pages: Array containing pointers to 3 separate pages, used
|
||||
* to cache the entries of top (L0) & intermediate level
|
||||
* page tables (L1 & L2) to avoid repeated calls to
|
||||
* kmap_atomic() during the MMU teardown.
|
||||
* @mmu_lock: Lock to serialize the accesses made to multi level GPU
|
||||
* page tables
|
||||
* @pgd: Physical address of the page allocated for the top
|
||||
@@ -265,7 +270,7 @@ struct kbase_fault {
|
||||
* it is NULL
|
||||
*/
|
||||
struct kbase_mmu_table {
|
||||
u64 *mmu_teardown_pages;
|
||||
u64 *mmu_teardown_pages[MIDGARD_MMU_BOTTOMLEVEL];
|
||||
struct mutex mmu_lock;
|
||||
phys_addr_t pgd;
|
||||
u8 group_id;
|
||||
@@ -357,8 +362,6 @@ struct kbase_clk_rate_listener {
|
||||
* enumerated GPU clock.
|
||||
* @clk_rate_trace_ops: Pointer to the platform specific GPU clock rate trace
|
||||
* operations.
|
||||
* @gpu_clk_rate_trace_write: Pointer to the function that would emit the
|
||||
* tracepoint for the clock rate change.
|
||||
* @listeners: List of listener attached.
|
||||
* @lock: Lock to serialize the actions of GPU clock rate trace
|
||||
* manager.
|
||||
@@ -373,13 +376,14 @@ struct kbase_clk_rate_trace_manager {
|
||||
|
||||
/**
|
||||
* struct kbase_pm_device_data - Data stored per device for power management.
|
||||
* @lock: The lock protecting Power Management structures accessed outside of
|
||||
* IRQ.
|
||||
* This lock must also be held whenever the GPU is being powered on or
|
||||
* off.
|
||||
* @active_count: The reference count of active contexts on this device. Note
|
||||
* that some code paths keep shaders/the tiler powered whilst this is 0.
|
||||
* Use kbase_pm_is_active() instead to check for such cases.
|
||||
* @lock: The lock protecting Power Management structures accessed
|
||||
* outside of IRQ.
|
||||
* This lock must also be held whenever the GPU is being
|
||||
* powered on or off.
|
||||
* @active_count: The reference count of active contexts on this device.
|
||||
* Note that some code paths keep shaders/the tiler
|
||||
* powered whilst this is 0.
|
||||
* Use kbase_pm_is_active() instead to check for such cases.
|
||||
* @suspending: Flag indicating suspending/suspended
|
||||
* @runtime_active: Flag to track if the GPU is in runtime suspended or active
|
||||
* state. This ensures that runtime_put and runtime_get
|
||||
@@ -388,24 +392,24 @@ struct kbase_clk_rate_trace_manager {
|
||||
* the call to it from runtime_gpu_active callback can be
|
||||
* skipped.
|
||||
* @gpu_lost: Flag indicating gpu lost
|
||||
* This structure contains data for the power management framework. There
|
||||
* is one instance of this structure per device in the system.
|
||||
* This structure contains data for the power management framework.
|
||||
* There is one instance of this structure per device in the system.
|
||||
* @zero_active_count_wait: Wait queue set when active_count == 0
|
||||
* @resume_wait: system resume of GPU device.
|
||||
* @debug_core_mask: Bit masks identifying the available shader cores that are
|
||||
* specified via sysfs. One mask per job slot.
|
||||
* specified via sysfs. One mask per job slot.
|
||||
* @debug_core_mask_all: Bit masks identifying the available shader cores that
|
||||
* are specified via sysfs.
|
||||
* are specified via sysfs.
|
||||
* @callback_power_runtime_init: Callback for initializing the runtime power
|
||||
* management. Return 0 on success, else error code
|
||||
* management. Return 0 on success, else error code
|
||||
* @callback_power_runtime_term: Callback for terminating the runtime power
|
||||
* management.
|
||||
* management.
|
||||
* @dvfs_period: Time in milliseconds between each dvfs sample
|
||||
* @backend: KBase PM backend data
|
||||
* @arb_vm_state: The state of the arbiter VM machine
|
||||
* @gpu_users_waiting: Used by virtualization to notify the arbiter that there
|
||||
* are users waiting for the GPU so that it can request and resume the
|
||||
* driver.
|
||||
* are users waiting for the GPU so that it can request
|
||||
* and resume the driver.
|
||||
* @clk_rtm: The state of the GPU clock rate trace manager
|
||||
*/
|
||||
struct kbase_pm_device_data {
|
||||
@@ -482,16 +486,16 @@ struct kbase_mem_pool {
|
||||
/**
|
||||
* struct kbase_mem_pool_group - a complete set of physical memory pools.
|
||||
*
|
||||
* @small: Array of objects containing the state for pools of 4 KiB size
|
||||
* physical pages.
|
||||
* @large: Array of objects containing the state for pools of 2 MiB size
|
||||
* physical pages.
|
||||
*
|
||||
* Memory pools are used to allow efficient reallocation of previously-freed
|
||||
* physical pages. A pair of memory pools is initialized for each physical
|
||||
* memory group: one for 4 KiB pages and one for 2 MiB pages. These arrays
|
||||
* should be indexed by physical memory group ID, the meaning of which is
|
||||
* defined by the systems integrator.
|
||||
*
|
||||
* @small: Array of objects containing the state for pools of 4 KiB size
|
||||
* physical pages.
|
||||
* @large: Array of objects containing the state for pools of 2 MiB size
|
||||
* physical pages.
|
||||
*/
|
||||
struct kbase_mem_pool_group {
|
||||
struct kbase_mem_pool small[MEMORY_GROUP_MANAGER_NR_GROUPS];
|
||||
@@ -512,11 +516,11 @@ struct kbase_mem_pool_config {
|
||||
* struct kbase_mem_pool_group_config - Initial configuration for a complete
|
||||
* set of physical memory pools
|
||||
*
|
||||
* This array should be indexed by physical memory group ID, the meaning
|
||||
* of which is defined by the systems integrator.
|
||||
*
|
||||
* @small: Array of initial configuration for pools of 4 KiB pages.
|
||||
* @large: Array of initial configuration for pools of 2 MiB pages.
|
||||
*
|
||||
* This array should be indexed by physical memory group ID, the meaning
|
||||
* of which is defined by the systems integrator.
|
||||
*/
|
||||
struct kbase_mem_pool_group_config {
|
||||
struct kbase_mem_pool_config small[MEMORY_GROUP_MANAGER_NR_GROUPS];
|
||||
@@ -750,8 +754,13 @@ struct kbase_process {
|
||||
* @hwcnt.addr: HW counter address
|
||||
* @hwcnt.addr_bytes: HW counter size in bytes
|
||||
* @hwcnt.backend: Kbase instrumentation backend
|
||||
* @hwcnt_watchdog_timer: Hardware counter watchdog interface.
|
||||
* @hwcnt_gpu_jm_backend: Job manager GPU backend interface, used as superclass reference
|
||||
* pointer by hwcnt_gpu_iface, which wraps this implementation in
|
||||
* order to extend it with periodic dumping functionality.
|
||||
* @hwcnt_gpu_iface: Backend interface for GPU hardware counter access.
|
||||
* @hwcnt_watchdog_timer: Watchdog interface, used by the GPU backend hwcnt_gpu_iface to
|
||||
* perform periodic dumps in order to prevent hardware counter value
|
||||
* overflow or saturation.
|
||||
* @hwcnt_gpu_ctx: Context for GPU hardware counter access.
|
||||
* @hwaccess_lock must be held when calling
|
||||
* kbase_hwcnt_context_enable() with @hwcnt_gpu_ctx.
|
||||
@@ -763,14 +772,6 @@ struct kbase_process {
|
||||
* therefore timeline is disabled.
|
||||
* @timeline: Timeline context created per device.
|
||||
* @ktrace: kbase device's ktrace
|
||||
* @trace_lock: Lock to serialize the access to trace buffer.
|
||||
* @trace_first_out: Index/offset in the trace buffer at which the first
|
||||
* unread message is present.
|
||||
* @trace_next_in: Index/offset in the trace buffer at which the new
|
||||
* message will be written.
|
||||
* @trace_rbuf: Pointer to the buffer storing debug messages/prints
|
||||
* tracing the various events in Driver.
|
||||
* The buffer is filled in circular fashion.
|
||||
* @reset_timeout_ms: Number of milliseconds to wait for the soft stop to
|
||||
* complete for the GPU jobs before proceeding with the
|
||||
* GPU reset.
|
||||
@@ -875,6 +876,13 @@ struct kbase_process {
|
||||
* backend specific data for HW access layer.
|
||||
* @faults_pending: Count of page/bus faults waiting for bottom half processing
|
||||
* via workqueues.
|
||||
* @mmu_hw_operation_in_progress: Set before sending the MMU command and is
|
||||
* cleared after the command is complete. Whilst this
|
||||
* flag is set, the write to L2_PWROFF register will be
|
||||
* skipped which is needed to workaround the HW issue
|
||||
* GPU2019-3878. PM state machine is invoked after
|
||||
* clearing this flag and @hwaccess_lock is used to
|
||||
* serialize the access.
|
||||
* @poweroff_pending: Set when power off operation for GPU is started, reset when
|
||||
* power on for GPU is started.
|
||||
* @infinite_cache_active_default: Set to enable using infinite cache for all the
|
||||
@@ -904,9 +912,6 @@ struct kbase_process {
|
||||
* enabled.
|
||||
* @protected_mode_hwcnt_disable_work: Work item to disable GPU hardware
|
||||
* counters, used if atomic disable is not possible.
|
||||
* @buslogger: Pointer to the structure required for interfacing
|
||||
* with the bus logger module to set the size of buffer
|
||||
* used by the module for capturing bus logs.
|
||||
* @irq_reset_flush: Flag to indicate that GPU reset is in-flight and flush of
|
||||
* IRQ + bottom half is being done, to prevent the writes
|
||||
* to MMU_IRQ_CLEAR & MMU_IRQ_MASK registers.
|
||||
@@ -1007,7 +1012,7 @@ struct kbase_device {
|
||||
struct memory_group_manager_device *mgm_dev;
|
||||
|
||||
struct kbase_as as[BASE_MAX_NR_AS];
|
||||
u16 as_free; /* Bitpattern of free Address Spaces */
|
||||
u16 as_free;
|
||||
struct kbase_context *as_to_kctx[BASE_MAX_NR_AS];
|
||||
|
||||
spinlock_t mmu_mask_change;
|
||||
@@ -1027,7 +1032,6 @@ struct kbase_device {
|
||||
|
||||
#if MALI_USE_CSF
|
||||
struct kbase_hwcnt_backend_csf_if hwcnt_backend_csf_if_fw;
|
||||
struct kbase_hwcnt_watchdog_interface hwcnt_watchdog_timer;
|
||||
#else
|
||||
struct kbase_hwcnt {
|
||||
spinlock_t lock;
|
||||
@@ -1038,9 +1042,13 @@ struct kbase_device {
|
||||
|
||||
struct kbase_instr_backend backend;
|
||||
} hwcnt;
|
||||
|
||||
struct kbase_hwcnt_backend_interface hwcnt_gpu_jm_backend;
|
||||
#endif
|
||||
|
||||
struct kbase_hwcnt_backend_interface hwcnt_gpu_iface;
|
||||
struct kbase_hwcnt_watchdog_interface hwcnt_watchdog_timer;
|
||||
|
||||
struct kbase_hwcnt_context *hwcnt_gpu_ctx;
|
||||
struct kbase_hwcnt_virtualizer *hwcnt_gpu_virt;
|
||||
struct kbase_vinstr_context *vinstr_ctx;
|
||||
@@ -1141,6 +1149,9 @@ struct kbase_device {
|
||||
|
||||
atomic_t faults_pending;
|
||||
|
||||
#if MALI_USE_CSF
|
||||
bool mmu_hw_operation_in_progress;
|
||||
#endif
|
||||
bool poweroff_pending;
|
||||
|
||||
#if (KERNEL_VERSION(4, 4, 0) <= LINUX_VERSION_CODE)
|
||||
@@ -1492,8 +1503,8 @@ struct kbase_sub_alloc {
|
||||
* @mem_partials_lock: Lock for protecting the operations done on the elements
|
||||
* added to @mem_partials list.
|
||||
* @mem_partials: List head for the list of large pages, 2MB in size, which
|
||||
* which have been split into 4 KB pages and are used
|
||||
* partially for the allocations >= 2 MB in size.
|
||||
* have been split into 4 KB pages and are used partially
|
||||
* for the allocations >= 2 MB in size.
|
||||
* @reg_lock: Lock used for GPU virtual address space management operations,
|
||||
* like adding/freeing a memory region in the address space.
|
||||
* Can this be converted to a rwlock?
|
||||
@@ -1505,6 +1516,17 @@ struct kbase_sub_alloc {
|
||||
* @reg_rbtree_exec: RB tree of the memory regions allocated from the EXEC_VA
|
||||
* zone of the GPU virtual address space. Used for GPU-executable
|
||||
* allocations which don't need the SAME_VA property.
|
||||
* @reg_rbtree_exec_fixed: RB tree of the memory regions allocated from the
|
||||
* EXEC_FIXED_VA zone of the GPU virtual address space. Used for
|
||||
* GPU-executable allocations with FIXED/FIXABLE GPU virtual
|
||||
* addresses.
|
||||
* @reg_rbtree_fixed: RB tree of the memory regions allocated from the FIXED_VA zone
|
||||
* of the GPU virtual address space. Used for allocations with
|
||||
* FIXED/FIXABLE GPU virtual addresses.
|
||||
* @num_fixable_allocs: A count for the number of memory allocations with the
|
||||
* BASE_MEM_FIXABLE property.
|
||||
* @num_fixed_allocs: A count for the number of memory allocations with the
|
||||
* BASE_MEM_FIXED property.
|
||||
* @reg_zone: Zone information for the reg_rbtree_<...> members.
|
||||
* @cookies: Bitmask containing of BITS_PER_LONG bits, used mainly for
|
||||
* SAME_VA allocations to defer the reservation of memory region
|
||||
@@ -1608,6 +1630,8 @@ struct kbase_sub_alloc {
|
||||
* dumping of its debug info is in progress.
|
||||
* @job_fault_resume_event_list: List containing atoms completed after the faulty
|
||||
* atom but before the debug data for faulty atom was dumped.
|
||||
* @mem_view_column_width: Controls the number of bytes shown in every column of the
|
||||
* output of "mem_view" debugfs file.
|
||||
* @jsctx_queue: Per slot & priority arrays of object containing the root
|
||||
* of RB-tree holding currently runnable atoms on the job slot
|
||||
* and the head item of the linked list of atoms blocked on
|
||||
@@ -1748,6 +1772,12 @@ struct kbase_context {
|
||||
struct rb_root reg_rbtree_same;
|
||||
struct rb_root reg_rbtree_custom;
|
||||
struct rb_root reg_rbtree_exec;
|
||||
#if MALI_USE_CSF
|
||||
struct rb_root reg_rbtree_exec_fixed;
|
||||
struct rb_root reg_rbtree_fixed;
|
||||
atomic64_t num_fixable_allocs;
|
||||
atomic64_t num_fixed_allocs;
|
||||
#endif
|
||||
struct kbase_reg_zone reg_zone[KBASE_REG_ZONE_MAX];
|
||||
|
||||
#if MALI_USE_CSF
|
||||
@@ -1817,6 +1847,7 @@ struct kbase_context {
|
||||
unsigned int *reg_dump;
|
||||
atomic_t job_fault_count;
|
||||
struct list_head job_fault_resume_event_list;
|
||||
unsigned int mem_view_column_width;
|
||||
|
||||
#endif /* CONFIG_DEBUG_FS */
|
||||
struct kbase_va_region *jit_alloc[1 + BASE_JIT_ALLOC_COUNT];
|
||||
@@ -1924,13 +1955,6 @@ enum kbase_share_attr_bits {
|
||||
SHARE_INNER_BITS = (3ULL << 8) /* inner shareable coherency */
|
||||
};
|
||||
|
||||
/**
|
||||
* enum kbase_timeout_selector - The choice of which timeout to get scaled
|
||||
* using current GPU frequency.
|
||||
* @CSF_FIRMWARE_TIMEOUT: Response timeout from CSF firmware.
|
||||
*/
|
||||
enum kbase_timeout_selector { CSF_FIRMWARE_TIMEOUT };
|
||||
|
||||
/**
|
||||
* kbase_device_is_cpu_coherent - Returns if the device is CPU coherent.
|
||||
* @kbdev: kbase device
|
||||
@@ -1946,6 +1970,24 @@ static inline bool kbase_device_is_cpu_coherent(struct kbase_device *kbdev)
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* kbase_get_lock_region_min_size_log2 - Returns the minimum size of the MMU lock
|
||||
* region, as a logarithm
|
||||
*
|
||||
* @gpu_props: GPU properties
|
||||
*
|
||||
* Return: the minimum size of the MMU lock region as dictated by the corresponding
|
||||
* arch spec.
|
||||
*/
|
||||
static inline u64 kbase_get_lock_region_min_size_log2(struct kbase_gpu_props const *gpu_props)
|
||||
{
|
||||
if (GPU_ID2_MODEL_MATCH_VALUE(gpu_props->props.core_props.product_id) >=
|
||||
GPU_ID2_MODEL_MAKE(12, 0))
|
||||
return 12; /* 4 kB */
|
||||
|
||||
return 15; /* 32 kB */
|
||||
}
|
||||
|
||||
/* Conversion helpers for setting up high resolution timers */
|
||||
#define HR_TIMER_DELAY_MSEC(x) (ns_to_ktime(((u64)(x))*1000000U))
|
||||
#define HR_TIMER_DELAY_NSEC(x) (ns_to_ktime(x))
|
||||
@@ -1955,4 +1997,4 @@ static inline bool kbase_device_is_cpu_coherent(struct kbase_device *kbdev)
|
||||
/* Maximum number of loops polling the GPU for an AS command to complete before we assume the GPU has hung */
|
||||
#define KBASE_AS_INACTIVE_MAX_LOOPS 100000000
|
||||
|
||||
#endif /* _KBASE_DEFS_H_ */
|
||||
#endif /* _KBASE_DEFS_H_ */
|
||||
|
||||
@@ -161,7 +161,7 @@ kbase_dma_fence_cancel_atom(struct kbase_jd_atom *katom)
|
||||
if (katom->status == KBASE_JD_ATOM_STATE_QUEUED) {
|
||||
/* Wait was cancelled - zap the atom */
|
||||
katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
|
||||
if (jd_done_nolock(katom, NULL))
|
||||
if (jd_done_nolock(katom, true))
|
||||
kbase_js_sched_all(katom->kctx->kbdev);
|
||||
}
|
||||
}
|
||||
@@ -196,7 +196,7 @@ kbase_dma_fence_work(struct work_struct *pwork)
|
||||
* dependency. Run jd_done_nolock() on the katom if it is completed.
|
||||
*/
|
||||
if (unlikely(katom->status == KBASE_JD_ATOM_STATE_COMPLETED))
|
||||
jd_done_nolock(katom, NULL);
|
||||
jd_done_nolock(katom, true);
|
||||
else
|
||||
kbase_jd_dep_clear_locked(katom);
|
||||
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
|
||||
/*
|
||||
*
|
||||
* (C) COPYRIGHT 2010-2016, 2020-2021 ARM Limited. All rights reserved.
|
||||
* (C) COPYRIGHT 2010-2016, 2020-2022 ARM Limited. All rights reserved.
|
||||
*
|
||||
* This program is free software and is provided to you under the terms of the
|
||||
* GNU General Public License version 2 as published by the Free Software
|
||||
@@ -132,6 +132,8 @@ void kbase_dma_fence_term(struct kbase_context *kctx);
|
||||
/**
|
||||
* kbase_dma_fence_init() - Initialize Mali dma-fence context
|
||||
* @kctx: kbase context to initialize
|
||||
*
|
||||
* Return: 0 on success, error code otherwise.
|
||||
*/
|
||||
int kbase_dma_fence_init(struct kbase_context *kctx);
|
||||
|
||||
|
||||
@@ -239,7 +239,7 @@ int kbase_dummy_job_wa_execute(struct kbase_device *kbdev, u64 cores)
|
||||
return failed ? -EFAULT : 0;
|
||||
}
|
||||
|
||||
static ssize_t show_dummy_job_wa_info(struct device * const dev,
|
||||
static ssize_t dummy_job_wa_info_show(struct device * const dev,
|
||||
struct device_attribute * const attr, char * const buf)
|
||||
{
|
||||
struct kbase_device *const kbdev = dev_get_drvdata(dev);
|
||||
@@ -254,7 +254,7 @@ static ssize_t show_dummy_job_wa_info(struct device * const dev,
|
||||
return err;
|
||||
}
|
||||
|
||||
static DEVICE_ATTR(dummy_job_wa_info, 0444, show_dummy_job_wa_info, NULL);
|
||||
static DEVICE_ATTR_RO(dummy_job_wa_info);
|
||||
|
||||
static bool wa_blob_load_needed(struct kbase_device *kbdev)
|
||||
{
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user