MALI: rockchip: upgrade bifrost DDK to g13p0-01eac0, from g12p0-01eac0

Note, the corresponding mali_csffw.bin for DDK g13 MUST be used.

Change-Id: I63c00b4eccd2e780aea2691faa2ecea6847c41e2
Signed-off-by: Zhen Chen <chenzhen@rock-chips.com>
This commit is contained in:
Zhen Chen
2022-06-19 09:45:39 +08:00
committed by Tao Huang
parent ae49253925
commit c38b77a6e2
123 changed files with 3777 additions and 2476 deletions

View File

@@ -211,6 +211,31 @@ Description:
without forward progress to allow to elapse before terminating a
GPU command queue group.
What: /sys/class/misc/mali%u/device/mcu_shader_pwroff_timeout
Description:
This attribute is available only with a mali platform
device-driver that supports a CSF GPU. The duration value is
in microseconds and is used for configuring the MCU shader core
power-off timer. The configured timer only takes effect when the
host driver has delegated shader core power management to the
MCU. The supplied value is recorded internally without any
change, but the actual field value is subject to scaling by the
power-off timer source frequency and to a maximum value limit.
The default source is the SYSTEM_TIMESTAMP counter; if the
platform is not able to supply it, the GPU CYCLE_COUNTER source
is used as an alternative.
Setting the value to zero disables MCU-controlled shader/tiler
power management.
What: /sys/class/misc/mali%u/device/csg_scheduling_period
Description:
This attribute is available only with a mali platform
device-driver that supports a CSF GPU. The duration value is
in milliseconds and is used for configuring the CSF scheduling
tick duration.
What: /sys/class/misc/mali%u/device/reset_timeout
Description:
This attribute is used to set the number of milliseconds to

View File

@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
* (C) COPYRIGHT 2012-2014, 2017-2018, 2020-2021 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2012-2014, 2017-2018, 2020-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -20,6 +20,7 @@
*/
#include <linux/version.h>
#include <linux/version_compat_defs.h>
#include <linux/uaccess.h>
#include <linux/init.h>
#include <linux/module.h>
@@ -99,9 +100,6 @@ static const struct file_operations dma_buf_lock_fops = {
#if defined(HAVE_COMPAT_IOCTL) || ((KERNEL_VERSION(5, 9, 0) <= LINUX_VERSION_CODE))
.compat_ioctl = dma_buf_lock_ioctl,
#endif
#if !defined(HAVE_UNLOCKED_IOCTL) && !defined(HAVE_COMPAT_IOCTL) && ((KERNEL_VERSION(2, 6, 36) > LINUX_VERSION_CODE))
.ioctl = dma_buf_lock_ioctl,
#endif
};
struct dma_buf_lock_resource {
@@ -480,15 +478,18 @@ static int dma_buf_lock_handle_release(struct inode *inode, struct file *file)
return 0;
}
static unsigned int dma_buf_lock_handle_poll(
struct file *file,
struct poll_table_struct *wait)
static __poll_t dma_buf_lock_handle_poll(struct file *file, poll_table *wait)
{
struct dma_buf_lock_resource *resource;
unsigned int ret = 0;
if (!is_dma_buf_lock_file(file))
if (!is_dma_buf_lock_file(file)) {
#if (KERNEL_VERSION(4, 19, 0) > LINUX_VERSION_CODE)
return POLLERR;
#else
return EPOLLERR;
#endif
}
resource = file->private_data;
#if DMA_BUF_LOCK_DEBUG
@@ -496,9 +497,15 @@ static unsigned int dma_buf_lock_handle_poll(
#endif
if (atomic_read(&resource->locked) == 1) {
/* Resources have been locked */
#if (KERNEL_VERSION(4, 19, 0) > LINUX_VERSION_CODE)
ret = POLLIN | POLLRDNORM;
if (resource->exclusive)
ret |= POLLOUT | POLLWRNORM;
ret |= POLLOUT | POLLWRNORM;
#else
ret = EPOLLIN | EPOLLRDNORM;
if (resource->exclusive)
ret |= EPOLLOUT | EPOLLWRNORM;
#endif
} else {
if (!poll_does_not_wait(wait))
poll_wait(file, &resource->wait, wait);
@@ -533,10 +540,12 @@ static int dma_buf_lock_dolock(struct dma_buf_lock_k_request *request)
{
struct dma_buf_lock_resource *resource;
struct ww_acquire_ctx ww_ctx;
struct file *file;
int size;
int fd;
int i;
int ret;
int error;
if (request->list_of_dma_buf_fds == NULL)
return -EINVAL;
@@ -634,15 +643,21 @@ static int dma_buf_lock_dolock(struct dma_buf_lock_k_request *request)
kref_get(&resource->refcount);
/* Create file descriptor associated with lock request */
fd = anon_inode_getfd("dma_buf_lock", &dma_buf_lock_handle_fops,
(void *)resource, 0);
if (fd < 0) {
error = get_unused_fd_flags(0);
if (error < 0)
return error;
fd = error;
file = anon_inode_getfile("dma_buf_lock", &dma_buf_lock_handle_fops, (void *)resource, 0);
if (IS_ERR(file)) {
put_unused_fd(fd);
mutex_lock(&dma_buf_lock_mutex);
kref_put(&resource->refcount, dma_buf_lock_dounlock);
kref_put(&resource->refcount, dma_buf_lock_dounlock);
mutex_unlock(&dma_buf_lock_mutex);
return fd;
return PTR_ERR(file);
}
resource->exclusive = request->exclusive;
@@ -711,9 +726,7 @@ static int dma_buf_lock_dolock(struct dma_buf_lock_k_request *request)
dma_resv_add_shared_fence(resv, &resource->fence);
#endif
} else {
ret = dma_buf_lock_add_fence_reservation_callback(resource,
resv,
true);
ret = dma_buf_lock_add_fence_reservation_callback(resource, resv, true);
if (ret) {
#if DMA_BUF_LOCK_DEBUG
pr_debug("%s : Error %d adding reservation to callback.\n", __func__, ret);
@@ -758,6 +771,10 @@ static int dma_buf_lock_dolock(struct dma_buf_lock_k_request *request)
kref_put(&resource->refcount, dma_buf_lock_dounlock);
mutex_unlock(&dma_buf_lock_mutex);
/* Installing the fd is deferred to the very last operation before return
* to avoid allowing userspace to close it during the setup.
*/
fd_install(fd, file);
return fd;
}

View File

@@ -30,9 +30,6 @@
#include <linux/atomic.h>
#include <linux/mm.h>
#include <linux/highmem.h>
#if (KERNEL_VERSION(4, 8, 0) > LINUX_VERSION_CODE)
#include <linux/dma-attrs.h>
#endif
#include <linux/dma-mapping.h>
/* Maximum size allowed in a single DMA_BUF_TE_ALLOC call */
@@ -211,20 +208,11 @@ static void dma_buf_te_release(struct dma_buf *buf)
/* no need for locking */
if (alloc->contiguous) {
#if (KERNEL_VERSION(4, 8, 0) <= LINUX_VERSION_CODE)
dma_free_attrs(te_device.this_device,
alloc->nr_pages * PAGE_SIZE,
alloc->contig_cpu_addr,
alloc->contig_dma_addr,
DMA_ATTR_WRITE_COMBINE);
#else
DEFINE_DMA_ATTRS(attrs);
dma_set_attr(DMA_ATTR_WRITE_COMBINE, &attrs);
dma_free_attrs(te_device.this_device,
alloc->nr_pages * PAGE_SIZE,
alloc->contig_cpu_addr, alloc->contig_dma_addr, &attrs);
#endif
} else {
for (i = 0; i < alloc->nr_pages; i++)
__free_page(alloc->pages[i]);
@@ -269,32 +257,17 @@ static int dma_buf_te_sync(struct dma_buf *dmabuf,
return 0;
}
#if (KERNEL_VERSION(4, 6, 0) <= LINUX_VERSION_CODE)
static int dma_buf_te_begin_cpu_access(struct dma_buf *dmabuf,
enum dma_data_direction direction)
#else
static int dma_buf_te_begin_cpu_access(struct dma_buf *dmabuf, size_t start,
size_t len,
enum dma_data_direction direction)
#endif
{
return dma_buf_te_sync(dmabuf, direction, true);
}
#if (KERNEL_VERSION(4, 6, 0) <= LINUX_VERSION_CODE)
/**
 * dma_buf_te_end_cpu_access() - end-of-CPU-access handler
 * @dmabuf: buffer forwarded unchanged to dma_buf_te_sync()
 * @direction: DMA data direction forwarded unchanged to dma_buf_te_sync()
 *
 * Forwards to dma_buf_te_sync() with its last argument set to false; the
 * matching dma_buf_te_begin_cpu_access() handler passes true instead.
 *
 * Return: whatever dma_buf_te_sync() returns.
 */
static int dma_buf_te_end_cpu_access(struct dma_buf *dmabuf,
enum dma_data_direction direction)
{
return dma_buf_te_sync(dmabuf, direction, false);
}
#else
static void dma_buf_te_end_cpu_access(struct dma_buf *dmabuf, size_t start,
size_t len,
enum dma_data_direction direction)
{
dma_buf_te_sync(dmabuf, direction, false);
}
#endif
static void dma_buf_te_mmap_open(struct vm_area_struct *vma)
{
@@ -521,21 +494,11 @@ static int do_dma_buf_te_ioctl_alloc(struct dma_buf_te_ioctl_alloc __user *buf,
if (contiguous) {
dma_addr_t dma_aux;
#if (KERNEL_VERSION(4, 8, 0) <= LINUX_VERSION_CODE)
alloc->contig_cpu_addr = dma_alloc_attrs(te_device.this_device,
alloc->nr_pages * PAGE_SIZE,
&alloc->contig_dma_addr,
GFP_KERNEL | __GFP_ZERO,
DMA_ATTR_WRITE_COMBINE);
#else
DEFINE_DMA_ATTRS(attrs);
dma_set_attr(DMA_ATTR_WRITE_COMBINE, &attrs);
alloc->contig_cpu_addr = dma_alloc_attrs(te_device.this_device,
alloc->nr_pages * PAGE_SIZE,
&alloc->contig_dma_addr,
GFP_KERNEL | __GFP_ZERO, &attrs);
#endif
if (!alloc->contig_cpu_addr) {
dev_err(te_device.this_device, "%s: couldn't alloc contiguous buffer %zu pages",
__func__, alloc->nr_pages);
@@ -591,20 +554,11 @@ no_export:
/* i still valid */
no_page:
if (contiguous) {
#if (KERNEL_VERSION(4, 8, 0) <= LINUX_VERSION_CODE)
dma_free_attrs(te_device.this_device,
alloc->nr_pages * PAGE_SIZE,
alloc->contig_cpu_addr,
alloc->contig_dma_addr,
DMA_ATTR_WRITE_COMBINE);
#else
DEFINE_DMA_ATTRS(attrs);
dma_set_attr(DMA_ATTR_WRITE_COMBINE, &attrs);
dma_free_attrs(te_device.this_device,
alloc->nr_pages * PAGE_SIZE,
alloc->contig_cpu_addr, alloc->contig_dma_addr, &attrs);
#endif
} else {
while (i-- > 0)
__free_page(alloc->pages[i]);
@@ -703,7 +657,6 @@ static u32 dma_te_buf_fill(struct dma_buf *dma_buf, unsigned int value)
struct sg_table *sgt;
struct scatterlist *sg;
unsigned int count;
unsigned int offset = 0;
int ret = 0;
size_t i;
@@ -717,11 +670,7 @@ static u32 dma_te_buf_fill(struct dma_buf *dma_buf, unsigned int value)
goto no_import;
}
ret = dma_buf_begin_cpu_access(dma_buf,
#if KERNEL_VERSION(4, 6, 0) > LINUX_VERSION_CODE
0, dma_buf->size,
#endif
DMA_BIDIRECTIONAL);
ret = dma_buf_begin_cpu_access(dma_buf, DMA_BIDIRECTIONAL);
if (ret)
goto no_cpu_access;
@@ -744,15 +693,10 @@ static u32 dma_te_buf_fill(struct dma_buf *dma_buf, unsigned int value)
dma_buf_kunmap(dma_buf, i >> PAGE_SHIFT, addr);
#endif
}
offset += sg_dma_len(sg);
}
no_kmap:
dma_buf_end_cpu_access(dma_buf,
#if KERNEL_VERSION(4, 6, 0) > LINUX_VERSION_CODE
0, dma_buf->size,
#endif
DMA_BIDIRECTIONAL);
dma_buf_end_cpu_access(dma_buf, DMA_BIDIRECTIONAL);
no_cpu_access:
dma_buf_unmap_attachment(attachment, sgt, DMA_BIDIRECTIONAL);
no_import:

View File

@@ -42,18 +42,7 @@
static inline vm_fault_t vmf_insert_pfn_prot(struct vm_area_struct *vma,
unsigned long addr, unsigned long pfn, pgprot_t pgprot)
{
int err;
#if ((KERNEL_VERSION(4, 4, 147) >= LINUX_VERSION_CODE) || \
((KERNEL_VERSION(4, 6, 0) > LINUX_VERSION_CODE) && \
(KERNEL_VERSION(4, 5, 0) <= LINUX_VERSION_CODE)))
if (pgprot_val(pgprot) != pgprot_val(vma->vm_page_prot))
return VM_FAULT_SIGBUS;
err = vm_insert_pfn(vma, addr, pfn);
#else
err = vm_insert_pfn_prot(vma, addr, pfn, pgprot);
#endif
int err = vm_insert_pfn_prot(vma, addr, pfn, pgprot);
if (unlikely(err == -ENOMEM))
return VM_FAULT_OOM;
@@ -64,6 +53,10 @@ static inline vm_fault_t vmf_insert_pfn_prot(struct vm_area_struct *vma,
}
#endif
#define PTE_PBHA_SHIFT (59)
#define PTE_PBHA_MASK ((uint64_t)0xf << PTE_PBHA_SHIFT)
#define PTE_RES_BIT_MULTI_AS_SHIFT (63)
#define IMPORTED_MEMORY_ID (MEMORY_GROUP_MANAGER_NR_GROUPS - 1)
/**
@@ -335,8 +328,6 @@ static u64 example_mgm_update_gpu_pte(
int const mmu_level, u64 pte)
{
struct mgm_groups *const data = mgm_dev->data;
const u32 pbha_bit_pos = 59; /* bits 62:59 */
const u32 pbha_bit_mask = 0xf; /* 4-bit */
dev_dbg(data->dev,
"%s(mgm_dev=%p, group_id=%d, mmu_level=%d, pte=0x%llx)\n",
@@ -346,13 +337,27 @@ static u64 example_mgm_update_gpu_pte(
WARN_ON(group_id >= MEMORY_GROUP_MANAGER_NR_GROUPS))
return pte;
pte |= ((u64)group_id & pbha_bit_mask) << pbha_bit_pos;
pte |= ((u64)group_id << PTE_PBHA_SHIFT) & PTE_PBHA_MASK;
/* Address could be translated into a different bus address here */
pte |= ((u64)1 << PTE_RES_BIT_MULTI_AS_SHIFT);
data->groups[group_id].update_gpu_pte++;
return pte;
}
/**
 * example_mgm_pte_to_original_pte() - Strip this manager's PTE modifications
 * @mgm_dev:   memory group manager device (not used by this implementation)
 * @group_id:  memory group ID (not used by this implementation)
 * @mmu_level: MMU level of the entry (not used by this implementation)
 * @pte:       page table entry value to restore
 *
 * Clears every bit covered by PTE_PBHA_MASK and the single bit at position
 * PTE_RES_BIT_MULTI_AS_SHIFT, leaving all other bits of @pte untouched.
 *
 * Return: @pte with those bits cleared.
 */
static u64 example_mgm_pte_to_original_pte(struct memory_group_manager_device *const mgm_dev,
					   int const group_id, int const mmu_level, u64 pte)
{
	/* Group ID field plus the extra bit that example_mgm_update_gpu_pte() sets */
	const u64 managed_bits = PTE_PBHA_MASK | ((u64)1 << PTE_RES_BIT_MULTI_AS_SHIFT);

	return pte & ~managed_bits;
}
static vm_fault_t example_mgm_vmf_insert_pfn_prot(
struct memory_group_manager_device *const mgm_dev, int const group_id,
struct vm_area_struct *const vma, unsigned long const addr,
@@ -428,6 +433,7 @@ static int memory_group_manager_probe(struct platform_device *pdev)
example_mgm_get_import_memory_id;
mgm_dev->ops.mgm_vmf_insert_pfn_prot = example_mgm_vmf_insert_pfn_prot;
mgm_dev->ops.mgm_update_gpu_pte = example_mgm_update_gpu_pte;
mgm_dev->ops.mgm_pte_to_original_pte = example_mgm_pte_to_original_pte;
mgm_data = kzalloc(sizeof(*mgm_data), GFP_KERNEL);
if (!mgm_data) {

View File

@@ -71,7 +71,7 @@ endif
#
# Driver version string which is returned to userspace via an ioctl
MALI_RELEASE_NAME ?= '"g12p0-01eac0"'
MALI_RELEASE_NAME ?= '"g13p0-01eac0"'
# Set up defaults if not defined by build system
ifeq ($(CONFIG_MALI_BIFROST_DEBUG), y)
MALI_UNIT_TEST = 1

View File

@@ -214,6 +214,20 @@ config MALI_GEM5_BUILD
comment "Debug options"
depends on MALI_BIFROST && MALI_BIFROST_EXPERT
config MALI_FW_CORE_DUMP
bool "Enable support for FW core dump"
depends on MALI_BIFROST && MALI_BIFROST_EXPERT && MALI_CSF_SUPPORT
default n
help
Adds the ability to request a firmware core dump.
Example:
* To explicitly request a core dump:
echo 1 >/sys/kernel/debug/mali0/fw_core_dump
* To output the current core dump (after explicitly requesting a core dump,
or after the kernel driver has reported an internal firmware error):
cat /sys/kernel/debug/mali0/fw_core_dump
config MALI_BIFROST_DEBUG
bool "Enable debug build"
depends on MALI_BIFROST && MALI_BIFROST_EXPERT

View File

@@ -130,16 +130,19 @@ ifeq ($(CONFIG_MALI_BIFROST),m)
ifeq ($(CONFIG_MALI_KUTF), y)
CONFIG_MALI_KUTF_IRQ_TEST ?= y
CONFIG_MALI_KUTF_CLK_RATE_TRACE ?= y
CONFIG_MALI_KUTF_MGM_INTEGRATION_TEST ?= y
else
# Prevent misuse when CONFIG_MALI_KUTF=n
CONFIG_MALI_KUTF_IRQ_TEST = n
CONFIG_MALI_KUTF_CLK_RATE_TRACE = n
CONFIG_MALI_KUTF_MGM_INTEGRATION_TEST = n
endif
else
# Prevent misuse when CONFIG_MALI_BIFROST_DEBUG=n
CONFIG_MALI_KUTF = n
CONFIG_MALI_KUTF_IRQ_TEST = n
CONFIG_MALI_KUTF_CLK_RATE_TRACE = n
CONFIG_MALI_KUTF_MGM_INTEGRATION_TEST = n
endif
else
# Prevent misuse when CONFIG_MALI_BIFROST=n
@@ -149,6 +152,7 @@ else
CONFIG_MALI_KUTF = n
CONFIG_MALI_KUTF_IRQ_TEST = n
CONFIG_MALI_KUTF_CLK_RATE_TRACE = n
CONFIG_MALI_KUTF_MGM_INTEGRATION_TEST = n
endif
# All Mali CONFIG should be listed here
@@ -189,6 +193,7 @@ CONFIGS := \
CONFIG_MALI_KUTF \
CONFIG_MALI_KUTF_IRQ_TEST \
CONFIG_MALI_KUTF_CLK_RATE_TRACE \
CONFIG_MALI_KUTF_MGM_INTEGRATION_TEST \
CONFIG_MALI_XEN

View File

@@ -1,6 +1,6 @@
# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
#
# (C) COPYRIGHT 2012-2021 ARM Limited. All rights reserved.
# (C) COPYRIGHT 2012-2022 ARM Limited. All rights reserved.
#
# This program is free software and is provided to you under the terms of the
# GNU General Public License version 2 as published by the Free Software
@@ -65,8 +65,7 @@ config MALI_CSF_SUPPORT
config MALI_BIFROST_DEVFREQ
bool "Enable devfreq support for Mali"
depends on MALI_BIFROST
default y if PLATFORM_JUNO
default y if PLATFORM_CUSTOM
default y
help
Support devfreq for Mali.
@@ -192,6 +191,20 @@ config MALI_CORESTACK
If unsure, say N.
config MALI_FW_CORE_DUMP
bool "Enable support for FW core dump"
depends on MALI_BIFROST && MALI_BIFROST_EXPERT && MALI_CSF_SUPPORT
default n
help
Adds the ability to request a firmware core dump.
Example:
* To explicitly request a core dump:
echo 1 >/sys/kernel/debug/mali0/fw_core_dump
* To output the current core dump (after explicitly requesting a core dump,
or after the kernel driver has reported an internal firmware error):
cat /sys/kernel/debug/mali0/fw_core_dump
choice
prompt "Error injection level"
depends on MALI_BIFROST && MALI_BIFROST_EXPERT

View File

@@ -955,7 +955,6 @@ static inline bool kbase_arbiter_pm_vm_gpu_assigned_lockheld(
int kbase_arbiter_pm_ctx_active_handle_suspend(struct kbase_device *kbdev,
enum kbase_pm_suspend_handler suspend_handler)
{
struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
struct kbase_arbiter_vm_state *arb_vm_state = kbdev->pm.arb_vm_state;
int res = 0;
@@ -1008,11 +1007,9 @@ int kbase_arbiter_pm_ctx_active_handle_suspend(struct kbase_device *kbdev,
/* Need to synchronously wait for GPU assignment */
atomic_inc(&kbdev->pm.gpu_users_waiting);
mutex_unlock(&arb_vm_state->vm_state_lock);
mutex_unlock(&kbdev->pm.lock);
mutex_unlock(&js_devdata->runpool_mutex);
kbase_pm_unlock(kbdev);
kbase_arbiter_pm_vm_wait_gpu_assignment(kbdev);
mutex_lock(&js_devdata->runpool_mutex);
mutex_lock(&kbdev->pm.lock);
kbase_pm_lock(kbdev);
mutex_lock(&arb_vm_state->vm_state_lock);
atomic_dec(&kbdev->pm.gpu_users_waiting);
}

View File

@@ -824,12 +824,6 @@ int kbase_devfreq_init(struct kbase_device *kbdev)
goto ipa_init_failed;
}
} else {
err = kbase_ipa_init(kbdev);
if (err) {
dev_err(kbdev->dev, "IPA initialization failed\n");
goto ipa_init_failed;
}
kbdev->devfreq_cooling = of_devfreq_cooling_register_power(
kbdev->dev->of_node,
kbdev->devfreq,

View File

@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
* (C) COPYRIGHT 2014-2021 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2014-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -40,19 +40,7 @@ int kbase_backend_gpuprops_get(struct kbase_device *kbdev,
registers.l2_features = kbase_reg_read(kbdev,
GPU_CONTROL_REG(L2_FEATURES));
registers.core_features = 0;
#if !MALI_USE_CSF
/* TGOx */
registers.core_features = kbase_reg_read(kbdev,
GPU_CONTROL_REG(CORE_FEATURES));
#else /* !MALI_USE_CSF */
if (!(((registers.gpu_id & GPU_ID2_PRODUCT_MODEL) ==
GPU_ID2_PRODUCT_TDUX) ||
((registers.gpu_id & GPU_ID2_PRODUCT_MODEL) ==
GPU_ID2_PRODUCT_TODX)))
registers.core_features =
kbase_reg_read(kbdev, GPU_CONTROL_REG(CORE_FEATURES));
#endif /* MALI_USE_CSF */
registers.tiler_features = kbase_reg_read(kbdev,
GPU_CONTROL_REG(TILER_FEATURES));
registers.mem_features = kbase_reg_read(kbdev,
@@ -170,6 +158,11 @@ int kbase_backend_gpuprops_get_features(struct kbase_device *kbdev,
regdump->coherency_features = coherency_features;
if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_CORE_FEATURES))
regdump->core_features = kbase_reg_read(kbdev, GPU_CONTROL_REG(CORE_FEATURES));
else
regdump->core_features = 0;
kbase_pm_register_access_disable(kbdev);
return error;

View File

@@ -191,9 +191,7 @@ static u64 select_job_chain(struct kbase_jd_atom *katom)
return jc;
}
void kbase_job_hw_submit(struct kbase_device *kbdev,
struct kbase_jd_atom *katom,
int js)
int kbase_job_hw_submit(struct kbase_device *kbdev, struct kbase_jd_atom *katom, int js)
{
struct kbase_context *kctx;
u32 cfg;
@@ -202,13 +200,13 @@ void kbase_job_hw_submit(struct kbase_device *kbdev,
struct slot_rb *ptr_slot_rb = &kbdev->hwaccess.backend.slot_rb[js];
lockdep_assert_held(&kbdev->hwaccess_lock);
KBASE_DEBUG_ASSERT(kbdev);
KBASE_DEBUG_ASSERT(katom);
kctx = katom->kctx;
/* Command register must be available */
KBASE_DEBUG_ASSERT(kbasep_jm_is_js_free(kbdev, js, kctx));
if (WARN(!kbasep_jm_is_js_free(kbdev, js, kctx),
"Attempting to assign to occupied slot %d in kctx %pK\n", js, (void *)kctx))
return -EPERM;
dev_dbg(kctx->kbdev->dev, "Write JS_HEAD_NEXT 0x%llx for atom %pK\n",
jc_head, (void *)katom);
@@ -329,6 +327,8 @@ void kbase_job_hw_submit(struct kbase_device *kbdev,
kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_COMMAND_NEXT),
JS_COMMAND_START);
return 0;
}
/**
@@ -393,8 +393,6 @@ void kbase_job_done(struct kbase_device *kbdev, u32 done)
lockdep_assert_held(&kbdev->hwaccess_lock);
KBASE_DEBUG_ASSERT(kbdev);
KBASE_KTRACE_ADD_JM(kbdev, JM_IRQ, NULL, NULL, 0, done);
end_timestamp = ktime_get_raw();
@@ -409,7 +407,8 @@ void kbase_job_done(struct kbase_device *kbdev, u32 done)
* numbered interrupts before the higher numbered ones.
*/
i = ffs(finished) - 1;
KBASE_DEBUG_ASSERT(i >= 0);
if (WARN(i < 0, "%s: called without receiving any interrupts\n", __func__))
break;
do {
int nr_done;
@@ -619,7 +618,7 @@ void kbasep_job_slot_soft_or_hard_stop_do_action(struct kbase_device *kbdev,
u64 job_in_head_before;
u32 status_reg_after;
KBASE_DEBUG_ASSERT(!(action & (~JS_COMMAND_MASK)));
WARN_ON(action & (~JS_COMMAND_MASK));
/* Check the head pointer */
job_in_head_before = ((u64) kbase_reg_read(kbdev,
@@ -697,7 +696,8 @@ void kbasep_job_slot_soft_or_hard_stop_do_action(struct kbase_device *kbdev,
KBASE_KTRACE_ADD_JM_SLOT(kbdev, JM_HARDSTOP_1, head_kctx, head, head->jc, js);
break;
default:
BUG();
WARN(1, "Unknown action %d on atom %pK in kctx %pK\n", action,
(void *)target_katom, (void *)target_katom->kctx);
break;
}
} else {
@@ -726,7 +726,8 @@ void kbasep_job_slot_soft_or_hard_stop_do_action(struct kbase_device *kbdev,
KBASE_KTRACE_ADD_JM_SLOT(kbdev, JM_HARDSTOP_1, NULL, NULL, 0, js);
break;
default:
BUG();
WARN(1, "Unknown action %d on atom %pK in kctx %pK\n", action,
(void *)target_katom, (void *)target_katom->kctx);
break;
}
}
@@ -752,9 +753,7 @@ void kbase_job_slot_ctx_priority_check_locked(struct kbase_context *kctx,
int i;
bool stop_sent = false;
KBASE_DEBUG_ASSERT(kctx != NULL);
kbdev = kctx->kbdev;
KBASE_DEBUG_ASSERT(kbdev != NULL);
lockdep_assert_held(&kbdev->hwaccess_lock);
@@ -934,7 +933,11 @@ void kbase_job_slot_softstop_swflags(struct kbase_device *kbdev, int js,
dev_dbg(kbdev->dev, "Soft-stop atom %pK with flags 0x%x (s:%d)\n",
target_katom, sw_flags, js);
KBASE_DEBUG_ASSERT(!(sw_flags & JS_COMMAND_MASK));
if (sw_flags & JS_COMMAND_MASK) {
WARN(true, "Atom %pK in kctx %pK received non-NOP flags %d\n", (void *)target_katom,
target_katom ? (void *)target_katom->kctx : NULL, sw_flags);
sw_flags &= ~((u32)JS_COMMAND_MASK);
}
kbase_backend_soft_hard_stop_slot(kbdev, NULL, js, target_katom,
JS_COMMAND_SOFT_STOP | sw_flags);
}
@@ -1059,12 +1062,9 @@ static void kbasep_reset_timeout_worker(struct work_struct *data)
bool silent = false;
u32 max_loops = KBASE_CLEAN_CACHE_MAX_LOOPS;
KBASE_DEBUG_ASSERT(data);
kbdev = container_of(data, struct kbase_device,
hwaccess.backend.reset_work);
KBASE_DEBUG_ASSERT(kbdev);
js_devdata = &kbdev->js_data;
if (atomic_read(&kbdev->hwaccess.backend.reset_gpu) ==
@@ -1102,7 +1102,7 @@ static void kbasep_reset_timeout_worker(struct work_struct *data)
return;
}
KBASE_DEBUG_ASSERT(kbdev->irq_reset_flush == false);
WARN(kbdev->irq_reset_flush, "%s: GPU reset already in flight\n", __func__);
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
spin_lock(&kbdev->mmu_mask_change);
@@ -1143,7 +1143,8 @@ static void kbasep_reset_timeout_worker(struct work_struct *data)
mutex_lock(&kbdev->pm.lock);
/* We hold the pm lock, so there ought to be a current policy */
KBASE_DEBUG_ASSERT(kbdev->pm.backend.pm_current_policy);
if (unlikely(!kbdev->pm.backend.pm_current_policy))
dev_warn(kbdev->dev, "No power policy set!");
/* All slot have been soft-stopped and we've waited
* SOFT_STOP_RESET_TIMEOUT for the slots to clear, at this point we
@@ -1240,8 +1241,6 @@ static enum hrtimer_restart kbasep_reset_timer_callback(struct hrtimer *timer)
struct kbase_device *kbdev = container_of(timer, struct kbase_device,
hwaccess.backend.reset_timer);
KBASE_DEBUG_ASSERT(kbdev);
/* Reset still pending? */
if (atomic_cmpxchg(&kbdev->hwaccess.backend.reset_gpu,
KBASE_RESET_GPU_COMMITTED, KBASE_RESET_GPU_HAPPENING) ==
@@ -1262,8 +1261,6 @@ static void kbasep_try_reset_gpu_early_locked(struct kbase_device *kbdev)
int i;
int pending_jobs = 0;
KBASE_DEBUG_ASSERT(kbdev);
/* Count the number of jobs */
for (i = 0; i < kbdev->gpu_props.num_job_slots; i++)
pending_jobs += kbase_backend_nr_atoms_submitted(kbdev, i);
@@ -1321,8 +1318,6 @@ bool kbase_prepare_to_reset_gpu_locked(struct kbase_device *kbdev,
{
int i;
KBASE_DEBUG_ASSERT(kbdev);
#ifdef CONFIG_MALI_ARBITER_SUPPORT
if (kbase_pm_is_gpu_lost(kbdev)) {
/* GPU access has been removed, reset will be done by
@@ -1376,13 +1371,11 @@ KBASE_EXPORT_TEST_API(kbase_prepare_to_reset_gpu);
*/
void kbase_reset_gpu(struct kbase_device *kbdev)
{
KBASE_DEBUG_ASSERT(kbdev);
/* Note this is an assert/atomic_set because it is a software issue for
* a race to be occurring here
*/
KBASE_DEBUG_ASSERT(atomic_read(&kbdev->hwaccess.backend.reset_gpu) ==
KBASE_RESET_GPU_PREPARED);
if (WARN_ON(atomic_read(&kbdev->hwaccess.backend.reset_gpu) != KBASE_RESET_GPU_PREPARED))
return;
atomic_set(&kbdev->hwaccess.backend.reset_gpu,
KBASE_RESET_GPU_COMMITTED);
@@ -1401,13 +1394,11 @@ KBASE_EXPORT_TEST_API(kbase_reset_gpu);
void kbase_reset_gpu_locked(struct kbase_device *kbdev)
{
KBASE_DEBUG_ASSERT(kbdev);
/* Note this is an assert/atomic_set because it is a software issue for
* a race to be occurring here
*/
KBASE_DEBUG_ASSERT(atomic_read(&kbdev->hwaccess.backend.reset_gpu) ==
KBASE_RESET_GPU_PREPARED);
if (WARN_ON(atomic_read(&kbdev->hwaccess.backend.reset_gpu) != KBASE_RESET_GPU_PREPARED))
return;
atomic_set(&kbdev->hwaccess.backend.reset_gpu,
KBASE_RESET_GPU_COMMITTED);

View File

@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
* (C) COPYRIGHT 2011-2016, 2018-2021 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2011-2016, 2018-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -76,7 +76,6 @@ static inline int kbasep_jm_is_js_free(struct kbase_device *kbdev, int js,
}
#endif
/**
* kbase_job_hw_submit() - Submit a job to the GPU
* @kbdev: Device pointer
@@ -88,10 +87,10 @@ static inline int kbasep_jm_is_js_free(struct kbase_device *kbdev, int js,
*
* The following locking conditions are made on the caller:
* - it must hold the hwaccess_lock
*
* Return: 0 if the job was successfully submitted to hardware, an error otherwise.
*/
void kbase_job_hw_submit(struct kbase_device *kbdev,
struct kbase_jd_atom *katom,
int js);
int kbase_job_hw_submit(struct kbase_device *kbdev, struct kbase_jd_atom *katom, int js);
#if !MALI_USE_CSF
/**

View File

@@ -347,16 +347,35 @@ static void kbase_gpu_release_atom(struct kbase_device *kbdev,
katom->protected_state.exit !=
KBASE_ATOM_EXIT_PROTECTED_CHECK)
kbdev->protected_mode_transition = false;
/* If the atom is at KBASE_ATOM_ENTER_PROTECTED_HWCNT state, it means
* one of two events prevented it from progressing to the next state and
* ultimately reaching protected mode:
* - hwcnts were enabled, and the atom had to schedule a worker to
* disable them.
* - the hwcnts were already disabled, but some other error occurred.
* In the first case, if the worker has not yet completed
* (kbdev->protected_mode_hwcnt_disabled == false), we need to re-enable
* them and signal to the worker that they have already been enabled.
*/
if (kbase_jd_katom_is_protected(katom) &&
(katom->protected_state.enter == KBASE_ATOM_ENTER_PROTECTED_HWCNT)) {
kbdev->protected_mode_hwcnt_desired = true;
if (kbdev->protected_mode_hwcnt_disabled) {
kbase_hwcnt_context_enable(kbdev->hwcnt_gpu_ctx);
kbdev->protected_mode_hwcnt_disabled = false;
}
}
/* If the atom has suspended hwcnt but has not yet entered
* protected mode, then resume hwcnt now. If the GPU is now in
* protected mode then hwcnt will be resumed by GPU reset so
* don't resume it here.
*/
if (kbase_jd_katom_is_protected(katom) &&
((katom->protected_state.enter ==
KBASE_ATOM_ENTER_PROTECTED_IDLE_L2) ||
(katom->protected_state.enter ==
KBASE_ATOM_ENTER_PROTECTED_SET_COHERENCY))) {
((katom->protected_state.enter == KBASE_ATOM_ENTER_PROTECTED_IDLE_L2) ||
(katom->protected_state.enter == KBASE_ATOM_ENTER_PROTECTED_SET_COHERENCY) ||
(katom->protected_state.enter == KBASE_ATOM_ENTER_PROTECTED_FINISHED))) {
WARN_ON(!kbdev->protected_mode_hwcnt_disabled);
kbdev->protected_mode_hwcnt_desired = true;
if (kbdev->protected_mode_hwcnt_disabled) {
@@ -507,17 +526,14 @@ static int kbase_jm_protected_entry(struct kbase_device *kbdev,
KBASE_TLSTREAM_AUX_PROTECTED_ENTER_END(kbdev, kbdev);
if (err) {
/*
* Failed to switch into protected mode, resume
* GPU hwcnt and fail atom.
* Failed to switch into protected mode.
*
* At this point we expect:
* katom->gpu_rb_state = KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION &&
* katom->protected_state.enter = KBASE_ATOM_ENTER_PROTECTED_FINISHED
* ==>
* kbdev->protected_mode_hwcnt_disabled = false
*/
WARN_ON(!kbdev->protected_mode_hwcnt_disabled);
kbdev->protected_mode_hwcnt_desired = true;
if (kbdev->protected_mode_hwcnt_disabled) {
kbase_hwcnt_context_enable(
kbdev->hwcnt_gpu_ctx);
kbdev->protected_mode_hwcnt_disabled = false;
}
katom[idx]->event_code = BASE_JD_EVENT_JOB_INVALID;
kbase_gpu_mark_atom_for_return(kbdev, katom[idx]);
/*
@@ -537,12 +553,9 @@ static int kbase_jm_protected_entry(struct kbase_device *kbdev,
/*
* Protected mode sanity checks.
*/
KBASE_DEBUG_ASSERT_MSG(
kbase_jd_katom_is_protected(katom[idx]) ==
kbase_gpu_in_protected_mode(kbdev),
"Protected mode of atom (%d) doesn't match protected mode of GPU (%d)",
kbase_jd_katom_is_protected(katom[idx]),
kbase_gpu_in_protected_mode(kbdev));
WARN(kbase_jd_katom_is_protected(katom[idx]) != kbase_gpu_in_protected_mode(kbdev),
"Protected mode of atom (%d) doesn't match protected mode of GPU (%d)",
kbase_jd_katom_is_protected(katom[idx]), kbase_gpu_in_protected_mode(kbdev));
katom[idx]->gpu_rb_state =
KBASE_ATOM_GPU_RB_READY;
@@ -952,18 +965,6 @@ void kbase_backend_slot_update(struct kbase_device *kbdev)
cores_ready = kbase_pm_cores_requested(kbdev,
true);
if (katom[idx]->event_code ==
BASE_JD_EVENT_PM_EVENT) {
KBASE_KTRACE_ADD_JM_SLOT_INFO(
kbdev, JM_MARK_FOR_RETURN_TO_JS,
katom[idx]->kctx, katom[idx],
katom[idx]->jc, js,
katom[idx]->event_code);
katom[idx]->gpu_rb_state =
KBASE_ATOM_GPU_RB_RETURN_TO_JS;
break;
}
if (!cores_ready)
break;
@@ -1012,9 +1013,10 @@ void kbase_backend_slot_update(struct kbase_device *kbdev)
kbase_pm_request_gpu_cycle_counter_l2_is_on(
kbdev);
kbase_job_hw_submit(kbdev, katom[idx], js);
katom[idx]->gpu_rb_state =
KBASE_ATOM_GPU_RB_SUBMITTED;
if (!kbase_job_hw_submit(kbdev, katom[idx], js))
katom[idx]->gpu_rb_state = KBASE_ATOM_GPU_RB_SUBMITTED;
else
break;
/* ***TRANSITION TO HIGHER STATE*** */
fallthrough;
@@ -1407,14 +1409,14 @@ void kbase_backend_reset(struct kbase_device *kbdev, ktime_t *end_timestamp)
if (katom->protected_state.exit ==
KBASE_ATOM_EXIT_PROTECTED_RESET_WAIT) {
/* protected mode sanity checks */
KBASE_DEBUG_ASSERT_MSG(
kbase_jd_katom_is_protected(katom) == kbase_gpu_in_protected_mode(kbdev),
"Protected mode of atom (%d) doesn't match protected mode of GPU (%d)",
kbase_jd_katom_is_protected(katom), kbase_gpu_in_protected_mode(kbdev));
KBASE_DEBUG_ASSERT_MSG(
(kbase_jd_katom_is_protected(katom) && js == 0) ||
!kbase_jd_katom_is_protected(katom),
"Protected atom on JS%d not supported", js);
WARN(kbase_jd_katom_is_protected(katom) !=
kbase_gpu_in_protected_mode(kbdev),
"Protected mode of atom (%d) doesn't match protected mode of GPU (%d)",
kbase_jd_katom_is_protected(katom),
kbase_gpu_in_protected_mode(kbdev));
WARN(!(kbase_jd_katom_is_protected(katom) && js == 0) &&
kbase_jd_katom_is_protected(katom),
"Protected atom on JS%d not supported", js);
}
if ((katom->gpu_rb_state < KBASE_ATOM_GPU_RB_SUBMITTED) &&
!kbase_ctx_flag(katom->kctx, KCTX_DYING))
@@ -1805,11 +1807,9 @@ void kbase_backend_complete_wq_post_sched(struct kbase_device *kbdev,
base_jd_core_req core_req)
{
if (!kbdev->pm.active_count) {
mutex_lock(&kbdev->js_data.runpool_mutex);
mutex_lock(&kbdev->pm.lock);
kbase_pm_lock(kbdev);
kbase_pm_update_active(kbdev);
mutex_unlock(&kbdev->pm.lock);
mutex_unlock(&kbdev->js_data.runpool_mutex);
kbase_pm_unlock(kbdev);
}
}

View File

@@ -1066,7 +1066,7 @@ static void midgard_model_get_outputs(void *h)
hw_error_status.gpu_error_irq ||
#if !MALI_USE_CSF
dummy->prfcnt_sample_completed ||
#endif /* !MALI_USE_CSF */
#endif
(dummy->clean_caches_completed && dummy->clean_caches_completed_irq_enabled))
gpu_device_raise_irq(dummy, GPU_DUMMY_GPU_IRQ);
@@ -1247,7 +1247,7 @@ u8 midgard_model_write_reg(void *h, u32 addr, u32 value)
if (value & (1 << 17))
dummy->clean_caches_completed = false;
#if !MALI_USE_CSF
#if !MALI_USE_CSF
if (value & PRFCNT_SAMPLE_COMPLETED)
dummy->prfcnt_sample_completed = 0;
#endif /* !MALI_USE_CSF */
@@ -1274,7 +1274,7 @@ u8 midgard_model_write_reg(void *h, u32 addr, u32 value)
pr_debug("clean caches requested");
dummy->clean_caches_completed = true;
break;
#if !MALI_USE_CSF
#if !MALI_USE_CSF
case GPU_COMMAND_PRFCNT_SAMPLE:
midgard_model_dump_prfcnt();
dummy->prfcnt_sample_completed = 1;
@@ -1545,7 +1545,8 @@ u8 midgard_model_read_reg(void *h, u32 addr, u32 * const value)
#endif /* !MALI_USE_CSF */
else if (addr == GPU_CONTROL_REG(GPU_IRQ_MASK)) {
*value = (dummy->reset_completed_mask << 8) |
(dummy->power_changed_mask << 9) | (1 << 7) | 1;
((dummy->clean_caches_completed_irq_enabled ? 1u : 0u) << 17) |
(dummy->power_changed_mask << 9) | (1 << 7) | 1;
pr_debug("GPU_IRQ_MASK read %x", *value);
} else if (addr == GPU_CONTROL_REG(GPU_IRQ_RAWSTAT)) {
*value = (dummy->power_changed << 9) | (dummy->power_changed << 10) |

View File

@@ -422,8 +422,7 @@ static void kbase_pm_l2_clock_slow(struct kbase_device *kbdev)
return;
/* Stop the metrics gathering framework */
if (kbase_pm_metrics_is_active(kbdev))
kbase_pm_metrics_stop(kbdev);
kbase_pm_metrics_stop(kbdev);
/* Keep the current freq to restore it upon resume */
kbdev->previous_frequency = clk_get_rate(clk);

View File

@@ -144,7 +144,7 @@ struct kbasep_pm_metrics {
* @initialized: tracks whether metrics_state has been initialized or not.
* @timer: timer to regularly make DVFS decisions based on the power
* management metrics.
* @timer_active: boolean indicating @timer is running
* @timer_state: atomic indicating current @timer state, on, off, or stopped.
* @dvfs_last: values of the PM metrics from the last DVFS tick
* @dvfs_diff: difference between the current and previous PM metrics.
*/
@@ -168,7 +168,7 @@ struct kbasep_pm_metrics_state {
#ifdef CONFIG_MALI_BIFROST_DVFS
bool initialized;
struct hrtimer timer;
bool timer_active;
atomic_t timer_state;
struct kbasep_pm_metrics dvfs_last;
struct kbasep_pm_metrics dvfs_diff;
#endif

View File

@@ -939,7 +939,7 @@ static int kbase_pm_mcu_update_state(struct kbase_device *kbdev)
case KBASE_MCU_ON_PEND_HALT:
if (kbase_csf_firmware_mcu_halted(kbdev)) {
KBASE_KTRACE_ADD(kbdev, MCU_HALTED, NULL,
KBASE_KTRACE_ADD(kbdev, CSF_FIRMWARE_MCU_HALTED, NULL,
kbase_csf_ktrace_gpu_cycle_cnt(kbdev));
if (kbdev->csf.firmware_hctl_core_pwr)
backend->mcu_state =
@@ -986,7 +986,7 @@ static int kbase_pm_mcu_update_state(struct kbase_device *kbdev)
case KBASE_MCU_ON_PEND_SLEEP:
if (kbase_csf_firmware_is_mcu_in_sleep(kbdev)) {
KBASE_KTRACE_ADD(kbdev, MCU_IN_SLEEP, NULL,
KBASE_KTRACE_ADD(kbdev, CSF_FIRMWARE_MCU_SLEEP, NULL,
kbase_csf_ktrace_gpu_cycle_cnt(kbdev));
backend->mcu_state = KBASE_MCU_IN_SLEEP;
kbase_pm_enable_db_mirror_interrupt(kbdev);
@@ -1108,13 +1108,24 @@ static bool can_power_down_l2(struct kbase_device *kbdev)
#endif
}
/**
 * need_tiler_control - Tell whether the Host driver must power the Tiler itself.
 * @kbdev: Device pointer; only consulted on CSF builds.
 *
 * On non-CSF builds the answer is unconditionally true. On CSF builds the
 * answer is whatever kbase_pm_no_mcu_core_pwroff() reports for @kbdev
 * (presumably "core power-off is not delegated to the MCU" - confirm against
 * that helper's definition).
 *
 * Return: true if the Host has to control Tiler power, false otherwise.
 */
static bool need_tiler_control(struct kbase_device *kbdev)
{
#if !MALI_USE_CSF
	/* Non-CSF GPUs: the Host always drives the Tiler power. */
	return true;
#else
	/* CSF GPUs: mirror the helper's verdict directly. */
	return kbase_pm_no_mcu_core_pwroff(kbdev);
#endif
}
static int kbase_pm_l2_update_state(struct kbase_device *kbdev)
{
struct kbase_pm_backend_data *backend = &kbdev->pm.backend;
u64 l2_present = kbdev->gpu_props.curr_config.l2_present;
#if !MALI_USE_CSF
u64 tiler_present = kbdev->gpu_props.props.raw_props.tiler_present;
#endif
bool l2_power_up_done;
enum kbase_l2_core_state prev_state;
lockdep_assert_held(&kbdev->hwaccess_lock);
@@ -1125,24 +1136,18 @@ static int kbase_pm_l2_update_state(struct kbase_device *kbdev)
KBASE_PM_CORE_L2);
u64 l2_ready = kbase_pm_get_ready_cores(kbdev,
KBASE_PM_CORE_L2);
#if !MALI_USE_CSF
u64 tiler_trans = kbase_pm_get_trans_cores(kbdev,
KBASE_PM_CORE_TILER);
u64 tiler_ready = kbase_pm_get_ready_cores(kbdev,
KBASE_PM_CORE_TILER);
#endif
#ifdef CONFIG_MALI_ARBITER_SUPPORT
u64 tiler_trans = kbase_pm_get_trans_cores(
kbdev, KBASE_PM_CORE_TILER);
u64 tiler_ready = kbase_pm_get_ready_cores(
kbdev, KBASE_PM_CORE_TILER);
/*
* kbase_pm_get_ready_cores and kbase_pm_get_trans_cores
* are vulnerable to corruption if gpu is lost
*/
if (kbase_is_gpu_removed(kbdev)
#ifdef CONFIG_MALI_ARBITER_SUPPORT
|| kbase_pm_is_gpu_lost(kbdev)) {
#else
) {
#endif
backend->shaders_state =
KBASE_SHADERS_OFF_CORESTACK_OFF;
backend->hwcnt_desired = false;
@@ -1165,14 +1170,13 @@ static int kbase_pm_l2_update_state(struct kbase_device *kbdev)
}
break;
}
#endif /* CONFIG_MALI_ARBITER_SUPPORT */
/* mask off ready from trans in case transitions finished
* between the register reads
*/
l2_trans &= ~l2_ready;
#if !MALI_USE_CSF
tiler_trans &= ~tiler_ready;
#endif
prev_state = backend->l2_state;
switch (backend->l2_state) {
@@ -1184,13 +1188,21 @@ static int kbase_pm_l2_update_state(struct kbase_device *kbdev)
*/
kbase_pm_l2_config_override(kbdev);
kbase_pbha_write_settings(kbdev);
#if !MALI_USE_CSF
/* L2 is required, power on. Powering on the
* tiler will also power the first L2 cache.
*/
kbase_pm_invoke(kbdev, KBASE_PM_CORE_TILER,
tiler_present, ACTION_PWRON);
/* If Host is controlling the power for shader
* cores, then it also needs to control the
* power for Tiler.
* Powering on the tiler will also power the
* L2 cache.
*/
if (need_tiler_control(kbdev)) {
kbase_pm_invoke(kbdev, KBASE_PM_CORE_TILER, tiler_present,
ACTION_PWRON);
} else {
kbase_pm_invoke(kbdev, KBASE_PM_CORE_L2, l2_present,
ACTION_PWRON);
}
#if !MALI_USE_CSF
/* If we have more than one L2 cache then we
* must power them on explicitly.
*/
@@ -1200,30 +1212,36 @@ static int kbase_pm_l2_update_state(struct kbase_device *kbdev)
ACTION_PWRON);
/* Clear backend slot submission kctx */
kbase_pm_l2_clear_backend_slot_submit_kctx(kbdev);
#else
/* With CSF firmware, Host driver doesn't need to
* handle power management with both shader and tiler cores.
* The CSF firmware will power up the cores appropriately.
* So only power the l2 cache explicitly.
*/
kbase_pm_invoke(kbdev, KBASE_PM_CORE_L2,
l2_present, ACTION_PWRON);
#endif
backend->l2_state = KBASE_L2_PEND_ON;
}
break;
case KBASE_L2_PEND_ON:
#if !MALI_USE_CSF
if (!l2_trans && l2_ready == l2_present && !tiler_trans
&& tiler_ready == tiler_present) {
KBASE_KTRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE_TILER, NULL,
tiler_ready);
#else
l2_power_up_done = false;
if (!l2_trans && l2_ready == l2_present) {
KBASE_KTRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE_L2, NULL,
l2_ready);
if (need_tiler_control(kbdev)) {
#ifndef CONFIG_MALI_ARBITER_SUPPORT
u64 tiler_trans = kbase_pm_get_trans_cores(
kbdev, KBASE_PM_CORE_TILER);
u64 tiler_ready = kbase_pm_get_ready_cores(
kbdev, KBASE_PM_CORE_TILER);
#endif
tiler_trans &= ~tiler_ready;
if (!tiler_trans && tiler_ready == tiler_present) {
KBASE_KTRACE_ADD(kbdev,
PM_CORES_CHANGE_AVAILABLE_TILER,
NULL, tiler_ready);
l2_power_up_done = true;
}
} else {
KBASE_KTRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE_L2, NULL,
l2_ready);
l2_power_up_done = true;
}
}
if (l2_power_up_done) {
/*
* Ensure snoops are enabled after L2 is powered
* up. Note that kbase keeps track of the snoop
@@ -2248,12 +2266,14 @@ int kbase_pm_wait_for_l2_powered(struct kbase_device *kbdev)
/* Wait for cores */
#if KERNEL_VERSION(4, 13, 1) <= LINUX_VERSION_CODE
remaining = wait_event_killable_timeout(
remaining = wait_event_killable_timeout(kbdev->pm.backend.gpu_in_desired_state_wait,
kbase_pm_is_in_desired_state_with_l2_powered(kbdev),
timeout);
#else
remaining = wait_event_timeout(
#endif
kbdev->pm.backend.gpu_in_desired_state_wait,
kbase_pm_is_in_desired_state_with_l2_powered(kbdev), timeout);
#endif
if (!remaining) {
kbase_pm_timed_out(kbdev);

View File

@@ -49,27 +49,51 @@
#define GPU_ACTIVE_SCALING_FACTOR ((u64)1E9)
#endif
/*
* Possible state transitions
* ON -> ON | OFF | STOPPED
* STOPPED -> ON | OFF
* OFF -> ON
*
*
* +-e-++------------f-------------+
* |   v|                          v
* +---ON --a--> STOPPED --b--> OFF
*     ^^            |             |
*     |+------c-----+             |
*     |                           |
*     +-------------d-------------+
*
* Transition effects:
* a. None
* b. Timer expires without restart
* c. Timer is not stopped, timer period is unaffected
* d. Timer must be restarted
* e. Callback is executed and the timer is restarted
* f. Timer is cancelled, or the callback is waited on if currently executing. This is called during
* tear-down and should not be subject to a race from an OFF->ON transition
*/
enum dvfs_metric_timer_state { TIMER_OFF, TIMER_STOPPED, TIMER_ON };
#ifdef CONFIG_MALI_BIFROST_DVFS
static enum hrtimer_restart dvfs_callback(struct hrtimer *timer)
{
unsigned long flags;
struct kbasep_pm_metrics_state *metrics;
KBASE_DEBUG_ASSERT(timer != NULL);
if (WARN_ON(!timer))
return HRTIMER_NORESTART;
metrics = container_of(timer, struct kbasep_pm_metrics_state, timer);
/* Transition (b) to fully off if timer was stopped, don't restart the timer in this case */
if (atomic_cmpxchg(&metrics->timer_state, TIMER_STOPPED, TIMER_OFF) != TIMER_ON)
return HRTIMER_NORESTART;
kbase_pm_get_dvfs_action(metrics->kbdev);
spin_lock_irqsave(&metrics->lock, flags);
if (metrics->timer_active)
hrtimer_start(timer,
HR_TIMER_DELAY_MSEC(metrics->kbdev->pm.dvfs_period),
HRTIMER_MODE_REL);
spin_unlock_irqrestore(&metrics->lock, flags);
return HRTIMER_NORESTART;
/* Set the new expiration time and restart (transition e) */
hrtimer_forward_now(timer, HR_TIMER_DELAY_MSEC(metrics->kbdev->pm.dvfs_period));
return HRTIMER_RESTART;
}
#endif /* CONFIG_MALI_BIFROST_DVFS */
@@ -135,6 +159,7 @@ int kbasep_pm_metrics_init(struct kbase_device *kbdev)
HRTIMER_MODE_REL);
kbdev->pm.backend.metrics.timer.function = dvfs_callback;
kbdev->pm.backend.metrics.initialized = true;
atomic_set(&kbdev->pm.backend.metrics.timer_state, TIMER_OFF);
kbase_pm_metrics_start(kbdev);
#endif /* CONFIG_MALI_BIFROST_DVFS */
@@ -153,16 +178,12 @@ KBASE_EXPORT_TEST_API(kbasep_pm_metrics_init);
void kbasep_pm_metrics_term(struct kbase_device *kbdev)
{
#ifdef CONFIG_MALI_BIFROST_DVFS
unsigned long flags;
KBASE_DEBUG_ASSERT(kbdev != NULL);
spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, flags);
kbdev->pm.backend.metrics.timer_active = false;
spin_unlock_irqrestore(&kbdev->pm.backend.metrics.lock, flags);
hrtimer_cancel(&kbdev->pm.backend.metrics.timer);
/* Cancel the timer, and block if the callback is currently executing (transition f) */
kbdev->pm.backend.metrics.initialized = false;
atomic_set(&kbdev->pm.backend.metrics.timer_state, TIMER_OFF);
hrtimer_cancel(&kbdev->pm.backend.metrics.timer);
#endif /* CONFIG_MALI_BIFROST_DVFS */
#if MALI_USE_CSF
@@ -399,57 +420,33 @@ void kbase_pm_get_dvfs_action(struct kbase_device *kbdev)
bool kbase_pm_metrics_is_active(struct kbase_device *kbdev)
{
bool isactive;
unsigned long flags;
KBASE_DEBUG_ASSERT(kbdev != NULL);
spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, flags);
isactive = kbdev->pm.backend.metrics.timer_active;
spin_unlock_irqrestore(&kbdev->pm.backend.metrics.lock, flags);
return isactive;
return atomic_read(&kbdev->pm.backend.metrics.timer_state) == TIMER_ON;
}
KBASE_EXPORT_TEST_API(kbase_pm_metrics_is_active);
void kbase_pm_metrics_start(struct kbase_device *kbdev)
{
unsigned long flags;
bool update = true;
struct kbasep_pm_metrics_state *metrics = &kbdev->pm.backend.metrics;
if (unlikely(!kbdev->pm.backend.metrics.initialized))
if (unlikely(!metrics->initialized))
return;
spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, flags);
if (!kbdev->pm.backend.metrics.timer_active)
kbdev->pm.backend.metrics.timer_active = true;
else
update = false;
spin_unlock_irqrestore(&kbdev->pm.backend.metrics.lock, flags);
if (update)
hrtimer_start(&kbdev->pm.backend.metrics.timer,
HR_TIMER_DELAY_MSEC(kbdev->pm.dvfs_period),
HRTIMER_MODE_REL);
/* Transition to ON, from a stopped state (transition c) */
if (atomic_xchg(&metrics->timer_state, TIMER_ON) == TIMER_OFF)
/* Start the timer only if it's been fully stopped (transition d)*/
hrtimer_start(&metrics->timer, HR_TIMER_DELAY_MSEC(kbdev->pm.dvfs_period),
HRTIMER_MODE_REL);
}
void kbase_pm_metrics_stop(struct kbase_device *kbdev)
{
unsigned long flags;
bool update = true;
if (unlikely(!kbdev->pm.backend.metrics.initialized))
return;
spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, flags);
if (kbdev->pm.backend.metrics.timer_active)
kbdev->pm.backend.metrics.timer_active = false;
else
update = false;
spin_unlock_irqrestore(&kbdev->pm.backend.metrics.lock, flags);
if (update)
hrtimer_cancel(&kbdev->pm.backend.metrics.timer);
/* Timer is stopped if it's currently on (transition a) */
atomic_cmpxchg(&kbdev->pm.backend.metrics.timer_state, TIMER_ON, TIMER_STOPPED);
}

View File

@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
* (C) COPYRIGHT 2010-2021 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2010-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -292,6 +292,8 @@ void kbase_pm_set_policy(struct kbase_device *kbdev,
unsigned int new_policy_csf_pm_sched_flags;
bool sched_suspend;
bool reset_gpu = false;
bool reset_op_prevented = true;
struct kbase_csf_scheduler *scheduler = NULL;
#endif
KBASE_DEBUG_ASSERT(kbdev != NULL);
@@ -300,9 +302,23 @@ void kbase_pm_set_policy(struct kbase_device *kbdev,
KBASE_KTRACE_ADD(kbdev, PM_SET_POLICY, NULL, new_policy->id);
#if MALI_USE_CSF
scheduler = &kbdev->csf.scheduler;
KBASE_DEBUG_ASSERT(scheduler != NULL);
/* Serialize calls on kbase_pm_set_policy() */
mutex_lock(&kbdev->pm.backend.policy_change_lock);
if (kbase_reset_gpu_prevent_and_wait(kbdev)) {
dev_warn(kbdev->dev, "Set PM policy failing to prevent gpu reset");
reset_op_prevented = false;
}
/* In case of CSF, the scheduler may be invoked to suspend. In that
* case, there is a risk that the L2 may be turned on by the time we
* check it here. So we hold the scheduler lock to avoid other operations
* interfering with the policy change and vice versa.
*/
mutex_lock(&scheduler->lock);
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
/* policy_change_clamp_state_to_off, when needed, is set/cleared in
* this function, a very limited temporal scope for covering the
@@ -315,24 +331,22 @@ void kbase_pm_set_policy(struct kbase_device *kbdev,
* the always_on policy, reflected by the CSF_DYNAMIC_PM_CORE_KEEP_ON
* flag bit.
*/
sched_suspend = kbdev->csf.firmware_inited &&
sched_suspend = reset_op_prevented &&
(CSF_DYNAMIC_PM_CORE_KEEP_ON &
(new_policy_csf_pm_sched_flags |
kbdev->pm.backend.csf_pm_sched_flags));
(new_policy_csf_pm_sched_flags | kbdev->pm.backend.csf_pm_sched_flags));
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
if (sched_suspend)
kbase_csf_scheduler_pm_suspend(kbdev);
if (sched_suspend) {
/* Update the suspend flag to reflect whether the suspend was actually done */
sched_suspend = !kbase_csf_scheduler_pm_suspend_no_lock(kbdev);
/* Set the reset recovery flag if the required suspend failed */
reset_gpu = !sched_suspend;
}
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
/* If the current active policy is always_on, one needs to clamp the
* MCU/L2 for reaching off-state
*/
if (sched_suspend)
kbdev->pm.backend.policy_change_clamp_state_to_off =
CSF_DYNAMIC_PM_CORE_KEEP_ON & kbdev->pm.backend.csf_pm_sched_flags;
kbdev->pm.backend.policy_change_clamp_state_to_off = sched_suspend;
kbase_pm_update_state(kbdev);
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
@@ -391,13 +405,19 @@ void kbase_pm_set_policy(struct kbase_device *kbdev,
#if MALI_USE_CSF
/* Reverse the suspension done */
if (sched_suspend)
kbase_csf_scheduler_pm_resume_no_lock(kbdev);
mutex_unlock(&scheduler->lock);
if (reset_op_prevented)
kbase_reset_gpu_allow(kbdev);
if (reset_gpu) {
dev_warn(kbdev->dev, "Resorting to GPU reset for policy change\n");
if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_NONE))
kbase_reset_gpu(kbdev);
kbase_reset_gpu_wait(kbdev);
} else if (sched_suspend)
kbase_csf_scheduler_pm_resume(kbdev);
}
mutex_unlock(&kbdev->pm.backend.policy_change_lock);
#endif

View File

@@ -116,8 +116,6 @@ unsigned int kbase_get_timeout_ms(struct kbase_device *kbdev,
*/
u64 timeout, nr_cycles = 0;
/* Default value to mean 'no cap' */
u64 timeout_cap = U64_MAX;
u64 freq_khz;
/* Only for debug messages, safe default in case it's mis-maintained */
@@ -144,16 +142,15 @@ unsigned int kbase_get_timeout_ms(struct kbase_device *kbdev,
fallthrough;
case CSF_FIRMWARE_TIMEOUT:
selector_str = "CSF_FIRMWARE_TIMEOUT";
nr_cycles = CSF_FIRMWARE_TIMEOUT_CYCLES;
/* Setup a cap on CSF FW timeout to FIRMWARE_PING_INTERVAL_MS,
* if calculated timeout exceeds it. This should be adapted to
* a direct timeout comparison once the
* FIRMWARE_PING_INTERVAL_MS option is added to this timeout
* function. A compile-time check such as BUILD_BUG_ON can also
* be done once the firmware ping interval in cycles becomes
* available as a macro.
/* Any FW timeout cannot be longer than the FW ping interval, after which
* the firmware_aliveness_monitor will be triggered and may restart
* the GPU if the FW is unresponsive.
*/
timeout_cap = FIRMWARE_PING_INTERVAL_MS;
nr_cycles = min(CSF_FIRMWARE_PING_TIMEOUT_CYCLES, CSF_FIRMWARE_TIMEOUT_CYCLES);
if (nr_cycles == CSF_FIRMWARE_PING_TIMEOUT_CYCLES)
dev_warn(kbdev->dev, "Capping %s to CSF_FIRMWARE_PING_TIMEOUT\n",
selector_str);
break;
case CSF_PM_TIMEOUT:
selector_str = "CSF_PM_TIMEOUT";
@@ -171,6 +168,10 @@ unsigned int kbase_get_timeout_ms(struct kbase_device *kbdev,
selector_str = "CSF_FIRMWARE_BOOT_TIMEOUT";
nr_cycles = CSF_FIRMWARE_BOOT_TIMEOUT_CYCLES;
break;
case CSF_FIRMWARE_PING_TIMEOUT:
selector_str = "CSF_FIRMWARE_PING_TIMEOUT";
nr_cycles = CSF_FIRMWARE_PING_TIMEOUT_CYCLES;
break;
case CSF_SCHED_PROTM_PROGRESS_TIMEOUT:
selector_str = "CSF_SCHED_PROTM_PROGRESS_TIMEOUT";
nr_cycles = kbase_csf_timeout_get(kbdev);
@@ -179,11 +180,6 @@ unsigned int kbase_get_timeout_ms(struct kbase_device *kbdev,
}
timeout = div_u64(nr_cycles, freq_khz);
if (timeout > timeout_cap) {
dev_dbg(kbdev->dev, "Capped %s %llu to %llu", selector_str,
(unsigned long long)timeout, (unsigned long long)timeout_cap);
timeout = timeout_cap;
}
if (WARN(timeout > UINT_MAX,
"Capping excessive timeout %llums for %s at freq %llukHz to UINT_MAX ms",
(unsigned long long)timeout, selector_str, (unsigned long long)freq_khz))

View File

@@ -139,6 +139,9 @@ bob_defaults {
platform_is_fpga: {
kbuild_options: ["CONFIG_MALI_IS_FPGA=y"],
},
mali_fw_core_dump: {
kbuild_options: ["CONFIG_MALI_FW_CORE_DUMP=y"],
},
kbuild_options: [
"CONFIG_MALI_PLATFORM_NAME={{.mali_platform_name}}",
"MALI_CUSTOMER_RELEASE={{.release}}",

View File

@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
* (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -239,7 +239,9 @@ static void kbase_remove_kctx_from_process(struct kbase_context *kctx)
/* Check that the terminating process does not
* still hold any GPU memory.
*/
spin_lock(&kctx->kbdev->gpu_mem_usage_lock);
WARN_ON(kprcs->total_gpu_pages);
spin_unlock(&kctx->kbdev->gpu_mem_usage_lock);
WARN_ON(!RB_EMPTY_ROOT(&kprcs->dma_buf_root));
kfree(kprcs);
}

View File

@@ -40,6 +40,7 @@ bifrost_kbase-$(CONFIG_MALI_REAL_HW) += csf/mali_kbase_csf_firmware.o
bifrost_kbase-$(CONFIG_MALI_BIFROST_NO_MALI) += csf/mali_kbase_csf_firmware_no_mali.o
ifeq ($(KBUILD_EXTMOD),)
# in-tree
-include $(src)/csf/ipa_control/Kbuild

View File

@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
* (C) COPYRIGHT 2018-2021 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2018-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -34,6 +34,7 @@
#include <csf/ipa_control/mali_kbase_csf_ipa_control.h>
#include <mali_kbase_hwaccess_time.h>
#include "mali_kbase_csf_event.h"
#include <linux/protected_memory_allocator.h>
#define CS_REQ_EXCEPTION_MASK (CS_REQ_FAULT_MASK | CS_REQ_FATAL_MASK)
#define CS_ACK_EXCEPTION_MASK (CS_ACK_FAULT_MASK | CS_ACK_FATAL_MASK)
@@ -52,6 +53,23 @@ const u8 kbasep_csf_relative_to_queue_group_priority[KBASE_QUEUE_GROUP_PRIORITY_
BASE_QUEUE_GROUP_PRIORITY_LOW
};
/**
* struct irq_idle_and_protm_track - Tracks, across groups, the idle and
*                                   protected mode request information seen
*                                   while handling one interrupt.
*
* @protm_grp: Possibly schedulable group that requested protected mode in the
*             interrupt. NULL if no such request was observed in the tracked
*             interrupt.
* @idle_seq:  Identifies the highest priority group that notified idle
*             (presumably via its scan sequence number - confirm against the
*             users of this struct). Set to U32_MAX, the largest value the
*             field can hold, if no group notified idle in the tracked
*             interrupt.
* @idle_slot: The slot number of that group; only meaningful when @idle_seq
*             is valid for the tracked interrupt.
*/
struct irq_idle_and_protm_track {
struct kbase_queue_group *protm_grp;
u32 idle_seq;
s8 idle_slot;
};
static void put_user_pages_mmap_handle(struct kbase_context *kctx,
struct kbase_queue *queue)
{
@@ -112,13 +130,13 @@ static int get_user_pages_mmap_handle(struct kbase_context *kctx,
return 0;
}
static void gpu_munmap_user_io_pages(struct kbase_context *kctx,
struct kbase_va_region *reg)
static void gpu_munmap_user_io_pages(struct kbase_context *kctx, struct kbase_va_region *reg,
struct tagged_addr *phys)
{
size_t num_pages = 2;
kbase_mmu_teardown_pages(kctx->kbdev, &kctx->kbdev->csf.mcu_mmu,
reg->start_pfn, num_pages, MCU_AS_NR);
kbase_mmu_teardown_pages(kctx->kbdev, &kctx->kbdev->csf.mcu_mmu, reg->start_pfn, phys,
num_pages, MCU_AS_NR);
WARN_ON(reg->flags & KBASE_REG_FREE);
@@ -159,12 +177,6 @@ static int gpu_mmap_user_io_pages(struct kbase_device *kbdev,
*/
const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC;
#if ((KERNEL_VERSION(4, 4, 147) >= LINUX_VERSION_CODE) || \
((KERNEL_VERSION(4, 6, 0) > LINUX_VERSION_CODE) && \
(KERNEL_VERSION(4, 5, 0) <= LINUX_VERSION_CODE)))
mem_flags |=
KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_NON_CACHEABLE);
#else
if (kbdev->system_coherency == COHERENCY_NONE) {
mem_flags |=
KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_NON_CACHEABLE);
@@ -172,7 +184,6 @@ static int gpu_mmap_user_io_pages(struct kbase_device *kbdev,
mem_flags |= KBASE_REG_SHARE_BOTH |
KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_SHARED);
}
#endif
mutex_lock(&kbdev->csf.reg_lock);
ret = kbase_add_va_region_rbtree(kbdev, reg, 0, num_pages, 1);
@@ -201,8 +212,7 @@ static int gpu_mmap_user_io_pages(struct kbase_device *kbdev,
return 0;
bad_insert_output_page:
kbase_mmu_teardown_pages(kbdev, &kbdev->csf.mcu_mmu,
reg->start_pfn, 1, MCU_AS_NR);
kbase_mmu_teardown_pages(kbdev, &kbdev->csf.mcu_mmu, reg->start_pfn, phys, 1, MCU_AS_NR);
bad_insert:
mutex_lock(&kbdev->csf.reg_lock);
kbase_remove_va_region(kbdev, reg);
@@ -231,6 +241,8 @@ static int kernel_map_user_io_pages(struct kbase_context *kctx,
{
struct page *page_list[2];
pgprot_t cpu_map_prot;
unsigned long flags;
char *user_io_addr;
int ret = 0;
size_t i;
@@ -245,25 +257,19 @@ static int kernel_map_user_io_pages(struct kbase_context *kctx,
/* The pages are mapped to Userspace also, so use the same mapping
* attributes as used inside the CPU page fault handler.
*/
#if ((KERNEL_VERSION(4, 4, 147) >= LINUX_VERSION_CODE) || \
((KERNEL_VERSION(4, 6, 0) > LINUX_VERSION_CODE) && \
(KERNEL_VERSION(4, 5, 0) <= LINUX_VERSION_CODE)))
cpu_map_prot = pgprot_device(PAGE_KERNEL);
#else
if (kctx->kbdev->system_coherency == COHERENCY_NONE)
cpu_map_prot = pgprot_writecombine(PAGE_KERNEL);
else
cpu_map_prot = PAGE_KERNEL;
#endif
for (i = 0; i < ARRAY_SIZE(page_list); i++)
page_list[i] = as_page(queue->phys[i]);
queue->user_io_addr = vmap(page_list, ARRAY_SIZE(page_list), VM_MAP, cpu_map_prot);
user_io_addr = vmap(page_list, ARRAY_SIZE(page_list), VM_MAP, cpu_map_prot);
if (!queue->user_io_addr) {
if (!user_io_addr) {
dev_err(kctx->kbdev->dev,
"%s(): queue->user_io_addr is NULL, queue: %p",
"%s(): user_io_addr is NULL, queue: %p",
__func__,
queue);
ret = -ENOMEM;
@@ -271,6 +277,10 @@ static int kernel_map_user_io_pages(struct kbase_context *kctx,
atomic_add(ARRAY_SIZE(page_list), &kctx->permanent_mapped_pages);
}
kbase_csf_scheduler_spin_lock(kctx->kbdev, &flags);
queue->user_io_addr = user_io_addr;
kbase_csf_scheduler_spin_unlock(kctx->kbdev, flags);
unlock:
kbase_gpu_vm_unlock(kctx);
return ret;
@@ -307,7 +317,7 @@ static void kbase_csf_free_command_stream_user_pages(struct kbase_context *kctx,
{
const size_t num_pages = 2;
gpu_munmap_user_io_pages(kctx, queue->reg);
gpu_munmap_user_io_pages(kctx, queue->reg, &queue->phys[0]);
kernel_unmap_user_io_pages(kctx, queue);
kbase_mem_pool_free_pages(
@@ -934,7 +944,7 @@ static void unbind_stopped_queue(struct kbase_context *kctx,
kbase_csf_scheduler_spin_lock(kctx->kbdev, &flags);
bitmap_clear(queue->group->protm_pending_bitmap,
queue->csi_index, 1);
KBASE_KTRACE_ADD_CSF_GRP_Q(kctx->kbdev, PROTM_PENDING_CLEAR,
KBASE_KTRACE_ADD_CSF_GRP_Q(kctx->kbdev, CSI_PROTM_PEND_CLEAR,
queue->group, queue, queue->group->protm_pending_bitmap[0]);
queue->group->bound_queues[queue->csi_index] = NULL;
queue->group = NULL;
@@ -982,7 +992,7 @@ static void unbind_queue(struct kbase_context *kctx, struct kbase_queue *queue)
}
}
void kbase_csf_queue_unbind(struct kbase_queue *queue)
void kbase_csf_queue_unbind(struct kbase_queue *queue, bool process_exit)
{
struct kbase_context *kctx = queue->kctx;
@@ -996,7 +1006,7 @@ void kbase_csf_queue_unbind(struct kbase_queue *queue)
* whereas CSG TERM request would result in an immediate abort or
* cancellation of the pending work.
*/
if (current->flags & PF_EXITING) {
if (process_exit) {
struct kbase_queue_group *group = get_bound_queue_group(queue);
if (group)
@@ -1344,10 +1354,12 @@ static int create_queue_group(struct kbase_context *const kctx,
group->tiler_max = create->in.tiler_max;
group->fragment_max = create->in.fragment_max;
group->compute_max = create->in.compute_max;
group->csi_handlers = create->in.csi_handlers;
group->priority = kbase_csf_priority_queue_group_priority_to_relative(
kbase_csf_priority_check(kctx->kbdev, create->in.priority));
group->doorbell_nr = KBASEP_USER_DB_NR_INVALID;
group->faulted = false;
group->reevaluate_idle_status = false;
group->group_uid = generate_group_uid();
@@ -1391,6 +1403,14 @@ int kbase_csf_queue_group_create(struct kbase_context *const kctx,
const u32 tiler_count = hweight64(create->in.tiler_mask);
const u32 fragment_count = hweight64(create->in.fragment_mask);
const u32 compute_count = hweight64(create->in.compute_mask);
size_t i;
for (i = 0; i < sizeof(create->in.padding); i++) {
if (create->in.padding[i] != 0) {
dev_warn(kctx->kbdev->dev, "Invalid padding not 0 in queue group create\n");
return -EINVAL;
}
}
mutex_lock(&kctx->csf.lock);
@@ -1409,6 +1429,10 @@ int kbase_csf_queue_group_create(struct kbase_context *const kctx,
"No CSG has at least %d CSs",
create->in.cs_min);
err = -EINVAL;
} else if (create->in.csi_handlers & ~BASE_CSF_EXCEPTION_HANDLER_FLAGS_MASK) {
dev_warn(kctx->kbdev->dev, "Unknown exception handler flags set: %u",
create->in.csi_handlers & ~BASE_CSF_EXCEPTION_HANDLER_FLAGS_MASK);
err = -EINVAL;
} else if (create->in.reserved) {
dev_warn(kctx->kbdev->dev, "Reserved field was set to non-0");
err = -EINVAL;
@@ -1447,9 +1471,8 @@ static void term_normal_suspend_buffer(struct kbase_context *const kctx,
lockdep_assert_held(&kctx->csf.lock);
WARN_ON(kbase_mmu_teardown_pages(
kctx->kbdev, &kctx->kbdev->csf.mcu_mmu,
s_buf->reg->start_pfn, nr_pages, MCU_AS_NR));
WARN_ON(kbase_mmu_teardown_pages(kctx->kbdev, &kctx->kbdev->csf.mcu_mmu,
s_buf->reg->start_pfn, s_buf->phy, nr_pages, MCU_AS_NR));
WARN_ON(s_buf->reg->flags & KBASE_REG_FREE);
@@ -1479,10 +1502,16 @@ static void term_protected_suspend_buffer(struct kbase_device *const kbdev,
{
const size_t nr_pages =
PFN_UP(kbdev->csf.global_iface.groups[0].suspend_size);
struct tagged_addr *phys = kmalloc(sizeof(*phys) * nr_pages, GFP_KERNEL);
size_t i = 0;
WARN_ON(kbase_mmu_teardown_pages(
kbdev, &kbdev->csf.mcu_mmu,
s_buf->reg->start_pfn, nr_pages, MCU_AS_NR));
for (i = 0; phys && i < nr_pages; i++)
phys[i] = as_tagged(s_buf->pma[i]->pa);
WARN_ON(kbase_mmu_teardown_pages(kbdev, &kbdev->csf.mcu_mmu, s_buf->reg->start_pfn, phys,
nr_pages, MCU_AS_NR));
kfree(phys);
WARN_ON(s_buf->reg->flags & KBASE_REG_FREE);
@@ -1929,7 +1958,7 @@ void kbase_csf_ctx_term(struct kbase_context *kctx)
* handle_oom_event - Handle the OoM event generated by the firmware for the
* CSI.
*
* @kctx: Pointer to the kbase context in which the tiler heap was initialized.
* @group: Pointer to the CSG group the oom-event belongs to.
* @stream: Pointer to the structure containing info provided by the firmware
* about the CSI.
*
@@ -1944,9 +1973,10 @@ void kbase_csf_ctx_term(struct kbase_context *kctx)
* Return: 0 if successfully handled the request, otherwise a negative error
* code on failure.
*/
static int handle_oom_event(struct kbase_context *const kctx,
struct kbase_csf_cmd_stream_info const *const stream)
static int handle_oom_event(struct kbase_queue_group *const group,
struct kbase_csf_cmd_stream_info const *const stream)
{
struct kbase_context *const kctx = group->kctx;
u64 gpu_heap_va =
kbase_csf_firmware_cs_output(stream, CS_HEAP_ADDRESS_LO) |
((u64)kbase_csf_firmware_cs_output(stream, CS_HEAP_ADDRESS_HI) << 32);
@@ -1973,12 +2003,18 @@ static int handle_oom_event(struct kbase_context *const kctx,
err = kbase_csf_tiler_heap_alloc_new_chunk(kctx,
gpu_heap_va, renderpasses_in_flight, pending_frag_count, &new_chunk_ptr);
/* It is okay to acknowledge with a NULL chunk (firmware will then wait
* for the fragment jobs to complete and release chunks)
*/
if (err == -EBUSY)
if ((group->csi_handlers & BASE_CSF_TILER_OOM_EXCEPTION_FLAG) &&
(pending_frag_count == 0) && (err == -ENOMEM || err == -EBUSY)) {
/* The group allows incremental rendering, trigger it */
new_chunk_ptr = 0;
else if (err)
dev_dbg(kctx->kbdev->dev, "Group-%d (slot-%d) enter incremental render\n",
group->handle, group->csg_nr);
} else if (err == -EBUSY) {
/* Acknowledge with a NULL chunk (firmware will then wait for
* the fragment jobs to complete and release chunks)
*/
new_chunk_ptr = 0;
} else if (err)
return err;
kbase_csf_firmware_cs_input(stream, CS_TILER_HEAP_START_LO,
@@ -2085,7 +2121,7 @@ static void kbase_queue_oom_event(struct kbase_queue *const queue)
if (cs_oom_ack == cs_oom_req)
goto unlock;
err = handle_oom_event(kctx, stream);
err = handle_oom_event(group, stream);
kbase_csf_firmware_cs_input_mask(stream, CS_REQ, cs_oom_ack,
CS_REQ_TILER_OOM_MASK);
@@ -2221,7 +2257,7 @@ static void protm_event_worker(struct work_struct *data)
struct kbase_queue_group *const group =
container_of(data, struct kbase_queue_group, protm_event_work);
KBASE_KTRACE_ADD_CSF_GRP(group->kctx->kbdev, PROTM_EVENT_WORKER_BEGIN,
KBASE_KTRACE_ADD_CSF_GRP(group->kctx->kbdev, PROTM_EVENT_WORKER_START,
group, 0u);
kbase_csf_scheduler_group_protm_enter(group);
KBASE_KTRACE_ADD_CSF_GRP(group->kctx->kbdev, PROTM_EVENT_WORKER_END,
@@ -2441,6 +2477,9 @@ static void handle_queue_exception_event(struct kbase_queue *const queue,
* @ginfo: The CSG interface provided by the firmware.
* @irqreq: CSG's IRQ request bitmask (one bit per CS).
* @irqack: CSG's IRQ acknowledge bitmask (one bit per CS).
* @track: Pointer that tracks the highest scanout priority idle CSG
* and any newly potentially viable protected mode requesting
* CSG in current IRQ context.
*
* If the interrupt request bitmask differs from the acknowledge bitmask
* then the firmware is notifying the host of an event concerning those
@@ -2449,8 +2488,9 @@ static void handle_queue_exception_event(struct kbase_queue *const queue,
* the request and acknowledge registers for the individual CS(s).
*/
static void process_cs_interrupts(struct kbase_queue_group *const group,
struct kbase_csf_cmd_stream_group_info const *const ginfo,
u32 const irqreq, u32 const irqack)
struct kbase_csf_cmd_stream_group_info const *const ginfo,
u32 const irqreq, u32 const irqack,
struct irq_idle_and_protm_track *track)
{
struct kbase_device *const kbdev = group->kctx->kbdev;
u32 remaining = irqreq ^ irqack;
@@ -2482,7 +2522,8 @@ static void process_cs_interrupts(struct kbase_queue_group *const group,
if ((cs_req & CS_REQ_EXCEPTION_MASK) ^
(cs_ack & CS_ACK_EXCEPTION_MASK)) {
KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, CSI_FAULT_INTERRUPT, group, queue, cs_req ^ cs_ack);
KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, CSI_INTERRUPT_FAULT,
group, queue, cs_req ^ cs_ack);
handle_queue_exception_event(queue, cs_req, cs_ack);
}
@@ -2494,16 +2535,18 @@ static void process_cs_interrupts(struct kbase_queue_group *const group,
u32 const cs_req_remain = cs_req & ~CS_REQ_EXCEPTION_MASK;
u32 const cs_ack_remain = cs_ack & ~CS_ACK_EXCEPTION_MASK;
KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, CSI_IGNORED_INTERRUPTS_GROUP_SUSPEND,
group, queue, cs_req_remain ^ cs_ack_remain);
KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev,
CSI_INTERRUPT_GROUP_SUSPENDS_IGNORED,
group, queue,
cs_req_remain ^ cs_ack_remain);
continue;
}
if (((cs_req & CS_REQ_TILER_OOM_MASK) ^
(cs_ack & CS_ACK_TILER_OOM_MASK))) {
get_queue(queue);
KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, CSI_TILER_OOM_INTERRUPT, group, queue,
cs_req ^ cs_ack);
KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, CSI_INTERRUPT_TILER_OOM,
group, queue, cs_req ^ cs_ack);
if (WARN_ON(!queue_work(wq, &queue->oom_event_work))) {
/* The work item shall not have been
* already queued, there can be only
@@ -2516,8 +2559,8 @@ static void process_cs_interrupts(struct kbase_queue_group *const group,
if ((cs_req & CS_REQ_PROTM_PEND_MASK) ^
(cs_ack & CS_ACK_PROTM_PEND_MASK)) {
KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, CSI_PROTM_PEND_INTERRUPT, group, queue,
cs_req ^ cs_ack);
KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, CSI_INTERRUPT_PROTM_PEND,
group, queue, cs_req ^ cs_ack);
dev_dbg(kbdev->dev,
"Protected mode entry request for queue on csi %d bound to group-%d on slot %d",
@@ -2525,7 +2568,7 @@ static void process_cs_interrupts(struct kbase_queue_group *const group,
group->csg_nr);
bitmap_set(group->protm_pending_bitmap, i, 1);
KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, PROTM_PENDING_SET, group, queue,
KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, CSI_PROTM_PEND_SET, group, queue,
group->protm_pending_bitmap[0]);
protm_pend = true;
}
@@ -2534,12 +2577,10 @@ static void process_cs_interrupts(struct kbase_queue_group *const group,
if (protm_pend) {
struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
u32 current_protm_pending_seq =
scheduler->tick_protm_pending_seq;
if (current_protm_pending_seq > group->scan_seq_num) {
if (scheduler->tick_protm_pending_seq > group->scan_seq_num) {
scheduler->tick_protm_pending_seq = group->scan_seq_num;
queue_work(group->kctx->csf.wq, &group->protm_event_work);
track->protm_grp = group;
}
if (test_bit(group->csg_nr, scheduler->csg_slots_idle_mask)) {
@@ -2557,6 +2598,8 @@ static void process_cs_interrupts(struct kbase_queue_group *const group,
*
* @kbdev: Instance of a GPU platform device that implements a CSF interface.
* @csg_nr: CSG number.
* @track: Pointer that tracks the highest scanout priority idle CSG and any newly
*         potentially viable protected mode requesting group, in current IRQ context.
*
* Handles interrupts for a CSG and for CSs within it.
*
@@ -2567,8 +2610,8 @@ static void process_cs_interrupts(struct kbase_queue_group *const group,
*
* See process_cs_interrupts() for details of per-stream interrupt handling.
*/
static void process_csg_interrupts(struct kbase_device *const kbdev,
int const csg_nr)
static void process_csg_interrupts(struct kbase_device *const kbdev, int const csg_nr,
struct irq_idle_and_protm_track *track)
{
struct kbase_csf_cmd_stream_group_info *ginfo;
struct kbase_queue_group *group = NULL;
@@ -2579,7 +2622,7 @@ static void process_csg_interrupts(struct kbase_device *const kbdev,
if (WARN_ON(csg_nr >= kbdev->csf.global_iface.group_num))
return;
KBASE_KTRACE_ADD(kbdev, CSG_INTERRUPT_PROCESS, NULL, csg_nr);
KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_INTERRUPT_PROCESS_START, group, csg_nr);
ginfo = &kbdev->csf.global_iface.groups[csg_nr];
req = kbase_csf_firmware_csg_input_read(ginfo, CSG_REQ);
@@ -2619,7 +2662,7 @@ static void process_csg_interrupts(struct kbase_device *const kbdev,
kbase_csf_firmware_csg_input_mask(ginfo,
CSG_REQ, ack, CSG_REQ_SYNC_UPDATE_MASK);
KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SYNC_UPDATE_INTERRUPT, group, req ^ ack);
KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_INTERRUPT_SYNC_UPDATE, group, req ^ ack);
/* SYNC_UPDATE events shall invalidate GPU idle event */
atomic_set(&kbdev->csf.scheduler.gpu_no_longer_idle, true);
@@ -2636,7 +2679,7 @@ static void process_csg_interrupts(struct kbase_device *const kbdev,
set_bit(csg_nr, scheduler->csg_slots_idle_mask);
KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_IDLE_SET, group,
scheduler->csg_slots_idle_mask[0]);
KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_IDLE_INTERRUPT, group, req ^ ack);
KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_INTERRUPT_IDLE, group, req ^ ack);
dev_dbg(kbdev->dev, "Idle notification received for Group %u on slot %d\n",
group->handle, csg_nr);
@@ -2645,20 +2688,11 @@ static void process_csg_interrupts(struct kbase_device *const kbdev,
* a tock for a replacement.
*/
mod_delayed_work(scheduler->wq, &scheduler->tock_work, 0);
} else {
u32 current_protm_pending_seq =
scheduler->tick_protm_pending_seq;
}
if ((current_protm_pending_seq !=
KBASEP_TICK_PROTM_PEND_SCAN_SEQ_NR_INVALID) &&
(group->scan_seq_num < current_protm_pending_seq)) {
/* If the protm enter was prevented due to groups
* priority, then fire a tock for the scheduler
* to re-examine the case.
*/
mod_delayed_work(scheduler->wq,
&scheduler->tock_work, 0);
}
if (group->scan_seq_num < track->idle_seq) {
track->idle_seq = group->scan_seq_num;
track->idle_slot = csg_nr;
}
}
@@ -2666,7 +2700,7 @@ static void process_csg_interrupts(struct kbase_device *const kbdev,
kbase_csf_firmware_csg_input_mask(ginfo, CSG_REQ, ack,
CSG_REQ_PROGRESS_TIMER_EVENT_MASK);
KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_PROGRESS_TIMER_INTERRUPT,
KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_INTERRUPT_PROGRESS_TIMER_EVENT,
group, req ^ ack);
dev_info(kbdev->dev,
"[%llu] Iterator PROGRESS_TIMER timeout notification received for group %u of ctx %d_%d on slot %d\n",
@@ -2676,7 +2710,7 @@ static void process_csg_interrupts(struct kbase_device *const kbdev,
handle_progress_timer_event(group);
}
process_cs_interrupts(group, ginfo, irqreq, irqack);
process_cs_interrupts(group, ginfo, irqreq, irqack, track);
out:
/* group may still be NULL here */
@@ -2827,7 +2861,7 @@ static inline void process_protm_exit(struct kbase_device *kbdev, u32 glb_ack)
GLB_REQ_PROTM_EXIT_MASK);
if (likely(scheduler->active_protm_grp)) {
KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_EXIT_PROTM,
KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_PROTM_EXIT,
scheduler->active_protm_grp, 0u);
scheduler->active_protm_grp = NULL;
} else {
@@ -2841,24 +2875,83 @@ static inline void process_protm_exit(struct kbase_device *kbdev, u32 glb_ack)
}
}
static inline void process_tracked_info_for_protm(struct kbase_device *kbdev,
struct irq_idle_and_protm_track *track)
{
struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
struct kbase_queue_group *group = track->protm_grp;
u32 current_protm_pending_seq = scheduler->tick_protm_pending_seq;
kbase_csf_scheduler_spin_lock_assert_held(kbdev);
if (likely(current_protm_pending_seq == KBASEP_TICK_PROTM_PEND_SCAN_SEQ_NR_INVALID))
return;
/* Handle protm from the tracked information */
if (track->idle_seq < current_protm_pending_seq) {
/* If the protm enter was prevented due to groups priority, then fire a tock
* for the scheduler to re-examine the case.
*/
dev_dbg(kbdev->dev, "Attempt pending protm from idle slot %d\n", track->idle_slot);
mod_delayed_work(scheduler->wq, &scheduler->tock_work, 0);
} else if (group) {
u32 i, num_groups = kbdev->csf.global_iface.group_num;
struct kbase_queue_group *grp;
bool tock_triggered = false;
/* A new protm request, and track->idle_seq is not sufficient, check across
* previously notified idle CSGs in the current tick/tock cycle.
*/
for_each_set_bit(i, scheduler->csg_slots_idle_mask, num_groups) {
if (i == track->idle_slot)
continue;
grp = kbase_csf_scheduler_get_group_on_slot(kbdev, i);
/* If not NULL then the group pointer cannot disappear as the
* scheduler spinlock is held.
*/
if (grp == NULL)
continue;
if (grp->scan_seq_num < current_protm_pending_seq) {
tock_triggered = true;
dev_dbg(kbdev->dev,
"Attempt new protm from tick/tock idle slot %d\n", i);
mod_delayed_work(scheduler->wq, &scheduler->tock_work, 0);
break;
}
}
if (!tock_triggered) {
dev_dbg(kbdev->dev, "Group-%d on slot-%d start protm work\n",
group->handle, group->csg_nr);
queue_work(group->kctx->csf.wq, &group->protm_event_work);
}
}
}
void kbase_csf_interrupt(struct kbase_device *kbdev, u32 val)
{
unsigned long flags;
u32 csg_interrupts = val & ~JOB_IRQ_GLOBAL_IF;
struct irq_idle_and_protm_track track = { .protm_grp = NULL, .idle_seq = U32_MAX };
lockdep_assert_held(&kbdev->hwaccess_lock);
KBASE_KTRACE_ADD(kbdev, CSF_INTERRUPT, NULL, val);
KBASE_KTRACE_ADD(kbdev, CSF_INTERRUPT_START, NULL, val);
kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR), val);
if (csg_interrupts != 0) {
kbase_csf_scheduler_spin_lock(kbdev, &flags);
/* Looping through and track the highest idle and protm groups */
while (csg_interrupts != 0) {
int const csg_nr = ffs(csg_interrupts) - 1;
process_csg_interrupts(kbdev, csg_nr);
process_csg_interrupts(kbdev, csg_nr, &track);
csg_interrupts &= ~(1 << csg_nr);
}
/* Handle protm from the tracked information */
process_tracked_info_for_protm(kbdev, &track);
kbase_csf_scheduler_spin_unlock(kbdev, flags);
}
@@ -2878,7 +2971,7 @@ void kbase_csf_interrupt(struct kbase_device *kbdev, u32 val)
global_iface, GLB_REQ);
glb_ack = kbase_csf_firmware_global_output(
global_iface, GLB_ACK);
KBASE_KTRACE_ADD(kbdev, GLB_REQ_ACQ, NULL, glb_req ^ glb_ack);
KBASE_KTRACE_ADD(kbdev, CSF_INTERRUPT_GLB_REQ_ACK, NULL, glb_req ^ glb_ack);
check_protm_enter_req_complete(kbdev, glb_req, glb_ack);

View File

@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
* (C) COPYRIGHT 2018-2021 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2018-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -45,8 +45,6 @@
*/
#define KBASEP_TICK_PROTM_PEND_SCAN_SEQ_NR_INVALID (U32_MAX)
#define FIRMWARE_PING_INTERVAL_MS (12000) /* 12 seconds */
#define FIRMWARE_IDLE_HYSTERESIS_TIME_MS (10) /* Default 10 milliseconds */
/* Idle hysteresis time can be scaled down when GPU sleep feature is used */
@@ -161,8 +159,9 @@ int kbase_csf_queue_bind(struct kbase_context *kctx,
* are any.
*
* @queue: Pointer to queue to be unbound.
* @process_exit: Flag to indicate if process exit is happening.
*/
void kbase_csf_queue_unbind(struct kbase_queue *queue);
void kbase_csf_queue_unbind(struct kbase_queue *queue, bool process_exit);
/**
* kbase_csf_queue_unbind_stopped - Unbind a GPU command queue in the case

View File

@@ -500,11 +500,7 @@ static const struct file_operations kbasep_csf_queue_group_debugfs_fops = {
void kbase_csf_queue_group_debugfs_init(struct kbase_context *kctx)
{
struct dentry *file;
#if (KERNEL_VERSION(4, 7, 0) <= LINUX_VERSION_CODE)
const mode_t mode = 0444;
#else
const mode_t mode = 0400;
#endif
if (WARN_ON(!kctx || IS_ERR_OR_NULL(kctx->kctx_dentry)))
return;

View File

@@ -261,6 +261,8 @@ enum kbase_queue_group_priority {
* @CSF_GPU_RESET_TIMEOUT: Waiting timeout for GPU reset to complete.
* @CSF_CSG_SUSPEND_TIMEOUT: Timeout given for all active CSGs to be suspended.
* @CSF_FIRMWARE_BOOT_TIMEOUT: Maximum time to wait for firmware to boot.
* @CSF_FIRMWARE_PING_TIMEOUT: Maximum time to wait for firmware to respond
* to a ping from KBase.
* @CSF_SCHED_PROTM_PROGRESS_TIMEOUT: Timeout used to prevent protected mode execution hang.
* @KBASE_TIMEOUT_SELECTOR_COUNT: Number of timeout selectors. Must be last in
* the enum.
@@ -271,6 +273,7 @@ enum kbase_timeout_selector {
CSF_GPU_RESET_TIMEOUT,
CSF_CSG_SUSPEND_TIMEOUT,
CSF_FIRMWARE_BOOT_TIMEOUT,
CSF_FIRMWARE_PING_TIMEOUT,
CSF_SCHED_PROTM_PROGRESS_TIMEOUT,
/* Must be the last in the enum */
@@ -452,6 +455,7 @@ struct kbase_protected_suspend_buffer {
* allowed to use.
* @compute_max: Maximum number of compute endpoints the group is
* allowed to use.
* @csi_handlers: Requested CSI exception handler flags for the group.
* @tiler_mask: Mask of tiler endpoints the group is allowed to use.
* @fragment_mask: Mask of fragment endpoints the group is allowed to use.
* @compute_mask: Mask of compute endpoints the group is allowed to use.
@@ -473,6 +477,10 @@ struct kbase_protected_suspend_buffer {
* @faulted: Indicates that a GPU fault occurred for the queue group.
* This flag persists until the fault has been queued to be
* reported to userspace.
* @reevaluate_idle_status: Flag set when work is submitted for the normal group
* or it becomes unblocked during protected mode. The
* flag helps Scheduler confirm if the group actually
* became non idle or not.
* @bound_queues: Array of registered queues bound to this queue group.
* @doorbell_nr: Index of the hardware doorbell page assigned to the
* group.
@@ -500,6 +508,7 @@ struct kbase_queue_group {
u8 tiler_max;
u8 fragment_max;
u8 compute_max;
u8 csi_handlers;
u64 tiler_mask;
u64 fragment_mask;
@@ -513,6 +522,7 @@ struct kbase_queue_group {
u32 prepared_seq_num;
u32 scan_seq_num;
bool faulted;
bool reevaluate_idle_status;
struct kbase_queue *bound_queues[MAX_SUPPORTED_STREAMS_PER_GROUP];

View File

@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
* (C) COPYRIGHT 2021 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -102,7 +102,7 @@ static void sync_update_notify_gpu(struct kbase_context *kctx)
if (can_notify_gpu) {
kbase_csf_ring_doorbell(kctx->kbdev, CSF_KERNEL_DOORBELL_NR);
KBASE_KTRACE_ADD(kctx->kbdev, SYNC_UPDATE_EVENT_NOTIFY_GPU, kctx, 0u);
KBASE_KTRACE_ADD(kctx->kbdev, CSF_SYNC_UPDATE_NOTIFY_GPU_EVENT, kctx, 0u);
}
spin_unlock_irqrestore(&kctx->kbdev->hwaccess_lock, flags);
@@ -226,12 +226,15 @@ void kbase_csf_event_add_error(struct kbase_context *const kctx,
return;
spin_lock_irqsave(&kctx->csf.event.lock, flags);
if (!WARN_ON(!list_empty(&error->link))) {
if (list_empty(&error->link)) {
error->data = *data;
list_add_tail(&error->link, &kctx->csf.event.error_list);
dev_dbg(kctx->kbdev->dev,
"Added error %pK of type %d in context %pK\n",
(void *)error, data->type, (void *)kctx);
} else {
dev_dbg(kctx->kbdev->dev, "Error %pK of type %d already pending in context %pK",
(void *)error, error->data.type, (void *)kctx);
}
spin_unlock_irqrestore(&kctx->csf.event.lock, flags);
}

View File

@@ -44,6 +44,7 @@
#include <linux/mman.h>
#include <linux/string.h>
#include <linux/mutex.h>
#include <linux/ctype.h>
#if (KERNEL_VERSION(4, 13, 0) <= LINUX_VERSION_CODE)
#include <linux/set_memory.h>
#endif
@@ -94,6 +95,7 @@ MODULE_PARM_DESC(fw_debug,
#define CSF_FIRMWARE_ENTRY_TYPE_FUTF_TEST (2)
#define CSF_FIRMWARE_ENTRY_TYPE_TRACE_BUFFER (3)
#define CSF_FIRMWARE_ENTRY_TYPE_TIMELINE_METADATA (4)
#define CSF_FIRMWARE_ENTRY_TYPE_BUILD_INFO_METADATA (6)
#define CSF_FIRMWARE_CACHE_MODE_NONE (0ul << 3)
#define CSF_FIRMWARE_CACHE_MODE_CACHED (1ul << 3)
@@ -104,12 +106,18 @@ MODULE_PARM_DESC(fw_debug,
#define TL_METADATA_ENTRY_NAME_OFFSET (0x8)
#define BUILD_INFO_METADATA_SIZE_OFFSET (0x4)
#define BUILD_INFO_GIT_SHA_LEN (40U)
#define BUILD_INFO_GIT_DIRTY_LEN (1U)
#define BUILD_INFO_GIT_SHA_PATTERN "git_sha: "
#define CSF_MAX_FW_STOP_LOOPS (100000)
#define CSF_GLB_REQ_CFG_MASK \
(GLB_REQ_CFG_ALLOC_EN_MASK | GLB_REQ_CFG_PROGRESS_TIMER_MASK | \
GLB_REQ_CFG_PWROFF_TIMER_MASK | GLB_REQ_IDLE_ENABLE_MASK)
static inline u32 input_page_read(const u32 *const input, const u32 offset)
{
WARN_ON(offset % sizeof(u32));
@@ -719,8 +727,9 @@ static int parse_memory_setup_entry(struct kbase_device *kbdev,
if (!reuse_pages) {
ret = kbase_mmu_insert_pages_no_flush(kbdev, &kbdev->csf.mcu_mmu,
virtual_start >> PAGE_SHIFT, phys, num_pages_aligned, mem_flags,
KBASE_MEM_GROUP_CSF_FW);
virtual_start >> PAGE_SHIFT, phys,
num_pages_aligned, mem_flags,
KBASE_MEM_GROUP_CSF_FW, NULL);
if (ret != 0) {
dev_err(kbdev->dev, "Failed to insert firmware pages\n");
@@ -810,6 +819,57 @@ static int parse_timeline_metadata_entry(struct kbase_device *kbdev,
return 0;
}
/**
 * parse_build_info_metadata_entry() - Process a "build info metadata" section
 * @kbdev: Kbase device structure
 * @fw:    Firmware image containing the section
 * @entry: Pointer to the section
 * @size:  Size (in bytes) of the section
 *
 * This prints the git SHA of the firmware on firmware load. Only the hex
 * digits of the SHA, plus an optional trailing ' ' or '+' marker, are copied
 * out of the image; nothing else from the metadata string is logged.
 *
 * Return: 0 if successful, negative error code on failure
 */
static int parse_build_info_metadata_entry(struct kbase_device *kbdev,
					   const struct kbase_csf_mcu_fw *const fw,
					   const u32 *entry, unsigned int size)
{
	const u32 meta_start_addr = entry[0];
	const size_t sha_pattern_len = strlen(BUILD_INFO_GIT_SHA_PATTERN);
	/* Pattern + SHA + dirty marker: the span that is read below */
	const size_t min_len =
		sha_pattern_len + BUILD_INFO_GIT_SHA_LEN + BUILD_INFO_GIT_DIRTY_LEN;
	char *ptr = NULL;

	/* Only print git SHA to avoid releasing sensitive information.
	 * NOTE(review): the search relies on the metadata being NUL terminated
	 * inside the firmware image - confirm against the firmware format.
	 */
	ptr = strstr(fw->data + meta_start_addr, BUILD_INFO_GIT_SHA_PATTERN);

	/* Check that we won't overrun the found string. strnlen() stops as soon
	 * as the required length is reached instead of walking the rest of the
	 * image looking for the terminator.
	 */
	if (ptr && strnlen(ptr, min_len) >= min_len) {
		char git_sha[BUILD_INFO_GIT_SHA_LEN + BUILD_INFO_GIT_DIRTY_LEN + 1];
		unsigned int i;

		/* Move ptr to start of SHA */
		ptr += sha_pattern_len;
		for (i = 0; i < BUILD_INFO_GIT_SHA_LEN; i++) {
			/* Ensure that the SHA is made up of hex digits */
			if (!isxdigit(ptr[i]))
				break;

			git_sha[i] = ptr[i];
		}

		/* Check if the next char indicates git SHA is dirty */
		if (ptr[i] == ' ' || ptr[i] == '+') {
			git_sha[i] = ptr[i];
			i++;
		}
		git_sha[i] = '\0';

		dev_info(kbdev->dev, "Mali firmware git_sha: %s\n", git_sha);
	} else {
		dev_info(kbdev->dev, "Mali firmware git_sha not found or invalid\n");
	}

	return 0;
}
/**
* load_firmware_entry() - Process an entry from a firmware image
*
@@ -889,6 +949,13 @@ static int load_firmware_entry(struct kbase_device *kbdev, const struct kbase_cs
return -EINVAL;
}
return parse_timeline_metadata_entry(kbdev, fw, entry, size);
case CSF_FIRMWARE_ENTRY_TYPE_BUILD_INFO_METADATA:
if (size < BUILD_INFO_METADATA_SIZE_OFFSET + sizeof(*entry)) {
dev_err(kbdev->dev, "Build info metadata entry too short (size=%u)\n",
size);
return -EINVAL;
}
return parse_build_info_metadata_entry(kbdev, fw, entry, size);
}
if (!optional) {
@@ -1491,6 +1558,7 @@ static void enable_gpu_idle_timer(struct kbase_device *const kbdev)
kbdev->csf.gpu_idle_dur_count);
}
static void global_init(struct kbase_device *const kbdev, u64 core_mask)
{
u32 const ack_irq_mask =
@@ -1665,7 +1733,7 @@ void kbase_csf_firmware_reload_completed(struct kbase_device *kbdev)
if (version != kbdev->csf.global_iface.version)
dev_err(kbdev->dev, "Version check failed in firmware reboot.");
KBASE_KTRACE_ADD(kbdev, FIRMWARE_REBOOT, NULL, 0u);
KBASE_KTRACE_ADD(kbdev, CSF_FIRMWARE_REBOOT, NULL, 0u);
/* Tell MCU state machine to transit to next state */
kbdev->csf.firmware_reloaded = true;
@@ -2121,7 +2189,7 @@ int kbase_csf_firmware_init(struct kbase_device *kbdev)
goto err_out;
/* Firmware loaded successfully, ret = 0 */
KBASE_KTRACE_ADD(kbdev, FIRMWARE_BOOT, NULL,
KBASE_KTRACE_ADD(kbdev, CSF_FIRMWARE_BOOT, NULL,
(((u64)version_hash) << 32) |
(((u64)version_major) << 8) | version_minor);
return 0;
@@ -2611,9 +2679,9 @@ int kbase_csf_firmware_mcu_shared_mapping_init(
gpu_map_properties &= (KBASE_REG_GPU_RD | KBASE_REG_GPU_WR);
gpu_map_properties |= gpu_map_prot;
ret = kbase_mmu_insert_pages_no_flush(kbdev, &kbdev->csf.mcu_mmu,
va_reg->start_pfn, &phys[0], num_pages,
gpu_map_properties, KBASE_MEM_GROUP_CSF_FW);
ret = kbase_mmu_insert_pages_no_flush(kbdev, &kbdev->csf.mcu_mmu, va_reg->start_pfn,
&phys[0], num_pages, gpu_map_properties,
KBASE_MEM_GROUP_CSF_FW, NULL);
if (ret)
goto mmu_insert_pages_error;
@@ -2674,3 +2742,4 @@ void kbase_csf_firmware_mcu_shared_mapping_term(
vunmap(csf_mapping->cpu_addr);
kfree(csf_mapping->phys);
}

View File

@@ -246,6 +246,7 @@ void kbase_csf_firmware_csg_input_mask(
u32 kbase_csf_firmware_csg_output(
const struct kbase_csf_cmd_stream_group_info *info, u32 offset);
/**
* struct kbase_csf_global_iface - Global CSF interface
* provided by the firmware.
@@ -794,4 +795,6 @@ static inline u32 kbase_csf_interface_version(u32 major, u32 minor, u32 patch)
* Return: 0 if success, or negative error code on failure.
*/
int kbase_csf_trigger_firmware_config_update(struct kbase_device *kbdev);
#endif

View File

@@ -22,6 +22,7 @@
#include <mali_kbase.h>
#include "mali_kbase_csf_firmware_cfg.h"
#include <mali_kbase_reset_gpu.h>
#include <linux/version.h>
#if CONFIG_SYSFS
#define CSF_FIRMWARE_CFG_SYSFS_DIR_NAME "firmware_config"
@@ -209,11 +210,18 @@ static struct attribute *fw_cfg_attrs[] = {
&fw_cfg_attr_cur,
NULL,
};
#if (KERNEL_VERSION(5, 2, 0) <= LINUX_VERSION_CODE)
ATTRIBUTE_GROUPS(fw_cfg);
#endif
static struct kobj_type fw_cfg_kobj_type = {
.release = &fw_cfg_kobj_release,
.sysfs_ops = &fw_cfg_ops,
#if (KERNEL_VERSION(5, 2, 0) <= LINUX_VERSION_CODE)
.default_groups = fw_cfg_groups,
#else
.default_attrs = fw_cfg_attrs,
#endif
};
int kbase_csf_firmware_cfg_init(struct kbase_device *kbdev)

View File

@@ -104,6 +104,7 @@ struct dummy_firmware_interface {
(GLB_REQ_CFG_ALLOC_EN_MASK | GLB_REQ_CFG_PROGRESS_TIMER_MASK | \
GLB_REQ_CFG_PWROFF_TIMER_MASK | GLB_REQ_IDLE_ENABLE_MASK)
static inline u32 input_page_read(const u32 *const input, const u32 offset)
{
WARN_ON(offset % sizeof(u32));
@@ -703,17 +704,16 @@ static void enable_gpu_idle_timer(struct kbase_device *const kbdev)
kbdev->csf.gpu_idle_dur_count);
}
static void global_init(struct kbase_device *const kbdev, u64 core_mask)
{
u32 const ack_irq_mask = GLB_ACK_IRQ_MASK_CFG_ALLOC_EN_MASK |
GLB_ACK_IRQ_MASK_PING_MASK |
GLB_ACK_IRQ_MASK_CFG_PROGRESS_TIMER_MASK |
GLB_ACK_IRQ_MASK_PROTM_ENTER_MASK |
GLB_ACK_IRQ_MASK_FIRMWARE_CONFIG_UPDATE_MASK |
GLB_ACK_IRQ_MASK_PROTM_EXIT_MASK |
GLB_ACK_IRQ_MASK_CFG_PWROFF_TIMER_MASK |
GLB_ACK_IRQ_MASK_IDLE_EVENT_MASK |
GLB_ACK_IRQ_MASK_IDLE_ENABLE_MASK;
u32 const ack_irq_mask =
GLB_ACK_IRQ_MASK_CFG_ALLOC_EN_MASK | GLB_ACK_IRQ_MASK_PING_MASK |
GLB_ACK_IRQ_MASK_CFG_PROGRESS_TIMER_MASK | GLB_ACK_IRQ_MASK_PROTM_ENTER_MASK |
GLB_ACK_IRQ_MASK_PROTM_EXIT_MASK | GLB_ACK_IRQ_MASK_FIRMWARE_CONFIG_UPDATE_MASK |
GLB_ACK_IRQ_MASK_CFG_PWROFF_TIMER_MASK | GLB_ACK_IRQ_MASK_IDLE_EVENT_MASK |
GLB_ACK_IRQ_MASK_IDLE_ENABLE_MASK |
0;
const struct kbase_csf_global_iface *const global_iface =
&kbdev->csf.global_iface;
@@ -1473,7 +1473,7 @@ int kbase_csf_firmware_mcu_shared_mapping_init(
gpu_map_prot =
KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_NON_CACHEABLE);
cpu_map_prot = pgprot_writecombine(cpu_map_prot);
};
}
phys = kmalloc_array(num_pages, sizeof(*phys), GFP_KERNEL);
if (!phys)
@@ -1511,9 +1511,9 @@ int kbase_csf_firmware_mcu_shared_mapping_init(
gpu_map_properties &= (KBASE_REG_GPU_RD | KBASE_REG_GPU_WR);
gpu_map_properties |= gpu_map_prot;
ret = kbase_mmu_insert_pages_no_flush(kbdev, &kbdev->csf.mcu_mmu,
va_reg->start_pfn, &phys[0], num_pages,
gpu_map_properties, KBASE_MEM_GROUP_CSF_FW);
ret = kbase_mmu_insert_pages_no_flush(kbdev, &kbdev->csf.mcu_mmu, va_reg->start_pfn,
&phys[0], num_pages, gpu_map_properties,
KBASE_MEM_GROUP_CSF_FW, NULL);
if (ret)
goto mmu_insert_pages_error;
@@ -1574,3 +1574,4 @@ void kbase_csf_firmware_mcu_shared_mapping_term(
vunmap(csf_mapping->cpu_addr);
kfree(csf_mapping->phys);
}

View File

@@ -33,6 +33,10 @@
static DEFINE_SPINLOCK(kbase_csf_fence_lock);
#endif
#ifdef CONFIG_MALI_BIFROST_FENCE_DEBUG
#define FENCE_WAIT_TIMEOUT_MS 3000
#endif
static void kcpu_queue_process(struct kbase_kcpu_command_queue *kcpu_queue,
bool drain_queue);
@@ -748,7 +752,7 @@ static int kbase_kcpu_cqs_wait_process(struct kbase_device *kbdev,
KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_START(kbdev,
queue);
queue->command_started = true;
KBASE_KTRACE_ADD_CSF_KCPU(kbdev, CQS_WAIT_START,
KBASE_KTRACE_ADD_CSF_KCPU(kbdev, KCPU_CQS_WAIT_START,
queue, cqs_wait->nr_objs, 0);
}
@@ -770,7 +774,7 @@ static int kbase_kcpu_cqs_wait_process(struct kbase_device *kbdev,
error = true;
}
KBASE_KTRACE_ADD_CSF_KCPU(kbdev, CQS_WAIT_END,
KBASE_KTRACE_ADD_CSF_KCPU(kbdev, KCPU_CQS_WAIT_END,
queue, cqs_wait->objs[i].addr,
error);
@@ -877,7 +881,7 @@ static void kbase_kcpu_cqs_set_process(struct kbase_device *kbdev,
evt[BASEP_EVENT_VAL_INDEX]++;
kbase_phy_alloc_mapping_put(queue->kctx, mapping);
KBASE_KTRACE_ADD_CSF_KCPU(kbdev, CQS_SET,
KBASE_KTRACE_ADD_CSF_KCPU(kbdev, KCPU_CQS_SET,
queue, cqs_set->objs[i].addr,
evt[BASEP_EVENT_ERR_INDEX]);
}
@@ -1200,7 +1204,11 @@ static void kbase_csf_fence_wait_callback(struct dma_fence *fence,
struct kbase_kcpu_command_queue *kcpu_queue = fence_info->kcpu_queue;
struct kbase_context *const kctx = kcpu_queue->kctx;
KBASE_KTRACE_ADD_CSF_KCPU(kctx->kbdev, FENCE_WAIT_END, kcpu_queue,
#ifdef CONFIG_MALI_BIFROST_FENCE_DEBUG
/* Fence gets signaled. Deactivate the timer for fence-wait timeout */
del_timer(&kcpu_queue->fence_timeout);
#endif
KBASE_KTRACE_ADD_CSF_KCPU(kctx->kbdev, KCPU_FENCE_WAIT_END, kcpu_queue,
fence->context, fence->seqno);
/* Resume kcpu command queue processing. */
@@ -1222,8 +1230,15 @@ static void kbase_kcpu_fence_wait_cancel(
bool removed = dma_fence_remove_callback(fence_info->fence,
&fence_info->fence_cb);
#ifdef CONFIG_MALI_BIFROST_FENCE_DEBUG
/* Fence-wait cancelled or fence signaled. In the latter case
* the timer would already have been deactivated inside
* kbase_csf_fence_wait_callback().
*/
del_timer_sync(&kcpu_queue->fence_timeout);
#endif
if (removed)
KBASE_KTRACE_ADD_CSF_KCPU(kctx->kbdev, FENCE_WAIT_END,
KBASE_KTRACE_ADD_CSF_KCPU(kctx->kbdev, KCPU_FENCE_WAIT_END,
kcpu_queue, fence_info->fence->context,
fence_info->fence->seqno);
}
@@ -1235,6 +1250,80 @@ static void kbase_kcpu_fence_wait_cancel(
fence_info->fence = NULL;
}
#ifdef CONFIG_MALI_BIFROST_FENCE_DEBUG
/**
* fence_timeout_callback() - Timeout callback function for fence-wait
*
* @timer: Timer struct
*
* Context and seqno of the timed-out fence will be displayed in dmesg.
* If the fence has been signalled a work will be enqueued to process
* the fence-wait without displaying debugging information.
*/
static void fence_timeout_callback(struct timer_list *timer)
{
struct kbase_kcpu_command_queue *kcpu_queue =
container_of(timer, struct kbase_kcpu_command_queue, fence_timeout);
struct kbase_context *const kctx = kcpu_queue->kctx;
struct kbase_kcpu_command *cmd = &kcpu_queue->commands[kcpu_queue->start_offset];
struct kbase_kcpu_command_fence_info *fence_info;
#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE)
struct fence *fence;
#else
struct dma_fence *fence;
#endif
struct kbase_sync_fence_info info;
if (cmd->type != BASE_KCPU_COMMAND_TYPE_FENCE_WAIT) {
dev_err(kctx->kbdev->dev,
"%s: Unexpected command type %d in ctx:%d_%d kcpu queue:%u", __func__,
cmd->type, kctx->tgid, kctx->id, kcpu_queue->id);
return;
}
fence_info = &cmd->info.fence;
fence = kbase_fence_get(fence_info);
if (!fence) {
dev_err(kctx->kbdev->dev, "no fence found in ctx:%d_%d kcpu queue:%u", kctx->tgid,
kctx->id, kcpu_queue->id);
return;
}
kbase_sync_fence_info_get(fence, &info);
if (info.status == 1) {
queue_work(kctx->csf.kcpu_queues.wq, &kcpu_queue->work);
} else if (info.status == 0) {
dev_warn(kctx->kbdev->dev, "fence has not yet signalled in %ums",
FENCE_WAIT_TIMEOUT_MS);
dev_warn(kctx->kbdev->dev,
"ctx:%d_%d kcpu queue:%u still waiting for fence[%pK] context#seqno:%s",
kctx->tgid, kctx->id, kcpu_queue->id, fence, info.name);
} else {
dev_warn(kctx->kbdev->dev, "fence has got error");
dev_warn(kctx->kbdev->dev,
"ctx:%d_%d kcpu queue:%u faulty fence[%pK] context#seqno:%s error(%d)",
kctx->tgid, kctx->id, kcpu_queue->id, fence, info.name, info.status);
}
kbase_fence_put(fence);
}
/**
 * fence_timeout_start() - Arm the fence-wait timeout timer of a KCPU queue
 *
 * @cmd: KCPU command queue whose fence_timeout timer is to be (re)armed
 *
 * The timer is set to expire FENCE_WAIT_TIMEOUT_MS milliseconds from now, so
 * that a fence-wait command which has not completed by then gets reported.
 */
static void fence_timeout_start(struct kbase_kcpu_command_queue *cmd)
{
	const unsigned long expiry = jiffies + msecs_to_jiffies(FENCE_WAIT_TIMEOUT_MS);

	mod_timer(&cmd->fence_timeout, expiry);
}
#endif
/**
* kbase_kcpu_fence_wait_process() - Process the kcpu fence wait command
*
@@ -1254,8 +1343,9 @@ static int kbase_kcpu_fence_wait_process(
#else
struct dma_fence *fence;
#endif
struct kbase_context *const kctx = kcpu_queue->kctx;
lockdep_assert_held(&kcpu_queue->kctx->csf.kcpu_queues.lock);
lockdep_assert_held(&kctx->csf.kcpu_queues.lock);
if (WARN_ON(!fence_info->fence))
return -EINVAL;
@@ -1269,14 +1359,26 @@ static int kbase_kcpu_fence_wait_process(
&fence_info->fence_cb,
kbase_csf_fence_wait_callback);
KBASE_KTRACE_ADD_CSF_KCPU(kcpu_queue->kctx->kbdev,
FENCE_WAIT_START, kcpu_queue,
KBASE_KTRACE_ADD_CSF_KCPU(kctx->kbdev,
KCPU_FENCE_WAIT_START, kcpu_queue,
fence->context, fence->seqno);
fence_status = cb_err;
if (cb_err == 0)
if (cb_err == 0) {
kcpu_queue->fence_wait_processed = true;
else if (cb_err == -ENOENT)
#ifdef CONFIG_MALI_BIFROST_FENCE_DEBUG
fence_timeout_start(kcpu_queue);
#endif
} else if (cb_err == -ENOENT) {
fence_status = dma_fence_get_status(fence);
if (!fence_status) {
struct kbase_sync_fence_info info;
kbase_sync_fence_info_get(fence, &info);
dev_warn(kctx->kbdev->dev,
"Unexpected status for fence %s of ctx:%d_%d kcpu queue:%u",
info.name, kctx->tgid, kctx->id, kcpu_queue->id);
}
}
}
/*
@@ -1321,7 +1423,6 @@ static int kbase_kcpu_fence_wait_prepare(
current_command->type = BASE_KCPU_COMMAND_TYPE_FENCE_WAIT;
current_command->info.fence.fence = fence_in;
current_command->info.fence.kcpu_queue = kcpu_queue;
return 0;
}
@@ -1343,7 +1444,7 @@ static int kbase_kcpu_fence_signal_process(
ret = 0;
}
KBASE_KTRACE_ADD_CSF_KCPU(kctx->kbdev, FENCE_SIGNAL, kcpu_queue,
KBASE_KTRACE_ADD_CSF_KCPU(kctx->kbdev, KCPU_FENCE_SIGNAL, kcpu_queue,
fence_info->fence->context,
fence_info->fence->seqno);
@@ -1465,7 +1566,7 @@ static int delete_queue(struct kbase_context *kctx, u32 id)
struct kbase_kcpu_command_queue *queue =
kctx->csf.kcpu_queues.array[id];
KBASE_KTRACE_ADD_CSF_KCPU(kctx->kbdev, KCPU_QUEUE_DESTROY,
KBASE_KTRACE_ADD_CSF_KCPU(kctx->kbdev, KCPU_QUEUE_DELETE,
queue, queue->num_pending_cmds, queue->cqs_wait_count);
/* Drain the remaining work for this queue first and go past
@@ -2221,8 +2322,11 @@ int kbase_csf_kcpu_queue_new(struct kbase_context *kctx,
KBASE_TLSTREAM_TL_KBASE_NEW_KCPUQUEUE(kctx->kbdev, queue, queue->id, kctx->id,
queue->num_pending_cmds);
KBASE_KTRACE_ADD_CSF_KCPU(kctx->kbdev, KCPU_QUEUE_NEW, queue,
KBASE_KTRACE_ADD_CSF_KCPU(kctx->kbdev, KCPU_QUEUE_CREATE, queue,
queue->fence_context, 0);
#ifdef CONFIG_MALI_BIFROST_FENCE_DEBUG
kbase_timer_setup(&queue->fence_timeout, fence_timeout_callback);
#endif
out:
mutex_unlock(&kctx->csf.kcpu_queues.lock);

View File

@@ -47,9 +47,9 @@ struct kbase_kcpu_command_import_info {
* struct kbase_kcpu_command_fence_info - Structure which holds information
* about the fence object enqueued in the kcpu command queue
*
* @fence_cb: Fence callback
* @fence: Fence
* @kcpu_queue: kcpu command queue
* @fence_cb: Fence callback
* @fence: Fence
* @kcpu_queue: kcpu command queue
*/
struct kbase_kcpu_command_fence_info {
#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE)
@@ -271,6 +271,7 @@ struct kbase_kcpu_command {
* or without errors since last cleaned.
* @jit_blocked: Used to keep track of command queues blocked
* by a pending JIT allocation command.
* @fence_timeout: Timer used to detect the fence wait timeout.
*/
struct kbase_kcpu_command_queue {
struct kbase_context *kctx;
@@ -287,6 +288,9 @@ struct kbase_kcpu_command_queue {
bool command_started;
struct list_head jit_blocked;
bool has_error;
#ifdef CONFIG_MALI_BIFROST_FENCE_DEBUG
struct timer_list fence_timeout;
#endif /* CONFIG_MALI_BIFROST_FENCE_DEBUG */
};
/**

View File

@@ -167,11 +167,7 @@ static const struct file_operations kbasep_csf_kcpu_debugfs_fops = {
void kbase_csf_kcpu_debugfs_init(struct kbase_context *kctx)
{
struct dentry *file;
#if (KERNEL_VERSION(4, 7, 0) <= LINUX_VERSION_CODE)
const mode_t mode = 0444;
#else
const mode_t mode = 0400;
#endif
if (WARN_ON(!kctx || IS_ERR_OR_NULL(kctx->kctx_dentry)))
return;

View File

@@ -387,7 +387,7 @@
/* CS_BASE register */
#define CS_BASE_POINTER_SHIFT 0
#define CS_BASE_POINTER_MASK (0xFFFFFFFFFFFFFFFF << CS_BASE_POINTER_SHIFT)
#define CS_BASE_POINTER_MASK (GPU_ULL(0xFFFFFFFFFFFFFFFF) << CS_BASE_POINTER_SHIFT)
#define CS_BASE_POINTER_GET(reg_val) (((reg_val)&CS_BASE_POINTER_MASK) >> CS_BASE_POINTER_SHIFT)
#define CS_BASE_POINTER_SET(reg_val, value) \
(((reg_val) & ~CS_BASE_POINTER_MASK) | (((value) << CS_BASE_POINTER_SHIFT) & CS_BASE_POINTER_MASK))
@@ -401,7 +401,8 @@
/* CS_TILER_HEAP_START register */
#define CS_TILER_HEAP_START_POINTER_SHIFT 0
#define CS_TILER_HEAP_START_POINTER_MASK (0xFFFFFFFFFFFFFFFF << CS_TILER_HEAP_START_POINTER_SHIFT)
#define CS_TILER_HEAP_START_POINTER_MASK \
(GPU_ULL(0xFFFFFFFFFFFFFFFF) << CS_TILER_HEAP_START_POINTER_SHIFT)
#define CS_TILER_HEAP_START_POINTER_GET(reg_val) \
(((reg_val)&CS_TILER_HEAP_START_POINTER_MASK) >> CS_TILER_HEAP_START_POINTER_SHIFT)
#define CS_TILER_HEAP_START_POINTER_SET(reg_val, value) \
@@ -412,7 +413,8 @@
/* CS_TILER_HEAP_END register */
#define CS_TILER_HEAP_END_POINTER_SHIFT 0
#define CS_TILER_HEAP_END_POINTER_MASK (0xFFFFFFFFFFFFFFFF << CS_TILER_HEAP_END_POINTER_SHIFT)
#define CS_TILER_HEAP_END_POINTER_MASK \
(GPU_ULL(0xFFFFFFFFFFFFFFFF) << CS_TILER_HEAP_END_POINTER_SHIFT)
#define CS_TILER_HEAP_END_POINTER_GET(reg_val) \
(((reg_val)&CS_TILER_HEAP_END_POINTER_MASK) >> CS_TILER_HEAP_END_POINTER_SHIFT)
#define CS_TILER_HEAP_END_POINTER_SET(reg_val, value) \
@@ -423,7 +425,7 @@
/* CS_USER_INPUT register */
#define CS_USER_INPUT_POINTER_SHIFT 0
#define CS_USER_INPUT_POINTER_MASK (0xFFFFFFFFFFFFFFFF << CS_USER_INPUT_POINTER_SHIFT)
#define CS_USER_INPUT_POINTER_MASK (GPU_ULL(0xFFFFFFFFFFFFFFFF) << CS_USER_INPUT_POINTER_SHIFT)
#define CS_USER_INPUT_POINTER_GET(reg_val) (((reg_val)&CS_USER_INPUT_POINTER_MASK) >> CS_USER_INPUT_POINTER_SHIFT)
#define CS_USER_INPUT_POINTER_SET(reg_val, value) \
(((reg_val) & ~CS_USER_INPUT_POINTER_MASK) | \
@@ -431,7 +433,7 @@
/* CS_USER_OUTPUT register */
#define CS_USER_OUTPUT_POINTER_SHIFT 0
#define CS_USER_OUTPUT_POINTER_MASK (0xFFFFFFFFFFFFFFFF << CS_USER_OUTPUT_POINTER_SHIFT)
#define CS_USER_OUTPUT_POINTER_MASK (GPU_ULL(0xFFFFFFFFFFFFFFFF) << CS_USER_OUTPUT_POINTER_SHIFT)
#define CS_USER_OUTPUT_POINTER_GET(reg_val) (((reg_val)&CS_USER_OUTPUT_POINTER_MASK) >> CS_USER_OUTPUT_POINTER_SHIFT)
#define CS_USER_OUTPUT_POINTER_SET(reg_val, value) \
(((reg_val) & ~CS_USER_OUTPUT_POINTER_MASK) | \
@@ -470,7 +472,8 @@
/* CS_INSTR_BUFFER_BASE register */
#define CS_INSTR_BUFFER_BASE_POINTER_SHIFT (0)
#define CS_INSTR_BUFFER_BASE_POINTER_MASK ((u64)0xFFFFFFFFFFFFFFFF << CS_INSTR_BUFFER_BASE_POINTER_SHIFT)
#define CS_INSTR_BUFFER_BASE_POINTER_MASK \
(GPU_ULL(0xFFFFFFFFFFFFFFFF) << CS_INSTR_BUFFER_BASE_POINTER_SHIFT)
#define CS_INSTR_BUFFER_BASE_POINTER_GET(reg_val) \
(((reg_val)&CS_INSTR_BUFFER_BASE_POINTER_MASK) >> CS_INSTR_BUFFER_BASE_POINTER_SHIFT)
#define CS_INSTR_BUFFER_BASE_POINTER_SET(reg_val, value) \
@@ -479,8 +482,8 @@
/* CS_INSTR_BUFFER_OFFSET_POINTER register */
#define CS_INSTR_BUFFER_OFFSET_POINTER_POINTER_SHIFT (0)
#define CS_INSTR_BUFFER_OFFSET_POINTER_POINTER_MASK \
(((u64)0xFFFFFFFFFFFFFFFF) << CS_INSTR_BUFFER_OFFSET_POINTER_POINTER_SHIFT)
#define CS_INSTR_BUFFER_OFFSET_POINTER_POINTER_MASK \
((GPU_ULL(0xFFFFFFFFFFFFFFFF)) << CS_INSTR_BUFFER_OFFSET_POINTER_POINTER_SHIFT)
#define CS_INSTR_BUFFER_OFFSET_POINTER_POINTER_GET(reg_val) \
(((reg_val)&CS_INSTR_BUFFER_OFFSET_POINTER_POINTER_MASK) >> CS_INSTR_BUFFER_OFFSET_POINTER_POINTER_SHIFT)
#define CS_INSTR_BUFFER_OFFSET_POINTER_POINTER_SET(reg_val, value) \
@@ -529,7 +532,8 @@
/* CS_STATUS_CMD_PTR register */
#define CS_STATUS_CMD_PTR_POINTER_SHIFT 0
#define CS_STATUS_CMD_PTR_POINTER_MASK (0xFFFFFFFFFFFFFFFF << CS_STATUS_CMD_PTR_POINTER_SHIFT)
#define CS_STATUS_CMD_PTR_POINTER_MASK \
(GPU_ULL(0xFFFFFFFFFFFFFFFF) << CS_STATUS_CMD_PTR_POINTER_SHIFT)
#define CS_STATUS_CMD_PTR_POINTER_GET(reg_val) \
(((reg_val)&CS_STATUS_CMD_PTR_POINTER_MASK) >> CS_STATUS_CMD_PTR_POINTER_SHIFT)
#define CS_STATUS_CMD_PTR_POINTER_SET(reg_val, value) \
@@ -608,7 +612,8 @@
/* CS_STATUS_WAIT_SYNC_POINTER register */
#define CS_STATUS_WAIT_SYNC_POINTER_POINTER_SHIFT 0
#define CS_STATUS_WAIT_SYNC_POINTER_POINTER_MASK (0xFFFFFFFFFFFFFFFF << CS_STATUS_WAIT_SYNC_POINTER_POINTER_SHIFT)
#define CS_STATUS_WAIT_SYNC_POINTER_POINTER_MASK \
(GPU_ULL(0xFFFFFFFFFFFFFFFF) << CS_STATUS_WAIT_SYNC_POINTER_POINTER_SHIFT)
#define CS_STATUS_WAIT_SYNC_POINTER_POINTER_GET(reg_val) \
(((reg_val)&CS_STATUS_WAIT_SYNC_POINTER_POINTER_MASK) >> CS_STATUS_WAIT_SYNC_POINTER_POINTER_SHIFT)
#define CS_STATUS_WAIT_SYNC_POINTER_POINTER_SET(reg_val, value) \
@@ -709,7 +714,8 @@
/* CS_FAULT_INFO register */
#define CS_FAULT_INFO_EXCEPTION_DATA_SHIFT 0
#define CS_FAULT_INFO_EXCEPTION_DATA_MASK (0xFFFFFFFFFFFFFFFF << CS_FAULT_INFO_EXCEPTION_DATA_SHIFT)
#define CS_FAULT_INFO_EXCEPTION_DATA_MASK \
(GPU_ULL(0xFFFFFFFFFFFFFFFF) << CS_FAULT_INFO_EXCEPTION_DATA_SHIFT)
#define CS_FAULT_INFO_EXCEPTION_DATA_GET(reg_val) \
(((reg_val)&CS_FAULT_INFO_EXCEPTION_DATA_MASK) >> CS_FAULT_INFO_EXCEPTION_DATA_SHIFT)
#define CS_FAULT_INFO_EXCEPTION_DATA_SET(reg_val, value) \
@@ -718,7 +724,8 @@
/* CS_FATAL_INFO register */
#define CS_FATAL_INFO_EXCEPTION_DATA_SHIFT 0
#define CS_FATAL_INFO_EXCEPTION_DATA_MASK (0xFFFFFFFFFFFFFFFF << CS_FATAL_INFO_EXCEPTION_DATA_SHIFT)
#define CS_FATAL_INFO_EXCEPTION_DATA_MASK \
(GPU_ULL(0xFFFFFFFFFFFFFFFF) << CS_FATAL_INFO_EXCEPTION_DATA_SHIFT)
#define CS_FATAL_INFO_EXCEPTION_DATA_GET(reg_val) \
(((reg_val)&CS_FATAL_INFO_EXCEPTION_DATA_MASK) >> CS_FATAL_INFO_EXCEPTION_DATA_SHIFT)
#define CS_FATAL_INFO_EXCEPTION_DATA_SET(reg_val, value) \
@@ -750,7 +757,7 @@
/* CS_HEAP_ADDRESS register */
#define CS_HEAP_ADDRESS_POINTER_SHIFT 0
#define CS_HEAP_ADDRESS_POINTER_MASK (0xFFFFFFFFFFFFFFFF << CS_HEAP_ADDRESS_POINTER_SHIFT)
#define CS_HEAP_ADDRESS_POINTER_MASK (GPU_ULL(0xFFFFFFFFFFFFFFFF) << CS_HEAP_ADDRESS_POINTER_SHIFT)
#define CS_HEAP_ADDRESS_POINTER_GET(reg_val) (((reg_val)&CS_HEAP_ADDRESS_POINTER_MASK) >> CS_HEAP_ADDRESS_POINTER_SHIFT)
#define CS_HEAP_ADDRESS_POINTER_SET(reg_val, value) \
(((reg_val) & ~CS_HEAP_ADDRESS_POINTER_MASK) | \
@@ -761,14 +768,14 @@
/* CS_INSERT register */
#define CS_INSERT_VALUE_SHIFT 0
#define CS_INSERT_VALUE_MASK (0xFFFFFFFFFFFFFFFF << CS_INSERT_VALUE_SHIFT)
#define CS_INSERT_VALUE_MASK (GPU_ULL(0xFFFFFFFFFFFFFFFF) << CS_INSERT_VALUE_SHIFT)
#define CS_INSERT_VALUE_GET(reg_val) (((reg_val)&CS_INSERT_VALUE_MASK) >> CS_INSERT_VALUE_SHIFT)
#define CS_INSERT_VALUE_SET(reg_val, value) \
(((reg_val) & ~CS_INSERT_VALUE_MASK) | (((value) << CS_INSERT_VALUE_SHIFT) & CS_INSERT_VALUE_MASK))
/* CS_EXTRACT_INIT register */
#define CS_EXTRACT_INIT_VALUE_SHIFT 0
#define CS_EXTRACT_INIT_VALUE_MASK (0xFFFFFFFFFFFFFFFF << CS_EXTRACT_INIT_VALUE_SHIFT)
#define CS_EXTRACT_INIT_VALUE_MASK (GPU_ULL(0xFFFFFFFFFFFFFFFF) << CS_EXTRACT_INIT_VALUE_SHIFT)
#define CS_EXTRACT_INIT_VALUE_GET(reg_val) (((reg_val)&CS_EXTRACT_INIT_VALUE_MASK) >> CS_EXTRACT_INIT_VALUE_SHIFT)
#define CS_EXTRACT_INIT_VALUE_SET(reg_val, value) \
(((reg_val) & ~CS_EXTRACT_INIT_VALUE_MASK) | \
@@ -779,7 +786,7 @@
/* CS_EXTRACT register */
#define CS_EXTRACT_VALUE_SHIFT 0
#define CS_EXTRACT_VALUE_MASK (0xFFFFFFFFFFFFFFFF << CS_EXTRACT_VALUE_SHIFT)
#define CS_EXTRACT_VALUE_MASK (GPU_ULL(0xFFFFFFFFFFFFFFFF) << CS_EXTRACT_VALUE_SHIFT)
#define CS_EXTRACT_VALUE_GET(reg_val) (((reg_val)&CS_EXTRACT_VALUE_MASK) >> CS_EXTRACT_VALUE_SHIFT)
#define CS_EXTRACT_VALUE_SET(reg_val, value) \
(((reg_val) & ~CS_EXTRACT_VALUE_MASK) | (((value) << CS_EXTRACT_VALUE_SHIFT) & CS_EXTRACT_VALUE_MASK))
@@ -932,7 +939,7 @@
/* CSG_SUSPEND_BUF register */
#define CSG_SUSPEND_BUF_POINTER_SHIFT 0
#define CSG_SUSPEND_BUF_POINTER_MASK (0xFFFFFFFFFFFFFFFF << CSG_SUSPEND_BUF_POINTER_SHIFT)
#define CSG_SUSPEND_BUF_POINTER_MASK (GPU_ULL(0xFFFFFFFFFFFFFFFF) << CSG_SUSPEND_BUF_POINTER_SHIFT)
#define CSG_SUSPEND_BUF_POINTER_GET(reg_val) (((reg_val)&CSG_SUSPEND_BUF_POINTER_MASK) >> CSG_SUSPEND_BUF_POINTER_SHIFT)
#define CSG_SUSPEND_BUF_POINTER_SET(reg_val, value) \
(((reg_val) & ~CSG_SUSPEND_BUF_POINTER_MASK) | \
@@ -940,7 +947,8 @@
/* CSG_PROTM_SUSPEND_BUF register */
#define CSG_PROTM_SUSPEND_BUF_POINTER_SHIFT 0
#define CSG_PROTM_SUSPEND_BUF_POINTER_MASK (0xFFFFFFFFFFFFFFFF << CSG_PROTM_SUSPEND_BUF_POINTER_SHIFT)
#define CSG_PROTM_SUSPEND_BUF_POINTER_MASK \
(GPU_ULL(0xFFFFFFFFFFFFFFFF) << CSG_PROTM_SUSPEND_BUF_POINTER_SHIFT)
#define CSG_PROTM_SUSPEND_BUF_POINTER_GET(reg_val) \
(((reg_val)&CSG_PROTM_SUSPEND_BUF_POINTER_MASK) >> CSG_PROTM_SUSPEND_BUF_POINTER_SHIFT)
#define CSG_PROTM_SUSPEND_BUF_POINTER_SET(reg_val, value) \
@@ -1408,7 +1416,7 @@
/* GLB_ALLOC_EN register */
#define GLB_ALLOC_EN_MASK_SHIFT 0
#define GLB_ALLOC_EN_MASK_MASK (0xFFFFFFFFFFFFFFFF << GLB_ALLOC_EN_MASK_SHIFT)
#define GLB_ALLOC_EN_MASK_MASK (GPU_ULL(0xFFFFFFFFFFFFFFFF) << GLB_ALLOC_EN_MASK_SHIFT)
#define GLB_ALLOC_EN_MASK_GET(reg_val) (((reg_val)&GLB_ALLOC_EN_MASK_MASK) >> GLB_ALLOC_EN_MASK_SHIFT)
#define GLB_ALLOC_EN_MASK_SET(reg_val, value) \
(((reg_val) & ~GLB_ALLOC_EN_MASK_MASK) | (((value) << GLB_ALLOC_EN_MASK_SHIFT) & GLB_ALLOC_EN_MASK_MASK))
@@ -1549,5 +1557,16 @@
(((reg_val) & ~GLB_PRFCNT_SIZE_HARDWARE_SIZE_MASK) | \
((GLB_PRFCNT_SIZE_HARDWARE_SIZE_SET_MOD(value) << GLB_PRFCNT_SIZE_HARDWARE_SIZE_SHIFT) & \
GLB_PRFCNT_SIZE_HARDWARE_SIZE_MASK))
/*
 * GLB_PRFCNT_SIZE register, FIRMWARE_SIZE field.
 *
 * The field is 16 bits wide and starts at bit 16. The value written to the
 * field is the caller's value shifted right by 8 (see *_SET_MOD); reading the
 * field shifts it back left by 8 (see *_GET_MOD).
 */
#define GLB_PRFCNT_SIZE_FIRMWARE_SIZE_SHIFT GPU_U(16)
#define GLB_PRFCNT_SIZE_FIRMWARE_SIZE_MASK \
	(GPU_U(0xFFFF) << GLB_PRFCNT_SIZE_FIRMWARE_SIZE_SHIFT)
/* Conversions between the caller's value and the raw field encoding. */
#define GLB_PRFCNT_SIZE_FIRMWARE_SIZE_GET_MOD(value) ((value) << 8)
#define GLB_PRFCNT_SIZE_FIRMWARE_SIZE_SET_MOD(value) ((value) >> 8)
/* Extract the field from reg_val and convert it back to the caller's value. */
#define GLB_PRFCNT_SIZE_FIRMWARE_SIZE_GET(reg_val)                                         \
	(GLB_PRFCNT_SIZE_FIRMWARE_SIZE_GET_MOD(                                            \
		((reg_val)&GLB_PRFCNT_SIZE_FIRMWARE_SIZE_MASK) >>                          \
		GLB_PRFCNT_SIZE_FIRMWARE_SIZE_SHIFT))
/* Return reg_val with the field replaced by the encoded form of value. */
#define GLB_PRFCNT_SIZE_FIRMWARE_SIZE_SET(reg_val, value)                                  \
	(((reg_val) & ~GLB_PRFCNT_SIZE_FIRMWARE_SIZE_MASK) |                               \
	 ((GLB_PRFCNT_SIZE_FIRMWARE_SIZE_SET_MOD(value)                                    \
	   << GLB_PRFCNT_SIZE_FIRMWARE_SIZE_SHIFT) &                                       \
	  GLB_PRFCNT_SIZE_FIRMWARE_SIZE_MASK))
#endif /* _KBASE_CSF_REGISTERS_H_ */

View File

@@ -236,7 +236,7 @@ static enum hrtimer_restart tick_timer_callback(struct hrtimer *timer)
struct kbase_device *kbdev = container_of(timer, struct kbase_device,
csf.scheduler.tick_timer);
kbase_csf_scheduler_advance_tick(kbdev);
kbase_csf_scheduler_tick_advance(kbdev);
return HRTIMER_NORESTART;
}
@@ -476,7 +476,7 @@ void kbase_csf_scheduler_process_gpu_idle_event(struct kbase_device *kbdev)
non_idle_offslot_grps = atomic_read(&scheduler->non_idle_offslot_grps);
can_suspend_on_idle = kbase_pm_idle_groups_sched_suspendable(kbdev);
KBASE_KTRACE_ADD(kbdev, SCHEDULER_CAN_IDLE, NULL,
KBASE_KTRACE_ADD(kbdev, SCHEDULER_GPU_IDLE_EVENT_CAN_SUSPEND, NULL,
((u64)(u32)non_idle_offslot_grps) | (((u64)can_suspend_on_idle) << 32));
if (!non_idle_offslot_grps) {
@@ -490,7 +490,7 @@ void kbase_csf_scheduler_process_gpu_idle_event(struct kbase_device *kbdev)
}
} else {
/* Advance the scheduling tick to get the non-idle suspended groups loaded soon */
kbase_csf_scheduler_advance_tick_nolock(kbdev);
kbase_csf_scheduler_tick_advance_nolock(kbdev);
}
}
@@ -560,6 +560,12 @@ static bool on_slot_group_idle_locked(struct kbase_queue_group *group)
return (group->run_state == KBASE_CSF_GROUP_IDLE);
}
static bool can_schedule_idle_group(struct kbase_queue_group *group)
{
return (on_slot_group_idle_locked(group) ||
(group->priority == KBASE_QUEUE_GROUP_PRIORITY_REALTIME));
}
static bool queue_group_scheduled(struct kbase_queue_group *group)
{
return (group->run_state != KBASE_CSF_GROUP_INACTIVE &&
@@ -575,7 +581,7 @@ static bool queue_group_scheduled_locked(struct kbase_queue_group *group)
}
/**
* scheduler_wait_protm_quit() - Wait for GPU to exit protected mode.
* scheduler_protm_wait_quit() - Wait for GPU to exit protected mode.
*
* @kbdev: Pointer to the GPU device
*
@@ -584,7 +590,7 @@ static bool queue_group_scheduled_locked(struct kbase_queue_group *group)
*
* Return: true on success, false otherwise.
*/
static bool scheduler_wait_protm_quit(struct kbase_device *kbdev)
static bool scheduler_protm_wait_quit(struct kbase_device *kbdev)
{
struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
long wt = kbase_csf_timeout_in_jiffies(kbdev->csf.fw_timeout_ms);
@@ -593,8 +599,7 @@ static bool scheduler_wait_protm_quit(struct kbase_device *kbdev)
lockdep_assert_held(&scheduler->lock);
KBASE_KTRACE_ADD(kbdev, SCHEDULER_WAIT_PROTM_QUIT, NULL,
jiffies_to_msecs(wt));
KBASE_KTRACE_ADD(kbdev, SCHEDULER_PROTM_WAIT_QUIT_START, NULL, jiffies_to_msecs(wt));
remaining = wait_event_timeout(kbdev->csf.event_wait,
!kbase_csf_scheduler_protected_mode_in_use(kbdev), wt);
@@ -606,8 +611,7 @@ static bool scheduler_wait_protm_quit(struct kbase_device *kbdev)
success = false;
}
KBASE_KTRACE_ADD(kbdev, SCHEDULER_WAIT_PROTM_QUIT_DONE, NULL,
jiffies_to_msecs(remaining));
KBASE_KTRACE_ADD(kbdev, SCHEDULER_PROTM_WAIT_QUIT_END, NULL, jiffies_to_msecs(remaining));
return success;
}
@@ -631,7 +635,7 @@ static void scheduler_force_protm_exit(struct kbase_device *kbdev)
kbase_csf_firmware_ping(kbdev);
if (scheduler_wait_protm_quit(kbdev))
if (scheduler_protm_wait_quit(kbdev))
return;
dev_err(kbdev->dev, "Possible GPU hang in Protected mode");
@@ -958,8 +962,8 @@ static void update_idle_suspended_group_state(struct kbase_queue_group *group)
return;
new_val = atomic_inc_return(&scheduler->non_idle_offslot_grps);
KBASE_KTRACE_ADD_CSF_GRP(group->kctx->kbdev, SCHEDULER_NONIDLE_OFFSLOT_INC,
group, new_val);
KBASE_KTRACE_ADD_CSF_GRP(group->kctx->kbdev, SCHEDULER_NONIDLE_OFFSLOT_GRP_INC, group,
new_val);
}
int kbase_csf_scheduler_group_get_slot_locked(struct kbase_queue_group *group)
@@ -1086,7 +1090,7 @@ static int halt_stream_sync(struct kbase_queue *queue)
kbase_csf_firmware_cs_input_mask(stream, CS_REQ, CS_REQ_STATE_STOP,
CS_REQ_STATE_MASK);
KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, CSI_STOP_REQUESTED, group, queue, 0u);
KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, CSI_STOP_REQ, group, queue, 0u);
kbase_csf_ring_cs_kernel_doorbell(kbdev, csi_index, group->csg_nr, true);
/* Timed wait */
@@ -1159,8 +1163,7 @@ static int sched_halt_stream(struct kbase_queue *queue)
long remaining;
int slot;
int err = 0;
const u32 group_schedule_timeout =
20 * kbdev->csf.scheduler.csg_scheduling_period_ms;
const u32 group_schedule_timeout = kbase_get_timeout_ms(kbdev, CSF_CSG_SUSPEND_TIMEOUT);
if (WARN_ON(!group))
return -EINVAL;
@@ -1531,8 +1534,8 @@ int kbase_csf_scheduler_queue_start(struct kbase_queue *queue)
KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, QUEUE_START, group, queue,
group->run_state);
KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, QUEUE_SYNC_STATUS_WAIT, queue->group,
queue, queue->status_wait);
KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, QUEUE_SYNC_UPDATE_WAIT_STATUS, queue->group, queue,
queue->status_wait);
if (group->run_state == KBASE_CSF_GROUP_FAULT_EVICTED) {
err = -EIO;
@@ -1564,9 +1567,9 @@ int kbase_csf_scheduler_queue_start(struct kbase_queue *queue)
} else
program_cs(kbdev, queue, true);
}
queue_delayed_work(system_long_wq,
&kbdev->csf.scheduler.ping_work,
msecs_to_jiffies(FIRMWARE_PING_INTERVAL_MS));
queue_delayed_work(system_long_wq, &kbdev->csf.scheduler.ping_work,
msecs_to_jiffies(kbase_get_timeout_ms(
kbdev, CSF_FIRMWARE_PING_TIMEOUT)));
}
}
@@ -1601,7 +1604,8 @@ static enum kbase_csf_csg_slot_state update_csg_slot_status(
slot_state = CSG_SLOT_RUNNING;
atomic_set(&csg_slot->state, slot_state);
csg_slot->trigger_jiffies = jiffies;
KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_STARTED, csg_slot->resident_group, state);
KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_RUNNING, csg_slot->resident_group,
state);
dev_dbg(kbdev->dev, "Group %u running on slot %d\n",
csg_slot->resident_group->handle, slot);
}
@@ -1716,7 +1720,7 @@ static void halt_csg_slot(struct kbase_queue_group *group, bool suspend)
flags);
atomic_set(&csg_slot[slot].state, CSG_SLOT_DOWN2STOP);
csg_slot[slot].trigger_jiffies = jiffies;
KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_STOP, group, halt_cmd);
KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_STOP_REQ, group, halt_cmd);
KBASE_TLSTREAM_TL_KBASE_DEVICE_HALT_CSG(
kbdev, kbdev->gpu_props.props.raw_props.gpu_id, slot);
@@ -1760,10 +1764,10 @@ static bool evaluate_sync_update(struct kbase_queue *queue)
sync_ptr = kbase_phy_alloc_mapping_get(queue->kctx, queue->sync_ptr,
&mapping);
KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, QUEUE_SYNC_UPDATE, queue->group,
queue, queue->sync_ptr);
KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, QUEUE_SYNC_BLOCKED_REASON,
queue->group, queue, queue->blocked_reason);
KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, QUEUE_SYNC_UPDATE_EVAL_START, queue->group, queue,
queue->sync_ptr);
KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, QUEUE_SYNC_UPDATE_BLOCKED_REASON, queue->group, queue,
queue->blocked_reason);
if (!sync_ptr) {
dev_dbg(queue->kctx->kbdev->dev, "sync memory VA 0x%016llX already freed",
@@ -1778,11 +1782,11 @@ static bool evaluate_sync_update(struct kbase_queue *queue)
(sync_wait_cond != CS_STATUS_WAIT_SYNC_WAIT_CONDITION_LE));
sync_current_val = READ_ONCE(*sync_ptr);
KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, QUEUE_SYNC_CURRENT_VAL, queue->group,
queue, sync_current_val);
KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, QUEUE_SYNC_UPDATE_CUR_VAL, queue->group, queue,
sync_current_val);
KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, QUEUE_SYNC_TEST_VAL, queue->group,
queue, queue->sync_value);
KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, QUEUE_SYNC_UPDATE_TEST_VAL, queue->group, queue,
queue->sync_value);
if (((sync_wait_cond == CS_STATUS_WAIT_SYNC_WAIT_CONDITION_GT) &&
(sync_current_val > queue->sync_value)) ||
@@ -1799,8 +1803,7 @@ static bool evaluate_sync_update(struct kbase_queue *queue)
kbase_phy_alloc_mapping_put(queue->kctx, mapping);
out:
KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, QUEUE_SYNC_UPDATE_EVALUATED,
queue->group, queue, updated);
KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, QUEUE_SYNC_UPDATE_EVAL_END, queue->group, queue, updated);
return updated;
}
@@ -1834,8 +1837,8 @@ bool save_slot_cs(struct kbase_csf_cmd_stream_group_info const *const ginfo,
queue->saved_cmd_ptr = cmd_ptr;
#endif
KBASE_KTRACE_ADD_CSF_GRP_Q(stream->kbdev, QUEUE_SYNC_STATUS_WAIT,
queue->group, queue, status);
KBASE_KTRACE_ADD_CSF_GRP_Q(stream->kbdev, QUEUE_SYNC_UPDATE_WAIT_STATUS, queue->group,
queue, status);
if (CS_STATUS_WAIT_SYNC_WAIT_GET(status)) {
queue->status_wait = status;
@@ -1921,7 +1924,7 @@ void insert_group_to_runnable(struct kbase_csf_scheduler *const scheduler,
list_add_tail(&group->link,
&kctx->csf.sched.runnable_groups[group->priority]);
kctx->csf.sched.num_runnable_grps++;
KBASE_KTRACE_ADD_CSF_GRP(kbdev, GROUP_INSERT_RUNNABLE, group,
KBASE_KTRACE_ADD_CSF_GRP(kbdev, GROUP_RUNNABLE_INSERT, group,
kctx->csf.sched.num_runnable_grps);
/* Add the kctx if not yet in runnable kctxs */
@@ -1929,7 +1932,7 @@ void insert_group_to_runnable(struct kbase_csf_scheduler *const scheduler,
/* First runnable csg, adds to the runnable_kctxs */
INIT_LIST_HEAD(&kctx->csf.link);
list_add_tail(&kctx->csf.link, &scheduler->runnable_kctxs);
KBASE_KTRACE_ADD(kbdev, SCHEDULER_INSERT_RUNNABLE, kctx, 0u);
KBASE_KTRACE_ADD(kbdev, SCHEDULER_RUNNABLE_KCTX_INSERT, kctx, 0u);
}
scheduler->total_runnable_grps++;
@@ -1986,7 +1989,7 @@ void remove_group_from_runnable(struct kbase_csf_scheduler *const scheduler,
if (kbase_prepare_to_reset_gpu(kctx->kbdev, RESET_FLAGS_NONE))
kbase_reset_gpu(kctx->kbdev);
KBASE_KTRACE_ADD_CSF_GRP(kctx->kbdev, SCHEDULER_EXIT_PROTM,
KBASE_KTRACE_ADD_CSF_GRP(kctx->kbdev, SCHEDULER_PROTM_EXIT,
scheduler->active_protm_grp, 0u);
scheduler->active_protm_grp = NULL;
}
@@ -2016,13 +2019,12 @@ void remove_group_from_runnable(struct kbase_csf_scheduler *const scheduler,
}
kctx->csf.sched.num_runnable_grps--;
KBASE_KTRACE_ADD_CSF_GRP(kctx->kbdev, GROUP_REMOVE_RUNNABLE, group,
KBASE_KTRACE_ADD_CSF_GRP(kctx->kbdev, GROUP_RUNNABLE_REMOVE, group,
kctx->csf.sched.num_runnable_grps);
new_head_grp = (!list_empty(list)) ?
list_first_entry(list, struct kbase_queue_group, link) :
NULL;
KBASE_KTRACE_ADD_CSF_GRP(kctx->kbdev, GROUP_HEAD_RUNNABLE, new_head_grp,
0u);
KBASE_KTRACE_ADD_CSF_GRP(kctx->kbdev, GROUP_RUNNABLE_HEAD, new_head_grp, 0u);
if (kctx->csf.sched.num_runnable_grps == 0) {
struct kbase_context *new_head_kctx;
@@ -2031,13 +2033,11 @@ void remove_group_from_runnable(struct kbase_csf_scheduler *const scheduler,
list_del_init(&kctx->csf.link);
if (scheduler->top_ctx == kctx)
scheduler->top_ctx = NULL;
KBASE_KTRACE_ADD(kctx->kbdev, SCHEDULER_REMOVE_RUNNABLE, kctx,
0u);
KBASE_KTRACE_ADD(kctx->kbdev, SCHEDULER_RUNNABLE_KCTX_REMOVE, kctx, 0u);
new_head_kctx = (!list_empty(kctx_list)) ?
list_first_entry(kctx_list, struct kbase_context, csf.link) :
NULL;
KBASE_KTRACE_ADD(kctx->kbdev, SCHEDULER_HEAD_RUNNABLE,
new_head_kctx, 0u);
KBASE_KTRACE_ADD(kctx->kbdev, SCHEDULER_RUNNABLE_KCTX_HEAD, new_head_kctx, 0u);
}
WARN_ON(scheduler->total_runnable_grps == 0);
@@ -2064,7 +2064,7 @@ static void insert_group_to_idle_wait(struct kbase_queue_group *const group)
list_add_tail(&group->link, &kctx->csf.sched.idle_wait_groups);
kctx->csf.sched.num_idle_wait_grps++;
KBASE_KTRACE_ADD_CSF_GRP(kctx->kbdev, GROUP_INSERT_IDLE_WAIT, group,
KBASE_KTRACE_ADD_CSF_GRP(kctx->kbdev, GROUP_IDLE_WAIT_INSERT, group,
kctx->csf.sched.num_idle_wait_grps);
group->run_state = KBASE_CSF_GROUP_SUSPENDED_ON_WAIT_SYNC;
dev_dbg(kctx->kbdev->dev,
@@ -2085,13 +2085,12 @@ static void remove_group_from_idle_wait(struct kbase_queue_group *const group)
list_del_init(&group->link);
WARN_ON(kctx->csf.sched.num_idle_wait_grps == 0);
kctx->csf.sched.num_idle_wait_grps--;
KBASE_KTRACE_ADD_CSF_GRP(kctx->kbdev, GROUP_REMOVE_IDLE_WAIT, group,
KBASE_KTRACE_ADD_CSF_GRP(kctx->kbdev, GROUP_IDLE_WAIT_REMOVE, group,
kctx->csf.sched.num_idle_wait_grps);
new_head_grp = (!list_empty(list)) ?
list_first_entry(list, struct kbase_queue_group, link) :
NULL;
KBASE_KTRACE_ADD_CSF_GRP(kctx->kbdev, GROUP_HEAD_IDLE_WAIT,
new_head_grp, 0u);
KBASE_KTRACE_ADD_CSF_GRP(kctx->kbdev, GROUP_IDLE_WAIT_HEAD, new_head_grp, 0u);
group->run_state = KBASE_CSF_GROUP_INACTIVE;
}
@@ -2117,8 +2116,7 @@ static void update_offslot_non_idle_cnt_for_faulty_grp(struct kbase_queue_group
if (group->prepared_seq_num < scheduler->non_idle_scanout_grps) {
int new_val =
atomic_dec_return(&scheduler->non_idle_offslot_grps);
KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_DEC,
group, new_val);
KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_GRP_DEC, group, new_val);
}
}
@@ -2134,8 +2132,7 @@ static void update_offslot_non_idle_cnt_for_onslot_grp(struct kbase_queue_group
if (group->prepared_seq_num < scheduler->non_idle_scanout_grps) {
int new_val =
atomic_dec_return(&scheduler->non_idle_offslot_grps);
KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_DEC,
group, new_val);
KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_GRP_DEC, group, new_val);
}
}
@@ -2155,15 +2152,15 @@ static void update_offslot_non_idle_cnt_on_grp_suspend(
if (group->run_state == KBASE_CSF_GROUP_SUSPENDED) {
int new_val = atomic_inc_return(
&scheduler->non_idle_offslot_grps);
KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_INC,
group, new_val);
KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_GRP_INC,
group, new_val);
}
} else {
if (group->run_state != KBASE_CSF_GROUP_SUSPENDED) {
int new_val = atomic_dec_return(
&scheduler->non_idle_offslot_grps);
KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_DEC,
group, new_val);
KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_GRP_DEC,
group, new_val);
}
}
} else {
@@ -2171,8 +2168,8 @@ static void update_offslot_non_idle_cnt_on_grp_suspend(
if (group->run_state == KBASE_CSF_GROUP_SUSPENDED) {
int new_val = atomic_inc_return(
&scheduler->non_idle_offslot_grps);
KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_INC,
group, new_val);
KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_GRP_INC, group,
new_val);
}
}
}
@@ -2411,7 +2408,7 @@ static void update_csg_slot_priority(struct kbase_queue_group *group, u8 prio)
group->handle, group->kctx->tgid, group->kctx->id, slot,
prev_prio, prio);
KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_PRIO_UPDATE, group, prev_prio);
KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_PRIO_UPDATE, group, prev_prio);
kbase_csf_ring_csg_doorbell(kbdev, slot);
set_bit(slot, kbdev->csf.scheduler.csg_slots_prio_update);
@@ -2560,10 +2557,9 @@ static void program_csg_slot(struct kbase_queue_group *group, s8 slot,
dev_dbg(kbdev->dev, "Starting group %d of context %d_%d on slot %d with priority %u\n",
group->handle, kctx->tgid, kctx->id, slot, prio);
KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_START, group,
(((u64)ep_cfg) << 32) |
((((u32)kctx->as_nr) & 0xF) << 16) |
(state & (CSG_REQ_STATE_MASK >> CS_REQ_STATE_SHIFT)));
KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_START_REQ, group,
(((u64)ep_cfg) << 32) | ((((u32)kctx->as_nr) & 0xF) << 16) |
(state & (CSG_REQ_STATE_MASK >> CS_REQ_STATE_SHIFT)));
kbase_csf_ring_csg_doorbell(kbdev, slot);
@@ -2605,8 +2601,8 @@ static void sched_evict_group(struct kbase_queue_group *group, bool fault,
group->run_state == KBASE_CSF_GROUP_RUNNABLE)) {
int new_val = atomic_dec_return(
&scheduler->non_idle_offslot_grps);
KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_DEC,
group, new_val);
KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_GRP_DEC, group,
new_val);
}
for (i = 0; i < MAX_SUPPORTED_STREAMS_PER_GROUP; i++) {
@@ -2630,9 +2626,9 @@ static void sched_evict_group(struct kbase_queue_group *group, bool fault,
if (fault)
group->run_state = KBASE_CSF_GROUP_FAULT_EVICTED;
KBASE_KTRACE_ADD_CSF_GRP(kbdev, GROUP_EVICT_SCHED, group,
(((u64)scheduler->total_runnable_grps) << 32) |
((u32)group->run_state));
KBASE_KTRACE_ADD_CSF_GRP(kbdev, GROUP_EVICT, group,
(((u64)scheduler->total_runnable_grps) << 32) |
((u32)group->run_state));
dev_dbg(kbdev->dev, "group %d exited scheduler, num_runnable_grps %d\n",
group->handle, scheduler->total_runnable_grps);
/* Notify a group has been evicted */
@@ -2783,6 +2779,8 @@ static int scheduler_group_schedule(struct kbase_queue_group *group)
if (protm_grp && protm_grp != group) {
clear_bit((unsigned int)group->csg_nr,
scheduler->csg_slots_idle_mask);
/* Request the update to confirm the condition inferred. */
group->reevaluate_idle_status = true;
KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_IDLE_CLEAR, group,
scheduler->csg_slots_idle_mask[0]);
}
@@ -2809,8 +2807,7 @@ static int scheduler_group_schedule(struct kbase_queue_group *group)
/* A new group into the scheduler */
new_val = atomic_inc_return(
&kbdev->csf.scheduler.non_idle_offslot_grps);
KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_INC,
group, new_val);
KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_GRP_INC, group, new_val);
}
/* Since a group has become active now, check if GPU needs to be
@@ -3448,6 +3445,17 @@ static void wait_csg_slots_finish_prio_update(struct kbase_device *kbdev)
}
}
static void report_csg_termination(struct kbase_queue_group *const group)
{
struct base_gpu_queue_group_error
err = { .error_type = BASE_GPU_QUEUE_GROUP_ERROR_FATAL,
.payload = { .fatal_group = {
.status = GPU_EXCEPTION_TYPE_SW_FAULT_2,
} } };
kbase_csf_add_group_fatal_error(group, &err);
}
void kbase_csf_scheduler_evict_ctx_slots(struct kbase_device *kbdev,
struct kbase_context *kctx, struct list_head *evicted_groups)
{
@@ -3465,16 +3473,21 @@ void kbase_csf_scheduler_evict_ctx_slots(struct kbase_device *kbdev,
*/
WARN_ON(!kbase_reset_gpu_is_active(kbdev));
KBASE_KTRACE_ADD(kbdev, EVICT_CTX_SLOTS, kctx, 0u);
KBASE_KTRACE_ADD(kbdev, SCHEDULER_EVICT_CTX_SLOTS_START, kctx, 0u);
for (slot = 0; slot < num_groups; slot++) {
group = kbdev->csf.scheduler.csg_slots[slot].resident_group;
if (group && group->kctx == kctx) {
bool as_fault;
dev_dbg(kbdev->dev, "Evicting group [%d] running on slot [%d] due to reset",
group->handle, group->csg_nr);
term_csg_slot(group);
as_fault = cleanup_csg_slot(group);
/* remove the group from the scheduler list */
sched_evict_group(group, as_fault, false);
/* signal Userspace that CSG is being terminated */
report_csg_termination(group);
/* return the evicted group to the caller */
list_add_tail(&group->link, evicted_groups);
set_bit(slot, slot_mask);
@@ -3484,6 +3497,15 @@ void kbase_csf_scheduler_evict_ctx_slots(struct kbase_device *kbdev,
dev_info(kbdev->dev, "Evicting context %d_%d slots: 0x%*pb\n",
kctx->tgid, kctx->id, num_groups, slot_mask);
/* Fatal errors may have been the cause of the GPU reset
* taking place, in which case we want to make sure that
* we wake up the fatal event queue to notify userspace
* only once. Otherwise, we may have duplicate event
* notifications between the time the first notification
* occurs and the time the GPU is reset.
*/
kbase_event_wakeup(kctx);
mutex_unlock(&scheduler->lock);
}
@@ -3528,8 +3550,8 @@ static bool scheduler_slot_protm_ack(struct kbase_device *const kbdev,
struct kbase_queue *queue = group->bound_queues[i];
clear_bit(i, group->protm_pending_bitmap);
KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, PROTM_PENDING_CLEAR, group,
queue, group->protm_pending_bitmap[0]);
KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, CSI_PROTM_PEND_CLEAR, group, queue,
group->protm_pending_bitmap[0]);
if (!WARN_ON(!queue) && queue->enabled) {
struct kbase_csf_cmd_stream_info *stream =
@@ -3564,6 +3586,42 @@ static bool scheduler_slot_protm_ack(struct kbase_device *const kbdev,
return protm_ack;
}
/**
* protm_enter_set_next_pending_seq - Update the scheduler's field of
* tick_protm_pending_seq to that from the next available on-slot protm
* pending CSG.
*
* @kbdev: Pointer to the GPU device.
*
* If applicable, the function updates the scheduler's tick_protm_pending_seq
* field from the next available on-slot protm pending CSG. If not, the field
* is set to KBASEP_TICK_PROTM_PEND_SCAN_SEQ_NR_INVALID.
*/
static void protm_enter_set_next_pending_seq(struct kbase_device *const kbdev)
{
struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
u32 num_groups = kbdev->csf.global_iface.group_num;
u32 num_csis = kbdev->csf.global_iface.groups[0].stream_num;
DECLARE_BITMAP(active_csgs, MAX_SUPPORTED_CSGS) = { 0 };
u32 i;
kbase_csf_scheduler_spin_lock_assert_held(kbdev);
bitmap_xor(active_csgs, scheduler->csg_slots_idle_mask, scheduler->csg_inuse_bitmap,
num_groups);
/* Reset the tick's pending protm seq number to invalid initially */
scheduler->tick_protm_pending_seq = KBASEP_TICK_PROTM_PEND_SCAN_SEQ_NR_INVALID;
for_each_set_bit(i, active_csgs, num_groups) {
struct kbase_queue_group *group = scheduler->csg_slots[i].resident_group;
/* Set to the next pending protm group's scan_seq_number */
if ((group != scheduler->active_protm_grp) &&
(!bitmap_empty(group->protm_pending_bitmap, num_csis)) &&
(group->scan_seq_num < scheduler->tick_protm_pending_seq))
scheduler->tick_protm_pending_seq = group->scan_seq_num;
}
}
/**
* scheduler_group_check_protm_enter - Request the given group to be evaluated
* for triggering the protected mode.
@@ -3600,8 +3658,7 @@ static void scheduler_group_check_protm_enter(struct kbase_device *const kbdev,
*/
protm_in_use = kbase_csf_scheduler_protected_mode_in_use(kbdev) ||
kbdev->protected_mode;
KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_CHECK_PROTM_ENTER, input_grp,
protm_in_use);
KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_PROTM_ENTER_CHECK, input_grp, protm_in_use);
/* Firmware samples the PROTM_PEND ACK bit for CSs when
* Host sends PROTM_ENTER global request. So if PROTM_PEND ACK bit
@@ -3641,13 +3698,13 @@ static void scheduler_group_check_protm_enter(struct kbase_device *const kbdev,
/* Switch to protected mode */
scheduler->active_protm_grp = input_grp;
KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_ENTER_PROTM,
input_grp, 0u);
/* Reset the tick's pending protm seq number */
scheduler->tick_protm_pending_seq =
KBASEP_TICK_PROTM_PEND_SCAN_SEQ_NR_INVALID;
KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_PROTM_ENTER, input_grp,
0u);
kbase_csf_enter_protected_mode(kbdev);
/* Set the pending protm seq number to the next one */
protm_enter_set_next_pending_seq(kbdev);
spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
kbase_csf_wait_protected_mode_enter(kbdev);
@@ -3812,7 +3869,7 @@ static void scheduler_ctx_scan_groups(struct kbase_device *kbdev,
}
if (queue_group_idle_locked(group)) {
if (on_slot_group_idle_locked(group))
if (can_schedule_idle_group(group))
list_add_tail(&group->link_to_schedule,
&scheduler->idle_groups_to_schedule);
continue;
@@ -3898,10 +3955,9 @@ static void scheduler_rotate_groups(struct kbase_device *kbdev)
new_head_grp = (!list_empty(list)) ?
list_first_entry(list, struct kbase_queue_group, link) :
NULL;
KBASE_KTRACE_ADD_CSF_GRP(kbdev, GROUP_ROTATE_RUNNABLE,
top_grp, top_ctx->csf.sched.num_runnable_grps);
KBASE_KTRACE_ADD_CSF_GRP(kbdev, GROUP_HEAD_RUNNABLE,
new_head_grp, 0u);
KBASE_KTRACE_ADD_CSF_GRP(kbdev, GROUP_RUNNABLE_ROTATE, top_grp,
top_ctx->csf.sched.num_runnable_grps);
KBASE_KTRACE_ADD_CSF_GRP(kbdev, GROUP_RUNNABLE_HEAD, new_head_grp, 0u);
dev_dbg(kbdev->dev,
"groups rotated for a context, num_runnable_groups: %u\n",
scheduler->top_ctx->csf.sched.num_runnable_grps);
@@ -3932,13 +3988,12 @@ static void scheduler_rotate_ctxs(struct kbase_device *kbdev)
struct kbase_context *new_head_kctx;
list_move_tail(&pos->csf.link, list);
KBASE_KTRACE_ADD(kbdev, SCHEDULER_ROTATE_RUNNABLE, pos,
0u);
KBASE_KTRACE_ADD(kbdev, SCHEDULER_RUNNABLE_KCTX_ROTATE, pos, 0u);
new_head_kctx = (!list_empty(list)) ?
list_first_entry(list, struct kbase_context, csf.link) :
NULL;
KBASE_KTRACE_ADD(kbdev, SCHEDULER_HEAD_RUNNABLE,
new_head_kctx, 0u);
KBASE_KTRACE_ADD(kbdev, SCHEDULER_RUNNABLE_KCTX_HEAD, new_head_kctx,
0u);
dev_dbg(kbdev->dev, "contexts rotated\n");
}
}
@@ -3953,12 +4008,17 @@ static void scheduler_rotate_ctxs(struct kbase_device *kbdev)
* @kbdev: Pointer to the GPU device.
* @csg_bitmap: Bitmap of the CSG slots for which
* the status update request completed successfully.
* @failed_csg_bitmap: Bitmap of the CSG slots for which
* @failed_csg_bitmap: Bitmap of the idle CSG slots for which
* the status update request timed out.
*
* This function sends a CSG status update request for all the CSG slots
* present in the bitmap scheduler->csg_slots_idle_mask and wait for the
* request to complete.
* present in the bitmap scheduler->csg_slots_idle_mask. Additionally, if
* the group's 'reevaluate_idle_status' field is set, the nominally non-idle
* slots are also included in the status update for a confirmation of their
* status. The function waits for the status update request to complete and
* returns the update completed slots bitmap and any timed out idle-flagged
* slots bitmap.
*
* The bits set in the scheduler->csg_slots_idle_mask bitmap are cleared by
* this function.
*/
@@ -3970,35 +4030,71 @@ static void scheduler_update_idle_slots_status(struct kbase_device *kbdev,
struct kbase_csf_global_iface *const global_iface =
&kbdev->csf.global_iface;
unsigned long flags, i;
u32 active_chk = 0;
lockdep_assert_held(&scheduler->lock);
spin_lock_irqsave(&scheduler->interrupt_lock, flags);
for_each_set_bit(i, scheduler->csg_slots_idle_mask, num_groups) {
for_each_set_bit(i, scheduler->csg_inuse_bitmap, num_groups) {
struct kbase_csf_csg_slot *csg_slot = &scheduler->csg_slots[i];
struct kbase_queue_group *group = csg_slot->resident_group;
struct kbase_csf_cmd_stream_group_info *const ginfo =
&global_iface->groups[i];
u32 csg_req;
bool idle_flag;
clear_bit(i, scheduler->csg_slots_idle_mask);
KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_IDLE_CLEAR, group,
scheduler->csg_slots_idle_mask[0]);
if (WARN_ON(!group))
if (WARN_ON(!group)) {
clear_bit(i, scheduler->csg_inuse_bitmap);
clear_bit(i, scheduler->csg_slots_idle_mask);
continue;
}
KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_STATUS_UPDATE, group,
i);
idle_flag = test_bit(i, scheduler->csg_slots_idle_mask);
if (idle_flag || group->reevaluate_idle_status) {
if (idle_flag) {
#ifdef CONFIG_MALI_BIFROST_DEBUG
if (!bitmap_empty(group->protm_pending_bitmap,
ginfo->stream_num)) {
dev_warn(kbdev->dev,
"Idle bit set for group %d of ctx %d_%d on slot %d with pending protm execution",
group->handle, group->kctx->tgid,
group->kctx->id, (int)i);
}
#endif
clear_bit(i, scheduler->csg_slots_idle_mask);
KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_IDLE_CLEAR, group,
scheduler->csg_slots_idle_mask[0]);
} else {
/* Updates include slots for which reevaluation is needed.
* The extra included slots are tracked in active_chk.
* For protm pending slots, their active status is assured,
* so there is no need to request an update.
*/
active_chk |= BIT(i);
group->reevaluate_idle_status = false;
}
csg_req = kbase_csf_firmware_csg_output(ginfo, CSG_ACK);
csg_req ^= CSG_REQ_STATUS_UPDATE_MASK;
kbase_csf_firmware_csg_input_mask(ginfo, CSG_REQ, csg_req,
CSG_REQ_STATUS_UPDATE_MASK);
KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_UPDATE_IDLE_SLOT_REQ, group, i);
csg_req = kbase_csf_firmware_csg_output(ginfo, CSG_ACK);
csg_req ^= CSG_REQ_STATUS_UPDATE_MASK;
kbase_csf_firmware_csg_input_mask(ginfo, CSG_REQ, csg_req,
CSG_REQ_STATUS_UPDATE_MASK);
set_bit(i, csg_bitmap);
/* Track the slot update requests in csg_bitmap.
* Note, if the scheduler requested extended update, the resulting
* csg_bitmap would be the idle_flags + active_chk. Otherwise it's
* identical to the idle_flags.
*/
set_bit(i, csg_bitmap);
} else {
group->run_state = KBASE_CSF_GROUP_RUNNABLE;
}
}
spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
/* The groups are aggregated into a single kernel doorbell request */
if (!bitmap_empty(csg_bitmap, num_groups)) {
long wt =
@@ -4019,9 +4115,19 @@ static void scheduler_update_idle_slots_status(struct kbase_device *kbdev,
/* Store the bitmap of timed out slots */
bitmap_copy(failed_csg_bitmap, csg_bitmap, num_groups);
csg_bitmap[0] = ~csg_bitmap[0] & db_slots;
/* Mask off any failed bit positions contributed by the active (non-idle)
* slots, so that the failed bit pattern reported back to the caller
* contains only slots that were flagged idle. This way, an idle-flagged
* slot whose update failed is kept as 'idle' (an informed guess, as the
* update did not come to a conclusive result), and likewise a failed
* active slot is still treated as 'non-idle'. This allows graceful
* handling of the unexpected timeout condition.
*/
failed_csg_bitmap[0] &= ~active_chk;
} else {
KBASE_KTRACE_ADD(kbdev, SLOTS_STATUS_UPDATE_ACK, NULL,
db_slots);
KBASE_KTRACE_ADD(kbdev, SCHEDULER_UPDATE_IDLE_SLOTS_ACK, NULL, db_slots);
csg_bitmap[0] = db_slots;
}
}
@@ -4100,8 +4206,7 @@ static void scheduler_scan_idle_groups(struct kbase_device *kbdev)
list_for_each_entry_safe(group, n, &scheduler->idle_groups_to_schedule,
link_to_schedule) {
WARN_ON(!on_slot_group_idle_locked(group));
WARN_ON(!can_schedule_idle_group(group));
if (!scheduler->ngrp_to_schedule) {
/* keep the top csg's origin */
@@ -4235,7 +4340,7 @@ static bool all_on_slot_groups_remained_idle(struct kbase_device *kbdev)
u64 const *output_addr;
u64 cur_extract_ofs;
if (!queue)
if (!queue || !queue->user_io_addr)
continue;
output_addr = (u64 const *)(queue->user_io_addr + PAGE_SIZE);
@@ -4336,7 +4441,7 @@ static bool scheduler_suspend_on_idle(struct kbase_device *kbdev)
atomic_read(
&kbdev->csf.scheduler.non_idle_offslot_grps));
/* Bring forward the next tick */
kbase_csf_scheduler_advance_tick(kbdev);
kbase_csf_scheduler_tick_advance(kbdev);
return false;
}
@@ -4354,14 +4459,14 @@ static void gpu_idle_worker(struct work_struct *work)
bool scheduler_is_idle_suspendable = false;
bool all_groups_suspended = false;
KBASE_KTRACE_ADD(kbdev, IDLE_WORKER_BEGIN, NULL, 0u);
KBASE_KTRACE_ADD(kbdev, SCHEDULER_GPU_IDLE_WORKER_START, NULL, 0u);
#define __ENCODE_KTRACE_INFO(reset, idle, all_suspend) \
(((u32)reset) | (((u32)idle) << 4) | (((u32)all_suspend) << 8))
if (kbase_reset_gpu_try_prevent(kbdev)) {
dev_warn(kbdev->dev, "Quit idle for failing to prevent gpu reset.\n");
KBASE_KTRACE_ADD(kbdev, IDLE_WORKER_END, NULL,
KBASE_KTRACE_ADD(kbdev, SCHEDULER_GPU_IDLE_WORKER_END, NULL,
__ENCODE_KTRACE_INFO(true, false, false));
return;
}
@@ -4369,7 +4474,7 @@ static void gpu_idle_worker(struct work_struct *work)
scheduler_is_idle_suspendable = scheduler_idle_suspendable(kbdev);
if (scheduler_is_idle_suspendable) {
KBASE_KTRACE_ADD(kbdev, GPU_IDLE_HANDLING_START, NULL,
KBASE_KTRACE_ADD(kbdev, SCHEDULER_GPU_IDLE_WORKER_HANDLING_START, NULL,
kbase_csf_ktrace_gpu_cycle_cnt(kbdev));
#ifdef KBASE_PM_RUNTIME
if (kbase_pm_gpu_sleep_allowed(kbdev) &&
@@ -4382,9 +4487,8 @@ static void gpu_idle_worker(struct work_struct *work)
mutex_unlock(&scheduler->lock);
kbase_reset_gpu_allow(kbdev);
KBASE_KTRACE_ADD(kbdev, IDLE_WORKER_END, NULL,
__ENCODE_KTRACE_INFO(false,
scheduler_is_idle_suspendable,
KBASE_KTRACE_ADD(kbdev, SCHEDULER_GPU_IDLE_WORKER_END, NULL,
__ENCODE_KTRACE_INFO(false, scheduler_is_idle_suspendable,
all_groups_suspended));
#undef __ENCODE_KTRACE_INFO
}
@@ -4440,7 +4544,7 @@ static int scheduler_prepare(struct kbase_device *kbdev)
*/
atomic_set(&scheduler->non_idle_offslot_grps,
scheduler->non_idle_scanout_grps);
KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_INC, NULL,
KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_GRP_INC, NULL,
scheduler->non_idle_scanout_grps);
/* Adds those idle but runnable groups to the scanout list */
@@ -4629,8 +4733,8 @@ redo_local_tock:
dev_dbg(kbdev->dev, "Scheduler keep protm exec: group-%d",
protm_grp->handle);
new_val = atomic_dec_return(&scheduler->non_idle_offslot_grps);
KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_DEC,
protm_grp, new_val);
KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_GRP_DEC, protm_grp,
new_val);
spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
@@ -4767,7 +4871,7 @@ static void schedule_on_tock(struct work_struct *work)
scheduler->state = SCHED_BUSY;
/* Undertaking schedule action steps */
KBASE_KTRACE_ADD(kbdev, SCHEDULER_TOCK, NULL, 0u);
KBASE_KTRACE_ADD(kbdev, SCHEDULER_TOCK_START, NULL, 0u);
schedule_actions(kbdev, false);
/* Record time information on a non-skipped tock */
@@ -4812,8 +4916,7 @@ static void schedule_on_tick(struct work_struct *work)
scheduler->state = SCHED_BUSY;
/* Undertaking schedule action steps */
KBASE_KTRACE_ADD(kbdev, SCHEDULER_TICK, NULL,
scheduler->total_runnable_grps);
KBASE_KTRACE_ADD(kbdev, SCHEDULER_TICK_START, NULL, scheduler->total_runnable_grps);
schedule_actions(kbdev, true);
/* Record time information */
@@ -5074,8 +5177,7 @@ static bool scheduler_handle_reset_in_protected_mode(struct kbase_device *kbdev)
* anyways.
*/
new_val = atomic_inc_return(&scheduler->non_idle_offslot_grps);
KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_INC,
group, new_val);
KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_GRP_INC, group, new_val);
}
unlock:
@@ -5109,8 +5211,8 @@ static void scheduler_inner_reset(struct kbase_device *kbdev)
spin_lock_irqsave(&scheduler->interrupt_lock, flags);
bitmap_fill(scheduler->csgs_events_enable_mask, MAX_SUPPORTED_CSGS);
if (scheduler->active_protm_grp)
KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_EXIT_PROTM,
scheduler->active_protm_grp, 0u);
KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_PROTM_EXIT, scheduler->active_protm_grp,
0u);
scheduler->active_protm_grp = NULL;
memset(kbdev->csf.scheduler.csg_slots, 0,
num_groups * sizeof(struct kbase_csf_csg_slot));
@@ -5133,7 +5235,7 @@ void kbase_csf_scheduler_reset(struct kbase_device *kbdev)
WARN_ON(!kbase_reset_gpu_is_active(kbdev));
KBASE_KTRACE_ADD(kbdev, SCHEDULER_RESET, NULL, 0u);
KBASE_KTRACE_ADD(kbdev, SCHEDULER_RESET_START, NULL, 0u);
if (scheduler_handle_reset_in_protected_mode(kbdev) &&
!suspend_active_queue_groups_on_reset(kbdev)) {
@@ -5235,9 +5337,9 @@ static void firmware_aliveness_monitor(struct work_struct *work)
kbdev, RESET_FLAGS_HWC_UNRECOVERABLE_ERROR))
kbase_reset_gpu(kbdev);
} else if (kbase_csf_scheduler_get_nr_active_csgs(kbdev) == 1) {
queue_delayed_work(system_long_wq,
&kbdev->csf.scheduler.ping_work,
msecs_to_jiffies(FIRMWARE_PING_INTERVAL_MS));
queue_delayed_work(
system_long_wq, &kbdev->csf.scheduler.ping_work,
msecs_to_jiffies(kbase_get_timeout_ms(kbdev, CSF_FIRMWARE_PING_TIMEOUT)));
}
kbase_pm_context_idle(kbdev);
@@ -5537,7 +5639,7 @@ static bool check_sync_update_for_on_slot_group(
stream, CS_STATUS_WAIT);
unsigned long flags;
KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, QUEUE_SYNC_STATUS_WAIT,
KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, QUEUE_SYNC_UPDATE_WAIT_STATUS,
queue->group, queue, status);
if (!CS_STATUS_WAIT_SYNC_WAIT_GET(status))
@@ -5579,6 +5681,10 @@ static bool check_sync_update_for_on_slot_group(
scheduler->csg_slots_idle_mask[0]);
spin_unlock_irqrestore(
&scheduler->interrupt_lock, flags);
/* Request the scheduler to confirm the condition inferred
* here inside the protected mode.
*/
group->reevaluate_idle_status = true;
group->run_state = KBASE_CSF_GROUP_RUNNABLE;
}
@@ -5689,7 +5795,7 @@ static void check_group_sync_update_worker(struct work_struct *work)
mutex_lock(&scheduler->lock);
KBASE_KTRACE_ADD(kbdev, GROUP_SYNC_UPDATE_WORKER_BEGIN, kctx, 0u);
KBASE_KTRACE_ADD(kbdev, SCHEDULER_GROUP_SYNC_UPDATE_WORKER_START, kctx, 0u);
if (kctx->csf.sched.num_idle_wait_grps != 0) {
struct kbase_queue_group *group, *temp;
@@ -5719,7 +5825,7 @@ static void check_group_sync_update_worker(struct work_struct *work)
if (!sync_updated && (scheduler->state == SCHED_SLEEPING))
check_sync_update_in_sleep_mode(kbdev);
KBASE_KTRACE_ADD(kbdev, GROUP_SYNC_UPDATE_WORKER_END, kctx, 0u);
KBASE_KTRACE_ADD(kbdev, SCHEDULER_GROUP_SYNC_UPDATE_WORKER_END, kctx, 0u);
mutex_unlock(&scheduler->lock);
}
@@ -5729,7 +5835,7 @@ enum kbase_csf_event_callback_action check_group_sync_update_cb(void *param)
{
struct kbase_context *const kctx = param;
KBASE_KTRACE_ADD(kctx->kbdev, SYNC_UPDATE_EVENT, kctx, 0u);
KBASE_KTRACE_ADD(kctx->kbdev, SCHEDULER_GROUP_SYNC_UPDATE_EVENT, kctx, 0u);
queue_work(kctx->csf.sched.sync_update_wq,
&kctx->csf.sched.sync_update_work);
@@ -5999,24 +6105,12 @@ out:
mutex_unlock(&scheduler->lock);
}
int kbase_csf_scheduler_pm_suspend(struct kbase_device *kbdev)
int kbase_csf_scheduler_pm_suspend_no_lock(struct kbase_device *kbdev)
{
int result = 0;
struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
int result = 0;
/* Cancel any potential queued delayed work(s) */
cancel_work_sync(&scheduler->tick_work);
cancel_tock_work(scheduler);
result = kbase_reset_gpu_prevent_and_wait(kbdev);
if (result) {
dev_warn(kbdev->dev,
"Stop PM suspending for failing to prevent gpu reset.\n");
return result;
}
mutex_lock(&scheduler->lock);
lockdep_assert_held(&scheduler->lock);
#ifdef KBASE_PM_RUNTIME
/* If scheduler is in sleeping state, then MCU needs to be activated
* to suspend CSGs.
@@ -6043,6 +6137,27 @@ int kbase_csf_scheduler_pm_suspend(struct kbase_device *kbdev)
}
exit:
return result;
}
int kbase_csf_scheduler_pm_suspend(struct kbase_device *kbdev)
{
int result = 0;
struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
/* Cancel any potential queued delayed work(s) */
cancel_work_sync(&scheduler->tick_work);
cancel_tock_work(scheduler);
result = kbase_reset_gpu_prevent_and_wait(kbdev);
if (result) {
dev_warn(kbdev->dev, "Stop PM suspending for failing to prevent gpu reset.\n");
return result;
}
mutex_lock(&scheduler->lock);
result = kbase_csf_scheduler_pm_suspend_no_lock(kbdev);
mutex_unlock(&scheduler->lock);
kbase_reset_gpu_allow(kbdev);
@@ -6051,17 +6166,24 @@ exit:
}
KBASE_EXPORT_TEST_API(kbase_csf_scheduler_pm_suspend);
void kbase_csf_scheduler_pm_resume(struct kbase_device *kbdev)
void kbase_csf_scheduler_pm_resume_no_lock(struct kbase_device *kbdev)
{
struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
mutex_lock(&scheduler->lock);
lockdep_assert_held(&scheduler->lock);
if ((scheduler->total_runnable_grps > 0) &&
(scheduler->state == SCHED_SUSPENDED)) {
dev_info(kbdev->dev, "Scheduler PM resume");
scheduler_wakeup(kbdev, true);
}
mutex_unlock(&scheduler->lock);
}
void kbase_csf_scheduler_pm_resume(struct kbase_device *kbdev)
{
mutex_lock(&kbdev->csf.scheduler.lock);
kbase_csf_scheduler_pm_resume_no_lock(kbdev);
mutex_unlock(&kbdev->csf.scheduler.lock);
}
KBASE_EXPORT_TEST_API(kbase_csf_scheduler_pm_resume);

View File

@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
* (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -409,6 +409,17 @@ void kbase_csf_scheduler_pm_idle(struct kbase_device *kbdev);
*/
int kbase_csf_scheduler_wait_mcu_active(struct kbase_device *kbdev);
/**
* kbase_csf_scheduler_pm_resume_no_lock - Reactivate the scheduler on system resume
*
* @kbdev: Instance of a GPU platform device that implements a CSF interface.
*
* This function will make the scheduler resume the scheduling of queue groups
* and take the power management reference, if there are any runnable groups.
* The caller must have acquired the global Scheduler lock.
*/
void kbase_csf_scheduler_pm_resume_no_lock(struct kbase_device *kbdev);
/**
* kbase_csf_scheduler_pm_resume - Reactivate the scheduler on system resume
*
@@ -419,6 +430,19 @@ int kbase_csf_scheduler_wait_mcu_active(struct kbase_device *kbdev);
*/
void kbase_csf_scheduler_pm_resume(struct kbase_device *kbdev);
/**
* kbase_csf_scheduler_pm_suspend_no_lock - Idle the scheduler on system suspend
*
* @kbdev: Instance of a GPU platform device that implements a CSF interface.
*
* This function will make the scheduler suspend all the running queue groups
* and drop its power management reference.
* The caller must have acquired the global Scheduler lock.
*
* Return: 0 on success.
*/
int kbase_csf_scheduler_pm_suspend_no_lock(struct kbase_device *kbdev);
/**
* kbase_csf_scheduler_pm_suspend - Idle the scheduler on system suspend
*
@@ -448,7 +472,7 @@ static inline bool kbase_csf_scheduler_all_csgs_idle(struct kbase_device *kbdev)
}
/**
* kbase_csf_scheduler_advance_tick_nolock() - Advance the scheduling tick
* kbase_csf_scheduler_tick_advance_nolock() - Advance the scheduling tick
*
* @kbdev: Pointer to the device
*
@@ -458,23 +482,23 @@ static inline bool kbase_csf_scheduler_all_csgs_idle(struct kbase_device *kbdev)
* The caller must hold the interrupt lock.
*/
static inline void
kbase_csf_scheduler_advance_tick_nolock(struct kbase_device *kbdev)
kbase_csf_scheduler_tick_advance_nolock(struct kbase_device *kbdev)
{
struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
lockdep_assert_held(&scheduler->interrupt_lock);
if (scheduler->tick_timer_active) {
KBASE_KTRACE_ADD(kbdev, SCHEDULER_ADVANCE_TICK, NULL, 0u);
KBASE_KTRACE_ADD(kbdev, SCHEDULER_TICK_ADVANCE, NULL, 0u);
scheduler->tick_timer_active = false;
queue_work(scheduler->wq, &scheduler->tick_work);
} else {
KBASE_KTRACE_ADD(kbdev, SCHEDULER_NOADVANCE_TICK, NULL, 0u);
KBASE_KTRACE_ADD(kbdev, SCHEDULER_TICK_NOADVANCE, NULL, 0u);
}
}
/**
* kbase_csf_scheduler_advance_tick() - Advance the scheduling tick
* kbase_csf_scheduler_tick_advance() - Advance the scheduling tick
*
* @kbdev: Pointer to the device
*
@@ -482,13 +506,13 @@ kbase_csf_scheduler_advance_tick_nolock(struct kbase_device *kbdev)
* immediate execution, but only if the tick hrtimer is active. If the timer
* is inactive then the tick work item is already in flight.
*/
static inline void kbase_csf_scheduler_advance_tick(struct kbase_device *kbdev)
static inline void kbase_csf_scheduler_tick_advance(struct kbase_device *kbdev)
{
struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
unsigned long flags;
spin_lock_irqsave(&scheduler->interrupt_lock, flags);
kbase_csf_scheduler_advance_tick_nolock(kbdev);
kbase_csf_scheduler_tick_advance_nolock(kbdev);
spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
}

View File

@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
* (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2020-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -42,67 +42,67 @@ int dummy_array[] = {
/*
* Generic CSF events
*/
KBASE_KTRACE_CODE_MAKE_CODE(EVICT_CTX_SLOTS),
KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_EVICT_CTX_SLOTS_START),
/* info_val[0:7] == fw version_minor
* info_val[15:8] == fw version_major
* info_val[63:32] == fw version_hash
*/
KBASE_KTRACE_CODE_MAKE_CODE(FIRMWARE_BOOT),
KBASE_KTRACE_CODE_MAKE_CODE(FIRMWARE_REBOOT),
KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_TOCK),
KBASE_KTRACE_CODE_MAKE_CODE(CSF_FIRMWARE_BOOT),
KBASE_KTRACE_CODE_MAKE_CODE(CSF_FIRMWARE_REBOOT),
KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_TOCK_START),
KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_TOCK_END),
/* info_val == total number of runnable groups across all kctxs */
KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_TICK),
KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_TICK_START),
KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_TICK_END),
KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_RESET),
KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_RESET_START),
/* info_val = timeout in ms */
KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_WAIT_PROTM_QUIT),
KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_PROTM_WAIT_QUIT_START),
/* info_val = remaining ms timeout, or 0 if timedout */
KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_WAIT_PROTM_QUIT_DONE),
KBASE_KTRACE_CODE_MAKE_CODE(SYNC_UPDATE_EVENT),
KBASE_KTRACE_CODE_MAKE_CODE(SYNC_UPDATE_EVENT_NOTIFY_GPU),
KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_PROTM_WAIT_QUIT_END),
KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_GROUP_SYNC_UPDATE_EVENT),
KBASE_KTRACE_CODE_MAKE_CODE(CSF_SYNC_UPDATE_NOTIFY_GPU_EVENT),
/* info_val = JOB_IRQ_STATUS */
KBASE_KTRACE_CODE_MAKE_CODE(CSF_INTERRUPT),
KBASE_KTRACE_CODE_MAKE_CODE(CSF_INTERRUPT_START),
/* info_val = JOB_IRQ_STATUS */
KBASE_KTRACE_CODE_MAKE_CODE(CSF_INTERRUPT_END),
/* info_val = JOB_IRQ_STATUS */
KBASE_KTRACE_CODE_MAKE_CODE(CSG_INTERRUPT_PROCESS),
KBASE_KTRACE_CODE_MAKE_CODE(CSG_INTERRUPT_PROCESS_START),
/* info_val = GLB_REQ ^ GLB_ACK */
KBASE_KTRACE_CODE_MAKE_CODE(GLB_REQ_ACQ),
KBASE_KTRACE_CODE_MAKE_CODE(CSF_INTERRUPT_GLB_REQ_ACK),
/* info_val[31:0] = num non idle offslot groups
* info_val[32] = scheduler can suspend on idle
*/
KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_CAN_IDLE),
KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_ADVANCE_TICK),
KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_NOADVANCE_TICK),
KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_GPU_IDLE_EVENT_CAN_SUSPEND),
KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_TICK_ADVANCE),
KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_TICK_NOADVANCE),
/* kctx is added to the back of the list */
KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_INSERT_RUNNABLE),
KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_REMOVE_RUNNABLE),
KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_RUNNABLE_KCTX_INSERT),
KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_RUNNABLE_KCTX_REMOVE),
/* kctx is moved to the back of the list */
KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_ROTATE_RUNNABLE),
KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_HEAD_RUNNABLE),
KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_RUNNABLE_KCTX_ROTATE),
KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_RUNNABLE_KCTX_HEAD),
KBASE_KTRACE_CODE_MAKE_CODE(IDLE_WORKER_BEGIN),
KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_GPU_IDLE_WORKER_START),
/* 4-bit encoding of boolean values (ease of reading as hex values)
*
* info_val[3:0] = was reset active/failed to be prevented
* info_val[7:4] = whether scheduler was both idle and suspendable
* info_val[11:8] = whether all groups were suspended
*/
KBASE_KTRACE_CODE_MAKE_CODE(IDLE_WORKER_END),
KBASE_KTRACE_CODE_MAKE_CODE(GROUP_SYNC_UPDATE_WORKER_BEGIN),
KBASE_KTRACE_CODE_MAKE_CODE(GROUP_SYNC_UPDATE_WORKER_END),
KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_GPU_IDLE_WORKER_END),
KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_GROUP_SYNC_UPDATE_WORKER_START),
KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_GROUP_SYNC_UPDATE_WORKER_END),
/* info_val = bitmask of slots that gave an ACK for STATUS_UPDATE */
KBASE_KTRACE_CODE_MAKE_CODE(SLOTS_STATUS_UPDATE_ACK),
KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_UPDATE_IDLE_SLOTS_ACK),
/* info_val[63:0] = GPU cycle counter, used mainly for benchmarking
* purpose.
*/
KBASE_KTRACE_CODE_MAKE_CODE(GPU_IDLE_HANDLING_START),
KBASE_KTRACE_CODE_MAKE_CODE(MCU_HALTED),
KBASE_KTRACE_CODE_MAKE_CODE(MCU_IN_SLEEP),
KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_GPU_IDLE_WORKER_HANDLING_START),
KBASE_KTRACE_CODE_MAKE_CODE(CSF_FIRMWARE_MCU_HALTED),
KBASE_KTRACE_CODE_MAKE_CODE(CSF_FIRMWARE_MCU_SLEEP),
/*
* Group events
@@ -111,17 +111,17 @@ int dummy_array[] = {
* info_val[19:16] == as_nr
* info_val[63:32] == endpoint config (max number of endpoints allowed)
*/
KBASE_KTRACE_CODE_MAKE_CODE(CSG_SLOT_START),
KBASE_KTRACE_CODE_MAKE_CODE(CSG_SLOT_START_REQ),
/* info_val == CSG_REQ state issued */
KBASE_KTRACE_CODE_MAKE_CODE(CSG_SLOT_STOP),
KBASE_KTRACE_CODE_MAKE_CODE(CSG_SLOT_STOP_REQ),
/* info_val == CSG_ACK state */
KBASE_KTRACE_CODE_MAKE_CODE(CSG_SLOT_STARTED),
KBASE_KTRACE_CODE_MAKE_CODE(CSG_SLOT_RUNNING),
/* info_val == CSG_ACK state */
KBASE_KTRACE_CODE_MAKE_CODE(CSG_SLOT_STOPPED),
/* info_val == slot cleaned */
KBASE_KTRACE_CODE_MAKE_CODE(CSG_SLOT_CLEANED),
/* info_val = slot requesting STATUS_UPDATE */
KBASE_KTRACE_CODE_MAKE_CODE(CSG_SLOT_STATUS_UPDATE),
KBASE_KTRACE_CODE_MAKE_CODE(CSG_UPDATE_IDLE_SLOT_REQ),
/* info_val = scheduler's new csg_slots_idle_mask[0]
* group->csg_nr indicates which bit was set
*/
@@ -133,13 +133,13 @@ int dummy_array[] = {
*/
KBASE_KTRACE_CODE_MAKE_CODE(CSG_SLOT_IDLE_CLEAR),
/* info_val == previous priority */
KBASE_KTRACE_CODE_MAKE_CODE(CSG_PRIO_UPDATE),
KBASE_KTRACE_CODE_MAKE_CODE(CSG_SLOT_PRIO_UPDATE),
/* info_val == CSG_REQ ^ CSG_ACK */
KBASE_KTRACE_CODE_MAKE_CODE(CSG_SYNC_UPDATE_INTERRUPT),
KBASE_KTRACE_CODE_MAKE_CODE(CSG_INTERRUPT_SYNC_UPDATE),
/* info_val == CSG_REQ ^ CSG_ACK */
KBASE_KTRACE_CODE_MAKE_CODE(CSG_IDLE_INTERRUPT),
KBASE_KTRACE_CODE_MAKE_CODE(CSG_INTERRUPT_IDLE),
/* info_val == CSG_REQ ^ CSG_ACK */
KBASE_KTRACE_CODE_MAKE_CODE(CSG_PROGRESS_TIMER_INTERRUPT),
KBASE_KTRACE_CODE_MAKE_CODE(CSG_INTERRUPT_PROGRESS_TIMER_EVENT),
/* info_val[31:0] == CSG_REQ ^ CSG_ACK
* info_val[63:32] == CSG_IRQ_REQ ^ CSG_IRQ_ACK
*/
@@ -152,34 +152,34 @@ int dummy_array[] = {
/* info_val[31:0] == new run state of the evicted group
* info_val[63:32] == number of runnable groups
*/
KBASE_KTRACE_CODE_MAKE_CODE(GROUP_EVICT_SCHED),
KBASE_KTRACE_CODE_MAKE_CODE(GROUP_EVICT),
/* info_val == new num_runnable_grps
* group is added to the back of the list for its priority level
*/
KBASE_KTRACE_CODE_MAKE_CODE(GROUP_INSERT_RUNNABLE),
KBASE_KTRACE_CODE_MAKE_CODE(GROUP_RUNNABLE_INSERT),
/* info_val == new num_runnable_grps
*/
KBASE_KTRACE_CODE_MAKE_CODE(GROUP_REMOVE_RUNNABLE),
KBASE_KTRACE_CODE_MAKE_CODE(GROUP_RUNNABLE_REMOVE),
/* info_val == num_runnable_grps
* group is moved to the back of the list for its priority level
*/
KBASE_KTRACE_CODE_MAKE_CODE(GROUP_ROTATE_RUNNABLE),
KBASE_KTRACE_CODE_MAKE_CODE(GROUP_HEAD_RUNNABLE),
KBASE_KTRACE_CODE_MAKE_CODE(GROUP_RUNNABLE_ROTATE),
KBASE_KTRACE_CODE_MAKE_CODE(GROUP_RUNNABLE_HEAD),
/* info_val == new num_idle_wait_grps
* group is added to the back of the list
*/
KBASE_KTRACE_CODE_MAKE_CODE(GROUP_INSERT_IDLE_WAIT),
KBASE_KTRACE_CODE_MAKE_CODE(GROUP_IDLE_WAIT_INSERT),
/* info_val == new num_idle_wait_grps
* group is removed from the list
*/
KBASE_KTRACE_CODE_MAKE_CODE(GROUP_REMOVE_IDLE_WAIT),
KBASE_KTRACE_CODE_MAKE_CODE(GROUP_HEAD_IDLE_WAIT),
KBASE_KTRACE_CODE_MAKE_CODE(GROUP_IDLE_WAIT_REMOVE),
KBASE_KTRACE_CODE_MAKE_CODE(GROUP_IDLE_WAIT_HEAD),
/* info_val == is scheduler running with protected mode tasks */
KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_CHECK_PROTM_ENTER),
KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_ENTER_PROTM),
KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_EXIT_PROTM),
KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_PROTM_ENTER_CHECK),
KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_PROTM_ENTER),
KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_PROTM_EXIT),
/* info_val[31:0] == number of GPU address space slots in use
* info_val[63:32] == number of runnable groups
*/
@@ -187,11 +187,11 @@ int dummy_array[] = {
/* info_val == new count of off-slot non-idle groups
* no group indicates it was set rather than incremented
*/
KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_NONIDLE_OFFSLOT_INC),
KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_NONIDLE_OFFSLOT_GRP_INC),
/* info_val == new count of off-slot non-idle groups */
KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_NONIDLE_OFFSLOT_DEC),
KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_NONIDLE_OFFSLOT_GRP_DEC),
KBASE_KTRACE_CODE_MAKE_CODE(PROTM_EVENT_WORKER_BEGIN),
KBASE_KTRACE_CODE_MAKE_CODE(PROTM_EVENT_WORKER_START),
KBASE_KTRACE_CODE_MAKE_CODE(PROTM_EVENT_WORKER_END),
/*
@@ -201,42 +201,42 @@ int dummy_array[] = {
KBASE_KTRACE_CODE_MAKE_CODE(CSI_START),
/* info_val == queue->enabled before stop */
KBASE_KTRACE_CODE_MAKE_CODE(CSI_STOP),
KBASE_KTRACE_CODE_MAKE_CODE(CSI_STOP_REQUESTED),
KBASE_KTRACE_CODE_MAKE_CODE(CSI_STOP_REQ),
/* info_val == CS_REQ ^ CS_ACK that were not processed due to the group
* being suspended
*/
KBASE_KTRACE_CODE_MAKE_CODE(CSI_IGNORED_INTERRUPTS_GROUP_SUSPEND),
KBASE_KTRACE_CODE_MAKE_CODE(CSI_INTERRUPT_GROUP_SUSPENDS_IGNORED),
/* info_val == CS_REQ ^ CS_ACK */
KBASE_KTRACE_CODE_MAKE_CODE(CSI_FAULT_INTERRUPT),
KBASE_KTRACE_CODE_MAKE_CODE(CSI_INTERRUPT_FAULT),
/* info_val == CS_REQ ^ CS_ACK */
KBASE_KTRACE_CODE_MAKE_CODE(CSI_TILER_OOM_INTERRUPT),
KBASE_KTRACE_CODE_MAKE_CODE(CSI_INTERRUPT_TILER_OOM),
/* info_val == CS_REQ ^ CS_ACK */
KBASE_KTRACE_CODE_MAKE_CODE(CSI_PROTM_PEND_INTERRUPT),
KBASE_KTRACE_CODE_MAKE_CODE(CSI_INTERRUPT_PROTM_PEND),
/* info_val == CS_ACK_PROTM_PEND ^ CS_REQ_PROTM_PEND */
KBASE_KTRACE_CODE_MAKE_CODE(CSI_PROTM_ACK),
/* info_val == group->run_state (for group the queue is bound to) */
KBASE_KTRACE_CODE_MAKE_CODE(QUEUE_START),
KBASE_KTRACE_CODE_MAKE_CODE(QUEUE_STOP),
/* info_val == contents of CS_STATUS_WAIT_SYNC_POINTER */
KBASE_KTRACE_CODE_MAKE_CODE(QUEUE_SYNC_UPDATE),
KBASE_KTRACE_CODE_MAKE_CODE(QUEUE_SYNC_UPDATE_EVAL_START),
/* info_val == bool for result of the evaluation */
KBASE_KTRACE_CODE_MAKE_CODE(QUEUE_SYNC_UPDATE_EVALUATED),
KBASE_KTRACE_CODE_MAKE_CODE(QUEUE_SYNC_UPDATE_EVAL_END),
/* info_val == contents of CS_STATUS_WAIT */
KBASE_KTRACE_CODE_MAKE_CODE(QUEUE_SYNC_STATUS_WAIT),
KBASE_KTRACE_CODE_MAKE_CODE(QUEUE_SYNC_UPDATE_WAIT_STATUS),
/* info_val == current sync value pointed to by queue->sync_ptr */
KBASE_KTRACE_CODE_MAKE_CODE(QUEUE_SYNC_CURRENT_VAL),
KBASE_KTRACE_CODE_MAKE_CODE(QUEUE_SYNC_UPDATE_CUR_VAL),
/* info_val == current value of CS_STATUS_WAIT_SYNC_VALUE */
KBASE_KTRACE_CODE_MAKE_CODE(QUEUE_SYNC_TEST_VAL),
KBASE_KTRACE_CODE_MAKE_CODE(QUEUE_SYNC_UPDATE_TEST_VAL),
/* info_val == current value of CS_STATUS_BLOCKED_REASON */
KBASE_KTRACE_CODE_MAKE_CODE(QUEUE_SYNC_BLOCKED_REASON),
KBASE_KTRACE_CODE_MAKE_CODE(QUEUE_SYNC_UPDATE_BLOCKED_REASON),
/* info_val = group's new protm_pending_bitmap[0]
* queue->csi_index indicates which bit was set
*/
KBASE_KTRACE_CODE_MAKE_CODE(PROTM_PENDING_SET),
KBASE_KTRACE_CODE_MAKE_CODE(CSI_PROTM_PEND_SET),
/* info_val = group's new protm_pending_bitmap[0]
* queue->csi_index indicates which bit was cleared
*/
KBASE_KTRACE_CODE_MAKE_CODE(PROTM_PENDING_CLEAR),
KBASE_KTRACE_CODE_MAKE_CODE(CSI_PROTM_PEND_CLEAR),
/*
* KCPU queue events
@@ -244,42 +244,42 @@ int dummy_array[] = {
/* KTrace info_val == KCPU queue fence context
* KCPU extra_info_val == N/A.
*/
KBASE_KTRACE_CODE_MAKE_CODE(KCPU_QUEUE_NEW),
KBASE_KTRACE_CODE_MAKE_CODE(KCPU_QUEUE_CREATE),
/* KTrace info_val == Number of pending commands in KCPU queue when
* it is destroyed.
* KCPU extra_info_val == Number of CQS wait operations present in
* the KCPU queue when it is destroyed.
*/
KBASE_KTRACE_CODE_MAKE_CODE(KCPU_QUEUE_DESTROY),
KBASE_KTRACE_CODE_MAKE_CODE(KCPU_QUEUE_DELETE),
/* KTrace info_val == CQS event memory address
* KCPU extra_info_val == Upper 32 bits of event memory, i.e. contents
* of error field.
*/
KBASE_KTRACE_CODE_MAKE_CODE(CQS_SET),
KBASE_KTRACE_CODE_MAKE_CODE(KCPU_CQS_SET),
/* KTrace info_val == Number of CQS objects to be waited upon
* KCPU extra_info_val == N/A.
*/
KBASE_KTRACE_CODE_MAKE_CODE(CQS_WAIT_START),
KBASE_KTRACE_CODE_MAKE_CODE(KCPU_CQS_WAIT_START),
/* KTrace info_val == CQS event memory address
* KCPU extra_info_val == 1 if CQS was signaled with an error and queue
* inherited the error, otherwise 0.
*/
KBASE_KTRACE_CODE_MAKE_CODE(CQS_WAIT_END),
KBASE_KTRACE_CODE_MAKE_CODE(KCPU_CQS_WAIT_END),
/* KTrace info_val == Fence context
* KCPU extra_info_val == Fence seqno.
*/
KBASE_KTRACE_CODE_MAKE_CODE(FENCE_SIGNAL),
KBASE_KTRACE_CODE_MAKE_CODE(KCPU_FENCE_SIGNAL),
/* KTrace info_val == Fence context
* KCPU extra_info_val == Fence seqno.
*/
KBASE_KTRACE_CODE_MAKE_CODE(FENCE_WAIT_START),
KBASE_KTRACE_CODE_MAKE_CODE(KCPU_FENCE_WAIT_START),
/* KTrace info_val == Fence context
* KCPU extra_info_val == Fence seqno.
*/
KBASE_KTRACE_CODE_MAKE_CODE(FENCE_WAIT_END),
KBASE_KTRACE_CODE_MAKE_CODE(KCPU_FENCE_WAIT_END),
#if 0 /* Dummy section to avoid breaking formatting */
};
#endif
/* ***** THE LACK OF HEADER GUARDS IS INTENTIONAL ***** */
/* ***** THE LACK OF HEADER GUARDS IS INTENTIONAL ***** */

View File

@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
* (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2020-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -98,6 +98,9 @@ void kbasep_ktrace_add_csf(struct kbase_device *kbdev,
struct kbase_ktrace_msg *trace_msg;
struct kbase_context *kctx = NULL;
if (unlikely(!kbasep_ktrace_initialized(&kbdev->ktrace)))
return;
spin_lock_irqsave(&kbdev->ktrace.lock, irqflags);
/* Reserve and update indices */
@@ -165,6 +168,9 @@ void kbasep_ktrace_add_csf_kcpu(struct kbase_device *kbdev,
struct kbase_ktrace_msg *trace_msg;
struct kbase_context *kctx = queue->kctx;
if (unlikely(!kbasep_ktrace_initialized(&kbdev->ktrace)))
return;
spin_lock_irqsave(&kbdev->ktrace.lock, irqflags);
/* Reserve and update indices */

View File

@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
* (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2020-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -47,7 +47,7 @@
* 1.3:
* Add a lot of extra new traces. Tweak some existing scheduler related traces
* to contain extra information/happen at slightly different times.
* SCHEDULER_EXIT_PROTM now has group information
* SCHEDULER_PROTM_EXIT now has group information
*/
#define KBASE_KTRACE_VERSION_MAJOR 1
#define KBASE_KTRACE_VERSION_MINOR 3

View File

@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
* (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2020-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -80,6 +80,9 @@ void kbasep_ktrace_add_jm(struct kbase_device *kbdev,
unsigned long irqflags;
struct kbase_ktrace_msg *trace_msg;
if (unlikely(!kbasep_ktrace_initialized(&kbdev->ktrace)))
return;
spin_lock_irqsave(&kbdev->ktrace.lock, irqflags);
/* Reserve and update indices */

View File

@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
* (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2020-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -30,37 +30,36 @@
/*
* Generic CSF events - using the common DEFINE_MALI_ADD_EVENT
*/
DEFINE_MALI_ADD_EVENT(EVICT_CTX_SLOTS);
DEFINE_MALI_ADD_EVENT(FIRMWARE_BOOT);
DEFINE_MALI_ADD_EVENT(FIRMWARE_REBOOT);
DEFINE_MALI_ADD_EVENT(SCHEDULER_TOCK);
DEFINE_MALI_ADD_EVENT(SCHEDULER_EVICT_CTX_SLOTS_START);
DEFINE_MALI_ADD_EVENT(CSF_FIRMWARE_BOOT);
DEFINE_MALI_ADD_EVENT(CSF_FIRMWARE_REBOOT);
DEFINE_MALI_ADD_EVENT(SCHEDULER_TOCK_START);
DEFINE_MALI_ADD_EVENT(SCHEDULER_TOCK_END);
DEFINE_MALI_ADD_EVENT(SCHEDULER_TICK);
DEFINE_MALI_ADD_EVENT(SCHEDULER_TICK_START);
DEFINE_MALI_ADD_EVENT(SCHEDULER_TICK_END);
DEFINE_MALI_ADD_EVENT(SCHEDULER_RESET);
DEFINE_MALI_ADD_EVENT(SCHEDULER_WAIT_PROTM_QUIT);
DEFINE_MALI_ADD_EVENT(SCHEDULER_WAIT_PROTM_QUIT_DONE);
DEFINE_MALI_ADD_EVENT(SYNC_UPDATE_EVENT);
DEFINE_MALI_ADD_EVENT(SYNC_UPDATE_EVENT_NOTIFY_GPU);
DEFINE_MALI_ADD_EVENT(CSF_INTERRUPT);
DEFINE_MALI_ADD_EVENT(SCHEDULER_RESET_START);
DEFINE_MALI_ADD_EVENT(SCHEDULER_PROTM_WAIT_QUIT_START);
DEFINE_MALI_ADD_EVENT(SCHEDULER_PROTM_WAIT_QUIT_END);
DEFINE_MALI_ADD_EVENT(SCHEDULER_GROUP_SYNC_UPDATE_EVENT);
DEFINE_MALI_ADD_EVENT(CSF_SYNC_UPDATE_NOTIFY_GPU_EVENT);
DEFINE_MALI_ADD_EVENT(CSF_INTERRUPT_START);
DEFINE_MALI_ADD_EVENT(CSF_INTERRUPT_END);
DEFINE_MALI_ADD_EVENT(CSG_INTERRUPT_PROCESS);
DEFINE_MALI_ADD_EVENT(GLB_REQ_ACQ);
DEFINE_MALI_ADD_EVENT(SCHEDULER_CAN_IDLE);
DEFINE_MALI_ADD_EVENT(SCHEDULER_ADVANCE_TICK);
DEFINE_MALI_ADD_EVENT(SCHEDULER_NOADVANCE_TICK);
DEFINE_MALI_ADD_EVENT(SCHEDULER_INSERT_RUNNABLE);
DEFINE_MALI_ADD_EVENT(SCHEDULER_REMOVE_RUNNABLE);
DEFINE_MALI_ADD_EVENT(SCHEDULER_ROTATE_RUNNABLE);
DEFINE_MALI_ADD_EVENT(SCHEDULER_HEAD_RUNNABLE);
DEFINE_MALI_ADD_EVENT(IDLE_WORKER_BEGIN);
DEFINE_MALI_ADD_EVENT(IDLE_WORKER_END);
DEFINE_MALI_ADD_EVENT(GROUP_SYNC_UPDATE_WORKER_BEGIN);
DEFINE_MALI_ADD_EVENT(GROUP_SYNC_UPDATE_WORKER_END);
DEFINE_MALI_ADD_EVENT(SLOTS_STATUS_UPDATE_ACK);
DEFINE_MALI_ADD_EVENT(GPU_IDLE_HANDLING_START);
DEFINE_MALI_ADD_EVENT(MCU_HALTED);
DEFINE_MALI_ADD_EVENT(MCU_IN_SLEEP);
DEFINE_MALI_ADD_EVENT(CSF_INTERRUPT_GLB_REQ_ACK);
DEFINE_MALI_ADD_EVENT(SCHEDULER_GPU_IDLE_EVENT_CAN_SUSPEND);
DEFINE_MALI_ADD_EVENT(SCHEDULER_TICK_ADVANCE);
DEFINE_MALI_ADD_EVENT(SCHEDULER_TICK_NOADVANCE);
DEFINE_MALI_ADD_EVENT(SCHEDULER_RUNNABLE_KCTX_INSERT);
DEFINE_MALI_ADD_EVENT(SCHEDULER_RUNNABLE_KCTX_REMOVE);
DEFINE_MALI_ADD_EVENT(SCHEDULER_RUNNABLE_KCTX_ROTATE);
DEFINE_MALI_ADD_EVENT(SCHEDULER_RUNNABLE_KCTX_HEAD);
DEFINE_MALI_ADD_EVENT(SCHEDULER_GPU_IDLE_WORKER_START);
DEFINE_MALI_ADD_EVENT(SCHEDULER_GPU_IDLE_WORKER_END);
DEFINE_MALI_ADD_EVENT(SCHEDULER_GROUP_SYNC_UPDATE_WORKER_START);
DEFINE_MALI_ADD_EVENT(SCHEDULER_GROUP_SYNC_UPDATE_WORKER_END);
DEFINE_MALI_ADD_EVENT(SCHEDULER_UPDATE_IDLE_SLOTS_ACK);
DEFINE_MALI_ADD_EVENT(SCHEDULER_GPU_IDLE_WORKER_HANDLING_START);
DEFINE_MALI_ADD_EVENT(CSF_FIRMWARE_MCU_HALTED);
DEFINE_MALI_ADD_EVENT(CSF_FIRMWARE_MCU_SLEEP);
DECLARE_EVENT_CLASS(mali_csf_grp_q_template,
TP_PROTO(struct kbase_device *kbdev, struct kbase_queue_group *group,
@@ -130,37 +129,38 @@ DECLARE_EVENT_CLASS(mali_csf_grp_q_template,
__entry->kctx_tgid, __entry->kctx_id, __entry->group_handle, \
__entry->csg_nr, __entry->slot_prio, __entry->info_val))
DEFINE_MALI_CSF_GRP_EVENT(CSG_SLOT_START);
DEFINE_MALI_CSF_GRP_EVENT(CSG_SLOT_STOP);
DEFINE_MALI_CSF_GRP_EVENT(CSG_SLOT_STARTED);
DEFINE_MALI_CSF_GRP_EVENT(CSG_SLOT_START_REQ);
DEFINE_MALI_CSF_GRP_EVENT(CSG_SLOT_STOP_REQ);
DEFINE_MALI_CSF_GRP_EVENT(CSG_SLOT_RUNNING);
DEFINE_MALI_CSF_GRP_EVENT(CSG_SLOT_STOPPED);
DEFINE_MALI_CSF_GRP_EVENT(CSG_SLOT_CLEANED);
DEFINE_MALI_CSF_GRP_EVENT(CSG_SLOT_STATUS_UPDATE);
DEFINE_MALI_CSF_GRP_EVENT(CSG_UPDATE_IDLE_SLOT_REQ);
DEFINE_MALI_CSF_GRP_EVENT(CSG_SLOT_IDLE_SET);
DEFINE_MALI_CSF_GRP_EVENT(CSG_SLOT_IDLE_CLEAR);
DEFINE_MALI_CSF_GRP_EVENT(CSG_PRIO_UPDATE);
DEFINE_MALI_CSF_GRP_EVENT(CSG_SYNC_UPDATE_INTERRUPT);
DEFINE_MALI_CSF_GRP_EVENT(CSG_IDLE_INTERRUPT);
DEFINE_MALI_CSF_GRP_EVENT(CSG_PROGRESS_TIMER_INTERRUPT);
DEFINE_MALI_CSF_GRP_EVENT(CSG_SLOT_PRIO_UPDATE);
DEFINE_MALI_CSF_GRP_EVENT(CSG_INTERRUPT_SYNC_UPDATE);
DEFINE_MALI_CSF_GRP_EVENT(CSG_INTERRUPT_IDLE);
DEFINE_MALI_CSF_GRP_EVENT(CSG_INTERRUPT_PROGRESS_TIMER_EVENT);
DEFINE_MALI_CSF_GRP_EVENT(CSG_INTERRUPT_PROCESS_START);
DEFINE_MALI_CSF_GRP_EVENT(CSG_INTERRUPT_PROCESS_END);
DEFINE_MALI_CSF_GRP_EVENT(GROUP_SYNC_UPDATE_DONE);
DEFINE_MALI_CSF_GRP_EVENT(GROUP_DESCHEDULE);
DEFINE_MALI_CSF_GRP_EVENT(GROUP_SCHEDULE);
DEFINE_MALI_CSF_GRP_EVENT(GROUP_EVICT_SCHED);
DEFINE_MALI_CSF_GRP_EVENT(GROUP_INSERT_RUNNABLE);
DEFINE_MALI_CSF_GRP_EVENT(GROUP_REMOVE_RUNNABLE);
DEFINE_MALI_CSF_GRP_EVENT(GROUP_ROTATE_RUNNABLE);
DEFINE_MALI_CSF_GRP_EVENT(GROUP_HEAD_RUNNABLE);
DEFINE_MALI_CSF_GRP_EVENT(GROUP_INSERT_IDLE_WAIT);
DEFINE_MALI_CSF_GRP_EVENT(GROUP_REMOVE_IDLE_WAIT);
DEFINE_MALI_CSF_GRP_EVENT(GROUP_HEAD_IDLE_WAIT);
DEFINE_MALI_CSF_GRP_EVENT(SCHEDULER_CHECK_PROTM_ENTER);
DEFINE_MALI_CSF_GRP_EVENT(SCHEDULER_ENTER_PROTM);
DEFINE_MALI_CSF_GRP_EVENT(SCHEDULER_EXIT_PROTM);
DEFINE_MALI_CSF_GRP_EVENT(GROUP_EVICT);
DEFINE_MALI_CSF_GRP_EVENT(GROUP_RUNNABLE_INSERT);
DEFINE_MALI_CSF_GRP_EVENT(GROUP_RUNNABLE_REMOVE);
DEFINE_MALI_CSF_GRP_EVENT(GROUP_RUNNABLE_ROTATE);
DEFINE_MALI_CSF_GRP_EVENT(GROUP_RUNNABLE_HEAD);
DEFINE_MALI_CSF_GRP_EVENT(GROUP_IDLE_WAIT_INSERT);
DEFINE_MALI_CSF_GRP_EVENT(GROUP_IDLE_WAIT_REMOVE);
DEFINE_MALI_CSF_GRP_EVENT(GROUP_IDLE_WAIT_HEAD);
DEFINE_MALI_CSF_GRP_EVENT(SCHEDULER_PROTM_ENTER_CHECK);
DEFINE_MALI_CSF_GRP_EVENT(SCHEDULER_PROTM_ENTER);
DEFINE_MALI_CSF_GRP_EVENT(SCHEDULER_PROTM_EXIT);
DEFINE_MALI_CSF_GRP_EVENT(SCHEDULER_TOP_GRP);
DEFINE_MALI_CSF_GRP_EVENT(SCHEDULER_NONIDLE_OFFSLOT_INC);
DEFINE_MALI_CSF_GRP_EVENT(SCHEDULER_NONIDLE_OFFSLOT_DEC);
DEFINE_MALI_CSF_GRP_EVENT(PROTM_EVENT_WORKER_BEGIN);
DEFINE_MALI_CSF_GRP_EVENT(SCHEDULER_NONIDLE_OFFSLOT_GRP_INC);
DEFINE_MALI_CSF_GRP_EVENT(SCHEDULER_NONIDLE_OFFSLOT_GRP_DEC);
DEFINE_MALI_CSF_GRP_EVENT(PROTM_EVENT_WORKER_START);
DEFINE_MALI_CSF_GRP_EVENT(PROTM_EVENT_WORKER_END);
#undef DEFINE_MALI_CSF_GRP_EVENT
@@ -176,22 +176,22 @@ DEFINE_MALI_CSF_GRP_EVENT(PROTM_EVENT_WORKER_END);
DEFINE_MALI_CSF_GRP_Q_EVENT(CSI_START);
DEFINE_MALI_CSF_GRP_Q_EVENT(CSI_STOP);
DEFINE_MALI_CSF_GRP_Q_EVENT(CSI_STOP_REQUESTED);
DEFINE_MALI_CSF_GRP_Q_EVENT(CSI_IGNORED_INTERRUPTS_GROUP_SUSPEND);
DEFINE_MALI_CSF_GRP_Q_EVENT(CSI_FAULT_INTERRUPT);
DEFINE_MALI_CSF_GRP_Q_EVENT(CSI_TILER_OOM_INTERRUPT);
DEFINE_MALI_CSF_GRP_Q_EVENT(CSI_PROTM_PEND_INTERRUPT);
DEFINE_MALI_CSF_GRP_Q_EVENT(CSI_STOP_REQ);
DEFINE_MALI_CSF_GRP_Q_EVENT(CSI_INTERRUPT_GROUP_SUSPENDS_IGNORED);
DEFINE_MALI_CSF_GRP_Q_EVENT(CSI_INTERRUPT_FAULT);
DEFINE_MALI_CSF_GRP_Q_EVENT(CSI_INTERRUPT_TILER_OOM);
DEFINE_MALI_CSF_GRP_Q_EVENT(CSI_INTERRUPT_PROTM_PEND);
DEFINE_MALI_CSF_GRP_Q_EVENT(CSI_PROTM_ACK);
DEFINE_MALI_CSF_GRP_Q_EVENT(QUEUE_START);
DEFINE_MALI_CSF_GRP_Q_EVENT(QUEUE_STOP);
DEFINE_MALI_CSF_GRP_Q_EVENT(QUEUE_SYNC_UPDATE);
DEFINE_MALI_CSF_GRP_Q_EVENT(QUEUE_SYNC_UPDATE_EVALUATED);
DEFINE_MALI_CSF_GRP_Q_EVENT(QUEUE_SYNC_STATUS_WAIT);
DEFINE_MALI_CSF_GRP_Q_EVENT(QUEUE_SYNC_CURRENT_VAL);
DEFINE_MALI_CSF_GRP_Q_EVENT(QUEUE_SYNC_TEST_VAL);
DEFINE_MALI_CSF_GRP_Q_EVENT(QUEUE_SYNC_BLOCKED_REASON);
DEFINE_MALI_CSF_GRP_Q_EVENT(PROTM_PENDING_SET);
DEFINE_MALI_CSF_GRP_Q_EVENT(PROTM_PENDING_CLEAR);
DEFINE_MALI_CSF_GRP_Q_EVENT(QUEUE_SYNC_UPDATE_EVAL_START);
DEFINE_MALI_CSF_GRP_Q_EVENT(QUEUE_SYNC_UPDATE_EVAL_END);
DEFINE_MALI_CSF_GRP_Q_EVENT(QUEUE_SYNC_UPDATE_WAIT_STATUS);
DEFINE_MALI_CSF_GRP_Q_EVENT(QUEUE_SYNC_UPDATE_CUR_VAL);
DEFINE_MALI_CSF_GRP_Q_EVENT(QUEUE_SYNC_UPDATE_TEST_VAL);
DEFINE_MALI_CSF_GRP_Q_EVENT(QUEUE_SYNC_UPDATE_BLOCKED_REASON);
DEFINE_MALI_CSF_GRP_Q_EVENT(CSI_PROTM_PEND_SET);
DEFINE_MALI_CSF_GRP_Q_EVENT(CSI_PROTM_PEND_CLEAR);
#undef DEFINE_MALI_CSF_GRP_Q_EVENT
@@ -230,14 +230,14 @@ DECLARE_EVENT_CLASS(mali_csf_kcpu_queue_template,
u64 info_val1, u64 info_val2), \
TP_ARGS(queue, info_val1, info_val2))
DEFINE_MALI_CSF_KCPU_EVENT(KCPU_QUEUE_NEW);
DEFINE_MALI_CSF_KCPU_EVENT(KCPU_QUEUE_DESTROY);
DEFINE_MALI_CSF_KCPU_EVENT(CQS_SET);
DEFINE_MALI_CSF_KCPU_EVENT(CQS_WAIT_START);
DEFINE_MALI_CSF_KCPU_EVENT(CQS_WAIT_END);
DEFINE_MALI_CSF_KCPU_EVENT(FENCE_SIGNAL);
DEFINE_MALI_CSF_KCPU_EVENT(FENCE_WAIT_START);
DEFINE_MALI_CSF_KCPU_EVENT(FENCE_WAIT_END);
DEFINE_MALI_CSF_KCPU_EVENT(KCPU_QUEUE_CREATE);
DEFINE_MALI_CSF_KCPU_EVENT(KCPU_QUEUE_DELETE);
DEFINE_MALI_CSF_KCPU_EVENT(KCPU_CQS_SET);
DEFINE_MALI_CSF_KCPU_EVENT(KCPU_CQS_WAIT_START);
DEFINE_MALI_CSF_KCPU_EVENT(KCPU_CQS_WAIT_END);
DEFINE_MALI_CSF_KCPU_EVENT(KCPU_FENCE_SIGNAL);
DEFINE_MALI_CSF_KCPU_EVENT(KCPU_FENCE_WAIT_START);
DEFINE_MALI_CSF_KCPU_EVENT(KCPU_FENCE_WAIT_END);
#undef DEFINE_MALI_CSF_KCPU_EVENT

View File

@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
* (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2020-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -27,13 +27,13 @@ int kbase_ktrace_init(struct kbase_device *kbdev)
#if KBASE_KTRACE_TARGET_RBUF
struct kbase_ktrace_msg *rbuf;
spin_lock_init(&kbdev->ktrace.lock);
rbuf = kmalloc_array(KBASE_KTRACE_SIZE, sizeof(*rbuf), GFP_KERNEL);
if (!rbuf)
return -EINVAL;
kbdev->ktrace.rbuf = rbuf;
spin_lock_init(&kbdev->ktrace.lock);
#endif /* KBASE_KTRACE_TARGET_RBUF */
return 0;
}
@@ -42,6 +42,7 @@ void kbase_ktrace_term(struct kbase_device *kbdev)
{
#if KBASE_KTRACE_TARGET_RBUF
kfree(kbdev->ktrace.rbuf);
kbdev->ktrace.rbuf = NULL;
#endif /* KBASE_KTRACE_TARGET_RBUF */
}
@@ -183,6 +184,9 @@ void kbasep_ktrace_add(struct kbase_device *kbdev, enum kbase_ktrace_code code,
unsigned long irqflags;
struct kbase_ktrace_msg *trace_msg;
if (unlikely(!kbasep_ktrace_initialized(&kbdev->ktrace)))
return;
WARN_ON((flags & ~KBASE_KTRACE_FLAG_COMMON_ALL));
spin_lock_irqsave(&kbdev->ktrace.lock, irqflags);

View File

@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
* (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2020-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -81,6 +81,18 @@ void kbase_ktrace_debugfs_init(struct kbase_device *kbdev);
* KTrace target for internal ringbuffer
*/
#if KBASE_KTRACE_TARGET_RBUF
/**
 * kbasep_ktrace_initialized - Check whether the ktrace ringbuffer has been set up
 *
 * @ktrace: ktrace of kbase device.
 *
 * Return: true if @ktrace has a ringbuffer attached (its rbuf pointer is
 *         non-NULL), false otherwise.
 */
static inline bool kbasep_ktrace_initialized(struct kbase_ktrace *ktrace)
{
	/* A NULL rbuf means there is no ringbuffer to write trace messages into */
	if (!ktrace->rbuf)
		return false;

	return true;
}
/**
* kbasep_ktrace_add - internal function to add trace to the ringbuffer.
* @kbdev: kbase device

View File

@@ -315,10 +315,10 @@ static const struct kbase_device_init dev_init[] = {
"GPU hwcnt backend creation failed" },
{ kbase_device_hwcnt_context_init, kbase_device_hwcnt_context_term,
"GPU hwcnt context initialization failed" },
{ kbase_backend_late_init, kbase_backend_late_term,
"Late backend initialization failed" },
{ kbase_csf_early_init, kbase_csf_early_term,
"Early CSF initialization failed" },
{ kbase_backend_late_init, kbase_backend_late_term,
"Late backend initialization failed" },
{ NULL, kbase_device_firmware_hwcnt_term, NULL },
{ kbase_device_debugfs_init, kbase_device_debugfs_term,
"DebugFS initialization failed" },

View File

@@ -109,8 +109,7 @@ void kbase_gpu_interrupt(struct kbase_device *kbdev, u32 val)
#if !IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI)
void kbase_reg_write(struct kbase_device *kbdev, u32 offset, u32 value)
{
KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_powered);
KBASE_DEBUG_ASSERT(kbdev->dev != NULL);
WARN_ON(!kbdev->pm.backend.gpu_powered);
writel(value, kbdev->reg + offset);
@@ -127,8 +126,7 @@ u32 kbase_reg_read(struct kbase_device *kbdev, u32 offset)
{
u32 val;
KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_powered);
KBASE_DEBUG_ASSERT(kbdev->dev != NULL);
WARN_ON(!kbdev->pm.backend.gpu_powered);
val = readl(kbdev->reg + offset);

View File

@@ -291,12 +291,9 @@ int kbase_device_misc_init(struct kbase_device * const kbdev)
if (err)
goto dma_set_mask_failed;
err = kbase_ktrace_init(kbdev);
if (err)
goto term_as;
err = kbase_pbha_read_dtb(kbdev);
if (err)
goto term_ktrace;
goto term_as;
init_waitqueue_head(&kbdev->cache_clean_wait);
@@ -335,8 +332,6 @@ int kbase_device_misc_init(struct kbase_device * const kbdev)
return 0;
term_ktrace:
kbase_ktrace_term(kbdev);
term_as:
kbase_device_all_as_term(kbdev);
dma_set_mask_failed:
@@ -353,9 +348,6 @@ void kbase_device_misc_term(struct kbase_device *kbdev)
#if KBASE_KTRACE_ENABLE
kbase_debug_assert_register_hook(NULL, NULL);
#endif
kbase_ktrace_term(kbdev);
kbase_device_all_as_term(kbdev);
@@ -522,10 +514,14 @@ int kbase_device_early_init(struct kbase_device *kbdev)
{
int err;
err = kbase_ktrace_init(kbdev);
if (err)
return err;
err = kbasep_platform_device_init(kbdev);
if (err)
return err;
goto ktrace_term;
err = kbase_pm_runtime_init(kbdev);
if (err)
@@ -539,7 +535,12 @@ int kbase_device_early_init(struct kbase_device *kbdev)
/* Ensure we can access the GPU registers */
kbase_pm_register_access_enable(kbdev);
/* Find out GPU properties based on the GPU feature registers */
/*
* Find out GPU properties based on the GPU feature registers.
* Note that this does not populate the few properties that depend on
* hw_features being initialized. Those are set by kbase_gpuprops_set_features
* soon after this in the init process.
*/
kbase_gpuprops_set(kbdev);
/* We're done accessing the GPU registers for now. */
@@ -562,6 +563,8 @@ fail_interrupts:
kbase_pm_runtime_term(kbdev);
fail_runtime_pm:
kbasep_platform_device_term(kbdev);
ktrace_term:
kbase_ktrace_term(kbdev);
return err;
}
@@ -578,6 +581,7 @@ void kbase_device_early_term(struct kbase_device *kbdev)
#endif /* CONFIG_MALI_ARBITER_SUPPORT */
kbase_pm_runtime_term(kbdev);
kbasep_platform_device_term(kbdev);
kbase_ktrace_term(kbdev);
}
int kbase_device_late_init(struct kbase_device *kbdev)

View File

@@ -115,6 +115,22 @@ u32 kbase_reg_read(struct kbase_device *kbdev, u32 offset);
*/
bool kbase_is_gpu_removed(struct kbase_device *kbdev);
/**
* kbase_gpu_cache_flush_pa_range_and_busy_wait() - Start a cache physical range flush
* and busy wait
*
* @kbdev: kbase device to issue the MMU operation on.
* @phys: Starting address of the physical range to start the operation on.
* @nr_bytes: Number of bytes to work on.
* @flush_op: Flush command register value to be sent to HW
*
* Issue a cache flush physical range command, then busy wait an irq status.
* This function will clear FLUSH_PA_RANGE_COMPLETED irq mask bit
* and busy-wait the rawstat register.
*
* Return: 0 if successful or a negative error code on failure.
*/
#define kbase_gpu_cache_flush_pa_range_and_busy_wait(kbdev, phys, nr_bytes, flush_op) (0)
/**
* kbase_gpu_cache_flush_and_busy_wait - Start a cache flush and busy wait
* @kbdev: Kbase device
@@ -188,7 +204,7 @@ int kbase_gpu_wait_cache_clean_timeout(struct kbase_device *kbdev,
void kbase_gpu_cache_clean_wait_complete(struct kbase_device *kbdev);
/**
* kbase_clean_caches_done - Issue preiously queued cache clean request or
* kbase_clean_caches_done - Issue previously queued cache clean request or
* wake up the requester that issued cache clean.
* @kbdev: Kbase device
*

View File

@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
* (C) COPYRIGHT 2014-2016, 2018-2021 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2014-2016, 2018-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -27,6 +27,9 @@
#include <mali_kbase_reset_gpu.h>
#include <mmu/mali_kbase_mmu.h>
#define U64_LO_MASK ((1ULL << 32) - 1)
#define U64_HI_MASK (~U64_LO_MASK)
#if !IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI)
bool kbase_is_gpu_removed(struct kbase_device *kbdev)
{
@@ -38,8 +41,9 @@ bool kbase_is_gpu_removed(struct kbase_device *kbdev)
}
#endif /* !IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) */
static int busy_wait_cache_clean_irq(struct kbase_device *kbdev)
static int busy_wait_on_irq(struct kbase_device *kbdev, u32 irq_bit)
{
char *irq_flag_name;
/* Previously MMU-AS command was used for L2 cache flush on page-table update.
* And we're using the same max-loops count for GPU command, because amount of
* L2 cache flush overhead are same between them.
@@ -48,28 +52,42 @@ static int busy_wait_cache_clean_irq(struct kbase_device *kbdev)
/* Wait for the GPU cache clean operation to complete */
while (--max_loops &&
!(kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_RAWSTAT)) &
CLEAN_CACHES_COMPLETED)) {
!(kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_RAWSTAT)) & irq_bit)) {
;
}
/* reset gpu if time-out occurred */
if (max_loops == 0) {
switch (irq_bit) {
case CLEAN_CACHES_COMPLETED:
irq_flag_name = "CLEAN_CACHES_COMPLETED";
break;
case FLUSH_PA_RANGE_COMPLETED:
irq_flag_name = "FLUSH_PA_RANGE_COMPLETED";
break;
default:
irq_flag_name = "UNKNOWN";
break;
}
dev_err(kbdev->dev,
"CLEAN_CACHES_COMPLETED bit stuck, might be caused by slow/unstable GPU clock or possible faulty FPGA connector\n");
"Stuck waiting on %s bit, might be caused by slow/unstable GPU clock or possible faulty FPGA connector\n",
irq_flag_name);
if (kbase_prepare_to_reset_gpu_locked(kbdev, RESET_FLAGS_NONE))
kbase_reset_gpu_locked(kbdev);
return -EBUSY;
}
/* Clear the interrupt CLEAN_CACHES_COMPLETED bit. */
KBASE_KTRACE_ADD(kbdev, CORE_GPU_IRQ_CLEAR, NULL, CLEAN_CACHES_COMPLETED);
kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR),
CLEAN_CACHES_COMPLETED);
/* Clear the interrupt bit. */
KBASE_KTRACE_ADD(kbdev, CORE_GPU_IRQ_CLEAR, NULL, irq_bit);
kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), irq_bit);
return 0;
}
#define kbase_gpu_cache_flush_pa_range_and_busy_wait(kbdev, phys, nr_bytes, flush_op) (0)
int kbase_gpu_cache_flush_and_busy_wait(struct kbase_device *kbdev,
u32 flush_op)
{
@@ -97,7 +115,7 @@ int kbase_gpu_cache_flush_and_busy_wait(struct kbase_device *kbdev,
irq_mask & ~CLEAN_CACHES_COMPLETED);
/* busy wait irq status to be enabled */
ret = busy_wait_cache_clean_irq(kbdev);
ret = busy_wait_on_irq(kbdev, (u32)CLEAN_CACHES_COMPLETED);
if (ret)
return ret;
@@ -118,7 +136,7 @@ int kbase_gpu_cache_flush_and_busy_wait(struct kbase_device *kbdev,
kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), flush_op);
/* 3. Busy-wait irq status to be enabled. */
ret = busy_wait_cache_clean_irq(kbdev);
ret = busy_wait_on_irq(kbdev, (u32)CLEAN_CACHES_COMPLETED);
if (ret)
return ret;

View File

@@ -138,6 +138,7 @@
#define GPU_COMMAND_CODE_SET_PROTECTED_MODE 0x05 /* Places the GPU in protected mode */
#define GPU_COMMAND_CODE_FINISH_HALT 0x06 /* Halt CSF */
#define GPU_COMMAND_CODE_CLEAR_FAULT 0x07 /* Clear GPU_FAULTSTATUS and GPU_FAULTADDRESS, TODX */
#define GPU_COMMAND_CODE_FLUSH_PA_RANGE 0x08 /* Flush the GPU caches for a physical range, TITX */
/* GPU_COMMAND_RESET payloads */
@@ -161,18 +162,29 @@
#define GPU_COMMAND_TIME_ENABLE 0x01 /* Enable cycle counter */
/* GPU_COMMAND_FLUSH_CACHES payloads bits for L2 caches */
#define GPU_COMMAND_FLUSH_PAYLOAD_L2_NONE 0x000 /* No flush */
#define GPU_COMMAND_FLUSH_PAYLOAD_L2_CLEAN 0x001 /* CLN only */
#define GPU_COMMAND_FLUSH_PAYLOAD_L2_CLEAN_INVALIDATE 0x003 /* CLN + INV */
#define GPU_COMMAND_FLUSH_CACHES_PAYLOAD_L2_NONE 0x000 /* No flush */
#define GPU_COMMAND_FLUSH_CACHES_PAYLOAD_L2_CLEAN 0x001 /* CLN only */
#define GPU_COMMAND_FLUSH_CACHES_PAYLOAD_L2_CLEAN_INVALIDATE 0x003 /* CLN + INV */
/* GPU_COMMAND_FLUSH_CACHES payloads bits for Load-store caches */
#define GPU_COMMAND_FLUSH_PAYLOAD_LSC_NONE 0x000 /* No flush */
#define GPU_COMMAND_FLUSH_PAYLOAD_LSC_CLEAN 0x010 /* CLN only */
#define GPU_COMMAND_FLUSH_PAYLOAD_LSC_CLEAN_INVALIDATE 0x030 /* CLN + INV */
#define GPU_COMMAND_FLUSH_CACHES_PAYLOAD_LSC_NONE 0x000 /* No flush */
#define GPU_COMMAND_FLUSH_CACHES_PAYLOAD_LSC_CLEAN 0x010 /* CLN only */
#define GPU_COMMAND_FLUSH_CACHES_PAYLOAD_LSC_CLEAN_INVALIDATE 0x030 /* CLN + INV */
/* GPU_COMMAND_FLUSH_CACHES payloads bits for Other caches */
#define GPU_COMMAND_FLUSH_PAYLOAD_OTHER_NONE 0x000 /* No flush */
#define GPU_COMMAND_FLUSH_PAYLOAD_OTHER_INVALIDATE 0x200 /* INV only */
#define GPU_COMMAND_FLUSH_CACHES_PAYLOAD_OTHER_NONE 0x000 /* No flush */
#define GPU_COMMAND_FLUSH_CACHES_PAYLOAD_OTHER_INVALIDATE 0x200 /* INV only */
/* GPU_COMMAND_FLUSH_PA_RANGE payload bits for flush modes */
#define GPU_COMMAND_FLUSH_PA_RANGE_PAYLOAD_MODE_NONE 0x00 /* No flush */
#define GPU_COMMAND_FLUSH_PA_RANGE_PAYLOAD_MODE_CLEAN 0x01 /* CLN only */
#define GPU_COMMAND_FLUSH_PA_RANGE_PAYLOAD_MODE_INVALIDATE 0x02 /* INV only */
#define GPU_COMMAND_FLUSH_PA_RANGE_PAYLOAD_MODE_CLEAN_INVALIDATE 0x03 /* CLN + INV */
/* GPU_COMMAND_FLUSH_PA_RANGE payload bits for which caches should be the target of the command */
#define GPU_COMMAND_FLUSH_PA_RANGE_PAYLOAD_OTHER_CACHE 0x10 /* Other caches */
#define GPU_COMMAND_FLUSH_PA_RANGE_PAYLOAD_LSC_CACHE 0x20 /* Load-store caches */
#define GPU_COMMAND_FLUSH_PA_RANGE_PAYLOAD_L2_CACHE 0x40 /* L2 caches */
/* GPU_COMMAND command + payload */
#define GPU_COMMAND_CODE_PAYLOAD(opcode, payload) \
@@ -200,28 +212,53 @@
GPU_COMMAND_CODE_PAYLOAD(GPU_COMMAND_CODE_TIME, GPU_COMMAND_TIME_DISABLE)
/* Clean and invalidate L2 cache (Equivalent to FLUSH_PT) */
#define GPU_COMMAND_CACHE_CLN_INV_L2 \
GPU_COMMAND_CODE_PAYLOAD( \
GPU_COMMAND_CODE_FLUSH_CACHES, \
(GPU_COMMAND_FLUSH_PAYLOAD_L2_CLEAN_INVALIDATE | \
GPU_COMMAND_FLUSH_PAYLOAD_LSC_NONE | \
GPU_COMMAND_FLUSH_PAYLOAD_OTHER_NONE))
#define GPU_COMMAND_CACHE_CLN_INV_L2 \
GPU_COMMAND_CODE_PAYLOAD(GPU_COMMAND_CODE_FLUSH_CACHES, \
(GPU_COMMAND_FLUSH_CACHES_PAYLOAD_L2_CLEAN_INVALIDATE | \
GPU_COMMAND_FLUSH_CACHES_PAYLOAD_LSC_NONE | \
GPU_COMMAND_FLUSH_CACHES_PAYLOAD_OTHER_NONE))
/* Clean and invalidate L2 and LSC caches (Equivalent to FLUSH_MEM) */
#define GPU_COMMAND_CACHE_CLN_INV_L2_LSC \
GPU_COMMAND_CODE_PAYLOAD( \
GPU_COMMAND_CODE_FLUSH_CACHES, \
(GPU_COMMAND_FLUSH_PAYLOAD_L2_CLEAN_INVALIDATE | \
GPU_COMMAND_FLUSH_PAYLOAD_LSC_CLEAN_INVALIDATE | \
GPU_COMMAND_FLUSH_PAYLOAD_OTHER_NONE))
#define GPU_COMMAND_CACHE_CLN_INV_L2_LSC \
GPU_COMMAND_CODE_PAYLOAD(GPU_COMMAND_CODE_FLUSH_CACHES, \
(GPU_COMMAND_FLUSH_CACHES_PAYLOAD_L2_CLEAN_INVALIDATE | \
GPU_COMMAND_FLUSH_CACHES_PAYLOAD_LSC_CLEAN_INVALIDATE | \
GPU_COMMAND_FLUSH_CACHES_PAYLOAD_OTHER_NONE))
/* Clean and invalidate L2, LSC, and Other caches */
#define GPU_COMMAND_CACHE_CLN_INV_FULL \
GPU_COMMAND_CODE_PAYLOAD( \
GPU_COMMAND_CODE_FLUSH_CACHES, \
(GPU_COMMAND_FLUSH_PAYLOAD_L2_CLEAN_INVALIDATE | \
GPU_COMMAND_FLUSH_PAYLOAD_LSC_CLEAN_INVALIDATE | \
GPU_COMMAND_FLUSH_PAYLOAD_OTHER_INVALIDATE))
#define GPU_COMMAND_CACHE_CLN_INV_FULL \
GPU_COMMAND_CODE_PAYLOAD(GPU_COMMAND_CODE_FLUSH_CACHES, \
(GPU_COMMAND_FLUSH_CACHES_PAYLOAD_L2_CLEAN_INVALIDATE | \
GPU_COMMAND_FLUSH_CACHES_PAYLOAD_LSC_CLEAN_INVALIDATE | \
GPU_COMMAND_FLUSH_CACHES_PAYLOAD_OTHER_INVALIDATE))
/* Clean and invalidate only LSC cache */
#define GPU_COMMAND_CACHE_CLN_INV_LSC \
GPU_COMMAND_CODE_PAYLOAD(GPU_COMMAND_CODE_FLUSH_CACHES, \
(GPU_COMMAND_FLUSH_CACHES_PAYLOAD_L2_NONE | \
GPU_COMMAND_FLUSH_CACHES_PAYLOAD_LSC_CLEAN_INVALIDATE | \
GPU_COMMAND_FLUSH_CACHES_PAYLOAD_OTHER_NONE))
/* Clean and invalidate physical range L2 cache (equivalent to FLUSH_PT) */
#define GPU_COMMAND_FLUSH_PA_RANGE_CLN_INV_L2 \
GPU_COMMAND_CODE_PAYLOAD(GPU_COMMAND_CODE_FLUSH_PA_RANGE, \
(GPU_COMMAND_FLUSH_PA_RANGE_PAYLOAD_MODE_CLEAN_INVALIDATE | \
GPU_COMMAND_FLUSH_PA_RANGE_PAYLOAD_L2_CACHE))
/* Clean and invalidate physical range L2 and LSC cache (equivalent to FLUSH_MEM) */
#define GPU_COMMAND_FLUSH_PA_RANGE_CLN_INV_L2_LSC \
GPU_COMMAND_CODE_PAYLOAD(GPU_COMMAND_CODE_FLUSH_PA_RANGE, \
(GPU_COMMAND_FLUSH_PA_RANGE_PAYLOAD_MODE_CLEAN_INVALIDATE | \
GPU_COMMAND_FLUSH_PA_RANGE_PAYLOAD_LSC_CACHE | \
GPU_COMMAND_FLUSH_PA_RANGE_PAYLOAD_L2_CACHE))
/* Clean and invalidate physical range L2, LSC and Other caches */
#define GPU_COMMAND_FLUSH_PA_RANGE_CLN_INV_FULL \
GPU_COMMAND_CODE_PAYLOAD(GPU_COMMAND_CODE_FLUSH_PA_RANGE, \
(GPU_COMMAND_FLUSH_PA_RANGE_PAYLOAD_MODE_CLEAN_INVALIDATE | \
GPU_COMMAND_FLUSH_PA_RANGE_PAYLOAD_OTHER_CACHE | \
GPU_COMMAND_FLUSH_PA_RANGE_PAYLOAD_LSC_CACHE | \
GPU_COMMAND_FLUSH_PA_RANGE_PAYLOAD_L2_CACHE))
/* Merge cache flush commands */
#define GPU_COMMAND_FLUSH_CACHE_MERGE(cmd1, cmd2) ((cmd1) | (cmd2))
@@ -302,14 +339,16 @@
(((value) << GPU_FAULTSTATUS_ADDRESS_VALID_SHIFT) & GPU_FAULTSTATUS_ADDRESS_VALID_MASK))
/* IRQ flags */
#define GPU_FAULT (1 << 0) /* A GPU Fault has occurred */
#define GPU_PROTECTED_FAULT (1 << 1) /* A GPU fault has occurred in protected mode */
#define RESET_COMPLETED (1 << 8) /* Set when a reset has completed. */
#define POWER_CHANGED_SINGLE (1 << 9) /* Set when a single core has finished powering up or down. */
#define POWER_CHANGED_ALL (1 << 10) /* Set when all cores have finished powering up or down. */
#define CLEAN_CACHES_COMPLETED (1 << 17) /* Set when a cache clean operation has completed. */
#define DOORBELL_MIRROR (1 << 18) /* Mirrors the doorbell interrupt line to the CPU */
#define MCU_STATUS_GPU_IRQ (1 << 19) /* MCU requires attention */
#define GPU_FAULT (1 << 0) /* A GPU Fault has occurred */
#define GPU_PROTECTED_FAULT (1 << 1) /* A GPU fault has occurred in protected mode */
#define RESET_COMPLETED (1 << 8) /* Set when a reset has completed. */
#define POWER_CHANGED_SINGLE (1 << 9) /* Set when a single core has finished powering up or down. */
#define POWER_CHANGED_ALL (1 << 10) /* Set when all cores have finished powering up or down. */
#define CLEAN_CACHES_COMPLETED (1 << 17) /* Set when a cache clean operation has completed. */
#define DOORBELL_MIRROR (1 << 18) /* Mirrors the doorbell interrupt line to the CPU */
#define MCU_STATUS_GPU_IRQ (1 << 19) /* MCU requires attention */
#define FLUSH_PA_RANGE_COMPLETED \
(1 << 20) /* Set when a physical range cache clean operation has completed. */
/*
* In Debug build,

View File

@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
* (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -262,19 +262,22 @@
#define GPU_COMMAND_CACHE_CLN_INV_L2 GPU_COMMAND_CLEAN_INV_CACHES
#define GPU_COMMAND_CACHE_CLN_INV_L2_LSC GPU_COMMAND_CLEAN_INV_CACHES
#define GPU_COMMAND_CACHE_CLN_INV_FULL GPU_COMMAND_CLEAN_INV_CACHES
#define GPU_COMMAND_CACHE_CLN_INV_LSC GPU_COMMAND_CLEAN_INV_CACHES
/* Merge cache flush commands */
#define GPU_COMMAND_FLUSH_CACHE_MERGE(cmd1, cmd2) \
((cmd1) > (cmd2) ? (cmd1) : (cmd2))
/* IRQ flags */
#define GPU_FAULT (1 << 0) /* A GPU Fault has occurred */
#define MULTIPLE_GPU_FAULTS (1 << 7) /* More than one GPU Fault occurred. */
#define RESET_COMPLETED (1 << 8) /* Set when a reset has completed. */
#define POWER_CHANGED_SINGLE (1 << 9) /* Set when a single core has finished powering up or down. */
#define POWER_CHANGED_ALL (1 << 10) /* Set when all cores have finished powering up or down. */
#define PRFCNT_SAMPLE_COMPLETED (1 << 16) /* Set when a performance count sample has completed. */
#define CLEAN_CACHES_COMPLETED (1 << 17) /* Set when a cache clean operation has completed. */
#define GPU_FAULT (1 << 0) /* A GPU Fault has occurred */
#define MULTIPLE_GPU_FAULTS (1 << 7) /* More than one GPU Fault occurred. */
#define RESET_COMPLETED (1 << 8) /* Set when a reset has completed. */
#define POWER_CHANGED_SINGLE (1 << 9) /* Set when a single core has finished powering up or down. */
#define POWER_CHANGED_ALL (1 << 10) /* Set when all cores have finished powering up or down. */
#define PRFCNT_SAMPLE_COMPLETED (1 << 16) /* Set when a performance count sample has completed. */
#define CLEAN_CACHES_COMPLETED (1 << 17) /* Set when a cache clean operation has completed. */
#define FLUSH_PA_RANGE_COMPLETED \
(1 << 20) /* Set when a physical range cache clean operation has completed. */
/*
* In Debug build,

View File

@@ -100,6 +100,7 @@
#define TEXTURE_FEATURES_REG(n) GPU_CONTROL_REG(TEXTURE_FEATURES_0 + ((n) << 2))
#define SHADER_PRESENT_LO 0x100 /* (RO) Shader core present bitmap, low word */
#define SHADER_PRESENT_HI 0x104 /* (RO) Shader core present bitmap, high word */
@@ -368,6 +369,11 @@
(((reg_val) & ~AS_LOCKADDR_LOCKADDR_BASE_MASK) | \
(((value) << AS_LOCKADDR_LOCKADDR_BASE_SHIFT) & \
AS_LOCKADDR_LOCKADDR_BASE_MASK))
/* FLUSH_SKIP_LEVELS: 4-bit field occupying bits [9:6] of the lock address value */
#define AS_LOCKADDR_FLUSH_SKIP_LEVELS_SHIFT (6)
#define AS_LOCKADDR_FLUSH_SKIP_LEVELS_MASK ((0xF) << AS_LOCKADDR_FLUSH_SKIP_LEVELS_SHIFT)
/*
 * Return reg_val with its FLUSH_SKIP_LEVELS field replaced by value; bits of
 * value that do not fit in the field are discarded by the mask. The value
 * argument is parenthesised so that compound expressions (e.g. "a | b") are
 * shifted as a whole.
 */
#define AS_LOCKADDR_FLUSH_SKIP_LEVELS_SET(reg_val, value) \
	(((reg_val) & ~AS_LOCKADDR_FLUSH_SKIP_LEVELS_MASK) | \
	 (((value) << AS_LOCKADDR_FLUSH_SKIP_LEVELS_SHIFT) & AS_LOCKADDR_FLUSH_SKIP_LEVELS_MASK))
/* GPU_STATUS values */
#define GPU_STATUS_PRFCNT_ACTIVE (1 << 2) /* Set if the performance counters are active. */

View File

@@ -186,8 +186,6 @@ struct kbase_jd_atom_dependency {
static inline const struct kbase_jd_atom *
kbase_jd_katom_dep_atom(const struct kbase_jd_atom_dependency *dep)
{
KBASE_DEBUG_ASSERT(dep != NULL);
return (const struct kbase_jd_atom *)(dep->atom);
}
@@ -201,8 +199,6 @@ kbase_jd_katom_dep_atom(const struct kbase_jd_atom_dependency *dep)
static inline u8 kbase_jd_katom_dep_type(
const struct kbase_jd_atom_dependency *dep)
{
KBASE_DEBUG_ASSERT(dep != NULL);
return dep->dep_type;
}
@@ -219,8 +215,6 @@ static inline void kbase_jd_katom_dep_set(
{
struct kbase_jd_atom_dependency *dep;
KBASE_DEBUG_ASSERT(const_dep != NULL);
dep = (struct kbase_jd_atom_dependency *)const_dep;
dep->atom = a;
@@ -237,8 +231,6 @@ static inline void kbase_jd_katom_dep_clear(
{
struct kbase_jd_atom_dependency *dep;
KBASE_DEBUG_ASSERT(const_dep != NULL);
dep = (struct kbase_jd_atom_dependency *)const_dep;
dep->atom = NULL;
@@ -508,7 +500,6 @@ struct kbase_ext_res {
* BASE_JD_REQ_START_RENDERPASS set in its core requirements
* with an atom that has BASE_JD_REQ_END_RENDERPASS set.
* @jc_fragment: Set of GPU fragment job chains
* @retry_count: TODO: Not used,to be removed
*/
struct kbase_jd_atom {
struct work_struct work;
@@ -619,8 +610,6 @@ struct kbase_jd_atom {
u32 atom_flags;
int retry_count;
enum kbase_atom_gpu_rb_state gpu_rb_state;
bool need_cache_flush_cores_retained;

View File

@@ -29,6 +29,8 @@
#include "mali_kbase_js_ctx_attr.h"
#define JS_MAX_RUNNING_JOBS 8
/**
* kbasep_js_devdata_init - Initialize the Job Scheduler
* @kbdev: The kbase_device to operate on
@@ -705,8 +707,10 @@ static inline bool kbasep_js_is_submit_allowed(
bool is_allowed;
/* Ensure context really is scheduled in */
KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID);
KBASE_DEBUG_ASSERT(kbase_ctx_flag(kctx, KCTX_SCHEDULED));
if (WARN((kctx->as_nr == KBASEP_AS_NR_INVALID) || !kbase_ctx_flag(kctx, KCTX_SCHEDULED),
"%s: kctx %pK has assigned AS %d and context flag %d\n", __func__, (void *)kctx,
kctx->as_nr, atomic_read(&kctx->flags)))
return false;
test_bit = (u16) (1u << kctx->as_nr);
@@ -733,8 +737,10 @@ static inline void kbasep_js_set_submit_allowed(
u16 set_bit;
/* Ensure context really is scheduled in */
KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID);
KBASE_DEBUG_ASSERT(kbase_ctx_flag(kctx, KCTX_SCHEDULED));
if (WARN((kctx->as_nr == KBASEP_AS_NR_INVALID) || !kbase_ctx_flag(kctx, KCTX_SCHEDULED),
"%s: kctx %pK has assigned AS %d and context flag %d\n", __func__, (void *)kctx,
kctx->as_nr, atomic_read(&kctx->flags)))
return;
set_bit = (u16) (1u << kctx->as_nr);
@@ -763,8 +769,10 @@ static inline void kbasep_js_clear_submit_allowed(
u16 clear_mask;
/* Ensure context really is scheduled in */
KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID);
KBASE_DEBUG_ASSERT(kbase_ctx_flag(kctx, KCTX_SCHEDULED));
if (WARN((kctx->as_nr == KBASEP_AS_NR_INVALID) || !kbase_ctx_flag(kctx, KCTX_SCHEDULED),
"%s: kctx %pK has assigned AS %d and context flag %d\n", __func__, (void *)kctx,
kctx->as_nr, atomic_read(&kctx->flags)))
return;
clear_bit = (u16) (1u << kctx->as_nr);
clear_mask = ~clear_bit;
@@ -798,7 +806,7 @@ static inline void kbasep_js_atom_retained_state_init_invalid(
* @retained_state: where to copy
* @katom: where to copy from
*
* Copy atom state that can be made available after jd_done_nolock() is called
* Copy atom state that can be made available after kbase_jd_done_nolock() is called
* on that atom.
*/
static inline void kbasep_js_atom_retained_state_copy(
@@ -872,9 +880,6 @@ static inline void kbase_js_runpool_inc_context_count(
struct kbasep_js_device_data *js_devdata;
struct kbasep_js_kctx_info *js_kctx_info;
KBASE_DEBUG_ASSERT(kbdev != NULL);
KBASE_DEBUG_ASSERT(kctx != NULL);
js_devdata = &kbdev->js_data;
js_kctx_info = &kctx->jctx.sched_info;
@@ -882,13 +887,12 @@ static inline void kbase_js_runpool_inc_context_count(
lockdep_assert_held(&js_devdata->runpool_mutex);
/* Track total contexts */
KBASE_DEBUG_ASSERT(js_devdata->nr_all_contexts_running < S8_MAX);
WARN_ON_ONCE(js_devdata->nr_all_contexts_running >= JS_MAX_RUNNING_JOBS);
++(js_devdata->nr_all_contexts_running);
if (!kbase_ctx_flag(kctx, KCTX_SUBMIT_DISABLED)) {
/* Track contexts that can submit jobs */
KBASE_DEBUG_ASSERT(js_devdata->nr_user_contexts_running <
S8_MAX);
WARN_ON_ONCE(js_devdata->nr_user_contexts_running >= JS_MAX_RUNNING_JOBS);
++(js_devdata->nr_user_contexts_running);
}
}
@@ -909,9 +913,6 @@ static inline void kbase_js_runpool_dec_context_count(
struct kbasep_js_device_data *js_devdata;
struct kbasep_js_kctx_info *js_kctx_info;
KBASE_DEBUG_ASSERT(kbdev != NULL);
KBASE_DEBUG_ASSERT(kctx != NULL);
js_devdata = &kbdev->js_data;
js_kctx_info = &kctx->jctx.sched_info;
@@ -920,12 +921,12 @@ static inline void kbase_js_runpool_dec_context_count(
/* Track total contexts */
--(js_devdata->nr_all_contexts_running);
KBASE_DEBUG_ASSERT(js_devdata->nr_all_contexts_running >= 0);
WARN_ON_ONCE(js_devdata->nr_all_contexts_running < 0);
if (!kbase_ctx_flag(kctx, KCTX_SUBMIT_DISABLED)) {
/* Track contexts that can submit jobs */
--(js_devdata->nr_user_contexts_running);
KBASE_DEBUG_ASSERT(js_devdata->nr_user_contexts_running >= 0);
WARN_ON_ONCE(js_devdata->nr_user_contexts_running < 0);
}
}
@@ -950,8 +951,8 @@ extern const base_jd_prio
kbasep_js_relative_priority_to_atom[KBASE_JS_ATOM_SCHED_PRIO_COUNT];
/**
* kbasep_js_atom_prio_to_sched_prio(): - Convert atom priority (base_jd_prio)
* to relative ordering
* kbasep_js_atom_prio_to_sched_prio - Convert atom priority (base_jd_prio)
* to relative ordering.
* @atom_prio: Priority ID to translate.
*
* Atom priority values for @ref base_jd_prio cannot be compared directly to
@@ -980,16 +981,33 @@ static inline int kbasep_js_atom_prio_to_sched_prio(base_jd_prio atom_prio)
return kbasep_js_atom_priority_to_relative[atom_prio];
}
static inline base_jd_prio kbasep_js_sched_prio_to_atom_prio(int sched_prio)
/**
* kbasep_js_sched_prio_to_atom_prio - Convert relative scheduler priority
* to atom priority (base_jd_prio).
*
* @kbdev: Device pointer
* @sched_prio: Relative scheduler priority to translate.
*
* This function converts a relative scheduler priority back into a base_jd_prio
* value. It takes values whose priorities are monotonically increasing
* and converts them to the corresponding base_jd_prio values. If an invalid number is
* passed in (i.e. not within the expected range), BASE_JD_PRIO_INVALID is returned instead.
*
* The mapping is 1:1 and the size of the valid input range is the same as the
* size of the valid output range, i.e.
* KBASE_JS_ATOM_SCHED_PRIO_COUNT == BASE_JD_NR_PRIO_LEVELS
*
* Return: On success: a value in the inclusive range
* 0..BASE_JD_NR_PRIO_LEVELS-1. On failure: BASE_JD_PRIO_INVALID.
*/
static inline base_jd_prio kbasep_js_sched_prio_to_atom_prio(struct kbase_device *kbdev,
int sched_prio)
{
unsigned int prio_idx;
KBASE_DEBUG_ASSERT(sched_prio >= 0 &&
sched_prio < KBASE_JS_ATOM_SCHED_PRIO_COUNT);
prio_idx = (unsigned int)sched_prio;
return kbasep_js_relative_priority_to_atom[prio_idx];
if (likely(sched_prio >= 0 && sched_prio < KBASE_JS_ATOM_SCHED_PRIO_COUNT))
return kbasep_js_relative_priority_to_atom[sched_prio];
/* Invalid priority value if reached here */
dev_warn(kbdev->dev, "Unknown JS scheduling priority %d", sched_prio);
return BASE_JD_PRIO_INVALID;
}
/**

View File

@@ -387,7 +387,7 @@ struct kbasep_js_kctx_info {
* @sched_priority: priority
* @device_nr: Core group atom was executed on
*
* Subset of atom state that can be available after jd_done_nolock() is called
* Subset of atom state that can be available after kbase_jd_done_nolock() is called
* on that atom. A copy must be taken via kbasep_js_atom_retained_state_copy(),
* because the original atom could disappear.
*/

View File

@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
* (C) COPYRIGHT 2014-2021 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2014-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -38,6 +38,7 @@ enum base_hw_feature {
BASE_HW_FEATURE_ASN_HASH,
BASE_HW_FEATURE_GPU_SLEEP,
BASE_HW_FEATURE_FLUSH_INV_SHADER_OTHER,
BASE_HW_FEATURE_CORE_FEATURES,
BASE_HW_FEATURE_END
};
@@ -87,6 +88,7 @@ __attribute__((unused)) static const enum base_hw_feature base_hw_features_tGOx[
BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
BASE_HW_FEATURE_TLS_HASHING,
BASE_HW_FEATURE_IDVS_GROUP_SIZE,
BASE_HW_FEATURE_CORE_FEATURES,
BASE_HW_FEATURE_END
};
@@ -151,6 +153,7 @@ __attribute__((unused)) static const enum base_hw_feature base_hw_features_tGRx[
BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
BASE_HW_FEATURE_L2_CONFIG,
BASE_HW_FEATURE_CLEAN_ONLY_SAFE,
BASE_HW_FEATURE_CORE_FEATURES,
BASE_HW_FEATURE_END
};
@@ -159,6 +162,7 @@ __attribute__((unused)) static const enum base_hw_feature base_hw_features_tVAx[
BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
BASE_HW_FEATURE_L2_CONFIG,
BASE_HW_FEATURE_CLEAN_ONLY_SAFE,
BASE_HW_FEATURE_CORE_FEATURES,
BASE_HW_FEATURE_END
};
@@ -169,6 +173,7 @@ __attribute__((unused)) static const enum base_hw_feature base_hw_features_tTUx[
BASE_HW_FEATURE_CLEAN_ONLY_SAFE,
BASE_HW_FEATURE_ASN_HASH,
BASE_HW_FEATURE_GPU_SLEEP,
BASE_HW_FEATURE_CORE_FEATURES,
BASE_HW_FEATURE_END
};

View File

@@ -62,6 +62,8 @@ enum base_hw_issue {
BASE_HW_ISSUE_TURSEHW_1997,
BASE_HW_ISSUE_GPU2019_3878,
BASE_HW_ISSUE_TURSEHW_2716,
BASE_HW_ISSUE_GPU2019_3901,
BASE_HW_ISSUE_GPU2021PRO_290,
BASE_HW_ISSUE_END
};
@@ -599,6 +601,7 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tODx_r0p0
BASE_HW_ISSUE_TTRX_1337,
BASE_HW_ISSUE_GPU2019_3212,
BASE_HW_ISSUE_GPU2019_3878,
BASE_HW_ISSUE_GPU2019_3901,
BASE_HW_ISSUE_END
};
@@ -609,6 +612,7 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tOD
BASE_HW_ISSUE_TTRX_1337,
BASE_HW_ISSUE_GPU2019_3212,
BASE_HW_ISSUE_GPU2019_3878,
BASE_HW_ISSUE_GPU2019_3901,
BASE_HW_ISSUE_END
};
@@ -617,6 +621,7 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tGRx_r0p0
BASE_HW_ISSUE_TSIX_2033,
BASE_HW_ISSUE_TTRX_1337,
BASE_HW_ISSUE_GPU2019_3878,
BASE_HW_ISSUE_GPU2019_3901,
BASE_HW_ISSUE_END
};
@@ -626,6 +631,7 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tGR
BASE_HW_ISSUE_TSIX_2033,
BASE_HW_ISSUE_TTRX_1337,
BASE_HW_ISSUE_GPU2019_3878,
BASE_HW_ISSUE_GPU2019_3901,
BASE_HW_ISSUE_END
};
@@ -634,6 +640,7 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tVAx_r0p0
BASE_HW_ISSUE_TSIX_2033,
BASE_HW_ISSUE_TTRX_1337,
BASE_HW_ISSUE_GPU2019_3878,
BASE_HW_ISSUE_GPU2019_3901,
BASE_HW_ISSUE_END
};
@@ -643,6 +650,7 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tVA
BASE_HW_ISSUE_TSIX_2033,
BASE_HW_ISSUE_TTRX_1337,
BASE_HW_ISSUE_GPU2019_3878,
BASE_HW_ISSUE_GPU2019_3901,
BASE_HW_ISSUE_END
};
@@ -653,6 +661,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tTUx_r0p0
BASE_HW_ISSUE_TURSEHW_1997,
BASE_HW_ISSUE_GPU2019_3878,
BASE_HW_ISSUE_TURSEHW_2716,
BASE_HW_ISSUE_GPU2019_3901,
BASE_HW_ISSUE_GPU2021PRO_290,
BASE_HW_ISSUE_END
};
@@ -663,6 +673,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tTU
BASE_HW_ISSUE_TTRX_1337,
BASE_HW_ISSUE_GPU2019_3878,
BASE_HW_ISSUE_TURSEHW_2716,
BASE_HW_ISSUE_GPU2019_3901,
BASE_HW_ISSUE_GPU2021PRO_290,
BASE_HW_ISSUE_END
};
@@ -672,6 +684,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tTUx_r1p0
BASE_HW_ISSUE_TTRX_1337,
BASE_HW_ISSUE_GPU2019_3878,
BASE_HW_ISSUE_TURSEHW_2716,
BASE_HW_ISSUE_GPU2019_3901,
BASE_HW_ISSUE_GPU2021PRO_290,
BASE_HW_ISSUE_END
};
@@ -681,6 +695,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tTUx_r1p1
BASE_HW_ISSUE_TTRX_1337,
BASE_HW_ISSUE_GPU2019_3878,
BASE_HW_ISSUE_TURSEHW_2716,
BASE_HW_ISSUE_GPU2019_3901,
BASE_HW_ISSUE_GPU2021PRO_290,
BASE_HW_ISSUE_END
};

View File

@@ -82,14 +82,7 @@
#if MALI_USE_CSF
#include "csf/mali_kbase_csf.h"
#endif
#ifndef u64_to_user_ptr
/* Introduced in Linux v4.6 */
#define u64_to_user_ptr(x) ((void __user *)(uintptr_t)x)
#endif
#if MALI_USE_CSF
/* Physical memory group ID for CSF user I/O.
*/
#define KBASE_MEM_GROUP_CSF_IO BASE_MEM_GROUP_DEFAULT
@@ -265,7 +258,7 @@ void kbase_jd_cancel(struct kbase_device *kbdev, struct kbase_jd_atom *katom);
void kbase_jd_zap_context(struct kbase_context *kctx);
/*
* jd_done_nolock - Perform the necessary handling of an atom that has completed
* kbase_jd_done_nolock - Perform the necessary handling of an atom that has completed
* the execution.
*
* @katom: Pointer to the atom that completed the execution
@@ -281,7 +274,7 @@ void kbase_jd_zap_context(struct kbase_context *kctx);
*
* The caller must hold the kbase_jd_context.lock.
*/
bool jd_done_nolock(struct kbase_jd_atom *katom, bool post_immediately);
bool kbase_jd_done_nolock(struct kbase_jd_atom *katom, bool post_immediately);
void kbase_jd_free_external_resources(struct kbase_jd_atom *katom);
void kbase_jd_dep_clear_locked(struct kbase_jd_atom *katom);

View File

@@ -176,7 +176,7 @@ enum {
* Based on 75000ms timeout at nominal 100MHz, as is required for Android - based
* on scaling from a 50MHz GPU system.
*/
#define CSF_FIRMWARE_TIMEOUT_CYCLES (7500000000)
#define CSF_FIRMWARE_TIMEOUT_CYCLES (7500000000ull)
/* Timeout in clock cycles for GPU Power Management to reach the desired
* Shader, L2 and MCU state.
@@ -203,6 +203,12 @@ enum {
*/
#define CSF_FIRMWARE_BOOT_TIMEOUT_CYCLES (25000000)
/* Waiting timeout for a ping request to be acknowledged, in clock cycles.
*
* Based on 6000ms timeout at 100MHz, scaled from a 50MHz GPU system.
*/
#define CSF_FIRMWARE_PING_TIMEOUT_CYCLES (600000000ull)
#else /* MALI_USE_CSF */
/* A default timeout in clock cycles to be used when an invalid timeout

View File

@@ -99,6 +99,7 @@
#include <linux/compat.h> /* is_compat_task/in_compat_syscall */
#include <linux/mman.h>
#include <linux/version.h>
#include <linux/version_compat_defs.h>
#include <mali_kbase_hw.h>
#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE)
#include <mali_kbase_sync.h>
@@ -311,10 +312,9 @@ static int kbase_file_create_kctx(struct kbase_file *kfile,
*
* @kfile: A device file created by kbase_file_new()
*
* This function returns an error code (encoded with ERR_PTR) if no context
* has been created for the given @kfile. This makes it safe to use in
* circumstances where the order of initialization cannot be enforced, but
* only if the caller checks the return value.
* This function returns NULL if no context has been created for the given @kfile.
* This makes it safe to use in circumstances where the order of initialization
* cannot be enforced, but only if the caller checks the return value.
*
* Return: Address of the kernel base context associated with the @kfile, or
* NULL if no context exists.
@@ -502,27 +502,6 @@ void kbase_release_device(struct kbase_device *kbdev)
EXPORT_SYMBOL(kbase_release_device);
#if IS_ENABLED(CONFIG_DEBUG_FS)
#if KERNEL_VERSION(4, 6, 0) > LINUX_VERSION_CODE && \
!(KERNEL_VERSION(4, 4, 28) <= LINUX_VERSION_CODE && \
KERNEL_VERSION(4, 5, 0) > LINUX_VERSION_CODE)
/*
* Kernel versions before v4.6 don't have kstrtobool_from_user(),
* except longterm 4.4.y, which had it added in 4.4.28
*/
static int kstrtobool_from_user(const char __user *s, size_t count, bool *res)
{
char buf[4];
count = min(count, sizeof(buf) - 1);
if (copy_from_user(buf, s, count))
return -EFAULT;
buf[count] = '\0';
return strtobool(buf, res);
}
#endif
static ssize_t write_ctx_infinite_cache(struct file *f, const char __user *ubuf, size_t size, loff_t *off)
{
struct kbase_context *kctx = f->private_data;
@@ -634,13 +613,8 @@ static int kbase_file_create_kctx(struct kbase_file *const kfile,
kbdev = kfile->kbdev;
#if (KERNEL_VERSION(4, 6, 0) <= LINUX_VERSION_CODE)
kctx = kbase_create_context(kbdev, in_compat_syscall(),
flags, kfile->api_version, kfile->filp);
#else
kctx = kbase_create_context(kbdev, is_compat_task(),
flags, kfile->api_version, kfile->filp);
#endif /* (KERNEL_VERSION(4, 6, 0) <= LINUX_VERSION_CODE) */
/* if bad flags, will stay stuck in setup mode */
if (!kctx)
@@ -661,16 +635,8 @@ static int kbase_file_create_kctx(struct kbase_file *const kfile,
/* we don't treat this as a fail - just warn about it */
dev_warn(kbdev->dev, "couldn't create debugfs dir for kctx\n");
} else {
#if (KERNEL_VERSION(4, 7, 0) > LINUX_VERSION_CODE)
/* prevent unprivileged use of debug file system
* in old kernel version
*/
debugfs_create_file("infinite_cache", 0600, kctx->kctx_dentry,
kctx, &kbase_infinite_cache_fops);
#else
debugfs_create_file("infinite_cache", 0644, kctx->kctx_dentry,
kctx, &kbase_infinite_cache_fops);
#endif
debugfs_create_file("force_same_va", 0600, kctx->kctx_dentry,
kctx, &kbase_force_same_va_fops);
@@ -2200,18 +2166,28 @@ static ssize_t kbase_read(struct file *filp, char __user *buf, size_t count, lof
}
#endif /* MALI_USE_CSF */
static unsigned int kbase_poll(struct file *filp, poll_table *wait)
static __poll_t kbase_poll(struct file *filp, poll_table *wait)
{
struct kbase_file *const kfile = filp->private_data;
struct kbase_context *const kctx =
kbase_file_get_kctx_if_setup_complete(kfile);
if (unlikely(!kctx))
if (unlikely(!kctx)) {
#if (KERNEL_VERSION(4, 19, 0) > LINUX_VERSION_CODE)
return POLLERR;
#else
return EPOLLERR;
#endif
}
poll_wait(filp, &kctx->event_queue, wait);
if (kbase_event_pending(kctx))
if (kbase_event_pending(kctx)) {
#if (KERNEL_VERSION(4, 19, 0) > LINUX_VERSION_CODE)
return POLLIN | POLLRDNORM;
#else
return EPOLLIN | EPOLLRDNORM;
#endif
}
return 0;
}
@@ -4542,7 +4518,7 @@ int power_control_init(struct kbase_device *kbdev)
}
}
if (err == -EPROBE_DEFER) {
while ((i > 0) && (i < BASE_MAX_NR_CLOCKS_REGULATORS))
while (i > 0)
regulator_put(kbdev->regulators[--i]);
return err;
}
@@ -4579,8 +4555,8 @@ int power_control_init(struct kbase_device *kbdev)
}
}
if (err == -EPROBE_DEFER) {
while ((i > 0) && (i < BASE_MAX_NR_CLOCKS_REGULATORS)) {
clk_unprepare(kbdev->clocks[--i]);
while (i > 0) {
clk_disable_unprepare(kbdev->clocks[--i]);
clk_put(kbdev->clocks[i]);
}
goto clocks_probe_defer;
@@ -4627,6 +4603,19 @@ int power_control_init(struct kbase_device *kbdev)
#endif /* CONFIG_PM_OPP */
return 0;
#if defined(CONFIG_PM_OPP) && \
((KERNEL_VERSION(4, 10, 0) <= LINUX_VERSION_CODE) && defined(CONFIG_REGULATOR))
for (i = 0; i < BASE_MAX_NR_CLOCKS_REGULATORS; i++) {
if (kbdev->clocks[i]) {
if (__clk_is_enabled(kbdev->clocks[i]))
clk_disable_unprepare(kbdev->clocks[i]);
clk_put(kbdev->clocks[i]);
kbdev->clocks[i] = NULL;
} else
break;
}
#endif
clocks_probe_defer:
#if defined(CONFIG_REGULATOR)
for (i = 0; i < BASE_MAX_NR_CLOCKS_REGULATORS; i++)
@@ -4819,12 +4808,7 @@ int kbase_device_debugfs_init(struct kbase_device *kbdev)
/* prevent unprivileged use of debug file system
* in old kernel version
*/
#if (KERNEL_VERSION(4, 7, 0) <= LINUX_VERSION_CODE)
/* only for newer kernel version debug file system is safe */
const mode_t mode = 0644;
#else
const mode_t mode = 0600;
#endif
kbdev->mali_debugfs_directory = debugfs_create_dir(kbdev->devname,
NULL);
@@ -4930,6 +4914,7 @@ int kbase_device_debugfs_init(struct kbase_device *kbdev)
#endif
kbase_dvfs_status_debugfs_init(kbdev);
return 0;
out:
@@ -5126,10 +5111,11 @@ static ssize_t fw_timeout_store(struct device *dev,
ret = kstrtouint(buf, 0, &fw_timeout);
if (ret || fw_timeout == 0) {
dev_err(kbdev->dev, "%s\n%s\n%u",
"Couldn't process fw_timeout write operation.",
"Use format 'fw_timeout_ms', and fw_timeout_ms > 0",
FIRMWARE_PING_INTERVAL_MS);
dev_err(kbdev->dev,
"Couldn't process fw_timeout write operation.\n"
"Use format 'fw_timeout_ms', and fw_timeout_ms > 0\n"
"Default fw_timeout: %u",
kbase_get_timeout_ms(kbdev, CSF_FIRMWARE_PING_TIMEOUT));
return -EINVAL;
}

View File

@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
* (C) COPYRIGHT 2012-2016, 2018-2021 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2012-2016, 2018-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -87,8 +87,7 @@ static bool kbase_ctx_has_no_event_pending(struct kbase_context *kctx)
static int wait_for_job_fault(struct kbase_device *kbdev)
{
#if KERNEL_VERSION(4, 7, 0) <= LINUX_VERSION_CODE && \
KERNEL_VERSION(4, 15, 0) > LINUX_VERSION_CODE
#if KERNEL_VERSION(4, 15, 0) > LINUX_VERSION_CODE
int ret = wait_event_interruptible_timeout(kbdev->job_fault_wq,
kbase_is_job_fault_event_pending(kbdev),
msecs_to_jiffies(2000));

View File

@@ -579,7 +579,7 @@ struct kbase_mmu_mode {
int (*pte_is_valid)(u64 pte, int level);
void (*entry_set_ate)(u64 *entry, struct tagged_addr phy,
unsigned long flags, int level);
void (*entry_set_pte)(u64 *pgd, u64 vpfn, phys_addr_t phy);
void (*entry_set_pte)(u64 *entry, phys_addr_t phy);
void (*entry_invalidate)(u64 *entry);
unsigned int (*get_num_valid_entries)(u64 *pgd);
void (*set_num_valid_entries)(u64 *pgd,
@@ -1154,11 +1154,8 @@ struct kbase_device {
#endif
bool poweroff_pending;
#if (KERNEL_VERSION(4, 4, 0) <= LINUX_VERSION_CODE)
bool infinite_cache_active_default;
#else
u32 infinite_cache_active_default;
#endif
struct kbase_mem_pool_group_config mem_pool_defaults;
u32 current_gpu_coherency_mode;

View File

@@ -161,7 +161,7 @@ kbase_dma_fence_cancel_atom(struct kbase_jd_atom *katom)
if (katom->status == KBASE_JD_ATOM_STATE_QUEUED) {
/* Wait was cancelled - zap the atom */
katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
if (jd_done_nolock(katom, true))
if (kbase_jd_done_nolock(katom, true))
kbase_js_sched_all(katom->kctx->kbdev);
}
}
@@ -193,10 +193,10 @@ kbase_dma_fence_work(struct work_struct *pwork)
kbase_fence_free_callbacks(katom);
/*
* Queue atom on GPU, unless it has already completed due to a failing
* dependency. Run jd_done_nolock() on the katom if it is completed.
* dependency. Run kbase_jd_done_nolock() on the katom if it is completed.
*/
if (unlikely(katom->status == KBASE_JD_ATOM_STATE_COMPLETED))
jd_done_nolock(katom, true);
kbase_jd_done_nolock(katom, true);
else
kbase_jd_dep_clear_locked(katom);

View File

@@ -105,7 +105,7 @@ void kbase_dma_fence_cancel_all_atoms(struct kbase_context *kctx);
* This function cancels all dma-buf fence callbacks on @katom, but does not
* cancel the katom itself.
*
* The caller is responsible for ensuring that jd_done_nolock is called on
* The caller is responsible for ensuring that kbase_jd_done_nolock is called on
* @katom.
*
* Locking: jctx.lock must be held when calling this function.

View File

@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
* (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2020-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -68,11 +68,7 @@ static const struct file_operations kbasep_dvfs_utilization_debugfs_fops = {
void kbase_dvfs_status_debugfs_init(struct kbase_device *kbdev)
{
struct dentry *file;
#if (KERNEL_VERSION(4, 7, 0) <= LINUX_VERSION_CODE)
const mode_t mode = 0444;
#else
const mode_t mode = 0400;
#endif
if (WARN_ON(!kbdev || IS_ERR_OR_NULL(kbdev->mali_debugfs_directory)))
return;

View File

@@ -271,6 +271,16 @@ bool kbase_fence_free_callbacks(struct kbase_jd_atom *katom);
#endif /* !MALI_USE_CSF */
/**
* kbase_fence_get() - Retrieve fence for a KCPUQ fence command.
* @fence_info: KCPUQ fence command
*
* A ref will be taken for the fence, so use @kbase_fence_put() to release it.
*
* Implemented as a macro that passes @fence_info->fence to dma_fence_get(), so
* @fence_info must be a pointer to an object with a fence member.
*
* Return: The fence, or NULL if there is no fence for KCPUQ fence command
*/
#define kbase_fence_get(fence_info) dma_fence_get((fence_info)->fence)
/**
* kbase_fence_put() - Releases a reference to a fence
* @fence: Fence to release reference for.

View File

@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
* (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2020-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -69,9 +69,11 @@ kbase_fence_fence_value_str(struct dma_fence *fence, char *str, int size)
}
#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE)
extern const struct fence_ops kbase_fence_ops; /* silence checker warning */
const struct fence_ops kbase_fence_ops = {
.wait = fence_default_wait,
#else
extern const struct dma_fence_ops kbase_fence_ops; /* silence checker warning */
const struct dma_fence_ops kbase_fence_ops = {
.wait = dma_fence_default_wait,
#endif

View File

@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
* (C) COPYRIGHT 2011-2021 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2011-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -198,7 +198,6 @@ static int kbase_gpuprops_get_props(struct base_gpu_props * const gpu_props,
gpu_props->raw_props.mem_features = regdump.mem_features;
gpu_props->raw_props.mmu_features = regdump.mmu_features;
gpu_props->raw_props.l2_features = regdump.l2_features;
gpu_props->raw_props.core_features = regdump.core_features;
gpu_props->raw_props.as_present = regdump.as_present;
gpu_props->raw_props.js_present = regdump.js_present;
@@ -324,9 +323,6 @@ static void kbase_gpuprops_calculate_props(
totalram_pages() << PAGE_SHIFT;
#endif
gpu_props->core_props.num_exec_engines =
KBASE_UBFX32(gpu_props->raw_props.core_features, 0, 4);
for (i = 0; i < BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS; i++)
gpu_props->core_props.texture_features[i] = gpu_props->raw_props.texture_features[i];
@@ -507,6 +503,21 @@ int kbase_gpuprops_set_features(struct kbase_device *kbdev)
if (!kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_THREAD_GROUP_SPLIT))
gpu_props->thread_props.max_thread_group_split = 0;
/*
* The CORE_FEATURES register has different meanings depending on GPU.
* On tGOx, bits[3:0] encode num_exec_engines.
* On CSF GPUs, bits[7:0] is an enumeration that needs to be parsed,
* instead.
* GPUs like tTIx have additional fields like LSC_SIZE that are
* otherwise reserved/RAZ on older GPUs.
*/
gpu_props->raw_props.core_features = regdump.core_features;
#if !MALI_USE_CSF
gpu_props->core_props.num_exec_engines =
KBASE_UBFX32(gpu_props->raw_props.core_features, 0, 4);
#endif
return err;
}
@@ -694,94 +705,102 @@ static struct {
#define PROP(name, member) \
{KBASE_GPUPROP_ ## name, offsetof(struct base_gpu_props, member), \
sizeof(((struct base_gpu_props *)0)->member)}
PROP(PRODUCT_ID, core_props.product_id),
PROP(VERSION_STATUS, core_props.version_status),
PROP(MINOR_REVISION, core_props.minor_revision),
PROP(MAJOR_REVISION, core_props.major_revision),
PROP(GPU_FREQ_KHZ_MAX, core_props.gpu_freq_khz_max),
PROP(LOG2_PROGRAM_COUNTER_SIZE, core_props.log2_program_counter_size),
PROP(TEXTURE_FEATURES_0, core_props.texture_features[0]),
PROP(TEXTURE_FEATURES_1, core_props.texture_features[1]),
PROP(TEXTURE_FEATURES_2, core_props.texture_features[2]),
PROP(TEXTURE_FEATURES_3, core_props.texture_features[3]),
PROP(GPU_AVAILABLE_MEMORY_SIZE, core_props.gpu_available_memory_size),
PROP(NUM_EXEC_ENGINES, core_props.num_exec_engines),
#define BACKWARDS_COMPAT_PROP(name, type) \
{ \
KBASE_GPUPROP_##name, SIZE_MAX, sizeof(type) \
}
PROP(PRODUCT_ID, core_props.product_id),
PROP(VERSION_STATUS, core_props.version_status),
PROP(MINOR_REVISION, core_props.minor_revision),
PROP(MAJOR_REVISION, core_props.major_revision),
PROP(GPU_FREQ_KHZ_MAX, core_props.gpu_freq_khz_max),
PROP(LOG2_PROGRAM_COUNTER_SIZE, core_props.log2_program_counter_size),
PROP(TEXTURE_FEATURES_0, core_props.texture_features[0]),
PROP(TEXTURE_FEATURES_1, core_props.texture_features[1]),
PROP(TEXTURE_FEATURES_2, core_props.texture_features[2]),
PROP(TEXTURE_FEATURES_3, core_props.texture_features[3]),
PROP(GPU_AVAILABLE_MEMORY_SIZE, core_props.gpu_available_memory_size),
PROP(L2_LOG2_LINE_SIZE, l2_props.log2_line_size),
PROP(L2_LOG2_CACHE_SIZE, l2_props.log2_cache_size),
PROP(L2_NUM_L2_SLICES, l2_props.num_l2_slices),
#if MALI_USE_CSF
BACKWARDS_COMPAT_PROP(NUM_EXEC_ENGINES, u8),
#else
PROP(NUM_EXEC_ENGINES, core_props.num_exec_engines),
#endif
PROP(TILER_BIN_SIZE_BYTES, tiler_props.bin_size_bytes),
PROP(TILER_MAX_ACTIVE_LEVELS, tiler_props.max_active_levels),
PROP(L2_LOG2_LINE_SIZE, l2_props.log2_line_size),
PROP(L2_LOG2_CACHE_SIZE, l2_props.log2_cache_size),
PROP(L2_NUM_L2_SLICES, l2_props.num_l2_slices),
PROP(MAX_THREADS, thread_props.max_threads),
PROP(MAX_WORKGROUP_SIZE, thread_props.max_workgroup_size),
PROP(MAX_BARRIER_SIZE, thread_props.max_barrier_size),
PROP(MAX_REGISTERS, thread_props.max_registers),
PROP(MAX_TASK_QUEUE, thread_props.max_task_queue),
PROP(MAX_THREAD_GROUP_SPLIT, thread_props.max_thread_group_split),
PROP(IMPL_TECH, thread_props.impl_tech),
PROP(TLS_ALLOC, thread_props.tls_alloc),
PROP(TILER_BIN_SIZE_BYTES, tiler_props.bin_size_bytes),
PROP(TILER_MAX_ACTIVE_LEVELS, tiler_props.max_active_levels),
PROP(RAW_SHADER_PRESENT, raw_props.shader_present),
PROP(RAW_TILER_PRESENT, raw_props.tiler_present),
PROP(RAW_L2_PRESENT, raw_props.l2_present),
PROP(RAW_STACK_PRESENT, raw_props.stack_present),
PROP(RAW_L2_FEATURES, raw_props.l2_features),
PROP(RAW_CORE_FEATURES, raw_props.core_features),
PROP(RAW_MEM_FEATURES, raw_props.mem_features),
PROP(RAW_MMU_FEATURES, raw_props.mmu_features),
PROP(RAW_AS_PRESENT, raw_props.as_present),
PROP(RAW_JS_PRESENT, raw_props.js_present),
PROP(RAW_JS_FEATURES_0, raw_props.js_features[0]),
PROP(RAW_JS_FEATURES_1, raw_props.js_features[1]),
PROP(RAW_JS_FEATURES_2, raw_props.js_features[2]),
PROP(RAW_JS_FEATURES_3, raw_props.js_features[3]),
PROP(RAW_JS_FEATURES_4, raw_props.js_features[4]),
PROP(RAW_JS_FEATURES_5, raw_props.js_features[5]),
PROP(RAW_JS_FEATURES_6, raw_props.js_features[6]),
PROP(RAW_JS_FEATURES_7, raw_props.js_features[7]),
PROP(RAW_JS_FEATURES_8, raw_props.js_features[8]),
PROP(RAW_JS_FEATURES_9, raw_props.js_features[9]),
PROP(RAW_JS_FEATURES_10, raw_props.js_features[10]),
PROP(RAW_JS_FEATURES_11, raw_props.js_features[11]),
PROP(RAW_JS_FEATURES_12, raw_props.js_features[12]),
PROP(RAW_JS_FEATURES_13, raw_props.js_features[13]),
PROP(RAW_JS_FEATURES_14, raw_props.js_features[14]),
PROP(RAW_JS_FEATURES_15, raw_props.js_features[15]),
PROP(RAW_TILER_FEATURES, raw_props.tiler_features),
PROP(RAW_TEXTURE_FEATURES_0, raw_props.texture_features[0]),
PROP(RAW_TEXTURE_FEATURES_1, raw_props.texture_features[1]),
PROP(RAW_TEXTURE_FEATURES_2, raw_props.texture_features[2]),
PROP(RAW_TEXTURE_FEATURES_3, raw_props.texture_features[3]),
PROP(RAW_GPU_ID, raw_props.gpu_id),
PROP(RAW_THREAD_MAX_THREADS, raw_props.thread_max_threads),
PROP(RAW_THREAD_MAX_WORKGROUP_SIZE,
raw_props.thread_max_workgroup_size),
PROP(MAX_THREADS, thread_props.max_threads),
PROP(MAX_WORKGROUP_SIZE, thread_props.max_workgroup_size),
PROP(MAX_BARRIER_SIZE, thread_props.max_barrier_size),
PROP(MAX_REGISTERS, thread_props.max_registers),
PROP(MAX_TASK_QUEUE, thread_props.max_task_queue),
PROP(MAX_THREAD_GROUP_SPLIT, thread_props.max_thread_group_split),
PROP(IMPL_TECH, thread_props.impl_tech),
PROP(TLS_ALLOC, thread_props.tls_alloc),
PROP(RAW_SHADER_PRESENT, raw_props.shader_present),
PROP(RAW_TILER_PRESENT, raw_props.tiler_present),
PROP(RAW_L2_PRESENT, raw_props.l2_present),
PROP(RAW_STACK_PRESENT, raw_props.stack_present),
PROP(RAW_L2_FEATURES, raw_props.l2_features),
PROP(RAW_CORE_FEATURES, raw_props.core_features),
PROP(RAW_MEM_FEATURES, raw_props.mem_features),
PROP(RAW_MMU_FEATURES, raw_props.mmu_features),
PROP(RAW_AS_PRESENT, raw_props.as_present),
PROP(RAW_JS_PRESENT, raw_props.js_present),
PROP(RAW_JS_FEATURES_0, raw_props.js_features[0]),
PROP(RAW_JS_FEATURES_1, raw_props.js_features[1]),
PROP(RAW_JS_FEATURES_2, raw_props.js_features[2]),
PROP(RAW_JS_FEATURES_3, raw_props.js_features[3]),
PROP(RAW_JS_FEATURES_4, raw_props.js_features[4]),
PROP(RAW_JS_FEATURES_5, raw_props.js_features[5]),
PROP(RAW_JS_FEATURES_6, raw_props.js_features[6]),
PROP(RAW_JS_FEATURES_7, raw_props.js_features[7]),
PROP(RAW_JS_FEATURES_8, raw_props.js_features[8]),
PROP(RAW_JS_FEATURES_9, raw_props.js_features[9]),
PROP(RAW_JS_FEATURES_10, raw_props.js_features[10]),
PROP(RAW_JS_FEATURES_11, raw_props.js_features[11]),
PROP(RAW_JS_FEATURES_12, raw_props.js_features[12]),
PROP(RAW_JS_FEATURES_13, raw_props.js_features[13]),
PROP(RAW_JS_FEATURES_14, raw_props.js_features[14]),
PROP(RAW_JS_FEATURES_15, raw_props.js_features[15]),
PROP(RAW_TILER_FEATURES, raw_props.tiler_features),
PROP(RAW_TEXTURE_FEATURES_0, raw_props.texture_features[0]),
PROP(RAW_TEXTURE_FEATURES_1, raw_props.texture_features[1]),
PROP(RAW_TEXTURE_FEATURES_2, raw_props.texture_features[2]),
PROP(RAW_TEXTURE_FEATURES_3, raw_props.texture_features[3]),
PROP(RAW_GPU_ID, raw_props.gpu_id),
PROP(RAW_THREAD_MAX_THREADS, raw_props.thread_max_threads),
PROP(RAW_THREAD_MAX_WORKGROUP_SIZE, raw_props.thread_max_workgroup_size),
PROP(RAW_THREAD_MAX_BARRIER_SIZE, raw_props.thread_max_barrier_size),
PROP(RAW_THREAD_FEATURES, raw_props.thread_features),
PROP(RAW_COHERENCY_MODE, raw_props.coherency_mode),
PROP(RAW_THREAD_TLS_ALLOC, raw_props.thread_tls_alloc),
PROP(RAW_GPU_FEATURES, raw_props.gpu_features),
PROP(COHERENCY_NUM_GROUPS, coherency_info.num_groups),
PROP(COHERENCY_NUM_CORE_GROUPS, coherency_info.num_core_groups),
PROP(COHERENCY_COHERENCY, coherency_info.coherency),
PROP(COHERENCY_GROUP_0, coherency_info.group[0].core_mask),
PROP(COHERENCY_GROUP_1, coherency_info.group[1].core_mask),
PROP(COHERENCY_GROUP_2, coherency_info.group[2].core_mask),
PROP(COHERENCY_GROUP_3, coherency_info.group[3].core_mask),
PROP(COHERENCY_GROUP_4, coherency_info.group[4].core_mask),
PROP(COHERENCY_GROUP_5, coherency_info.group[5].core_mask),
PROP(COHERENCY_GROUP_6, coherency_info.group[6].core_mask),
PROP(COHERENCY_GROUP_7, coherency_info.group[7].core_mask),
PROP(COHERENCY_GROUP_8, coherency_info.group[8].core_mask),
PROP(COHERENCY_GROUP_9, coherency_info.group[9].core_mask),
PROP(COHERENCY_GROUP_10, coherency_info.group[10].core_mask),
PROP(COHERENCY_GROUP_11, coherency_info.group[11].core_mask),
PROP(COHERENCY_GROUP_12, coherency_info.group[12].core_mask),
PROP(COHERENCY_GROUP_13, coherency_info.group[13].core_mask),
PROP(COHERENCY_GROUP_14, coherency_info.group[14].core_mask),
PROP(COHERENCY_GROUP_15, coherency_info.group[15].core_mask),
PROP(RAW_THREAD_FEATURES, raw_props.thread_features),
PROP(RAW_COHERENCY_MODE, raw_props.coherency_mode),
PROP(RAW_THREAD_TLS_ALLOC, raw_props.thread_tls_alloc),
PROP(RAW_GPU_FEATURES, raw_props.gpu_features),
PROP(COHERENCY_NUM_GROUPS, coherency_info.num_groups),
PROP(COHERENCY_NUM_CORE_GROUPS, coherency_info.num_core_groups),
PROP(COHERENCY_COHERENCY, coherency_info.coherency),
PROP(COHERENCY_GROUP_0, coherency_info.group[0].core_mask),
PROP(COHERENCY_GROUP_1, coherency_info.group[1].core_mask),
PROP(COHERENCY_GROUP_2, coherency_info.group[2].core_mask),
PROP(COHERENCY_GROUP_3, coherency_info.group[3].core_mask),
PROP(COHERENCY_GROUP_4, coherency_info.group[4].core_mask),
PROP(COHERENCY_GROUP_5, coherency_info.group[5].core_mask),
PROP(COHERENCY_GROUP_6, coherency_info.group[6].core_mask),
PROP(COHERENCY_GROUP_7, coherency_info.group[7].core_mask),
PROP(COHERENCY_GROUP_8, coherency_info.group[8].core_mask),
PROP(COHERENCY_GROUP_9, coherency_info.group[9].core_mask),
PROP(COHERENCY_GROUP_10, coherency_info.group[10].core_mask),
PROP(COHERENCY_GROUP_11, coherency_info.group[11].core_mask),
PROP(COHERENCY_GROUP_12, coherency_info.group[12].core_mask),
PROP(COHERENCY_GROUP_13, coherency_info.group[13].core_mask),
PROP(COHERENCY_GROUP_14, coherency_info.group[14].core_mask),
PROP(COHERENCY_GROUP_15, coherency_info.group[15].core_mask),
#undef PROP
};
@@ -818,7 +837,14 @@ int kbase_gpuprops_populate_user_buffer(struct kbase_device *kbdev)
for (i = 0; i < count; i++) {
u32 type = gpu_property_mapping[i].type;
u8 type_size;
void *field = ((u8 *)props) + gpu_property_mapping[i].offset;
const size_t offset = gpu_property_mapping[i].offset;
const u64 dummy_backwards_compat_value = (u64)0;
const void *field;
if (likely(offset < sizeof(struct base_gpu_props)))
field = ((const u8 *)props) + offset;
else
field = &dummy_backwards_compat_value;
switch (gpu_property_mapping[i].size) {
case 1:
@@ -844,16 +870,16 @@ int kbase_gpuprops_populate_user_buffer(struct kbase_device *kbdev)
switch (type_size) {
case KBASE_GPUPROP_VALUE_SIZE_U8:
WRITE_U8(*((u8 *)field));
WRITE_U8(*((const u8 *)field));
break;
case KBASE_GPUPROP_VALUE_SIZE_U16:
WRITE_U16(*((u16 *)field));
WRITE_U16(*((const u16 *)field));
break;
case KBASE_GPUPROP_VALUE_SIZE_U32:
WRITE_U32(*((u32 *)field));
WRITE_U32(*((const u32 *)field));
break;
case KBASE_GPUPROP_VALUE_SIZE_U64:
WRITE_U64(*((u64 *)field));
WRITE_U64(*((const u64 *)field));
break;
default: /* Cannot be reached */
WARN_ON(1);

View File

@@ -173,23 +173,29 @@ struct kbase_hwcnt_backend_csf_info {
/**
* struct kbase_hwcnt_csf_physical_layout - HWC sample memory physical layout
* information.
* @hw_block_cnt: Total number of hardware counters blocks. The hw counters blocks are
* sub-categorized into 4 classes: front-end, tiler, memory system, and shader.
* hw_block_cnt = fe_cnt + tiler_cnt + mmu_l2_cnt + shader_cnt.
* @fe_cnt: Front end block count.
* @tiler_cnt: Tiler block count.
* @mmu_l2_cnt: Memory system(MMU and L2 cache) block count.
* @mmu_l2_cnt: Memory system (MMU and L2 cache) block count.
* @shader_cnt: Shader Core block count.
* @block_cnt: Total block count (sum of all other block counts).
* @fw_block_cnt: Total number of firmware counters blocks.
* @block_cnt: Total block count (sum of all counter blocks: hw_block_cnt + fw_block_cnt).
* @shader_avail_mask: Bitmap of all shader cores in the system.
* @enable_mask_offset: Offset in array elements of enable mask in each block
* starting from the beginning of block.
* @headers_per_block: Header size per block.
* @counters_per_block: Counters size per block.
* @values_per_block: Total size per block.
* @headers_per_block: For any block, the number of counters designated as block's header.
* @counters_per_block: For any block, the number of counters designated as block's payload.
* @values_per_block: For any block, the number of counters in total (header + payload).
*/
struct kbase_hwcnt_csf_physical_layout {
u8 hw_block_cnt;
u8 fe_cnt;
u8 tiler_cnt;
u8 mmu_l2_cnt;
u8 shader_cnt;
u8 fw_block_cnt;
u8 block_cnt;
u64 shader_avail_mask;
size_t enable_mask_offset;
@@ -366,29 +372,38 @@ static void kbasep_hwcnt_backend_csf_init_layout(
const struct kbase_hwcnt_backend_csf_if_prfcnt_info *prfcnt_info,
struct kbase_hwcnt_csf_physical_layout *phys_layout)
{
u8 shader_core_cnt;
size_t shader_core_cnt;
size_t values_per_block;
size_t fw_blocks_count;
size_t hw_blocks_count;
WARN_ON(!prfcnt_info);
WARN_ON(!phys_layout);
shader_core_cnt = fls64(prfcnt_info->core_mask);
values_per_block =
prfcnt_info->prfcnt_block_size / KBASE_HWCNT_VALUE_HW_BYTES;
values_per_block = prfcnt_info->prfcnt_block_size / KBASE_HWCNT_VALUE_HW_BYTES;
fw_blocks_count = div_u64(prfcnt_info->prfcnt_fw_size, prfcnt_info->prfcnt_block_size);
hw_blocks_count = div_u64(prfcnt_info->prfcnt_hw_size, prfcnt_info->prfcnt_block_size);
/* The number of hardware counters reported by the GPU matches the legacy guess-work we
* have done in the past
*/
WARN_ON(hw_blocks_count != KBASE_HWCNT_V5_FE_BLOCK_COUNT +
KBASE_HWCNT_V5_TILER_BLOCK_COUNT +
prfcnt_info->l2_count + shader_core_cnt);
*phys_layout = (struct kbase_hwcnt_csf_physical_layout){
.fe_cnt = KBASE_HWCNT_V5_FE_BLOCK_COUNT,
.tiler_cnt = KBASE_HWCNT_V5_TILER_BLOCK_COUNT,
.mmu_l2_cnt = prfcnt_info->l2_count,
.shader_cnt = shader_core_cnt,
.block_cnt = KBASE_HWCNT_V5_FE_BLOCK_COUNT +
KBASE_HWCNT_V5_TILER_BLOCK_COUNT +
prfcnt_info->l2_count + shader_core_cnt,
.fw_block_cnt = fw_blocks_count,
.hw_block_cnt = hw_blocks_count,
.block_cnt = fw_blocks_count + hw_blocks_count,
.shader_avail_mask = prfcnt_info->core_mask,
.headers_per_block = KBASE_HWCNT_V5_HEADERS_PER_BLOCK,
.values_per_block = values_per_block,
.counters_per_block =
values_per_block - KBASE_HWCNT_V5_HEADERS_PER_BLOCK,
.counters_per_block = values_per_block - KBASE_HWCNT_V5_HEADERS_PER_BLOCK,
.enable_mask_offset = KBASE_HWCNT_V5_PRFCNT_EN_HEADER,
};
}
@@ -463,7 +478,15 @@ static void kbasep_hwcnt_backend_csf_accumulate_sample(
u64 *acc_block = accum_buf;
const size_t values_per_block = phys_layout->values_per_block;
for (block_idx = 0; block_idx < phys_layout->block_cnt; block_idx++) {
/* Performance counter blocks for firmware are stored before blocks for hardware.
* We skip over the firmware's performance counter blocks (counters dumping is not
* supported for firmware blocks, only hardware ones).
*/
old_block += values_per_block * phys_layout->fw_block_cnt;
new_block += values_per_block * phys_layout->fw_block_cnt;
for (block_idx = phys_layout->fw_block_cnt; block_idx < phys_layout->block_cnt;
block_idx++) {
const u32 old_enable_mask =
old_block[phys_layout->enable_mask_offset];
const u32 new_enable_mask =
@@ -551,8 +574,8 @@ static void kbasep_hwcnt_backend_csf_accumulate_sample(
old_sample_buf + (dump_bytes / KBASE_HWCNT_VALUE_HW_BYTES));
WARN_ON(new_block !=
new_sample_buf + (dump_bytes / KBASE_HWCNT_VALUE_HW_BYTES));
WARN_ON(acc_block !=
accum_buf + (dump_bytes / KBASE_HWCNT_VALUE_HW_BYTES));
WARN_ON(acc_block != accum_buf + (dump_bytes / KBASE_HWCNT_VALUE_HW_BYTES) -
(values_per_block * phys_layout->fw_block_cnt));
(void)dump_bytes;
}
@@ -1942,7 +1965,6 @@ void kbase_hwcnt_backend_csf_on_prfcnt_disable(
int kbase_hwcnt_backend_csf_metadata_init(
struct kbase_hwcnt_backend_interface *iface)
{
int errcode;
struct kbase_hwcnt_backend_csf_info *csf_info;
struct kbase_hwcnt_gpu_info gpu_info;
@@ -1968,19 +1990,8 @@ int kbase_hwcnt_backend_csf_metadata_init(
gpu_info.prfcnt_values_per_block =
csf_info->prfcnt_info.prfcnt_block_size /
KBASE_HWCNT_VALUE_HW_BYTES;
errcode = kbase_hwcnt_csf_metadata_create(
&gpu_info, csf_info->counter_set, &csf_info->metadata);
if (errcode)
return errcode;
/*
* Dump abstraction size should be exactly twice the size and layout as
* the physical dump size since 64-bit per value used in metadata.
*/
WARN_ON(csf_info->prfcnt_info.dump_bytes * 2 !=
csf_info->metadata->dump_buf_bytes);
return 0;
return kbase_hwcnt_csf_metadata_create(&gpu_info, csf_info->counter_set,
&csf_info->metadata);
}
void kbase_hwcnt_backend_csf_metadata_term(

View File

@@ -55,8 +55,12 @@ struct kbase_hwcnt_backend_csf_if_enable {
/**
* struct kbase_hwcnt_backend_csf_if_prfcnt_info - Performance counter
* information.
* @prfcnt_hw_size: Total length in bytes of all the hardware counters data. The hardware
* counters are sub-divided into 4 classes: front-end, shader, tiler, and
* memory system (l2 cache + MMU).
* @prfcnt_fw_size: Total length in bytes of all the firmware counters data.
* @dump_bytes: Bytes of GPU memory required to perform a performance
* counter dump.
* counter dump. dump_bytes = prfcnt_hw_size + prfcnt_fw_size.
* @prfcnt_block_size: Bytes of each performance counter block.
* @l2_count: The MMU L2 cache count.
* @core_mask: Shader core mask.
@@ -65,6 +69,8 @@ struct kbase_hwcnt_backend_csf_if_enable {
* is taken.
*/
struct kbase_hwcnt_backend_csf_if_prfcnt_info {
size_t prfcnt_hw_size;
size_t prfcnt_fw_size;
size_t dump_bytes;
size_t prfcnt_block_size;
size_t l2_count;

View File

@@ -221,30 +221,29 @@ static void kbasep_hwcnt_backend_csf_if_fw_get_prfcnt_info(
struct kbase_hwcnt_backend_csf_if_prfcnt_info *prfcnt_info)
{
#if IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI)
size_t dummy_model_blk_count;
struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx =
(struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx;
prfcnt_info->l2_count = KBASE_DUMMY_MODEL_MAX_MEMSYS_BLOCKS;
prfcnt_info->core_mask =
(1ull << KBASE_DUMMY_MODEL_MAX_SHADER_CORES) - 1;
/* 1 FE block + 1 Tiler block + l2_count blocks + shader_core blocks */
dummy_model_blk_count =
2 + prfcnt_info->l2_count + fls64(prfcnt_info->core_mask);
prfcnt_info->dump_bytes =
dummy_model_blk_count * KBASE_DUMMY_MODEL_BLOCK_SIZE;
prfcnt_info->prfcnt_block_size =
KBASE_HWCNT_V5_DEFAULT_VALUES_PER_BLOCK *
KBASE_HWCNT_VALUE_HW_BYTES;
prfcnt_info->clk_cnt = 1;
prfcnt_info->clearing_samples = true;
*prfcnt_info = (struct kbase_hwcnt_backend_csf_if_prfcnt_info){
.l2_count = KBASE_DUMMY_MODEL_MAX_MEMSYS_BLOCKS,
.core_mask = (1ull << KBASE_DUMMY_MODEL_MAX_SHADER_CORES) - 1,
.prfcnt_hw_size =
KBASE_DUMMY_MODEL_MAX_NUM_HARDWARE_BLOCKS * KBASE_DUMMY_MODEL_BLOCK_SIZE,
.prfcnt_fw_size =
KBASE_DUMMY_MODEL_MAX_FIRMWARE_BLOCKS * KBASE_DUMMY_MODEL_BLOCK_SIZE,
.dump_bytes = KBASE_DUMMY_MODEL_MAX_SAMPLE_SIZE,
.prfcnt_block_size = KBASE_DUMMY_MODEL_BLOCK_SIZE,
.clk_cnt = 1,
.clearing_samples = true,
};
fw_ctx->buf_bytes = prfcnt_info->dump_bytes;
#else
struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx;
struct kbase_device *kbdev;
u32 prfcnt_size;
u32 prfcnt_hw_size = 0;
u32 prfcnt_fw_size = 0;
u32 prfcnt_hw_size;
u32 prfcnt_fw_size;
u32 prfcnt_block_size = KBASE_HWCNT_V5_DEFAULT_VALUES_PER_BLOCK *
KBASE_HWCNT_VALUE_HW_BYTES;
@@ -254,8 +253,8 @@ static void kbasep_hwcnt_backend_csf_if_fw_get_prfcnt_info(
fw_ctx = (struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx;
kbdev = fw_ctx->kbdev;
prfcnt_size = kbdev->csf.global_iface.prfcnt_size;
prfcnt_hw_size = (prfcnt_size & 0xFF) << 8;
prfcnt_fw_size = (prfcnt_size >> 16) << 8;
prfcnt_hw_size = GLB_PRFCNT_SIZE_HARDWARE_SIZE_GET(prfcnt_size);
prfcnt_fw_size = GLB_PRFCNT_SIZE_FIRMWARE_SIZE_GET(prfcnt_size);
fw_ctx->buf_bytes = prfcnt_hw_size + prfcnt_fw_size;
/* Read the block size if the GPU has the register PRFCNT_FEATURES
@@ -269,14 +268,16 @@ static void kbasep_hwcnt_backend_csf_if_fw_get_prfcnt_info(
<< 8;
}
prfcnt_info->dump_bytes = fw_ctx->buf_bytes;
prfcnt_info->prfcnt_block_size = prfcnt_block_size;
prfcnt_info->l2_count = kbdev->gpu_props.props.l2_props.num_l2_slices;
prfcnt_info->core_mask =
kbdev->gpu_props.props.coherency_info.group[0].core_mask;
prfcnt_info->clk_cnt = fw_ctx->clk_cnt;
prfcnt_info->clearing_samples = true;
*prfcnt_info = (struct kbase_hwcnt_backend_csf_if_prfcnt_info){
.prfcnt_hw_size = prfcnt_hw_size,
.prfcnt_fw_size = prfcnt_fw_size,
.dump_bytes = fw_ctx->buf_bytes,
.prfcnt_block_size = prfcnt_block_size,
.l2_count = kbdev->gpu_props.props.l2_props.num_l2_slices,
.core_mask = kbdev->gpu_props.props.coherency_info.group[0].core_mask,
.clk_cnt = fw_ctx->clk_cnt,
.clearing_samples = true,
};
/* Block size must be multiple of counter size. */
WARN_ON((prfcnt_info->prfcnt_block_size % KBASE_HWCNT_VALUE_HW_BYTES) !=
@@ -506,10 +507,9 @@ static void kbasep_hwcnt_backend_csf_if_fw_ring_buf_free(
if (fw_ring_buf->phys) {
u64 gpu_va_base = KBASE_HWC_CSF_RING_BUFFER_VA_START;
WARN_ON(kbase_mmu_teardown_pages(
fw_ctx->kbdev, &fw_ctx->kbdev->csf.mcu_mmu,
gpu_va_base >> PAGE_SHIFT, fw_ring_buf->num_pages,
MCU_AS_NR));
WARN_ON(kbase_mmu_teardown_pages(fw_ctx->kbdev, &fw_ctx->kbdev->csf.mcu_mmu,
gpu_va_base >> PAGE_SHIFT, fw_ring_buf->phys,
fw_ring_buf->num_pages, MCU_AS_NR));
vunmap(fw_ring_buf->cpu_dump_base);

View File

@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
* (C) COPYRIGHT 2021 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -161,7 +161,7 @@ void kbase_hwcnt_dump_buffer_narrow_free(
return;
kfree(dump_buf_narrow->dump_buf);
*dump_buf_narrow = (struct kbase_hwcnt_dump_buffer_narrow){ 0 };
*dump_buf_narrow = (struct kbase_hwcnt_dump_buffer_narrow){ NULL };
}
int kbase_hwcnt_dump_buffer_narrow_array_alloc(

View File

@@ -82,7 +82,7 @@ static void jd_mark_atom_complete(struct kbase_jd_atom *katom)
* Returns whether the JS needs a reschedule.
*
* Note that the caller must also check the atom status and
* if it is KBASE_JD_ATOM_STATE_COMPLETED must call jd_done_nolock
* if it is KBASE_JD_ATOM_STATE_COMPLETED must call kbase_jd_done_nolock
*/
static bool jd_run_atom(struct kbase_jd_atom *katom)
{
@@ -148,7 +148,7 @@ void kbase_jd_dep_clear_locked(struct kbase_jd_atom *katom)
if (katom->status == KBASE_JD_ATOM_STATE_COMPLETED) {
/* The atom has already finished */
resched |= jd_done_nolock(katom, true);
resched |= kbase_jd_done_nolock(katom, true);
}
if (resched)
@@ -703,7 +703,7 @@ static void jd_update_jit_usage(struct kbase_jd_atom *katom)
}
#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */
bool jd_done_nolock(struct kbase_jd_atom *katom, bool post_immediately)
bool kbase_jd_done_nolock(struct kbase_jd_atom *katom, bool post_immediately)
{
struct kbase_context *kctx = katom->kctx;
struct list_head completed_jobs;
@@ -711,6 +711,8 @@ bool jd_done_nolock(struct kbase_jd_atom *katom, bool post_immediately)
bool need_to_try_schedule_context = false;
int i;
lockdep_assert_held(&kctx->jctx.lock);
KBASE_TLSTREAM_TL_JD_DONE_NO_LOCK_START(kctx->kbdev, katom);
INIT_LIST_HEAD(&completed_jobs);
@@ -820,7 +822,7 @@ bool jd_done_nolock(struct kbase_jd_atom *katom, bool post_immediately)
return need_to_try_schedule_context;
}
KBASE_EXPORT_TEST_API(jd_done_nolock);
KBASE_EXPORT_TEST_API(kbase_jd_done_nolock);
#if IS_ENABLED(CONFIG_GPU_TRACEPOINTS)
enum {
@@ -928,7 +930,6 @@ static bool jd_submit_atom(struct kbase_context *const kctx,
katom->jobslot = user_atom->jobslot;
katom->seq_nr = user_atom->seq_nr;
katom->atom_flags = 0;
katom->retry_count = 0;
katom->need_cache_flush_cores_retained = 0;
katom->pre_dep = NULL;
katom->post_dep = NULL;
@@ -989,7 +990,7 @@ static bool jd_submit_atom(struct kbase_context *const kctx,
* dependencies.
*/
jd_trace_atom_submit(kctx, katom, NULL);
return jd_done_nolock(katom, true);
return kbase_jd_done_nolock(katom, true);
}
}
}
@@ -1053,7 +1054,7 @@ static bool jd_submit_atom(struct kbase_context *const kctx,
if (err >= 0)
kbase_finish_soft_job(katom);
}
return jd_done_nolock(katom, true);
return kbase_jd_done_nolock(katom, true);
}
katom->will_fail_event_code = katom->event_code;
@@ -1087,7 +1088,7 @@ static bool jd_submit_atom(struct kbase_context *const kctx,
"Rejecting atom with unsupported core_req 0x%x\n",
katom->core_req);
katom->event_code = BASE_JD_EVENT_JOB_INVALID;
return jd_done_nolock(katom, true);
return kbase_jd_done_nolock(katom, true);
}
#endif /* !MALI_INCREMENTAL_RENDERING_JM */
@@ -1101,7 +1102,7 @@ static bool jd_submit_atom(struct kbase_context *const kctx,
*/
dev_err(kctx->kbdev->dev, "Rejecting atom with jc = NULL\n");
katom->event_code = BASE_JD_EVENT_JOB_INVALID;
return jd_done_nolock(katom, true);
return kbase_jd_done_nolock(katom, true);
}
/* Reject atoms with an invalid device_nr */
@@ -1111,7 +1112,7 @@ static bool jd_submit_atom(struct kbase_context *const kctx,
"Rejecting atom with invalid device_nr %d\n",
katom->device_nr);
katom->event_code = BASE_JD_EVENT_JOB_INVALID;
return jd_done_nolock(katom, true);
return kbase_jd_done_nolock(katom, true);
}
/* Reject atoms with invalid core requirements */
@@ -1121,7 +1122,7 @@ static bool jd_submit_atom(struct kbase_context *const kctx,
"Rejecting atom with invalid core requirements\n");
katom->event_code = BASE_JD_EVENT_JOB_INVALID;
katom->core_req &= ~BASE_JD_REQ_EVENT_COALESCE;
return jd_done_nolock(katom, true);
return kbase_jd_done_nolock(katom, true);
}
/* Reject soft-job atom of certain types from accessing external resources */
@@ -1132,7 +1133,7 @@ static bool jd_submit_atom(struct kbase_context *const kctx,
dev_err(kctx->kbdev->dev,
"Rejecting soft-job atom accessing external resources\n");
katom->event_code = BASE_JD_EVENT_JOB_INVALID;
return jd_done_nolock(katom, true);
return kbase_jd_done_nolock(katom, true);
}
if (katom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES) {
@@ -1140,7 +1141,7 @@ static bool jd_submit_atom(struct kbase_context *const kctx,
if (kbase_jd_pre_external_resources(katom, user_atom) != 0) {
/* setup failed (no access, bad resource, unknown resource types, etc.) */
katom->event_code = BASE_JD_EVENT_JOB_INVALID;
return jd_done_nolock(katom, true);
return kbase_jd_done_nolock(katom, true);
}
}
@@ -1151,7 +1152,7 @@ static bool jd_submit_atom(struct kbase_context *const kctx,
* JIT IDs - atom is invalid.
*/
katom->event_code = BASE_JD_EVENT_JOB_INVALID;
return jd_done_nolock(katom, true);
return kbase_jd_done_nolock(katom, true);
}
#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */
@@ -1165,13 +1166,13 @@ static bool jd_submit_atom(struct kbase_context *const kctx,
if ((katom->core_req & BASE_JD_REQ_SOFT_JOB) == 0) {
if (!kbase_js_is_atom_valid(kctx->kbdev, katom)) {
katom->event_code = BASE_JD_EVENT_JOB_INVALID;
return jd_done_nolock(katom, true);
return kbase_jd_done_nolock(katom, true);
}
} else {
/* Soft-job */
if (kbase_prepare_soft_job(katom) != 0) {
katom->event_code = BASE_JD_EVENT_JOB_INVALID;
return jd_done_nolock(katom, true);
return kbase_jd_done_nolock(katom, true);
}
}
@@ -1193,7 +1194,7 @@ static bool jd_submit_atom(struct kbase_context *const kctx,
if (katom->core_req & BASE_JD_REQ_SOFT_JOB) {
if (kbase_process_soft_job(katom) == 0) {
kbase_finish_soft_job(katom);
return jd_done_nolock(katom, true);
return kbase_jd_done_nolock(katom, true);
}
return false;
}
@@ -1223,7 +1224,7 @@ static bool jd_submit_atom(struct kbase_context *const kctx,
}
/* This is a pure dependency. Resolve it immediately */
return jd_done_nolock(katom, true);
return kbase_jd_done_nolock(katom, true);
}
int kbase_jd_submit(struct kbase_context *kctx,
@@ -1482,8 +1483,8 @@ void kbase_jd_done_worker(struct work_struct *data)
kbasep_js_remove_job(kbdev, kctx, katom);
mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
mutex_unlock(&js_devdata->queue_mutex);
/* jd_done_nolock() requires the jsctx_mutex lock to be dropped */
jd_done_nolock(katom, false);
/* kbase_jd_done_nolock() requires the jsctx_mutex lock to be dropped */
kbase_jd_done_nolock(katom, false);
/* katom may have been freed now, do not use! */
@@ -1549,7 +1550,7 @@ void kbase_jd_done_worker(struct work_struct *data)
kbase_js_sched_all(kbdev);
if (!atomic_dec_return(&kctx->work_count)) {
/* If worker now idle then post all events that jd_done_nolock()
/* If worker now idle then post all events that kbase_jd_done_nolock()
* has queued
*/
mutex_lock(&jctx->lock);
@@ -1623,7 +1624,7 @@ static void jd_cancel_worker(struct work_struct *data)
mutex_lock(&jctx->lock);
need_to_try_schedule_context = jd_done_nolock(katom, true);
need_to_try_schedule_context = kbase_jd_done_nolock(katom, true);
/* Because we're zapping, we're not adding any more jobs to this ctx, so no need to
* schedule the context. There's also no need for the jsctx_mutex to have been taken
* around this too.
@@ -1667,6 +1668,8 @@ void kbase_jd_done(struct kbase_jd_atom *katom, int slot_nr,
kbdev = kctx->kbdev;
KBASE_DEBUG_ASSERT(kbdev);
lockdep_assert_held(&kbdev->hwaccess_lock);
if (done_code & KBASE_JS_ATOM_DONE_EVICTED_FROM_NEXT)
katom->event_code = BASE_JD_EVENT_REMOVED_FROM_NEXT;

View File

@@ -72,9 +72,7 @@ static void kbase_jd_debugfs_fence_info(struct kbase_jd_atom *atom,
#endif
seq_printf(sfile,
#if (KERNEL_VERSION(4, 8, 0) > LINUX_VERSION_CODE)
"Sd(%u#%u: %s) ",
#elif (KERNEL_VERSION(5, 1, 0) > LINUX_VERSION_CODE)
#if (KERNEL_VERSION(5, 1, 0) > LINUX_VERSION_CODE)
"Sd(%llu#%u: %s) ",
#else
"Sd(%llu#%llu: %s) ",
@@ -93,9 +91,7 @@ static void kbase_jd_debugfs_fence_info(struct kbase_jd_atom *atom,
#endif
seq_printf(sfile,
#if (KERNEL_VERSION(4, 8, 0) > LINUX_VERSION_CODE)
"Wd(%u#%u: %s) ",
#elif (KERNEL_VERSION(5, 1, 0) > LINUX_VERSION_CODE)
#if (KERNEL_VERSION(5, 1, 0) > LINUX_VERSION_CODE)
"Wd(%llu#%u: %s) ",
#else
"Wd(%llu#%llu: %s) ",
@@ -230,11 +226,7 @@ static const struct file_operations kbasep_jd_debugfs_atoms_fops = {
void kbasep_jd_debugfs_ctx_init(struct kbase_context *kctx)
{
#if (KERNEL_VERSION(4, 7, 0) <= LINUX_VERSION_CODE)
const mode_t mode = 0444;
#else
const mode_t mode = 0400;
#endif
/* Caller already ensures this, but we keep the pattern for
* maintenance safety.

View File

@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
* (C) COPYRIGHT 2011-2021 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2011-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -4021,13 +4021,16 @@ base_jd_prio kbase_js_priority_check(struct kbase_device *kbdev, base_jd_prio pr
{
struct priority_control_manager_device *pcm_device = kbdev->pcm_dev;
int req_priority, out_priority;
base_jd_prio out_jd_priority = priority;
if (pcm_device) {
req_priority = kbasep_js_atom_prio_to_sched_prio(priority);
out_priority = pcm_device->ops.pcm_scheduler_priority_check(pcm_device, current, req_priority);
out_jd_priority = kbasep_js_sched_prio_to_atom_prio(out_priority);
}
return out_jd_priority;
req_priority = kbasep_js_atom_prio_to_sched_prio(priority);
out_priority = req_priority;
/* Skip the PCM-defined priority check if no PCM device is present or if
* kbasep_js_atom_prio_to_sched_prio returns an error
* (KBASE_JS_ATOM_SCHED_PRIO_INVALID).
*/
if (pcm_device && (req_priority != KBASE_JS_ATOM_SCHED_PRIO_INVALID))
out_priority = pcm_device->ops.pcm_scheduler_priority_check(pcm_device, current,
req_priority);
return kbasep_js_sched_prio_to_atom_prio(kbdev, out_priority);
}

View File

@@ -45,6 +45,7 @@
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/version.h>
#include <linux/version_compat_defs.h>
#include <linux/wait.h>
/* Define static_assert().
@@ -60,10 +61,6 @@
#define __static_assert(e, msg, ...) _Static_assert(e, msg)
#endif
#if KERNEL_VERSION(4, 16, 0) >= LINUX_VERSION_CODE
typedef unsigned int __poll_t;
#endif
#ifndef ENOTSUP
#define ENOTSUP EOPNOTSUPP
#endif
@@ -637,11 +634,11 @@ static __poll_t reader_poll(struct file *const file,
struct reader_changes *changes;
if (unlikely(!file || !wait))
return -EINVAL;
return (__poll_t)-EINVAL;
reader = file->private_data;
if (unlikely(!reader))
return -EBADF;
return (__poll_t)-EBADF;
changes = &reader->changes;

View File

@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
* (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -71,8 +71,6 @@
#else
/* empty wrapper macros for userspace */
#define static_branch_unlikely(key) (1)
#define KERNEL_VERSION(a, b, c) (0)
#define LINUX_VERSION_CODE (1)
#endif /* __KERNEL__ */
/* Forward declarations */

View File

@@ -36,6 +36,7 @@
#include <linux/mutex.h>
#include <linux/poll.h>
#include <linux/slab.h>
#include <linux/version_compat_defs.h>
#include <linux/workqueue.h>
/* The minimum allowed interval between dumps, in nanoseconds
@@ -226,25 +227,19 @@ static struct prfcnt_enum_item kinstr_prfcnt_supported_requests[] = {
* Return: POLLIN if data can be read without blocking, 0 if data can not be
* read without blocking, else error code.
*/
#if KERNEL_VERSION(4, 16, 0) >= LINUX_VERSION_CODE
static unsigned int
kbasep_kinstr_prfcnt_hwcnt_reader_poll(struct file *filp,
struct poll_table_struct *wait)
#else
static __poll_t
kbasep_kinstr_prfcnt_hwcnt_reader_poll(struct file *filp,
struct poll_table_struct *wait)
#endif
{
struct kbase_kinstr_prfcnt_client *cli;
if (!filp || !wait)
return -EINVAL;
return (__poll_t)-EINVAL;
cli = filp->private_data;
if (!cli)
return -EINVAL;
return (__poll_t)-EINVAL;
poll_wait(filp, &cli->waitq, wait);

View File

@@ -1802,9 +1802,8 @@ int kbase_gpu_mmap(struct kbase_context *kctx, struct kbase_va_region *reg,
return err;
bad_insert:
kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu,
reg->start_pfn, reg->nr_pages,
kctx->as_nr);
kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn, alloc->pages,
reg->nr_pages, kctx->as_nr);
kbase_remove_va_region(kctx->kbdev, reg);
@@ -1819,6 +1818,7 @@ static void kbase_jd_user_buf_unmap(struct kbase_context *kctx,
int kbase_gpu_munmap(struct kbase_context *kctx, struct kbase_va_region *reg)
{
int err = 0;
struct kbase_mem_phy_alloc *alloc;
if (reg->start_pfn == 0)
return 0;
@@ -1826,11 +1826,12 @@ int kbase_gpu_munmap(struct kbase_context *kctx, struct kbase_va_region *reg)
if (!reg->gpu_alloc)
return -EINVAL;
alloc = reg->gpu_alloc;
/* Tear down GPU page tables, depending on memory type. */
switch (reg->gpu_alloc->type) {
switch (alloc->type) {
case KBASE_MEM_TYPE_ALIAS: {
size_t i = 0;
struct kbase_mem_phy_alloc *alloc = reg->gpu_alloc;
/* Due to the way the number of valid PTEs and ATEs are tracked
* currently, only the GPU virtual range that is backed & mapped
@@ -1842,9 +1843,8 @@ int kbase_gpu_munmap(struct kbase_context *kctx, struct kbase_va_region *reg)
if (alloc->imported.alias.aliased[i].alloc) {
int err_loop = kbase_mmu_teardown_pages(
kctx->kbdev, &kctx->mmu,
reg->start_pfn +
(i *
alloc->imported.alias.stride),
reg->start_pfn + (i * alloc->imported.alias.stride),
alloc->pages + (i * alloc->imported.alias.stride),
alloc->imported.alias.aliased[i].length,
kctx->as_nr);
if (WARN_ON_ONCE(err_loop))
@@ -1854,39 +1854,37 @@ int kbase_gpu_munmap(struct kbase_context *kctx, struct kbase_va_region *reg)
}
break;
case KBASE_MEM_TYPE_IMPORTED_UMM:
err = kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu,
reg->start_pfn, reg->nr_pages, kctx->as_nr);
err = kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn,
alloc->pages, reg->nr_pages, kctx->as_nr);
break;
default:
err = kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu,
reg->start_pfn, kbase_reg_current_backed_size(reg),
kctx->as_nr);
err = kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn,
alloc->pages, kbase_reg_current_backed_size(reg),
kctx->as_nr);
break;
}
/* Update tracking, and other cleanup, depending on memory type. */
switch (reg->gpu_alloc->type) {
switch (alloc->type) {
case KBASE_MEM_TYPE_ALIAS:
/* We mark the source allocs as unmapped from the GPU when
* putting reg's allocs
*/
break;
case KBASE_MEM_TYPE_IMPORTED_USER_BUF: {
struct kbase_alloc_import_user_buf *user_buf =
&reg->gpu_alloc->imported.user_buf;
struct kbase_alloc_import_user_buf *user_buf = &alloc->imported.user_buf;
if (user_buf->current_mapping_usage_count & PINNED_ON_IMPORT) {
user_buf->current_mapping_usage_count &=
~PINNED_ON_IMPORT;
if (user_buf->current_mapping_usage_count & PINNED_ON_IMPORT) {
user_buf->current_mapping_usage_count &= ~PINNED_ON_IMPORT;
/* The allocation could still have active mappings. */
if (user_buf->current_mapping_usage_count == 0) {
kbase_jd_user_buf_unmap(kctx, reg->gpu_alloc,
(reg->flags & (KBASE_REG_CPU_WR |
KBASE_REG_GPU_WR)));
}
/* The allocation could still have active mappings. */
if (user_buf->current_mapping_usage_count == 0) {
kbase_jd_user_buf_unmap(kctx, alloc,
(reg->flags &
(KBASE_REG_CPU_WR | KBASE_REG_GPU_WR)));
}
}
}
fallthrough;
default:
kbase_mem_phy_alloc_gpu_unmapped(reg->gpu_alloc);
@@ -3687,12 +3685,7 @@ void kbase_jit_debugfs_init(struct kbase_context *kctx)
/* prevent unprivileged use of debug file system
* in old kernel version
*/
#if (KERNEL_VERSION(4, 7, 0) <= LINUX_VERSION_CODE)
/* only for newer kernel version debug file system is safe */
const mode_t mode = 0444;
#else
const mode_t mode = 0400;
#endif
/* Caller already ensures this, but we keep the pattern for
* maintenance safety.
@@ -4809,18 +4802,7 @@ int kbase_jd_user_buf_pin_pages(struct kbase_context *kctx,
write = reg->flags & (KBASE_REG_CPU_WR | KBASE_REG_GPU_WR);
#if KERNEL_VERSION(4, 6, 0) > LINUX_VERSION_CODE
pinned_pages = get_user_pages(NULL, mm, address, alloc->imported.user_buf.nr_pages,
#if KERNEL_VERSION(4, 4, 168) <= LINUX_VERSION_CODE && \
KERNEL_VERSION(4, 5, 0) > LINUX_VERSION_CODE
write ? FOLL_WRITE : 0, pages, NULL);
#else
write, 0, pages, NULL);
#endif
#elif KERNEL_VERSION(4, 9, 0) > LINUX_VERSION_CODE
pinned_pages = get_user_pages_remote(NULL, mm, address, alloc->imported.user_buf.nr_pages,
write, 0, pages, NULL);
#elif KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE
#if KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE
pinned_pages = get_user_pages_remote(NULL, mm, address, alloc->imported.user_buf.nr_pages,
write ? FOLL_WRITE : 0, pages, NULL);
#elif KERNEL_VERSION(5, 9, 0) > LINUX_VERSION_CODE
@@ -5056,12 +5038,10 @@ void kbase_unmap_external_resource(struct kbase_context *kctx,
if (!kbase_is_region_invalid_or_free(reg) &&
reg->gpu_alloc == alloc)
kbase_mmu_teardown_pages(
kctx->kbdev,
&kctx->mmu,
reg->start_pfn,
kbase_reg_current_backed_size(reg),
kctx->as_nr);
kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn,
alloc->pages,
kbase_reg_current_backed_size(reg),
kctx->as_nr);
if (reg && ((reg->flags & (KBASE_REG_CPU_WR | KBASE_REG_GPU_WR)) == 0))
writeable = false;

View File

@@ -31,9 +31,6 @@
#include <linux/fs.h>
#include <linux/version.h>
#include <linux/dma-mapping.h>
#if (KERNEL_VERSION(4, 8, 0) > LINUX_VERSION_CODE)
#include <linux/dma-attrs.h>
#endif /* LINUX_VERSION_CODE < 4.8.0 */
#include <linux/dma-buf.h>
#include <linux/shrinker.h>
#include <linux/cache.h>
@@ -104,6 +101,23 @@ static int kbase_mem_shrink_gpu_mapping(struct kbase_context *kctx,
struct kbase_va_region *reg,
u64 new_pages, u64 old_pages);
/**
 * is_process_exiting - Check whether the process owning a VMA has exited.
 * @vma: VMA whose owning address space ('vm_mm') is inspected.
 *
 * The PF_EXITING flag can't be reliably used here to detect process exit,
 * as the 'mm_users' counter could still be non-zero when all threads of the
 * process have exited. Later, when the thread (which took a reference on the
 * 'mm' of the process that exited) drops its reference, the vm_ops->close
 * method would be called for all the VMAs (owned by the 'mm' of the process
 * that exited), but the PF_EXITING flag may not necessarily be set for that
 * thread at that time. The 'mm_users' count is therefore checked instead.
 *
 * Return: true if the 'mm_users' count of the VMA's 'mm' reads as zero,
 *         false otherwise.
 */
static bool is_process_exiting(struct vm_area_struct *vma)
{
	return atomic_read(&vma->vm_mm->mm_users) == 0;
}
/* Retrieve the associated region pointer if the GPU address corresponds to
* one of the event memory pages. The enclosing region, if found, shouldn't
* have been marked as free.
@@ -1103,19 +1117,7 @@ int kbase_mem_do_sync_imported(struct kbase_context *kctx,
ret = 0;
}
#else
/* Though the below version check could be superfluous depending upon the version condition
* used for enabling KBASE_MEM_ION_SYNC_WORKAROUND, we still keep this check here to allow
* ease of modification for non-ION systems or systems where ION has been patched.
*/
#if KERNEL_VERSION(4, 6, 0) > LINUX_VERSION_CODE && !defined(CONFIG_CHROMEOS)
dma_buf_end_cpu_access(dma_buf,
0, dma_buf->size,
dir);
ret = 0;
#else
ret = dma_buf_end_cpu_access(dma_buf,
dir);
#endif
ret = dma_buf_end_cpu_access(dma_buf, dir);
#endif /* KBASE_MEM_ION_SYNC_WORKAROUND */
break;
case KBASE_SYNC_TO_CPU:
@@ -1132,11 +1134,7 @@ int kbase_mem_do_sync_imported(struct kbase_context *kctx,
ret = 0;
}
#else
ret = dma_buf_begin_cpu_access(dma_buf,
#if KERNEL_VERSION(4, 6, 0) > LINUX_VERSION_CODE && !defined(CONFIG_CHROMEOS)
0, dma_buf->size,
#endif
dir);
ret = dma_buf_begin_cpu_access(dma_buf, dir);
#endif /* KBASE_MEM_ION_SYNC_WORKAROUND */
break;
}
@@ -1315,11 +1313,8 @@ int kbase_mem_umm_map(struct kbase_context *kctx,
return 0;
bad_pad_insert:
kbase_mmu_teardown_pages(kctx->kbdev,
&kctx->mmu,
reg->start_pfn,
alloc->nents,
kctx->as_nr);
kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn, alloc->pages,
alloc->nents, kctx->as_nr);
bad_insert:
kbase_mem_umm_unmap_attachment(kctx, alloc);
bad_map_attachment:
@@ -1347,11 +1342,8 @@ void kbase_mem_umm_unmap(struct kbase_context *kctx,
if (!kbase_is_region_invalid_or_free(reg) && reg->gpu_alloc == alloc) {
int err;
err = kbase_mmu_teardown_pages(kctx->kbdev,
&kctx->mmu,
reg->start_pfn,
reg->nr_pages,
kctx->as_nr);
err = kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn,
alloc->pages, reg->nr_pages, kctx->as_nr);
WARN_ON(err);
}
@@ -1669,18 +1661,7 @@ static struct kbase_va_region *kbase_mem_from_user_buffer(
write = reg->flags & (KBASE_REG_CPU_WR | KBASE_REG_GPU_WR);
#if KERNEL_VERSION(4, 6, 0) > LINUX_VERSION_CODE
faulted_pages = get_user_pages(current, current->mm, address, *va_pages,
#if KERNEL_VERSION(4, 4, 168) <= LINUX_VERSION_CODE && \
KERNEL_VERSION(4, 5, 0) > LINUX_VERSION_CODE
write ? FOLL_WRITE : 0, pages, NULL);
#else
write, 0, pages, NULL);
#endif
#elif KERNEL_VERSION(4, 9, 0) > LINUX_VERSION_CODE
faulted_pages = get_user_pages(address, *va_pages,
write, 0, pages, NULL);
#elif KERNEL_VERSION(5, 9, 0) > LINUX_VERSION_CODE
#if KERNEL_VERSION(5, 9, 0) > LINUX_VERSION_CODE
faulted_pages = get_user_pages(address, *va_pages,
write ? FOLL_WRITE : 0, pages, NULL);
#else
@@ -2193,10 +2174,11 @@ static int kbase_mem_shrink_gpu_mapping(struct kbase_context *const kctx,
u64 const new_pages, u64 const old_pages)
{
u64 delta = old_pages - new_pages;
struct kbase_mem_phy_alloc *alloc = reg->gpu_alloc;
int ret = 0;
ret = kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu,
reg->start_pfn + new_pages, delta, kctx->as_nr);
ret = kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn + new_pages,
alloc->pages + new_pages, delta, kctx->as_nr);
return ret;
}
@@ -2414,7 +2396,7 @@ static void kbase_cpu_vm_close(struct vm_area_struct *vma)
/* Avoid freeing memory on the process death which results in
* GPU Page Fault. Memory will be freed in kbase_destroy_context
*/
if (!(current->flags & PF_EXITING))
if (!is_process_exiting(vma))
kbase_mem_free_region(map->kctx, map->region);
}
@@ -3314,7 +3296,7 @@ static void kbase_csf_user_io_pages_vm_close(struct vm_area_struct *vma)
reset_prevented = true;
mutex_lock(&kctx->csf.lock);
kbase_csf_queue_unbind(queue);
kbase_csf_queue_unbind(queue, is_process_exiting(vma));
mutex_unlock(&kctx->csf.lock);
if (reset_prevented)
@@ -3355,13 +3337,6 @@ static vm_fault_t kbase_csf_user_io_pages_vm_fault(struct vm_fault *vmf)
/* Always map the doorbell page as uncached */
doorbell_pgprot = pgprot_device(vma->vm_page_prot);
#if ((KERNEL_VERSION(4, 4, 147) >= LINUX_VERSION_CODE) || \
((KERNEL_VERSION(4, 6, 0) > LINUX_VERSION_CODE) && \
(KERNEL_VERSION(4, 5, 0) <= LINUX_VERSION_CODE)))
vma->vm_page_prot = doorbell_pgprot;
input_page_pgprot = doorbell_pgprot;
output_page_pgprot = doorbell_pgprot;
#else
if (kbdev->system_coherency == COHERENCY_NONE) {
input_page_pgprot = pgprot_writecombine(vma->vm_page_prot);
output_page_pgprot = pgprot_writecombine(vma->vm_page_prot);
@@ -3369,7 +3344,6 @@ static vm_fault_t kbase_csf_user_io_pages_vm_fault(struct vm_fault *vmf)
input_page_pgprot = vma->vm_page_prot;
output_page_pgprot = vma->vm_page_prot;
}
#endif
doorbell_cpu_addr = vma->vm_start;

View File

@@ -439,18 +439,7 @@ u32 kbase_get_cache_line_alignment(struct kbase_device *kbdev);
static inline vm_fault_t vmf_insert_pfn_prot(struct vm_area_struct *vma,
unsigned long addr, unsigned long pfn, pgprot_t pgprot)
{
int err;
#if ((KERNEL_VERSION(4, 4, 147) >= LINUX_VERSION_CODE) || \
((KERNEL_VERSION(4, 6, 0) > LINUX_VERSION_CODE) && \
(KERNEL_VERSION(4, 5, 0) <= LINUX_VERSION_CODE)))
if (pgprot_val(pgprot) != pgprot_val(vma->vm_page_prot))
return VM_FAULT_SIGBUS;
err = vm_insert_pfn(vma, addr, pfn);
#else
err = vm_insert_pfn_prot(vma, addr, pfn, pgprot);
#endif
int err = vm_insert_pfn_prot(vma, addr, pfn, pgprot);
if (unlikely(err == -ENOMEM))
return VM_FAULT_OOM;

View File

@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
* (C) COPYRIGHT 2014-2021 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2014-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -168,13 +168,7 @@ static const struct file_operations kbase_mem_pool_debugfs_max_size_fops = {
void kbase_mem_pool_debugfs_init(struct dentry *parent,
struct kbase_context *kctx)
{
/* prevent unprivileged use of debug file in old kernel version */
#if (KERNEL_VERSION(4, 7, 0) <= LINUX_VERSION_CODE)
/* only for newer kernel version debug file system is safe */
const mode_t mode = 0644;
#else
const mode_t mode = 0600;
#endif
debugfs_create_file("mem_pool_size", mode, parent,
&kctx->mem_pools.small, &kbase_mem_pool_debugfs_fops);

View File

@@ -69,11 +69,7 @@ static const struct file_operations kbasep_mem_profile_debugfs_fops = {
int kbasep_mem_profile_debugfs_insert(struct kbase_context *kctx, char *data,
size_t size)
{
#if (KERNEL_VERSION(4, 7, 0) <= LINUX_VERSION_CODE)
const mode_t mode = 0444;
#else
const mode_t mode = 0400;
#endif
int err = 0;
mutex_lock(&kctx->mem_profile_lock);

View File

@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
* (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -140,6 +140,30 @@ kbase_native_mgm_update_gpu_pte(struct memory_group_manager_device *mgm_dev,
return pte;
}
/**
 * kbase_native_mgm_pte_to_original_pte - Native method to reverse what
 *                                        kbase_native_mgm_update_gpu_pte()
 *                                        did to a page table entry.
 *
 * @mgm_dev:   The memory group manager the request is being made through.
 *             Not used by the native implementation.
 * @group_id:  A physical memory group ID, which must be valid but is not used.
 *             Its valid range is 0 .. MEMORY_GROUP_MANAGER_NR_GROUPS-1.
 * @mmu_level: The level of the MMU page table the entry belongs to.
 *             Not used by the native implementation.
 * @pte:       The page table entry to convert back.
 *
 * The native implementation has nothing to undo, so @pte is handed back
 * exactly as it was received.
 *
 * Return: @pte, unmodified.
 */
static u64 kbase_native_mgm_pte_to_original_pte(struct memory_group_manager_device *mgm_dev,
						int group_id, int mmu_level, u64 pte)
{
	/* Only @pte is meaningful here; silence unused-parameter warnings. */
	CSTD_UNUSED(mmu_level);
	CSTD_UNUSED(group_id);
	CSTD_UNUSED(mgm_dev);

	return pte;
}
struct memory_group_manager_device kbase_native_mgm_dev = {
.ops = {
.mgm_alloc_page = kbase_native_mgm_alloc,
@@ -147,6 +171,7 @@ struct memory_group_manager_device kbase_native_mgm_dev = {
.mgm_get_import_memory_id = NULL,
.mgm_vmf_insert_pfn_prot = kbase_native_mgm_vmf_insert_pfn_prot,
.mgm_update_gpu_pte = kbase_native_mgm_update_gpu_pte,
.mgm_pte_to_original_pte = kbase_native_mgm_pte_to_original_pte,
},
.data = NULL
};

View File

@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
* (C) COPYRIGHT 2021 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -120,14 +120,10 @@ static const struct file_operations pbha_int_id_overrides_fops = {
void kbase_pbha_debugfs_init(struct kbase_device *kbdev)
{
if (kbasep_pbha_supported(kbdev)) {
#if (KERNEL_VERSION(4, 7, 0) <= LINUX_VERSION_CODE)
/* only for newer kernel version debug file system is safe */
const mode_t mode = 0644;
#else
const mode_t mode = 0600;
#endif
struct dentry *debugfs_pbha_dir = debugfs_create_dir(
"pbha", kbdev->mali_debugfs_directory);
if (IS_ERR_OR_NULL(debugfs_pbha_dir)) {
dev_err(kbdev->dev,
"Couldn't create mali debugfs page-based hardware attributes directory\n");

View File

@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
* (C) COPYRIGHT 2011-2021 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2011-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -213,7 +213,7 @@ void kbase_soft_event_wait_callback(struct kbase_jd_atom *katom)
mutex_lock(&kctx->jctx.lock);
kbasep_remove_waiting_soft_job(katom);
kbase_finish_soft_job(katom);
if (jd_done_nolock(katom, true))
if (kbase_jd_done_nolock(katom, true))
kbase_js_sched_all(kctx->kbdev);
mutex_unlock(&kctx->jctx.lock);
}
@@ -227,7 +227,7 @@ static void kbasep_soft_event_complete_job(struct work_struct *work)
int resched;
mutex_lock(&kctx->jctx.lock);
resched = jd_done_nolock(katom, true);
resched = kbase_jd_done_nolock(katom, true);
mutex_unlock(&kctx->jctx.lock);
if (resched)
@@ -498,7 +498,7 @@ out:
static void kbasep_soft_event_cancel_job(struct kbase_jd_atom *katom)
{
katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
if (jd_done_nolock(katom, true))
if (kbase_jd_done_nolock(katom, true))
kbase_js_sched_all(katom->kctx->kbdev);
}
@@ -810,11 +810,7 @@ int kbase_mem_copy_from_extres(struct kbase_context *kctx,
dma_to_copy = min(dma_buf->size,
(size_t)(buf_data->nr_extres_pages * PAGE_SIZE));
ret = dma_buf_begin_cpu_access(dma_buf,
#if KERNEL_VERSION(4, 6, 0) > LINUX_VERSION_CODE && !defined(CONFIG_CHROMEOS)
0, dma_to_copy,
#endif
DMA_FROM_DEVICE);
ret = dma_buf_begin_cpu_access(dma_buf, DMA_FROM_DEVICE);
if (ret)
goto out_unlock;
@@ -841,11 +837,7 @@ int kbase_mem_copy_from_extres(struct kbase_context *kctx,
break;
}
}
dma_buf_end_cpu_access(dma_buf,
#if KERNEL_VERSION(4, 6, 0) > LINUX_VERSION_CODE && !defined(CONFIG_CHROMEOS)
0, dma_to_copy,
#endif
DMA_FROM_DEVICE);
dma_buf_end_cpu_access(dma_buf, DMA_FROM_DEVICE);
break;
}
default:
@@ -1355,7 +1347,7 @@ static void kbasep_jit_finish_worker(struct work_struct *work)
mutex_lock(&kctx->jctx.lock);
kbase_finish_soft_job(katom);
resched = jd_done_nolock(katom, true);
resched = kbase_jd_done_nolock(katom, true);
mutex_unlock(&kctx->jctx.lock);
if (resched)
@@ -1786,7 +1778,7 @@ void kbase_resume_suspended_soft_jobs(struct kbase_device *kbdev)
if (kbase_process_soft_job(katom_iter) == 0) {
kbase_finish_soft_job(katom_iter);
resched |= jd_done_nolock(katom_iter, true);
resched |= kbase_jd_done_nolock(katom_iter, true);
#ifdef CONFIG_MALI_ARBITER_SUPPORT
atomic_dec(&kbdev->pm.gpu_users_waiting);
#endif /* CONFIG_MALI_ARBITER_SUPPORT */

View File

@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
* (C) COPYRIGHT 2012-2017, 2020-2021 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2012-2017, 2020-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -248,22 +248,17 @@ int kbase_sync_fence_out_create(struct kbase_jd_atom *katom, int tl_fd)
/* create a fd representing the fence */
fd = get_unused_fd_flags(O_RDWR | O_CLOEXEC);
if (fd < 0) {
sync_pt_free(pt);
sync_fence_put(fence);
katom->fence = NULL;
goto out;
}
/* Place the successfully created fence in katom */
katom->fence = fence;
/* bind fence to the new fd */
sync_fence_install(fence, fd);
katom->fence = sync_fence_fdget(fd);
if (katom->fence == NULL) {
/* The only way the fence can be NULL is if userspace closed it
* for us, so we don't need to clear it up
*/
fd = -EINVAL;
goto out;
}
out:
fput(tl_file);
@@ -445,7 +440,7 @@ void kbase_sync_fence_in_cancel_wait(struct kbase_jd_atom *katom)
kbasep_remove_waiting_soft_job(katom);
kbase_finish_soft_job(katom);
if (jd_done_nolock(katom, true))
if (kbase_jd_done_nolock(katom, true))
kbase_js_sched_all(katom->kctx->kbdev);
}

View File

@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
* (C) COPYRIGHT 2012-2021 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2012-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -262,7 +262,7 @@ void kbase_sync_fence_in_cancel_wait(struct kbase_jd_atom *katom)
kbasep_remove_waiting_soft_job(katom);
kbase_finish_soft_job(katom);
if (jd_done_nolock(katom, true))
if (kbase_jd_done_nolock(katom, true))
kbase_js_sched_all(katom->kctx->kbdev);
}
@@ -309,10 +309,7 @@ void kbase_sync_fence_info_get(struct dma_fence *fence,
info->status = 0; /* still active (unsignaled) */
}
#if (KERNEL_VERSION(4, 8, 0) > LINUX_VERSION_CODE)
scnprintf(info->name, sizeof(info->name), "%u#%u",
fence->context, fence->seqno);
#elif (KERNEL_VERSION(5, 1, 0) > LINUX_VERSION_CODE)
#if (KERNEL_VERSION(5, 1, 0) > LINUX_VERSION_CODE)
scnprintf(info->name, sizeof(info->name), "%llu#%u",
fence->context, fence->seqno);
#else

View File

@@ -38,6 +38,7 @@
#include <linux/mutex.h>
#include <linux/poll.h>
#include <linux/slab.h>
#include <linux/version_compat_defs.h>
#include <linux/workqueue.h>
/* Hwcnt reader API version */
@@ -113,9 +114,7 @@ struct kbase_vinstr_client {
wait_queue_head_t waitq;
};
static unsigned int kbasep_vinstr_hwcnt_reader_poll(
struct file *filp,
poll_table *wait);
static __poll_t kbasep_vinstr_hwcnt_reader_poll(struct file *filp, poll_table *wait);
static long kbasep_vinstr_hwcnt_reader_ioctl(
struct file *filp,
@@ -1038,18 +1037,16 @@ static long kbasep_vinstr_hwcnt_reader_ioctl(
* Return: POLLIN if data can be read without blocking, 0 if data can not be
* read without blocking, else error code.
*/
static unsigned int kbasep_vinstr_hwcnt_reader_poll(
struct file *filp,
poll_table *wait)
static __poll_t kbasep_vinstr_hwcnt_reader_poll(struct file *filp, poll_table *wait)
{
struct kbase_vinstr_client *cli;
if (!filp || !wait)
return -EINVAL;
return (__poll_t)-EINVAL;
cli = filp->private_data;
if (!cli)
return -EINVAL;
return (__poll_t)-EINVAL;
poll_wait(filp, &cli->waitq, wait);
if (kbasep_vinstr_hwcnt_reader_buffer_ready(cli))

View File

@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
* (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -152,8 +152,8 @@ void kbase_gpu_report_bus_fault_and_kill(struct kbase_context *kctx,
/* terminal fault, print info about the fault */
dev_err(kbdev->dev,
"GPU bus fault in AS%d at VA 0x%016llX\n"
"VA_VALID: %s\n"
"GPU bus fault in AS%d at PA 0x%016llX\n"
"PA_VALID: %s\n"
"raw fault status: 0x%X\n"
"exception type 0x%X: %s\n"
"access type 0x%X: %s\n"

View File

@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
* (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -66,7 +66,7 @@ void kbase_gpu_report_bus_fault_and_kill(struct kbase_context *kctx,
/* terminal fault, print info about the fault */
dev_err(kbdev->dev,
"GPU bus fault in AS%d at VA 0x%016llX\n"
"GPU bus fault in AS%d at PA 0x%016llX\n"
"raw fault status: 0x%X\n"
"exception type 0x%X: %s\n"
"exception data 0x%X\n"

File diff suppressed because it is too large Load Diff

View File

@@ -129,11 +129,9 @@ void kbase_mmu_term(struct kbase_device *kbdev, struct kbase_mmu_table *mmut);
u64 kbase_mmu_create_ate(struct kbase_device *kbdev,
struct tagged_addr phy, unsigned long flags, int level, int group_id);
int kbase_mmu_insert_pages_no_flush(struct kbase_device *kbdev,
struct kbase_mmu_table *mmut,
const u64 start_vpfn,
struct tagged_addr *phys, size_t nr,
unsigned long flags, int group_id);
int kbase_mmu_insert_pages_no_flush(struct kbase_device *kbdev, struct kbase_mmu_table *mmut,
const u64 start_vpfn, struct tagged_addr *phys, size_t nr,
unsigned long flags, int group_id, u64 *dirty_pgds);
int kbase_mmu_insert_pages(struct kbase_device *kbdev,
struct kbase_mmu_table *mmut, u64 vpfn,
struct tagged_addr *phys, size_t nr,
@@ -144,9 +142,8 @@ int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn,
unsigned long flags, int group_id,
enum kbase_caller_mmu_sync_info mmu_sync_info);
int kbase_mmu_teardown_pages(struct kbase_device *kbdev,
struct kbase_mmu_table *mmut, u64 vpfn,
size_t nr, int as_nr);
int kbase_mmu_teardown_pages(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, u64 vpfn,
struct tagged_addr *phys, size_t nr, int as_nr);
int kbase_mmu_update_pages(struct kbase_context *kctx, u64 vpfn,
struct tagged_addr *phys, size_t nr,
unsigned long flags, int const group_id);

View File

@@ -75,12 +75,14 @@ enum kbase_mmu_op_type {
};
/**
* struct kbase_mmu_hw_op_param - parameters for kbase_mmu_hw_do_operation()
* @vpfn: MMU Virtual Page Frame Number to start the operation on.
* @nr: Number of pages to work on.
* @op: Operation type (written to ASn_COMMAND).
* @kctx_id: Kernel context ID for MMU command tracepoint
* @mmu_sync_info: Indicates whether this call is synchronous wrt MMU ops.
* struct kbase_mmu_hw_op_param - parameters for kbase_mmu_hw_do_* functions
* @vpfn: MMU Virtual Page Frame Number to start the operation on.
* @nr: Number of pages to work on.
* @op: Operation type (written to ASn_COMMAND).
* @kctx_id: Kernel context ID for MMU command tracepoint.
* @mmu_sync_info: Indicates whether this call is synchronous wrt MMU ops.
* @flush_skip_levels: Page table levels to skip flushing. (Only
* applicable if GPU supports feature)
*/
struct kbase_mmu_hw_op_param {
u64 vpfn;
@@ -88,6 +90,7 @@ struct kbase_mmu_hw_op_param {
enum kbase_mmu_op_type op;
u32 kctx_id;
enum kbase_caller_mmu_sync_info mmu_sync_info;
u64 flush_skip_levels;
};
/**
@@ -102,18 +105,86 @@ void kbase_mmu_hw_configure(struct kbase_device *kbdev,
struct kbase_as *as);
/**
* kbase_mmu_hw_do_operation - Issue an operation to the MMU.
* @kbdev: kbase device to issue the MMU operation on.
* @as: address space to issue the MMU operation on.
* @op_param: parameters for the operation.
* kbase_mmu_hw_do_unlock_no_addr - Issue UNLOCK command to the MMU without
* programming the LOCKADDR register and wait
* for it to complete before returning.
*
* Issue an operation (MMU invalidate, MMU flush, etc) on the address space that
* is associated with the provided kbase_context over the specified range
* @kbdev: Kbase device to issue the MMU operation on.
* @as: Address space to issue the MMU operation on.
* @op_param: Pointer to struct containing information about the MMU
* operation to perform.
*
* Return: 0 if issuing the command was successful, otherwise an error code.
*/
int kbase_mmu_hw_do_unlock_no_addr(struct kbase_device *kbdev, struct kbase_as *as,
const struct kbase_mmu_hw_op_param *op_param);
/**
* kbase_mmu_hw_do_unlock - Issue UNLOCK command to the MMU and wait for it
* to complete before returning.
*
* @kbdev: Kbase device to issue the MMU operation on.
* @as: Address space to issue the MMU operation on.
* @op_param: Pointer to struct containing information about the MMU
* operation to perform.
*
* Return: 0 if issuing the command was successful, otherwise an error code.
*/
int kbase_mmu_hw_do_unlock(struct kbase_device *kbdev, struct kbase_as *as,
const struct kbase_mmu_hw_op_param *op_param);
/**
* kbase_mmu_hw_do_flush - Issue a flush operation to the MMU.
*
* @kbdev: Kbase device to issue the MMU operation on.
* @as: Address space to issue the MMU operation on.
* @op_param: Pointer to struct containing information about the MMU
* operation to perform.
*
* Issue a flush operation on the address space as per the information
* specified inside @op_param. This function should not be called for
* GPUs where MMU command to flush the cache(s) is deprecated.
* mmu_hw_mutex needs to be held when calling this function.
*
* Return: Zero if the operation was successful, non-zero otherwise.
*/
int kbase_mmu_hw_do_operation(struct kbase_device *kbdev, struct kbase_as *as,
struct kbase_mmu_hw_op_param *op_param);
int kbase_mmu_hw_do_flush(struct kbase_device *kbdev, struct kbase_as *as,
const struct kbase_mmu_hw_op_param *op_param);
/**
* kbase_mmu_hw_do_flush_locked - Issue a flush operation to the MMU.
*
* @kbdev: Kbase device to issue the MMU operation on.
* @as: Address space to issue the MMU operation on.
* @op_param: Pointer to struct containing information about the MMU
* operation to perform.
*
* Issue a flush operation on the address space as per the information
* specified inside @op_param. This function should not be called for
* GPUs where MMU command to flush the cache(s) is deprecated.
* Both mmu_hw_mutex and hwaccess_lock need to be held when calling this
* function.
*
* Return: Zero if the operation was successful, non-zero otherwise.
*/
int kbase_mmu_hw_do_flush_locked(struct kbase_device *kbdev, struct kbase_as *as,
const struct kbase_mmu_hw_op_param *op_param);
/**
* kbase_mmu_hw_do_flush_on_gpu_ctrl - Issue a flush operation to the MMU.
*
* @kbdev: Kbase device to issue the MMU operation on.
* @as: Address space to issue the MMU operation on.
* @op_param: Pointer to struct containing information about the MMU
* operation to perform.
*
* Issue a flush operation on the address space as per the information
* specified inside @op_param. GPU command is used to flush the cache(s)
* instead of the MMU command.
*
* Return: Zero if the operation was successful, non-zero otherwise.
*/
int kbase_mmu_hw_do_flush_on_gpu_ctrl(struct kbase_device *kbdev, struct kbase_as *as,
const struct kbase_mmu_hw_op_param *op_param);
/**
* kbase_mmu_hw_clear_fault - Clear a fault that has been previously reported by

View File

@@ -26,13 +26,17 @@
#include <mali_kbase_mem.h>
#include <mmu/mali_kbase_mmu_hw.h>
#include <tl/mali_kbase_tracepoints.h>
#include <linux/delay.h>
/**
* lock_region() - Generate lockaddr to lock memory region in MMU
* @gpu_props: GPU properties for finding the MMU lock region size
* @pfn: Starting page frame number of the region to lock
* @num_pages: Number of pages to lock. It must be greater than 0.
* @lockaddr: Address and size of memory region to lock
*
* @gpu_props: GPU properties for finding the MMU lock region size.
* @lockaddr: Address and size of memory region to lock.
* @op_param: Pointer to a struct containing the starting page frame number of
* the region to lock, the number of pages to lock and page table
* levels to skip when flushing (if supported).
*
* The lockaddr value is a combination of the starting address and
* the size of the region that encompasses all the memory pages to lock.
@@ -63,14 +67,14 @@
*
* Return: 0 if success, or an error code on failure.
*/
static int lock_region(struct kbase_gpu_props const *gpu_props, u64 pfn, u32 num_pages,
u64 *lockaddr)
static int lock_region(struct kbase_gpu_props const *gpu_props, u64 *lockaddr,
const struct kbase_mmu_hw_op_param *op_param)
{
const u64 lockaddr_base = pfn << PAGE_SHIFT;
const u64 lockaddr_end = ((pfn + num_pages) << PAGE_SHIFT) - 1;
const u64 lockaddr_base = op_param->vpfn << PAGE_SHIFT;
const u64 lockaddr_end = ((op_param->vpfn + op_param->nr) << PAGE_SHIFT) - 1;
u64 lockaddr_size_log2;
if (num_pages == 0)
if (op_param->nr == 0)
return -EINVAL;
/* The MMU lock region is a self-aligned region whose size
@@ -122,7 +126,6 @@ static int lock_region(struct kbase_gpu_props const *gpu_props, u64 pfn, u32 num
*/
*lockaddr = lockaddr_base & ~((1ull << lockaddr_size_log2) - 1);
*lockaddr |= lockaddr_size_log2 - 1;
return 0;
}
@@ -165,6 +168,100 @@ static int write_cmd(struct kbase_device *kbdev, int as_nr, u32 cmd)
return status;
}
#if MALI_USE_CSF && !IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI)
/*
 * Poll SHADER_PWRTRANS_LO/HI until both read back as zero.
 *
 * Must be called with hwaccess_lock held. At most WAIT_TIMEOUT polls are made,
 * with a DELAY_TIME_IN_US busy-wait after each unsuccessful poll.
 *
 * Return: 0 once both registers are clear, -ETIMEDOUT if they are still set
 *         after the last poll.
 */
static int wait_cores_power_trans_complete(struct kbase_device *kbdev)
{
#define WAIT_TIMEOUT 1000 /* 1ms timeout */
#define DELAY_TIME_IN_US 1
	int polls_left = WAIT_TIMEOUT;

	lockdep_assert_held(&kbdev->hwaccess_lock);

	while (polls_left-- > 0) {
		const u32 pwrtrans_lo =
			kbase_reg_read(kbdev, GPU_CONTROL_REG(SHADER_PWRTRANS_LO));
		const u32 pwrtrans_hi =
			kbase_reg_read(kbdev, GPU_CONTROL_REG(SHADER_PWRTRANS_HI));

		/* Nothing left in transition: done. */
		if ((pwrtrans_lo | pwrtrans_hi) == 0)
			return 0;

		udelay(DELAY_TIME_IN_US);
	}

	dev_warn(kbdev->dev, "SHADER_PWRTRANS set for too long");

	return -ETIMEDOUT;
}
/**
 * apply_hw_issue_GPU2019_3901_wa - Apply WA for the HW issue GPU2019_3901
 *
 * @kbdev:           Kbase device to issue the MMU operation on.
 * @mmu_cmd:         Pointer to the variable containing the MMU command that
 *                   is about to be sent to flush the L2 cache and do an
 *                   implicit unlock. Rewritten to AS_COMMAND_FLUSH_PT when
 *                   the workaround has been applied.
 * @as_nr:           Address space number for which the MMU command needs to
 *                   be sent.
 * @hwaccess_locked: Flag to indicate if hwaccess_lock is held by the caller.
 *                   When false the lock is taken and released internally.
 *
 * This function ensures that the flush of LSC is not missed for the pages that
 * were unmapped from the GPU, due to the power down transition of shader cores.
 *
 * Return: 0 if the WA was successfully applied (or skipped because L2 is off),
 *         non-zero otherwise.
 */
static int apply_hw_issue_GPU2019_3901_wa(struct kbase_device *kbdev,
					  u32 *mmu_cmd, unsigned int as_nr, bool hwaccess_locked)
{
	unsigned long irq_flags = 0;
	int err = 0;

	if (!hwaccess_locked)
		spin_lock_irqsave(&kbdev->hwaccess_lock, irq_flags);

	/* Check if L2 is OFF. The cores also must be OFF if L2 is not up, so
	 * the workaround can be safely skipped.
	 */
	if (kbdev->pm.backend.l2_state == KBASE_L2_OFF)
		goto out;

	if (*mmu_cmd != AS_COMMAND_FLUSH_MEM) {
		dev_warn(kbdev->dev,
			 "Unexpected mmu command received");
		err = -EINVAL;
		goto out;
	}

	/* Wait for the LOCK MMU command to complete, issued by the caller */
	err = wait_ready(kbdev, as_nr);

	/* Each following step only runs when the previous one succeeded. */
	if (!err)
		err = kbase_gpu_cache_flush_and_busy_wait(kbdev,
							  GPU_COMMAND_CACHE_CLN_INV_LSC);

	if (!err)
		err = wait_cores_power_trans_complete(kbdev);

	/* As LSC is guaranteed to have been flushed we can use FLUSH_PT
	 * MMU command to only flush the L2.
	 */
	if (!err)
		*mmu_cmd = AS_COMMAND_FLUSH_PT;

out:
	if (!hwaccess_locked)
		spin_unlock_irqrestore(&kbdev->hwaccess_lock, irq_flags);

	return err;
}
#endif
void kbase_mmu_hw_configure(struct kbase_device *kbdev, struct kbase_as *as)
{
struct kbase_mmu_setup *current_setup = &as->current_setup;
@@ -222,95 +319,245 @@ void kbase_mmu_hw_configure(struct kbase_device *kbdev, struct kbase_as *as)
#endif
}
int kbase_mmu_hw_do_operation(struct kbase_device *kbdev, struct kbase_as *as,
struct kbase_mmu_hw_op_param *op_param)
/**
 * mmu_command_instr - Emit the timeline tracepoint for an MMU command.
 *
 * @kbdev:         Kbase device used to issue MMU operation on.
 * @kctx_id:       Kernel context ID for MMU command tracepoint.
 * @cmd:           Command issued to the MMU.
 * @lock_addr:     LOCKADDR value (base and size fields) used for the operation.
 * @mmu_sync_info: Indicates whether this call is synchronous wrt MMU ops.
 *
 * The base and size fields are extracted from @lock_addr and passed, together
 * with the other arguments, to KBASE_TLSTREAM_AUX_MMU_COMMAND().
 */
static void mmu_command_instr(struct kbase_device *kbdev, u32 kctx_id, u32 cmd, u64 lock_addr,
			      enum kbase_caller_mmu_sync_info mmu_sync_info)
{
	const bool sync_call = (mmu_sync_info == CALLER_MMU_SYNC);
	const u64 region_base = AS_LOCKADDR_LOCKADDR_BASE_GET(lock_addr);
	const u32 region_size = AS_LOCKADDR_LOCKADDR_SIZE_GET(lock_addr);

	KBASE_TLSTREAM_AUX_MMU_COMMAND(kbdev, kctx_id, cmd, sync_call, region_base,
				       region_size);
}
/**
 * mmu_hw_set_lock_addr - Program the LOCKADDR register of an address space
 *                        before a LOCK/UNLOCK command is issued.
 *
 * @kbdev:     Kbase device to issue the MMU operation on.
 * @as_nr:     Number of the address space whose LOCKADDR is programmed.
 * @lock_addr: Output; receives the value computed by lock_region().
 * @op_param:  Pointer to a struct containing information about the MMU
 *             operation.
 *
 * The registers are left untouched when lock_region() fails.
 *
 * Return: 0 on success, otherwise the error code from lock_region().
 */
static int mmu_hw_set_lock_addr(struct kbase_device *kbdev, int as_nr, u64 *lock_addr,
				const struct kbase_mmu_hw_op_param *op_param)
{
	const int err = lock_region(&kbdev->gpu_props, lock_addr, op_param);

	if (err)
		return err;

	/* Set the region that needs to be updated: low word, then high word */
	kbase_reg_write(kbdev, MMU_AS_REG(as_nr, AS_LOCKADDR_LO),
			*lock_addr & 0xFFFFFFFFUL);
	kbase_reg_write(kbdev, MMU_AS_REG(as_nr, AS_LOCKADDR_HI),
			(*lock_addr >> 32) & 0xFFFFFFFFUL);

	return 0;
}
/**
 * mmu_hw_do_lock_no_wait - Issue LOCK command to the MMU and return without
 *                          waiting for its completion.
 *
 * @kbdev:     Kbase device to issue the MMU operation on.
 * @as:        Address space to issue the MMU operation on.
 * @lock_addr: Address of memory region locked for this operation.
 * @op_param:  Pointer to a struct containing information about the MMU operation.
 *
 * The LOCKADDR register is programmed first; the LOCK command is only written
 * if that succeeded. A failure reported by write_cmd() is propagated to the
 * caller rather than dropped, so a LOCK that was never issued is not reported
 * as success.
 *
 * Return: 0 if issuing the command was successful, otherwise an error code.
 */
static int mmu_hw_do_lock_no_wait(struct kbase_device *kbdev, struct kbase_as *as, u64 *lock_addr,
				  const struct kbase_mmu_hw_op_param *op_param)
{
	int ret;

	ret = mmu_hw_set_lock_addr(kbdev, as->number, lock_addr, op_param);

	if (!ret)
		ret = write_cmd(kbdev, as->number, AS_COMMAND_LOCK);

	return ret;
}
static int mmu_hw_do_lock(struct kbase_device *kbdev, struct kbase_as *as,
const struct kbase_mmu_hw_op_param *op_param)
{
int ret;
u64 lock_addr = 0x0;
if (WARN_ON(kbdev == NULL) ||
WARN_ON(as == NULL) ||
WARN_ON(op_param == NULL))
if (WARN_ON(kbdev == NULL) || WARN_ON(as == NULL))
return -EINVAL;
lockdep_assert_held(&kbdev->mmu_hw_mutex);
ret = mmu_hw_do_lock_no_wait(kbdev, as, &lock_addr, op_param);
if (op_param->op == KBASE_MMU_OP_UNLOCK) {
/* Unlock doesn't require a lock first */
ret = write_cmd(kbdev, as->number, AS_COMMAND_UNLOCK);
/* Wait for UNLOCK command to complete */
if (!ret)
ret = wait_ready(kbdev, as->number);
if (!ret) {
/* read MMU_AS_CONTROL.LOCKADDR register */
lock_addr |= (u64)kbase_reg_read(kbdev,
MMU_AS_REG(as->number, AS_LOCKADDR_HI)) << 32;
lock_addr |= (u64)kbase_reg_read(kbdev,
MMU_AS_REG(as->number, AS_LOCKADDR_LO));
}
} else if (op_param->op >= KBASE_MMU_OP_FIRST &&
op_param->op < KBASE_MMU_OP_COUNT) {
ret = lock_region(&kbdev->gpu_props, op_param->vpfn, op_param->nr, &lock_addr);
if (!ret)
mmu_command_instr(kbdev, op_param->kctx_id, AS_COMMAND_LOCK, lock_addr,
op_param->mmu_sync_info);
if (!ret) {
/* Lock the region that needs to be updated */
kbase_reg_write(kbdev,
MMU_AS_REG(as->number, AS_LOCKADDR_LO),
lock_addr & 0xFFFFFFFFUL);
kbase_reg_write(kbdev,
MMU_AS_REG(as->number, AS_LOCKADDR_HI),
(lock_addr >> 32) & 0xFFFFFFFFUL);
write_cmd(kbdev, as->number, AS_COMMAND_LOCK);
return ret;
}
/* Translate and send operation to HW */
switch (op_param->op) {
case KBASE_MMU_OP_FLUSH_PT:
write_cmd(kbdev, as->number,
AS_COMMAND_FLUSH_PT);
break;
case KBASE_MMU_OP_FLUSH_MEM:
write_cmd(kbdev, as->number,
AS_COMMAND_FLUSH_MEM);
break;
case KBASE_MMU_OP_LOCK:
/* No further operation. */
break;
default:
dev_warn(kbdev->dev,
"Unsupported MMU operation (op=%d).\n",
op_param->op);
return -EINVAL;
};
int kbase_mmu_hw_do_unlock_no_addr(struct kbase_device *kbdev, struct kbase_as *as,
const struct kbase_mmu_hw_op_param *op_param)
{
int ret = 0;
/* Wait for the command to complete */
ret = wait_ready(kbdev, as->number);
}
} else {
/* Code should not reach here. */
dev_warn(kbdev->dev, "Invalid mmu operation (op=%d).\n",
op_param->op);
if (WARN_ON(kbdev == NULL) || WARN_ON(as == NULL))
return -EINVAL;
}
/* MMU command instrumentation */
ret = write_cmd(kbdev, as->number, AS_COMMAND_UNLOCK);
/* Wait for UNLOCK command to complete */
if (!ret)
ret = wait_ready(kbdev, as->number);
if (!ret) {
u64 lock_addr_base = AS_LOCKADDR_LOCKADDR_BASE_GET(lock_addr);
u32 lock_addr_size = AS_LOCKADDR_LOCKADDR_SIZE_GET(lock_addr);
u64 lock_addr = 0x0;
/* read MMU_AS_CONTROL.LOCKADDR register */
lock_addr |= (u64)kbase_reg_read(kbdev, MMU_AS_REG(as->number, AS_LOCKADDR_HI))
<< 32;
lock_addr |= (u64)kbase_reg_read(kbdev, MMU_AS_REG(as->number, AS_LOCKADDR_LO));
bool is_mmu_synchronous = false;
if (op_param->mmu_sync_info == CALLER_MMU_SYNC)
is_mmu_synchronous = true;
KBASE_TLSTREAM_AUX_MMU_COMMAND(kbdev, op_param->kctx_id,
op_param->op, is_mmu_synchronous,
lock_addr_base, lock_addr_size);
mmu_command_instr(kbdev, op_param->kctx_id, AS_COMMAND_UNLOCK,
lock_addr, op_param->mmu_sync_info);
}
return ret;
}
/*
 * Program LOCKADDR from @op_param, then issue UNLOCK through
 * kbase_mmu_hw_do_unlock_no_addr(). The UNLOCK is not attempted when
 * programming LOCKADDR fails.
 *
 * Return: 0 on success, -EINVAL if @kbdev or @as is NULL, otherwise the error
 *         code of the step that failed.
 */
int kbase_mmu_hw_do_unlock(struct kbase_device *kbdev, struct kbase_as *as,
			   const struct kbase_mmu_hw_op_param *op_param)
{
	u64 lock_addr = 0x0;
	int err;

	if (WARN_ON(kbdev == NULL) || WARN_ON(as == NULL))
		return -EINVAL;

	err = mmu_hw_set_lock_addr(kbdev, as->number, &lock_addr, op_param);
	if (err)
		return err;

	return kbase_mmu_hw_do_unlock_no_addr(kbdev, as, op_param);
}
/**
 * mmu_hw_do_flush - Lock a region and issue a FLUSH_PT/FLUSH_MEM MMU command.
 *
 * @kbdev:           Kbase device to issue the MMU operation on.
 * @as:              Address space to issue the MMU operation on.
 * @op_param:        Pointer to a struct containing information about the MMU
 *                   operation. Only KBASE_MMU_OP_FLUSH_PT and
 *                   KBASE_MMU_OP_FLUSH_MEM are accepted.
 * @hwaccess_locked: Flag to indicate if hwaccess_lock is held by the caller;
 *                   forwarded to the GPU2019_3901 workaround.
 *
 * mmu_hw_mutex must be held by the caller.
 *
 * Return: 0 if the flush command was issued and completed, -EINVAL for a NULL
 *         @kbdev/@as or an unexpected operation, otherwise the error code of
 *         the step that failed.
 */
static int mmu_hw_do_flush(struct kbase_device *kbdev, struct kbase_as *as,
			   const struct kbase_mmu_hw_op_param *op_param, bool hwaccess_locked)
{
	int ret;
	u64 lock_addr = 0x0;
	u32 mmu_cmd = AS_COMMAND_FLUSH_MEM;

	if (WARN_ON(kbdev == NULL) || WARN_ON(as == NULL))
		return -EINVAL;

	/* MMU operations can be either FLUSH_PT or FLUSH_MEM, anything else at
	 * this point would be unexpected.
	 */
	if (op_param->op != KBASE_MMU_OP_FLUSH_PT &&
	    op_param->op != KBASE_MMU_OP_FLUSH_MEM) {
		dev_err(kbdev->dev, "Unexpected flush operation received");
		return -EINVAL;
	}

	lockdep_assert_held(&kbdev->mmu_hw_mutex);

	if (op_param->op == KBASE_MMU_OP_FLUSH_PT)
		mmu_cmd = AS_COMMAND_FLUSH_PT;

	/* Lock the region that needs to be updated */
	ret = mmu_hw_do_lock_no_wait(kbdev, as, &lock_addr, op_param);
	if (ret)
		return ret;

#if MALI_USE_CSF && !IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI)
	/* WA for the BASE_HW_ISSUE_GPU2019_3901. No runtime check is used here
	 * as the WA is applicable to all CSF GPUs where FLUSH_MEM/PT command is
	 * supported, and this function doesn't get called for the GPUs where
	 * FLUSH_MEM/PT command is deprecated.
	 */
	if (mmu_cmd == AS_COMMAND_FLUSH_MEM) {
		ret = apply_hw_issue_GPU2019_3901_wa(kbdev, &mmu_cmd,
						     as->number, hwaccess_locked);
		if (ret)
			return ret;
	}
#endif

	/* Do not wait for, or trace, a command that was never written. */
	ret = write_cmd(kbdev, as->number, mmu_cmd);
	if (ret)
		return ret;

	/* Wait for the command to complete */
	ret = wait_ready(kbdev, as->number);

	if (!ret)
		mmu_command_instr(kbdev, op_param->kctx_id, mmu_cmd, lock_addr,
				  op_param->mmu_sync_info);

	return ret;
}
/**
 * kbase_mmu_hw_do_flush_locked - Flush entry point for callers that already
 *                                hold hwaccess_lock.
 * @kbdev:    Kbase device.
 * @as:       Address space to flush.
 * @op_param: Flush operation parameters.
 *
 * Asserts (via lockdep) that kbdev->hwaccess_lock is held and then runs
 * mmu_hw_do_flush() with its hwaccess_locked argument set to true.
 *
 * Return: whatever mmu_hw_do_flush() returns.
 */
int kbase_mmu_hw_do_flush_locked(struct kbase_device *kbdev, struct kbase_as *as,
				 const struct kbase_mmu_hw_op_param *op_param)
{
	int err;

	lockdep_assert_held(&kbdev->hwaccess_lock);

	err = mmu_hw_do_flush(kbdev, as, op_param, true);

	return err;
}
int kbase_mmu_hw_do_flush(struct kbase_device *kbdev, struct kbase_as *as,
const struct kbase_mmu_hw_op_param *op_param)
{
return mmu_hw_do_flush(kbdev, as, op_param, false);
}
/**
 * kbase_mmu_hw_do_flush_on_gpu_ctrl - Flush by locking the region, cleaning and
 *                                     invalidating caches through GPU_CONTROL,
 *                                     then unlocking.
 * @kbdev:    Kbase device; must not be NULL.
 * @as:       Address space to operate on; must not be NULL.
 * @op_param: Operation parameters. @op_param->op must be either
 *            KBASE_MMU_OP_FLUSH_PT or KBASE_MMU_OP_FLUSH_MEM.
 *
 * The caller must hold both kbdev->hwaccess_lock and kbdev->mmu_hw_mutex
 * (checked with lockdep).
 *
 * Return: -EINVAL if @kbdev or @as is NULL or the operation is not a flush;
 * the lock error if locking fails; otherwise the cache-flush error if there
 * was one, else the result of the unlock.
 */
int kbase_mmu_hw_do_flush_on_gpu_ctrl(struct kbase_device *kbdev, struct kbase_as *as,
				      const struct kbase_mmu_hw_op_param *op_param)
{
	u32 gpu_cmd;
	int lock_err, flush_err, unlock_err;

	if (WARN_ON(kbdev == NULL) || WARN_ON(as == NULL))
		return -EINVAL;

	/* Only the two flush operations are valid here; each one maps to its
	 * own GPU_CONTROL cache command.
	 */
	switch (op_param->op) {
	case KBASE_MMU_OP_FLUSH_PT:
		gpu_cmd = GPU_COMMAND_CACHE_CLN_INV_L2;
		break;
	case KBASE_MMU_OP_FLUSH_MEM:
		gpu_cmd = GPU_COMMAND_CACHE_CLN_INV_L2_LSC;
		break;
	default:
		dev_err(kbdev->dev, "Unexpected flush operation received");
		return -EINVAL;
	}

	lockdep_assert_held(&kbdev->hwaccess_lock);
	lockdep_assert_held(&kbdev->mmu_hw_mutex);

	/* Step 1: MMU_AS_CONTROL.COMMAND.LOCK */
	lock_err = mmu_hw_do_lock(kbdev, as, op_param);
	if (lock_err)
		return lock_err;

	/* Step 2: GPU_CONTROL.COMMAND.FLUSH_CACHES */
	flush_err = kbase_gpu_cache_flush_and_busy_wait(kbdev, gpu_cmd);

	/* Step 3: MMU_AS_CONTROL.COMMAND.UNLOCK, issued even if the cache
	 * flush failed.
	 */
	unlock_err = kbase_mmu_hw_do_unlock_no_addr(kbdev, as, op_param);

	/* A flush failure takes precedence over an unlock failure. */
	if (flush_err)
		return flush_err;

	return unlock_err;
}
void kbase_mmu_hw_clear_fault(struct kbase_device *kbdev, struct kbase_as *as,
enum kbase_mmu_fault_type type)
{

View File

@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
* (C) COPYRIGHT 2010-2014, 2016-2021 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2010-2014, 2016-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -189,14 +189,9 @@ static void set_num_valid_entries(u64 *pgd, unsigned int num_of_valid_entries)
<< UNUSED_BIT_POSITION_IN_PAGE_DESCRIPTOR);
}
/*
 * entry_set_pte - Write a descriptor that points at the next-level page table.
 *
 * The value stored through page_table_entry_set() is the page-aligned part of
 * phy (phy & PAGE_MASK) combined with ENTRY_ACCESS_BIT and ENTRY_IS_PTE.
 *
 * NOTE(review): two revisions appear interleaved in this span. The
 * (pgd, vpfn, phy) form writes &pgd[vpfn] and also increments the valid-entry
 * count kept via get_num_valid_entries()/set_num_valid_entries(); the
 * (entry, phy) form only writes the descriptor. Confirm against the real file
 * which form is current.
 */
static void entry_set_pte(u64 *pgd, u64 vpfn, phys_addr_t phy)
static void entry_set_pte(u64 *entry, phys_addr_t phy)
{
unsigned int nr_entries = get_num_valid_entries(pgd);
page_table_entry_set(&pgd[vpfn], (phy & PAGE_MASK) | ENTRY_ACCESS_BIT |
ENTRY_IS_PTE);
set_num_valid_entries(pgd, nr_entries + 1);
page_table_entry_set(entry, (phy & PAGE_MASK) | ENTRY_ACCESS_BIT | ENTRY_IS_PTE);
}
static void entry_invalidate(u64 *entry)

View File

@@ -29,6 +29,7 @@
#include "mali_kbase_config_platform.h"
static void enable_gpu_power_control(struct kbase_device *kbdev)
{
unsigned int i;
@@ -50,7 +51,6 @@ static void enable_gpu_power_control(struct kbase_device *kbdev)
}
}
static void disable_gpu_power_control(struct kbase_device *kbdev)
{
unsigned int i;
@@ -99,9 +99,8 @@ static int pm_callback_power_on(struct kbase_device *kbdev)
#else
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
#ifdef KBASE_PM_RUNTIME
error = pm_runtime_get_sync(kbdev->dev);
enable_gpu_power_control(kbdev);
if (error == 1) {
/*
* Let core know that the chip has not been
@@ -109,8 +108,11 @@ static int pm_callback_power_on(struct kbase_device *kbdev)
*/
ret = 0;
}
dev_dbg(kbdev->dev, "pm_runtime_get_sync returned %d\n", error);
#else
enable_gpu_power_control(kbdev);
#endif /* KBASE_PM_RUNTIME */
#endif /* MALI_USE_CSF */
return ret;
@@ -243,7 +245,9 @@ static int pm_callback_runtime_on(struct kbase_device *kbdev)
{
dev_dbg(kbdev->dev, "%s\n", __func__);
#if !MALI_USE_CSF
enable_gpu_power_control(kbdev);
#endif
return 0;
}
@@ -251,7 +255,9 @@ static void pm_callback_runtime_off(struct kbase_device *kbdev)
{
dev_dbg(kbdev->dev, "%s\n", __func__);
#if !MALI_USE_CSF
disable_gpu_power_control(kbdev);
#endif
}
static void pm_callback_resume(struct kbase_device *kbdev)

View File

@@ -1,6 +1,6 @@
# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
#
# (C) COPYRIGHT 2017, 2020-2021 ARM Limited. All rights reserved.
# (C) COPYRIGHT 2017, 2020-2022 ARM Limited. All rights reserved.
#
# This program is free software and is provided to you under the terms of the
# GNU General Public License version 2 as published by the Free Software
@@ -27,4 +27,5 @@ subdir-ccflags-y += -I$(src)/include \
obj-$(CONFIG_MALI_KUTF) += kutf/
obj-$(CONFIG_MALI_KUTF_IRQ_TEST) += mali_kutf_irq_test/
obj-$(CONFIG_MALI_KUTF_CLK_RATE_TRACE) += mali_kutf_clk_rate_trace/kernel/
obj-$(CONFIG_MALI_KUTF_MGM_INTEGRATION) += mali_kutf_mgm_integration_test/

Some files were not shown because too many files have changed in this diff Show More