gpu/arm: bump driver to r43p0

This commit is contained in:
Mauro (mdrjr) Ribeiro
2023-04-19 12:30:49 -03:00
parent 1dadda45f0
commit 2944911cd6
146 changed files with 12031 additions and 4611 deletions

View File

@@ -1,6 +1,6 @@
# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
#
# (C) COPYRIGHT 2012-2022 ARM Limited. All rights reserved.
# (C) COPYRIGHT 2012-2023 ARM Limited. All rights reserved.
#
# This program is free software and is provided to you under the terms of the
# GNU General Public License version 2 as published by the Free Software
@@ -69,7 +69,7 @@ endif
#
# Driver version string which is returned to userspace via an ioctl
MALI_RELEASE_NAME ?= '"r40p0-01eac0"'
MALI_RELEASE_NAME ?= '"r43p0-01eac0"'
# Set up defaults if not defined by build system
ifeq ($(CONFIG_MALI_DEBUG), y)
MALI_UNIT_TEST = 1

View File

@@ -1,6 +1,6 @@
# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
#
# (C) COPYRIGHT 2012-2022 ARM Limited. All rights reserved.
# (C) COPYRIGHT 2012-2023 ARM Limited. All rights reserved.
#
# This program is free software and is provided to you under the terms of the
# GNU General Public License version 2 as published by the Free Software
@@ -43,9 +43,30 @@ config MALI_PLATFORM_NAME
include in the build. 'platform/$(MALI_PLATFORM_NAME)/Kbuild' must
exist.
config MALI_REAL_HW
choice
prompt "Mali HW backend"
depends on MALI_MIDGARD
def_bool !MALI_NO_MALI
default MALI_REAL_HW
config MALI_REAL_HW
bool "Enable build of Mali kernel driver for real HW"
depends on MALI_MIDGARD
help
This is the default HW backend.
config MALI_NO_MALI
bool "Enable build of Mali kernel driver for No Mali"
depends on MALI_MIDGARD && MALI_EXPERT
help
This can be used to test the driver in a simulated environment
whereby the hardware is not physically present. If the hardware is physically
present it will not be used. This can be used to test the majority of the
driver without needing actual hardware or for software benchmarking.
All calls to the simulated hardware will complete immediately as if the hardware
completed the task.
endchoice
menu "Platform specific options"
source "drivers/gpu/arm/midgard/platform/Kconfig"
@@ -130,6 +151,11 @@ config MALI_DMA_BUF_LEGACY_COMPAT
flushes in other drivers. This only has an effect for clients using
UK 11.18 or older. For later UK versions it is not possible.
config MALI_CORESIGHT
depends on MALI_MIDGARD && MALI_CSF_SUPPORT && !MALI_NO_MALI
bool "Enable Kbase CoreSight tracing support"
default n
menuconfig MALI_EXPERT
depends on MALI_MIDGARD
bool "Enable Expert Settings"
@@ -140,7 +166,19 @@ menuconfig MALI_EXPERT
if MALI_EXPERT
config MALI_2MB_ALLOC
config LARGE_PAGE_ALLOC_OVERRIDE
bool "Override default setting of 2MB pages"
depends on MALI_MIDGARD && MALI_EXPERT
default n
help
An override config for LARGE_PAGE_ALLOC config.
When LARGE_PAGE_ALLOC_OVERRIDE is Y, 2MB page allocation will be
enabled by LARGE_PAGE_ALLOC. When this is N, the feature will be
enabled when GPU HW satisfies requirements.
If in doubt, say N
config LARGE_PAGE_ALLOC
bool "Attempt to allocate 2MB pages"
depends on MALI_MIDGARD && MALI_EXPERT
default n
@@ -149,6 +187,10 @@ config MALI_2MB_ALLOC
allocate 2MB pages from the kernel. This reduces TLB pressure and
helps to prevent memory fragmentation.
Note this config applies only when LARGE_PAGE_ALLOC_OVERRIDE config
is enabled and enabling this on a GPU HW that does not satisfy
requirements can cause serious problem.
If in doubt, say N
config MALI_MEMORY_FULLY_BACKED
@@ -177,18 +219,6 @@ config MALI_CORESTACK
comment "Platform options"
depends on MALI_MIDGARD && MALI_EXPERT
config MALI_NO_MALI
bool "Enable No Mali"
depends on MALI_MIDGARD && MALI_EXPERT
default n
help
This can be used to test the driver in a simulated environment
whereby the hardware is not physically present. If the hardware is physically
present it will not be used. This can be used to test the majority of the
driver without needing actual hardware or for software benchmarking.
All calls to the simulated hardware will complete immediately as if the hardware
completed the task.
config MALI_ERROR_INJECT
bool "Enable No Mali error injection"
depends on MALI_MIDGARD && MALI_EXPERT && MALI_NO_MALI
@@ -196,31 +226,9 @@ config MALI_ERROR_INJECT
help
Enables insertion of errors to test module failure and recovery mechanisms.
config MALI_GEM5_BUILD
bool "Enable build of Mali kernel driver for GEM5"
depends on MALI_MIDGARD && MALI_EXPERT
default n
help
This option is to do a Mali GEM5 build.
If unsure, say N.
comment "Debug options"
depends on MALI_MIDGARD && MALI_EXPERT
config MALI_FW_CORE_DUMP
bool "Enable support for FW core dump"
depends on MALI_MIDGARD && MALI_EXPERT && MALI_CSF_SUPPORT
default n
help
Adds ability to request firmware core dump
Example:
* To explicitly request core dump:
echo 1 >/sys/kernel/debug/mali0/fw_core_dump
* To output current core dump (after explicitly requesting a core dump,
or kernel driver reported an internal firmware error):
cat /sys/kernel/debug/mali0/fw_core_dump
config MALI_DEBUG
bool "Enable debug build"
depends on MALI_MIDGARD && MALI_EXPERT

View File

@@ -1,6 +1,6 @@
# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
#
# (C) COPYRIGHT 2010-2022 ARM Limited. All rights reserved.
# (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved.
#
# This program is free software and is provided to you under the terms of the
# GNU General Public License version 2 as published by the Free Software
@@ -39,17 +39,10 @@ ifeq ($(CONFIG_MALI_MIDGARD),m)
CONFIG_MALI_ARBITRATION ?= n
CONFIG_MALI_PARTITION_MANAGER ?= n
ifeq ($(origin CONFIG_MALI_ABITER_MODULES), undefined)
CONFIG_MALI_ARBITER_MODULES := $(CONFIG_MALI_ARBITRATION)
endif
ifeq ($(origin CONFIG_MALI_GPU_POWER_MODULES), undefined)
CONFIG_MALI_GPU_POWER_MODULES := $(CONFIG_MALI_ARBITRATION)
endif
ifneq ($(CONFIG_MALI_NO_MALI),y)
# Prevent misuse when CONFIG_MALI_NO_MALI=y
CONFIG_MALI_REAL_HW ?= y
CONFIG_MALI_CORESIGHT = n
endif
ifeq ($(CONFIG_MALI_MIDGARD_DVFS),y)
@@ -64,10 +57,8 @@ ifeq ($(CONFIG_MALI_MIDGARD),m)
CONFIG_MALI_DMA_BUF_LEGACY_COMPAT = n
endif
ifeq ($(CONFIG_XEN),y)
ifneq ($(CONFIG_MALI_ARBITER_SUPPORT), n)
CONFIG_MALI_XEN ?= m
endif
ifeq ($(CONFIG_MALI_CSF_SUPPORT), y)
CONFIG_MALI_CORESIGHT ?= n
endif
#
@@ -76,12 +67,14 @@ ifeq ($(CONFIG_MALI_MIDGARD),m)
ifeq ($(CONFIG_MALI_EXPERT), y)
ifeq ($(CONFIG_MALI_NO_MALI), y)
CONFIG_MALI_REAL_HW = n
else
# Prevent misuse when CONFIG_MALI_NO_MALI=n
CONFIG_MALI_REAL_HW = y
CONFIG_MALI_ERROR_INJECT = n
endif
ifeq ($(CONFIG_MALI_HW_ERRATA_1485982_NOT_AFFECTED), y)
# Prevent misuse when CONFIG_MALI_HW_ERRATA_1485982_NOT_AFFECTED=y
CONFIG_MALI_HW_ERRATA_1485982_USE_CLOCK_ALTERNATIVE = n
@@ -105,7 +98,8 @@ ifeq ($(CONFIG_MALI_MIDGARD),m)
else
# Prevent misuse when CONFIG_MALI_EXPERT=n
CONFIG_MALI_CORESTACK = n
CONFIG_MALI_2MB_ALLOC = n
CONFIG_LARGE_PAGE_ALLOC_OVERRIDE = n
CONFIG_LARGE_PAGE_ALLOC = n
CONFIG_MALI_PWRSOFT_765 = n
CONFIG_MALI_MEMORY_FULLY_BACKED = n
CONFIG_MALI_JOB_DUMP = n
@@ -143,8 +137,6 @@ ifeq ($(CONFIG_MALI_MIDGARD),m)
else
# Prevent misuse when CONFIG_MALI_MIDGARD=n
CONFIG_MALI_ARBITRATION = n
CONFIG_MALI_ARBITER_MODULES = n
CONFIG_MALI_GPU_POWER_MODULES = n
CONFIG_MALI_KUTF = n
CONFIG_MALI_KUTF_IRQ_TEST = n
CONFIG_MALI_KUTF_CLK_RATE_TRACE = n
@@ -158,18 +150,16 @@ CONFIGS := \
CONFIG_MALI_GATOR_SUPPORT \
CONFIG_MALI_ARBITER_SUPPORT \
CONFIG_MALI_ARBITRATION \
CONFIG_MALI_ARBITER_MODULES \
CONFIG_MALI_GPU_POWER_MODULES \
CONFIG_MALI_PARTITION_MANAGER \
CONFIG_MALI_REAL_HW \
CONFIG_MALI_GEM5_BUILD \
CONFIG_MALI_DEVFREQ \
CONFIG_MALI_MIDGARD_DVFS \
CONFIG_MALI_DMA_BUF_MAP_ON_DEMAND \
CONFIG_MALI_DMA_BUF_LEGACY_COMPAT \
CONFIG_MALI_EXPERT \
CONFIG_MALI_CORESTACK \
CONFIG_MALI_2MB_ALLOC \
CONFIG_LARGE_PAGE_ALLOC_OVERRIDE \
CONFIG_LARGE_PAGE_ALLOC \
CONFIG_MALI_PWRSOFT_765 \
CONFIG_MALI_MEMORY_FULLY_BACKED \
CONFIG_MALI_JOB_DUMP \
@@ -189,10 +179,13 @@ CONFIGS := \
CONFIG_MALI_KUTF_IRQ_TEST \
CONFIG_MALI_KUTF_CLK_RATE_TRACE \
CONFIG_MALI_KUTF_MGM_INTEGRATION_TEST \
CONFIG_MALI_XEN
CONFIG_MALI_XEN \
CONFIG_MALI_CORESIGHT
#
THIS_DIR := $(dir $(lastword $(MAKEFILE_LIST)))
-include $(THIS_DIR)/../arbitration/Makefile
# MAKE_ARGS to pass the custom CONFIGs on out-of-tree build
#
# Generate the list of CONFIGs and values.
@@ -254,7 +247,8 @@ KBUILD_CFLAGS += -Wdisabled-optimization
# global variables.
KBUILD_CFLAGS += $(call cc-option, -Wlogical-op)
KBUILD_CFLAGS += -Wmissing-field-initializers
KBUILD_CFLAGS += -Wtype-limits
# -Wtype-limits must be disabled due to build failures on kernel 5.x
KBUILD_CFLAGS += -Wno-type-limit
KBUILD_CFLAGS += $(call cc-option, -Wmaybe-uninitialized)
KBUILD_CFLAGS += $(call cc-option, -Wunused-macros)
@@ -263,6 +257,18 @@ KBUILD_CPPFLAGS += -DKBUILD_EXTRA_WARN2
# This warning is disabled to avoid build failures in some kernel versions
KBUILD_CFLAGS += -Wno-ignored-qualifiers
ifeq ($(CONFIG_GCOV_KERNEL),y)
KBUILD_CFLAGS += $(call cc-option, -ftest-coverage)
KBUILD_CFLAGS += $(call cc-option, -fprofile-arcs)
EXTRA_CFLAGS += -DGCOV_PROFILE=1
endif
ifeq ($(CONFIG_MALI_KCOV),y)
KBUILD_CFLAGS += $(call cc-option, -fsanitize-coverage=trace-cmp)
EXTRA_CFLAGS += -DKCOV=1
EXTRA_CFLAGS += -DKCOV_ENABLE_COMPARISONS=1
endif
all:
$(MAKE) -C $(KDIR) M=$(CURDIR) $(MAKE_ARGS) EXTRA_CFLAGS="$(EXTRA_CFLAGS)" KBUILD_EXTRA_SYMBOLS="$(EXTRA_SYMBOLS)" modules

View File

@@ -1,6 +1,6 @@
# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
#
# (C) COPYRIGHT 2012-2022 ARM Limited. All rights reserved.
# (C) COPYRIGHT 2012-2023 ARM Limited. All rights reserved.
#
# This program is free software and is provided to you under the terms of the
# GNU General Public License version 2 as published by the Free Software
@@ -41,11 +41,31 @@ config MALI_PLATFORM_NAME
When PLATFORM_CUSTOM is set, this needs to be set manually to
pick up the desired platform files.
config MALI_REAL_HW
bool
choice
prompt "Mali HW backend"
depends on MALI_MIDGARD
default y
default n if NO_MALI
default MALI_NO_MALI if NO_MALI
default MALI_REAL_HW
config MALI_REAL_HW
bool "Enable build of Mali kernel driver for real HW"
depends on MALI_MIDGARD
help
This is the default HW backend.
config MALI_NO_MALI
bool "Enable build of Mali kernel driver for No Mali"
depends on MALI_MIDGARD && MALI_EXPERT
help
This can be used to test the driver in a simulated environment
whereby the hardware is not physically present. If the hardware is physically
present it will not be used. This can be used to test the majority of the
driver without needing actual hardware or for software benchmarking.
All calls to the simulated hardware will complete immediately as if the hardware
completed the task.
endchoice
config MALI_PLATFORM_DT_PIN_RST
bool "Enable Juno GPU Pin reset"
@@ -139,6 +159,12 @@ config MALI_DMA_BUF_LEGACY_COMPAT
flushes in other drivers. This only has an effect for clients using
UK 11.18 or older. For later UK versions it is not possible.
config MALI_CORESIGHT
depends on MALI_MIDGARD && MALI_CSF_SUPPORT && !NO_MALI
select CSFFW_DEBUG_FW_AS_RW
bool "Enable Kbase CoreSight tracing support"
default n
menuconfig MALI_EXPERT
depends on MALI_MIDGARD
bool "Enable Expert Settings"
@@ -170,20 +196,6 @@ config MALI_CORESTACK
If unsure, say N.
config MALI_FW_CORE_DUMP
bool "Enable support for FW core dump"
depends on MALI_MIDGARD && MALI_EXPERT && MALI_CSF_SUPPORT
default n
help
Adds ability to request firmware core dump
Example:
* To explicitly request core dump:
echo 1 >/sys/kernel/debug/mali0/fw_core_dump
* To output current core dump (after explicitly requesting a core dump,
or kernel driver reported an internal firmware error):
cat /sys/kernel/debug/mali0/fw_core_dump
choice
prompt "Error injection level"
depends on MALI_MIDGARD && MALI_EXPERT
@@ -223,14 +235,6 @@ config MALI_ERROR_INJECT
depends on MALI_MIDGARD && MALI_EXPERT
default y if !MALI_ERROR_INJECT_NONE
config MALI_GEM5_BUILD
bool "Enable build of Mali kernel driver for GEM5"
depends on MALI_MIDGARD && MALI_EXPERT
default n
help
This option is to do a Mali GEM5 build.
If unsure, say N.
config MALI_DEBUG
bool "Enable debug build"
depends on MALI_MIDGARD && MALI_EXPERT
@@ -239,6 +243,23 @@ config MALI_DEBUG
help
Select this option for increased checking and reporting of errors.
config MALI_GCOV_KERNEL
bool "Enable branch coverage via gcov"
depends on MALI_MIDGARD && MALI_DEBUG
default n
help
Choose this option to enable building kbase with branch
coverage information. When built against a supporting kernel,
the coverage information will be available via debugfs.
config MALI_KCOV
bool "Enable kcov coverage to support fuzzers"
depends on MALI_MIDGARD && MALI_DEBUG
default n
help
Choose this option to enable building with fuzzing-oriented
coverage, to improve the random test cases that are generated.
config MALI_FENCE_DEBUG
bool "Enable debug sync fence usage"
depends on MALI_MIDGARD && MALI_EXPERT

View File

@@ -1,170 +0,0 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
* (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
* Foundation, and any use by you of this program is subject to the terms
* of such GNU license.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, you can access it online at
* http://www.gnu.org/licenses/gpl-2.0.html.
*
*/
/**
* DOC: Defines the Mali arbiter interface
*/
#ifndef _MALI_KBASE_ARBITER_INTERFACE_H_
#define _MALI_KBASE_ARBITER_INTERFACE_H_
/**
* DOC: Mali arbiter interface version
*
* This specifies the current version of the configuration interface. Whenever
* the arbiter interface changes, so that integration effort is required, the
* version number will be increased. Each configuration must make an effort
* to check that it implements the correct version.
*
* Version history:
* 1 - Added the Mali arbiter configuration interface.
* 2 - Strip out reference code from header
* 3 - Removed DVFS utilization interface (DVFS moved to arbiter side)
* 4 - Added max_config support
* 5 - Added GPU clock frequency reporting support from arbiter
*/
#define MALI_KBASE_ARBITER_INTERFACE_VERSION 5
/**
* DOC: NO_FREQ is used in case platform doesn't support reporting frequency
*/
#define NO_FREQ 0
struct arbiter_if_dev;
/**
* struct arbiter_if_arb_vm_ops - Interface to communicate messages to VM
*
* @arb_vm_gpu_stop: Callback to ask VM to stop using GPU.
* dev: The arbif kernel module device.
*
* Informs KBase to stop using the GPU as soon as possible.
* Note: Once the driver is no longer using the GPU, a call
* to vm_arb_gpu_stopped is expected by the arbiter.
* @arb_vm_gpu_granted: Callback to indicate that GPU has been granted to VM.
* dev: The arbif kernel module device.
*
* Informs KBase that the GPU can now be used by the VM.
* @arb_vm_gpu_lost: Callback to indicate that VM has lost the GPU.
* dev: The arbif kernel module device.
*
* This is called if KBase takes too long to respond to the
* arbiter stop request.
* Once this is called, KBase will assume that access to the
* GPU has been lost and will fail all running jobs and
* reset its internal state.
* If successful, will respond with a vm_arb_gpu_stopped
* message.
* @arb_vm_max_config: Callback to send the max config info to the VM.
* dev: The arbif kernel module device.
* max_l2_slices: The maximum number of L2 slices.
* max_core_mask: The largest core mask.
*
* Informs KBase the maximum resources that can be
* allocated to the partition in use.
* @arb_vm_update_freq: Callback to notify that GPU clock frequency has been
* updated.
* dev: The arbif kernel module device.
* freq: GPU clock frequency value reported from arbiter
*
* Informs KBase that the GPU clock frequency has been updated.
*
* This struct contains callbacks used to deliver messages
* from the arbiter to the corresponding VM.
* Note that calls into these callbacks may have synchronous calls back into
* the arbiter arbiter_if_vm_arb_ops callbacks below.
* For example vm_arb_gpu_stopped() may be called as a side effect of
* arb_vm_gpu_stop() being called here.
*/
struct arbiter_if_arb_vm_ops {
void (*arb_vm_gpu_stop)(struct device *dev);
void (*arb_vm_gpu_granted)(struct device *dev);
void (*arb_vm_gpu_lost)(struct device *dev);
void (*arb_vm_max_config)(struct device *dev, uint32_t max_l2_slices,
uint32_t max_core_mask);
void (*arb_vm_update_freq)(struct device *dev, uint32_t freq);
};
/**
* struct arbiter_if_vm_arb_ops - Interface to communicate messages to arbiter
*
* @vm_arb_register_dev: Callback to register VM device driver callbacks.
* arbif_dev: The arbiter interface to register
* with for device callbacks
* dev: The device structure to supply in the callbacks.
* ops: The callbacks that the device driver supports
* (none are optional).
*
* Returns
* 0 - successful.
* -EINVAL - invalid argument.
* -EPROBE_DEFER - module dependencies are not yet
* available.
* @vm_arb_unregister_dev: Callback to unregister VM device driver callbacks.
* arbif_dev: The arbiter interface to unregistering
* from.
* @vm_arb_get_max_config: Callback to Request the max config from the Arbiter.
* arbif_dev: The arbiter interface to issue the
* request to.
* @vm_arb_gpu_request: Callback to ask the arbiter interface for GPU access.
* arbif_dev: The arbiter interface to issue the request
* to.
* @vm_arb_gpu_active: Callback to inform arbiter that driver has gone active.
* arbif_dev: The arbiter interface device to notify.
* @vm_arb_gpu_idle: Callback to inform the arbiter that driver has gone idle.
* arbif_dev: The arbiter interface device to notify.
* @vm_arb_gpu_stopped: Callback to inform arbiter that driver has stopped
* using the GPU
* arbif_dev: The arbiter interface device to notify.
* gpu_required: The GPU is still needed to do more work.
*
* This struct contains callbacks used to request operations
* from the VM to the arbiter.
* Note that we must not make any synchronous calls back in to the VM
* (via arbiter_if_arb_vm_ops above) in the context of these callbacks.
*/
struct arbiter_if_vm_arb_ops {
int (*vm_arb_register_dev)(struct arbiter_if_dev *arbif_dev,
struct device *dev, struct arbiter_if_arb_vm_ops *ops);
void (*vm_arb_unregister_dev)(struct arbiter_if_dev *arbif_dev);
void (*vm_arb_get_max_config)(struct arbiter_if_dev *arbif_dev);
void (*vm_arb_gpu_request)(struct arbiter_if_dev *arbif_dev);
void (*vm_arb_gpu_active)(struct arbiter_if_dev *arbif_dev);
void (*vm_arb_gpu_idle)(struct arbiter_if_dev *arbif_dev);
void (*vm_arb_gpu_stopped)(struct arbiter_if_dev *arbif_dev,
u8 gpu_required);
};
/**
* struct arbiter_if_dev - Arbiter Interface
* @vm_ops: Callback functions for connecting KBase with
* arbiter interface device.
* @priv_data: Internal arbif data not used by KBASE.
*
* Arbiter Interface Kernel Module State used for linking KBase
* with an arbiter interface platform device
*/
struct arbiter_if_dev {
struct arbiter_if_vm_arb_ops vm_ops;
void *priv_data;
};
#endif /* _MALI_KBASE_ARBITER_INTERFACE_H_ */

View File

@@ -1,49 +0,0 @@
# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note OR MIT
#
# (C) COPYRIGHT 2012-2022 ARM Limited. All rights reserved.
#
# This program is free software and is provided to you under the terms of the
# GNU General Public License version 2 as published by the Free Software
# Foundation, and any use by you of this program is subject to the terms
# of such GNU license.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, you can access it online at
# http://www.gnu.org/licenses/gpl-2.0.html.
#
#
config MALI_XEN
tristate "Enable Xen Interface reference code"
depends on MALI_ARBITRATION && XEN
default n
help
Enables the build of xen interface modules used in the reference
virtualization setup for Mali
If unsure, say N.
config MALI_ARBITER_MODULES
tristate "Enable mali arbiter modules"
depends on MALI_ARBITRATION
default y
help
Enables the build of the arbiter modules used in the reference
virtualization setup for Mali
If unsure, say N
config MALI_GPU_POWER_MODULES
tristate "Enable gpu power modules"
depends on MALI_ARBITRATION
default y
help
Enables the build of the gpu power modules used in the reference
virtualization setup for Mali
If unsure, say N
source "drivers/gpu/arm/midgard/arbitration/ptm/Kconfig"

View File

@@ -1,28 +0,0 @@
# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note OR MIT
#
# (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved.
#
# This program is free software and is provided to you under the terms of the
# GNU General Public License version 2 as published by the Free Software
# Foundation, and any use by you of this program is subject to the terms
# of such GNU license.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, you can access it online at
# http://www.gnu.org/licenses/gpl-2.0.html.
#
#
config MALI_PARTITION_MANAGER
tristate "Enable compilation of partition manager modules"
depends on MALI_ARBITRATION
default n
help
This option enables the compilation of the partition manager
modules used to configure the Mali-G78AE GPU.

View File

@@ -47,8 +47,12 @@ endif
mali_kbase-$(CONFIG_MALI_DEVFREQ) += \
backend/gpu/mali_kbase_devfreq.o
# Dummy model
ifneq ($(CONFIG_MALI_REAL_HW),y)
mali_kbase-y += backend/gpu/mali_kbase_model_linux.o
endif
# NO_MALI Dummy model interface
mali_kbase-$(CONFIG_MALI_NO_MALI) += backend/gpu/mali_kbase_model_dummy.o
mali_kbase-$(CONFIG_MALI_NO_MALI) += backend/gpu/mali_kbase_model_linux.o
# HW error simulation
mali_kbase-$(CONFIG_MALI_NO_MALI) += backend/gpu/mali_kbase_model_error_generator.o

View File

@@ -25,12 +25,12 @@
#include <linux/interrupt.h>
#if !IS_ENABLED(CONFIG_MALI_NO_MALI)
#if IS_ENABLED(CONFIG_MALI_REAL_HW)
/* GPU IRQ Tags */
#define JOB_IRQ_TAG 0
#define MMU_IRQ_TAG 1
#define GPU_IRQ_TAG 2
#define JOB_IRQ_TAG 0
#define MMU_IRQ_TAG 1
#define GPU_IRQ_TAG 2
static void *kbase_tag(void *ptr, u32 tag)
{
@@ -500,4 +500,4 @@ void kbase_synchronize_irqs(struct kbase_device *kbdev)
KBASE_EXPORT_TEST_API(kbase_synchronize_irqs);
#endif /* !IS_ENABLED(CONFIG_MALI_NO_MALI) */
#endif /* IS_ENABLED(CONFIG_MALI_REAL_HW) */

View File

@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
* (C) COPYRIGHT 2014-2021 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2014-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -67,9 +67,8 @@ static void assign_and_activate_kctx_addr_space(struct kbase_device *kbdev,
kbase_js_runpool_inc_context_count(kbdev, kctx);
}
bool kbase_backend_use_ctx_sched(struct kbase_device *kbdev,
struct kbase_context *kctx,
int js)
bool kbase_backend_use_ctx_sched(struct kbase_device *kbdev, struct kbase_context *kctx,
unsigned int js)
{
int i;
@@ -240,4 +239,3 @@ bool kbase_backend_use_ctx(struct kbase_device *kbdev,
return true;
}

View File

@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
* (C) COPYRIGHT 2010-2022 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -44,9 +44,8 @@ static void kbasep_try_reset_gpu_early_locked(struct kbase_device *kbdev);
static u64 kbasep_apply_limited_core_mask(const struct kbase_device *kbdev,
const u64 affinity, const u64 limited_core_mask);
static u64 kbase_job_write_affinity(struct kbase_device *kbdev,
base_jd_core_req core_req,
int js, const u64 limited_core_mask)
static u64 kbase_job_write_affinity(struct kbase_device *kbdev, base_jd_core_req core_req,
unsigned int js, const u64 limited_core_mask)
{
u64 affinity;
bool skip_affinity_check = false;
@@ -191,7 +190,28 @@ static u64 select_job_chain(struct kbase_jd_atom *katom)
return jc;
}
int kbase_job_hw_submit(struct kbase_device *kbdev, struct kbase_jd_atom *katom, int js)
static inline bool kbasep_jm_wait_js_free(struct kbase_device *kbdev, unsigned int js,
struct kbase_context *kctx)
{
const ktime_t wait_loop_start = ktime_get_raw();
const s64 max_timeout = (s64)kbdev->js_data.js_free_wait_time_ms;
s64 diff = 0;
/* wait for the JS_COMMAND_NEXT register to reach the given status value */
do {
if (!kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_COMMAND_NEXT)))
return true;
diff = ktime_to_ms(ktime_sub(ktime_get_raw(), wait_loop_start));
} while (diff < max_timeout);
dev_err(kbdev->dev, "Timeout in waiting for job slot %u to become free for ctx %d_%u", js,
kctx->tgid, kctx->id);
return false;
}
int kbase_job_hw_submit(struct kbase_device *kbdev, struct kbase_jd_atom *katom, unsigned int js)
{
struct kbase_context *kctx;
u32 cfg;
@@ -204,8 +224,7 @@ int kbase_job_hw_submit(struct kbase_device *kbdev, struct kbase_jd_atom *katom,
kctx = katom->kctx;
/* Command register must be available */
if (WARN(!kbasep_jm_is_js_free(kbdev, js, kctx),
"Attempting to assign to occupied slot %d in kctx %pK\n", js, (void *)kctx))
if (!kbasep_jm_wait_js_free(kbdev, js, kctx))
return -EPERM;
dev_dbg(kctx->kbdev->dev, "Write JS_HEAD_NEXT 0x%llx for atom %pK\n",
@@ -344,10 +363,8 @@ int kbase_job_hw_submit(struct kbase_device *kbdev, struct kbase_jd_atom *katom,
* work out the best estimate (which might still result in an over-estimate to
* the calculated time spent)
*/
static void kbasep_job_slot_update_head_start_timestamp(
struct kbase_device *kbdev,
int js,
ktime_t end_timestamp)
static void kbasep_job_slot_update_head_start_timestamp(struct kbase_device *kbdev, unsigned int js,
ktime_t end_timestamp)
{
ktime_t timestamp_diff;
struct kbase_jd_atom *katom;
@@ -377,8 +394,7 @@ static void kbasep_job_slot_update_head_start_timestamp(
* Make a tracepoint call to the instrumentation module informing that
* softstop happened on given lpu (job slot).
*/
static void kbasep_trace_tl_event_lpu_softstop(struct kbase_device *kbdev,
int js)
static void kbasep_trace_tl_event_lpu_softstop(struct kbase_device *kbdev, unsigned int js)
{
KBASE_TLSTREAM_TL_EVENT_LPU_SOFTSTOP(
kbdev,
@@ -387,7 +403,6 @@ static void kbasep_trace_tl_event_lpu_softstop(struct kbase_device *kbdev,
void kbase_job_done(struct kbase_device *kbdev, u32 done)
{
int i;
u32 count = 0;
ktime_t end_timestamp;
@@ -398,6 +413,7 @@ void kbase_job_done(struct kbase_device *kbdev, u32 done)
end_timestamp = ktime_get_raw();
while (done) {
unsigned int i;
u32 failed = done >> 16;
/* treat failed slots as finished slots */
@@ -407,8 +423,6 @@ void kbase_job_done(struct kbase_device *kbdev, u32 done)
* numbered interrupts before the higher numbered ones.
*/
i = ffs(finished) - 1;
if (WARN(i < 0, "%s: called without receiving any interrupts\n", __func__))
break;
do {
int nr_done;
@@ -607,11 +621,9 @@ void kbase_job_done(struct kbase_device *kbdev, u32 done)
KBASE_KTRACE_ADD_JM(kbdev, JM_IRQ_END, NULL, NULL, 0, count);
}
void kbasep_job_slot_soft_or_hard_stop_do_action(struct kbase_device *kbdev,
int js,
u32 action,
base_jd_core_req core_reqs,
struct kbase_jd_atom *target_katom)
void kbasep_job_slot_soft_or_hard_stop_do_action(struct kbase_device *kbdev, unsigned int js,
u32 action, base_jd_core_req core_reqs,
struct kbase_jd_atom *target_katom)
{
#if KBASE_KTRACE_ENABLE
u32 status_reg_before;
@@ -669,6 +681,10 @@ void kbasep_job_slot_soft_or_hard_stop_do_action(struct kbase_device *kbdev,
struct kbase_context *head_kctx;
head = kbase_gpu_inspect(kbdev, js, 0);
if (unlikely(!head)) {
dev_err(kbdev->dev, "Can't get a katom from js(%d)\n", js);
return;
}
head_kctx = head->kctx;
if (status_reg_before == BASE_JD_EVENT_ACTIVE)
@@ -737,7 +753,7 @@ void kbasep_job_slot_soft_or_hard_stop_do_action(struct kbase_device *kbdev,
void kbase_backend_jm_kill_running_jobs_from_kctx(struct kbase_context *kctx)
{
struct kbase_device *kbdev = kctx->kbdev;
int i;
unsigned int i;
lockdep_assert_held(&kbdev->hwaccess_lock);
@@ -749,7 +765,7 @@ void kbase_job_slot_ctx_priority_check_locked(struct kbase_context *kctx,
struct kbase_jd_atom *target_katom)
{
struct kbase_device *kbdev;
int target_js = target_katom->slot_nr;
unsigned int target_js = target_katom->slot_nr;
int i;
bool stop_sent = false;
@@ -927,8 +943,8 @@ KBASE_EXPORT_TEST_API(kbase_job_slot_term);
*
* Where possible any job in the next register is evicted before the soft-stop.
*/
void kbase_job_slot_softstop_swflags(struct kbase_device *kbdev, int js,
struct kbase_jd_atom *target_katom, u32 sw_flags)
void kbase_job_slot_softstop_swflags(struct kbase_device *kbdev, unsigned int js,
struct kbase_jd_atom *target_katom, u32 sw_flags)
{
dev_dbg(kbdev->dev, "Soft-stop atom %pK with flags 0x%x (s:%d)\n",
target_katom, sw_flags, js);
@@ -948,8 +964,8 @@ void kbase_job_slot_softstop(struct kbase_device *kbdev, int js,
kbase_job_slot_softstop_swflags(kbdev, js, target_katom, 0u);
}
void kbase_job_slot_hardstop(struct kbase_context *kctx, int js,
struct kbase_jd_atom *target_katom)
void kbase_job_slot_hardstop(struct kbase_context *kctx, unsigned int js,
struct kbase_jd_atom *target_katom)
{
struct kbase_device *kbdev = kctx->kbdev;
bool stopped;
@@ -1255,7 +1271,7 @@ static enum hrtimer_restart kbasep_reset_timer_callback(struct hrtimer *timer)
static void kbasep_try_reset_gpu_early_locked(struct kbase_device *kbdev)
{
int i;
unsigned int i;
int pending_jobs = 0;
/* Count the number of jobs */

View File

@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
* (C) COPYRIGHT 2011-2016, 2018-2022 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2011-2016, 2018-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -33,21 +33,6 @@
#include <backend/gpu/mali_kbase_jm_rb.h>
#include <device/mali_kbase_device.h>
/**
* kbase_job_submit_nolock() - Submit a job to a certain job-slot
* @kbdev: Device pointer
* @katom: Atom to submit
* @js: Job slot to submit on
*
* The caller must check kbasep_jm_is_submit_slots_free() != false before
* calling this.
*
* The following locking conditions are made on the caller:
* - it must hold the hwaccess_lock
*/
void kbase_job_submit_nolock(struct kbase_device *kbdev,
struct kbase_jd_atom *katom, int js);
/**
* kbase_job_done_slot() - Complete the head job on a particular job-slot
* @kbdev: Device pointer
@@ -60,22 +45,13 @@ void kbase_job_done_slot(struct kbase_device *kbdev, int s, u32 completion_code,
u64 job_tail, ktime_t *end_timestamp);
#if IS_ENABLED(CONFIG_GPU_TRACEPOINTS)
static inline char *kbasep_make_job_slot_string(int js, char *js_string,
size_t js_size)
static inline char *kbasep_make_job_slot_string(unsigned int js, char *js_string, size_t js_size)
{
snprintf(js_string, js_size, "job_slot_%i", js);
snprintf(js_string, js_size, "job_slot_%u", js);
return js_string;
}
#endif
#if !MALI_USE_CSF
static inline int kbasep_jm_is_js_free(struct kbase_device *kbdev, int js,
struct kbase_context *kctx)
{
return !kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_COMMAND_NEXT));
}
#endif
/**
* kbase_job_hw_submit() - Submit a job to the GPU
* @kbdev: Device pointer
@@ -90,7 +66,7 @@ static inline int kbasep_jm_is_js_free(struct kbase_device *kbdev, int js,
*
* Return: 0 if the job was successfully submitted to hardware, an error otherwise.
*/
int kbase_job_hw_submit(struct kbase_device *kbdev, struct kbase_jd_atom *katom, int js);
int kbase_job_hw_submit(struct kbase_device *kbdev, struct kbase_jd_atom *katom, unsigned int js);
#if !MALI_USE_CSF
/**
@@ -106,11 +82,9 @@ int kbase_job_hw_submit(struct kbase_device *kbdev, struct kbase_jd_atom *katom,
* The following locking conditions are made on the caller:
* - it must hold the hwaccess_lock
*/
void kbasep_job_slot_soft_or_hard_stop_do_action(struct kbase_device *kbdev,
int js,
u32 action,
base_jd_core_req core_reqs,
struct kbase_jd_atom *target_katom);
void kbasep_job_slot_soft_or_hard_stop_do_action(struct kbase_device *kbdev, unsigned int js,
u32 action, base_jd_core_req core_reqs,
struct kbase_jd_atom *target_katom);
#endif /* !MALI_USE_CSF */
/**
@@ -134,11 +108,8 @@ void kbasep_job_slot_soft_or_hard_stop_do_action(struct kbase_device *kbdev,
*
* Return: true if an atom was stopped, false otherwise
*/
bool kbase_backend_soft_hard_stop_slot(struct kbase_device *kbdev,
struct kbase_context *kctx,
int js,
struct kbase_jd_atom *katom,
u32 action);
bool kbase_backend_soft_hard_stop_slot(struct kbase_device *kbdev, struct kbase_context *kctx,
unsigned int js, struct kbase_jd_atom *katom, u32 action);
/**
* kbase_job_slot_init - Initialise job slot framework

View File

@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
* (C) COPYRIGHT 2014-2022 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2014-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -93,9 +93,8 @@ static void kbase_gpu_enqueue_atom(struct kbase_device *kbdev,
*
* Return: Atom removed from ringbuffer
*/
static struct kbase_jd_atom *kbase_gpu_dequeue_atom(struct kbase_device *kbdev,
int js,
ktime_t *end_timestamp)
static struct kbase_jd_atom *kbase_gpu_dequeue_atom(struct kbase_device *kbdev, unsigned int js,
ktime_t *end_timestamp)
{
struct slot_rb *rb = &kbdev->hwaccess.backend.slot_rb[js];
struct kbase_jd_atom *katom;
@@ -118,8 +117,7 @@ static struct kbase_jd_atom *kbase_gpu_dequeue_atom(struct kbase_device *kbdev,
return katom;
}
struct kbase_jd_atom *kbase_gpu_inspect(struct kbase_device *kbdev, int js,
int idx)
struct kbase_jd_atom *kbase_gpu_inspect(struct kbase_device *kbdev, unsigned int js, int idx)
{
struct slot_rb *rb = &kbdev->hwaccess.backend.slot_rb[js];
@@ -131,8 +129,7 @@ struct kbase_jd_atom *kbase_gpu_inspect(struct kbase_device *kbdev, int js,
return rb->entries[(rb->read_idx + idx) & SLOT_RB_MASK].katom;
}
struct kbase_jd_atom *kbase_backend_inspect_tail(struct kbase_device *kbdev,
int js)
struct kbase_jd_atom *kbase_backend_inspect_tail(struct kbase_device *kbdev, unsigned int js)
{
struct slot_rb *rb = &kbdev->hwaccess.backend.slot_rb[js];
@@ -144,12 +141,13 @@ struct kbase_jd_atom *kbase_backend_inspect_tail(struct kbase_device *kbdev,
bool kbase_gpu_atoms_submitted_any(struct kbase_device *kbdev)
{
int js;
int i;
unsigned int js;
lockdep_assert_held(&kbdev->hwaccess_lock);
for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) {
int i;
for (i = 0; i < SLOT_RB_SIZE; i++) {
struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, i);
@@ -160,7 +158,7 @@ bool kbase_gpu_atoms_submitted_any(struct kbase_device *kbdev)
return false;
}
int kbase_backend_nr_atoms_submitted(struct kbase_device *kbdev, int js)
int kbase_backend_nr_atoms_submitted(struct kbase_device *kbdev, unsigned int js)
{
int nr = 0;
int i;
@@ -178,7 +176,7 @@ int kbase_backend_nr_atoms_submitted(struct kbase_device *kbdev, int js)
return nr;
}
int kbase_backend_nr_atoms_on_slot(struct kbase_device *kbdev, int js)
int kbase_backend_nr_atoms_on_slot(struct kbase_device *kbdev, unsigned int js)
{
int nr = 0;
int i;
@@ -193,8 +191,8 @@ int kbase_backend_nr_atoms_on_slot(struct kbase_device *kbdev, int js)
return nr;
}
static int kbase_gpu_nr_atoms_on_slot_min(struct kbase_device *kbdev, int js,
enum kbase_atom_gpu_rb_state min_rb_state)
static int kbase_gpu_nr_atoms_on_slot_min(struct kbase_device *kbdev, unsigned int js,
enum kbase_atom_gpu_rb_state min_rb_state)
{
int nr = 0;
int i;
@@ -244,9 +242,11 @@ static bool check_secure_atom(struct kbase_jd_atom *katom, bool secure)
static bool kbase_gpu_check_secure_atoms(struct kbase_device *kbdev,
bool secure)
{
int js, i;
unsigned int js;
for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) {
int i;
for (i = 0; i < SLOT_RB_SIZE; i++) {
struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev,
js, i);
@@ -261,7 +261,7 @@ static bool kbase_gpu_check_secure_atoms(struct kbase_device *kbdev,
return false;
}
int kbase_backend_slot_free(struct kbase_device *kbdev, int js)
int kbase_backend_slot_free(struct kbase_device *kbdev, unsigned int js)
{
lockdep_assert_held(&kbdev->hwaccess_lock);
@@ -430,9 +430,9 @@ static void kbase_gpu_mark_atom_for_return(struct kbase_device *kbdev,
*
* Return: true if any slots other than @js are busy, false otherwise
*/
static inline bool other_slots_busy(struct kbase_device *kbdev, int js)
static inline bool other_slots_busy(struct kbase_device *kbdev, unsigned int js)
{
int slot;
unsigned int slot;
for (slot = 0; slot < kbdev->gpu_props.num_job_slots; slot++) {
if (slot == js)
@@ -844,7 +844,7 @@ static int kbase_jm_exit_protected_mode(struct kbase_device *kbdev,
void kbase_backend_slot_update(struct kbase_device *kbdev)
{
int js;
unsigned int js;
lockdep_assert_held(&kbdev->hwaccess_lock);
@@ -1001,37 +1001,34 @@ void kbase_backend_slot_update(struct kbase_device *kbdev)
other_slots_busy(kbdev, js))
break;
#ifdef CONFIG_MALI_GEM5_BUILD
if (!kbasep_jm_is_js_free(kbdev, js,
katom[idx]->kctx))
break;
#endif
/* Check if this job needs the cycle counter
* enabled before submission
*/
if (katom[idx]->core_req & BASE_JD_REQ_PERMON)
kbase_pm_request_gpu_cycle_counter_l2_is_on(
kbdev);
kbase_pm_request_gpu_cycle_counter_l2_is_on(kbdev);
if (!kbase_job_hw_submit(kbdev, katom[idx], js))
if (!kbase_job_hw_submit(kbdev, katom[idx], js)) {
katom[idx]->gpu_rb_state = KBASE_ATOM_GPU_RB_SUBMITTED;
else
/* Inform power management at start/finish of
* atom so it can update its GPU utilisation
* metrics.
*/
kbase_pm_metrics_update(kbdev,
&katom[idx]->start_timestamp);
/* Inform platform at start/finish of atom */
kbasep_platform_event_atom_submit(katom[idx]);
} else {
if (katom[idx]->core_req & BASE_JD_REQ_PERMON)
kbase_pm_release_gpu_cycle_counter_nolock(kbdev);
break;
}
/* ***TRANSITION TO HIGHER STATE*** */
fallthrough;
case KBASE_ATOM_GPU_RB_SUBMITTED:
/* Inform power management at start/finish of
* atom so it can update its GPU utilisation
* metrics.
*/
kbase_pm_metrics_update(kbdev,
&katom[idx]->start_timestamp);
/* Inform platform at start/finish of atom */
kbasep_platform_event_atom_submit(katom[idx]);
break;
case KBASE_ATOM_GPU_RB_RETURN_TO_JS:
@@ -1111,8 +1108,7 @@ kbase_rb_atom_might_depend(const struct kbase_jd_atom *katom_a,
*
* Return: true if an atom was evicted, false otherwise.
*/
bool kbase_gpu_irq_evict(struct kbase_device *kbdev, int js,
u32 completion_code)
bool kbase_gpu_irq_evict(struct kbase_device *kbdev, unsigned int js, u32 completion_code)
{
struct kbase_jd_atom *katom;
struct kbase_jd_atom *next_katom;
@@ -1120,6 +1116,10 @@ bool kbase_gpu_irq_evict(struct kbase_device *kbdev, int js,
lockdep_assert_held(&kbdev->hwaccess_lock);
katom = kbase_gpu_inspect(kbdev, js, 0);
if (!katom) {
dev_err(kbdev->dev, "Can't get a katom from js(%u)\n", js);
return false;
}
next_katom = kbase_gpu_inspect(kbdev, js, 1);
if (next_katom &&
@@ -1184,13 +1184,18 @@ bool kbase_gpu_irq_evict(struct kbase_device *kbdev, int js,
* on the HW and returned to the JS.
*/
void kbase_gpu_complete_hw(struct kbase_device *kbdev, int js,
u32 completion_code,
u64 job_tail,
ktime_t *end_timestamp)
void kbase_gpu_complete_hw(struct kbase_device *kbdev, unsigned int js, u32 completion_code,
u64 job_tail, ktime_t *end_timestamp)
{
struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, 0);
struct kbase_context *kctx = katom->kctx;
struct kbase_context *kctx = NULL;
if (unlikely(!katom)) {
dev_err(kbdev->dev, "Can't get a katom from js(%d)\n", js);
return;
}
kctx = katom->kctx;
dev_dbg(kbdev->dev,
"Atom %pK completed on hw with code 0x%x and job_tail 0x%llx (s:%d)\n",
@@ -1243,7 +1248,7 @@ void kbase_gpu_complete_hw(struct kbase_device *kbdev, int js,
}
} else if (completion_code != BASE_JD_EVENT_DONE) {
struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
int i;
unsigned int i;
if (!kbase_ctx_flag(katom->kctx, KCTX_DYING)) {
dev_warn(kbdev->dev, "error detected from slot %d, job status 0x%08x (%s)",
@@ -1388,7 +1393,7 @@ void kbase_gpu_complete_hw(struct kbase_device *kbdev, int js,
void kbase_backend_reset(struct kbase_device *kbdev, ktime_t *end_timestamp)
{
int js;
unsigned int js;
lockdep_assert_held(&kbdev->hwaccess_lock);
@@ -1416,7 +1421,7 @@ void kbase_backend_reset(struct kbase_device *kbdev, ktime_t *end_timestamp)
kbase_gpu_in_protected_mode(kbdev));
WARN(!(kbase_jd_katom_is_protected(katom) && js == 0) &&
kbase_jd_katom_is_protected(katom),
"Protected atom on JS%d not supported", js);
"Protected atom on JS%u not supported", js);
}
if ((katom->gpu_rb_state < KBASE_ATOM_GPU_RB_SUBMITTED) &&
!kbase_ctx_flag(katom->kctx, KCTX_DYING))
@@ -1512,10 +1517,8 @@ static bool should_stop_next_atom(struct kbase_device *kbdev,
return ret;
}
static inline void kbase_gpu_stop_atom(struct kbase_device *kbdev,
int js,
struct kbase_jd_atom *katom,
u32 action)
static inline void kbase_gpu_stop_atom(struct kbase_device *kbdev, unsigned int js,
struct kbase_jd_atom *katom, u32 action)
{
struct kbase_context *kctx = katom->kctx;
u32 hw_action = action & JS_COMMAND_MASK;
@@ -1559,11 +1562,8 @@ static int should_stop_x_dep_slot(struct kbase_jd_atom *katom)
return -1;
}
bool kbase_backend_soft_hard_stop_slot(struct kbase_device *kbdev,
struct kbase_context *kctx,
int js,
struct kbase_jd_atom *katom,
u32 action)
bool kbase_backend_soft_hard_stop_slot(struct kbase_device *kbdev, struct kbase_context *kctx,
unsigned int js, struct kbase_jd_atom *katom, u32 action)
{
struct kbase_jd_atom *katom_idx0;
struct kbase_context *kctx_idx0 = NULL;
@@ -1816,7 +1816,7 @@ void kbase_backend_complete_wq_post_sched(struct kbase_device *kbdev,
void kbase_gpu_dump_slots(struct kbase_device *kbdev)
{
unsigned long flags;
int js;
unsigned int js;
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
@@ -1831,12 +1831,10 @@ void kbase_gpu_dump_slots(struct kbase_device *kbdev)
idx);
if (katom)
dev_info(kbdev->dev,
" js%d idx%d : katom=%pK gpu_rb_state=%d\n",
js, idx, katom, katom->gpu_rb_state);
dev_info(kbdev->dev, " js%u idx%d : katom=%pK gpu_rb_state=%d\n",
js, idx, katom, katom->gpu_rb_state);
else
dev_info(kbdev->dev, " js%d idx%d : empty\n",
js, idx);
dev_info(kbdev->dev, " js%u idx%d : empty\n", js, idx);
}
}
@@ -1845,7 +1843,7 @@ void kbase_gpu_dump_slots(struct kbase_device *kbdev)
void kbase_backend_slot_kctx_purge_locked(struct kbase_device *kbdev, struct kbase_context *kctx)
{
int js;
unsigned int js;
bool tracked = false;
lockdep_assert_held(&kbdev->hwaccess_lock);

View File

@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
* (C) COPYRIGHT 2014-2018, 2020-2021 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2014-2018, 2020-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -40,8 +40,7 @@
*
* Return: true if job evicted from NEXT registers, false otherwise
*/
bool kbase_gpu_irq_evict(struct kbase_device *kbdev, int js,
u32 completion_code);
bool kbase_gpu_irq_evict(struct kbase_device *kbdev, unsigned int js, u32 completion_code);
/**
* kbase_gpu_complete_hw - Complete an atom on job slot js
@@ -53,10 +52,8 @@ bool kbase_gpu_irq_evict(struct kbase_device *kbdev, int js,
* completed
* @end_timestamp: Time of completion
*/
void kbase_gpu_complete_hw(struct kbase_device *kbdev, int js,
u32 completion_code,
u64 job_tail,
ktime_t *end_timestamp);
void kbase_gpu_complete_hw(struct kbase_device *kbdev, unsigned int js, u32 completion_code,
u64 job_tail, ktime_t *end_timestamp);
/**
* kbase_gpu_inspect - Inspect the contents of the HW access ringbuffer
@@ -68,8 +65,7 @@ void kbase_gpu_complete_hw(struct kbase_device *kbdev, int js,
* Return: The atom at that position in the ringbuffer
* or NULL if no atom present
*/
struct kbase_jd_atom *kbase_gpu_inspect(struct kbase_device *kbdev, int js,
int idx);
struct kbase_jd_atom *kbase_gpu_inspect(struct kbase_device *kbdev, unsigned int js, int idx);
/**
* kbase_gpu_dump_slots - Print the contents of the slot ringbuffers

View File

@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
* (C) COPYRIGHT 2014-2021 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2014-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -91,7 +91,7 @@ static enum hrtimer_restart timer_callback(struct hrtimer *timer)
struct kbase_device *kbdev;
struct kbasep_js_device_data *js_devdata;
struct kbase_backend_data *backend;
int s;
unsigned int s;
bool reset_needed = false;
KBASE_DEBUG_ASSERT(timer != NULL);
@@ -365,4 +365,3 @@ void kbase_backend_timeouts_changed(struct kbase_device *kbdev)
backend->timeouts_updated = true;
}

View File

@@ -62,8 +62,9 @@
* document
*/
#include <mali_kbase.h>
#include <device/mali_kbase_device.h>
#include <gpu/mali_kbase_gpu_regmap.h>
#include <backend/gpu/mali_kbase_model_dummy.h>
#include <backend/gpu/mali_kbase_model_linux.h>
#include <mali_kbase_mem_linux.h>
#if MALI_USE_CSF
@@ -319,7 +320,7 @@ static const struct control_reg_values_t all_control_reg_values[] = {
.mmu_features = 0x2830,
.gpu_features_lo = 0,
.gpu_features_hi = 0,
.shader_present = DUMMY_IMPLEMENTATION_SHADER_PRESENT,
.shader_present = DUMMY_IMPLEMENTATION_SHADER_PRESENT_TBEX,
.stack_present = DUMMY_IMPLEMENTATION_STACK_PRESENT,
},
{
@@ -337,21 +338,6 @@ static const struct control_reg_values_t all_control_reg_values[] = {
.shader_present = DUMMY_IMPLEMENTATION_SHADER_PRESENT,
.stack_present = DUMMY_IMPLEMENTATION_STACK_PRESENT,
},
{
.name = "tDUx",
.gpu_id = GPU_ID2_MAKE(10, 2, 0, 1, 0, 0, 0),
.as_present = 0xFF,
.thread_max_threads = 0x180,
.thread_max_workgroup_size = 0x180,
.thread_max_barrier_size = 0x180,
.thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 0),
.tiler_features = 0x809,
.mmu_features = 0x2830,
.gpu_features_lo = 0,
.gpu_features_hi = 0,
.shader_present = DUMMY_IMPLEMENTATION_SHADER_PRESENT,
.stack_present = DUMMY_IMPLEMENTATION_STACK_PRESENT,
},
{
.name = "tODx",
.gpu_id = GPU_ID2_MAKE(10, 8, 0, 2, 0, 0, 0),
@@ -364,7 +350,7 @@ static const struct control_reg_values_t all_control_reg_values[] = {
.mmu_features = 0x2830,
.gpu_features_lo = 0,
.gpu_features_hi = 0,
.shader_present = DUMMY_IMPLEMENTATION_SHADER_PRESENT,
.shader_present = DUMMY_IMPLEMENTATION_SHADER_PRESENT_TODX,
.stack_present = DUMMY_IMPLEMENTATION_STACK_PRESENT,
},
{
@@ -412,7 +398,7 @@ static const struct control_reg_values_t all_control_reg_values[] = {
.mmu_features = 0x2830,
.gpu_features_lo = 0xf,
.gpu_features_hi = 0,
.shader_present = 0xFF,
.shader_present = DUMMY_IMPLEMENTATION_SHADER_PRESENT_TTUX,
.stack_present = 0xF,
},
{
@@ -428,7 +414,7 @@ static const struct control_reg_values_t all_control_reg_values[] = {
.mmu_features = 0x2830,
.gpu_features_lo = 0xf,
.gpu_features_hi = 0,
.shader_present = 0xFF,
.shader_present = DUMMY_IMPLEMENTATION_SHADER_PRESENT_TTIX,
.stack_present = 0xF,
},
};
@@ -530,17 +516,18 @@ static u32 gpu_model_get_prfcnt_value(enum kbase_ipa_core_type core_type,
(ipa_ctl_select_config[core_type] >> (cnt_idx * 8)) & 0xFF;
/* Currently only primary counter blocks are supported */
if (WARN_ON(event_index >= 64))
if (WARN_ON(event_index >=
(KBASE_DUMMY_MODEL_COUNTER_HEADER_DWORDS + KBASE_DUMMY_MODEL_COUNTER_PER_CORE)))
return 0;
/* The actual events start index 4 onwards. Spec also says PRFCNT_EN,
* TIMESTAMP_LO or TIMESTAMP_HI pseudo-counters do not make sense for
* IPA counters. If selected, the value returned for them will be zero.
*/
if (WARN_ON(event_index <= 3))
if (WARN_ON(event_index < KBASE_DUMMY_MODEL_COUNTER_HEADER_DWORDS))
return 0;
event_index -= 4;
event_index -= KBASE_DUMMY_MODEL_COUNTER_HEADER_DWORDS;
spin_lock_irqsave(&performance_counters.access_lock, flags);
@@ -736,7 +723,7 @@ void gpu_model_glb_request_job_irq(void *model)
spin_lock_irqsave(&hw_error_status.access_lock, flags);
hw_error_status.job_irq_status |= JOB_IRQ_GLOBAL_IF;
spin_unlock_irqrestore(&hw_error_status.access_lock, flags);
gpu_device_raise_irq(model, GPU_DUMMY_JOB_IRQ);
gpu_device_raise_irq(model, MODEL_LINUX_JOB_IRQ);
}
#endif /* !MALI_USE_CSF */
@@ -768,7 +755,7 @@ static void init_register_statuses(struct dummy_model_t *dummy)
performance_counters.time = 0;
}
static void update_register_statuses(struct dummy_model_t *dummy, int job_slot)
static void update_register_statuses(struct dummy_model_t *dummy, unsigned int job_slot)
{
lockdep_assert_held(&hw_error_status.access_lock);
@@ -1101,7 +1088,7 @@ static const struct control_reg_values_t *find_control_reg_values(const char *gp
return ret;
}
void *midgard_model_create(const void *config)
void *midgard_model_create(struct kbase_device *kbdev)
{
struct dummy_model_t *dummy = NULL;
@@ -1118,7 +1105,12 @@ void *midgard_model_create(const void *config)
GPU_CONTROL_REG(L2_PRESENT_LO), dummy->control_reg_values);
performance_counters.shader_present = get_implementation_register(
GPU_CONTROL_REG(SHADER_PRESENT_LO), dummy->control_reg_values);
gpu_device_set_data(dummy, kbdev);
dev_info(kbdev->dev, "Using Dummy Model");
}
return dummy;
}
@@ -1134,7 +1126,7 @@ static void midgard_model_get_outputs(void *h)
lockdep_assert_held(&hw_error_status.access_lock);
if (hw_error_status.job_irq_status)
gpu_device_raise_irq(dummy, GPU_DUMMY_JOB_IRQ);
gpu_device_raise_irq(dummy, MODEL_LINUX_JOB_IRQ);
if ((dummy->power_changed && dummy->power_changed_mask) ||
(dummy->reset_completed & dummy->reset_completed_mask) ||
@@ -1145,10 +1137,10 @@ static void midgard_model_get_outputs(void *h)
(dummy->flush_pa_range_completed && dummy->flush_pa_range_completed_irq_enabled) ||
#endif
(dummy->clean_caches_completed && dummy->clean_caches_completed_irq_enabled))
gpu_device_raise_irq(dummy, GPU_DUMMY_GPU_IRQ);
gpu_device_raise_irq(dummy, MODEL_LINUX_GPU_IRQ);
if (hw_error_status.mmu_irq_rawstat & hw_error_status.mmu_irq_mask)
gpu_device_raise_irq(dummy, GPU_DUMMY_MMU_IRQ);
gpu_device_raise_irq(dummy, MODEL_LINUX_MMU_IRQ);
}
static void midgard_model_update(void *h)
@@ -1215,7 +1207,7 @@ static void invalidate_active_jobs(struct dummy_model_t *dummy)
}
}
u8 midgard_model_write_reg(void *h, u32 addr, u32 value)
void midgard_model_write_reg(void *h, u32 addr, u32 value)
{
unsigned long flags;
struct dummy_model_t *dummy = (struct dummy_model_t *)h;
@@ -1225,7 +1217,7 @@ u8 midgard_model_write_reg(void *h, u32 addr, u32 value)
#if !MALI_USE_CSF
if ((addr >= JOB_CONTROL_REG(JOB_SLOT0)) &&
(addr < (JOB_CONTROL_REG(JOB_SLOT15) + 0x80))) {
int slot_idx = (addr >> 7) & 0xf;
unsigned int slot_idx = (addr >> 7) & 0xf;
KBASE_DEBUG_ASSERT(slot_idx < NUM_SLOTS);
if (addr == JOB_SLOT_REG(slot_idx, JS_HEAD_NEXT_LO)) {
@@ -1607,11 +1599,9 @@ u8 midgard_model_write_reg(void *h, u32 addr, u32 value)
midgard_model_update(dummy);
midgard_model_get_outputs(dummy);
spin_unlock_irqrestore(&hw_error_status.access_lock, flags);
return 1;
}
u8 midgard_model_read_reg(void *h, u32 addr, u32 * const value)
void midgard_model_read_reg(void *h, u32 addr, u32 *const value)
{
unsigned long flags;
struct dummy_model_t *dummy = (struct dummy_model_t *)h;
@@ -2034,8 +2024,6 @@ u8 midgard_model_read_reg(void *h, u32 addr, u32 * const value)
*value = gpu_model_get_prfcnt_value(KBASE_IPA_CORE_TYPE_SHADER,
counter_index, is_low_word);
} else if (addr == USER_REG(LATEST_FLUSH)) {
*value = 0;
}
#endif
else if (addr == GPU_CONTROL_REG(GPU_FEATURES_LO)) {
@@ -2051,8 +2039,6 @@ u8 midgard_model_read_reg(void *h, u32 addr, u32 * const value)
spin_unlock_irqrestore(&hw_error_status.access_lock, flags);
CSTD_UNUSED(dummy);
return 1;
}
static u32 set_user_sample_core_type(u64 *counters, u32 *usr_data_start, u32 usr_data_offset,
@@ -2228,3 +2214,16 @@ int gpu_model_control(void *model,
return 0;
}
/**
* kbase_is_gpu_removed - Has the GPU been removed.
* @kbdev: Kbase device pointer
*
* This function would return true if the GPU has been removed.
* It is stubbed here
* Return: Always false
*/
bool kbase_is_gpu_removed(struct kbase_device *kbdev)
{
return false;
}

View File

@@ -21,11 +21,24 @@
/*
* Dummy Model interface
*
* Support for NO_MALI dummy Model interface.
*
* +-----------------------------------+
* | Kbase read/write/IRQ |
* +-----------------------------------+
* | Model Linux Framework |
* +-----------------------------------+
* | Model Dummy interface definitions |
* +-----------------+-----------------+
* | Fake R/W | Fake IRQ |
* +-----------------+-----------------+
*/
#ifndef _KBASE_MODEL_DUMMY_H_
#define _KBASE_MODEL_DUMMY_H_
#include <uapi/gpu/arm/midgard/backend/gpu/mali_kbase_model_linux.h>
#include <uapi/gpu/arm/midgard/backend/gpu/mali_kbase_model_dummy.h>
#define model_error_log(module, ...) pr_err(__VA_ARGS__)
@@ -154,11 +167,6 @@ struct gpu_model_prfcnt_en {
u32 shader;
};
void *midgard_model_create(const void *config);
void midgard_model_destroy(void *h);
u8 midgard_model_write_reg(void *h, u32 addr, u32 value);
u8 midgard_model_read_reg(void *h, u32 addr,
u32 * const value);
void midgard_set_error(int job_slot);
int job_atom_inject_error(struct kbase_error_params *params);
int gpu_model_control(void *h,
@@ -211,17 +219,6 @@ void gpu_model_prfcnt_dump_request(uint32_t *sample_buf, struct gpu_model_prfcnt
void gpu_model_glb_request_job_irq(void *model);
#endif /* MALI_USE_CSF */
enum gpu_dummy_irq {
GPU_DUMMY_JOB_IRQ,
GPU_DUMMY_GPU_IRQ,
GPU_DUMMY_MMU_IRQ
};
void gpu_device_raise_irq(void *model,
enum gpu_dummy_irq irq);
void gpu_device_set_data(void *model, void *data);
void *gpu_device_get_data(void *model);
extern struct error_status_t hw_error_status;
#endif

View File

@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
* (C) COPYRIGHT 2014-2015, 2018-2022 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2014-2015, 2018-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -21,18 +21,21 @@
#include <mali_kbase.h>
#include <linux/random.h>
#include "backend/gpu/mali_kbase_model_dummy.h"
#include "backend/gpu/mali_kbase_model_linux.h"
static struct kbase_error_atom *error_track_list;
unsigned int rand_seed;
#ifdef CONFIG_MALI_ERROR_INJECT_RANDOM
/** Kernel 6.1.0 has dropped prandom_u32(), use get_random_u32() */
#if (KERNEL_VERSION(6, 1, 0) <= LINUX_VERSION_CODE)
#define prandom_u32 get_random_u32
#endif
/*following error probability are set quite high in order to stress the driver*/
unsigned int error_probability = 50; /* to be set between 0 and 100 */
static unsigned int error_probability = 50; /* to be set between 0 and 100 */
/* probability to have multiple error give that there is an error */
unsigned int multiple_error_probability = 50;
#ifdef CONFIG_MALI_ERROR_INJECT_RANDOM
static unsigned int multiple_error_probability = 50;
/* all the error conditions supported by the model */
#define TOTAL_FAULTS 27

View File

@@ -20,12 +20,12 @@
*/
/*
* Model interface
* Model Linux Framework interfaces.
*/
#include <mali_kbase.h>
#include <gpu/mali_kbase_gpu_regmap.h>
#include <backend/gpu/mali_kbase_model_dummy.h>
#include "backend/gpu/mali_kbase_model_linux.h"
#include "device/mali_kbase_device.h"
#include "mali_kbase_irq_internal.h"
@@ -105,8 +105,7 @@ static void serve_mmu_irq(struct work_struct *work)
kmem_cache_free(kbdev->irq_slab, data);
}
void gpu_device_raise_irq(void *model,
enum gpu_dummy_irq irq)
void gpu_device_raise_irq(void *model, u32 irq)
{
struct model_irq_data *data;
struct kbase_device *kbdev = gpu_device_get_data(model);
@@ -120,15 +119,15 @@ void gpu_device_raise_irq(void *model,
data->kbdev = kbdev;
switch (irq) {
case GPU_DUMMY_JOB_IRQ:
case MODEL_LINUX_JOB_IRQ:
INIT_WORK(&data->work, serve_job_irq);
atomic_set(&kbdev->serving_job_irq, 1);
break;
case GPU_DUMMY_GPU_IRQ:
case MODEL_LINUX_GPU_IRQ:
INIT_WORK(&data->work, serve_gpu_irq);
atomic_set(&kbdev->serving_gpu_irq, 1);
break;
case GPU_DUMMY_MMU_IRQ:
case MODEL_LINUX_MMU_IRQ:
INIT_WORK(&data->work, serve_mmu_irq);
atomic_set(&kbdev->serving_mmu_irq, 1);
break;
@@ -165,22 +164,8 @@ u32 kbase_reg_read(struct kbase_device *kbdev, u32 offset)
return val;
}
KBASE_EXPORT_TEST_API(kbase_reg_read);
/**
* kbase_is_gpu_removed - Has the GPU been removed.
* @kbdev: Kbase device pointer
*
* This function would return true if the GPU has been removed.
* It is stubbed here
* Return: Always false
*/
bool kbase_is_gpu_removed(struct kbase_device *kbdev)
{
return false;
}
int kbase_install_interrupts(struct kbase_device *kbdev)
{
KBASE_DEBUG_ASSERT(kbdev);
@@ -239,16 +224,12 @@ KBASE_EXPORT_TEST_API(kbase_gpu_irq_test_handler);
int kbase_gpu_device_create(struct kbase_device *kbdev)
{
kbdev->model = midgard_model_create(NULL);
kbdev->model = midgard_model_create(kbdev);
if (kbdev->model == NULL)
return -ENOMEM;
gpu_device_set_data(kbdev->model, kbdev);
spin_lock_init(&kbdev->reg_op_lock);
dev_warn(kbdev->dev, "Using Dummy Model");
return 0;
}

View File

@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
* (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -20,13 +20,132 @@
*/
/*
* Model interface
* Model Linux Framework interfaces.
*
* This framework is used to provide generic Kbase Models interfaces.
* Note: Backends cannot be used together; the selection is done at build time.
*
* - Without Model Linux Framework:
* +-----------------------------+
* | Kbase read/write/IRQ |
* +-----------------------------+
* | HW interface definitions |
* +-----------------------------+
*
* - With Model Linux Framework:
* +-----------------------------+
* | Kbase read/write/IRQ |
* +-----------------------------+
* | Model Linux Framework |
* +-----------------------------+
* | Model interface definitions |
* +-----------------------------+
*/
#ifndef _KBASE_MODEL_LINUX_H_
#define _KBASE_MODEL_LINUX_H_
/*
* Include Model definitions
*/
#if IS_ENABLED(CONFIG_MALI_NO_MALI)
#include <backend/gpu/mali_kbase_model_dummy.h>
#endif /* IS_ENABLED(CONFIG_MALI_NO_MALI) */
#if !IS_ENABLED(CONFIG_MALI_REAL_HW)
/**
* kbase_gpu_device_create() - Generic create function.
*
* @kbdev: Kbase device.
*
* Specific model hook is implemented by midgard_model_create()
*
* Return: 0 on success, error code otherwise.
*/
int kbase_gpu_device_create(struct kbase_device *kbdev);
/**
* kbase_gpu_device_destroy() - Generic create function.
*
* @kbdev: Kbase device.
*
* Specific model hook is implemented by midgard_model_destroy()
*/
void kbase_gpu_device_destroy(struct kbase_device *kbdev);
#endif /* _KBASE_MODEL_LINUX_H_ */
/**
* midgard_model_create() - Private create function.
*
* @kbdev: Kbase device.
*
* This hook is specific to the model built in Kbase.
*
* Return: Model handle.
*/
void *midgard_model_create(struct kbase_device *kbdev);
/**
* midgard_model_destroy() - Private destroy function.
*
* @h: Model handle.
*
* This hook is specific to the model built in Kbase.
*/
void midgard_model_destroy(void *h);
/**
* midgard_model_write_reg() - Private model write function.
*
* @h: Model handle.
* @addr: Address at which to write.
* @value: value to write.
*
* This hook is specific to the model built in Kbase.
*/
void midgard_model_write_reg(void *h, u32 addr, u32 value);
/**
* midgard_model_read_reg() - Private model read function.
*
* @h: Model handle.
* @addr: Address from which to read.
* @value: Pointer where to store the read value.
*
* This hook is specific to the model built in Kbase.
*/
void midgard_model_read_reg(void *h, u32 addr, u32 *const value);
/**
* gpu_device_raise_irq() - Private IRQ raise function.
*
* @model: Model handle.
* @irq: IRQ type to raise.
*
* This hook is global to the model Linux framework.
*/
void gpu_device_raise_irq(void *model, u32 irq);
/**
* gpu_device_set_data() - Private model set data function.
*
* @model: Model handle.
* @data: Data carried by model.
*
* This hook is global to the model Linux framework.
*/
void gpu_device_set_data(void *model, void *data);
/**
* gpu_device_get_data() - Private model get data function.
*
* @model: Model handle.
*
* This hook is global to the model Linux framework.
*
* Return: Pointer to the data carried by model.
*/
void *gpu_device_get_data(void *model);
#endif /* !IS_ENABLED(CONFIG_MALI_REAL_HW) */
#endif /* _KBASE_MODEL_LINUX_H_ */

View File

@@ -26,9 +26,7 @@
#include <mali_kbase.h>
#include <mali_kbase_pm.h>
#include <backend/gpu/mali_kbase_pm_internal.h>
#if IS_ENABLED(CONFIG_MALI_NO_MALI)
#include <backend/gpu/mali_kbase_model_dummy.h>
#endif /* CONFIG_MALI_NO_MALI */
#include <backend/gpu/mali_kbase_model_linux.h>
#include <mali_kbase_dummy_job_wa.h>
int kbase_pm_ca_init(struct kbase_device *kbdev)

View File

@@ -804,6 +804,17 @@ static int kbase_pm_mcu_update_state(struct kbase_device *kbdev)
KBASE_MCU_HCTL_SHADERS_PEND_ON;
} else
backend->mcu_state = KBASE_MCU_ON_HWCNT_ENABLE;
#if IS_ENABLED(CONFIG_MALI_CORESIGHT)
if (kbase_debug_coresight_csf_state_check(
kbdev, KBASE_DEBUG_CORESIGHT_CSF_DISABLED)) {
kbase_debug_coresight_csf_state_request(
kbdev, KBASE_DEBUG_CORESIGHT_CSF_ENABLED);
backend->mcu_state = KBASE_MCU_CORESIGHT_ENABLE;
} else if (kbase_debug_coresight_csf_state_check(
kbdev, KBASE_DEBUG_CORESIGHT_CSF_ENABLED)) {
backend->mcu_state = KBASE_MCU_CORESIGHT_ENABLE;
}
#endif /* IS_ENABLED(CONFIG_MALI_CORESIGHT) */
}
break;
@@ -832,8 +843,7 @@ static int kbase_pm_mcu_update_state(struct kbase_device *kbdev)
unsigned long flags;
kbase_csf_scheduler_spin_lock(kbdev, &flags);
kbase_hwcnt_context_enable(
kbdev->hwcnt_gpu_ctx);
kbase_hwcnt_context_enable(kbdev->hwcnt_gpu_ctx);
kbase_csf_scheduler_spin_unlock(kbdev, flags);
backend->hwcnt_disabled = false;
}
@@ -854,9 +864,19 @@ static int kbase_pm_mcu_update_state(struct kbase_device *kbdev)
backend->mcu_state =
KBASE_MCU_HCTL_MCU_ON_RECHECK;
}
} else if (kbase_pm_handle_mcu_core_attr_update(kbdev)) {
} else if (kbase_pm_handle_mcu_core_attr_update(kbdev))
backend->mcu_state = KBASE_MCU_ON_CORE_ATTR_UPDATE_PEND;
#if IS_ENABLED(CONFIG_MALI_CORESIGHT)
else if (kbdev->csf.coresight.disable_on_pmode_enter) {
kbase_debug_coresight_csf_state_request(
kbdev, KBASE_DEBUG_CORESIGHT_CSF_DISABLED);
backend->mcu_state = KBASE_MCU_ON_PMODE_ENTER_CORESIGHT_DISABLE;
} else if (kbdev->csf.coresight.enable_on_pmode_exit) {
kbase_debug_coresight_csf_state_request(
kbdev, KBASE_DEBUG_CORESIGHT_CSF_ENABLED);
backend->mcu_state = KBASE_MCU_ON_PMODE_EXIT_CORESIGHT_ENABLE;
}
#endif
break;
case KBASE_MCU_HCTL_MCU_ON_RECHECK:
@@ -947,12 +967,46 @@ static int kbase_pm_mcu_update_state(struct kbase_device *kbdev)
#ifdef KBASE_PM_RUNTIME
if (backend->gpu_sleep_mode_active)
backend->mcu_state = KBASE_MCU_ON_SLEEP_INITIATE;
else
else {
#endif
backend->mcu_state = KBASE_MCU_ON_HALT;
#if IS_ENABLED(CONFIG_MALI_CORESIGHT)
kbase_debug_coresight_csf_state_request(
kbdev, KBASE_DEBUG_CORESIGHT_CSF_DISABLED);
backend->mcu_state = KBASE_MCU_CORESIGHT_DISABLE;
#endif /* IS_ENABLED(CONFIG_MALI_CORESIGHT) */
}
}
break;
#if IS_ENABLED(CONFIG_MALI_CORESIGHT)
case KBASE_MCU_ON_PMODE_ENTER_CORESIGHT_DISABLE:
if (kbase_debug_coresight_csf_state_check(
kbdev, KBASE_DEBUG_CORESIGHT_CSF_DISABLED)) {
backend->mcu_state = KBASE_MCU_ON;
kbdev->csf.coresight.disable_on_pmode_enter = false;
}
break;
case KBASE_MCU_ON_PMODE_EXIT_CORESIGHT_ENABLE:
if (kbase_debug_coresight_csf_state_check(
kbdev, KBASE_DEBUG_CORESIGHT_CSF_ENABLED)) {
backend->mcu_state = KBASE_MCU_ON;
kbdev->csf.coresight.enable_on_pmode_exit = false;
}
break;
case KBASE_MCU_CORESIGHT_DISABLE:
if (kbase_debug_coresight_csf_state_check(
kbdev, KBASE_DEBUG_CORESIGHT_CSF_DISABLED))
backend->mcu_state = KBASE_MCU_ON_HALT;
break;
case KBASE_MCU_CORESIGHT_ENABLE:
if (kbase_debug_coresight_csf_state_check(
kbdev, KBASE_DEBUG_CORESIGHT_CSF_ENABLED))
backend->mcu_state = KBASE_MCU_ON_HWCNT_ENABLE;
break;
#endif /* IS_ENABLED(CONFIG_MALI_CORESIGHT) */
case KBASE_MCU_ON_HALT:
if (!kbase_pm_is_mcu_desired(kbdev)) {
kbase_csf_firmware_trigger_mcu_halt(kbdev);
@@ -1045,6 +1099,11 @@ static int kbase_pm_mcu_update_state(struct kbase_device *kbdev)
/* Reset complete */
if (!backend->in_reset)
backend->mcu_state = KBASE_MCU_OFF;
#if IS_ENABLED(CONFIG_MALI_CORESIGHT)
kbdev->csf.coresight.disable_on_pmode_enter = false;
kbdev->csf.coresight.enable_on_pmode_exit = false;
#endif /* IS_ENABLED(CONFIG_MALI_CORESIGHT) */
break;
default:
@@ -1142,13 +1201,22 @@ static bool can_power_down_l2(struct kbase_device *kbdev)
#if MALI_USE_CSF
/* Due to the HW issue GPU2019-3878, need to prevent L2 power off
* whilst MMU command is in progress.
* Also defer the power-down if MMU is in process of page migration.
*/
return !kbdev->mmu_hw_operation_in_progress;
return !kbdev->mmu_hw_operation_in_progress && !kbdev->mmu_page_migrate_in_progress;
#else
return true;
return !kbdev->mmu_page_migrate_in_progress;
#endif
}
/**
 * can_power_up_l2() - Check whether the L2 cache may be powered up now.
 *
 * @kbdev: The kbase device structure for the device (must be a valid pointer).
 *
 * Power-up of the L2 is deferred while the MMU is performing page migration,
 * so the L2 state machine does not start a power transition mid-migration.
 *
 * Context: caller must hold hwaccess_lock.
 *
 * Return: true if the L2 power-up transition is currently allowed.
 */
static bool can_power_up_l2(struct kbase_device *kbdev)
{
	lockdep_assert_held(&kbdev->hwaccess_lock);

	/* Avoiding l2 transition if MMU is undergoing page migration */
	return !kbdev->mmu_page_migrate_in_progress;
}
static bool need_tiler_control(struct kbase_device *kbdev)
{
#if MALI_USE_CSF
@@ -1220,7 +1288,7 @@ static int kbase_pm_l2_update_state(struct kbase_device *kbdev)
switch (backend->l2_state) {
case KBASE_L2_OFF:
if (kbase_pm_is_l2_desired(kbdev)) {
if (kbase_pm_is_l2_desired(kbdev) && can_power_up_l2(kbdev)) {
#if MALI_USE_CSF && defined(KBASE_PM_RUNTIME)
/* Enable HW timer of IPA control before
* L2 cache is powered-up.
@@ -2507,26 +2575,33 @@ void kbase_pm_disable_interrupts(struct kbase_device *kbdev)
KBASE_EXPORT_TEST_API(kbase_pm_disable_interrupts);
#if MALI_USE_CSF
/**
* update_user_reg_page_mapping - Update the mapping for USER Register page
*
* @kbdev: The kbase device structure for the device.
*
* This function must be called to unmap the dummy or real page from USER Register page
* mapping whenever GPU is powered up or down. The dummy or real page would get
* appropriately mapped in when Userspace reads the LATEST_FLUSH value.
*/
static void update_user_reg_page_mapping(struct kbase_device *kbdev)
{
struct kbase_context *kctx, *n;
lockdep_assert_held(&kbdev->pm.lock);
mutex_lock(&kbdev->csf.reg_lock);
/* Only if the mappings for USER page exist, update all PTEs associated to it */
if (kbdev->csf.nr_user_page_mapped > 0) {
if (likely(kbdev->csf.mali_file_inode)) {
/* This would zap the pte corresponding to the mapping of User
* register page for all the Kbase contexts.
*/
unmap_mapping_range(kbdev->csf.mali_file_inode->i_mapping,
BASEP_MEM_CSF_USER_REG_PAGE_HANDLE, PAGE_SIZE, 1);
} else {
dev_err(kbdev->dev,
"Device file inode not exist even if USER page previously mapped");
}
list_for_each_entry_safe(kctx, n, &kbdev->csf.user_reg.list, csf.user_reg.link) {
/* This would zap the PTE corresponding to the mapping of User
* Register page of the kbase context. The mapping will be reestablished
* when the context (user process) needs to access to the page.
*/
unmap_mapping_range(kbdev->csf.user_reg.filp->f_inode->i_mapping,
kctx->csf.user_reg.file_offset << PAGE_SHIFT, PAGE_SIZE, 1);
list_del_init(&kctx->csf.user_reg.link);
dev_dbg(kbdev->dev, "Updated USER Reg page mapping of ctx %d_%d", kctx->tgid,
kctx->id);
}
mutex_unlock(&kbdev->csf.reg_lock);
}
#endif

View File

@@ -995,4 +995,27 @@ static inline void kbase_pm_disable_db_mirror_interrupt(struct kbase_device *kbd
}
#endif
/**
* kbase_pm_l2_allow_mmu_page_migration - L2 state allows MMU page migration or not
*
* @kbdev: The kbase device structure for the device (must be a valid pointer)
*
* Check whether the L2 state is in power transition phase or not. If it is, the MMU
* page migration should be deferred. The caller must hold hwaccess_lock, and, if MMU
* page migration is intended, immediately start the MMU migration action without
* dropping the lock. When page migration begins, a flag is set in kbdev that would
* prevent the L2 state machine traversing into power transition phases, until
* the MMU migration action ends.
*
* Return: true if MMU page migration is allowed
*/
static inline bool kbase_pm_l2_allow_mmu_page_migration(struct kbase_device *kbdev)
{
struct kbase_pm_backend_data *backend = &kbdev->pm.backend;
lockdep_assert_held(&kbdev->hwaccess_lock);
return (backend->l2_state != KBASE_L2_PEND_ON && backend->l2_state != KBASE_L2_PEND_OFF);
}
#endif /* _KBASE_BACKEND_PM_INTERNAL_H_ */

View File

@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
* (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2020-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -66,6 +66,13 @@
* is being put to sleep.
* @ON_PEND_SLEEP: MCU sleep is in progress.
* @IN_SLEEP: Sleep request is completed and MCU has halted.
* @ON_PMODE_ENTER_CORESIGHT_DISABLE: The MCU is on, protected mode enter is about to
* be requested, Coresight is being disabled.
 * @ON_PMODE_EXIT_CORESIGHT_ENABLE: The MCU is on, protected mode exit has happened,
 *                                  and Coresight is being enabled.
* @CORESIGHT_DISABLE: The MCU is on and Coresight is being disabled.
* @CORESIGHT_ENABLE: The MCU is on, host does not have control and
* Coresight is being enabled.
*/
KBASEP_MCU_STATE(OFF)
KBASEP_MCU_STATE(PEND_ON_RELOAD)
@@ -92,3 +99,10 @@ KBASEP_MCU_STATE(HCTL_SHADERS_CORE_OFF_PEND)
KBASEP_MCU_STATE(ON_SLEEP_INITIATE)
KBASEP_MCU_STATE(ON_PEND_SLEEP)
KBASEP_MCU_STATE(IN_SLEEP)
#if IS_ENABLED(CONFIG_MALI_CORESIGHT)
/* Additional MCU states for Coresight */
KBASEP_MCU_STATE(ON_PMODE_ENTER_CORESIGHT_DISABLE)
KBASEP_MCU_STATE(ON_PMODE_EXIT_CORESIGHT_ENABLE)
KBASEP_MCU_STATE(CORESIGHT_DISABLE)
KBASEP_MCU_STATE(CORESIGHT_ENABLE)
#endif /* IS_ENABLED(CONFIG_MALI_CORESIGHT) */

View File

@@ -464,7 +464,7 @@ void kbase_pm_metrics_stop(struct kbase_device *kbdev)
*/
static void kbase_pm_metrics_active_calc(struct kbase_device *kbdev)
{
int js;
unsigned int js;
lockdep_assert_held(&kbdev->pm.backend.metrics.lock);

View File

@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
* (C) COPYRIGHT 2014-2016, 2018-2022 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2014-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -22,6 +22,8 @@
#include <mali_kbase.h>
#include <mali_kbase_hwaccess_time.h>
#if MALI_USE_CSF
#include <asm/arch_timer.h>
#include <linux/gcd.h>
#include <csf/mali_kbase_csf_timeout.h>
#endif
#include <device/mali_kbase_device.h>
@@ -121,20 +123,29 @@ unsigned int kbase_get_timeout_ms(struct kbase_device *kbdev,
/* Only for debug messages, safe default in case it's mis-maintained */
const char *selector_str = "(unknown)";
if (WARN(!kbdev->lowest_gpu_freq_khz,
"Lowest frequency uninitialized! Using reference frequency for scaling")) {
if (!kbdev->lowest_gpu_freq_khz) {
dev_dbg(kbdev->dev,
"Lowest frequency uninitialized! Using reference frequency for scaling");
freq_khz = DEFAULT_REF_TIMEOUT_FREQ_KHZ;
} else {
freq_khz = kbdev->lowest_gpu_freq_khz;
}
switch (selector) {
case MMU_AS_INACTIVE_WAIT_TIMEOUT:
selector_str = "MMU_AS_INACTIVE_WAIT_TIMEOUT";
nr_cycles = MMU_AS_INACTIVE_WAIT_TIMEOUT_CYCLES;
break;
case KBASE_TIMEOUT_SELECTOR_COUNT:
default:
#if !MALI_USE_CSF
WARN(1, "Invalid timeout selector used! Using default value");
nr_cycles = JM_DEFAULT_TIMEOUT_CYCLES;
break;
case JM_DEFAULT_JS_FREE_TIMEOUT:
selector_str = "JM_DEFAULT_JS_FREE_TIMEOUT";
nr_cycles = JM_DEFAULT_JS_FREE_TIMEOUT_CYCLES;
break;
#else
/* Use Firmware timeout if invalid selection */
WARN(1,
@@ -204,3 +215,65 @@ u64 kbase_backend_get_cycle_cnt(struct kbase_device *kbdev)
return lo | (((u64) hi1) << 32);
}
#if MALI_USE_CSF
/**
 * kbase_backend_time_convert_gpu_to_cpu - Convert a GPU timestamp to a CPU
 *                                         timestamp in nanoseconds.
 *
 * @kbdev:  Kbase device pointer; WARNs and returns 0 if NULL.
 * @gpu_ts: GPU timestamp value to convert.
 *
 * Applies the rational scale factor (multiplier/divisor) and the offset that
 * were computed in kbase_backend_time_init().
 *
 * Return: the converted CPU timestamp, or 0 if @kbdev is NULL.
 */
u64 __maybe_unused kbase_backend_time_convert_gpu_to_cpu(struct kbase_device *kbdev, u64 gpu_ts)
{
	if (WARN_ON(!kbdev))
		return 0;

	/* cpu_ts = gpu_ts * (NSEC_PER_SEC / timer freq) + offset, with the
	 * ratio kept as a reduced fraction for integer-arithmetic precision.
	 */
	return div64_u64(gpu_ts * kbdev->backend_time.multiplier, kbdev->backend_time.divisor) +
	       kbdev->backend_time.offset;
}
/**
 * get_cpu_gpu_time() - Sample the CPU and GPU clocks together.
 *
 * @kbdev:     Kbase device.
 * @cpu_ts:    Where to store the CPU timestamp in nanoseconds, or NULL if the
 *             caller does not need it.
 * @gpu_ts:    Output GPU timestamp.
 * @gpu_cycle: Output GPU cycle count.
 */
static void get_cpu_gpu_time(struct kbase_device *kbdev, u64 *cpu_ts, u64 *gpu_ts, u64 *gpu_cycle)
{
	struct timespec64 now;

	kbase_backend_get_gpu_time(kbdev, gpu_cycle, gpu_ts, &now);

	if (cpu_ts)
		*cpu_ts = now.tv_sec * NSEC_PER_SEC + now.tv_nsec;
}
#endif
/**
 * kbase_backend_time_init() - Initialize the GPU-to-CPU timestamp conversion.
 *
 * @kbdev: Kbase device (must be a valid pointer).
 *
 * On CSF GPUs, samples the CPU and GPU clocks together, then derives the
 * rational factor (multiplier/divisor, reduced by their GCD to limit overflow)
 * and the offset used by kbase_backend_time_convert_gpu_to_cpu(). On non-CSF
 * builds this is a no-op.
 *
 * Return: 0 on success, -EINVAL if the architected timer frequency or the
 *         derived divisor is zero.
 */
int kbase_backend_time_init(struct kbase_device *kbdev)
{
#if MALI_USE_CSF
	u64 cpu_ts = 0;
	u64 gpu_ts = 0;
	u64 freq;
	u64 common_factor;

	get_cpu_gpu_time(kbdev, &cpu_ts, &gpu_ts, NULL);
	freq = arch_timer_get_cntfrq();

	if (!freq) {
		/* Fixed: the message previously named arch_timer_get_rate(),
		 * but the function actually called is arch_timer_get_cntfrq().
		 */
		dev_warn(kbdev->dev, "arch_timer_get_cntfrq() is zero!");
		return -EINVAL;
	}

	/* Reduce NSEC_PER_SEC/freq by their GCD so the later 64-bit
	 * multiply in the conversion is less likely to overflow.
	 */
	common_factor = gcd(NSEC_PER_SEC, freq);

	kbdev->backend_time.multiplier = div64_u64(NSEC_PER_SEC, common_factor);
	kbdev->backend_time.divisor = div64_u64(freq, common_factor);

	if (!kbdev->backend_time.divisor) {
		dev_warn(kbdev->dev, "CPU to GPU divisor is zero!");
		return -EINVAL;
	}

	/* Offset anchors the GPU timeline to the sampled CPU time. */
	kbdev->backend_time.offset = cpu_ts - div64_u64(gpu_ts * kbdev->backend_time.multiplier,
							kbdev->backend_time.divisor);
#endif

	return 0;
}

View File

@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
* (C) COPYRIGHT 2017-2022 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2017-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -28,7 +28,7 @@ bob_defaults {
defaults: [
"kernel_defaults",
],
no_mali: {
mali_no_mali: {
kbuild_options: [
"CONFIG_MALI_NO_MALI=y",
"CONFIG_MALI_NO_MALI_DEFAULT_GPU={{.gpu}}",
@@ -62,8 +62,11 @@ bob_defaults {
mali_dma_buf_legacy_compat: {
kbuild_options: ["CONFIG_MALI_DMA_BUF_LEGACY_COMPAT=y"],
},
large_page_alloc_override: {
kbuild_options: ["CONFIG_LARGE_PAGE_ALLOC_OVERRIDE=y"],
},
large_page_alloc: {
kbuild_options: ["CONFIG_MALI_2MB_ALLOC=y"],
kbuild_options: ["CONFIG_LARGE_PAGE_ALLOC=y"],
},
mali_memory_fully_backed: {
kbuild_options: ["CONFIG_MALI_MEMORY_FULLY_BACKED=y"],
@@ -86,9 +89,6 @@ bob_defaults {
mali_error_inject: {
kbuild_options: ["CONFIG_MALI_ERROR_INJECT=y"],
},
mali_gem5_build: {
kbuild_options: ["CONFIG_MALI_GEM5_BUILD=y"],
},
mali_debug: {
kbuild_options: [
"CONFIG_MALI_DEBUG=y",
@@ -137,8 +137,8 @@ bob_defaults {
platform_is_fpga: {
kbuild_options: ["CONFIG_MALI_IS_FPGA=y"],
},
mali_fw_core_dump: {
kbuild_options: ["CONFIG_MALI_FW_CORE_DUMP=y"],
mali_coresight: {
kbuild_options: ["CONFIG_MALI_CORESIGHT=y"],
},
kbuild_options: [
"CONFIG_MALI_PLATFORM_NAME={{.mali_platform_name}}",
@@ -191,6 +191,15 @@ bob_kernel_module {
"platform/*/*.c",
"platform/*/*.h",
"platform/*/Kbuild",
"platform/*/*/*.c",
"platform/*/*/*.h",
"platform/*/*/Kbuild",
"platform/*/*/*.c",
"platform/*/*/*.h",
"platform/*/*/Kbuild",
"platform/*/*/*/*.c",
"platform/*/*/*/*.h",
"platform/*/*/*/Kbuild",
"thirdparty/*.c",
"thirdparty/Kbuild",
"debug/*.c",

View File

@@ -34,6 +34,7 @@
#if IS_ENABLED(CONFIG_DEBUG_FS)
#include <csf/mali_kbase_csf_csg_debugfs.h>
#include <csf/mali_kbase_csf_kcpu_debugfs.h>
#include <csf/mali_kbase_csf_sync_debugfs.h>
#include <csf/mali_kbase_csf_tiler_heap_debugfs.h>
#include <csf/mali_kbase_csf_cpu_queue_debugfs.h>
#include <mali_kbase_debug_mem_view.h>
@@ -50,6 +51,7 @@ void kbase_context_debugfs_init(struct kbase_context *const kctx)
kbase_jit_debugfs_init(kctx);
kbase_csf_queue_group_debugfs_init(kctx);
kbase_csf_kcpu_debugfs_init(kctx);
kbase_csf_sync_debugfs_init(kctx);
kbase_csf_tiler_heap_debugfs_init(kctx);
kbase_csf_tiler_heap_total_debugfs_init(kctx);
kbase_csf_cpu_queue_debugfs_init(kctx);

View File

@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
* (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -22,6 +22,12 @@
/*
* Base kernel context APIs
*/
#include <linux/version.h>
#if KERNEL_VERSION(4, 11, 0) <= LINUX_VERSION_CODE
#include <linux/sched/task.h>
#else
#include <linux/sched.h>
#endif
#include <mali_kbase.h>
#include <gpu/mali_kbase_gpu_regmap.h>
@@ -129,17 +135,51 @@ int kbase_context_common_init(struct kbase_context *kctx)
/* creating a context is considered a disjoint event */
kbase_disjoint_event(kctx->kbdev);
kctx->as_nr = KBASEP_AS_NR_INVALID;
atomic_set(&kctx->refcount, 0);
spin_lock_init(&kctx->mm_update_lock);
kctx->process_mm = NULL;
kctx->task = NULL;
atomic_set(&kctx->nonmapped_pages, 0);
atomic_set(&kctx->permanent_mapped_pages, 0);
kctx->tgid = current->tgid;
kctx->pid = current->pid;
/* Check if this is a Userspace created context */
if (likely(kctx->filp)) {
struct pid *pid_struct;
rcu_read_lock();
pid_struct = find_get_pid(kctx->tgid);
if (likely(pid_struct)) {
struct task_struct *task = pid_task(pid_struct, PIDTYPE_PID);
if (likely(task)) {
/* Take a reference on the task to avoid slow lookup
* later on from the page allocation loop.
*/
get_task_struct(task);
kctx->task = task;
} else {
dev_err(kctx->kbdev->dev,
"Failed to get task pointer for %s/%d",
current->comm, current->pid);
err = -ESRCH;
}
put_pid(pid_struct);
} else {
dev_err(kctx->kbdev->dev,
"Failed to get pid pointer for %s/%d",
current->comm, current->pid);
err = -ESRCH;
}
rcu_read_unlock();
if (unlikely(err))
return err;
kbase_mem_mmgrab();
kctx->process_mm = current->mm;
}
atomic_set(&kctx->used_pages, 0);
mutex_init(&kctx->reg_lock);
@@ -172,13 +212,16 @@ int kbase_context_common_init(struct kbase_context *kctx)
kctx->id = atomic_add_return(1, &(kctx->kbdev->ctx_num)) - 1;
mutex_lock(&kctx->kbdev->kctx_list_lock);
err = kbase_insert_kctx_to_process(kctx);
if (err)
dev_err(kctx->kbdev->dev,
"(err:%d) failed to insert kctx to kbase_process\n", err);
mutex_unlock(&kctx->kbdev->kctx_list_lock);
if (err) {
dev_err(kctx->kbdev->dev,
"(err:%d) failed to insert kctx to kbase_process", err);
if (likely(kctx->filp)) {
mmdrop(kctx->process_mm);
put_task_struct(kctx->task);
}
}
return err;
}
@@ -251,15 +294,8 @@ static void kbase_remove_kctx_from_process(struct kbase_context *kctx)
void kbase_context_common_term(struct kbase_context *kctx)
{
unsigned long flags;
int pages;
mutex_lock(&kctx->kbdev->mmu_hw_mutex);
spin_lock_irqsave(&kctx->kbdev->hwaccess_lock, flags);
kbase_ctx_sched_remove_ctx(kctx);
spin_unlock_irqrestore(&kctx->kbdev->hwaccess_lock, flags);
mutex_unlock(&kctx->kbdev->mmu_hw_mutex);
pages = atomic_read(&kctx->used_pages);
if (pages != 0)
dev_warn(kctx->kbdev->dev,
@@ -271,6 +307,11 @@ void kbase_context_common_term(struct kbase_context *kctx)
kbase_remove_kctx_from_process(kctx);
mutex_unlock(&kctx->kbdev->kctx_list_lock);
if (likely(kctx->filp)) {
mmdrop(kctx->process_mm);
put_task_struct(kctx->task);
}
KBASE_KTRACE_ADD(kctx->kbdev, CORE_CTX_DESTROY, kctx, 0u);
}

View File

@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
* (C) COPYRIGHT 2011-2017, 2019-2021 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2011-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -92,6 +92,19 @@ static inline bool kbase_ctx_flag(struct kbase_context *kctx,
return atomic_read(&kctx->flags) & flag;
}
/**
 * kbase_ctx_compat_mode - Indicate whether a kbase context needs to operate
 *                         in compatibility mode for 32-bit userspace.
 * @kctx: kbase context
 *
 * On a 32-bit kernel every context is in compatibility mode; on a 64-bit
 * kernel it depends on the per-context KCTX_COMPAT flag.
 *
 * Return: True if needs to maintain compatibility, False otherwise.
 */
static inline bool kbase_ctx_compat_mode(struct kbase_context *kctx)
{
	/* Simplified from !A || (A && B): the repeated IS_ENABLED(CONFIG_64BIT)
	 * in the second operand was redundant.
	 */
	return !IS_ENABLED(CONFIG_64BIT) || kbase_ctx_flag(kctx, KCTX_COMPAT);
}
/**
* kbase_ctx_flag_clear - Clear @flag on @kctx
* @kctx: Pointer to kbase context

View File

@@ -31,20 +31,24 @@ mali_kbase-y += \
csf/mali_kbase_csf_reset_gpu.o \
csf/mali_kbase_csf_csg_debugfs.o \
csf/mali_kbase_csf_kcpu_debugfs.o \
csf/mali_kbase_csf_sync_debugfs.o \
csf/mali_kbase_csf_protected_memory.o \
csf/mali_kbase_csf_tiler_heap_debugfs.o \
csf/mali_kbase_csf_cpu_queue_debugfs.o \
csf/mali_kbase_csf_event.o \
csf/mali_kbase_csf_firmware_log.o \
csf/mali_kbase_csf_tiler_heap_reclaim.o
csf/mali_kbase_csf_firmware_core_dump.o \
csf/mali_kbase_csf_tiler_heap_reclaim.o \
csf/mali_kbase_csf_mcu_shared_reg.o
mali_kbase-$(CONFIG_MALI_REAL_HW) += csf/mali_kbase_csf_firmware.o
mali_kbase-$(CONFIG_MALI_NO_MALI) += csf/mali_kbase_csf_firmware_no_mali.o
ifeq ($(CONFIG_MALI_NO_MALI),y)
mali_kbase-y += csf/mali_kbase_csf_firmware_no_mali.o
else
mali_kbase-y += csf/mali_kbase_csf_firmware.o
endif
mali_kbase-$(CONFIG_DEBUG_FS) += csf/mali_kbase_debug_csf_fault.o
ifeq ($(KBUILD_EXTMOD),)
# in-tree
-include $(src)/csf/ipa_control/Kbuild

File diff suppressed because it is too large Load Diff

View File

@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
* (C) COPYRIGHT 2018-2022 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -40,12 +40,15 @@
*/
#define KBASEP_USER_DB_NR_INVALID ((s8)-1)
/* Number of pages used for GPU command queue's User input & output data */
#define KBASEP_NUM_CS_USER_IO_PAGES (2)
/* Indicates an invalid value for the scan out sequence number, used to
* signify there is no group that has protected mode execution pending.
*/
#define KBASEP_TICK_PROTM_PEND_SCAN_SEQ_NR_INVALID (U32_MAX)
#define FIRMWARE_IDLE_HYSTERESIS_TIME_MS (10) /* Default 10 milliseconds */
#define FIRMWARE_IDLE_HYSTERESIS_TIME_USEC (10000) /* Default 10 milliseconds */
/* Idle hysteresis time can be scaled down when GPU sleep feature is used */
#define FIRMWARE_IDLE_HYSTERESIS_GPU_SLEEP_SCALER (5)
@@ -123,6 +126,25 @@ int kbase_csf_queue_register_ex(struct kbase_context *kctx,
void kbase_csf_queue_terminate(struct kbase_context *kctx,
struct kbase_ioctl_cs_queue_terminate *term);
/**
* kbase_csf_free_command_stream_user_pages() - Free the resources allocated
* for a queue at the time of bind.
*
* @kctx: Address of the kbase context within which the queue was created.
* @queue: Pointer to the queue to be unlinked.
*
* This function will free the pair of physical pages allocated for a GPU
* command queue, and also release the hardware doorbell page, that were mapped
* into the process address space to enable direct submission of commands to
* the hardware. Also releases the reference taken on the queue when the mapping
* was created.
*
* If an explicit or implicit unbind was missed by the userspace then the
* mapping will persist. On process exit kernel itself will remove the mapping.
*/
void kbase_csf_free_command_stream_user_pages(struct kbase_context *kctx,
struct kbase_queue *queue);
/**
* kbase_csf_alloc_command_stream_user_pages - Allocate resources for a
* GPU command queue.
@@ -185,6 +207,20 @@ void kbase_csf_queue_unbind_stopped(struct kbase_queue *queue);
int kbase_csf_queue_kick(struct kbase_context *kctx,
struct kbase_ioctl_cs_queue_kick *kick);
/**
* kbase_csf_queue_group_handle_is_valid - Find the queue group corresponding
* to the indicated handle.
*
* @kctx: The kbase context under which the queue group exists.
* @group_handle: Handle for the group which uniquely identifies it within
* the context with which it was created.
*
* This function is used to find the queue group when passed a handle.
*
* Return: Pointer to a queue group on success, NULL on failure
*/
struct kbase_queue_group *kbase_csf_find_queue_group(struct kbase_context *kctx, u8 group_handle);
/**
* kbase_csf_queue_group_handle_is_valid - Find if the given queue group handle
* is valid.
@@ -238,6 +274,7 @@ void kbase_csf_queue_group_terminate(struct kbase_context *kctx,
*/
void kbase_csf_term_descheduled_queue_group(struct kbase_queue_group *group);
#if IS_ENABLED(CONFIG_MALI_VECTOR_DUMP) || MALI_UNIT_TEST
/**
* kbase_csf_queue_group_suspend - Suspend a GPU command queue group
*
@@ -255,6 +292,7 @@ void kbase_csf_term_descheduled_queue_group(struct kbase_queue_group *group);
*/
int kbase_csf_queue_group_suspend(struct kbase_context *kctx,
struct kbase_suspend_copy_buffer *sus_buf, u8 group_handle);
#endif
/**
* kbase_csf_add_group_fatal_error - Report a fatal group error to userspace
@@ -464,4 +502,5 @@ static inline u64 kbase_csf_ktrace_gpu_cycle_cnt(struct kbase_device *kbdev)
return 0;
#endif
}
#endif /* _KBASE_CSF_H_ */

View File

@@ -101,7 +101,7 @@ static void wait_csg_slots_status_update_finish(struct kbase_device *kbdev,
}
}
static void update_active_groups_status(struct kbase_device *kbdev, struct seq_file *file)
void kbase_csf_debugfs_update_active_groups_status(struct kbase_device *kbdev)
{
u32 max_csg_slots = kbdev->csf.global_iface.group_num;
DECLARE_BITMAP(used_csgs, MAX_SUPPORTED_CSGS) = { 0 };
@@ -117,6 +117,8 @@ static void update_active_groups_status(struct kbase_device *kbdev, struct seq_f
* status of all on-slot groups when MCU sleep request is sent to it.
*/
if (kbdev->csf.scheduler.state == SCHED_SLEEPING) {
/* Wait for the MCU sleep request to complete. */
kbase_pm_wait_for_desired_state(kbdev);
bitmap_copy(csg_slots_status_updated,
kbdev->csf.scheduler.csg_inuse_bitmap, max_csg_slots);
return;
@@ -497,23 +499,19 @@ static int kbasep_csf_queue_group_debugfs_show(struct seq_file *file,
{
u32 gr;
struct kbase_context *const kctx = file->private;
struct kbase_device *const kbdev = kctx->kbdev;
struct kbase_device *kbdev;
if (WARN_ON(!kctx))
return -EINVAL;
kbdev = kctx->kbdev;
seq_printf(file, "MALI_CSF_CSG_DEBUGFS_VERSION: v%u\n",
MALI_CSF_CSG_DEBUGFS_VERSION);
mutex_lock(&kctx->csf.lock);
kbase_csf_scheduler_lock(kbdev);
if (kbdev->csf.scheduler.state == SCHED_SLEEPING) {
/* Wait for the MCU sleep request to complete. Please refer the
* update_active_groups_status() function for the explanation.
*/
kbase_pm_wait_for_desired_state(kbdev);
}
update_active_groups_status(kbdev, file);
kbase_csf_debugfs_update_active_groups_status(kbdev);
for (gr = 0; gr < MAX_QUEUE_GROUP_NUM; gr++) {
struct kbase_queue_group *const group =
kctx->csf.queue_groups[gr];
@@ -547,13 +545,7 @@ static int kbasep_csf_scheduler_dump_active_groups(struct seq_file *file,
MALI_CSF_CSG_DEBUGFS_VERSION);
kbase_csf_scheduler_lock(kbdev);
if (kbdev->csf.scheduler.state == SCHED_SLEEPING) {
/* Wait for the MCU sleep request to complete. Please refer the
* update_active_groups_status() function for the explanation.
*/
kbase_pm_wait_for_desired_state(kbdev);
}
update_active_groups_status(kbdev, file);
kbase_csf_debugfs_update_active_groups_status(kbdev);
for (csg_nr = 0; csg_nr < num_groups; csg_nr++) {
struct kbase_queue_group *const group =
kbdev->csf.scheduler.csg_slots[csg_nr].resident_group;

View File

@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
* (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -44,4 +44,11 @@ void kbase_csf_queue_group_debugfs_init(struct kbase_context *kctx);
*/
void kbase_csf_debugfs_init(struct kbase_device *kbdev);
/**
* kbase_csf_debugfs_update_active_groups_status() - Update on-slot group statuses
*
* @kbdev: Pointer to the device
*/
void kbase_csf_debugfs_update_active_groups_status(struct kbase_device *kbdev);
#endif /* _KBASE_CSF_CSG_DEBUGFS_H_ */

View File

@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
* (C) COPYRIGHT 2018-2022 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -30,9 +30,14 @@
#include <linux/wait.h>
#include "mali_kbase_csf_firmware.h"
#include "mali_kbase_refcount_defs.h"
#include "mali_kbase_csf_event.h"
#include <uapi/gpu/arm/midgard/csf/mali_kbase_csf_errors_dumpfault.h>
#if IS_ENABLED(CONFIG_MALI_CORESIGHT)
#include <debug/backend/mali_kbase_debug_coresight_internal_csf.h>
#endif /* IS_ENABLED(CONFIG_MALI_CORESIGHT) */
/* Maximum number of KCPU command queues to be created per GPU address space.
*/
#define KBASEP_MAX_KCPU_QUEUES ((size_t)256)
@@ -265,6 +270,8 @@ enum kbase_queue_group_priority {
* @CSF_FIRMWARE_PING_TIMEOUT: Maximum time to wait for firmware to respond
* to a ping from KBase.
* @CSF_SCHED_PROTM_PROGRESS_TIMEOUT: Timeout used to prevent protected mode execution hang.
* @MMU_AS_INACTIVE_WAIT_TIMEOUT: Maximum waiting time in ms for the completion
* of a MMU operation
* @KBASE_TIMEOUT_SELECTOR_COUNT: Number of timeout selectors. Must be last in
* the enum.
*/
@@ -276,6 +283,7 @@ enum kbase_timeout_selector {
CSF_FIRMWARE_BOOT_TIMEOUT,
CSF_FIRMWARE_PING_TIMEOUT,
CSF_SCHED_PROTM_PROGRESS_TIMEOUT,
MMU_AS_INACTIVE_WAIT_TIMEOUT,
/* Must be the last in the enum */
KBASE_TIMEOUT_SELECTOR_COUNT
@@ -298,9 +306,9 @@ struct kbase_csf_notification {
*
* @kctx: Pointer to the base context with which this GPU command queue
* is associated.
* @reg: Pointer to the region allocated from the shared
* interface segment for mapping the User mode
* input/output pages in MCU firmware address space.
* @user_io_gpu_va: The start GPU VA address of this queue's userio pages. Only
* valid (i.e. not 0 ) when the queue is enabled and its owner
* group has a runtime bound csg_reg (group region).
* @phys: Pointer to the physical pages allocated for the
* pair or User mode input/output page
* @user_io_addr: Pointer to the permanent kernel mapping of User mode
@@ -376,18 +384,14 @@ struct kbase_csf_notification {
*/
struct kbase_queue {
struct kbase_context *kctx;
struct kbase_va_region *reg;
u64 user_io_gpu_va;
struct tagged_addr phys[2];
char *user_io_addr;
u64 handle;
int doorbell_nr;
unsigned long db_file_offset;
struct list_head link;
#if (KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE)
atomic_t refcount;
#else
refcount_t refcount;
#endif
kbase_refcount_t refcount;
struct kbase_queue_group *group;
struct kbase_va_region *queue_reg;
struct work_struct oom_event_work;
@@ -421,26 +425,33 @@ struct kbase_queue {
/**
* struct kbase_normal_suspend_buffer - Object representing a normal
* suspend buffer for queue group.
* @reg: Memory region allocated for the normal-mode suspend buffer.
* @gpu_va: The start GPU VA address of the bound suspend buffer. Note, this
* field is only valid when the owner group has a region bound at
* runtime.
* @phy: Array of physical memory pages allocated for the normal-
* mode suspend buffer.
*/
struct kbase_normal_suspend_buffer {
struct kbase_va_region *reg;
u64 gpu_va;
struct tagged_addr *phy;
};
/**
* struct kbase_protected_suspend_buffer - Object representing a protected
* suspend buffer for queue group.
* @reg: Memory region allocated for the protected-mode suspend buffer.
* @gpu_va: The start GPU VA address of the bound protected mode suspend buffer.
* Note, this field is only valid when the owner group has a region
* bound at runtime.
* @pma: Array of pointer to protected mode allocations containing
* information about memory pages allocated for protected mode
* suspend buffer.
* @alloc_retries: Number of times we retried allocing physical pages
* for protected suspend buffers.
*/
struct kbase_protected_suspend_buffer {
struct kbase_va_region *reg;
u64 gpu_va;
struct protected_memory_allocation **pma;
u8 alloc_retries;
};
/**
@@ -512,6 +523,13 @@ struct kbase_protected_suspend_buffer {
* @deschedule_deferred_cnt: Counter keeping a track of the number of threads
* that tried to deschedule the group and had to defer
* the descheduling due to the dump on fault.
* @csg_reg: An opaque pointer to the runtime bound shared regions. It is
* dynamically managed by the scheduler and can be NULL if the
* group is off-slot.
* @csg_reg_bind_retries: Runtime MCU shared region map operation attempted counts.
* It is accumulated on consecutive mapping attempt failures. On
* reaching a preset limit, the group is regarded as suffered
* a fatal error and triggers a fatal error notification.
*/
struct kbase_queue_group {
struct kbase_context *kctx;
@@ -562,6 +580,8 @@ struct kbase_queue_group {
#if IS_ENABLED(CONFIG_DEBUG_FS)
u32 deschedule_deferred_cnt;
#endif
void *csg_reg;
u8 csg_reg_bind_retries;
};
/**
@@ -623,6 +643,8 @@ struct kbase_csf_cpu_queue_context {
* @lock: Lock preventing concurrent access to the @in_use bitmap.
* @in_use: Bitmap that indicates which heap context structures are currently
* allocated (in @region).
* @heap_context_size_aligned: Size of a heap context structure, in bytes,
* aligned to GPU cacheline size.
*
* Heap context structures are allocated by the kernel for use by the firmware.
* The current implementation subdivides a single GPU memory region for use as
@@ -634,6 +656,7 @@ struct kbase_csf_heap_context_allocator {
u64 gpu_va;
struct mutex lock;
DECLARE_BITMAP(in_use, MAX_TILER_HEAPS);
u32 heap_context_size_aligned;
};
/**
@@ -755,6 +778,23 @@ struct kbase_csf_event {
spinlock_t lock;
};
/**
* struct kbase_csf_user_reg_context - Object containing members to manage the mapping
* of USER Register page for a context.
*
* @vma: Pointer to the VMA corresponding to the virtual mapping
* of the USER register page.
* @file_offset: File offset value that is assigned to userspace mapping
* of the USER Register page. It is in page units.
* @link: Links the context to the device list when mapping is pointing to
* either the dummy or the real Register page.
*/
struct kbase_csf_user_reg_context {
struct vm_area_struct *vma;
u32 file_offset;
struct list_head link;
};
/**
* struct kbase_csf_context - Object representing CSF for a GPU address space.
*
@@ -793,13 +833,11 @@ struct kbase_csf_event {
* used by GPU command queues, and progress timeout events.
* @link: Link to this csf context in the 'runnable_kctxs' list of
* the scheduler instance
* @user_reg_vma: Pointer to the vma corresponding to the virtual mapping
* of the USER register page. Currently used only for sanity
* checking.
* @sched: Object representing the scheduler's context
* @pending_submission_work: Work item to process pending kicked GPU command queues.
* @cpu_queue: CPU queue information. Only be available when DEBUG_FS
* is enabled.
* @user_reg: Collective information to support mapping to USER Register page.
*/
struct kbase_csf_context {
struct list_head event_pages_head;
@@ -814,12 +852,12 @@ struct kbase_csf_context {
struct kbase_csf_tiler_heap_context tiler_heaps;
struct workqueue_struct *wq;
struct list_head link;
struct vm_area_struct *user_reg_vma;
struct kbase_csf_scheduler_context sched;
struct work_struct pending_submission_work;
#if IS_ENABLED(CONFIG_DEBUG_FS)
struct kbase_csf_cpu_queue_context cpu_queue;
#endif
struct kbase_csf_user_reg_context user_reg;
};
/**
@@ -874,6 +912,33 @@ struct kbase_csf_sched_heap_reclaim_mgr {
atomic_t unused_pages;
};
/**
* struct kbase_csf_mcu_shared_regions - Control data for managing the MCU shared
* interface segment regions for scheduler
* operations
*
* @array_csg_regs: Base pointer of an internally created array_csg_regs[].
* @unused_csg_regs: List contains unused csg_regs items. When an item is bound to a
* group that is placed onto on-slot by the scheduler, it is dropped
* from the list (i.e busy active). The Scheduler will put an active
* item back when it's becoming off-slot (not in use).
* @dummy_phys: An array of dummy phys[nr_susp_pages] pages for use with normal
* and pmode suspend buffers, as a default replacement of a CSG's pages
* for the MMU mapping when the csg_reg is not bound to a group.
* @pma_phys: Pre-allocated array phy[nr_susp_pages] for transitional use with
* protected suspend buffer MMU map operations.
* @userio_mem_rd_flags: Userio input page's read access mapping configuration flags.
* @dummy_phys_allocated: Indicates the @dummy_phys pages are allocated when true.
*/
struct kbase_csf_mcu_shared_regions {
	/* Base pointer of the internally created array_csg_regs[] */
	void *array_csg_regs;
	/* Unused csg_regs items; an item is dropped from this list while it is
	 * bound to an on-slot group and put back when the group goes off-slot
	 */
	struct list_head unused_csg_regs;
	/* Dummy pages used as the default replacement of a CSG's normal and
	 * pmode suspend buffer pages in the MMU mapping while the csg_reg is
	 * not bound to a group
	 */
	struct tagged_addr *dummy_phys;
	/* Pre-allocated pages for transitional use with protected suspend
	 * buffer MMU map operations
	 */
	struct tagged_addr *pma_phys;
	/* Mapping configuration flags for read access to userio input pages */
	unsigned long userio_mem_rd_flags;
	/* True once the dummy_phys pages have been allocated */
	bool dummy_phys_allocated;
};
/**
* struct kbase_csf_scheduler - Object representing the scheduler used for
* CSF for an instance of GPU platform device.
@@ -1008,6 +1073,9 @@ struct kbase_csf_sched_heap_reclaim_mgr {
* @interrupt_lock is used to serialize the access.
* @protm_enter_time: GPU protected mode enter time.
* @reclaim_mgr: CSGs tiler heap manager object.
* @mcu_regs_data: Scheduler MCU shared regions data for managing the
* shared interface mappings for on-slot queues and
* CSG suspend buffers.
*/
struct kbase_csf_scheduler {
struct mutex lock;
@@ -1051,6 +1119,7 @@ struct kbase_csf_scheduler {
u32 tick_protm_pending_seq;
ktime_t protm_enter_time;
struct kbase_csf_sched_heap_reclaim_mgr reclaim_mgr;
struct kbase_csf_mcu_shared_regions mcu_regs_data;
};
/*
@@ -1328,6 +1397,24 @@ struct kbase_csf_firmware_log {
u32 func_call_list_va_end;
};
/**
* struct kbase_csf_firmware_core_dump - Object containing members for handling
* firmware core dump.
*
* @mcu_regs_addr: GPU virtual address of the start of the MCU registers buffer
* in Firmware.
* @version: Version of the FW image header core dump data format. Bits
* 7:0 specify version minor and 15:8 specify version major.
* @available: Flag to identify if the FW core dump buffer is available.
* True if entry is available in the FW image header and version
* is supported, False otherwise.
*/
struct kbase_csf_firmware_core_dump {
	/* GPU virtual address of the start of the MCU registers buffer in FW */
	u32 mcu_regs_addr;
	/* Core dump data format version: bits 7:0 minor, bits 15:8 major */
	u16 version;
	/* True if the FW image header provides a core dump entry and the
	 * version is supported
	 */
	bool available;
};
#if IS_ENABLED(CONFIG_DEBUG_FS)
/**
* struct kbase_csf_dump_on_fault - Faulty information to deliver to the daemon
@@ -1354,6 +1441,37 @@ struct kbase_csf_dump_on_fault {
};
#endif /* CONFIG_DEBUG_FS*/
/**
* struct kbase_csf_user_reg - Object containing members to manage the mapping
* of USER Register page for all contexts
*
* @dummy_page: Address of a dummy page that is mapped in place
* of the real USER Register page just before the GPU
* is powered down. The USER Register page is mapped
* in the address space of every process, that created
* a Base context, to enable the access to LATEST_FLUSH
* register from userspace.
* @filp: Pointer to a dummy file, that along with @file_offset,
* facilitates the use of unique file offset for the userspace mapping
* created for USER Register page.
* The userspace mapping is made to point to this file
* inside the mmap handler.
* @file_offset: Counter that is incremented every time Userspace creates a mapping of
* USER Register page, to provide a unique file offset range for
* @filp file, so that the CPU PTE of the Userspace mapping can be zapped
* through the kernel function unmap_mapping_range().
* It is incremented in page units.
* @list: Linked list to maintain user processes(contexts)
* having the mapping to USER Register page.
* It's protected by &kbase_csf_device.reg_lock.
*/
struct kbase_csf_user_reg {
	/* Dummy page mapped in place of the real USER register page just
	 * before the GPU is powered down
	 */
	struct tagged_addr dummy_page;
	/* Dummy file that, with file_offset, provides unique file offsets for
	 * the userspace mappings so their CPU PTEs can be zapped through
	 * unmap_mapping_range()
	 */
	struct file *filp;
	/* Counter (in page units) incremented each time userspace creates a
	 * mapping of the USER register page
	 */
	u32 file_offset;
	/* List of user processes (contexts) holding a mapping of the USER
	 * register page; protected by the CSF device reg_lock
	 */
	struct list_head list;
};
/**
* struct kbase_csf_device - Object representing CSF for an instance of GPU
* platform device.
@@ -1391,20 +1509,6 @@ struct kbase_csf_dump_on_fault {
* of the real Hw doorbell page for the active GPU
* command queues after they are stopped or after the
* GPU is powered down.
* @dummy_user_reg_page: Address of the dummy page that is mapped in place
* of the real User register page just before the GPU
* is powered down. The User register page is mapped
* in the address space of every process, that created
* a Base context, to enable the access to LATEST_FLUSH
* register from userspace.
* @nr_user_page_mapped: The number of clients using the mapping of USER page.
* This is used to maintain backward compatibility.
* It's protected by @reg_lock.
* @mali_file_inode: Pointer to the inode corresponding to mali device
* file. This is needed in order to switch to the
* @dummy_user_reg_page on GPU power down.
* All instances of the mali device file will point to
* the same inode. It's protected by @reg_lock.
* @reg_lock: Lock to serialize the MCU firmware related actions
* that affect all contexts such as allocation of
* regions from shared interface area, assignment of
@@ -1458,9 +1562,9 @@ struct kbase_csf_dump_on_fault {
* the glb_pwoff register. This is separated from
* the @p mcu_core_pwroff_dur_count as an update
* to the latter is asynchronous.
* @gpu_idle_hysteresis_ms: Sysfs attribute for the idle hysteresis time
* window in unit of ms. The firmware does not use it
* directly.
* @gpu_idle_hysteresis_us: Sysfs attribute for the idle hysteresis time
* window in unit of microseconds. The firmware does not
* use it directly.
* @gpu_idle_dur_count: The counterpart of the hysteresis time window in
* interface required format, ready to be used
* directly in the firmware.
@@ -1470,7 +1574,11 @@ struct kbase_csf_dump_on_fault {
* HW counters.
* @fw: Copy of the loaded MCU firmware image.
* @fw_log: Contain members required for handling firmware log.
* @fw_core_dump: Contain members required for handling the firmware
* core dump.
* @dof: Structure for dump on fault.
* @user_reg: Collective information to support the mapping to
* USER Register page for user processes.
*/
struct kbase_csf_device {
struct kbase_mmu_table mcu_mmu;
@@ -1484,9 +1592,6 @@ struct kbase_csf_device {
struct file *db_filp;
u32 db_file_offsets;
struct tagged_addr dummy_db_page;
struct tagged_addr dummy_user_reg_page;
u32 nr_user_page_mapped;
struct inode *mali_file_inode;
struct mutex reg_lock;
wait_queue_head_t event_wait;
bool interrupt_received;
@@ -1507,15 +1612,23 @@ struct kbase_csf_device {
u32 mcu_core_pwroff_dur_us;
u32 mcu_core_pwroff_dur_count;
u32 mcu_core_pwroff_reg_shadow;
u32 gpu_idle_hysteresis_ms;
u32 gpu_idle_hysteresis_us;
u32 gpu_idle_dur_count;
unsigned int fw_timeout_ms;
struct kbase_csf_hwcnt hwcnt;
struct kbase_csf_mcu_fw fw;
struct kbase_csf_firmware_log fw_log;
struct kbase_csf_firmware_core_dump fw_core_dump;
#if IS_ENABLED(CONFIG_DEBUG_FS)
struct kbase_csf_dump_on_fault dof;
#endif /* CONFIG_DEBUG_FS */
#if IS_ENABLED(CONFIG_MALI_CORESIGHT)
/**
* @coresight: Coresight device structure.
*/
struct kbase_debug_coresight_device coresight;
#endif /* IS_ENABLED(CONFIG_MALI_CORESIGHT) */
struct kbase_csf_user_reg user_reg;
};
/**
@@ -1532,6 +1645,10 @@ struct kbase_csf_device {
* @bf_data: Data relating to Bus fault.
* @gf_data: Data relating to GPU fault.
* @current_setup: Stores the MMU configuration for this address space.
* @is_unresponsive: Flag to indicate MMU is not responding.
* Set if an MMU command isn't completed within
* &kbase_device.mmu_as_inactive_wait_time_ms.
* Cleared by kbase_ctx_sched_restore_all_as() after GPU reset completes.
*/
struct kbase_as {
int number;
@@ -1543,6 +1660,7 @@ struct kbase_as {
struct kbase_fault bf_data;
struct kbase_fault gf_data;
struct kbase_mmu_setup current_setup;
bool is_unresponsive;
};
#endif /* _KBASE_CSF_DEFS_H_ */

View File

@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
* (C) COPYRIGHT 2018-2022 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -22,6 +22,7 @@
#include "mali_kbase.h"
#include "mali_kbase_csf_firmware_cfg.h"
#include "mali_kbase_csf_firmware_log.h"
#include "mali_kbase_csf_firmware_core_dump.h"
#include "mali_kbase_csf_trace_buffer.h"
#include "mali_kbase_csf_timeout.h"
#include "mali_kbase_mem.h"
@@ -38,7 +39,6 @@
#include "backend/gpu/mali_kbase_clk_rate_trace_mgr.h"
#include <csf/ipa_control/mali_kbase_csf_ipa_control.h>
#include <csf/mali_kbase_csf_registers.h>
#include <linux/list.h>
#include <linux/slab.h>
#include <linux/firmware.h>
@@ -81,7 +81,7 @@ MODULE_PARM_DESC(fw_debug,
#define FIRMWARE_HEADER_MAGIC (0xC3F13A6Eul)
#define FIRMWARE_HEADER_VERSION_MAJOR (0ul)
#define FIRMWARE_HEADER_VERSION_MINOR (2ul)
#define FIRMWARE_HEADER_VERSION_MINOR (3ul)
#define FIRMWARE_HEADER_LENGTH (0x14ul)
#define CSF_FIRMWARE_ENTRY_SUPPORTED_FLAGS \
@@ -93,12 +93,13 @@ MODULE_PARM_DESC(fw_debug,
CSF_FIRMWARE_ENTRY_ZERO | \
CSF_FIRMWARE_ENTRY_CACHE_MODE)
#define CSF_FIRMWARE_ENTRY_TYPE_INTERFACE (0)
#define CSF_FIRMWARE_ENTRY_TYPE_CONFIGURATION (1)
#define CSF_FIRMWARE_ENTRY_TYPE_TRACE_BUFFER (3)
#define CSF_FIRMWARE_ENTRY_TYPE_TIMELINE_METADATA (4)
#define CSF_FIRMWARE_ENTRY_TYPE_INTERFACE (0)
#define CSF_FIRMWARE_ENTRY_TYPE_CONFIGURATION (1)
#define CSF_FIRMWARE_ENTRY_TYPE_TRACE_BUFFER (3)
#define CSF_FIRMWARE_ENTRY_TYPE_TIMELINE_METADATA (4)
#define CSF_FIRMWARE_ENTRY_TYPE_BUILD_INFO_METADATA (6)
#define CSF_FIRMWARE_ENTRY_TYPE_FUNC_CALL_LIST (7)
#define CSF_FIRMWARE_ENTRY_TYPE_FUNC_CALL_LIST (7)
#define CSF_FIRMWARE_ENTRY_TYPE_CORE_DUMP (9)
#define CSF_FIRMWARE_CACHE_MODE_NONE (0ul << 3)
#define CSF_FIRMWARE_CACHE_MODE_CACHED (1ul << 3)
@@ -120,7 +121,6 @@ MODULE_PARM_DESC(fw_debug,
(GLB_REQ_CFG_ALLOC_EN_MASK | GLB_REQ_CFG_PROGRESS_TIMER_MASK | \
GLB_REQ_CFG_PWROFF_TIMER_MASK | GLB_REQ_IDLE_ENABLE_MASK)
static inline u32 input_page_read(const u32 *const input, const u32 offset)
{
WARN_ON(offset % sizeof(u32));
@@ -201,8 +201,8 @@ static int setup_shared_iface_static_region(struct kbase_device *kbdev)
if (!interface)
return -EINVAL;
reg = kbase_alloc_free_region(&kbdev->csf.shared_reg_rbtree, 0,
interface->num_pages_aligned, KBASE_REG_ZONE_MCU_SHARED);
reg = kbase_alloc_free_region(kbdev, &kbdev->csf.shared_reg_rbtree, 0,
interface->num_pages_aligned, KBASE_REG_ZONE_MCU_SHARED);
if (reg) {
mutex_lock(&kbdev->csf.reg_lock);
ret = kbase_add_va_region_rbtree(kbdev, reg,
@@ -286,22 +286,51 @@ static void boot_csf_firmware(struct kbase_device *kbdev)
{
kbase_csf_firmware_enable_mcu(kbdev);
#if IS_ENABLED(CONFIG_MALI_CORESIGHT)
kbase_debug_coresight_csf_state_request(kbdev, KBASE_DEBUG_CORESIGHT_CSF_ENABLED);
if (!kbase_debug_coresight_csf_state_wait(kbdev, KBASE_DEBUG_CORESIGHT_CSF_ENABLED))
dev_err(kbdev->dev, "Timeout waiting for CoreSight to be enabled");
#endif /* IS_ENABLED(CONFIG_MALI_CORESIGHT) */
wait_for_firmware_boot(kbdev);
}
static void wait_ready(struct kbase_device *kbdev)
/**
* wait_ready() - Wait for previously issued MMU command to complete.
*
* @kbdev: Kbase device to wait for a MMU command to complete.
*
* Reset GPU if the wait for previously issued command times out.
*
* Return: 0 on success, error code otherwise.
*/
static int wait_ready(struct kbase_device *kbdev)
{
u32 max_loops = KBASE_AS_INACTIVE_MAX_LOOPS;
u32 val;
const ktime_t wait_loop_start = ktime_get_raw();
const u32 mmu_as_inactive_wait_time_ms = kbdev->mmu_as_inactive_wait_time_ms;
s64 diff;
val = kbase_reg_read(kbdev, MMU_AS_REG(MCU_AS_NR, AS_STATUS));
do {
unsigned int i;
/* Wait for a while for the update command to take effect */
while (--max_loops && (val & AS_STATUS_AS_ACTIVE))
val = kbase_reg_read(kbdev, MMU_AS_REG(MCU_AS_NR, AS_STATUS));
for (i = 0; i < 1000; i++) {
/* Wait for the MMU status to indicate there is no active command */
if (!(kbase_reg_read(kbdev, MMU_AS_REG(MCU_AS_NR, AS_STATUS)) &
AS_STATUS_AS_ACTIVE))
return 0;
}
if (max_loops == 0)
dev_err(kbdev->dev, "AS_ACTIVE bit stuck, might be caused by slow/unstable GPU clock or possible faulty FPGA connector\n");
diff = ktime_to_ms(ktime_sub(ktime_get_raw(), wait_loop_start));
} while (diff < mmu_as_inactive_wait_time_ms);
dev_err(kbdev->dev,
"AS_ACTIVE bit stuck for MCU AS. Might be caused by unstable GPU clk/pwr or faulty system");
if (kbase_prepare_to_reset_gpu_locked(kbdev, RESET_FLAGS_HWC_UNRECOVERABLE_ERROR))
kbase_reset_gpu_locked(kbdev);
return -ETIMEDOUT;
}
static void unload_mmu_tables(struct kbase_device *kbdev)
@@ -316,7 +345,7 @@ static void unload_mmu_tables(struct kbase_device *kbdev)
mutex_unlock(&kbdev->mmu_hw_mutex);
}
static void load_mmu_tables(struct kbase_device *kbdev)
static int load_mmu_tables(struct kbase_device *kbdev)
{
unsigned long irq_flags;
@@ -327,7 +356,7 @@ static void load_mmu_tables(struct kbase_device *kbdev)
mutex_unlock(&kbdev->mmu_hw_mutex);
/* Wait for a while for the update command to take effect */
wait_ready(kbdev);
return wait_ready(kbdev);
}
/**
@@ -488,6 +517,7 @@ out:
* @kbdev: Kbase device structure
* @virtual_start: Start of the virtual address range required for an entry allocation
* @virtual_end: End of the virtual address range required for an entry allocation
* @flags: Firmware entry flags for comparison with the reusable pages found
* @phys: Pointer to the array of physical (tagged) addresses making up the new
* FW interface entry. It is an output parameter which would be made to
* point to an already existing array allocated for the previously parsed
@@ -508,10 +538,12 @@ out:
*
* Return: true if a large page can be reused, false otherwise.
*/
static inline bool entry_find_large_page_to_reuse(
struct kbase_device *kbdev, const u32 virtual_start, const u32 virtual_end,
struct tagged_addr **phys, struct protected_memory_allocation ***pma,
u32 num_pages, u32 *num_pages_aligned, bool *is_small_page)
static inline bool entry_find_large_page_to_reuse(struct kbase_device *kbdev,
const u32 virtual_start, const u32 virtual_end,
const u32 flags, struct tagged_addr **phys,
struct protected_memory_allocation ***pma,
u32 num_pages, u32 *num_pages_aligned,
bool *is_small_page)
{
struct kbase_csf_firmware_interface *interface = NULL;
struct kbase_csf_firmware_interface *target_interface = NULL;
@@ -557,7 +589,7 @@ static inline bool entry_find_large_page_to_reuse(
if (interface->virtual & (SZ_2M - 1))
continue;
if (virtual_diff < virtual_diff_min) {
if ((virtual_diff < virtual_diff_min) && (interface->flags == flags)) {
target_interface = interface;
virtual_diff_min = virtual_diff;
}
@@ -620,6 +652,7 @@ static int parse_memory_setup_entry(struct kbase_device *kbdev,
struct protected_memory_allocation **pma = NULL;
bool reuse_pages = false;
bool is_small_page = true;
bool ignore_page_migration = true;
if (data_end < data_start) {
dev_err(kbdev->dev, "Firmware corrupt, data_end < data_start (0x%x<0x%x)\n",
@@ -662,9 +695,9 @@ static int parse_memory_setup_entry(struct kbase_device *kbdev,
num_pages = (virtual_end - virtual_start)
>> PAGE_SHIFT;
reuse_pages = entry_find_large_page_to_reuse(
kbdev, virtual_start, virtual_end, &phys, &pma,
num_pages, &num_pages_aligned, &is_small_page);
reuse_pages =
entry_find_large_page_to_reuse(kbdev, virtual_start, virtual_end, flags, &phys,
&pma, num_pages, &num_pages_aligned, &is_small_page);
if (!reuse_pages)
phys = kmalloc_array(num_pages_aligned, sizeof(*phys), GFP_KERNEL);
@@ -684,7 +717,8 @@ static int parse_memory_setup_entry(struct kbase_device *kbdev,
ret = kbase_mem_pool_alloc_pages(
kbase_mem_pool_group_select(kbdev, KBASE_MEM_GROUP_CSF_FW,
is_small_page),
num_pages_aligned, phys, false);
num_pages_aligned, phys, false, NULL);
ignore_page_migration = false;
}
}
@@ -794,7 +828,8 @@ static int parse_memory_setup_entry(struct kbase_device *kbdev,
ret = kbase_mmu_insert_pages_no_flush(kbdev, &kbdev->csf.mcu_mmu,
virtual_start >> PAGE_SHIFT, phys,
num_pages_aligned, mem_flags,
KBASE_MEM_GROUP_CSF_FW, NULL);
KBASE_MEM_GROUP_CSF_FW, NULL, NULL,
ignore_page_migration);
if (ret != 0) {
dev_err(kbdev->dev, "Failed to insert firmware pages\n");
@@ -1023,20 +1058,26 @@ static int load_firmware_entry(struct kbase_device *kbdev, const struct kbase_cs
return parse_build_info_metadata_entry(kbdev, fw, entry, size);
case CSF_FIRMWARE_ENTRY_TYPE_FUNC_CALL_LIST:
/* Function call list section */
if (size < 2 * sizeof(*entry)) {
if (size < FUNC_CALL_LIST_ENTRY_NAME_OFFSET + sizeof(*entry)) {
dev_err(kbdev->dev, "Function call list entry too short (size=%u)\n",
size);
return -EINVAL;
}
kbase_csf_firmware_log_parse_logging_call_list_entry(kbdev, entry);
break;
}
if (!optional) {
dev_err(kbdev->dev,
"Unsupported non-optional entry type %u in firmware\n",
type);
return -EINVAL;
return 0;
case CSF_FIRMWARE_ENTRY_TYPE_CORE_DUMP:
/* Core Dump section */
if (size < CORE_DUMP_ENTRY_START_ADDR_OFFSET + sizeof(*entry)) {
dev_err(kbdev->dev, "FW Core dump entry too short (size=%u)\n", size);
return -EINVAL;
}
return kbase_csf_firmware_core_dump_entry_parse(kbdev, entry);
default:
if (!optional) {
dev_err(kbdev->dev, "Unsupported non-optional entry type %u in firmware\n",
type);
return -EINVAL;
}
}
return 0;
@@ -1687,6 +1728,71 @@ static void enable_gpu_idle_timer(struct kbase_device *const kbdev)
kbdev->csf.gpu_idle_dur_count);
}
/* Check whether firmware has acknowledged the debug request bits tracked by
 * @req_mask, i.e. GLB_DEBUG_ACK matches GLB_DEBUG_REQ under those bits.
 */
static bool global_debug_request_complete(struct kbase_device *const kbdev, u32 const req_mask)
{
	struct kbase_csf_global_iface *iface = &kbdev->csf.global_iface;
	unsigned long flags;
	u32 ack, req;

	/* Snapshot both sides of the request/ack handshake under the lock. */
	kbase_csf_scheduler_spin_lock(kbdev, &flags);
	ack = kbase_csf_firmware_global_output(iface, GLB_DEBUG_ACK);
	req = kbase_csf_firmware_global_input_read(iface, GLB_DEBUG_REQ);
	kbase_csf_scheduler_spin_unlock(kbdev, flags);

	/* Complete when no masked bit differs between request and ack. */
	return ((ack ^ req) & req_mask) == 0;
}
/* Raise a global debug request towards firmware by toggling the bits in
 * @req_mask relative to the last acknowledged value, so firmware observes a
 * fresh request on exactly those bits. Caller must hold the scheduler
 * interrupt spinlock.
 */
static void set_global_debug_request(const struct kbase_csf_global_iface *const global_iface,
				     u32 const req_mask)
{
	u32 toggled;

	kbase_csf_scheduler_spin_lock_assert_held(global_iface->kbdev);

	toggled = kbase_csf_firmware_global_output(global_iface, GLB_DEBUG_ACK) ^ req_mask;
	kbase_csf_firmware_global_input_mask(global_iface, GLB_DEBUG_REQ, toggled, req_mask);
}
/* Ask firmware to produce a core dump: toggle DEBUG_RUN together with the
 * CORE_DUMP run-mode field, then raise the CSF debug request bit so firmware
 * processes it.
 */
static void request_fw_core_dump(const struct kbase_csf_global_iface *const global_iface)
{
	const uint32_t run_mode =
		GLB_DEBUG_REQ_RUN_MODE_SET(0, GLB_DEBUG_RUN_MODE_TYPE_CORE_DUMP);

	set_global_debug_request(global_iface, GLB_DEBUG_REQ_DEBUG_RUN_MASK | run_mode);
	set_global_request(global_iface, GLB_REQ_DEBUG_CSF_REQ_MASK);
}
/* Request a firmware core dump and wait for firmware to acknowledge it.
 * Requests are serialized through the CSF reg_lock. Returns 0 on success or
 * a negative error code on timeout.
 */
int kbase_csf_firmware_req_core_dump(struct kbase_device *const kbdev)
{
	const struct kbase_csf_global_iface *const global_iface = &kbdev->csf.global_iface;
	unsigned long flags;
	int err;

	/* Only one CORE_DUMP request may be in flight at a time. */
	mutex_lock(&kbdev->csf.reg_lock);

	/* Raise the CORE_DUMP debug request and ring the doorbell so firmware
	 * acts on it.
	 */
	kbase_csf_scheduler_spin_lock(kbdev, &flags);
	request_fw_core_dump(global_iface);
	kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR);
	kbase_csf_scheduler_spin_unlock(kbdev, flags);

	/* Block until firmware acknowledges (or the wait times out). */
	err = wait_for_global_request(kbdev, GLB_REQ_DEBUG_CSF_REQ_MASK);
	if (!err)
		WARN_ON(!global_debug_request_complete(kbdev, GLB_DEBUG_REQ_DEBUG_RUN_MASK));

	mutex_unlock(&kbdev->csf.reg_lock);

	return err;
}
/**
* kbasep_enable_rtu - Enable Ray Tracing Unit on powering up shader core
@@ -1714,7 +1820,7 @@ static void global_init(struct kbase_device *const kbdev, u64 core_mask)
GLB_ACK_IRQ_MASK_CFG_PROGRESS_TIMER_MASK | GLB_ACK_IRQ_MASK_PROTM_ENTER_MASK |
GLB_ACK_IRQ_MASK_PROTM_EXIT_MASK | GLB_ACK_IRQ_MASK_FIRMWARE_CONFIG_UPDATE_MASK |
GLB_ACK_IRQ_MASK_CFG_PWROFF_TIMER_MASK | GLB_ACK_IRQ_MASK_IDLE_EVENT_MASK |
GLB_ACK_IRQ_MASK_IDLE_ENABLE_MASK;
GLB_REQ_DEBUG_CSF_REQ_MASK | GLB_ACK_IRQ_MASK_IDLE_ENABLE_MASK;
const struct kbase_csf_global_iface *const global_iface =
&kbdev->csf.global_iface;
@@ -1740,6 +1846,14 @@ static void global_init(struct kbase_device *const kbdev, u64 core_mask)
kbase_csf_firmware_global_input(global_iface,
GLB_ACK_IRQ_MASK, ack_irq_mask);
#if IS_ENABLED(CONFIG_MALI_CORESIGHT)
/* Enable FW MCU read/write debug interfaces */
kbase_csf_firmware_global_input_mask(
global_iface, GLB_DEBUG_ACK_IRQ_MASK,
GLB_DEBUG_REQ_FW_AS_READ_MASK | GLB_DEBUG_REQ_FW_AS_WRITE_MASK,
GLB_DEBUG_REQ_FW_AS_READ_MASK | GLB_DEBUG_REQ_FW_AS_WRITE_MASK);
#endif /* IS_ENABLED(CONFIG_MALI_CORESIGHT) */
kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR);
kbase_csf_scheduler_spin_unlock(kbdev, flags);
@@ -1890,12 +2004,12 @@ void kbase_csf_firmware_reload_completed(struct kbase_device *kbdev)
kbase_pm_update_state(kbdev);
}
static u32 convert_dur_to_idle_count(struct kbase_device *kbdev, const u32 dur_ms)
static u32 convert_dur_to_idle_count(struct kbase_device *kbdev, const u32 dur_us)
{
#define HYSTERESIS_VAL_UNIT_SHIFT (10)
/* Get the cntfreq_el0 value, which drives the SYSTEM_TIMESTAMP */
u64 freq = arch_timer_get_cntfrq();
u64 dur_val = dur_ms;
u64 dur_val = dur_us;
u32 cnt_val_u32, reg_val_u32;
bool src_system_timestamp = freq > 0;
@@ -1913,9 +2027,9 @@ static u32 convert_dur_to_idle_count(struct kbase_device *kbdev, const u32 dur_m
"Can't get the timestamp frequency, use cycle counter format with firmware idle hysteresis!");
}
/* Formula for dur_val = ((dur_ms/1000) * freq_HZ) >> 10) */
/* Formula for dur_val = ((dur_us/1000000) * freq_HZ) >> 10) */
dur_val = (dur_val * freq) >> HYSTERESIS_VAL_UNIT_SHIFT;
dur_val = div_u64(dur_val, 1000);
dur_val = div_u64(dur_val, 1000000);
/* Interface limits the value field to S32_MAX */
cnt_val_u32 = (dur_val > S32_MAX) ? S32_MAX : (u32)dur_val;
@@ -1938,7 +2052,7 @@ u32 kbase_csf_firmware_get_gpu_idle_hysteresis_time(struct kbase_device *kbdev)
u32 dur;
kbase_csf_scheduler_spin_lock(kbdev, &flags);
dur = kbdev->csf.gpu_idle_hysteresis_ms;
dur = kbdev->csf.gpu_idle_hysteresis_us;
kbase_csf_scheduler_spin_unlock(kbdev, flags);
return dur;
@@ -1955,7 +2069,7 @@ u32 kbase_csf_firmware_set_gpu_idle_hysteresis_time(struct kbase_device *kbdev,
mutex_lock(&kbdev->fw_load_lock);
if (unlikely(!kbdev->csf.firmware_inited)) {
kbase_csf_scheduler_spin_lock(kbdev, &flags);
kbdev->csf.gpu_idle_hysteresis_ms = dur;
kbdev->csf.gpu_idle_hysteresis_us = dur;
kbdev->csf.gpu_idle_dur_count = hysteresis_val;
kbase_csf_scheduler_spin_unlock(kbdev, flags);
mutex_unlock(&kbdev->fw_load_lock);
@@ -1986,7 +2100,7 @@ u32 kbase_csf_firmware_set_gpu_idle_hysteresis_time(struct kbase_device *kbdev,
wait_for_global_request(kbdev, GLB_REQ_IDLE_DISABLE_MASK);
kbase_csf_scheduler_spin_lock(kbdev, &flags);
kbdev->csf.gpu_idle_hysteresis_ms = dur;
kbdev->csf.gpu_idle_hysteresis_us = dur;
kbdev->csf.gpu_idle_dur_count = hysteresis_val;
kbase_csf_firmware_enable_gpu_idle_timer(kbdev);
kbase_csf_scheduler_spin_unlock(kbdev, flags);
@@ -2148,6 +2262,7 @@ int kbase_csf_firmware_early_init(struct kbase_device *kbdev)
INIT_LIST_HEAD(&kbdev->csf.firmware_config);
INIT_LIST_HEAD(&kbdev->csf.firmware_timeline_metadata);
INIT_LIST_HEAD(&kbdev->csf.firmware_trace_buffers.list);
INIT_LIST_HEAD(&kbdev->csf.user_reg.list);
INIT_WORK(&kbdev->csf.firmware_reload_work,
kbase_csf_firmware_reload_worker);
INIT_WORK(&kbdev->csf.fw_error_work, firmware_error_worker);
@@ -2166,14 +2281,14 @@ void kbase_csf_firmware_early_term(struct kbase_device *kbdev)
int kbase_csf_firmware_late_init(struct kbase_device *kbdev)
{
kbdev->csf.gpu_idle_hysteresis_ms = FIRMWARE_IDLE_HYSTERESIS_TIME_MS;
kbdev->csf.gpu_idle_hysteresis_us = FIRMWARE_IDLE_HYSTERESIS_TIME_USEC;
#ifdef KBASE_PM_RUNTIME
if (kbase_pm_gpu_sleep_allowed(kbdev))
kbdev->csf.gpu_idle_hysteresis_ms /= FIRMWARE_IDLE_HYSTERESIS_GPU_SLEEP_SCALER;
kbdev->csf.gpu_idle_hysteresis_us /= FIRMWARE_IDLE_HYSTERESIS_GPU_SLEEP_SCALER;
#endif
WARN_ON(!kbdev->csf.gpu_idle_hysteresis_ms);
WARN_ON(!kbdev->csf.gpu_idle_hysteresis_us);
kbdev->csf.gpu_idle_dur_count =
convert_dur_to_idle_count(kbdev, kbdev->csf.gpu_idle_hysteresis_ms);
convert_dur_to_idle_count(kbdev, kbdev->csf.gpu_idle_hysteresis_us);
return 0;
}
@@ -2311,7 +2426,9 @@ int kbase_csf_firmware_load_init(struct kbase_device *kbdev)
kbase_pm_wait_for_l2_powered(kbdev);
/* Load the MMU tables into the selected address space */
load_mmu_tables(kbdev);
ret = load_mmu_tables(kbdev);
if (ret != 0)
goto err_out;
boot_csf_firmware(kbdev);
@@ -2353,6 +2470,9 @@ int kbase_csf_firmware_load_init(struct kbase_device *kbdev)
goto err_out;
}
if (kbdev->csf.fw_core_dump.available)
kbase_csf_firmware_core_dump_init(kbdev);
/* Firmware loaded successfully, ret = 0 */
KBASE_KTRACE_ADD(kbdev, CSF_FIRMWARE_BOOT, NULL,
(((u64)version_hash) << 32) |
@@ -2470,6 +2590,119 @@ void kbase_csf_firmware_unload_term(struct kbase_device *kbdev)
kbdev->as_free |= MCU_AS_BITMASK;
}
#if IS_ENABLED(CONFIG_MALI_CORESIGHT)
int kbase_csf_firmware_mcu_register_write(struct kbase_device *const kbdev, u32 const reg_addr,
					  u32 const reg_val)
{
	struct kbase_csf_global_iface *iface = &kbdev->csf.global_iface;
	unsigned long flags;
	u32 toggled;
	int err;

	mutex_lock(&kbdev->csf.reg_lock);
	kbase_csf_scheduler_spin_lock(kbdev, &flags);

	/* Publish the target address and the value to write. */
	kbase_csf_firmware_global_input(iface, GLB_DEBUG_ARG_IN0, reg_addr);
	kbase_csf_firmware_global_input(iface, GLB_DEBUG_ARG_IN1, reg_val);

	/* Toggle the FW_AS_WRITE bit relative to the last ACK to raise a new
	 * debug request, then flag it through the CSF debug request bit.
	 */
	toggled = kbase_csf_firmware_global_output(iface, GLB_DEBUG_ACK) ^
		  GLB_DEBUG_REQ_FW_AS_WRITE_MASK;
	kbase_csf_firmware_global_input_mask(iface, GLB_DEBUG_REQ, toggled,
					     GLB_DEBUG_REQ_FW_AS_WRITE_MASK);
	set_global_request(iface, GLB_REQ_DEBUG_CSF_REQ_MASK);

	/* Notify firmware and wait for its acknowledgement. */
	kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR);
	kbase_csf_scheduler_spin_unlock(kbdev, flags);

	err = wait_for_global_request(kbdev, GLB_REQ_DEBUG_CSF_REQ_MASK);

	mutex_unlock(&kbdev->csf.reg_lock);

	dev_dbg(kbdev->dev, "w: reg %08x val %08x", reg_addr, reg_val);

	return err;
}
int kbase_csf_firmware_mcu_register_read(struct kbase_device *const kbdev, u32 const reg_addr,
					 u32 *reg_val)
{
	struct kbase_csf_global_iface *iface = &kbdev->csf.global_iface;
	unsigned long flags;
	u32 toggled;
	int err;

	if (WARN_ON(reg_val == NULL))
		return -EINVAL;

	mutex_lock(&kbdev->csf.reg_lock);
	kbase_csf_scheduler_spin_lock(kbdev, &flags);

	/* Publish the address to read from. */
	kbase_csf_firmware_global_input(iface, GLB_DEBUG_ARG_IN0, reg_addr);

	/* Toggle the FW_AS_READ bit relative to the last ACK to raise a new
	 * debug request, then flag it through the CSF debug request bit.
	 */
	toggled = kbase_csf_firmware_global_output(iface, GLB_DEBUG_ACK) ^
		  GLB_DEBUG_REQ_FW_AS_READ_MASK;
	kbase_csf_firmware_global_input_mask(iface, GLB_DEBUG_REQ, toggled,
					     GLB_DEBUG_REQ_FW_AS_READ_MASK);
	set_global_request(iface, GLB_REQ_DEBUG_CSF_REQ_MASK);

	/* Notify firmware and wait for its acknowledgement. */
	kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR);
	kbase_csf_scheduler_spin_unlock(kbdev, flags);

	err = wait_for_global_request(kbdev, GLB_REQ_DEBUG_CSF_REQ_MASK);

	if (!err) {
		/* Fetch the value firmware placed in the output argument. */
		kbase_csf_scheduler_spin_lock(kbdev, &flags);
		*reg_val = kbase_csf_firmware_global_output(iface, GLB_DEBUG_ARG_OUT0);
		kbase_csf_scheduler_spin_unlock(kbdev, flags);
	}

	mutex_unlock(&kbdev->csf.reg_lock);

	dev_dbg(kbdev->dev, "r: reg %08x val %08x", reg_addr, *reg_val);

	return err;
}
/* Poll an MCU register until (value & @val_mask) == @reg_val or the FW
 * timeout expires.
 *
 * Fix: @read_val was previously uninitialized, so the error/timeout log could
 * print indeterminate data when the deadline had already passed before the
 * first iteration, or when the very first read failed. It is now initialized.
 *
 * Return: 0 on match, negative error code on read failure or -ETIMEDOUT.
 */
int kbase_csf_firmware_mcu_register_poll(struct kbase_device *const kbdev, u32 const reg_addr,
					 u32 const val_mask, u32 const reg_val)
{
	unsigned long remaining = kbase_csf_timeout_in_jiffies(kbdev->csf.fw_timeout_ms) + jiffies;
	u32 read_val = 0;

	dev_dbg(kbdev->dev, "p: reg %08x val %08x mask %08x", reg_addr, reg_val, val_mask);

	while (time_before(jiffies, remaining)) {
		int err = kbase_csf_firmware_mcu_register_read(kbdev, reg_addr, &read_val);

		if (err) {
			dev_err(kbdev->dev,
				"Error reading MCU register value (read_val = %u, expect = %u)\n",
				read_val, reg_val);
			return err;
		}

		if ((read_val & val_mask) == reg_val)
			return 0;
	}

	dev_err(kbdev->dev,
		"Timeout waiting for MCU register value to be set (read_val = %u, expect = %u)\n",
		read_val, reg_val);

	return -ETIMEDOUT;
}
#endif /* IS_ENABLED(CONFIG_MALI_CORESIGHT) */
void kbase_csf_firmware_enable_gpu_idle_timer(struct kbase_device *kbdev)
{
struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface;
@@ -2820,7 +3053,7 @@ int kbase_csf_firmware_mcu_shared_mapping_init(
goto page_list_alloc_error;
ret = kbase_mem_pool_alloc_pages(&kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], num_pages,
phys, false);
phys, false, NULL);
if (ret <= 0)
goto phys_mem_pool_alloc_error;
@@ -2831,8 +3064,8 @@ int kbase_csf_firmware_mcu_shared_mapping_init(
if (!cpu_addr)
goto vmap_error;
va_reg = kbase_alloc_free_region(&kbdev->csf.shared_reg_rbtree, 0,
num_pages, KBASE_REG_ZONE_MCU_SHARED);
va_reg = kbase_alloc_free_region(kbdev, &kbdev->csf.shared_reg_rbtree, 0, num_pages,
KBASE_REG_ZONE_MCU_SHARED);
if (!va_reg)
goto va_region_alloc_error;
@@ -2848,7 +3081,7 @@ int kbase_csf_firmware_mcu_shared_mapping_init(
ret = kbase_mmu_insert_pages_no_flush(kbdev, &kbdev->csf.mcu_mmu, va_reg->start_pfn,
&phys[0], num_pages, gpu_map_properties,
KBASE_MEM_GROUP_CSF_FW, NULL);
KBASE_MEM_GROUP_CSF_FW, NULL, NULL, false);
if (ret)
goto mmu_insert_pages_error;
@@ -2909,4 +3142,3 @@ void kbase_csf_firmware_mcu_shared_mapping_term(
vunmap(csf_mapping->cpu_addr);
kfree(csf_mapping->phys);
}

View File

@@ -246,7 +246,6 @@ void kbase_csf_firmware_csg_input_mask(
u32 kbase_csf_firmware_csg_output(
const struct kbase_csf_cmd_stream_group_info *info, u32 offset);
/**
* struct kbase_csf_global_iface - Global CSF interface
* provided by the firmware.
@@ -450,6 +449,50 @@ int kbase_csf_firmware_load_init(struct kbase_device *kbdev);
*/
void kbase_csf_firmware_unload_term(struct kbase_device *kbdev);
#if IS_ENABLED(CONFIG_MALI_CORESIGHT)
/**
* kbase_csf_firmware_mcu_register_write - Write to MCU register
*
* @kbdev: Instance of a gpu platform device that implements a csf interface.
* @reg_addr: Register address to write into
* @reg_val: Value to be written
*
* Write a desired value to a register in MCU address space.
*
* return: 0 on success, or negative on failure.
*/
int kbase_csf_firmware_mcu_register_write(struct kbase_device *const kbdev, u32 const reg_addr,
u32 const reg_val);
/**
* kbase_csf_firmware_mcu_register_read - Read from MCU register
*
* @kbdev: Instance of a gpu platform device that implements a csf interface.
* @reg_addr: Register address to read from
* @reg_val: Value as present in reg_addr register
*
* Read a value from MCU address space.
*
* return: 0 on success, or negative on failure.
*/
int kbase_csf_firmware_mcu_register_read(struct kbase_device *const kbdev, u32 const reg_addr,
u32 *reg_val);
/**
* kbase_csf_firmware_mcu_register_poll - Poll MCU register
*
* @kbdev: Instance of a gpu platform device that implements a csf interface.
* @reg_addr: Register address to read from
* @val_mask: Value to mask the read value for comparison
* @reg_val: Value to be compared against
*
* Continue to read a value from MCU address space until it matches given mask and value.
*
* return: 0 on success, or negative on failure.
*/
int kbase_csf_firmware_mcu_register_poll(struct kbase_device *const kbdev, u32 const reg_addr,
u32 const val_mask, u32 const reg_val);
#endif /* IS_ENABLED(CONFIG_MALI_CORESIGHT) */
/**
* kbase_csf_firmware_ping - Send the ping request to firmware.
*
@@ -858,5 +901,16 @@ static inline u32 kbase_csf_interface_version(u32 major, u32 minor, u32 patch)
*/
int kbase_csf_trigger_firmware_config_update(struct kbase_device *kbdev);
/**
* kbase_csf_firmware_req_core_dump - Request a firmware core dump
*
* @kbdev: Instance of a GPU platform device that implements a CSF interface.
*
 * Request a firmware core dump and wait for firmware to acknowledge.
* Firmware will enter infinite loop after the firmware core dump is created.
*
* Return: 0 if success, or negative error code on failure.
*/
int kbase_csf_firmware_req_core_dump(struct kbase_device *const kbdev);
#endif

View File

@@ -0,0 +1,807 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
* (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
* Foundation, and any use by you of this program is subject to the terms
* of such GNU license.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, you can access it online at
* http://www.gnu.org/licenses/gpl-2.0.html.
*
*/
#include <linux/kernel.h>
#include <linux/device.h>
#include <linux/list.h>
#include <linux/file.h>
#include <linux/elf.h>
#include <linux/elfcore.h>
#include "mali_kbase.h"
#include "mali_kbase_csf_firmware_core_dump.h"
#include "backend/gpu/mali_kbase_pm_internal.h"
/* Page size in bytes in use by MCU. */
#define FW_PAGE_SIZE 4096
/*
* FW image header core dump data format supported.
* Currently only version 0.1 is supported.
*/
#define FW_CORE_DUMP_DATA_VERSION_MAJOR 0
#define FW_CORE_DUMP_DATA_VERSION_MINOR 1
/* Full version of the image header core dump data format */
#define FW_CORE_DUMP_DATA_VERSION \
((FW_CORE_DUMP_DATA_VERSION_MAJOR << 8) | FW_CORE_DUMP_DATA_VERSION_MINOR)
/* Validity flag to indicate if the MCU registers in the buffer are valid */
#define FW_MCU_STATUS_MASK 0x1
#define FW_MCU_STATUS_VALID (1 << 0)
/* Core dump entry fields */
#define FW_CORE_DUMP_VERSION_INDEX 0
#define FW_CORE_DUMP_START_ADDR_INDEX 1
/* MCU registers stored by a firmware core dump */
struct fw_core_dump_mcu {
u32 r0;
u32 r1;
u32 r2;
u32 r3;
u32 r4;
u32 r5;
u32 r6;
u32 r7;
u32 r8;
u32 r9;
u32 r10;
u32 r11;
u32 r12;
u32 sp;
u32 lr;
u32 pc;
};
/* Any ELF definitions used in this file are from elf.h/elfcore.h except
* when specific 32-bit versions are required (mainly for the
* ELF_PRSTATUS32 note that is used to contain the MCU registers).
*/
/* - 32-bit version of timeval structures used in ELF32 PRSTATUS note. */
struct prstatus32_timeval {
int tv_sec;
int tv_usec;
};
/* - Structure defining ELF32 PRSTATUS note contents, as defined by the
* GNU binutils BFD library used by GDB, in bfd/hosts/x86-64linux.h.
* Note: GDB checks for the size of this structure to be 0x94.
* Modified pr_reg (array containing the Arm 32-bit MCU registers) to
* use u32[18] instead of elf_gregset32_t to prevent introducing new typedefs.
*/
struct elf_prstatus32 {
struct elf_siginfo pr_info; /* Info associated with signal. */
short int pr_cursig; /* Current signal. */
unsigned int pr_sigpend; /* Set of pending signals. */
unsigned int pr_sighold; /* Set of held signals. */
pid_t pr_pid;
pid_t pr_ppid;
pid_t pr_pgrp;
pid_t pr_sid;
struct prstatus32_timeval pr_utime; /* User time. */
struct prstatus32_timeval pr_stime; /* System time. */
struct prstatus32_timeval pr_cutime; /* Cumulative user time. */
struct prstatus32_timeval pr_cstime; /* Cumulative system time. */
u32 pr_reg[18]; /* GP registers. */
int pr_fpvalid; /* True if math copro being used. */
};
/**
* struct fw_core_dump_data - Context for seq_file operations used on 'fw_core_dump'
* debugfs file.
* @kbdev: Instance of a GPU platform device that implements a CSF interface.
*/
struct fw_core_dump_data {
struct kbase_device *kbdev;
};
/*
* struct fw_core_dump_seq_off - Iterator for seq_file operations used on 'fw_core_dump'
* debugfs file.
* @interface: current firmware memory interface
* @page_num: current page number (0..) within @interface
*/
struct fw_core_dump_seq_off {
struct kbase_csf_firmware_interface *interface;
u32 page_num;
};
/**
* fw_get_core_dump_mcu - Get the MCU registers saved by a firmware core dump
*
* @kbdev: Instance of a GPU platform device that implements a CSF interface.
* @regs: Pointer to a core dump mcu struct where the MCU registers are copied
 * to. Should be allocated by the caller.
*
* Return: 0 if successfully copied the MCU registers, negative error code otherwise.
*/
static int fw_get_core_dump_mcu(struct kbase_device *kbdev, struct fw_core_dump_mcu *regs)
{
unsigned int i;
u32 status = 0;
u32 data_addr = kbdev->csf.fw_core_dump.mcu_regs_addr;
u32 *data = (u32 *)regs;
/* Check if the core dump entry exposed the buffer */
if (!regs || !kbdev->csf.fw_core_dump.available)
return -EPERM;
/* Check if the data in the buffer is valid, if not, return error */
kbase_csf_read_firmware_memory(kbdev, data_addr, &status);
if ((status & FW_MCU_STATUS_MASK) != FW_MCU_STATUS_VALID)
return -EPERM;
/* According to image header documentation, the MCU registers core dump
* buffer is 32-bit aligned.
*/
for (i = 1; i <= sizeof(struct fw_core_dump_mcu) / sizeof(u32); ++i)
kbase_csf_read_firmware_memory(kbdev, data_addr + i * sizeof(u32), &data[i - 1]);
return 0;
}
/**
* fw_core_dump_fill_elf_header - Initializes an ELF32 header
* @hdr: ELF32 header to initialize
* @sections: Number of entries in the ELF program header table
*
* Initializes an ELF32 header for an ARM 32-bit little-endian
* 'Core file' object file.
*/
static void fw_core_dump_fill_elf_header(struct elf32_hdr *hdr, unsigned int sections)
{
	/* Start from an all-zero header so every member not set below is 0. */
	*hdr = (struct elf32_hdr){ 0 };

	/* Magic number identifying file as an ELF object. */
	memcpy(hdr->e_ident, ELFMAG, SELFMAG);

	/* 32-bit, little-endian, current ELF header version, no OS/ABI
	 * specific extensions.
	 */
	hdr->e_ident[EI_CLASS] = ELFCLASS32;
	hdr->e_ident[EI_DATA] = ELFDATA2LSB;
	hdr->e_ident[EI_VERSION] = EV_CURRENT;
	hdr->e_ident[EI_OSABI] = ELFOSABI_NONE;

	/* 'Core file' object for the ARM 32-bit (AARCH32) architecture. */
	hdr->e_type = ET_CORE;
	hdr->e_machine = EM_ARM;
	hdr->e_version = EV_CURRENT;

	/* Program header table immediately follows the ELF header. */
	hdr->e_phoff = sizeof(struct elf32_hdr);
	hdr->e_flags = 0;

	/* Sizes of the ELF header and of one program header entry, and the
	 * number of entries in the program header table.
	 */
	hdr->e_ehsize = sizeof(struct elf32_hdr);
	hdr->e_phentsize = sizeof(struct elf32_phdr);
	hdr->e_phnum = sections;
}
/**
* fw_core_dump_fill_elf_program_header_note - Initializes an ELF32 program header
* for holding auxiliary information
* @phdr: ELF32 program header
* @file_offset: Location of the note in the file in bytes
* @size: Size of the note in bytes.
*
* Initializes an ELF32 program header describing auxiliary information (containing
* one or more notes) of @size bytes alltogether located in the file at offset
* @file_offset.
*/
static void fw_core_dump_fill_elf_program_header_note(struct elf32_phdr *phdr, u32 file_offset,
						      u32 size)
{
	/* A PT_NOTE segment only needs its file location and size; all other
	 * members are irrelevant for a note and are left zero.
	 */
	*phdr = (struct elf32_phdr){
		.p_type = PT_NOTE,
		.p_offset = file_offset,
		.p_filesz = size,
	};
}
/**
* fw_core_dump_fill_elf_program_header - Initializes an ELF32 program header for a loadable segment
* @phdr: ELF32 program header to initialize.
* @file_offset: Location of loadable segment in file in bytes
* (aligned to FW_PAGE_SIZE bytes)
* @vaddr: 32-bit virtual address where to write the segment
* (aligned to FW_PAGE_SIZE bytes)
* @size: Size of the segment in bytes.
* @flags: CSF_FIRMWARE_ENTRY_* flags describing access permissions.
*
* Initializes an ELF32 program header describing a loadable segment of
* @size bytes located in the file at offset @file_offset to be loaded
* at virtual address @vaddr with access permissions as described by
* CSF_FIRMWARE_ENTRY_* flags in @flags.
*/
static void fw_core_dump_fill_elf_program_header(struct elf32_phdr *phdr, u32 file_offset,
						 u32 vaddr, u32 size, u32 flags)
{
	u32 p_flags = 0;

	/* Translate CSF_FIRMWARE_ENTRY_* access bits to ELF segment flags. */
	if (flags & CSF_FIRMWARE_ENTRY_READ)
		p_flags |= PF_R;
	if (flags & CSF_FIRMWARE_ENTRY_WRITE)
		p_flags |= PF_W;
	if (flags & CSF_FIRMWARE_ENTRY_EXECUTE)
		p_flags |= PF_X;

	/* Loadable segment: same size in file and memory, FW_PAGE_SIZE
	 * alignment, physical address not relevant (left zero).
	 */
	*phdr = (struct elf32_phdr){
		.p_type = PT_LOAD,
		.p_offset = file_offset,
		.p_vaddr = vaddr,
		.p_filesz = size,
		.p_memsz = size,
		.p_align = FW_PAGE_SIZE,
		.p_flags = p_flags,
	};
}
/**
* fw_core_dump_get_prstatus_note_size - Calculates size of a ELF32 PRSTATUS note
* @name: Name given to the PRSTATUS note.
*
* Calculates the size of a 32-bit PRSTATUS note (which contains information
* about a process like the current MCU registers) taking into account
* @name must be padded to a 4-byte multiple.
*
* Return: size of 32-bit PRSTATUS note in bytes.
*/
static unsigned int fw_core_dump_get_prstatus_note_size(char *name)
{
	/* Note layout: header, name (NUL-terminated, padded to 4 bytes),
	 * then the PRSTATUS descriptor.
	 */
	const unsigned int padded_name_sz = roundup(strlen(name) + 1, 4);

	return sizeof(struct elf32_note) + padded_name_sz + sizeof(struct elf_prstatus32);
}
/**
* fw_core_dump_fill_elf_prstatus - Initializes an ELF32 PRSTATUS structure
* @prs: ELF32 PRSTATUS note to initialize
* @regs: MCU registers to copy into the PRSTATUS note
*
* Initializes an ELF32 PRSTATUS structure with MCU registers @regs.
* Other process information is N/A for CSF Firmware.
*/
/**
 * fw_core_dump_fill_elf_prstatus - Initializes an ELF32 PRSTATUS structure
 * @prs: ELF32 PRSTATUS note to initialize
 * @regs: MCU registers to copy into the PRSTATUS note
 *
 * Initializes an ELF32 PRSTATUS structure with MCU registers @regs.
 * Other process information is N/A for CSF Firmware.
 */
static void fw_core_dump_fill_elf_prstatus(struct elf_prstatus32 *prs,
					   struct fw_core_dump_mcu *regs)
{
	/* Only fill in registers (32-bit) of PRSTATUS note. */
	memset(prs, 0, sizeof(*prs));
	prs->pr_reg[0] = regs->r0;
	prs->pr_reg[1] = regs->r1;
	prs->pr_reg[2] = regs->r2;
	prs->pr_reg[3] = regs->r3;
	prs->pr_reg[4] = regs->r4;
	prs->pr_reg[5] = regs->r5;
	/* Fix: the original copied r0 into slot 6, clobbering r6 in the dump. */
	prs->pr_reg[6] = regs->r6;
	prs->pr_reg[7] = regs->r7;
	prs->pr_reg[8] = regs->r8;
	prs->pr_reg[9] = regs->r9;
	prs->pr_reg[10] = regs->r10;
	prs->pr_reg[11] = regs->r11;
	prs->pr_reg[12] = regs->r12;
	prs->pr_reg[13] = regs->sp;
	prs->pr_reg[14] = regs->lr;
	prs->pr_reg[15] = regs->pc;
}
/**
* fw_core_dump_create_prstatus_note - Creates an ELF32 PRSTATUS note
* @name: Name for the PRSTATUS note
* @prs: ELF32 PRSTATUS structure to put in the PRSTATUS note
* @created_prstatus_note:
* Pointer to the allocated ELF32 PRSTATUS note
*
* Creates an ELF32 note with one PRSTATUS entry containing the
* ELF32 PRSTATUS structure @prs. Caller needs to free the created note in
* @created_prstatus_note.
*
* Return: 0 on failure, otherwise size of ELF32 PRSTATUS note in bytes.
*/
static unsigned int fw_core_dump_create_prstatus_note(char *name, struct elf_prstatus32 *prs,
						      struct elf32_note **created_prstatus_note)
{
	const unsigned int name_sz = strlen(name) + 1;
	const unsigned int padded_name_sz = roundup(name_sz, 4);
	const unsigned int note_sz =
		sizeof(struct elf32_note) + padded_name_sz + sizeof(struct elf_prstatus32);
	struct elf32_note *note;
	char *payload;

	/* Allocate one buffer holding note header, name and PRSTATUS data. */
	note = kmalloc(note_sz, GFP_KERNEL);
	if (!note)
		return 0;

	/* Note header describing a single PRSTATUS entry. */
	note->n_namesz = name_sz;
	note->n_descsz = sizeof(struct elf_prstatus32);
	note->n_type = NT_PRSTATUS;

	/* Name immediately follows the header, the descriptor follows the
	 * 4-byte padded name.
	 */
	payload = (char *)(note + 1);
	memcpy(payload, name, name_sz);
	memcpy(payload + padded_name_sz, prs, sizeof(*prs));

	/* Hand ownership of the allocated note to the caller. */
	*created_prstatus_note = note;
	return note_sz;
}
/**
* fw_core_dump_write_elf_header - Writes ELF header for the FW core dump
* @m: the seq_file handle
*
* Writes the ELF header of the core dump including program headers for
* memory sections and a note containing the current MCU register
* values.
*
* Excludes memory sections without read access permissions or
* are for protected memory.
*
* The data written is as follows:
* - ELF header
* - ELF PHDRs for memory sections
* - ELF PHDR for program header NOTE
* - ELF PRSTATUS note
* - 0-bytes padding to multiple of ELF_EXEC_PAGESIZE
*
* The actual memory section dumps should follow this (not written
* by this function).
*
* Retrieves the necessary information via the struct
* fw_core_dump_data stored in the private member of the seq_file
* handle.
*
* Return:
* * 0 - success
* * -ENOMEM - not enough memory for allocating ELF32 note
*/
/**
 * fw_core_dump_write_elf_header - Writes ELF header for the FW core dump
 * @m: the seq_file handle
 *
 * Writes the ELF header of the core dump including program headers for
 * memory sections and a note containing the current MCU register values.
 * Memory sections without read access permission, or that are protected,
 * are excluded.
 *
 * The data written is: ELF header, one PT_LOAD PHDR per included memory
 * section, one PT_NOTE PHDR, the ELF PRSTATUS note, then 0-byte padding
 * up to a multiple of ELF_EXEC_PAGESIZE. The actual memory section dumps
 * follow (not written by this function).
 *
 * Retrieves the necessary information via the struct fw_core_dump_data
 * stored in the private member of the seq_file handle.
 *
 * Return:
 * * 0 - success
 * * -ENOMEM - not enough memory for the ELF32 note or the padding buffer
 */
static int fw_core_dump_write_elf_header(struct seq_file *m)
{
	struct elf32_hdr hdr;
	struct elf32_phdr phdr;
	struct fw_core_dump_data *dump_data = m->private;
	struct kbase_device *const kbdev = dump_data->kbdev;
	struct kbase_csf_firmware_interface *interface;
	struct elf_prstatus32 elf_prs;
	struct elf32_note *elf_prstatus_note;
	unsigned int sections = 0;
	unsigned int elf_prstatus_note_size;
	u32 elf_prstatus_offset;
	u32 elf_phdr_note_offset;
	u32 elf_memory_sections_data_offset;
	u32 padding_size, *padding;
	struct fw_core_dump_mcu regs = { 0 };

	/* Count number of memory sections included in the dump. */
	list_for_each_entry(interface, &kbdev->csf.firmware_interfaces, node) {
		/* Skip memory sections that cannot be read or are protected. */
		if ((interface->flags & CSF_FIRMWARE_ENTRY_PROTECTED) ||
		    (interface->flags & CSF_FIRMWARE_ENTRY_READ) == 0)
			continue;
		sections++;
	}

	/* Prepare ELF header: one extra PHDR entry for the PT_NOTE segment. */
	fw_core_dump_fill_elf_header(&hdr, sections + 1);
	seq_write(m, &hdr, sizeof(struct elf32_hdr));

	elf_prstatus_note_size = fw_core_dump_get_prstatus_note_size("CORE");
	/* File offset of the PT_NOTE PHDR, after the PT_LOAD PHDRs. */
	elf_phdr_note_offset = sizeof(struct elf32_hdr) + sections * sizeof(struct elf32_phdr);
	/* File offset of the PRSTATUS note itself. */
	elf_prstatus_offset = elf_phdr_note_offset + sizeof(struct elf32_phdr);
	elf_memory_sections_data_offset = elf_prstatus_offset + elf_prstatus_note_size;

	/* Calculate padding needed to align memory section data to a page. */
	padding_size = roundup(elf_memory_sections_data_offset, ELF_EXEC_PAGESIZE) -
		       elf_memory_sections_data_offset;
	elf_memory_sections_data_offset += padding_size;

	/* Prepare ELF program header table. */
	list_for_each_entry(interface, &kbdev->csf.firmware_interfaces, node) {
		/* Skip memory sections that cannot be read or are protected. */
		if ((interface->flags & CSF_FIRMWARE_ENTRY_PROTECTED) ||
		    (interface->flags & CSF_FIRMWARE_ENTRY_READ) == 0)
			continue;
		fw_core_dump_fill_elf_program_header(&phdr, elf_memory_sections_data_offset,
						     interface->virtual,
						     interface->num_pages * FW_PAGE_SIZE,
						     interface->flags);
		seq_write(m, &phdr, sizeof(struct elf32_phdr));
		elf_memory_sections_data_offset += interface->num_pages * FW_PAGE_SIZE;
	}

	/* Prepare PHDR of PT_NOTE type. */
	fw_core_dump_fill_elf_program_header_note(&phdr, elf_prstatus_offset,
						  elf_prstatus_note_size);
	seq_write(m, &phdr, sizeof(struct elf32_phdr));

	/* Prepare ELF note of PRSTATUS type. Even if MCU registers are not
	 * available the PRSTATUS note is still written, with all registers
	 * equal to zero.
	 */
	if (fw_get_core_dump_mcu(kbdev, &regs))
		dev_dbg(kbdev->dev, "MCU Registers not available, all registers set to zero");
	fw_core_dump_fill_elf_prstatus(&elf_prs, &regs);
	elf_prstatus_note_size =
		fw_core_dump_create_prstatus_note("CORE", &elf_prs, &elf_prstatus_note);
	if (elf_prstatus_note_size == 0)
		return -ENOMEM;
	seq_write(m, elf_prstatus_note, elf_prstatus_note_size);
	kfree(elf_prstatus_note);

	/* Pad file to page size. Fix: check the allocation result before use;
	 * the original passed a potentially NULL buffer to seq_write() on an
	 * allocation failure. Skip entirely when no padding is needed.
	 */
	if (padding_size) {
		padding = kzalloc(padding_size, GFP_KERNEL);
		if (!padding)
			return -ENOMEM;
		seq_write(m, padding, padding_size);
		kfree(padding);
	}

	return 0;
}
/**
* fw_core_dump_create - Requests firmware to save state for a firmware core dump
* @kbdev: Instance of a GPU platform device that implements a CSF interface.
*
* Return: 0 on success, error code otherwise.
*/
static int fw_core_dump_create(struct kbase_device *kbdev)
{
	int err;

	/* Ensure MCU is active before requesting the core dump. */
	kbase_csf_scheduler_pm_active(kbdev);
	err = kbase_csf_scheduler_wait_mcu_active(kbdev);
	if (!err)
		err = kbase_csf_firmware_req_core_dump(kbdev);

	/* Drop the PM reference taken above regardless of outcome. */
	kbase_csf_scheduler_pm_idle(kbdev);

	return err;
}
/**
* fw_core_dump_seq_start - seq_file start operation for firmware core dump file
* @m: the seq_file handle
* @_pos: holds the current position in pages
* (0 or most recent position used in previous session)
*
* Starts a seq_file session, positioning the iterator for the session to page @_pos - 1
* within the firmware interface memory sections. @_pos value 0 is used to indicate the
* position of the ELF header at the start of the file.
*
* Retrieves the necessary information via the struct fw_core_dump_data stored in
* the private member of the seq_file handle.
*
* Return:
* * iterator pointer - pointer to iterator struct fw_core_dump_seq_off
 * SEQ_START_TOKEN - special iterator pointer indicating it is the start of the file
* * NULL - iterator could not be allocated
*/
static void *fw_core_dump_seq_start(struct seq_file *m, loff_t *_pos)
{
	struct fw_core_dump_data *dump_data = m->private;
	struct fw_core_dump_seq_off *data;
	struct kbase_csf_firmware_interface *interface;
	loff_t pos = *_pos;

	/* Position 0 represents the ELF header at the start of the file. */
	if (pos == 0)
		return SEQ_START_TOKEN;

	/* Move iterator in the right position based on page number within
	 * available pages of firmware interface memory sections.
	 */
	pos--; /* ignore start token */
	list_for_each_entry(interface, &dump_data->kbdev->csf.firmware_interfaces, node) {
		/* Skip memory sections that cannot be read or are protected. */
		if ((interface->flags & CSF_FIRMWARE_ENTRY_PROTECTED) ||
		    (interface->flags & CSF_FIRMWARE_ENTRY_READ) == 0)
			continue;
		if (pos >= interface->num_pages) {
			/* Position lies beyond this section: keep walking. */
			pos -= interface->num_pages;
		} else {
			/* Position falls within this section: allocate the
			 * per-session iterator (freed in seq_stop).
			 */
			data = kmalloc(sizeof(*data), GFP_KERNEL);
			if (!data)
				return NULL;
			data->interface = interface;
			data->page_num = pos;
			return data;
		}
	}
	/* Position is past the end of all dumpable sections. */
	return NULL;
}
/**
* fw_core_dump_seq_stop - seq_file stop operation for firmware core dump file
* @m: the seq_file handle
* @v: the current iterator (pointer to struct fw_core_dump_seq_off)
*
* Closes the current session and frees any memory related.
*/
static void fw_core_dump_seq_stop(struct seq_file *m, void *v)
{
	struct fw_core_dump_seq_off *iter = v;

	/* Release the iterator allocated by seq_start/seq_next; @v may also
	 * be NULL or SEQ_START_TOKEN here, both handled by kfree() as no-ops.
	 */
	kfree(iter);
}
/**
* fw_core_dump_seq_next - seq_file next operation for firmware core dump file
* @m: the seq_file handle
* @v: the current iterator (pointer to struct fw_core_dump_seq_off)
* @pos: holds the current position in pages
* (0 or most recent position used in previous session)
*
* Moves the iterator @v forward to the next page within the firmware interface
* memory sections and returns the updated position in @pos.
* @v value SEQ_START_TOKEN indicates the ELF header position.
*
* Return:
* * iterator pointer - pointer to iterator struct fw_core_dump_seq_off
* * NULL - iterator could not be allocated
*/
static void *fw_core_dump_seq_next(struct seq_file *m, void *v, loff_t *pos)
{
	struct fw_core_dump_data *dump_data = m->private;
	struct fw_core_dump_seq_off *data = v;
	struct kbase_csf_firmware_interface *interface;
	struct list_head *interfaces = &dump_data->kbdev->csf.firmware_interfaces;

	/* Is current position at the ELF header ? */
	if (v == SEQ_START_TOKEN) {
		if (list_empty(interfaces))
			return NULL;

		/* Prepare iterator for starting at first page in firmware interface
		 * memory sections.
		 * NOTE(review): unlike fw_core_dump_seq_start(), this path (and the
		 * section advance below) does not skip protected/unreadable
		 * sections - confirm whether such sections can appear here, as
		 * their pages would have no matching PT_LOAD header.
		 */
		data = kmalloc(sizeof(*data), GFP_KERNEL);
		if (!data)
			return NULL;
		data->interface =
			list_first_entry(interfaces, struct kbase_csf_firmware_interface, node);
		data->page_num = 0;
		++*pos;
		return data;
	}

	/* First attempt to satisfy from current firmware interface memory section. */
	interface = data->interface;
	if (data->page_num + 1 < interface->num_pages) {
		data->page_num++;
		++*pos;
		return data;
	}

	/* Need next firmware interface memory section. This could be the last one. */
	if (list_is_last(&interface->node, interfaces)) {
		/* End of sequence: free the iterator here since seq_stop only
		 * sees the NULL returned below.
		 */
		kfree(data);
		return NULL;
	}

	/* Move to first page in next firmware interface memory section. */
	data->interface = list_next_entry(interface, node);
	data->page_num = 0;
	++*pos;
	return data;
}
/**
* fw_core_dump_seq_show - seq_file show operation for firmware core dump file
* @m: the seq_file handle
* @v: the current iterator (pointer to struct fw_core_dump_seq_off)
*
* Writes the current page in a firmware interface memory section indicated
* by the iterator @v to the file. If @v is SEQ_START_TOKEN the ELF
* header is written.
*
* Return: 0 on success, error code otherwise.
*/
static int fw_core_dump_seq_show(struct seq_file *m, void *v)
{
	struct fw_core_dump_seq_off *data = v;
	struct page *page;
	u32 *p;

	/* Either write the ELF header or current page. */
	if (v == SEQ_START_TOKEN)
		return fw_core_dump_write_elf_header(m);

	/* Write the current page. The atomic mapping is held only for the
	 * duration of the copy into the seq_file buffer.
	 */
	page = as_page(data->interface->phys[data->page_num]);
	p = kmap_atomic(page);
	seq_write(m, p, FW_PAGE_SIZE);
	kunmap_atomic(p);

	return 0;
}
/* Sequence file operations for firmware core dump file. */
static const struct seq_operations fw_core_dump_seq_ops = {
	.start = fw_core_dump_seq_start,
	.next = fw_core_dump_seq_next,
	.stop = fw_core_dump_seq_stop,
	/* Emits the ELF header (SEQ_START_TOKEN) or one firmware page per step. */
	.show = fw_core_dump_seq_show,
};
/**
* fw_core_dump_debugfs_open - callback for opening the 'fw_core_dump' debugfs file
* @inode: inode of the file
* @file: file pointer
*
* Prepares for servicing a write request to request a core dump from firmware and
* a read request to retrieve the core dump.
*
* Returns an error if the firmware is not initialized yet.
*
* Return: 0 on success, error code otherwise.
*/
static int fw_core_dump_debugfs_open(struct inode *inode, struct file *file)
{
	struct kbase_device *const kbdev = inode->i_private;
	struct fw_core_dump_data *dump_data;
	int ret;

	/* Fail if firmware is not initialized yet. */
	if (!kbdev->csf.firmware_inited) {
		ret = -ENODEV;
		goto open_fail;
	}

	/* Open a sequence file for iterating through the pages in the
	 * firmware interface memory pages. seq_open stores a
	 * struct seq_file * in the private_data field of @file.
	 */
	ret = seq_open(file, &fw_core_dump_seq_ops);
	if (ret)
		goto open_fail;

	/* Allocate a context for sequence file operations.
	 * Freed in fw_core_dump_debugfs_release().
	 */
	dump_data = kmalloc(sizeof(*dump_data), GFP_KERNEL);
	if (!dump_data) {
		ret = -ENOMEM;
		goto out;
	}

	/* Kbase device will be shared with sequence file operations. */
	dump_data->kbdev = kbdev;

	/* Link our sequence file context. */
	((struct seq_file *)file->private_data)->private = dump_data;

	return 0;
out:
	/* Undo seq_open() on late failure so release is not called twice. */
	seq_release(inode, file);
open_fail:
	return ret;
}
/**
* fw_core_dump_debugfs_write - callback for a write to the 'fw_core_dump' debugfs file
* @file: file pointer
* @ubuf: user buffer containing data to store
* @count: number of bytes in user buffer
* @ppos: file position
*
* Any data written to the file triggers a firmware core dump request which
* subsequently can be retrieved by reading from the file.
*
* Return: @count if the function succeeded. An error code on failure.
*/
static ssize_t fw_core_dump_debugfs_write(struct file *file, const char __user *ubuf, size_t count,
					  loff_t *ppos)
{
	struct seq_file *seq = file->private_data;
	struct fw_core_dump_data *dump_data = seq->private;
	int err;

	CSTD_UNUSED(ppos);

	/* The written bytes are not inspected: any write triggers a request. */
	err = fw_core_dump_create(dump_data->kbdev);

	/* Consume the whole buffer on success, propagate the error otherwise. */
	return err ? err : count;
}
/**
* fw_core_dump_debugfs_release - callback for releasing the 'fw_core_dump' debugfs file
* @inode: inode of the file
* @file: file pointer
*
* Return: 0 on success, error code otherwise.
*/
static int fw_core_dump_debugfs_release(struct inode *inode, struct file *file)
{
struct fw_core_dump_data *dump_data = ((struct seq_file *)file->private_data)->private;
seq_release(inode, file);
kfree(dump_data);
return 0;
}
/* Debugfs file operations for firmware core dump file. */
static const struct file_operations kbase_csf_fw_core_dump_fops = {
	.owner = THIS_MODULE,
	.open = fw_core_dump_debugfs_open,
	/* Reads stream the core dump through the seq_file iterator. */
	.read = seq_read,
	/* Any write triggers a new firmware core dump request. */
	.write = fw_core_dump_debugfs_write,
	.llseek = seq_lseek,
	.release = fw_core_dump_debugfs_release,
};
void kbase_csf_firmware_core_dump_init(struct kbase_device *const kbdev)
{
#if IS_ENABLED(CONFIG_DEBUG_FS)
	/* 0600: owner-only access; the file both triggers (write) and serves
	 * (read) a firmware core dump. Cleaned up with the debugfs directory.
	 */
	debugfs_create_file("fw_core_dump", 0600, kbdev->mali_debugfs_directory, kbdev,
			    &kbase_csf_fw_core_dump_fops);
#endif /* CONFIG_DEBUG_FS */
}
int kbase_csf_firmware_core_dump_entry_parse(struct kbase_device *kbdev, const u32 *entry)
{
/* Casting to u16 as version is defined by bits 15:0 */
kbdev->csf.fw_core_dump.version = (u16)entry[FW_CORE_DUMP_VERSION_INDEX];
if (kbdev->csf.fw_core_dump.version != FW_CORE_DUMP_DATA_VERSION)
return -EPERM;
kbdev->csf.fw_core_dump.mcu_regs_addr = entry[FW_CORE_DUMP_START_ADDR_INDEX];
kbdev->csf.fw_core_dump.available = true;
return 0;
}

View File

@@ -0,0 +1,65 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
* (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
* Foundation, and any use by you of this program is subject to the terms
* of such GNU license.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, you can access it online at
* http://www.gnu.org/licenses/gpl-2.0.html.
*
*/
#ifndef _KBASE_CSF_FIRMWARE_CORE_DUMP_H_
#define _KBASE_CSF_FIRMWARE_CORE_DUMP_H_
struct kbase_device;
/** Offset of the last field of core dump entry from the image header */
#define CORE_DUMP_ENTRY_START_ADDR_OFFSET (0x4)
/**
* kbase_csf_firmware_core_dump_entry_parse() - Parse a "core dump" entry from
* the image header.
*
* @kbdev: Instance of a GPU platform device that implements a CSF interface.
* @entry: Pointer to section.
*
* Read a "core dump" entry from the image header, check the version for
* compatibility and store the address pointer.
*
* Return: 0 if successfully parse entry, negative error code otherwise.
*/
int kbase_csf_firmware_core_dump_entry_parse(struct kbase_device *kbdev, const u32 *entry);
/**
* kbase_csf_firmware_core_dump_init() - Initialize firmware core dump support
*
* @kbdev: Instance of a GPU platform device that implements a CSF interface.
* Must be zero-initialized.
*
* Creates the fw_core_dump debugfs file through which to request a firmware
* core dump. The created debugfs file is cleaned up as part of kbdev debugfs
* cleanup.
*
 * The fw_core_dump debugfs file can be used in the following way:
*
* To explicitly request core dump:
* echo 1 >/sys/kernel/debug/mali0/fw_core_dump
*
* To output current core dump (after explicitly requesting a core dump, or
* kernel driver reported an internal firmware error):
* cat /sys/kernel/debug/mali0/fw_core_dump
*/
void kbase_csf_firmware_core_dump_init(struct kbase_device *const kbdev);
#endif /* _KBASE_CSF_FIRMWARE_CORE_DUMP_H_ */

View File

@@ -85,7 +85,7 @@ static int kbase_csf_firmware_log_enable_mask_write(void *data, u64 val)
dev_dbg(kbdev->dev, "Limit enabled bits count from %u to 64", enable_bits_count);
enable_bits_count = 64;
}
new_mask = val & ((1 << enable_bits_count) - 1);
new_mask = val & (UINT64_MAX >> (64 - enable_bits_count));
if (new_mask != kbase_csf_firmware_trace_buffer_get_active_mask64(tb))
return kbase_csf_firmware_trace_buffer_set_active_mask64(tb, new_mask);
@@ -350,7 +350,7 @@ static void toggle_logging_calls_in_loaded_image(struct kbase_device *kbdev, boo
diff = callee_address - calling_address - 4;
sign = !!(diff & 0x80000000);
if (ARMV7_T1_BL_IMM_RANGE_MIN > (int32_t)diff &&
if (ARMV7_T1_BL_IMM_RANGE_MIN > (int32_t)diff ||
ARMV7_T1_BL_IMM_RANGE_MAX < (int32_t)diff) {
dev_warn(kbdev->dev, "FW log patch 0x%x out of range, skipping",
calling_address);

View File

@@ -24,6 +24,9 @@
#include <mali_kbase.h>
/** Offset of the last field of functions call list entry from the image header */
#define FUNC_CALL_LIST_ENTRY_NAME_OFFSET (0x8)
/*
* Firmware log dumping buffer size.
*/

View File

@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
* (C) COPYRIGHT 2018-2022 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -32,7 +32,8 @@
#include "mali_kbase_csf_scheduler.h"
#include "mmu/mali_kbase_mmu.h"
#include "backend/gpu/mali_kbase_clk_rate_trace_mgr.h"
#include <backend/gpu/mali_kbase_model_dummy.h>
#include <backend/gpu/mali_kbase_model_linux.h>
#include <csf/mali_kbase_csf_registers.h>
#include <linux/list.h>
#include <linux/slab.h>
@@ -104,7 +105,6 @@ struct dummy_firmware_interface {
(GLB_REQ_CFG_ALLOC_EN_MASK | GLB_REQ_CFG_PROGRESS_TIMER_MASK | \
GLB_REQ_CFG_PWROFF_TIMER_MASK | GLB_REQ_IDLE_ENABLE_MASK)
static inline u32 input_page_read(const u32 *const input, const u32 offset)
{
WARN_ON(offset % sizeof(u32));
@@ -716,6 +716,71 @@ static void enable_gpu_idle_timer(struct kbase_device *const kbdev)
kbdev->csf.gpu_idle_dur_count);
}
/* Check whether firmware has acknowledged the debug request bits in
 * @req_mask: the request is complete when GLB_DEBUG_ACK matches
 * GLB_DEBUG_REQ under the mask (toggle-based request/ack protocol).
 */
static bool global_debug_request_complete(struct kbase_device *const kbdev, u32 const req_mask)
{
	struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface;
	bool complete = false;
	unsigned long flags;

	/* Spinlock serialises access to the global interface pages. */
	kbase_csf_scheduler_spin_lock(kbdev, &flags);

	if ((kbase_csf_firmware_global_output(global_iface, GLB_DEBUG_ACK) & req_mask) ==
	    (kbase_csf_firmware_global_input_read(global_iface, GLB_DEBUG_REQ) & req_mask))
		complete = true;

	kbase_csf_scheduler_spin_unlock(kbdev, flags);

	return complete;
}
/* Raise a debug request towards firmware: the bits in @req_mask of
 * GLB_DEBUG_REQ are set to the inverse of the current GLB_DEBUG_ACK value,
 * i.e. the request is signalled by toggling the bits relative to the ack.
 */
static void set_global_debug_request(const struct kbase_csf_global_iface *const global_iface,
				     u32 const req_mask)
{
	u32 glb_debug_req;

	kbase_csf_scheduler_spin_lock_assert_held(global_iface->kbdev);

	/* Toggle the masked bits of the current ack value to form the request. */
	glb_debug_req = kbase_csf_firmware_global_output(global_iface, GLB_DEBUG_ACK);
	glb_debug_req ^= req_mask;

	kbase_csf_firmware_global_input_mask(global_iface, GLB_DEBUG_REQ, glb_debug_req, req_mask);
}
/* Stage a firmware core dump request: select the CORE_DUMP run mode in the
 * debug request, then raise the generic DEBUG_CSF request so firmware acts
 * on it. Caller must hold the scheduler spinlock and ring the doorbell.
 */
static void request_fw_core_dump(
	const struct kbase_csf_global_iface *const global_iface)
{
	uint32_t run_mode = GLB_DEBUG_REQ_RUN_MODE_SET(0, GLB_DEBUG_RUN_MODE_TYPE_CORE_DUMP);

	set_global_debug_request(global_iface, GLB_DEBUG_REQ_DEBUG_RUN_MASK | run_mode);

	set_global_request(global_iface, GLB_REQ_DEBUG_CSF_REQ_MASK);
}
int kbase_csf_firmware_req_core_dump(struct kbase_device *const kbdev)
{
	const struct kbase_csf_global_iface *const global_iface =
		&kbdev->csf.global_iface;
	unsigned long flags;
	int ret;

	/* Serialize CORE_DUMP requests. */
	mutex_lock(&kbdev->csf.reg_lock);

	/* Update GLB_REQ with CORE_DUMP request and make firmware act on it. */
	kbase_csf_scheduler_spin_lock(kbdev, &flags);
	request_fw_core_dump(global_iface);
	kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR);
	kbase_csf_scheduler_spin_unlock(kbdev, flags);

	/* Wait for firmware to acknowledge completion of the CORE_DUMP request. */
	ret = wait_for_global_request(kbdev, GLB_REQ_DEBUG_CSF_REQ_MASK);
	if (!ret)
		/* On a successful ack the debug run bits must match as well. */
		WARN_ON(!global_debug_request_complete(kbdev, GLB_DEBUG_REQ_DEBUG_RUN_MASK));

	mutex_unlock(&kbdev->csf.reg_lock);

	return ret;
}
static void global_init(struct kbase_device *const kbdev, u64 core_mask)
{
@@ -724,8 +789,7 @@ static void global_init(struct kbase_device *const kbdev, u64 core_mask)
GLB_ACK_IRQ_MASK_CFG_PROGRESS_TIMER_MASK | GLB_ACK_IRQ_MASK_PROTM_ENTER_MASK |
GLB_ACK_IRQ_MASK_PROTM_EXIT_MASK | GLB_ACK_IRQ_MASK_FIRMWARE_CONFIG_UPDATE_MASK |
GLB_ACK_IRQ_MASK_CFG_PWROFF_TIMER_MASK | GLB_ACK_IRQ_MASK_IDLE_EVENT_MASK |
GLB_ACK_IRQ_MASK_IDLE_ENABLE_MASK |
0;
GLB_ACK_IRQ_MASK_IDLE_ENABLE_MASK | GLB_REQ_DEBUG_CSF_REQ_MASK;
const struct kbase_csf_global_iface *const global_iface =
&kbdev->csf.global_iface;
@@ -917,7 +981,7 @@ u32 kbase_csf_firmware_get_gpu_idle_hysteresis_time(struct kbase_device *kbdev)
u32 dur;
kbase_csf_scheduler_spin_lock(kbdev, &flags);
dur = kbdev->csf.gpu_idle_hysteresis_ms;
dur = kbdev->csf.gpu_idle_hysteresis_us;
kbase_csf_scheduler_spin_unlock(kbdev, flags);
return dur;
@@ -934,7 +998,7 @@ u32 kbase_csf_firmware_set_gpu_idle_hysteresis_time(struct kbase_device *kbdev,
mutex_lock(&kbdev->fw_load_lock);
if (unlikely(!kbdev->csf.firmware_inited)) {
kbase_csf_scheduler_spin_lock(kbdev, &flags);
kbdev->csf.gpu_idle_hysteresis_ms = dur;
kbdev->csf.gpu_idle_hysteresis_us = dur;
kbdev->csf.gpu_idle_dur_count = hysteresis_val;
kbase_csf_scheduler_spin_unlock(kbdev, flags);
mutex_unlock(&kbdev->fw_load_lock);
@@ -965,7 +1029,7 @@ u32 kbase_csf_firmware_set_gpu_idle_hysteresis_time(struct kbase_device *kbdev,
wait_for_global_request(kbdev, GLB_REQ_IDLE_DISABLE_MASK);
kbase_csf_scheduler_spin_lock(kbdev, &flags);
kbdev->csf.gpu_idle_hysteresis_ms = dur;
kbdev->csf.gpu_idle_hysteresis_us = dur;
kbdev->csf.gpu_idle_dur_count = hysteresis_val;
kbase_csf_firmware_enable_gpu_idle_timer(kbdev);
kbase_csf_scheduler_spin_unlock(kbdev, flags);
@@ -1060,6 +1124,7 @@ int kbase_csf_firmware_early_init(struct kbase_device *kbdev)
INIT_LIST_HEAD(&kbdev->csf.firmware_interfaces);
INIT_LIST_HEAD(&kbdev->csf.firmware_config);
INIT_LIST_HEAD(&kbdev->csf.firmware_trace_buffers.list);
INIT_LIST_HEAD(&kbdev->csf.user_reg.list);
INIT_WORK(&kbdev->csf.firmware_reload_work,
kbase_csf_firmware_reload_worker);
INIT_WORK(&kbdev->csf.fw_error_work, firmware_error_worker);
@@ -1076,14 +1141,14 @@ void kbase_csf_firmware_early_term(struct kbase_device *kbdev)
int kbase_csf_firmware_late_init(struct kbase_device *kbdev)
{
kbdev->csf.gpu_idle_hysteresis_ms = FIRMWARE_IDLE_HYSTERESIS_TIME_MS;
kbdev->csf.gpu_idle_hysteresis_us = FIRMWARE_IDLE_HYSTERESIS_TIME_USEC;
#ifdef KBASE_PM_RUNTIME
if (kbase_pm_gpu_sleep_allowed(kbdev))
kbdev->csf.gpu_idle_hysteresis_ms /= FIRMWARE_IDLE_HYSTERESIS_GPU_SLEEP_SCALER;
kbdev->csf.gpu_idle_hysteresis_us /= FIRMWARE_IDLE_HYSTERESIS_GPU_SLEEP_SCALER;
#endif
WARN_ON(!kbdev->csf.gpu_idle_hysteresis_ms);
WARN_ON(!kbdev->csf.gpu_idle_hysteresis_us);
kbdev->csf.gpu_idle_dur_count =
convert_dur_to_idle_count(kbdev, kbdev->csf.gpu_idle_hysteresis_ms);
convert_dur_to_idle_count(kbdev, kbdev->csf.gpu_idle_hysteresis_us);
return 0;
}
@@ -1166,8 +1231,6 @@ void kbase_csf_firmware_unload_term(struct kbase_device *kbdev)
/* NO_MALI: Don't stop firmware or unload MMU tables */
kbase_mmu_term(kbdev, &kbdev->csf.mcu_mmu);
kbase_csf_scheduler_term(kbdev);
kbase_csf_free_dummy_user_reg_page(kbdev);
@@ -1197,6 +1260,8 @@ void kbase_csf_firmware_unload_term(struct kbase_device *kbdev)
* entry parsed from the firmware image.
*/
kbase_mcu_shared_interface_region_tracker_term(kbdev);
kbase_mmu_term(kbdev, &kbdev->csf.mcu_mmu);
}
void kbase_csf_firmware_enable_gpu_idle_timer(struct kbase_device *kbdev)
@@ -1505,7 +1570,7 @@ int kbase_csf_firmware_mcu_shared_mapping_init(
goto page_list_alloc_error;
ret = kbase_mem_pool_alloc_pages(&kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], num_pages,
phys, false);
phys, false, NULL);
if (ret <= 0)
goto phys_mem_pool_alloc_error;
@@ -1516,8 +1581,8 @@ int kbase_csf_firmware_mcu_shared_mapping_init(
if (!cpu_addr)
goto vmap_error;
va_reg = kbase_alloc_free_region(&kbdev->csf.shared_reg_rbtree, 0,
num_pages, KBASE_REG_ZONE_MCU_SHARED);
va_reg = kbase_alloc_free_region(kbdev, &kbdev->csf.shared_reg_rbtree, 0, num_pages,
KBASE_REG_ZONE_MCU_SHARED);
if (!va_reg)
goto va_region_alloc_error;
@@ -1533,7 +1598,7 @@ int kbase_csf_firmware_mcu_shared_mapping_init(
ret = kbase_mmu_insert_pages_no_flush(kbdev, &kbdev->csf.mcu_mmu, va_reg->start_pfn,
&phys[0], num_pages, gpu_map_properties,
KBASE_MEM_GROUP_CSF_FW, NULL);
KBASE_MEM_GROUP_CSF_FW, NULL, NULL, false);
if (ret)
goto mmu_insert_pages_error;
@@ -1594,4 +1659,3 @@ void kbase_csf_firmware_mcu_shared_mapping_term(
vunmap(csf_mapping->cpu_addr);
kfree(csf_mapping->phys);
}

View File

@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
* (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -23,10 +23,7 @@
#include "mali_kbase_csf_heap_context_alloc.h"
/* Size of one heap context structure, in bytes. */
#define HEAP_CTX_SIZE ((size_t)32)
/* Total size of the GPU memory region allocated for heap contexts, in bytes. */
#define HEAP_CTX_REGION_SIZE (MAX_TILER_HEAPS * HEAP_CTX_SIZE)
#define HEAP_CTX_SIZE ((u32)32)
/**
* sub_alloc - Sub-allocate a heap context from a GPU memory region
@@ -38,8 +35,8 @@
static u64 sub_alloc(struct kbase_csf_heap_context_allocator *const ctx_alloc)
{
struct kbase_context *const kctx = ctx_alloc->kctx;
int heap_nr = 0;
size_t ctx_offset = 0;
unsigned long heap_nr = 0;
u32 ctx_offset = 0;
u64 heap_gpu_va = 0;
struct kbase_vmap_struct mapping;
void *ctx_ptr = NULL;
@@ -55,29 +52,64 @@ static u64 sub_alloc(struct kbase_csf_heap_context_allocator *const ctx_alloc)
return 0;
}
ctx_offset = heap_nr * HEAP_CTX_SIZE;
ctx_offset = heap_nr * ctx_alloc->heap_context_size_aligned;
heap_gpu_va = ctx_alloc->gpu_va + ctx_offset;
ctx_ptr = kbase_vmap_prot(kctx, heap_gpu_va,
HEAP_CTX_SIZE, KBASE_REG_CPU_WR, &mapping);
ctx_alloc->heap_context_size_aligned, KBASE_REG_CPU_WR, &mapping);
if (unlikely(!ctx_ptr)) {
dev_err(kctx->kbdev->dev,
"Failed to map tiler heap context %d (0x%llX)\n",
"Failed to map tiler heap context %lu (0x%llX)\n",
heap_nr, heap_gpu_va);
return 0;
}
memset(ctx_ptr, 0, HEAP_CTX_SIZE);
memset(ctx_ptr, 0, ctx_alloc->heap_context_size_aligned);
kbase_vunmap(ctx_ptr, &mapping);
bitmap_set(ctx_alloc->in_use, heap_nr, 1);
dev_dbg(kctx->kbdev->dev, "Allocated tiler heap context %d (0x%llX)\n",
dev_dbg(kctx->kbdev->dev, "Allocated tiler heap context %lu (0x%llX)\n",
heap_nr, heap_gpu_va);
return heap_gpu_va;
}
/**
* evict_heap_context - Evict the data of heap context from GPU's L2 cache.
*
* @ctx_alloc: Pointer to the heap context allocator.
* @heap_gpu_va: The GPU virtual address of a heap context structure to free.
*
 * This function is called when the memory for a heap context is freed. It uses
 * the FLUSH_PA_RANGE command to evict the heap context's data; on older CSF
 * GPUs, where that command is unavailable, nothing is done. The whole GPU
 * cache is expected to be flushed anyway on older GPUs when the initial chunks
 * of the heap are freed, just before the memory for the heap context is freed.
 */
static void evict_heap_context(struct kbase_csf_heap_context_allocator *const ctx_alloc,
			       u64 const heap_gpu_va)
{
	struct kbase_context *const kctx = ctx_alloc->kctx;
	/* Translate the heap context's GPU VA into the physical address of its
	 * backing page within the allocator's region.
	 */
	u32 offset_in_bytes = (u32)(heap_gpu_va - ctx_alloc->gpu_va);
	u32 offset_within_page = offset_in_bytes & ~PAGE_MASK;
	u32 page_index = offset_in_bytes >> PAGE_SHIFT;
	struct tagged_addr page =
		kbase_get_gpu_phy_pages(ctx_alloc->region)[page_index];
	phys_addr_t heap_context_pa = as_phys_addr_t(page) + offset_within_page;

	lockdep_assert_held(&ctx_alloc->lock);

	/* There is no need to take vm_lock here as the ctx_alloc region is protected
	 * via a nonzero no_user_free_count. The region and the backing page can't
	 * disappear whilst this function is executing. Flush type is passed as FLUSH_PT
	 * to CLN+INV L2 only.
	 */
	kbase_mmu_flush_pa_range(kctx->kbdev, kctx,
				 heap_context_pa, ctx_alloc->heap_context_size_aligned,
				 KBASE_MMU_OP_FLUSH_PT);
}
/**
* sub_free - Free a heap context sub-allocated from a GPU memory region
*
@@ -88,7 +120,7 @@ static void sub_free(struct kbase_csf_heap_context_allocator *const ctx_alloc,
u64 const heap_gpu_va)
{
struct kbase_context *const kctx = ctx_alloc->kctx;
u64 ctx_offset = 0;
u32 ctx_offset = 0;
unsigned int heap_nr = 0;
lockdep_assert_held(&ctx_alloc->lock);
@@ -99,13 +131,15 @@ static void sub_free(struct kbase_csf_heap_context_allocator *const ctx_alloc,
if (WARN_ON(heap_gpu_va < ctx_alloc->gpu_va))
return;
ctx_offset = heap_gpu_va - ctx_alloc->gpu_va;
ctx_offset = (u32)(heap_gpu_va - ctx_alloc->gpu_va);
if (WARN_ON(ctx_offset >= HEAP_CTX_REGION_SIZE) ||
WARN_ON(ctx_offset % HEAP_CTX_SIZE))
if (WARN_ON(ctx_offset >= (ctx_alloc->region->nr_pages << PAGE_SHIFT)) ||
WARN_ON(ctx_offset % ctx_alloc->heap_context_size_aligned))
return;
heap_nr = ctx_offset / HEAP_CTX_SIZE;
evict_heap_context(ctx_alloc, heap_gpu_va);
heap_nr = ctx_offset / ctx_alloc->heap_context_size_aligned;
dev_dbg(kctx->kbdev->dev,
"Freed tiler heap context %d (0x%llX)\n", heap_nr, heap_gpu_va);
@@ -116,12 +150,17 @@ int kbase_csf_heap_context_allocator_init(
struct kbase_csf_heap_context_allocator *const ctx_alloc,
struct kbase_context *const kctx)
{
const u32 gpu_cache_line_size =
(1U << kctx->kbdev->gpu_props.props.l2_props.log2_line_size);
/* We cannot pre-allocate GPU memory here because the
* custom VA zone may not have been created yet.
*/
ctx_alloc->kctx = kctx;
ctx_alloc->region = NULL;
ctx_alloc->gpu_va = 0;
ctx_alloc->heap_context_size_aligned =
(HEAP_CTX_SIZE + gpu_cache_line_size - 1) & ~(gpu_cache_line_size - 1);
mutex_init(&ctx_alloc->lock);
bitmap_zero(ctx_alloc->in_use, MAX_TILER_HEAPS);
@@ -142,7 +181,9 @@ void kbase_csf_heap_context_allocator_term(
if (ctx_alloc->region) {
kbase_gpu_vm_lock(kctx);
ctx_alloc->region->flags &= ~KBASE_REG_NO_USER_FREE;
WARN_ON(!kbase_va_region_is_no_user_free(ctx_alloc->region));
kbase_va_region_no_user_free_dec(ctx_alloc->region);
kbase_mem_free_region(kctx, ctx_alloc->region);
kbase_gpu_vm_unlock(kctx);
}
@@ -156,7 +197,7 @@ u64 kbase_csf_heap_context_allocator_alloc(
struct kbase_context *const kctx = ctx_alloc->kctx;
u64 flags = BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR | BASE_MEM_PROT_CPU_WR |
BASEP_MEM_NO_USER_FREE | BASE_MEM_PROT_CPU_RD;
u64 nr_pages = PFN_UP(HEAP_CTX_REGION_SIZE);
u64 nr_pages = PFN_UP(MAX_TILER_HEAPS * ctx_alloc->heap_context_size_aligned);
u64 heap_gpu_va = 0;
/* Calls to this function are inherently asynchronous, with respect to

View File

@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
* (C) COPYRIGHT 2018-2022 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -80,7 +80,14 @@ static int kbase_kcpu_map_import_prepare(
* on the physical pages tracking object. When the last
* reference to the tracking object is dropped the pages
* would be unpinned if they weren't unpinned before.
*
* Region should be CPU cached: abort if it isn't.
*/
if (WARN_ON(!(reg->flags & KBASE_REG_CPU_CACHED))) {
ret = -EINVAL;
goto out;
}
ret = kbase_jd_user_buf_pin_pages(kctx, reg);
if (ret)
goto out;
@@ -358,15 +365,16 @@ static int kbase_kcpu_jit_allocate_prepare(
{
struct kbase_context *const kctx = kcpu_queue->kctx;
void __user *data = u64_to_user_ptr(alloc_info->info);
struct base_jit_alloc_info *info;
struct base_jit_alloc_info *info = NULL;
u32 count = alloc_info->count;
int ret = 0;
u32 i;
lockdep_assert_held(&kcpu_queue->lock);
if (!data || count > kcpu_queue->kctx->jit_max_allocations ||
count > ARRAY_SIZE(kctx->jit_alloc)) {
if ((count == 0) || (count > ARRAY_SIZE(kctx->jit_alloc)) ||
(count > kcpu_queue->kctx->jit_max_allocations) || (!data) ||
!kbase_mem_allow_alloc(kctx)) {
ret = -EINVAL;
goto out;
}
@@ -603,6 +611,7 @@ out:
return ret;
}
#if IS_ENABLED(CONFIG_MALI_VECTOR_DUMP) || MALI_UNIT_TEST
static int kbase_csf_queue_group_suspend_prepare(
struct kbase_kcpu_command_queue *kcpu_queue,
struct base_kcpu_command_group_suspend_info *suspend_buf,
@@ -674,9 +683,7 @@ static int kbase_csf_queue_group_suspend_prepare(
(kbase_reg_current_backed_size(reg) < nr_pages) ||
!(reg->flags & KBASE_REG_CPU_WR) ||
(reg->gpu_alloc->type != KBASE_MEM_TYPE_NATIVE) ||
(reg->flags & KBASE_REG_DONT_NEED) ||
(reg->flags & KBASE_REG_ACTIVE_JIT_ALLOC) ||
(reg->flags & KBASE_REG_NO_USER_FREE)) {
(kbase_is_region_shrinkable(reg)) || (kbase_va_region_is_no_user_free(reg))) {
ret = -EINVAL;
goto out_clean_pages;
}
@@ -720,6 +727,7 @@ static int kbase_csf_queue_group_suspend_process(struct kbase_context *kctx,
{
return kbase_csf_queue_group_suspend(kctx, sus_buf, group_handle);
}
#endif
static enum kbase_csf_event_callback_action event_cqs_callback(void *param)
{
@@ -784,13 +792,14 @@ static int kbase_kcpu_cqs_wait_process(struct kbase_device *kbdev,
return -EINVAL;
}
sig_set = evt[BASEP_EVENT_VAL_INDEX] > cqs_wait->objs[i].val;
sig_set =
evt[BASEP_EVENT32_VAL_OFFSET / sizeof(u32)] > cqs_wait->objs[i].val;
if (sig_set) {
bool error = false;
bitmap_set(cqs_wait->signaled, i, 1);
if ((cqs_wait->inherit_err_flags & (1U << i)) &&
evt[BASEP_EVENT_ERR_INDEX] > 0) {
evt[BASEP_EVENT32_ERR_OFFSET / sizeof(u32)] > 0) {
queue->has_error = true;
error = true;
}
@@ -800,7 +809,7 @@ static int kbase_kcpu_cqs_wait_process(struct kbase_device *kbdev,
error);
KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_END(
kbdev, queue, evt[BASEP_EVENT_ERR_INDEX]);
kbdev, queue, evt[BASEP_EVENT32_ERR_OFFSET / sizeof(u32)]);
queue->command_started = false;
}
@@ -817,12 +826,34 @@ static int kbase_kcpu_cqs_wait_process(struct kbase_device *kbdev,
return bitmap_full(cqs_wait->signaled, cqs_wait->nr_objs);
}
/* Return true iff @data_type names a supported CQS object width
 * (only 32-bit and 64-bit objects exist at present).
 */
static inline bool kbase_kcpu_cqs_is_data_type_valid(u8 data_type)
{
	switch (data_type) {
	case BASEP_CQS_DATA_TYPE_U32:
	case BASEP_CQS_DATA_TYPE_U64:
		return true;
	default:
		return false;
	}
}
/* Return true iff @addr is naturally aligned for a CQS object of the given
 * @data_type. Alignment equals object size for both Sync32 and Sync64
 * (enforced at build time below), which guarantees an aligned object cannot
 * straddle the end of the supplied event page. Unknown types report false.
 */
static inline bool kbase_kcpu_cqs_is_aligned(u64 addr, u8 data_type)
{
	BUILD_BUG_ON(BASEP_EVENT32_ALIGN_BYTES != BASEP_EVENT32_SIZE_BYTES);
	BUILD_BUG_ON(BASEP_EVENT64_ALIGN_BYTES != BASEP_EVENT64_SIZE_BYTES);
	WARN_ON(!kbase_kcpu_cqs_is_data_type_valid(data_type));

	if (data_type == BASEP_CQS_DATA_TYPE_U32)
		return (addr & (BASEP_EVENT32_ALIGN_BYTES - 1)) == 0;
	if (data_type == BASEP_CQS_DATA_TYPE_U64)
		return (addr & (BASEP_EVENT64_ALIGN_BYTES - 1)) == 0;

	return false;
}
static int kbase_kcpu_cqs_wait_prepare(struct kbase_kcpu_command_queue *queue,
struct base_kcpu_command_cqs_wait_info *cqs_wait_info,
struct kbase_kcpu_command *current_command)
{
struct base_cqs_wait_info *objs;
unsigned int nr_objs = cqs_wait_info->nr_objs;
unsigned int i;
lockdep_assert_held(&queue->lock);
@@ -842,6 +873,17 @@ static int kbase_kcpu_cqs_wait_prepare(struct kbase_kcpu_command_queue *queue,
return -ENOMEM;
}
/* Check the CQS objects as early as possible. By checking their alignment
* (required alignment equals to size for Sync32 and Sync64 objects), we can
* prevent overrunning the supplied event page.
*/
for (i = 0; i < nr_objs; i++) {
if (!kbase_kcpu_cqs_is_aligned(objs[i].addr, BASEP_CQS_DATA_TYPE_U32)) {
kfree(objs);
return -EINVAL;
}
}
if (++queue->cqs_wait_count == 1) {
if (kbase_csf_event_wait_add(queue->kctx,
event_cqs_callback, queue)) {
@@ -897,14 +939,13 @@ static void kbase_kcpu_cqs_set_process(struct kbase_device *kbdev,
"Sync memory %llx already freed", cqs_set->objs[i].addr);
queue->has_error = true;
} else {
evt[BASEP_EVENT_ERR_INDEX] = queue->has_error;
evt[BASEP_EVENT32_ERR_OFFSET / sizeof(u32)] = queue->has_error;
/* Set to signaled */
evt[BASEP_EVENT_VAL_INDEX]++;
evt[BASEP_EVENT32_VAL_OFFSET / sizeof(u32)]++;
kbase_phy_alloc_mapping_put(queue->kctx, mapping);
KBASE_KTRACE_ADD_CSF_KCPU(kbdev, KCPU_CQS_SET,
queue, cqs_set->objs[i].addr,
evt[BASEP_EVENT_ERR_INDEX]);
KBASE_KTRACE_ADD_CSF_KCPU(kbdev, KCPU_CQS_SET, queue, cqs_set->objs[i].addr,
evt[BASEP_EVENT32_ERR_OFFSET / sizeof(u32)]);
}
}
@@ -921,6 +962,7 @@ static int kbase_kcpu_cqs_set_prepare(
{
struct base_cqs_set *objs;
unsigned int nr_objs = cqs_set_info->nr_objs;
unsigned int i;
lockdep_assert_held(&kcpu_queue->lock);
@@ -940,6 +982,17 @@ static int kbase_kcpu_cqs_set_prepare(
return -ENOMEM;
}
/* Check the CQS objects as early as possible. By checking their alignment
* (required alignment equals to size for Sync32 and Sync64 objects), we can
* prevent overrunning the supplied event page.
*/
for (i = 0; i < nr_objs; i++) {
if (!kbase_kcpu_cqs_is_aligned(objs[i].addr, BASEP_CQS_DATA_TYPE_U32)) {
kfree(objs);
return -EINVAL;
}
}
current_command->type = BASE_KCPU_COMMAND_TYPE_CQS_SET;
current_command->info.cqs_set.nr_objs = nr_objs;
current_command->info.cqs_set.objs = objs;
@@ -982,12 +1035,16 @@ static int kbase_kcpu_cqs_wait_operation_process(struct kbase_device *kbdev,
if (!test_bit(i, cqs_wait_operation->signaled)) {
struct kbase_vmap_struct *mapping;
bool sig_set;
u64 *evt = (u64 *)kbase_phy_alloc_mapping_get(queue->kctx,
cqs_wait_operation->objs[i].addr, &mapping);
uintptr_t evt = (uintptr_t)kbase_phy_alloc_mapping_get(
queue->kctx, cqs_wait_operation->objs[i].addr, &mapping);
u64 val = 0;
/* GPUCORE-28172 RDT to review */
if (!queue->command_started)
if (!queue->command_started) {
queue->command_started = true;
KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_OPERATION_START(
kbdev, queue);
}
if (!evt) {
dev_warn(kbdev->dev,
@@ -996,12 +1053,29 @@ static int kbase_kcpu_cqs_wait_operation_process(struct kbase_device *kbdev,
return -EINVAL;
}
switch (cqs_wait_operation->objs[i].data_type) {
default:
WARN_ON(!kbase_kcpu_cqs_is_data_type_valid(
cqs_wait_operation->objs[i].data_type));
kbase_phy_alloc_mapping_put(queue->kctx, mapping);
queue->has_error = true;
return -EINVAL;
case BASEP_CQS_DATA_TYPE_U32:
val = *(u32 *)evt;
evt += BASEP_EVENT32_ERR_OFFSET - BASEP_EVENT32_VAL_OFFSET;
break;
case BASEP_CQS_DATA_TYPE_U64:
val = *(u64 *)evt;
evt += BASEP_EVENT64_ERR_OFFSET - BASEP_EVENT64_VAL_OFFSET;
break;
}
switch (cqs_wait_operation->objs[i].operation) {
case BASEP_CQS_WAIT_OPERATION_LE:
sig_set = *evt <= cqs_wait_operation->objs[i].val;
sig_set = val <= cqs_wait_operation->objs[i].val;
break;
case BASEP_CQS_WAIT_OPERATION_GT:
sig_set = *evt > cqs_wait_operation->objs[i].val;
sig_set = val > cqs_wait_operation->objs[i].val;
break;
default:
dev_dbg(kbdev->dev,
@@ -1013,28 +1087,15 @@ static int kbase_kcpu_cqs_wait_operation_process(struct kbase_device *kbdev,
return -EINVAL;
}
/* Increment evt up to the error_state value depending on the CQS data type */
switch (cqs_wait_operation->objs[i].data_type) {
default:
dev_dbg(kbdev->dev, "Unreachable data_type=%d", cqs_wait_operation->objs[i].data_type);
/* Fallthrough - hint to compiler that there's really only 2 options at present */
fallthrough;
case BASEP_CQS_DATA_TYPE_U32:
evt = (u64 *)((u8 *)evt + sizeof(u32));
break;
case BASEP_CQS_DATA_TYPE_U64:
evt = (u64 *)((u8 *)evt + sizeof(u64));
break;
}
if (sig_set) {
bitmap_set(cqs_wait_operation->signaled, i, 1);
if ((cqs_wait_operation->inherit_err_flags & (1U << i)) &&
*evt > 0) {
*(u32 *)evt > 0) {
queue->has_error = true;
}
/* GPUCORE-28172 RDT to review */
KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_OPERATION_END(
kbdev, queue, *(u32 *)evt);
queue->command_started = false;
}
@@ -1058,6 +1119,7 @@ static int kbase_kcpu_cqs_wait_operation_prepare(struct kbase_kcpu_command_queue
{
struct base_cqs_wait_operation_info *objs;
unsigned int nr_objs = cqs_wait_operation_info->nr_objs;
unsigned int i;
lockdep_assert_held(&queue->lock);
@@ -1077,6 +1139,18 @@ static int kbase_kcpu_cqs_wait_operation_prepare(struct kbase_kcpu_command_queue
return -ENOMEM;
}
/* Check the CQS objects as early as possible. By checking their alignment
* (required alignment equals to size for Sync32 and Sync64 objects), we can
* prevent overrunning the supplied event page.
*/
for (i = 0; i < nr_objs; i++) {
if (!kbase_kcpu_cqs_is_data_type_valid(objs[i].data_type) ||
!kbase_kcpu_cqs_is_aligned(objs[i].addr, objs[i].data_type)) {
kfree(objs);
return -EINVAL;
}
}
if (++queue->cqs_wait_count == 1) {
if (kbase_csf_event_wait_add(queue->kctx,
event_cqs_callback, queue)) {
@@ -1107,6 +1181,44 @@ static int kbase_kcpu_cqs_wait_operation_prepare(struct kbase_kcpu_command_queue
return 0;
}
/*
 * kbasep_kcpu_cqs_do_set_operation_32 - Apply a CQS SET operation to a
 * 32-bit CQS object value.
 *
 * @queue:     KCPU command queue executing the operation; its has_error flag
 *             is set if the operation code is unsupported.
 * @evt:       CPU address of the mapped 32-bit CQS value.
 * @operation: One of the BASEP_CQS_SET_OPERATION_* codes.
 * @val:       Operand, truncated to 32 bits for this object width.
 */
static void kbasep_kcpu_cqs_do_set_operation_32(struct kbase_kcpu_command_queue *queue,
						uintptr_t evt, u8 operation, u64 val)
{
	struct kbase_device *kbdev = queue->kctx->kbdev;

	switch (operation) {
	case BASEP_CQS_SET_OPERATION_ADD:
		*(u32 *)evt += (u32)val;
		break;
	case BASEP_CQS_SET_OPERATION_SET:
		/* Explicit cast: the operand is deliberately truncated to the
		 * 32-bit object width, matching the ADD case above.
		 */
		*(u32 *)evt = (u32)val;
		break;
	default:
		dev_dbg(kbdev->dev, "Unsupported CQS set operation %d", operation);
		queue->has_error = true;
		break;
	}
}
/*
 * kbasep_kcpu_cqs_do_set_operation_64 - Apply a CQS SET operation to a
 * 64-bit CQS object value.
 *
 * @queue:     KCPU command queue executing the operation; its has_error flag
 *             is set if the operation code is unsupported.
 * @evt:       CPU address of the mapped 64-bit CQS value.
 * @operation: One of the BASEP_CQS_SET_OPERATION_* codes.
 * @val:       Operand applied at full 64-bit width.
 */
static void kbasep_kcpu_cqs_do_set_operation_64(struct kbase_kcpu_command_queue *queue,
						uintptr_t evt, u8 operation, u64 val)
{
	struct kbase_device *kbdev = queue->kctx->kbdev;

	if (operation == BASEP_CQS_SET_OPERATION_ADD) {
		*(u64 *)evt += val;
	} else if (operation == BASEP_CQS_SET_OPERATION_SET) {
		*(u64 *)evt = val;
	} else {
		dev_dbg(kbdev->dev, "Unsupported CQS set operation %d", operation);
		queue->has_error = true;
	}
}
static void kbase_kcpu_cqs_set_operation_process(
struct kbase_device *kbdev,
struct kbase_kcpu_command_queue *queue,
@@ -1121,51 +1233,42 @@ static void kbase_kcpu_cqs_set_operation_process(
for (i = 0; i < cqs_set_operation->nr_objs; i++) {
struct kbase_vmap_struct *mapping;
u64 *evt;
uintptr_t evt;
evt = (u64 *)kbase_phy_alloc_mapping_get(
evt = (uintptr_t)kbase_phy_alloc_mapping_get(
queue->kctx, cqs_set_operation->objs[i].addr, &mapping);
/* GPUCORE-28172 RDT to review */
if (!evt) {
dev_warn(kbdev->dev,
"Sync memory %llx already freed", cqs_set_operation->objs[i].addr);
queue->has_error = true;
} else {
switch (cqs_set_operation->objs[i].operation) {
case BASEP_CQS_SET_OPERATION_ADD:
*evt += cqs_set_operation->objs[i].val;
break;
case BASEP_CQS_SET_OPERATION_SET:
*evt = cqs_set_operation->objs[i].val;
break;
default:
dev_dbg(kbdev->dev,
"Unsupported CQS set operation %d", cqs_set_operation->objs[i].operation);
queue->has_error = true;
break;
}
struct base_cqs_set_operation_info *obj = &cqs_set_operation->objs[i];
/* Increment evt up to the error_state value depending on the CQS data type */
switch (cqs_set_operation->objs[i].data_type) {
switch (obj->data_type) {
default:
dev_dbg(kbdev->dev, "Unreachable data_type=%d", cqs_set_operation->objs[i].data_type);
/* Fallthrough - hint to compiler that there's really only 2 options at present */
fallthrough;
WARN_ON(!kbase_kcpu_cqs_is_data_type_valid(obj->data_type));
queue->has_error = true;
goto skip_err_propagation;
case BASEP_CQS_DATA_TYPE_U32:
evt = (u64 *)((u8 *)evt + sizeof(u32));
kbasep_kcpu_cqs_do_set_operation_32(queue, evt, obj->operation,
obj->val);
evt += BASEP_EVENT32_ERR_OFFSET - BASEP_EVENT32_VAL_OFFSET;
break;
case BASEP_CQS_DATA_TYPE_U64:
evt = (u64 *)((u8 *)evt + sizeof(u64));
kbasep_kcpu_cqs_do_set_operation_64(queue, evt, obj->operation,
obj->val);
evt += BASEP_EVENT64_ERR_OFFSET - BASEP_EVENT64_VAL_OFFSET;
break;
}
/* GPUCORE-28172 RDT to review */
KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_SET_OPERATION(
kbdev, queue, *(u32 *)evt ? 1 : 0);
/* Always propagate errors */
*evt = queue->has_error;
*(u32 *)evt = queue->has_error;
skip_err_propagation:
kbase_phy_alloc_mapping_put(queue->kctx, mapping);
}
}
@@ -1183,6 +1286,7 @@ static int kbase_kcpu_cqs_set_operation_prepare(
{
struct base_cqs_set_operation_info *objs;
unsigned int nr_objs = cqs_set_operation_info->nr_objs;
unsigned int i;
lockdep_assert_held(&kcpu_queue->lock);
@@ -1202,6 +1306,18 @@ static int kbase_kcpu_cqs_set_operation_prepare(
return -ENOMEM;
}
/* Check the CQS objects as early as possible. By checking their alignment
* (required alignment equals to size for Sync32 and Sync64 objects), we can
* prevent overrunning the supplied event page.
*/
for (i = 0; i < nr_objs; i++) {
if (!kbase_kcpu_cqs_is_data_type_valid(objs[i].data_type) ||
!kbase_kcpu_cqs_is_aligned(objs[i].addr, objs[i].data_type)) {
kfree(objs);
return -EINVAL;
}
}
current_command->type = BASE_KCPU_COMMAND_TYPE_CQS_SET_OPERATION;
current_command->info.cqs_set_operation.nr_objs = nr_objs;
current_command->info.cqs_set_operation.objs = objs;
@@ -1234,9 +1350,8 @@ static void kbase_csf_fence_wait_callback(struct dma_fence *fence,
queue_work(kcpu_queue->wq, &kcpu_queue->work);
}
static void kbase_kcpu_fence_wait_cancel(
struct kbase_kcpu_command_queue *kcpu_queue,
struct kbase_kcpu_command_fence_info *fence_info)
static void kbasep_kcpu_fence_wait_cancel(struct kbase_kcpu_command_queue *kcpu_queue,
struct kbase_kcpu_command_fence_info *fence_info)
{
struct kbase_context *const kctx = kcpu_queue->kctx;
@@ -1410,15 +1525,14 @@ static int kbase_kcpu_fence_wait_process(
*/
if (fence_status)
kbase_kcpu_fence_wait_cancel(kcpu_queue, fence_info);
kbasep_kcpu_fence_wait_cancel(kcpu_queue, fence_info);
return fence_status;
}
static int kbase_kcpu_fence_wait_prepare(
struct kbase_kcpu_command_queue *kcpu_queue,
struct base_kcpu_command_fence_info *fence_info,
struct kbase_kcpu_command *current_command)
static int kbase_kcpu_fence_wait_prepare(struct kbase_kcpu_command_queue *kcpu_queue,
struct base_kcpu_command_fence_info *fence_info,
struct kbase_kcpu_command *current_command)
{
#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE)
struct fence *fence_in;
@@ -1429,8 +1543,7 @@ static int kbase_kcpu_fence_wait_prepare(
lockdep_assert_held(&kcpu_queue->lock);
if (copy_from_user(&fence, u64_to_user_ptr(fence_info->fence),
sizeof(fence)))
if (copy_from_user(&fence, u64_to_user_ptr(fence_info->fence), sizeof(fence)))
return -ENOMEM;
fence_in = sync_file_get_fence(fence.basep.fd);
@@ -1444,9 +1557,8 @@ static int kbase_kcpu_fence_wait_prepare(
return 0;
}
static int kbase_kcpu_fence_signal_process(
struct kbase_kcpu_command_queue *kcpu_queue,
struct kbase_kcpu_command_fence_info *fence_info)
static int kbasep_kcpu_fence_signal_process(struct kbase_kcpu_command_queue *kcpu_queue,
struct kbase_kcpu_command_fence_info *fence_info)
{
struct kbase_context *const kctx = kcpu_queue->kctx;
int ret;
@@ -1467,37 +1579,37 @@ static int kbase_kcpu_fence_signal_process(
fence_info->fence->seqno);
/* dma_fence refcount needs to be decreased to release it. */
dma_fence_put(fence_info->fence);
kbase_fence_put(fence_info->fence);
fence_info->fence = NULL;
return ret;
}
static int kbase_kcpu_fence_signal_prepare(
struct kbase_kcpu_command_queue *kcpu_queue,
struct base_kcpu_command_fence_info *fence_info,
struct kbase_kcpu_command *current_command)
static int kbasep_kcpu_fence_signal_init(struct kbase_kcpu_command_queue *kcpu_queue,
struct kbase_kcpu_command *current_command,
struct base_fence *fence, struct sync_file **sync_file,
int *fd)
{
#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE)
struct fence *fence_out;
#else
struct dma_fence *fence_out;
#endif
struct base_fence fence;
struct sync_file *sync_file;
struct kbase_kcpu_dma_fence *kcpu_fence;
int ret = 0;
int fd;
lockdep_assert_held(&kcpu_queue->lock);
if (copy_from_user(&fence, u64_to_user_ptr(fence_info->fence),
sizeof(fence)))
return -EFAULT;
fence_out = kzalloc(sizeof(*fence_out), GFP_KERNEL);
if (!fence_out)
kcpu_fence = kzalloc(sizeof(*kcpu_fence), GFP_KERNEL);
if (!kcpu_fence)
return -ENOMEM;
#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE)
fence_out = (struct fence *)kcpu_fence;
#else
fence_out = (struct dma_fence *)kcpu_fence;
#endif
dma_fence_init(fence_out,
&kbase_fence_ops,
&kbase_csf_fence_lock,
@@ -1513,28 +1625,66 @@ static int kbase_kcpu_fence_signal_prepare(
dma_fence_get(fence_out);
#endif
/* Set reference to KCPU metadata and increment refcount */
kcpu_fence->metadata = kcpu_queue->metadata;
WARN_ON(!kbase_refcount_inc_not_zero(&kcpu_fence->metadata->refcount));
/* create a sync_file fd representing the fence */
sync_file = sync_file_create(fence_out);
if (!sync_file) {
*sync_file = sync_file_create(fence_out);
if (!(*sync_file)) {
ret = -ENOMEM;
goto file_create_fail;
}
fd = get_unused_fd_flags(O_CLOEXEC);
if (fd < 0) {
ret = fd;
*fd = get_unused_fd_flags(O_CLOEXEC);
if (*fd < 0) {
ret = *fd;
goto fd_flags_fail;
}
fence.basep.fd = fd;
fence->basep.fd = *fd;
current_command->type = BASE_KCPU_COMMAND_TYPE_FENCE_SIGNAL;
current_command->info.fence.fence = fence_out;
return 0;
fd_flags_fail:
fput((*sync_file)->file);
file_create_fail:
/*
* Upon failure, dma_fence refcount that was increased by
* dma_fence_get() or sync_file_create() needs to be decreased
* to release it.
*/
kbase_fence_put(fence_out);
current_command->info.fence.fence = NULL;
return ret;
}
static int kbase_kcpu_fence_signal_prepare(struct kbase_kcpu_command_queue *kcpu_queue,
struct base_kcpu_command_fence_info *fence_info,
struct kbase_kcpu_command *current_command)
{
struct base_fence fence;
struct sync_file *sync_file = NULL;
int fd;
int ret = 0;
lockdep_assert_held(&kcpu_queue->lock);
if (copy_from_user(&fence, u64_to_user_ptr(fence_info->fence), sizeof(fence)))
return -EFAULT;
ret = kbasep_kcpu_fence_signal_init(kcpu_queue, current_command, &fence, &sync_file, &fd);
if (ret)
return ret;
if (copy_to_user(u64_to_user_ptr(fence_info->fence), &fence,
sizeof(fence))) {
ret = -EFAULT;
goto fd_flags_fail;
goto fail;
}
/* 'sync_file' pointer can't be safely dereferenced once 'fd' is
@@ -1544,21 +1694,34 @@ static int kbase_kcpu_fence_signal_prepare(
fd_install(fd, sync_file->file);
return 0;
fd_flags_fail:
fail:
fput(sync_file->file);
file_create_fail:
/*
* Upon failure, dma_fence refcount that was increased by
* dma_fence_get() or sync_file_create() needs to be decreased
* to release it.
*/
dma_fence_put(fence_out);
kbase_fence_put(current_command->info.fence.fence);
current_command->info.fence.fence = NULL;
kfree(fence_out);
return ret;
}
int kbase_kcpu_fence_signal_process(struct kbase_kcpu_command_queue *kcpu_queue,
struct kbase_kcpu_command_fence_info *fence_info)
{
if (!kcpu_queue || !fence_info)
return -EINVAL;
return kbasep_kcpu_fence_signal_process(kcpu_queue, fence_info);
}
KBASE_EXPORT_TEST_API(kbase_kcpu_fence_signal_process);
int kbase_kcpu_fence_signal_init(struct kbase_kcpu_command_queue *kcpu_queue,
struct kbase_kcpu_command *current_command,
struct base_fence *fence, struct sync_file **sync_file, int *fd)
{
if (!kcpu_queue || !current_command || !fence || !sync_file || !fd)
return -EINVAL;
return kbasep_kcpu_fence_signal_init(kcpu_queue, current_command, fence, sync_file, fd);
}
KBASE_EXPORT_TEST_API(kbase_kcpu_fence_signal_init);
#endif /* CONFIG_SYNC_FILE */
static void kcpu_queue_process_worker(struct work_struct *data)
@@ -1595,6 +1758,9 @@ static int delete_queue(struct kbase_context *kctx, u32 id)
mutex_lock(&queue->lock);
/* Metadata struct may outlive KCPU queue. */
kbase_kcpu_dma_fence_meta_put(queue->metadata);
/* Drain the remaining work for this queue first and go past
* all the waits.
*/
@@ -1701,8 +1867,7 @@ static void kcpu_queue_process(struct kbase_kcpu_command_queue *queue,
status = 0;
#if IS_ENABLED(CONFIG_SYNC_FILE)
if (drain_queue) {
kbase_kcpu_fence_wait_cancel(queue,
&cmd->info.fence);
kbasep_kcpu_fence_wait_cancel(queue, &cmd->info.fence);
} else {
status = kbase_kcpu_fence_wait_process(queue,
&cmd->info.fence);
@@ -1732,8 +1897,7 @@ static void kcpu_queue_process(struct kbase_kcpu_command_queue *queue,
status = 0;
#if IS_ENABLED(CONFIG_SYNC_FILE)
status = kbase_kcpu_fence_signal_process(
queue, &cmd->info.fence);
status = kbasep_kcpu_fence_signal_process(queue, &cmd->info.fence);
if (status < 0)
queue->has_error = true;
@@ -1893,7 +2057,7 @@ static void kcpu_queue_process(struct kbase_kcpu_command_queue *queue,
break;
}
case BASE_KCPU_COMMAND_TYPE_JIT_FREE:
case BASE_KCPU_COMMAND_TYPE_JIT_FREE: {
KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_FREE_START(kbdev, queue);
status = kbase_kcpu_jit_free_process(queue, cmd);
@@ -1903,6 +2067,8 @@ static void kcpu_queue_process(struct kbase_kcpu_command_queue *queue,
KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_FREE_END(
kbdev, queue);
break;
}
#if IS_ENABLED(CONFIG_MALI_VECTOR_DUMP) || MALI_UNIT_TEST
case BASE_KCPU_COMMAND_TYPE_GROUP_SUSPEND: {
struct kbase_suspend_copy_buffer *sus_buf =
cmd->info.suspend_buf_copy.sus_buf;
@@ -1919,24 +2085,25 @@ static void kcpu_queue_process(struct kbase_kcpu_command_queue *queue,
KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_GROUP_SUSPEND_END(
kbdev, queue, status);
}
if (!sus_buf->cpu_alloc) {
int i;
if (!sus_buf->cpu_alloc) {
int i;
for (i = 0; i < sus_buf->nr_pages; i++)
put_page(sus_buf->pages[i]);
} else {
kbase_mem_phy_alloc_kernel_unmapped(
sus_buf->cpu_alloc);
kbase_mem_phy_alloc_put(
sus_buf->cpu_alloc);
}
for (i = 0; i < sus_buf->nr_pages; i++)
put_page(sus_buf->pages[i]);
} else {
kbase_mem_phy_alloc_kernel_unmapped(
sus_buf->cpu_alloc);
kbase_mem_phy_alloc_put(
sus_buf->cpu_alloc);
}
kfree(sus_buf->pages);
kfree(sus_buf);
break;
}
#endif
default:
dev_dbg(kbdev->dev,
"Unrecognized command type");
@@ -2011,12 +2178,29 @@ static void KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_COMMAND(
}
case BASE_KCPU_COMMAND_TYPE_CQS_WAIT_OPERATION:
{
/* GPUCORE-28172 RDT to review */
const struct base_cqs_wait_operation_info *waits =
cmd->info.cqs_wait_operation.objs;
u32 inherit_err_flags = cmd->info.cqs_wait_operation.inherit_err_flags;
unsigned int i;
for (i = 0; i < cmd->info.cqs_wait_operation.nr_objs; i++) {
KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_WAIT_OPERATION(
kbdev, queue, waits[i].addr, waits[i].val,
waits[i].operation, waits[i].data_type,
(inherit_err_flags & ((uint32_t)1 << i)) ? 1 : 0);
}
break;
}
case BASE_KCPU_COMMAND_TYPE_CQS_SET_OPERATION:
{
/* GPUCORE-28172 RDT to review */
const struct base_cqs_set_operation_info *sets = cmd->info.cqs_set_operation.objs;
unsigned int i;
for (i = 0; i < cmd->info.cqs_set_operation.nr_objs; i++) {
KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_SET_OPERATION(
kbdev, queue, sets[i].addr, sets[i].val,
sets[i].operation, sets[i].data_type);
}
break;
}
case BASE_KCPU_COMMAND_TYPE_ERROR_BARRIER:
@@ -2063,11 +2247,13 @@ static void KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_COMMAND(
KBASE_TLSTREAM_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_JIT_FREE(kbdev, queue);
break;
}
#if IS_ENABLED(CONFIG_MALI_VECTOR_DUMP) || MALI_UNIT_TEST
case BASE_KCPU_COMMAND_TYPE_GROUP_SUSPEND:
KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_GROUP_SUSPEND(
kbdev, queue, cmd->info.suspend_buf_copy.sus_buf,
cmd->info.suspend_buf_copy.group_handle);
break;
#endif
default:
dev_dbg(kbdev->dev, "Unknown command type %u", cmd->type);
break;
@@ -2103,14 +2289,30 @@ int kbase_csf_kcpu_queue_enqueue(struct kbase_context *kctx,
return -EINVAL;
}
/* There might be a race between one thread trying to enqueue commands to the queue
* and other thread trying to delete the same queue.
* This racing could lead to use-after-free problem by enqueuing thread if
* resources for the queue has already been freed by deleting thread.
*
* To prevent the issue, two mutexes are acquired/release asymmetrically as follows.
*
* Lock A (kctx mutex)
* Lock B (queue mutex)
* Unlock A
* Unlock B
*
* With the kctx mutex being held, enqueuing thread will check the queue
* and will return error code if the queue had already been deleted.
*/
mutex_lock(&kctx->csf.kcpu_queues.lock);
queue = kctx->csf.kcpu_queues.array[enq->id];
mutex_unlock(&kctx->csf.kcpu_queues.lock);
if (queue == NULL)
if (queue == NULL) {
dev_dbg(kctx->kbdev->dev, "Invalid KCPU queue (id:%u)", enq->id);
mutex_unlock(&kctx->csf.kcpu_queues.lock);
return -EINVAL;
}
mutex_lock(&queue->lock);
mutex_unlock(&kctx->csf.kcpu_queues.lock);
if (kcpu_queue_get_space(queue) < enq->nr_commands) {
ret = -EBUSY;
@@ -2208,11 +2410,13 @@ int kbase_csf_kcpu_queue_enqueue(struct kbase_context *kctx,
ret = kbase_kcpu_jit_free_prepare(queue,
&command.info.jit_free, kcpu_cmd);
break;
#if IS_ENABLED(CONFIG_MALI_VECTOR_DUMP) || MALI_UNIT_TEST
case BASE_KCPU_COMMAND_TYPE_GROUP_SUSPEND:
ret = kbase_csf_queue_group_suspend_prepare(queue,
&command.info.suspend_buf_copy,
kcpu_cmd);
break;
#endif
default:
dev_dbg(queue->kctx->kbdev->dev,
"Unknown command type %u", command.type);
@@ -2275,6 +2479,7 @@ void kbase_csf_kcpu_queue_context_term(struct kbase_context *kctx)
mutex_destroy(&kctx->csf.kcpu_queues.lock);
}
KBASE_EXPORT_TEST_API(kbase_csf_kcpu_queue_context_term);
int kbase_csf_kcpu_queue_delete(struct kbase_context *kctx,
struct kbase_ioctl_kcpu_queue_delete *del)
@@ -2287,8 +2492,11 @@ int kbase_csf_kcpu_queue_new(struct kbase_context *kctx,
{
struct kbase_kcpu_command_queue *queue;
int idx;
int n;
int ret = 0;
#if IS_ENABLED(CONFIG_SYNC_FILE)
struct kbase_kcpu_dma_fence_meta *metadata;
#endif
/* The queue id is of u8 type and we use the index of the kcpu_queues
* array as an id, so the number of elements in the array can't be
* more than 256.
@@ -2334,7 +2542,31 @@ int kbase_csf_kcpu_queue_new(struct kbase_context *kctx,
queue->fence_context = dma_fence_context_alloc(1);
queue->fence_seqno = 0;
queue->fence_wait_processed = false;
#endif
metadata = kzalloc(sizeof(*metadata), GFP_KERNEL);
if (!metadata) {
destroy_workqueue(queue->wq);
kfree(queue);
ret = -ENOMEM;
goto out;
}
metadata->kbdev = kctx->kbdev;
metadata->kctx_id = kctx->id;
n = snprintf(metadata->timeline_name, MAX_TIMELINE_NAME, "%d-%d_%d-%lld-kcpu",
kctx->kbdev->id, kctx->tgid, kctx->id, queue->fence_context);
if (WARN_ON(n >= MAX_TIMELINE_NAME)) {
destroy_workqueue(queue->wq);
kfree(queue);
kfree(metadata);
ret = -EINVAL;
goto out;
}
kbase_refcount_set(&metadata->refcount, 1);
queue->metadata = metadata;
atomic_inc(&kctx->kbdev->live_fence_metadata);
#endif /* CONFIG_SYNC_FILE */
queue->enqueue_failed = false;
queue->command_started = false;
INIT_LIST_HEAD(&queue->jit_blocked);
@@ -2360,3 +2592,4 @@ out:
return ret;
}
KBASE_EXPORT_TEST_API(kbase_csf_kcpu_queue_new);

View File

@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
* (C) COPYRIGHT 2018-2022 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -22,6 +22,9 @@
#ifndef _KBASE_CSF_KCPU_H_
#define _KBASE_CSF_KCPU_H_
#include <mali_kbase_fence.h>
#include <mali_kbase_sync.h>
#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE)
#include <linux/fence.h>
#else
@@ -44,8 +47,8 @@ struct kbase_kcpu_command_import_info {
};
/**
* struct kbase_kcpu_command_fence_info - Structure which holds information
* about the fence object enqueued in the kcpu command queue
* struct kbase_kcpu_command_fence_info - Structure which holds information about the
* fence object enqueued in the kcpu command queue
*
* @fence_cb: Fence callback
* @fence: Fence
@@ -183,6 +186,7 @@ struct kbase_suspend_copy_buffer {
struct kbase_mem_phy_alloc *cpu_alloc;
};
#if IS_ENABLED(CONFIG_MALI_VECTOR_DUMP) || MALI_UNIT_TEST
/**
* struct kbase_kcpu_command_group_suspend_info - structure which contains
* suspend buffer data captured for a suspended queue group.
@@ -195,6 +199,7 @@ struct kbase_kcpu_command_group_suspend_info {
struct kbase_suspend_copy_buffer *sus_buf;
u8 group_handle;
};
#endif
/**
@@ -229,7 +234,9 @@ struct kbase_kcpu_command {
struct kbase_kcpu_command_import_info import;
struct kbase_kcpu_command_jit_alloc_info jit_alloc;
struct kbase_kcpu_command_jit_free_info jit_free;
#if IS_ENABLED(CONFIG_MALI_VECTOR_DUMP) || MALI_UNIT_TEST
struct kbase_kcpu_command_group_suspend_info suspend_buf_copy;
#endif
} info;
};
@@ -274,6 +281,8 @@ struct kbase_kcpu_command {
* @jit_blocked: Used to keep track of command queues blocked
* by a pending JIT allocation command.
* @fence_timeout: Timer used to detect the fence wait timeout.
* @metadata: Metadata structure containing basic information about
* this queue for any fence objects associated with this queue.
*/
struct kbase_kcpu_command_queue {
struct mutex lock;
@@ -295,6 +304,9 @@ struct kbase_kcpu_command_queue {
#ifdef CONFIG_MALI_FENCE_DEBUG
struct timer_list fence_timeout;
#endif /* CONFIG_MALI_FENCE_DEBUG */
#if IS_ENABLED(CONFIG_SYNC_FILE)
struct kbase_kcpu_dma_fence_meta *metadata;
#endif /* CONFIG_SYNC_FILE */
};
/**
@@ -359,4 +371,14 @@ int kbase_csf_kcpu_queue_context_init(struct kbase_context *kctx);
*/
void kbase_csf_kcpu_queue_context_term(struct kbase_context *kctx);
#if IS_ENABLED(CONFIG_SYNC_FILE)
/* Test wrappers for dma fence operations. */
int kbase_kcpu_fence_signal_process(struct kbase_kcpu_command_queue *kcpu_queue,
struct kbase_kcpu_command_fence_info *fence_info);
int kbase_kcpu_fence_signal_init(struct kbase_kcpu_command_queue *kcpu_queue,
struct kbase_kcpu_command *current_command,
struct base_fence *fence, struct sync_file **sync_file, int *fd);
#endif /* CONFIG_SYNC_FILE */
#endif /* _KBASE_CSF_KCPU_H_ */

View File

@@ -0,0 +1,817 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
* (C) COPYRIGHT 2022-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
* Foundation, and any use by you of this program is subject to the terms
* of such GNU license.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, you can access it online at
* http://www.gnu.org/licenses/gpl-2.0.html.
*
*/
#include <linux/protected_memory_allocator.h>
#include <mali_kbase.h>
#include "mali_kbase_csf.h"
#include "mali_kbase_csf_mcu_shared_reg.h"
#include <mali_kbase_mem_migrate.h>
/* Scaling factor in pre-allocating shared regions for suspend bufs and userios */
#define MCU_SHARED_REGS_PREALLOCATE_SCALE (8)
/* MCU shared region map attempt limit */
#define MCU_SHARED_REGS_BIND_ATTEMPT_LIMIT (4)
/* Convert a VPFN to its start addr */
#define GET_VPFN_VA(vpfn) ((vpfn) << PAGE_SHIFT)
/* Macros for extract the corresponding VPFNs from a CSG_REG */
#define CSG_REG_SUSP_BUF_VPFN(reg, nr_susp_pages) (reg->start_pfn)
#define CSG_REG_PMOD_BUF_VPFN(reg, nr_susp_pages) (reg->start_pfn + nr_susp_pages)
#define CSG_REG_USERIO_VPFN(reg, csi, nr_susp_pages) (reg->start_pfn + 2 * (nr_susp_pages + csi))
/* MCU shared segment dummy page mapping flags */
#define DUMMY_PAGE_MAP_FLAGS (KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_DEFAULT) | KBASE_REG_GPU_NX)
/* MCU shared segment suspend buffer mapping flags */
#define SUSP_PAGE_MAP_FLAGS \
(KBASE_REG_GPU_RD | KBASE_REG_GPU_WR | KBASE_REG_GPU_NX | \
KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_DEFAULT))
/**
* struct kbase_csg_shared_region - Wrapper object for use with a CSG on runtime
* resources for suspend buffer pages, userio pages
* and their corresponding mapping GPU VA addresses
* from the MCU shared interface segment
*
* @link: Link to the managing list for the wrapper object.
* @reg: pointer to the region allocated from the shared interface segment, which
* covers the normal/P-mode suspend buffers, userio pages of the queues
* @grp: Pointer to the bound kbase_queue_group, or NULL if no binding (free).
* @pmode_mapped: Boolean for indicating the region has MMU mapped with the bound group's
* protected mode suspend buffer pages.
*/
struct kbase_csg_shared_region {
	struct list_head link;		/* Entry in scheduler's unused_csg_regs list when not in active use */
	struct kbase_va_region *reg;	/* VA region in the MCU shared segment backing this CSG */
	struct kbase_queue_group *grp;	/* Currently bound queue group, or NULL when free */
	bool pmode_mapped;		/* True if the bound group's P-mode suspend buf pages are MMU-mapped */
};
/* Compute the MMU mapping flags for a queue's USER I/O input page.
 *
 * The page is GPU-readable and non-executable; the memory attribute depends
 * on whether the system provides I/O coherency: non-cacheable when there is
 * no coherency, shared (with both-side sharing) otherwise.
 */
static unsigned long get_userio_mmu_flags(struct kbase_device *kbdev)
{
	unsigned long flags = KBASE_REG_GPU_RD | KBASE_REG_GPU_NX;

	if (kbdev->system_coherency == COHERENCY_NONE)
		flags |= KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_NON_CACHEABLE);
	else
		flags |= KBASE_REG_SHARE_BOTH |
			 KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_SHARED);

	return flags;
}
/* Mark the page migration metadata of the given physical page as NOT_MOVABLE,
 * so the page-migration machinery will leave it in place. No-op when page
 * migration is disabled or the page carries no metadata.
 */
static void set_page_meta_status_not_movable(struct tagged_addr phy)
{
	struct kbase_page_metadata *page_md;

	if (!kbase_page_migration_enabled)
		return;

	page_md = kbase_page_private(as_page(phy));
	if (!page_md)
		return;

	spin_lock(&page_md->migrate_lock);
	page_md->status = PAGE_STATUS_SET(page_md->status, (u8)NOT_MOVABLE);
	spin_unlock(&page_md->migrate_lock);
}
/* Return the CSG shared region currently bound to @group, or NULL if none.
 * group->csg_reg is stored as an opaque pointer; cast it back here.
 */
static struct kbase_csg_shared_region *get_group_bound_csg_reg(struct kbase_queue_group *group)
{
	return (struct kbase_csg_shared_region *)group->csg_reg;
}
/* Re-point @nr_pages already-mapped MCU-shared PTEs starting at @vpfn back at
 * the scheduler's shared dummy pages (with the default dummy-page attributes).
 * This effectively "unmaps" a sub-range without tearing down the page tables.
 *
 * Return: 0 on success, otherwise the error from kbase_mmu_update_csf_mcu_pages().
 */
static inline int update_mapping_with_dummy_pages(struct kbase_device *kbdev, u64 vpfn,
						  u32 nr_pages)
{
	struct kbase_csf_mcu_shared_regions *shared_regs = &kbdev->csf.scheduler.mcu_regs_data;
	const unsigned long mem_flags = DUMMY_PAGE_MAP_FLAGS;
	return kbase_mmu_update_csf_mcu_pages(kbdev, vpfn, shared_regs->dummy_phys, nr_pages,
					      mem_flags, KBASE_MEM_GROUP_CSF_FW);
}
/* Create the initial MCU MMU mapping of @nr_pages at @vpfn, backed by the
 * scheduler's shared dummy pages. Later runtime changes are done with
 * kbase_mmu_update_csf_mcu_pages()/update_mapping_with_dummy_pages() instead
 * of fresh inserts.
 *
 * Return: 0 on success, otherwise the error from kbase_mmu_insert_pages().
 */
static inline int insert_dummy_pages(struct kbase_device *kbdev, u64 vpfn, u32 nr_pages)
{
	struct kbase_csf_mcu_shared_regions *shared_regs = &kbdev->csf.scheduler.mcu_regs_data;
	const unsigned long mem_flags = DUMMY_PAGE_MAP_FLAGS;
	const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC;
	return kbase_mmu_insert_pages(kbdev, &kbdev->csf.mcu_mmu, vpfn, shared_regs->dummy_phys,
				      nr_pages, mem_flags, MCU_AS_NR, KBASE_MEM_GROUP_CSF_FW,
				      mmu_sync_info, NULL, false);
}
/* Reset consecutive retry count to zero.
 * Called after a successful shared-region map so that any later transient
 * failure (counted in notify_group_csg_reg_map_error()) starts from zero.
 */
static void notify_group_csg_reg_map_done(struct kbase_queue_group *group)
{
	lockdep_assert_held(&group->kctx->kbdev->csf.scheduler.lock);
	/* Just clear the internal map retry count */
	group->csg_reg_bind_retries = 0;
}
/* Record a shared-region map failure for @group, raising a fatal group error
 * once the consecutive-failure count reaches MCU_SHARED_REGS_BIND_ATTEMPT_LIMIT.
 *
 * Return: true if a fatal group error has already been triggered (i.e. the
 * retry count has reached/passed the limit).
 */
static bool notify_group_csg_reg_map_error(struct kbase_queue_group *group)
{
	struct kbase_device *kbdev = group->kctx->kbdev;
	lockdep_assert_held(&kbdev->csf.scheduler.lock);
	/* Saturating increment: never wrap the counter past U8_MAX */
	if (group->csg_reg_bind_retries < U8_MAX)
		group->csg_reg_bind_retries++;
	/* Allow only one fatal error notification */
	if (group->csg_reg_bind_retries == MCU_SHARED_REGS_BIND_ATTEMPT_LIMIT) {
		struct base_gpu_queue_group_error const err_payload = {
			.error_type = BASE_GPU_QUEUE_GROUP_ERROR_FATAL,
			.payload = { .fatal_group = { .status = GPU_EXCEPTION_TYPE_SW_FAULT_0 } }
		};
		dev_err(kbdev->dev, "Fatal: group_%d_%d_%d exceeded shared region map retry limit",
			group->kctx->tgid, group->kctx->id, group->handle);
		kbase_csf_add_group_fatal_error(group, &err_payload);
		kbase_event_wakeup(group->kctx);
	}
	return group->csg_reg_bind_retries >= MCU_SHARED_REGS_BIND_ATTEMPT_LIMIT;
}
/* Replace the given phys at vpfn (reflecting a queue's userio_pages) mapping.
 * If phys is NULL, the internal dummy_phys is used, which effectively
 * restores back to the initialized state for the given queue's userio_pages
 * (i.e. mapped to the default dummy page).
 * In case of CSF mmu update error on a queue, the dummy phy is used to restore
 * back the default 'unbound' (i.e. mapped to dummy) condition.
 *
 * It's the caller's responsibility to ensure that the given vpfn is extracted
 * correctly from a CSG_REG object, for example, using CSG_REG_USERIO_VPFN().
 *
 * Return: 0 on success, otherwise the error from a failed mapping update
 * (note the mapping has then been restored to the dummy pages).
 */
static int userio_pages_replace_phys(struct kbase_device *kbdev, u64 vpfn, struct tagged_addr *phys)
{
	struct kbase_csf_mcu_shared_regions *shared_regs = &kbdev->csf.scheduler.mcu_regs_data;
	int err = 0, err1;
	lockdep_assert_held(&kbdev->csf.scheduler.lock);
	if (phys) {
		unsigned long mem_flags_input = shared_regs->userio_mem_rd_flags;
		/* OUTPUT page additionally needs GPU write permission */
		unsigned long mem_flags_output = mem_flags_input | KBASE_REG_GPU_WR;
		/* Dealing with a queue's INPUT page */
		err = kbase_mmu_update_csf_mcu_pages(kbdev, vpfn, &phys[0], 1, mem_flags_input,
						     KBASE_MEM_GROUP_CSF_IO);
		/* Dealing with a queue's OUTPUT page */
		err1 = kbase_mmu_update_csf_mcu_pages(kbdev, vpfn + 1, &phys[1], 1,
						      mem_flags_output, KBASE_MEM_GROUP_CSF_IO);
		/* An OUTPUT-page failure takes precedence as the returned error */
		if (unlikely(err1))
			err = err1;
	}
	if (unlikely(err) || !phys) {
		/* Restore back to dummy_userio_phy */
		update_mapping_with_dummy_pages(kbdev, vpfn, KBASEP_NUM_CS_USER_IO_PAGES);
	}
	return err;
}
/* Update a group's queues' mappings for a group with its runtime bound group region.
 *
 * For every CSI of the group, map the queue's userio phys pages if the queue
 * is enabled and not yet mapped; when a transition from @prev_grp is in
 * progress, a previously mapped queue of @prev_grp at the same CSI slot also
 * forces an update (replacing its mapping, or restoring dummy pages when the
 * new group has no phys to map there).
 *
 * Return: 0 on success, otherwise the last per-CSI update error encountered
 * (all CSIs are still attempted).
 */
static int csg_reg_update_on_csis(struct kbase_device *kbdev, struct kbase_queue_group *group,
				  struct kbase_queue_group *prev_grp)
{
	struct kbase_csg_shared_region *csg_reg = get_group_bound_csg_reg(group);
	const u32 nr_susp_pages = PFN_UP(kbdev->csf.global_iface.groups[0].suspend_size);
	const u32 nr_csis = kbdev->csf.global_iface.groups[0].stream_num;
	struct tagged_addr *phy;
	int err = 0, err1;
	u32 i;
	lockdep_assert_held(&kbdev->csf.scheduler.lock);
	if (WARN_ONCE(!csg_reg, "Update_userio pages: group has no bound csg_reg"))
		return -EINVAL;
	for (i = 0; i < nr_csis; i++) {
		struct kbase_queue *queue = group->bound_queues[i];
		struct kbase_queue *prev_queue = prev_grp ? prev_grp->bound_queues[i] : NULL;
		/* Set the phy if the group's queue[i] needs mapping, otherwise NULL */
		phy = (queue && queue->enabled && !queue->user_io_gpu_va) ? queue->phys : NULL;
		/* Either phy is valid, or this update is for a transition change from
		 * prev_group, and the prev_queue was mapped, so an update is required.
		 */
		if (phy || (prev_queue && prev_queue->user_io_gpu_va)) {
			u64 vpfn = CSG_REG_USERIO_VPFN(csg_reg->reg, i, nr_susp_pages);
			err1 = userio_pages_replace_phys(kbdev, vpfn, phy);
			if (unlikely(err1)) {
				dev_warn(kbdev->dev,
					 "%s: Error in update queue-%d mapping for csg_%d_%d_%d",
					 __func__, i, group->kctx->tgid, group->kctx->id,
					 group->handle);
				err = err1;
			} else if (phy)
				/* Record the queue's GPU VA only on a successful map */
				queue->user_io_gpu_va = GET_VPFN_VA(vpfn);
			/* Mark prev_group's queue has lost its mapping */
			if (prev_queue)
				prev_queue->user_io_gpu_va = 0;
		}
	}
	return err;
}
/* Bind a group to a given csg_reg, any previous mappings with the csg_reg are replaced
 * with the given group's phy pages, or, if no replacement, the default dummy pages.
 * Note, the csg_reg's fields are in transition step-by-step from the prev_grp to its
 * new binding owner in this function. At the end, the prev_grp would be completely
 * detached away from the previously bound csg_reg.
 *
 * Return: 0 on success, otherwise an error from one of the mapping updates
 * (P-mode, normal suspend buffer, or queue userio pages). Later update steps
 * are still attempted after a failure so the region ends up fully owned by
 * @group and in a consistent state.
 */
static int group_bind_csg_reg(struct kbase_device *kbdev, struct kbase_queue_group *group,
			      struct kbase_csg_shared_region *csg_reg)
{
	const unsigned long mem_flags = SUSP_PAGE_MAP_FLAGS;
	const u32 nr_susp_pages = PFN_UP(kbdev->csf.global_iface.groups[0].suspend_size);
	struct kbase_queue_group *prev_grp = csg_reg->grp;
	struct kbase_va_region *reg = csg_reg->reg;
	struct tagged_addr *phy;
	int err = 0, err1;
	lockdep_assert_held(&kbdev->csf.scheduler.lock);
	/* The csg_reg is expected still on the unused list so its link is not empty */
	if (WARN_ON_ONCE(list_empty(&csg_reg->link))) {
		dev_dbg(kbdev->dev, "csg_reg is marked in active use");
		return -EINVAL;
	}
	if (WARN_ON_ONCE(prev_grp && prev_grp->csg_reg != csg_reg)) {
		dev_dbg(kbdev->dev, "Unexpected bound lost on prev_group");
		prev_grp->csg_reg = NULL;
		return -EINVAL;
	}
	/* Replacing the csg_reg bound group to the newly given one */
	csg_reg->grp = group;
	group->csg_reg = csg_reg;
	/* Resolving mappings, deal with protected mode first */
	if (group->protected_suspend_buf.pma) {
		/* We are binding a new group with P-mode ready, the prev_grp's P-mode mapping
		 * status is now stale during this transition of ownership. For the new owner,
		 * its mapping would have been updated away when it lost its binding previously.
		 * So it needs an update to this pma map. By clearing here the mapped flag
		 * ensures it reflects the new owner's condition.
		 */
		csg_reg->pmode_mapped = false;
		err = kbase_csf_mcu_shared_group_update_pmode_map(kbdev, group);
	} else if (csg_reg->pmode_mapped) {
		/* Need to unmap the previous one, use the dummy pages */
		err = update_mapping_with_dummy_pages(
			kbdev, CSG_REG_PMOD_BUF_VPFN(reg, nr_susp_pages), nr_susp_pages);
		if (unlikely(err))
			dev_warn(kbdev->dev, "%s: Failed to update P-mode dummy for csg_%d_%d_%d",
				 __func__, group->kctx->tgid, group->kctx->id, group->handle);
		csg_reg->pmode_mapped = false;
	}
	/* Unlike the normal suspend buf, the mapping of the protected mode suspend buffer is
	 * actually reflected by a specific mapped flag (due to phys[] is only allocated on
	 * in-need basis). So the GPU_VA is always updated to the bound region's corresponding
	 * VA, as a reflection of the binding to the csg_reg.
	 */
	group->protected_suspend_buf.gpu_va =
		GET_VPFN_VA(CSG_REG_PMOD_BUF_VPFN(reg, nr_susp_pages));
	/* Deal with normal mode suspend buffer */
	phy = group->normal_suspend_buf.phy;
	err1 = kbase_mmu_update_csf_mcu_pages(kbdev, CSG_REG_SUSP_BUF_VPFN(reg, nr_susp_pages), phy,
					      nr_susp_pages, mem_flags, KBASE_MEM_GROUP_CSF_FW);
	if (unlikely(err1)) {
		dev_warn(kbdev->dev, "%s: Failed to update suspend buffer for csg_%d_%d_%d",
			 __func__, group->kctx->tgid, group->kctx->id, group->handle);
		/* Attempt a restore to default dummy for removing previous mapping */
		if (prev_grp)
			update_mapping_with_dummy_pages(
				kbdev, CSG_REG_SUSP_BUF_VPFN(reg, nr_susp_pages), nr_susp_pages);
		err = err1;
		/* Marking the normal suspend buffer is not mapped (due to error) */
		group->normal_suspend_buf.gpu_va = 0;
	} else {
		/* Marking the normal suspend buffer is actually mapped */
		group->normal_suspend_buf.gpu_va =
			GET_VPFN_VA(CSG_REG_SUSP_BUF_VPFN(reg, nr_susp_pages));
	}
	/* Deal with queue userio_pages. Only overwrite err on failure, so that a
	 * successful userio update cannot clobber an earlier mapping error with 0.
	 */
	err1 = csg_reg_update_on_csis(kbdev, group, prev_grp);
	if (unlikely(err1))
		err = err1;
	/* Reset the previous group's suspend buffers' GPU_VAs as it has lost its bound */
	if (prev_grp) {
		prev_grp->normal_suspend_buf.gpu_va = 0;
		prev_grp->protected_suspend_buf.gpu_va = 0;
		prev_grp->csg_reg = NULL;
	}
	return err;
}
/* Notify the group is placed on-slot, hence the bound csg_reg is active in use.
 * Removing the csg_reg from the unused list prevents it being re-bound to a
 * different group while this group is on-slot.
 */
void kbase_csf_mcu_shared_set_group_csg_reg_active(struct kbase_device *kbdev,
						   struct kbase_queue_group *group)
{
	struct kbase_csg_shared_region *csg_reg = get_group_bound_csg_reg(group);
	lockdep_assert_held(&kbdev->csf.scheduler.lock);
	if (WARN_ONCE(!csg_reg || csg_reg->grp != group, "Group_%d_%d_%d has no csg_reg bounding",
		      group->kctx->tgid, group->kctx->id, group->handle))
		return;
	/* By dropping out the csg_reg from the unused list, it becomes active and is tracked
	 * by its bound group that is on-slot. The design is that, when this on-slot group is
	 * moved to off-slot, the scheduler slot-clean up will add it back to the tail of the
	 * unused list.
	 */
	if (!WARN_ON_ONCE(list_empty(&csg_reg->link)))
		list_del_init(&csg_reg->link);
}
/* Notify the group is placed off-slot, hence the bound csg_reg is not in active use
 * anymore. Existing bounding/mappings are left untouched. These would only be dealt with
 * if the bound csg_reg is to be reused with another group.
 */
void kbase_csf_mcu_shared_set_group_csg_reg_unused(struct kbase_device *kbdev,
						   struct kbase_queue_group *group)
{
	struct kbase_csg_shared_region *csg_reg = get_group_bound_csg_reg(group);
	struct kbase_csf_mcu_shared_regions *shared_regs = &kbdev->csf.scheduler.mcu_regs_data;
	lockdep_assert_held(&kbdev->csf.scheduler.lock);
	if (WARN_ONCE(!csg_reg || csg_reg->grp != group, "Group_%d_%d_%d has no csg_reg bound",
		      group->kctx->tgid, group->kctx->id, group->handle))
		return;
	/* By adding back the csg_reg to the unused list, it becomes available for another
	 * group to break its existing binding and set up a new one.
	 */
	if (!list_empty(&csg_reg->link)) {
		/* Already on a list: a group still on-slot should not reach here */
		WARN_ONCE(group->csg_nr >= 0, "Group is assumed vacated from slot");
		list_move_tail(&csg_reg->link, &shared_regs->unused_csg_regs);
	} else
		list_add_tail(&csg_reg->link, &shared_regs->unused_csg_regs);
}
/* Adding a new queue to an existing on-slot group: map the queue's userio
 * phys pages into the group's bound (and active) csg_reg and record the
 * resulting GPU VA on the queue.
 *
 * Return: 0 on success, -EIO on invalid group/csg_reg state or once the
 * bound group's map-retry limit has been exceeded, otherwise the mapping
 * error from userio_pages_replace_phys().
 */
int kbase_csf_mcu_shared_add_queue(struct kbase_device *kbdev, struct kbase_queue *queue)
{
	struct kbase_queue_group *group = queue->group;
	struct kbase_csg_shared_region *csg_reg;
	const u32 nr_susp_pages = PFN_UP(kbdev->csf.global_iface.groups[0].suspend_size);
	u64 vpfn;
	int err;
	lockdep_assert_held(&kbdev->csf.scheduler.lock);
	if (WARN_ONCE(!group || group->csg_nr < 0, "No bound group, or group is not on-slot"))
		return -EIO;
	csg_reg = get_group_bound_csg_reg(group);
	/* An on-slot group's csg_reg must be off the unused list (active) */
	if (WARN_ONCE(!csg_reg || !list_empty(&csg_reg->link),
		      "No bound csg_reg, or in wrong state"))
		return -EIO;
	vpfn = CSG_REG_USERIO_VPFN(csg_reg->reg, queue->csi_index, nr_susp_pages);
	err = userio_pages_replace_phys(kbdev, vpfn, queue->phys);
	if (likely(!err)) {
		/* Mark the queue has been successfully mapped */
		queue->user_io_gpu_va = GET_VPFN_VA(vpfn);
	} else {
		/* Mark the queue has no mapping on its phys[] */
		queue->user_io_gpu_va = 0;
		dev_dbg(kbdev->dev,
			"%s: Error in mapping userio pages for queue-%d of csg_%d_%d_%d", __func__,
			queue->csi_index, group->kctx->tgid, group->kctx->id, group->handle);
		/* notify the error for the bound group */
		if (notify_group_csg_reg_map_error(group))
			err = -EIO;
	}
	return err;
}
/* Unmap a given queue's userio pages, when the queue is deleted.
 * The mapping is restored to the default dummy pages (phys == NULL) and the
 * queue's recorded GPU VA is cleared. A queue with no recorded mapping is a
 * no-op.
 */
void kbase_csf_mcu_shared_drop_stopped_queue(struct kbase_device *kbdev, struct kbase_queue *queue)
{
	struct kbase_queue_group *group;
	struct kbase_csg_shared_region *csg_reg;
	const u32 nr_susp_pages = PFN_UP(kbdev->csf.global_iface.groups[0].suspend_size);
	u64 vpfn;
	lockdep_assert_held(&kbdev->csf.scheduler.lock);
	/* The queue has no existing mapping, nothing to do */
	if (!queue || !queue->user_io_gpu_va)
		return;
	group = queue->group;
	if (WARN_ONCE(!group || !group->csg_reg, "Queue/Group has no bound region"))
		return;
	csg_reg = get_group_bound_csg_reg(group);
	vpfn = CSG_REG_USERIO_VPFN(csg_reg->reg, queue->csi_index, nr_susp_pages);
	WARN_ONCE(userio_pages_replace_phys(kbdev, vpfn, NULL),
		  "Unexpected restoring to dummy map update error");
	queue->user_io_gpu_va = 0;
}
/* Map the group's protected-mode suspend buffer pages (from its pma
 * allocation) into the bound csg_reg, unless already mapped or no pma pages
 * are allocated. On a mapping failure the range is restored to the dummy
 * pages and pmode_mapped remains false.
 *
 * Return: 0 on success or when there is nothing to do, -EINVAL if the group
 * has no bound csg_reg, otherwise the mapping update error.
 */
int kbase_csf_mcu_shared_group_update_pmode_map(struct kbase_device *kbdev,
						struct kbase_queue_group *group)
{
	struct kbase_csf_mcu_shared_regions *shared_regs = &kbdev->csf.scheduler.mcu_regs_data;
	struct kbase_csg_shared_region *csg_reg = get_group_bound_csg_reg(group);
	const u32 nr_susp_pages = PFN_UP(kbdev->csf.global_iface.groups[0].suspend_size);
	int err = 0, err1;
	lockdep_assert_held(&kbdev->csf.scheduler.lock);
	if (WARN_ONCE(!csg_reg, "Update_pmode_map: the bound csg_reg can't be NULL"))
		return -EINVAL;
	/* If the pmode already mapped, nothing to do */
	if (csg_reg->pmode_mapped)
		return 0;
	/* P-mode map not in place and the group has allocated P-mode pages, map it */
	if (group->protected_suspend_buf.pma) {
		unsigned long mem_flags = SUSP_PAGE_MAP_FLAGS;
		struct tagged_addr *phy = shared_regs->pma_phys;
		struct kbase_va_region *reg = csg_reg->reg;
		u64 vpfn = CSG_REG_PMOD_BUF_VPFN(reg, nr_susp_pages);
		u32 i;
		/* Populate the protected phys from pma to phy[] */
		for (i = 0; i < nr_susp_pages; i++)
			phy[i] = as_tagged(group->protected_suspend_buf.pma[i]->pa);
		/* Add the P-mode suspend buffer mapping */
		err = kbase_mmu_update_csf_mcu_pages(kbdev, vpfn, phy, nr_susp_pages, mem_flags,
						     KBASE_MEM_GROUP_CSF_FW);
		/* If error, restore to default dummy */
		if (unlikely(err)) {
			err1 = update_mapping_with_dummy_pages(kbdev, vpfn, nr_susp_pages);
			if (unlikely(err1))
				dev_warn(
					kbdev->dev,
					"%s: Failed in recovering to P-mode dummy for csg_%d_%d_%d",
					__func__, group->kctx->tgid, group->kctx->id,
					group->handle);
			csg_reg->pmode_mapped = false;
		} else
			csg_reg->pmode_mapped = true;
	}
	return err;
}
/* Fully clear an evicted group's use of its bound csg_reg: restore all of its
 * mappings (P-mode buf, normal suspend buf, per-queue userio pages) to the
 * default dummy pages, clear the group's recorded GPU VAs, break the binding
 * and return the csg_reg to the FRONT of the unused list (making it the next
 * candidate for reuse). No-op when the group has no bound csg_reg.
 */
void kbase_csf_mcu_shared_clear_evicted_group_csg_reg(struct kbase_device *kbdev,
						      struct kbase_queue_group *group)
{
	struct kbase_csf_mcu_shared_regions *shared_regs = &kbdev->csf.scheduler.mcu_regs_data;
	struct kbase_csg_shared_region *csg_reg = get_group_bound_csg_reg(group);
	struct kbase_va_region *reg;
	const u32 nr_susp_pages = PFN_UP(kbdev->csf.global_iface.groups[0].suspend_size);
	u32 nr_csis = kbdev->csf.global_iface.groups[0].stream_num;
	int err = 0;
	u32 i;
	lockdep_assert_held(&kbdev->csf.scheduler.lock);
	/* Nothing to do for clearing up if no bound csg_reg */
	if (!csg_reg)
		return;
	reg = csg_reg->reg;
	/* Restore mappings default dummy pages for any mapped pages */
	if (csg_reg->pmode_mapped) {
		err = update_mapping_with_dummy_pages(
			kbdev, CSG_REG_PMOD_BUF_VPFN(reg, nr_susp_pages), nr_susp_pages);
		WARN_ONCE(unlikely(err), "Restore dummy failed for clearing pmod buffer mapping");
		csg_reg->pmode_mapped = false;
	}
	if (group->normal_suspend_buf.gpu_va) {
		err = update_mapping_with_dummy_pages(
			kbdev, CSG_REG_SUSP_BUF_VPFN(reg, nr_susp_pages), nr_susp_pages);
		WARN_ONCE(err, "Restore dummy failed for clearing suspend buffer mapping");
	}
	/* Deal with queue userio pages */
	for (i = 0; i < nr_csis; i++)
		kbase_csf_mcu_shared_drop_stopped_queue(kbdev, group->bound_queues[i]);
	group->normal_suspend_buf.gpu_va = 0;
	group->protected_suspend_buf.gpu_va = 0;
	/* Break the binding */
	group->csg_reg = NULL;
	csg_reg->grp = NULL;
	/* Put the csg_reg to the front of the unused list */
	if (WARN_ON_ONCE(list_empty(&csg_reg->link)))
		list_add(&csg_reg->link, &shared_regs->unused_csg_regs);
	else
		list_move(&csg_reg->link, &shared_regs->unused_csg_regs);
}
/* Ensure @group has a csg_reg with up-to-date mappings: reuse its existing
 * binding (refreshing P-mode and userio mappings) when it still holds one,
 * otherwise take the first region from the unused list and rebind it with
 * group_bind_csg_reg(). Success/failure is fed back into the group's
 * map-retry accounting.
 *
 * Return: 0 on success, -EIO if no csg_reg is available (unexpected),
 * otherwise the mapping error.
 */
int kbase_csf_mcu_shared_group_bind_csg_reg(struct kbase_device *kbdev,
					    struct kbase_queue_group *group)
{
	struct kbase_csf_mcu_shared_regions *shared_regs = &kbdev->csf.scheduler.mcu_regs_data;
	struct kbase_csg_shared_region *csg_reg;
	int err;
	lockdep_assert_held(&kbdev->csf.scheduler.lock);
	csg_reg = get_group_bound_csg_reg(group);
	if (!csg_reg)
		csg_reg = list_first_entry_or_null(&shared_regs->unused_csg_regs,
						   struct kbase_csg_shared_region, link);
	if (!WARN_ON_ONCE(!csg_reg)) {
		struct kbase_queue_group *prev_grp = csg_reg->grp;
		/* Deal with the previous binding and lazy unmap, i.e if the previous mapping not
		 * the required one, unmap it.
		 */
		if (prev_grp == group) {
			/* Update existing bindings, if there have been some changes */
			err = kbase_csf_mcu_shared_group_update_pmode_map(kbdev, group);
			if (likely(!err))
				err = csg_reg_update_on_csis(kbdev, group, NULL);
		} else
			err = group_bind_csg_reg(kbdev, group, csg_reg);
	} else {
		/* This should not have been possible if the code operates rightly */
		dev_err(kbdev->dev, "%s: Unexpected NULL csg_reg for group %d of context %d_%d",
			__func__, group->handle, group->kctx->tgid, group->kctx->id);
		return -EIO;
	}
	if (likely(!err))
		notify_group_csg_reg_map_done(group);
	else
		notify_group_csg_reg_map_error(group);
	return err;
}
/* Initialize one csg_reg: allocate and register a VA region in the MCU shared
 * segment sized for 2 suspend buffers (normal + P-mode) plus the userio pages
 * of every CSI, then pre-map the whole range to the shared dummy pages so
 * runtime (re)binding only needs PTE updates. On failure all mappings made so
 * far are torn down in reverse order via the goto chain.
 *
 * Return: 0 on success, -ENOMEM or a mapping/region error otherwise.
 */
static int shared_mcu_csg_reg_init(struct kbase_device *kbdev,
				   struct kbase_csg_shared_region *csg_reg)
{
	struct kbase_csf_mcu_shared_regions *shared_regs = &kbdev->csf.scheduler.mcu_regs_data;
	const u32 nr_susp_pages = PFN_UP(kbdev->csf.global_iface.groups[0].suspend_size);
	u32 nr_csis = kbdev->csf.global_iface.groups[0].stream_num;
	const size_t nr_csg_reg_pages = 2 * (nr_susp_pages + nr_csis);
	struct kbase_va_region *reg;
	u64 vpfn;
	int err, i;
	INIT_LIST_HEAD(&csg_reg->link);
	reg = kbase_alloc_free_region(kbdev, &kbdev->csf.shared_reg_rbtree, 0, nr_csg_reg_pages,
				      KBASE_REG_ZONE_MCU_SHARED);
	if (!reg) {
		dev_err(kbdev->dev, "%s: Failed to allocate a MCU shared region for %zu pages\n",
			__func__, nr_csg_reg_pages);
		return -ENOMEM;
	}
	/* Insert the region into rbtree, so it becomes ready to use */
	mutex_lock(&kbdev->csf.reg_lock);
	err = kbase_add_va_region_rbtree(kbdev, reg, 0, nr_csg_reg_pages, 1);
	reg->flags &= ~KBASE_REG_FREE;
	mutex_unlock(&kbdev->csf.reg_lock);
	if (err) {
		kfree(reg);
		dev_err(kbdev->dev, "%s: Failed to add a region of %zu pages into rbtree", __func__,
			nr_csg_reg_pages);
		return err;
	}
	/* Initialize the mappings so MMU only need to update the corresponding
	 * mapped phy-pages at runtime.
	 * Map the normal suspend buffer pages to the prepared dummy phys[].
	 */
	vpfn = CSG_REG_SUSP_BUF_VPFN(reg, nr_susp_pages);
	err = insert_dummy_pages(kbdev, vpfn, nr_susp_pages);
	if (unlikely(err))
		goto fail_susp_map_fail;
	/* Map the protected suspend buffer pages to the prepared dummy phys[] */
	vpfn = CSG_REG_PMOD_BUF_VPFN(reg, nr_susp_pages);
	err = insert_dummy_pages(kbdev, vpfn, nr_susp_pages);
	if (unlikely(err))
		goto fail_pmod_map_fail;
	for (i = 0; i < nr_csis; i++) {
		vpfn = CSG_REG_USERIO_VPFN(reg, i, nr_susp_pages);
		err = insert_dummy_pages(kbdev, vpfn, KBASEP_NUM_CS_USER_IO_PAGES);
		if (unlikely(err))
			goto fail_userio_pages_map_fail;
	}
	/* Replace the previous NULL-valued field with the successfully initialized reg */
	csg_reg->reg = reg;
	return 0;
fail_userio_pages_map_fail:
	/* Unwind the userio mappings created before the failing CSI */
	while (i-- > 0) {
		vpfn = CSG_REG_USERIO_VPFN(reg, i, nr_susp_pages);
		kbase_mmu_teardown_pages(kbdev, &kbdev->csf.mcu_mmu, vpfn, shared_regs->dummy_phys,
					 KBASEP_NUM_CS_USER_IO_PAGES, KBASEP_NUM_CS_USER_IO_PAGES,
					 MCU_AS_NR, true);
	}
	vpfn = CSG_REG_PMOD_BUF_VPFN(reg, nr_susp_pages);
	kbase_mmu_teardown_pages(kbdev, &kbdev->csf.mcu_mmu, vpfn, shared_regs->dummy_phys,
				 nr_susp_pages, nr_susp_pages, MCU_AS_NR, true);
fail_pmod_map_fail:
	vpfn = CSG_REG_SUSP_BUF_VPFN(reg, nr_susp_pages);
	kbase_mmu_teardown_pages(kbdev, &kbdev->csf.mcu_mmu, vpfn, shared_regs->dummy_phys,
				 nr_susp_pages, nr_susp_pages, MCU_AS_NR, true);
fail_susp_map_fail:
	mutex_lock(&kbdev->csf.reg_lock);
	kbase_remove_va_region(kbdev, reg);
	mutex_unlock(&kbdev->csf.reg_lock);
	kfree(reg);
	return err;
}
/* Note, this helper can only be called on scheduler shutdown.
 * Tears down the csg_reg's mappings in reverse order of shared_mcu_csg_reg_init()
 * (userio pages per CSI, then P-mode buffer, then normal suspend buffer), removes
 * the VA region and frees it.
 */
static void shared_mcu_csg_reg_term(struct kbase_device *kbdev,
				    struct kbase_csg_shared_region *csg_reg)
{
	struct kbase_csf_mcu_shared_regions *shared_regs = &kbdev->csf.scheduler.mcu_regs_data;
	struct kbase_va_region *reg = csg_reg->reg;
	const u32 nr_susp_pages = PFN_UP(kbdev->csf.global_iface.groups[0].suspend_size);
	const u32 nr_csis = kbdev->csf.global_iface.groups[0].stream_num;
	u64 vpfn;
	int i;
	for (i = 0; i < nr_csis; i++) {
		vpfn = CSG_REG_USERIO_VPFN(reg, i, nr_susp_pages);
		kbase_mmu_teardown_pages(kbdev, &kbdev->csf.mcu_mmu, vpfn, shared_regs->dummy_phys,
					 KBASEP_NUM_CS_USER_IO_PAGES, KBASEP_NUM_CS_USER_IO_PAGES,
					 MCU_AS_NR, true);
	}
	vpfn = CSG_REG_PMOD_BUF_VPFN(reg, nr_susp_pages);
	kbase_mmu_teardown_pages(kbdev, &kbdev->csf.mcu_mmu, vpfn, shared_regs->dummy_phys,
				 nr_susp_pages, nr_susp_pages, MCU_AS_NR, true);
	vpfn = CSG_REG_SUSP_BUF_VPFN(reg, nr_susp_pages);
	kbase_mmu_teardown_pages(kbdev, &kbdev->csf.mcu_mmu, vpfn, shared_regs->dummy_phys,
				 nr_susp_pages, nr_susp_pages, MCU_AS_NR, true);
	mutex_lock(&kbdev->csf.reg_lock);
	kbase_remove_va_region(kbdev, reg);
	mutex_unlock(&kbdev->csf.reg_lock);
	kfree(reg);
}
int kbase_csf_mcu_shared_regs_data_init(struct kbase_device *kbdev)
{
	struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
	struct kbase_csf_mcu_shared_regions *shared_regs = &scheduler->mcu_regs_data;
	struct kbase_csg_shared_region *array_csg_regs;
	const size_t nr_susp_pages = PFN_UP(kbdev->csf.global_iface.groups[0].suspend_size);
	const u32 nr_groups = kbdev->csf.global_iface.group_num;
	/* Pre-allocate a scaled multiple of the HW CSG slot count so off-slot
	 * groups can retain bindings in a 'lazy unbinding' fashion.
	 */
	const u32 nr_csg_regs = MCU_SHARED_REGS_PREALLOCATE_SCALE * nr_groups;
	/* The dummy phys array must be large enough for whichever mapping is
	 * bigger: a suspend buffer or a CSI's userio pages.
	 */
	const u32 nr_dummy_phys = MAX(nr_susp_pages, KBASEP_NUM_CS_USER_IO_PAGES);
	u32 i;
	int err;

	shared_regs->userio_mem_rd_flags = get_userio_mmu_flags(kbdev);
	INIT_LIST_HEAD(&shared_regs->unused_csg_regs);

	/* NOTE(review): on the error returns below, partially-allocated state
	 * is left in place — presumably the caller is expected to invoke
	 * kbase_csf_mcu_shared_regs_data_term() for cleanup; confirm at the
	 * call site.
	 */
	shared_regs->dummy_phys =
		kcalloc(nr_dummy_phys, sizeof(*shared_regs->dummy_phys), GFP_KERNEL);
	if (!shared_regs->dummy_phys)
		return -ENOMEM;

	/* Back the dummy mappings with a single real page from the FW pool */
	if (kbase_mem_pool_alloc_pages(&kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], 1,
				       &shared_regs->dummy_phys[0], false, NULL) <= 0)
		return -ENOMEM;

	/* Flag consulted by the term function to decide whether the pool page
	 * needs freeing — must only be set after a successful pool alloc.
	 */
	shared_regs->dummy_phys_allocated = true;
	set_page_meta_status_not_movable(shared_regs->dummy_phys[0]);

	/* Replicate the allocated single shared_regs->dummy_phys[0] to the full array */
	for (i = 1; i < nr_dummy_phys; i++)
		shared_regs->dummy_phys[i] = shared_regs->dummy_phys[0];

	shared_regs->pma_phys = kcalloc(nr_susp_pages, sizeof(*shared_regs->pma_phys), GFP_KERNEL);
	if (!shared_regs->pma_phys)
		return -ENOMEM;

	array_csg_regs = kcalloc(nr_csg_regs, sizeof(*array_csg_regs), GFP_KERNEL);
	if (!array_csg_regs)
		return -ENOMEM;
	shared_regs->array_csg_regs = array_csg_regs;

	/* All fields in scheduler->mcu_regs_data except the shared_regs->array_csg_regs
	 * are properly populated and ready to use. Now initialize the items in
	 * shared_regs->array_csg_regs[]
	 */
	for (i = 0; i < nr_csg_regs; i++) {
		err = shared_mcu_csg_reg_init(kbdev, &array_csg_regs[i]);
		if (err)
			return err;

		/* Freshly initialized regions start on the unused list */
		list_add_tail(&array_csg_regs[i].link, &shared_regs->unused_csg_regs);
	}

	return 0;
}
void kbase_csf_mcu_shared_regs_data_term(struct kbase_device *kbdev)
{
struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
struct kbase_csf_mcu_shared_regions *shared_regs = &scheduler->mcu_regs_data;
struct kbase_csg_shared_region *array_csg_regs =
(struct kbase_csg_shared_region *)shared_regs->array_csg_regs;
const u32 nr_groups = kbdev->csf.global_iface.group_num;
const u32 nr_csg_regs = MCU_SHARED_REGS_PREALLOCATE_SCALE * nr_groups;
if (array_csg_regs) {
struct kbase_csg_shared_region *csg_reg;
u32 i, cnt_csg_regs = 0;
for (i = 0; i < nr_csg_regs; i++) {
csg_reg = &array_csg_regs[i];
/* There should not be any group mapping bindings */
WARN_ONCE(csg_reg->grp, "csg_reg has a bound group");
if (csg_reg->reg) {
shared_mcu_csg_reg_term(kbdev, csg_reg);
cnt_csg_regs++;
}
}
/* The nr_susp_regs counts should match the array_csg_regs' length */
list_for_each_entry(csg_reg, &shared_regs->unused_csg_regs, link)
cnt_csg_regs--;
WARN_ONCE(cnt_csg_regs, "Unmatched counts of susp_regs");
kfree(shared_regs->array_csg_regs);
}
if (shared_regs->dummy_phys_allocated) {
struct page *page = as_page(shared_regs->dummy_phys[0]);
kbase_mem_pool_free(&kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], page, false);
}
kfree(shared_regs->dummy_phys);
kfree(shared_regs->pma_phys);
}

View File

@@ -0,0 +1,139 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
* (C) COPYRIGHT 2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
* Foundation, and any use by you of this program is subject to the terms
* of such GNU license.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, you can access it online at
* http://www.gnu.org/licenses/gpl-2.0.html.
*
*/
#ifndef _KBASE_CSF_MCU_SHARED_REG_H_
#define _KBASE_CSF_MCU_SHARED_REG_H_

/**
 * kbase_csf_mcu_shared_set_group_csg_reg_active - Notify that the group is active on-slot with
 *                                                 scheduling action. Essential runtime resources
 *                                                 are bound with the group for it to run
 *
 * @kbdev: Instance of a GPU platform device that implements a CSF interface.
 * @group: Pointer to the group that is placed into active on-slot running by the scheduler.
 */
void kbase_csf_mcu_shared_set_group_csg_reg_active(struct kbase_device *kbdev,
						   struct kbase_queue_group *group);

/**
 * kbase_csf_mcu_shared_set_group_csg_reg_unused - Notify that the group is placed off-slot with
 *                                                 scheduling action. Some of the bound runtime
 *                                                 resources can be reallocated for others to use
 *
 * @kbdev: Instance of a GPU platform device that implements a CSF interface.
 * @group: Pointer to the group that is placed off-slot by the scheduler.
 */
void kbase_csf_mcu_shared_set_group_csg_reg_unused(struct kbase_device *kbdev,
						   struct kbase_queue_group *group);

/**
 * kbase_csf_mcu_shared_group_update_pmode_map - Request to update the given group's protected
 *                                               suspend buffer pages to be mapped for supporting
 *                                               protected mode operations.
 *
 * @kbdev: Instance of a GPU platform device that implements a CSF interface.
 * @group: Pointer to the group for attempting a protected mode suspend buffer binding/mapping.
 *
 * Return: 0 for success, the group has a protected suspend buffer region mapped. Otherwise an
 *         error code is returned.
 */
int kbase_csf_mcu_shared_group_update_pmode_map(struct kbase_device *kbdev,
						struct kbase_queue_group *group);

/**
 * kbase_csf_mcu_shared_clear_evicted_group_csg_reg - Clear any bound regions/mappings as the
 *                                                    given group is evicted out of the runtime
 *                                                    operations.
 *
 * @kbdev: Instance of a GPU platform device that implements a CSF interface.
 * @group: Pointer to the group that has been evicted out of set of operational groups.
 *
 * This function will take away any of the bindings/mappings immediately so the resources
 * are not tied up to the given group, which has been evicted out of scheduling action for
 * termination.
 */
void kbase_csf_mcu_shared_clear_evicted_group_csg_reg(struct kbase_device *kbdev,
						      struct kbase_queue_group *group);

/**
 * kbase_csf_mcu_shared_add_queue - Request to add a newly activated queue for a group to be
 *                                  run on slot.
 *
 * @kbdev: Instance of a GPU platform device that implements a CSF interface.
 * @queue: Pointer to the queue that requires some runtime resource to be bound for joining
 *         others that are already running on-slot with their bound group.
 *
 * Return: 0 on success, or negative on failure.
 */
int kbase_csf_mcu_shared_add_queue(struct kbase_device *kbdev, struct kbase_queue *queue);

/**
 * kbase_csf_mcu_shared_drop_stopped_queue - Request to drop a queue after it has been stopped
 *                                           from its operational state from a group.
 *
 * @kbdev: Instance of a GPU platform device that implements a CSF interface.
 * @queue: Pointer to the queue that has been stopped from operational state.
 */
void kbase_csf_mcu_shared_drop_stopped_queue(struct kbase_device *kbdev, struct kbase_queue *queue);

/**
 * kbase_csf_mcu_shared_group_bind_csg_reg - Bind some required runtime resources to the given
 *                                           group for ready to run on-slot.
 *
 * @kbdev: Instance of a GPU platform device that implements a CSF interface.
 * @group: Pointer to the queue group that requires the runtime resources.
 *
 * This function binds/maps the required suspend buffer pages and userio pages for the given
 * group, readying it to run on-slot.
 *
 * Return: 0 on success, or negative on failure.
 */
int kbase_csf_mcu_shared_group_bind_csg_reg(struct kbase_device *kbdev,
					    struct kbase_queue_group *group);

/**
 * kbase_csf_mcu_shared_regs_data_init - Allocate and initialize the MCU shared regions data for
 *                                       the given device.
 *
 * @kbdev: Instance of a GPU platform device that implements a CSF interface.
 *
 * This function allocates and initializes the MCU shared VA regions for runtime operations
 * of the CSF scheduler.
 *
 * Return: 0 on success, or an error code.
 */
int kbase_csf_mcu_shared_regs_data_init(struct kbase_device *kbdev);

/**
 * kbase_csf_mcu_shared_regs_data_term - Terminate the allocated MCU shared regions data for
 *                                       the given device.
 *
 * @kbdev: Instance of a GPU platform device that implements a CSF interface.
 *
 * This function terminates the MCU shared VA regions allocated for runtime operations
 * of the CSF scheduler.
 */
void kbase_csf_mcu_shared_regs_data_term(struct kbase_device *kbdev);

#endif /* _KBASE_CSF_MCU_SHARED_REG_H_ */

View File

@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
* (C) COPYRIGHT 2018-2022 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -31,10 +31,6 @@
* Begin register sets
*/
/* DOORBELLS base address */
#define DOORBELLS_BASE 0x0080000
#define DOORBELLS_REG(r) (DOORBELLS_BASE + (r))
/* CS_KERNEL_INPUT_BLOCK base address */
#define CS_KERNEL_INPUT_BLOCK_BASE 0x0000
#define CS_KERNEL_INPUT_BLOCK_REG(r) (CS_KERNEL_INPUT_BLOCK_BASE + (r))
@@ -71,10 +67,6 @@
#define GLB_OUTPUT_BLOCK_BASE 0x0000
#define GLB_OUTPUT_BLOCK_REG(r) (GLB_OUTPUT_BLOCK_BASE + (r))
/* USER base address */
#define USER_BASE 0x0010000
#define USER_REG(r) (USER_BASE + (r))
/* End register sets */
/*
@@ -229,24 +221,43 @@
#define GLB_PRFCNT_TILER_EN 0x0058 /* () Performance counter enable for tiler */
#define GLB_PRFCNT_MMU_L2_EN 0x005C /* () Performance counter enable for MMU/L2 cache */
#define GLB_DEBUG_FWUTF_DESTROY 0x0FE0 /* () Test fixture destroy function address */
#define GLB_DEBUG_FWUTF_TEST 0x0FE4 /* () Test index */
#define GLB_DEBUG_FWUTF_FIXTURE 0x0FE8 /* () Test fixture index */
#define GLB_DEBUG_FWUTF_CREATE 0x0FEC /* () Test fixture create function address */
#define GLB_DEBUG_ARG_IN0 0x0FE0 /* Firmware Debug argument array element 0 */
#define GLB_DEBUG_ARG_IN1 0x0FE4 /* Firmware Debug argument array element 1 */
#define GLB_DEBUG_ARG_IN2 0x0FE8 /* Firmware Debug argument array element 2 */
#define GLB_DEBUG_ARG_IN3 0x0FEC /* Firmware Debug argument array element 3 */
/* Mappings based on GLB_DEBUG_REQ.FWUTF_RUN bit being different from GLB_DEBUG_ACK.FWUTF_RUN */
#define GLB_DEBUG_FWUTF_DESTROY GLB_DEBUG_ARG_IN0 /* () Test fixture destroy function address */
#define GLB_DEBUG_FWUTF_TEST GLB_DEBUG_ARG_IN1 /* () Test index */
#define GLB_DEBUG_FWUTF_FIXTURE GLB_DEBUG_ARG_IN2 /* () Test fixture index */
#define GLB_DEBUG_FWUTF_CREATE GLB_DEBUG_ARG_IN3 /* () Test fixture create function address */
#define GLB_DEBUG_ACK_IRQ_MASK 0x0FF8 /* () Global debug acknowledge interrupt mask */
#define GLB_DEBUG_REQ 0x0FFC /* () Global debug request */
/* GLB_OUTPUT_BLOCK register offsets */
#define GLB_DEBUG_ARG_OUT0 0x0FE0 /* Firmware debug result element 0 */
#define GLB_DEBUG_ARG_OUT1 0x0FE4 /* Firmware debug result element 1 */
#define GLB_DEBUG_ARG_OUT2 0x0FE8 /* Firmware debug result element 2 */
#define GLB_DEBUG_ARG_OUT3 0x0FEC /* Firmware debug result element 3 */
#define GLB_ACK 0x0000 /* () Global acknowledge */
#define GLB_DB_ACK 0x0008 /* () Global doorbell acknowledge */
#define GLB_HALT_STATUS 0x0010 /* () Global halt status */
#define GLB_PRFCNT_STATUS 0x0014 /* () Performance counter status */
#define GLB_PRFCNT_INSERT 0x0018 /* () Performance counter buffer insert index */
#define GLB_DEBUG_FWUTF_RESULT 0x0FE0 /* () Firmware debug test result */
#define GLB_DEBUG_FWUTF_RESULT GLB_DEBUG_ARG_OUT0 /* () Firmware debug test result */
#define GLB_DEBUG_ACK 0x0FFC /* () Global debug acknowledge */
/* USER register offsets */
#define LATEST_FLUSH 0x0000 /* () Flush ID of latest clean-and-invalidate operation */
#ifdef CONFIG_MALI_CORESIGHT
#define GLB_DEBUG_REQ_FW_AS_WRITE_SHIFT 4
#define GLB_DEBUG_REQ_FW_AS_WRITE_MASK (0x1 << GLB_DEBUG_REQ_FW_AS_WRITE_SHIFT)
#define GLB_DEBUG_REQ_FW_AS_READ_SHIFT 5
#define GLB_DEBUG_REQ_FW_AS_READ_MASK (0x1 << GLB_DEBUG_REQ_FW_AS_READ_SHIFT)
#define GLB_DEBUG_ARG_IN0 0x0FE0
#define GLB_DEBUG_ARG_IN1 0x0FE4
#define GLB_DEBUG_ARG_OUT0 0x0FE0
#endif /* CONFIG_MALI_CORESIGHT */
/* End register offsets */
@@ -304,10 +315,17 @@
#define CS_REQ_IDLE_RESOURCE_REQ_SHIFT 11
#define CS_REQ_IDLE_RESOURCE_REQ_MASK (0x1 << CS_REQ_IDLE_RESOURCE_REQ_SHIFT)
#define CS_REQ_IDLE_RESOURCE_REQ_GET(reg_val) \
(((reg_val)&CS_REQ_IDLE_RESOURCE_REQ_MASK) >> CS_REQ_IDLE_RESOURCE_REQ_SHIFT)
(((reg_val) & CS_REQ_IDLE_RESOURCE_REQ_MASK) >> CS_REQ_IDLE_RESOURCE_REQ_SHIFT)
#define CS_REQ_IDLE_RESOURCE_REQ_SET(reg_val, value) \
(((reg_val) & ~CS_REQ_IDLE_RESOURCE_REQ_MASK) | \
(((value) << CS_REQ_IDLE_RESOURCE_REQ_SHIFT) & CS_REQ_IDLE_RESOURCE_REQ_MASK))
#define CS_REQ_IDLE_SHARED_SB_DEC_SHIFT 12
#define CS_REQ_IDLE_SHARED_SB_DEC_MASK (0x1 << CS_REQ_IDLE_SHARED_SB_DEC_SHIFT)
#define CS_REQ_IDLE_SHARED_SB_DEC_GET(reg_val) \
(((reg_val) & CS_REQ_IDLE_SHARED_SB_DEC_MASK) >> CS_REQ_IDLE_SHARED_SB_DEC_SHIFT)
#define CS_REQ_IDLE_SHARED_SB_DEC_REQ_SET(reg_val, value) \
(((reg_val) & ~CS_REQ_IDLE_SHARED_SB_DEC_MASK) | \
(((value) << CS_REQ_IDLE_SHARED_SB_DEC_SHIFT) & CS_REQ_IDLE_SHARED_SB_DEC_MASK))
#define CS_REQ_TILER_OOM_SHIFT 26
#define CS_REQ_TILER_OOM_MASK (0x1 << CS_REQ_TILER_OOM_SHIFT)
#define CS_REQ_TILER_OOM_GET(reg_val) (((reg_val)&CS_REQ_TILER_OOM_MASK) >> CS_REQ_TILER_OOM_SHIFT)
@@ -582,6 +600,13 @@
#define CS_STATUS_WAIT_PROTM_PEND_SET(reg_val, value) \
(((reg_val) & ~CS_STATUS_WAIT_PROTM_PEND_MASK) | \
(((value) << CS_STATUS_WAIT_PROTM_PEND_SHIFT) & CS_STATUS_WAIT_PROTM_PEND_MASK))
#define CS_STATUS_WAIT_SYNC_WAIT_SIZE_SHIFT 30
#define CS_STATUS_WAIT_SYNC_WAIT_SIZE_MASK (0x1 << CS_STATUS_WAIT_SYNC_WAIT_SIZE_SHIFT)
#define CS_STATUS_WAIT_SYNC_WAIT_SIZE_GET(reg_val) \
(((reg_val)&CS_STATUS_WAIT_SYNC_WAIT_SIZE_MASK) >> CS_STATUS_WAIT_SYNC_WAIT_SIZE_SHIFT)
#define CS_STATUS_WAIT_SYNC_WAIT_SIZE_SET(reg_val, value) \
(((reg_val) & ~CS_STATUS_WAIT_SYNC_WAIT_SIZE_MASK) | \
(((value) << CS_STATUS_WAIT_SYNC_WAIT_SIZE_SHIFT) & CS_STATUS_WAIT_SYNC_WAIT_SIZE_MASK))
#define CS_STATUS_WAIT_SYNC_WAIT_SHIFT 31
#define CS_STATUS_WAIT_SYNC_WAIT_MASK (0x1 << CS_STATUS_WAIT_SYNC_WAIT_SHIFT)
#define CS_STATUS_WAIT_SYNC_WAIT_GET(reg_val) \
@@ -692,6 +717,27 @@
#define CS_FAULT_EXCEPTION_TYPE_ADDR_RANGE_FAULT 0x5A
#define CS_FAULT_EXCEPTION_TYPE_IMPRECISE_FAULT 0x5B
#define CS_FAULT_EXCEPTION_TYPE_RESOURCE_EVICTION_TIMEOUT 0x69
#define CS_FAULT_EXCEPTION_TYPE_TRANSLATION_FAULT_L0 0xC0
#define CS_FAULT_EXCEPTION_TYPE_TRANSLATION_FAULT_L1 0xC1
#define CS_FAULT_EXCEPTION_TYPE_TRANSLATION_FAULT_L2 0xC2
#define CS_FAULT_EXCEPTION_TYPE_TRANSLATION_FAULT_L3 0xC3
#define CS_FAULT_EXCEPTION_TYPE_TRANSLATION_FAULT_L4 0xC4
#define CS_FAULT_EXCEPTION_TYPE_PERMISSION_FAULT_0 0xC8
#define CS_FAULT_EXCEPTION_TYPE_PERMISSION_FAULT_1 0xC9
#define CS_FAULT_EXCEPTION_TYPE_PERMISSION_FAULT_2 0xCA
#define CS_FAULT_EXCEPTION_TYPE_PERMISSION_FAULT_3 0xCB
#define CS_FAULT_EXCEPTION_TYPE_ACCESS_FLAG_1 0xD9
#define CS_FAULT_EXCEPTION_TYPE_ACCESS_FLAG_2 0xDA
#define CS_FAULT_EXCEPTION_TYPE_ACCESS_FLAG_3 0xDB
#define CS_FAULT_EXCEPTION_TYPE_ADDRESS_SIZE_FAULT_IN 0xE0
#define CS_FAULT_EXCEPTION_TYPE_ADDRESS_SIZE_FAULT_OUT_0 0xE4
#define CS_FAULT_EXCEPTION_TYPE_ADDRESS_SIZE_FAULT_OUT_1 0xE5
#define CS_FAULT_EXCEPTION_TYPE_ADDRESS_SIZE_FAULT_OUT_2 0xE6
#define CS_FAULT_EXCEPTION_TYPE_ADDRESS_SIZE_FAULT_OUT_3 0xE7
#define CS_FAULT_EXCEPTION_TYPE_MEMORY_ATTRIBUTE_FAULT_0 0xE8
#define CS_FAULT_EXCEPTION_TYPE_MEMORY_ATTRIBUTE_FAULT_1 0xE9
#define CS_FAULT_EXCEPTION_TYPE_MEMORY_ATTRIBUTE_FAULT_2 0xEA
#define CS_FAULT_EXCEPTION_TYPE_MEMORY_ATTRIBUTE_FAULT_3 0xEB
/* End of CS_FAULT_EXCEPTION_TYPE values */
#define CS_FAULT_EXCEPTION_DATA_SHIFT 8
#define CS_FAULT_EXCEPTION_DATA_MASK (0xFFFFFF << CS_FAULT_EXCEPTION_DATA_SHIFT)
@@ -1590,4 +1636,43 @@
((GLB_PRFCNT_SIZE_FIRMWARE_SIZE_SET_MOD(value) << GLB_PRFCNT_SIZE_FIRMWARE_SIZE_SHIFT) & \
GLB_PRFCNT_SIZE_FIRMWARE_SIZE_MASK))
/* GLB_DEBUG_REQ register */
#define GLB_DEBUG_REQ_DEBUG_RUN_SHIFT GPU_U(23)
#define GLB_DEBUG_REQ_DEBUG_RUN_MASK (GPU_U(0x1) << GLB_DEBUG_REQ_DEBUG_RUN_SHIFT)
#define GLB_DEBUG_REQ_DEBUG_RUN_GET(reg_val) \
(((reg_val)&GLB_DEBUG_REQ_DEBUG_RUN_MASK) >> GLB_DEBUG_REQ_DEBUG_RUN_SHIFT)
#define GLB_DEBUG_REQ_DEBUG_RUN_SET(reg_val, value) \
(((reg_val) & ~GLB_DEBUG_REQ_DEBUG_RUN_MASK) | \
(((value) << GLB_DEBUG_REQ_DEBUG_RUN_SHIFT) & GLB_DEBUG_REQ_DEBUG_RUN_MASK))
#define GLB_DEBUG_REQ_RUN_MODE_SHIFT GPU_U(24)
#define GLB_DEBUG_REQ_RUN_MODE_MASK (GPU_U(0xFF) << GLB_DEBUG_REQ_RUN_MODE_SHIFT)
#define GLB_DEBUG_REQ_RUN_MODE_GET(reg_val) \
(((reg_val)&GLB_DEBUG_REQ_RUN_MODE_MASK) >> GLB_DEBUG_REQ_RUN_MODE_SHIFT)
#define GLB_DEBUG_REQ_RUN_MODE_SET(reg_val, value) \
(((reg_val) & ~GLB_DEBUG_REQ_RUN_MODE_MASK) | \
(((value) << GLB_DEBUG_REQ_RUN_MODE_SHIFT) & GLB_DEBUG_REQ_RUN_MODE_MASK))
/* GLB_DEBUG_ACK register */
#define GLB_DEBUG_ACK_DEBUG_RUN_SHIFT GPU_U(23)
#define GLB_DEBUG_ACK_DEBUG_RUN_MASK (GPU_U(0x1) << GLB_DEBUG_ACK_DEBUG_RUN_SHIFT)
#define GLB_DEBUG_ACK_DEBUG_RUN_GET(reg_val) \
(((reg_val)&GLB_DEBUG_ACK_DEBUG_RUN_MASK) >> GLB_DEBUG_ACK_DEBUG_RUN_SHIFT)
#define GLB_DEBUG_ACK_DEBUG_RUN_SET(reg_val, value) \
(((reg_val) & ~GLB_DEBUG_ACK_DEBUG_RUN_MASK) | \
(((value) << GLB_DEBUG_ACK_DEBUG_RUN_SHIFT) & GLB_DEBUG_ACK_DEBUG_RUN_MASK))
#define GLB_DEBUG_ACK_RUN_MODE_SHIFT GPU_U(24)
#define GLB_DEBUG_ACK_RUN_MODE_MASK (GPU_U(0xFF) << GLB_DEBUG_ACK_RUN_MODE_SHIFT)
#define GLB_DEBUG_ACK_RUN_MODE_GET(reg_val) \
(((reg_val)&GLB_DEBUG_ACK_RUN_MODE_MASK) >> GLB_DEBUG_ACK_RUN_MODE_SHIFT)
#define GLB_DEBUG_ACK_RUN_MODE_SET(reg_val, value) \
(((reg_val) & ~GLB_DEBUG_ACK_RUN_MODE_MASK) | \
(((value) << GLB_DEBUG_ACK_RUN_MODE_SHIFT) & GLB_DEBUG_ACK_RUN_MODE_MASK))
/* RUN_MODE values */
#define GLB_DEBUG_RUN_MODE_TYPE_NOP 0x0
#define GLB_DEBUG_RUN_MODE_TYPE_CORE_DUMP 0x1
/* End of RUN_MODE values */
#endif /* _KBASE_CSF_REGISTERS_H_ */

View File

@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
* (C) COPYRIGHT 2018-2022 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -32,6 +32,7 @@
#include <uapi/gpu/arm/midgard/mali_base_kernel.h>
#include <mali_kbase_hwaccess_time.h>
#include "mali_kbase_csf_tiler_heap_reclaim.h"
#include "mali_kbase_csf_mcu_shared_reg.h"
/* Value to indicate that a queue group is not groups_to_schedule list */
#define KBASEP_GROUP_PREPARED_SEQ_NUM_INVALID (U32_MAX)
@@ -57,6 +58,9 @@
/* Time to wait for completion of PING req before considering MCU as hung */
#define FW_PING_AFTER_ERROR_TIMEOUT_MS (10)
/* Explicitly defining this blocked_reason code as SB_WAIT for clarity */
#define CS_STATUS_BLOCKED_ON_SB_WAIT CS_STATUS_BLOCKED_REASON_REASON_WAIT
static int scheduler_group_schedule(struct kbase_queue_group *group);
static void remove_group_from_idle_wait(struct kbase_queue_group *const group);
static
@@ -553,7 +557,7 @@ void kbase_csf_scheduler_process_gpu_idle_event(struct kbase_device *kbdev)
* updated whilst gpu_idle_worker() is executing.
*/
scheduler->fast_gpu_idle_handling =
(kbdev->csf.gpu_idle_hysteresis_ms == 0) ||
(kbdev->csf.gpu_idle_hysteresis_us == 0) ||
!kbase_csf_scheduler_all_csgs_idle(kbdev);
/* The GPU idle worker relies on update_on_slot_queues_offsets() to have
@@ -1450,6 +1454,7 @@ int kbase_csf_scheduler_queue_stop(struct kbase_queue *queue)
err = sched_halt_stream(queue);
unassign_user_doorbell_from_queue(kbdev, queue);
kbase_csf_mcu_shared_drop_stopped_queue(kbdev, queue);
}
mutex_unlock(&kbdev->csf.scheduler.lock);
@@ -1549,11 +1554,13 @@ static void program_cs(struct kbase_device *kbdev,
WARN_ON(csi_index >= ginfo->stream_num))
return;
assign_user_doorbell_to_queue(kbdev, queue);
if (queue->doorbell_nr == KBASEP_USER_DB_NR_INVALID)
return;
if (queue->enabled) {
assign_user_doorbell_to_queue(kbdev, queue);
if (queue->doorbell_nr == KBASEP_USER_DB_NR_INVALID)
return;
WARN_ON(queue->doorbell_nr != queue->group->doorbell_nr);
WARN_ON(queue->doorbell_nr != queue->group->doorbell_nr);
}
if (queue->enabled && queue_group_suspended_locked(group))
program_cs_extract_init(queue);
@@ -1567,17 +1574,15 @@ static void program_cs(struct kbase_device *kbdev,
kbase_csf_firmware_cs_input(stream, CS_SIZE,
queue->size);
user_input = (queue->reg->start_pfn << PAGE_SHIFT);
kbase_csf_firmware_cs_input(stream, CS_USER_INPUT_LO,
user_input & 0xFFFFFFFF);
kbase_csf_firmware_cs_input(stream, CS_USER_INPUT_HI,
user_input >> 32);
user_input = queue->user_io_gpu_va;
WARN_ONCE(!user_input && queue->enabled, "Enabled queue should have a valid gpu_va");
user_output = ((queue->reg->start_pfn + 1) << PAGE_SHIFT);
kbase_csf_firmware_cs_input(stream, CS_USER_OUTPUT_LO,
user_output & 0xFFFFFFFF);
kbase_csf_firmware_cs_input(stream, CS_USER_OUTPUT_HI,
user_output >> 32);
kbase_csf_firmware_cs_input(stream, CS_USER_INPUT_LO, user_input & 0xFFFFFFFF);
kbase_csf_firmware_cs_input(stream, CS_USER_INPUT_HI, user_input >> 32);
user_output = user_input + PAGE_SIZE;
kbase_csf_firmware_cs_input(stream, CS_USER_OUTPUT_LO, user_output & 0xFFFFFFFF);
kbase_csf_firmware_cs_input(stream, CS_USER_OUTPUT_HI, user_output >> 32);
kbase_csf_firmware_cs_input(stream, CS_CONFIG,
(queue->doorbell_nr << 8) | (queue->priority & 0xF));
@@ -1608,8 +1613,10 @@ static void program_cs(struct kbase_device *kbdev,
* or protected mode switch.
*/
kbase_csf_firmware_cs_input_mask(stream, CS_REQ,
CS_REQ_IDLE_EMPTY_MASK | CS_REQ_IDLE_SYNC_WAIT_MASK,
CS_REQ_IDLE_EMPTY_MASK | CS_REQ_IDLE_SYNC_WAIT_MASK);
CS_REQ_IDLE_EMPTY_MASK | CS_REQ_IDLE_SYNC_WAIT_MASK |
CS_REQ_IDLE_SHARED_SB_DEC_MASK,
CS_REQ_IDLE_EMPTY_MASK | CS_REQ_IDLE_SYNC_WAIT_MASK |
CS_REQ_IDLE_SHARED_SB_DEC_MASK);
/* Set state to START/STOP */
kbase_csf_firmware_cs_input_mask(stream, CS_REQ,
@@ -1624,6 +1631,20 @@ static void program_cs(struct kbase_device *kbdev,
update_hw_active(queue, true);
}
static int onslot_csg_add_new_queue(struct kbase_queue *queue)
{
struct kbase_device *kbdev = queue->kctx->kbdev;
int err;
lockdep_assert_held(&kbdev->csf.scheduler.lock);
err = kbase_csf_mcu_shared_add_queue(kbdev, queue);
if (!err)
program_cs(kbdev, queue, true);
return err;
}
int kbase_csf_scheduler_queue_start(struct kbase_queue *queue)
{
struct kbase_queue_group *group = queue->group;
@@ -1679,8 +1700,28 @@ int kbase_csf_scheduler_queue_start(struct kbase_queue *queue)
* user door-bell on such a case.
*/
kbase_csf_ring_cs_user_doorbell(kbdev, queue);
} else
program_cs(kbdev, queue, true);
} else {
err = onslot_csg_add_new_queue(queue);
/* For an on slot CSG, the only error in adding a new
* queue to run is that the scheduler could not map
* the required userio pages due to likely some resource
* issues. In such a case, and if the group is yet
* to enter its fatal error state, we return a -EBUSY
* to the submitter for another kick. The queue itself
* has yet to be programmed hence needs to remain its
* previous (disabled) state. If the error persists,
* the group will eventually report a fatal error by
* the group's error reporting mechanism, when the MCU
* shared region map retry limit of the group is
* exceeded. For such a case, the expected error value
* is -EIO.
*/
if (unlikely(err)) {
queue->enabled = cs_enabled;
mutex_unlock(&kbdev->csf.scheduler.lock);
return (err != -EIO) ? -EBUSY : err;
}
}
}
queue_delayed_work(system_long_wq, &kbdev->csf.scheduler.ping_work,
msecs_to_jiffies(kbase_get_timeout_ms(
@@ -1821,6 +1862,7 @@ static void halt_csg_slot(struct kbase_queue_group *group, bool suspend)
unsigned long flags;
struct kbase_csf_cmd_stream_group_info *ginfo =
&global_iface->groups[slot];
u32 halt_cmd = suspend ? CSG_REQ_STATE_SUSPEND :
CSG_REQ_STATE_TERMINATE;
@@ -1838,8 +1880,8 @@ static void halt_csg_slot(struct kbase_queue_group *group, bool suspend)
csg_slot[slot].trigger_jiffies = jiffies;
KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_STOP_REQ, group, halt_cmd);
KBASE_TLSTREAM_TL_KBASE_DEVICE_HALT_CSG(
kbdev, kbdev->gpu_props.props.raw_props.gpu_id, slot);
KBASE_TLSTREAM_TL_KBASE_DEVICE_HALTING_CSG(
kbdev, kbdev->gpu_props.props.raw_props.gpu_id, slot, suspend);
}
}
@@ -1891,9 +1933,12 @@ static bool evaluate_sync_update(struct kbase_queue *queue)
struct kbase_vmap_struct *mapping;
bool updated = false;
u32 *sync_ptr;
u32 sync_wait_size;
u32 sync_wait_align_mask;
u32 sync_wait_cond;
u32 sync_current_val;
struct kbase_device *kbdev;
bool sync_wait_align_valid = false;
bool sync_wait_cond_valid = false;
if (WARN_ON(!queue))
@@ -1903,6 +1948,16 @@ static bool evaluate_sync_update(struct kbase_queue *queue)
lockdep_assert_held(&kbdev->csf.scheduler.lock);
sync_wait_size = CS_STATUS_WAIT_SYNC_WAIT_SIZE_GET(queue->status_wait);
sync_wait_align_mask =
(sync_wait_size == 0 ? BASEP_EVENT32_ALIGN_BYTES : BASEP_EVENT64_ALIGN_BYTES) - 1;
sync_wait_align_valid = ((uintptr_t)queue->sync_ptr & sync_wait_align_mask) == 0;
if (!sync_wait_align_valid) {
dev_dbg(queue->kctx->kbdev->dev, "sync memory VA 0x%016llX is misaligned",
queue->sync_ptr);
goto out;
}
sync_ptr = kbase_phy_alloc_mapping_get(queue->kctx, queue->sync_ptr,
&mapping);
@@ -1987,7 +2042,7 @@ bool save_slot_cs(struct kbase_csf_cmd_stream_group_info const *const ginfo,
KBASE_KTRACE_ADD_CSF_GRP_Q(stream->kbdev, QUEUE_SYNC_UPDATE_WAIT_STATUS, queue->group,
queue, status);
if (CS_STATUS_WAIT_SYNC_WAIT_GET(status)) {
if (CS_STATUS_WAIT_SYNC_WAIT_GET(status) || CS_STATUS_WAIT_SB_MASK_GET(status)) {
queue->status_wait = status;
queue->sync_ptr = kbase_csf_firmware_cs_output(stream,
CS_STATUS_WAIT_SYNC_POINTER_LO);
@@ -2003,7 +2058,8 @@ bool save_slot_cs(struct kbase_csf_cmd_stream_group_info const *const ginfo,
kbase_csf_firmware_cs_output(stream,
CS_STATUS_BLOCKED_REASON));
if (!evaluate_sync_update(queue)) {
if ((queue->blocked_reason == CS_STATUS_BLOCKED_ON_SB_WAIT) ||
!evaluate_sync_update(queue)) {
is_waiting = true;
} else {
/* Sync object already got updated & met the condition
@@ -2297,7 +2353,7 @@ static void deschedule_idle_wait_group(struct kbase_csf_scheduler *scheduler,
insert_group_to_idle_wait(group);
}
static void update_offslot_non_idle_cnt_for_faulty_grp(struct kbase_queue_group *group)
static void update_offslot_non_idle_cnt(struct kbase_queue_group *group)
{
struct kbase_device *kbdev = group->kctx->kbdev;
struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
@@ -2434,9 +2490,14 @@ static void save_csg_slot(struct kbase_queue_group *group)
if (!queue || !queue->enabled)
continue;
if (save_slot_cs(ginfo, queue))
sync_wait = true;
else {
if (save_slot_cs(ginfo, queue)) {
/* sync_wait is only true if the queue is blocked on
* a CQS and not a scoreboard.
*/
if (queue->blocked_reason !=
CS_STATUS_BLOCKED_ON_SB_WAIT)
sync_wait = true;
} else {
/* Need to confirm if ringbuffer of the GPU
* queue is empty or not. A race can arise
* between the flush of GPU queue and suspend
@@ -2550,6 +2611,11 @@ static bool cleanup_csg_slot(struct kbase_queue_group *group)
KBASE_TLSTREAM_TL_KBASE_DEVICE_DEPROGRAM_CSG(kbdev,
kbdev->gpu_props.props.raw_props.gpu_id, slot);
/* Notify the group is off-slot and the csg_reg might be available for
* reuse with other groups in a 'lazy unbinding' style.
*/
kbase_csf_mcu_shared_set_group_csg_reg_unused(kbdev, group);
return as_fault;
}
@@ -2633,8 +2699,8 @@ static void program_csg_slot(struct kbase_queue_group *group, s8 slot,
u32 state;
int i;
unsigned long flags;
const u64 normal_suspend_buf =
group->normal_suspend_buf.reg->start_pfn << PAGE_SHIFT;
u64 normal_suspend_buf;
u64 protm_suspend_buf;
struct kbase_csf_csg_slot *csg_slot =
&kbdev->csf.scheduler.csg_slots[slot];
@@ -2646,6 +2712,19 @@ static void program_csg_slot(struct kbase_queue_group *group, s8 slot,
WARN_ON(atomic_read(&csg_slot->state) != CSG_SLOT_READY);
if (unlikely(kbase_csf_mcu_shared_group_bind_csg_reg(kbdev, group))) {
dev_warn(kbdev->dev,
"Couldn't bind MCU shared csg_reg for group %d of context %d_%d, slot=%u",
group->handle, group->kctx->tgid, kctx->id, slot);
kbase_csf_mcu_shared_set_group_csg_reg_unused(kbdev, group);
return;
}
/* The suspend buf has already been mapped through binding to csg_reg */
normal_suspend_buf = group->normal_suspend_buf.gpu_va;
protm_suspend_buf = group->protected_suspend_buf.gpu_va;
WARN_ONCE(!normal_suspend_buf, "Normal suspend buffer not mapped");
ginfo = &global_iface->groups[slot];
/* Pick an available address space for this context */
@@ -2658,6 +2737,7 @@ static void program_csg_slot(struct kbase_queue_group *group, s8 slot,
if (kctx->as_nr == KBASEP_AS_NR_INVALID) {
dev_warn(kbdev->dev, "Could not get a valid AS for group %d of context %d_%d on slot %d\n",
group->handle, kctx->tgid, kctx->id, slot);
kbase_csf_mcu_shared_set_group_csg_reg_unused(kbdev, group);
return;
}
@@ -2708,15 +2788,15 @@ static void program_csg_slot(struct kbase_queue_group *group, s8 slot,
kbase_csf_firmware_csg_input(ginfo, CSG_SUSPEND_BUF_HI,
normal_suspend_buf >> 32);
if (group->protected_suspend_buf.reg) {
const u64 protm_suspend_buf =
group->protected_suspend_buf.reg->start_pfn <<
PAGE_SHIFT;
kbase_csf_firmware_csg_input(ginfo, CSG_PROTM_SUSPEND_BUF_LO,
protm_suspend_buf & U32_MAX);
kbase_csf_firmware_csg_input(ginfo, CSG_PROTM_SUSPEND_BUF_HI,
protm_suspend_buf >> 32);
}
/* Note, we program the P-mode buffer pointer here, but actual runtime
* enter into pmode execution is controlled by the P-mode phy pages are
* allocated and mapped with the bound csg_reg, which has a specific flag
* for indicating this P-mode runnable condition before a group is
* granted its p-mode section entry. Without a P-mode entry, the buffer
* pointed is not going to be accessed at all.
*/
kbase_csf_firmware_csg_input(ginfo, CSG_PROTM_SUSPEND_BUF_LO, protm_suspend_buf & U32_MAX);
kbase_csf_firmware_csg_input(ginfo, CSG_PROTM_SUSPEND_BUF_HI, protm_suspend_buf >> 32);
if (group->dvs_buf) {
kbase_csf_firmware_csg_input(ginfo, CSG_DVS_BUF_LO,
@@ -2769,6 +2849,9 @@ static void program_csg_slot(struct kbase_queue_group *group, s8 slot,
/* Programming a slot consumes a group from scanout */
update_offslot_non_idle_cnt_for_onslot_grp(group);
/* Notify the group's bound csg_reg is now in active use */
kbase_csf_mcu_shared_set_group_csg_reg_active(kbdev, group);
}
static void remove_scheduled_group(struct kbase_device *kbdev,
@@ -2789,7 +2872,7 @@ static void remove_scheduled_group(struct kbase_device *kbdev,
}
static void sched_evict_group(struct kbase_queue_group *group, bool fault,
bool update_non_idle_offslot_grps_cnt)
bool update_non_idle_offslot_grps_cnt_from_run_state)
{
struct kbase_context *kctx = group->kctx;
struct kbase_device *kbdev = kctx->kbdev;
@@ -2800,7 +2883,7 @@ static void sched_evict_group(struct kbase_queue_group *group, bool fault,
if (queue_group_scheduled_locked(group)) {
u32 i;
if (update_non_idle_offslot_grps_cnt &&
if (update_non_idle_offslot_grps_cnt_from_run_state &&
(group->run_state == KBASE_CSF_GROUP_SUSPENDED ||
group->run_state == KBASE_CSF_GROUP_RUNNABLE)) {
int new_val = atomic_dec_return(
@@ -2815,8 +2898,11 @@ static void sched_evict_group(struct kbase_queue_group *group, bool fault,
}
if (group->prepared_seq_num !=
KBASEP_GROUP_PREPARED_SEQ_NUM_INVALID)
KBASEP_GROUP_PREPARED_SEQ_NUM_INVALID) {
if (!update_non_idle_offslot_grps_cnt_from_run_state)
update_offslot_non_idle_cnt(group);
remove_scheduled_group(kbdev, group);
}
if (group->run_state == KBASE_CSF_GROUP_SUSPENDED_ON_WAIT_SYNC)
remove_group_from_idle_wait(group);
@@ -2843,6 +2929,9 @@ static void sched_evict_group(struct kbase_queue_group *group, bool fault,
}
kbase_csf_tiler_heap_reclaim_sched_notify_grp_evict(group);
/* Clear all the bound shared regions and unmap any in-place MMU maps */
kbase_csf_mcu_shared_clear_evicted_group_csg_reg(kbdev, group);
}
static int term_group_sync(struct kbase_queue_group *group)
@@ -3222,8 +3311,7 @@ static void program_group_on_vacant_csg_slot(struct kbase_device *kbdev,
scheduler->remaining_tick_slots--;
}
} else {
update_offslot_non_idle_cnt_for_faulty_grp(
group);
update_offslot_non_idle_cnt(group);
remove_scheduled_group(kbdev, group);
}
}
@@ -3348,6 +3436,9 @@ static void program_suspending_csg_slots(struct kbase_device *kbdev)
/* The on slot csg is now stopped */
clear_bit(i, slot_mask);
KBASE_TLSTREAM_TL_KBASE_DEVICE_SUSPEND_CSG(
kbdev, kbdev->gpu_props.props.raw_props.gpu_id, i);
if (likely(group)) {
bool as_fault;
/* Only do save/cleanup if the
@@ -3413,8 +3504,6 @@ static void program_suspending_csg_slots(struct kbase_device *kbdev)
*/
clear_bit(i, slot_mask);
set_bit(i, scheduler->csgs_events_enable_mask);
update_offslot_non_idle_cnt_for_onslot_grp(
group);
}
suspend_wait_failed = true;
@@ -3874,11 +3963,16 @@ static void scheduler_group_check_protm_enter(struct kbase_device *const kbdev,
struct kbase_queue_group *const input_grp)
{
struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
struct kbase_protected_suspend_buffer *sbuf = &input_grp->protected_suspend_buf;
unsigned long flags;
bool protm_in_use;
lockdep_assert_held(&scheduler->lock);
/* Return early if the physical pages have not been allocated yet */
if (unlikely(!sbuf->pma))
return;
/* This lock is taken to prevent the issuing of MMU command during the
* transition to protected mode. This helps avoid the scenario where the
* entry to protected mode happens with a memory region being locked and
@@ -3937,6 +4031,15 @@ static void scheduler_group_check_protm_enter(struct kbase_device *const kbdev,
KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_PROTM_ENTER, input_grp,
0u);
#if IS_ENABLED(CONFIG_MALI_CORESIGHT)
spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
/* Coresight must be disabled before entering protected mode. */
kbase_debug_coresight_csf_disable_pmode_enter(kbdev);
spin_lock_irqsave(&scheduler->interrupt_lock, flags);
#endif /* IS_ENABLED(CONFIG_MALI_CORESIGHT) */
kbase_csf_enter_protected_mode(kbdev);
/* Set the pending protm seq number to the next one */
protm_enter_set_next_pending_seq(kbdev);
@@ -4049,8 +4152,7 @@ static void scheduler_apply(struct kbase_device *kbdev)
if (!kctx_as_enabled(group->kctx) || group->faulted) {
/* Drop the head group and continue */
update_offslot_non_idle_cnt_for_faulty_grp(
group);
update_offslot_non_idle_cnt(group);
remove_scheduled_group(kbdev, group);
continue;
}
@@ -4329,6 +4431,8 @@ static void scheduler_update_idle_slots_status(struct kbase_device *kbdev,
set_bit(i, csg_bitmap);
} else {
group->run_state = KBASE_CSF_GROUP_RUNNABLE;
KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSF_GROUP_RUNNABLE, group,
group->run_state);
}
}
@@ -4973,6 +5077,9 @@ static int wait_csg_slots_suspend(struct kbase_device *kbdev, unsigned long *slo
/* The on slot csg is now stopped */
clear_bit(i, slot_mask_local);
KBASE_TLSTREAM_TL_KBASE_DEVICE_SUSPEND_CSG(
kbdev, kbdev->gpu_props.props.raw_props.gpu_id, i);
group = scheduler->csg_slots[i].resident_group;
if (likely(group)) {
/* Only do save/cleanup if the
@@ -5031,8 +5138,13 @@ static void evict_lru_or_blocked_csg(struct kbase_device *kbdev)
if (all_addr_spaces_used) {
for (i = 0; i != total_csg_slots; ++i) {
if (scheduler->csg_slots[i].resident_group != NULL)
if (scheduler->csg_slots[i].resident_group != NULL) {
if (WARN_ON(scheduler->csg_slots[i].resident_group->kctx->as_nr <
0))
continue;
as_usage[scheduler->csg_slots[i].resident_group->kctx->as_nr]++;
}
}
}
@@ -5053,6 +5165,9 @@ static void evict_lru_or_blocked_csg(struct kbase_device *kbdev)
(group->priority != BASE_QUEUE_GROUP_PRIORITY_REALTIME) &&
((lru_idle_group == NULL) ||
(lru_idle_group->prepared_seq_num < group->prepared_seq_num))) {
if (WARN_ON(group->kctx->as_nr < 0))
continue;
/* If all address spaces are used, we need to ensure the group does not
* share the AS with other active CSGs. Or CSG would be freed without AS
* and this optimization would not work.
@@ -5165,16 +5280,12 @@ redo_local_tock:
* queue jobs.
*/
if (protm_grp && scheduler->top_grp == protm_grp) {
int new_val;
dev_dbg(kbdev->dev, "Scheduler keep protm exec: group-%d",
protm_grp->handle);
new_val = atomic_dec_return(&scheduler->non_idle_offslot_grps);
KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_GRP_DEC, protm_grp,
new_val);
spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
update_offslot_non_idle_cnt_for_onslot_grp(protm_grp);
remove_scheduled_group(kbdev, protm_grp);
scheduler_check_pmode_progress(kbdev);
} else if (scheduler->top_grp) {
if (protm_grp)
@@ -5988,8 +6099,11 @@ void kbase_csf_scheduler_group_protm_enter(struct kbase_queue_group *group)
mutex_lock(&scheduler->lock);
if (group->run_state == KBASE_CSF_GROUP_IDLE)
if (group->run_state == KBASE_CSF_GROUP_IDLE) {
group->run_state = KBASE_CSF_GROUP_RUNNABLE;
KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSF_GROUP_RUNNABLE, group,
group->run_state);
}
/* Check if the group is now eligible for execution in protected mode. */
if (scheduler_get_protm_enter_async_group(kbdev, group))
scheduler_group_check_protm_enter(kbdev, group);
@@ -6257,6 +6371,8 @@ int kbase_csf_scheduler_context_init(struct kbase_context *kctx)
int priority;
int err;
kbase_ctx_sched_init_ctx(kctx);
for (priority = 0; priority < KBASE_QUEUE_GROUP_PRIORITY_COUNT;
++priority) {
INIT_LIST_HEAD(&kctx->csf.sched.runnable_groups[priority]);
@@ -6273,7 +6389,8 @@ int kbase_csf_scheduler_context_init(struct kbase_context *kctx)
if (!kctx->csf.sched.sync_update_wq) {
dev_err(kctx->kbdev->dev,
"Failed to initialize scheduler context workqueue");
return -ENOMEM;
err = -ENOMEM;
goto alloc_wq_failed;
}
INIT_WORK(&kctx->csf.sched.sync_update_work,
@@ -6286,10 +6403,16 @@ int kbase_csf_scheduler_context_init(struct kbase_context *kctx)
if (err) {
dev_err(kctx->kbdev->dev,
"Failed to register a sync update callback");
destroy_workqueue(kctx->csf.sched.sync_update_wq);
goto event_wait_add_failed;
}
return err;
event_wait_add_failed:
destroy_workqueue(kctx->csf.sched.sync_update_wq);
alloc_wq_failed:
kbase_ctx_sched_remove_ctx(kctx);
return err;
}
void kbase_csf_scheduler_context_term(struct kbase_context *kctx)
@@ -6297,6 +6420,8 @@ void kbase_csf_scheduler_context_term(struct kbase_context *kctx)
kbase_csf_event_wait_remove(kctx, check_group_sync_update_cb, kctx);
cancel_work_sync(&kctx->csf.sched.sync_update_work);
destroy_workqueue(kctx->csf.sched.sync_update_wq);
kbase_ctx_sched_remove_ctx(kctx);
}
int kbase_csf_scheduler_init(struct kbase_device *kbdev)
@@ -6315,7 +6440,7 @@ int kbase_csf_scheduler_init(struct kbase_device *kbdev)
return -ENOMEM;
}
return 0;
return kbase_csf_mcu_shared_regs_data_init(kbdev);
}
int kbase_csf_scheduler_early_init(struct kbase_device *kbdev)
@@ -6415,6 +6540,8 @@ void kbase_csf_scheduler_term(struct kbase_device *kbdev)
}
KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSF_GROUP_TERMINATED, NULL,
kbase_csf_scheduler_get_nr_active_csgs(kbdev));
/* Terminating the MCU shared regions, following the release of slots */
kbase_csf_mcu_shared_regs_data_term(kbdev);
}
void kbase_csf_scheduler_early_term(struct kbase_device *kbdev)

View File

@@ -0,0 +1,788 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
* (C) COPYRIGHT 2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
* Foundation, and any use by you of this program is subject to the terms
* of such GNU license.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, you can access it online at
* http://www.gnu.org/licenses/gpl-2.0.html.
*
*/
#include "mali_kbase_csf_sync_debugfs.h"
#include "mali_kbase_csf_csg_debugfs.h"
#include <mali_kbase.h>
#include <linux/seq_file.h>
#if IS_ENABLED(CONFIG_SYNC_FILE)
#include "mali_kbase_sync.h"
#endif
#if IS_ENABLED(CONFIG_DEBUG_FS)
#define CQS_UNREADABLE_LIVE_VALUE "(unavailable)"

/* GPU queue related values */
#define GPU_CSF_MOVE_OPCODE ((u64)0x1)
#define GPU_CSF_MOVE32_OPCODE ((u64)0x2)
#define GPU_CSF_SYNC_ADD_OPCODE ((u64)0x25)
#define GPU_CSF_SYNC_SET_OPCODE ((u64)0x26)
#define GPU_CSF_SYNC_WAIT_OPCODE ((u64)0x27)
#define GPU_CSF_SYNC_ADD64_OPCODE ((u64)0x33)
#define GPU_CSF_SYNC_SET64_OPCODE ((u64)0x34)
#define GPU_CSF_SYNC_WAIT64_OPCODE ((u64)0x35)
#define GPU_CSF_CALL_OPCODE ((u64)0x20)

/* Stop decoding a queue once this many CALL instructions have been seen */
#define MAX_NR_GPU_CALLS (5)

/* The opcode occupies bits [63:56] of every instruction word */
#define INSTR_OPCODE_MASK ((u64)0xFF << 56)
#define INSTR_OPCODE_GET(value) (((value) & INSTR_OPCODE_MASK) >> 56)

/* Fix: MOVE32 carries a 32-bit immediate. The previous mask (0xFFFFFFFFFUL,
 * nine F's = 36 bits) would have leaked bits [35:32] of the instruction word
 * into the decoded immediate.
 */
#define MOVE32_IMM_MASK ((u64)0xFFFFFFFFUL)
/* MOVE destination register is in bits [55:48]; immediate is 48 bits */
#define MOVE_DEST_MASK ((u64)0xFF << 48)
#define MOVE_DEST_GET(value) (((value) & MOVE_DEST_MASK) >> 48)
#define MOVE_IMM_MASK ((u64)0xFFFFFFFFFFFFUL)

/* SYNC_* source register fields, and the wait condition for SYNC_WAIT */
#define SYNC_SRC0_MASK ((u64)0xFF << 40)
#define SYNC_SRC1_MASK ((u64)0xFF << 32)
#define SYNC_SRC0_GET(value) (u8)(((value) & SYNC_SRC0_MASK) >> 40)
#define SYNC_SRC1_GET(value) (u8)(((value) & SYNC_SRC1_MASK) >> 32)
#define SYNC_WAIT_CONDITION_MASK ((u64)0xF << 28)
#define SYNC_WAIT_CONDITION_GET(value) (u8)(((value) & SYNC_WAIT_CONDITION_MASK) >> 28)
/* Enumeration for types of GPU queue sync events for
* the purpose of dumping them through debugfs.
*/
enum debugfs_gpu_sync_type {
DEBUGFS_GPU_SYNC_WAIT,
DEBUGFS_GPU_SYNC_SET,
DEBUGFS_GPU_SYNC_ADD,
NUM_DEBUGFS_GPU_SYNC_TYPES
};
/**
 * kbasep_csf_debugfs_get_cqs_live_u32() - Read the current 32-bit value of a CQS object.
 *
 * @kctx:     The context that owns the CQS object.
 * @obj_addr: GPU VA of the CQS live 32-bit value.
 * @live_val: Output pointer; receives the object's current value on success.
 *
 * Return: 0 if successful or a negative error code on failure.
 */
static int kbasep_csf_debugfs_get_cqs_live_u32(struct kbase_context *kctx, u64 obj_addr,
					       u32 *live_val)
{
	struct kbase_vmap_struct *mapping;
	u32 *cpu_ptr;

	cpu_ptr = (u32 *)kbase_phy_alloc_mapping_get(kctx, obj_addr, &mapping);
	if (!cpu_ptr)
		return -1;

	*live_val = *cpu_ptr;
	kbase_phy_alloc_mapping_put(kctx, mapping);

	return 0;
}
/**
 * kbasep_csf_debugfs_get_cqs_live_u64() - Read the current 64-bit value of a CQS object.
 *
 * @kctx:     The context that owns the CQS object.
 * @obj_addr: GPU VA of the CQS live value (32 or 64-bit object).
 * @live_val: Output pointer; receives the object's current value on success.
 *
 * Return: 0 if successful or a negative error code on failure.
 */
static int kbasep_csf_debugfs_get_cqs_live_u64(struct kbase_context *kctx, u64 obj_addr,
					       u64 *live_val)
{
	struct kbase_vmap_struct *mapping;
	u64 *cpu_ptr;

	cpu_ptr = (u64 *)kbase_phy_alloc_mapping_get(kctx, obj_addr, &mapping);
	if (!cpu_ptr)
		return -1;

	*live_val = *cpu_ptr;
	kbase_phy_alloc_mapping_put(kctx, mapping);

	return 0;
}
/**
 * kbasep_csf_sync_print_kcpu_fence_wait_or_signal() - Print details of a CSF SYNC Fence Wait
 *                                                     or Fence Signal command, contained in a
 *                                                     KCPU queue.
 *
 * @file:     The seq_file for printing to.
 * @cmd:      The KCPU Command to be printed.
 * @cmd_name: The name of the command: indicates either a fence SIGNAL or WAIT.
 */
static void kbasep_csf_sync_print_kcpu_fence_wait_or_signal(struct seq_file *file,
							    struct kbase_kcpu_command *cmd,
							    const char *cmd_name)
{
	/* struct fence was renamed to struct dma_fence in kernel 4.10 */
#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE)
	struct fence *fence = NULL;
#else
	struct dma_fence *fence = NULL;
#endif /* LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0) */
	struct kbase_sync_fence_info info;
	const char *timeline_name = NULL;
	bool is_signaled = false;

	fence = cmd->info.fence.fence;
	if (WARN_ON(!fence))
		return;

	kbase_sync_fence_info_get(cmd->info.fence.fence, &info);
	timeline_name = fence->ops->get_timeline_name(fence);
	/* A positive status means the fence has signaled.
	 * NOTE(review): is_signaled (a bool) is what gets printed in the
	 * %.8x "live_value" field below, i.e. 0 or 1 rather than a counter —
	 * confirm this matches what the dump consumer expects.
	 */
	is_signaled = info.status > 0;

	seq_printf(file, "cmd:%s obj:0x%pK live_value:0x%.8x | ", cmd_name, cmd->info.fence.fence,
		   is_signaled);

	/* Note: fence->seqno was u32 until 5.1 kernel, then u64 */
	seq_printf(file, "timeline_name:%s timeline_context:0x%.16llx fence_seqno:0x%.16llx",
		   timeline_name, fence->context, (u64)fence->seqno);
}
/**
 * kbasep_csf_sync_print_kcpu_cqs_wait() - Print details of a CSF SYNC CQS Wait command,
 *                                         contained in a KCPU queue.
 *
 * @file: The seq_file for printing to.
 * @cmd:  The KCPU Command to be printed.
 */
static void kbasep_csf_sync_print_kcpu_cqs_wait(struct seq_file *file,
						struct kbase_kcpu_command *cmd)
{
	struct kbase_context *kctx = file->private;
	size_t i;

	for (i = 0; i < cmd->info.cqs_wait.nr_objs; i++) {
		struct base_cqs_wait_info *cqs_obj = &cmd->info.cqs_wait.objs[i];

		u32 live_val;
		int ret = kbasep_csf_debugfs_get_cqs_live_u32(kctx, cqs_obj->addr, &live_val);
		/* The live value is unreadable if the CQS page cannot be mapped */
		bool live_val_valid = (ret >= 0);

		/* NOTE(review): the label printed is CQS_WAIT_OPERATION even
		 * though this handles the plain CQS_WAIT command — confirm this
		 * is the intended output format for the dump consumer.
		 */
		seq_printf(file, "cmd:CQS_WAIT_OPERATION obj:0x%.16llx live_value:", cqs_obj->addr);

		if (live_val_valid)
			seq_printf(file, "0x%.16llx", (u64)live_val);
		else
			seq_puts(file, CQS_UNREADABLE_LIVE_VALUE);

		/* The printed operation is hard-coded to "gt" for plain CQS waits */
		seq_printf(file, " | op:gt arg_value:0x%.8x", cqs_obj->val);
	}
}
/**
 * kbasep_csf_sync_print_kcpu_cqs_set() - Print details of a CSF SYNC CQS
 *                                        Set command, contained in a KCPU queue.
 *
 * @file: The seq_file for printing to.
 * @cmd:  The KCPU Command to be printed.
 */
static void kbasep_csf_sync_print_kcpu_cqs_set(struct seq_file *file,
					       struct kbase_kcpu_command *cmd)
{
	struct kbase_context *kctx = file->private;
	size_t i;

	for (i = 0; i < cmd->info.cqs_set.nr_objs; i++) {
		struct base_cqs_set *cqs_obj = &cmd->info.cqs_set.objs[i];

		u32 live_val;
		int ret = kbasep_csf_debugfs_get_cqs_live_u32(kctx, cqs_obj->addr, &live_val);
		/* The live value is unreadable if the CQS page cannot be mapped */
		bool live_val_valid = (ret >= 0);

		/* NOTE(review): the label printed is CQS_SET_OPERATION even
		 * though this handles the plain CQS_SET command — confirm this
		 * is the intended output format for the dump consumer.
		 */
		seq_printf(file, "cmd:CQS_SET_OPERATION obj:0x%.16llx live_value:", cqs_obj->addr);

		if (live_val_valid)
			seq_printf(file, "0x%.16llx", (u64)live_val);
		else
			seq_puts(file, CQS_UNREADABLE_LIVE_VALUE);

		/* arg_value is hard-coded: a plain CQS_SET is printed as "add 1" */
		seq_printf(file, " | op:add arg_value:0x%.8x", 1);
	}
}
/**
 * kbasep_csf_sync_get_wait_op_name() - Map a CQS Wait Operation to its printable name.
 *
 * @op: The numerical value of operation.
 *
 * Return: const static pointer to the command name, or '??' if unknown.
 */
static const char *kbasep_csf_sync_get_wait_op_name(basep_cqs_wait_operation_op op)
{
	switch (op) {
	case BASEP_CQS_WAIT_OPERATION_LE:
		return "le";
	case BASEP_CQS_WAIT_OPERATION_GT:
		return "gt";
	default:
		return "??";
	}
}
/**
 * kbasep_csf_sync_get_set_op_name() - Map a CQS Set Operation to its printable name.
 *
 * @op: The numerical value of operation.
 *
 * Return: const static pointer to the command name, or '???' if unknown.
 */
static const char *kbasep_csf_sync_get_set_op_name(basep_cqs_set_operation_op op)
{
	switch (op) {
	case BASEP_CQS_SET_OPERATION_ADD:
		return "add";
	case BASEP_CQS_SET_OPERATION_SET:
		return "set";
	default:
		return "???";
	}
}
/**
 * kbasep_csf_sync_print_kcpu_cqs_wait_op() - Print details of a CSF SYNC CQS
 *                                            Wait Operation command, contained
 *                                            in a KCPU queue.
 *
 * @file: The seq_file for printing to.
 * @cmd:  The KCPU Command to be printed.
 */
static void kbasep_csf_sync_print_kcpu_cqs_wait_op(struct seq_file *file,
						   struct kbase_kcpu_command *cmd)
{
	size_t i;
	struct kbase_context *kctx = file->private;

	/* Fix: use the nr_objs of the cqs_wait_operation member of the info
	 * union, matching the objs[] array dereferenced below. The original
	 * read cmd->info.cqs_wait.nr_objs, silently relying on the two union
	 * members laying out their nr_objs field at the same offset.
	 */
	for (i = 0; i < cmd->info.cqs_wait_operation.nr_objs; i++) {
		struct base_cqs_wait_operation_info *wait_op =
			&cmd->info.cqs_wait_operation.objs[i];
		const char *op_name = kbasep_csf_sync_get_wait_op_name(wait_op->operation);

		u64 live_val;
		int ret = kbasep_csf_debugfs_get_cqs_live_u64(kctx, wait_op->addr, &live_val);
		/* The live value is unreadable if the CQS page cannot be mapped */
		bool live_val_valid = (ret >= 0);

		seq_printf(file, "cmd:CQS_WAIT_OPERATION obj:0x%.16llx live_value:", wait_op->addr);

		if (live_val_valid)
			seq_printf(file, "0x%.16llx", live_val);
		else
			seq_puts(file, CQS_UNREADABLE_LIVE_VALUE);

		seq_printf(file, " | op:%s arg_value:0x%.16llx", op_name, wait_op->val);
	}
}
/**
 * kbasep_csf_sync_print_kcpu_cqs_set_op() - Print details of a CSF SYNC CQS
 *                                           Set Operation command, contained
 *                                           in a KCPU queue.
 *
 * @file: The seq_file for printing to.
 * @cmd:  The KCPU Command to be printed.
 */
static void kbasep_csf_sync_print_kcpu_cqs_set_op(struct seq_file *file,
						  struct kbase_kcpu_command *cmd)
{
	struct kbase_context *kctx = file->private;
	size_t idx;

	for (idx = 0; idx < cmd->info.cqs_set_operation.nr_objs; idx++) {
		struct base_cqs_set_operation_info *set_op =
			&cmd->info.cqs_set_operation.objs[idx];
		const char *op_str = kbasep_csf_sync_get_set_op_name(
			(basep_cqs_set_operation_op)set_op->operation);
		u64 current_val;
		int ret = kbasep_csf_debugfs_get_cqs_live_u64(kctx, set_op->addr, &current_val);
		/* The live value is unreadable if the CQS page cannot be mapped */
		bool readable = (ret >= 0);

		seq_printf(file, "cmd:CQS_SET_OPERATION obj:0x%.16llx live_value:", set_op->addr);

		if (readable)
			seq_printf(file, "0x%.16llx", current_val);
		else
			seq_puts(file, CQS_UNREADABLE_LIVE_VALUE);

		seq_printf(file, " | op:%s arg_value:0x%.16llx", op_str, set_op->val);
	}
}
/**
 * kbasep_csf_sync_kcpu_debugfs_print_queue() - Print debug data for a KCPU queue.
 *
 * @file:  The seq_file to print to.
 * @queue: Pointer to the KCPU queue.
 *
 * The caller must hold kctx->csf.kcpu_queues.lock (asserted below); the
 * queue's own lock is taken here while the pending commands are walked.
 */
static void kbasep_csf_sync_kcpu_debugfs_print_queue(struct seq_file *file,
						     struct kbase_kcpu_command_queue *queue)
{
	char started_or_pending;
	struct kbase_kcpu_command *cmd;
	struct kbase_context *kctx = file->private;
	size_t i;

	if (WARN_ON(!queue))
		return;

	lockdep_assert_held(&kctx->csf.kcpu_queues.lock);
	mutex_lock(&queue->lock);

	for (i = 0; i != queue->num_pending_cmds; ++i) {
		/* Only the head command can have started execution */
		started_or_pending = ((i == 0) && queue->command_started) ? 'S' : 'P';
		seq_printf(file, "queue:KCPU-%u-%u exec:%c ", kctx->id, queue->id,
			   started_or_pending);

		/* NOTE(review): start_offset + i is used unwrapped here —
		 * confirm it cannot exceed the commands[] bound (other kbase
		 * variants wrap the ring index, e.g. with a u8 cast).
		 */
		cmd = &queue->commands[queue->start_offset + i];
		switch (cmd->type) {
#if IS_ENABLED(CONFIG_SYNC_FILE)
		case BASE_KCPU_COMMAND_TYPE_FENCE_SIGNAL:
			kbasep_csf_sync_print_kcpu_fence_wait_or_signal(file, cmd, "FENCE_SIGNAL");
			break;
		case BASE_KCPU_COMMAND_TYPE_FENCE_WAIT:
			kbasep_csf_sync_print_kcpu_fence_wait_or_signal(file, cmd, "FENCE_WAIT");
			break;
#endif
		case BASE_KCPU_COMMAND_TYPE_CQS_WAIT:
			kbasep_csf_sync_print_kcpu_cqs_wait(file, cmd);
			break;
		case BASE_KCPU_COMMAND_TYPE_CQS_SET:
			kbasep_csf_sync_print_kcpu_cqs_set(file, cmd);
			break;
		case BASE_KCPU_COMMAND_TYPE_CQS_WAIT_OPERATION:
			kbasep_csf_sync_print_kcpu_cqs_wait_op(file, cmd);
			break;
		case BASE_KCPU_COMMAND_TYPE_CQS_SET_OPERATION:
			kbasep_csf_sync_print_kcpu_cqs_set_op(file, cmd);
			break;
		default:
			/* Non-sync commands are listed but not decoded */
			seq_puts(file, ", U, Unknown blocking command");
			break;
		}

		seq_puts(file, "\n");
	}

	mutex_unlock(&queue->lock);
}
/**
 * kbasep_csf_sync_kcpu_debugfs_show() - Print CSF KCPU queue sync info.
 *
 * @file: The seq_file for printing to.
 *
 * Return: Negative error code or 0 on success.
 */
static int kbasep_csf_sync_kcpu_debugfs_show(struct seq_file *file)
{
	struct kbase_context *kctx = file->private;
	unsigned long idx;

	mutex_lock(&kctx->csf.kcpu_queues.lock);
	seq_printf(file, "KCPU queues for ctx %u:\n", kctx->id);

	/* Walk every allocated KCPU queue slot */
	for (idx = find_first_bit(kctx->csf.kcpu_queues.in_use, KBASEP_MAX_KCPU_QUEUES);
	     idx < KBASEP_MAX_KCPU_QUEUES;
	     idx = find_next_bit(kctx->csf.kcpu_queues.in_use, KBASEP_MAX_KCPU_QUEUES, idx + 1))
		kbasep_csf_sync_kcpu_debugfs_print_queue(file, kctx->csf.kcpu_queues.array[idx]);

	mutex_unlock(&kctx->csf.kcpu_queues.lock);

	return 0;
}
/**
 * kbasep_csf_get_move_immediate_value() - Get the immediate values for sync operations
 *                                         from a MOVE instruction.
 *
 * @move_cmd:        Raw MOVE instruction.
 * @sync_addr_reg:   Register identifier from SYNC_* instruction.
 * @compare_val_reg: Register identifier from SYNC_* instruction.
 * @sync_val:        Pointer to store CQS object address for sync operation.
 * @compare_val:     Pointer to store compare value for sync operation.
 *
 * Return: True if value is obtained by checking for correct register identifier,
 * or false otherwise.
 */
static bool kbasep_csf_get_move_immediate_value(u64 move_cmd, u64 sync_addr_reg,
						u64 compare_val_reg, u64 *sync_val,
						u64 *compare_val)
{
	u64 opcode = INSTR_OPCODE_GET(move_cmd);
	u64 dest_reg = MOVE_DEST_GET(move_cmd);
	u64 imm_mask;

	/* Only MOVE / MOVE32 are valid; each has a different immediate width */
	if (opcode == GPU_CSF_MOVE32_OPCODE)
		imm_mask = MOVE32_IMM_MASK;
	else if (opcode == GPU_CSF_MOVE_OPCODE)
		imm_mask = MOVE_IMM_MASK;
	else
		return false;

	/* Route the immediate into whichever output the destination register
	 * of this MOVE corresponds to.
	 */
	if (dest_reg == sync_addr_reg)
		*sync_val = move_cmd & imm_mask;
	else if (dest_reg == compare_val_reg)
		*compare_val = move_cmd & imm_mask;
	else
		return false;

	return true;
}
/**
 * kbasep_csf_read_ringbuffer_value() - Reads a u64 from the ringbuffer at a provided
 *                                      offset.
 *
 * @queue:           Pointer to the queue.
 * @ringbuff_offset: Ringbuffer offset. Assumed u64-aligned and within the
 *                   queue's backing pages — TODO confirm callers guarantee this.
 *
 * Return: the u64 in the ringbuffer at the desired offset.
 */
static u64 kbasep_csf_read_ringbuffer_value(struct kbase_queue *queue, u32 ringbuff_offset)
{
	u64 page_off = ringbuff_offset >> PAGE_SHIFT;
	u64 offset_within_page = ringbuff_offset & ~PAGE_MASK;
	struct page *page = as_page(queue->queue_reg->gpu_alloc->pages[page_off]);
	/* kmap_atomic() provides a short-lived CPU mapping of the ringbuffer
	 * page; no sleeping is allowed before the matching kunmap_atomic().
	 */
	u64 *ringbuffer = kmap_atomic(page);
	u64 value = ringbuffer[offset_within_page / sizeof(u64)];

	kunmap_atomic(ringbuffer);
	return value;
}
/**
 * kbasep_csf_print_gpu_sync_op() - Print sync operation info for given sync command.
 *
 * @file:            Pointer to debugfs seq_file file struct for writing output.
 * @kctx:            Pointer to kbase context.
 * @queue:           Pointer to the GPU command queue.
 * @ringbuff_offset: Offset to index the ring buffer with, for the given sync command.
 *                   (Useful for finding preceding MOVE commands)
 * @sync_cmd:        Entire u64 of the sync command, which has both sync address and
 *                   comparison-value encoded in it.
 * @type:            Type of GPU sync command (e.g. SYNC_SET, SYNC_ADD, SYNC_WAIT).
 * @is_64bit:        Bool to indicate if operation is 64 bit (true) or 32 bit (false).
 * @follows_wait:    Bool to indicate if the operation follows at least one wait
 *                   operation. Used to determine whether it's pending or started.
 */
static void kbasep_csf_print_gpu_sync_op(struct seq_file *file, struct kbase_context *kctx,
					 struct kbase_queue *queue, u32 ringbuff_offset,
					 u64 sync_cmd, enum debugfs_gpu_sync_type type,
					 bool is_64bit, bool follows_wait)
{
	u64 sync_addr = 0, compare_val = 0, live_val = 0;
	u64 move_cmd;
	u8 sync_addr_reg, compare_val_reg, wait_condition = 0;
	int err;

	/* Both tables are indexed by enum debugfs_gpu_sync_type */
	static const char *const gpu_sync_type_name[] = { "SYNC_WAIT", "SYNC_SET", "SYNC_ADD" };
	static const char *const gpu_sync_type_op[] = {
		"wait", /* This should never be printed, only included to simplify indexing */
		"set", "add"
	};

	if (type >= NUM_DEBUGFS_GPU_SYNC_TYPES) {
		dev_warn(kctx->kbdev->dev, "Expected GPU queue sync type is unknown!");
		return;
	}

	/* We expect there to be at least 2 preceding MOVE instructions, and
	 * Base will always arrange for the 2 MOVE + SYNC instructions to be
	 * contiguously located, and is therefore never expected to be wrapped
	 * around the ringbuffer boundary.
	 */
	if (unlikely(ringbuff_offset < (2 * sizeof(u64)))) {
		dev_warn(kctx->kbdev->dev,
			 "Unexpected wraparound detected between %s & MOVE instruction",
			 gpu_sync_type_name[type]);
		return;
	}

	/* 1. Get Register identifiers from SYNC_* instruction */
	sync_addr_reg = SYNC_SRC0_GET(sync_cmd);
	compare_val_reg = SYNC_SRC1_GET(sync_cmd);

	/* 2. Get values from first MOVE command. Each MOVE populates either
	 * the sync address or the compare value, depending on which register
	 * it targets; the helper assigns to whichever output matches.
	 */
	ringbuff_offset -= sizeof(u64);
	move_cmd = kbasep_csf_read_ringbuffer_value(queue, ringbuff_offset);
	if (!kbasep_csf_get_move_immediate_value(move_cmd, sync_addr_reg, compare_val_reg,
						 &sync_addr, &compare_val))
		return;

	/* 3. Get values from next MOVE command */
	ringbuff_offset -= sizeof(u64);
	move_cmd = kbasep_csf_read_ringbuffer_value(queue, ringbuff_offset);
	if (!kbasep_csf_get_move_immediate_value(move_cmd, sync_addr_reg, compare_val_reg,
						 &sync_addr, &compare_val))
		return;

	/* 4. Get CQS object value.
	 * NOTE(review): writing a u32 through (u32 *)&live_val fills the low
	 * half only on little-endian targets — confirm no big-endian builds.
	 */
	if (is_64bit)
		err = kbasep_csf_debugfs_get_cqs_live_u64(kctx, sync_addr, &live_val);
	else
		err = kbasep_csf_debugfs_get_cqs_live_u32(kctx, sync_addr, (u32 *)(&live_val));

	if (err)
		return;

	/* 5. Print info. exec is 'S' (started) only when the queue is enabled
	 * and no wait precedes this command; otherwise 'P' (pending).
	 */
	seq_printf(file, "queue:GPU-%u-%u-%u exec:%c cmd:%s ", kctx->id, queue->group->handle,
		   queue->csi_index, queue->enabled && !follows_wait ? 'S' : 'P',
		   gpu_sync_type_name[type]);

	if (queue->group->csg_nr == KBASEP_CSG_NR_INVALID)
		seq_puts(file, "slot:-");
	else
		seq_printf(file, "slot:%d", (int)queue->group->csg_nr);

	seq_printf(file, " obj:0x%.16llx live_value:0x%.16llx | ", sync_addr, live_val);

	if (type == DEBUGFS_GPU_SYNC_WAIT) {
		/* SYNC_WAIT encodes its comparison in the wait-condition field */
		wait_condition = SYNC_WAIT_CONDITION_GET(sync_cmd);
		seq_printf(file, "op:%s ", kbasep_csf_sync_get_wait_op_name(wait_condition));
	} else
		seq_printf(file, "op:%s ", gpu_sync_type_op[type]);

	seq_printf(file, "arg_value:0x%.16llx\n", compare_val);
}
/**
 * kbasep_csf_dump_active_queue_sync_info() - Print GPU command queue sync information.
 *
 * @file:  seq_file for printing to.
 * @queue: Address of a GPU command queue to examine.
 *
 * This function will iterate through each command in the ring buffer of the given GPU queue from
 * CS_EXTRACT, and if is a SYNC_* instruction it will attempt to decode the sync operation and
 * print relevant information to the debugfs file.
 * This function will stop iterating once the CS_INSERT address is reached by the cursor (i.e.
 * when there are no more commands to view) or a number of consumed GPU CALL commands have
 * been observed.
 */
static void kbasep_csf_dump_active_queue_sync_info(struct seq_file *file, struct kbase_queue *queue)
{
	struct kbase_context *kctx;
	u32 *addr;
	u64 cs_extract, cs_insert, instr, cursor;
	bool follows_wait = false;
	int nr_calls = 0;

	if (!queue)
		return;

	kctx = queue->kctx;

	/* CS_INSERT is read from the first user I/O page, CS_EXTRACT from the
	 * page one PAGE_SIZE further on.
	 */
	addr = (u32 *)queue->user_io_addr;
	cs_insert = addr[CS_INSERT_LO / 4] | ((u64)addr[CS_INSERT_HI / 4] << 32);

	addr = (u32 *)(queue->user_io_addr + PAGE_SIZE);
	cs_extract = addr[CS_EXTRACT_LO / 4] | ((u64)addr[CS_EXTRACT_HI / 4] << 32);

	cursor = cs_extract;

	/* The mask-based ringbuffer offset computation below needs a
	 * power-of-2 queue size.
	 */
	if (!is_power_of_2(queue->size)) {
		dev_warn(kctx->kbdev->dev, "GPU queue %u size of %u not a power of 2",
			 queue->csi_index, queue->size);
		return;
	}

	while ((cursor < cs_insert) && (nr_calls < MAX_NR_GPU_CALLS)) {
		bool instr_is_64_bit = false;
		/* Calculate offset into ringbuffer from the absolute cursor,
		 * by finding the remainder of the cursor divided by the
		 * ringbuffer size. The ringbuffer size is guaranteed to be
		 * a power of 2, so the remainder can be calculated without an
		 * explicit modulo. queue->size - 1 is the ringbuffer mask.
		 */
		u32 cursor_ringbuff_offset = (u32)(cursor & (queue->size - 1));

		/* Find instruction that cursor is currently on */
		instr = kbasep_csf_read_ringbuffer_value(queue, cursor_ringbuff_offset);

		/* First pass only determines the operand width */
		switch (INSTR_OPCODE_GET(instr)) {
		case GPU_CSF_SYNC_ADD64_OPCODE:
		case GPU_CSF_SYNC_SET64_OPCODE:
		case GPU_CSF_SYNC_WAIT64_OPCODE:
			instr_is_64_bit = true;
			/* fallthrough */
		default:
			break;
		}

		/* Second pass decodes and prints the SYNC_* operations */
		switch (INSTR_OPCODE_GET(instr)) {
		case GPU_CSF_SYNC_ADD_OPCODE:
		case GPU_CSF_SYNC_ADD64_OPCODE:
			kbasep_csf_print_gpu_sync_op(file, kctx, queue, cursor_ringbuff_offset,
						     instr, DEBUGFS_GPU_SYNC_ADD, instr_is_64_bit,
						     follows_wait);
			break;
		case GPU_CSF_SYNC_SET_OPCODE:
		case GPU_CSF_SYNC_SET64_OPCODE:
			kbasep_csf_print_gpu_sync_op(file, kctx, queue, cursor_ringbuff_offset,
						     instr, DEBUGFS_GPU_SYNC_SET, instr_is_64_bit,
						     follows_wait);
			break;
		case GPU_CSF_SYNC_WAIT_OPCODE:
		case GPU_CSF_SYNC_WAIT64_OPCODE:
			kbasep_csf_print_gpu_sync_op(file, kctx, queue, cursor_ringbuff_offset,
						     instr, DEBUGFS_GPU_SYNC_WAIT, instr_is_64_bit,
						     follows_wait);
			follows_wait = true; /* Future commands will follow at least one wait */
			break;
		case GPU_CSF_CALL_OPCODE:
			/* Count CALLs so runaway decoding stops at MAX_NR_GPU_CALLS */
			nr_calls++;
			/* Fallthrough */
		default:
			/* Unrecognized command, skip past it */
			break;
		}

		cursor += sizeof(u64);
	}
}
/**
 * kbasep_csf_dump_active_group_sync_state() - Prints SYNC commands in all GPU queues of
 *                                             the provided queue group.
 *
 * @file:  seq_file for printing to.
 * @group: Address of a GPU command group to iterate through.
 *
 * Walks every stream slot of the group and prints the SYNC related commands
 * of any queue bound there.
 */
static void kbasep_csf_dump_active_group_sync_state(struct seq_file *file,
						    struct kbase_queue_group *const group)
{
	struct kbase_context *kctx = file->private;
	unsigned int stream;

	seq_printf(file, "GPU queues for group %u (slot %d) of ctx %d_%d\n", group->handle,
		   group->csg_nr, kctx->tgid, kctx->id);

	for (stream = 0; stream < MAX_SUPPORTED_STREAMS_PER_GROUP; stream++)
		kbasep_csf_dump_active_queue_sync_info(file, group->bound_queues[stream]);
}
/**
 * kbasep_csf_sync_gpu_debugfs_show() - Print CSF GPU queue sync info.
 *
 * @file: The seq_file for printing to.
 *
 * Return: Negative error code or 0 on success.
 */
static int kbasep_csf_sync_gpu_debugfs_show(struct seq_file *file)
{
	u32 gr;
	struct kbase_context *kctx = file->private;
	struct kbase_device *kbdev;

	if (WARN_ON(!kctx))
		return -EINVAL;

	kbdev = kctx->kbdev;
	/* Hold the scheduler lock so CSG slot residency cannot change while
	 * the slots are walked below.
	 */
	kbase_csf_scheduler_lock(kbdev);
	kbase_csf_debugfs_update_active_groups_status(kbdev);

	for (gr = 0; gr < kbdev->csf.global_iface.group_num; gr++) {
		struct kbase_queue_group *const group =
			kbdev->csf.scheduler.csg_slots[gr].resident_group;
		/* Only dump on-slot groups belonging to this context */
		if (!group || group->kctx != kctx)
			continue;
		kbasep_csf_dump_active_group_sync_state(file, group);
	}

	kbase_csf_scheduler_unlock(kbdev);

	return 0;
}
/**
 * kbasep_csf_sync_debugfs_show() - Print CSF queue sync information.
 *
 * @file: The seq_file for printing to.
 * @data: The debugfs dentry private data, a pointer to kbase_context.
 *
 * Return: Negative error code or 0 on success.
 */
static int kbasep_csf_sync_debugfs_show(struct seq_file *file, void *data)
{
	/* Version line first, so consumers can detect format changes */
	seq_printf(file, "MALI_CSF_SYNC_DEBUGFS_VERSION: v%u\n", MALI_CSF_SYNC_DEBUGFS_VERSION);

	/* NOTE(review): both sub-show return values are discarded here, so an
	 * -EINVAL from the GPU dump is not propagated to the reader — confirm
	 * this is intentional.
	 */
	kbasep_csf_sync_kcpu_debugfs_show(file);
	kbasep_csf_sync_gpu_debugfs_show(file);

	return 0;
}
/* Open callback: bind the seq_file show routine; i_private carries the
 * kbase_context installed by kbase_csf_sync_debugfs_init().
 */
static int kbasep_csf_sync_debugfs_open(struct inode *in, struct file *file)
{
	return single_open(file, kbasep_csf_sync_debugfs_show, in->i_private);
}
/* File operations for the read-only "csf_sync" debugfs entry (seq_file based) */
static const struct file_operations kbasep_csf_sync_debugfs_fops = {
	.open = kbasep_csf_sync_debugfs_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = single_release,
};
/**
 * kbase_csf_sync_debugfs_init() - Initialise debugfs file.
 *
 * @kctx: Kernel context pointer.
 *
 * Creates the per-context read-only "csf_sync" entry under the context's
 * debugfs directory; failure is reported but not fatal.
 */
void kbase_csf_sync_debugfs_init(struct kbase_context *kctx)
{
	struct dentry *file;

	if (WARN_ON(!kctx || IS_ERR_OR_NULL(kctx->kctx_dentry)))
		return;

	file = debugfs_create_file("csf_sync", 0444, kctx->kctx_dentry, kctx,
				   &kbasep_csf_sync_debugfs_fops);

	if (IS_ERR_OR_NULL(file))
		dev_warn(kctx->kbdev->dev, "Unable to create CSF Sync debugfs entry");
}
#else
/*
 * Stub functions for when debugfs is disabled
 */
/* No-op stub: keeps call sites free of CONFIG_DEBUG_FS guards */
void kbase_csf_sync_debugfs_init(struct kbase_context *kctx)
{
}
#endif /* CONFIG_DEBUG_FS */

View File

@@ -0,0 +1,37 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
* (C) COPYRIGHT 2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
* Foundation, and any use by you of this program is subject to the terms
* of such GNU license.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, you can access it online at
* http://www.gnu.org/licenses/gpl-2.0.html.
*
*/
#ifndef _KBASE_CSF_SYNC_DEBUGFS_H_
#define _KBASE_CSF_SYNC_DEBUGFS_H_
/* Forward declaration */
struct kbase_context;
#define MALI_CSF_SYNC_DEBUGFS_VERSION 0
/**
* kbase_csf_sync_debugfs_init() - Create a debugfs entry for CSF queue sync info
*
* @kctx: The kbase_context for which to create the debugfs entry
*/
void kbase_csf_sync_debugfs_init(struct kbase_context *kctx);
#endif /* _KBASE_CSF_SYNC_DEBUGFS_H_ */

View File

@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
* (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -101,7 +101,7 @@ static struct kbase_csf_tiler_heap_chunk *get_last_chunk(
* @kctx: kbase context the chunk belongs to.
* @chunk: The chunk whose external mappings are going to be removed.
*
* This function marks the region as DONT NEED. Along with KBASE_REG_NO_USER_FREE, this indicates
* This function marks the region as DONT NEED. Along with NO_USER_FREE, this indicates
* that the VA region is owned by the tiler heap and could potentially be shrunk at any time. Other
* parts of kbase outside of tiler heap management should not take references on its physical
* pages, and should not modify them.
@@ -227,12 +227,14 @@ static void remove_unlinked_chunk(struct kbase_context *kctx,
kbase_gpu_vm_lock(kctx);
kbase_vunmap(kctx, &chunk->map);
/* KBASE_REG_DONT_NEED regions will be confused with ephemeral regions (inc freed JIT
* regions), and so we must clear that flag too before freeing
* regions), and so we must clear that flag too before freeing.
* For "no user free count", we check that the count is 1 as it is a shrinkable region;
* no other code part within kbase can take a reference to it.
*/
WARN_ON(atomic_read(&chunk->region->no_user_free_count) > 1);
kbase_va_region_no_user_free_dec(chunk->region);
#if !defined(CONFIG_MALI_VECTOR_DUMP)
chunk->region->flags &= ~(KBASE_REG_NO_USER_FREE | KBASE_REG_DONT_NEED);
#else
chunk->region->flags &= ~KBASE_REG_NO_USER_FREE;
chunk->region->flags &= ~KBASE_REG_DONT_NEED;
#endif
kbase_mem_free_region(kctx, chunk->region);
kbase_gpu_vm_unlock(kctx);
@@ -297,7 +299,7 @@ static struct kbase_csf_tiler_heap_chunk *alloc_new_chunk(struct kbase_context *
kbase_gpu_vm_lock(kctx);
/* Some checks done here as KBASE_REG_NO_USER_FREE still allows such things to be made
/* Some checks done here as NO_USER_FREE still allows such things to be made
* whilst we had dropped the region lock
*/
if (unlikely(atomic_read(&chunk->region->gpu_alloc->kernel_mappings) > 0)) {
@@ -305,32 +307,45 @@ static struct kbase_csf_tiler_heap_chunk *alloc_new_chunk(struct kbase_context *
goto unroll_region;
}
/* There is a race condition with regard to KBASE_REG_DONT_NEED, where another
* thread can have the "no user free" refcount increased between kbase_mem_alloc
* and kbase_gpu_vm_lock (above) and before KBASE_REG_DONT_NEED is set by
* remove_external_chunk_mappings (below).
*
* It should be fine and not a security risk if we let the region leak till
* region tracker termination in such a case.
*/
if (unlikely(atomic_read(&chunk->region->no_user_free_count) > 1)) {
dev_err(kctx->kbdev->dev, "Chunk region has no_user_free_count > 1!\n");
goto unroll_region;
}
/* Whilst we can be sure of a number of other restrictions due to BASEP_MEM_NO_USER_FREE
* being requested, it's useful to document in code what those restrictions are, and ensure
* they remain in place in future.
*/
if (WARN(!chunk->region->gpu_alloc,
"KBASE_REG_NO_USER_FREE chunks should not have had their alloc freed")) {
"NO_USER_FREE chunks should not have had their alloc freed")) {
goto unroll_region;
}
if (WARN(chunk->region->gpu_alloc->type != KBASE_MEM_TYPE_NATIVE,
"KBASE_REG_NO_USER_FREE chunks should not have been freed and then reallocated as imported/non-native regions")) {
"NO_USER_FREE chunks should not have been freed and then reallocated as imported/non-native regions")) {
goto unroll_region;
}
if (WARN((chunk->region->flags & KBASE_REG_ACTIVE_JIT_ALLOC),
"KBASE_REG_NO_USER_FREE chunks should not have been freed and then reallocated as JIT regions")) {
"NO_USER_FREE chunks should not have been freed and then reallocated as JIT regions")) {
goto unroll_region;
}
if (WARN((chunk->region->flags & KBASE_REG_DONT_NEED),
"KBASE_REG_NO_USER_FREE chunks should not have been made ephemeral")) {
"NO_USER_FREE chunks should not have been made ephemeral")) {
goto unroll_region;
}
if (WARN(atomic_read(&chunk->region->cpu_alloc->gpu_mappings) > 1,
"KBASE_REG_NO_USER_FREE chunks should not have been aliased")) {
"NO_USER_FREE chunks should not have been aliased")) {
goto unroll_region;
}
@@ -344,16 +359,21 @@ static struct kbase_csf_tiler_heap_chunk *alloc_new_chunk(struct kbase_context *
remove_external_chunk_mappings(kctx, chunk);
kbase_gpu_vm_unlock(kctx);
/* If page migration is enabled, we don't want to migrate tiler heap pages.
* This does not change if the constituent pages are already marked as isolated.
*/
if (kbase_page_migration_enabled)
kbase_set_phy_alloc_page_status(chunk->region->gpu_alloc, NOT_MOVABLE);
return chunk;
unroll_region:
/* KBASE_REG_DONT_NEED regions will be confused with ephemeral regions (inc freed JIT
* regions), and so we must clear that flag too before freeing.
*/
kbase_va_region_no_user_free_dec(chunk->region);
#if !defined(CONFIG_MALI_VECTOR_DUMP)
chunk->region->flags &= ~(KBASE_REG_NO_USER_FREE | KBASE_REG_DONT_NEED);
#else
chunk->region->flags &= ~KBASE_REG_NO_USER_FREE;
chunk->region->flags &= ~KBASE_REG_DONT_NEED;
#endif
kbase_mem_free_region(kctx, chunk->region);
kbase_gpu_vm_unlock(kctx);
@@ -511,7 +531,7 @@ static void delete_heap(struct kbase_csf_tiler_heap *heap)
if (heap->buf_desc_reg) {
kbase_vunmap(kctx, &heap->buf_desc_map);
kbase_gpu_vm_lock(kctx);
heap->buf_desc_reg->flags &= ~KBASE_REG_NO_USER_FREE;
kbase_va_region_no_user_free_dec(heap->buf_desc_reg);
kbase_gpu_vm_unlock(kctx);
}
@@ -629,8 +649,8 @@ static bool kbasep_is_buffer_descriptor_region_suitable(struct kbase_context *co
return false;
}
if (!(reg->flags & KBASE_REG_CPU_RD) || (reg->flags & KBASE_REG_DONT_NEED) ||
(reg->flags & KBASE_REG_PF_GROW) || (reg->flags & KBASE_REG_ACTIVE_JIT_ALLOC)) {
if (!(reg->flags & KBASE_REG_CPU_RD) || kbase_is_region_shrinkable(reg) ||
(reg->flags & KBASE_REG_PF_GROW)) {
dev_err(kctx->kbdev->dev, "Region has invalid flags: 0x%lX!\n", reg->flags);
return false;
}
@@ -719,14 +739,18 @@ int kbase_csf_tiler_heap_init(struct kbase_context *const kctx, u32 const chunk_
/* If we don't prevent userspace from unmapping this, we may run into
* use-after-free, as we don't check for the existence of the region throughout.
*/
buf_desc_reg->flags |= KBASE_REG_NO_USER_FREE;
heap->buf_desc_va = buf_desc_va;
heap->buf_desc_reg = buf_desc_reg;
kbase_va_region_no_user_free_inc(buf_desc_reg);
vmap_ptr = kbase_vmap_reg(kctx, buf_desc_reg, buf_desc_va, TILER_BUF_DESC_SIZE,
KBASE_REG_CPU_RD, &heap->buf_desc_map,
KBASE_VMAP_FLAG_PERMANENT_MAP_ACCOUNTING);
if (kbase_page_migration_enabled)
kbase_set_phy_alloc_page_status(buf_desc_reg->gpu_alloc, NOT_MOVABLE);
kbase_gpu_vm_unlock(kctx);
if (unlikely(!vmap_ptr)) {
@@ -811,7 +835,7 @@ heap_context_alloc_failed:
buf_desc_vmap_failed:
if (heap->buf_desc_reg) {
kbase_gpu_vm_lock(kctx);
heap->buf_desc_reg->flags &= ~KBASE_REG_NO_USER_FREE;
kbase_va_region_no_user_free_dec(heap->buf_desc_reg);
kbase_gpu_vm_unlock(kctx);
}
buf_desc_not_suitable:
@@ -866,6 +890,25 @@ int kbase_csf_tiler_heap_term(struct kbase_context *const kctx,
return err;
}
/**
* validate_allocation_request - Check whether the chunk allocation request
* received on tiler OOM should be handled at
* current time.
*
* @heap: The tiler heap the OOM is associated with
* @nr_in_flight: Number of fragment jobs in flight
* @pending_frag_count: Number of pending fragment jobs
*
* Context: must hold the tiler heap lock to guarantee its lifetime
*
* Return:
* * 0 - allowed to allocate an additional chunk
* * -EINVAL - invalid
* * -EBUSY - there are fragment jobs still in flight, which may free chunks
* after completing
* * -ENOMEM - the targeted number of in-flight chunks has been reached and
* no new ones will be allocated
*/
static int validate_allocation_request(struct kbase_csf_tiler_heap *heap, u32 nr_in_flight,
u32 pending_frag_count)
{
@@ -925,7 +968,12 @@ int kbase_csf_tiler_heap_alloc_new_chunk(struct kbase_context *kctx,
err = validate_allocation_request(heap, nr_in_flight, pending_frag_count);
if (unlikely(err)) {
dev_err(kctx->kbdev->dev,
/* The allocation request can be legitimate, but be invoked on a heap
* that has already reached the maximum pre-configured capacity. This
* is useful debug information, but should not be treated as an error,
* since the request will be re-sent at a later point.
*/
dev_dbg(kctx->kbdev->dev,
"Not allocating new chunk for heap 0x%llX due to current heap state (err %d)",
gpu_heap_va, err);
mutex_unlock(&kctx->csf.tiler_heaps.lock);

View File

@@ -346,7 +346,11 @@ void kbase_csf_tiler_heap_reclaim_mgr_init(struct kbase_device *kbdev)
reclaim->batch = HEAP_SHRINKER_BATCH;
#if !defined(CONFIG_MALI_VECTOR_DUMP)
#if KERNEL_VERSION(6, 0, 0) > LINUX_VERSION_CODE
register_shrinker(reclaim);
#else
register_shrinker(reclaim, "mali-csf-tiler-heap");
#endif
#endif
}

View File

@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
* (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -31,9 +31,7 @@
#include "mali_kbase_pm.h"
#include "mali_kbase_hwaccess_time.h"
#include <linux/gcd.h>
#include <linux/math64.h>
#include <asm/arch_timer.h>
#if IS_ENABLED(CONFIG_DEBUG_FS)
#include "tl/mali_kbase_timeline_priv.h"
@@ -97,81 +95,6 @@ void kbase_csf_tl_reader_debugfs_init(struct kbase_device *kbdev)
}
#endif
/**
* get_cpu_gpu_time() - Get current CPU and GPU timestamps.
*
* @kbdev: Kbase device.
* @cpu_ts: Output CPU timestamp.
* @gpu_ts: Output GPU timestamp.
* @gpu_cycle: Output GPU cycle counts.
*/
static void get_cpu_gpu_time(
struct kbase_device *kbdev,
u64 *cpu_ts,
u64 *gpu_ts,
u64 *gpu_cycle)
{
struct timespec64 ts;
kbase_pm_context_active(kbdev);
kbase_backend_get_gpu_time(kbdev, gpu_cycle, gpu_ts, &ts);
kbase_pm_context_idle(kbdev);
if (cpu_ts)
*cpu_ts = ts.tv_sec * NSEC_PER_SEC + ts.tv_nsec;
}
/**
* kbase_ts_converter_init() - Initialize system timestamp converter.
*
* @self: System Timestamp Converter instance.
* @kbdev: Kbase device pointer
*
* Return: Zero on success, -1 otherwise.
*/
static int kbase_ts_converter_init(
struct kbase_ts_converter *self,
struct kbase_device *kbdev)
{
u64 cpu_ts = 0;
u64 gpu_ts = 0;
u64 freq;
u64 common_factor;
get_cpu_gpu_time(kbdev, &cpu_ts, &gpu_ts, NULL);
freq = arch_timer_get_cntfrq();
if (!freq) {
dev_warn(kbdev->dev, "arch_timer_get_rate() is zero!");
return -1;
}
common_factor = gcd(NSEC_PER_SEC, freq);
self->multiplier = div64_u64(NSEC_PER_SEC, common_factor);
self->divisor = div64_u64(freq, common_factor);
self->offset =
cpu_ts - div64_u64(gpu_ts * self->multiplier, self->divisor);
return 0;
}
/**
* kbase_ts_converter_convert() - Convert GPU timestamp to CPU timestamp.
*
* @self: System Timestamp Converter instance.
* @gpu_ts: System timestamp value to converter.
*
* Return: The CPU timestamp.
*/
static u64 __maybe_unused
kbase_ts_converter_convert(const struct kbase_ts_converter *self, u64 gpu_ts)
{
return div64_u64(gpu_ts * self->multiplier, self->divisor) +
self->offset;
}
/**
* tl_reader_overflow_notify() - Emit stream overflow tracepoint.
*
@@ -322,8 +245,8 @@ int kbase_csf_tl_reader_flush_buffer(struct kbase_csf_tl_reader *self)
{
struct kbase_csffw_tl_message *msg =
(struct kbase_csffw_tl_message *) csffw_data_it;
msg->timestamp = kbase_ts_converter_convert(&self->ts_converter,
msg->timestamp);
msg->timestamp =
kbase_backend_time_convert_gpu_to_cpu(kbdev, msg->timestamp);
}
/* Copy the message out to the tl_stream. */
@@ -397,9 +320,6 @@ static int tl_reader_init_late(
return -1;
}
if (kbase_ts_converter_init(&self->ts_converter, kbdev))
return -1;
self->kbdev = kbdev;
self->trace_buffer = tb;
self->tl_header.data = hdr;

View File

@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
* (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -39,37 +39,6 @@ struct firmware_trace_buffer;
struct kbase_tlstream;
struct kbase_device;
/**
* struct kbase_ts_converter - System timestamp to CPU timestamp converter state.
*
* @multiplier: Numerator of the converter's fraction.
* @divisor: Denominator of the converter's fraction.
* @offset: Converter's offset term.
*
* According to Generic timer spec, system timer:
* - Increments at a fixed frequency
* - Starts operating from zero
*
* Hence CPU time is a linear function of System Time.
*
* CPU_ts = alpha * SYS_ts + beta
*
* Where
* - alpha = 10^9/SYS_ts_freq
* - beta is calculated by two timer samples taken at the same time:
* beta = CPU_ts_s - SYS_ts_s * alpha
*
* Since alpha is a rational number, we minimizing possible
* rounding error by simplifying the ratio. Thus alpha is stored
* as a simple `multiplier / divisor` ratio.
*
*/
struct kbase_ts_converter {
u64 multiplier;
u64 divisor;
s64 offset;
};
/**
* struct kbase_csf_tl_reader - CSFFW timeline reader state.
*
@@ -106,7 +75,6 @@ struct kbase_csf_tl_reader {
size_t size;
size_t btc;
} tl_header;
struct kbase_ts_converter ts_converter;
bool got_first_event;
bool is_active;

View File

@@ -1,6 +1,6 @@
# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
#
# (C) COPYRIGHT 2021 ARM Limited. All rights reserved.
# (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved.
#
# This program is free software and is provided to you under the terms of the
# GNU General Public License version 2 as published by the Free Software
@@ -22,6 +22,7 @@ mali_kbase-y += debug/mali_kbase_debug_ktrace.o
ifeq ($(CONFIG_MALI_CSF_SUPPORT),y)
mali_kbase-y += debug/backend/mali_kbase_debug_ktrace_csf.o
mali_kbase-$(CONFIG_MALI_CORESIGHT) += debug/backend/mali_kbase_debug_coresight_csf.o
else
mali_kbase-y += debug/backend/mali_kbase_debug_ktrace_jm.o
endif

View File

@@ -0,0 +1,851 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
* (C) COPYRIGHT 2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
* Foundation, and any use by you of this program is subject to the terms
* of such GNU license.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, you can access it online at
* http://www.gnu.org/licenses/gpl-2.0.html.
*
*/
#include <mali_kbase.h>
#include <linux/slab.h>
#include <csf/mali_kbase_csf_registers.h>
#include <csf/mali_kbase_csf_firmware.h>
#include <backend/gpu/mali_kbase_pm_internal.h>
#include <linux/mali_kbase_debug_coresight_csf.h>
#include <debug/backend/mali_kbase_debug_coresight_internal_csf.h>
/* Map a coresight state enum value to a human-readable name for log messages. */
static const char *coresight_state_to_string(enum kbase_debug_coresight_csf_state state)
{
	if (state == KBASE_DEBUG_CORESIGHT_CSF_DISABLED)
		return "DISABLED";
	if (state == KBASE_DEBUG_CORESIGHT_CSF_ENABLED)
		return "ENABLED";

	/* Any other value is unexpected but must still stringify safely. */
	return "UNKNOWN";
}
/*
 * Check that @reg_addr is 32-bit aligned and lies inside one of the address
 * ranges registered by @client. @op_type is used only in error messages.
 *
 * Return: true if the address may be accessed by this client.
 */
static bool validate_reg_addr(struct kbase_debug_coresight_csf_client *client,
			      struct kbase_device *kbdev, u32 reg_addr, u8 op_type)
{
	int idx;

	/* MCU registers are 32-bit wide; unaligned addresses are never valid. */
	if (reg_addr & 0x3) {
		dev_err(kbdev->dev, "Invalid operation %d: reg_addr (0x%x) not 32bit aligned",
			op_type, reg_addr);
		return false;
	}

	/* Ranges are inclusive at both ends. */
	for (idx = 0; idx < client->nr_ranges; idx++) {
		struct kbase_debug_coresight_csf_address_range *range = &client->addr_ranges[idx];

		if ((range->start <= reg_addr) && (reg_addr <= range->end))
			return true;
	}

	dev_err(kbdev->dev, "Invalid operation %d: reg_addr (0x%x) not in client range", op_type,
		reg_addr);
	return false;
}
/*
 * Validate a single coresight operation against the client's registered
 * address ranges: register addresses must be in range and any data
 * pointers must be non-NULL.
 *
 * Return: true if the operation is well-formed and permitted.
 */
static bool validate_op(struct kbase_debug_coresight_csf_client *client,
			struct kbase_debug_coresight_csf_op *op)
{
	struct kbase_device *kbdev;
	u32 reg;

	if (!op)
		return false;

	if (!client)
		return false;

	kbdev = (struct kbase_device *)client->drv_data;

	switch (op->type) {
	case KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_NOP:
		/* Nothing to check for a no-op. */
		return true;
	case KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_WRITE_IMM:
		return validate_reg_addr(client, kbdev, op->op.write_imm.reg_addr, op->type);
	case KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_WRITE_IMM_RANGE:
		/* Every register in the inclusive range must be valid. */
		for (reg = op->op.write_imm_range.reg_start; reg <= op->op.write_imm_range.reg_end;
		     reg += sizeof(u32)) {
			if (!validate_reg_addr(client, kbdev, reg, op->type))
				return false;
		}
		return true;
	case KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_WRITE:
		if (!op->op.write.ptr) {
			dev_err(kbdev->dev, "Invalid operation %d: ptr not set", op->type);
			return false;
		}
		return validate_reg_addr(client, kbdev, op->op.write.reg_addr, op->type);
	case KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_READ:
		if (!op->op.read.ptr) {
			dev_err(kbdev->dev, "Invalid operation %d: ptr not set", op->type);
			return false;
		}
		return validate_reg_addr(client, kbdev, op->op.read.reg_addr, op->type);
	case KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_POLL:
		return validate_reg_addr(client, kbdev, op->op.poll.reg_addr, op->type);
	case KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_BIT_AND:
	case KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_BIT_OR:
	case KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_BIT_XOR:
	case KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_BIT_NOT:
		/* Bitwise ops act on a host pointer, not an MCU register. */
		if (op->op.bitw.ptr != NULL)
			return true;
		dev_err(kbdev->dev, "Invalid bitwise operation pointer");
		return false;
	default:
		dev_err(kbdev->dev, "Invalid operation %d", op->type);
		return false;
	}

	return false;
}
/* A sequence is valid only if every one of its operations validates. */
static bool validate_seq(struct kbase_debug_coresight_csf_client *client,
			 struct kbase_debug_coresight_csf_sequence *seq)
{
	int idx;

	for (idx = 0; idx < seq->nr_ops; idx++) {
		if (!validate_op(client, &seq->ops[idx]))
			return false;
	}

	return true;
}
/*
 * execute_op - Execute one coresight operation against the MCU.
 *
 * @kbdev: Kbase device.
 * @op:    Operation to execute; expected to have passed validate_op().
 *
 * Return: 0 on success, negative error code otherwise (0 = success is the
 * convention used by all callers in this file, e.g. coresight_config_enable).
 */
static int execute_op(struct kbase_device *kbdev, struct kbase_debug_coresight_csf_op *op)
{
	int result = -EINVAL;
	u32 reg;

	dev_dbg(kbdev->dev, "Execute operation %d", op->type);

	switch (op->type) {
	case KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_NOP:
		result = 0;
		break;
	case KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_WRITE_IMM:
		/* Bug fix: read reg_addr through the write_imm union member that
		 * the caller actually populated; the previous code read it via
		 * op->op.write, which only worked by union-layout coincidence.
		 */
		result = kbase_csf_firmware_mcu_register_write(kbdev, op->op.write_imm.reg_addr,
							       op->op.write_imm.val);
		break;
	case KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_WRITE_IMM_RANGE:
		for (reg = op->op.write_imm_range.reg_start; reg <= op->op.write_imm_range.reg_end;
		     reg += sizeof(u32)) {
			result = kbase_csf_firmware_mcu_register_write(kbdev, reg,
								       op->op.write_imm_range.val);
			/* Bug fix: abort on the first failure (non-zero). The old
			 * condition (!result) stopped after the first *successful*
			 * write, leaving the rest of the range unwritten and
			 * continuing (masking errors) on failure.
			 */
			if (result)
				break;
		}
		break;
	case KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_WRITE:
		result = kbase_csf_firmware_mcu_register_write(kbdev, op->op.write.reg_addr,
							       *op->op.write.ptr);
		break;
	case KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_READ:
		result = kbase_csf_firmware_mcu_register_read(kbdev, op->op.read.reg_addr,
							      op->op.read.ptr);
		break;
	case KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_POLL:
		result = kbase_csf_firmware_mcu_register_poll(kbdev, op->op.poll.reg_addr,
							      op->op.poll.mask, op->op.poll.val);
		break;
	case KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_BIT_AND:
		*op->op.bitw.ptr &= op->op.bitw.val;
		result = 0;
		break;
	case KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_BIT_OR:
		*op->op.bitw.ptr |= op->op.bitw.val;
		result = 0;
		break;
	case KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_BIT_XOR:
		*op->op.bitw.ptr ^= op->op.bitw.val;
		result = 0;
		break;
	case KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_BIT_NOT:
		*op->op.bitw.ptr = ~(*op->op.bitw.ptr);
		result = 0;
		break;
	default:
		dev_err(kbdev->dev, "Invalid operation %d", op->type);
		break;
	}

	return result;
}
/*
 * Run @config's enable sequence and, on success, move it to ENABLED.
 *
 * Idempotent: returns 0 immediately if already enabled. The outcome is
 * always recorded in config->error so a later disable can propagate it.
 *
 * Return: 0 on success, negative error code otherwise.
 */
static int coresight_config_enable(struct kbase_device *kbdev,
				   struct kbase_debug_coresight_csf_config *config)
{
	int ret = 0;
	int i;

	if (!config)
		return -EINVAL;

	if (config->state == KBASE_DEBUG_CORESIGHT_CSF_ENABLED)
		return ret;

	/* A missing enable sequence is legal and counts as success. */
	if (config->enable_seq) {
		for (i = 0; !ret && i < config->enable_seq->nr_ops; i++)
			ret = execute_op(kbdev, &config->enable_seq->ops[i]);
	}

	if (!ret) {
		dev_dbg(kbdev->dev, "Coresight config (0x%pK) state transition: %s to %s", config,
			coresight_state_to_string(config->state),
			coresight_state_to_string(KBASE_DEBUG_CORESIGHT_CSF_ENABLED));
		config->state = KBASE_DEBUG_CORESIGHT_CSF_ENABLED;
	}

	/* Always assign the return code during config enable.
	 * It gets propagated when calling config disable.
	 */
	config->error = ret;

	return ret;
}
static int coresight_config_disable(struct kbase_device *kbdev,
struct kbase_debug_coresight_csf_config *config)
{
int ret = 0;
int i;
if (!config)
return -EINVAL;
if (config->state == KBASE_DEBUG_CORESIGHT_CSF_DISABLED)
return ret;
for (i = 0; config->disable_seq && !ret && i < config->disable_seq->nr_ops; i++)
ret = execute_op(kbdev, &config->disable_seq->ops[i]);
if (!ret) {
dev_dbg(kbdev->dev, "Coresight config (0x%pK) state transition: %s to %s", config,
coresight_state_to_string(config->state),
coresight_state_to_string(KBASE_DEBUG_CORESIGHT_CSF_DISABLED));
config->state = KBASE_DEBUG_CORESIGHT_CSF_DISABLED;
} else {
/* Only assign the error if ret is not 0.
* As we don't want to overwrite an error from config enable
*/
if (!config->error)
config->error = ret;
}
return ret;
}
/*
 * kbase_debug_coresight_csf_register - Register a coresight client claiming
 * exclusive access to a set of MCU register address ranges.
 *
 * @drv_data:  Kbase device pointer, stored in the client for later use.
 * @ranges:    Array of inclusive address ranges the client wants to access.
 * @nr_ranges: Number of entries in @ranges.
 *
 * Registration fails if any requested range is malformed or overlaps a
 * range already claimed by another client.
 *
 * Return: opaque client handle, or NULL on error.
 */
void *kbase_debug_coresight_csf_register(void *drv_data,
					 struct kbase_debug_coresight_csf_address_range *ranges,
					 int nr_ranges)
{
	struct kbase_debug_coresight_csf_client *client, *client_entry;
	struct kbase_device *kbdev;
	unsigned long flags;
	int k;

	if (unlikely(!drv_data)) {
		pr_err("NULL drv_data");
		return NULL;
	}

	kbdev = (struct kbase_device *)drv_data;

	if (unlikely(!ranges)) {
		dev_err(kbdev->dev, "NULL ranges");
		return NULL;
	}

	if (unlikely(!nr_ranges)) {
		dev_err(kbdev->dev, "nr_ranges is 0");
		return NULL;
	}

	/* Each range must satisfy start <= end. */
	for (k = 0; k < nr_ranges; k++) {
		if (ranges[k].end < ranges[k].start) {
			dev_err(kbdev->dev, "Invalid address ranges 0x%08x - 0x%08x",
				ranges[k].start, ranges[k].end);
			return NULL;
		}
	}

	client = kzalloc(sizeof(struct kbase_debug_coresight_csf_client), GFP_KERNEL);

	if (!client)
		return NULL;

	spin_lock_irqsave(&kbdev->csf.coresight.lock, flags);
	list_for_each_entry(client_entry, &kbdev->csf.coresight.clients, link) {
		struct kbase_debug_coresight_csf_address_range *client_ranges =
			client_entry->addr_ranges;
		int i;

		for (i = 0; i < client_entry->nr_ranges; i++) {
			int j;

			for (j = 0; j < nr_ranges; j++) {
				/* Bug fix: ranges are inclusive at both ends (see
				 * validate_reg_addr), so two ranges that share a
				 * boundary address DO overlap; the interval test
				 * must use <= rather than <.
				 */
				if ((ranges[j].start <= client_ranges[i].end) &&
				    (client_ranges[i].start <= ranges[j].end)) {
					spin_unlock_irqrestore(&kbdev->csf.coresight.lock, flags);
					kfree(client);
					dev_err(kbdev->dev,
						"Client with range 0x%08x - 0x%08x already present at address range 0x%08x - 0x%08x",
						client_ranges[i].start, client_ranges[i].end,
						ranges[j].start, ranges[j].end);

					return NULL;
				}
			}
		}
	}

	client->drv_data = drv_data;
	client->addr_ranges = ranges;
	client->nr_ranges = nr_ranges;
	list_add(&client->link, &kbdev->csf.coresight.clients);
	spin_unlock_irqrestore(&kbdev->csf.coresight.lock, flags);

	return client;
}
EXPORT_SYMBOL(kbase_debug_coresight_csf_register);
/*
 * kbase_debug_coresight_csf_unregister - Unregister a client and free every
 * config it still owns, then free the client itself.
 *
 * @client_data: Opaque handle returned by kbase_debug_coresight_csf_register().
 */
void kbase_debug_coresight_csf_unregister(void *client_data)
{
	struct kbase_debug_coresight_csf_client *client;
	struct kbase_debug_coresight_csf_config *config_entry;
	struct kbase_device *kbdev;
	unsigned long flags;
	bool retry = true;

	if (unlikely(!client_data)) {
		pr_err("NULL client");
		return;
	}

	client = (struct kbase_debug_coresight_csf_client *)client_data;

	kbdev = (struct kbase_device *)client->drv_data;
	if (unlikely(!kbdev)) {
		pr_err("NULL drv_data in client");
		return;
	}

	/* check for active config from client */
	spin_lock_irqsave(&kbdev->csf.coresight.lock, flags);
	/* Unlink the client first so no further configs can target it. */
	list_del_init(&client->link);

	/* Free the client's remaining configs. config_free() -> config_disable()
	 * re-takes csf.coresight.lock and unlinks the entry, so the lock is
	 * dropped around the call and the list walk restarted from the head
	 * after each freed entry (retry loop) to avoid iterating a stale node.
	 */
	while (retry && !list_empty(&kbdev->csf.coresight.configs)) {
		retry = false;
		list_for_each_entry(config_entry, &kbdev->csf.coresight.configs, link) {
			if (config_entry->client == client) {
				spin_unlock_irqrestore(&kbdev->csf.coresight.lock, flags);
				kbase_debug_coresight_csf_config_free(config_entry);
				spin_lock_irqsave(&kbdev->csf.coresight.lock, flags);
				retry = true;
				break;
			}
		}
	}
	spin_unlock_irqrestore(&kbdev->csf.coresight.lock, flags);

	kfree(client);
}
EXPORT_SYMBOL(kbase_debug_coresight_csf_unregister);
/*
 * kbase_debug_coresight_csf_config_create - Create a config that binds a
 * client to optional enable/disable operation sequences.
 *
 * @client_data: Opaque handle returned by kbase_debug_coresight_csf_register().
 * @enable_seq:  Sequence to run when the config is enabled (may be NULL).
 * @disable_seq: Sequence to run when the config is disabled (may be NULL).
 *
 * Both sequences are validated against the client's registered address
 * ranges before any allocation is made.
 *
 * Return: pointer to the new config, or NULL on invalid input or OOM.
 */
void *
kbase_debug_coresight_csf_config_create(void *client_data,
					struct kbase_debug_coresight_csf_sequence *enable_seq,
					struct kbase_debug_coresight_csf_sequence *disable_seq)
{
	struct kbase_debug_coresight_csf_client *client;
	struct kbase_debug_coresight_csf_config *config;
	struct kbase_device *kbdev;

	if (unlikely(!client_data)) {
		pr_err("NULL client");
		return NULL;
	}

	client = (struct kbase_debug_coresight_csf_client *)client_data;

	kbdev = (struct kbase_device *)client->drv_data;
	if (unlikely(!kbdev)) {
		pr_err("NULL drv_data in client");
		return NULL;
	}

	if (enable_seq) {
		if (!validate_seq(client, enable_seq)) {
			dev_err(kbdev->dev, "Invalid enable_seq");
			return NULL;
		}
	}

	if (disable_seq) {
		if (!validate_seq(client, disable_seq)) {
			dev_err(kbdev->dev, "Invalid disable_seq");
			return NULL;
		}
	}

	config = kzalloc(sizeof(struct kbase_debug_coresight_csf_config), GFP_KERNEL);
	/* Bug fix: check the allocation result, not the already-validated
	 * client pointer — the old WARN_ON(!client) let an OOM fall through
	 * to a NULL dereference of config below.
	 */
	if (WARN_ON(!config))
		return NULL;

	config->client = client;
	config->enable_seq = enable_seq;
	config->disable_seq = disable_seq;
	config->error = 0;
	config->state = KBASE_DEBUG_CORESIGHT_CSF_DISABLED;

	INIT_LIST_HEAD(&config->link);

	return config;
}
EXPORT_SYMBOL(kbase_debug_coresight_csf_config_create);
/*
 * kbase_debug_coresight_csf_config_free - Disable (if necessary) and free a
 * config created by kbase_debug_coresight_csf_config_create().
 *
 * @config_data: Opaque config handle; NULL is rejected with an error log.
 */
void kbase_debug_coresight_csf_config_free(void *config_data)
{
	struct kbase_debug_coresight_csf_config *cfg;

	if (unlikely(!config_data)) {
		pr_err("NULL config");
		return;
	}

	cfg = (struct kbase_debug_coresight_csf_config *)config_data;

	/* Tear down the hardware side before releasing the memory. */
	kbase_debug_coresight_csf_config_disable(cfg);
	kfree(cfg);
}
EXPORT_SYMBOL(kbase_debug_coresight_csf_config_free);
/*
 * kbase_debug_coresight_csf_config_enable - Enable a coresight config.
 *
 * @config_data: Opaque config handle from _config_create().
 *
 * If the MCU is expected to be ON (scheduler neither suspended, sleeping
 * nor in protected mode, or the always_on PM policy is active), the enable
 * sequence is executed synchronously here. The config is then added to the
 * device list so the PM state machine re-applies it on later MCU power-ups.
 *
 * Return: 0 on success, negative error code otherwise.
 */
int kbase_debug_coresight_csf_config_enable(void *config_data)
{
	struct kbase_debug_coresight_csf_config *config;
	struct kbase_debug_coresight_csf_client *client;
	struct kbase_device *kbdev;
	struct kbase_debug_coresight_csf_config *config_entry;
	unsigned long flags;
	int ret = 0;

	if (unlikely(!config_data)) {
		pr_err("NULL config");
		return -EINVAL;
	}

	config = (struct kbase_debug_coresight_csf_config *)config_data;
	client = (struct kbase_debug_coresight_csf_client *)config->client;

	if (unlikely(!client)) {
		pr_err("NULL client in config");
		return -EINVAL;
	}

	kbdev = (struct kbase_device *)client->drv_data;
	if (unlikely(!kbdev)) {
		pr_err("NULL drv_data in client");
		return -EINVAL;
	}

	/* Check to prevent double entry of config */
	spin_lock_irqsave(&kbdev->csf.coresight.lock, flags);
	list_for_each_entry(config_entry, &kbdev->csf.coresight.configs, link) {
		if (config_entry == config) {
			spin_unlock_irqrestore(&kbdev->csf.coresight.lock, flags);
			dev_err(kbdev->dev, "Config already enabled");
			return -EINVAL;
		}
	}
	spin_unlock_irqrestore(&kbdev->csf.coresight.lock, flags);

	kbase_csf_scheduler_lock(kbdev);
	kbase_csf_scheduler_spin_lock(kbdev, &flags);

	/* Check the state of Scheduler to confirm the desired state of MCU */
	if (((kbdev->csf.scheduler.state != SCHED_SUSPENDED) &&
	     (kbdev->csf.scheduler.state != SCHED_SLEEPING) &&
	     !kbase_csf_scheduler_protected_mode_in_use(kbdev)) ||
	    kbase_pm_get_policy(kbdev) == &kbase_pm_always_on_policy_ops) {
		/* Scheduler spinlock is dropped across the blocking PM wait and
		 * MCU register accesses, then re-taken before list update.
		 */
		kbase_csf_scheduler_spin_unlock(kbdev, flags);
		/* Wait for MCU to reach the stable ON state */
		ret = kbase_pm_wait_for_desired_state(kbdev);

		if (ret)
			dev_err(kbdev->dev,
				"Wait for PM state failed when enabling coresight config");
		else
			ret = coresight_config_enable(kbdev, config);

		kbase_csf_scheduler_spin_lock(kbdev, &flags);
	}

	/* Add config to next enable sequence */
	if (!ret) {
		/* Plain spin_lock: interrupts already disabled by the scheduler
		 * spin lock held at this point.
		 */
		spin_lock(&kbdev->csf.coresight.lock);
		list_add(&config->link, &kbdev->csf.coresight.configs);
		spin_unlock(&kbdev->csf.coresight.lock);
	}

	kbase_csf_scheduler_spin_unlock(kbdev, flags);
	kbase_csf_scheduler_unlock(kbdev);

	return ret;
}
EXPORT_SYMBOL(kbase_debug_coresight_csf_config_enable);
/*
 * kbase_debug_coresight_csf_config_disable - Disable a previously enabled
 * coresight config and remove it from the device list.
 *
 * @config_data: Opaque config handle from _config_create().
 *
 * If the MCU is expected to be ON, the disable sequence runs synchronously
 * here. If the MCU is already OFF, the PM state machine has executed the
 * disable sequence and only the recorded error is propagated.
 *
 * Return: 0 on success (including "was never enabled"), negative error
 * code otherwise.
 */
int kbase_debug_coresight_csf_config_disable(void *config_data)
{
	struct kbase_debug_coresight_csf_config *config;
	struct kbase_debug_coresight_csf_client *client;
	struct kbase_device *kbdev;
	struct kbase_debug_coresight_csf_config *config_entry;
	bool found_in_list = false;
	unsigned long flags;
	int ret = 0;

	if (unlikely(!config_data)) {
		pr_err("NULL config");
		return -EINVAL;
	}

	config = (struct kbase_debug_coresight_csf_config *)config_data;

	/* Exit early if not enabled prior: an unlinked config (list_empty on
	 * its own link node) was never added by config_enable.
	 */
	if (list_empty(&config->link))
		return ret;

	client = (struct kbase_debug_coresight_csf_client *)config->client;

	if (unlikely(!client)) {
		pr_err("NULL client in config");
		return -EINVAL;
	}

	kbdev = (struct kbase_device *)client->drv_data;
	if (unlikely(!kbdev)) {
		pr_err("NULL drv_data in client");
		return -EINVAL;
	}

	/* Check if the config is in the correct list */
	spin_lock_irqsave(&kbdev->csf.coresight.lock, flags);
	list_for_each_entry(config_entry, &kbdev->csf.coresight.configs, link) {
		if (config_entry == config) {
			found_in_list = true;
			break;
		}
	}
	spin_unlock_irqrestore(&kbdev->csf.coresight.lock, flags);

	if (!found_in_list) {
		dev_err(kbdev->dev, "Config looks corrupted");
		return -EINVAL;
	}

	kbase_csf_scheduler_lock(kbdev);
	kbase_csf_scheduler_spin_lock(kbdev, &flags);

	/* Check the state of Scheduler to confirm the desired state of MCU */
	if (((kbdev->csf.scheduler.state != SCHED_SUSPENDED) &&
	     (kbdev->csf.scheduler.state != SCHED_SLEEPING) &&
	     !kbase_csf_scheduler_protected_mode_in_use(kbdev)) ||
	    kbase_pm_get_policy(kbdev) == &kbase_pm_always_on_policy_ops) {
		/* Scheduler spinlock is dropped across the blocking PM wait and
		 * MCU register accesses, then re-taken afterwards.
		 */
		kbase_csf_scheduler_spin_unlock(kbdev, flags);
		/* Wait for MCU to reach the stable ON state */
		ret = kbase_pm_wait_for_desired_state(kbdev);

		if (ret)
			dev_err(kbdev->dev,
				"Wait for PM state failed when disabling coresight config");
		else
			ret = coresight_config_disable(kbdev, config);

		kbase_csf_scheduler_spin_lock(kbdev, &flags);
	} else if (kbdev->pm.backend.mcu_state == KBASE_MCU_OFF) {
		/* MCU is OFF, so the disable sequence was already executed.
		 *
		 * Propagate any error that would have occurred during the enable
		 * or disable sequence.
		 *
		 * This is done as part of the disable sequence, since the call from
		 * client is synchronous.
		 */
		ret = config->error;
	}

	/* Remove config from next disable sequence */
	spin_lock(&kbdev->csf.coresight.lock);
	list_del_init(&config->link);
	spin_unlock(&kbdev->csf.coresight.lock);

	kbase_csf_scheduler_spin_unlock(kbdev, flags);
	kbase_csf_scheduler_unlock(kbdev);

	return ret;
}
EXPORT_SYMBOL(kbase_debug_coresight_csf_config_disable);
/*
 * coresight_config_enable_all - Workqueue handler running the enable
 * sequence of every config on the device's config list.
 *
 * @data: &kbase_device.csf.coresight.enable_work embedded in the device.
 *
 * Afterwards the PM state machine is kicked and waiters on
 * csf.coresight.event_wait are woken.
 */
static void coresight_config_enable_all(struct work_struct *data)
{
	struct kbase_device *kbdev =
		container_of(data, struct kbase_device, csf.coresight.enable_work);
	struct kbase_debug_coresight_csf_config *config_entry;
	unsigned long flags;

	spin_lock_irqsave(&kbdev->csf.coresight.lock, flags);
	list_for_each_entry(config_entry, &kbdev->csf.coresight.configs, link) {
		/* Lock is dropped around the MCU register accesses.
		 * NOTE(review): iteration resumes from config_entry afterwards,
		 * which assumes the entry stays linked while the lock is
		 * dropped — confirm against config_free/unregister callers.
		 */
		spin_unlock_irqrestore(&kbdev->csf.coresight.lock, flags);
		if (coresight_config_enable(kbdev, config_entry))
			dev_err(kbdev->dev, "enable config (0x%pK) failed", config_entry);
		spin_lock_irqsave(&kbdev->csf.coresight.lock, flags);
	}
	spin_unlock_irqrestore(&kbdev->csf.coresight.lock, flags);

	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
	kbase_pm_update_state(kbdev);
	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);

	wake_up_all(&kbdev->csf.coresight.event_wait);
}
/*
 * coresight_config_disable_all - Workqueue handler running the disable
 * sequence of every config on the device's config list.
 *
 * @data: &kbase_device.csf.coresight.disable_work embedded in the device.
 *
 * Mirror image of coresight_config_enable_all(); see the locking note
 * there about dropping the lock mid-iteration.
 */
static void coresight_config_disable_all(struct work_struct *data)
{
	struct kbase_device *kbdev =
		container_of(data, struct kbase_device, csf.coresight.disable_work);
	struct kbase_debug_coresight_csf_config *config_entry;
	unsigned long flags;

	spin_lock_irqsave(&kbdev->csf.coresight.lock, flags);
	list_for_each_entry(config_entry, &kbdev->csf.coresight.configs, link) {
		/* Lock dropped around the (potentially slow) MCU accesses. */
		spin_unlock_irqrestore(&kbdev->csf.coresight.lock, flags);
		if (coresight_config_disable(kbdev, config_entry))
			dev_err(kbdev->dev, "disable config (0x%pK) failed", config_entry);
		spin_lock_irqsave(&kbdev->csf.coresight.lock, flags);
	}
	spin_unlock_irqrestore(&kbdev->csf.coresight.lock, flags);

	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
	kbase_pm_update_state(kbdev);
	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);

	wake_up_all(&kbdev->csf.coresight.event_wait);
}
/**
 * kbase_debug_coresight_csf_disable_pmode_enter - Request Coresight disable
 *                                                 before protected mode entry.
 *
 * @kbdev: Instance of a GPU platform device that implements a CSF interface.
 *
 * Sets the disable_on_pmode_enter flag, kicks the PM state machine and then
 * waits for it to reach the desired state before the caller proceeds with
 * protected mode entry. Must be called with the scheduler lock held.
 *
 * Fix: the return value of kbase_pm_wait_for_desired_state() was previously
 * ignored; a timeout/failure is now reported, matching how the same wait is
 * handled in the config disable path.
 */
void kbase_debug_coresight_csf_disable_pmode_enter(struct kbase_device *kbdev)
{
	unsigned long flags;
	int ret;

	dev_dbg(kbdev->dev, "Coresight state %s before protected mode enter",
		coresight_state_to_string(KBASE_DEBUG_CORESIGHT_CSF_ENABLED));

	lockdep_assert_held(&kbdev->csf.scheduler.lock);

	kbase_pm_lock(kbdev);
	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
	kbdev->csf.coresight.disable_on_pmode_enter = true;
	kbdev->csf.coresight.enable_on_pmode_exit = false;
	kbase_pm_update_state(kbdev);
	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);

	ret = kbase_pm_wait_for_desired_state(kbdev);
	if (ret)
		dev_err(kbdev->dev,
			"Wait for PM state failed when disabling Coresight on protected mode enter");

	kbase_pm_unlock(kbdev);
}
/**
 * kbase_debug_coresight_csf_enable_pmode_exit - Request Coresight re-enable
 *                                               after protected mode exit.
 *
 * @kbdev: Instance of a GPU platform device that implements a CSF interface.
 *
 * Sets the enable_on_pmode_exit flag and kicks the PM state machine; the
 * actual enable sequence is driven by the state machine, not performed here.
 * Must be called with the hwaccess_lock held. The WARN_ON documents the
 * expectation that a pending pmode-enter disable has already been consumed.
 */
void kbase_debug_coresight_csf_enable_pmode_exit(struct kbase_device *kbdev)
{
	dev_dbg(kbdev->dev, "Coresight state %s after protected mode exit",
		coresight_state_to_string(KBASE_DEBUG_CORESIGHT_CSF_DISABLED));
	lockdep_assert_held(&kbdev->hwaccess_lock);
	WARN_ON(kbdev->csf.coresight.disable_on_pmode_enter);
	kbdev->csf.coresight.enable_on_pmode_exit = true;
	kbase_pm_update_state(kbdev);
}
/**
 * kbase_debug_coresight_csf_state_request - Request a Coresight state
 *                                           transition.
 *
 * @kbdev: Instance of a GPU platform device that implements a CSF interface.
 * @state: Target Coresight state.
 *
 * Resolves the requested state to the matching work item and queues it on
 * the Coresight ordered workqueue. Invalid states are logged and ignored.
 */
void kbase_debug_coresight_csf_state_request(struct kbase_device *kbdev,
					     enum kbase_debug_coresight_csf_state state)
{
	struct work_struct *work;

	if (unlikely(!kbdev))
		return;
	if (unlikely(!kbdev->csf.coresight.workq))
		return;

	dev_dbg(kbdev->dev, "Coresight state %s requested", coresight_state_to_string(state));

	switch (state) {
	case KBASE_DEBUG_CORESIGHT_CSF_DISABLED:
		work = &kbdev->csf.coresight.disable_work;
		break;
	case KBASE_DEBUG_CORESIGHT_CSF_ENABLED:
		work = &kbdev->csf.coresight.enable_work;
		break;
	default:
		dev_err(kbdev->dev, "Invalid Coresight state %d", state);
		return;
	}

	queue_work(kbdev->csf.coresight.workq, work);
}
/**
 * kbase_debug_coresight_csf_state_check - Check whether every Coresight
 *                                         configuration is in a given state.
 *
 * @kbdev: Instance of a GPU platform device that implements a CSF interface.
 * @state: Coresight state to check for.
 *
 * Return: true if all registered configurations are in @state (vacuously
 *         true when the list is empty).
 */
bool kbase_debug_coresight_csf_state_check(struct kbase_device *kbdev,
					   enum kbase_debug_coresight_csf_state state)
{
	struct kbase_debug_coresight_csf_config *entry;
	unsigned long flags;
	bool all_match = true;

	dev_dbg(kbdev->dev, "Coresight check for state: %s", coresight_state_to_string(state));

	spin_lock_irqsave(&kbdev->csf.coresight.lock, flags);
	list_for_each_entry(entry, &kbdev->csf.coresight.configs, link) {
		if (entry->state != state) {
			all_match = false;
			break;
		}
	}
	spin_unlock_irqrestore(&kbdev->csf.coresight.lock, flags);

	return all_match;
}
KBASE_EXPORT_TEST_API(kbase_debug_coresight_csf_state_check);
/**
 * kbase_debug_coresight_csf_state_wait - Wait until every Coresight
 *                                        configuration reaches a given state.
 *
 * @kbdev: Instance of a GPU platform device that implements a CSF interface.
 * @state: Coresight state to wait for.
 *
 * For each registered configuration, blocks (up to the firmware timeout) on
 * the Coresight event queue until the entry reports @state. The list lock is
 * dropped across each wait.
 *
 * NOTE(review): config_entry->state is read in the wait condition without
 * the list lock held, and the entry itself could in principle be unlinked
 * while the lock is released — _safe iteration only protects against
 * removal observed between iterations. Confirm against the config free path.
 *
 * Return: true if all configurations reached @state within the timeout.
 */
bool kbase_debug_coresight_csf_state_wait(struct kbase_device *kbdev,
					  enum kbase_debug_coresight_csf_state state)
{
	/* Timeout for each individual wait, derived from the FW timeout. */
	const long wait_timeout = kbase_csf_timeout_in_jiffies(kbdev->csf.fw_timeout_ms);
	struct kbase_debug_coresight_csf_config *config_entry, *next_config_entry;
	unsigned long flags;
	bool success = true;
	dev_dbg(kbdev->dev, "Coresight wait for state: %s", coresight_state_to_string(state));
	spin_lock_irqsave(&kbdev->csf.coresight.lock, flags);
	list_for_each_entry_safe(config_entry, next_config_entry, &kbdev->csf.coresight.configs,
				 link) {
		/* Captured only for the timeout diagnostic below. */
		const enum kbase_debug_coresight_csf_state prev_state = config_entry->state;
		long remaining;
		/* Drop the lock while sleeping on the event queue. */
		spin_unlock_irqrestore(&kbdev->csf.coresight.lock, flags);
		remaining = wait_event_timeout(kbdev->csf.coresight.event_wait,
					       state == config_entry->state, wait_timeout);
		spin_lock_irqsave(&kbdev->csf.coresight.lock, flags);
		if (!remaining) {
			/* Timed out: report, but keep checking the rest. */
			success = false;
			dev_err(kbdev->dev,
				"Timeout waiting for Coresight state transition %s to %s",
				coresight_state_to_string(prev_state),
				coresight_state_to_string(state));
		}
	}
	spin_unlock_irqrestore(&kbdev->csf.coresight.lock, flags);
	return success;
}
KBASE_EXPORT_TEST_API(kbase_debug_coresight_csf_state_wait);
int kbase_debug_coresight_csf_init(struct kbase_device *kbdev)
{
kbdev->csf.coresight.workq = alloc_ordered_workqueue("Mali CoreSight workqueue", 0);
if (kbdev->csf.coresight.workq == NULL)
return -ENOMEM;
INIT_LIST_HEAD(&kbdev->csf.coresight.clients);
INIT_LIST_HEAD(&kbdev->csf.coresight.configs);
INIT_WORK(&kbdev->csf.coresight.enable_work, coresight_config_enable_all);
INIT_WORK(&kbdev->csf.coresight.disable_work, coresight_config_disable_all);
init_waitqueue_head(&kbdev->csf.coresight.event_wait);
spin_lock_init(&kbdev->csf.coresight.lock);
kbdev->csf.coresight.disable_on_pmode_enter = false;
kbdev->csf.coresight.enable_on_pmode_exit = false;
return 0;
}
/**
 * kbase_debug_coresight_csf_term - Terminate Coresight resources.
 *
 * @kbdev: Instance of a GPU platform device that implements a CSF interface.
 *
 * Clears the pmode flags, flushes and destroys the workqueue, then frees
 * every remaining config and client entry under the list lock so nothing
 * leaks if a Coresight module was removed without disabling its trace.
 */
void kbase_debug_coresight_csf_term(struct kbase_device *kbdev)
{
	struct kbase_debug_coresight_csf_client *client, *client_tmp;
	struct kbase_debug_coresight_csf_config *config, *config_tmp;
	unsigned long flags;

	kbdev->csf.coresight.disable_on_pmode_enter = false;
	kbdev->csf.coresight.enable_on_pmode_exit = false;

	/* Make sure no enable/disable worker is still running before the
	 * workqueue is torn down.
	 */
	cancel_work_sync(&kbdev->csf.coresight.enable_work);
	cancel_work_sync(&kbdev->csf.coresight.disable_work);
	destroy_workqueue(kbdev->csf.coresight.workq);
	kbdev->csf.coresight.workq = NULL;

	spin_lock_irqsave(&kbdev->csf.coresight.lock, flags);

	list_for_each_entry_safe(config, config_tmp, &kbdev->csf.coresight.configs, link) {
		list_del_init(&config->link);
		kfree(config);
	}

	list_for_each_entry_safe(client, client_tmp, &kbdev->csf.coresight.clients, link) {
		list_del_init(&client->link);
		kfree(client);
	}

	spin_unlock_irqrestore(&kbdev->csf.coresight.lock, flags);
}

View File

@@ -0,0 +1,182 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
* (C) COPYRIGHT 2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
* Foundation, and any use by you of this program is subject to the terms
* of such GNU license.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, you can access it online at
* http://www.gnu.org/licenses/gpl-2.0.html.
*
*/
#ifndef _KBASE_DEBUG_CORESIGHT_INTERNAL_CSF_H_
#define _KBASE_DEBUG_CORESIGHT_INTERNAL_CSF_H_
#include <mali_kbase.h>
#include <linux/mali_kbase_debug_coresight_csf.h>
/**
 * struct kbase_debug_coresight_csf_client - Coresight client definition
 *
 * @drv_data:    Pointer to driver device data.
 * @addr_ranges: Array of address ranges used by the registered client.
 * @nr_ranges:   Number of entries in the @addr_ranges array.
 * @link:        Link item of a Coresight client.
 *               Linked to &struct kbase_device.csf.coresight.clients.
 */
struct kbase_debug_coresight_csf_client {
	void *drv_data;
	struct kbase_debug_coresight_csf_address_range *addr_ranges;
	u32 nr_ranges;
	struct list_head link;
};
/**
 * enum kbase_debug_coresight_csf_state - Coresight configuration states
 *
 * @KBASE_DEBUG_CORESIGHT_CSF_DISABLED: Coresight configuration is disabled.
 * @KBASE_DEBUG_CORESIGHT_CSF_ENABLED:  Coresight configuration is enabled.
 *
 * Held per configuration in &struct kbase_debug_coresight_csf_config.state.
 */
enum kbase_debug_coresight_csf_state {
	KBASE_DEBUG_CORESIGHT_CSF_DISABLED = 0,
	KBASE_DEBUG_CORESIGHT_CSF_ENABLED,
};
/**
 * struct kbase_debug_coresight_csf_config - Coresight configuration definition
 *
 * @client:      Pointer to the client for which the configuration is created
 *               (opaque; presumably a &struct kbase_debug_coresight_csf_client
 *               — confirm against the config creation path).
 * @enable_seq:  Array of operations for Coresight client enable sequence. Can be NULL.
 * @disable_seq: Array of operations for Coresight client disable sequence. Can be NULL.
 * @state:       Current Coresight configuration state.
 * @error:       Error code used to know if an error occurred during the execution
 *               of the enable or disable sequences.
 * @link:        Link item of a Coresight configuration.
 *               Linked to &struct kbase_device.csf.coresight.configs.
 */
struct kbase_debug_coresight_csf_config {
	void *client;
	struct kbase_debug_coresight_csf_sequence *enable_seq;
	struct kbase_debug_coresight_csf_sequence *disable_seq;
	enum kbase_debug_coresight_csf_state state;
	int error;
	struct list_head link;
};
/**
 * struct kbase_debug_coresight_device - Object representing the Coresight device
 *
 * @clients:                List head to maintain Coresight clients.
 * @configs:                List head to maintain Coresight configs.
 * @lock:                   A lock to protect client/config lists.
 *                          Lists can be accessed concurrently by
 *                          Coresight kernel modules and kernel threads.
 * @workq:                  Ordered work queue for Coresight enable/disable
 *                          execution.
 * @enable_work:            Work item used to enable Coresight.
 * @disable_work:           Work item used to disable Coresight.
 * @event_wait:             Wait queue for Coresight state-transition events.
 * @enable_on_pmode_exit:   Flag used by the PM state machine to
 *                          identify if Coresight enable is needed.
 * @disable_on_pmode_enter: Flag used by the PM state machine to
 *                          identify if Coresight disable is needed.
 */
struct kbase_debug_coresight_device {
	struct list_head clients;
	struct list_head configs;
	spinlock_t lock;
	struct workqueue_struct *workq;
	struct work_struct enable_work;
	struct work_struct disable_work;
	wait_queue_head_t event_wait;
	bool enable_on_pmode_exit;
	bool disable_on_pmode_enter;
};
/**
* kbase_debug_coresight_csf_init - Initialize Coresight resources.
*
* @kbdev: Instance of a GPU platform device that implements a CSF interface.
*
* This function should be called once at device initialization.
*
* Return: 0 on success.
*/
int kbase_debug_coresight_csf_init(struct kbase_device *kbdev);
/**
* kbase_debug_coresight_csf_term - Terminate Coresight resources.
*
* @kbdev: Instance of a GPU platform device that implements a CSF interface.
*
* This function should be called at device termination to prevent any
* memory leaks if Coresight module would have been removed without calling
* kbasep_debug_coresight_csf_trace_disable().
*/
void kbase_debug_coresight_csf_term(struct kbase_device *kbdev);
/**
* kbase_debug_coresight_csf_disable_pmode_enter - Disable Coresight on Protected
* mode enter.
*
* @kbdev: Instance of a GPU platform device that implements a CSF interface.
*
* This function should be called just before requesting to enter protected mode.
* It will trigger a PM state machine transition from MCU_ON
* to ON_PMODE_ENTER_CORESIGHT_DISABLE.
*/
void kbase_debug_coresight_csf_disable_pmode_enter(struct kbase_device *kbdev);
/**
* kbase_debug_coresight_csf_enable_pmode_exit - Enable Coresight on Protected
 * mode exit.
*
* @kbdev: Instance of a GPU platform device that implements a CSF interface.
*
* This function should be called after protected mode exit is acknowledged.
* It will trigger a PM state machine transition from MCU_ON
* to ON_PMODE_EXIT_CORESIGHT_ENABLE.
*/
void kbase_debug_coresight_csf_enable_pmode_exit(struct kbase_device *kbdev);
/**
* kbase_debug_coresight_csf_state_request - Request Coresight state transition.
*
* @kbdev: Instance of a GPU platform device that implements a CSF interface.
 * @state: Coresight state to transition to.
*/
void kbase_debug_coresight_csf_state_request(struct kbase_device *kbdev,
enum kbase_debug_coresight_csf_state state);
/**
* kbase_debug_coresight_csf_state_check - Check Coresight state.
*
* @kbdev: Instance of a GPU platform device that implements a CSF interface.
* @state: Coresight state to check for.
*
* Return: true if all states of configs are @state.
*/
bool kbase_debug_coresight_csf_state_check(struct kbase_device *kbdev,
enum kbase_debug_coresight_csf_state state);
/**
* kbase_debug_coresight_csf_state_wait - Wait for Coresight state transition to complete.
*
* @kbdev: Instance of a GPU platform device that implements a CSF interface.
* @state: Coresight state to wait for.
*
* Return: true if all configs become @state in pre-defined time period.
*/
bool kbase_debug_coresight_csf_state_wait(struct kbase_device *kbdev,
enum kbase_debug_coresight_csf_state state);
#endif /* _KBASE_DEBUG_CORESIGHT_INTERNAL_CSF_H_ */

View File

@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
* (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -29,10 +29,7 @@
#include <mali_kbase_reset_gpu.h>
#include <csf/mali_kbase_csf.h>
#include <csf/ipa_control/mali_kbase_csf_ipa_control.h>
#if IS_ENABLED(CONFIG_MALI_NO_MALI)
#include <backend/gpu/mali_kbase_model_linux.h>
#endif
#include <mali_kbase.h>
#include <backend/gpu/mali_kbase_irq_internal.h>
@@ -92,13 +89,13 @@ static int kbase_backend_late_init(struct kbase_device *kbdev)
goto fail_timer;
#ifdef CONFIG_MALI_DEBUG
#ifndef CONFIG_MALI_NO_MALI
#if IS_ENABLED(CONFIG_MALI_REAL_HW)
if (kbasep_common_test_interrupt_handlers(kbdev) != 0) {
dev_err(kbdev->dev, "Interrupt assignment check failed.\n");
err = -EINVAL;
goto fail_interrupt_test;
}
#endif /* !CONFIG_MALI_NO_MALI */
#endif /* IS_ENABLED(CONFIG_MALI_REAL_HW) */
#endif /* CONFIG_MALI_DEBUG */
kbase_ipa_control_init(kbdev);
@@ -126,6 +123,10 @@ static int kbase_backend_late_init(struct kbase_device *kbdev)
if (err)
goto fail_update_l2_features;
err = kbase_backend_time_init(kbdev);
if (err)
goto fail_update_l2_features;
init_waitqueue_head(&kbdev->hwaccess.backend.reset_wait);
kbase_pm_context_idle(kbdev);
@@ -142,9 +143,9 @@ fail_pm_metrics_init:
kbase_ipa_control_term(kbdev);
#ifdef CONFIG_MALI_DEBUG
#ifndef CONFIG_MALI_NO_MALI
#if IS_ENABLED(CONFIG_MALI_REAL_HW)
fail_interrupt_test:
#endif /* !CONFIG_MALI_NO_MALI */
#endif /* IS_ENABLED(CONFIG_MALI_REAL_HW) */
#endif /* CONFIG_MALI_DEBUG */
kbase_backend_timer_term(kbdev);
@@ -283,12 +284,15 @@ static void kbase_device_hwcnt_backend_csf_term(struct kbase_device *kbdev)
}
static const struct kbase_device_init dev_init[] = {
#if IS_ENABLED(CONFIG_MALI_NO_MALI)
{ kbase_gpu_device_create, kbase_gpu_device_destroy, "Dummy model initialization failed" },
#else
#if !IS_ENABLED(CONFIG_MALI_REAL_HW)
{ kbase_gpu_device_create, kbase_gpu_device_destroy,
"Dummy model initialization failed" },
#else /* !IS_ENABLED(CONFIG_MALI_REAL_HW) */
{ assign_irqs, NULL, "IRQ search failed" },
#endif /* !IS_ENABLED(CONFIG_MALI_REAL_HW) */
#if !IS_ENABLED(CONFIG_MALI_NO_MALI)
{ registers_map, registers_unmap, "Register map failed" },
#endif
#endif /* !IS_ENABLED(CONFIG_MALI_NO_MALI) */
{ power_control_init, power_control_term, "Power control initialization failed" },
{ kbase_device_io_history_init, kbase_device_io_history_term,
"Register access history initialization failed" },
@@ -344,6 +348,10 @@ static const struct kbase_device_init dev_init[] = {
{ kbase_gpuprops_populate_user_buffer, kbase_gpuprops_free_user_buffer,
"GPU property population failed" },
{ kbase_device_late_init, kbase_device_late_term, "Late device initialization failed" },
#if IS_ENABLED(CONFIG_MALI_CORESIGHT)
{ kbase_debug_coresight_csf_init, kbase_debug_coresight_csf_term,
"Coresight initialization failed" },
#endif /* IS_ENABLED(CONFIG_MALI_CORESIGHT) */
};
static void kbase_device_term_partial(struct kbase_device *kbdev,
@@ -357,7 +365,6 @@ static void kbase_device_term_partial(struct kbase_device *kbdev,
void kbase_device_term(struct kbase_device *kbdev)
{
kbdev->csf.mali_file_inode = NULL;
kbase_device_term_partial(kbdev, ARRAY_SIZE(dev_init));
kbase_mem_halt(kbdev);
}

View File

@@ -24,6 +24,7 @@
#include <backend/gpu/mali_kbase_instr_internal.h>
#include <backend/gpu/mali_kbase_pm_internal.h>
#include <device/mali_kbase_device.h>
#include <device/mali_kbase_device_internal.h>
#include <mali_kbase_reset_gpu.h>
#include <mmu/mali_kbase_mmu.h>
#include <mali_kbase_ctx_sched.h>
@@ -149,9 +150,6 @@ void kbase_gpu_interrupt(struct kbase_device *kbdev, u32 val)
dev_dbg(kbdev->dev, "Doorbell mirror interrupt received");
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
#ifdef CONFIG_MALI_DEBUG
WARN_ON(!kbase_csf_scheduler_get_nr_active_csgs(kbdev));
#endif
kbase_pm_disable_db_mirror_interrupt(kbdev);
kbdev->pm.backend.exit_gpu_sleep_mode = true;
kbase_csf_scheduler_invoke_tick(kbdev);
@@ -189,7 +187,7 @@ void kbase_gpu_interrupt(struct kbase_device *kbdev, u32 val)
}
#if !IS_ENABLED(CONFIG_MALI_NO_MALI)
static bool kbase_is_register_accessible(u32 offset)
bool kbase_is_register_accessible(u32 offset)
{
#ifdef CONFIG_MALI_DEBUG
if (((offset >= MCU_SUBSYSTEM_BASE) && (offset < IPA_CONTROL_BASE)) ||
@@ -201,7 +199,9 @@ static bool kbase_is_register_accessible(u32 offset)
return true;
}
#endif /* !IS_ENABLED(CONFIG_MALI_NO_MALI) */
#if IS_ENABLED(CONFIG_MALI_REAL_HW)
void kbase_reg_write(struct kbase_device *kbdev, u32 offset, u32 value)
{
if (WARN_ON(!kbdev->pm.backend.gpu_powered))
@@ -249,4 +249,4 @@ u32 kbase_reg_read(struct kbase_device *kbdev, u32 offset)
return val;
}
KBASE_EXPORT_TEST_API(kbase_reg_read);
#endif /* !IS_ENABLED(CONFIG_MALI_NO_MALI) */
#endif /* IS_ENABLED(CONFIG_MALI_REAL_HW) */

View File

@@ -106,7 +106,7 @@ void kbase_gpu_interrupt(struct kbase_device *kbdev, u32 val)
KBASE_KTRACE_ADD(kbdev, CORE_GPU_IRQ_DONE, NULL, val);
}
#if !IS_ENABLED(CONFIG_MALI_NO_MALI)
#if IS_ENABLED(CONFIG_MALI_REAL_HW)
void kbase_reg_write(struct kbase_device *kbdev, u32 offset, u32 value)
{
WARN_ON(!kbdev->pm.backend.gpu_powered);
@@ -140,4 +140,4 @@ u32 kbase_reg_read(struct kbase_device *kbdev, u32 offset)
return val;
}
KBASE_EXPORT_TEST_API(kbase_reg_read);
#endif /* !IS_ENABLED(CONFIG_MALI_NO_MALI) */
#endif /* IS_ENABLED(CONFIG_MALI_REAL_HW) */

View File

@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
* (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -30,10 +30,7 @@
#include <hwcnt/mali_kbase_hwcnt_watchdog_if_timer.h>
#include <hwcnt/backend/mali_kbase_hwcnt_backend_jm.h>
#include <hwcnt/backend/mali_kbase_hwcnt_backend_jm_watchdog.h>
#if IS_ENABLED(CONFIG_MALI_NO_MALI)
#include <backend/gpu/mali_kbase_model_linux.h>
#endif /* CONFIG_MALI_NO_MALI */
#ifdef CONFIG_MALI_ARBITER_SUPPORT
#include <arbiter/mali_kbase_arbiter_pm.h>
@@ -74,13 +71,13 @@ static int kbase_backend_late_init(struct kbase_device *kbdev)
goto fail_timer;
#ifdef CONFIG_MALI_DEBUG
#ifndef CONFIG_MALI_NO_MALI
#if IS_ENABLED(CONFIG_MALI_REAL_HW)
if (kbasep_common_test_interrupt_handlers(kbdev) != 0) {
dev_err(kbdev->dev, "Interrupt assignment check failed.\n");
err = -EINVAL;
goto fail_interrupt_test;
}
#endif /* !CONFIG_MALI_NO_MALI */
#endif /* IS_ENABLED(CONFIG_MALI_REAL_HW) */
#endif /* CONFIG_MALI_DEBUG */
err = kbase_job_slot_init(kbdev);
@@ -103,6 +100,10 @@ static int kbase_backend_late_init(struct kbase_device *kbdev)
if (err)
goto fail_update_l2_features;
err = kbase_backend_time_init(kbdev);
if (err)
goto fail_update_l2_features;
init_waitqueue_head(&kbdev->hwaccess.backend.reset_wait);
/* Idle the GPU and/or cores, if the policy wants it to */
@@ -119,9 +120,9 @@ fail_devfreq_init:
fail_job_slot:
#ifdef CONFIG_MALI_DEBUG
#ifndef CONFIG_MALI_NO_MALI
#if IS_ENABLED(CONFIG_MALI_REAL_HW)
fail_interrupt_test:
#endif /* !CONFIG_MALI_NO_MALI */
#endif /* IS_ENABLED(CONFIG_MALI_REAL_HW) */
#endif /* CONFIG_MALI_DEBUG */
kbase_backend_timer_term(kbdev);
@@ -213,17 +214,20 @@ static void kbase_device_hwcnt_backend_jm_watchdog_term(struct kbase_device *kbd
}
static const struct kbase_device_init dev_init[] = {
#if IS_ENABLED(CONFIG_MALI_NO_MALI)
#if !IS_ENABLED(CONFIG_MALI_REAL_HW)
{ kbase_gpu_device_create, kbase_gpu_device_destroy, "Dummy model initialization failed" },
#else
#else /* !IS_ENABLED(CONFIG_MALI_REAL_HW) */
{ assign_irqs, NULL, "IRQ search failed" },
#endif /* !IS_ENABLED(CONFIG_MALI_REAL_HW) */
#if !IS_ENABLED(CONFIG_MALI_NO_MALI)
{ registers_map, registers_unmap, "Register map failed" },
#endif
#endif /* !IS_ENABLED(CONFIG_MALI_NO_MALI) */
{ kbase_device_io_history_init, kbase_device_io_history_term,
"Register access history initialization failed" },
{ kbase_device_pm_init, kbase_device_pm_term, "Power management initialization failed" },
{ kbase_device_early_init, kbase_device_early_term, "Early device initialization failed" },
{ kbase_device_populate_max_freq, NULL, "Populating max frequency failed" },
{ kbase_pm_lowest_gpu_freq_init, NULL, "Lowest freq initialization failed" },
{ kbase_device_misc_init, kbase_device_misc_term,
"Miscellaneous device initialization failed" },
{ kbase_device_pcm_dev_init, kbase_device_pcm_dev_term,
@@ -239,7 +243,6 @@ static const struct kbase_device_init dev_init[] = {
"Timeline stream initialization failed" },
{ kbase_clk_rate_trace_manager_init, kbase_clk_rate_trace_manager_term,
"Clock rate trace manager initialization failed" },
{ kbase_pm_lowest_gpu_freq_init, NULL, "Lowest freq initialization failed" },
{ kbase_instr_backend_init, kbase_instr_backend_term,
"Instrumentation backend initialization failed" },
{ kbase_device_hwcnt_watchdog_if_init, kbase_device_hwcnt_watchdog_if_term,

View File

@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
* (C) COPYRIGHT 2010-2022 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -35,6 +35,7 @@
#include <mali_kbase.h>
#include <mali_kbase_defs.h>
#include <mali_kbase_hwaccess_instr.h>
#include <mali_kbase_hwaccess_time.h>
#include <mali_kbase_hw.h>
#include <mali_kbase_config_defaults.h>
#include <linux/priority_control_manager.h>
@@ -308,7 +309,8 @@ int kbase_device_misc_init(struct kbase_device * const kbdev)
#endif /* MALI_USE_CSF */
kbdev->mmu_mode = kbase_mmu_mode_get_aarch64();
kbdev->mmu_as_inactive_wait_time_ms =
kbase_get_timeout_ms(kbdev, MMU_AS_INACTIVE_WAIT_TIMEOUT);
mutex_init(&kbdev->kctx_list_lock);
INIT_LIST_HEAD(&kbdev->kctx_list);
@@ -321,6 +323,10 @@ int kbase_device_misc_init(struct kbase_device * const kbdev)
"Unable to register OOM notifier for Mali - but will continue\n");
kbdev->oom_notifier_block.notifier_call = NULL;
}
#if MALI_USE_CSF && IS_ENABLED(CONFIG_SYNC_FILE)
atomic_set(&kbdev->live_fence_metadata, 0);
#endif
return 0;
term_as:
@@ -344,6 +350,11 @@ void kbase_device_misc_term(struct kbase_device *kbdev)
if (kbdev->oom_notifier_block.notifier_call)
unregister_oom_notifier(&kbdev->oom_notifier_block);
#if MALI_USE_CSF && IS_ENABLED(CONFIG_SYNC_FILE)
if (atomic_read(&kbdev->live_fence_metadata) > 0)
dev_warn(kbdev->dev, "Terminating Kbase device with live fence metadata!");
#endif
}
void kbase_device_free(struct kbase_device *kbdev)

View File

@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
* (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -89,3 +89,13 @@ int kbase_device_late_init(struct kbase_device *kbdev);
* @kbdev: Device pointer
*/
void kbase_device_late_term(struct kbase_device *kbdev);
#if MALI_USE_CSF && !IS_ENABLED(CONFIG_MALI_NO_MALI)
/**
* kbase_is_register_accessible - Checks if register is accessible
* @offset: Register offset
*
* Return: true if the register is accessible, false otherwise.
*/
bool kbase_is_register_accessible(u32 offset);
#endif /* MALI_USE_CSF && !IS_ENABLED(CONFIG_MALI_NO_MALI) */

View File

@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
* (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -105,6 +105,70 @@ const char *kbase_gpu_exception_name(u32 const exception_code)
case GPU_FAULTSTATUS_EXCEPTION_TYPE_GPU_CACHEABILITY_FAULT:
e = "GPU_CACHEABILITY_FAULT";
break;
/* MMU Fault */
case CS_FAULT_EXCEPTION_TYPE_TRANSLATION_FAULT_L0:
e = "TRANSLATION_FAULT at level 0";
break;
case CS_FAULT_EXCEPTION_TYPE_TRANSLATION_FAULT_L1:
e = "TRANSLATION_FAULT at level 1";
break;
case CS_FAULT_EXCEPTION_TYPE_TRANSLATION_FAULT_L2:
e = "TRANSLATION_FAULT at level 2";
break;
case CS_FAULT_EXCEPTION_TYPE_TRANSLATION_FAULT_L3:
e = "TRANSLATION_FAULT at level 3";
break;
case CS_FAULT_EXCEPTION_TYPE_TRANSLATION_FAULT_L4:
e = "TRANSLATION_FAULT";
break;
case CS_FAULT_EXCEPTION_TYPE_PERMISSION_FAULT_0:
e = "PERMISSION_FAULT at level 0";
break;
case CS_FAULT_EXCEPTION_TYPE_PERMISSION_FAULT_1:
e = "PERMISSION_FAULT at level 1";
break;
case CS_FAULT_EXCEPTION_TYPE_PERMISSION_FAULT_2:
e = "PERMISSION_FAULT at level 2";
break;
case CS_FAULT_EXCEPTION_TYPE_PERMISSION_FAULT_3:
e = "PERMISSION_FAULT at level 3";
break;
case CS_FAULT_EXCEPTION_TYPE_ACCESS_FLAG_1:
e = "ACCESS_FLAG at level 1";
break;
case CS_FAULT_EXCEPTION_TYPE_ACCESS_FLAG_2:
e = "ACCESS_FLAG at level 2";
break;
case CS_FAULT_EXCEPTION_TYPE_ACCESS_FLAG_3:
e = "ACCESS_FLAG at level 3";
break;
case CS_FAULT_EXCEPTION_TYPE_ADDRESS_SIZE_FAULT_IN:
e = "ADDRESS_SIZE_FAULT_IN";
break;
case CS_FAULT_EXCEPTION_TYPE_ADDRESS_SIZE_FAULT_OUT_0:
e = "ADDRESS_SIZE_FAULT_OUT_0 at level 0";
break;
case CS_FAULT_EXCEPTION_TYPE_ADDRESS_SIZE_FAULT_OUT_1:
e = "ADDRESS_SIZE_FAULT_OUT_1 at level 1";
break;
case CS_FAULT_EXCEPTION_TYPE_ADDRESS_SIZE_FAULT_OUT_2:
e = "ADDRESS_SIZE_FAULT_OUT_2 at level 2";
break;
case CS_FAULT_EXCEPTION_TYPE_ADDRESS_SIZE_FAULT_OUT_3:
e = "ADDRESS_SIZE_FAULT_OUT_3 at level 3";
break;
case CS_FAULT_EXCEPTION_TYPE_MEMORY_ATTRIBUTE_FAULT_0:
e = "MEMORY_ATTRIBUTE_FAULT_0 at level 0";
break;
case CS_FAULT_EXCEPTION_TYPE_MEMORY_ATTRIBUTE_FAULT_1:
e = "MEMORY_ATTRIBUTE_FAULT_1 at level 1";
break;
case CS_FAULT_EXCEPTION_TYPE_MEMORY_ATTRIBUTE_FAULT_2:
e = "MEMORY_ATTRIBUTE_FAULT_2 at level 2";
break;
case CS_FAULT_EXCEPTION_TYPE_MEMORY_ATTRIBUTE_FAULT_3:
e = "MEMORY_ATTRIBUTE_FAULT_3 at level 3";
break;
/* Any other exception code is unknown */
default:
e = "UNKNOWN";

View File

@@ -108,7 +108,6 @@
#define JOB_IRQ_JS_STATE 0x010 /* status==active and _next == busy snapshot from last JOB_IRQ_CLEAR */
#define JOB_IRQ_THROTTLE 0x014 /* cycles to delay delivering an interrupt externally. The JOB_IRQ_STATUS is NOT affected by this, just the delivery of the interrupt. */
#define JOB_SLOT0 0x800 /* Configuration registers for job slot 0 */
#define JOB_SLOT1 0x880 /* Configuration registers for job slot 1 */
#define JOB_SLOT2 0x900 /* Configuration registers for job slot 2 */
#define JOB_SLOT3 0x980 /* Configuration registers for job slot 3 */
@@ -125,8 +124,6 @@
#define JOB_SLOT14 0xF00 /* Configuration registers for job slot 14 */
#define JOB_SLOT15 0xF80 /* Configuration registers for job slot 15 */
#define JOB_SLOT_REG(n, r) (JOB_CONTROL_REG(JOB_SLOT0 + ((n) << 7)) + (r))
#define JS_XAFFINITY 0x1C /* (RO) Extended affinity mask for job slot n*/
#define JS_COMMAND 0x20 /* (WO) Command register for job slot n */

View File

@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
* (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -27,9 +27,9 @@
*
* @exception_code: exception code
*
* This function is called from the interrupt handler when a GPU fault occurs.
* This function is called by error handlers when GPU reports an error.
*
* Return: name associated with the exception code
* Return: Error string associated with the exception code
*/
const char *kbase_gpu_exception_name(u32 exception_code);

View File

@@ -51,9 +51,7 @@
#define MMU_FEATURES 0x014 /* (RO) MMU features */
#define AS_PRESENT 0x018 /* (RO) Address space slots present */
#define GPU_IRQ_RAWSTAT 0x020 /* (RW) */
#define GPU_IRQ_CLEAR 0x024 /* (WO) */
#define GPU_IRQ_MASK 0x028 /* (RW) */
#define GPU_IRQ_STATUS 0x02C /* (RO) */
#define GPU_COMMAND 0x030 /* (WO) */
#define GPU_STATUS 0x034 /* (RO) */
@@ -176,14 +174,9 @@
/* Job control registers */
#define JOB_IRQ_RAWSTAT 0x000 /* Raw interrupt status register */
#define JOB_IRQ_STATUS 0x00C /* Interrupt status register */
/* MMU control registers */
#define MMU_IRQ_CLEAR 0x004 /* (WO) Interrupt clear register */
#define MMU_IRQ_MASK 0x008 /* (RW) Interrupt mask register */
#define MMU_IRQ_STATUS 0x00C /* (RO) Interrupt status register */
#define MMU_AS1 0x440 /* Configuration registers for address space 1 */
#define MMU_AS2 0x480 /* Configuration registers for address space 2 */
#define MMU_AS3 0x4C0 /* Configuration registers for address space 3 */

View File

@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
* (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2021-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -289,6 +289,8 @@ kbasep_hwcnt_backend_csf_cc_initial_sample(struct kbase_hwcnt_backend_csf *backe
u64 cycle_counts[BASE_MAX_NR_CLOCKS_REGULATORS];
size_t clk;
memset(cycle_counts, 0, sizeof(cycle_counts));
/* Read cycle count from CSF interface for both clock domains. */
backend_csf->info->csf_if->get_gpu_cycle_count(backend_csf->info->csf_if->ctx, cycle_counts,
clk_enable_map);
@@ -308,6 +310,8 @@ static void kbasep_hwcnt_backend_csf_cc_update(struct kbase_hwcnt_backend_csf *b
u64 cycle_counts[BASE_MAX_NR_CLOCKS_REGULATORS];
size_t clk;
memset(cycle_counts, 0, sizeof(cycle_counts));
backend_csf->info->csf_if->assert_lock_held(backend_csf->info->csf_if->ctx);
backend_csf->info->csf_if->get_gpu_cycle_count(backend_csf->info->csf_if->ctx, cycle_counts,
@@ -558,7 +562,7 @@ static void kbasep_hwcnt_backend_csf_accumulate_samples(struct kbase_hwcnt_backe
u32 insert_index_to_stop)
{
u32 raw_idx;
unsigned long flags;
unsigned long flags = 0UL;
u8 *cpu_dump_base = (u8 *)backend_csf->ring_buf_cpu_base;
const size_t ring_buf_cnt = backend_csf->info->ring_buf_cnt;
const size_t buf_dump_bytes = backend_csf->info->prfcnt_info.dump_bytes;
@@ -639,7 +643,7 @@ static void kbasep_hwcnt_backend_watchdog_timer_cb(void *info)
{
struct kbase_hwcnt_backend_csf_info *csf_info = info;
struct kbase_hwcnt_backend_csf *backend_csf;
unsigned long flags;
unsigned long flags = 0UL;
csf_info->csf_if->lock(csf_info->csf_if->ctx, &flags);
@@ -658,8 +662,8 @@ static void kbasep_hwcnt_backend_watchdog_timer_cb(void *info)
/* 3. dump state indicates no other dumping is in progress. */
((backend_csf->dump_state == KBASE_HWCNT_BACKEND_CSF_DUMP_IDLE) ||
(backend_csf->dump_state == KBASE_HWCNT_BACKEND_CSF_DUMP_COMPLETED))) {
u32 extract_index;
u32 insert_index;
u32 extract_index = 0U;
u32 insert_index = 0U;
/* Read the raw extract and insert indexes from the CSF interface. */
csf_info->csf_if->get_indexes(csf_info->csf_if->ctx, &extract_index, &insert_index);
@@ -700,11 +704,11 @@ static void kbasep_hwcnt_backend_watchdog_timer_cb(void *info)
*/
static void kbasep_hwcnt_backend_csf_dump_worker(struct work_struct *work)
{
unsigned long flags;
unsigned long flags = 0ULL;
struct kbase_hwcnt_backend_csf *backend_csf;
u32 insert_index_to_acc;
u32 extract_index;
u32 insert_index;
u32 extract_index = 0U;
u32 insert_index = 0U;
WARN_ON(!work);
backend_csf = container_of(work, struct kbase_hwcnt_backend_csf, hwc_dump_work);
@@ -776,10 +780,10 @@ static void kbasep_hwcnt_backend_csf_dump_worker(struct work_struct *work)
*/
static void kbasep_hwcnt_backend_csf_threshold_worker(struct work_struct *work)
{
unsigned long flags;
unsigned long flags = 0ULL;
struct kbase_hwcnt_backend_csf *backend_csf;
u32 extract_index;
u32 insert_index;
u32 extract_index = 0U;
u32 insert_index = 0U;
WARN_ON(!work);
@@ -920,7 +924,7 @@ static int kbasep_hwcnt_backend_csf_dump_enable(struct kbase_hwcnt_backend *back
const struct kbase_hwcnt_enable_map *enable_map)
{
int errcode;
unsigned long flags;
unsigned long flags = 0UL;
struct kbase_hwcnt_backend_csf *backend_csf = (struct kbase_hwcnt_backend_csf *)backend;
if (!backend_csf)
@@ -954,7 +958,7 @@ static void kbasep_hwcnt_backend_csf_wait_enable_transition_complete(
/* CSF backend implementation of kbase_hwcnt_backend_dump_disable_fn */
static void kbasep_hwcnt_backend_csf_dump_disable(struct kbase_hwcnt_backend *backend)
{
unsigned long flags;
unsigned long flags = 0UL;
struct kbase_hwcnt_backend_csf *backend_csf = (struct kbase_hwcnt_backend_csf *)backend;
bool do_disable = false;
@@ -1050,7 +1054,7 @@ static void kbasep_hwcnt_backend_csf_dump_disable(struct kbase_hwcnt_backend *ba
static int kbasep_hwcnt_backend_csf_dump_request(struct kbase_hwcnt_backend *backend,
u64 *dump_time_ns)
{
unsigned long flags;
unsigned long flags = 0UL;
struct kbase_hwcnt_backend_csf *backend_csf = (struct kbase_hwcnt_backend_csf *)backend;
bool do_request = false;
bool watchdog_dumping = false;
@@ -1157,7 +1161,7 @@ static int kbasep_hwcnt_backend_csf_dump_request(struct kbase_hwcnt_backend *bac
/* CSF backend implementation of kbase_hwcnt_backend_dump_wait_fn */
static int kbasep_hwcnt_backend_csf_dump_wait(struct kbase_hwcnt_backend *backend)
{
unsigned long flags;
unsigned long flags = 0UL;
struct kbase_hwcnt_backend_csf *backend_csf = (struct kbase_hwcnt_backend_csf *)backend;
int errcode;
@@ -1365,7 +1369,7 @@ alloc_error:
static int kbasep_hwcnt_backend_csf_init(const struct kbase_hwcnt_backend_info *info,
struct kbase_hwcnt_backend **out_backend)
{
unsigned long flags;
unsigned long flags = 0UL;
struct kbase_hwcnt_backend_csf *backend_csf = NULL;
struct kbase_hwcnt_backend_csf_info *csf_info = (struct kbase_hwcnt_backend_csf_info *)info;
int errcode;
@@ -1407,7 +1411,7 @@ static int kbasep_hwcnt_backend_csf_init(const struct kbase_hwcnt_backend_info *
/* CSF backend implementation of kbase_hwcnt_backend_term_fn */
static void kbasep_hwcnt_backend_csf_term(struct kbase_hwcnt_backend *backend)
{
unsigned long flags;
unsigned long flags = 0UL;
struct kbase_hwcnt_backend_csf *backend_csf = (struct kbase_hwcnt_backend_csf *)backend;
if (!backend)
@@ -1619,7 +1623,7 @@ void kbase_hwcnt_backend_csf_protm_exited(struct kbase_hwcnt_backend_interface *
void kbase_hwcnt_backend_csf_on_unrecoverable_error(struct kbase_hwcnt_backend_interface *iface)
{
unsigned long flags;
unsigned long flags = 0UL;
struct kbase_hwcnt_backend_csf_info *csf_info;
csf_info = (struct kbase_hwcnt_backend_csf_info *)iface->info;
@@ -1639,7 +1643,7 @@ void kbase_hwcnt_backend_csf_on_unrecoverable_error(struct kbase_hwcnt_backend_i
void kbase_hwcnt_backend_csf_on_before_reset(struct kbase_hwcnt_backend_interface *iface)
{
unsigned long flags;
unsigned long flags = 0UL;
struct kbase_hwcnt_backend_csf_info *csf_info;
struct kbase_hwcnt_backend_csf *backend_csf;

View File

@@ -34,13 +34,11 @@
#include "hwcnt/backend/mali_kbase_hwcnt_backend_csf_if_fw.h"
#include "mali_kbase_hwaccess_time.h"
#include "backend/gpu/mali_kbase_clk_rate_trace_mgr.h"
#include <backend/gpu/mali_kbase_model_linux.h>
#include <linux/log2.h>
#include "mali_kbase_ccswe.h"
#if IS_ENABLED(CONFIG_MALI_NO_MALI)
#include <backend/gpu/mali_kbase_model_dummy.h>
#endif /* CONFIG_MALI_NO_MALI */
/* Ring buffer virtual address start at 4GB */
#define KBASE_HWC_CSF_RING_BUFFER_VA_START (1ull << 32)
@@ -103,6 +101,8 @@ kbasep_hwcnt_backend_csf_if_fw_assert_lock_held(struct kbase_hwcnt_backend_csf_i
static void kbasep_hwcnt_backend_csf_if_fw_lock(struct kbase_hwcnt_backend_csf_if_ctx *ctx,
unsigned long *flags)
__acquires(&(struct kbase_hwcnt_backend_csf_if_fw_ctx)
ctx->kbdev->csf.scheduler.interrupt_lock)
{
struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx;
struct kbase_device *kbdev;
@@ -117,6 +117,8 @@ static void kbasep_hwcnt_backend_csf_if_fw_lock(struct kbase_hwcnt_backend_csf_i
static void kbasep_hwcnt_backend_csf_if_fw_unlock(struct kbase_hwcnt_backend_csf_if_ctx *ctx,
unsigned long flags)
__releases(&(struct kbase_hwcnt_backend_csf_if_fw_ctx)
ctx->kbdev->csf.scheduler.interrupt_lock)
{
struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx;
struct kbase_device *kbdev;
@@ -327,7 +329,7 @@ static int kbasep_hwcnt_backend_csf_if_fw_ring_buf_alloc(
/* Get physical page for the buffer */
ret = kbase_mem_pool_alloc_pages(&kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], num_pages,
phys, false);
phys, false, NULL);
if (ret != num_pages)
goto phys_mem_pool_alloc_error;
@@ -345,7 +347,7 @@ static int kbasep_hwcnt_backend_csf_if_fw_ring_buf_alloc(
/* Update MMU table */
ret = kbase_mmu_insert_pages(kbdev, &kbdev->csf.mcu_mmu, gpu_va_base >> PAGE_SHIFT, phys,
num_pages, flags, MCU_AS_NR, KBASE_MEM_GROUP_CSF_FW,
mmu_sync_info);
mmu_sync_info, NULL, false);
if (ret)
goto mmu_insert_failed;
@@ -480,7 +482,8 @@ kbasep_hwcnt_backend_csf_if_fw_ring_buf_free(struct kbase_hwcnt_backend_csf_if_c
WARN_ON(kbase_mmu_teardown_pages(fw_ctx->kbdev, &fw_ctx->kbdev->csf.mcu_mmu,
gpu_va_base >> PAGE_SHIFT, fw_ring_buf->phys,
fw_ring_buf->num_pages, MCU_AS_NR));
fw_ring_buf->num_pages, fw_ring_buf->num_pages,
MCU_AS_NR, true));
vunmap(fw_ring_buf->cpu_dump_base);

View File

@@ -27,10 +27,7 @@
#include "mali_kbase_hwaccess_instr.h"
#include "mali_kbase_hwaccess_time.h"
#include "mali_kbase_ccswe.h"
#if IS_ENABLED(CONFIG_MALI_NO_MALI)
#include "backend/gpu/mali_kbase_model_dummy.h"
#endif /* CONFIG_MALI_NO_MALI */
#include "backend/gpu/mali_kbase_model_linux.h"
#include "backend/gpu/mali_kbase_clk_rate_trace_mgr.h"
#include "backend/gpu/mali_kbase_pm_internal.h"

View File

@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
* (C) COPYRIGHT 2018, 2020-2022 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -362,7 +362,7 @@ static int kbasep_hwcnt_accumulator_dump(struct kbase_hwcnt_context *hctx, u64 *
bool cur_map_any_enabled;
struct kbase_hwcnt_enable_map *cur_map;
bool new_map_any_enabled = false;
u64 dump_time_ns;
u64 dump_time_ns = 0;
struct kbase_hwcnt_accumulator *accum;
WARN_ON(!hctx);

View File

@@ -23,10 +23,13 @@
#include "mali_kbase.h"
/* MEMSYS counter block offsets */
#define L2_RD_MSG_IN_CU (13)
#define L2_RD_MSG_IN (16)
#define L2_WR_MSG_IN (18)
#define L2_SNP_MSG_IN (20)
#define L2_RD_MSG_OUT (22)
#define L2_READ_LOOKUP (26)
#define L2_EXT_READ_NOSNP (30)
#define L2_EXT_WRITE_NOSNP_FULL (43)
/* SC counter block offsets */
@@ -36,17 +39,23 @@
#define FULL_QUAD_WARPS (21)
#define EXEC_INSTR_FMA (27)
#define EXEC_INSTR_CVT (28)
#define EXEC_INSTR_SFU (29)
#define EXEC_INSTR_MSG (30)
#define TEX_FILT_NUM_OPS (39)
#define LS_MEM_READ_SHORT (45)
#define LS_MEM_WRITE_SHORT (47)
#define VARY_SLOT_16 (51)
#define BEATS_RD_LSC_EXT (57)
#define BEATS_RD_TEX (58)
#define BEATS_RD_TEX_EXT (59)
#define FRAG_QUADS_COARSE (68)
/* Tiler counter block offsets */
#define IDVS_POS_SHAD_STALL (23)
#define PREFETCH_STALL (25)
#define VFETCH_POS_READ_WAIT (29)
#define VFETCH_VERTEX_WAIT (30)
#define PRIMASSY_STALL (32)
#define IDVS_VAR_SHAD_STALL (38)
#define ITER_STALL (40)
#define PMGR_PTR_RD_STALL (48)
@@ -111,6 +120,15 @@ static const struct kbase_ipa_counter ipa_top_level_cntrs_def_ttux[] = {
TILER_COUNTER_DEF("vfetch_vertex_wait", -391964, VFETCH_VERTEX_WAIT),
};
static const struct kbase_ipa_counter ipa_top_level_cntrs_def_ttix[] = {
TILER_COUNTER_DEF("primassy_stall", 471953, PRIMASSY_STALL),
TILER_COUNTER_DEF("idvs_var_shad_stall", -460559, IDVS_VAR_SHAD_STALL),
MEMSYS_COUNTER_DEF("l2_rd_msg_in_cu", -6189604, L2_RD_MSG_IN_CU),
MEMSYS_COUNTER_DEF("l2_snp_msg_in", 6289609, L2_SNP_MSG_IN),
MEMSYS_COUNTER_DEF("l2_ext_read_nosnp", 512341, L2_EXT_READ_NOSNP),
};
/* These tables provide a description of each performance counter
* used by the shader cores counter model for energy estimation.
*/
@@ -150,6 +168,17 @@ static const struct kbase_ipa_counter ipa_shader_core_cntrs_def_ttux[] = {
SC_COUNTER_DEF("frag_quads_ezs_update", 372032, FRAG_QUADS_EZS_UPDATE),
};
static const struct kbase_ipa_counter ipa_shader_core_cntrs_def_ttix[] = {
SC_COUNTER_DEF("exec_instr_fma", 192642, EXEC_INSTR_FMA),
SC_COUNTER_DEF("exec_instr_msg", 1326465, EXEC_INSTR_MSG),
SC_COUNTER_DEF("beats_rd_tex", 163518, BEATS_RD_TEX),
SC_COUNTER_DEF("beats_rd_lsc_ext", 127475, BEATS_RD_LSC_EXT),
SC_COUNTER_DEF("frag_quads_coarse", -36247, FRAG_QUADS_COARSE),
SC_COUNTER_DEF("ls_mem_write_short", 51547, LS_MEM_WRITE_SHORT),
SC_COUNTER_DEF("beats_rd_tex_ext", -43370, BEATS_RD_TEX_EXT),
SC_COUNTER_DEF("exec_instr_sfu", 31583, EXEC_INSTR_SFU),
};
#define IPA_POWER_MODEL_OPS(gpu, init_token) \
const struct kbase_ipa_model_ops kbase_ ## gpu ## _ipa_model_ops = { \
.name = "mali-" #gpu "-power-model", \
@@ -181,13 +210,13 @@ static const struct kbase_ipa_counter ipa_shader_core_cntrs_def_ttux[] = {
#define ALIAS_POWER_MODEL(gpu, as_gpu) \
IPA_POWER_MODEL_OPS(gpu, as_gpu)
/* Reference voltage value is 750 mV.
*/
/* Reference voltage value is 750 mV. */
STANDARD_POWER_MODEL(todx, 750);
STANDARD_POWER_MODEL(tgrx, 750);
STANDARD_POWER_MODEL(tvax, 750);
STANDARD_POWER_MODEL(ttux, 750);
/* Reference voltage value is 550 mV. */
STANDARD_POWER_MODEL(ttix, 550);
/* Assuming LODX is an alias of TODX for IPA */
ALIAS_POWER_MODEL(lodx, todx);
@@ -195,10 +224,14 @@ ALIAS_POWER_MODEL(lodx, todx);
/* Assuming LTUX is an alias of TTUX for IPA */
ALIAS_POWER_MODEL(ltux, ttux);
/* Assuming LTUX is an alias of TTUX for IPA */
ALIAS_POWER_MODEL(ltix, ttix);
static const struct kbase_ipa_model_ops *ipa_counter_model_ops[] = {
&kbase_todx_ipa_model_ops, &kbase_lodx_ipa_model_ops,
&kbase_tgrx_ipa_model_ops, &kbase_tvax_ipa_model_ops,
&kbase_ttux_ipa_model_ops, &kbase_ltux_ipa_model_ops
&kbase_ttux_ipa_model_ops, &kbase_ltux_ipa_model_ops,
&kbase_ttix_ipa_model_ops, &kbase_ltix_ipa_model_ops,
};
const struct kbase_ipa_model_ops *kbase_ipa_counter_model_ops_find(
@@ -237,6 +270,10 @@ const char *kbase_ipa_counter_model_name_from_id(u32 gpu_id)
return "mali-ttux-power-model";
case GPU_ID2_PRODUCT_LTUX:
return "mali-ltux-power-model";
case GPU_ID2_PRODUCT_TTIX:
return "mali-ttix-power-model";
case GPU_ID2_PRODUCT_LTIX:
return "mali-ltix-power-model";
default:
return NULL;
}

View File

@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
* (C) COPYRIGHT 2016-2022 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2016-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -23,10 +23,7 @@
#include "mali_kbase_ipa_counter_common_jm.h"
#include "mali_kbase.h"
#if IS_ENABLED(CONFIG_MALI_NO_MALI)
#include <backend/gpu/mali_kbase_model_dummy.h>
#endif /* CONFIG_MALI_NO_MALI */
#include <backend/gpu/mali_kbase_model_linux.h>
/* Performance counter blocks base offsets */
#define JM_BASE (0 * KBASE_IPA_NR_BYTES_PER_BLOCK)
@@ -458,16 +455,14 @@ static const struct kbase_ipa_group ipa_groups_def_tbax[] = {
},
};
#define IPA_POWER_MODEL_OPS(gpu, init_token) \
const struct kbase_ipa_model_ops kbase_ ## gpu ## _ipa_model_ops = { \
.name = "mali-" #gpu "-power-model", \
.init = kbase_ ## init_token ## _power_model_init, \
.term = kbase_ipa_vinstr_common_model_term, \
.get_dynamic_coeff = kbase_ipa_vinstr_dynamic_coeff, \
.reset_counter_data = kbase_ipa_vinstr_reset_data, \
}; \
KBASE_EXPORT_TEST_API(kbase_ ## gpu ## _ipa_model_ops)
#define IPA_POWER_MODEL_OPS(gpu, init_token) \
static const struct kbase_ipa_model_ops kbase_##gpu##_ipa_model_ops = { \
.name = "mali-" #gpu "-power-model", \
.init = kbase_##init_token##_power_model_init, \
.term = kbase_ipa_vinstr_common_model_term, \
.get_dynamic_coeff = kbase_ipa_vinstr_dynamic_coeff, \
.reset_counter_data = kbase_ipa_vinstr_reset_data, \
}
#define STANDARD_POWER_MODEL(gpu, reference_voltage) \
static int kbase_ ## gpu ## _power_model_init(\

View File

@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
* (C) COPYRIGHT 2016-2021 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2016-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -84,11 +84,11 @@ KBASE_EXPORT_TEST_API(kbase_ipa_model_name_from_id);
static struct device_node *get_model_dt_node(struct kbase_ipa_model *model,
bool dt_required)
{
struct device_node *model_dt_node;
struct device_node *model_dt_node = NULL;
char compat_string[64];
snprintf(compat_string, sizeof(compat_string), "arm,%s",
model->ops->name);
if (unlikely(!scnprintf(compat_string, sizeof(compat_string), "arm,%s", model->ops->name)))
return NULL;
/* of_find_compatible_node() will call of_node_put() on the root node,
* so take a reference on it first.
@@ -111,12 +111,12 @@ int kbase_ipa_model_add_param_s32(struct kbase_ipa_model *model,
const char *name, s32 *addr,
size_t num_elems, bool dt_required)
{
int err, i;
int err = -EINVAL, i;
struct device_node *model_dt_node = get_model_dt_node(model,
dt_required);
char *origin;
err = of_property_read_u32_array(model_dt_node, name, addr, num_elems);
err = of_property_read_u32_array(model_dt_node, name, (u32 *)addr, num_elems);
/* We're done with model_dt_node now, so drop the reference taken in
* get_model_dt_node()/of_find_compatible_node().
*/
@@ -138,11 +138,17 @@ int kbase_ipa_model_add_param_s32(struct kbase_ipa_model *model,
for (i = 0; i < num_elems; ++i) {
char elem_name[32];
if (num_elems == 1)
snprintf(elem_name, sizeof(elem_name), "%s", name);
else
snprintf(elem_name, sizeof(elem_name), "%s.%d",
name, i);
if (num_elems == 1) {
if (unlikely(!scnprintf(elem_name, sizeof(elem_name), "%s", name))) {
err = -ENOMEM;
goto exit;
}
} else {
if (unlikely(!scnprintf(elem_name, sizeof(elem_name), "%s.%d", name, i))) {
err = -ENOMEM;
goto exit;
}
}
dev_dbg(model->kbdev->dev, "%s.%s = %d (%s)\n",
model->ops->name, elem_name, addr[i], origin);
@@ -164,7 +170,7 @@ int kbase_ipa_model_add_param_string(struct kbase_ipa_model *model,
int err;
struct device_node *model_dt_node = get_model_dt_node(model,
dt_required);
const char *string_prop_value;
const char *string_prop_value = "";
char *origin;
err = of_property_read_string(model_dt_node, name,

View File

@@ -231,14 +231,12 @@ static int add_params(struct kbase_ipa_model *model)
(struct kbase_ipa_model_simple_data *)model->model_data;
err = kbase_ipa_model_add_param_s32(model, "static-coefficient",
&model_data->static_coefficient,
1, true);
(s32 *)&model_data->static_coefficient, 1, true);
if (err)
goto end;
err = kbase_ipa_model_add_param_s32(model, "dynamic-coefficient",
&model_data->dynamic_coefficient,
1, true);
(s32 *)&model_data->dynamic_coefficient, 1, true);
if (err)
goto end;

View File

@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
* (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -127,10 +127,17 @@
/**
* enum kbase_timeout_selector - The choice of which timeout to get scaled
* using the lowest GPU frequency.
* @MMU_AS_INACTIVE_WAIT_TIMEOUT: Maximum waiting time in ms for the completion
* of a MMU operation
* @JM_DEFAULT_JS_FREE_TIMEOUT: Maximum timeout to wait for JS_COMMAND_NEXT
* to be updated on HW side so a Job Slot is
* considered free.
* @KBASE_TIMEOUT_SELECTOR_COUNT: Number of timeout selectors. Must be last in
* the enum.
*/
enum kbase_timeout_selector {
MMU_AS_INACTIVE_WAIT_TIMEOUT,
JM_DEFAULT_JS_FREE_TIMEOUT,
/* Must be the last in the enum */
KBASE_TIMEOUT_SELECTOR_COUNT
@@ -578,7 +585,7 @@ struct kbase_jd_atom {
#if IS_ENABLED(CONFIG_GPU_TRACEPOINTS)
int work_id;
#endif
int slot_nr;
unsigned int slot_nr;
u32 atom_flags;
@@ -852,6 +859,10 @@ struct jsctx_queue {
* @pf_data: Data relating to Page fault.
* @bf_data: Data relating to Bus fault.
* @current_setup: Stores the MMU configuration for this address space.
* @is_unresponsive: Flag to indicate MMU is not responding.
* Set if a MMU command isn't completed within
* &kbase_device:mmu_as_inactive_wait_time_ms.
* Clear by kbase_ctx_sched_restore_all_as() after GPU reset completes.
*/
struct kbase_as {
int number;
@@ -861,6 +872,7 @@ struct kbase_as {
struct kbase_fault pf_data;
struct kbase_fault bf_data;
struct kbase_mmu_setup current_setup;
bool is_unresponsive;
};
#endif /* _KBASE_JM_DEFS_H_ */

View File

@@ -132,15 +132,15 @@ void kbasep_js_kctx_term(struct kbase_context *kctx);
* Atoms of higher priority might still be able to be pulled from the context
* on @js. This helps with starting a high priority atom as soon as possible.
*/
static inline void kbase_jsctx_slot_prio_blocked_set(struct kbase_context *kctx,
int js, int sched_prio)
static inline void kbase_jsctx_slot_prio_blocked_set(struct kbase_context *kctx, unsigned int js,
int sched_prio)
{
struct kbase_jsctx_slot_tracking *slot_tracking =
&kctx->slot_tracking[js];
lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
WARN(!slot_tracking->atoms_pulled_pri[sched_prio],
"When marking slot %d as blocked for priority %d on a kctx, no atoms were pulled - the slot cannot become unblocked",
"When marking slot %u as blocked for priority %d on a kctx, no atoms were pulled - the slot cannot become unblocked",
js, sched_prio);
slot_tracking->blocked |= ((kbase_js_prio_bitmap_t)1) << sched_prio;
@@ -509,19 +509,6 @@ void kbasep_js_resume(struct kbase_device *kbdev);
bool kbase_js_dep_resolved_submit(struct kbase_context *kctx,
struct kbase_jd_atom *katom);
/**
* jsctx_ll_flush_to_rb() - Pushes atoms from the linked list to ringbuffer.
* @kctx: Context Pointer
* @prio: Priority (specifies the queue together with js).
* @js: Job slot (specifies the queue together with prio).
*
* Pushes all possible atoms from the linked list to the ringbuffer.
* Number of atoms are limited to free space in the ringbuffer and
* number of available atoms in the linked list.
*
*/
void jsctx_ll_flush_to_rb(struct kbase_context *kctx, int prio, int js);
/**
* kbase_js_pull - Pull an atom from a context in the job scheduler for
* execution.
@@ -536,7 +523,7 @@ void jsctx_ll_flush_to_rb(struct kbase_context *kctx, int prio, int js);
* Return: a pointer to an atom, or NULL if there are no atoms for this
* slot that can be currently run.
*/
struct kbase_jd_atom *kbase_js_pull(struct kbase_context *kctx, int js);
struct kbase_jd_atom *kbase_js_pull(struct kbase_context *kctx, unsigned int js);
/**
* kbase_js_unpull - Return an atom to the job scheduler ringbuffer.
@@ -617,7 +604,7 @@ bool kbase_js_atom_blocked_on_x_dep(struct kbase_jd_atom *katom);
* been used.
*
*/
void kbase_js_sched(struct kbase_device *kbdev, int js_mask);
void kbase_js_sched(struct kbase_device *kbdev, unsigned int js_mask);
/**
* kbase_js_zap_context - Attempt to deschedule a context that is being

View File

@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
* (C) COPYRIGHT 2011-2018, 2020-2021 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2011-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -277,6 +277,7 @@ typedef u32 kbase_atom_ordering_flag_t;
* @nr_contexts_runnable:Number of contexts that can either be pulled from or
* arecurrently running
* @soft_job_timeout_ms:Value for JS_SOFT_JOB_TIMEOUT
* @js_free_wait_time_ms: Maximum waiting time in ms for a Job Slot to be seen free.
* @queue_mutex: Queue Lock, used to access the Policy's queue of contexts
* independently of the Run Pool.
* Of course, you don't need the Run Pool lock to access this.
@@ -329,6 +330,8 @@ struct kbasep_js_device_data {
u32 nr_contexts_pullable;
atomic_t nr_contexts_runnable;
atomic_t soft_job_timeout_ms;
u32 js_free_wait_time_ms;
struct mutex queue_mutex;
/*
* Run Pool mutex, for managing contexts within the runpool.

View File

@@ -40,6 +40,7 @@ enum base_hw_feature {
BASE_HW_FEATURE_FLUSH_INV_SHADER_OTHER,
BASE_HW_FEATURE_CORE_FEATURES,
BASE_HW_FEATURE_PBHA_HWU,
BASE_HW_FEATURE_LARGE_PAGE_ALLOC,
BASE_HW_FEATURE_END
};
@@ -131,16 +132,6 @@ __attribute__((unused)) static const enum base_hw_feature base_hw_features_tBAx[
BASE_HW_FEATURE_END
};
__attribute__((unused)) static const enum base_hw_feature base_hw_features_tDUx[] = {
BASE_HW_FEATURE_FLUSH_REDUCTION,
BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
BASE_HW_FEATURE_IDVS_GROUP_SIZE,
BASE_HW_FEATURE_L2_CONFIG,
BASE_HW_FEATURE_CLEAN_ONLY_SAFE,
BASE_HW_FEATURE_FLUSH_INV_SHADER_OTHER,
BASE_HW_FEATURE_END
};
__attribute__((unused)) static const enum base_hw_feature base_hw_features_tODx[] = {
BASE_HW_FEATURE_FLUSH_REDUCTION,
BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,

View File

@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
* (C) COPYRIGHT 2014-2022 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2014-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -64,6 +64,9 @@ enum base_hw_issue {
BASE_HW_ISSUE_TURSEHW_2716,
BASE_HW_ISSUE_GPU2019_3901,
BASE_HW_ISSUE_GPU2021PRO_290,
BASE_HW_ISSUE_TITANHW_2710,
BASE_HW_ISSUE_TITANHW_2679,
BASE_HW_ISSUE_GPU2022PRO_148,
BASE_HW_ISSUE_END
};
@@ -88,6 +91,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tMIx_r0p0
BASE_HW_ISSUE_TSIX_2033,
BASE_HW_ISSUE_TTRX_921,
BASE_HW_ISSUE_GPU2017_1336,
BASE_HW_ISSUE_TITANHW_2710,
BASE_HW_ISSUE_GPU2022PRO_148,
BASE_HW_ISSUE_END
};
@@ -108,6 +113,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tMIx_r0p0
BASE_HW_ISSUE_TSIX_2033,
BASE_HW_ISSUE_TTRX_921,
BASE_HW_ISSUE_GPU2017_1336,
BASE_HW_ISSUE_TITANHW_2710,
BASE_HW_ISSUE_GPU2022PRO_148,
BASE_HW_ISSUE_END
};
@@ -128,6 +135,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tMIx_r0p1
BASE_HW_ISSUE_TSIX_2033,
BASE_HW_ISSUE_TTRX_921,
BASE_HW_ISSUE_GPU2017_1336,
BASE_HW_ISSUE_TITANHW_2710,
BASE_HW_ISSUE_GPU2022PRO_148,
BASE_HW_ISSUE_END
};
@@ -143,6 +152,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tMI
BASE_HW_ISSUE_TMIX_8343,
BASE_HW_ISSUE_TMIX_8456,
BASE_HW_ISSUE_TSIX_2033,
BASE_HW_ISSUE_TITANHW_2710,
BASE_HW_ISSUE_GPU2022PRO_148,
BASE_HW_ISSUE_END
};
@@ -156,6 +167,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tHEx_r0p0
BASE_HW_ISSUE_TSIX_2033,
BASE_HW_ISSUE_TTRX_921,
BASE_HW_ISSUE_GPU2017_1336,
BASE_HW_ISSUE_TITANHW_2710,
BASE_HW_ISSUE_GPU2022PRO_148,
BASE_HW_ISSUE_END
};
@@ -169,6 +182,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tHEx_r0p1
BASE_HW_ISSUE_TSIX_2033,
BASE_HW_ISSUE_TTRX_921,
BASE_HW_ISSUE_GPU2017_1336,
BASE_HW_ISSUE_TITANHW_2710,
BASE_HW_ISSUE_GPU2022PRO_148,
BASE_HW_ISSUE_END
};
@@ -182,6 +197,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tHEx_r0p2
BASE_HW_ISSUE_TSIX_2033,
BASE_HW_ISSUE_TTRX_921,
BASE_HW_ISSUE_GPU2017_1336,
BASE_HW_ISSUE_TITANHW_2710,
BASE_HW_ISSUE_GPU2022PRO_148,
BASE_HW_ISSUE_END
};
@@ -194,6 +211,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tHEx_r0p3
BASE_HW_ISSUE_TSIX_2033,
BASE_HW_ISSUE_TTRX_921,
BASE_HW_ISSUE_GPU2017_1336,
BASE_HW_ISSUE_TITANHW_2710,
BASE_HW_ISSUE_GPU2022PRO_148,
BASE_HW_ISSUE_END
};
@@ -204,6 +223,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tHE
BASE_HW_ISSUE_TMIX_8042,
BASE_HW_ISSUE_TMIX_8133,
BASE_HW_ISSUE_TSIX_2033,
BASE_HW_ISSUE_TITANHW_2710,
BASE_HW_ISSUE_GPU2022PRO_148,
BASE_HW_ISSUE_END
};
@@ -217,6 +238,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tSIx_r0p0
BASE_HW_ISSUE_TTRX_921,
BASE_HW_ISSUE_GPU2017_1336,
BASE_HW_ISSUE_TTRX_3464,
BASE_HW_ISSUE_TITANHW_2710,
BASE_HW_ISSUE_GPU2022PRO_148,
BASE_HW_ISSUE_END
};
@@ -230,6 +253,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tSIx_r0p1
BASE_HW_ISSUE_TTRX_921,
BASE_HW_ISSUE_GPU2017_1336,
BASE_HW_ISSUE_TTRX_3464,
BASE_HW_ISSUE_TITANHW_2710,
BASE_HW_ISSUE_GPU2022PRO_148,
BASE_HW_ISSUE_END
};
@@ -242,6 +267,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tSIx_r1p0
BASE_HW_ISSUE_TTRX_921,
BASE_HW_ISSUE_GPU2017_1336,
BASE_HW_ISSUE_TTRX_3464,
BASE_HW_ISSUE_TITANHW_2710,
BASE_HW_ISSUE_GPU2022PRO_148,
BASE_HW_ISSUE_END
};
@@ -253,6 +280,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tSIx_r1p1
BASE_HW_ISSUE_TTRX_921,
BASE_HW_ISSUE_GPU2017_1336,
BASE_HW_ISSUE_TTRX_3464,
BASE_HW_ISSUE_TITANHW_2710,
BASE_HW_ISSUE_GPU2022PRO_148,
BASE_HW_ISSUE_END
};
@@ -263,6 +292,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tSI
BASE_HW_ISSUE_TSIX_1116,
BASE_HW_ISSUE_TSIX_2033,
BASE_HW_ISSUE_TTRX_3464,
BASE_HW_ISSUE_TITANHW_2710,
BASE_HW_ISSUE_GPU2022PRO_148,
BASE_HW_ISSUE_END
};
@@ -274,6 +305,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tDVx_r0p0
BASE_HW_ISSUE_TTRX_921,
BASE_HW_ISSUE_GPU2017_1336,
BASE_HW_ISSUE_TTRX_3464,
BASE_HW_ISSUE_TITANHW_2710,
BASE_HW_ISSUE_GPU2022PRO_148,
BASE_HW_ISSUE_END
};
@@ -284,6 +317,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tDV
BASE_HW_ISSUE_TSIX_1116,
BASE_HW_ISSUE_TSIX_2033,
BASE_HW_ISSUE_TTRX_3464,
BASE_HW_ISSUE_TITANHW_2710,
BASE_HW_ISSUE_GPU2022PRO_148,
BASE_HW_ISSUE_END
};
@@ -296,6 +331,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tNOx_r0p0
BASE_HW_ISSUE_TTRX_921,
BASE_HW_ISSUE_GPU2017_1336,
BASE_HW_ISSUE_TTRX_3464,
BASE_HW_ISSUE_TITANHW_2710,
BASE_HW_ISSUE_GPU2022PRO_148,
BASE_HW_ISSUE_END
};
@@ -306,6 +343,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tNO
BASE_HW_ISSUE_TSIX_1116,
BASE_HW_ISSUE_TSIX_2033,
BASE_HW_ISSUE_TTRX_3464,
BASE_HW_ISSUE_TITANHW_2710,
BASE_HW_ISSUE_GPU2022PRO_148,
BASE_HW_ISSUE_END
};
@@ -318,6 +357,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tGOx_r0p0
BASE_HW_ISSUE_TTRX_921,
BASE_HW_ISSUE_GPU2017_1336,
BASE_HW_ISSUE_TTRX_3464,
BASE_HW_ISSUE_TITANHW_2710,
BASE_HW_ISSUE_GPU2022PRO_148,
BASE_HW_ISSUE_END
};
@@ -330,6 +371,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tGOx_r1p0
BASE_HW_ISSUE_TTRX_921,
BASE_HW_ISSUE_GPU2017_1336,
BASE_HW_ISSUE_TTRX_3464,
BASE_HW_ISSUE_TITANHW_2710,
BASE_HW_ISSUE_GPU2022PRO_148,
BASE_HW_ISSUE_END
};
@@ -340,6 +383,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tGO
BASE_HW_ISSUE_TSIX_1116,
BASE_HW_ISSUE_TSIX_2033,
BASE_HW_ISSUE_TTRX_3464,
BASE_HW_ISSUE_TITANHW_2710,
BASE_HW_ISSUE_GPU2022PRO_148,
BASE_HW_ISSUE_END
};
@@ -356,6 +401,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tTRx_r0p0
BASE_HW_ISSUE_TTRX_3470,
BASE_HW_ISSUE_TTRX_3464,
BASE_HW_ISSUE_TTRX_3485,
BASE_HW_ISSUE_TITANHW_2710,
BASE_HW_ISSUE_GPU2022PRO_148,
BASE_HW_ISSUE_END
};
@@ -372,6 +419,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tTRx_r0p1
BASE_HW_ISSUE_TTRX_3470,
BASE_HW_ISSUE_TTRX_3464,
BASE_HW_ISSUE_TTRX_3485,
BASE_HW_ISSUE_TITANHW_2710,
BASE_HW_ISSUE_GPU2022PRO_148,
BASE_HW_ISSUE_END
};
@@ -387,6 +436,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tTRx_r0p2
BASE_HW_ISSUE_TTRX_3083,
BASE_HW_ISSUE_TTRX_3470,
BASE_HW_ISSUE_TTRX_3464,
BASE_HW_ISSUE_TITANHW_2710,
BASE_HW_ISSUE_GPU2022PRO_148,
BASE_HW_ISSUE_END
};
@@ -399,6 +450,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tTR
BASE_HW_ISSUE_TTRX_3083,
BASE_HW_ISSUE_TTRX_3470,
BASE_HW_ISSUE_TTRX_3464,
BASE_HW_ISSUE_TITANHW_2710,
BASE_HW_ISSUE_GPU2022PRO_148,
BASE_HW_ISSUE_END
};
@@ -415,6 +468,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tNAx_r0p0
BASE_HW_ISSUE_TTRX_3470,
BASE_HW_ISSUE_TTRX_3464,
BASE_HW_ISSUE_TTRX_3485,
BASE_HW_ISSUE_TITANHW_2710,
BASE_HW_ISSUE_GPU2022PRO_148,
BASE_HW_ISSUE_END
};
@@ -430,6 +485,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tNAx_r0p1
BASE_HW_ISSUE_TTRX_3083,
BASE_HW_ISSUE_TTRX_3470,
BASE_HW_ISSUE_TTRX_3464,
BASE_HW_ISSUE_TITANHW_2710,
BASE_HW_ISSUE_GPU2022PRO_148,
BASE_HW_ISSUE_END
};
@@ -442,6 +499,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tNA
BASE_HW_ISSUE_TTRX_3083,
BASE_HW_ISSUE_TTRX_3470,
BASE_HW_ISSUE_TTRX_3464,
BASE_HW_ISSUE_TITANHW_2710,
BASE_HW_ISSUE_GPU2022PRO_148,
BASE_HW_ISSUE_END
};
@@ -456,6 +515,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tBEx_r0p0
BASE_HW_ISSUE_TTRX_3470,
BASE_HW_ISSUE_TTRX_3464,
BASE_HW_ISSUE_TTRX_3485,
BASE_HW_ISSUE_TITANHW_2710,
BASE_HW_ISSUE_GPU2022PRO_148,
BASE_HW_ISSUE_END
};
@@ -469,6 +530,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tBEx_r0p1
BASE_HW_ISSUE_TTRX_3083,
BASE_HW_ISSUE_TTRX_3470,
BASE_HW_ISSUE_TTRX_3464,
BASE_HW_ISSUE_TITANHW_2710,
BASE_HW_ISSUE_GPU2022PRO_148,
BASE_HW_ISSUE_END
};
@@ -482,6 +545,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tBEx_r1p0
BASE_HW_ISSUE_TTRX_3083,
BASE_HW_ISSUE_TTRX_3470,
BASE_HW_ISSUE_TTRX_3464,
BASE_HW_ISSUE_TITANHW_2710,
BASE_HW_ISSUE_GPU2022PRO_148,
BASE_HW_ISSUE_END
};
@@ -495,6 +560,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tBEx_r1p1
BASE_HW_ISSUE_TTRX_3083,
BASE_HW_ISSUE_TTRX_3470,
BASE_HW_ISSUE_TTRX_3464,
BASE_HW_ISSUE_TITANHW_2710,
BASE_HW_ISSUE_GPU2022PRO_148,
BASE_HW_ISSUE_END
};
@@ -507,6 +574,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tBE
BASE_HW_ISSUE_TTRX_3083,
BASE_HW_ISSUE_TTRX_3470,
BASE_HW_ISSUE_TTRX_3464,
BASE_HW_ISSUE_TITANHW_2710,
BASE_HW_ISSUE_GPU2022PRO_148,
BASE_HW_ISSUE_END
};
@@ -521,6 +590,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_lBEx_r1p0
BASE_HW_ISSUE_TTRX_3470,
BASE_HW_ISSUE_TTRX_3464,
BASE_HW_ISSUE_TTRX_3485,
BASE_HW_ISSUE_TITANHW_2710,
BASE_HW_ISSUE_GPU2022PRO_148,
BASE_HW_ISSUE_END
};
@@ -534,6 +605,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_lBEx_r1p1
BASE_HW_ISSUE_TTRX_3083,
BASE_HW_ISSUE_TTRX_3470,
BASE_HW_ISSUE_TTRX_3464,
BASE_HW_ISSUE_TITANHW_2710,
BASE_HW_ISSUE_GPU2022PRO_148,
BASE_HW_ISSUE_END
};
@@ -547,6 +620,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tBAx_r0p0
BASE_HW_ISSUE_TTRX_3083,
BASE_HW_ISSUE_TTRX_3470,
BASE_HW_ISSUE_TTRX_3464,
BASE_HW_ISSUE_TITANHW_2710,
BASE_HW_ISSUE_GPU2022PRO_148,
BASE_HW_ISSUE_END
};
@@ -560,6 +635,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tBAx_r1p0
BASE_HW_ISSUE_TTRX_3083,
BASE_HW_ISSUE_TTRX_3470,
BASE_HW_ISSUE_TTRX_3464,
BASE_HW_ISSUE_TITANHW_2710,
BASE_HW_ISSUE_GPU2022PRO_148,
BASE_HW_ISSUE_END
};
@@ -572,90 +649,74 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tBA
BASE_HW_ISSUE_TTRX_3083,
BASE_HW_ISSUE_TTRX_3470,
BASE_HW_ISSUE_TTRX_3464,
BASE_HW_ISSUE_END
};
__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tDUx_r0p0[] = {
BASE_HW_ISSUE_9435,
BASE_HW_ISSUE_TSIX_2033,
BASE_HW_ISSUE_TTRX_1337,
BASE_HW_ISSUE_TTRX_921,
BASE_HW_ISSUE_TTRX_3414,
BASE_HW_ISSUE_TTRX_3083,
BASE_HW_ISSUE_END
};
__attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tDUx[] = {
BASE_HW_ISSUE_5736,
BASE_HW_ISSUE_9435,
BASE_HW_ISSUE_TSIX_2033,
BASE_HW_ISSUE_TTRX_1337,
BASE_HW_ISSUE_TTRX_3414,
BASE_HW_ISSUE_TTRX_3083,
BASE_HW_ISSUE_TITANHW_2710,
BASE_HW_ISSUE_GPU2022PRO_148,
BASE_HW_ISSUE_END
};
__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tODx_r0p0[] = {
BASE_HW_ISSUE_9435,
BASE_HW_ISSUE_TSIX_2033,
BASE_HW_ISSUE_TTRX_1337,
BASE_HW_ISSUE_GPU2019_3212,
BASE_HW_ISSUE_GPU2019_3878,
BASE_HW_ISSUE_GPU2019_3901,
BASE_HW_ISSUE_TITANHW_2710,
BASE_HW_ISSUE_GPU2022PRO_148,
BASE_HW_ISSUE_END
};
__attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tODx[] = {
BASE_HW_ISSUE_5736,
BASE_HW_ISSUE_9435,
BASE_HW_ISSUE_TSIX_2033,
BASE_HW_ISSUE_TTRX_1337,
BASE_HW_ISSUE_GPU2019_3212,
BASE_HW_ISSUE_GPU2019_3878,
BASE_HW_ISSUE_GPU2019_3901,
BASE_HW_ISSUE_TITANHW_2710,
BASE_HW_ISSUE_GPU2022PRO_148,
BASE_HW_ISSUE_END
};
__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tGRx_r0p0[] = {
BASE_HW_ISSUE_9435,
BASE_HW_ISSUE_TSIX_2033,
BASE_HW_ISSUE_TTRX_1337,
BASE_HW_ISSUE_GPU2019_3878,
BASE_HW_ISSUE_GPU2019_3901,
BASE_HW_ISSUE_TITANHW_2710,
BASE_HW_ISSUE_GPU2022PRO_148,
BASE_HW_ISSUE_END
};
__attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tGRx[] = {
BASE_HW_ISSUE_5736,
BASE_HW_ISSUE_9435,
BASE_HW_ISSUE_TSIX_2033,
BASE_HW_ISSUE_TTRX_1337,
BASE_HW_ISSUE_GPU2019_3878,
BASE_HW_ISSUE_GPU2019_3901,
BASE_HW_ISSUE_TITANHW_2710,
BASE_HW_ISSUE_GPU2022PRO_148,
BASE_HW_ISSUE_END
};
__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tVAx_r0p0[] = {
BASE_HW_ISSUE_9435,
BASE_HW_ISSUE_TSIX_2033,
BASE_HW_ISSUE_TTRX_1337,
BASE_HW_ISSUE_GPU2019_3878,
BASE_HW_ISSUE_GPU2019_3901,
BASE_HW_ISSUE_TITANHW_2710,
BASE_HW_ISSUE_GPU2022PRO_148,
BASE_HW_ISSUE_END
};
__attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tVAx[] = {
BASE_HW_ISSUE_5736,
BASE_HW_ISSUE_9435,
BASE_HW_ISSUE_TSIX_2033,
BASE_HW_ISSUE_TTRX_1337,
BASE_HW_ISSUE_GPU2019_3878,
BASE_HW_ISSUE_GPU2019_3901,
BASE_HW_ISSUE_TITANHW_2710,
BASE_HW_ISSUE_GPU2022PRO_148,
BASE_HW_ISSUE_END
};
__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tTUx_r0p0[] = {
BASE_HW_ISSUE_9435,
BASE_HW_ISSUE_TSIX_2033,
BASE_HW_ISSUE_TTRX_1337,
BASE_HW_ISSUE_TURSEHW_1997,
@@ -663,70 +724,110 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tTUx_r0p0
BASE_HW_ISSUE_TURSEHW_2716,
BASE_HW_ISSUE_GPU2019_3901,
BASE_HW_ISSUE_GPU2021PRO_290,
BASE_HW_ISSUE_TITANHW_2710,
BASE_HW_ISSUE_TITANHW_2679,
BASE_HW_ISSUE_GPU2022PRO_148,
BASE_HW_ISSUE_END
};
__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tTUx_r0p1[] = {
BASE_HW_ISSUE_TSIX_2033,
BASE_HW_ISSUE_TTRX_1337,
BASE_HW_ISSUE_TURSEHW_1997,
BASE_HW_ISSUE_GPU2019_3878,
BASE_HW_ISSUE_TURSEHW_2716,
BASE_HW_ISSUE_GPU2019_3901,
BASE_HW_ISSUE_GPU2021PRO_290,
BASE_HW_ISSUE_TITANHW_2710,
BASE_HW_ISSUE_TITANHW_2679,
BASE_HW_ISSUE_GPU2022PRO_148,
BASE_HW_ISSUE_END
};
__attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tTUx[] = {
BASE_HW_ISSUE_5736,
BASE_HW_ISSUE_9435,
BASE_HW_ISSUE_TSIX_2033,
BASE_HW_ISSUE_TTRX_1337,
BASE_HW_ISSUE_GPU2019_3878,
BASE_HW_ISSUE_TURSEHW_2716,
BASE_HW_ISSUE_GPU2019_3901,
BASE_HW_ISSUE_GPU2021PRO_290,
BASE_HW_ISSUE_TITANHW_2710,
BASE_HW_ISSUE_TITANHW_2679,
BASE_HW_ISSUE_GPU2022PRO_148,
BASE_HW_ISSUE_END
};
__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tTUx_r1p0[] = {
BASE_HW_ISSUE_9435,
BASE_HW_ISSUE_TSIX_2033,
BASE_HW_ISSUE_TTRX_1337,
BASE_HW_ISSUE_GPU2019_3878,
BASE_HW_ISSUE_TURSEHW_2716,
BASE_HW_ISSUE_GPU2019_3901,
BASE_HW_ISSUE_GPU2021PRO_290,
BASE_HW_ISSUE_TITANHW_2710,
BASE_HW_ISSUE_TITANHW_2679,
BASE_HW_ISSUE_GPU2022PRO_148,
BASE_HW_ISSUE_END
};
__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tTUx_r1p1[] = {
BASE_HW_ISSUE_9435,
BASE_HW_ISSUE_TSIX_2033,
BASE_HW_ISSUE_TTRX_1337,
BASE_HW_ISSUE_GPU2019_3878,
BASE_HW_ISSUE_TURSEHW_2716,
BASE_HW_ISSUE_GPU2019_3901,
BASE_HW_ISSUE_GPU2021PRO_290,
BASE_HW_ISSUE_TITANHW_2710,
BASE_HW_ISSUE_TITANHW_2679,
BASE_HW_ISSUE_GPU2022PRO_148,
BASE_HW_ISSUE_END
};
__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tTUx_r1p2[] = {
BASE_HW_ISSUE_9435,
BASE_HW_ISSUE_TSIX_2033,
BASE_HW_ISSUE_TTRX_1337,
BASE_HW_ISSUE_GPU2019_3878,
BASE_HW_ISSUE_TURSEHW_2716,
BASE_HW_ISSUE_GPU2019_3901,
BASE_HW_ISSUE_GPU2021PRO_290,
BASE_HW_ISSUE_TITANHW_2710,
BASE_HW_ISSUE_TITANHW_2679,
BASE_HW_ISSUE_GPU2022PRO_148,
BASE_HW_ISSUE_END
};
__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tTUx_r1p3[] = {
BASE_HW_ISSUE_TSIX_2033,
BASE_HW_ISSUE_TTRX_1337,
BASE_HW_ISSUE_GPU2019_3878,
BASE_HW_ISSUE_TURSEHW_2716,
BASE_HW_ISSUE_GPU2019_3901,
BASE_HW_ISSUE_GPU2021PRO_290,
BASE_HW_ISSUE_TITANHW_2710,
BASE_HW_ISSUE_TITANHW_2679,
BASE_HW_ISSUE_GPU2022PRO_148,
BASE_HW_ISSUE_END
};
__attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tTIx[] = {
BASE_HW_ISSUE_5736,
BASE_HW_ISSUE_9435,
BASE_HW_ISSUE_TSIX_2033,
BASE_HW_ISSUE_TTRX_1337,
BASE_HW_ISSUE_TURSEHW_2716,
BASE_HW_ISSUE_GPU2021PRO_290,
BASE_HW_ISSUE_TITANHW_2710,
BASE_HW_ISSUE_TITANHW_2679,
BASE_HW_ISSUE_GPU2022PRO_148,
BASE_HW_ISSUE_END
};
__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tTIx_r0p0[] = {
BASE_HW_ISSUE_9435,
BASE_HW_ISSUE_TSIX_2033,
BASE_HW_ISSUE_TTRX_1337,
BASE_HW_ISSUE_TURSEHW_2716,
BASE_HW_ISSUE_GPU2021PRO_290,
BASE_HW_ISSUE_TITANHW_2710,
BASE_HW_ISSUE_TITANHW_2679,
BASE_HW_ISSUE_GPU2022PRO_148,
BASE_HW_ISSUE_END
};

View File

@@ -332,21 +332,8 @@ int kbase_job_slot_softstop_start_rp(struct kbase_context *kctx,
void kbase_job_slot_softstop(struct kbase_device *kbdev, int js,
struct kbase_jd_atom *target_katom);
void kbase_job_slot_softstop_swflags(struct kbase_device *kbdev, int js,
struct kbase_jd_atom *target_katom, u32 sw_flags);
/**
* kbase_job_slot_hardstop - Hard-stop the specified job slot
* @kctx: The kbase context that contains the job(s) that should
* be hard-stopped
* @js: The job slot to hard-stop
* @target_katom: The job that should be hard-stopped (or NULL for all
* jobs from the context)
* Context:
* The job slot lock must be held when calling this function.
*/
void kbase_job_slot_hardstop(struct kbase_context *kctx, int js,
struct kbase_jd_atom *target_katom);
void kbase_job_slot_softstop_swflags(struct kbase_device *kbdev, unsigned int js,
struct kbase_jd_atom *target_katom, u32 sw_flags);
/**
* kbase_job_check_enter_disjoint - potentiall enter disjoint mode
@@ -441,19 +428,6 @@ static inline void kbase_free_user_buffer(
}
}
/**
* kbase_mem_copy_from_extres() - Copy from external resources.
*
* @kctx: kbase context within which the copying is to take place.
* @buf_data: Pointer to the information about external resources:
* pages pertaining to the external resource, number of
* pages to copy.
*
* Return: 0 on success, error code otherwise.
*/
int kbase_mem_copy_from_extres(struct kbase_context *kctx,
struct kbase_debug_copy_buffer *buf_data);
#if !MALI_USE_CSF
int kbase_process_soft_job(struct kbase_jd_atom *katom);
int kbase_prepare_soft_job(struct kbase_jd_atom *katom);

View File

@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
* (C) COPYRIGHT 2016-2021 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2016-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -98,11 +98,9 @@ void kbase_as_fault_debugfs_init(struct kbase_device *kbdev)
"unable to create address_spaces debugfs directory");
} else {
for (i = 0; i < kbdev->nr_hw_address_spaces; i++) {
snprintf(as_name, ARRAY_SIZE(as_name), "as%u", i);
debugfs_create_file(as_name, 0444,
debugfs_directory,
(void *)(uintptr_t)i,
&as_fault_fops);
if (likely(scnprintf(as_name, ARRAY_SIZE(as_name), "as%u", i)))
debugfs_create_file(as_name, 0444, debugfs_directory,
(void *)(uintptr_t)i, &as_fault_fops);
}
}

View File

@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
* (C) COPYRIGHT 2013-2022 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2013-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -221,6 +221,16 @@ enum {
*/
#define JM_DEFAULT_RESET_TIMEOUT_MS (3000) /* 3s */
/* Default timeout in clock cycles to be used when checking if JS_COMMAND_NEXT
* is updated on HW side so a Job Slot is considered free.
* This timeout will only take effect on GPUs with low value for the minimum
* GPU clock frequency (<= 100MHz).
*
* Based on 1ms timeout at 100MHz. Will default to 0ms on GPUs with higher
* value for minimum GPU clock frequency.
*/
#define JM_DEFAULT_JS_FREE_TIMEOUT_CYCLES (100000)
#endif /* MALI_USE_CSF */
/* Default timeslice that a context is scheduled in for, in nanoseconds.
@@ -257,5 +267,12 @@ enum {
*/
#define DEFAULT_IR_THRESHOLD (192)
/* Waiting time in clock cycles for the completion of a MMU operation.
*
* Ideally 1.6M GPU cycles required for the L2 cache (512KiB slice) flush.
*
* As a pessimistic value, 50M GPU cycles ( > 30 times bigger ) is chosen.
* It corresponds to 0.5s in GPU @ 100Mhz.
*/
#define MMU_AS_INACTIVE_WAIT_TIMEOUT_CYCLES ((u64)50 * 1024 * 1024)
#endif /* _KBASE_CONFIG_DEFAULTS_H_ */

View File

@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
* (C) COPYRIGHT 2010-2022 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -31,10 +31,7 @@
#include <ipa/mali_kbase_ipa_debugfs.h>
#endif /* CONFIG_DEVFREQ_THERMAL */
#endif /* CONFIG_MALI_DEVFREQ */
#if IS_ENABLED(CONFIG_MALI_NO_MALI)
#include "backend/gpu/mali_kbase_model_linux.h"
#include <backend/gpu/mali_kbase_model_dummy.h>
#endif /* CONFIG_MALI_NO_MALI */
#include "uapi/gpu/arm/midgard/mali_kbase_mem_profile_debugfs_buf_size.h"
#include "mali_kbase_mem.h"
#include "mali_kbase_mem_pool_debugfs.h"
@@ -624,7 +621,8 @@ static int kbase_file_create_kctx(struct kbase_file *const kfile,
kbase_ctx_flag_set(kctx, KCTX_INFINITE_CACHE);
#if IS_ENABLED(CONFIG_DEBUG_FS)
snprintf(kctx_name, 64, "%d_%d", kctx->tgid, kctx->id);
if (unlikely(!scnprintf(kctx_name, 64, "%d_%d", kctx->tgid, kctx->id)))
return -ENOMEM;
mutex_init(&kctx->mem_profile_lock);
@@ -663,8 +661,10 @@ static int kbase_open(struct inode *inode, struct file *filp)
if (!kbdev)
return -ENODEV;
/* Set address space operation for page migration */
//kbase_mem_migrate_set_address_space_ops(kbdev, filp); mdrjr
#if (KERNEL_VERSION(6, 0, 0) > LINUX_VERSION_CODE)
/* Set address space operations for page migration */
kbase_mem_migrate_set_address_space_ops(kbdev, filp);
#endif
/* Device-wide firmware load is moved here from probing to comply with
* Android GKI vendor guideline.
@@ -1459,6 +1459,9 @@ static int kbasep_kcpu_queue_enqueue(struct kbase_context *kctx,
static int kbasep_cs_tiler_heap_init(struct kbase_context *kctx,
union kbase_ioctl_cs_tiler_heap_init *heap_init)
{
if (heap_init->in.group_id >= MEMORY_GROUP_MANAGER_NR_GROUPS)
return -EINVAL;
kctx->jit_group_id = heap_init->in.group_id;
return kbase_csf_tiler_heap_init(kctx, heap_init->in.chunk_size,
@@ -1471,6 +1474,9 @@ static int kbasep_cs_tiler_heap_init(struct kbase_context *kctx,
static int kbasep_cs_tiler_heap_init_1_13(struct kbase_context *kctx,
union kbase_ioctl_cs_tiler_heap_init_1_13 *heap_init)
{
if (heap_init->in.group_id >= MEMORY_GROUP_MANAGER_NR_GROUPS)
return -EINVAL;
kctx->jit_group_id = heap_init->in.group_id;
return kbase_csf_tiler_heap_init(kctx, heap_init->in.chunk_size,
@@ -1559,7 +1565,6 @@ static int kbasep_ioctl_cs_cpu_queue_dump(struct kbase_context *kctx,
cpu_queue_info->size);
}
#define POWER_DOWN_LATEST_FLUSH_VALUE ((u32)1)
static int kbase_ioctl_read_user_page(struct kbase_context *kctx,
union kbase_ioctl_read_user_page *user_page)
{
@@ -2045,6 +2050,7 @@ static long kbase_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
struct kbase_ioctl_cs_cpu_queue_info,
kctx);
break;
/* This IOCTL will be kept for backward compatibility */
case KBASE_IOCTL_READ_USER_PAGE:
KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_READ_USER_PAGE, kbase_ioctl_read_user_page,
union kbase_ioctl_read_user_page, kctx);
@@ -2211,7 +2217,10 @@ KBASE_EXPORT_TEST_API(kbase_event_wakeup);
#if MALI_USE_CSF
int kbase_event_pending(struct kbase_context *ctx)
{
WARN_ON_ONCE(!ctx);
KBASE_DEBUG_ASSERT(ctx);
if (unlikely(!ctx))
return -EPERM;
return (atomic_read(&ctx->event_count) != 0) ||
kbase_csf_event_error_pending(ctx) ||
@@ -2222,6 +2231,9 @@ int kbase_event_pending(struct kbase_context *ctx)
{
KBASE_DEBUG_ASSERT(ctx);
if (unlikely(!ctx))
return -EPERM;
return (atomic_read(&ctx->event_count) != 0) ||
(atomic_read(&ctx->event_closed) != 0);
}
@@ -4278,7 +4290,7 @@ static int kbase_common_reg_map(struct kbase_device *kbdev)
static void kbase_common_reg_unmap(struct kbase_device * const kbdev)
{
}
#else /* CONFIG_MALI_NO_MALI */
#else /* !IS_ENABLED(CONFIG_MALI_NO_MALI) */
static int kbase_common_reg_map(struct kbase_device *kbdev)
{
int err = 0;
@@ -4314,7 +4326,7 @@ static void kbase_common_reg_unmap(struct kbase_device * const kbdev)
kbdev->reg_size = 0;
}
}
#endif /* CONFIG_MALI_NO_MALI */
#endif /* !IS_ENABLED(CONFIG_MALI_NO_MALI) */
int registers_map(struct kbase_device * const kbdev)
{
@@ -4578,18 +4590,29 @@ int power_control_init(struct kbase_device *kbdev)
* from completing its initialization.
*/
#if defined(CONFIG_PM_OPP)
#if ((KERNEL_VERSION(4, 10, 0) <= LINUX_VERSION_CODE) && \
defined(CONFIG_REGULATOR))
#if defined(CONFIG_REGULATOR)
#if (KERNEL_VERSION(6, 0, 0) <= LINUX_VERSION_CODE)
if (kbdev->nr_regulators > 0) {
kbdev->token = dev_pm_opp_set_regulators(kbdev->dev, regulator_names);
if (kbdev->token < 0) {
err = kbdev->token;
goto regulators_probe_defer;
}
}
#elif (KERNEL_VERSION(4, 10, 0) <= LINUX_VERSION_CODE)
if (kbdev->nr_regulators > 0) {
kbdev->opp_table = dev_pm_opp_set_regulators(kbdev->dev,
regulator_names);
regulator_names, BASE_MAX_NR_CLOCKS_REGULATORS);
if (IS_ERR(kbdev->opp_table)) {
err = PTR_ERR(kbdev->opp_table);
goto regulators_probe_defer;
}
}
#endif /* (KERNEL_VERSION(4, 10, 0) <= LINUX_VERSION_CODE */
#endif /* (KERNEL_VERSION(6, 0, 0) <= LINUX_VERSION_CODE) */
#endif /* CONFIG_REGULATOR */
err = dev_pm_opp_of_add_table(kbdev->dev);
CSTD_UNUSED(err);
#endif /* CONFIG_PM_OPP */
@@ -4624,11 +4647,15 @@ void power_control_term(struct kbase_device *kbdev)
#if defined(CONFIG_PM_OPP)
dev_pm_opp_of_remove_table(kbdev->dev);
#if ((KERNEL_VERSION(4, 10, 0) <= LINUX_VERSION_CODE) && \
defined(CONFIG_REGULATOR))
#if defined(CONFIG_REGULATOR)
#if (KERNEL_VERSION(6, 0, 0) <= LINUX_VERSION_CODE)
if (kbdev->token > -EPERM)
dev_pm_opp_put_regulators(kbdev->token);
#elif (KERNEL_VERSION(4, 10, 0) <= LINUX_VERSION_CODE)
if (!IS_ERR_OR_NULL(kbdev->opp_table))
dev_pm_opp_put_regulators(kbdev->opp_table);
#endif /* (KERNEL_VERSION(4, 10, 0) <= LINUX_VERSION_CODE */
#endif /* (KERNEL_VERSION(6, 0, 0) <= LINUX_VERSION_CODE) */
#endif /* CONFIG_REGULATOR */
#endif /* CONFIG_PM_OPP */
for (i = 0; i < BASE_MAX_NR_CLOCKS_REGULATORS; i++) {
@@ -5491,6 +5518,11 @@ static int kbase_platform_device_probe(struct platform_device *pdev)
}
kbdev->dev = &pdev->dev;
#if (KERNEL_VERSION(6, 0, 0) <= LINUX_VERSION_CODE)
kbdev->token = -EPERM;
#endif /* (KERNEL_VERSION(6, 0, 0) <= LINUX_VERSION_CODE) */
dev_set_drvdata(kbdev->dev, kbdev);
#if (KERNEL_VERSION(5, 3, 0) <= LINUX_VERSION_CODE)
mutex_lock(&kbase_probe_mutex);

View File

@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
* (C) COPYRIGHT 2017-2022 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2017-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -69,6 +69,12 @@ void kbase_ctx_sched_term(struct kbase_device *kbdev)
}
}
void kbase_ctx_sched_init_ctx(struct kbase_context *kctx)
{
kctx->as_nr = KBASEP_AS_NR_INVALID;
atomic_set(&kctx->refcount, 0);
}
/* kbasep_ctx_sched_find_as_for_ctx - Find a free address space
*
* @kbdev: The context for which to find a free address space
@@ -113,7 +119,7 @@ int kbase_ctx_sched_retain_ctx(struct kbase_context *kctx)
if (atomic_inc_return(&kctx->refcount) == 1) {
int const free_as = kbasep_ctx_sched_find_as_for_ctx(kctx);
if (free_as != KBASEP_AS_NR_INVALID) {
if (free_as >= 0) {
kbdev->as_free &= ~(1u << free_as);
/* Only program the MMU if the context has not been
* assigned the same address space before.
@@ -167,8 +173,10 @@ void kbase_ctx_sched_retain_ctx_refcount(struct kbase_context *kctx)
*/
WARN_ON(!atomic_read(&kctx->refcount));
#endif
WARN_ON(kctx->as_nr == KBASEP_AS_NR_INVALID);
WARN_ON(kbdev->as_to_kctx[kctx->as_nr] != kctx);
if (likely((kctx->as_nr >= 0) && (kctx->as_nr < BASE_MAX_NR_AS)))
WARN_ON(kbdev->as_to_kctx[kctx->as_nr] != kctx);
else
WARN(true, "Invalid as_nr(%d)", kctx->as_nr);
atomic_inc(&kctx->refcount);
}
@@ -182,16 +190,17 @@ void kbase_ctx_sched_release_ctx(struct kbase_context *kctx)
new_ref_count = atomic_dec_return(&kctx->refcount);
if (new_ref_count == 0) {
kbdev->as_free |= (1u << kctx->as_nr);
if (kbase_ctx_flag(kctx, KCTX_AS_DISABLED_ON_FAULT)) {
KBASE_TLSTREAM_TL_KBASE_CTX_UNASSIGN_AS(
kbdev, kctx->id);
kbdev->as_to_kctx[kctx->as_nr] = NULL;
kctx->as_nr = KBASEP_AS_NR_INVALID;
kbase_ctx_flag_clear(kctx, KCTX_AS_DISABLED_ON_FAULT);
if (likely((kctx->as_nr >= 0) && (kctx->as_nr < BASE_MAX_NR_AS))) {
kbdev->as_free |= (1u << kctx->as_nr);
if (kbase_ctx_flag(kctx, KCTX_AS_DISABLED_ON_FAULT)) {
KBASE_TLSTREAM_TL_KBASE_CTX_UNASSIGN_AS(kbdev, kctx->id);
kbdev->as_to_kctx[kctx->as_nr] = NULL;
kctx->as_nr = KBASEP_AS_NR_INVALID;
kbase_ctx_flag_clear(kctx, KCTX_AS_DISABLED_ON_FAULT);
#if !MALI_USE_CSF
kbase_backend_slot_kctx_purge_locked(kbdev, kctx);
kbase_backend_slot_kctx_purge_locked(kbdev, kctx);
#endif
}
}
}
@@ -201,13 +210,14 @@ void kbase_ctx_sched_release_ctx(struct kbase_context *kctx)
void kbase_ctx_sched_remove_ctx(struct kbase_context *kctx)
{
struct kbase_device *const kbdev = kctx->kbdev;
unsigned long flags;
lockdep_assert_held(&kbdev->mmu_hw_mutex);
lockdep_assert_held(&kbdev->hwaccess_lock);
mutex_lock(&kbdev->mmu_hw_mutex);
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
WARN_ON(atomic_read(&kctx->refcount) != 0);
if (kctx->as_nr != KBASEP_AS_NR_INVALID) {
if ((kctx->as_nr >= 0) && (kctx->as_nr < BASE_MAX_NR_AS)) {
if (kbdev->pm.backend.gpu_powered)
kbase_mmu_disable(kctx);
@@ -215,6 +225,9 @@ void kbase_ctx_sched_remove_ctx(struct kbase_context *kctx)
kbdev->as_to_kctx[kctx->as_nr] = NULL;
kctx->as_nr = KBASEP_AS_NR_INVALID;
}
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
mutex_unlock(&kbdev->mmu_hw_mutex);
}
void kbase_ctx_sched_restore_all_as(struct kbase_device *kbdev)
@@ -229,6 +242,7 @@ void kbase_ctx_sched_restore_all_as(struct kbase_device *kbdev)
for (i = 0; i != kbdev->nr_hw_address_spaces; ++i) {
struct kbase_context *kctx;
kbdev->as[i].is_unresponsive = false;
#if MALI_USE_CSF
if ((i == MCU_AS_NR) && kbdev->csf.firmware_inited) {
kbase_mmu_update(kbdev, &kbdev->csf.mcu_mmu,

View File

@@ -59,6 +59,15 @@ int kbase_ctx_sched_init(struct kbase_device *kbdev);
*/
void kbase_ctx_sched_term(struct kbase_device *kbdev);
/**
* kbase_ctx_sched_ctx_init - Initialize per-context data fields for scheduling
* @kctx: The context to initialize
*
* This must be called during context initialization before any other context
* scheduling functions are called on @kctx
*/
void kbase_ctx_sched_init_ctx(struct kbase_context *kctx);
/**
* kbase_ctx_sched_retain_ctx - Retain a reference to the @ref kbase_context
* @kctx: The context to which to retain a reference
@@ -113,9 +122,6 @@ void kbase_ctx_sched_release_ctx(struct kbase_context *kctx);
* This function should be called when a context is being destroyed. The
* context must no longer have any reference. If it has been assigned an
* address space before then the AS will be unprogrammed.
*
* The kbase_device::mmu_hw_mutex and kbase_device::hwaccess_lock locks must be
* held whilst calling this function.
*/
void kbase_ctx_sched_remove_ctx(struct kbase_context *kctx);

View File

@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
* (C) COPYRIGHT 2022 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2022-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -26,7 +26,7 @@
#include "mali_kbase_debug_mem_allocs.h"
#include "mali_kbase.h"
#include <string.h>
#include <linux/string.h>
#include <linux/list.h>
#include <linux/file.h>

View File

@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
* (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -90,11 +90,10 @@ set_attr_from_string(char *const buf, void *const array, size_t const nelems,
int kbase_debugfs_string_validator(char *const buf)
{
size_t index;
int err = 0;
char *ptr = buf;
for (index = 0; *ptr; ++index) {
while (*ptr) {
unsigned long test_number;
size_t len;

View File

@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
* (C) COPYRIGHT 2011-2022 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2011-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -238,12 +238,25 @@ struct kbase_fault {
bool protected_mode;
};
/** Maximum number of memory pages that should be allocated for the array
* of pointers to free PGDs.
*
* This number has been pre-calculated to deal with the maximum allocation
* size expressed by the default value of KBASE_MEM_ALLOC_MAX_SIZE.
* This is supposed to be enough for almost the entirety of MMU operations.
* Any size greater than KBASE_MEM_ALLOC_MAX_SIZE requires being broken down
* into multiple iterations, each dealing with at most KBASE_MEM_ALLOC_MAX_SIZE
* bytes.
*
* Please update this value if KBASE_MEM_ALLOC_MAX_SIZE changes.
*/
#define MAX_PAGES_FOR_FREE_PGDS ((size_t)9)
/* Maximum number of pointers to free PGDs */
#define MAX_FREE_PGDS ((PAGE_SIZE / sizeof(struct page *)) * MAX_PAGES_FOR_FREE_PGDS)
/**
* struct kbase_mmu_table - object representing a set of GPU page tables
* @mmu_teardown_pages: Array containing pointers to 3 separate pages, used
* to cache the entries of top (L0) & intermediate level
* page tables (L1 & L2) to avoid repeated calls to
* kmap_atomic() during the MMU teardown.
* @mmu_lock: Lock to serialize the accesses made to multi level GPU
* page tables
* @pgd: Physical address of the page allocated for the top
@@ -255,14 +268,40 @@ struct kbase_fault {
* Valid range is 0..(MEMORY_GROUP_MANAGER_NR_GROUPS-1).
* @kctx: If this set of MMU tables belongs to a context then
* this is a back-reference to the context, otherwise
* it is NULL
* it is NULL.
* @scratch_mem: Scratch memory used for MMU operations, which are
* serialized by the @mmu_lock.
*/
struct kbase_mmu_table {
u64 *mmu_teardown_pages[MIDGARD_MMU_BOTTOMLEVEL];
struct mutex mmu_lock;
phys_addr_t pgd;
u8 group_id;
struct kbase_context *kctx;
union {
/**
* @teardown_pages: Scratch memory used for backup copies of whole
* PGD pages when tearing down levels upon
* termination of the MMU table.
*/
struct {
/**
* @levels: Array of PGD pages, large enough to copy one PGD
* for each level of the MMU table.
*/
u64 levels[MIDGARD_MMU_BOTTOMLEVEL][PAGE_SIZE / sizeof(u64)];
} teardown_pages;
/**
* @free_pgds: Scratch memory user for insertion, update and teardown
* operations to store a temporary list of PGDs to be freed
* at the end of the operation.
*/
struct {
/** @pgds: Array of pointers to PGDs to free. */
struct page *pgds[MAX_FREE_PGDS];
/** @head_index: Index of first free element in the PGDs array. */
size_t head_index;
} free_pgds;
} scratch_mem;
};
/**
@@ -286,6 +325,8 @@ struct kbase_reg_zone {
#include "jm/mali_kbase_jm_defs.h"
#endif
#include "mali_kbase_hwaccess_time.h"
static inline int kbase_as_has_bus_fault(struct kbase_as *as,
struct kbase_fault *fault)
{
@@ -643,7 +684,6 @@ struct kbase_process {
* struct kbase_mem_migrate - Object representing an instance for managing
* page migration.
*
* @mapping: Pointer to address space struct used for page migration.
* @free_pages_list: List of deferred pages to free. Mostly used when page migration
* is enabled. Pages in memory pool that require migrating
* will be freed instead. However page cannot be freed
@@ -654,13 +694,17 @@ struct kbase_process {
* @free_pages_workq: Work queue to process the work items queued to free
* pages in @free_pages_list.
* @free_pages_work: Work item to free pages in @free_pages_list.
* @inode: Pointer to inode whose address space operations are used
* for page migration purposes.
*/
struct kbase_mem_migrate {
struct address_space *mapping;
struct list_head free_pages_list;
spinlock_t free_pages_lock;
struct workqueue_struct *free_pages_workq;
struct work_struct free_pages_work;
#if (KERNEL_VERSION(6, 0, 0) > LINUX_VERSION_CODE)
struct inode *inode;
#endif
};
/**
@@ -701,6 +745,10 @@ struct kbase_mem_migrate {
* @opp_table: Pointer to the device OPP structure maintaining the
* link to OPPs attached to a device. This is obtained
* after setting regulator names for the device.
* @token: Integer replacement for opp_table in kernel versions
* 6 and greater. Value is a token id number when 0 or greater,
* and a linux errno when negative. Must be initialised
* to an non-zero value as 0 is valid token id.
* @devname: string containing the name used for GPU device instance,
* miscellaneous device is registered using the same name.
* @id: Unique identifier for the device, indicates the number of
@@ -747,6 +795,8 @@ struct kbase_mem_migrate {
* GPU adrress spaces assigned to them.
* @mmu_mask_change: Lock to serialize the access to MMU interrupt mask
* register used in the handling of Bus & Page faults.
* @pagesize_2mb: Boolean to determine whether 2MiB page sizes are
* supported and used where possible.
* @gpu_props: Object containing complete information about the
* configuration/properties of GPU HW device in use.
* @hw_issues_mask: List of SW workarounds for HW issues
@@ -792,6 +842,7 @@ struct kbase_mem_migrate {
* GPU reset.
* @lowest_gpu_freq_khz: Lowest frequency in KHz that the GPU can run at. Used
* to calculate suitable timeouts for wait operations.
* @backend_time: Kbase backend time related attributes.
* @cache_clean_in_progress: Set when a cache clean has been started, and
* cleared when it has finished. This prevents multiple
* cache cleans being done simultaneously.
@@ -898,6 +949,10 @@ struct kbase_mem_migrate {
* GPU2019-3878. PM state machine is invoked after
* clearing this flag and @hwaccess_lock is used to
* serialize the access.
* @mmu_page_migrate_in_progress: Set before starting a MMU page migration transaction
* and cleared after the transaction completes. PM L2 state is
* prevented from entering powering up/down transitions when the
* flag is set, @hwaccess_lock is used to serialize the access.
* @poweroff_pending: Set when power off operation for GPU is started, reset when
* power on for GPU is started.
* @infinite_cache_active_default: Set to enable using infinite cache for all the
@@ -978,6 +1033,13 @@ struct kbase_mem_migrate {
* @oom_notifier_block: notifier_block containing kernel-registered out-of-
* memory handler.
* @mem_migrate: Per device object for managing page migration.
* @live_fence_metadata: Count of live fence metadata structures created by
* KCPU queue. These structures may outlive kbase module
* itself. Therefore, in such a case, a warning should be
* be produced.
* @mmu_as_inactive_wait_time_ms: Maximum waiting time in ms for the completion of
* a MMU operation
* @va_region_slab: kmem_cache (slab) for allocated kbase_va_region structures.
*/
struct kbase_device {
u32 hw_quirks_sc;
@@ -1002,14 +1064,16 @@ struct kbase_device {
#if IS_ENABLED(CONFIG_REGULATOR)
struct regulator *regulators[BASE_MAX_NR_CLOCKS_REGULATORS];
unsigned int nr_regulators;
#if (KERNEL_VERSION(4, 10, 0) <= LINUX_VERSION_CODE)
#if (KERNEL_VERSION(6, 0, 0) <= LINUX_VERSION_CODE)
int token;
#elif (KERNEL_VERSION(4, 10, 0) <= LINUX_VERSION_CODE)
struct opp_table *opp_table;
#endif /* (KERNEL_VERSION(4, 10, 0) <= LINUX_VERSION_CODE */
#endif /* (KERNEL_VERSION(6, 0, 0) <= LINUX_VERSION_CODE) */
#endif /* CONFIG_REGULATOR */
char devname[DEVNAME_SIZE];
u32 id;
#if IS_ENABLED(CONFIG_MALI_NO_MALI)
#if !IS_ENABLED(CONFIG_MALI_REAL_HW)
void *model;
struct kmem_cache *irq_slab;
struct workqueue_struct *irq_workq;
@@ -1017,7 +1081,7 @@ struct kbase_device {
atomic_t serving_gpu_irq;
atomic_t serving_mmu_irq;
spinlock_t reg_op_lock;
#endif /* CONFIG_MALI_NO_MALI */
#endif /* !IS_ENABLED(CONFIG_MALI_REAL_HW) */
struct kbase_pm_device_data pm;
struct kbase_mem_pool_group mem_pools;
@@ -1032,6 +1096,8 @@ struct kbase_device {
spinlock_t mmu_mask_change;
bool pagesize_2mb;
struct kbase_gpu_props gpu_props;
unsigned long hw_issues_mask[(BASE_HW_ISSUE_END + BITS_PER_LONG - 1) / BITS_PER_LONG];
@@ -1085,6 +1151,10 @@ struct kbase_device {
u64 lowest_gpu_freq_khz;
#if MALI_USE_CSF
struct kbase_backend_time backend_time;
#endif
bool cache_clean_in_progress;
u32 cache_clean_queued;
wait_queue_head_t cache_clean_wait;
@@ -1173,6 +1243,7 @@ struct kbase_device {
#if MALI_USE_CSF
bool mmu_hw_operation_in_progress;
#endif
bool mmu_page_migrate_in_progress;
bool poweroff_pending;
bool infinite_cache_active_default;
@@ -1261,6 +1332,12 @@ struct kbase_device {
struct kbase_mem_migrate mem_migrate;
#if MALI_USE_CSF && IS_ENABLED(CONFIG_SYNC_FILE)
atomic_t live_fence_metadata;
#endif
u32 mmu_as_inactive_wait_time_ms;
struct kmem_cache *va_region_slab;
};
/**
@@ -1614,11 +1691,13 @@ struct kbase_sub_alloc {
* is scheduled in and an atom is pulled from the context's per
* slot runnable tree in JM GPU or GPU command queue
* group is programmed on CSG slot in CSF GPU.
* @mm_update_lock: lock used for handling of special tracking page.
* @process_mm: Pointer to the memory descriptor of the process which
* created the context. Used for accounting the physical
* pages used for GPU allocations, done for the context,
* to the memory consumed by the process.
* to the memory consumed by the process. A reference is taken
* on this descriptor for the Userspace created contexts so that
* Kbase can safely access it to update the memory usage counters.
* The reference is dropped on context termination.
* @gpu_va_end: End address of the GPU va space (in 4KB page units)
* @running_total_tiler_heap_nr_chunks: Running total of number of chunks in all
* tiler heaps of the kbase context.
@@ -1740,6 +1819,10 @@ struct kbase_sub_alloc {
* @limited_core_mask: The mask that is applied to the affinity in case of atoms
* marked with BASE_JD_REQ_LIMITED_CORE_MASK.
* @platform_data: Pointer to platform specific per-context data.
* @task: Pointer to the task structure of the main thread of the process
* that created the Kbase context. It would be set only for the
* contexts created by the Userspace and not for the contexts
* created internally by the Kbase.
*
* A kernel base context is an entity among which the GPU is scheduled.
* Each context has its own GPU address space.
@@ -1827,8 +1910,7 @@ struct kbase_context {
atomic_t refcount;
spinlock_t mm_update_lock;
struct mm_struct __rcu *process_mm;
struct mm_struct *process_mm;
u64 gpu_va_end;
#if MALI_USE_CSF
u32 running_total_tiler_heap_nr_chunks;
@@ -1891,6 +1973,8 @@ struct kbase_context {
#if !MALI_USE_CSF
void *platform_data;
#endif
struct task_struct *task;
};
#ifdef CONFIG_MALI_CINSTR_GWT
@@ -1993,5 +2077,4 @@ static inline u64 kbase_get_lock_region_min_size_log2(struct kbase_gpu_props con
#define KBASE_AS_INACTIVE_MAX_LOOPS 100000000
/* Maximum number of loops polling the GPU PRFCNT_ACTIVE bit before we assume the GPU has hung */
#define KBASE_PRFCNT_ACTIVE_MAX_LOOPS 100000000
#endif /* _KBASE_DEFS_H_ */

View File

@@ -1,529 +0,0 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
* (C) COPYRIGHT 2011-2016, 2020-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
* Foundation, and any use by you of this program is subject to the terms
* of such GNU license.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, you can access it online at
* http://www.gnu.org/licenses/gpl-2.0.html.
*
*/
/* Include mali_kbase_dma_fence.h before checking for CONFIG_MALI_DMA_FENCE as
* it will be set there.
*/
#include "mali_kbase_dma_fence.h"
#include <linux/atomic.h>
#include <linux/list.h>
#include <linux/lockdep.h>
#include <linux/mutex.h>
#include <linux/version.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/workqueue.h>
#include <linux/ww_mutex.h>
#include <mali_kbase.h>
static void
kbase_dma_fence_work(struct work_struct *pwork);
/* Track @katom on its context's list of atoms blocked on dma-buf fences. */
static void
kbase_dma_fence_waiters_add(struct kbase_jd_atom *katom)
{
	list_add_tail(&katom->queue, &katom->kctx->dma_fence.waiting_resource);
}
/* Remove @katom from its context's dma-fence waiter list.
 * Counterpart of kbase_dma_fence_waiters_add().
 */
static void
kbase_dma_fence_waiters_remove(struct kbase_jd_atom *katom)
{
list_del(&katom->queue);
}
/* Lock every reservation object in @info under a single ww_acquire context.
 *
 * Uses the standard wound/wait back-off protocol: if ww_mutex_lock() returns
 * -EDEADLK, all locks taken so far are dropped, the contended lock is taken
 * with ww_mutex_lock_slow(), and the whole sequence is retried.
 *
 * Return: 0 on success (all locks held, acquire context done), or a negative
 * error code with no locks held and the acquire context finished.
 */
static int
kbase_dma_fence_lock_reservations(struct kbase_dma_fence_resv_info *info,
struct ww_acquire_ctx *ctx)
{
#if (KERNEL_VERSION(5, 4, 0) > LINUX_VERSION_CODE)
struct reservation_object *content_res = NULL;
#else
struct dma_resv *content_res = NULL;
#endif
unsigned int content_res_idx = 0;
unsigned int r;
int err = 0;
ww_acquire_init(ctx, &reservation_ww_class);
retry:
/* content_res, when non-NULL, is already held via lock_slow below;
 * skip it once during this pass so it is not locked twice.
 */
for (r = 0; r < info->dma_fence_resv_count; r++) {
if (info->resv_objs[r] == content_res) {
content_res = NULL;
continue;
}
err = ww_mutex_lock(&info->resv_objs[r]->lock, ctx);
if (err)
goto error;
}
ww_acquire_done(ctx);
return err;
error:
content_res_idx = r;
/* Unlock the ones we have locked so far */
while (r--)
ww_mutex_unlock(&info->resv_objs[r]->lock);
if (content_res)
ww_mutex_unlock(&content_res->lock);
/* If we deadlocked, take the contended lock with lock_slow and retry */
if (err == -EDEADLK) {
content_res = info->resv_objs[content_res_idx];
ww_mutex_lock_slow(&content_res->lock, ctx);
goto retry;
}
/* If we are here the function failed */
ww_acquire_fini(ctx);
return err;
}
/* Drop every reservation lock taken by kbase_dma_fence_lock_reservations()
 * and end the ww acquire context.
 */
static void
kbase_dma_fence_unlock_reservations(struct kbase_dma_fence_resv_info *info,
struct ww_acquire_ctx *ctx)
{
	unsigned int idx;

	for (idx = 0; idx < info->dma_fence_resv_count; idx++)
		ww_mutex_unlock(&info->resv_objs[idx]->lock);

	ww_acquire_fini(ctx);
}
/**
 * kbase_dma_fence_queue_work() - Queue work to handle @katom
 * @katom: Pointer to atom for which to queue work
 *
 * Queue kbase_dma_fence_work() for @katom to clean up the fence callbacks and
 * submit the atom.
 */
static void
kbase_dma_fence_queue_work(struct kbase_jd_atom *katom)
{
	INIT_WORK(&katom->work, kbase_dma_fence_work);

	/* The work item must not already be queued; warn if it was. */
	WARN_ON(!queue_work(katom->kctx->dma_fence.wq, &katom->work));
}
/**
 * kbase_dma_fence_cancel_atom() - Cancels waiting on an atom
 * @katom: Katom to cancel
 *
 * Locking: katom->dma_fence.callbacks list assumes jctx.lock is held.
 */
static void
kbase_dma_fence_cancel_atom(struct kbase_jd_atom *katom)
{
lockdep_assert_held(&katom->kctx->jctx.lock);
/* Cancel callbacks and clean up. */
kbase_fence_free_callbacks(katom);
/* Mark the atom as handled in case all fences signaled just before
 * canceling the callbacks and the worker was queued.
 */
kbase_fence_dep_count_set(katom, -1);
/* Prevent job_done_nolock from being called twice on an atom when
 * there is a race between job completion and cancellation.
 */
if (katom->status == KBASE_JD_ATOM_STATE_QUEUED) {
/* Wait was cancelled - zap the atom */
katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
/* Non-zero return appears to mean the scheduler needs kicking —
 * kbase_js_sched_all() reschedules every context. TODO confirm.
 */
if (kbase_jd_done_nolock(katom, true))
kbase_js_sched_all(katom->kctx->kbdev);
}
}
/**
 * kbase_dma_fence_work() - Worker thread called when a fence is signaled
 * @pwork: work_struct containing a pointer to a katom
 *
 * This function will clean and mark all dependencies as satisfied
 */
static void
kbase_dma_fence_work(struct work_struct *pwork)
{
struct kbase_jd_atom *katom;
struct kbase_jd_context *ctx;
katom = container_of(pwork, struct kbase_jd_atom, work);
ctx = &katom->kctx->jctx;
mutex_lock(&ctx->lock);
/* Still waiting on other fences (or already cancelled, dep count < 0):
 * nothing to do on this invocation.
 */
if (kbase_fence_dep_count_read(katom) != 0)
goto out;
/* Mark the atom as handled so late fence callbacks cannot requeue it. */
kbase_fence_dep_count_set(katom, -1);
/* Remove atom from list of dma-fence waiting atoms. */
kbase_dma_fence_waiters_remove(katom);
/* Cleanup callbacks. */
kbase_fence_free_callbacks(katom);
/*
 * Queue atom on GPU, unless it has already completed due to a failing
 * dependency. Run kbase_jd_done_nolock() on the katom if it is completed.
 */
if (unlikely(katom->status == KBASE_JD_ATOM_STATE_COMPLETED))
kbase_jd_done_nolock(katom, true);
else
kbase_jd_dep_clear_locked(katom);
out:
mutex_unlock(&ctx->lock);
}
/* Fence signal callback: decrement the atom's fence dependency count and,
 * when it reaches zero, queue the worker to submit the atom.
 * Runs in fence-signal context, so all real work is deferred to the
 * workqueue. The struct fence variant covers kernels before 4.10.
 */
static void
#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE)
kbase_dma_fence_cb(struct fence *fence, struct fence_cb *cb)
#else
kbase_dma_fence_cb(struct dma_fence *fence, struct dma_fence_cb *cb)
#endif
{
struct kbase_fence_cb *kcb = container_of(cb,
struct kbase_fence_cb,
fence_cb);
struct kbase_jd_atom *katom = kcb->katom;
/* If the atom is zapped dep_count will be forced to a negative number
 * preventing this callback from ever scheduling work. Which in turn
 * would reschedule the atom.
 */
if (kbase_fence_dep_count_dec_and_test(katom))
kbase_dma_fence_queue_work(katom);
}
/* Attach kbase_dma_fence_cb() to the fences of a reservation object.
 *
 * Two implementations are kept: the pre-5.18 one distinguishes the
 * exclusive fence from the shared fence list, while the 5.18+ one uses the
 * unified dma_resv_get_fences() usage-based API. In both, @exclusive selects
 * whether callbacks are also added for shared/read fences.
 *
 * Return: 0 on success; on error all callbacks added so far are freed.
 */
#if (KERNEL_VERSION(5, 18, 0) > LINUX_VERSION_CODE)
#if (KERNEL_VERSION(5, 4, 0) > LINUX_VERSION_CODE)
static int
kbase_dma_fence_add_reservation_callback(struct kbase_jd_atom *katom,
struct reservation_object *resv,
bool exclusive)
#else
static int
kbase_dma_fence_add_reservation_callback(struct kbase_jd_atom *katom,
struct dma_resv *resv,
bool exclusive)
#endif
{
#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE)
struct fence *excl_fence = NULL;
struct fence **shared_fences = NULL;
#else
struct dma_fence *excl_fence = NULL;
struct dma_fence **shared_fences = NULL;
#endif
unsigned int shared_count = 0, i = 0;
int err;
/* Snapshot the reservation's fences; this takes a reference on each. */
#if (KERNEL_VERSION(5, 4, 0) > LINUX_VERSION_CODE)
err = reservation_object_get_fences_rcu(
#elif (KERNEL_VERSION(5, 14, 0) > LINUX_VERSION_CODE)
err = dma_resv_get_fences_rcu(
#else
err = dma_resv_get_fences(
#endif
resv,
&excl_fence,
&shared_count,
&shared_fences);
if (err)
return err;
if (excl_fence) {
err = kbase_fence_add_callback(katom,
excl_fence,
kbase_dma_fence_cb);
/* Release our reference, taken by reservation_object_get_fences_rcu(),
 * to the fence. We have set up our callback (if that was possible),
 * and the fence's owner is responsible for signaling the fence
 * before allowing it to disappear.
 */
dma_fence_put(excl_fence);
if (err)
goto out;
}
if (exclusive) {
for (i = 0; i < shared_count; i++) {
err = kbase_fence_add_callback(katom,
shared_fences[i],
kbase_dma_fence_cb);
if (err)
goto out;
}
}
/* Release all our references to the shared fences, taken by
 * reservation_object_get_fences_rcu(). We have set up our callback (if
 * that was possible), and the fence's owner is responsible for
 * signaling the fence before allowing it to disappear.
 */
out:
while (i-- > 0)
dma_fence_put(shared_fences[i]);
kfree(shared_fences);
if (err) {
/*
 * On error, cancel and clean up all callbacks that were set up
 * before the error.
 */
kbase_fence_free_callbacks(katom);
}
return err;
}
#else
/* 5.18+ variant: dma_resv_get_fences() returns a single array covering
 * the requested usage; @exclusive selects write vs read usage.
 */
static int kbase_dma_fence_add_reservation_callback(struct kbase_jd_atom *katom,
struct dma_resv *resv, bool exclusive)
{
struct dma_fence **fences = NULL;
unsigned int num_fences = 0, i = 0;
int err;
err = dma_resv_get_fences(resv, exclusive, &num_fences, &fences);
if (err)
return err;
for (i = 0; i < num_fences; i++) {
err = kbase_fence_add_callback(katom, fences[i], kbase_dma_fence_cb);
if (err)
goto out;
}
out:
/* Drop the references taken by dma_resv_get_fences(). */
while (i-- > 0)
dma_fence_put(fences[i]);
kfree(fences);
if (err) {
/*
 * On error, cancel and clean up all callbacks that were set up
 * before the error.
 */
kbase_fence_free_callbacks(katom);
}
return err;
}
#endif
/* Append @resv to @info's array, recording exclusivity in the bitmap.
 * Duplicates are silently ignored.
 * NOTE(review): there is no capacity check on resv_objs here — the caller
 * must have sized the array for every reservation it will add; verify at
 * call sites.
 */
#if (KERNEL_VERSION(5, 4, 0) > LINUX_VERSION_CODE)
void kbase_dma_fence_add_reservation(struct reservation_object *resv,
struct kbase_dma_fence_resv_info *info,
bool exclusive)
#else
void kbase_dma_fence_add_reservation(struct dma_resv *resv,
struct kbase_dma_fence_resv_info *info,
bool exclusive)
#endif
{
unsigned int i;
for (i = 0; i < info->dma_fence_resv_count; i++) {
/* Duplicate resource, ignore */
if (info->resv_objs[i] == resv)
return;
}
info->resv_objs[info->dma_fence_resv_count] = resv;
if (exclusive)
set_bit(info->dma_fence_resv_count,
info->dma_fence_excl_bitmap);
(info->dma_fence_resv_count)++;
}
/* Create a new output fence for @katom, attach it to every reservation
 * object in @info, and register callbacks on the fences already present so
 * the atom is submitted once they all signal.
 *
 * Sequence: create fence -> ww-lock all reservations -> per reservation,
 * add callbacks and publish the new fence (shared or exclusive) -> unlock.
 * The dep count is primed to 1 so callbacks firing during setup cannot
 * submit the atom early; the final dec-and-test detects the case where all
 * fences signaled before setup finished.
 *
 * Locking: caller holds jctx.lock.
 * Return: 0 on success (atom queued as a waiter or ready), negative error
 * code otherwise, in which case the atom's fence is signaled and freed.
 */
int kbase_dma_fence_wait(struct kbase_jd_atom *katom,
struct kbase_dma_fence_resv_info *info)
{
int err, i;
#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE)
struct fence *fence;
#else
struct dma_fence *fence;
#endif
struct ww_acquire_ctx ww_ctx;
lockdep_assert_held(&katom->kctx->jctx.lock);
fence = kbase_fence_out_new(katom);
if (!fence) {
err = -ENOMEM;
dev_err(katom->kctx->kbdev->dev,
"Error %d creating fence.\n", err);
return err;
}
/* Prime the dep count so callbacks cannot trigger submission while we
 * are still adding them; balanced by dec_and_test after setup.
 */
kbase_fence_dep_count_set(katom, 1);
err = kbase_dma_fence_lock_reservations(info, &ww_ctx);
if (err) {
dev_err(katom->kctx->kbdev->dev,
"Error %d locking reservations.\n", err);
kbase_fence_dep_count_set(katom, -1);
kbase_fence_out_remove(katom);
return err;
}
for (i = 0; i < info->dma_fence_resv_count; i++) {
#if (KERNEL_VERSION(5, 4, 0) > LINUX_VERSION_CODE)
struct reservation_object *obj = info->resv_objs[i];
#else
struct dma_resv *obj = info->resv_objs[i];
#endif
if (!test_bit(i, info->dma_fence_excl_bitmap)) {
/* Shared (read) access: reserve a slot, wait only on the
 * exclusive fence, then add our fence as a shared one.
 */
#if (KERNEL_VERSION(5, 4, 0) > LINUX_VERSION_CODE)
err = reservation_object_reserve_shared(obj);
#else
err = dma_resv_reserve_shared(obj, 0);
#endif
if (err) {
dev_err(katom->kctx->kbdev->dev,
"Error %d reserving space for shared fence.\n", err);
goto end;
}
err = kbase_dma_fence_add_reservation_callback(katom, obj, false);
if (err) {
dev_err(katom->kctx->kbdev->dev,
"Error %d adding reservation to callback.\n", err);
goto end;
}
#if (KERNEL_VERSION(5, 4, 0) > LINUX_VERSION_CODE)
reservation_object_add_shared_fence(obj, fence);
#else
dma_resv_add_shared_fence(obj, fence);
#endif
} else {
/* Exclusive (write) access: wait on all existing fences and
 * install our fence as the new exclusive one.
 */
err = kbase_dma_fence_add_reservation_callback(katom, obj, true);
if (err) {
dev_err(katom->kctx->kbdev->dev,
"Error %d adding reservation to callback.\n", err);
goto end;
}
#if (KERNEL_VERSION(5, 4, 0) > LINUX_VERSION_CODE)
reservation_object_add_excl_fence(obj, fence);
#else
dma_resv_add_excl_fence(obj, fence);
#endif
}
}
end:
kbase_dma_fence_unlock_reservations(info, &ww_ctx);
if (likely(!err)) {
/* Test if the callbacks are already triggered */
if (kbase_fence_dep_count_dec_and_test(katom)) {
kbase_fence_dep_count_set(katom, -1);
kbase_fence_free_callbacks(katom);
} else {
/* Add katom to the list of dma-buf fence waiting atoms
 * only if it is still waiting.
 */
kbase_dma_fence_waiters_add(katom);
}
} else {
/* There was an error, cancel callbacks, set dep_count to -1 to
 * indicate that the atom has been handled (the caller will
 * kill it for us), signal the fence, free callbacks and the
 * fence.
 */
kbase_fence_free_callbacks(katom);
kbase_fence_dep_count_set(katom, -1);
kbase_dma_fence_signal(katom);
}
return err;
}
/* Cancel every atom on @kctx that is blocked waiting on dma-buf fences.
 * Locking: caller holds jctx.lock (required by kbase_dma_fence_cancel_atom).
 */
void kbase_dma_fence_cancel_all_atoms(struct kbase_context *kctx)
{
	struct kbase_jd_atom *katom;

	while (!list_empty(&kctx->dma_fence.waiting_resource)) {
		katom = list_first_entry(&kctx->dma_fence.waiting_resource,
					 struct kbase_jd_atom, queue);
		kbase_dma_fence_waiters_remove(katom);
		kbase_dma_fence_cancel_atom(katom);
	}
}
void kbase_dma_fence_cancel_callbacks(struct kbase_jd_atom *katom)
{
/* Cancel callbacks and clean up. */
if (kbase_fence_free_callbacks(katom))
kbase_dma_fence_queue_work(katom);
}
/* Signal @katom's output fence (if any), then drop the fence and any
 * remaining callback state from earlier waits.
 */
void kbase_dma_fence_signal(struct kbase_jd_atom *katom)
{
	if (katom->dma_fence.fence) {
		dma_fence_signal(katom->dma_fence.fence);
		kbase_fence_out_remove(katom);
		kbase_fence_free_callbacks(katom);
	}
}
/* Tear down the per-context dma-fence state: destroy the workqueue used for
 * fence-signal work and clear the pointer.
 */
void kbase_dma_fence_term(struct kbase_context *kctx)
{
destroy_workqueue(kctx->dma_fence.wq);
kctx->dma_fence.wq = NULL;
}
/* Initialize per-context dma-fence state: the waiter list and a dedicated
 * single-threaded unbound workqueue named after the context's pid.
 * Return: 0 on success, -ENOMEM if the workqueue cannot be allocated.
 */
int kbase_dma_fence_init(struct kbase_context *kctx)
{
	struct workqueue_struct *wq;

	INIT_LIST_HEAD(&kctx->dma_fence.waiting_resource);

	wq = alloc_workqueue("mali-fence-%d", WQ_UNBOUND, 1, kctx->pid);
	if (!wq)
		return -ENOMEM;

	kctx->dma_fence.wq = wq;
	return 0;
}

View File

@@ -1,150 +0,0 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
* (C) COPYRIGHT 2010-2016, 2020-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
* Foundation, and any use by you of this program is subject to the terms
* of such GNU license.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, you can access it online at
* http://www.gnu.org/licenses/gpl-2.0.html.
*
*/
#ifndef _KBASE_DMA_FENCE_H_
#define _KBASE_DMA_FENCE_H_
#ifdef CONFIG_MALI_DMA_FENCE
#include <linux/list.h>
#include <linux/version.h>
#if (KERNEL_VERSION(5, 4, 0) > LINUX_VERSION_CODE)
#include <linux/reservation.h>
#else
#include <linux/dma-resv.h>
#endif
#include <mali_kbase_fence.h>
/* Forward declaration from mali_kbase_defs.h */
struct kbase_jd_atom;
struct kbase_context;
/**
 * struct kbase_dma_fence_resv_info - Structure with list of reservation objects
 * @resv_objs: Array of reservation objects to attach the
 * new fence to.
 * @dma_fence_resv_count: Number of reservation objects in the array.
 * @dma_fence_excl_bitmap: Specifies which resv_obj are exclusive; bit i
 * corresponds to resv_objs[i].
 *
 * This is used by some functions to pass around a collection of data about
 * reservation objects. The caller owns the array and bitmap storage.
 */
struct kbase_dma_fence_resv_info {
#if (KERNEL_VERSION(5, 4, 0) > LINUX_VERSION_CODE)
struct reservation_object **resv_objs;
#else
struct dma_resv **resv_objs;
#endif
unsigned int dma_fence_resv_count;
unsigned long *dma_fence_excl_bitmap;
};
/**
* kbase_dma_fence_add_reservation() - Adds a resv to the array of resv_objs
* @resv: Reservation object to add to the array.
* @info: Pointer to struct with current reservation info
* @exclusive: Boolean indicating if exclusive access is needed
*
* The function adds a new reservation_object to an existing array of
* reservation_objects. At the same time keeps track of which objects require
* exclusive access in dma_fence_excl_bitmap.
*/
#if (KERNEL_VERSION(5, 4, 0) > LINUX_VERSION_CODE)
void kbase_dma_fence_add_reservation(struct reservation_object *resv,
struct kbase_dma_fence_resv_info *info,
bool exclusive);
#else
void kbase_dma_fence_add_reservation(struct dma_resv *resv,
struct kbase_dma_fence_resv_info *info,
bool exclusive);
#endif
/**
* kbase_dma_fence_wait() - Creates a new fence and attaches it to the resv_objs
* @katom: Katom with the external dependency.
* @info: Pointer to struct with current reservation info
*
* Return: An error code or 0 if succeeds
*/
int kbase_dma_fence_wait(struct kbase_jd_atom *katom,
struct kbase_dma_fence_resv_info *info);
/**
* kbase_dma_fence_cancel_all_atoms() - Cancel all dma-fences blocked atoms on kctx
* @kctx: Pointer to kbase context
*
* This function will cancel and clean up all katoms on @kctx that is waiting
* on dma-buf fences.
*
* Locking: jctx.lock needs to be held when calling this function.
*/
void kbase_dma_fence_cancel_all_atoms(struct kbase_context *kctx);
/**
* kbase_dma_fence_cancel_callbacks() - Cancel only callbacks on katom
* @katom: Pointer to katom whose callbacks are to be canceled
*
* This function cancels all dma-buf fence callbacks on @katom, but does not
* cancel the katom itself.
*
* The caller is responsible for ensuring that kbase_jd_done_nolock is called on
* @katom.
*
* Locking: jctx.lock must be held when calling this function.
*/
void kbase_dma_fence_cancel_callbacks(struct kbase_jd_atom *katom);
/**
* kbase_dma_fence_signal() - Signal katom's fence and clean up after wait
* @katom: Pointer to katom to signal and clean up
*
* This function will signal the @katom's fence, if it has one, and clean up
* the callback data from the katom's wait on earlier fences.
*
* Locking: jctx.lock must be held while calling this function.
*/
void kbase_dma_fence_signal(struct kbase_jd_atom *katom);
/**
* kbase_dma_fence_term() - Terminate Mali dma-fence context
* @kctx: kbase context to terminate
*/
void kbase_dma_fence_term(struct kbase_context *kctx);
/**
* kbase_dma_fence_init() - Initialize Mali dma-fence context
* @kctx: kbase context to initialize
*
* Return: 0 on success, error code otherwise.
*/
int kbase_dma_fence_init(struct kbase_context *kctx);
#else /* !CONFIG_MALI_DMA_FENCE */
/* Dummy functions for when dma-buf fence isn't enabled. */
/* No-op init: always reports success when CONFIG_MALI_DMA_FENCE is off. */
static inline int kbase_dma_fence_init(struct kbase_context *kctx)
{
return 0;
}
/* No-op termination counterpart. */
static inline void kbase_dma_fence_term(struct kbase_context *kctx) {}
#endif /* CONFIG_MALI_DMA_FENCE */
#endif

View File

@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
* (C) COPYRIGHT 2010-2018, 2020-2022 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -32,6 +32,46 @@
#include <linux/list.h>
#include "mali_kbase_fence_defs.h"
#include "mali_kbase.h"
#include "mali_kbase_refcount_defs.h"
#if MALI_USE_CSF
/* Maximum number of characters in DMA fence timeline name. */
#define MAX_TIMELINE_NAME (32)
/**
* struct kbase_kcpu_dma_fence_meta - Metadata structure for dma fence objects containing
* information about KCPU queue. One instance per KCPU
* queue.
*
* @refcount: Atomic value to keep track of number of references to an instance.
* An instance can outlive the KCPU queue itself.
* @kbdev: Pointer to Kbase device.
* @kctx_id: Kbase context ID.
* @timeline_name: String of timeline name for associated fence object.
*/
struct kbase_kcpu_dma_fence_meta {
kbase_refcount_t refcount;
struct kbase_device *kbdev;
int kctx_id;
char timeline_name[MAX_TIMELINE_NAME];
};
/**
* struct kbase_kcpu_dma_fence - Structure which extends a dma fence object to include a
* reference to metadata containing more information about it.
*
* @base: Fence object itself.
* @metadata: Pointer to metadata structure.
*/
struct kbase_kcpu_dma_fence {
#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE)
struct fence base;
#else
struct dma_fence base;
#endif /* LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0) */
struct kbase_kcpu_dma_fence_meta *metadata;
};
#endif
#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE)
extern const struct fence_ops kbase_fence_ops;
@@ -167,12 +207,52 @@ static inline int kbase_fence_out_signal(struct kbase_jd_atom *katom,
*/
#define kbase_fence_get(fence_info) dma_fence_get((fence_info)->fence)
#if MALI_USE_CSF
#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE)
static inline struct kbase_kcpu_dma_fence *kbase_kcpu_dma_fence_get(struct fence *fence)
#else
static inline struct kbase_kcpu_dma_fence *kbase_kcpu_dma_fence_get(struct dma_fence *fence)
#endif
{
if (fence->ops == &kbase_fence_ops)
return (struct kbase_kcpu_dma_fence *)fence;
return NULL;
}
static inline void kbase_kcpu_dma_fence_meta_put(struct kbase_kcpu_dma_fence_meta *metadata)
{
if (kbase_refcount_dec_and_test(&metadata->refcount)) {
atomic_dec(&metadata->kbdev->live_fence_metadata);
kfree(metadata);
}
}
#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE)
static inline void kbase_kcpu_dma_fence_put(struct fence *fence)
#else
static inline void kbase_kcpu_dma_fence_put(struct dma_fence *fence)
#endif
{
struct kbase_kcpu_dma_fence *kcpu_fence = kbase_kcpu_dma_fence_get(fence);
if (kcpu_fence)
kbase_kcpu_dma_fence_meta_put(kcpu_fence->metadata);
}
#endif /* MALI_USE_CSF */
/**
* kbase_fence_put() - Releases a reference to a fence
* @fence: Fence to release reference for.
*/
#define kbase_fence_put(fence) dma_fence_put(fence)
#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE)
static inline void kbase_fence_put(struct fence *fence)
#else
static inline void kbase_fence_put(struct dma_fence *fence)
#endif
{
dma_fence_put(fence);
}
#endif /* IS_ENABLED(CONFIG_SYNC_FILE) */

View File

@@ -21,7 +21,7 @@
#include <linux/atomic.h>
#include <linux/list.h>
#include <mali_kbase_fence_defs.h>
#include <mali_kbase_fence.h>
#include <mali_kbase.h>
static const char *
@@ -41,7 +41,13 @@ kbase_fence_get_timeline_name(struct fence *fence)
kbase_fence_get_timeline_name(struct dma_fence *fence)
#endif
{
#if MALI_USE_CSF
struct kbase_kcpu_dma_fence *kcpu_fence = (struct kbase_kcpu_dma_fence *)fence;
return kcpu_fence->metadata->timeline_name;
#else
return kbase_timeline_name;
#endif /* MALI_USE_CSF */
}
static bool
@@ -62,24 +68,44 @@ kbase_fence_fence_value_str(struct dma_fence *fence, char *str, int size)
#endif
{
#if (KERNEL_VERSION(5, 1, 0) > LINUX_VERSION_CODE)
snprintf(str, size, "%u", fence->seqno);
const char *format = "%u";
#else
snprintf(str, size, "%llu", fence->seqno);
const char *format = "%llu";
#endif
if (unlikely(!scnprintf(str, size, format, fence->seqno)))
pr_err("Fail to encode fence seqno to string");
}
#if MALI_USE_CSF
static void
#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE)
kbase_fence_release(struct fence *fence)
#else
kbase_fence_release(struct dma_fence *fence)
#endif
{
struct kbase_kcpu_dma_fence *kcpu_fence = (struct kbase_kcpu_dma_fence *)fence;
kbase_kcpu_dma_fence_meta_put(kcpu_fence->metadata);
kfree(kcpu_fence);
}
#endif
#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE)
extern const struct fence_ops kbase_fence_ops; /* silence checker warning */
const struct fence_ops kbase_fence_ops = {
.wait = fence_default_wait,
const struct fence_ops kbase_fence_ops = { .wait = fence_default_wait,
#else
extern const struct dma_fence_ops kbase_fence_ops; /* silence checker warning */
const struct dma_fence_ops kbase_fence_ops = {
.wait = dma_fence_default_wait,
const struct dma_fence_ops kbase_fence_ops = { .wait = dma_fence_default_wait,
#endif
.get_driver_name = kbase_fence_get_driver_name,
.get_timeline_name = kbase_fence_get_timeline_name,
.enable_signaling = kbase_fence_enable_signaling,
#if MALI_USE_CSF
.fence_value_str = kbase_fence_fence_value_str,
.release = kbase_fence_release
#else
.fence_value_str = kbase_fence_fence_value_str
#endif
.get_driver_name = kbase_fence_get_driver_name,
.get_timeline_name = kbase_fence_get_timeline_name,
.enable_signaling = kbase_fence_enable_signaling,
.fence_value_str = kbase_fence_fence_value_str
};
KBASE_EXPORT_TEST_API(kbase_fence_ops);

View File

@@ -311,7 +311,6 @@ static void kbase_gpuprops_calculate_props(
struct base_gpu_props * const gpu_props, struct kbase_device *kbdev)
{
int i;
u32 gpu_id;
/* Populate the base_gpu_props structure */
kbase_gpuprops_update_core_props_gpu_id(gpu_props);
@@ -361,49 +360,23 @@ static void kbase_gpuprops_calculate_props(
gpu_props->thread_props.tls_alloc =
gpu_props->raw_props.thread_tls_alloc;
/* MIDHARC-2364 was intended for tULx.
* Workaround for the incorrectly applied THREAD_FEATURES to tDUx.
*/
gpu_id = kbdev->gpu_props.props.raw_props.gpu_id;
#if MALI_USE_CSF
CSTD_UNUSED(gpu_id);
gpu_props->thread_props.max_registers =
KBASE_UBFX32(gpu_props->raw_props.thread_features,
0U, 22);
KBASE_UBFX32(gpu_props->raw_props.thread_features, 0U, 22);
gpu_props->thread_props.impl_tech =
KBASE_UBFX32(gpu_props->raw_props.thread_features,
22U, 2);
KBASE_UBFX32(gpu_props->raw_props.thread_features, 22U, 2);
gpu_props->thread_props.max_task_queue =
KBASE_UBFX32(gpu_props->raw_props.thread_features,
24U, 8);
KBASE_UBFX32(gpu_props->raw_props.thread_features, 24U, 8);
gpu_props->thread_props.max_thread_group_split = 0;
#else
if ((gpu_id & GPU_ID2_PRODUCT_MODEL) == GPU_ID2_PRODUCT_TDUX) {
gpu_props->thread_props.max_registers =
KBASE_UBFX32(gpu_props->raw_props.thread_features,
0U, 22);
gpu_props->thread_props.impl_tech =
KBASE_UBFX32(gpu_props->raw_props.thread_features,
22U, 2);
gpu_props->thread_props.max_task_queue =
KBASE_UBFX32(gpu_props->raw_props.thread_features,
24U, 8);
gpu_props->thread_props.max_thread_group_split = 0;
} else {
gpu_props->thread_props.max_registers =
KBASE_UBFX32(gpu_props->raw_props.thread_features,
0U, 16);
gpu_props->thread_props.max_task_queue =
KBASE_UBFX32(gpu_props->raw_props.thread_features,
16U, 8);
gpu_props->thread_props.max_thread_group_split =
KBASE_UBFX32(gpu_props->raw_props.thread_features,
24U, 6);
gpu_props->thread_props.impl_tech =
KBASE_UBFX32(gpu_props->raw_props.thread_features,
30U, 2);
}
gpu_props->thread_props.max_registers =
KBASE_UBFX32(gpu_props->raw_props.thread_features, 0U, 16);
gpu_props->thread_props.max_task_queue =
KBASE_UBFX32(gpu_props->raw_props.thread_features, 16U, 8);
gpu_props->thread_props.max_thread_group_split =
KBASE_UBFX32(gpu_props->raw_props.thread_features, 24U, 6);
gpu_props->thread_props.impl_tech =
KBASE_UBFX32(gpu_props->raw_props.thread_features, 30U, 2);
#endif
/* If values are not specified, then use defaults */
@@ -539,7 +512,7 @@ MODULE_PARM_DESC(override_l2_hash, "Override L2 hash config for testing");
static u32 l2_hash_values[ASN_HASH_COUNT] = {
0,
};
static int num_override_l2_hash_values;
static unsigned int num_override_l2_hash_values;
module_param_array(l2_hash_values, uint, &num_override_l2_hash_values, 0000);
MODULE_PARM_DESC(l2_hash_values, "Override L2 hash values config for testing");
@@ -593,7 +566,7 @@ kbase_read_l2_config_from_dt(struct kbase_device *const kbdev)
kbdev->l2_hash_values_override = false;
if (num_override_l2_hash_values) {
int i;
unsigned int i;
kbdev->l2_hash_values_override = true;
for (i = 0; i < num_override_l2_hash_values; i++)

View File

@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
* (C) COPYRIGHT 2010-2021 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2010-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -125,14 +125,17 @@ int kbase_gpu_gwt_stop(struct kbase_context *kctx)
return 0;
}
#if (KERNEL_VERSION(5, 13, 0) <= LINUX_VERSION_CODE)
static int list_cmp_function(void *priv, const struct list_head *a, const struct list_head *b)
#else
static int list_cmp_function(void *priv, struct list_head *a,
struct list_head *b)
#endif
{
struct kbasep_gwt_list_element *elementA = container_of(a,
struct kbasep_gwt_list_element, link);
struct kbasep_gwt_list_element *elementB = container_of(b,
struct kbasep_gwt_list_element, link);
const struct kbasep_gwt_list_element *elementA =
container_of(a, struct kbasep_gwt_list_element, link);
const struct kbasep_gwt_list_element *elementB =
container_of(b, struct kbasep_gwt_list_element, link);
CSTD_UNUSED(priv);

View File

@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
* (C) COPYRIGHT 2012-2022 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2012-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -68,9 +68,6 @@ void kbase_hw_set_features_mask(struct kbase_device *kbdev)
case GPU_ID2_PRODUCT_TBAX:
features = base_hw_features_tBAx;
break;
case GPU_ID2_PRODUCT_TDUX:
features = base_hw_features_tDUx;
break;
case GPU_ID2_PRODUCT_TODX:
case GPU_ID2_PRODUCT_LODX:
features = base_hw_features_tODx;
@@ -211,10 +208,6 @@ static const enum base_hw_issue *kbase_hw_get_issues_for_new_id(
{ GPU_ID2_VERSION_MAKE(0, 0, 2), base_hw_issues_tBAx_r0p0 },
{ U32_MAX, NULL } } },
{ GPU_ID2_PRODUCT_TDUX,
{ { GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tDUx_r0p0 },
{ U32_MAX, NULL } } },
{ GPU_ID2_PRODUCT_TODX,
{ { GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tODx_r0p0 },
{ GPU_ID2_VERSION_MAKE(0, 0, 4), base_hw_issues_tODx_r0p0 },
@@ -235,9 +228,11 @@ static const enum base_hw_issue *kbase_hw_get_issues_for_new_id(
{ GPU_ID2_PRODUCT_TTUX,
{ { GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tTUx_r0p0 },
{ GPU_ID2_VERSION_MAKE(0, 1, 0), base_hw_issues_tTUx_r0p1 },
{ GPU_ID2_VERSION_MAKE(1, 0, 0), base_hw_issues_tTUx_r1p0 },
{ GPU_ID2_VERSION_MAKE(1, 1, 0), base_hw_issues_tTUx_r1p1 },
{ GPU_ID2_VERSION_MAKE(1, 2, 0), base_hw_issues_tTUx_r1p2 },
{ GPU_ID2_VERSION_MAKE(1, 3, 0), base_hw_issues_tTUx_r1p3 },
{ U32_MAX, NULL } } },
{ GPU_ID2_PRODUCT_LTUX,
@@ -245,6 +240,7 @@ static const enum base_hw_issue *kbase_hw_get_issues_for_new_id(
{ GPU_ID2_VERSION_MAKE(1, 0, 0), base_hw_issues_tTUx_r1p0 },
{ GPU_ID2_VERSION_MAKE(1, 1, 0), base_hw_issues_tTUx_r1p1 },
{ GPU_ID2_VERSION_MAKE(1, 2, 0), base_hw_issues_tTUx_r1p2 },
{ GPU_ID2_VERSION_MAKE(1, 3, 0), base_hw_issues_tTUx_r1p3 },
{ U32_MAX, NULL } } },
{ GPU_ID2_PRODUCT_TTIX,
@@ -309,21 +305,20 @@ static const enum base_hw_issue *kbase_hw_get_issues_for_new_id(
*/
issues = fallback_issues;
dev_warn(kbdev->dev,
"GPU hardware issue table may need updating:\n"
"r%dp%d status %d is unknown; treating as r%dp%d status %d",
(gpu_id & GPU_ID2_VERSION_MAJOR) >>
GPU_ID2_VERSION_MAJOR_SHIFT,
(gpu_id & GPU_ID2_VERSION_MINOR) >>
GPU_ID2_VERSION_MINOR_SHIFT,
(gpu_id & GPU_ID2_VERSION_STATUS) >>
GPU_ID2_VERSION_STATUS_SHIFT,
(fallback_version & GPU_ID2_VERSION_MAJOR) >>
GPU_ID2_VERSION_MAJOR_SHIFT,
(fallback_version & GPU_ID2_VERSION_MINOR) >>
GPU_ID2_VERSION_MINOR_SHIFT,
(fallback_version & GPU_ID2_VERSION_STATUS) >>
GPU_ID2_VERSION_STATUS_SHIFT);
dev_notice(kbdev->dev, "r%dp%d status %d not found in HW issues table;\n",
(gpu_id & GPU_ID2_VERSION_MAJOR) >> GPU_ID2_VERSION_MAJOR_SHIFT,
(gpu_id & GPU_ID2_VERSION_MINOR) >> GPU_ID2_VERSION_MINOR_SHIFT,
(gpu_id & GPU_ID2_VERSION_STATUS) >>
GPU_ID2_VERSION_STATUS_SHIFT);
dev_notice(kbdev->dev, "falling back to closest match: r%dp%d status %d\n",
(fallback_version & GPU_ID2_VERSION_MAJOR) >>
GPU_ID2_VERSION_MAJOR_SHIFT,
(fallback_version & GPU_ID2_VERSION_MINOR) >>
GPU_ID2_VERSION_MINOR_SHIFT,
(fallback_version & GPU_ID2_VERSION_STATUS) >>
GPU_ID2_VERSION_STATUS_SHIFT);
dev_notice(kbdev->dev,
"Execution proceeding normally with fallback match\n");
gpu_id &= ~GPU_ID2_VERSION;
gpu_id |= fallback_version;
@@ -349,7 +344,7 @@ int kbase_hw_set_issues_mask(struct kbase_device *kbdev)
issues = kbase_hw_get_issues_for_new_id(kbdev);
if (issues == NULL) {
dev_err(kbdev->dev,
"Unknown GPU ID %x", gpu_id);
"HW product - Unknown GPU ID %x", gpu_id);
return -EINVAL;
}
@@ -393,9 +388,6 @@ int kbase_hw_set_issues_mask(struct kbase_device *kbdev)
case GPU_ID2_PRODUCT_TBAX:
issues = base_hw_issues_model_tBAx;
break;
case GPU_ID2_PRODUCT_TDUX:
issues = base_hw_issues_model_tDUx;
break;
case GPU_ID2_PRODUCT_TODX:
case GPU_ID2_PRODUCT_LODX:
issues = base_hw_issues_model_tODx;
@@ -414,10 +406,9 @@ int kbase_hw_set_issues_mask(struct kbase_device *kbdev)
case GPU_ID2_PRODUCT_LTIX:
issues = base_hw_issues_model_tTIx;
break;
default:
dev_err(kbdev->dev,
"Unknown GPU ID %x", gpu_id);
"HW issues - Unknown GPU ID %x", gpu_id);
return -EINVAL;
}
}

View File

@@ -97,8 +97,8 @@ bool kbase_backend_use_ctx(struct kbase_device *kbdev,
* Return: true if context is now active, false otherwise (ie if context does
* not have an address space assigned)
*/
bool kbase_backend_use_ctx_sched(struct kbase_device *kbdev,
struct kbase_context *kctx, int js);
bool kbase_backend_use_ctx_sched(struct kbase_device *kbdev, struct kbase_context *kctx,
unsigned int js);
/**
* kbase_backend_release_ctx_irq - Release a context from the GPU. This will
@@ -183,8 +183,7 @@ void kbase_backend_reset(struct kbase_device *kbdev, ktime_t *end_timestamp);
*
* Return: Atom currently at the head of slot @js, or NULL
*/
struct kbase_jd_atom *kbase_backend_inspect_tail(struct kbase_device *kbdev,
int js);
struct kbase_jd_atom *kbase_backend_inspect_tail(struct kbase_device *kbdev, unsigned int js);
/**
* kbase_backend_nr_atoms_on_slot() - Return the number of atoms currently on a
@@ -194,7 +193,7 @@ struct kbase_jd_atom *kbase_backend_inspect_tail(struct kbase_device *kbdev,
*
* Return: Number of atoms currently on slot
*/
int kbase_backend_nr_atoms_on_slot(struct kbase_device *kbdev, int js);
int kbase_backend_nr_atoms_on_slot(struct kbase_device *kbdev, unsigned int js);
/**
* kbase_backend_nr_atoms_submitted() - Return the number of atoms on a slot
@@ -204,7 +203,7 @@ int kbase_backend_nr_atoms_on_slot(struct kbase_device *kbdev, int js);
*
* Return: Number of atoms currently on slot @js that are currently on the GPU.
*/
int kbase_backend_nr_atoms_submitted(struct kbase_device *kbdev, int js);
int kbase_backend_nr_atoms_submitted(struct kbase_device *kbdev, unsigned int js);
/**
* kbase_backend_ctx_count_changed() - Number of contexts ready to submit jobs
@@ -233,7 +232,7 @@ void kbase_backend_timeouts_changed(struct kbase_device *kbdev);
*
* Return: Number of jobs that can be submitted.
*/
int kbase_backend_slot_free(struct kbase_device *kbdev, int js);
int kbase_backend_slot_free(struct kbase_device *kbdev, unsigned int js);
/**
* kbase_job_check_leave_disjoint - potentially leave disjoint state
@@ -287,8 +286,8 @@ u32 kbase_backend_get_current_flush_id(struct kbase_device *kbdev);
* Context:
* The job slot lock must be held when calling this function.
*/
void kbase_job_slot_hardstop(struct kbase_context *kctx, int js,
struct kbase_jd_atom *target_katom);
void kbase_job_slot_hardstop(struct kbase_context *kctx, unsigned int js,
struct kbase_jd_atom *target_katom);
/**
* kbase_gpu_atoms_submitted_any() - Inspect whether there are any atoms

Some files were not shown because too many files have changed in this diff Show More