ODROID-N2: slipstream Mali Dvalin Kernel Driver

This commit is contained in:
Mauro (mdrjr) Ribeiro
2018-08-20 16:16:59 -03:00
parent 10bdbdd937
commit 2017ef2b5a
258 changed files with 82730 additions and 1 deletions

View File

@@ -206,4 +206,5 @@ source "drivers/fpga/Kconfig"
source "drivers/tee/Kconfig"
source "drivers/gpu/arm/Kconfig"
endmenu

View File

@@ -2,5 +2,6 @@
# taken to initialize them in the correct order. Link order is the only way
# to ensure this currently.
obj-$(CONFIG_TEGRA_HOST1X) += host1x/
obj-y += drm/ vga/
obj-y += drm/ vga/
obj-$(CONFIG_IMX_IPUV3_CORE) += ipu-v3/
obj-y += arm/

23
drivers/gpu/arm/Kbuild Executable file
View File

@@ -0,0 +1,23 @@
#
# (C) COPYRIGHT 2012 ARM Limited. All rights reserved.
#
# This program is free software and is provided to you under the terms of the
# GNU General Public License version 2 as published by the Free Software
# Foundation, and any use by you of this program is subject to the terms
# of such GNU licence.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, you can access it online at
# http://www.gnu.org/licenses/gpl-2.0.html.
#
# SPDX-License-Identifier: GPL-2.0
#
#
obj-$(CONFIG_MALI_MIDGARD) += midgard/

25
drivers/gpu/arm/Kconfig Normal file
View File

@@ -0,0 +1,25 @@
#
# (C) COPYRIGHT 2012 ARM Limited. All rights reserved.
#
# This program is free software and is provided to you under the terms of the
# GNU General Public License version 2 as published by the Free Software
# Foundation, and any use by you of this program is subject to the terms
# of such GNU licence.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, you can access it online at
# http://www.gnu.org/licenses/gpl-2.0.html.
#
# SPDX-License-Identifier: GPL-2.0
#
#
menu "ARM GPU Configuration"
source "drivers/gpu/arm/midgard/Kconfig"
endmenu

176
drivers/gpu/arm/midgard/Kbuild Executable file
View File

@@ -0,0 +1,176 @@
#
# (C) COPYRIGHT 2012-2018 ARM Limited. All rights reserved.
#
# This program is free software and is provided to you under the terms of the
# GNU General Public License version 2 as published by the Free Software
# Foundation, and any use by you of this program is subject to the terms
# of such GNU licence.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, you can access it online at
# http://www.gnu.org/licenses/gpl-2.0.html.
#
# SPDX-License-Identifier: GPL-2.0
#
#
# Driver version string which is returned to userspace via an ioctl
MALI_RELEASE_NAME ?= "r12p0-01rel0"
# Paths required for build
KBASE_PATH = $(src)
KBASE_PLATFORM_PATH = $(KBASE_PATH)/platform_dummy
UMP_PATH = $(src)/../../../base
# Set up defaults if not defined by build system
MALI_CUSTOMER_RELEASE ?= 1
MALI_UNIT_TEST ?= 0
MALI_KERNEL_TEST_API ?= 0
MALI_MOCK_TEST ?= 0
MALI_COVERAGE ?= 0
CONFIG_MALI_PLATFORM_NAME ?= "devicetree"
# This workaround is for what seems to be a compiler bug we observed in
# GCC 4.7 on AOSP 4.3. The bug caused an intermittent failure compiling
# the "_Pragma" syntax, where an error message is returned:
#
# "internal compiler error: unspellable token PRAGMA"
#
# This regression has thus far only been seen on the GCC 4.7 compiler bundled
# with AOSP 4.3.0. So this makefile, intended for in-tree kernel builds
# which are not known to be used with AOSP, is hardcoded to disable the
# workaround, i.e. set the define to 0.
MALI_GCC_WORKAROUND_MIDCOM_4598 ?= 0
# Set up our defines, which will be passed to gcc
DEFINES = \
-DMALI_CUSTOMER_RELEASE=$(MALI_CUSTOMER_RELEASE) \
-DMALI_KERNEL_TEST_API=$(MALI_KERNEL_TEST_API) \
-DMALI_UNIT_TEST=$(MALI_UNIT_TEST) \
-DMALI_MOCK_TEST=$(MALI_MOCK_TEST) \
-DMALI_COVERAGE=$(MALI_COVERAGE) \
-DMALI_RELEASE_NAME=\"$(MALI_RELEASE_NAME)\" \
-DMALI_GCC_WORKAROUND_MIDCOM_4598=$(MALI_GCC_WORKAROUND_MIDCOM_4598)
ifeq ($(KBUILD_EXTMOD),)
# in-tree
DEFINES +=-DMALI_KBASE_PLATFORM_PATH=../../$(src)/platform/$(CONFIG_MALI_PLATFORM_NAME)
else
# out-of-tree
DEFINES +=-DMALI_KBASE_PLATFORM_PATH=$(src)/platform/$(CONFIG_MALI_PLATFORM_NAME)
endif
DEFINES += -I$(srctree)/drivers/staging/android
#ldflags-y += --strip-debug
# Use our defines when compiling
ccflags-y += $(DEFINES) -I$(KBASE_PATH) -I$(KBASE_PLATFORM_PATH) -I$(UMP_PATH) -I$(srctree)/include/linux
subdir-ccflags-y += $(DEFINES) -I$(KBASE_PATH) -I$(KBASE_PLATFORM_PATH) -I$(OSK_PATH) -I$(UMP_PATH) -I$(srctree)/include/linux
SRC := \
mali_kbase_device.c \
mali_kbase_cache_policy.c \
mali_kbase_mem.c \
mali_kbase_mmu.c \
mali_kbase_ctx_sched.c \
mali_kbase_jd.c \
mali_kbase_jd_debugfs.c \
mali_kbase_jm.c \
mali_kbase_gpuprops.c \
mali_kbase_js.c \
mali_kbase_js_ctx_attr.c \
mali_kbase_event.c \
mali_kbase_context.c \
mali_kbase_pm.c \
mali_kbase_config.c \
mali_kbase_vinstr.c \
mali_kbase_softjobs.c \
mali_kbase_10969_workaround.c \
mali_kbase_hw.c \
mali_kbase_utility.c \
mali_kbase_debug.c \
mali_kbase_trace_timeline.c \
mali_kbase_gpu_memory_debugfs.c \
mali_kbase_mem_linux.c \
mali_kbase_core_linux.c \
mali_kbase_replay.c \
mali_kbase_mem_profile_debugfs.c \
mali_kbase_mmu_mode_lpae.c \
mali_kbase_mmu_mode_aarch64.c \
mali_kbase_disjoint_events.c \
mali_kbase_gator_api.c \
mali_kbase_debug_mem_view.c \
mali_kbase_debug_job_fault.c \
mali_kbase_smc.c \
mali_kbase_mem_pool.c \
mali_kbase_mem_pool_debugfs.c \
mali_kbase_tlstream.c \
mali_kbase_strings.c \
mali_kbase_as_fault_debugfs.c \
mali_kbase_regs_history_debugfs.c \
thirdparty/mali_kbase_mmap.c
ifeq ($(CONFIG_MALI_JOB_DUMP),y)
SRC += mali_kbase_gwt.c
endif
ifeq ($(MALI_UNIT_TEST),1)
SRC += mali_kbase_tlstream_test.c
endif
ifeq ($(MALI_CUSTOMER_RELEASE),0)
SRC += mali_kbase_regs_dump_debugfs.c
endif
ccflags-y += -I$(KBASE_PATH)
# Tell the Linux build system from which .o file to create the kernel module
obj-$(CONFIG_MALI_MIDGARD) += mali_kbase.o
# Tell the Linux build system to enable building of our .c files
mali_kbase-y := $(SRC:.c=.o)
# Kconfig passes in the name with quotes for in-tree builds - remove them.
platform_name := $(shell echo $(CONFIG_MALI_PLATFORM_NAME))
MALI_PLATFORM_DIR := platform/$(platform_name)
ccflags-y += -I$(src)/$(MALI_PLATFORM_DIR)
include $(src)/$(MALI_PLATFORM_DIR)/Kbuild
ifeq ($(CONFIG_MALI_DEVFREQ),y)
ifeq ($(CONFIG_DEVFREQ_THERMAL),y)
include $(src)/ipa/Kbuild
endif
endif
mali_kbase-$(CONFIG_MALI_DMA_FENCE) += \
mali_kbase_dma_fence.o \
mali_kbase_fence.o
mali_kbase-$(CONFIG_SYNC) += \
mali_kbase_sync_android.o \
mali_kbase_sync_common.o
mali_kbase-$(CONFIG_SYNC_FILE) += \
mali_kbase_sync_file.o \
mali_kbase_sync_common.o \
mali_kbase_fence.o
ifeq ($(MALI_MOCK_TEST),1)
# Test functionality
mali_kbase-y += tests/internal/src/mock/mali_kbase_pm_driver_mock.o
endif
include $(src)/backend/gpu/Kbuild
mali_kbase-y += $(BACKEND:.c=.o)
ccflags-y += -I$(src)/backend/gpu
subdir-ccflags-y += -I$(src)/backend/gpu
# For kutf and mali_kutf_irq_latency_test
obj-$(CONFIG_MALI_KUTF) += tests/

View File

@@ -0,0 +1,217 @@
#
# (C) COPYRIGHT 2012-2018 ARM Limited. All rights reserved.
#
# This program is free software and is provided to you under the terms of the
# GNU General Public License version 2 as published by the Free Software
# Foundation, and any use by you of this program is subject to the terms
# of such GNU licence.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, you can access it online at
# http://www.gnu.org/licenses/gpl-2.0.html.
#
# SPDX-License-Identifier: GPL-2.0
#
#
menuconfig MALI_MIDGARD
tristate "Mali Midgard series support"
select GPU_TRACEPOINTS if ANDROID
default n
help
Enable this option to build support for a ARM Mali Midgard GPU.
To compile this driver as a module, choose M here:
this will generate a single module, called mali_kbase.
config MALI_GATOR_SUPPORT
bool "Streamline support via Gator"
depends on MALI_MIDGARD
default n
help
Adds diagnostic support for use with the ARM Streamline Performance Analyzer.
You will need the Gator device driver already loaded before loading this driver when enabling
Streamline debug support.
This is a legacy interface required by older versions of Streamline.
config MALI_MIDGARD_DVFS
bool "Enable legacy DVFS"
depends on MALI_MIDGARD && !MALI_DEVFREQ
default n
help
Choose this option to enable legacy DVFS in the Mali Midgard DDK.
config MALI_MIDGARD_ENABLE_TRACE
bool "Enable kbase tracing"
depends on MALI_MIDGARD
default n
help
Enables tracing in kbase. Trace log available through
the "mali_trace" debugfs file, when the CONFIG_DEBUG_FS is enabled
config MALI_DEVFREQ
bool "devfreq support for Mali"
depends on MALI_MIDGARD && PM_DEVFREQ
help
Support devfreq for Mali.
Using the devfreq framework and, by default, the simpleondemand
governor, the frequency of Mali will be dynamically selected from the
available OPPs.
config MALI_DMA_FENCE
bool "DMA_BUF fence support for Mali"
depends on MALI_MIDGARD
default n
help
Support DMA_BUF fences for Mali.
This option should only be enabled if the Linux Kernel has built in
support for DMA_BUF fences.
config MALI_PLATFORM_NAME
depends on MALI_MIDGARD
string "Platform name"
default "devicetree"
help
Enter the name of the desired platform configuration directory to
include in the build. 'platform/$(MALI_PLATFORM_NAME)/Kbuild' must
exist.
# MALI_EXPERT configuration options
menuconfig MALI_EXPERT
depends on MALI_MIDGARD
bool "Enable Expert Settings"
default n
help
Enabling this option and modifying the default settings may produce a driver with performance or
other limitations.
config MALI_CORESTACK
bool "Support controlling power to the GPU core stack"
depends on MALI_MIDGARD && MALI_EXPERT
default n
help
Enabling this feature on supported GPUs will let the driver powering
on/off the GPU core stack independently without involving the Power
Domain Controller. This should only be enabled on platforms which
integration of the PDC to the Mali GPU is known to be problematic.
This feature is currently only supported on t-Six and t-HEx GPUs.
If unsure, say N.
config MALI_PRFCNT_SET_SECONDARY
bool "Use secondary set of performance counters"
depends on MALI_MIDGARD && MALI_EXPERT
default n
help
Select this option to use secondary set of performance counters. Kernel
features that depend on an access to the primary set of counters may
become unavailable. Enabling this option will prevent power management
from working optimally and may cause instrumentation tools to return
bogus results.
If unsure, say N.
config MALI_DEBUG
bool "Debug build"
depends on MALI_MIDGARD && MALI_EXPERT
default n
help
Select this option for increased checking and reporting of errors.
config MALI_FENCE_DEBUG
bool "Debug sync fence usage"
depends on MALI_MIDGARD && MALI_EXPERT && (SYNC || SYNC_FILE)
default y if MALI_DEBUG
help
Select this option to enable additional checking and reporting on the
use of sync fences in the Mali driver.
This will add a 3s timeout to all sync fence waits in the Mali
driver, so that when work for Mali has been waiting on a sync fence
for a long time a debug message will be printed, detailing what fence
is causing the block, and which dependent Mali atoms are blocked as a
result of this.
The timeout can be changed at runtime through the js_soft_timeout
device attribute, where the timeout is specified in milliseconds.
config MALI_NO_MALI
bool "No Mali"
depends on MALI_MIDGARD && MALI_EXPERT
default n
help
This can be used to test the driver in a simulated environment
whereby the hardware is not physically present. If the hardware is physically
present it will not be used. This can be used to test the majority of the
driver without needing actual hardware or for software benchmarking.
All calls to the simulated hardware will complete immediately as if the hardware
completed the task.
config MALI_ERROR_INJECT
bool "Error injection"
depends on MALI_MIDGARD && MALI_EXPERT && MALI_NO_MALI
default n
help
Enables insertion of errors to test module failure and recovery mechanisms.
config MALI_TRACE_TIMELINE
bool "Timeline tracing"
depends on MALI_MIDGARD && MALI_EXPERT
default n
help
Enables timeline tracing through the kernel tracepoint system.
config MALI_SYSTEM_TRACE
bool "Enable system event tracing support"
depends on MALI_MIDGARD && MALI_EXPERT
default n
help
Choose this option to enable system trace events for each
kbase event. This is typically used for debugging but has
minimal overhead when not in use. Enable only if you know what
you are doing.
config MALI_JOB_DUMP
bool "Enable system level support needed for job dumping"
depends on MALI_MIDGARD && MALI_EXPERT
default n
help
Choose this option to enable system level support needed for
job dumping. This is typically used for instrumentation but has
minimal overhead when not in use. Enable only if you know what
you are doing.
config MALI_2MB_ALLOC
bool "Attempt to allocate 2MB pages"
depends on MALI_MIDGARD && MALI_EXPERT
default n
help
Rather than allocating all GPU memory page-by-page, attempt to
allocate 2MB pages from the kernel. This reduces TLB pressure and
helps to prevent memory fragmentation.
If in doubt, say N
config MALI_PWRSOFT_765
bool "PWRSOFT-765 ticket"
depends on MALI_MIDGARD && MALI_EXPERT
default n
help
PWRSOFT-765 fixes devfreq cooling devices issues. The fix was merged
in kernel v4.10, however if backported into the kernel then this
option must be manually selected.
If using kernel >= v4.10 then say N, otherwise if devfreq cooling
changes have been backported say Y to avoid compilation errors.
source "drivers/gpu/arm/midgard/platform/Kconfig"
source "drivers/gpu/arm/midgard/tests/Kconfig"

View File

@@ -0,0 +1,42 @@
#
# (C) COPYRIGHT 2010-2018 ARM Limited. All rights reserved.
#
# This program is free software and is provided to you under the terms of the
# GNU General Public License version 2 as published by the Free Software
# Foundation, and any use by you of this program is subject to the terms
# of such GNU licence.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, you can access it online at
# http://www.gnu.org/licenses/gpl-2.0.html.
#
# SPDX-License-Identifier: GPL-2.0
#
#
KDIR ?= /lib/modules/$(shell uname -r)/build
BUSLOG_PATH_RELATIVE = $(CURDIR)/../../../..
KBASE_PATH_RELATIVE = $(CURDIR)
ifeq ($(MALI_UNIT_TEST), 1)
EXTRA_SYMBOLS += $(KBASE_PATH_RELATIVE)/tests/internal/src/kernel_assert_module/linux/Module.symvers
endif
ifeq ($(CONFIG_MALI_FPGA_BUS_LOGGER),y)
#Add bus logger symbols
EXTRA_SYMBOLS += $(BUSLOG_PATH_RELATIVE)/drivers/base/bus_logger/Module.symvers
endif
# we get the symbols from modules using KBUILD_EXTRA_SYMBOLS to prevent warnings about unknown functions
all:
$(MAKE) -C $(KDIR) M=$(CURDIR) EXTRA_CFLAGS="-I$(CURDIR)/../../../../include -I$(CURDIR)/../../../../tests/include $(SCONS_CFLAGS)" $(SCONS_CONFIGS) KBUILD_EXTRA_SYMBOLS="$(EXTRA_SYMBOLS)" modules
clean:
$(MAKE) -C $(KDIR) M=$(CURDIR) clean

View File

@@ -0,0 +1,23 @@
#
# (C) COPYRIGHT 2010 ARM Limited. All rights reserved.
#
# This program is free software and is provided to you under the terms of the
# GNU General Public License version 2 as published by the Free Software
# Foundation, and any use by you of this program is subject to the terms
# of such GNU licence.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, you can access it online at
# http://www.gnu.org/licenses/gpl-2.0.html.
#
# SPDX-License-Identifier: GPL-2.0
#
#
EXTRA_CFLAGS += -I$(ROOT) -I$(KBASE_PATH) -I$(OSK_PATH)/src/linux/include -I$(KBASE_PATH)/platform_$(PLATFORM)

209
drivers/gpu/arm/midgard/Mconfig Executable file
View File

@@ -0,0 +1,209 @@
#
# (C) COPYRIGHT 2012-2017 ARM Limited. All rights reserved.
#
# This program is free software and is provided to you under the terms of the
# GNU General Public License version 2 as published by the Free Software
# Foundation, and any use by you of this program is subject to the terms
# of such GNU licence.
#
# A copy of the licence is included with the program, and can also be obtained
# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
# Boston, MA 02110-1301, USA.
#
#
menuconfig MALI_MIDGARD
bool "Mali Midgard series support"
default y
help
Enable this option to build support for a ARM Mali Midgard GPU.
To compile this driver as a module, choose M here:
this will generate a single module, called mali_kbase.
config MALI_GATOR_SUPPORT
bool "Streamline support via Gator"
depends on MALI_MIDGARD
default y if INSTRUMENTATION_STREAMLINE_OLD
default n
help
Adds diagnostic support for use with the ARM Streamline Performance Analyzer.
You will need the Gator device driver already loaded before loading this driver when enabling
Streamline debug support.
This is a legacy interface required by older versions of Streamline.
config MALI_MIDGARD_DVFS
bool "Enable legacy DVFS"
depends on MALI_MIDGARD && !MALI_DEVFREQ
default n
help
Choose this option to enable legacy DVFS in the Mali Midgard DDK.
config MALI_MIDGARD_ENABLE_TRACE
bool "Enable kbase tracing"
depends on MALI_MIDGARD
default n
help
Enables tracing in kbase. Trace log available through
the "mali_trace" debugfs file, when the CONFIG_DEBUG_FS is enabled
config MALI_DEVFREQ
bool "devfreq support for Mali"
depends on MALI_MIDGARD
default y if PLATFORM_JUNO
default y if PLATFORM_CUSTOM
help
Support devfreq for Mali.
Using the devfreq framework and, by default, the simpleondemand
governor, the frequency of Mali will be dynamically selected from the
available OPPs.
config MALI_DMA_FENCE
bool "DMA_BUF fence support for Mali"
depends on MALI_MIDGARD
default n
help
Support DMA_BUF fences for Mali.
This option should only be enabled if the Linux Kernel has built in
support for DMA_BUF fences.
config MALI_PLATFORM_NAME
depends on MALI_MIDGARD
string "Platform name"
default "arndale" if PLATFORM_ARNDALE
default "arndale_octa" if PLATFORM_ARNDALE_OCTA
default "rk" if PLATFORM_FIREFLY
default "hisilicon" if PLATFORM_HIKEY960
default "vexpress" if PLATFORM_VEXPRESS
default "devicetree"
help
Enter the name of the desired platform configuration directory to
include in the build. 'platform/$(MALI_PLATFORM_NAME)/Kbuild' must
exist.
config MALI_MOCK_TEST
bool
depends on MALI_MIDGARD && !RELEASE
default y
# MALI_EXPERT configuration options
menuconfig MALI_EXPERT
depends on MALI_MIDGARD
bool "Enable Expert Settings"
default y
help
Enabling this option and modifying the default settings may produce a driver with performance or
other limitations.
config MALI_CORESTACK
bool "Support controlling power to the GPU core stack"
depends on MALI_MIDGARD && MALI_EXPERT
default n
help
Enabling this feature on supported GPUs will let the driver powering
on/off the GPU core stack independently without involving the Power
Domain Controller. This should only be enabled on platforms which
integration of the PDC to the Mali GPU is known to be problematic.
This feature is currently only supported on t-Six and t-HEx GPUs.
If unsure, say N.
config MALI_PRFCNT_SET_SECONDARY
bool "Use secondary set of performance counters"
depends on MALI_MIDGARD && MALI_EXPERT
default n
help
Select this option to use secondary set of performance counters. Kernel
features that depend on an access to the primary set of counters may
become unavailable. Enabling this option will prevent power management
from working optimally and may cause instrumentation tools to return
bogus results.
If unsure, say N.
config MALI_DEBUG
bool "Debug build"
depends on MALI_MIDGARD && MALI_EXPERT
default y if DEBUG
default n
help
Select this option for increased checking and reporting of errors.
config MALI_FENCE_DEBUG
bool "Debug sync fence usage"
depends on MALI_MIDGARD && MALI_EXPERT
default y if MALI_DEBUG
help
Select this option to enable additional checking and reporting on the
use of sync fences in the Mali driver.
This will add a 3s timeout to all sync fence waits in the Mali
driver, so that when work for Mali has been waiting on a sync fence
for a long time a debug message will be printed, detailing what fence
is causing the block, and which dependent Mali atoms are blocked as a
result of this.
The timeout can be changed at runtime through the js_soft_timeout
device attribute, where the timeout is specified in milliseconds.
config MALI_ERROR_INJECT
bool "Error injection"
depends on MALI_MIDGARD && MALI_EXPERT && NO_MALI
default n
help
Enables insertion of errors to test module failure and recovery mechanisms.
config MALI_ERROR_INJECT_RANDOM
bool "Random error injection"
depends on MALI_MIDGARD && MALI_EXPERT && NO_MALI && MALI_ERROR_INJECT
default n
help
Injected errors are random, rather than user-driven.
config MALI_TRACE_TIMELINE
bool "Timeline tracing"
depends on MALI_MIDGARD && MALI_EXPERT
default n
help
Enables timeline tracing through the kernel tracepoint system.
config MALI_SYSTEM_TRACE
bool "Enable system event tracing support"
depends on MALI_MIDGARD && MALI_EXPERT
default n
help
Choose this option to enable system trace events for each
kbase event. This is typically used for debugging but has
minimal overhead when not in use. Enable only if you know what
you are doing.
config MALI_2MB_ALLOC
bool "Attempt to allocate 2MB pages"
depends on MALI_MIDGARD && MALI_EXPERT
default n
help
Rather than allocating all GPU memory page-by-page, attempt to
allocate 2MB pages from the kernel. This reduces TLB pressure and
helps to prevent memory fragmentation.
If in doubt, say N
config MALI_FPGA_BUS_LOGGER
bool "Enable bus log integration"
depends on MALI_MIDGARD && MALI_EXPERT
default n
config MALI_PWRSOFT_765
bool "PWRSOFT-765 ticket"
depends on MALI_MIDGARD && MALI_EXPERT
default n
help
PWRSOFT-765 fixes devfreq cooling devices issues. However, they are
not merged in mainline kernel yet. So this define helps to guard those
parts of the code.
source "kernel/drivers/gpu/arm/midgard/tests/Mconfig"

View File

@@ -0,0 +1,66 @@
#
# (C) COPYRIGHT 2014,2017 ARM Limited. All rights reserved.
#
# This program is free software and is provided to you under the terms of the
# GNU General Public License version 2 as published by the Free Software
# Foundation, and any use by you of this program is subject to the terms
# of such GNU licence.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, you can access it online at
# http://www.gnu.org/licenses/gpl-2.0.html.
#
# SPDX-License-Identifier: GPL-2.0
#
#
BACKEND += \
backend/gpu/mali_kbase_cache_policy_backend.c \
backend/gpu/mali_kbase_device_hw.c \
backend/gpu/mali_kbase_gpu.c \
backend/gpu/mali_kbase_gpuprops_backend.c \
backend/gpu/mali_kbase_debug_job_fault_backend.c \
backend/gpu/mali_kbase_irq_linux.c \
backend/gpu/mali_kbase_instr_backend.c \
backend/gpu/mali_kbase_jm_as.c \
backend/gpu/mali_kbase_jm_hw.c \
backend/gpu/mali_kbase_jm_rb.c \
backend/gpu/mali_kbase_js_affinity.c \
backend/gpu/mali_kbase_js_backend.c \
backend/gpu/mali_kbase_mmu_hw_direct.c \
backend/gpu/mali_kbase_pm_backend.c \
backend/gpu/mali_kbase_pm_driver.c \
backend/gpu/mali_kbase_pm_metrics.c \
backend/gpu/mali_kbase_pm_ca.c \
backend/gpu/mali_kbase_pm_ca_fixed.c \
backend/gpu/mali_kbase_pm_always_on.c \
backend/gpu/mali_kbase_pm_coarse_demand.c \
backend/gpu/mali_kbase_pm_demand.c \
backend/gpu/mali_kbase_pm_policy.c \
backend/gpu/mali_kbase_time.c
ifeq ($(MALI_CUSTOMER_RELEASE),0)
BACKEND += \
backend/gpu/mali_kbase_pm_ca_random.c \
backend/gpu/mali_kbase_pm_demand_always_powered.c \
backend/gpu/mali_kbase_pm_fast_start.c
endif
ifeq ($(CONFIG_MALI_DEVFREQ),y)
BACKEND += \
backend/gpu/mali_kbase_devfreq.c \
backend/gpu/mali_kbase_pm_ca_devfreq.c
endif
ifeq ($(CONFIG_MALI_NO_MALI),y)
# Dummy model
BACKEND += backend/gpu/mali_kbase_model_dummy.c
BACKEND += backend/gpu/mali_kbase_model_linux.c
# HW error simulation
BACKEND += backend/gpu/mali_kbase_model_error_generator.c
endif

View File

@@ -0,0 +1,34 @@
/*
*
* (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
* Foundation, and any use by you of this program is subject to the terms
* of such GNU licence.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, you can access it online at
* http://www.gnu.org/licenses/gpl-2.0.html.
*
* SPDX-License-Identifier: GPL-2.0
*
*/
/*
* Backend specific configuration
*/
#ifndef _KBASE_BACKEND_CONFIG_H_
#define _KBASE_BACKEND_CONFIG_H_
/* Enable GPU reset API */
#define KBASE_GPU_RESET_EN 1
#endif /* _KBASE_BACKEND_CONFIG_H_ */

View File

@@ -0,0 +1,34 @@
/*
*
* (C) COPYRIGHT 2015-2016 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
* Foundation, and any use by you of this program is subject to the terms
* of such GNU licence.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, you can access it online at
* http://www.gnu.org/licenses/gpl-2.0.html.
*
* SPDX-License-Identifier: GPL-2.0
*
*/
#include "backend/gpu/mali_kbase_cache_policy_backend.h"
#include <backend/gpu/mali_kbase_device_internal.h>
void kbase_cache_set_coherency_mode(struct kbase_device *kbdev,
u32 mode)
{
kbdev->current_gpu_coherency_mode = mode;
if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_COHERENCY_REG))
kbase_reg_write(kbdev, COHERENCY_ENABLE, mode, NULL);
}

View File

@@ -0,0 +1,39 @@
/*
*
* (C) COPYRIGHT 2015-2016 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
* Foundation, and any use by you of this program is subject to the terms
* of such GNU licence.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, you can access it online at
* http://www.gnu.org/licenses/gpl-2.0.html.
*
* SPDX-License-Identifier: GPL-2.0
*
*/
#ifndef _KBASE_CACHE_POLICY_BACKEND_H_
#define _KBASE_CACHE_POLICY_BACKEND_H_
#include "mali_kbase.h"
#include "mali_base_kernel.h"
/**
* kbase_cache_set_coherency_mode() - Sets the system coherency mode
* in the GPU.
* @kbdev: Device pointer
* @mode: Coherency mode. COHERENCY_ACE/ACE_LITE
*/
void kbase_cache_set_coherency_mode(struct kbase_device *kbdev,
u32 mode);
#endif /* _KBASE_CACHE_POLICY_H_ */

View File

@@ -0,0 +1,162 @@
/*
*
* (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
* Foundation, and any use by you of this program is subject to the terms
* of such GNU licence.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, you can access it online at
* http://www.gnu.org/licenses/gpl-2.0.html.
*
* SPDX-License-Identifier: GPL-2.0
*
*/
#include <mali_kbase.h>
#include <backend/gpu/mali_kbase_device_internal.h>
#include "mali_kbase_debug_job_fault.h"
#ifdef CONFIG_DEBUG_FS
/*GPU_CONTROL_REG(r)*/
static int gpu_control_reg_snapshot[] = {
GPU_ID,
SHADER_READY_LO,
SHADER_READY_HI,
TILER_READY_LO,
TILER_READY_HI,
L2_READY_LO,
L2_READY_HI
};
/* JOB_CONTROL_REG(r) */
static int job_control_reg_snapshot[] = {
JOB_IRQ_MASK,
JOB_IRQ_STATUS
};
/* JOB_SLOT_REG(n,r) */
static int job_slot_reg_snapshot[] = {
JS_HEAD_LO,
JS_HEAD_HI,
JS_TAIL_LO,
JS_TAIL_HI,
JS_AFFINITY_LO,
JS_AFFINITY_HI,
JS_CONFIG,
JS_STATUS,
JS_HEAD_NEXT_LO,
JS_HEAD_NEXT_HI,
JS_AFFINITY_NEXT_LO,
JS_AFFINITY_NEXT_HI,
JS_CONFIG_NEXT
};
/*MMU_REG(r)*/
static int mmu_reg_snapshot[] = {
MMU_IRQ_MASK,
MMU_IRQ_STATUS
};
/* MMU_AS_REG(n,r) */
static int as_reg_snapshot[] = {
AS_TRANSTAB_LO,
AS_TRANSTAB_HI,
AS_MEMATTR_LO,
AS_MEMATTR_HI,
AS_FAULTSTATUS,
AS_FAULTADDRESS_LO,
AS_FAULTADDRESS_HI,
AS_STATUS
};
bool kbase_debug_job_fault_reg_snapshot_init(struct kbase_context *kctx,
int reg_range)
{
int i, j;
int offset = 0;
int slot_number;
int as_number;
if (kctx->reg_dump == NULL)
return false;
slot_number = kctx->kbdev->gpu_props.num_job_slots;
as_number = kctx->kbdev->gpu_props.num_address_spaces;
/* get the GPU control registers*/
for (i = 0; i < sizeof(gpu_control_reg_snapshot)/4; i++) {
kctx->reg_dump[offset] =
GPU_CONTROL_REG(gpu_control_reg_snapshot[i]);
offset += 2;
}
/* get the Job control registers*/
for (i = 0; i < sizeof(job_control_reg_snapshot)/4; i++) {
kctx->reg_dump[offset] =
JOB_CONTROL_REG(job_control_reg_snapshot[i]);
offset += 2;
}
/* get the Job Slot registers*/
for (j = 0; j < slot_number; j++) {
for (i = 0; i < sizeof(job_slot_reg_snapshot)/4; i++) {
kctx->reg_dump[offset] =
JOB_SLOT_REG(j, job_slot_reg_snapshot[i]);
offset += 2;
}
}
/* get the MMU registers*/
for (i = 0; i < sizeof(mmu_reg_snapshot)/4; i++) {
kctx->reg_dump[offset] = MMU_REG(mmu_reg_snapshot[i]);
offset += 2;
}
/* get the Address space registers*/
for (j = 0; j < as_number; j++) {
for (i = 0; i < sizeof(as_reg_snapshot)/4; i++) {
kctx->reg_dump[offset] =
MMU_AS_REG(j, as_reg_snapshot[i]);
offset += 2;
}
}
WARN_ON(offset >= (reg_range*2/4));
/* set the termination flag*/
kctx->reg_dump[offset] = REGISTER_DUMP_TERMINATION_FLAG;
kctx->reg_dump[offset + 1] = REGISTER_DUMP_TERMINATION_FLAG;
dev_dbg(kctx->kbdev->dev, "kbase_job_fault_reg_snapshot_init:%d\n",
offset);
return true;
}
bool kbase_job_fault_get_reg_snapshot(struct kbase_context *kctx)
{
int offset = 0;
if (kctx->reg_dump == NULL)
return false;
while (kctx->reg_dump[offset] != REGISTER_DUMP_TERMINATION_FLAG) {
kctx->reg_dump[offset+1] =
kbase_reg_read(kctx->kbdev,
kctx->reg_dump[offset], NULL);
offset += 2;
}
return true;
}
#endif

View File

@@ -0,0 +1,439 @@
/*
*
* (C) COPYRIGHT 2014-2018 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
* Foundation, and any use by you of this program is subject to the terms
* of such GNU licence.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, you can access it online at
* http://www.gnu.org/licenses/gpl-2.0.html.
*
* SPDX-License-Identifier: GPL-2.0
*
*/
#include <mali_kbase.h>
#include <mali_kbase_tlstream.h>
#include <mali_kbase_config_defaults.h>
#include <backend/gpu/mali_kbase_pm_internal.h>
#include <linux/of.h>
#include <linux/clk.h>
#include <linux/devfreq.h>
#ifdef CONFIG_DEVFREQ_THERMAL
#include <linux/devfreq_cooling.h>
#endif
#include <linux/version.h>
#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 13, 0)
#include <linux/pm_opp.h>
#else /* Linux >= 3.13 */
/* In 3.13 the OPP include header file, types, and functions were all
* renamed. Use the old filename for the include, and define the new names to
* the old, when an old kernel is detected.
*/
#include <linux/opp.h>
#define dev_pm_opp opp
#define dev_pm_opp_get_voltage opp_get_voltage
#define dev_pm_opp_get_opp_count opp_get_opp_count
#define dev_pm_opp_find_freq_ceil opp_find_freq_ceil
#define dev_pm_opp_find_freq_floor opp_find_freq_floor
#endif /* Linux >= 3.13 */
/**
* opp_translate - Translate nominal OPP frequency from devicetree into real
* frequency and core mask
* @kbdev: Device pointer
* @freq: Nominal frequency
* @core_mask: Pointer to u64 to store core mask to
*
* Return: Real target frequency
*
* This function will only perform translation if an operating-points-v2-mali
* table is present in devicetree. If one is not present then it will return an
* untranslated frequency and all cores enabled.
*/
static unsigned long opp_translate(struct kbase_device *kbdev,
unsigned long freq, u64 *core_mask)
{
int i;
for (i = 0; i < kbdev->num_opps; i++) {
if (kbdev->opp_table[i].opp_freq == freq) {
*core_mask = kbdev->opp_table[i].core_mask;
return kbdev->opp_table[i].real_freq;
}
}
/* Failed to find OPP - return all cores enabled & nominal frequency */
*core_mask = kbdev->gpu_props.props.raw_props.shader_present;
return freq;
}
static int
kbase_devfreq_target(struct device *dev, unsigned long *target_freq, u32 flags)
{
struct kbase_device *kbdev = dev_get_drvdata(dev);
struct dev_pm_opp *opp;
unsigned long nominal_freq;
unsigned long freq = 0;
unsigned long voltage;
int err;
u64 core_mask;
freq = *target_freq;
#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 11, 0)
rcu_read_lock();
#endif
opp = devfreq_recommended_opp(dev, &freq, flags);
voltage = dev_pm_opp_get_voltage(opp);
#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 11, 0)
rcu_read_unlock();
#endif
if (IS_ERR_OR_NULL(opp)) {
dev_err(dev, "Failed to get opp (%ld)\n", PTR_ERR(opp));
return PTR_ERR(opp);
}
#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 11, 0)
dev_pm_opp_put(opp);
#endif
nominal_freq = freq;
/*
* Only update if there is a change of frequency
*/
if (kbdev->current_nominal_freq == nominal_freq) {
*target_freq = nominal_freq;
return 0;
}
freq = opp_translate(kbdev, nominal_freq, &core_mask);
#ifdef CONFIG_REGULATOR
if (kbdev->regulator && kbdev->current_voltage != voltage
&& kbdev->current_freq < freq) {
err = regulator_set_voltage(kbdev->regulator, voltage, voltage);
if (err) {
dev_err(dev, "Failed to increase voltage (%d)\n", err);
return err;
}
}
#endif
err = clk_set_rate(kbdev->clock, freq);
if (err) {
dev_err(dev, "Failed to set clock %lu (target %lu)\n",
freq, *target_freq);
return err;
}
#ifdef CONFIG_REGULATOR
if (kbdev->regulator && kbdev->current_voltage != voltage
&& kbdev->current_freq > freq) {
err = regulator_set_voltage(kbdev->regulator, voltage, voltage);
if (err) {
dev_err(dev, "Failed to decrease voltage (%d)\n", err);
return err;
}
}
#endif
if (kbdev->pm.backend.ca_current_policy->id ==
KBASE_PM_CA_POLICY_ID_DEVFREQ)
kbase_devfreq_set_core_mask(kbdev, core_mask);
*target_freq = nominal_freq;
kbdev->current_voltage = voltage;
kbdev->current_nominal_freq = nominal_freq;
kbdev->current_freq = freq;
kbdev->current_core_mask = core_mask;
KBASE_TLSTREAM_AUX_DEVFREQ_TARGET((u64)nominal_freq);
return err;
}
static int
kbase_devfreq_cur_freq(struct device *dev, unsigned long *freq)
{
struct kbase_device *kbdev = dev_get_drvdata(dev);
*freq = kbdev->current_nominal_freq;
return 0;
}
static int
kbase_devfreq_status(struct device *dev, struct devfreq_dev_status *stat)
{
struct kbase_device *kbdev = dev_get_drvdata(dev);
struct kbasep_pm_metrics diff;
kbase_pm_get_dvfs_metrics(kbdev, &kbdev->last_devfreq_metrics, &diff);
stat->busy_time = diff.time_busy;
stat->total_time = diff.time_busy + diff.time_idle;
stat->current_frequency = kbdev->current_nominal_freq;
stat->private_data = NULL;
return 0;
}
static int kbase_devfreq_init_freq_table(struct kbase_device *kbdev,
struct devfreq_dev_profile *dp)
{
int count;
int i = 0;
unsigned long freq;
struct dev_pm_opp *opp;
#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 11, 0)
rcu_read_lock();
#endif
count = dev_pm_opp_get_opp_count(kbdev->dev);
#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 11, 0)
rcu_read_unlock();
#endif
if (count < 0)
return count;
dp->freq_table = kmalloc_array(count, sizeof(dp->freq_table[0]),
GFP_KERNEL);
if (!dp->freq_table)
return -ENOMEM;
#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 11, 0)
rcu_read_lock();
#endif
for (i = 0, freq = ULONG_MAX; i < count; i++, freq--) {
opp = dev_pm_opp_find_freq_floor(kbdev->dev, &freq);
if (IS_ERR(opp))
break;
#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 11, 0)
dev_pm_opp_put(opp);
#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(4, 11, 0) */
dp->freq_table[i] = freq;
}
#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 11, 0)
rcu_read_unlock();
#endif
if (count != i)
dev_warn(kbdev->dev, "Unable to enumerate all OPPs (%d!=%d\n",
count, i);
dp->max_state = i;
return 0;
}
static void kbase_devfreq_term_freq_table(struct kbase_device *kbdev)
{
struct devfreq_dev_profile *dp = kbdev->devfreq->profile;
kfree(dp->freq_table);
}
static void kbase_devfreq_exit(struct device *dev)
{
struct kbase_device *kbdev = dev_get_drvdata(dev);
kbase_devfreq_term_freq_table(kbdev);
}
static int kbase_devfreq_init_core_mask_table(struct kbase_device *kbdev)
{
struct device_node *opp_node = of_parse_phandle(kbdev->dev->of_node,
"operating-points-v2", 0);
struct device_node *node;
int i = 0;
int count;
if (!opp_node)
return 0;
if (!of_device_is_compatible(opp_node, "operating-points-v2-mali"))
return 0;
count = dev_pm_opp_get_opp_count(kbdev->dev);
kbdev->opp_table = kmalloc_array(count,
sizeof(struct kbase_devfreq_opp), GFP_KERNEL);
if (!kbdev->opp_table)
return -ENOMEM;
for_each_available_child_of_node(opp_node, node) {
u64 core_mask;
u64 opp_freq, real_freq;
const void *core_count_p;
if (of_property_read_u64(node, "opp-hz", &opp_freq)) {
dev_warn(kbdev->dev, "OPP is missing required opp-hz property\n");
continue;
}
if (of_property_read_u64(node, "opp-hz-real", &real_freq))
real_freq = opp_freq;
if (of_property_read_u64(node, "opp-core-mask", &core_mask))
core_mask =
kbdev->gpu_props.props.raw_props.shader_present;
core_count_p = of_get_property(node, "opp-core-count", NULL);
if (core_count_p) {
u64 remaining_core_mask =
kbdev->gpu_props.props.raw_props.shader_present;
int core_count = be32_to_cpup(core_count_p);
core_mask = 0;
for (; core_count > 0; core_count--) {
int core = ffs(remaining_core_mask);
if (!core) {
dev_err(kbdev->dev, "OPP has more cores than GPU\n");
return -ENODEV;
}
core_mask |= (1ull << (core-1));
remaining_core_mask &= ~(1ull << (core-1));
}
}
if (!core_mask) {
dev_err(kbdev->dev, "OPP has invalid core mask of 0\n");
return -ENODEV;
}
kbdev->opp_table[i].opp_freq = opp_freq;
kbdev->opp_table[i].real_freq = real_freq;
kbdev->opp_table[i].core_mask = core_mask;
dev_info(kbdev->dev, "OPP %d : opp_freq=%llu real_freq=%llu core_mask=%llx\n",
i, opp_freq, real_freq, core_mask);
i++;
}
kbdev->num_opps = i;
return 0;
}
int kbase_devfreq_init(struct kbase_device *kbdev)
{
struct devfreq_dev_profile *dp;
int err;
if (!kbdev->clock) {
dev_err(kbdev->dev, "Clock not available for devfreq\n");
return -ENODEV;
}
kbdev->current_freq = clk_get_rate(kbdev->clock);
kbdev->current_nominal_freq = kbdev->current_freq;
dp = &kbdev->devfreq_profile;
dp->initial_freq = kbdev->current_freq;
dp->polling_ms = 100;
dp->target = kbase_devfreq_target;
dp->get_dev_status = kbase_devfreq_status;
dp->get_cur_freq = kbase_devfreq_cur_freq;
dp->exit = kbase_devfreq_exit;
if (kbase_devfreq_init_freq_table(kbdev, dp))
return -EFAULT;
if (dp->max_state > 0) {
/* Record the maximum frequency possible */
kbdev->gpu_props.props.core_props.gpu_freq_khz_max =
dp->freq_table[0] / 1000;
};
err = kbase_devfreq_init_core_mask_table(kbdev);
if (err)
return err;
kbdev->devfreq = devfreq_add_device(kbdev->dev, dp,
"simple_ondemand", NULL);
if (IS_ERR(kbdev->devfreq)) {
kfree(dp->freq_table);
return PTR_ERR(kbdev->devfreq);
}
/* devfreq_add_device only copies a few of kbdev->dev's fields, so
* set drvdata explicitly so IPA models can access kbdev. */
dev_set_drvdata(&kbdev->devfreq->dev, kbdev);
err = devfreq_register_opp_notifier(kbdev->dev, kbdev->devfreq);
if (err) {
dev_err(kbdev->dev,
"Failed to register OPP notifier (%d)\n", err);
goto opp_notifier_failed;
}
#ifdef CONFIG_DEVFREQ_THERMAL
err = kbase_ipa_init(kbdev);
if (err) {
dev_err(kbdev->dev, "IPA initialization failed\n");
goto cooling_failed;
}
kbdev->devfreq_cooling = of_devfreq_cooling_register_power(
kbdev->dev->of_node,
kbdev->devfreq,
&kbase_ipa_power_model_ops);
if (IS_ERR_OR_NULL(kbdev->devfreq_cooling)) {
err = PTR_ERR(kbdev->devfreq_cooling);
dev_err(kbdev->dev,
"Failed to register cooling device (%d)\n",
err);
goto cooling_failed;
}
#endif
return 0;
#ifdef CONFIG_DEVFREQ_THERMAL
cooling_failed:
devfreq_unregister_opp_notifier(kbdev->dev, kbdev->devfreq);
#endif /* CONFIG_DEVFREQ_THERMAL */
opp_notifier_failed:
if (devfreq_remove_device(kbdev->devfreq))
dev_err(kbdev->dev, "Failed to terminate devfreq (%d)\n", err);
else
kbdev->devfreq = NULL;
return err;
}
void kbase_devfreq_term(struct kbase_device *kbdev)
{
int err;
dev_dbg(kbdev->dev, "Term Mali devfreq\n");
#ifdef CONFIG_DEVFREQ_THERMAL
if (kbdev->devfreq_cooling)
devfreq_cooling_unregister(kbdev->devfreq_cooling);
kbase_ipa_term(kbdev);
#endif
devfreq_unregister_opp_notifier(kbdev->dev, kbdev->devfreq);
err = devfreq_remove_device(kbdev->devfreq);
if (err)
dev_err(kbdev->dev, "Failed to terminate devfreq (%d)\n", err);
else
kbdev->devfreq = NULL;
kfree(kbdev->opp_table);
}

View File

@@ -0,0 +1,29 @@
/*
*
* (C) COPYRIGHT 2014 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
* Foundation, and any use by you of this program is subject to the terms
* of such GNU licence.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, you can access it online at
* http://www.gnu.org/licenses/gpl-2.0.html.
*
* SPDX-License-Identifier: GPL-2.0
*
*/
#ifndef _BASE_DEVFREQ_H_
#define _BASE_DEVFREQ_H_
int kbase_devfreq_init(struct kbase_device *kbdev);
void kbase_devfreq_term(struct kbase_device *kbdev);
#endif /* _BASE_DEVFREQ_H_ */

View File

@@ -0,0 +1,260 @@
/*
*
* (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
* Foundation, and any use by you of this program is subject to the terms
* of such GNU licence.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, you can access it online at
* http://www.gnu.org/licenses/gpl-2.0.html.
*
* SPDX-License-Identifier: GPL-2.0
*
*/
/*
*
*/
#include <mali_kbase.h>
#include <backend/gpu/mali_kbase_instr_internal.h>
#include <backend/gpu/mali_kbase_pm_internal.h>
#include <backend/gpu/mali_kbase_device_internal.h>
#if !defined(CONFIG_MALI_NO_MALI)
#ifdef CONFIG_DEBUG_FS
int kbase_io_history_resize(struct kbase_io_history *h, u16 new_size)
{
struct kbase_io_access *old_buf;
struct kbase_io_access *new_buf;
unsigned long flags;
if (!new_size)
goto out_err; /* The new size must not be 0 */
new_buf = vmalloc(new_size * sizeof(*h->buf));
if (!new_buf)
goto out_err;
spin_lock_irqsave(&h->lock, flags);
old_buf = h->buf;
/* Note: we won't bother with copying the old data over. The dumping
* logic wouldn't work properly as it relies on 'count' both as a
* counter and as an index to the buffer which would have changed with
* the new array. This is a corner case that we don't need to support.
*/
h->count = 0;
h->size = new_size;
h->buf = new_buf;
spin_unlock_irqrestore(&h->lock, flags);
vfree(old_buf);
return 0;
out_err:
return -1;
}
int kbase_io_history_init(struct kbase_io_history *h, u16 n)
{
h->enabled = false;
spin_lock_init(&h->lock);
h->count = 0;
h->size = 0;
h->buf = NULL;
if (kbase_io_history_resize(h, n))
return -1;
return 0;
}
void kbase_io_history_term(struct kbase_io_history *h)
{
vfree(h->buf);
h->buf = NULL;
}
/* kbase_io_history_add - add new entry to the register access history
*
* @h: Pointer to the history data structure
* @addr: Register address
* @value: The value that is either read from or written to the register
* @write: 1 if it's a register write, 0 if it's a read
*/
static void kbase_io_history_add(struct kbase_io_history *h,
void __iomem const *addr, u32 value, u8 write)
{
struct kbase_io_access *io;
unsigned long flags;
spin_lock_irqsave(&h->lock, flags);
io = &h->buf[h->count % h->size];
io->addr = (uintptr_t)addr | write;
io->value = value;
++h->count;
/* If count overflows, move the index by the buffer size so the entire
* buffer will still be dumped later */
if (unlikely(!h->count))
h->count = h->size;
spin_unlock_irqrestore(&h->lock, flags);
}
void kbase_io_history_dump(struct kbase_device *kbdev)
{
struct kbase_io_history *const h = &kbdev->io_history;
u16 i;
size_t iters;
unsigned long flags;
if (!unlikely(h->enabled))
return;
spin_lock_irqsave(&h->lock, flags);
dev_err(kbdev->dev, "Register IO History:");
iters = (h->size > h->count) ? h->count : h->size;
dev_err(kbdev->dev, "Last %zu register accesses of %zu total:\n", iters,
h->count);
for (i = 0; i < iters; ++i) {
struct kbase_io_access *io =
&h->buf[(h->count - iters + i) % h->size];
char const access = (io->addr & 1) ? 'w' : 'r';
dev_err(kbdev->dev, "%6i: %c: reg 0x%p val %08x\n", i, access,
(void *)(io->addr & ~0x1), io->value);
}
spin_unlock_irqrestore(&h->lock, flags);
}
#endif /* CONFIG_DEBUG_FS */
void kbase_reg_write(struct kbase_device *kbdev, u16 offset, u32 value,
struct kbase_context *kctx)
{
KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_powered);
KBASE_DEBUG_ASSERT(kctx == NULL || kctx->as_nr != KBASEP_AS_NR_INVALID);
KBASE_DEBUG_ASSERT(kbdev->dev != NULL);
writel(value, kbdev->reg + offset);
#ifdef CONFIG_DEBUG_FS
if (unlikely(kbdev->io_history.enabled))
kbase_io_history_add(&kbdev->io_history, kbdev->reg + offset,
value, 1);
#endif /* CONFIG_DEBUG_FS */
dev_dbg(kbdev->dev, "w: reg %04x val %08x", offset, value);
if (kctx && kctx->jctx.tb)
kbase_device_trace_register_access(kctx, REG_WRITE, offset,
value);
}
KBASE_EXPORT_TEST_API(kbase_reg_write);
u32 kbase_reg_read(struct kbase_device *kbdev, u16 offset,
struct kbase_context *kctx)
{
u32 val;
KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_powered);
KBASE_DEBUG_ASSERT(kctx == NULL || kctx->as_nr != KBASEP_AS_NR_INVALID);
KBASE_DEBUG_ASSERT(kbdev->dev != NULL);
val = readl(kbdev->reg + offset);
#ifdef CONFIG_DEBUG_FS
if (unlikely(kbdev->io_history.enabled))
kbase_io_history_add(&kbdev->io_history, kbdev->reg + offset,
val, 0);
#endif /* CONFIG_DEBUG_FS */
dev_dbg(kbdev->dev, "r: reg %04x val %08x", offset, val);
if (kctx && kctx->jctx.tb)
kbase_device_trace_register_access(kctx, REG_READ, offset, val);
return val;
}
KBASE_EXPORT_TEST_API(kbase_reg_read);
#endif /* !defined(CONFIG_MALI_NO_MALI) */
/**
* kbase_report_gpu_fault - Report a GPU fault.
* @kbdev: Kbase device pointer
* @multiple: Zero if only GPU_FAULT was raised, non-zero if MULTIPLE_GPU_FAULTS
* was also set
*
* This function is called from the interrupt handler when a GPU fault occurs.
* It reports the details of the fault using dev_warn().
*/
static void kbase_report_gpu_fault(struct kbase_device *kbdev, int multiple)
{
u32 status;
u64 address;
status = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_FAULTSTATUS), NULL);
address = (u64) kbase_reg_read(kbdev,
GPU_CONTROL_REG(GPU_FAULTADDRESS_HI), NULL) << 32;
address |= kbase_reg_read(kbdev,
GPU_CONTROL_REG(GPU_FAULTADDRESS_LO), NULL);
dev_warn(kbdev->dev, "GPU Fault 0x%08x (%s) at 0x%016llx",
status & 0xFF,
kbase_exception_name(kbdev, status),
address);
if (multiple)
dev_warn(kbdev->dev, "There were multiple GPU faults - some have not been reported\n");
}
void kbase_gpu_interrupt(struct kbase_device *kbdev, u32 val)
{
KBASE_TRACE_ADD(kbdev, CORE_GPU_IRQ, NULL, NULL, 0u, val);
if (val & GPU_FAULT)
kbase_report_gpu_fault(kbdev, val & MULTIPLE_GPU_FAULTS);
if (val & RESET_COMPLETED)
kbase_pm_reset_done(kbdev);
if (val & PRFCNT_SAMPLE_COMPLETED)
kbase_instr_hwcnt_sample_done(kbdev);
if (val & CLEAN_CACHES_COMPLETED)
kbase_clean_caches_done(kbdev);
KBASE_TRACE_ADD(kbdev, CORE_GPU_IRQ_CLEAR, NULL, NULL, 0u, val);
kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), val, NULL);
/* kbase_pm_check_transitions must be called after the IRQ has been
* cleared. This is because it might trigger further power transitions
* and we don't want to miss the interrupt raised to notify us that
* these further transitions have finished.
*/
if (val & POWER_CHANGED_ALL)
kbase_pm_power_changed(kbdev);
KBASE_TRACE_ADD(kbdev, CORE_GPU_IRQ_DONE, NULL, NULL, 0u, val);
}

View File

@@ -0,0 +1,72 @@
/*
*
* (C) COPYRIGHT 2014 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
* Foundation, and any use by you of this program is subject to the terms
* of such GNU licence.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, you can access it online at
* http://www.gnu.org/licenses/gpl-2.0.html.
*
* SPDX-License-Identifier: GPL-2.0
*
*/
/*
* Backend-specific HW access device APIs
*/
#ifndef _KBASE_DEVICE_INTERNAL_H_
#define _KBASE_DEVICE_INTERNAL_H_
/**
* kbase_reg_write - write to GPU register
* @kbdev: Kbase device pointer
* @offset: Offset of register
* @value: Value to write
* @kctx: Kbase context pointer. May be NULL
*
* Caller must ensure the GPU is powered (@kbdev->pm.gpu_powered != false). If
* @kctx is not NULL then the caller must ensure it is scheduled (@kctx->as_nr
* != KBASEP_AS_NR_INVALID).
*/
void kbase_reg_write(struct kbase_device *kbdev, u16 offset, u32 value,
struct kbase_context *kctx);
/**
* kbase_reg_read - read from GPU register
* @kbdev: Kbase device pointer
* @offset: Offset of register
* @kctx: Kbase context pointer. May be NULL
*
* Caller must ensure the GPU is powered (@kbdev->pm.gpu_powered != false). If
* @kctx is not NULL then the caller must ensure it is scheduled (@kctx->as_nr
* != KBASEP_AS_NR_INVALID).
*
* Return: Value in desired register
*/
u32 kbase_reg_read(struct kbase_device *kbdev, u16 offset,
struct kbase_context *kctx);
/**
* kbase_gpu_interrupt - GPU interrupt handler
* @kbdev: Kbase device pointer
* @val: The value of the GPU IRQ status register which triggered the call
*
* This function is called from the interrupt handler when a GPU irq is to be
* handled.
*/
void kbase_gpu_interrupt(struct kbase_device *kbdev, u32 val);
#endif /* _KBASE_DEVICE_INTERNAL_H_ */

View File

@@ -0,0 +1,135 @@
/*
*
* (C) COPYRIGHT 2014-2017 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
* Foundation, and any use by you of this program is subject to the terms
* of such GNU licence.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, you can access it online at
* http://www.gnu.org/licenses/gpl-2.0.html.
*
* SPDX-License-Identifier: GPL-2.0
*
*/
/*
* Register-based HW access backend APIs
*/
#include <mali_kbase.h>
#include <mali_kbase_hwaccess_backend.h>
#include <backend/gpu/mali_kbase_irq_internal.h>
#include <backend/gpu/mali_kbase_jm_internal.h>
#include <backend/gpu/mali_kbase_js_internal.h>
#include <backend/gpu/mali_kbase_pm_internal.h>
int kbase_backend_early_init(struct kbase_device *kbdev)
{
int err;
err = kbasep_platform_device_init(kbdev);
if (err)
return err;
err = kbase_pm_runtime_init(kbdev);
if (err)
goto fail_runtime_pm;
/* Ensure we can access the GPU registers */
kbase_pm_register_access_enable(kbdev);
/* Find out GPU properties based on the GPU feature registers */
kbase_gpuprops_set(kbdev);
/* We're done accessing the GPU registers for now. */
kbase_pm_register_access_disable(kbdev);
err = kbase_install_interrupts(kbdev);
if (err)
goto fail_interrupts;
err = kbase_hwaccess_pm_init(kbdev);
if (err)
goto fail_pm;
return 0;
fail_pm:
kbase_release_interrupts(kbdev);
fail_interrupts:
kbase_pm_runtime_term(kbdev);
fail_runtime_pm:
kbasep_platform_device_term(kbdev);
return err;
}
void kbase_backend_early_term(struct kbase_device *kbdev)
{
kbase_hwaccess_pm_term(kbdev);
kbase_release_interrupts(kbdev);
kbase_pm_runtime_term(kbdev);
kbasep_platform_device_term(kbdev);
}
int kbase_backend_late_init(struct kbase_device *kbdev)
{
int err;
err = kbase_hwaccess_pm_powerup(kbdev, PM_HW_ISSUES_DETECT);
if (err)
return err;
err = kbase_backend_timer_init(kbdev);
if (err)
goto fail_timer;
#ifdef CONFIG_MALI_DEBUG
#ifndef CONFIG_MALI_NO_MALI
if (kbasep_common_test_interrupt_handlers(kbdev) != 0) {
dev_err(kbdev->dev, "Interrupt assigment check failed.\n");
err = -EINVAL;
goto fail_interrupt_test;
}
#endif /* !CONFIG_MALI_NO_MALI */
#endif /* CONFIG_MALI_DEBUG */
err = kbase_job_slot_init(kbdev);
if (err)
goto fail_job_slot;
init_waitqueue_head(&kbdev->hwaccess.backend.reset_wait);
return 0;
fail_job_slot:
#ifdef CONFIG_MALI_DEBUG
#ifndef CONFIG_MALI_NO_MALI
fail_interrupt_test:
#endif /* !CONFIG_MALI_NO_MALI */
#endif /* CONFIG_MALI_DEBUG */
kbase_backend_timer_term(kbdev);
fail_timer:
kbase_hwaccess_pm_halt(kbdev);
return err;
}
void kbase_backend_late_term(struct kbase_device *kbdev)
{
kbase_job_slot_halt(kbdev);
kbase_job_slot_term(kbdev);
kbase_backend_timer_term(kbdev);
kbase_hwaccess_pm_halt(kbdev);
}

View File

@@ -0,0 +1,117 @@
/*
*
* (C) COPYRIGHT 2014-2018 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
* Foundation, and any use by you of this program is subject to the terms
* of such GNU licence.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, you can access it online at
* http://www.gnu.org/licenses/gpl-2.0.html.
*
* SPDX-License-Identifier: GPL-2.0
*
*/
/*
* Base kernel property query backend APIs
*/
#include <mali_kbase.h>
#include <backend/gpu/mali_kbase_device_internal.h>
#include <backend/gpu/mali_kbase_pm_internal.h>
#include <mali_kbase_hwaccess_gpuprops.h>
void kbase_backend_gpuprops_get(struct kbase_device *kbdev,
struct kbase_gpuprops_regdump *regdump)
{
int i;
/* Fill regdump with the content of the relevant registers */
regdump->gpu_id = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_ID), NULL);
regdump->l2_features = kbase_reg_read(kbdev,
GPU_CONTROL_REG(L2_FEATURES), NULL);
regdump->core_features = kbase_reg_read(kbdev,
GPU_CONTROL_REG(CORE_FEATURES), NULL);
regdump->tiler_features = kbase_reg_read(kbdev,
GPU_CONTROL_REG(TILER_FEATURES), NULL);
regdump->mem_features = kbase_reg_read(kbdev,
GPU_CONTROL_REG(MEM_FEATURES), NULL);
regdump->mmu_features = kbase_reg_read(kbdev,
GPU_CONTROL_REG(MMU_FEATURES), NULL);
regdump->as_present = kbase_reg_read(kbdev,
GPU_CONTROL_REG(AS_PRESENT), NULL);
regdump->js_present = kbase_reg_read(kbdev,
GPU_CONTROL_REG(JS_PRESENT), NULL);
for (i = 0; i < GPU_MAX_JOB_SLOTS; i++)
regdump->js_features[i] = kbase_reg_read(kbdev,
GPU_CONTROL_REG(JS_FEATURES_REG(i)), NULL);
for (i = 0; i < BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS; i++)
regdump->texture_features[i] = kbase_reg_read(kbdev,
GPU_CONTROL_REG(TEXTURE_FEATURES_REG(i)), NULL);
regdump->thread_max_threads = kbase_reg_read(kbdev,
GPU_CONTROL_REG(THREAD_MAX_THREADS), NULL);
regdump->thread_max_workgroup_size = kbase_reg_read(kbdev,
GPU_CONTROL_REG(THREAD_MAX_WORKGROUP_SIZE),
NULL);
regdump->thread_max_barrier_size = kbase_reg_read(kbdev,
GPU_CONTROL_REG(THREAD_MAX_BARRIER_SIZE), NULL);
regdump->thread_features = kbase_reg_read(kbdev,
GPU_CONTROL_REG(THREAD_FEATURES), NULL);
regdump->thread_tls_alloc = kbase_reg_read(kbdev,
GPU_CONTROL_REG(THREAD_TLS_ALLOC), NULL);
regdump->shader_present_lo = kbase_reg_read(kbdev,
GPU_CONTROL_REG(SHADER_PRESENT_LO), NULL);
regdump->shader_present_hi = kbase_reg_read(kbdev,
GPU_CONTROL_REG(SHADER_PRESENT_HI), NULL);
regdump->tiler_present_lo = kbase_reg_read(kbdev,
GPU_CONTROL_REG(TILER_PRESENT_LO), NULL);
regdump->tiler_present_hi = kbase_reg_read(kbdev,
GPU_CONTROL_REG(TILER_PRESENT_HI), NULL);
regdump->l2_present_lo = kbase_reg_read(kbdev,
GPU_CONTROL_REG(L2_PRESENT_LO), NULL);
regdump->l2_present_hi = kbase_reg_read(kbdev,
GPU_CONTROL_REG(L2_PRESENT_HI), NULL);
regdump->stack_present_lo = kbase_reg_read(kbdev,
GPU_CONTROL_REG(STACK_PRESENT_LO), NULL);
regdump->stack_present_hi = kbase_reg_read(kbdev,
GPU_CONTROL_REG(STACK_PRESENT_HI), NULL);
}
void kbase_backend_gpuprops_get_features(struct kbase_device *kbdev,
struct kbase_gpuprops_regdump *regdump)
{
if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_COHERENCY_REG)) {
/* Ensure we can access the GPU registers */
kbase_pm_register_access_enable(kbdev);
regdump->coherency_features = kbase_reg_read(kbdev,
GPU_CONTROL_REG(COHERENCY_FEATURES), NULL);
/* We're done accessing the GPU registers for now. */
kbase_pm_register_access_disable(kbdev);
} else {
/* Pre COHERENCY_FEATURES we only supported ACE_LITE */
regdump->coherency_features =
COHERENCY_FEATURE_BIT(COHERENCY_NONE) |
COHERENCY_FEATURE_BIT(COHERENCY_ACE_LITE);
}
}

View File

@@ -0,0 +1,497 @@
/*
*
* (C) COPYRIGHT 2014-2017 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
* Foundation, and any use by you of this program is subject to the terms
* of such GNU licence.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, you can access it online at
* http://www.gnu.org/licenses/gpl-2.0.html.
*
* SPDX-License-Identifier: GPL-2.0
*
*/
/*
* GPU backend instrumentation APIs.
*/
#include <mali_kbase.h>
#include <mali_midg_regmap.h>
#include <mali_kbase_hwaccess_instr.h>
#include <backend/gpu/mali_kbase_device_internal.h>
#include <backend/gpu/mali_kbase_pm_internal.h>
#include <backend/gpu/mali_kbase_instr_internal.h>
/**
* kbasep_instr_hwcnt_cacheclean - Issue Cache Clean & Invalidate command to
* hardware
*
* @kbdev: Kbase device
*/
static void kbasep_instr_hwcnt_cacheclean(struct kbase_device *kbdev)
{
unsigned long flags;
unsigned long pm_flags;
u32 irq_mask;
spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
KBASE_DEBUG_ASSERT(kbdev->hwcnt.backend.state ==
KBASE_INSTR_STATE_REQUEST_CLEAN);
/* Enable interrupt */
spin_lock_irqsave(&kbdev->hwaccess_lock, pm_flags);
irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), NULL);
kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK),
irq_mask | CLEAN_CACHES_COMPLETED, NULL);
spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags);
/* clean&invalidate the caches so we're sure the mmu tables for the dump
* buffer is valid */
KBASE_TRACE_ADD(kbdev, CORE_GPU_CLEAN_INV_CACHES, NULL, NULL, 0u, 0);
kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
GPU_COMMAND_CLEAN_INV_CACHES, NULL);
kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_CLEANING;
spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
}
int kbase_instr_hwcnt_enable_internal(struct kbase_device *kbdev,
struct kbase_context *kctx,
struct kbase_ioctl_hwcnt_enable *enable)
{
unsigned long flags, pm_flags;
int err = -EINVAL;
u32 irq_mask;
int ret;
u64 shader_cores_needed;
u32 prfcnt_config;
shader_cores_needed = kbase_pm_get_present_cores(kbdev,
KBASE_PM_CORE_SHADER);
/* alignment failure */
if ((enable->dump_buffer == 0ULL) || (enable->dump_buffer & (2048 - 1)))
goto out_err;
/* Override core availability policy to ensure all cores are available
*/
kbase_pm_ca_instr_enable(kbdev);
/* Request the cores early on synchronously - we'll release them on any
* errors (e.g. instrumentation already active) */
kbase_pm_request_cores_sync(kbdev, true, shader_cores_needed);
spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
if (kbdev->hwcnt.backend.state != KBASE_INSTR_STATE_DISABLED) {
/* Instrumentation is already enabled */
spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
goto out_unrequest_cores;
}
/* Enable interrupt */
spin_lock_irqsave(&kbdev->hwaccess_lock, pm_flags);
irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), NULL);
kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), irq_mask |
PRFCNT_SAMPLE_COMPLETED, NULL);
spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags);
/* In use, this context is the owner */
kbdev->hwcnt.kctx = kctx;
/* Remember the dump address so we can reprogram it later */
kbdev->hwcnt.addr = enable->dump_buffer;
/* Request the clean */
kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_REQUEST_CLEAN;
kbdev->hwcnt.backend.triggered = 0;
/* Clean&invalidate the caches so we're sure the mmu tables for the dump
* buffer is valid */
ret = queue_work(kbdev->hwcnt.backend.cache_clean_wq,
&kbdev->hwcnt.backend.cache_clean_work);
KBASE_DEBUG_ASSERT(ret);
spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
/* Wait for cacheclean to complete */
wait_event(kbdev->hwcnt.backend.wait,
kbdev->hwcnt.backend.triggered != 0);
KBASE_DEBUG_ASSERT(kbdev->hwcnt.backend.state ==
KBASE_INSTR_STATE_IDLE);
kbase_pm_request_l2_caches(kbdev);
/* Configure */
prfcnt_config = kctx->as_nr << PRFCNT_CONFIG_AS_SHIFT;
#ifdef CONFIG_MALI_PRFCNT_SET_SECONDARY
{
u32 gpu_id = kbdev->gpu_props.props.raw_props.gpu_id;
u32 product_id = (gpu_id & GPU_ID_VERSION_PRODUCT_ID)
>> GPU_ID_VERSION_PRODUCT_ID_SHIFT;
int arch_v6 = GPU_ID_IS_NEW_FORMAT(product_id);
if (arch_v6)
prfcnt_config |= 1 << PRFCNT_CONFIG_SETSELECT_SHIFT;
}
#endif
kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG),
prfcnt_config | PRFCNT_CONFIG_MODE_OFF, kctx);
kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_LO),
enable->dump_buffer & 0xFFFFFFFF, kctx);
kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_HI),
enable->dump_buffer >> 32, kctx);
kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_JM_EN),
enable->jm_bm, kctx);
kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_SHADER_EN),
enable->shader_bm, kctx);
kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_MMU_L2_EN),
enable->mmu_l2_bm, kctx);
/* Due to PRLAM-8186 we need to disable the Tiler before we enable the
* HW counter dump. */
if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8186))
kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_TILER_EN), 0,
kctx);
else
kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_TILER_EN),
enable->tiler_bm, kctx);
kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG),
prfcnt_config | PRFCNT_CONFIG_MODE_MANUAL, kctx);
/* If HW has PRLAM-8186 we can now re-enable the tiler HW counters dump
*/
if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8186))
kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_TILER_EN),
enable->tiler_bm, kctx);
spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_IDLE;
kbdev->hwcnt.backend.triggered = 1;
wake_up(&kbdev->hwcnt.backend.wait);
spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
err = 0;
dev_dbg(kbdev->dev, "HW counters dumping set-up for context %p", kctx);
return err;
out_unrequest_cores:
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
kbase_pm_unrequest_cores(kbdev, true, shader_cores_needed);
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
out_err:
return err;
}
int kbase_instr_hwcnt_disable_internal(struct kbase_context *kctx)
{
unsigned long flags, pm_flags;
int err = -EINVAL;
u32 irq_mask;
struct kbase_device *kbdev = kctx->kbdev;
while (1) {
spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_DISABLED) {
/* Instrumentation is not enabled */
spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
goto out;
}
if (kbdev->hwcnt.kctx != kctx) {
/* Instrumentation has been setup for another context */
spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
goto out;
}
if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_IDLE)
break;
spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
/* Ongoing dump/setup - wait for its completion */
wait_event(kbdev->hwcnt.backend.wait,
kbdev->hwcnt.backend.triggered != 0);
}
kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_DISABLED;
kbdev->hwcnt.backend.triggered = 0;
/* Disable interrupt */
spin_lock_irqsave(&kbdev->hwaccess_lock, pm_flags);
irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), NULL);
kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK),
irq_mask & ~PRFCNT_SAMPLE_COMPLETED, NULL);
/* Disable the counters */
kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG), 0, kctx);
kbdev->hwcnt.kctx = NULL;
kbdev->hwcnt.addr = 0ULL;
kbase_pm_ca_instr_disable(kbdev);
kbase_pm_unrequest_cores(kbdev, true,
kbase_pm_get_present_cores(kbdev, KBASE_PM_CORE_SHADER));
kbase_pm_release_l2_caches(kbdev);
spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags);
spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
dev_dbg(kbdev->dev, "HW counters dumping disabled for context %p",
kctx);
err = 0;
out:
return err;
}
int kbase_instr_hwcnt_request_dump(struct kbase_context *kctx)
{
unsigned long flags;
int err = -EINVAL;
struct kbase_device *kbdev = kctx->kbdev;
spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
if (kbdev->hwcnt.kctx != kctx) {
/* The instrumentation has been setup for another context */
goto unlock;
}
if (kbdev->hwcnt.backend.state != KBASE_INSTR_STATE_IDLE) {
/* HW counters are disabled or another dump is ongoing, or we're
* resetting */
goto unlock;
}
kbdev->hwcnt.backend.triggered = 0;
/* Mark that we're dumping - the PF handler can signal that we faulted
*/
kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_DUMPING;
/* Reconfigure the dump address */
kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_LO),
kbdev->hwcnt.addr & 0xFFFFFFFF, NULL);
kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_HI),
kbdev->hwcnt.addr >> 32, NULL);
/* Start dumping */
KBASE_TRACE_ADD(kbdev, CORE_GPU_PRFCNT_SAMPLE, NULL, NULL,
kbdev->hwcnt.addr, 0);
kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
GPU_COMMAND_PRFCNT_SAMPLE, kctx);
dev_dbg(kbdev->dev, "HW counters dumping done for context %p", kctx);
err = 0;
unlock:
spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
return err;
}
KBASE_EXPORT_SYMBOL(kbase_instr_hwcnt_request_dump);
bool kbase_instr_hwcnt_dump_complete(struct kbase_context *kctx,
bool * const success)
{
unsigned long flags;
bool complete = false;
struct kbase_device *kbdev = kctx->kbdev;
spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_IDLE) {
*success = true;
complete = true;
} else if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_FAULT) {
*success = false;
complete = true;
kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_IDLE;
}
spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
return complete;
}
KBASE_EXPORT_SYMBOL(kbase_instr_hwcnt_dump_complete);
void kbasep_cache_clean_worker(struct work_struct *data)
{
struct kbase_device *kbdev;
unsigned long flags;
kbdev = container_of(data, struct kbase_device,
hwcnt.backend.cache_clean_work);
mutex_lock(&kbdev->cacheclean_lock);
kbasep_instr_hwcnt_cacheclean(kbdev);
spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
/* Wait for our condition, and any reset to complete */
while (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_CLEANING) {
spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
wait_event(kbdev->hwcnt.backend.cache_clean_wait,
kbdev->hwcnt.backend.state !=
KBASE_INSTR_STATE_CLEANING);
spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
}
KBASE_DEBUG_ASSERT(kbdev->hwcnt.backend.state ==
KBASE_INSTR_STATE_CLEANED);
/* All finished and idle */
kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_IDLE;
kbdev->hwcnt.backend.triggered = 1;
wake_up(&kbdev->hwcnt.backend.wait);
spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
mutex_unlock(&kbdev->cacheclean_lock);
}
void kbase_instr_hwcnt_sample_done(struct kbase_device *kbdev)
{
unsigned long flags;
spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_FAULT) {
kbdev->hwcnt.backend.triggered = 1;
wake_up(&kbdev->hwcnt.backend.wait);
} else if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_DUMPING) {
int ret;
/* Always clean and invalidate the cache after a successful dump
*/
kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_REQUEST_CLEAN;
ret = queue_work(kbdev->hwcnt.backend.cache_clean_wq,
&kbdev->hwcnt.backend.cache_clean_work);
KBASE_DEBUG_ASSERT(ret);
}
spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
}
void kbase_clean_caches_done(struct kbase_device *kbdev)
{
u32 irq_mask;
if (kbdev->hwcnt.backend.state != KBASE_INSTR_STATE_DISABLED) {
unsigned long flags;
unsigned long pm_flags;
spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
/* Disable interrupt */
spin_lock_irqsave(&kbdev->hwaccess_lock, pm_flags);
irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK),
NULL);
kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK),
irq_mask & ~CLEAN_CACHES_COMPLETED, NULL);
spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags);
/* Wakeup... */
if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_CLEANING) {
/* Only wake if we weren't resetting */
kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_CLEANED;
wake_up(&kbdev->hwcnt.backend.cache_clean_wait);
}
spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
}
}
int kbase_instr_hwcnt_wait_for_dump(struct kbase_context *kctx)
{
struct kbase_device *kbdev = kctx->kbdev;
unsigned long flags;
int err;
/* Wait for dump & cacheclean to complete */
wait_event(kbdev->hwcnt.backend.wait,
kbdev->hwcnt.backend.triggered != 0);
spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_FAULT) {
err = -EINVAL;
kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_IDLE;
} else {
/* Dump done */
KBASE_DEBUG_ASSERT(kbdev->hwcnt.backend.state ==
KBASE_INSTR_STATE_IDLE);
err = 0;
}
spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
return err;
}
int kbase_instr_hwcnt_clear(struct kbase_context *kctx)
{
unsigned long flags;
int err = -EINVAL;
struct kbase_device *kbdev = kctx->kbdev;
spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
/* Check it's the context previously set up and we're not already
* dumping */
if (kbdev->hwcnt.kctx != kctx || kbdev->hwcnt.backend.state !=
KBASE_INSTR_STATE_IDLE)
goto out;
/* Clear the counters */
KBASE_TRACE_ADD(kbdev, CORE_GPU_PRFCNT_CLEAR, NULL, NULL, 0u, 0);
kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
GPU_COMMAND_PRFCNT_CLEAR, kctx);
err = 0;
out:
spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
return err;
}
KBASE_EXPORT_SYMBOL(kbase_instr_hwcnt_clear);
int kbase_instr_backend_init(struct kbase_device *kbdev)
{
int ret = 0;
kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_DISABLED;
init_waitqueue_head(&kbdev->hwcnt.backend.wait);
init_waitqueue_head(&kbdev->hwcnt.backend.cache_clean_wait);
INIT_WORK(&kbdev->hwcnt.backend.cache_clean_work,
kbasep_cache_clean_worker);
kbdev->hwcnt.backend.triggered = 0;
kbdev->hwcnt.backend.cache_clean_wq =
alloc_workqueue("Mali cache cleaning workqueue", 0, 1);
if (NULL == kbdev->hwcnt.backend.cache_clean_wq)
ret = -EINVAL;
return ret;
}
void kbase_instr_backend_term(struct kbase_device *kbdev)
{
destroy_workqueue(kbdev->hwcnt.backend.cache_clean_wq);
}

View File

@@ -0,0 +1,63 @@
/*
*
* (C) COPYRIGHT 2014, 2016 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
* Foundation, and any use by you of this program is subject to the terms
* of such GNU licence.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, you can access it online at
* http://www.gnu.org/licenses/gpl-2.0.html.
*
* SPDX-License-Identifier: GPL-2.0
*
*/
/*
* Backend-specific instrumentation definitions
*/
#ifndef _KBASE_INSTR_DEFS_H_
#define _KBASE_INSTR_DEFS_H_
/*
* Instrumentation State Machine States
*/
enum kbase_instr_state {
/* State where instrumentation is not active */
KBASE_INSTR_STATE_DISABLED = 0,
/* State machine is active and ready for a command. */
KBASE_INSTR_STATE_IDLE,
/* Hardware is currently dumping a frame. */
KBASE_INSTR_STATE_DUMPING,
/* We've requested a clean to occur on a workqueue */
KBASE_INSTR_STATE_REQUEST_CLEAN,
/* Hardware is currently cleaning and invalidating caches. */
KBASE_INSTR_STATE_CLEANING,
/* Cache clean completed, and either a) a dump is complete, or
* b) instrumentation can now be setup. */
KBASE_INSTR_STATE_CLEANED,
/* An error has occured during DUMPING (page fault). */
KBASE_INSTR_STATE_FAULT
};
/* Structure used for instrumentation and HW counters dumping */
struct kbase_instr_backend {
wait_queue_head_t wait;
int triggered;
enum kbase_instr_state state;
wait_queue_head_t cache_clean_wait;
struct workqueue_struct *cache_clean_wq;
struct work_struct cache_clean_work;
};
#endif /* _KBASE_INSTR_DEFS_H_ */

View File

@@ -0,0 +1,50 @@
/*
*
* (C) COPYRIGHT 2014 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
* Foundation, and any use by you of this program is subject to the terms
* of such GNU licence.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, you can access it online at
* http://www.gnu.org/licenses/gpl-2.0.html.
*
* SPDX-License-Identifier: GPL-2.0
*
*/
/*
* Backend-specific HW access instrumentation APIs
*/
#ifndef _KBASE_INSTR_INTERNAL_H_
#define _KBASE_INSTR_INTERNAL_H_
/**
* kbasep_cache_clean_worker() - Workqueue for handling cache cleaning
* @data: a &struct work_struct
*/
void kbasep_cache_clean_worker(struct work_struct *data);
/**
* kbase_clean_caches_done() - Cache clean interrupt received
* @kbdev: Kbase device
*/
void kbase_clean_caches_done(struct kbase_device *kbdev);
/**
* kbase_instr_hwcnt_sample_done() - Dump complete interrupt received
* @kbdev: Kbase device
*/
void kbase_instr_hwcnt_sample_done(struct kbase_device *kbdev);
#endif /* _KBASE_INSTR_INTERNAL_H_ */

View File

@@ -0,0 +1,44 @@
/*
*
* (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
* Foundation, and any use by you of this program is subject to the terms
* of such GNU licence.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, you can access it online at
* http://www.gnu.org/licenses/gpl-2.0.html.
*
* SPDX-License-Identifier: GPL-2.0
*
*/
/*
* Backend specific IRQ APIs
*/
#ifndef _KBASE_IRQ_INTERNAL_H_
#define _KBASE_IRQ_INTERNAL_H_
int kbase_install_interrupts(struct kbase_device *kbdev);
void kbase_release_interrupts(struct kbase_device *kbdev);
/**
* kbase_synchronize_irqs - Ensure that all IRQ handlers have completed
* execution
* @kbdev: The kbase device
*/
void kbase_synchronize_irqs(struct kbase_device *kbdev);
int kbasep_common_test_interrupt_handlers(
struct kbase_device * const kbdev);
#endif /* _KBASE_IRQ_INTERNAL_H_ */

View File

@@ -0,0 +1,474 @@
/*
*
* (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
* Foundation, and any use by you of this program is subject to the terms
* of such GNU licence.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, you can access it online at
* http://www.gnu.org/licenses/gpl-2.0.html.
*
* SPDX-License-Identifier: GPL-2.0
*
*/
#include <mali_kbase.h>
#include <backend/gpu/mali_kbase_device_internal.h>
#include <backend/gpu/mali_kbase_irq_internal.h>
#include <linux/interrupt.h>
#if !defined(CONFIG_MALI_NO_MALI)
/* GPU IRQ Tags */
#define JOB_IRQ_TAG 0
#define MMU_IRQ_TAG 1
#define GPU_IRQ_TAG 2
static void *kbase_tag(void *ptr, u32 tag)
{
return (void *)(((uintptr_t) ptr) | tag);
}
static void *kbase_untag(void *ptr)
{
return (void *)(((uintptr_t) ptr) & ~3);
}
static irqreturn_t kbase_job_irq_handler(int irq, void *data)
{
unsigned long flags;
struct kbase_device *kbdev = kbase_untag(data);
u32 val;
spin_lock_irqsave(&kbdev->pm.backend.gpu_powered_lock, flags);
if (!kbdev->pm.backend.gpu_powered) {
/* GPU is turned off - IRQ is not for us */
spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock,
flags);
return IRQ_NONE;
}
val = kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_STATUS), NULL);
#ifdef CONFIG_MALI_DEBUG
if (!kbdev->pm.backend.driver_ready_for_irqs)
dev_warn(kbdev->dev, "%s: irq %d irqstatus 0x%x before driver is ready\n",
__func__, irq, val);
#endif /* CONFIG_MALI_DEBUG */
spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, flags);
if (!val)
return IRQ_NONE;
dev_dbg(kbdev->dev, "%s: irq %d irqstatus 0x%x\n", __func__, irq, val);
kbase_job_done(kbdev, val);
return IRQ_HANDLED;
}
KBASE_EXPORT_TEST_API(kbase_job_irq_handler);
static irqreturn_t kbase_mmu_irq_handler(int irq, void *data)
{
unsigned long flags;
struct kbase_device *kbdev = kbase_untag(data);
u32 val;
spin_lock_irqsave(&kbdev->pm.backend.gpu_powered_lock, flags);
if (!kbdev->pm.backend.gpu_powered) {
/* GPU is turned off - IRQ is not for us */
spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock,
flags);
return IRQ_NONE;
}
atomic_inc(&kbdev->faults_pending);
val = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_STATUS), NULL);
#ifdef CONFIG_MALI_DEBUG
if (!kbdev->pm.backend.driver_ready_for_irqs)
dev_warn(kbdev->dev, "%s: irq %d irqstatus 0x%x before driver is ready\n",
__func__, irq, val);
#endif /* CONFIG_MALI_DEBUG */
spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, flags);
if (!val) {
atomic_dec(&kbdev->faults_pending);
return IRQ_NONE;
}
dev_dbg(kbdev->dev, "%s: irq %d irqstatus 0x%x\n", __func__, irq, val);
kbase_mmu_interrupt(kbdev, val);
atomic_dec(&kbdev->faults_pending);
return IRQ_HANDLED;
}
static irqreturn_t kbase_gpu_irq_handler(int irq, void *data)
{
unsigned long flags;
struct kbase_device *kbdev = kbase_untag(data);
u32 val;
spin_lock_irqsave(&kbdev->pm.backend.gpu_powered_lock, flags);
if (!kbdev->pm.backend.gpu_powered) {
/* GPU is turned off - IRQ is not for us */
spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock,
flags);
return IRQ_NONE;
}
val = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_STATUS), NULL);
#ifdef CONFIG_MALI_DEBUG
if (!kbdev->pm.backend.driver_ready_for_irqs)
dev_dbg(kbdev->dev, "%s: irq %d irqstatus 0x%x before driver is ready\n",
__func__, irq, val);
#endif /* CONFIG_MALI_DEBUG */
spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, flags);
if (!val)
return IRQ_NONE;
dev_dbg(kbdev->dev, "%s: irq %d irqstatus 0x%x\n", __func__, irq, val);
kbase_gpu_interrupt(kbdev, val);
return IRQ_HANDLED;
}
KBASE_EXPORT_TEST_API(kbase_gpu_irq_handler);
static irq_handler_t kbase_handler_table[] = {
[JOB_IRQ_TAG] = kbase_job_irq_handler,
[MMU_IRQ_TAG] = kbase_mmu_irq_handler,
[GPU_IRQ_TAG] = kbase_gpu_irq_handler,
};
#ifdef CONFIG_MALI_DEBUG
#define JOB_IRQ_HANDLER JOB_IRQ_TAG
#define MMU_IRQ_HANDLER MMU_IRQ_TAG
#define GPU_IRQ_HANDLER GPU_IRQ_TAG
/**
* kbase_set_custom_irq_handler - Set a custom IRQ handler
* @kbdev: Device for which the handler is to be registered
* @custom_handler: Handler to be registered
* @irq_type: Interrupt type
*
* Registers given interrupt handler for requested interrupt type
* In the case where irq handler is not specified, the default handler shall be
* registered
*
* Return: 0 case success, error code otherwise
*/
int kbase_set_custom_irq_handler(struct kbase_device *kbdev,
irq_handler_t custom_handler,
int irq_type)
{
int result = 0;
irq_handler_t requested_irq_handler = NULL;
KBASE_DEBUG_ASSERT((JOB_IRQ_HANDLER <= irq_type) &&
(GPU_IRQ_HANDLER >= irq_type));
/* Release previous handler */
if (kbdev->irqs[irq_type].irq)
free_irq(kbdev->irqs[irq_type].irq, kbase_tag(kbdev, irq_type));
requested_irq_handler = (NULL != custom_handler) ? custom_handler :
kbase_handler_table[irq_type];
if (0 != request_irq(kbdev->irqs[irq_type].irq,
requested_irq_handler,
kbdev->irqs[irq_type].flags | IRQF_SHARED,
dev_name(kbdev->dev), kbase_tag(kbdev, irq_type))) {
result = -EINVAL;
dev_err(kbdev->dev, "Can't request interrupt %d (index %d)\n",
kbdev->irqs[irq_type].irq, irq_type);
#ifdef CONFIG_SPARSE_IRQ
dev_err(kbdev->dev, "You have CONFIG_SPARSE_IRQ support enabled - is the interrupt number correct for this configuration?\n");
#endif /* CONFIG_SPARSE_IRQ */
}
return result;
}
KBASE_EXPORT_TEST_API(kbase_set_custom_irq_handler);
/* test correct interrupt assigment and reception by cpu */
struct kbasep_irq_test {
struct hrtimer timer;
wait_queue_head_t wait;
int triggered;
u32 timeout;
};
static struct kbasep_irq_test kbasep_irq_test_data;
#define IRQ_TEST_TIMEOUT 500
static irqreturn_t kbase_job_irq_test_handler(int irq, void *data)
{
unsigned long flags;
struct kbase_device *kbdev = kbase_untag(data);
u32 val;
spin_lock_irqsave(&kbdev->pm.backend.gpu_powered_lock, flags);
if (!kbdev->pm.backend.gpu_powered) {
/* GPU is turned off - IRQ is not for us */
spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock,
flags);
return IRQ_NONE;
}
val = kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_STATUS), NULL);
spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, flags);
if (!val)
return IRQ_NONE;
dev_dbg(kbdev->dev, "%s: irq %d irqstatus 0x%x\n", __func__, irq, val);
kbasep_irq_test_data.triggered = 1;
wake_up(&kbasep_irq_test_data.wait);
kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR), val, NULL);
return IRQ_HANDLED;
}
static irqreturn_t kbase_mmu_irq_test_handler(int irq, void *data)
{
unsigned long flags;
struct kbase_device *kbdev = kbase_untag(data);
u32 val;
spin_lock_irqsave(&kbdev->pm.backend.gpu_powered_lock, flags);
if (!kbdev->pm.backend.gpu_powered) {
/* GPU is turned off - IRQ is not for us */
spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock,
flags);
return IRQ_NONE;
}
val = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_STATUS), NULL);
spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, flags);
if (!val)
return IRQ_NONE;
dev_dbg(kbdev->dev, "%s: irq %d irqstatus 0x%x\n", __func__, irq, val);
kbasep_irq_test_data.triggered = 1;
wake_up(&kbasep_irq_test_data.wait);
kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_CLEAR), val, NULL);
return IRQ_HANDLED;
}
static enum hrtimer_restart kbasep_test_interrupt_timeout(struct hrtimer *timer)
{
struct kbasep_irq_test *test_data = container_of(timer,
struct kbasep_irq_test, timer);
test_data->timeout = 1;
test_data->triggered = 1;
wake_up(&test_data->wait);
return HRTIMER_NORESTART;
}
static int kbasep_common_test_interrupt(
struct kbase_device * const kbdev, u32 tag)
{
int err = 0;
irq_handler_t test_handler;
u32 old_mask_val;
u16 mask_offset;
u16 rawstat_offset;
switch (tag) {
case JOB_IRQ_TAG:
test_handler = kbase_job_irq_test_handler;
rawstat_offset = JOB_CONTROL_REG(JOB_IRQ_RAWSTAT);
mask_offset = JOB_CONTROL_REG(JOB_IRQ_MASK);
break;
case MMU_IRQ_TAG:
test_handler = kbase_mmu_irq_test_handler;
rawstat_offset = MMU_REG(MMU_IRQ_RAWSTAT);
mask_offset = MMU_REG(MMU_IRQ_MASK);
break;
case GPU_IRQ_TAG:
/* already tested by pm_driver - bail out */
default:
return 0;
}
/* store old mask */
old_mask_val = kbase_reg_read(kbdev, mask_offset, NULL);
/* mask interrupts */
kbase_reg_write(kbdev, mask_offset, 0x0, NULL);
if (kbdev->irqs[tag].irq) {
/* release original handler and install test handler */
if (kbase_set_custom_irq_handler(kbdev, test_handler, tag) != 0) {
err = -EINVAL;
} else {
kbasep_irq_test_data.timeout = 0;
hrtimer_init(&kbasep_irq_test_data.timer,
CLOCK_MONOTONIC, HRTIMER_MODE_REL);
kbasep_irq_test_data.timer.function =
kbasep_test_interrupt_timeout;
/* trigger interrupt */
kbase_reg_write(kbdev, mask_offset, 0x1, NULL);
kbase_reg_write(kbdev, rawstat_offset, 0x1, NULL);
hrtimer_start(&kbasep_irq_test_data.timer,
HR_TIMER_DELAY_MSEC(IRQ_TEST_TIMEOUT),
HRTIMER_MODE_REL);
wait_event(kbasep_irq_test_data.wait,
kbasep_irq_test_data.triggered != 0);
if (kbasep_irq_test_data.timeout != 0) {
dev_err(kbdev->dev, "Interrupt %d (index %d) didn't reach CPU.\n",
kbdev->irqs[tag].irq, tag);
err = -EINVAL;
} else {
dev_dbg(kbdev->dev, "Interrupt %d (index %d) reached CPU.\n",
kbdev->irqs[tag].irq, tag);
}
hrtimer_cancel(&kbasep_irq_test_data.timer);
kbasep_irq_test_data.triggered = 0;
/* mask interrupts */
kbase_reg_write(kbdev, mask_offset, 0x0, NULL);
/* release test handler */
free_irq(kbdev->irqs[tag].irq, kbase_tag(kbdev, tag));
}
/* restore original interrupt */
if (request_irq(kbdev->irqs[tag].irq, kbase_handler_table[tag],
kbdev->irqs[tag].flags | IRQF_SHARED,
dev_name(kbdev->dev), kbase_tag(kbdev, tag))) {
dev_err(kbdev->dev, "Can't restore original interrupt %d (index %d)\n",
kbdev->irqs[tag].irq, tag);
err = -EINVAL;
}
}
/* restore old mask */
kbase_reg_write(kbdev, mask_offset, old_mask_val, NULL);
return err;
}
int kbasep_common_test_interrupt_handlers(
struct kbase_device * const kbdev)
{
int err;
init_waitqueue_head(&kbasep_irq_test_data.wait);
kbasep_irq_test_data.triggered = 0;
/* A suspend won't happen during startup/insmod */
kbase_pm_context_active(kbdev);
err = kbasep_common_test_interrupt(kbdev, JOB_IRQ_TAG);
if (err) {
dev_err(kbdev->dev, "Interrupt JOB_IRQ didn't reach CPU. Check interrupt assignments.\n");
goto out;
}
err = kbasep_common_test_interrupt(kbdev, MMU_IRQ_TAG);
if (err) {
dev_err(kbdev->dev, "Interrupt MMU_IRQ didn't reach CPU. Check interrupt assignments.\n");
goto out;
}
dev_dbg(kbdev->dev, "Interrupts are correctly assigned.\n");
out:
kbase_pm_context_idle(kbdev);
return err;
}
#endif /* CONFIG_MALI_DEBUG */
int kbase_install_interrupts(struct kbase_device *kbdev)
{
u32 nr = ARRAY_SIZE(kbase_handler_table);
int err;
u32 i;
for (i = 0; i < nr; i++) {
err = request_irq(kbdev->irqs[i].irq, kbase_handler_table[i],
kbdev->irqs[i].flags | IRQF_SHARED,
dev_name(kbdev->dev),
kbase_tag(kbdev, i));
if (err) {
dev_err(kbdev->dev, "Can't request interrupt %d (index %d)\n",
kbdev->irqs[i].irq, i);
#ifdef CONFIG_SPARSE_IRQ
dev_err(kbdev->dev, "You have CONFIG_SPARSE_IRQ support enabled - is the interrupt number correct for this configuration?\n");
#endif /* CONFIG_SPARSE_IRQ */
goto release;
}
}
return 0;
release:
while (i-- > 0)
free_irq(kbdev->irqs[i].irq, kbase_tag(kbdev, i));
return err;
}
void kbase_release_interrupts(struct kbase_device *kbdev)
{
u32 nr = ARRAY_SIZE(kbase_handler_table);
u32 i;
for (i = 0; i < nr; i++) {
if (kbdev->irqs[i].irq)
free_irq(kbdev->irqs[i].irq, kbase_tag(kbdev, i));
}
}
void kbase_synchronize_irqs(struct kbase_device *kbdev)
{
u32 nr = ARRAY_SIZE(kbase_handler_table);
u32 i;
for (i = 0; i < nr; i++) {
if (kbdev->irqs[i].irq)
synchronize_irq(kbdev->irqs[i].irq);
}
}
#endif /* !defined(CONFIG_MALI_NO_MALI) */

View File

@@ -0,0 +1,244 @@
/*
*
* (C) COPYRIGHT 2014-2018 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
* Foundation, and any use by you of this program is subject to the terms
* of such GNU licence.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, you can access it online at
* http://www.gnu.org/licenses/gpl-2.0.html.
*
* SPDX-License-Identifier: GPL-2.0
*
*/
/*
* Register backend context / address space management
*/
#include <mali_kbase.h>
#include <mali_kbase_hwaccess_jm.h>
#include <mali_kbase_ctx_sched.h>
/**
* assign_and_activate_kctx_addr_space - Assign an AS to a context
* @kbdev: Kbase device
* @kctx: Kbase context
* @current_as: Address Space to assign
*
* Assign an Address Space (AS) to a context, and add the context to the Policy.
*
* This includes
* setting up the global runpool_irq structure and the context on the AS,
* Activating the MMU on the AS,
* Allowing jobs to be submitted on the AS.
*
* Context:
* kbasep_js_kctx_info.jsctx_mutex held,
* kbasep_js_device_data.runpool_mutex held,
* AS transaction mutex held,
* Runpool IRQ lock held
*/
static void assign_and_activate_kctx_addr_space(struct kbase_device *kbdev,
struct kbase_context *kctx,
struct kbase_as *current_as)
{
struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
lockdep_assert_held(&kctx->jctx.sched_info.ctx.jsctx_mutex);
lockdep_assert_held(&js_devdata->runpool_mutex);
lockdep_assert_held(&kbdev->hwaccess_lock);
/* Attribute handling */
kbasep_js_ctx_attr_runpool_retain_ctx(kbdev, kctx);
/* Allow it to run jobs */
kbasep_js_set_submit_allowed(js_devdata, kctx);
kbase_js_runpool_inc_context_count(kbdev, kctx);
}
bool kbase_backend_use_ctx_sched(struct kbase_device *kbdev,
struct kbase_context *kctx,
int js)
{
int i;
if (kbdev->hwaccess.active_kctx[js] == kctx) {
/* Context is already active */
return true;
}
for (i = 0; i < kbdev->nr_hw_address_spaces; i++) {
if (kbdev->as_to_kctx[i] == kctx) {
/* Context already has ASID - mark as active */
return true;
}
}
/* Context does not have address space assigned */
return false;
}
void kbase_backend_release_ctx_irq(struct kbase_device *kbdev,
struct kbase_context *kctx)
{
int as_nr = kctx->as_nr;
if (as_nr == KBASEP_AS_NR_INVALID) {
WARN(1, "Attempting to release context without ASID\n");
return;
}
lockdep_assert_held(&kbdev->hwaccess_lock);
if (atomic_read(&kctx->refcount) != 1) {
WARN(1, "Attempting to release active ASID\n");
return;
}
kbasep_js_clear_submit_allowed(&kbdev->js_data, kctx);
kbase_ctx_sched_release_ctx(kctx);
kbase_js_runpool_dec_context_count(kbdev, kctx);
}
void kbase_backend_release_ctx_noirq(struct kbase_device *kbdev,
struct kbase_context *kctx)
{
}
int kbase_backend_find_and_release_free_address_space(
struct kbase_device *kbdev, struct kbase_context *kctx)
{
struct kbasep_js_device_data *js_devdata;
struct kbasep_js_kctx_info *js_kctx_info;
unsigned long flags;
int i;
js_devdata = &kbdev->js_data;
js_kctx_info = &kctx->jctx.sched_info;
mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
mutex_lock(&js_devdata->runpool_mutex);
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
for (i = 0; i < kbdev->nr_hw_address_spaces; i++) {
struct kbasep_js_kctx_info *as_js_kctx_info;
struct kbase_context *as_kctx;
as_kctx = kbdev->as_to_kctx[i];
as_js_kctx_info = &as_kctx->jctx.sched_info;
/* Don't release privileged or active contexts, or contexts with
* jobs running.
* Note that a context will have at least 1 reference (which
* was previously taken by kbasep_js_schedule_ctx()) until
* descheduled.
*/
if (as_kctx && !kbase_ctx_flag(as_kctx, KCTX_PRIVILEGED) &&
atomic_read(&as_kctx->refcount) == 1) {
if (!kbasep_js_runpool_retain_ctx_nolock(kbdev,
as_kctx)) {
WARN(1, "Failed to retain active context\n");
spin_unlock_irqrestore(&kbdev->hwaccess_lock,
flags);
mutex_unlock(&js_devdata->runpool_mutex);
mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
return KBASEP_AS_NR_INVALID;
}
kbasep_js_clear_submit_allowed(js_devdata, as_kctx);
/* Drop and retake locks to take the jsctx_mutex on the
* context we're about to release without violating lock
* ordering
*/
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
mutex_unlock(&js_devdata->runpool_mutex);
mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
/* Release context from address space */
mutex_lock(&as_js_kctx_info->ctx.jsctx_mutex);
mutex_lock(&js_devdata->runpool_mutex);
kbasep_js_runpool_release_ctx_nolock(kbdev, as_kctx);
if (!kbase_ctx_flag(as_kctx, KCTX_SCHEDULED)) {
kbasep_js_runpool_requeue_or_kill_ctx(kbdev,
as_kctx,
true);
mutex_unlock(&js_devdata->runpool_mutex);
mutex_unlock(&as_js_kctx_info->ctx.jsctx_mutex);
return i;
}
/* Context was retained while locks were dropped,
* continue looking for free AS */
mutex_unlock(&js_devdata->runpool_mutex);
mutex_unlock(&as_js_kctx_info->ctx.jsctx_mutex);
mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
mutex_lock(&js_devdata->runpool_mutex);
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
}
}
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
mutex_unlock(&js_devdata->runpool_mutex);
mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
return KBASEP_AS_NR_INVALID;
}
bool kbase_backend_use_ctx(struct kbase_device *kbdev,
struct kbase_context *kctx,
int as_nr)
{
struct kbasep_js_device_data *js_devdata;
struct kbase_as *new_address_space = NULL;
int js;
js_devdata = &kbdev->js_data;
for (js = 0; js < BASE_JM_MAX_NR_SLOTS; js++) {
if (kbdev->hwaccess.active_kctx[js] == kctx) {
WARN(1, "Context is already scheduled in\n");
return false;
}
}
new_address_space = &kbdev->as[as_nr];
lockdep_assert_held(&js_devdata->runpool_mutex);
lockdep_assert_held(&kbdev->mmu_hw_mutex);
lockdep_assert_held(&kbdev->hwaccess_lock);
assign_and_activate_kctx_addr_space(kbdev, kctx, new_address_space);
if (kbase_ctx_flag(kctx, KCTX_PRIVILEGED)) {
/* We need to retain it to keep the corresponding address space
*/
kbasep_js_runpool_retain_ctx_nolock(kbdev, kctx);
}
return true;
}

View File

@@ -0,0 +1,116 @@
/*
*
* (C) COPYRIGHT 2014-2016, 2018 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
* Foundation, and any use by you of this program is subject to the terms
* of such GNU licence.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, you can access it online at
* http://www.gnu.org/licenses/gpl-2.0.html.
*
* SPDX-License-Identifier: GPL-2.0
*
*/
/*
* Register-based HW access backend specific definitions
*/
#ifndef _KBASE_HWACCESS_GPU_DEFS_H_
#define _KBASE_HWACCESS_GPU_DEFS_H_
/* SLOT_RB_SIZE must be < 256 */
#define SLOT_RB_SIZE 2
#define SLOT_RB_MASK (SLOT_RB_SIZE - 1)
/**
* struct rb_entry - Ringbuffer entry
* @katom: Atom associated with this entry
*/
struct rb_entry {
struct kbase_jd_atom *katom;
};
/**
* struct slot_rb - Slot ringbuffer
* @entries: Ringbuffer entries
* @last_context: The last context to submit a job on this slot
* @read_idx: Current read index of buffer
* @write_idx: Current write index of buffer
* @job_chain_flag: Flag used to implement jobchain disambiguation
*/
struct slot_rb {
struct rb_entry entries[SLOT_RB_SIZE];
struct kbase_context *last_context;
u8 read_idx;
u8 write_idx;
u8 job_chain_flag;
};
/**
* struct kbase_backend_data - GPU backend specific data for HW access layer
* @slot_rb: Slot ringbuffers
* @rmu_workaround_flag: When PRLAM-8987 is present, this flag determines
* whether slots 0/1 or slot 2 are currently being
* pulled from
* @scheduling_timer: The timer tick used for rescheduling jobs
* @timer_running: Is the timer running? The runpool_mutex must be
* held whilst modifying this.
* @suspend_timer: Is the timer suspended? Set when a suspend
* occurs and cleared on resume. The runpool_mutex
* must be held whilst modifying this.
* @reset_gpu: Set to a KBASE_RESET_xxx value (see comments)
* @reset_workq: Work queue for performing the reset
* @reset_work: Work item for performing the reset
* @reset_wait: Wait event signalled when the reset is complete
* @reset_timer: Timeout for soft-stops before the reset
* @timeouts_updated: Have timeout values just been updated?
*
* The hwaccess_lock (a spinlock) must be held when accessing this structure
*/
struct kbase_backend_data {
struct slot_rb slot_rb[BASE_JM_MAX_NR_SLOTS];
bool rmu_workaround_flag;
struct hrtimer scheduling_timer;
bool timer_running;
bool suspend_timer;
atomic_t reset_gpu;
/* The GPU reset isn't pending */
#define KBASE_RESET_GPU_NOT_PENDING 0
/* kbase_prepare_to_reset_gpu has been called */
#define KBASE_RESET_GPU_PREPARED 1
/* kbase_reset_gpu has been called - the reset will now definitely happen
* within the timeout period */
#define KBASE_RESET_GPU_COMMITTED 2
/* The GPU reset process is currently occuring (timeout has expired or
* kbasep_try_reset_gpu_early was called) */
#define KBASE_RESET_GPU_HAPPENING 3
/* Reset the GPU silently, used when resetting the GPU as part of normal
* behavior (e.g. when exiting protected mode). */
#define KBASE_RESET_GPU_SILENT 4
struct workqueue_struct *reset_workq;
struct work_struct reset_work;
wait_queue_head_t reset_wait;
struct hrtimer reset_timer;
bool timeouts_updated;
};
#endif /* _KBASE_HWACCESS_GPU_DEFS_H_ */

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,169 @@
/*
*
* (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
* Foundation, and any use by you of this program is subject to the terms
* of such GNU licence.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, you can access it online at
* http://www.gnu.org/licenses/gpl-2.0.html.
*
* SPDX-License-Identifier: GPL-2.0
*
*/
/*
* Job Manager backend-specific low-level APIs.
*/
#ifndef _KBASE_JM_HWACCESS_H_
#define _KBASE_JM_HWACCESS_H_
#include <mali_kbase_hw.h>
#include <mali_kbase_debug.h>
#include <linux/atomic.h>
#include <backend/gpu/mali_kbase_jm_rb.h>
/**
* kbase_job_submit_nolock() - Submit a job to a certain job-slot
* @kbdev: Device pointer
* @katom: Atom to submit
* @js: Job slot to submit on
*
* The caller must check kbasep_jm_is_submit_slots_free() != false before
* calling this.
*
* The following locking conditions are made on the caller:
* - it must hold the hwaccess_lock
*/
void kbase_job_submit_nolock(struct kbase_device *kbdev,
struct kbase_jd_atom *katom, int js);
/**
* kbase_job_done_slot() - Complete the head job on a particular job-slot
* @kbdev: Device pointer
* @s: Job slot
* @completion_code: Completion code of job reported by GPU
* @job_tail: Job tail address reported by GPU
* @end_timestamp: Timestamp of job completion
*/
void kbase_job_done_slot(struct kbase_device *kbdev, int s, u32 completion_code,
u64 job_tail, ktime_t *end_timestamp);
#ifdef CONFIG_GPU_TRACEPOINTS
static inline char *kbasep_make_job_slot_string(int js, char *js_string,
size_t js_size)
{
snprintf(js_string, js_size, "job_slot_%i", js);
return js_string;
}
#endif
/**
* kbase_job_hw_submit() - Submit a job to the GPU
* @kbdev: Device pointer
* @katom: Atom to submit
* @js: Job slot to submit on
*
* The caller must check kbasep_jm_is_submit_slots_free() != false before
* calling this.
*
* The following locking conditions are made on the caller:
* - it must hold the hwaccess_lock
*/
void kbase_job_hw_submit(struct kbase_device *kbdev,
struct kbase_jd_atom *katom,
int js);
/**
* kbasep_job_slot_soft_or_hard_stop_do_action() - Perform a soft or hard stop
* on the specified atom
* @kbdev: Device pointer
* @js: Job slot to stop on
* @action: The action to perform, either JSn_COMMAND_HARD_STOP or
* JSn_COMMAND_SOFT_STOP
* @core_reqs: Core requirements of atom to stop
* @target_katom: Atom to stop
*
* The following locking conditions are made on the caller:
* - it must hold the hwaccess_lock
*/
void kbasep_job_slot_soft_or_hard_stop_do_action(struct kbase_device *kbdev,
int js,
u32 action,
base_jd_core_req core_reqs,
struct kbase_jd_atom *target_katom);
/**
* kbase_backend_soft_hard_stop_slot() - Soft or hard stop jobs on a given job
* slot belonging to a given context.
* @kbdev: Device pointer
* @kctx: Context pointer. May be NULL
* @katom: Specific atom to stop. May be NULL
* @js: Job slot to hard stop
* @action: The action to perform, either JSn_COMMAND_HARD_STOP or
* JSn_COMMAND_SOFT_STOP
*
* If no context is provided then all jobs on the slot will be soft or hard
* stopped.
*
* If a katom is provided then only that specific atom will be stopped. In this
* case the kctx parameter is ignored.
*
* Jobs that are on the slot but are not yet on the GPU will be unpulled and
* returned to the job scheduler.
*
* Return: true if an atom was stopped, false otherwise
*/
bool kbase_backend_soft_hard_stop_slot(struct kbase_device *kbdev,
struct kbase_context *kctx,
int js,
struct kbase_jd_atom *katom,
u32 action);
/**
* kbase_job_slot_init - Initialise job slot framework
* @kbdev: Device pointer
*
* Called on driver initialisation
*
* Return: 0 on success
*/
int kbase_job_slot_init(struct kbase_device *kbdev);
/**
* kbase_job_slot_halt - Halt the job slot framework
* @kbdev: Device pointer
*
* Should prevent any further job slot processing
*/
void kbase_job_slot_halt(struct kbase_device *kbdev);
/**
* kbase_job_slot_term - Terminate job slot framework
* @kbdev: Device pointer
*
* Called on driver termination
*/
void kbase_job_slot_term(struct kbase_device *kbdev);
/**
* kbase_gpu_cacheclean - Cause a GPU cache clean & flush
* @kbdev: Device pointer
*
* Caller must not be in IRQ context
*/
void kbase_gpu_cacheclean(struct kbase_device *kbdev);
#endif /* _KBASE_JM_HWACCESS_H_ */

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,83 @@
/*
*
* (C) COPYRIGHT 2014-2018 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
* Foundation, and any use by you of this program is subject to the terms
* of such GNU licence.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, you can access it online at
* http://www.gnu.org/licenses/gpl-2.0.html.
*
* SPDX-License-Identifier: GPL-2.0
*
*/
/*
* Register-based HW access backend specific APIs
*/
#ifndef _KBASE_HWACCESS_GPU_H_
#define _KBASE_HWACCESS_GPU_H_
#include <backend/gpu/mali_kbase_pm_internal.h>
/**
* kbase_gpu_irq_evict - Evict an atom from a NEXT slot
*
* @kbdev: Device pointer
* @js: Job slot to evict from
* @completion_code: Event code from job that was run.
*
* Evict the atom in the NEXT slot for the specified job slot. This function is
* called from the job complete IRQ handler when the previous job has failed.
*
* Return: true if job evicted from NEXT registers, false otherwise
*/
bool kbase_gpu_irq_evict(struct kbase_device *kbdev, int js,
u32 completion_code);
/**
* kbase_gpu_complete_hw - Complete an atom on job slot js
*
* @kbdev: Device pointer
* @js: Job slot that has completed
* @completion_code: Event code from job that has completed
* @job_tail: The tail address from the hardware if the job has partially
* completed
* @end_timestamp: Time of completion
*/
void kbase_gpu_complete_hw(struct kbase_device *kbdev, int js,
u32 completion_code,
u64 job_tail,
ktime_t *end_timestamp);
/**
* kbase_gpu_inspect - Inspect the contents of the HW access ringbuffer
*
* @kbdev: Device pointer
* @js: Job slot to inspect
* @idx: Index into ringbuffer. 0 is the job currently running on
* the slot, 1 is the job waiting, all other values are invalid.
* Return: The atom at that position in the ringbuffer
* or NULL if no atom present
*/
struct kbase_jd_atom *kbase_gpu_inspect(struct kbase_device *kbdev, int js,
int idx);
/**
* kbase_gpu_dump_slots - Print the contents of the slot ringbuffers
*
* @kbdev: Device pointer
*/
void kbase_gpu_dump_slots(struct kbase_device *kbdev);
#endif /* _KBASE_HWACCESS_GPU_H_ */

View File

@@ -0,0 +1,308 @@
/*
*
* (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
* Foundation, and any use by you of this program is subject to the terms
* of such GNU licence.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, you can access it online at
* http://www.gnu.org/licenses/gpl-2.0.html.
*
* SPDX-License-Identifier: GPL-2.0
*
*/
/*
* Base kernel affinity manager APIs
*/
#include <mali_kbase.h>
#include "mali_kbase_js_affinity.h"
#include "mali_kbase_hw.h"
#include <backend/gpu/mali_kbase_pm_internal.h>
bool kbase_js_can_run_job_on_slot_no_lock(struct kbase_device *kbdev,
int js)
{
/*
* Here are the reasons for using job slot 2:
* - BASE_HW_ISSUE_8987 (which is entirely used for that purpose)
* - In absence of the above, then:
* - Atoms with BASE_JD_REQ_COHERENT_GROUP
* - But, only when there aren't contexts with
* KBASEP_JS_CTX_ATTR_COMPUTE_ALL_CORES, because the atoms that run on
* all cores on slot 1 could be blocked by those using a coherent group
* on slot 2
* - And, only when you actually have 2 or more coregroups - if you
* only have 1 coregroup, then having jobs for slot 2 implies they'd
* also be for slot 1, meaning you'll get interference from them. Jobs
* able to run on slot 2 could also block jobs that can only run on
* slot 1 (tiler jobs)
*/
if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8987))
return true;
if (js != 2)
return true;
/* Only deal with js==2 now: */
if (kbdev->gpu_props.num_core_groups > 1) {
/* Only use slot 2 in the 2+ coregroup case */
if (kbasep_js_ctx_attr_is_attr_on_runpool(kbdev,
KBASEP_JS_CTX_ATTR_COMPUTE_ALL_CORES) ==
false) {
/* ...But only when we *don't* have atoms that run on
* all cores */
/* No specific check for BASE_JD_REQ_COHERENT_GROUP
* atoms - the policy will sort that out */
return true;
}
}
/* Above checks failed mean we shouldn't use slot 2 */
return false;
}
/*
* As long as it has been decided to have a deeper modification of
* what job scheduler, power manager and affinity manager will
* implement, this function is just an intermediate step that
* assumes:
* - all working cores will be powered on when this is called.
* - largest current configuration is 2 core groups.
* - It has been decided not to have hardcoded values so the low
* and high cores in a core split will be evently distributed.
* - Odd combinations of core requirements have been filtered out
* and do not get to this function (e.g. CS+T+NSS is not
* supported here).
* - This function is frequently called and can be optimized,
* (see notes in loops), but as the functionallity will likely
* be modified, optimization has not been addressed.
*/
bool kbase_js_choose_affinity(u64 * const affinity,
struct kbase_device *kbdev,
struct kbase_jd_atom *katom, int js)
{
base_jd_core_req core_req = katom->core_req;
unsigned int num_core_groups = kbdev->gpu_props.num_core_groups;
u64 core_availability_mask;
lockdep_assert_held(&kbdev->hwaccess_lock);
core_availability_mask = kbase_pm_ca_get_core_mask(kbdev);
/*
* If no cores are currently available (core availability policy is
* transitioning) then fail.
*/
if (0 == core_availability_mask) {
*affinity = 0;
return false;
}
KBASE_DEBUG_ASSERT(js >= 0);
if ((core_req & (BASE_JD_REQ_FS | BASE_JD_REQ_CS | BASE_JD_REQ_T)) ==
BASE_JD_REQ_T) {
/* If the hardware supports XAFFINITY then we'll only enable
* the tiler (which is the default so this is a no-op),
* otherwise enable shader core 0. */
if (!kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_XAFFINITY))
*affinity = 1;
else
*affinity = 0;
return true;
}
if (1 == kbdev->gpu_props.num_cores) {
/* trivial case only one core, nothing to do */
*affinity = core_availability_mask &
kbdev->pm.debug_core_mask[js];
} else {
if ((core_req & (BASE_JD_REQ_COHERENT_GROUP |
BASE_JD_REQ_SPECIFIC_COHERENT_GROUP))) {
if (js == 0 || num_core_groups == 1) {
/* js[0] and single-core-group systems just get
* the first core group */
*affinity =
kbdev->gpu_props.props.coherency_info.group[0].core_mask
& core_availability_mask &
kbdev->pm.debug_core_mask[js];
} else {
/* js[1], js[2] use core groups 0, 1 for
* dual-core-group systems */
u32 core_group_idx = ((u32) js) - 1;
KBASE_DEBUG_ASSERT(core_group_idx <
num_core_groups);
*affinity =
kbdev->gpu_props.props.coherency_info.group[core_group_idx].core_mask
& core_availability_mask &
kbdev->pm.debug_core_mask[js];
/* If the job is specifically targeting core
* group 1 and the core availability policy is
* keeping that core group off, then fail */
if (*affinity == 0 && core_group_idx == 1 &&
kbdev->pm.backend.cg1_disabled
== true)
katom->event_code =
BASE_JD_EVENT_PM_EVENT;
}
} else {
/* All cores are available when no core split is
* required */
*affinity = core_availability_mask &
kbdev->pm.debug_core_mask[js];
}
}
/*
* If no cores are currently available in the desired core group(s)
* (core availability policy is transitioning) then fail.
*/
if (*affinity == 0)
return false;
/* Enable core 0 if tiler required for hardware without XAFFINITY
* support (notes above) */
if (core_req & BASE_JD_REQ_T) {
if (!kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_XAFFINITY))
*affinity = *affinity | 1;
}
return true;
}
static inline bool kbase_js_affinity_is_violating(
struct kbase_device *kbdev,
u64 *affinities)
{
/* This implementation checks whether the two slots involved in Generic
* thread creation have intersecting affinity. This is due to micro-
* architectural issues where a job in slot A targetting cores used by
* slot B could prevent the job in slot B from making progress until the
* job in slot A has completed.
*/
u64 affinity_set_left;
u64 affinity_set_right;
u64 intersection;
KBASE_DEBUG_ASSERT(affinities != NULL);
affinity_set_left = affinities[1];
affinity_set_right = affinities[2];
/* A violation occurs when any bit in the left_set is also in the
* right_set */
intersection = affinity_set_left & affinity_set_right;
return (bool) (intersection != (u64) 0u);
}
bool kbase_js_affinity_would_violate(struct kbase_device *kbdev, int js,
u64 affinity)
{
struct kbasep_js_device_data *js_devdata;
u64 new_affinities[BASE_JM_MAX_NR_SLOTS];
KBASE_DEBUG_ASSERT(kbdev != NULL);
KBASE_DEBUG_ASSERT(js < BASE_JM_MAX_NR_SLOTS);
js_devdata = &kbdev->js_data;
memcpy(new_affinities, js_devdata->runpool_irq.slot_affinities,
sizeof(js_devdata->runpool_irq.slot_affinities));
new_affinities[js] |= affinity;
return kbase_js_affinity_is_violating(kbdev, new_affinities);
}
void kbase_js_affinity_retain_slot_cores(struct kbase_device *kbdev, int js,
u64 affinity)
{
struct kbasep_js_device_data *js_devdata;
u64 cores;
KBASE_DEBUG_ASSERT(kbdev != NULL);
KBASE_DEBUG_ASSERT(js < BASE_JM_MAX_NR_SLOTS);
js_devdata = &kbdev->js_data;
KBASE_DEBUG_ASSERT(kbase_js_affinity_would_violate(kbdev, js, affinity)
== false);
cores = affinity;
while (cores) {
int bitnum = fls64(cores) - 1;
u64 bit = 1ULL << bitnum;
s8 cnt;
cnt =
++(js_devdata->runpool_irq.slot_affinity_refcount[js][bitnum]);
if (cnt == 1)
js_devdata->runpool_irq.slot_affinities[js] |= bit;
cores &= ~bit;
}
}
void kbase_js_affinity_release_slot_cores(struct kbase_device *kbdev, int js,
u64 affinity)
{
struct kbasep_js_device_data *js_devdata;
u64 cores;
KBASE_DEBUG_ASSERT(kbdev != NULL);
KBASE_DEBUG_ASSERT(js < BASE_JM_MAX_NR_SLOTS);
js_devdata = &kbdev->js_data;
cores = affinity;
while (cores) {
int bitnum = fls64(cores) - 1;
u64 bit = 1ULL << bitnum;
s8 cnt;
KBASE_DEBUG_ASSERT(
js_devdata->runpool_irq.slot_affinity_refcount[js][bitnum] > 0);
cnt =
--(js_devdata->runpool_irq.slot_affinity_refcount[js][bitnum]);
if (0 == cnt)
js_devdata->runpool_irq.slot_affinities[js] &= ~bit;
cores &= ~bit;
}
}
#if KBASE_TRACE_ENABLE
void kbase_js_debug_log_current_affinities(struct kbase_device *kbdev)
{
struct kbasep_js_device_data *js_devdata;
int slot_nr;
KBASE_DEBUG_ASSERT(kbdev != NULL);
js_devdata = &kbdev->js_data;
for (slot_nr = 0; slot_nr < 3; ++slot_nr)
KBASE_TRACE_ADD_SLOT_INFO(kbdev, JS_AFFINITY_CURRENT, NULL,
NULL, 0u, slot_nr,
(u32) js_devdata->runpool_irq.slot_affinities[slot_nr]);
}
#endif /* KBASE_TRACE_ENABLE */

View File

@@ -0,0 +1,134 @@
/*
*
* (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
* Foundation, and any use by you of this program is subject to the terms
* of such GNU licence.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, you can access it online at
* http://www.gnu.org/licenses/gpl-2.0.html.
*
* SPDX-License-Identifier: GPL-2.0
*
*/
/*
* Affinity Manager internal APIs.
*/
#ifndef _KBASE_JS_AFFINITY_H_
#define _KBASE_JS_AFFINITY_H_
/**
* kbase_js_can_run_job_on_slot_no_lock - Decide whether it is possible to
* submit a job to a particular job slot in the current status
*
* @kbdev: The kbase device structure of the device
* @js: Job slot number to check for allowance
*
* Will check if submitting to the given job slot is allowed in the current
* status. For example using job slot 2 while in soft-stoppable state and only
* having 1 coregroup is not allowed by the policy. This function should be
* called prior to submitting a job to a slot to make sure policy rules are not
* violated.
*
* The following locking conditions are made on the caller
* - it must hold hwaccess_lock
*/
bool kbase_js_can_run_job_on_slot_no_lock(struct kbase_device *kbdev, int js);
/**
* kbase_js_choose_affinity - Compute affinity for a given job.
*
* @affinity: Affinity bitmap computed
* @kbdev: The kbase device structure of the device
* @katom: Job chain of which affinity is going to be found
* @js: Slot the job chain is being submitted
*
* Currently assumes an all-on/all-off power management policy.
* Also assumes there is at least one core with tiler available.
*
* Returns true if a valid affinity was chosen, false if
* no cores were available.
*/
bool kbase_js_choose_affinity(u64 * const affinity,
struct kbase_device *kbdev,
struct kbase_jd_atom *katom,
int js);
/**
* kbase_js_affinity_would_violate - Determine whether a proposed affinity on
* job slot @js would cause a violation of affinity restrictions.
*
* @kbdev: Kbase device structure
* @js: The job slot to test
* @affinity: The affinity mask to test
*
* The following locks must be held by the caller
* - hwaccess_lock
*
* Return: true if the affinity would violate the restrictions
*/
bool kbase_js_affinity_would_violate(struct kbase_device *kbdev, int js,
u64 affinity);
/**
* kbase_js_affinity_retain_slot_cores - Affinity tracking: retain cores used by
* a slot
*
* @kbdev: Kbase device structure
* @js: The job slot retaining the cores
* @affinity: The cores to retain
*
* The following locks must be held by the caller
* - hwaccess_lock
*/
void kbase_js_affinity_retain_slot_cores(struct kbase_device *kbdev, int js,
u64 affinity);
/**
* kbase_js_affinity_release_slot_cores - Affinity tracking: release cores used
* by a slot
*
* @kbdev: Kbase device structure
* @js: Job slot
* @affinity: Bit mask of core to be released
*
* Cores must be released as soon as a job is dequeued from a slot's 'submit
* slots', and before another job is submitted to those slots. Otherwise, the
* refcount could exceed the maximum number submittable to a slot,
* %BASE_JM_SUBMIT_SLOTS.
*
* The following locks must be held by the caller
* - hwaccess_lock
*/
void kbase_js_affinity_release_slot_cores(struct kbase_device *kbdev, int js,
u64 affinity);
/**
* kbase_js_debug_log_current_affinities - log the current affinities
*
* @kbdev: Kbase device structure
*
* Output to the Trace log the current tracked affinities on all slots
*/
#if KBASE_TRACE_ENABLE
void kbase_js_debug_log_current_affinities(struct kbase_device *kbdev);
#else /* KBASE_TRACE_ENABLE */
static inline void
kbase_js_debug_log_current_affinities(struct kbase_device *kbdev)
{
}
#endif /* KBASE_TRACE_ENABLE */
#endif /* _KBASE_JS_AFFINITY_H_ */

View File

@@ -0,0 +1,354 @@
/*
*
* (C) COPYRIGHT 2014-2017 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
* Foundation, and any use by you of this program is subject to the terms
* of such GNU licence.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, you can access it online at
* http://www.gnu.org/licenses/gpl-2.0.html.
*
* SPDX-License-Identifier: GPL-2.0
*
*/
/*
* Register-based HW access backend specific job scheduler APIs
*/
#include <mali_kbase.h>
#include <mali_kbase_hwaccess_jm.h>
#include <backend/gpu/mali_kbase_jm_internal.h>
#include <backend/gpu/mali_kbase_js_internal.h>
/*
* Hold the runpool_mutex for this
*/
static inline bool timer_callback_should_run(struct kbase_device *kbdev)
{
struct kbase_backend_data *backend = &kbdev->hwaccess.backend;
s8 nr_running_ctxs;
lockdep_assert_held(&kbdev->js_data.runpool_mutex);
/* Timer must stop if we are suspending */
if (backend->suspend_timer)
return false;
/* nr_contexts_pullable is updated with the runpool_mutex. However, the
* locking in the caller gives us a barrier that ensures
* nr_contexts_pullable is up-to-date for reading */
nr_running_ctxs = atomic_read(&kbdev->js_data.nr_contexts_runnable);
#ifdef CONFIG_MALI_DEBUG
if (kbdev->js_data.softstop_always) {
/* Debug support for allowing soft-stop on a single context */
return true;
}
#endif /* CONFIG_MALI_DEBUG */
if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_9435)) {
/* Timeouts would have to be 4x longer (due to micro-
* architectural design) to support OpenCL conformance tests, so
* only run the timer when there's:
* - 2 or more CL contexts
* - 1 or more GLES contexts
*
* NOTE: We will treat a context that has both Compute and Non-
* Compute jobs will be treated as an OpenCL context (hence, we
* don't check KBASEP_JS_CTX_ATTR_NON_COMPUTE).
*/
{
s8 nr_compute_ctxs =
kbasep_js_ctx_attr_count_on_runpool(kbdev,
KBASEP_JS_CTX_ATTR_COMPUTE);
s8 nr_noncompute_ctxs = nr_running_ctxs -
nr_compute_ctxs;
return (bool) (nr_compute_ctxs >= 2 ||
nr_noncompute_ctxs > 0);
}
} else {
/* Run the timer callback whenever you have at least 1 context
*/
return (bool) (nr_running_ctxs > 0);
}
}
static enum hrtimer_restart timer_callback(struct hrtimer *timer)
{
unsigned long flags;
struct kbase_device *kbdev;
struct kbasep_js_device_data *js_devdata;
struct kbase_backend_data *backend;
int s;
bool reset_needed = false;
KBASE_DEBUG_ASSERT(timer != NULL);
backend = container_of(timer, struct kbase_backend_data,
scheduling_timer);
kbdev = container_of(backend, struct kbase_device, hwaccess.backend);
js_devdata = &kbdev->js_data;
/* Loop through the slots */
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
for (s = 0; s < kbdev->gpu_props.num_job_slots; s++) {
struct kbase_jd_atom *atom = NULL;
if (kbase_backend_nr_atoms_on_slot(kbdev, s) > 0) {
atom = kbase_gpu_inspect(kbdev, s, 0);
KBASE_DEBUG_ASSERT(atom != NULL);
}
if (atom != NULL) {
/* The current version of the model doesn't support
* Soft-Stop */
if (!kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_5736)) {
u32 ticks = atom->ticks++;
#ifndef CONFIG_MALI_JOB_DUMP
u32 soft_stop_ticks, hard_stop_ticks,
gpu_reset_ticks;
if (atom->core_req & BASE_JD_REQ_ONLY_COMPUTE) {
soft_stop_ticks =
js_devdata->soft_stop_ticks_cl;
hard_stop_ticks =
js_devdata->hard_stop_ticks_cl;
gpu_reset_ticks =
js_devdata->gpu_reset_ticks_cl;
} else {
soft_stop_ticks =
js_devdata->soft_stop_ticks;
hard_stop_ticks =
js_devdata->hard_stop_ticks_ss;
gpu_reset_ticks =
js_devdata->gpu_reset_ticks_ss;
}
/* If timeouts have been changed then ensure
* that atom tick count is not greater than the
* new soft_stop timeout. This ensures that
* atoms do not miss any of the timeouts due to
* races between this worker and the thread
* changing the timeouts. */
if (backend->timeouts_updated &&
ticks > soft_stop_ticks)
ticks = atom->ticks = soft_stop_ticks;
/* Job is Soft-Stoppable */
if (ticks == soft_stop_ticks) {
/* Job has been scheduled for at least
* js_devdata->soft_stop_ticks ticks.
* Soft stop the slot so we can run
* other jobs.
*/
#if !KBASE_DISABLE_SCHEDULING_SOFT_STOPS
int disjoint_threshold =
KBASE_DISJOINT_STATE_INTERLEAVED_CONTEXT_COUNT_THRESHOLD;
u32 softstop_flags = 0u;
dev_dbg(kbdev->dev, "Soft-stop");
/* nr_user_contexts_running is updated
* with the runpool_mutex, but we can't
* take that here.
*
* However, if it's about to be
* increased then the new context can't
* run any jobs until they take the
* hwaccess_lock, so it's OK to observe
* the older value.
*
* Similarly, if it's about to be
* decreased, the last job from another
* context has already finished, so it's
* not too bad that we observe the older
* value and register a disjoint event
* when we try soft-stopping */
if (js_devdata->nr_user_contexts_running
>= disjoint_threshold)
softstop_flags |=
JS_COMMAND_SW_CAUSES_DISJOINT;
kbase_job_slot_softstop_swflags(kbdev,
s, atom, softstop_flags);
#endif
} else if (ticks == hard_stop_ticks) {
/* Job has been scheduled for at least
* js_devdata->hard_stop_ticks_ss ticks.
* It should have been soft-stopped by
* now. Hard stop the slot.
*/
#if !KBASE_DISABLE_SCHEDULING_HARD_STOPS
int ms =
js_devdata->scheduling_period_ns
/ 1000000u;
dev_warn(kbdev->dev, "JS: Job Hard-Stopped (took more than %lu ticks at %lu ms/tick)",
(unsigned long)ticks,
(unsigned long)ms);
kbase_job_slot_hardstop(atom->kctx, s,
atom);
#endif
} else if (ticks == gpu_reset_ticks) {
/* Job has been scheduled for at least
* js_devdata->gpu_reset_ticks_ss ticks.
* It should have left the GPU by now.
* Signal that the GPU needs to be
* reset.
*/
reset_needed = true;
}
#else /* !CONFIG_MALI_JOB_DUMP */
/* NOTE: During CONFIG_MALI_JOB_DUMP, we use
* the alternate timeouts, which makes the hard-
* stop and GPU reset timeout much longer. We
* also ensure that we don't soft-stop at all.
*/
if (ticks == js_devdata->soft_stop_ticks) {
/* Job has been scheduled for at least
* js_devdata->soft_stop_ticks. We do
* not soft-stop during
* CONFIG_MALI_JOB_DUMP, however.
*/
dev_dbg(kbdev->dev, "Soft-stop");
} else if (ticks ==
js_devdata->hard_stop_ticks_dumping) {
/* Job has been scheduled for at least
* js_devdata->hard_stop_ticks_dumping
* ticks. Hard stop the slot.
*/
#if !KBASE_DISABLE_SCHEDULING_HARD_STOPS
int ms =
js_devdata->scheduling_period_ns
/ 1000000u;
dev_warn(kbdev->dev, "JS: Job Hard-Stopped (took more than %lu ticks at %lu ms/tick)",
(unsigned long)ticks,
(unsigned long)ms);
kbase_job_slot_hardstop(atom->kctx, s,
atom);
#endif
} else if (ticks ==
js_devdata->gpu_reset_ticks_dumping) {
/* Job has been scheduled for at least
* js_devdata->gpu_reset_ticks_dumping
* ticks. It should have left the GPU by
* now. Signal that the GPU needs to be
* reset.
*/
reset_needed = true;
}
#endif /* !CONFIG_MALI_JOB_DUMP */
}
}
}
#if KBASE_GPU_RESET_EN
if (reset_needed) {
dev_err(kbdev->dev, "JS: Job has been on the GPU for too long (JS_RESET_TICKS_SS/DUMPING timeout hit). Issueing GPU soft-reset to resolve.");
if (kbase_prepare_to_reset_gpu_locked(kbdev))
kbase_reset_gpu_locked(kbdev);
}
#endif /* KBASE_GPU_RESET_EN */
/* the timer is re-issued if there is contexts in the run-pool */
if (backend->timer_running)
hrtimer_start(&backend->scheduling_timer,
HR_TIMER_DELAY_NSEC(js_devdata->scheduling_period_ns),
HRTIMER_MODE_REL);
backend->timeouts_updated = false;
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
return HRTIMER_NORESTART;
}
void kbase_backend_ctx_count_changed(struct kbase_device *kbdev)
{
struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
struct kbase_backend_data *backend = &kbdev->hwaccess.backend;
unsigned long flags;
lockdep_assert_held(&js_devdata->runpool_mutex);
if (!timer_callback_should_run(kbdev)) {
/* Take spinlock to force synchronisation with timer */
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
backend->timer_running = false;
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
/* From now on, return value of timer_callback_should_run() will
* also cause the timer to not requeue itself. Its return value
* cannot change, because it depends on variables updated with
* the runpool_mutex held, which the caller of this must also
* hold */
hrtimer_cancel(&backend->scheduling_timer);
}
if (timer_callback_should_run(kbdev) && !backend->timer_running) {
/* Take spinlock to force synchronisation with timer */
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
backend->timer_running = true;
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
hrtimer_start(&backend->scheduling_timer,
HR_TIMER_DELAY_NSEC(js_devdata->scheduling_period_ns),
HRTIMER_MODE_REL);
KBASE_TRACE_ADD(kbdev, JS_POLICY_TIMER_START, NULL, NULL, 0u,
0u);
}
}
int kbase_backend_timer_init(struct kbase_device *kbdev)
{
struct kbase_backend_data *backend = &kbdev->hwaccess.backend;
hrtimer_init(&backend->scheduling_timer, CLOCK_MONOTONIC,
HRTIMER_MODE_REL);
backend->scheduling_timer.function = timer_callback;
backend->timer_running = false;
return 0;
}
void kbase_backend_timer_term(struct kbase_device *kbdev)
{
struct kbase_backend_data *backend = &kbdev->hwaccess.backend;
hrtimer_cancel(&backend->scheduling_timer);
}
void kbase_backend_timer_suspend(struct kbase_device *kbdev)
{
struct kbase_backend_data *backend = &kbdev->hwaccess.backend;
backend->suspend_timer = true;
kbase_backend_ctx_count_changed(kbdev);
}
void kbase_backend_timer_resume(struct kbase_device *kbdev)
{
struct kbase_backend_data *backend = &kbdev->hwaccess.backend;
backend->suspend_timer = false;
kbase_backend_ctx_count_changed(kbdev);
}
void kbase_backend_timeouts_changed(struct kbase_device *kbdev)
{
struct kbase_backend_data *backend = &kbdev->hwaccess.backend;
backend->timeouts_updated = true;
}

View File

@@ -0,0 +1,74 @@
/*
*
* (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
* Foundation, and any use by you of this program is subject to the terms
* of such GNU licence.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, you can access it online at
* http://www.gnu.org/licenses/gpl-2.0.html.
*
* SPDX-License-Identifier: GPL-2.0
*
*/
/*
* Register-based HW access backend specific job scheduler APIs
*/
#ifndef _KBASE_JS_BACKEND_H_
#define _KBASE_JS_BACKEND_H_
/**
* kbase_backend_timer_init() - Initialise the JS scheduling timer
* @kbdev: Device pointer
*
* This function should be called at driver initialisation
*
* Return: 0 on success
*/
int kbase_backend_timer_init(struct kbase_device *kbdev);
/**
* kbase_backend_timer_term() - Terminate the JS scheduling timer
* @kbdev: Device pointer
*
* This function should be called at driver termination
*/
void kbase_backend_timer_term(struct kbase_device *kbdev);
/**
* kbase_backend_timer_suspend - Suspend is happening, stop the JS scheduling
* timer
* @kbdev: Device pointer
*
* This function should be called on suspend, after the active count has reached
* zero. This is required as the timer may have been started on job submission
* to the job scheduler, but before jobs are submitted to the GPU.
*
* Caller must hold runpool_mutex.
*/
void kbase_backend_timer_suspend(struct kbase_device *kbdev);
/**
* kbase_backend_timer_resume - Resume is happening, re-evaluate the JS
* scheduling timer
* @kbdev: Device pointer
*
* This function should be called on resume. Note that is is not guaranteed to
* re-start the timer, only evalute whether it should be re-started.
*
* Caller must hold runpool_mutex.
*/
void kbase_backend_timer_resume(struct kbase_device *kbdev);
#endif /* _KBASE_JS_BACKEND_H_ */

View File

@@ -0,0 +1,405 @@
/*
*
* (C) COPYRIGHT 2014-2017 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
* Foundation, and any use by you of this program is subject to the terms
* of such GNU licence.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, you can access it online at
* http://www.gnu.org/licenses/gpl-2.0.html.
*
* SPDX-License-Identifier: GPL-2.0
*
*/
#include <linux/bitops.h>
#include <mali_kbase.h>
#include <mali_kbase_mem.h>
#include <mali_kbase_mmu_hw.h>
#include <mali_kbase_tlstream.h>
#include <backend/gpu/mali_kbase_device_internal.h>
#include <mali_kbase_as_fault_debugfs.h>
static inline u64 lock_region(struct kbase_device *kbdev, u64 pfn,
u32 num_pages)
{
u64 region;
/* can't lock a zero sized range */
KBASE_DEBUG_ASSERT(num_pages);
region = pfn << PAGE_SHIFT;
/*
* fls returns (given the ASSERT above):
* 1 .. 32
*
* 10 + fls(num_pages)
* results in the range (11 .. 42)
*/
/* gracefully handle num_pages being zero */
if (0 == num_pages) {
region |= 11;
} else {
u8 region_width;
region_width = 10 + fls(num_pages);
if (num_pages != (1ul << (region_width - 11))) {
/* not pow2, so must go up to the next pow2 */
region_width += 1;
}
KBASE_DEBUG_ASSERT(region_width <= KBASE_LOCK_REGION_MAX_SIZE);
KBASE_DEBUG_ASSERT(region_width >= KBASE_LOCK_REGION_MIN_SIZE);
region |= region_width;
}
return region;
}
static int wait_ready(struct kbase_device *kbdev,
unsigned int as_nr, struct kbase_context *kctx)
{
unsigned int max_loops = KBASE_AS_INACTIVE_MAX_LOOPS;
u32 val = kbase_reg_read(kbdev, MMU_AS_REG(as_nr, AS_STATUS), kctx);
/* Wait for the MMU status to indicate there is no active command, in
* case one is pending. Do not log remaining register accesses. */
while (--max_loops && (val & AS_STATUS_AS_ACTIVE))
val = kbase_reg_read(kbdev, MMU_AS_REG(as_nr, AS_STATUS), NULL);
if (max_loops == 0) {
dev_err(kbdev->dev, "AS_ACTIVE bit stuck\n");
return -1;
}
/* If waiting in loop was performed, log last read value. */
if (KBASE_AS_INACTIVE_MAX_LOOPS - 1 > max_loops)
kbase_reg_read(kbdev, MMU_AS_REG(as_nr, AS_STATUS), kctx);
return 0;
}
static int write_cmd(struct kbase_device *kbdev, int as_nr, u32 cmd,
struct kbase_context *kctx)
{
int status;
/* write AS_COMMAND when MMU is ready to accept another command */
status = wait_ready(kbdev, as_nr, kctx);
if (status == 0)
kbase_reg_write(kbdev, MMU_AS_REG(as_nr, AS_COMMAND), cmd,
kctx);
return status;
}
static void validate_protected_page_fault(struct kbase_device *kbdev,
struct kbase_context *kctx)
{
/* GPUs which support (native) protected mode shall not report page
* fault addresses unless it has protected debug mode and protected
* debug mode is turned on */
u32 protected_debug_mode = 0;
if (!kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PROTECTED_MODE))
return;
if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PROTECTED_DEBUG_MODE)) {
protected_debug_mode = kbase_reg_read(kbdev,
GPU_CONTROL_REG(GPU_STATUS),
kctx) & GPU_DBGEN;
}
if (!protected_debug_mode) {
/* fault_addr should never be reported in protected mode.
* However, we just continue by printing an error message */
dev_err(kbdev->dev, "Fault address reported in protected mode\n");
}
}
void kbase_mmu_interrupt(struct kbase_device *kbdev, u32 irq_stat)
{
const int num_as = 16;
const int busfault_shift = MMU_PAGE_FAULT_FLAGS;
const int pf_shift = 0;
const unsigned long as_bit_mask = (1UL << num_as) - 1;
unsigned long flags;
u32 new_mask;
u32 tmp;
/* bus faults */
u32 bf_bits = (irq_stat >> busfault_shift) & as_bit_mask;
/* page faults (note: Ignore ASes with both pf and bf) */
u32 pf_bits = ((irq_stat >> pf_shift) & as_bit_mask) & ~bf_bits;
KBASE_DEBUG_ASSERT(NULL != kbdev);
/* remember current mask */
spin_lock_irqsave(&kbdev->mmu_mask_change, flags);
new_mask = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK), NULL);
/* mask interrupts for now */
kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), 0, NULL);
spin_unlock_irqrestore(&kbdev->mmu_mask_change, flags);
while (bf_bits | pf_bits) {
struct kbase_as *as;
int as_no;
struct kbase_context *kctx;
/*
* the while logic ensures we have a bit set, no need to check
* for not-found here
*/
as_no = ffs(bf_bits | pf_bits) - 1;
as = &kbdev->as[as_no];
/*
* Refcount the kctx ASAP - it shouldn't disappear anyway, since
* Bus/Page faults _should_ only occur whilst jobs are running,
* and a job causing the Bus/Page fault shouldn't complete until
* the MMU is updated
*/
kctx = kbasep_js_runpool_lookup_ctx(kbdev, as_no);
/* find faulting address */
as->fault_addr = kbase_reg_read(kbdev,
MMU_AS_REG(as_no,
AS_FAULTADDRESS_HI),
kctx);
as->fault_addr <<= 32;
as->fault_addr |= kbase_reg_read(kbdev,
MMU_AS_REG(as_no,
AS_FAULTADDRESS_LO),
kctx);
/* Mark the fault protected or not */
as->protected_mode = kbdev->protected_mode;
if (kbdev->protected_mode && as->fault_addr) {
/* check if address reporting is allowed */
validate_protected_page_fault(kbdev, kctx);
}
/* report the fault to debugfs */
kbase_as_fault_debugfs_new(kbdev, as_no);
/* record the fault status */
as->fault_status = kbase_reg_read(kbdev,
MMU_AS_REG(as_no,
AS_FAULTSTATUS),
kctx);
/* find the fault type */
as->fault_type = (bf_bits & (1 << as_no)) ?
KBASE_MMU_FAULT_TYPE_BUS :
KBASE_MMU_FAULT_TYPE_PAGE;
if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_AARCH64_MMU)) {
as->fault_extra_addr = kbase_reg_read(kbdev,
MMU_AS_REG(as_no, AS_FAULTEXTRA_HI),
kctx);
as->fault_extra_addr <<= 32;
as->fault_extra_addr |= kbase_reg_read(kbdev,
MMU_AS_REG(as_no, AS_FAULTEXTRA_LO),
kctx);
}
if (kbase_as_has_bus_fault(as)) {
/* Mark bus fault as handled.
* Note that a bus fault is processed first in case
* where both a bus fault and page fault occur.
*/
bf_bits &= ~(1UL << as_no);
/* remove the queued BF (and PF) from the mask */
new_mask &= ~(MMU_BUS_ERROR(as_no) |
MMU_PAGE_FAULT(as_no));
} else {
/* Mark page fault as handled */
pf_bits &= ~(1UL << as_no);
/* remove the queued PF from the mask */
new_mask &= ~MMU_PAGE_FAULT(as_no);
}
/* Process the interrupt for this address space */
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
kbase_mmu_interrupt_process(kbdev, kctx, as);
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
}
/* reenable interrupts */
spin_lock_irqsave(&kbdev->mmu_mask_change, flags);
tmp = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK), NULL);
new_mask |= tmp;
kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), new_mask, NULL);
spin_unlock_irqrestore(&kbdev->mmu_mask_change, flags);
}
void kbase_mmu_hw_configure(struct kbase_device *kbdev, struct kbase_as *as,
struct kbase_context *kctx)
{
struct kbase_mmu_setup *current_setup = &as->current_setup;
u32 transcfg = 0;
if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_AARCH64_MMU)) {
transcfg = current_setup->transcfg & 0xFFFFFFFFUL;
/* Set flag AS_TRANSCFG_PTW_MEMATTR_WRITE_BACK */
/* Clear PTW_MEMATTR bits */
transcfg &= ~AS_TRANSCFG_PTW_MEMATTR_MASK;
/* Enable correct PTW_MEMATTR bits */
transcfg |= AS_TRANSCFG_PTW_MEMATTR_WRITE_BACK;
if (kbdev->system_coherency == COHERENCY_ACE) {
/* Set flag AS_TRANSCFG_PTW_SH_OS (outer shareable) */
/* Clear PTW_SH bits */
transcfg = (transcfg & ~AS_TRANSCFG_PTW_SH_MASK);
/* Enable correct PTW_SH bits */
transcfg = (transcfg | AS_TRANSCFG_PTW_SH_OS);
}
kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_TRANSCFG_LO),
transcfg, kctx);
kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_TRANSCFG_HI),
(current_setup->transcfg >> 32) & 0xFFFFFFFFUL,
kctx);
} else {
if (kbdev->system_coherency == COHERENCY_ACE)
current_setup->transtab |= AS_TRANSTAB_LPAE_SHARE_OUTER;
}
kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_TRANSTAB_LO),
current_setup->transtab & 0xFFFFFFFFUL, kctx);
kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_TRANSTAB_HI),
(current_setup->transtab >> 32) & 0xFFFFFFFFUL, kctx);
kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_MEMATTR_LO),
current_setup->memattr & 0xFFFFFFFFUL, kctx);
kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_MEMATTR_HI),
(current_setup->memattr >> 32) & 0xFFFFFFFFUL, kctx);
KBASE_TLSTREAM_TL_ATTRIB_AS_CONFIG(as,
current_setup->transtab,
current_setup->memattr,
transcfg);
write_cmd(kbdev, as->number, AS_COMMAND_UPDATE, kctx);
}
int kbase_mmu_hw_do_operation(struct kbase_device *kbdev, struct kbase_as *as,
struct kbase_context *kctx, u64 vpfn, u32 nr, u32 op,
unsigned int handling_irq)
{
int ret;
lockdep_assert_held(&kbdev->mmu_hw_mutex);
if (op == AS_COMMAND_UNLOCK) {
/* Unlock doesn't require a lock first */
ret = write_cmd(kbdev, as->number, AS_COMMAND_UNLOCK, kctx);
} else {
u64 lock_addr = lock_region(kbdev, vpfn, nr);
/* Lock the region that needs to be updated */
kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_LOCKADDR_LO),
lock_addr & 0xFFFFFFFFUL, kctx);
kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_LOCKADDR_HI),
(lock_addr >> 32) & 0xFFFFFFFFUL, kctx);
write_cmd(kbdev, as->number, AS_COMMAND_LOCK, kctx);
/* Run the MMU operation */
write_cmd(kbdev, as->number, op, kctx);
/* Wait for the flush to complete */
ret = wait_ready(kbdev, as->number, kctx);
if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_9630)) {
/* Issue an UNLOCK command to ensure that valid page
tables are re-read by the GPU after an update.
Note that, the FLUSH command should perform all the
actions necessary, however the bus logs show that if
multiple page faults occur within an 8 page region
the MMU does not always re-read the updated page
table entries for later faults or is only partially
read, it subsequently raises the page fault IRQ for
the same addresses, the unlock ensures that the MMU
cache is flushed, so updates can be re-read. As the
region is now unlocked we need to issue 2 UNLOCK
commands in order to flush the MMU/uTLB,
see PRLAM-8812.
*/
write_cmd(kbdev, as->number, AS_COMMAND_UNLOCK, kctx);
write_cmd(kbdev, as->number, AS_COMMAND_UNLOCK, kctx);
}
}
return ret;
}
void kbase_mmu_hw_clear_fault(struct kbase_device *kbdev, struct kbase_as *as,
struct kbase_context *kctx, enum kbase_mmu_fault_type type)
{
unsigned long flags;
u32 pf_bf_mask;
spin_lock_irqsave(&kbdev->mmu_mask_change, flags);
/*
* A reset is in-flight and we're flushing the IRQ + bottom half
* so don't update anything as it could race with the reset code.
*/
if (kbdev->irq_reset_flush)
goto unlock;
/* Clear the page (and bus fault IRQ as well in case one occurred) */
pf_bf_mask = MMU_PAGE_FAULT(as->number);
if (type == KBASE_MMU_FAULT_TYPE_BUS ||
type == KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED)
pf_bf_mask |= MMU_BUS_ERROR(as->number);
kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_CLEAR), pf_bf_mask, kctx);
unlock:
spin_unlock_irqrestore(&kbdev->mmu_mask_change, flags);
}
void kbase_mmu_hw_enable_fault(struct kbase_device *kbdev, struct kbase_as *as,
struct kbase_context *kctx, enum kbase_mmu_fault_type type)
{
unsigned long flags;
u32 irq_mask;
/* Enable the page fault IRQ (and bus fault IRQ as well in case one
* occurred) */
spin_lock_irqsave(&kbdev->mmu_mask_change, flags);
/*
* A reset is in-flight and we're flushing the IRQ + bottom half
* so don't update anything as it could race with the reset code.
*/
if (kbdev->irq_reset_flush)
goto unlock;
irq_mask = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK), kctx) |
MMU_PAGE_FAULT(as->number);
if (type == KBASE_MMU_FAULT_TYPE_BUS ||
type == KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED)
irq_mask |= MMU_BUS_ERROR(as->number);
kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), irq_mask, kctx);
unlock:
spin_unlock_irqrestore(&kbdev->mmu_mask_change, flags);
}

View File

@@ -0,0 +1,47 @@
/*
*
* (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
* Foundation, and any use by you of this program is subject to the terms
* of such GNU licence.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, you can access it online at
* http://www.gnu.org/licenses/gpl-2.0.html.
*
* SPDX-License-Identifier: GPL-2.0
*
*/
/*
* Interface file for the direct implementation for MMU hardware access
*
* Direct MMU hardware interface
*
* This module provides the interface(s) that are required by the direct
* register access implementation of the MMU hardware interface
*/
#ifndef _MALI_KBASE_MMU_HW_DIRECT_H_
#define _MALI_KBASE_MMU_HW_DIRECT_H_
#include <mali_kbase_defs.h>
/**
* kbase_mmu_interrupt - Process an MMU interrupt.
*
* Process the MMU interrupt that was reported by the &kbase_device.
*
* @kbdev: kbase context to clear the fault from.
* @irq_stat: Value of the MMU_IRQ_STATUS register
*/
void kbase_mmu_interrupt(struct kbase_device *kbdev, u32 irq_stat);
#endif /* _MALI_KBASE_MMU_HW_DIRECT_H_ */

View File

@@ -0,0 +1,68 @@
/*
*
* (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
* Foundation, and any use by you of this program is subject to the terms
* of such GNU licence.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, you can access it online at
* http://www.gnu.org/licenses/gpl-2.0.html.
*
* SPDX-License-Identifier: GPL-2.0
*
*/
/*
* "Always on" power management policy
*/
#include <mali_kbase.h>
#include <mali_kbase_pm.h>
static u64 always_on_get_core_mask(struct kbase_device *kbdev)
{
return kbdev->gpu_props.props.raw_props.shader_present;
}
static bool always_on_get_core_active(struct kbase_device *kbdev)
{
return true;
}
static void always_on_init(struct kbase_device *kbdev)
{
CSTD_UNUSED(kbdev);
}
static void always_on_term(struct kbase_device *kbdev)
{
CSTD_UNUSED(kbdev);
}
/*
* The struct kbase_pm_policy structure for the demand power policy.
*
* This is the static structure that defines the demand power policy's callback
* and name.
*/
const struct kbase_pm_policy kbase_pm_always_on_policy_ops = {
"always_on", /* name */
always_on_init, /* init */
always_on_term, /* term */
always_on_get_core_mask, /* get_core_mask */
always_on_get_core_active, /* get_core_active */
0u, /* flags */
KBASE_PM_POLICY_ID_ALWAYS_ON, /* id */
};
KBASE_EXPORT_TEST_API(kbase_pm_always_on_policy_ops);

View File

@@ -0,0 +1,82 @@
/*
*
* (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
* Foundation, and any use by you of this program is subject to the terms
* of such GNU licence.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, you can access it online at
* http://www.gnu.org/licenses/gpl-2.0.html.
*
* SPDX-License-Identifier: GPL-2.0
*
*/
/*
* "Always on" power management policy
*/
#ifndef MALI_KBASE_PM_ALWAYS_ON_H
#define MALI_KBASE_PM_ALWAYS_ON_H
/**
* DOC:
* The "Always on" power management policy has the following
* characteristics:
*
* - When KBase indicates that the GPU will be powered up, but we don't yet
* know which Job Chains are to be run:
* All Shader Cores are powered up, regardless of whether or not they will
* be needed later.
*
* - When KBase indicates that a set of Shader Cores are needed to submit the
* currently queued Job Chains:
* All Shader Cores are kept powered, regardless of whether or not they will
* be needed
*
* - When KBase indicates that the GPU need not be powered:
* The Shader Cores are kept powered, regardless of whether or not they will
* be needed. The GPU itself is also kept powered, even though it is not
* needed.
*
* This policy is automatically overridden during system suspend: the desired
* core state is ignored, and the cores are forced off regardless of what the
* policy requests. After resuming from suspend, new changes to the desired
* core state made by the policy are honored.
*
* Note:
*
* - KBase indicates the GPU will be powered up when it has a User Process that
* has just started to submit Job Chains.
*
* - KBase indicates the GPU need not be powered when all the Job Chains from
* User Processes have finished, and it is waiting for a User Process to
* submit some more Job Chains.
*/
/**
* struct kbasep_pm_policy_always_on - Private struct for policy instance data
* @dummy: unused dummy variable
*
* This contains data that is private to the particular power policy that is
* active.
*/
struct kbasep_pm_policy_always_on {
int dummy;
};
extern const struct kbase_pm_policy kbase_pm_always_on_policy_ops;
#endif /* MALI_KBASE_PM_ALWAYS_ON_H */

View File

@@ -0,0 +1,504 @@
/*
*
* (C) COPYRIGHT 2010-2018 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
* Foundation, and any use by you of this program is subject to the terms
* of such GNU licence.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, you can access it online at
* http://www.gnu.org/licenses/gpl-2.0.html.
*
* SPDX-License-Identifier: GPL-2.0
*
*/
/*
* GPU backend implementation of base kernel power management APIs
*/
#include <mali_kbase.h>
#include <mali_midg_regmap.h>
#include <mali_kbase_config_defaults.h>
#include <mali_kbase_pm.h>
#include <mali_kbase_hwaccess_jm.h>
#include <backend/gpu/mali_kbase_js_internal.h>
#include <backend/gpu/mali_kbase_pm_internal.h>
#include <backend/gpu/mali_kbase_jm_internal.h>
static void kbase_pm_gpu_poweroff_wait_wq(struct work_struct *data);
int kbase_pm_runtime_init(struct kbase_device *kbdev)
{
struct kbase_pm_callback_conf *callbacks;
callbacks = (struct kbase_pm_callback_conf *)POWER_MANAGEMENT_CALLBACKS;
if (callbacks) {
kbdev->pm.backend.callback_power_on =
callbacks->power_on_callback;
kbdev->pm.backend.callback_power_off =
callbacks->power_off_callback;
kbdev->pm.backend.callback_power_suspend =
callbacks->power_suspend_callback;
kbdev->pm.backend.callback_power_resume =
callbacks->power_resume_callback;
kbdev->pm.callback_power_runtime_init =
callbacks->power_runtime_init_callback;
kbdev->pm.callback_power_runtime_term =
callbacks->power_runtime_term_callback;
kbdev->pm.backend.callback_power_runtime_on =
callbacks->power_runtime_on_callback;
kbdev->pm.backend.callback_power_runtime_off =
callbacks->power_runtime_off_callback;
kbdev->pm.backend.callback_power_runtime_idle =
callbacks->power_runtime_idle_callback;
if (callbacks->power_runtime_init_callback)
return callbacks->power_runtime_init_callback(kbdev);
else
return 0;
}
kbdev->pm.backend.callback_power_on = NULL;
kbdev->pm.backend.callback_power_off = NULL;
kbdev->pm.backend.callback_power_suspend = NULL;
kbdev->pm.backend.callback_power_resume = NULL;
kbdev->pm.callback_power_runtime_init = NULL;
kbdev->pm.callback_power_runtime_term = NULL;
kbdev->pm.backend.callback_power_runtime_on = NULL;
kbdev->pm.backend.callback_power_runtime_off = NULL;
kbdev->pm.backend.callback_power_runtime_idle = NULL;
return 0;
}
void kbase_pm_runtime_term(struct kbase_device *kbdev)
{
if (kbdev->pm.callback_power_runtime_term) {
kbdev->pm.callback_power_runtime_term(kbdev);
}
}
void kbase_pm_register_access_enable(struct kbase_device *kbdev)
{
struct kbase_pm_callback_conf *callbacks;
callbacks = (struct kbase_pm_callback_conf *)POWER_MANAGEMENT_CALLBACKS;
if (callbacks)
callbacks->power_on_callback(kbdev);
kbdev->pm.backend.gpu_powered = true;
}
void kbase_pm_register_access_disable(struct kbase_device *kbdev)
{
struct kbase_pm_callback_conf *callbacks;
callbacks = (struct kbase_pm_callback_conf *)POWER_MANAGEMENT_CALLBACKS;
if (callbacks)
callbacks->power_off_callback(kbdev);
kbdev->pm.backend.gpu_powered = false;
}
int kbase_hwaccess_pm_init(struct kbase_device *kbdev)
{
int ret = 0;
KBASE_DEBUG_ASSERT(kbdev != NULL);
mutex_init(&kbdev->pm.lock);
kbdev->pm.backend.gpu_poweroff_wait_wq = alloc_workqueue("kbase_pm_poweroff_wait",
WQ_HIGHPRI | WQ_UNBOUND, 1);
if (!kbdev->pm.backend.gpu_poweroff_wait_wq)
return -ENOMEM;
INIT_WORK(&kbdev->pm.backend.gpu_poweroff_wait_work,
kbase_pm_gpu_poweroff_wait_wq);
kbdev->pm.backend.gpu_powered = false;
kbdev->pm.suspending = false;
#ifdef CONFIG_MALI_DEBUG
kbdev->pm.backend.driver_ready_for_irqs = false;
#endif /* CONFIG_MALI_DEBUG */
kbdev->pm.backend.gpu_in_desired_state = true;
init_waitqueue_head(&kbdev->pm.backend.gpu_in_desired_state_wait);
/* Initialise the metrics subsystem */
ret = kbasep_pm_metrics_init(kbdev);
if (ret)
return ret;
init_waitqueue_head(&kbdev->pm.backend.l2_powered_wait);
kbdev->pm.backend.l2_powered = 0;
init_waitqueue_head(&kbdev->pm.backend.reset_done_wait);
kbdev->pm.backend.reset_done = false;
init_waitqueue_head(&kbdev->pm.zero_active_count_wait);
kbdev->pm.active_count = 0;
spin_lock_init(&kbdev->pm.backend.gpu_cycle_counter_requests_lock);
spin_lock_init(&kbdev->pm.backend.gpu_powered_lock);
init_waitqueue_head(&kbdev->pm.backend.poweroff_wait);
if (kbase_pm_ca_init(kbdev) != 0)
goto workq_fail;
if (kbase_pm_policy_init(kbdev) != 0)
goto pm_policy_fail;
return 0;
pm_policy_fail:
kbase_pm_ca_term(kbdev);
workq_fail:
kbasep_pm_metrics_term(kbdev);
return -EINVAL;
}
void kbase_pm_do_poweron(struct kbase_device *kbdev, bool is_resume)
{
lockdep_assert_held(&kbdev->pm.lock);
/* Turn clocks and interrupts on - no-op if we haven't done a previous
* kbase_pm_clock_off() */
kbase_pm_clock_on(kbdev, is_resume);
/* Update core status as required by the policy */
KBASE_TIMELINE_PM_CHECKTRANS(kbdev,
SW_FLOW_PM_CHECKTRANS_PM_DO_POWERON_START);
kbase_pm_update_cores_state(kbdev);
KBASE_TIMELINE_PM_CHECKTRANS(kbdev,
SW_FLOW_PM_CHECKTRANS_PM_DO_POWERON_END);
/* NOTE: We don't wait to reach the desired state, since running atoms
* will wait for that state to be reached anyway */
}
static void kbase_pm_gpu_poweroff_wait_wq(struct work_struct *data)
{
struct kbase_device *kbdev = container_of(data, struct kbase_device,
pm.backend.gpu_poweroff_wait_work);
struct kbase_pm_device_data *pm = &kbdev->pm;
struct kbase_pm_backend_data *backend = &pm->backend;
struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
unsigned long flags;
#if !PLATFORM_POWER_DOWN_ONLY
/* Wait for power transitions to complete. We do this with no locks held
* so that we don't deadlock with any pending workqueues */
KBASE_TIMELINE_PM_CHECKTRANS(kbdev,
SW_FLOW_PM_CHECKTRANS_PM_DO_POWEROFF_START);
kbase_pm_check_transitions_sync(kbdev);
KBASE_TIMELINE_PM_CHECKTRANS(kbdev,
SW_FLOW_PM_CHECKTRANS_PM_DO_POWEROFF_END);
#endif /* !PLATFORM_POWER_DOWN_ONLY */
mutex_lock(&js_devdata->runpool_mutex);
mutex_lock(&kbdev->pm.lock);
#if PLATFORM_POWER_DOWN_ONLY
if (kbdev->pm.backend.gpu_powered) {
if (kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_L2)) {
/* If L2 cache is powered then we must flush it before
* we power off the GPU. Normally this would have been
* handled when the L2 was powered off. */
kbase_gpu_cacheclean(kbdev);
}
}
#endif /* PLATFORM_POWER_DOWN_ONLY */
if (!backend->poweron_required) {
#if !PLATFORM_POWER_DOWN_ONLY
unsigned long flags;
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
WARN_ON(kbdev->l2_available_bitmap ||
kbdev->shader_available_bitmap ||
kbdev->tiler_available_bitmap);
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
#endif /* !PLATFORM_POWER_DOWN_ONLY */
/* Consume any change-state events */
kbase_timeline_pm_check_handle_event(kbdev,
KBASE_TIMELINE_PM_EVENT_GPU_STATE_CHANGED);
/* Disable interrupts and turn the clock off */
if (!kbase_pm_clock_off(kbdev, backend->poweroff_is_suspend)) {
/*
* Page/bus faults are pending, must drop locks to
* process. Interrupts are disabled so no more faults
* should be generated at this point.
*/
mutex_unlock(&kbdev->pm.lock);
mutex_unlock(&js_devdata->runpool_mutex);
kbase_flush_mmu_wqs(kbdev);
mutex_lock(&js_devdata->runpool_mutex);
mutex_lock(&kbdev->pm.lock);
/* Turn off clock now that fault have been handled. We
* dropped locks so poweron_required may have changed -
* power back on if this is the case (effectively only
* re-enabling of the interrupts would be done in this
* case, as the clocks to GPU were not withdrawn yet).
*/
if (backend->poweron_required)
kbase_pm_clock_on(kbdev, false);
else
WARN_ON(!kbase_pm_clock_off(kbdev,
backend->poweroff_is_suspend));
}
}
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
backend->poweroff_wait_in_progress = false;
if (backend->poweron_required) {
backend->poweron_required = false;
kbase_pm_update_cores_state_nolock(kbdev);
kbase_backend_slot_update(kbdev);
}
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
mutex_unlock(&kbdev->pm.lock);
mutex_unlock(&js_devdata->runpool_mutex);
wake_up(&kbdev->pm.backend.poweroff_wait);
}
void kbase_pm_do_poweroff(struct kbase_device *kbdev, bool is_suspend)
{
unsigned long flags;
lockdep_assert_held(&kbdev->pm.lock);
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
if (!kbdev->pm.backend.poweroff_wait_in_progress) {
/* Force all cores off */
kbdev->pm.backend.desired_shader_state = 0;
kbdev->pm.backend.desired_tiler_state = 0;
/* Force all cores to be unavailable, in the situation where
* transitions are in progress for some cores but not others,
* and kbase_pm_check_transitions_nolock can not immediately
* power off the cores */
kbdev->shader_available_bitmap = 0;
kbdev->tiler_available_bitmap = 0;
kbdev->l2_available_bitmap = 0;
kbdev->pm.backend.poweroff_wait_in_progress = true;
kbdev->pm.backend.poweroff_is_suspend = is_suspend;
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
/*Kick off wq here. Callers will have to wait*/
queue_work(kbdev->pm.backend.gpu_poweroff_wait_wq,
&kbdev->pm.backend.gpu_poweroff_wait_work);
} else {
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
}
}
static bool is_poweroff_in_progress(struct kbase_device *kbdev)
{
bool ret;
unsigned long flags;
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
ret = (kbdev->pm.backend.poweroff_wait_in_progress == false);
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
return ret;
}
void kbase_pm_wait_for_poweroff_complete(struct kbase_device *kbdev)
{
wait_event_killable(kbdev->pm.backend.poweroff_wait,
is_poweroff_in_progress(kbdev));
}
int kbase_hwaccess_pm_powerup(struct kbase_device *kbdev,
unsigned int flags)
{
struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
unsigned long irq_flags;
int ret;
KBASE_DEBUG_ASSERT(kbdev != NULL);
mutex_lock(&js_devdata->runpool_mutex);
mutex_lock(&kbdev->pm.lock);
/* A suspend won't happen during startup/insmod */
KBASE_DEBUG_ASSERT(!kbase_pm_is_suspending(kbdev));
/* Power up the GPU, don't enable IRQs as we are not ready to receive
* them. */
ret = kbase_pm_init_hw(kbdev, flags);
if (ret) {
mutex_unlock(&kbdev->pm.lock);
mutex_unlock(&js_devdata->runpool_mutex);
return ret;
}
kbasep_pm_init_core_use_bitmaps(kbdev);
kbdev->pm.debug_core_mask_all = kbdev->pm.debug_core_mask[0] =
kbdev->pm.debug_core_mask[1] =
kbdev->pm.debug_core_mask[2] =
kbdev->gpu_props.props.raw_props.shader_present;
/* Pretend the GPU is active to prevent a power policy turning the GPU
* cores off */
kbdev->pm.active_count = 1;
spin_lock_irqsave(&kbdev->pm.backend.gpu_cycle_counter_requests_lock,
irq_flags);
/* Ensure cycle counter is off */
kbdev->pm.backend.gpu_cycle_counter_requests = 0;
spin_unlock_irqrestore(
&kbdev->pm.backend.gpu_cycle_counter_requests_lock,
irq_flags);
/* We are ready to receive IRQ's now as power policy is set up, so
* enable them now. */
#ifdef CONFIG_MALI_DEBUG
spin_lock_irqsave(&kbdev->pm.backend.gpu_powered_lock, irq_flags);
kbdev->pm.backend.driver_ready_for_irqs = true;
spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, irq_flags);
#endif
kbase_pm_enable_interrupts(kbdev);
/* Turn on the GPU and any cores needed by the policy */
kbase_pm_do_poweron(kbdev, false);
mutex_unlock(&kbdev->pm.lock);
mutex_unlock(&js_devdata->runpool_mutex);
/* Idle the GPU and/or cores, if the policy wants it to */
kbase_pm_context_idle(kbdev);
return 0;
}
void kbase_hwaccess_pm_halt(struct kbase_device *kbdev)
{
KBASE_DEBUG_ASSERT(kbdev != NULL);
mutex_lock(&kbdev->pm.lock);
kbase_pm_cancel_deferred_poweroff(kbdev);
kbase_pm_do_poweroff(kbdev, false);
mutex_unlock(&kbdev->pm.lock);
}
KBASE_EXPORT_TEST_API(kbase_hwaccess_pm_halt);
void kbase_hwaccess_pm_term(struct kbase_device *kbdev)
{
KBASE_DEBUG_ASSERT(kbdev != NULL);
KBASE_DEBUG_ASSERT(kbdev->pm.active_count == 0);
KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_cycle_counter_requests == 0);
/* Free any resources the policy allocated */
kbase_pm_policy_term(kbdev);
kbase_pm_ca_term(kbdev);
/* Shut down the metrics subsystem */
kbasep_pm_metrics_term(kbdev);
destroy_workqueue(kbdev->pm.backend.gpu_poweroff_wait_wq);
}
void kbase_pm_power_changed(struct kbase_device *kbdev)
{
bool cores_are_available;
unsigned long flags;
KBASE_TIMELINE_PM_CHECKTRANS(kbdev,
SW_FLOW_PM_CHECKTRANS_GPU_INTERRUPT_START);
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
cores_are_available = kbase_pm_check_transitions_nolock(kbdev);
KBASE_TIMELINE_PM_CHECKTRANS(kbdev,
SW_FLOW_PM_CHECKTRANS_GPU_INTERRUPT_END);
if (cores_are_available) {
/* Log timelining information that a change in state has
* completed */
kbase_timeline_pm_handle_event(kbdev,
KBASE_TIMELINE_PM_EVENT_GPU_STATE_CHANGED);
kbase_backend_slot_update(kbdev);
}
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
}
void kbase_pm_set_debug_core_mask(struct kbase_device *kbdev,
u64 new_core_mask_js0, u64 new_core_mask_js1,
u64 new_core_mask_js2)
{
kbdev->pm.debug_core_mask[0] = new_core_mask_js0;
kbdev->pm.debug_core_mask[1] = new_core_mask_js1;
kbdev->pm.debug_core_mask[2] = new_core_mask_js2;
kbdev->pm.debug_core_mask_all = new_core_mask_js0 | new_core_mask_js1 |
new_core_mask_js2;
kbase_pm_update_cores_state_nolock(kbdev);
}
void kbase_hwaccess_pm_gpu_active(struct kbase_device *kbdev)
{
kbase_pm_update_active(kbdev);
}
void kbase_hwaccess_pm_gpu_idle(struct kbase_device *kbdev)
{
kbase_pm_update_active(kbdev);
}
void kbase_hwaccess_pm_suspend(struct kbase_device *kbdev)
{
struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
/* Force power off the GPU and all cores (regardless of policy), only
* after the PM active count reaches zero (otherwise, we risk turning it
* off prematurely) */
mutex_lock(&js_devdata->runpool_mutex);
mutex_lock(&kbdev->pm.lock);
kbase_pm_cancel_deferred_poweroff(kbdev);
kbase_pm_do_poweroff(kbdev, true);
kbase_backend_timer_suspend(kbdev);
mutex_unlock(&kbdev->pm.lock);
mutex_unlock(&js_devdata->runpool_mutex);
kbase_pm_wait_for_poweroff_complete(kbdev);
}
void kbase_hwaccess_pm_resume(struct kbase_device *kbdev)
{
struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
mutex_lock(&js_devdata->runpool_mutex);
mutex_lock(&kbdev->pm.lock);
kbdev->pm.suspending = false;
kbase_pm_do_poweron(kbdev, true);
kbase_backend_timer_resume(kbdev);
mutex_unlock(&kbdev->pm.lock);
mutex_unlock(&js_devdata->runpool_mutex);
}

View File

@@ -0,0 +1,187 @@
/*
*
* (C) COPYRIGHT 2013-2017 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
* Foundation, and any use by you of this program is subject to the terms
* of such GNU licence.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, you can access it online at
* http://www.gnu.org/licenses/gpl-2.0.html.
*
* SPDX-License-Identifier: GPL-2.0
*
*/
/*
* Base kernel core availability APIs
*/
#include <mali_kbase.h>
#include <mali_kbase_pm.h>
#include <backend/gpu/mali_kbase_pm_internal.h>
static const struct kbase_pm_ca_policy *const policy_list[] = {
&kbase_pm_ca_fixed_policy_ops,
#ifdef CONFIG_MALI_DEVFREQ
&kbase_pm_ca_devfreq_policy_ops,
#endif
#if !MALI_CUSTOMER_RELEASE
&kbase_pm_ca_random_policy_ops
#endif
};
/**
* POLICY_COUNT - The number of policies available in the system.
*
* This is derived from the number of functions listed in policy_list.
*/
#define POLICY_COUNT (sizeof(policy_list)/sizeof(*policy_list))
int kbase_pm_ca_init(struct kbase_device *kbdev)
{
KBASE_DEBUG_ASSERT(kbdev != NULL);
kbdev->pm.backend.ca_current_policy = policy_list[0];
kbdev->pm.backend.ca_current_policy->init(kbdev);
return 0;
}
void kbase_pm_ca_term(struct kbase_device *kbdev)
{
kbdev->pm.backend.ca_current_policy->term(kbdev);
}
int kbase_pm_ca_list_policies(const struct kbase_pm_ca_policy * const **list)
{
if (!list)
return POLICY_COUNT;
*list = policy_list;
return POLICY_COUNT;
}
KBASE_EXPORT_TEST_API(kbase_pm_ca_list_policies);
const struct kbase_pm_ca_policy
*kbase_pm_ca_get_policy(struct kbase_device *kbdev)
{
KBASE_DEBUG_ASSERT(kbdev != NULL);
return kbdev->pm.backend.ca_current_policy;
}
KBASE_EXPORT_TEST_API(kbase_pm_ca_get_policy);
void kbase_pm_ca_set_policy(struct kbase_device *kbdev,
const struct kbase_pm_ca_policy *new_policy)
{
const struct kbase_pm_ca_policy *old_policy;
unsigned long flags;
KBASE_DEBUG_ASSERT(kbdev != NULL);
KBASE_DEBUG_ASSERT(new_policy != NULL);
KBASE_TRACE_ADD(kbdev, PM_CA_SET_POLICY, NULL, NULL, 0u,
new_policy->id);
/* During a policy change we pretend the GPU is active */
/* A suspend won't happen here, because we're in a syscall from a
* userspace thread */
kbase_pm_context_active(kbdev);
mutex_lock(&kbdev->pm.lock);
/* Remove the policy to prevent IRQ handlers from working on it */
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
old_policy = kbdev->pm.backend.ca_current_policy;
kbdev->pm.backend.ca_current_policy = NULL;
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
if (old_policy->term)
old_policy->term(kbdev);
if (new_policy->init)
new_policy->init(kbdev);
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
kbdev->pm.backend.ca_current_policy = new_policy;
/* If any core power state changes were previously attempted, but
* couldn't be made because the policy was changing (current_policy was
* NULL), then re-try them here. */
kbase_pm_update_cores_state_nolock(kbdev);
kbdev->pm.backend.ca_current_policy->update_core_status(kbdev,
kbdev->shader_ready_bitmap,
kbdev->shader_transitioning_bitmap);
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
mutex_unlock(&kbdev->pm.lock);
/* Now the policy change is finished, we release our fake context active
* reference */
kbase_pm_context_idle(kbdev);
}
KBASE_EXPORT_TEST_API(kbase_pm_ca_set_policy);
u64 kbase_pm_ca_get_core_mask(struct kbase_device *kbdev)
{
lockdep_assert_held(&kbdev->hwaccess_lock);
/* All cores must be enabled when instrumentation is in use */
if (kbdev->pm.backend.instr_enabled)
return kbdev->gpu_props.props.raw_props.shader_present &
kbdev->pm.debug_core_mask_all;
if (kbdev->pm.backend.ca_current_policy == NULL)
return kbdev->gpu_props.props.raw_props.shader_present &
kbdev->pm.debug_core_mask_all;
return kbdev->pm.backend.ca_current_policy->get_core_mask(kbdev) &
kbdev->pm.debug_core_mask_all;
}
KBASE_EXPORT_TEST_API(kbase_pm_ca_get_core_mask);
void kbase_pm_ca_update_core_status(struct kbase_device *kbdev, u64 cores_ready,
u64 cores_transitioning)
{
lockdep_assert_held(&kbdev->hwaccess_lock);
if (kbdev->pm.backend.ca_current_policy != NULL)
kbdev->pm.backend.ca_current_policy->update_core_status(kbdev,
cores_ready,
cores_transitioning);
}
void kbase_pm_ca_instr_enable(struct kbase_device *kbdev)
{
unsigned long flags;
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
kbdev->pm.backend.instr_enabled = true;
kbase_pm_update_cores_state_nolock(kbdev);
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
}
void kbase_pm_ca_instr_disable(struct kbase_device *kbdev)
{
lockdep_assert_held(&kbdev->hwaccess_lock);
kbdev->pm.backend.instr_enabled = false;
kbase_pm_update_cores_state_nolock(kbdev);
}

View File

@@ -0,0 +1,97 @@
/*
*
* (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
* Foundation, and any use by you of this program is subject to the terms
* of such GNU licence.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, you can access it online at
* http://www.gnu.org/licenses/gpl-2.0.html.
*
* SPDX-License-Identifier: GPL-2.0
*
*/
/*
* Base kernel core availability APIs
*/
#ifndef _KBASE_PM_CA_H_
#define _KBASE_PM_CA_H_
/**
* kbase_pm_ca_init - Initialize core availability framework
*
* Must be called before calling any other core availability function
*
* @kbdev: The kbase device structure for the device (must be a valid pointer)
*
* Return: 0 if the core availability framework was successfully initialized,
* -errno otherwise
*/
int kbase_pm_ca_init(struct kbase_device *kbdev);
/**
* kbase_pm_ca_term - Terminate core availability framework
*
* @kbdev: The kbase device structure for the device (must be a valid pointer)
*/
void kbase_pm_ca_term(struct kbase_device *kbdev);
/**
* kbase_pm_ca_get_core_mask - Get currently available shaders core mask
*
* @kbdev: The kbase device structure for the device (must be a valid pointer)
*
* Returns a mask of the currently available shader cores.
* Calls into the core availability policy
*
* Return: The bit mask of available cores
*/
u64 kbase_pm_ca_get_core_mask(struct kbase_device *kbdev);
/**
* kbase_pm_ca_update_core_status - Update core status
*
* @kbdev: The kbase device structure for the device (must be
* a valid pointer)
* @cores_ready: The bit mask of cores ready for job submission
* @cores_transitioning: The bit mask of cores that are transitioning power
* state
*
* Update core availability policy with current core power status
*
* Calls into the core availability policy
*/
void kbase_pm_ca_update_core_status(struct kbase_device *kbdev, u64 cores_ready,
u64 cores_transitioning);
/**
* kbase_pm_ca_instr_enable - Enable override for instrumentation
*
* @kbdev: The kbase device structure for the device (must be a valid pointer)
*
* This overrides the output of the core availability policy, ensuring that all
* cores are available
*/
void kbase_pm_ca_instr_enable(struct kbase_device *kbdev);
/**
* kbase_pm_ca_instr_disable - Disable override for instrumentation
*
* @kbdev: The kbase device structure for the device (must be a valid pointer)
*
* This disables any previously enabled override, and resumes normal policy
* functionality
*/
void kbase_pm_ca_instr_disable(struct kbase_device *kbdev);
#endif /* _KBASE_PM_CA_H_ */

View File

@@ -0,0 +1,134 @@
/*
*
* (C) COPYRIGHT 2017 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
* Foundation, and any use by you of this program is subject to the terms
* of such GNU licence.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, you can access it online at
* http://www.gnu.org/licenses/gpl-2.0.html.
*
* SPDX-License-Identifier: GPL-2.0
*
*/
/*
* A core availability policy implementing core mask selection from devfreq OPPs
*
*/
#include <mali_kbase.h>
#include <mali_kbase_pm.h>
#include <backend/gpu/mali_kbase_pm_internal.h>
#include <linux/version.h>
void kbase_devfreq_set_core_mask(struct kbase_device *kbdev, u64 core_mask)
{
struct kbasep_pm_ca_policy_devfreq *data =
&kbdev->pm.backend.ca_policy_data.devfreq;
unsigned long flags;
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
data->cores_desired = core_mask;
/* Disable any cores that are now unwanted */
data->cores_enabled &= data->cores_desired;
kbdev->pm.backend.ca_in_transition = true;
/* If there are no cores to be powered off then power on desired cores
*/
if (!(data->cores_used & ~data->cores_desired)) {
data->cores_enabled = data->cores_desired;
kbdev->pm.backend.ca_in_transition = false;
}
kbase_pm_update_cores_state_nolock(kbdev);
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
dev_dbg(kbdev->dev, "Devfreq policy : new core mask=%llX %llX\n",
data->cores_desired, data->cores_enabled);
}
static void devfreq_init(struct kbase_device *kbdev)
{
struct kbasep_pm_ca_policy_devfreq *data =
&kbdev->pm.backend.ca_policy_data.devfreq;
if (kbdev->current_core_mask) {
data->cores_enabled = kbdev->current_core_mask;
data->cores_desired = kbdev->current_core_mask;
} else {
data->cores_enabled =
kbdev->gpu_props.props.raw_props.shader_present;
data->cores_desired =
kbdev->gpu_props.props.raw_props.shader_present;
}
data->cores_used = 0;
kbdev->pm.backend.ca_in_transition = false;
}
static void devfreq_term(struct kbase_device *kbdev)
{
}
static u64 devfreq_get_core_mask(struct kbase_device *kbdev)
{
return kbdev->pm.backend.ca_policy_data.devfreq.cores_enabled;
}
static void devfreq_update_core_status(struct kbase_device *kbdev,
u64 cores_ready,
u64 cores_transitioning)
{
struct kbasep_pm_ca_policy_devfreq *data =
&kbdev->pm.backend.ca_policy_data.devfreq;
lockdep_assert_held(&kbdev->hwaccess_lock);
data->cores_used = cores_ready | cores_transitioning;
/* If in desired state then clear transition flag */
if (data->cores_enabled == data->cores_desired)
kbdev->pm.backend.ca_in_transition = false;
/* If all undesired cores are now off then power on desired cores.
* The direct comparison against cores_enabled limits potential
* recursion to one level */
if (!(data->cores_used & ~data->cores_desired) &&
data->cores_enabled != data->cores_desired) {
data->cores_enabled = data->cores_desired;
kbase_pm_update_cores_state_nolock(kbdev);
kbdev->pm.backend.ca_in_transition = false;
}
}
/*
* The struct kbase_pm_ca_policy structure for the devfreq core availability
* policy.
*
* This is the static structure that defines the devfreq core availability power
* policy's callback and name.
*/
const struct kbase_pm_ca_policy kbase_pm_ca_devfreq_policy_ops = {
"devfreq", /* name */
devfreq_init, /* init */
devfreq_term, /* term */
devfreq_get_core_mask, /* get_core_mask */
devfreq_update_core_status, /* update_core_status */
0u, /* flags */
KBASE_PM_CA_POLICY_ID_DEVFREQ, /* id */
};

View File

@@ -0,0 +1,60 @@
/*
*
* (C) COPYRIGHT 2017 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
* Foundation, and any use by you of this program is subject to the terms
* of such GNU licence.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, you can access it online at
* http://www.gnu.org/licenses/gpl-2.0.html.
*
* SPDX-License-Identifier: GPL-2.0
*
*/
/*
* A core availability policy for use with devfreq, where core masks are
* associated with OPPs.
*/
#ifndef MALI_KBASE_PM_CA_DEVFREQ_H
#define MALI_KBASE_PM_CA_DEVFREQ_H
/**
* struct kbasep_pm_ca_policy_devfreq - Private structure for devfreq ca policy
*
* This contains data that is private to the devfreq core availability
* policy.
*
* @cores_desired: Cores that the policy wants to be available
* @cores_enabled: Cores that the policy is currently returning as available
* @cores_used: Cores currently powered or transitioning
*/
struct kbasep_pm_ca_policy_devfreq {
u64 cores_desired;
u64 cores_enabled;
u64 cores_used;
};
extern const struct kbase_pm_ca_policy kbase_pm_ca_devfreq_policy_ops;
/**
* kbase_devfreq_set_core_mask - Set core mask for policy to use
* @kbdev: Device pointer
* @core_mask: New core mask
*
* The new core mask will have immediate effect if the GPU is powered, or will
* take effect when it is next powered on.
*/
void kbase_devfreq_set_core_mask(struct kbase_device *kbdev, u64 core_mask);
#endif /* MALI_KBASE_PM_CA_DEVFREQ_H */

View File

@@ -0,0 +1,70 @@
/*
*
* (C) COPYRIGHT 2013-2015 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
* Foundation, and any use by you of this program is subject to the terms
* of such GNU licence.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, you can access it online at
* http://www.gnu.org/licenses/gpl-2.0.html.
*
* SPDX-License-Identifier: GPL-2.0
*
*/
/*
* A power policy implementing fixed core availability
*/
#include <mali_kbase.h>
#include <mali_kbase_pm.h>
static void fixed_init(struct kbase_device *kbdev)
{
kbdev->pm.backend.ca_in_transition = false;
}
static void fixed_term(struct kbase_device *kbdev)
{
CSTD_UNUSED(kbdev);
}
static u64 fixed_get_core_mask(struct kbase_device *kbdev)
{
return kbdev->gpu_props.props.raw_props.shader_present;
}
static void fixed_update_core_status(struct kbase_device *kbdev,
u64 cores_ready,
u64 cores_transitioning)
{
CSTD_UNUSED(kbdev);
CSTD_UNUSED(cores_ready);
CSTD_UNUSED(cores_transitioning);
}
/*
* The struct kbase_pm_policy structure for the fixed power policy.
*
* This is the static structure that defines the fixed power policy's callback
* and name.
*/
const struct kbase_pm_ca_policy kbase_pm_ca_fixed_policy_ops = {
"fixed", /* name */
fixed_init, /* init */
fixed_term, /* term */
fixed_get_core_mask, /* get_core_mask */
fixed_update_core_status, /* update_core_status */
0u, /* flags */
KBASE_PM_CA_POLICY_ID_FIXED, /* id */
};
KBASE_EXPORT_TEST_API(kbase_pm_ca_fixed_policy_ops);

View File

@@ -0,0 +1,45 @@
/*
*
* (C) COPYRIGHT 2013-2015 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
* Foundation, and any use by you of this program is subject to the terms
* of such GNU licence.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, you can access it online at
* http://www.gnu.org/licenses/gpl-2.0.html.
*
* SPDX-License-Identifier: GPL-2.0
*
*/
/*
* A power policy implementing fixed core availability
*/
#ifndef MALI_KBASE_PM_CA_FIXED_H
#define MALI_KBASE_PM_CA_FIXED_H
/**
* struct kbasep_pm_ca_policy_fixed - Private structure for policy instance data
*
* @dummy: Dummy member - no state is needed
*
* This contains data that is private to the particular power policy that is
* active.
*/
struct kbasep_pm_ca_policy_fixed {
int dummy;
};
extern const struct kbase_pm_ca_policy kbase_pm_ca_fixed_policy_ops;
#endif /* MALI_KBASE_PM_CA_FIXED_H */

View File

@@ -0,0 +1,75 @@
/*
*
* (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
* Foundation, and any use by you of this program is subject to the terms
* of such GNU licence.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, you can access it online at
* http://www.gnu.org/licenses/gpl-2.0.html.
*
* SPDX-License-Identifier: GPL-2.0
*
*/
/*
* "Coarse Demand" power management policy
*/
#include <mali_kbase.h>
#include <mali_kbase_pm.h>
static u64 coarse_demand_get_core_mask(struct kbase_device *kbdev)
{
if (kbdev->pm.active_count == 0)
return 0;
return kbdev->gpu_props.props.raw_props.shader_present;
}
static bool coarse_demand_get_core_active(struct kbase_device *kbdev)
{
if (0 == kbdev->pm.active_count && !(kbdev->shader_needed_bitmap |
kbdev->shader_inuse_bitmap) && !kbdev->tiler_needed_cnt
&& !kbdev->tiler_inuse_cnt)
return false;
return true;
}
static void coarse_demand_init(struct kbase_device *kbdev)
{
CSTD_UNUSED(kbdev);
}
static void coarse_demand_term(struct kbase_device *kbdev)
{
CSTD_UNUSED(kbdev);
}
/* The struct kbase_pm_policy structure for the demand power policy.
*
* This is the static structure that defines the demand power policy's callback
* and name.
*/
const struct kbase_pm_policy kbase_pm_coarse_demand_policy_ops = {
"coarse_demand", /* name */
coarse_demand_init, /* init */
coarse_demand_term, /* term */
coarse_demand_get_core_mask, /* get_core_mask */
coarse_demand_get_core_active, /* get_core_active */
0u, /* flags */
KBASE_PM_POLICY_ID_COARSE_DEMAND, /* id */
};
KBASE_EXPORT_TEST_API(kbase_pm_coarse_demand_policy_ops);

View File

@@ -0,0 +1,69 @@
/*
*
* (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
* Foundation, and any use by you of this program is subject to the terms
* of such GNU licence.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, you can access it online at
* http://www.gnu.org/licenses/gpl-2.0.html.
*
* SPDX-License-Identifier: GPL-2.0
*
*/
/*
* "Coarse Demand" power management policy
*/
#ifndef MALI_KBASE_PM_COARSE_DEMAND_H
#define MALI_KBASE_PM_COARSE_DEMAND_H
/**
* DOC:
* The "Coarse" demand power management policy has the following
* characteristics:
* - When KBase indicates that the GPU will be powered up, but we don't yet
* know which Job Chains are to be run:
* - All Shader Cores are powered up, regardless of whether or not they will
* be needed later.
* - When KBase indicates that a set of Shader Cores are needed to submit the
* currently queued Job Chains:
* - All Shader Cores are kept powered, regardless of whether or not they will
* be needed
* - When KBase indicates that the GPU need not be powered:
* - The Shader Cores are powered off, and the GPU itself is powered off too.
*
* @note:
* - KBase indicates the GPU will be powered up when it has a User Process that
* has just started to submit Job Chains.
* - KBase indicates the GPU need not be powered when all the Job Chains from
* User Processes have finished, and it is waiting for a User Process to
* submit some more Job Chains.
*/
/**
* struct kbasep_pm_policy_coarse_demand - Private structure for coarse demand
* policy
*
* This contains data that is private to the coarse demand power policy.
*
* @dummy: Dummy member - no state needed
*/
struct kbasep_pm_policy_coarse_demand {
int dummy;
};
extern const struct kbase_pm_policy kbase_pm_coarse_demand_policy_ops;
#endif /* MALI_KBASE_PM_COARSE_DEMAND_H */

View File

@@ -0,0 +1,533 @@
/*
*
* (C) COPYRIGHT 2014-2018 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
* Foundation, and any use by you of this program is subject to the terms
* of such GNU licence.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, you can access it online at
* http://www.gnu.org/licenses/gpl-2.0.html.
*
* SPDX-License-Identifier: GPL-2.0
*
*/
/*
* Backend-specific Power Manager definitions
*/
#ifndef _KBASE_PM_HWACCESS_DEFS_H_
#define _KBASE_PM_HWACCESS_DEFS_H_
#include "mali_kbase_pm_ca_fixed.h"
#include "mali_kbase_pm_ca_devfreq.h"
#if !MALI_CUSTOMER_RELEASE
#include "mali_kbase_pm_ca_random.h"
#endif
#include "mali_kbase_pm_always_on.h"
#include "mali_kbase_pm_coarse_demand.h"
#include "mali_kbase_pm_demand.h"
#if !MALI_CUSTOMER_RELEASE
#include "mali_kbase_pm_demand_always_powered.h"
#include "mali_kbase_pm_fast_start.h"
#endif
/* Forward definition - see mali_kbase.h */
struct kbase_device;
struct kbase_jd_atom;
/**
* enum kbase_pm_core_type - The types of core in a GPU.
*
* These enumerated values are used in calls to
* - kbase_pm_get_present_cores()
* - kbase_pm_get_active_cores()
* - kbase_pm_get_trans_cores()
* - kbase_pm_get_ready_cores().
*
* They specify which type of core should be acted on. These values are set in
* a manner that allows core_type_to_reg() function to be simpler and more
* efficient.
*
* @KBASE_PM_CORE_L2: The L2 cache
* @KBASE_PM_CORE_SHADER: Shader cores
* @KBASE_PM_CORE_TILER: Tiler cores
* @KBASE_PM_CORE_STACK: Core stacks
*/
enum kbase_pm_core_type {
KBASE_PM_CORE_L2 = L2_PRESENT_LO,
KBASE_PM_CORE_SHADER = SHADER_PRESENT_LO,
KBASE_PM_CORE_TILER = TILER_PRESENT_LO,
KBASE_PM_CORE_STACK = STACK_PRESENT_LO
};
/**
* struct kbasep_pm_metrics - Metrics data collected for use by the power
* management framework.
*
* @time_busy: number of ns the GPU was busy executing jobs since the
* @time_period_start timestamp.
* @time_idle: number of ns since time_period_start the GPU was not executing
* jobs since the @time_period_start timestamp.
* @busy_cl: number of ns the GPU was busy executing CL jobs. Note that
* if two CL jobs were active for 400ns, this value would be updated
* with 800.
* @busy_gl: number of ns the GPU was busy executing GL jobs. Note that
* if two GL jobs were active for 400ns, this value would be updated
* with 800.
*/
struct kbasep_pm_metrics {
u32 time_busy;
u32 time_idle;
u32 busy_cl[2];
u32 busy_gl;
};
/**
* struct kbasep_pm_metrics_state - State required to collect the metrics in
* struct kbasep_pm_metrics
* @time_period_start: time at which busy/idle measurements started
* @gpu_active: true when the GPU is executing jobs. false when
* not. Updated when the job scheduler informs us a job in submitted
* or removed from a GPU slot.
* @active_cl_ctx: number of CL jobs active on the GPU. Array is per-device.
* @active_gl_ctx: number of GL jobs active on the GPU. Array is per-slot. As
* GL jobs never run on slot 2 this slot is not recorded.
* @lock: spinlock protecting the kbasep_pm_metrics_data structure
* @platform_data: pointer to data controlled by platform specific code
* @kbdev: pointer to kbase device for which metrics are collected
* @values: The current values of the power management metrics. The
* kbase_pm_get_dvfs_metrics() function is used to compare these
* current values with the saved values from a previous invocation.
* @timer: timer to regularly make DVFS decisions based on the power
* management metrics.
* @timer_active: boolean indicating @timer is running
* @dvfs_last: values of the PM metrics from the last DVFS tick
* @dvfs_diff: different between the current and previous PM metrics.
*/
struct kbasep_pm_metrics_state {
ktime_t time_period_start;
bool gpu_active;
u32 active_cl_ctx[2];
u32 active_gl_ctx[2]; /* GL jobs can only run on 2 of the 3 job slots */
spinlock_t lock;
void *platform_data;
struct kbase_device *kbdev;
struct kbasep_pm_metrics values;
#ifdef CONFIG_MALI_MIDGARD_DVFS
struct hrtimer timer;
bool timer_active;
struct kbasep_pm_metrics dvfs_last;
struct kbasep_pm_metrics dvfs_diff;
#endif
};
union kbase_pm_policy_data {
struct kbasep_pm_policy_always_on always_on;
struct kbasep_pm_policy_coarse_demand coarse_demand;
struct kbasep_pm_policy_demand demand;
#if !MALI_CUSTOMER_RELEASE
struct kbasep_pm_policy_demand_always_powered demand_always_powered;
struct kbasep_pm_policy_fast_start fast_start;
#endif
};
union kbase_pm_ca_policy_data {
struct kbasep_pm_ca_policy_fixed fixed;
struct kbasep_pm_ca_policy_devfreq devfreq;
#if !MALI_CUSTOMER_RELEASE
struct kbasep_pm_ca_policy_random random;
#endif
};
/**
* struct kbase_pm_backend_data - Data stored per device for power management.
*
* This structure contains data for the power management framework. There is one
* instance of this structure per device in the system.
*
* @ca_current_policy: The policy that is currently actively controlling core
* availability.
* @pm_current_policy: The policy that is currently actively controlling the
* power state.
* @ca_policy_data: Private data for current CA policy
* @pm_policy_data: Private data for current PM policy
* @ca_in_transition: Flag indicating when core availability policy is
* transitioning cores. The core availability policy must
* set this when a change in core availability is occurring.
* power_change_lock must be held when accessing this.
* @reset_done: Flag when a reset is complete
* @reset_done_wait: Wait queue to wait for changes to @reset_done
* @l2_powered_wait: Wait queue for whether the l2 cache has been powered as
* requested
* @l2_powered: State indicating whether all the l2 caches are powered.
* Non-zero indicates they're *all* powered
* Zero indicates that some (or all) are not powered
* @gpu_cycle_counter_requests: The reference count of active gpu cycle counter
* users
* @gpu_cycle_counter_requests_lock: Lock to protect @gpu_cycle_counter_requests
* @desired_shader_state: A bit mask identifying the shader cores that the
* power policy would like to be on. The current state
* of the cores may be different, but there should be
* transitions in progress that will eventually achieve
* this state (assuming that the policy doesn't change
* its mind in the mean time).
* @powering_on_shader_state: A bit mask indicating which shader cores are
* currently in a power-on transition
* @desired_tiler_state: A bit mask identifying the tiler cores that the power
* policy would like to be on. See @desired_shader_state
* @powering_on_tiler_state: A bit mask indicating which tiler core are
* currently in a power-on transition
* @powering_on_l2_state: A bit mask indicating which l2-caches are currently
* in a power-on transition
* @powering_on_stack_state: A bit mask indicating which core stacks are
* currently in a power-on transition
* @gpu_in_desired_state: This flag is set if the GPU is powered as requested
* by the desired_xxx_state variables
* @gpu_in_desired_state_wait: Wait queue set when @gpu_in_desired_state != 0
* @gpu_powered: Set to true when the GPU is powered and register
* accesses are possible, false otherwise
* @instr_enabled: Set to true when instrumentation is enabled,
* false otherwise
* @cg1_disabled: Set if the policy wants to keep the second core group
* powered off
* @driver_ready_for_irqs: Debug state indicating whether sufficient
* initialization of the driver has occurred to handle
* IRQs
* @gpu_powered_lock: Spinlock that must be held when writing @gpu_powered or
* accessing @driver_ready_for_irqs
* @metrics: Structure to hold metrics for the GPU
* @gpu_poweroff_pending: number of poweroff timer ticks until the GPU is
* powered off
* @shader_poweroff_pending_time: number of poweroff timer ticks until shaders
* and/or timers are powered off
* @gpu_poweroff_timer: Timer for powering off GPU
* @gpu_poweroff_wq: Workqueue to power off GPU on when timer fires
* @gpu_poweroff_work: Workitem used on @gpu_poweroff_wq
* @shader_poweroff_pending: Bit mask of shaders to be powered off on next
* timer callback
* @tiler_poweroff_pending: Bit mask of tilers to be powered off on next timer
* callback
* @poweroff_timer_needed: true if the poweroff timer is currently required,
* false otherwise
* @poweroff_timer_running: true if the poweroff timer is currently running,
* false otherwise
* power_change_lock should be held when accessing,
* unless there is no way the timer can be running (eg
* hrtimer_cancel() was called immediately before)
* @poweroff_wait_in_progress: true if a wait for GPU power off is in progress.
* hwaccess_lock must be held when accessing
* @poweron_required: true if a GPU power on is required. Should only be set
* when poweroff_wait_in_progress is true, and therefore the
* GPU can not immediately be powered on. pm.lock must be
* held when accessing
* @poweroff_is_suspend: true if the GPU is being powered off due to a suspend
* request. pm.lock must be held when accessing
* @gpu_poweroff_wait_wq: workqueue for waiting for GPU to power off
* @gpu_poweroff_wait_work: work item for use with @gpu_poweroff_wait_wq
* @poweroff_wait: waitqueue for waiting for @gpu_poweroff_wait_work to complete
* @callback_power_on: Callback when the GPU needs to be turned on. See
* &struct kbase_pm_callback_conf
* @callback_power_off: Callback when the GPU may be turned off. See
* &struct kbase_pm_callback_conf
* @callback_power_suspend: Callback when a suspend occurs and the GPU needs to
* be turned off. See &struct kbase_pm_callback_conf
* @callback_power_resume: Callback when a resume occurs and the GPU needs to
* be turned on. See &struct kbase_pm_callback_conf
* @callback_power_runtime_on: Callback when the GPU needs to be turned on. See
* &struct kbase_pm_callback_conf
* @callback_power_runtime_off: Callback when the GPU may be turned off. See
* &struct kbase_pm_callback_conf
* @callback_power_runtime_idle: Optional callback when the GPU may be idle. See
* &struct kbase_pm_callback_conf
*
* Note:
* During an IRQ, @ca_current_policy or @pm_current_policy can be NULL when the
* policy is being changed with kbase_pm_ca_set_policy() or
* kbase_pm_set_policy(). The change is protected under
* kbase_device.pm.power_change_lock. Direct access to this
* from IRQ context must therefore check for NULL. If NULL, then
* kbase_pm_ca_set_policy() or kbase_pm_set_policy() will re-issue the policy
* functions that would have been done under IRQ.
*/
struct kbase_pm_backend_data {
const struct kbase_pm_ca_policy *ca_current_policy;
const struct kbase_pm_policy *pm_current_policy;
union kbase_pm_ca_policy_data ca_policy_data;
union kbase_pm_policy_data pm_policy_data;
bool ca_in_transition;
bool reset_done;
wait_queue_head_t reset_done_wait;
wait_queue_head_t l2_powered_wait;
int l2_powered;
int gpu_cycle_counter_requests;
spinlock_t gpu_cycle_counter_requests_lock;
u64 desired_shader_state;
u64 powering_on_shader_state;
u64 desired_tiler_state;
u64 powering_on_tiler_state;
u64 powering_on_l2_state;
#ifdef CONFIG_MALI_CORESTACK
u64 powering_on_stack_state;
#endif /* CONFIG_MALI_CORESTACK */
bool gpu_in_desired_state;
wait_queue_head_t gpu_in_desired_state_wait;
bool gpu_powered;
bool instr_enabled;
bool cg1_disabled;
#ifdef CONFIG_MALI_DEBUG
bool driver_ready_for_irqs;
#endif /* CONFIG_MALI_DEBUG */
spinlock_t gpu_powered_lock;
struct kbasep_pm_metrics_state metrics;
int gpu_poweroff_pending;
int shader_poweroff_pending_time;
struct hrtimer gpu_poweroff_timer;
struct workqueue_struct *gpu_poweroff_wq;
struct work_struct gpu_poweroff_work;
u64 shader_poweroff_pending;
u64 tiler_poweroff_pending;
bool poweroff_timer_needed;
bool poweroff_timer_running;
bool poweroff_wait_in_progress;
bool poweron_required;
bool poweroff_is_suspend;
struct workqueue_struct *gpu_poweroff_wait_wq;
struct work_struct gpu_poweroff_wait_work;
wait_queue_head_t poweroff_wait;
int (*callback_power_on)(struct kbase_device *kbdev);
void (*callback_power_off)(struct kbase_device *kbdev);
void (*callback_power_suspend)(struct kbase_device *kbdev);
void (*callback_power_resume)(struct kbase_device *kbdev);
int (*callback_power_runtime_on)(struct kbase_device *kbdev);
void (*callback_power_runtime_off)(struct kbase_device *kbdev);
int (*callback_power_runtime_idle)(struct kbase_device *kbdev);
};
/* List of policy IDs */
enum kbase_pm_policy_id {
KBASE_PM_POLICY_ID_DEMAND = 1,
KBASE_PM_POLICY_ID_ALWAYS_ON,
KBASE_PM_POLICY_ID_COARSE_DEMAND,
#if !MALI_CUSTOMER_RELEASE
KBASE_PM_POLICY_ID_DEMAND_ALWAYS_POWERED,
KBASE_PM_POLICY_ID_FAST_START
#endif
};
typedef u32 kbase_pm_policy_flags;
/**
* struct kbase_pm_policy - Power policy structure.
*
* Each power policy exposes a (static) instance of this structure which
* contains function pointers to the policy's methods.
*
* @name: The name of this policy
* @init: Function called when the policy is selected
* @term: Function called when the policy is unselected
* @get_core_mask: Function called to get the current shader core mask
* @get_core_active: Function called to get the current overall GPU power
* state
* @flags: Field indicating flags for this policy
* @id: Field indicating an ID for this policy. This is not
* necessarily the same as its index in the list returned
* by kbase_pm_list_policies().
* It is used purely for debugging.
*/
struct kbase_pm_policy {
char *name;
/**
* Function called when the policy is selected
*
* This should initialize the kbdev->pm.pm_policy_data structure. It
* should not attempt to make any changes to hardware state.
*
* It is undefined what state the cores are in when the function is
* called.
*
* @kbdev: The kbase device structure for the device (must be a
* valid pointer)
*/
void (*init)(struct kbase_device *kbdev);
/**
* Function called when the policy is unselected.
*
* @kbdev: The kbase device structure for the device (must be a
* valid pointer)
*/
void (*term)(struct kbase_device *kbdev);
/**
* Function called to get the current shader core mask
*
* The returned mask should meet or exceed (kbdev->shader_needed_bitmap
* | kbdev->shader_inuse_bitmap).
*
* @kbdev: The kbase device structure for the device (must be a
* valid pointer)
*
* Return: The mask of shader cores to be powered
*/
u64 (*get_core_mask)(struct kbase_device *kbdev);
/**
* Function called to get the current overall GPU power state
*
* This function should consider the state of kbdev->pm.active_count. If
* this count is greater than 0 then there is at least one active
* context on the device and the GPU should be powered. If it is equal
* to 0 then there are no active contexts and the GPU could be powered
* off if desired.
*
* @kbdev: The kbase device structure for the device (must be a
* valid pointer)
*
* Return: true if the GPU should be powered, false otherwise
*/
bool (*get_core_active)(struct kbase_device *kbdev);
kbase_pm_policy_flags flags;
enum kbase_pm_policy_id id;
};
enum kbase_pm_ca_policy_id {
KBASE_PM_CA_POLICY_ID_FIXED = 1,
KBASE_PM_CA_POLICY_ID_DEVFREQ,
KBASE_PM_CA_POLICY_ID_RANDOM
};
typedef u32 kbase_pm_ca_policy_flags;
/**
* Maximum length of a CA policy names
*/
#define KBASE_PM_CA_MAX_POLICY_NAME_LEN 15
/**
* struct kbase_pm_ca_policy - Core availability policy structure.
*
* Each core availability policy exposes a (static) instance of this structure
* which contains function pointers to the policy's methods.
*
* @name: The name of this policy
* @init: Function called when the policy is selected
* @term: Function called when the policy is unselected
* @get_core_mask: Function called to get the current shader core
* availability mask
* @update_core_status: Function called to update the current core status
* @flags: Field indicating flags for this policy
* @id: Field indicating an ID for this policy. This is not
* necessarily the same as its index in the list returned
* by kbase_pm_list_policies().
* It is used purely for debugging.
*/
struct kbase_pm_ca_policy {
char name[KBASE_PM_CA_MAX_POLICY_NAME_LEN + 1];
/**
* Function called when the policy is selected
*
* This should initialize the kbdev->pm.ca_policy_data structure. It
* should not attempt to make any changes to hardware state.
*
* It is undefined what state the cores are in when the function is
* called.
*
* @kbdev The kbase device structure for the device (must be a
* valid pointer)
*/
void (*init)(struct kbase_device *kbdev);
/**
* Function called when the policy is unselected.
*
* @kbdev The kbase device structure for the device (must be a
* valid pointer)
*/
void (*term)(struct kbase_device *kbdev);
/**
* Function called to get the current shader core availability mask
*
* When a change in core availability is occurring, the policy must set
* kbdev->pm.ca_in_transition to true. This is to indicate that
* reporting changes in power state cannot be optimized out, even if
* kbdev->pm.desired_shader_state remains unchanged. This must be done
* by any functions internal to the Core Availability Policy that change
* the return value of kbase_pm_ca_policy::get_core_mask.
*
* @kbdev The kbase device structure for the device (must be a
* valid pointer)
*
* Return: The current core availability mask
*/
u64 (*get_core_mask)(struct kbase_device *kbdev);
/**
* Function called to update the current core status
*
* If none of the cores in core group 0 are ready or transitioning, then
* the policy must ensure that the next call to get_core_mask does not
* return 0 for all cores in core group 0. It is an error to disable
* core group 0 through the core availability policy.
*
* When a change in core availability has finished, the policy must set
* kbdev->pm.ca_in_transition to false. This is to indicate that
* changes in power state can once again be optimized out when
* kbdev->pm.desired_shader_state is unchanged.
*
* @kbdev: The kbase device structure for the device
* (must be a valid pointer)
* @cores_ready: The mask of cores currently powered and
* ready to run jobs
* @cores_transitioning: The mask of cores currently transitioning
* power state
*/
void (*update_core_status)(struct kbase_device *kbdev, u64 cores_ready,
u64 cores_transitioning);
kbase_pm_ca_policy_flags flags;
/**
* Field indicating an ID for this policy. This is not necessarily the
* same as its index in the list returned by kbase_pm_list_policies().
* It is used purely for debugging.
*/
enum kbase_pm_ca_policy_id id;
};
#endif /* _KBASE_PM_HWACCESS_DEFS_H_ */

View File

@@ -0,0 +1,78 @@
/*
*
* (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
* Foundation, and any use by you of this program is subject to the terms
* of such GNU licence.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, you can access it online at
* http://www.gnu.org/licenses/gpl-2.0.html.
*
* SPDX-License-Identifier: GPL-2.0
*
*/
/*
* A simple demand based power management policy
*/
#include <mali_kbase.h>
#include <mali_kbase_pm.h>
static u64 demand_get_core_mask(struct kbase_device *kbdev)
{
u64 desired = kbdev->shader_needed_bitmap | kbdev->shader_inuse_bitmap;
if (0 == kbdev->pm.active_count)
return 0;
return desired;
}
static bool demand_get_core_active(struct kbase_device *kbdev)
{
if (0 == kbdev->pm.active_count && !(kbdev->shader_needed_bitmap |
kbdev->shader_inuse_bitmap) && !kbdev->tiler_needed_cnt
&& !kbdev->tiler_inuse_cnt)
return false;
return true;
}
static void demand_init(struct kbase_device *kbdev)
{
CSTD_UNUSED(kbdev);
}
static void demand_term(struct kbase_device *kbdev)
{
CSTD_UNUSED(kbdev);
}
/*
* The struct kbase_pm_policy structure for the demand power policy.
*
* This is the static structure that defines the demand power policy's callback
* and name.
*/
const struct kbase_pm_policy kbase_pm_demand_policy_ops = {
"demand", /* name */
demand_init, /* init */
demand_term, /* term */
demand_get_core_mask, /* get_core_mask */
demand_get_core_active, /* get_core_active */
0u, /* flags */
KBASE_PM_POLICY_ID_DEMAND, /* id */
};
KBASE_EXPORT_TEST_API(kbase_pm_demand_policy_ops);

View File

@@ -0,0 +1,69 @@
/*
*
* (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
* Foundation, and any use by you of this program is subject to the terms
* of such GNU licence.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, you can access it online at
* http://www.gnu.org/licenses/gpl-2.0.html.
*
* SPDX-License-Identifier: GPL-2.0
*
*/
/*
* A simple demand based power management policy
*/
#ifndef MALI_KBASE_PM_DEMAND_H
#define MALI_KBASE_PM_DEMAND_H
/**
* DOC: Demand power management policy
*
* The demand power management policy has the following characteristics:
* - When KBase indicates that the GPU will be powered up, but we don't yet
* know which Job Chains are to be run:
* - The Shader Cores are not powered up
*
* - When KBase indicates that a set of Shader Cores are needed to submit the
* currently queued Job Chains:
* - Only those Shader Cores are powered up
*
* - When KBase indicates that the GPU need not be powered:
* - The Shader Cores are powered off, and the GPU itself is powered off too.
*
* Note:
* - KBase indicates the GPU will be powered up when it has a User Process that
* has just started to submit Job Chains.
*
* - KBase indicates the GPU need not be powered when all the Job Chains from
* User Processes have finished, and it is waiting for a User Process to
* submit some more Job Chains.
*/
/**
* struct kbasep_pm_policy_demand - Private structure for policy instance data
*
* @dummy: No state is needed, a dummy variable
*
* This contains data that is private to the demand power policy.
*/
struct kbasep_pm_policy_demand {
int dummy;
};
extern const struct kbase_pm_policy kbase_pm_demand_policy_ops;
#endif /* MALI_KBASE_PM_DEMAND_H */

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,568 @@
/*
*
* (C) COPYRIGHT 2010-2018 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
* Foundation, and any use by you of this program is subject to the terms
* of such GNU licence.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, you can access it online at
* http://www.gnu.org/licenses/gpl-2.0.html.
*
* SPDX-License-Identifier: GPL-2.0
*
*/
/*
* Power management API definitions used internally by GPU backend
*/
#ifndef _KBASE_BACKEND_PM_INTERNAL_H_
#define _KBASE_BACKEND_PM_INTERNAL_H_
#include <mali_kbase_hwaccess_pm.h>
#include "mali_kbase_pm_ca.h"
#include "mali_kbase_pm_policy.h"
/**
* kbase_pm_dev_idle - The GPU is idle.
*
* The OS may choose to turn off idle devices
*
* @kbdev: The kbase device structure for the device (must be a valid pointer)
*/
void kbase_pm_dev_idle(struct kbase_device *kbdev);
/**
* kbase_pm_dev_activate - The GPU is active.
*
* The OS should avoid opportunistically turning off the GPU while it is active
*
* @kbdev: The kbase device structure for the device (must be a valid pointer)
*/
void kbase_pm_dev_activate(struct kbase_device *kbdev);
/**
* kbase_pm_get_present_cores - Get details of the cores that are present in
* the device.
*
* This function can be called by the active power policy to return a bitmask of
* the cores (of a specified type) present in the GPU device and also a count of
* the number of cores.
*
* @kbdev: The kbase device structure for the device (must be a valid
* pointer)
* @type: The type of core (see the enum kbase_pm_core_type enumeration)
*
* Return: The bit mask of cores present
*/
u64 kbase_pm_get_present_cores(struct kbase_device *kbdev,
enum kbase_pm_core_type type);
/**
* kbase_pm_get_active_cores - Get details of the cores that are currently
* active in the device.
*
* This function can be called by the active power policy to return a bitmask of
* the cores (of a specified type) that are actively processing work (i.e.
* turned on *and* busy).
*
* @kbdev: The kbase device structure for the device (must be a valid pointer)
* @type: The type of core (see the enum kbase_pm_core_type enumeration)
*
* Return: The bit mask of active cores
*/
u64 kbase_pm_get_active_cores(struct kbase_device *kbdev,
enum kbase_pm_core_type type);
/**
* kbase_pm_get_trans_cores - Get details of the cores that are currently
* transitioning between power states.
*
* This function can be called by the active power policy to return a bitmask of
* the cores (of a specified type) that are currently transitioning between
* power states.
*
* @kbdev: The kbase device structure for the device (must be a valid pointer)
* @type: The type of core (see the enum kbase_pm_core_type enumeration)
*
* Return: The bit mask of transitioning cores
*/
u64 kbase_pm_get_trans_cores(struct kbase_device *kbdev,
enum kbase_pm_core_type type);
/**
* kbase_pm_get_ready_cores - Get details of the cores that are currently
* powered and ready for jobs.
*
* This function can be called by the active power policy to return a bitmask of
* the cores (of a specified type) that are powered and ready for jobs (they may
* or may not be currently executing jobs).
*
* @kbdev: The kbase device structure for the device (must be a valid pointer)
* @type: The type of core (see the enum kbase_pm_core_type enumeration)
*
* Return: The bit mask of ready cores
*/
u64 kbase_pm_get_ready_cores(struct kbase_device *kbdev,
enum kbase_pm_core_type type);
/**
* kbase_pm_clock_on - Turn the clock for the device on, and enable device
* interrupts.
*
* This function can be used by a power policy to turn the clock for the GPU on.
* It should be modified during integration to perform the necessary actions to
* ensure that the GPU is fully powered and clocked.
*
* @kbdev: The kbase device structure for the device (must be a valid
* pointer)
* @is_resume: true if clock on due to resume after suspend, false otherwise
*/
void kbase_pm_clock_on(struct kbase_device *kbdev, bool is_resume);
/**
* kbase_pm_clock_off - Disable device interrupts, and turn the clock for the
* device off.
*
* This function can be used by a power policy to turn the clock for the GPU
* off. It should be modified during integration to perform the necessary
* actions to turn the clock off (if this is possible in the integration).
*
* @kbdev: The kbase device structure for the device (must be a valid
* pointer)
* @is_suspend: true if clock off due to suspend, false otherwise
*
* Return: true if clock was turned off, or
* false if clock can not be turned off due to pending page/bus fault
* workers. Caller must flush MMU workqueues and retry
*/
bool kbase_pm_clock_off(struct kbase_device *kbdev, bool is_suspend);
/**
* kbase_pm_enable_interrupts - Enable interrupts on the device.
*
* Interrupts are also enabled after a call to kbase_pm_clock_on().
*
* @kbdev: The kbase device structure for the device (must be a valid pointer)
*/
void kbase_pm_enable_interrupts(struct kbase_device *kbdev);
/**
* kbase_pm_disable_interrupts - Disable interrupts on the device.
*
* This prevents delivery of Power Management interrupts to the CPU so that
* kbase_pm_check_transitions_nolock() will not be called from the IRQ handler
* until kbase_pm_enable_interrupts() or kbase_pm_clock_on() is called.
*
* Interrupts are also disabled after a call to kbase_pm_clock_off().
*
* @kbdev: The kbase device structure for the device (must be a valid pointer)
*/
void kbase_pm_disable_interrupts(struct kbase_device *kbdev);
/**
* kbase_pm_disable_interrupts_nolock - Version of kbase_pm_disable_interrupts()
* that does not take the hwaccess_lock
*
* Caller must hold the hwaccess_lock.
*
* @kbdev: The kbase device structure for the device (must be a valid pointer)
*/
void kbase_pm_disable_interrupts_nolock(struct kbase_device *kbdev);
/**
* kbase_pm_init_hw - Initialize the hardware.
* @kbdev: The kbase device structure for the device (must be a valid pointer)
* @flags: Flags specifying the type of PM init
*
* This function checks the GPU ID register to ensure that the GPU is supported
* by the driver and performs a reset on the device so that it is in a known
* state before the device is used.
*
* Return: 0 if the device is supported and successfully reset.
*/
int kbase_pm_init_hw(struct kbase_device *kbdev, unsigned int flags);
/**
* kbase_pm_reset_done - The GPU has been reset successfully.
*
* This function must be called by the GPU interrupt handler when the
* RESET_COMPLETED bit is set. It signals to the power management initialization
* code that the GPU has been successfully reset.
*
* @kbdev: The kbase device structure for the device (must be a valid pointer)
*/
void kbase_pm_reset_done(struct kbase_device *kbdev);
/**
* kbase_pm_check_transitions_nolock - Check if there are any power transitions
* to make, and if so start them.
*
* This function will check the desired_xx_state members of
* struct kbase_pm_device_data and the actual status of the hardware to see if
* any power transitions can be made at this time to make the hardware state
* closer to the state desired by the power policy.
*
* The return value can be used to check whether all the desired cores are
* available, and so whether it's worth submitting a job (e.g. from a Power
* Management IRQ).
*
* Note that this still returns true when desired_xx_state has no
* cores. That is: of the no cores desired, none were *un*available. In
* this case, the caller may still need to try submitting jobs. This is because
* the Core Availability Policy might have taken us to an intermediate state
* where no cores are powered, before powering on more cores (e.g. for core
* rotation)
*
* The caller must hold kbase_device.pm.power_change_lock
*
* @kbdev: The kbase device structure for the device (must be a valid pointer)
*
* Return: non-zero when all desired cores are available. That is,
* it's worthwhile for the caller to submit a job.
* false otherwise
*/
bool kbase_pm_check_transitions_nolock(struct kbase_device *kbdev);
/**
* kbase_pm_check_transitions_sync - Synchronous and locking variant of
* kbase_pm_check_transitions_nolock()
*
* On returning, the desired state at the time of the call will have been met.
*
* There is nothing to stop the core being switched off by calls to
* kbase_pm_release_cores() or kbase_pm_unrequest_cores(). Therefore, the
* caller must have already made a call to
* kbase_pm_request_cores()/kbase_pm_request_cores_sync() previously.
*
* The usual use-case for this is to ensure cores are 'READY' after performing
* a GPU Reset.
*
* Unlike kbase_pm_check_transitions_nolock(), the caller must not hold
* kbase_device.pm.power_change_lock, because this function will take that
* lock itself.
*
* @kbdev: The kbase device structure for the device (must be a valid pointer)
*/
void kbase_pm_check_transitions_sync(struct kbase_device *kbdev);
/**
* kbase_pm_update_cores_state_nolock - Variant of kbase_pm_update_cores_state()
* where the caller must hold
* kbase_device.pm.power_change_lock
*
* @kbdev: The kbase device structure for the device (must be a valid pointer)
*/
void kbase_pm_update_cores_state_nolock(struct kbase_device *kbdev);
/**
* kbase_pm_update_cores_state - Update the desired state of shader cores from
* the Power Policy, and begin any power
* transitions.
*
* This function will update the desired_xx_state members of
* struct kbase_pm_device_data by calling into the current Power Policy. It will
* then begin power transitions to make the hardware acheive the desired shader
* core state.
*
* @kbdev: The kbase device structure for the device (must be a valid pointer)
*/
void kbase_pm_update_cores_state(struct kbase_device *kbdev);
/**
* kbase_pm_cancel_deferred_poweroff - Cancel any pending requests to power off
* the GPU and/or shader cores.
*
* This should be called by any functions which directly power off the GPU.
*
* @kbdev: The kbase device structure for the device (must be a valid pointer)
*/
void kbase_pm_cancel_deferred_poweroff(struct kbase_device *kbdev);
/**
* kbasep_pm_init_core_use_bitmaps - Initialise data tracking the required
* and used cores.
*
* @kbdev: The kbase device structure for the device (must be a valid pointer)
*/
void kbasep_pm_init_core_use_bitmaps(struct kbase_device *kbdev);
/**
* kbasep_pm_metrics_init - Initialize the metrics gathering framework.
*
* This must be called before other metric gathering APIs are called.
*
* @kbdev: The kbase device structure for the device (must be a valid pointer)
*
* Return: 0 on success, error code on error
*/
int kbasep_pm_metrics_init(struct kbase_device *kbdev);
/**
* kbasep_pm_metrics_term - Terminate the metrics gathering framework.
*
* This must be called when metric gathering is no longer required. It is an
* error to call any metrics gathering function (other than
* kbasep_pm_metrics_init()) after calling this function.
*
* @kbdev: The kbase device structure for the device (must be a valid pointer)
*/
void kbasep_pm_metrics_term(struct kbase_device *kbdev);
/**
* kbase_pm_report_vsync - Function to be called by the frame buffer driver to
* update the vsync metric.
*
* This function should be called by the frame buffer driver to update whether
* the system is hitting the vsync target or not. buffer_updated should be true
* if the vsync corresponded with a new frame being displayed, otherwise it
* should be false. This function does not need to be called every vsync, but
* only when the value of @buffer_updated differs from a previous call.
*
* @kbdev: The kbase device structure for the device (must be a
* valid pointer)
* @buffer_updated: True if the buffer has been updated on this VSync,
* false otherwise
*/
void kbase_pm_report_vsync(struct kbase_device *kbdev, int buffer_updated);
/**
* kbase_pm_get_dvfs_action - Determine whether the DVFS system should change
* the clock speed of the GPU.
*
* @kbdev: The kbase device structure for the device (must be a valid pointer)
*
* This function should be called regularly by the DVFS system to check whether
* the clock speed of the GPU needs updating.
*/
void kbase_pm_get_dvfs_action(struct kbase_device *kbdev);
/**
* kbase_pm_request_gpu_cycle_counter - Mark that the GPU cycle counter is
* needed
*
* If the caller is the first caller then the GPU cycle counters will be enabled
* along with the l2 cache
*
* The GPU must be powered when calling this function (i.e.
* kbase_pm_context_active() must have been called).
*
* @kbdev: The kbase device structure for the device (must be a valid pointer)
*/
void kbase_pm_request_gpu_cycle_counter(struct kbase_device *kbdev);
/**
* kbase_pm_request_gpu_cycle_counter_l2_is_on - Mark GPU cycle counter is
* needed (l2 cache already on)
*
* This is a version of the above function
* (kbase_pm_request_gpu_cycle_counter()) suitable for being called when the
* l2 cache is known to be on and assured to be on until the subsequent call of
* kbase_pm_release_gpu_cycle_counter() such as when a job is submitted. It does
* not sleep and can be called from atomic functions.
*
* The GPU must be powered when calling this function (i.e.
* kbase_pm_context_active() must have been called) and the l2 cache must be
* powered on.
*
* @kbdev: The kbase device structure for the device (must be a valid pointer)
*/
void kbase_pm_request_gpu_cycle_counter_l2_is_on(struct kbase_device *kbdev);
/**
* kbase_pm_release_gpu_cycle_counter - Mark that the GPU cycle counter is no
* longer in use
*
* If the caller is the last caller then the GPU cycle counters will be
* disabled. A request must have been made before a call to this.
*
* Caller must not hold the hwaccess_lock, as it will be taken in this function.
* If the caller is already holding this lock then
* kbase_pm_release_gpu_cycle_counter_nolock() must be used instead.
*
* @kbdev: The kbase device structure for the device (must be a valid pointer)
*/
void kbase_pm_release_gpu_cycle_counter(struct kbase_device *kbdev);
/**
* kbase_pm_release_gpu_cycle_counter_nolock - Version of kbase_pm_release_gpu_cycle_counter()
* that does not take hwaccess_lock
*
* Caller must hold the hwaccess_lock.
*
* @kbdev: The kbase device structure for the device (must be a valid pointer)
*/
void kbase_pm_release_gpu_cycle_counter_nolock(struct kbase_device *kbdev);
/**
* kbase_pm_wait_for_poweroff_complete - Wait for the poweroff workqueue to
* complete
*
* @kbdev: The kbase device structure for the device (must be a valid pointer)
*/
void kbase_pm_wait_for_poweroff_complete(struct kbase_device *kbdev);
/**
* kbase_pm_runtime_init - Initialize runtime-pm for Mali GPU platform device
*
* Setup the power management callbacks and initialize/enable the runtime-pm
* for the Mali GPU platform device, using the callback function. This must be
* called before the kbase_pm_register_access_enable() function.
*
* @kbdev: The kbase device structure for the device (must be a valid pointer)
*/
int kbase_pm_runtime_init(struct kbase_device *kbdev);
/**
* kbase_pm_runtime_term - Disable runtime-pm for Mali GPU platform device
*
* @kbdev: The kbase device structure for the device (must be a valid pointer)
*/
void kbase_pm_runtime_term(struct kbase_device *kbdev);
/**
* kbase_pm_register_access_enable - Enable access to GPU registers
*
* Enables access to the GPU registers before power management has powered up
* the GPU with kbase_pm_powerup().
*
* This results in the power management callbacks provided in the driver
* configuration to get called to turn on power and/or clocks to the GPU. See
* kbase_pm_callback_conf.
*
* This should only be used before power management is powered up with
* kbase_pm_powerup()
*
* @kbdev: The kbase device structure for the device (must be a valid pointer)
*/
void kbase_pm_register_access_enable(struct kbase_device *kbdev);
/**
* kbase_pm_register_access_disable - Disable early register access
*
* Disables access to the GPU registers enabled earlier by a call to
* kbase_pm_register_access_enable().
*
* This results in the power management callbacks provided in the driver
* configuration to get called to turn off power and/or clocks to the GPU. See
* kbase_pm_callback_conf
*
* This should only be used before power management is powered up with
* kbase_pm_powerup()
*
* @kbdev: The kbase device structure for the device (must be a valid pointer)
*/
void kbase_pm_register_access_disable(struct kbase_device *kbdev);
/* NOTE: kbase_pm_is_suspending is in mali_kbase.h, because it is an inline
* function */
/**
* kbase_pm_metrics_is_active - Check if the power management metrics
* collection is active.
*
* Note that this returns if the power management metrics collection was
* active at the time of calling, it is possible that after the call the metrics
* collection enable may have changed state.
*
* The caller must handle the consequence that the state may have changed.
*
* @kbdev: The kbase device structure for the device (must be a valid pointer)
* Return: true if metrics collection was active else false.
*/
bool kbase_pm_metrics_is_active(struct kbase_device *kbdev);
/**
* kbase_pm_do_poweron - Power on the GPU, and any cores that are requested.
*
* @kbdev: The kbase device structure for the device (must be a valid
* pointer)
* @is_resume: true if power on due to resume after suspend,
* false otherwise
*/
void kbase_pm_do_poweron(struct kbase_device *kbdev, bool is_resume);
/**
* kbase_pm_do_poweroff - Power off the GPU, and any cores that have been
* requested.
*
* @kbdev: The kbase device structure for the device (must be a valid
* pointer)
* @is_suspend: true if power off due to suspend,
* false otherwise
*/
void kbase_pm_do_poweroff(struct kbase_device *kbdev, bool is_suspend);
#if defined(CONFIG_MALI_DEVFREQ) || defined(CONFIG_MALI_MIDGARD_DVFS)
void kbase_pm_get_dvfs_metrics(struct kbase_device *kbdev,
struct kbasep_pm_metrics *last,
struct kbasep_pm_metrics *diff);
#endif /* defined(CONFIG_MALI_DEVFREQ) || defined(CONFIG_MALI_MIDGARD_DVFS) */
#ifdef CONFIG_MALI_MIDGARD_DVFS
/**
* kbase_platform_dvfs_event - Report utilisation to DVFS code
*
* Function provided by platform specific code when DVFS is enabled to allow
* the power management metrics system to report utilisation.
*
* @kbdev: The kbase device structure for the device (must be a
* valid pointer)
* @utilisation: The current calculated utilisation by the metrics system.
* @util_gl_share: The current calculated gl share of utilisation.
* @util_cl_share: The current calculated cl share of utilisation per core
* group.
* Return: Returns 0 on failure and non zero on success.
*/
int kbase_platform_dvfs_event(struct kbase_device *kbdev, u32 utilisation,
u32 util_gl_share, u32 util_cl_share[2]);
#endif
void kbase_pm_power_changed(struct kbase_device *kbdev);
/**
* kbase_pm_metrics_update - Inform the metrics system that an atom is either
* about to be run or has just completed.
* @kbdev: The kbase device structure for the device (must be a valid pointer)
* @now: Pointer to the timestamp of the change, or NULL to use current time
*
* Caller must hold hwaccess_lock
*/
void kbase_pm_metrics_update(struct kbase_device *kbdev,
ktime_t *now);
/**
* kbase_pm_cache_snoop_enable - Allow CPU snoops on the GPU
* If the GPU does not have coherency this is a no-op
* @kbdev: Device pointer
*
* This function should be called after L2 power up.
*/
void kbase_pm_cache_snoop_enable(struct kbase_device *kbdev);
/**
* kbase_pm_cache_snoop_disable - Prevent CPU snoops on the GPU
* If the GPU does not have coherency this is a no-op
* @kbdev: Device pointer
*
* This function should be called before L2 power off.
*/
void kbase_pm_cache_snoop_disable(struct kbase_device *kbdev);
#endif /* _KBASE_BACKEND_PM_INTERNAL_H_ */

View File

@@ -0,0 +1,295 @@
/*
*
* (C) COPYRIGHT 2011-2018 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
* Foundation, and any use by you of this program is subject to the terms
* of such GNU licence.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, you can access it online at
* http://www.gnu.org/licenses/gpl-2.0.html.
*
* SPDX-License-Identifier: GPL-2.0
*
*/
/*
* Metrics for power management
*/
#include <mali_kbase.h>
#include <mali_kbase_pm.h>
#include <backend/gpu/mali_kbase_pm_internal.h>
#include <backend/gpu/mali_kbase_jm_rb.h>
#include <backend/gpu/mali_kbase_pm_defs.h>
/* When VSync is being hit aim for utilisation between 70-90% */
#define KBASE_PM_VSYNC_MIN_UTILISATION 70
#define KBASE_PM_VSYNC_MAX_UTILISATION 90
/* Otherwise aim for 10-40% */
#define KBASE_PM_NO_VSYNC_MIN_UTILISATION 10
#define KBASE_PM_NO_VSYNC_MAX_UTILISATION 40
/* Shift used for kbasep_pm_metrics_data.time_busy/idle - units of (1 << 8) ns
* This gives a maximum period between samples of 2^(32+8)/100 ns = slightly
* under 11s. Exceeding this will cause overflow */
#define KBASE_PM_TIME_SHIFT 8
#ifdef CONFIG_MALI_MIDGARD_DVFS
static enum hrtimer_restart dvfs_callback(struct hrtimer *timer)
{
unsigned long flags;
struct kbasep_pm_metrics_state *metrics;
KBASE_DEBUG_ASSERT(timer != NULL);
metrics = container_of(timer, struct kbasep_pm_metrics_state, timer);
kbase_pm_get_dvfs_action(metrics->kbdev);
spin_lock_irqsave(&metrics->lock, flags);
if (metrics->timer_active)
hrtimer_start(timer,
HR_TIMER_DELAY_MSEC(metrics->kbdev->pm.dvfs_period),
HRTIMER_MODE_REL);
spin_unlock_irqrestore(&metrics->lock, flags);
return HRTIMER_NORESTART;
}
#endif /* CONFIG_MALI_MIDGARD_DVFS */
int kbasep_pm_metrics_init(struct kbase_device *kbdev)
{
KBASE_DEBUG_ASSERT(kbdev != NULL);
kbdev->pm.backend.metrics.kbdev = kbdev;
kbdev->pm.backend.metrics.time_period_start = ktime_get();
kbdev->pm.backend.metrics.gpu_active = false;
kbdev->pm.backend.metrics.active_cl_ctx[0] = 0;
kbdev->pm.backend.metrics.active_cl_ctx[1] = 0;
kbdev->pm.backend.metrics.active_gl_ctx[0] = 0;
kbdev->pm.backend.metrics.active_gl_ctx[1] = 0;
kbdev->pm.backend.metrics.values.time_busy = 0;
kbdev->pm.backend.metrics.values.time_idle = 0;
kbdev->pm.backend.metrics.values.busy_cl[0] = 0;
kbdev->pm.backend.metrics.values.busy_cl[1] = 0;
kbdev->pm.backend.metrics.values.busy_gl = 0;
spin_lock_init(&kbdev->pm.backend.metrics.lock);
#ifdef CONFIG_MALI_MIDGARD_DVFS
kbdev->pm.backend.metrics.timer_active = true;
hrtimer_init(&kbdev->pm.backend.metrics.timer, CLOCK_MONOTONIC,
HRTIMER_MODE_REL);
kbdev->pm.backend.metrics.timer.function = dvfs_callback;
hrtimer_start(&kbdev->pm.backend.metrics.timer,
HR_TIMER_DELAY_MSEC(kbdev->pm.dvfs_period),
HRTIMER_MODE_REL);
#endif /* CONFIG_MALI_MIDGARD_DVFS */
return 0;
}
KBASE_EXPORT_TEST_API(kbasep_pm_metrics_init);
void kbasep_pm_metrics_term(struct kbase_device *kbdev)
{
#ifdef CONFIG_MALI_MIDGARD_DVFS
unsigned long flags;
KBASE_DEBUG_ASSERT(kbdev != NULL);
spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, flags);
kbdev->pm.backend.metrics.timer_active = false;
spin_unlock_irqrestore(&kbdev->pm.backend.metrics.lock, flags);
hrtimer_cancel(&kbdev->pm.backend.metrics.timer);
#endif /* CONFIG_MALI_MIDGARD_DVFS */
}
KBASE_EXPORT_TEST_API(kbasep_pm_metrics_term);
/* caller needs to hold kbdev->pm.backend.metrics.lock before calling this
* function
*/
static void kbase_pm_get_dvfs_utilisation_calc(struct kbase_device *kbdev,
ktime_t now)
{
ktime_t diff;
lockdep_assert_held(&kbdev->pm.backend.metrics.lock);
diff = ktime_sub(now, kbdev->pm.backend.metrics.time_period_start);
if (ktime_to_ns(diff) < 0)
return;
if (kbdev->pm.backend.metrics.gpu_active) {
u32 ns_time = (u32) (ktime_to_ns(diff) >> KBASE_PM_TIME_SHIFT);
kbdev->pm.backend.metrics.values.time_busy += ns_time;
if (kbdev->pm.backend.metrics.active_cl_ctx[0])
kbdev->pm.backend.metrics.values.busy_cl[0] += ns_time;
if (kbdev->pm.backend.metrics.active_cl_ctx[1])
kbdev->pm.backend.metrics.values.busy_cl[1] += ns_time;
if (kbdev->pm.backend.metrics.active_gl_ctx[0])
kbdev->pm.backend.metrics.values.busy_gl += ns_time;
if (kbdev->pm.backend.metrics.active_gl_ctx[1])
kbdev->pm.backend.metrics.values.busy_gl += ns_time;
} else {
kbdev->pm.backend.metrics.values.time_idle += (u32) (ktime_to_ns(diff)
>> KBASE_PM_TIME_SHIFT);
}
kbdev->pm.backend.metrics.time_period_start = now;
}
#if defined(CONFIG_MALI_DEVFREQ) || defined(CONFIG_MALI_MIDGARD_DVFS)
void kbase_pm_get_dvfs_metrics(struct kbase_device *kbdev,
struct kbasep_pm_metrics *last,
struct kbasep_pm_metrics *diff)
{
struct kbasep_pm_metrics *cur = &kbdev->pm.backend.metrics.values;
unsigned long flags;
spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, flags);
kbase_pm_get_dvfs_utilisation_calc(kbdev, ktime_get());
memset(diff, 0, sizeof(*diff));
diff->time_busy = cur->time_busy - last->time_busy;
diff->time_idle = cur->time_idle - last->time_idle;
diff->busy_cl[0] = cur->busy_cl[0] - last->busy_cl[0];
diff->busy_cl[1] = cur->busy_cl[1] - last->busy_cl[1];
diff->busy_gl = cur->busy_gl - last->busy_gl;
*last = *cur;
spin_unlock_irqrestore(&kbdev->pm.backend.metrics.lock, flags);
}
KBASE_EXPORT_TEST_API(kbase_pm_get_dvfs_metrics);
#endif
#ifdef CONFIG_MALI_MIDGARD_DVFS
void kbase_pm_get_dvfs_action(struct kbase_device *kbdev)
{
int utilisation, util_gl_share;
int util_cl_share[2];
int busy;
struct kbasep_pm_metrics *diff;
KBASE_DEBUG_ASSERT(kbdev != NULL);
diff = &kbdev->pm.backend.metrics.dvfs_diff;
kbase_pm_get_dvfs_metrics(kbdev, &kbdev->pm.backend.metrics.dvfs_last, diff);
utilisation = (100 * diff->time_busy) /
max(diff->time_busy + diff->time_idle, 1u);
busy = max(diff->busy_gl + diff->busy_cl[0] + diff->busy_cl[1], 1u);
util_gl_share = (100 * diff->busy_gl) / busy;
util_cl_share[0] = (100 * diff->busy_cl[0]) / busy;
util_cl_share[1] = (100 * diff->busy_cl[1]) / busy;
kbase_platform_dvfs_event(kbdev, utilisation, util_gl_share, util_cl_share);
}
bool kbase_pm_metrics_is_active(struct kbase_device *kbdev)
{
bool isactive;
unsigned long flags;
KBASE_DEBUG_ASSERT(kbdev != NULL);
spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, flags);
isactive = kbdev->pm.backend.metrics.timer_active;
spin_unlock_irqrestore(&kbdev->pm.backend.metrics.lock, flags);
return isactive;
}
KBASE_EXPORT_TEST_API(kbase_pm_metrics_is_active);
#endif /* CONFIG_MALI_MIDGARD_DVFS */
/**
* kbase_pm_metrics_active_calc - Update PM active counts based on currently
* running atoms
* @kbdev: Device pointer
*
* The caller must hold kbdev->pm.backend.metrics.lock
*/
static void kbase_pm_metrics_active_calc(struct kbase_device *kbdev)
{
int js;
lockdep_assert_held(&kbdev->pm.backend.metrics.lock);
kbdev->pm.backend.metrics.active_gl_ctx[0] = 0;
kbdev->pm.backend.metrics.active_gl_ctx[1] = 0;
kbdev->pm.backend.metrics.active_cl_ctx[0] = 0;
kbdev->pm.backend.metrics.active_cl_ctx[1] = 0;
kbdev->pm.backend.metrics.gpu_active = false;
for (js = 0; js < BASE_JM_MAX_NR_SLOTS; js++) {
struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, 0);
/* Head atom may have just completed, so if it isn't running
* then try the next atom */
if (katom && katom->gpu_rb_state != KBASE_ATOM_GPU_RB_SUBMITTED)
katom = kbase_gpu_inspect(kbdev, js, 1);
if (katom && katom->gpu_rb_state ==
KBASE_ATOM_GPU_RB_SUBMITTED) {
if (katom->core_req & BASE_JD_REQ_ONLY_COMPUTE) {
int device_nr = (katom->core_req &
BASE_JD_REQ_SPECIFIC_COHERENT_GROUP)
? katom->device_nr : 0;
if (!WARN_ON(device_nr >= 2))
kbdev->pm.backend.metrics.
active_cl_ctx[device_nr] = 1;
} else {
/* Slot 2 should not be running non-compute
* atoms */
if (!WARN_ON(js >= 2))
kbdev->pm.backend.metrics.
active_gl_ctx[js] = 1;
}
kbdev->pm.backend.metrics.gpu_active = true;
}
}
}
/* called when job is submitted to or removed from a GPU slot */
void kbase_pm_metrics_update(struct kbase_device *kbdev, ktime_t *timestamp)
{
unsigned long flags;
ktime_t now;
lockdep_assert_held(&kbdev->hwaccess_lock);
spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, flags);
if (!timestamp) {
now = ktime_get();
timestamp = &now;
}
/* Track how long CL and/or GL jobs have been busy for */
kbase_pm_get_dvfs_utilisation_calc(kbdev, *timestamp);
kbase_pm_metrics_active_calc(kbdev);
spin_unlock_irqrestore(&kbdev->pm.backend.metrics.lock, flags);
}

View File

@@ -0,0 +1,984 @@
/*
*
* (C) COPYRIGHT 2010-2018 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
* Foundation, and any use by you of this program is subject to the terms
* of such GNU licence.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, you can access it online at
* http://www.gnu.org/licenses/gpl-2.0.html.
*
* SPDX-License-Identifier: GPL-2.0
*
*/
/*
* Power policy API implementations
*/
#include <mali_kbase.h>
#include <mali_midg_regmap.h>
#include <mali_kbase_pm.h>
#include <mali_kbase_config_defaults.h>
#include <backend/gpu/mali_kbase_pm_internal.h>
static const struct kbase_pm_policy *const policy_list[] = {
#ifdef CONFIG_MALI_NO_MALI
&kbase_pm_always_on_policy_ops,
&kbase_pm_demand_policy_ops,
&kbase_pm_coarse_demand_policy_ops,
#if !MALI_CUSTOMER_RELEASE
&kbase_pm_demand_always_powered_policy_ops,
&kbase_pm_fast_start_policy_ops,
#endif
#else /* CONFIG_MALI_NO_MALI */
#if !PLATFORM_POWER_DOWN_ONLY
&kbase_pm_demand_policy_ops,
#endif /* !PLATFORM_POWER_DOWN_ONLY */
&kbase_pm_coarse_demand_policy_ops,
&kbase_pm_always_on_policy_ops,
#if !MALI_CUSTOMER_RELEASE
#if !PLATFORM_POWER_DOWN_ONLY
&kbase_pm_demand_always_powered_policy_ops,
&kbase_pm_fast_start_policy_ops,
#endif /* !PLATFORM_POWER_DOWN_ONLY */
#endif
#endif /* CONFIG_MALI_NO_MALI */
};
/* The number of policies available in the system.
* This is derived from the number of functions listed in policy_get_functions.
*/
#define POLICY_COUNT (sizeof(policy_list)/sizeof(*policy_list))
/* Function IDs for looking up Timeline Trace codes in
* kbase_pm_change_state_trace_code */
enum kbase_pm_func_id {
KBASE_PM_FUNC_ID_REQUEST_CORES_START,
KBASE_PM_FUNC_ID_REQUEST_CORES_END,
KBASE_PM_FUNC_ID_RELEASE_CORES_START,
KBASE_PM_FUNC_ID_RELEASE_CORES_END,
/* Note: kbase_pm_unrequest_cores() is on the slow path, and we neither
* expect to hit it nor tend to hit it very much anyway. We can detect
* whether we need more instrumentation by a difference between
* PM_CHECKTRANS events and PM_SEND/HANDLE_EVENT. */
/* Must be the last */
KBASE_PM_FUNC_ID_COUNT
};
/* State changes during request/unrequest/release-ing cores */
enum {
KBASE_PM_CHANGE_STATE_SHADER = (1u << 0),
KBASE_PM_CHANGE_STATE_TILER = (1u << 1),
/* These two must be last */
KBASE_PM_CHANGE_STATE_MASK = (KBASE_PM_CHANGE_STATE_TILER |
KBASE_PM_CHANGE_STATE_SHADER),
KBASE_PM_CHANGE_STATE_COUNT = KBASE_PM_CHANGE_STATE_MASK + 1
};
typedef u32 kbase_pm_change_state;
#ifdef CONFIG_MALI_TRACE_TIMELINE
/* Timeline Trace code lookups for each function */
static u32 kbase_pm_change_state_trace_code[KBASE_PM_FUNC_ID_COUNT]
[KBASE_PM_CHANGE_STATE_COUNT] = {
/* kbase_pm_request_cores */
[KBASE_PM_FUNC_ID_REQUEST_CORES_START][0] = 0,
[KBASE_PM_FUNC_ID_REQUEST_CORES_START][KBASE_PM_CHANGE_STATE_SHADER] =
SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_SHADER_START,
[KBASE_PM_FUNC_ID_REQUEST_CORES_START][KBASE_PM_CHANGE_STATE_TILER] =
SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_TILER_START,
[KBASE_PM_FUNC_ID_REQUEST_CORES_START][KBASE_PM_CHANGE_STATE_SHADER |
KBASE_PM_CHANGE_STATE_TILER] =
SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_SHADER_TILER_START,
[KBASE_PM_FUNC_ID_REQUEST_CORES_END][0] = 0,
[KBASE_PM_FUNC_ID_REQUEST_CORES_END][KBASE_PM_CHANGE_STATE_SHADER] =
SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_SHADER_END,
[KBASE_PM_FUNC_ID_REQUEST_CORES_END][KBASE_PM_CHANGE_STATE_TILER] =
SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_TILER_END,
[KBASE_PM_FUNC_ID_REQUEST_CORES_END][KBASE_PM_CHANGE_STATE_SHADER |
KBASE_PM_CHANGE_STATE_TILER] =
SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_SHADER_TILER_END,
/* kbase_pm_release_cores */
[KBASE_PM_FUNC_ID_RELEASE_CORES_START][0] = 0,
[KBASE_PM_FUNC_ID_RELEASE_CORES_START][KBASE_PM_CHANGE_STATE_SHADER] =
SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_SHADER_START,
[KBASE_PM_FUNC_ID_RELEASE_CORES_START][KBASE_PM_CHANGE_STATE_TILER] =
SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_TILER_START,
[KBASE_PM_FUNC_ID_RELEASE_CORES_START][KBASE_PM_CHANGE_STATE_SHADER |
KBASE_PM_CHANGE_STATE_TILER] =
SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_SHADER_TILER_START,
[KBASE_PM_FUNC_ID_RELEASE_CORES_END][0] = 0,
[KBASE_PM_FUNC_ID_RELEASE_CORES_END][KBASE_PM_CHANGE_STATE_SHADER] =
SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_SHADER_END,
[KBASE_PM_FUNC_ID_RELEASE_CORES_END][KBASE_PM_CHANGE_STATE_TILER] =
SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_TILER_END,
[KBASE_PM_FUNC_ID_RELEASE_CORES_END][KBASE_PM_CHANGE_STATE_SHADER |
KBASE_PM_CHANGE_STATE_TILER] =
SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_SHADER_TILER_END
};
static inline void kbase_timeline_pm_cores_func(struct kbase_device *kbdev,
enum kbase_pm_func_id func_id,
kbase_pm_change_state state)
{
int trace_code;
KBASE_DEBUG_ASSERT(func_id >= 0 && func_id < KBASE_PM_FUNC_ID_COUNT);
KBASE_DEBUG_ASSERT(state != 0 && (state & KBASE_PM_CHANGE_STATE_MASK) ==
state);
trace_code = kbase_pm_change_state_trace_code[func_id][state];
KBASE_TIMELINE_PM_CHECKTRANS(kbdev, trace_code);
}
#else /* CONFIG_MALI_TRACE_TIMELINE */
static inline void kbase_timeline_pm_cores_func(struct kbase_device *kbdev,
enum kbase_pm_func_id func_id, kbase_pm_change_state state)
{
}
#endif /* CONFIG_MALI_TRACE_TIMELINE */
/**
* kbasep_pm_do_poweroff_cores - Process a poweroff request and power down any
* requested shader cores
* @kbdev: Device pointer
*/
static void kbasep_pm_do_poweroff_cores(struct kbase_device *kbdev)
{
u64 prev_shader_state = kbdev->pm.backend.desired_shader_state;
u64 prev_tiler_state = kbdev->pm.backend.desired_tiler_state;
lockdep_assert_held(&kbdev->hwaccess_lock);
kbdev->pm.backend.desired_shader_state &=
~kbdev->pm.backend.shader_poweroff_pending;
kbdev->pm.backend.desired_tiler_state &=
~kbdev->pm.backend.tiler_poweroff_pending;
kbdev->pm.backend.shader_poweroff_pending = 0;
kbdev->pm.backend.tiler_poweroff_pending = 0;
if (prev_shader_state != kbdev->pm.backend.desired_shader_state ||
prev_tiler_state !=
kbdev->pm.backend.desired_tiler_state ||
kbdev->pm.backend.ca_in_transition) {
bool cores_are_available;
KBASE_TIMELINE_PM_CHECKTRANS(kbdev,
SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_DEFERRED_START);
cores_are_available = kbase_pm_check_transitions_nolock(kbdev);
KBASE_TIMELINE_PM_CHECKTRANS(kbdev,
SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_DEFERRED_END);
/* Don't need 'cores_are_available',
* because we don't return anything */
CSTD_UNUSED(cores_are_available);
}
}
static enum hrtimer_restart
kbasep_pm_do_gpu_poweroff_callback(struct hrtimer *timer)
{
struct kbase_device *kbdev;
unsigned long flags;
kbdev = container_of(timer, struct kbase_device,
pm.backend.gpu_poweroff_timer);
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
/* It is safe for this call to do nothing if the work item is already
* queued. The worker function will read the must up-to-date state of
* kbdev->pm.backend.gpu_poweroff_pending under lock.
*
* If a state change occurs while the worker function is processing,
* this call will succeed as a work item can be requeued once it has
* started processing.
*/
if (kbdev->pm.backend.gpu_poweroff_pending)
queue_work(kbdev->pm.backend.gpu_poweroff_wq,
&kbdev->pm.backend.gpu_poweroff_work);
if (kbdev->pm.backend.shader_poweroff_pending ||
kbdev->pm.backend.tiler_poweroff_pending) {
kbdev->pm.backend.shader_poweroff_pending_time--;
KBASE_DEBUG_ASSERT(
kbdev->pm.backend.shader_poweroff_pending_time
>= 0);
if (!kbdev->pm.backend.shader_poweroff_pending_time)
kbasep_pm_do_poweroff_cores(kbdev);
}
if (kbdev->pm.backend.poweroff_timer_needed) {
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
hrtimer_add_expires(timer, kbdev->pm.gpu_poweroff_time);
return HRTIMER_RESTART;
}
kbdev->pm.backend.poweroff_timer_running = false;
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
return HRTIMER_NORESTART;
}
static void kbasep_pm_do_gpu_poweroff_wq(struct work_struct *data)
{
unsigned long flags;
struct kbase_device *kbdev;
bool do_poweroff = false;
kbdev = container_of(data, struct kbase_device,
pm.backend.gpu_poweroff_work);
mutex_lock(&kbdev->pm.lock);
if (kbdev->pm.backend.gpu_poweroff_pending == 0) {
mutex_unlock(&kbdev->pm.lock);
return;
}
kbdev->pm.backend.gpu_poweroff_pending--;
if (kbdev->pm.backend.gpu_poweroff_pending > 0) {
mutex_unlock(&kbdev->pm.lock);
return;
}
KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_poweroff_pending == 0);
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
/* Only power off the GPU if a request is still pending */
if (!kbdev->pm.backend.pm_current_policy->get_core_active(kbdev))
do_poweroff = true;
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
if (do_poweroff) {
kbdev->pm.backend.poweroff_timer_needed = false;
hrtimer_cancel(&kbdev->pm.backend.gpu_poweroff_timer);
kbdev->pm.backend.poweroff_timer_running = false;
/* Power off the GPU */
kbase_pm_do_poweroff(kbdev, false);
}
mutex_unlock(&kbdev->pm.lock);
}
int kbase_pm_policy_init(struct kbase_device *kbdev)
{
struct workqueue_struct *wq;
wq = alloc_workqueue("kbase_pm_do_poweroff",
WQ_HIGHPRI | WQ_UNBOUND, 1);
if (!wq)
return -ENOMEM;
kbdev->pm.backend.gpu_poweroff_wq = wq;
INIT_WORK(&kbdev->pm.backend.gpu_poweroff_work,
kbasep_pm_do_gpu_poweroff_wq);
hrtimer_init(&kbdev->pm.backend.gpu_poweroff_timer,
CLOCK_MONOTONIC, HRTIMER_MODE_REL);
kbdev->pm.backend.gpu_poweroff_timer.function =
kbasep_pm_do_gpu_poweroff_callback;
kbdev->pm.backend.pm_current_policy = policy_list[0];
kbdev->pm.backend.pm_current_policy->init(kbdev);
kbdev->pm.gpu_poweroff_time =
HR_TIMER_DELAY_NSEC(DEFAULT_PM_GPU_POWEROFF_TICK_NS);
kbdev->pm.poweroff_shader_ticks = DEFAULT_PM_POWEROFF_TICK_SHADER;
kbdev->pm.poweroff_gpu_ticks = DEFAULT_PM_POWEROFF_TICK_GPU;
return 0;
}
void kbase_pm_policy_term(struct kbase_device *kbdev)
{
kbdev->pm.backend.pm_current_policy->term(kbdev);
destroy_workqueue(kbdev->pm.backend.gpu_poweroff_wq);
}
void kbase_pm_cancel_deferred_poweroff(struct kbase_device *kbdev)
{
unsigned long flags;
lockdep_assert_held(&kbdev->pm.lock);
kbdev->pm.backend.poweroff_timer_needed = false;
hrtimer_cancel(&kbdev->pm.backend.gpu_poweroff_timer);
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
kbdev->pm.backend.poweroff_timer_running = false;
/* If wq is already running but is held off by pm.lock, make sure it has
* no effect */
kbdev->pm.backend.gpu_poweroff_pending = 0;
kbdev->pm.backend.shader_poweroff_pending = 0;
kbdev->pm.backend.tiler_poweroff_pending = 0;
kbdev->pm.backend.shader_poweroff_pending_time = 0;
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
}
void kbase_pm_update_active(struct kbase_device *kbdev)
{
struct kbase_pm_device_data *pm = &kbdev->pm;
struct kbase_pm_backend_data *backend = &pm->backend;
unsigned long flags;
bool active;
lockdep_assert_held(&pm->lock);
/* pm_current_policy will never be NULL while pm.lock is held */
KBASE_DEBUG_ASSERT(backend->pm_current_policy);
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
active = backend->pm_current_policy->get_core_active(kbdev);
if (active) {
if (backend->gpu_poweroff_pending) {
/* Cancel any pending power off request */
backend->gpu_poweroff_pending = 0;
/* If a request was pending then the GPU was still
* powered, so no need to continue */
if (!kbdev->poweroff_pending) {
spin_unlock_irqrestore(&kbdev->hwaccess_lock,
flags);
return;
}
}
if (!backend->poweroff_timer_running && !backend->gpu_powered &&
(pm->poweroff_gpu_ticks ||
pm->poweroff_shader_ticks)) {
backend->poweroff_timer_needed = true;
backend->poweroff_timer_running = true;
hrtimer_start(&backend->gpu_poweroff_timer,
pm->gpu_poweroff_time,
HRTIMER_MODE_REL);
}
/* Power on the GPU and any cores requested by the policy */
if (pm->backend.poweroff_wait_in_progress) {
KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_powered);
pm->backend.poweron_required = true;
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
} else {
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
kbase_pm_do_poweron(kbdev, false);
}
} else {
/* It is an error for the power policy to power off the GPU
* when there are contexts active */
KBASE_DEBUG_ASSERT(pm->active_count == 0);
if (backend->shader_poweroff_pending ||
backend->tiler_poweroff_pending) {
backend->shader_poweroff_pending = 0;
backend->tiler_poweroff_pending = 0;
backend->shader_poweroff_pending_time = 0;
}
/* Request power off */
if (pm->backend.gpu_powered) {
if (pm->poweroff_gpu_ticks) {
backend->gpu_poweroff_pending =
pm->poweroff_gpu_ticks;
backend->poweroff_timer_needed = true;
if (!backend->poweroff_timer_running) {
/* Start timer if not running (eg if
* power policy has been changed from
* always_on to something else). This
* will ensure the GPU is actually
* powered off */
backend->poweroff_timer_running
= true;
hrtimer_start(
&backend->gpu_poweroff_timer,
pm->gpu_poweroff_time,
HRTIMER_MODE_REL);
}
spin_unlock_irqrestore(&kbdev->hwaccess_lock,
flags);
} else {
spin_unlock_irqrestore(&kbdev->hwaccess_lock,
flags);
/* Power off the GPU immediately */
kbase_pm_do_poweroff(kbdev, false);
}
} else {
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
}
}
}
void kbase_pm_update_cores_state_nolock(struct kbase_device *kbdev)
{
u64 desired_bitmap;
u64 desired_tiler_bitmap;
bool cores_are_available;
bool do_poweroff = false;
lockdep_assert_held(&kbdev->hwaccess_lock);
if (kbdev->pm.backend.pm_current_policy == NULL)
return;
if (kbdev->pm.backend.poweroff_wait_in_progress)
return;
if (kbdev->protected_mode_transition && !kbdev->shader_needed_bitmap &&
!kbdev->shader_inuse_bitmap && !kbdev->tiler_needed_cnt
&& !kbdev->tiler_inuse_cnt) {
/* We are trying to change in/out of protected mode - force all
* cores off so that the L2 powers down */
desired_bitmap = 0;
desired_tiler_bitmap = 0;
} else {
desired_bitmap =
kbdev->pm.backend.pm_current_policy->get_core_mask(kbdev);
desired_bitmap &= kbase_pm_ca_get_core_mask(kbdev);
if (kbdev->tiler_needed_cnt > 0 || kbdev->tiler_inuse_cnt > 0)
desired_tiler_bitmap = 1;
else
desired_tiler_bitmap = 0;
if (!kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_XAFFINITY)) {
/* Unless XAFFINITY is supported, enable core 0 if tiler
* required, regardless of core availability */
if (kbdev->tiler_needed_cnt > 0 ||
kbdev->tiler_inuse_cnt > 0)
desired_bitmap |= 1;
}
}
if (kbdev->pm.backend.desired_shader_state != desired_bitmap)
KBASE_TRACE_ADD(kbdev, PM_CORES_CHANGE_DESIRED, NULL, NULL, 0u,
(u32)desired_bitmap);
/* Are any cores being powered on? */
if (~kbdev->pm.backend.desired_shader_state & desired_bitmap ||
~kbdev->pm.backend.desired_tiler_state & desired_tiler_bitmap ||
kbdev->pm.backend.ca_in_transition) {
/* Check if we are powering off any cores before updating shader
* state */
if (kbdev->pm.backend.desired_shader_state & ~desired_bitmap ||
kbdev->pm.backend.desired_tiler_state &
~desired_tiler_bitmap) {
/* Start timer to power off cores */
kbdev->pm.backend.shader_poweroff_pending |=
(kbdev->pm.backend.desired_shader_state &
~desired_bitmap);
kbdev->pm.backend.tiler_poweroff_pending |=
(kbdev->pm.backend.desired_tiler_state &
~desired_tiler_bitmap);
if (kbdev->pm.poweroff_shader_ticks &&
!kbdev->protected_mode_transition)
kbdev->pm.backend.shader_poweroff_pending_time =
kbdev->pm.poweroff_shader_ticks;
else
do_poweroff = true;
}
kbdev->pm.backend.desired_shader_state = desired_bitmap;
kbdev->pm.backend.desired_tiler_state = desired_tiler_bitmap;
/* If any cores are being powered on, transition immediately */
cores_are_available = kbase_pm_check_transitions_nolock(kbdev);
} else if (kbdev->pm.backend.desired_shader_state & ~desired_bitmap ||
kbdev->pm.backend.desired_tiler_state &
~desired_tiler_bitmap) {
/* Start timer to power off cores */
kbdev->pm.backend.shader_poweroff_pending |=
(kbdev->pm.backend.desired_shader_state &
~desired_bitmap);
kbdev->pm.backend.tiler_poweroff_pending |=
(kbdev->pm.backend.desired_tiler_state &
~desired_tiler_bitmap);
if (kbdev->pm.poweroff_shader_ticks &&
!kbdev->protected_mode_transition)
kbdev->pm.backend.shader_poweroff_pending_time =
kbdev->pm.poweroff_shader_ticks;
else
kbasep_pm_do_poweroff_cores(kbdev);
} else if (kbdev->pm.active_count == 0 && desired_bitmap != 0 &&
desired_tiler_bitmap != 0 &&
kbdev->pm.backend.poweroff_timer_needed) {
/* If power policy is keeping cores on despite there being no
* active contexts then disable poweroff timer as it isn't
* required.
* Only reset poweroff_timer_needed if we're not in the middle
* of the power off callback */
kbdev->pm.backend.poweroff_timer_needed = false;
}
/* Ensure timer does not power off wanted cores and make sure to power
* off unwanted cores */
if (kbdev->pm.backend.shader_poweroff_pending ||
kbdev->pm.backend.tiler_poweroff_pending) {
kbdev->pm.backend.shader_poweroff_pending &=
~(kbdev->pm.backend.desired_shader_state &
desired_bitmap);
kbdev->pm.backend.tiler_poweroff_pending &=
~(kbdev->pm.backend.desired_tiler_state &
desired_tiler_bitmap);
if (!kbdev->pm.backend.shader_poweroff_pending &&
!kbdev->pm.backend.tiler_poweroff_pending)
kbdev->pm.backend.shader_poweroff_pending_time = 0;
}
/* Shader poweroff is deferred to the end of the function, to eliminate
* issues caused by the core availability policy recursing into this
* function */
if (do_poweroff)
kbasep_pm_do_poweroff_cores(kbdev);
/* Don't need 'cores_are_available', because we don't return anything */
CSTD_UNUSED(cores_are_available);
}
void kbase_pm_update_cores_state(struct kbase_device *kbdev)
{
unsigned long flags;
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
kbase_pm_update_cores_state_nolock(kbdev);
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
}
int kbase_pm_list_policies(const struct kbase_pm_policy * const **list)
{
if (!list)
return POLICY_COUNT;
*list = policy_list;
return POLICY_COUNT;
}
KBASE_EXPORT_TEST_API(kbase_pm_list_policies);
const struct kbase_pm_policy *kbase_pm_get_policy(struct kbase_device *kbdev)
{
KBASE_DEBUG_ASSERT(kbdev != NULL);
return kbdev->pm.backend.pm_current_policy;
}
KBASE_EXPORT_TEST_API(kbase_pm_get_policy);
void kbase_pm_set_policy(struct kbase_device *kbdev,
const struct kbase_pm_policy *new_policy)
{
struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
const struct kbase_pm_policy *old_policy;
unsigned long flags;
KBASE_DEBUG_ASSERT(kbdev != NULL);
KBASE_DEBUG_ASSERT(new_policy != NULL);
KBASE_TRACE_ADD(kbdev, PM_SET_POLICY, NULL, NULL, 0u, new_policy->id);
/* During a policy change we pretend the GPU is active */
/* A suspend won't happen here, because we're in a syscall from a
* userspace thread */
kbase_pm_context_active(kbdev);
mutex_lock(&js_devdata->runpool_mutex);
mutex_lock(&kbdev->pm.lock);
/* Remove the policy to prevent IRQ handlers from working on it */
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
old_policy = kbdev->pm.backend.pm_current_policy;
kbdev->pm.backend.pm_current_policy = NULL;
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
KBASE_TRACE_ADD(kbdev, PM_CURRENT_POLICY_TERM, NULL, NULL, 0u,
old_policy->id);
if (old_policy->term)
old_policy->term(kbdev);
KBASE_TRACE_ADD(kbdev, PM_CURRENT_POLICY_INIT, NULL, NULL, 0u,
new_policy->id);
if (new_policy->init)
new_policy->init(kbdev);
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
kbdev->pm.backend.pm_current_policy = new_policy;
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
/* If any core power state changes were previously attempted, but
* couldn't be made because the policy was changing (current_policy was
* NULL), then re-try them here. */
kbase_pm_update_active(kbdev);
kbase_pm_update_cores_state(kbdev);
mutex_unlock(&kbdev->pm.lock);
mutex_unlock(&js_devdata->runpool_mutex);
/* Now the policy change is finished, we release our fake context active
* reference */
kbase_pm_context_idle(kbdev);
}
KBASE_EXPORT_TEST_API(kbase_pm_set_policy);
/* Check whether a state change has finished, and trace it as completed */
static void
kbase_pm_trace_check_and_finish_state_change(struct kbase_device *kbdev)
{
if ((kbdev->shader_available_bitmap &
kbdev->pm.backend.desired_shader_state)
== kbdev->pm.backend.desired_shader_state &&
(kbdev->tiler_available_bitmap &
kbdev->pm.backend.desired_tiler_state)
== kbdev->pm.backend.desired_tiler_state)
kbase_timeline_pm_check_handle_event(kbdev,
KBASE_TIMELINE_PM_EVENT_GPU_STATE_CHANGED);
}
void kbase_pm_request_cores(struct kbase_device *kbdev,
bool tiler_required, u64 shader_cores)
{
u64 cores;
kbase_pm_change_state change_gpu_state = 0u;
KBASE_DEBUG_ASSERT(kbdev != NULL);
lockdep_assert_held(&kbdev->hwaccess_lock);
cores = shader_cores;
while (cores) {
int bitnum = fls64(cores) - 1;
u64 bit = 1ULL << bitnum;
/* It should be almost impossible for this to overflow. It would
* require 2^32 atoms to request a particular core, which would
* require 2^24 contexts to submit. This would require an amount
* of memory that is impossible on a 32-bit system and extremely
* unlikely on a 64-bit system. */
int cnt = ++kbdev->shader_needed_cnt[bitnum];
if (1 == cnt) {
kbdev->shader_needed_bitmap |= bit;
change_gpu_state |= KBASE_PM_CHANGE_STATE_SHADER;
}
cores &= ~bit;
}
if (tiler_required) {
int cnt = ++kbdev->tiler_needed_cnt;
if (1 == cnt)
change_gpu_state |= KBASE_PM_CHANGE_STATE_TILER;
KBASE_DEBUG_ASSERT(kbdev->tiler_needed_cnt != 0);
}
if (change_gpu_state) {
KBASE_TRACE_ADD(kbdev, PM_REQUEST_CHANGE_SHADER_NEEDED, NULL,
NULL, 0u, (u32) kbdev->shader_needed_bitmap);
kbase_timeline_pm_cores_func(kbdev,
KBASE_PM_FUNC_ID_REQUEST_CORES_START,
change_gpu_state);
kbase_pm_update_cores_state_nolock(kbdev);
kbase_timeline_pm_cores_func(kbdev,
KBASE_PM_FUNC_ID_REQUEST_CORES_END,
change_gpu_state);
}
}
KBASE_EXPORT_TEST_API(kbase_pm_request_cores);
void kbase_pm_unrequest_cores(struct kbase_device *kbdev,
bool tiler_required, u64 shader_cores)
{
kbase_pm_change_state change_gpu_state = 0u;
KBASE_DEBUG_ASSERT(kbdev != NULL);
lockdep_assert_held(&kbdev->hwaccess_lock);
while (shader_cores) {
int bitnum = fls64(shader_cores) - 1;
u64 bit = 1ULL << bitnum;
int cnt;
KBASE_DEBUG_ASSERT(kbdev->shader_needed_cnt[bitnum] > 0);
cnt = --kbdev->shader_needed_cnt[bitnum];
if (0 == cnt) {
kbdev->shader_needed_bitmap &= ~bit;
change_gpu_state |= KBASE_PM_CHANGE_STATE_SHADER;
}
shader_cores &= ~bit;
}
if (tiler_required) {
int cnt;
KBASE_DEBUG_ASSERT(kbdev->tiler_needed_cnt > 0);
cnt = --kbdev->tiler_needed_cnt;
if (0 == cnt)
change_gpu_state |= KBASE_PM_CHANGE_STATE_TILER;
}
if (change_gpu_state) {
KBASE_TRACE_ADD(kbdev, PM_UNREQUEST_CHANGE_SHADER_NEEDED, NULL,
NULL, 0u, (u32) kbdev->shader_needed_bitmap);
kbase_pm_update_cores_state_nolock(kbdev);
/* Trace that any state change effectively completes immediately
* - no-one will wait on the state change */
kbase_pm_trace_check_and_finish_state_change(kbdev);
}
}
KBASE_EXPORT_TEST_API(kbase_pm_unrequest_cores);
enum kbase_pm_cores_ready
kbase_pm_register_inuse_cores(struct kbase_device *kbdev,
bool tiler_required, u64 shader_cores)
{
u64 prev_shader_needed; /* Just for tracing */
u64 prev_shader_inuse; /* Just for tracing */
lockdep_assert_held(&kbdev->hwaccess_lock);
prev_shader_needed = kbdev->shader_needed_bitmap;
prev_shader_inuse = kbdev->shader_inuse_bitmap;
/* If desired_shader_state does not contain the requested cores, then
* power management is not attempting to powering those cores (most
* likely due to core availability policy) and a new job affinity must
* be chosen */
if ((kbdev->pm.backend.desired_shader_state & shader_cores) !=
shader_cores) {
return (kbdev->pm.backend.poweroff_wait_in_progress ||
kbdev->pm.backend.pm_current_policy == NULL) ?
KBASE_CORES_NOT_READY : KBASE_NEW_AFFINITY;
}
if ((kbdev->shader_available_bitmap & shader_cores) != shader_cores ||
(tiler_required && !kbdev->tiler_available_bitmap)) {
/* Trace ongoing core transition */
kbase_timeline_pm_l2_transition_start(kbdev);
return KBASE_CORES_NOT_READY;
}
/* If we started to trace a state change, then trace it has being
* finished by now, at the very latest */
kbase_pm_trace_check_and_finish_state_change(kbdev);
/* Trace core transition done */
kbase_timeline_pm_l2_transition_done(kbdev);
while (shader_cores) {
int bitnum = fls64(shader_cores) - 1;
u64 bit = 1ULL << bitnum;
int cnt;
KBASE_DEBUG_ASSERT(kbdev->shader_needed_cnt[bitnum] > 0);
cnt = --kbdev->shader_needed_cnt[bitnum];
if (0 == cnt)
kbdev->shader_needed_bitmap &= ~bit;
/* shader_inuse_cnt should not overflow because there can only
* be a very limited number of jobs on the h/w at one time */
kbdev->shader_inuse_cnt[bitnum]++;
kbdev->shader_inuse_bitmap |= bit;
shader_cores &= ~bit;
}
if (tiler_required) {
KBASE_DEBUG_ASSERT(kbdev->tiler_needed_cnt > 0);
--kbdev->tiler_needed_cnt;
kbdev->tiler_inuse_cnt++;
KBASE_DEBUG_ASSERT(kbdev->tiler_inuse_cnt != 0);
}
if (prev_shader_needed != kbdev->shader_needed_bitmap)
KBASE_TRACE_ADD(kbdev, PM_REGISTER_CHANGE_SHADER_NEEDED, NULL,
NULL, 0u, (u32) kbdev->shader_needed_bitmap);
if (prev_shader_inuse != kbdev->shader_inuse_bitmap)
KBASE_TRACE_ADD(kbdev, PM_REGISTER_CHANGE_SHADER_INUSE, NULL,
NULL, 0u, (u32) kbdev->shader_inuse_bitmap);
return KBASE_CORES_READY;
}
KBASE_EXPORT_TEST_API(kbase_pm_register_inuse_cores);
void kbase_pm_release_cores(struct kbase_device *kbdev,
bool tiler_required, u64 shader_cores)
{
kbase_pm_change_state change_gpu_state = 0u;
KBASE_DEBUG_ASSERT(kbdev != NULL);
lockdep_assert_held(&kbdev->hwaccess_lock);
while (shader_cores) {
int bitnum = fls64(shader_cores) - 1;
u64 bit = 1ULL << bitnum;
int cnt;
KBASE_DEBUG_ASSERT(kbdev->shader_inuse_cnt[bitnum] > 0);
cnt = --kbdev->shader_inuse_cnt[bitnum];
if (0 == cnt) {
kbdev->shader_inuse_bitmap &= ~bit;
change_gpu_state |= KBASE_PM_CHANGE_STATE_SHADER;
}
shader_cores &= ~bit;
}
if (tiler_required) {
int cnt;
KBASE_DEBUG_ASSERT(kbdev->tiler_inuse_cnt > 0);
cnt = --kbdev->tiler_inuse_cnt;
if (0 == cnt)
change_gpu_state |= KBASE_PM_CHANGE_STATE_TILER;
}
if (change_gpu_state) {
KBASE_TRACE_ADD(kbdev, PM_RELEASE_CHANGE_SHADER_INUSE, NULL,
NULL, 0u, (u32) kbdev->shader_inuse_bitmap);
kbase_timeline_pm_cores_func(kbdev,
KBASE_PM_FUNC_ID_RELEASE_CORES_START,
change_gpu_state);
kbase_pm_update_cores_state_nolock(kbdev);
kbase_timeline_pm_cores_func(kbdev,
KBASE_PM_FUNC_ID_RELEASE_CORES_END,
change_gpu_state);
/* Trace that any state change completed immediately */
kbase_pm_trace_check_and_finish_state_change(kbdev);
}
}
KBASE_EXPORT_TEST_API(kbase_pm_release_cores);
void kbase_pm_request_cores_sync(struct kbase_device *kbdev,
bool tiler_required,
u64 shader_cores)
{
unsigned long flags;
kbase_pm_wait_for_poweroff_complete(kbdev);
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
kbase_pm_request_cores(kbdev, tiler_required, shader_cores);
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
kbase_pm_check_transitions_sync(kbdev);
}
KBASE_EXPORT_TEST_API(kbase_pm_request_cores_sync);
void kbase_pm_request_l2_caches(struct kbase_device *kbdev)
{
unsigned long flags;
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
kbdev->l2_users_count++;
KBASE_DEBUG_ASSERT(kbdev->l2_users_count != 0);
/* Check for the required L2 transitions.
* Caller would block here for the L2 caches of all core groups to be
* powered on, so need to inform the Hw to power up all the L2 caches.
* Can't rely on the l2_users_count value being non-zero previously to
* avoid checking for the transition, as the count could be non-zero
* even if not all the instances of L2 cache are powered up since
* currently the power status of L2 is not tracked separately for each
* core group. Also if the GPU is reset while the L2 is on, L2 will be
* off but the count will be non-zero.
*/
kbase_pm_check_transitions_nolock(kbdev);
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
wait_event(kbdev->pm.backend.l2_powered_wait,
kbdev->pm.backend.l2_powered == 1);
/* Trace that any state change completed immediately */
kbase_pm_trace_check_and_finish_state_change(kbdev);
}
KBASE_EXPORT_TEST_API(kbase_pm_request_l2_caches);
void kbase_pm_request_l2_caches_l2_is_on(struct kbase_device *kbdev)
{
lockdep_assert_held(&kbdev->hwaccess_lock);
kbdev->l2_users_count++;
}
KBASE_EXPORT_TEST_API(kbase_pm_request_l2_caches_l2_is_on);
void kbase_pm_release_l2_caches(struct kbase_device *kbdev)
{
lockdep_assert_held(&kbdev->hwaccess_lock);
KBASE_DEBUG_ASSERT(kbdev->l2_users_count > 0);
--kbdev->l2_users_count;
if (!kbdev->l2_users_count) {
kbase_pm_check_transitions_nolock(kbdev);
/* Trace that any state change completed immediately */
kbase_pm_trace_check_and_finish_state_change(kbdev);
}
}
KBASE_EXPORT_TEST_API(kbase_pm_release_l2_caches);

View File

@@ -0,0 +1,232 @@
/*
*
* (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
* Foundation, and any use by you of this program is subject to the terms
* of such GNU licence.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, you can access it online at
* http://www.gnu.org/licenses/gpl-2.0.html.
*
* SPDX-License-Identifier: GPL-2.0
*
*/
/*
* Power policy API definitions
*/
#ifndef _KBASE_PM_POLICY_H_
#define _KBASE_PM_POLICY_H_
/**
* kbase_pm_policy_init - Initialize power policy framework
*
* @kbdev: The kbase device structure for the device (must be a valid pointer)
*
* Must be called before calling any other policy function
*
* Return: 0 if the power policy framework was successfully
* initialized, -errno otherwise.
*/
int kbase_pm_policy_init(struct kbase_device *kbdev);
/**
* kbase_pm_policy_term - Terminate power policy framework
*
* @kbdev: The kbase device structure for the device (must be a valid pointer)
*/
void kbase_pm_policy_term(struct kbase_device *kbdev);
/**
* kbase_pm_update_active - Update the active power state of the GPU
*
* @kbdev: The kbase device structure for the device (must be a valid pointer)
*
* Calls into the current power policy
*/
void kbase_pm_update_active(struct kbase_device *kbdev);
/**
* kbase_pm_update_cores - Update the desired core state of the GPU
*
* @kbdev: The kbase device structure for the device (must be a valid pointer)
*
* Calls into the current power policy
*/
void kbase_pm_update_cores(struct kbase_device *kbdev);
enum kbase_pm_cores_ready {
KBASE_CORES_NOT_READY = 0,
KBASE_NEW_AFFINITY = 1,
KBASE_CORES_READY = 2
};
/**
* kbase_pm_request_cores_sync - Synchronous variant of kbase_pm_request_cores()
*
* @kbdev: The kbase device structure for the device
* @tiler_required: true if the tiler is required, false otherwise
* @shader_cores: A bitmask of shader cores which are necessary for the job
*
* When this function returns, the @shader_cores will be in the READY state.
*
* This is safe variant of kbase_pm_check_transitions_sync(): it handles the
* work of ensuring the requested cores will remain powered until a matching
* call to kbase_pm_unrequest_cores()/kbase_pm_release_cores() (as appropriate)
* is made.
*/
void kbase_pm_request_cores_sync(struct kbase_device *kbdev,
bool tiler_required, u64 shader_cores);
/**
* kbase_pm_request_cores - Mark one or more cores as being required
* for jobs to be submitted
*
* @kbdev: The kbase device structure for the device
* @tiler_required: true if the tiler is required, false otherwise
* @shader_cores: A bitmask of shader cores which are necessary for the job
*
* This function is called by the job scheduler to mark one or more cores as
* being required to submit jobs that are ready to run.
*
* The cores requested are reference counted and a subsequent call to
* kbase_pm_register_inuse_cores() or kbase_pm_unrequest_cores() should be
* made to dereference the cores as being 'needed'.
*
* The active power policy will meet or exceed the requirements of the
* requested cores in the system. Any core transitions needed will be begun
* immediately, but they might not complete/the cores might not be available
* until a Power Management IRQ.
*
* Return: 0 if the cores were successfully requested, or -errno otherwise.
*/
void kbase_pm_request_cores(struct kbase_device *kbdev,
bool tiler_required, u64 shader_cores);
/**
* kbase_pm_unrequest_cores - Unmark one or more cores as being required for
* jobs to be submitted.
*
* @kbdev: The kbase device structure for the device
* @tiler_required: true if the tiler is required, false otherwise
* @shader_cores: A bitmask of shader cores (as given to
* kbase_pm_request_cores() )
*
* This function undoes the effect of kbase_pm_request_cores(). It should be
* used when a job is not going to be submitted to the hardware (e.g. the job is
* cancelled before it is enqueued).
*
* The active power policy will meet or exceed the requirements of the
* requested cores in the system. Any core transitions needed will be begun
* immediately, but they might not complete until a Power Management IRQ.
*
* The policy may use this as an indication that it can power down cores.
*/
void kbase_pm_unrequest_cores(struct kbase_device *kbdev,
bool tiler_required, u64 shader_cores);
/**
* kbase_pm_register_inuse_cores - Register a set of cores as in use by a job
*
* @kbdev: The kbase device structure for the device
* @tiler_required: true if the tiler is required, false otherwise
* @shader_cores: A bitmask of shader cores (as given to
* kbase_pm_request_cores() )
*
* This function should be called after kbase_pm_request_cores() when the job
* is about to be submitted to the hardware. It will check that the necessary
* cores are available and if so update the 'needed' and 'inuse' bitmasks to
* reflect that the job is now committed to being run.
*
* If the necessary cores are not currently available then the function will
* return %KBASE_CORES_NOT_READY and have no effect.
*
* Return: %KBASE_CORES_NOT_READY if the cores are not immediately ready,
*
* %KBASE_NEW_AFFINITY if the affinity requested is not allowed,
*
* %KBASE_CORES_READY if the cores requested are already available
*/
enum kbase_pm_cores_ready kbase_pm_register_inuse_cores(
struct kbase_device *kbdev,
bool tiler_required,
u64 shader_cores);
/**
* kbase_pm_release_cores - Release cores after a job has run
*
* @kbdev: The kbase device structure for the device
* @tiler_required: true if the tiler is required, false otherwise
* @shader_cores: A bitmask of shader cores (as given to
* kbase_pm_register_inuse_cores() )
*
* This function should be called when a job has finished running on the
* hardware. A call to kbase_pm_register_inuse_cores() must have previously
* occurred. The reference counts of the specified cores will be decremented
* which may cause the bitmask of 'inuse' cores to be reduced. The power policy
* may then turn off any cores which are no longer 'inuse'.
*/
void kbase_pm_release_cores(struct kbase_device *kbdev,
bool tiler_required, u64 shader_cores);
/**
* kbase_pm_request_l2_caches - Request l2 caches
*
* @kbdev: The kbase device structure for the device (must be a valid pointer)
*
* Request the use of l2 caches for all core groups, power up, wait and prevent
* the power manager from powering down the l2 caches.
*
* This tells the power management that the caches should be powered up, and
* they should remain powered, irrespective of the usage of shader cores. This
* does not return until the l2 caches are powered up.
*
* The caller must call kbase_pm_release_l2_caches() when they are finished
* to allow normal power management of the l2 caches to resume.
*
* This should only be used when power management is active.
*/
void kbase_pm_request_l2_caches(struct kbase_device *kbdev);
/**
* kbase_pm_request_l2_caches_l2_is_on - Request l2 caches but don't power on
*
* @kbdev: The kbase device structure for the device (must be a valid pointer)
*
* Increment the count of l2 users but do not attempt to power on the l2
*
* It is the callers responsibility to ensure that the l2 is already powered up
* and to eventually call kbase_pm_release_l2_caches()
*/
void kbase_pm_request_l2_caches_l2_is_on(struct kbase_device *kbdev);
/**
* kbase_pm_request_l2_caches - Release l2 caches
*
* @kbdev: The kbase device structure for the device (must be a valid pointer)
*
* Release the use of l2 caches for all core groups and allow the power manager
* to power them down when necessary.
*
* This tells the power management that the caches can be powered down if
* necessary, with respect to the usage of shader cores.
*
* The caller must have called kbase_pm_request_l2_caches() prior to a call
* to this.
*
* This should only be used when power management is active.
*/
void kbase_pm_release_l2_caches(struct kbase_device *kbdev);
#endif /* _KBASE_PM_POLICY_H_ */

View File

@@ -0,0 +1,108 @@
/*
*
* (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
* Foundation, and any use by you of this program is subject to the terms
* of such GNU licence.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, you can access it online at
* http://www.gnu.org/licenses/gpl-2.0.html.
*
* SPDX-License-Identifier: GPL-2.0
*
*/
#include <mali_kbase.h>
#include <mali_kbase_hwaccess_time.h>
#include <backend/gpu/mali_kbase_device_internal.h>
#include <backend/gpu/mali_kbase_pm_internal.h>
void kbase_backend_get_gpu_time(struct kbase_device *kbdev, u64 *cycle_counter,
u64 *system_time, struct timespec *ts)
{
u32 hi1, hi2;
kbase_pm_request_gpu_cycle_counter(kbdev);
/* Read hi, lo, hi to ensure that overflow from lo to hi is handled
* correctly */
do {
hi1 = kbase_reg_read(kbdev, GPU_CONTROL_REG(CYCLE_COUNT_HI),
NULL);
*cycle_counter = kbase_reg_read(kbdev,
GPU_CONTROL_REG(CYCLE_COUNT_LO), NULL);
hi2 = kbase_reg_read(kbdev, GPU_CONTROL_REG(CYCLE_COUNT_HI),
NULL);
*cycle_counter |= (((u64) hi1) << 32);
} while (hi1 != hi2);
/* Read hi, lo, hi to ensure that overflow from lo to hi is handled
* correctly */
do {
hi1 = kbase_reg_read(kbdev, GPU_CONTROL_REG(TIMESTAMP_HI),
NULL);
*system_time = kbase_reg_read(kbdev,
GPU_CONTROL_REG(TIMESTAMP_LO), NULL);
hi2 = kbase_reg_read(kbdev, GPU_CONTROL_REG(TIMESTAMP_HI),
NULL);
*system_time |= (((u64) hi1) << 32);
} while (hi1 != hi2);
/* Record the CPU's idea of current time */
getrawmonotonic(ts);
kbase_pm_release_gpu_cycle_counter(kbdev);
}
/**
* kbase_wait_write_flush - Wait for GPU write flush
* @kctx: Context pointer
*
* Wait 1000 GPU clock cycles. This delay is known to give the GPU time to flush
* its write buffer.
*
* Only in use for BASE_HW_ISSUE_6367
*
* Note : If GPU resets occur then the counters are reset to zero, the delay may
* not be as expected.
*/
#ifndef CONFIG_MALI_NO_MALI
void kbase_wait_write_flush(struct kbase_context *kctx)
{
u32 base_count = 0;
/*
* The caller must be holding onto the kctx or the call is from
* userspace.
*/
kbase_pm_context_active(kctx->kbdev);
kbase_pm_request_gpu_cycle_counter(kctx->kbdev);
while (true) {
u32 new_count;
new_count = kbase_reg_read(kctx->kbdev,
GPU_CONTROL_REG(CYCLE_COUNT_LO), NULL);
/* First time around, just store the count. */
if (base_count == 0) {
base_count = new_count;
continue;
}
/* No need to handle wrapping, unsigned maths works for this. */
if ((new_count - base_count) > 1000)
break;
}
kbase_pm_release_gpu_cycle_counter(kctx->kbdev);
kbase_pm_context_idle(kctx->kbdev);
}
#endif /* CONFIG_MALI_NO_MALI */

View File

@@ -0,0 +1,57 @@
/*
*
* (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
* Foundation, and any use by you of this program is subject to the terms
* of such GNU licence.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, you can access it online at
* http://www.gnu.org/licenses/gpl-2.0.html.
*
* SPDX-License-Identifier: GPL-2.0
*
*/
#ifndef _KBASE_BACKEND_TIME_H_
#define _KBASE_BACKEND_TIME_H_
/**
* kbase_backend_get_gpu_time() - Get current GPU time
* @kbdev: Device pointer
* @cycle_counter: Pointer to u64 to store cycle counter in
* @system_time: Pointer to u64 to store system time in
* @ts: Pointer to struct timespec to store current monotonic
* time in
*/
void kbase_backend_get_gpu_time(struct kbase_device *kbdev, u64 *cycle_counter,
u64 *system_time, struct timespec *ts);
/**
* kbase_wait_write_flush() - Wait for GPU write flush
* @kctx: Context pointer
*
* Wait 1000 GPU clock cycles. This delay is known to give the GPU time to flush
* its write buffer.
*
* If GPU resets occur then the counters are reset to zero, the delay may not be
* as expected.
*
* This function is only in use for BASE_HW_ISSUE_6367
*/
#ifdef CONFIG_MALI_NO_MALI
static inline void kbase_wait_write_flush(struct kbase_context *kctx)
{
}
#else
void kbase_wait_write_flush(struct kbase_context *kctx);
#endif
#endif /* _KBASE_BACKEND_TIME_H_ */

101
drivers/gpu/arm/midgard/build.bp Executable file
View File

@@ -0,0 +1,101 @@
/*
* Copyright:
* ----------------------------------------------------------------------------
* This confidential and proprietary software may be used only as authorized
* by a licensing agreement from ARM Limited.
* (C) COPYRIGHT 2017-2018 ARM Limited, ALL RIGHTS RESERVED
* The entire notice above must be reproduced on all authorized copies and
* copies may only be made to the extent permitted by a licensing agreement
* from ARM Limited.
* ----------------------------------------------------------------------------
*/
/* Kernel-side tests may include mali_kbase's headers. Therefore any config
* options which affect the sizes of any structs (e.g. adding extra members)
* must be included in these defaults, so that the structs are consistent in
* both mali_kbase and the test modules. */
bob_defaults {
name: "mali_kbase_shared_config_defaults",
no_mali: {
kbuild_options: ["CONFIG_MALI_NO_MALI=y"],
},
mali_corestack: {
kbuild_options: ["CONFIG_MALI_CORESTACK=y"],
},
mali_devfreq: {
kbuild_options: ["CONFIG_MALI_DEVFREQ=y"],
},
mali_midgard_dvfs: {
kbuild_options: ["CONFIG_MALI_MIDGARD_DVFS=y"],
},
mali_trace_timeline: {
kbuild_options: ["CONFIG_MALI_TRACE_TIMELINE=y"],
},
mali_debug: {
kbuild_options: ["CONFIG_MALI_DEBUG=y"],
},
mali_fpga_bus_logger: {
kbuild_options: ["CONFIG_MALI_FPGA_BUS_LOGGER=y"],
},
cinstr_job_dump: {
kbuild_options: ["CONFIG_MALI_JOB_DUMP=y"],
},
mali_gator_support: {
kbuild_options: ["CONFIG_MALI_GATOR_SUPPORT=y"],
},
mali_system_trace: {
kbuild_options: ["CONFIG_MALI_SYSTEM_TRACE=y"],
},
kbuild_options: [
"MALI_UNIT_TEST={{.unit_test_code}}",
"MALI_CUSTOMER_RELEASE={{.release}}",
"MALI_KERNEL_TEST_API={{.debug}}",
],
defaults: ["kernel_defaults"],
}
bob_kernel_module {
name: "mali_kbase",
srcs: [
"*.c",
"*.h",
"Kbuild",
"backend/gpu/*.c",
"backend/gpu/*.h",
"backend/gpu/Kbuild",
"ipa/*.c",
"ipa/*.h",
"ipa/Kbuild",
"platform/*.h",
"platform/*/*.c",
"platform/*/*.h",
"platform/*/Kbuild",
"thirdparty/*.c",
],
kbuild_options: [
"CONFIG_MALI_KUTF=n",
"CONFIG_MALI_MIDGARD=m",
"CONFIG_MALI_NO_MALI_DEFAULT_GPU={{.gpu}}",
"CONFIG_MALI_PLATFORM_NAME={{.mali_platform_name}}",
"MALI_KERNEL_TEST_API={{.unit_test_code}}",
"MALI_MOCK_TEST={{.mali_mock_test}}",
],
mali_error_inject: {
kbuild_options: ["CONFIG_MALI_ERROR_INJECT=y"],
},
mali_error_inject_random: {
kbuild_options: ["CONFIG_MALI_ERROR_INJECT_RANDOM=y"],
},
mali_prfcnt_set_secondary: {
kbuild_options: ["CONFIG_MALI_PRFCNT_SET_SECONDARY=y"],
},
mali_2mb_alloc: {
kbuild_options: ["CONFIG_MALI_2MB_ALLOC=y"],
},
mali_mock_test: {
srcs: ["tests/internal/src/mock/mali_kbase_pm_driver_mock.c"],
},
defaults: ["mali_kbase_shared_config_defaults"],
}
optional_subdirs = ["tests"]

View File

@@ -0,0 +1,132 @@
#
# (C) COPYRIGHT 2011-2013, 2015, 2017 ARM Limited. All rights reserved.
#
# This program is free software and is provided to you under the terms of the
# GNU General Public License version 2 as published by the Free Software
# Foundation, and any use by you of this program is subject to the terms
# of such GNU licence.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, you can access it online at
# http://www.gnu.org/licenses/gpl-2.0.html.
#
# SPDX-License-Identifier: GPL-2.0
#
#
##############################################################################
# This file contains per-module Doxygen configuration. Please do not add
# extra settings to this file without consulting all stakeholders, as they
# may cause override project-wide settings.
#
# Additionally, when defining aliases, macros, sections etc, use the module
# name as a prefix e.g. gles_my_alias.
##############################################################################
@INCLUDE = ../../bldsys/Doxyfile_common
# The INPUT tag can be used to specify the files and/or directories that contain
# documented source files. You may enter file names like "myfile.cpp" or
# directories like "/usr/src/myproject". Separate the files or directories
# with spaces.
INPUT += ../../kernel/drivers/gpu/arm/midgard/
##############################################################################
# Everything below here is optional, and in most cases not required
##############################################################################
# This tag can be used to specify a number of aliases that acts
# as commands in the documentation. An alias has the form "name=value".
# For example adding "sideeffect=\par Side Effects:\n" will allow you to
# put the command \sideeffect (or @sideeffect) in the documentation, which
# will result in a user-defined paragraph with heading "Side Effects:".
# You can put \n's in the value part of an alias to insert newlines.
ALIASES +=
# The ENABLED_SECTIONS tag can be used to enable conditional
# documentation sections, marked by \if sectionname ... \endif.
ENABLED_SECTIONS +=
# If the value of the INPUT tag contains directories, you can use the
# FILE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp
# and *.h) to filter out the source-files in the directories. If left
# blank the following patterns are tested:
# *.c *.cc *.cxx *.cpp *.c++ *.java *.ii *.ixx *.ipp *.i++ *.inl *.h *.hh *.hxx
# *.hpp *.h++ *.idl *.odl *.cs *.php *.php3 *.inc *.m *.mm *.py *.f90
FILE_PATTERNS +=
# The EXCLUDE tag can be used to specify files and/or directories that should
# excluded from the INPUT source files. This way you can easily exclude a
# subdirectory from a directory tree whose root is specified with the INPUT tag.
EXCLUDE += ../../kernel/drivers/gpu/arm/midgard/platform ../../kernel/drivers/gpu/arm/midgard/platform_dummy ../../kernel/drivers/gpu/arm/midgard/scripts ../../kernel/drivers/gpu/arm/midgard/tests ../../kernel/drivers/gpu/arm/midgard/Makefile ../../kernel/drivers/gpu/arm/midgard/Makefile.kbase ../../kernel/drivers/gpu/arm/midgard/Kbuild ../../kernel/drivers/gpu/arm/midgard/Kconfig ../../kernel/drivers/gpu/arm/midgard/sconscript ../../kernel/drivers/gpu/arm/midgard/docs ../../kernel/drivers/gpu/arm/midgard/mali_uk.h ../../kernel/drivers/gpu/arm/midgard/Makefile
# If the value of the INPUT tag contains directories, you can use the
# EXCLUDE_PATTERNS tag to specify one or more wildcard patterns to exclude
# certain files from those directories. Note that the wildcards are matched
# against the file with absolute path, so to exclude all test directories
# for example use the pattern */test/*
EXCLUDE_PATTERNS +=
# The EXCLUDE_SYMBOLS tag can be used to specify one or more symbol names
# (namespaces, classes, functions, etc.) that should be excluded from the
# output. The symbol name can be a fully qualified name, a word, or if the
# wildcard * is used, a substring. Examples: ANamespace, AClass,
# AClass::ANamespace, ANamespace::*Test
EXCLUDE_SYMBOLS +=
# The EXAMPLE_PATH tag can be used to specify one or more files or
# directories that contain example code fragments that are included (see
# the \include command).
EXAMPLE_PATH +=
# The IMAGE_PATH tag can be used to specify one or more files or
# directories that contain image that are included in the documentation (see
# the \image command).
IMAGE_PATH +=
# The INCLUDE_PATH tag can be used to specify one or more directories that
# contain include files that are not input files but should be processed by
# the preprocessor.
INCLUDE_PATH +=
# The PREDEFINED tag can be used to specify one or more macro names that
# are defined before the preprocessor is started (similar to the -D option of
# gcc). The argument of the tag is a list of macros of the form: name
# or name=definition (no spaces). If the definition and the = are
# omitted =1 is assumed. To prevent a macro definition from being
# undefined via #undef or recursively expanded use the := operator
# instead of the = operator.
PREDEFINED +=
# If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then
# this tag can be used to specify a list of macro names that should be expanded.
# The macro definition that is found in the sources will be used.
# Use the PREDEFINED tag if you want to use a different macro definition.
EXPAND_AS_DEFINED +=
# The DOTFILE_DIRS tag can be used to specify one or more directories that
# contain dot files that are included in the documentation (see the
# \dotfile command).
DOTFILE_DIRS += ../../kernel/drivers/gpu/arm/midgard/docs

View File

@@ -0,0 +1,117 @@
/*
*
* (C) COPYRIGHT 2010 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
* Foundation, and any use by you of this program is subject to the terms
* of such GNU licence.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, you can access it online at
* http://www.gnu.org/licenses/gpl-2.0.html.
*
* SPDX-License-Identifier: GPL-2.0
*
*/
digraph policy_objects_diagram {
rankdir=LR;
size="12,8";
compound=true;
node [ shape = box ];
subgraph cluster_policy_queues {
low_queue [ shape=record label = "LowP | {<ql>ctx_lo | ... | <qm>ctx_i | ... | <qr>ctx_hi}" ];
queues_middle_sep [ label="" shape=plaintext width=0 height=0 ];
rt_queue [ shape=record label = "RT | {<ql>ctx_lo | ... | <qm>ctx_j | ... | <qr>ctx_hi}" ];
label = "Policy's Queue(s)";
}
call_enqueue [ shape=plaintext label="enqueue_ctx()" ];
{
rank=same;
ordering=out;
call_dequeue [ shape=plaintext label="dequeue_head_ctx()\n+ runpool_add_ctx()" ];
call_ctxfinish [ shape=plaintext label="runpool_remove_ctx()" ];
call_ctxdone [ shape=plaintext label="don't requeue;\n/* ctx has no more jobs */" ];
}
subgraph cluster_runpool {
as0 [ width=2 height = 0.25 label="AS0: Job_1, ..., Job_n" ];
as1 [ width=2 height = 0.25 label="AS1: Job_1, ..., Job_m" ];
as2 [ width=2 height = 0.25 label="AS2: Job_1, ..., Job_p" ];
as3 [ width=2 height = 0.25 label="AS3: Job_1, ..., Job_q" ];
label = "Policy's Run Pool";
}
{
rank=same;
call_jdequeue [ shape=plaintext label="dequeue_job()" ];
sstop_dotfixup [ shape=plaintext label="" width=0 height=0 ];
}
{
rank=same;
ordering=out;
sstop [ shape=ellipse label="SS-Timer expires" ]
jobslots [ shape=record label="Jobslots: | <0>js[0] | <1>js[1] | <2>js[2]" ];
irq [ label="IRQ" shape=ellipse ];
job_finish [ shape=plaintext label="don't requeue;\n/* job done */" ];
}
hstop [ shape=ellipse label="HS-Timer expires" ]
/*
* Edges
*/
call_enqueue -> queues_middle_sep [ lhead=cluster_policy_queues ];
low_queue:qr -> call_dequeue:w;
rt_queue:qr -> call_dequeue:w;
call_dequeue -> as1 [lhead=cluster_runpool];
as1->call_jdequeue [ltail=cluster_runpool];
call_jdequeue->jobslots:0;
call_jdequeue->sstop_dotfixup [ arrowhead=none];
sstop_dotfixup->sstop [label="Spawn SS-Timer"];
sstop->jobslots [label="SoftStop"];
sstop->hstop [label="Spawn HS-Timer"];
hstop->jobslots:ne [label="HardStop"];
as3->call_ctxfinish:ne [ ltail=cluster_runpool ];
call_ctxfinish:sw->rt_queue:qm [ lhead=cluster_policy_queues label="enqueue_ctx()\n/* ctx still has jobs */" ];
call_ctxfinish->call_ctxdone [constraint=false];
call_ctxdone->call_enqueue [weight=0.1 labeldistance=20.0 labelangle=0.0 taillabel="Job submitted to the ctx" style=dotted constraint=false];
{
jobslots->irq [constraint=false];
irq->job_finish [constraint=false];
}
irq->as2 [lhead=cluster_runpool label="requeue_job()\n/* timeslice expired */" ];
}

View File

@@ -0,0 +1,68 @@
/*
*
* (C) COPYRIGHT 2010 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
* Foundation, and any use by you of this program is subject to the terms
* of such GNU licence.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, you can access it online at
* http://www.gnu.org/licenses/gpl-2.0.html.
*
* SPDX-License-Identifier: GPL-2.0
*
*/
digraph policy_objects_diagram {
rankdir=LR
size="6,6"
compound=true;
node [ shape = box ];
call_enqueue [ shape=plaintext label="enqueue ctx" ];
policy_queue [ label="Policy's Queue" ];
{
rank=same;
runpool [ label="Policy's Run Pool" ];
ctx_finish [ label="ctx finished" ];
}
{
rank=same;
jobslots [ shape=record label="Jobslots: | <0>js[0] | <1>js[1] | <2>js[2]" ];
job_finish [ label="Job finished" ];
}
/*
* Edges
*/
call_enqueue -> policy_queue;
policy_queue->runpool [label="dequeue ctx" weight=0.1];
runpool->policy_queue [label="requeue ctx" weight=0.1];
runpool->ctx_finish [ style=dotted ];
runpool->jobslots [label="dequeue job" weight=0.1];
jobslots->runpool [label="requeue job" weight=0.1];
jobslots->job_finish [ style=dotted ];
}

View File

@@ -0,0 +1,32 @@
#
# (C) COPYRIGHT 2016-2018 ARM Limited. All rights reserved.
#
# This program is free software and is provided to you under the terms of the
# GNU General Public License version 2 as published by the Free Software
# Foundation, and any use by you of this program is subject to the terms
# of such GNU licence.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, you can access it online at
# http://www.gnu.org/licenses/gpl-2.0.html.
#
# SPDX-License-Identifier: GPL-2.0
#
#
mali_kbase-y += \
ipa/mali_kbase_ipa_simple.o \
ipa/mali_kbase_ipa.o
mali_kbase-$(CONFIG_DEBUG_FS) += ipa/mali_kbase_ipa_debugfs.o
ifneq ($(wildcard $(src)/ipa/mali_kbase_ipa_vinstr_common.c),)
mali_kbase-y += \
ipa/mali_kbase_ipa_vinstr_g7x.o \
ipa/mali_kbase_ipa_vinstr_common.o
endif

View File

@@ -0,0 +1,632 @@
/*
*
* (C) COPYRIGHT 2016-2018 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
* Foundation, and any use by you of this program is subject to the terms
* of such GNU licence.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, you can access it online at
* http://www.gnu.org/licenses/gpl-2.0.html.
*
* SPDX-License-Identifier: GPL-2.0
*
*/
#include <linux/thermal.h>
#include <linux/devfreq_cooling.h>
#include <linux/of.h>
#include "mali_kbase.h"
#include "mali_kbase_ipa.h"
#include "mali_kbase_ipa_debugfs.h"
#include "mali_kbase_ipa_simple.h"
#include "backend/gpu/mali_kbase_pm_internal.h"
#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 13, 0))
#include <linux/pm_opp.h>
#else
#include <linux/opp.h>
#define dev_pm_opp_find_freq_exact opp_find_freq_exact
#define dev_pm_opp_get_voltage opp_get_voltage
#define dev_pm_opp opp
#endif
#define KBASE_IPA_FALLBACK_MODEL_NAME "mali-simple-power-model"
#define KBASE_IPA_G71_MODEL_NAME "mali-g71-power-model"
#define KBASE_IPA_G72_MODEL_NAME "mali-g72-power-model"
#define KBASE_IPA_TNOX_MODEL_NAME "mali-tnox-power-model"
static struct kbase_ipa_model_ops *kbase_ipa_all_model_ops[] = {
&kbase_simple_ipa_model_ops,
&kbase_g71_ipa_model_ops,
&kbase_g72_ipa_model_ops,
&kbase_tnox_ipa_model_ops
};
int kbase_ipa_model_recalculate(struct kbase_ipa_model *model)
{
int err = 0;
lockdep_assert_held(&model->kbdev->ipa.lock);
if (model->ops->recalculate) {
err = model->ops->recalculate(model);
if (err) {
dev_err(model->kbdev->dev,
"recalculation of power model %s returned error %d\n",
model->ops->name, err);
}
}
return err;
}
static struct kbase_ipa_model_ops *kbase_ipa_model_ops_find(struct kbase_device *kbdev,
const char *name)
{
int i;
for (i = 0; i < ARRAY_SIZE(kbase_ipa_all_model_ops); ++i) {
struct kbase_ipa_model_ops *ops = kbase_ipa_all_model_ops[i];
if (!strcmp(ops->name, name))
return ops;
}
dev_err(kbdev->dev, "power model \'%s\' not found\n", name);
return NULL;
}
void kbase_ipa_model_use_fallback_locked(struct kbase_device *kbdev)
{
atomic_set(&kbdev->ipa_use_configured_model, false);
}
void kbase_ipa_model_use_configured_locked(struct kbase_device *kbdev)
{
atomic_set(&kbdev->ipa_use_configured_model, true);
}
const char *kbase_ipa_model_name_from_id(u32 gpu_id)
{
const u32 prod_id = (gpu_id & GPU_ID_VERSION_PRODUCT_ID) >>
GPU_ID_VERSION_PRODUCT_ID_SHIFT;
if (GPU_ID_IS_NEW_FORMAT(prod_id)) {
switch (GPU_ID2_MODEL_MATCH_VALUE(prod_id)) {
case GPU_ID2_PRODUCT_TMIX:
return KBASE_IPA_G71_MODEL_NAME;
case GPU_ID2_PRODUCT_THEX:
return KBASE_IPA_G72_MODEL_NAME;
case GPU_ID2_PRODUCT_TNOX:
return KBASE_IPA_TNOX_MODEL_NAME;
case GPU_ID2_PRODUCT_TGOX:
if ((gpu_id & GPU_ID2_VERSION_MAJOR) ==
(0 << GPU_ID2_VERSION_MAJOR_SHIFT))
/* TGOX r0 shares a power model with TNOX */
return KBASE_IPA_TNOX_MODEL_NAME;
default:
return KBASE_IPA_FALLBACK_MODEL_NAME;
}
}
return KBASE_IPA_FALLBACK_MODEL_NAME;
}
static struct device_node *get_model_dt_node(struct kbase_ipa_model *model)
{
struct device_node *model_dt_node;
char compat_string[64];
snprintf(compat_string, sizeof(compat_string), "arm,%s",
model->ops->name);
/* of_find_compatible_node() will call of_node_put() on the root node,
* so take a reference on it first.
*/
of_node_get(model->kbdev->dev->of_node);
model_dt_node = of_find_compatible_node(model->kbdev->dev->of_node,
NULL, compat_string);
if (!model_dt_node && !model->missing_dt_node_warning) {
dev_warn(model->kbdev->dev,
"Couldn't find power_model DT node matching \'%s\'\n",
compat_string);
model->missing_dt_node_warning = true;
}
return model_dt_node;
}
int kbase_ipa_model_add_param_s32(struct kbase_ipa_model *model,
const char *name, s32 *addr,
size_t num_elems, bool dt_required)
{
int err, i;
struct device_node *model_dt_node = get_model_dt_node(model);
char *origin;
err = of_property_read_u32_array(model_dt_node, name, addr, num_elems);
/* We're done with model_dt_node now, so drop the reference taken in
* get_model_dt_node()/of_find_compatible_node().
*/
of_node_put(model_dt_node);
if (err && dt_required) {
memset(addr, 0, sizeof(s32) * num_elems);
dev_warn(model->kbdev->dev,
"Error %d, no DT entry: %s.%s = %zu*[0]\n",
err, model->ops->name, name, num_elems);
origin = "zero";
} else if (err && !dt_required) {
origin = "default";
} else /* !err */ {
origin = "DT";
}
/* Create a unique debugfs entry for each element */
for (i = 0; i < num_elems; ++i) {
char elem_name[32];
if (num_elems == 1)
snprintf(elem_name, sizeof(elem_name), "%s", name);
else
snprintf(elem_name, sizeof(elem_name), "%s.%d",
name, i);
dev_dbg(model->kbdev->dev, "%s.%s = %d (%s)\n",
model->ops->name, elem_name, addr[i], origin);
err = kbase_ipa_model_param_add(model, elem_name,
&addr[i], sizeof(s32),
PARAM_TYPE_S32);
if (err)
goto exit;
}
exit:
return err;
}
int kbase_ipa_model_add_param_string(struct kbase_ipa_model *model,
const char *name, char *addr,
size_t size, bool dt_required)
{
int err;
struct device_node *model_dt_node = get_model_dt_node(model);
const char *string_prop_value;
char *origin;
err = of_property_read_string(model_dt_node, name,
&string_prop_value);
/* We're done with model_dt_node now, so drop the reference taken in
* get_model_dt_node()/of_find_compatible_node().
*/
of_node_put(model_dt_node);
if (err && dt_required) {
strncpy(addr, "", size - 1);
dev_warn(model->kbdev->dev,
"Error %d, no DT entry: %s.%s = \'%s\'\n",
err, model->ops->name, name, addr);
err = 0;
origin = "zero";
} else if (err && !dt_required) {
origin = "default";
} else /* !err */ {
strncpy(addr, string_prop_value, size - 1);
origin = "DT";
}
addr[size - 1] = '\0';
dev_dbg(model->kbdev->dev, "%s.%s = \'%s\' (%s)\n",
model->ops->name, name, string_prop_value, origin);
err = kbase_ipa_model_param_add(model, name, addr, size,
PARAM_TYPE_STRING);
return err;
}
void kbase_ipa_term_model(struct kbase_ipa_model *model)
{
if (!model)
return;
lockdep_assert_held(&model->kbdev->ipa.lock);
if (model->ops->term)
model->ops->term(model);
kbase_ipa_model_param_free_all(model);
kfree(model);
}
KBASE_EXPORT_TEST_API(kbase_ipa_term_model);
struct kbase_ipa_model *kbase_ipa_init_model(struct kbase_device *kbdev,
struct kbase_ipa_model_ops *ops)
{
struct kbase_ipa_model *model;
int err;
lockdep_assert_held(&kbdev->ipa.lock);
if (!ops || !ops->name)
return NULL;
model = kzalloc(sizeof(struct kbase_ipa_model), GFP_KERNEL);
if (!model)
return NULL;
model->kbdev = kbdev;
model->ops = ops;
INIT_LIST_HEAD(&model->params);
err = model->ops->init(model);
if (err) {
dev_err(kbdev->dev,
"init of power model \'%s\' returned error %d\n",
ops->name, err);
kfree(model);
return NULL;
}
err = kbase_ipa_model_recalculate(model);
if (err) {
kbase_ipa_term_model(model);
return NULL;
}
return model;
}
KBASE_EXPORT_TEST_API(kbase_ipa_init_model);
static void kbase_ipa_term_locked(struct kbase_device *kbdev)
{
lockdep_assert_held(&kbdev->ipa.lock);
/* Clean up the models */
if (kbdev->ipa.configured_model != kbdev->ipa.fallback_model)
kbase_ipa_term_model(kbdev->ipa.configured_model);
kbase_ipa_term_model(kbdev->ipa.fallback_model);
kbdev->ipa.configured_model = NULL;
kbdev->ipa.fallback_model = NULL;
}
int kbase_ipa_init(struct kbase_device *kbdev)
{
const char *model_name;
struct kbase_ipa_model_ops *ops;
struct kbase_ipa_model *default_model = NULL;
int err;
mutex_init(&kbdev->ipa.lock);
/*
* Lock during init to avoid warnings from lockdep_assert_held (there
* shouldn't be any concurrent access yet).
*/
mutex_lock(&kbdev->ipa.lock);
/* The simple IPA model must *always* be present.*/
ops = kbase_ipa_model_ops_find(kbdev, KBASE_IPA_FALLBACK_MODEL_NAME);
default_model = kbase_ipa_init_model(kbdev, ops);
if (!default_model) {
err = -EINVAL;
goto end;
}
kbdev->ipa.fallback_model = default_model;
err = of_property_read_string(kbdev->dev->of_node,
"ipa-model",
&model_name);
if (err) {
/* Attempt to load a match from GPU-ID */
u32 gpu_id;
gpu_id = kbdev->gpu_props.props.raw_props.gpu_id;
model_name = kbase_ipa_model_name_from_id(gpu_id);
dev_dbg(kbdev->dev,
"Inferring model from GPU ID 0x%x: \'%s\'\n",
gpu_id, model_name);
err = 0;
} else {
dev_dbg(kbdev->dev,
"Using ipa-model parameter from DT: \'%s\'\n",
model_name);
}
if (strcmp(KBASE_IPA_FALLBACK_MODEL_NAME, model_name) != 0) {
ops = kbase_ipa_model_ops_find(kbdev, model_name);
kbdev->ipa.configured_model = kbase_ipa_init_model(kbdev, ops);
if (!kbdev->ipa.configured_model) {
dev_warn(kbdev->dev,
"Failed to initialize ipa-model: \'%s\'\n"
"Falling back on default model\n",
model_name);
kbdev->ipa.configured_model = default_model;
}
} else {
kbdev->ipa.configured_model = default_model;
}
kbase_ipa_model_use_configured_locked(kbdev);
end:
if (err)
kbase_ipa_term_locked(kbdev);
else
dev_info(kbdev->dev,
"Using configured power model %s, and fallback %s\n",
kbdev->ipa.configured_model->ops->name,
kbdev->ipa.fallback_model->ops->name);
mutex_unlock(&kbdev->ipa.lock);
return err;
}
KBASE_EXPORT_TEST_API(kbase_ipa_init);
void kbase_ipa_term(struct kbase_device *kbdev)
{
mutex_lock(&kbdev->ipa.lock);
kbase_ipa_term_locked(kbdev);
mutex_unlock(&kbdev->ipa.lock);
}
KBASE_EXPORT_TEST_API(kbase_ipa_term);
/**
* kbase_scale_dynamic_power() - Scale a dynamic power coefficient to an OPP
* @c: Dynamic model coefficient, in pW/(Hz V^2). Should be in range
* 0 < c < 2^26 to prevent overflow.
* @freq: Frequency, in Hz. Range: 2^23 < freq < 2^30 (~8MHz to ~1GHz)
* @voltage: Voltage, in mV. Range: 2^9 < voltage < 2^13 (~0.5V to ~8V)
*
* Keep a record of the approximate range of each value at every stage of the
* calculation, to ensure we don't overflow. This makes heavy use of the
* approximations 1000 = 2^10 and 1000000 = 2^20, but does the actual
* calculations in decimal for increased accuracy.
*
* Return: Power consumption, in mW. Range: 0 < p < 2^13 (0W to ~8W)
*/
static u32 kbase_scale_dynamic_power(const u32 c, const u32 freq,
const u32 voltage)
{
/* Range: 2^8 < v2 < 2^16 m(V^2) */
const u32 v2 = (voltage * voltage) / 1000;
/* Range: 2^3 < f_MHz < 2^10 MHz */
const u32 f_MHz = freq / 1000000;
/* Range: 2^11 < v2f_big < 2^26 kHz V^2 */
const u32 v2f_big = v2 * f_MHz;
/* Range: 2^1 < v2f < 2^16 MHz V^2 */
const u32 v2f = v2f_big / 1000;
/* Range (working backwards from next line): 0 < v2fc < 2^23 uW.
* Must be < 2^42 to avoid overflowing the return value. */
const u64 v2fc = (u64) c * (u64) v2f;
/* Range: 0 < v2fc / 1000 < 2^13 mW */
return div_u64(v2fc, 1000);
}
/**
* kbase_scale_static_power() - Scale a static power coefficient to an OPP
* @c: Static model coefficient, in uW/V^3. Should be in range
* 0 < c < 2^32 to prevent overflow.
* @voltage: Voltage, in mV. Range: 2^9 < voltage < 2^13 (~0.5V to ~8V)
*
* Return: Power consumption, in mW. Range: 0 < p < 2^13 (0W to ~8W)
*/
u32 kbase_scale_static_power(const u32 c, const u32 voltage)
{
/* Range: 2^8 < v2 < 2^16 m(V^2) */
const u32 v2 = (voltage * voltage) / 1000;
/* Range: 2^17 < v3_big < 2^29 m(V^2) mV */
const u32 v3_big = v2 * voltage;
/* Range: 2^7 < v3 < 2^19 m(V^3) */
const u32 v3 = v3_big / 1000;
/*
* Range (working backwards from next line): 0 < v3c_big < 2^33 nW.
* The result should be < 2^52 to avoid overflowing the return value.
*/
const u64 v3c_big = (u64) c * (u64) v3;
/* Range: 0 < v3c_big / 1000000 < 2^13 mW */
return div_u64(v3c_big, 1000000);
}
static struct kbase_ipa_model *get_current_model(struct kbase_device *kbdev)
{
lockdep_assert_held(&kbdev->ipa.lock);
if (atomic_read(&kbdev->ipa_use_configured_model))
return kbdev->ipa.configured_model;
else
return kbdev->ipa.fallback_model;
}
static u32 get_static_power_locked(struct kbase_device *kbdev,
struct kbase_ipa_model *model,
unsigned long voltage)
{
u32 power = 0;
int err;
u32 power_coeff;
lockdep_assert_held(&model->kbdev->ipa.lock);
if (!model->ops->get_static_coeff)
model = kbdev->ipa.fallback_model;
if (model->ops->get_static_coeff) {
err = model->ops->get_static_coeff(model, &power_coeff);
if (!err)
power = kbase_scale_static_power(power_coeff,
(u32) voltage);
}
return power;
}
#if defined(CONFIG_MALI_PWRSOFT_765) || \
LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0)
static unsigned long kbase_get_static_power(struct devfreq *df,
unsigned long voltage)
#else
static unsigned long kbase_get_static_power(unsigned long voltage)
#endif
{
struct kbase_ipa_model *model;
u32 power = 0;
#if defined(CONFIG_MALI_PWRSOFT_765) || \
LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0)
struct kbase_device *kbdev = dev_get_drvdata(&df->dev);
#else
struct kbase_device *kbdev = kbase_find_device(-1);
#endif
mutex_lock(&kbdev->ipa.lock);
model = get_current_model(kbdev);
power = get_static_power_locked(kbdev, model, voltage);
mutex_unlock(&kbdev->ipa.lock);
#if !(defined(CONFIG_MALI_PWRSOFT_765) || \
LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0))
kbase_release_device(kbdev);
#endif
return power;
}
#if defined(CONFIG_MALI_PWRSOFT_765) || \
LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0)
static unsigned long kbase_get_dynamic_power(struct devfreq *df,
unsigned long freq,
unsigned long voltage)
#else
static unsigned long kbase_get_dynamic_power(unsigned long freq,
unsigned long voltage)
#endif
{
struct kbase_ipa_model *model;
u32 power_coeff = 0, power = 0;
int err = 0;
#if defined(CONFIG_MALI_PWRSOFT_765) || \
LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0)
struct kbase_device *kbdev = dev_get_drvdata(&df->dev);
#else
struct kbase_device *kbdev = kbase_find_device(-1);
#endif
mutex_lock(&kbdev->ipa.lock);
model = kbdev->ipa.fallback_model;
err = model->ops->get_dynamic_coeff(model, &power_coeff);
if (!err)
power = kbase_scale_dynamic_power(power_coeff, freq, voltage);
else
dev_err_ratelimited(kbdev->dev,
"Model %s returned error code %d\n",
model->ops->name, err);
mutex_unlock(&kbdev->ipa.lock);
#if !(defined(CONFIG_MALI_PWRSOFT_765) || \
LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0))
kbase_release_device(kbdev);
#endif
return power;
}
int kbase_get_real_power_locked(struct kbase_device *kbdev, u32 *power,
unsigned long freq,
unsigned long voltage)
{
struct kbase_ipa_model *model;
u32 power_coeff = 0;
int err = 0;
struct kbasep_pm_metrics diff;
u64 total_time;
lockdep_assert_held(&kbdev->ipa.lock);
kbase_pm_get_dvfs_metrics(kbdev, &kbdev->ipa.last_metrics, &diff);
model = get_current_model(kbdev);
err = model->ops->get_dynamic_coeff(model, &power_coeff);
/* If the counter model returns an error (e.g. switching back to
* protected mode and failing to read counters, or a counter sample
* with too few cycles), revert to the fallback model.
*/
if (err && model != kbdev->ipa.fallback_model) {
model = kbdev->ipa.fallback_model;
err = model->ops->get_dynamic_coeff(model, &power_coeff);
}
if (err)
return err;
*power = kbase_scale_dynamic_power(power_coeff, freq, voltage);
/* time_busy / total_time cannot be >1, so assigning the 64-bit
* result of div_u64 to *power cannot overflow.
*/
total_time = diff.time_busy + (u64) diff.time_idle;
*power = div_u64(*power * (u64) diff.time_busy,
max(total_time, 1ull));
*power += get_static_power_locked(kbdev, model, voltage);
return err;
}
KBASE_EXPORT_TEST_API(kbase_get_real_power_locked);
int kbase_get_real_power(struct devfreq *df, u32 *power,
unsigned long freq,
unsigned long voltage)
{
int ret;
struct kbase_device *kbdev = dev_get_drvdata(&df->dev);
mutex_lock(&kbdev->ipa.lock);
ret = kbase_get_real_power_locked(kbdev, power, freq, voltage);
mutex_unlock(&kbdev->ipa.lock);
return ret;
}
KBASE_EXPORT_TEST_API(kbase_get_real_power);
#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 4, 0)
struct devfreq_cooling_ops kbase_ipa_power_model_ops = {
#else
struct devfreq_cooling_power kbase_ipa_power_model_ops = {
#endif
.get_static_power = &kbase_get_static_power,
.get_dynamic_power = &kbase_get_dynamic_power,
#if defined(CONFIG_MALI_PWRSOFT_765) || \
LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0)
.get_real_power = &kbase_get_real_power,
#endif
};
KBASE_EXPORT_TEST_API(kbase_ipa_power_model_ops);

View File

@@ -0,0 +1,228 @@
/*
*
* (C) COPYRIGHT 2016-2018 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
* Foundation, and any use by you of this program is subject to the terms
* of such GNU licence.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, you can access it online at
* http://www.gnu.org/licenses/gpl-2.0.html.
*
* SPDX-License-Identifier: GPL-2.0
*
*/
#ifndef _KBASE_IPA_H_
#define _KBASE_IPA_H_
#if defined(CONFIG_MALI_DEVFREQ) && defined(CONFIG_DEVFREQ_THERMAL)
struct devfreq;
/**
* struct kbase_ipa_model - Object describing a particular IPA model.
* @kbdev: pointer to kbase device
* @model_data: opaque pointer to model specific data, accessed
* only by model specific methods.
* @ops: pointer to object containing model specific methods.
* @params: head of the list of debugfs params added for model
* @missing_dt_node_warning: flag to limit the matching power model DT not found
* warning to once.
*/
struct kbase_ipa_model {
struct kbase_device *kbdev;
void *model_data;
struct kbase_ipa_model_ops *ops;
struct list_head params;
bool missing_dt_node_warning;
};
/**
* kbase_ipa_model_add_param_s32 - Add an integer model parameter
* @model: pointer to IPA model
* @name: name of corresponding debugfs entry
* @addr: address where the value is stored
* @num_elems: number of elements (1 if not an array)
* @dt_required: if false, a corresponding devicetree entry is not required,
* and the current value will be used. If true, a warning is
* output and the data is zeroed
*
* Return: 0 on success, or an error code
*/
int kbase_ipa_model_add_param_s32(struct kbase_ipa_model *model,
const char *name, s32 *addr,
size_t num_elems, bool dt_required);
/**
* kbase_ipa_model_add_param_string - Add a string model parameter
* @model: pointer to IPA model
* @name: name of corresponding debugfs entry
* @addr: address where the value is stored
* @size: size, in bytes, of the value storage (so the maximum string
* length is size - 1)
* @dt_required: if false, a corresponding devicetree entry is not required,
* and the current value will be used. If true, a warning is
* output and the data is zeroed
*
* Return: 0 on success, or an error code
*/
int kbase_ipa_model_add_param_string(struct kbase_ipa_model *model,
const char *name, char *addr,
size_t size, bool dt_required);
struct kbase_ipa_model_ops {
char *name;
/* The init, recalculate and term ops on the default model are always
* called. However, all the other models are only invoked if the model
* is selected in the device tree. Otherwise they are never
* initialized. Additional resources can be acquired by models in
* init(), however they must be terminated in the term().
*/
int (*init)(struct kbase_ipa_model *model);
/* Called immediately after init(), or when a parameter is changed, so
* that any coefficients derived from model parameters can be
* recalculated. */
int (*recalculate)(struct kbase_ipa_model *model);
void (*term)(struct kbase_ipa_model *model);
/*
* get_dynamic_coeff() - calculate dynamic power coefficient
* @model: pointer to model
* @coeffp: pointer to return value location
*
* Calculate a dynamic power coefficient, with units pW/(Hz V^2), which
* is then scaled by the IPA framework according to the current OPP's
* frequency and voltage.
*
* Return: 0 on success, or an error code.
*/
int (*get_dynamic_coeff)(struct kbase_ipa_model *model, u32 *coeffp);
/*
* get_static_coeff() - calculate static power coefficient
* @model: pointer to model
* @coeffp: pointer to return value location
*
* Calculate a static power coefficient, with units uW/(V^3), which is
* scaled by the IPA framework according to the current OPP's voltage.
*
* Return: 0 on success, or an error code.
*/
int (*get_static_coeff)(struct kbase_ipa_model *model, u32 *coeffp);
};
/**
* kbase_ipa_init - Initialize the IPA feature
* @kbdev: pointer to kbase device
*
* simple IPA power model is initialized as a fallback model and if that
* initialization fails then IPA is not used.
* The device tree is read for the name of ipa model to be used, by using the
* property string "ipa-model". If that ipa model is supported then it is
* initialized but if the initialization fails then simple power model is used.
*
* Return: 0 on success, negative -errno on error
*/
int kbase_ipa_init(struct kbase_device *kbdev);
/**
* kbase_ipa_term - Terminate the IPA feature
* @kbdev: pointer to kbase device
*
* Both simple IPA power model and model retrieved from device tree are
* terminated.
*/
void kbase_ipa_term(struct kbase_device *kbdev);
/**
* kbase_ipa_model_recalculate - Recalculate the model coefficients
* @model: pointer to the IPA model object, already initialized
*
* It shall be called immediately after the model has been initialized
* or when the model parameter has changed, so that any coefficients
* derived from parameters can be recalculated.
* Its a wrapper for the module specific recalculate() method.
*
* Return: 0 on success, negative -errno on error
*/
int kbase_ipa_model_recalculate(struct kbase_ipa_model *model);
/**
* kbase_ipa_init_model - Initilaize the particular IPA model
* @kbdev: pointer to the IPA model object, already initialized
* @ops: pointer to object containing model specific methods.
*
* Initialize the model corresponding to the @ops pointer passed.
* The init() method specified in @ops would be called.
*
* Return: pointer to kbase_ipa_model on success, NULL on error
*/
struct kbase_ipa_model *kbase_ipa_init_model(struct kbase_device *kbdev,
struct kbase_ipa_model_ops *ops);
/**
* kbase_ipa_term_model - Terminate the particular IPA model
* @model: pointer to the IPA model object, already initialized
*
* Terminate the model, using the term() method.
* Module specific parameters would be freed.
*/
void kbase_ipa_term_model(struct kbase_ipa_model *model);
/* Switch to the fallback model */
void kbase_ipa_model_use_fallback_locked(struct kbase_device *kbdev);
/* Switch to the model retrieved from device tree */
void kbase_ipa_model_use_configured_locked(struct kbase_device *kbdev);
extern struct kbase_ipa_model_ops kbase_g71_ipa_model_ops;
extern struct kbase_ipa_model_ops kbase_g72_ipa_model_ops;
extern struct kbase_ipa_model_ops kbase_tnox_ipa_model_ops;
#if MALI_UNIT_TEST
/**
* kbase_get_real_power() - get the real power consumption of the GPU
* @df: dynamic voltage and frequency scaling information for the GPU.
* @power: where to store the power consumption, in mW.
* @freq: a frequency, in HZ.
* @voltage: a voltage, in mV.
*
* This function is only exposed for use by unit tests. The returned value
* incorporates both static and dynamic power consumption.
*
* Return: 0 on success, or an error code.
*/
int kbase_get_real_power(struct devfreq *df, u32 *power,
unsigned long freq,
unsigned long voltage);
/* Called by kbase_get_real_power() to invoke the power models.
* Must be called with kbdev->ipa.lock held.
*/
int kbase_get_real_power_locked(struct kbase_device *kbdev, u32 *power,
unsigned long freq,
unsigned long voltage);
#endif /* MALI_UNIT_TEST */
#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 4, 0)
extern struct devfreq_cooling_ops kbase_ipa_power_model_ops;
#else
extern struct devfreq_cooling_power kbase_ipa_power_model_ops;
#endif
#else /* !(defined(CONFIG_MALI_DEVFREQ) && defined(CONFIG_DEVFREQ_THERMAL)) */
static inline void kbase_ipa_model_use_fallback_locked(struct kbase_device *kbdev)
{ }
static inline void kbase_ipa_model_use_configured_locked(struct kbase_device *kbdev)
{ }
#endif /* (defined(CONFIG_MALI_DEVFREQ) && defined(CONFIG_DEVFREQ_THERMAL)) */
#endif

View File

@@ -0,0 +1,268 @@
/*
*
* (C) COPYRIGHT 2017 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
* Foundation, and any use by you of this program is subject to the terms
* of such GNU licence.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, you can access it online at
* http://www.gnu.org/licenses/gpl-2.0.html.
*
* SPDX-License-Identifier: GPL-2.0
*
*/
#include <linux/debugfs.h>
#include <linux/list.h>
#include <linux/mutex.h>
#include "mali_kbase.h"
#include "mali_kbase_ipa.h"
#include "mali_kbase_ipa_debugfs.h"
#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0))
#define DEFINE_DEBUGFS_ATTRIBUTE DEFINE_SIMPLE_ATTRIBUTE
#endif
struct kbase_ipa_model_param {
char *name;
union {
void *voidp;
s32 *s32p;
char *str;
} addr;
size_t size;
enum kbase_ipa_model_param_type type;
struct kbase_ipa_model *model;
struct list_head link;
};
static int param_int_get(void *data, u64 *val)
{
struct kbase_ipa_model_param *param = data;
mutex_lock(&param->model->kbdev->ipa.lock);
*(s64 *) val = *param->addr.s32p;
mutex_unlock(&param->model->kbdev->ipa.lock);
return 0;
}
static int param_int_set(void *data, u64 val)
{
struct kbase_ipa_model_param *param = data;
struct kbase_ipa_model *model = param->model;
s64 sval = (s64) val;
s32 old_val;
int err = 0;
if (sval < S32_MIN || sval > S32_MAX)
return -ERANGE;
mutex_lock(&param->model->kbdev->ipa.lock);
old_val = *param->addr.s32p;
*param->addr.s32p = val;
err = kbase_ipa_model_recalculate(model);
if (err < 0)
*param->addr.s32p = old_val;
mutex_unlock(&param->model->kbdev->ipa.lock);
return err;
}
DEFINE_DEBUGFS_ATTRIBUTE(fops_s32, param_int_get, param_int_set, "%lld\n");
static ssize_t param_string_get(struct file *file, char __user *user_buf,
size_t count, loff_t *ppos)
{
struct kbase_ipa_model_param *param = file->private_data;
ssize_t ret;
size_t len;
mutex_lock(&param->model->kbdev->ipa.lock);
len = strnlen(param->addr.str, param->size - 1) + 1;
ret = simple_read_from_buffer(user_buf, count, ppos,
param->addr.str, len);
mutex_unlock(&param->model->kbdev->ipa.lock);
return ret;
}
static ssize_t param_string_set(struct file *file, const char __user *user_buf,
size_t count, loff_t *ppos)
{
struct kbase_ipa_model_param *param = file->private_data;
struct kbase_ipa_model *model = param->model;
char *old_str = NULL;
ssize_t ret = count;
size_t buf_size;
int err;
mutex_lock(&model->kbdev->ipa.lock);
if (count > param->size) {
ret = -EINVAL;
goto end;
}
old_str = kstrndup(param->addr.str, param->size, GFP_KERNEL);
if (!old_str) {
ret = -ENOMEM;
goto end;
}
buf_size = min(param->size - 1, count);
if (copy_from_user(param->addr.str, user_buf, buf_size)) {
ret = -EFAULT;
goto end;
}
param->addr.str[buf_size] = '\0';
err = kbase_ipa_model_recalculate(model);
if (err < 0) {
ret = err;
strlcpy(param->addr.str, old_str, param->size);
}
end:
kfree(old_str);
mutex_unlock(&model->kbdev->ipa.lock);
return ret;
}
static const struct file_operations fops_string = {
.read = param_string_get,
.write = param_string_set,
.open = simple_open,
.llseek = default_llseek,
};
int kbase_ipa_model_param_add(struct kbase_ipa_model *model, const char *name,
void *addr, size_t size,
enum kbase_ipa_model_param_type type)
{
struct kbase_ipa_model_param *param;
param = kzalloc(sizeof(*param), GFP_KERNEL);
if (!param)
return -ENOMEM;
/* 'name' is stack-allocated for array elements, so copy it into
* heap-allocated storage */
param->name = kstrdup(name, GFP_KERNEL);
if (!param->name) {
kfree(param);
return -ENOMEM;
}
param->addr.voidp = addr;
param->size = size;
param->type = type;
param->model = model;
list_add(&param->link, &model->params);
return 0;
}
void kbase_ipa_model_param_free_all(struct kbase_ipa_model *model)
{
struct kbase_ipa_model_param *param_p, *param_n;
list_for_each_entry_safe(param_p, param_n, &model->params, link) {
list_del(&param_p->link);
kfree(param_p->name);
kfree(param_p);
}
}
static void kbase_ipa_model_debugfs_init(struct kbase_ipa_model *model)
{
struct list_head *it;
struct dentry *dir;
lockdep_assert_held(&model->kbdev->ipa.lock);
dir = debugfs_create_dir(model->ops->name,
model->kbdev->mali_debugfs_directory);
if (!dir) {
dev_err(model->kbdev->dev,
"Couldn't create mali debugfs %s directory",
model->ops->name);
return;
}
list_for_each(it, &model->params) {
struct kbase_ipa_model_param *param =
list_entry(it,
struct kbase_ipa_model_param,
link);
const struct file_operations *fops = NULL;
switch (param->type) {
case PARAM_TYPE_S32:
fops = &fops_s32;
break;
case PARAM_TYPE_STRING:
fops = &fops_string;
break;
}
if (unlikely(!fops)) {
dev_err(model->kbdev->dev,
"Type not set for %s parameter %s\n",
model->ops->name, param->name);
} else {
debugfs_create_file(param->name, S_IRUGO | S_IWUSR,
dir, param, fops);
}
}
}
void kbase_ipa_model_param_set_s32(struct kbase_ipa_model *model,
const char *name, s32 val)
{
struct kbase_ipa_model_param *param;
mutex_lock(&model->kbdev->ipa.lock);
list_for_each_entry(param, &model->params, link) {
if (!strcmp(param->name, name)) {
if (param->type == PARAM_TYPE_S32) {
*param->addr.s32p = val;
} else {
dev_err(model->kbdev->dev,
"Wrong type for %s parameter %s\n",
model->ops->name, param->name);
}
break;
}
}
mutex_unlock(&model->kbdev->ipa.lock);
}
KBASE_EXPORT_TEST_API(kbase_ipa_model_param_set_s32);
void kbase_ipa_debugfs_init(struct kbase_device *kbdev)
{
mutex_lock(&kbdev->ipa.lock);
if (kbdev->ipa.configured_model != kbdev->ipa.fallback_model)
kbase_ipa_model_debugfs_init(kbdev->ipa.configured_model);
kbase_ipa_model_debugfs_init(kbdev->ipa.fallback_model);
mutex_unlock(&kbdev->ipa.lock);
}

View File

@@ -0,0 +1,68 @@
/*
*
* (C) COPYRIGHT 2017 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
* Foundation, and any use by you of this program is subject to the terms
* of such GNU licence.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, you can access it online at
* http://www.gnu.org/licenses/gpl-2.0.html.
*
* SPDX-License-Identifier: GPL-2.0
*
*/
#ifndef _KBASE_IPA_DEBUGFS_H_
#define _KBASE_IPA_DEBUGFS_H_
enum kbase_ipa_model_param_type {
PARAM_TYPE_S32 = 1,
PARAM_TYPE_STRING,
};
#ifdef CONFIG_DEBUG_FS
void kbase_ipa_debugfs_init(struct kbase_device *kbdev);
int kbase_ipa_model_param_add(struct kbase_ipa_model *model, const char *name,
void *addr, size_t size,
enum kbase_ipa_model_param_type type);
void kbase_ipa_model_param_free_all(struct kbase_ipa_model *model);
/**
* kbase_ipa_model_param_set_s32 - Set an integer model parameter
*
* @model: pointer to IPA model
* @name: name of corresponding debugfs entry
* @val: new value of the parameter
*
* This function is only exposed for use by unit tests running in
* kernel space. Normally it is expected that parameter values will
* instead be set via debugfs.
*/
void kbase_ipa_model_param_set_s32(struct kbase_ipa_model *model,
const char *name, s32 val);
#else /* CONFIG_DEBUG_FS */
static inline int kbase_ipa_model_param_add(struct kbase_ipa_model *model,
const char *name, void *addr,
size_t size,
enum kbase_ipa_model_param_type type)
{
return 0;
}
static inline void kbase_ipa_model_param_free_all(struct kbase_ipa_model *model)
{ }
#endif /* CONFIG_DEBUG_FS */
#endif /* _KBASE_IPA_DEBUGFS_H_ */

View File

@@ -0,0 +1,350 @@
/*
*
* (C) COPYRIGHT 2016-2018 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
* Foundation, and any use by you of this program is subject to the terms
* of such GNU licence.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, you can access it online at
* http://www.gnu.org/licenses/gpl-2.0.html.
*
* SPDX-License-Identifier: GPL-2.0
*
*/
#include <uapi/linux/thermal.h>
#include <linux/thermal.h>
#ifdef CONFIG_DEVFREQ_THERMAL
#include <linux/devfreq_cooling.h>
#endif
#include <linux/of.h>
#include <linux/delay.h>
#include <linux/kthread.h>
#include "mali_kbase.h"
#include "mali_kbase_defs.h"
#include "mali_kbase_ipa_simple.h"
#include "mali_kbase_ipa_debugfs.h"
#if MALI_UNIT_TEST
#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 3, 0)
static unsigned long dummy_temp;
static int kbase_simple_power_model_get_dummy_temp(
struct thermal_zone_device *tz,
unsigned long *temp)
{
*temp = READ_ONCE(dummy_temp);
return 0;
}
#else
static int dummy_temp;
static int kbase_simple_power_model_get_dummy_temp(
struct thermal_zone_device *tz,
int *temp)
{
*temp = READ_ONCE(dummy_temp);
return 0;
}
#endif
/* Intercept calls to the kernel function using a macro */
#ifdef thermal_zone_get_temp
#undef thermal_zone_get_temp
#endif
#define thermal_zone_get_temp(tz, temp) \
kbase_simple_power_model_get_dummy_temp(tz, temp)
void kbase_simple_power_model_set_dummy_temp(int temp)
{
WRITE_ONCE(dummy_temp, temp);
}
KBASE_EXPORT_TEST_API(kbase_simple_power_model_set_dummy_temp);
#endif /* MALI_UNIT_TEST */
/*
* This model is primarily designed for the Juno platform. It may not be
* suitable for other platforms. The additional resources in this model
* should preferably be minimal, as this model is rarely used when a dynamic
* model is available.
*/
/**
* struct kbase_ipa_model_simple_data - IPA context per device
* @dynamic_coefficient: dynamic coefficient of the model
* @static_coefficient: static coefficient of the model
* @ts: Thermal scaling coefficients of the model
* @tz_name: Thermal zone name
* @gpu_tz: thermal zone device
* @poll_temperature_thread: Handle for temperature polling thread
* @current_temperature: Most recent value of polled temperature
* @temperature_poll_interval_ms: How often temperature should be checked, in ms
*/
struct kbase_ipa_model_simple_data {
u32 dynamic_coefficient;
u32 static_coefficient;
s32 ts[4];
char tz_name[THERMAL_NAME_LENGTH];
struct thermal_zone_device *gpu_tz;
struct task_struct *poll_temperature_thread;
int current_temperature;
int temperature_poll_interval_ms;
};
#define FALLBACK_STATIC_TEMPERATURE 55000
/**
* calculate_temp_scaling_factor() - Calculate temperature scaling coefficient
* @ts: Signed coefficients, in order t^0 to t^3, with units Deg^-N
* @t: Temperature, in mDeg C. Range: -2^17 < t < 2^17
*
* Scale the temperature according to a cubic polynomial whose coefficients are
* provided in the device tree. The result is used to scale the static power
* coefficient, where 1000000 means no change.
*
* Return: Temperature scaling factor. Range 0 <= ret <= 10,000,000.
*/
static u32 calculate_temp_scaling_factor(s32 ts[4], s64 t)
{
/* Range: -2^24 < t2 < 2^24 m(Deg^2) */
const s64 t2 = div_s64((t * t), 1000);
/* Range: -2^31 < t3 < 2^31 m(Deg^3) */
const s64 t3 = div_s64((t * t2), 1000);
/*
* Sum the parts. t^[1-3] are in m(Deg^N), but the coefficients are in
* Deg^-N, so we need to multiply the last coefficient by 1000.
* Range: -2^63 < res_big < 2^63
*/
const s64 res_big = ts[3] * t3 /* +/- 2^62 */
+ ts[2] * t2 /* +/- 2^55 */
+ ts[1] * t /* +/- 2^48 */
+ ts[0] * 1000; /* +/- 2^41 */
/* Range: -2^60 < res_unclamped < 2^60 */
s64 res_unclamped = div_s64(res_big, 1000);
/* Clamp to range of 0x to 10x the static power */
return clamp(res_unclamped, (s64) 0, (s64) 10000000);
}
/* We can't call thermal_zone_get_temp() directly in model_static_coeff(),
* because we don't know if tz->lock is held in the same thread. So poll it in
* a separate thread to get around this. */
static int poll_temperature(void *data)
{
struct kbase_ipa_model_simple_data *model_data =
(struct kbase_ipa_model_simple_data *) data;
#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 3, 0)
unsigned long temp;
#else
int temp;
#endif
while (!kthread_should_stop()) {
struct thermal_zone_device *tz = READ_ONCE(model_data->gpu_tz);
if (tz) {
int ret;
ret = thermal_zone_get_temp(tz, &temp);
if (ret) {
pr_warn_ratelimited("Error reading temperature for gpu thermal zone: %d\n",
ret);
temp = FALLBACK_STATIC_TEMPERATURE;
}
} else {
temp = FALLBACK_STATIC_TEMPERATURE;
}
WRITE_ONCE(model_data->current_temperature, temp);
msleep_interruptible(READ_ONCE(model_data->temperature_poll_interval_ms));
}
return 0;
}
static int model_static_coeff(struct kbase_ipa_model *model, u32 *coeffp)
{
u32 temp_scaling_factor;
struct kbase_ipa_model_simple_data *model_data =
(struct kbase_ipa_model_simple_data *) model->model_data;
u64 coeff_big;
int temp;
temp = READ_ONCE(model_data->current_temperature);
/* Range: 0 <= temp_scaling_factor < 2^24 */
temp_scaling_factor = calculate_temp_scaling_factor(model_data->ts,
temp);
/*
* Range: 0 <= coeff_big < 2^52 to avoid overflowing *coeffp. This
* means static_coefficient must be in range
* 0 <= static_coefficient < 2^28.
*/
coeff_big = (u64) model_data->static_coefficient * (u64) temp_scaling_factor;
*coeffp = div_u64(coeff_big, 1000000);
return 0;
}
static int model_dynamic_coeff(struct kbase_ipa_model *model, u32 *coeffp)
{
struct kbase_ipa_model_simple_data *model_data =
(struct kbase_ipa_model_simple_data *) model->model_data;
*coeffp = model_data->dynamic_coefficient;
return 0;
}
static int add_params(struct kbase_ipa_model *model)
{
int err = 0;
struct kbase_ipa_model_simple_data *model_data =
(struct kbase_ipa_model_simple_data *)model->model_data;
err = kbase_ipa_model_add_param_s32(model, "static-coefficient",
&model_data->static_coefficient,
1, true);
if (err)
goto end;
err = kbase_ipa_model_add_param_s32(model, "dynamic-coefficient",
&model_data->dynamic_coefficient,
1, true);
if (err)
goto end;
err = kbase_ipa_model_add_param_s32(model, "ts",
model_data->ts, 4, true);
if (err)
goto end;
err = kbase_ipa_model_add_param_string(model, "thermal-zone",
model_data->tz_name,
sizeof(model_data->tz_name), true);
if (err)
goto end;
model_data->temperature_poll_interval_ms = 200;
err = kbase_ipa_model_add_param_s32(model, "temp-poll-interval-ms",
&model_data->temperature_poll_interval_ms,
1, false);
end:
return err;
}
static int kbase_simple_power_model_init(struct kbase_ipa_model *model)
{
int err;
struct kbase_ipa_model_simple_data *model_data;
model_data = kzalloc(sizeof(struct kbase_ipa_model_simple_data),
GFP_KERNEL);
if (!model_data)
return -ENOMEM;
model->model_data = (void *) model_data;
model_data->current_temperature = FALLBACK_STATIC_TEMPERATURE;
model_data->poll_temperature_thread = kthread_run(poll_temperature,
(void *) model_data,
"mali-simple-power-model-temp-poll");
if (IS_ERR(model_data->poll_temperature_thread)) {
kfree(model_data);
return PTR_ERR(model_data->poll_temperature_thread);
}
err = add_params(model);
if (err) {
kbase_ipa_model_param_free_all(model);
kthread_stop(model_data->poll_temperature_thread);
kfree(model_data);
}
return err;
}
static int kbase_simple_power_model_recalculate(struct kbase_ipa_model *model)
{
struct kbase_ipa_model_simple_data *model_data =
(struct kbase_ipa_model_simple_data *)model->model_data;
struct thermal_zone_device *tz;
lockdep_assert_held(&model->kbdev->ipa.lock);
if (!strnlen(model_data->tz_name, sizeof(model_data->tz_name))) {
model_data->gpu_tz = NULL;
} else {
char tz_name[THERMAL_NAME_LENGTH];
strlcpy(tz_name, model_data->tz_name, sizeof(tz_name));
/* Release ipa.lock so that thermal_list_lock is not acquired
* with ipa.lock held, thereby avoid lock ordering violation
* lockdep warning. The warning comes as a chain of locks
* ipa.lock --> thermal_list_lock --> tz->lock gets formed
* on registering devfreq cooling device when probe method
* of mali platform driver is invoked.
*/
mutex_unlock(&model->kbdev->ipa.lock);
tz = thermal_zone_get_zone_by_name(tz_name);
mutex_lock(&model->kbdev->ipa.lock);
if (IS_ERR_OR_NULL(tz)) {
pr_warn_ratelimited("Error %ld getting thermal zone \'%s\', not yet ready?\n",
PTR_ERR(tz), tz_name);
return -EPROBE_DEFER;
}
/* Check if another thread raced against us & updated the
* thermal zone name string. Update the gpu_tz pointer only if
* the name string did not change whilst we retrieved the new
* thermal_zone_device pointer, otherwise model_data->tz_name &
* model_data->gpu_tz would become inconsistent with each other.
* The below check will succeed only for the thread which last
* updated the name string.
*/
if (strncmp(tz_name, model_data->tz_name, sizeof(tz_name)) == 0)
model_data->gpu_tz = tz;
}
return 0;
}
static void kbase_simple_power_model_term(struct kbase_ipa_model *model)
{
struct kbase_ipa_model_simple_data *model_data =
(struct kbase_ipa_model_simple_data *)model->model_data;
kthread_stop(model_data->poll_temperature_thread);
kfree(model_data);
}
struct kbase_ipa_model_ops kbase_simple_ipa_model_ops = {
.name = "mali-simple-power-model",
.init = &kbase_simple_power_model_init,
.recalculate = &kbase_simple_power_model_recalculate,
.term = &kbase_simple_power_model_term,
.get_dynamic_coeff = &model_dynamic_coeff,
.get_static_coeff = &model_static_coeff,
};
KBASE_EXPORT_TEST_API(kbase_simple_ipa_model_ops);

View File

@@ -0,0 +1,45 @@
/*
*
* (C) COPYRIGHT 2017 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
* Foundation, and any use by you of this program is subject to the terms
* of such GNU licence.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, you can access it online at
* http://www.gnu.org/licenses/gpl-2.0.html.
*
* SPDX-License-Identifier: GPL-2.0
*
*/
#ifndef _KBASE_IPA_SIMPLE_H_
#define _KBASE_IPA_SIMPLE_H_
#if defined(CONFIG_MALI_DEVFREQ) && defined(CONFIG_DEVFREQ_THERMAL)
extern struct kbase_ipa_model_ops kbase_simple_ipa_model_ops;
#if MALI_UNIT_TEST
/**
* kbase_simple_power_model_set_dummy_temp() - set a dummy temperature value
* @temp: Temperature of the thermal zone, in millidegrees celsius.
*
* This is only intended for use in unit tests, to ensure that the temperature
* values used by the simple power model are predictable. Deterministic
* behavior is necessary to allow validation of the static power values
* computed by this model.
*/
void kbase_simple_power_model_set_dummy_temp(int temp);
#endif /* MALI_UNIT_TEST */
#endif /* (defined(CONFIG_MALI_DEVFREQ) && defined(CONFIG_DEVFREQ_THERMAL)) */
#endif /* _KBASE_IPA_SIMPLE_H_ */

View File

@@ -0,0 +1,338 @@
/*
*
* (C) COPYRIGHT 2017-2018 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
* Foundation, and any use by you of this program is subject to the terms
* of such GNU licence.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, you can access it online at
* http://www.gnu.org/licenses/gpl-2.0.html.
*
* SPDX-License-Identifier: GPL-2.0
*
*/
#include "mali_kbase_ipa_vinstr_common.h"
#include "mali_kbase_ipa_debugfs.h"
#define DEFAULT_SCALING_FACTOR 5
/* If the value of GPU_ACTIVE is below this, use the simple model
* instead, to avoid extrapolating small amounts of counter data across
* large sample periods.
*/
#define DEFAULT_MIN_SAMPLE_CYCLES 10000
/**
* read_hwcnt() - read a counter value
* @model_data: pointer to model data
* @offset: offset, in bytes, into vinstr buffer
*
* Return: A 32-bit counter value. Range: 0 < value < 2^27 (worst case would be
* incrementing every cycle over a ~100ms sample period at a high frequency,
* e.g. 1 GHz: 2^30 * 0.1seconds ~= 2^27.
*/
static inline u32 kbase_ipa_read_hwcnt(
struct kbase_ipa_model_vinstr_data *model_data,
u32 offset)
{
u8 *p = model_data->vinstr_buffer;
return *(u32 *)&p[offset];
}
static inline s64 kbase_ipa_add_saturate(s64 a, s64 b)
{
if (S64_MAX - a < b)
return S64_MAX;
return a + b;
}
s64 kbase_ipa_sum_all_shader_cores(
struct kbase_ipa_model_vinstr_data *model_data,
s32 coeff, u32 counter)
{
struct kbase_device *kbdev = model_data->kbdev;
u64 core_mask;
u32 base = 0;
s64 ret = 0;
core_mask = kbdev->gpu_props.props.coherency_info.group[0].core_mask;
while (core_mask != 0ull) {
if ((core_mask & 1ull) != 0ull) {
/* 0 < counter_value < 2^27 */
u32 counter_value = kbase_ipa_read_hwcnt(model_data,
base + counter);
/* 0 < ret < 2^27 * max_num_cores = 2^32 */
ret = kbase_ipa_add_saturate(ret, counter_value);
}
base += KBASE_IPA_NR_BYTES_PER_BLOCK;
core_mask >>= 1;
}
/* Range: -2^54 < ret * coeff < 2^54 */
return ret * coeff;
}
s64 kbase_ipa_single_counter(
struct kbase_ipa_model_vinstr_data *model_data,
s32 coeff, u32 counter)
{
/* Range: 0 < counter_value < 2^27 */
const u32 counter_value = kbase_ipa_read_hwcnt(model_data, counter);
/* Range: -2^49 < ret < 2^49 */
return counter_value * (s64) coeff;
}
/**
* kbase_ipa_gpu_active - Inform IPA that GPU is now active
* @model_data: Pointer to model data
*
* This function may cause vinstr to become active.
*/
static void kbase_ipa_gpu_active(struct kbase_ipa_model_vinstr_data *model_data)
{
struct kbase_device *kbdev = model_data->kbdev;
lockdep_assert_held(&kbdev->pm.lock);
if (!kbdev->ipa.vinstr_active) {
kbdev->ipa.vinstr_active = true;
kbase_vinstr_resume_client(model_data->vinstr_cli);
}
}
/**
* kbase_ipa_gpu_idle - Inform IPA that GPU is now idle
* @model_data: Pointer to model data
*
* This function may cause vinstr to become idle.
*/
static void kbase_ipa_gpu_idle(struct kbase_ipa_model_vinstr_data *model_data)
{
struct kbase_device *kbdev = model_data->kbdev;
lockdep_assert_held(&kbdev->pm.lock);
if (kbdev->ipa.vinstr_active) {
kbase_vinstr_suspend_client(model_data->vinstr_cli);
kbdev->ipa.vinstr_active = false;
}
}
int kbase_ipa_attach_vinstr(struct kbase_ipa_model_vinstr_data *model_data)
{
struct kbase_device *kbdev = model_data->kbdev;
struct kbase_ioctl_hwcnt_reader_setup setup;
size_t dump_size;
dump_size = kbase_vinstr_dump_size(kbdev);
model_data->vinstr_buffer = kzalloc(dump_size, GFP_KERNEL);
if (!model_data->vinstr_buffer) {
dev_err(kbdev->dev, "Failed to allocate IPA dump buffer");
return -1;
}
setup.jm_bm = ~0u;
setup.shader_bm = ~0u;
setup.tiler_bm = ~0u;
setup.mmu_l2_bm = ~0u;
model_data->vinstr_cli = kbase_vinstr_hwcnt_kernel_setup(kbdev->vinstr_ctx,
&setup, model_data->vinstr_buffer);
if (!model_data->vinstr_cli) {
dev_err(kbdev->dev, "Failed to register IPA with vinstr core");
kfree(model_data->vinstr_buffer);
model_data->vinstr_buffer = NULL;
return -1;
}
kbase_vinstr_hwc_clear(model_data->vinstr_cli);
kbdev->ipa.gpu_active_callback = kbase_ipa_gpu_active;
kbdev->ipa.gpu_idle_callback = kbase_ipa_gpu_idle;
kbdev->ipa.model_data = model_data;
kbdev->ipa.vinstr_active = false;
/* Suspend vinstr, to ensure that the GPU is powered off until there is
* something to execute.
*/
kbase_vinstr_suspend_client(model_data->vinstr_cli);
return 0;
}
void kbase_ipa_detach_vinstr(struct kbase_ipa_model_vinstr_data *model_data)
{
struct kbase_device *kbdev = model_data->kbdev;
kbdev->ipa.gpu_active_callback = NULL;
kbdev->ipa.gpu_idle_callback = NULL;
kbdev->ipa.model_data = NULL;
kbdev->ipa.vinstr_active = false;
if (model_data->vinstr_cli)
kbase_vinstr_detach_client(model_data->vinstr_cli);
model_data->vinstr_cli = NULL;
kfree(model_data->vinstr_buffer);
model_data->vinstr_buffer = NULL;
}
int kbase_ipa_vinstr_dynamic_coeff(struct kbase_ipa_model *model, u32 *coeffp)
{
struct kbase_ipa_model_vinstr_data *model_data =
(struct kbase_ipa_model_vinstr_data *)model->model_data;
struct kbase_device *kbdev = model_data->kbdev;
s64 energy = 0;
size_t i;
u64 coeff = 0, coeff_mul = 0;
u32 active_cycles;
int err = 0;
if (!kbdev->ipa.vinstr_active)
goto err0; /* GPU powered off - no counters to collect */
err = kbase_vinstr_hwc_dump(model_data->vinstr_cli,
BASE_HWCNT_READER_EVENT_MANUAL);
if (err)
goto err0;
/* Range: 0 (GPU not used at all), to the max sampling interval, say
* 1s, * max GPU frequency (GPU 100% utilized).
* 0 <= active_cycles <= 1 * ~2GHz
* 0 <= active_cycles < 2^31
*/
active_cycles = model_data->get_active_cycles(model_data);
if (active_cycles < (u32) max(model_data->min_sample_cycles, 0)) {
err = -ENODATA;
goto err0;
}
/* Range: 1 <= active_cycles < 2^31 */
active_cycles = max(1u, active_cycles);
/* Range of 'energy' is +/- 2^54 * number of IPA groups (~8), so around
* -2^57 < energy < 2^57
*/
for (i = 0; i < model_data->groups_def_num; i++) {
const struct kbase_ipa_group *group = &model_data->groups_def[i];
s32 coeff = model_data->group_values[i];
s64 group_energy = group->op(model_data, coeff,
group->counter_block_offset);
energy = kbase_ipa_add_saturate(energy, group_energy);
}
/* Range: 0 <= coeff < 2^57 */
if (energy > 0)
coeff = energy;
/* Range: 0 <= coeff < 2^57 (because active_cycles >= 1). However, this
* can be constrained further: Counter values can only be increased by
* a theoretical maximum of about 64k per clock cycle. Beyond this,
* we'd have to sample every 1ms to avoid them overflowing at the
* lowest clock frequency (say 100MHz). Therefore, we can write the
* range of 'coeff' in terms of active_cycles:
*
* coeff = SUM(coeffN * counterN * num_cores_for_counterN)
* coeff <= SUM(coeffN * counterN) * max_num_cores
* coeff <= num_IPA_groups * max_coeff * max_counter * max_num_cores
* (substitute max_counter = 2^16 * active_cycles)
* coeff <= num_IPA_groups * max_coeff * 2^16 * active_cycles * max_num_cores
* coeff <= 2^3 * 2^22 * 2^16 * active_cycles * 2^5
* coeff <= 2^46 * active_cycles
*
* So after the division: 0 <= coeff <= 2^46
*/
coeff = div_u64(coeff, active_cycles);
/* Scale by user-specified factor (where unity is 1000).
* Range: 0 <= coeff_mul < 2^61
*/
coeff_mul = coeff * model_data->scaling_factor;
/* Range: 0 <= coeff_mul < 2^51 */
coeff_mul = div_u64(coeff_mul, 1000u);
err0:
/* Clamp to a sensible range - 2^16 gives about 14W at 400MHz/750mV */
*coeffp = clamp(coeff_mul, (u64) 0, (u64) 1 << 16);
return err;
}
int kbase_ipa_vinstr_common_model_init(struct kbase_ipa_model *model,
const struct kbase_ipa_group *ipa_groups_def,
size_t ipa_group_size,
kbase_ipa_get_active_cycles_callback get_active_cycles)
{
int err = 0;
size_t i;
struct kbase_ipa_model_vinstr_data *model_data;
if (!model || !ipa_groups_def || !ipa_group_size || !get_active_cycles)
return -EINVAL;
model_data = kzalloc(sizeof(*model_data), GFP_KERNEL);
if (!model_data)
return -ENOMEM;
model_data->kbdev = model->kbdev;
model_data->groups_def = ipa_groups_def;
model_data->groups_def_num = ipa_group_size;
model_data->get_active_cycles = get_active_cycles;
model->model_data = (void *) model_data;
for (i = 0; i < model_data->groups_def_num; ++i) {
const struct kbase_ipa_group *group = &model_data->groups_def[i];
model_data->group_values[i] = group->default_value;
err = kbase_ipa_model_add_param_s32(model, group->name,
&model_data->group_values[i],
1, false);
if (err)
goto exit;
}
model_data->scaling_factor = DEFAULT_SCALING_FACTOR;
err = kbase_ipa_model_add_param_s32(model, "scale",
&model_data->scaling_factor,
1, false);
if (err)
goto exit;
model_data->min_sample_cycles = DEFAULT_MIN_SAMPLE_CYCLES;
err = kbase_ipa_model_add_param_s32(model, "min_sample_cycles",
&model_data->min_sample_cycles,
1, false);
if (err)
goto exit;
err = kbase_ipa_attach_vinstr(model_data);
exit:
if (err) {
kbase_ipa_model_param_free_all(model);
kfree(model_data);
}
return err;
}
void kbase_ipa_vinstr_common_model_term(struct kbase_ipa_model *model)
{
struct kbase_ipa_model_vinstr_data *model_data =
(struct kbase_ipa_model_vinstr_data *)model->model_data;
kbase_ipa_detach_vinstr(model_data);
kfree(model_data);
}

View File

@@ -0,0 +1,189 @@
/*
*
* (C) COPYRIGHT 2017-2018 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
* Foundation, and any use by you of this program is subject to the terms
* of such GNU licence.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, you can access it online at
* http://www.gnu.org/licenses/gpl-2.0.html.
*
* SPDX-License-Identifier: GPL-2.0
*
*/
#ifndef _KBASE_IPA_VINSTR_COMMON_H_
#define _KBASE_IPA_VINSTR_COMMON_H_
#include "mali_kbase.h"
/* Maximum number of IPA groups for an IPA model. */
#define KBASE_IPA_MAX_GROUP_DEF_NUM 16
/* Number of bytes per hardware counter in a vinstr_buffer. */
#define KBASE_IPA_NR_BYTES_PER_CNT 4
/* Number of hardware counters per block in a vinstr_buffer. */
#define KBASE_IPA_NR_CNT_PER_BLOCK 64
/* Number of bytes per block in a vinstr_buffer. */
#define KBASE_IPA_NR_BYTES_PER_BLOCK \
(KBASE_IPA_NR_CNT_PER_BLOCK * KBASE_IPA_NR_BYTES_PER_CNT)
struct kbase_ipa_model_vinstr_data;
typedef u32 (*kbase_ipa_get_active_cycles_callback)(struct kbase_ipa_model_vinstr_data *);
/**
* struct kbase_ipa_model_vinstr_data - IPA context per device
* @kbdev: pointer to kbase device
* @groups_def: Array of IPA groups.
* @groups_def_num: Number of elements in the array of IPA groups.
* @get_active_cycles: Callback to return number of active cycles during
* counter sample period
* @vinstr_cli: vinstr client handle
* @vinstr_buffer: buffer to dump hardware counters onto
* @scaling_factor: user-specified power scaling factor. This is
* interpreted as a fraction where the denominator is
* 1000. Range approx 0.0-32.0:
* 0 < scaling_factor < 2^15
* @min_sample_cycles: If the value of the GPU_ACTIVE counter (the number of
* cycles the GPU was working) is less than
* min_sample_cycles, the counter model will return an
* error, causing the IPA framework to approximate using
* the cached simple model results instead. This may be
* more accurate than extrapolating using a very small
* counter dump.
*/
struct kbase_ipa_model_vinstr_data {
struct kbase_device *kbdev;
s32 group_values[KBASE_IPA_MAX_GROUP_DEF_NUM];
const struct kbase_ipa_group *groups_def;
size_t groups_def_num;
kbase_ipa_get_active_cycles_callback get_active_cycles;
struct kbase_vinstr_client *vinstr_cli;
void *vinstr_buffer;
s32 scaling_factor;
s32 min_sample_cycles;
};
/**
* struct ipa_group - represents a single IPA group
* @name: name of the IPA group
* @default_value: default value of coefficient for IPA group.
* Coefficients are interpreted as fractions where the
* denominator is 1000000.
* @op: which operation to be performed on the counter values
* @counter_block_offset: block offset in bytes of the counter used to calculate energy for IPA group
*/
struct kbase_ipa_group {
const char *name;
s32 default_value;
s64 (*op)(struct kbase_ipa_model_vinstr_data *, s32, u32);
u32 counter_block_offset;
};
/**
* sum_all_shader_cores() - sum a counter over all cores
* @model_data pointer to model data
* @coeff model coefficient. Unity is ~2^20, so range approx
* +/- 4.0: -2^22 < coeff < 2^22
* @counter offset in bytes of the counter used to calculate energy for IPA group
*
* Calculate energy estimation based on hardware counter `counter'
* across all shader cores.
*
* Return: Sum of counter values. Range: -2^54 < ret < 2^54
*/
s64 kbase_ipa_sum_all_shader_cores(
struct kbase_ipa_model_vinstr_data *model_data,
s32 coeff, u32 counter);
/**
* sum_single_counter() - sum a single counter
* @model_data pointer to model data
* @coeff model coefficient. Unity is ~2^20, so range approx
* +/- 4.0: -2^22 < coeff < 2^22
* @counter offset in bytes of the counter used to calculate energy for IPA group
*
* Calculate energy estimation based on hardware counter `counter'.
*
* Return: Counter value. Range: -2^49 < ret < 2^49
*/
s64 kbase_ipa_single_counter(
struct kbase_ipa_model_vinstr_data *model_data,
s32 coeff, u32 counter);
/**
* attach_vinstr() - attach a vinstr_buffer to an IPA model.
* @model_data pointer to model data
*
* Attach a vinstr_buffer to an IPA model. The vinstr_buffer
* allows access to the hardware counters used to calculate
* energy consumption.
*
* Return: 0 on success, or an error code.
*/
int kbase_ipa_attach_vinstr(struct kbase_ipa_model_vinstr_data *model_data);
/**
* detach_vinstr() - detach a vinstr_buffer from an IPA model.
* @model_data pointer to model data
*
* Detach a vinstr_buffer from an IPA model.
*/
void kbase_ipa_detach_vinstr(struct kbase_ipa_model_vinstr_data *model_data);
/**
* kbase_ipa_vinstr_dynamic_coeff() - calculate dynamic power based on HW counters
* @model: pointer to instantiated model
* @coeffp: pointer to location where calculated power, in
* pW/(Hz V^2), is stored.
*
* This is a GPU-agnostic implementation of the get_dynamic_coeff()
* function of an IPA model. It relies on the model being populated
* with GPU-specific attributes at initialization time.
*
* Return: 0 on success, or an error code.
*/
int kbase_ipa_vinstr_dynamic_coeff(struct kbase_ipa_model *model, u32 *coeffp);
/**
* kbase_ipa_vinstr_common_model_init() - initialize ipa power model
* @model: ipa power model to initialize
* @ipa_groups_def: array of ipa groups which sets coefficients for
* the corresponding counters used in the ipa model
* @ipa_group_size: number of elements in the array @ipa_groups_def
* @get_active_cycles: callback to return the number of cycles the GPU was
* active during the counter sample period.
*
* This initialization function performs initialization steps common
* for ipa models based on counter values. In each call, the model
* passes its specific coefficient values per ipa counter group via
* @ipa_groups_def array.
*
* Return: 0 on success, error code otherwise
*/
int kbase_ipa_vinstr_common_model_init(struct kbase_ipa_model *model,
const struct kbase_ipa_group *ipa_groups_def,
size_t ipa_group_size,
kbase_ipa_get_active_cycles_callback get_active_cycles);
/**
* kbase_ipa_vinstr_common_model_term() - terminate ipa power model
* @model: ipa power model to terminate
*
* This function performs all necessary steps to terminate ipa power model
* including clean up of resources allocated to hold model data.
*/
void kbase_ipa_vinstr_common_model_term(struct kbase_ipa_model *model);
#endif /* _KBASE_IPA_VINSTR_COMMON_H_ */

View File

@@ -0,0 +1,311 @@
/*
*
* (C) COPYRIGHT 2016-2018 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
* Foundation, and any use by you of this program is subject to the terms
* of such GNU licence.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, you can access it online at
* http://www.gnu.org/licenses/gpl-2.0.html.
*
* SPDX-License-Identifier: GPL-2.0
*
*/
#include <linux/thermal.h>
#include "mali_kbase_ipa_vinstr_common.h"
#include "mali_kbase.h"
#include "mali_kbase_ipa_debugfs.h"
/* Performance counter blocks base offsets */
#define JM_BASE (0 * KBASE_IPA_NR_BYTES_PER_BLOCK)
#define TILER_BASE (1 * KBASE_IPA_NR_BYTES_PER_BLOCK)
#define MEMSYS_BASE (2 * KBASE_IPA_NR_BYTES_PER_BLOCK)
#define SC0_BASE_ONE_MEMSYS (3 * KBASE_IPA_NR_BYTES_PER_BLOCK)
#define SC0_BASE_TWO_MEMSYS (4 * KBASE_IPA_NR_BYTES_PER_BLOCK)
/* JM counter block offsets */
#define JM_GPU_ACTIVE (KBASE_IPA_NR_BYTES_PER_CNT * 6)
/* Tiler counter block offsets */
#define TILER_ACTIVE (KBASE_IPA_NR_BYTES_PER_CNT * 45)
/* MEMSYS counter block offsets */
#define MEMSYS_L2_ANY_LOOKUP (KBASE_IPA_NR_BYTES_PER_CNT * 25)
/* SC counter block offsets */
#define SC_FRAG_ACTIVE (KBASE_IPA_NR_BYTES_PER_CNT * 4)
#define SC_EXEC_CORE_ACTIVE (KBASE_IPA_NR_BYTES_PER_CNT * 26)
#define SC_EXEC_INSTR_COUNT (KBASE_IPA_NR_BYTES_PER_CNT * 28)
#define SC_TEX_COORD_ISSUE (KBASE_IPA_NR_BYTES_PER_CNT * 40)
#define SC_TEX_TFCH_NUM_OPERATIONS (KBASE_IPA_NR_BYTES_PER_CNT * 42)
#define SC_VARY_INSTR (KBASE_IPA_NR_BYTES_PER_CNT * 49)
#define SC_VARY_SLOT_32 (KBASE_IPA_NR_BYTES_PER_CNT * 50)
#define SC_VARY_SLOT_16 (KBASE_IPA_NR_BYTES_PER_CNT * 51)
#define SC_BEATS_RD_LSC (KBASE_IPA_NR_BYTES_PER_CNT * 56)
#define SC_BEATS_WR_LSC (KBASE_IPA_NR_BYTES_PER_CNT * 61)
#define SC_BEATS_WR_TIB (KBASE_IPA_NR_BYTES_PER_CNT * 62)
/** Maximum number of cores for which a single Memory System block of performance counters is present. */
#define KBASE_G7x_SINGLE_MEMSYS_MAX_NUM_CORES ((u8)4)
/**
* get_jm_counter() - get performance counter offset inside the Job Manager block
* @model_data: pointer to GPU model data.
* @counter_block_offset: offset in bytes of the performance counter inside the Job Manager block.
*
* Return: Block offset in bytes of the required performance counter.
*/
static u32 kbase_g7x_power_model_get_jm_counter(struct kbase_ipa_model_vinstr_data *model_data,
u32 counter_block_offset)
{
return JM_BASE + counter_block_offset;
}
/**
* get_memsys_counter() - get performance counter offset inside the Memory System block
* @model_data: pointer to GPU model data.
* @counter_block_offset: offset in bytes of the performance counter inside the (first) Memory System block.
*
* Return: Block offset in bytes of the required performance counter.
*/
static u32 kbase_g7x_power_model_get_memsys_counter(struct kbase_ipa_model_vinstr_data *model_data,
u32 counter_block_offset)
{
/* The base address of Memory System performance counters is always the same, although their number
* may vary based on the number of cores. For the moment it's ok to return a constant.
*/
return MEMSYS_BASE + counter_block_offset;
}
/**
* get_sc_counter() - get performance counter offset inside the Shader Cores block
* @model_data: pointer to GPU model data.
* @counter_block_offset: offset in bytes of the performance counter inside the (first) Shader Cores block.
*
* Return: Block offset in bytes of the required performance counter.
*/
static u32 kbase_g7x_power_model_get_sc_counter(struct kbase_ipa_model_vinstr_data *model_data,
u32 counter_block_offset)
{
const u32 sc_base = model_data->kbdev->gpu_props.num_cores <= KBASE_G7x_SINGLE_MEMSYS_MAX_NUM_CORES ?
SC0_BASE_ONE_MEMSYS :
SC0_BASE_TWO_MEMSYS;
return sc_base + counter_block_offset;
}
/**
* memsys_single_counter() - calculate energy for a single Memory System performance counter.
* @model_data: pointer to GPU model data.
* @coeff: default value of coefficient for IPA group.
* @offset: offset in bytes of the counter inside the block it belongs to.
*
* Return: Energy estimation for a single Memory System performance counter.
*/
static s64 kbase_g7x_memsys_single_counter(
struct kbase_ipa_model_vinstr_data *model_data,
s32 coeff,
u32 offset)
{
u32 counter;
counter = kbase_g7x_power_model_get_memsys_counter(model_data, offset);
return kbase_ipa_single_counter(model_data, coeff, counter);
}
/**
* sum_all_shader_cores() - calculate energy for a Shader Cores performance counter for all cores.
* @model_data: pointer to GPU model data.
* @coeff: default value of coefficient for IPA group.
* @counter_block_offset: offset in bytes of the counter inside the block it belongs to.
*
* Return: Energy estimation for a Shader Cores performance counter for all cores.
*/
static s64 kbase_g7x_sum_all_shader_cores(
struct kbase_ipa_model_vinstr_data *model_data,
s32 coeff,
u32 counter_block_offset)
{
u32 counter;
counter = kbase_g7x_power_model_get_sc_counter(model_data,
counter_block_offset);
return kbase_ipa_sum_all_shader_cores(model_data, coeff, counter);
}
/**
* jm_single_counter() - calculate energy for a single Job Manager performance counter.
* @model_data: pointer to GPU model data.
* @coeff: default value of coefficient for IPA group.
* @counter_block_offset: offset in bytes of the counter inside the block it belongs to.
*
* Return: Energy estimation for a single Job Manager performance counter.
*/
static s64 kbase_g7x_jm_single_counter(
struct kbase_ipa_model_vinstr_data *model_data,
s32 coeff,
u32 counter_block_offset)
{
u32 counter;
counter = kbase_g7x_power_model_get_jm_counter(model_data,
counter_block_offset);
return kbase_ipa_single_counter(model_data, coeff, counter);
}
/**
* get_active_cycles() - return the GPU_ACTIVE counter
* @model_data: pointer to GPU model data.
*
* Return: the number of cycles the GPU was active during the counter sampling
* period.
*/
static u32 kbase_g7x_get_active_cycles(
struct kbase_ipa_model_vinstr_data *model_data)
{
u32 counter = kbase_g7x_power_model_get_jm_counter(model_data, JM_GPU_ACTIVE);
/* Counters are only 32-bit, so we can safely multiply by 1 then cast
* the 64-bit result back to a u32.
*/
return kbase_ipa_single_counter(model_data, 1, counter);
}
/** Table of IPA group definitions.
*
* For each IPA group, this table defines a function to access the given performance block counter (or counters,
* if the operation needs to be iterated on multiple blocks) and calculate energy estimation.
*/
static const struct kbase_ipa_group ipa_groups_def_g71[] = {
{
.name = "l2_access",
.default_value = 526300,
.op = kbase_g7x_memsys_single_counter,
.counter_block_offset = MEMSYS_L2_ANY_LOOKUP,
},
{
.name = "exec_instr_count",
.default_value = 301100,
.op = kbase_g7x_sum_all_shader_cores,
.counter_block_offset = SC_EXEC_INSTR_COUNT,
},
{
.name = "tex_issue",
.default_value = 197400,
.op = kbase_g7x_sum_all_shader_cores,
.counter_block_offset = SC_TEX_COORD_ISSUE,
},
{
.name = "tile_wb",
.default_value = -156400,
.op = kbase_g7x_sum_all_shader_cores,
.counter_block_offset = SC_BEATS_WR_TIB,
},
{
.name = "gpu_active",
.default_value = 115800,
.op = kbase_g7x_jm_single_counter,
.counter_block_offset = JM_GPU_ACTIVE,
},
};
static const struct kbase_ipa_group ipa_groups_def_g72[] = {
{
.name = "l2_access",
.default_value = 393000,
.op = kbase_g7x_memsys_single_counter,
.counter_block_offset = MEMSYS_L2_ANY_LOOKUP,
},
{
.name = "exec_instr_count",
.default_value = 227000,
.op = kbase_g7x_sum_all_shader_cores,
.counter_block_offset = SC_EXEC_INSTR_COUNT,
},
{
.name = "tex_issue",
.default_value = 181900,
.op = kbase_g7x_sum_all_shader_cores,
.counter_block_offset = SC_TEX_COORD_ISSUE,
},
{
.name = "tile_wb",
.default_value = -120200,
.op = kbase_g7x_sum_all_shader_cores,
.counter_block_offset = SC_BEATS_WR_TIB,
},
{
.name = "gpu_active",
.default_value = 133100,
.op = kbase_g7x_jm_single_counter,
.counter_block_offset = JM_GPU_ACTIVE,
},
};
static const struct kbase_ipa_group ipa_groups_def_tnox[] = {
{
.name = "gpu_active",
.default_value = 122000,
.op = kbase_g7x_jm_single_counter,
.counter_block_offset = JM_GPU_ACTIVE,
},
{
.name = "exec_instr_count",
.default_value = 488900,
.op = kbase_g7x_sum_all_shader_cores,
.counter_block_offset = SC_EXEC_INSTR_COUNT,
},
{
.name = "vary_instr",
.default_value = 212100,
.op = kbase_g7x_sum_all_shader_cores,
.counter_block_offset = SC_VARY_INSTR,
},
{
.name = "tex_tfch_num_operations",
.default_value = 288000,
.op = kbase_g7x_sum_all_shader_cores,
.counter_block_offset = SC_TEX_TFCH_NUM_OPERATIONS,
},
{
.name = "l2_access",
.default_value = 378100,
.op = kbase_g7x_memsys_single_counter,
.counter_block_offset = MEMSYS_L2_ANY_LOOKUP,
},
};
#define STANDARD_POWER_MODEL(gpu) \
static int kbase_ ## gpu ## _power_model_init(\
struct kbase_ipa_model *model) \
{ \
BUILD_BUG_ON(ARRAY_SIZE(ipa_groups_def_ ## gpu) > \
KBASE_IPA_MAX_GROUP_DEF_NUM); \
return kbase_ipa_vinstr_common_model_init(model, \
ipa_groups_def_ ## gpu, \
ARRAY_SIZE(ipa_groups_def_ ## gpu), \
kbase_g7x_get_active_cycles); \
} \
struct kbase_ipa_model_ops kbase_ ## gpu ## _ipa_model_ops = { \
.name = "mali-" #gpu "-power-model", \
.init = kbase_ ## gpu ## _power_model_init, \
.term = kbase_ipa_vinstr_common_model_term, \
.get_dynamic_coeff = kbase_ipa_vinstr_dynamic_coeff, \
}; \
KBASE_EXPORT_TEST_API(kbase_ ## gpu ## _ipa_model_ops)
STANDARD_POWER_MODEL(g71);
STANDARD_POWER_MODEL(g72);
STANDARD_POWER_MODEL(tnox);

View File

@@ -0,0 +1,461 @@
/*
*
* (C) COPYRIGHT 2014-2018 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
* Foundation, and any use by you of this program is subject to the terms
* of such GNU licence.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, you can access it online at
* http://www.gnu.org/licenses/gpl-2.0.html.
*
* SPDX-License-Identifier: GPL-2.0
*
*/
/* AUTOMATICALLY GENERATED FILE. If you want to amend the issues/features,
* please update base/tools/hwconfig_generator/hwc_{issues,features}.py
* For more information see base/tools/hwconfig_generator/README
*/
#ifndef _BASE_HWCONFIG_FEATURES_H_
#define _BASE_HWCONFIG_FEATURES_H_
enum base_hw_feature {
BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
BASE_HW_FEATURE_XAFFINITY,
BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
BASE_HW_FEATURE_MRT,
BASE_HW_FEATURE_BRNDOUT_CC,
BASE_HW_FEATURE_INTERPIPE_REG_ALIASING,
BASE_HW_FEATURE_LD_ST_TILEBUFFER,
BASE_HW_FEATURE_MSAA_16X,
BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
BASE_HW_FEATURE_OPTIMIZED_COVERAGE_MASK,
BASE_HW_FEATURE_T7XX_PAIRING_RULES,
BASE_HW_FEATURE_LD_ST_LEA_TEX,
BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
BASE_HW_FEATURE_WORKGROUP_ROUND_MULTIPLE_OF_4,
BASE_HW_FEATURE_IMAGES_IN_FRAGMENT_SHADERS,
BASE_HW_FEATURE_TEST4_DATUM_MODE,
BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE,
BASE_HW_FEATURE_BRNDOUT_KILL,
BASE_HW_FEATURE_WARPING,
BASE_HW_FEATURE_V4,
BASE_HW_FEATURE_FLUSH_REDUCTION,
BASE_HW_FEATURE_PROTECTED_MODE,
BASE_HW_FEATURE_COHERENCY_REG,
BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
BASE_HW_FEATURE_AARCH64_MMU,
BASE_HW_FEATURE_TLS_HASHING,
BASE_HW_FEATURE_THREAD_GROUP_SPLIT,
BASE_HW_FEATURE_3BIT_EXT_RW_L2_MMU_CONFIG,
BASE_HW_FEATURE_END
};
static const enum base_hw_feature base_hw_features_generic[] = {
BASE_HW_FEATURE_END
};
static const enum base_hw_feature base_hw_features_t60x[] = {
BASE_HW_FEATURE_LD_ST_LEA_TEX,
BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
BASE_HW_FEATURE_THREAD_GROUP_SPLIT,
BASE_HW_FEATURE_V4,
BASE_HW_FEATURE_END
};
static const enum base_hw_feature base_hw_features_t62x[] = {
BASE_HW_FEATURE_LD_ST_LEA_TEX,
BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
BASE_HW_FEATURE_THREAD_GROUP_SPLIT,
BASE_HW_FEATURE_V4,
BASE_HW_FEATURE_END
};
static const enum base_hw_feature base_hw_features_t72x[] = {
BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
BASE_HW_FEATURE_INTERPIPE_REG_ALIASING,
BASE_HW_FEATURE_OPTIMIZED_COVERAGE_MASK,
BASE_HW_FEATURE_T7XX_PAIRING_RULES,
BASE_HW_FEATURE_THREAD_GROUP_SPLIT,
BASE_HW_FEATURE_WORKGROUP_ROUND_MULTIPLE_OF_4,
BASE_HW_FEATURE_WARPING,
BASE_HW_FEATURE_V4,
BASE_HW_FEATURE_END
};
static const enum base_hw_feature base_hw_features_t76x[] = {
BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
BASE_HW_FEATURE_XAFFINITY,
BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
BASE_HW_FEATURE_BRNDOUT_CC,
BASE_HW_FEATURE_LD_ST_LEA_TEX,
BASE_HW_FEATURE_LD_ST_TILEBUFFER,
BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
BASE_HW_FEATURE_MRT,
BASE_HW_FEATURE_MSAA_16X,
BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
BASE_HW_FEATURE_T7XX_PAIRING_RULES,
BASE_HW_FEATURE_TEST4_DATUM_MODE,
BASE_HW_FEATURE_THREAD_GROUP_SPLIT,
BASE_HW_FEATURE_END
};
static const enum base_hw_feature base_hw_features_tFxx[] = {
BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
BASE_HW_FEATURE_XAFFINITY,
BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
BASE_HW_FEATURE_BRNDOUT_CC,
BASE_HW_FEATURE_BRNDOUT_KILL,
BASE_HW_FEATURE_LD_ST_LEA_TEX,
BASE_HW_FEATURE_LD_ST_TILEBUFFER,
BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
BASE_HW_FEATURE_MRT,
BASE_HW_FEATURE_MSAA_16X,
BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE,
BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
BASE_HW_FEATURE_T7XX_PAIRING_RULES,
BASE_HW_FEATURE_TEST4_DATUM_MODE,
BASE_HW_FEATURE_THREAD_GROUP_SPLIT,
BASE_HW_FEATURE_END
};
static const enum base_hw_feature base_hw_features_t83x[] = {
BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
BASE_HW_FEATURE_XAFFINITY,
BASE_HW_FEATURE_WARPING,
BASE_HW_FEATURE_INTERPIPE_REG_ALIASING,
BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
BASE_HW_FEATURE_BRNDOUT_CC,
BASE_HW_FEATURE_BRNDOUT_KILL,
BASE_HW_FEATURE_LD_ST_LEA_TEX,
BASE_HW_FEATURE_LD_ST_TILEBUFFER,
BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
BASE_HW_FEATURE_MRT,
BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE,
BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
BASE_HW_FEATURE_T7XX_PAIRING_RULES,
BASE_HW_FEATURE_TEST4_DATUM_MODE,
BASE_HW_FEATURE_THREAD_GROUP_SPLIT,
BASE_HW_FEATURE_END
};
static const enum base_hw_feature base_hw_features_t82x[] = {
BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
BASE_HW_FEATURE_XAFFINITY,
BASE_HW_FEATURE_WARPING,
BASE_HW_FEATURE_INTERPIPE_REG_ALIASING,
BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
BASE_HW_FEATURE_BRNDOUT_CC,
BASE_HW_FEATURE_BRNDOUT_KILL,
BASE_HW_FEATURE_LD_ST_LEA_TEX,
BASE_HW_FEATURE_LD_ST_TILEBUFFER,
BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
BASE_HW_FEATURE_MRT,
BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE,
BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
BASE_HW_FEATURE_T7XX_PAIRING_RULES,
BASE_HW_FEATURE_TEST4_DATUM_MODE,
BASE_HW_FEATURE_THREAD_GROUP_SPLIT,
BASE_HW_FEATURE_END
};
static const enum base_hw_feature base_hw_features_tMIx[] = {
BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
BASE_HW_FEATURE_XAFFINITY,
BASE_HW_FEATURE_WARPING,
BASE_HW_FEATURE_INTERPIPE_REG_ALIASING,
BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
BASE_HW_FEATURE_BRNDOUT_CC,
BASE_HW_FEATURE_BRNDOUT_KILL,
BASE_HW_FEATURE_LD_ST_LEA_TEX,
BASE_HW_FEATURE_LD_ST_TILEBUFFER,
BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
BASE_HW_FEATURE_MRT,
BASE_HW_FEATURE_MSAA_16X,
BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE,
BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
BASE_HW_FEATURE_T7XX_PAIRING_RULES,
BASE_HW_FEATURE_TEST4_DATUM_MODE,
BASE_HW_FEATURE_THREAD_GROUP_SPLIT,
BASE_HW_FEATURE_FLUSH_REDUCTION,
BASE_HW_FEATURE_PROTECTED_MODE,
BASE_HW_FEATURE_COHERENCY_REG,
BASE_HW_FEATURE_END
};
static const enum base_hw_feature base_hw_features_tHEx[] = {
BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
BASE_HW_FEATURE_XAFFINITY,
BASE_HW_FEATURE_WARPING,
BASE_HW_FEATURE_INTERPIPE_REG_ALIASING,
BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
BASE_HW_FEATURE_BRNDOUT_CC,
BASE_HW_FEATURE_BRNDOUT_KILL,
BASE_HW_FEATURE_LD_ST_LEA_TEX,
BASE_HW_FEATURE_LD_ST_TILEBUFFER,
BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
BASE_HW_FEATURE_MRT,
BASE_HW_FEATURE_MSAA_16X,
BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE,
BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
BASE_HW_FEATURE_T7XX_PAIRING_RULES,
BASE_HW_FEATURE_TEST4_DATUM_MODE,
BASE_HW_FEATURE_THREAD_GROUP_SPLIT,
BASE_HW_FEATURE_FLUSH_REDUCTION,
BASE_HW_FEATURE_PROTECTED_MODE,
BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
BASE_HW_FEATURE_COHERENCY_REG,
BASE_HW_FEATURE_END
};
static const enum base_hw_feature base_hw_features_tSIx[] = {
BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
BASE_HW_FEATURE_XAFFINITY,
BASE_HW_FEATURE_WARPING,
BASE_HW_FEATURE_INTERPIPE_REG_ALIASING,
BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
BASE_HW_FEATURE_BRNDOUT_CC,
BASE_HW_FEATURE_BRNDOUT_KILL,
BASE_HW_FEATURE_LD_ST_LEA_TEX,
BASE_HW_FEATURE_LD_ST_TILEBUFFER,
BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
BASE_HW_FEATURE_MRT,
BASE_HW_FEATURE_MSAA_16X,
BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE,
BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
BASE_HW_FEATURE_T7XX_PAIRING_RULES,
BASE_HW_FEATURE_TEST4_DATUM_MODE,
BASE_HW_FEATURE_THREAD_GROUP_SPLIT,
BASE_HW_FEATURE_FLUSH_REDUCTION,
BASE_HW_FEATURE_PROTECTED_MODE,
BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
BASE_HW_FEATURE_COHERENCY_REG,
BASE_HW_FEATURE_END
};
static const enum base_hw_feature base_hw_features_tDVx[] = {
BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
BASE_HW_FEATURE_XAFFINITY,
BASE_HW_FEATURE_WARPING,
BASE_HW_FEATURE_INTERPIPE_REG_ALIASING,
BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
BASE_HW_FEATURE_BRNDOUT_CC,
BASE_HW_FEATURE_BRNDOUT_KILL,
BASE_HW_FEATURE_LD_ST_LEA_TEX,
BASE_HW_FEATURE_LD_ST_TILEBUFFER,
BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
BASE_HW_FEATURE_MRT,
BASE_HW_FEATURE_MSAA_16X,
BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE,
BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
BASE_HW_FEATURE_T7XX_PAIRING_RULES,
BASE_HW_FEATURE_TEST4_DATUM_MODE,
BASE_HW_FEATURE_THREAD_GROUP_SPLIT,
BASE_HW_FEATURE_FLUSH_REDUCTION,
BASE_HW_FEATURE_PROTECTED_MODE,
BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
BASE_HW_FEATURE_COHERENCY_REG,
BASE_HW_FEATURE_END
};
static const enum base_hw_feature base_hw_features_tNOx[] = {
BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
BASE_HW_FEATURE_XAFFINITY,
BASE_HW_FEATURE_WARPING,
BASE_HW_FEATURE_INTERPIPE_REG_ALIASING,
BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
BASE_HW_FEATURE_BRNDOUT_CC,
BASE_HW_FEATURE_BRNDOUT_KILL,
BASE_HW_FEATURE_LD_ST_LEA_TEX,
BASE_HW_FEATURE_LD_ST_TILEBUFFER,
BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
BASE_HW_FEATURE_MRT,
BASE_HW_FEATURE_MSAA_16X,
BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE,
BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
BASE_HW_FEATURE_T7XX_PAIRING_RULES,
BASE_HW_FEATURE_TEST4_DATUM_MODE,
BASE_HW_FEATURE_THREAD_GROUP_SPLIT,
BASE_HW_FEATURE_FLUSH_REDUCTION,
BASE_HW_FEATURE_PROTECTED_MODE,
BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
BASE_HW_FEATURE_COHERENCY_REG,
BASE_HW_FEATURE_AARCH64_MMU,
BASE_HW_FEATURE_TLS_HASHING,
BASE_HW_FEATURE_3BIT_EXT_RW_L2_MMU_CONFIG,
BASE_HW_FEATURE_END
};
static const enum base_hw_feature base_hw_features_tGOx[] = {
BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
BASE_HW_FEATURE_XAFFINITY,
BASE_HW_FEATURE_WARPING,
BASE_HW_FEATURE_INTERPIPE_REG_ALIASING,
BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
BASE_HW_FEATURE_BRNDOUT_CC,
BASE_HW_FEATURE_BRNDOUT_KILL,
BASE_HW_FEATURE_LD_ST_LEA_TEX,
BASE_HW_FEATURE_LD_ST_TILEBUFFER,
BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
BASE_HW_FEATURE_MRT,
BASE_HW_FEATURE_MSAA_16X,
BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE,
BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
BASE_HW_FEATURE_T7XX_PAIRING_RULES,
BASE_HW_FEATURE_TEST4_DATUM_MODE,
BASE_HW_FEATURE_THREAD_GROUP_SPLIT,
BASE_HW_FEATURE_FLUSH_REDUCTION,
BASE_HW_FEATURE_PROTECTED_MODE,
BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
BASE_HW_FEATURE_COHERENCY_REG,
BASE_HW_FEATURE_AARCH64_MMU,
BASE_HW_FEATURE_TLS_HASHING,
BASE_HW_FEATURE_3BIT_EXT_RW_L2_MMU_CONFIG,
BASE_HW_FEATURE_END
};
static const enum base_hw_feature base_hw_features_tKAx[] = {
BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
BASE_HW_FEATURE_XAFFINITY,
BASE_HW_FEATURE_WARPING,
BASE_HW_FEATURE_INTERPIPE_REG_ALIASING,
BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
BASE_HW_FEATURE_BRNDOUT_CC,
BASE_HW_FEATURE_BRNDOUT_KILL,
BASE_HW_FEATURE_LD_ST_LEA_TEX,
BASE_HW_FEATURE_LD_ST_TILEBUFFER,
BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
BASE_HW_FEATURE_MRT,
BASE_HW_FEATURE_MSAA_16X,
BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE,
BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
BASE_HW_FEATURE_T7XX_PAIRING_RULES,
BASE_HW_FEATURE_TEST4_DATUM_MODE,
BASE_HW_FEATURE_FLUSH_REDUCTION,
BASE_HW_FEATURE_PROTECTED_MODE,
BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
BASE_HW_FEATURE_COHERENCY_REG,
BASE_HW_FEATURE_AARCH64_MMU,
BASE_HW_FEATURE_END
};
static const enum base_hw_feature base_hw_features_tTRx[] = {
BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
BASE_HW_FEATURE_XAFFINITY,
BASE_HW_FEATURE_WARPING,
BASE_HW_FEATURE_INTERPIPE_REG_ALIASING,
BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
BASE_HW_FEATURE_BRNDOUT_CC,
BASE_HW_FEATURE_BRNDOUT_KILL,
BASE_HW_FEATURE_LD_ST_LEA_TEX,
BASE_HW_FEATURE_LD_ST_TILEBUFFER,
BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
BASE_HW_FEATURE_MRT,
BASE_HW_FEATURE_MSAA_16X,
BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE,
BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
BASE_HW_FEATURE_T7XX_PAIRING_RULES,
BASE_HW_FEATURE_TEST4_DATUM_MODE,
BASE_HW_FEATURE_FLUSH_REDUCTION,
BASE_HW_FEATURE_PROTECTED_MODE,
BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
BASE_HW_FEATURE_COHERENCY_REG,
BASE_HW_FEATURE_AARCH64_MMU,
BASE_HW_FEATURE_END
};
static const enum base_hw_feature base_hw_features_tBOx[] = {
BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
BASE_HW_FEATURE_XAFFINITY,
BASE_HW_FEATURE_WARPING,
BASE_HW_FEATURE_INTERPIPE_REG_ALIASING,
BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
BASE_HW_FEATURE_BRNDOUT_CC,
BASE_HW_FEATURE_BRNDOUT_KILL,
BASE_HW_FEATURE_LD_ST_LEA_TEX,
BASE_HW_FEATURE_LD_ST_TILEBUFFER,
BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
BASE_HW_FEATURE_MRT,
BASE_HW_FEATURE_MSAA_16X,
BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE,
BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
BASE_HW_FEATURE_T7XX_PAIRING_RULES,
BASE_HW_FEATURE_TEST4_DATUM_MODE,
BASE_HW_FEATURE_FLUSH_REDUCTION,
BASE_HW_FEATURE_PROTECTED_MODE,
BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
BASE_HW_FEATURE_COHERENCY_REG,
BASE_HW_FEATURE_AARCH64_MMU,
BASE_HW_FEATURE_END
};
static const enum base_hw_feature base_hw_features_tEGx[] = {
BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
BASE_HW_FEATURE_XAFFINITY,
BASE_HW_FEATURE_WARPING,
BASE_HW_FEATURE_INTERPIPE_REG_ALIASING,
BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
BASE_HW_FEATURE_BRNDOUT_CC,
BASE_HW_FEATURE_BRNDOUT_KILL,
BASE_HW_FEATURE_LD_ST_LEA_TEX,
BASE_HW_FEATURE_LD_ST_TILEBUFFER,
BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
BASE_HW_FEATURE_MRT,
BASE_HW_FEATURE_MSAA_16X,
BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE,
BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
BASE_HW_FEATURE_T7XX_PAIRING_RULES,
BASE_HW_FEATURE_TEST4_DATUM_MODE,
BASE_HW_FEATURE_THREAD_GROUP_SPLIT,
BASE_HW_FEATURE_FLUSH_REDUCTION,
BASE_HW_FEATURE_PROTECTED_MODE,
BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
BASE_HW_FEATURE_COHERENCY_REG,
BASE_HW_FEATURE_AARCH64_MMU,
BASE_HW_FEATURE_TLS_HASHING,
BASE_HW_FEATURE_3BIT_EXT_RW_L2_MMU_CONFIG,
BASE_HW_FEATURE_END
};
#endif /* _BASE_HWCONFIG_FEATURES_H_ */

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,57 @@
/*
*
* (C) COPYRIGHT 2010-2014 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
* Foundation, and any use by you of this program is subject to the terms
* of such GNU licence.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, you can access it online at
* http://www.gnu.org/licenses/gpl-2.0.html.
*
* SPDX-License-Identifier: GPL-2.0
*
*/
#ifndef _BASE_MEM_PRIV_H_
#define _BASE_MEM_PRIV_H_
#define BASE_SYNCSET_OP_MSYNC (1U << 0)
#define BASE_SYNCSET_OP_CSYNC (1U << 1)
/*
* This structure describe a basic memory coherency operation.
* It can either be:
* @li a sync from CPU to Memory:
* - type = ::BASE_SYNCSET_OP_MSYNC
* - mem_handle = a handle to the memory object on which the operation
* is taking place
* - user_addr = the address of the range to be synced
* - size = the amount of data to be synced, in bytes
* - offset is ignored.
* @li a sync from Memory to CPU:
* - type = ::BASE_SYNCSET_OP_CSYNC
* - mem_handle = a handle to the memory object on which the operation
* is taking place
* - user_addr = the address of the range to be synced
* - size = the amount of data to be synced, in bytes.
* - offset is ignored.
*/
struct basep_syncset {
base_mem_handle mem_handle;
u64 user_addr;
u64 size;
u8 type;
u8 padding[7];
};
#endif

View File

@@ -0,0 +1,29 @@
/*
*
* (C) COPYRIGHT 2010, 2012-2013, 2015 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
* Foundation, and any use by you of this program is subject to the terms
* of such GNU licence.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, you can access it online at
* http://www.gnu.org/licenses/gpl-2.0.html.
*
* SPDX-License-Identifier: GPL-2.0
*
*/
#ifndef _BASE_VENDOR_SPEC_FUNC_H_
#define _BASE_VENDOR_SPEC_FUNC_H_
int kbase_get_vendor_specific_cpu_clock_speed(u32 * const);
#endif /*_BASE_VENDOR_SPEC_FUNC_H_*/

View File

@@ -0,0 +1,639 @@
/*
*
* (C) COPYRIGHT 2010-2018 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
* Foundation, and any use by you of this program is subject to the terms
* of such GNU licence.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, you can access it online at
* http://www.gnu.org/licenses/gpl-2.0.html.
*
* SPDX-License-Identifier: GPL-2.0
*
*/
#ifndef _KBASE_H_
#define _KBASE_H_
#include <mali_malisw.h>
#include <mali_kbase_debug.h>
#include <linux/atomic.h>
#include <linux/highmem.h>
#include <linux/hrtimer.h>
#include <linux/ktime.h>
#include <linux/list.h>
#include <linux/mm.h>
#include <linux/mutex.h>
#include <linux/rwsem.h>
#include <linux/sched.h>
#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 11, 0))
#include <linux/sched/mm.h>
#endif
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/vmalloc.h>
#include <linux/wait.h>
#include <linux/workqueue.h>
#include "mali_base_kernel.h"
#include <mali_kbase_linux.h>
/*
* Include mali_kbase_defs.h first as this provides types needed by other local
* header files.
*/
#include "mali_kbase_defs.h"
#include "mali_kbase_context.h"
#include "mali_kbase_strings.h"
#include "mali_kbase_mem_lowlevel.h"
#include "mali_kbase_trace_timeline.h"
#include "mali_kbase_js.h"
#include "mali_kbase_utility.h"
#include "mali_kbase_mem.h"
#include "mali_kbase_gpu_memory_debugfs.h"
#include "mali_kbase_mem_profile_debugfs.h"
#include "mali_kbase_debug_job_fault.h"
#include "mali_kbase_jd_debugfs.h"
#include "mali_kbase_gpuprops.h"
#include "mali_kbase_jm.h"
#include "mali_kbase_vinstr.h"
#include "ipa/mali_kbase_ipa.h"
#ifdef CONFIG_GPU_TRACEPOINTS
#include <trace/events/gpu.h>
#endif
#ifndef u64_to_user_ptr
/* Introduced in Linux v4.6 */
#define u64_to_user_ptr(x) ((void __user *)(uintptr_t)x)
#endif
/*
* Kernel-side Base (KBase) APIs
*/
struct kbase_device *kbase_device_alloc(void);
/*
* note: configuration attributes member of kbdev needs to have
* been setup before calling kbase_device_init
*/
/*
* API to acquire device list semaphore and return pointer
* to the device list head
*/
const struct list_head *kbase_dev_list_get(void);
/* API to release the device list semaphore */
void kbase_dev_list_put(const struct list_head *dev_list);
int kbase_device_init(struct kbase_device * const kbdev);
void kbase_device_term(struct kbase_device *kbdev);
void kbase_device_free(struct kbase_device *kbdev);
int kbase_device_has_feature(struct kbase_device *kbdev, u32 feature);
/* Needed for gator integration and for reporting vsync information */
struct kbase_device *kbase_find_device(int minor);
void kbase_release_device(struct kbase_device *kbdev);
void kbase_set_profiling_control(struct kbase_device *kbdev, u32 control, u32 value);
/**
* kbase_get_unmapped_area() - get an address range which is currently
* unmapped.
* @filp: File operations associated with kbase device.
* @addr: CPU mapped address (set to 0 since MAP_FIXED mapping is not allowed
* as Mali GPU driver decides about the mapping).
* @len: Length of the address range.
* @pgoff: Page offset within the GPU address space of the kbase context.
* @flags: Flags for the allocation.
*
* Finds the unmapped address range which satisfies requirements specific to
* GPU and those provided by the call parameters.
*
* 1) Requirement for allocations greater than 2MB:
* - alignment offset is set to 2MB and the alignment mask to 2MB decremented
* by 1.
*
* 2) Requirements imposed for the shader memory alignment:
* - alignment is decided by the number of GPU pc bits which can be read from
* GPU properties of the device associated with this kbase context; alignment
* offset is set to this value in bytes and the alignment mask to the offset
* decremented by 1.
* - allocations must not to be at 4GB boundaries. Such cases are indicated
* by the flag KBASE_REG_GPU_NX not being set (check the flags of the kbase
* region). 4GB boundaries can be checked against @ref BASE_MEM_MASK_4GB.
*
* 3) Requirements imposed for tiler memory alignment, cases indicated by
* the flag @ref KBASE_REG_TILER_ALIGN_TOP (check the flags of the kbase
* region):
* - alignment offset is set to the difference between the kbase region
* extent (converted from the original value in pages to bytes) and the kbase
* region initial_commit (also converted from the original value in pages to
* bytes); alignment mask is set to the kbase region extent in bytes and
* decremented by 1.
*
* Return: if successful, address of the unmapped area aligned as required;
* error code (negative) in case of failure;
*/
unsigned long kbase_get_unmapped_area(struct file *filp,
const unsigned long addr, const unsigned long len,
const unsigned long pgoff, const unsigned long flags);
int kbase_jd_init(struct kbase_context *kctx);
void kbase_jd_exit(struct kbase_context *kctx);
/**
* kbase_jd_submit - Submit atoms to the job dispatcher
*
* @kctx: The kbase context to submit to
* @user_addr: The address in user space of the struct base_jd_atom_v2 array
* @nr_atoms: The number of atoms in the array
* @stride: sizeof(struct base_jd_atom_v2)
* @uk6_atom: true if the atoms are legacy atoms (struct base_jd_atom_v2_uk6)
*
* Return: 0 on success or error code
*/
int kbase_jd_submit(struct kbase_context *kctx,
void __user *user_addr, u32 nr_atoms, u32 stride,
bool uk6_atom);
/**
* kbase_jd_done_worker - Handle a job completion
* @data: a &struct work_struct
*
* This function requeues the job from the runpool (if it was soft-stopped or
* removed from NEXT registers).
*
* Removes it from the system if it finished/failed/was cancelled.
*
* Resolves dependencies to add dependent jobs to the context, potentially
* starting them if necessary (which may add more references to the context)
*
* Releases the reference to the context from the no-longer-running job.
*
* Handles retrying submission outside of IRQ context if it failed from within
* IRQ context.
*/
void kbase_jd_done_worker(struct work_struct *data);
void kbase_jd_done(struct kbase_jd_atom *katom, int slot_nr, ktime_t *end_timestamp,
kbasep_js_atom_done_code done_code);
void kbase_jd_cancel(struct kbase_device *kbdev, struct kbase_jd_atom *katom);
void kbase_jd_zap_context(struct kbase_context *kctx);
bool jd_done_nolock(struct kbase_jd_atom *katom,
struct list_head *completed_jobs_ctx);
void kbase_jd_free_external_resources(struct kbase_jd_atom *katom);
bool jd_submit_atom(struct kbase_context *kctx,
const struct base_jd_atom_v2 *user_atom,
struct kbase_jd_atom *katom);
void kbase_jd_dep_clear_locked(struct kbase_jd_atom *katom);
void kbase_job_done(struct kbase_device *kbdev, u32 done);
/**
* kbase_job_slot_ctx_priority_check_locked(): - Check for lower priority atoms
* and soft stop them
* @kctx: Pointer to context to check.
* @katom: Pointer to priority atom.
*
* Atoms from @kctx on the same job slot as @katom, which have lower priority
* than @katom will be soft stopped and put back in the queue, so that atoms
* with higher priority can run.
*
* The hwaccess_lock must be held when calling this function.
*/
void kbase_job_slot_ctx_priority_check_locked(struct kbase_context *kctx,
struct kbase_jd_atom *katom);
void kbase_job_slot_softstop(struct kbase_device *kbdev, int js,
struct kbase_jd_atom *target_katom);
void kbase_job_slot_softstop_swflags(struct kbase_device *kbdev, int js,
struct kbase_jd_atom *target_katom, u32 sw_flags);
void kbase_job_slot_hardstop(struct kbase_context *kctx, int js,
struct kbase_jd_atom *target_katom);
void kbase_job_check_enter_disjoint(struct kbase_device *kbdev, u32 action,
base_jd_core_req core_reqs, struct kbase_jd_atom *target_katom);
void kbase_job_check_leave_disjoint(struct kbase_device *kbdev,
struct kbase_jd_atom *target_katom);
void kbase_event_post(struct kbase_context *ctx, struct kbase_jd_atom *event);
int kbase_event_dequeue(struct kbase_context *ctx, struct base_jd_event_v2 *uevent);
int kbase_event_pending(struct kbase_context *ctx);
int kbase_event_init(struct kbase_context *kctx);
void kbase_event_close(struct kbase_context *kctx);
void kbase_event_cleanup(struct kbase_context *kctx);
void kbase_event_wakeup(struct kbase_context *kctx);
int kbase_process_soft_job(struct kbase_jd_atom *katom);
int kbase_prepare_soft_job(struct kbase_jd_atom *katom);
void kbase_finish_soft_job(struct kbase_jd_atom *katom);
void kbase_cancel_soft_job(struct kbase_jd_atom *katom);
void kbase_resume_suspended_soft_jobs(struct kbase_device *kbdev);
void kbasep_remove_waiting_soft_job(struct kbase_jd_atom *katom);
#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE)
void kbase_soft_event_wait_callback(struct kbase_jd_atom *katom);
#endif
int kbase_soft_event_update(struct kbase_context *kctx,
u64 event,
unsigned char new_status);
bool kbase_replay_process(struct kbase_jd_atom *katom);
void kbasep_soft_job_timeout_worker(struct timer_list *timer);
void kbasep_complete_triggered_soft_events(struct kbase_context *kctx, u64 evt);
/* api used internally for register access. Contains validation and tracing */
void kbase_device_trace_register_access(struct kbase_context *kctx, enum kbase_reg_access_type type, u16 reg_offset, u32 reg_value);
int kbase_device_trace_buffer_install(
struct kbase_context *kctx, u32 *tb, size_t size);
void kbase_device_trace_buffer_uninstall(struct kbase_context *kctx);
void kbasep_as_do_poke(struct work_struct *work);
/** Returns the name associated with a Mali exception code
*
* This function is called from the interrupt handler when a GPU fault occurs.
* It reports the details of the fault using KBASE_DEBUG_PRINT_WARN.
*
* @param[in] kbdev The kbase device that the GPU fault occurred from.
* @param[in] exception_code exception code
* @return name associated with the exception code
*/
const char *kbase_exception_name(struct kbase_device *kbdev,
u32 exception_code);
/**
* Check whether a system suspend is in progress, or has already been suspended
*
* The caller should ensure that either kbdev->pm.active_count_lock is held, or
* a dmb was executed recently (to ensure the value is most
* up-to-date). However, without a lock the value could change afterwards.
*
* @return false if a suspend is not in progress
* @return !=false otherwise
*/
static inline bool kbase_pm_is_suspending(struct kbase_device *kbdev)
{
return kbdev->pm.suspending;
}
/**
* Return the atom's ID, as was originally supplied by userspace in
* base_jd_atom_v2::atom_number
*/
static inline int kbase_jd_atom_id(struct kbase_context *kctx, struct kbase_jd_atom *katom)
{
int result;
KBASE_DEBUG_ASSERT(kctx);
KBASE_DEBUG_ASSERT(katom);
KBASE_DEBUG_ASSERT(katom->kctx == kctx);
result = katom - &kctx->jctx.atoms[0];
KBASE_DEBUG_ASSERT(result >= 0 && result <= BASE_JD_ATOM_COUNT);
return result;
}
/**
* kbase_jd_atom_from_id - Return the atom structure for the given atom ID
* @kctx: Context pointer
* @id: ID of atom to retrieve
*
* Return: Pointer to struct kbase_jd_atom associated with the supplied ID
*/
static inline struct kbase_jd_atom *kbase_jd_atom_from_id(
struct kbase_context *kctx, int id)
{
return &kctx->jctx.atoms[id];
}
/**
* Initialize the disjoint state
*
* The disjoint event count and state are both set to zero.
*
* Disjoint functions usage:
*
* The disjoint event count should be incremented whenever a disjoint event occurs.
*
* There are several cases which are regarded as disjoint behavior. Rather than just increment
* the counter during disjoint events we also increment the counter when jobs may be affected
* by what the GPU is currently doing. To facilitate this we have the concept of disjoint state.
*
* Disjoint state is entered during GPU reset and for the entire time that an atom is replaying
* (as part of the replay workaround). Increasing the disjoint state also increases the count of
* disjoint events.
*
* The disjoint state is then used to increase the count of disjoint events during job submission
* and job completion. Any atom submitted or completed while the disjoint state is greater than
* zero is regarded as a disjoint event.
*
* The disjoint event counter is also incremented immediately whenever a job is soft stopped
* and during context creation.
*
* @param kbdev The kbase device
*/
void kbase_disjoint_init(struct kbase_device *kbdev);
/**
* Increase the count of disjoint events
* called when a disjoint event has happened
*
* @param kbdev The kbase device
*/
void kbase_disjoint_event(struct kbase_device *kbdev);
/**
* Increase the count of disjoint events only if the GPU is in a disjoint state
*
* This should be called when something happens which could be disjoint if the GPU
* is in a disjoint state. The state refcount keeps track of this.
*
* @param kbdev The kbase device
*/
void kbase_disjoint_event_potential(struct kbase_device *kbdev);
/**
* Returns the count of disjoint events
*
* @param kbdev The kbase device
* @return the count of disjoint events
*/
u32 kbase_disjoint_event_get(struct kbase_device *kbdev);
/**
* Increment the refcount state indicating that the GPU is in a disjoint state.
*
* Also Increment the disjoint event count (calls @ref kbase_disjoint_event)
* eventually after the disjoint state has completed @ref kbase_disjoint_state_down
* should be called
*
* @param kbdev The kbase device
*/
void kbase_disjoint_state_up(struct kbase_device *kbdev);
/**
* Decrement the refcount state
*
* Also Increment the disjoint event count (calls @ref kbase_disjoint_event)
*
* Called after @ref kbase_disjoint_state_up once the disjoint state is over
*
* @param kbdev The kbase device
*/
void kbase_disjoint_state_down(struct kbase_device *kbdev);
/**
* If a job is soft stopped and the number of contexts is >= this value
* it is reported as a disjoint event
*/
#define KBASE_DISJOINT_STATE_INTERLEAVED_CONTEXT_COUNT_THRESHOLD 2
#if !defined(UINT64_MAX)
#define UINT64_MAX ((uint64_t)0xFFFFFFFFFFFFFFFFULL)
#endif
#if KBASE_TRACE_ENABLE
void kbasep_trace_debugfs_init(struct kbase_device *kbdev);
#ifndef CONFIG_MALI_SYSTEM_TRACE
/** Add trace values about a job-slot
*
* @note Any functions called through this macro will still be evaluated in
* Release builds (CONFIG_MALI_DEBUG not defined). Therefore, when KBASE_TRACE_ENABLE == 0 any
* functions called to get the parameters supplied to this macro must:
* - be static or static inline
* - must just return 0 and have no other statements present in the body.
*/
#define KBASE_TRACE_ADD_SLOT(kbdev, code, ctx, katom, gpu_addr, jobslot) \
kbasep_trace_add(kbdev, KBASE_TRACE_CODE(code), ctx, katom, gpu_addr, \
KBASE_TRACE_FLAG_JOBSLOT, 0, jobslot, 0)
/** Add trace values about a job-slot, with info
*
* @note Any functions called through this macro will still be evaluated in
* Release builds (CONFIG_MALI_DEBUG not defined). Therefore, when KBASE_TRACE_ENABLE == 0 any
* functions called to get the parameters supplied to this macro must:
* - be static or static inline
* - must just return 0 and have no other statements present in the body.
*/
#define KBASE_TRACE_ADD_SLOT_INFO(kbdev, code, ctx, katom, gpu_addr, jobslot, info_val) \
kbasep_trace_add(kbdev, KBASE_TRACE_CODE(code), ctx, katom, gpu_addr, \
KBASE_TRACE_FLAG_JOBSLOT, 0, jobslot, info_val)
/** Add trace values about a ctx refcount
*
* @note Any functions called through this macro will still be evaluated in
* Release builds (CONFIG_MALI_DEBUG not defined). Therefore, when KBASE_TRACE_ENABLE == 0 any
* functions called to get the parameters supplied to this macro must:
* - be static or static inline
* - must just return 0 and have no other statements present in the body.
*/
#define KBASE_TRACE_ADD_REFCOUNT(kbdev, code, ctx, katom, gpu_addr, refcount) \
kbasep_trace_add(kbdev, KBASE_TRACE_CODE(code), ctx, katom, gpu_addr, \
KBASE_TRACE_FLAG_REFCOUNT, refcount, 0, 0)
/** Add trace values about a ctx refcount, and info
*
* @note Any functions called through this macro will still be evaluated in
* Release builds (CONFIG_MALI_DEBUG not defined). Therefore, when KBASE_TRACE_ENABLE == 0 any
* functions called to get the parameters supplied to this macro must:
* - be static or static inline
* - must just return 0 and have no other statements present in the body.
*/
#define KBASE_TRACE_ADD_REFCOUNT_INFO(kbdev, code, ctx, katom, gpu_addr, refcount, info_val) \
kbasep_trace_add(kbdev, KBASE_TRACE_CODE(code), ctx, katom, gpu_addr, \
KBASE_TRACE_FLAG_REFCOUNT, refcount, 0, info_val)
/** Add trace values (no slot or refcount)
*
* @note Any functions called through this macro will still be evaluated in
* Release builds (CONFIG_MALI_DEBUG not defined). Therefore, when KBASE_TRACE_ENABLE == 0 any
* functions called to get the parameters supplied to this macro must:
* - be static or static inline
* - must just return 0 and have no other statements present in the body.
*/
#define KBASE_TRACE_ADD(kbdev, code, ctx, katom, gpu_addr, info_val) \
kbasep_trace_add(kbdev, KBASE_TRACE_CODE(code), ctx, katom, gpu_addr, \
0, 0, 0, info_val)
/** Clear the trace */
#define KBASE_TRACE_CLEAR(kbdev) \
kbasep_trace_clear(kbdev)
/** Dump the slot trace */
#define KBASE_TRACE_DUMP(kbdev) \
kbasep_trace_dump(kbdev)
/** PRIVATE - do not use directly. Use KBASE_TRACE_ADD() instead */
void kbasep_trace_add(struct kbase_device *kbdev, enum kbase_trace_code code, void *ctx, struct kbase_jd_atom *katom, u64 gpu_addr, u8 flags, int refcount, int jobslot, unsigned long info_val);
/** PRIVATE - do not use directly. Use KBASE_TRACE_CLEAR() instead */
void kbasep_trace_clear(struct kbase_device *kbdev);
#else /* #ifndef CONFIG_MALI_SYSTEM_TRACE */
/* Dispatch kbase trace events as system trace events */
#include <mali_linux_kbase_trace.h>
#define KBASE_TRACE_ADD_SLOT(kbdev, code, ctx, katom, gpu_addr, jobslot)\
trace_mali_##code(jobslot, 0)
#define KBASE_TRACE_ADD_SLOT_INFO(kbdev, code, ctx, katom, gpu_addr, jobslot, info_val)\
trace_mali_##code(jobslot, info_val)
#define KBASE_TRACE_ADD_REFCOUNT(kbdev, code, ctx, katom, gpu_addr, refcount)\
trace_mali_##code(refcount, 0)
#define KBASE_TRACE_ADD_REFCOUNT_INFO(kbdev, code, ctx, katom, gpu_addr, refcount, info_val)\
trace_mali_##code(refcount, info_val)
#define KBASE_TRACE_ADD(kbdev, code, ctx, katom, gpu_addr, info_val)\
trace_mali_##code(gpu_addr, info_val)
#define KBASE_TRACE_CLEAR(kbdev)\
do {\
CSTD_UNUSED(kbdev);\
CSTD_NOP(0);\
} while (0)
#define KBASE_TRACE_DUMP(kbdev)\
do {\
CSTD_UNUSED(kbdev);\
CSTD_NOP(0);\
} while (0)
#endif /* #ifndef CONFIG_MALI_SYSTEM_TRACE */
#else
#define KBASE_TRACE_ADD_SLOT(kbdev, code, ctx, katom, gpu_addr, jobslot)\
do {\
CSTD_UNUSED(kbdev);\
CSTD_NOP(code);\
CSTD_UNUSED(ctx);\
CSTD_UNUSED(katom);\
CSTD_UNUSED(gpu_addr);\
CSTD_UNUSED(jobslot);\
} while (0)
#define KBASE_TRACE_ADD_SLOT_INFO(kbdev, code, ctx, katom, gpu_addr, jobslot, info_val)\
do {\
CSTD_UNUSED(kbdev);\
CSTD_NOP(code);\
CSTD_UNUSED(ctx);\
CSTD_UNUSED(katom);\
CSTD_UNUSED(gpu_addr);\
CSTD_UNUSED(jobslot);\
CSTD_UNUSED(info_val);\
CSTD_NOP(0);\
} while (0)
#define KBASE_TRACE_ADD_REFCOUNT(kbdev, code, ctx, katom, gpu_addr, refcount)\
do {\
CSTD_UNUSED(kbdev);\
CSTD_NOP(code);\
CSTD_UNUSED(ctx);\
CSTD_UNUSED(katom);\
CSTD_UNUSED(gpu_addr);\
CSTD_UNUSED(refcount);\
CSTD_NOP(0);\
} while (0)
#define KBASE_TRACE_ADD_REFCOUNT_INFO(kbdev, code, ctx, katom, gpu_addr, refcount, info_val)\
do {\
CSTD_UNUSED(kbdev);\
CSTD_NOP(code);\
CSTD_UNUSED(ctx);\
CSTD_UNUSED(katom);\
CSTD_UNUSED(gpu_addr);\
CSTD_UNUSED(info_val);\
CSTD_NOP(0);\
} while (0)
#define KBASE_TRACE_ADD(kbdev, code, subcode, ctx, katom, val)\
do {\
CSTD_UNUSED(kbdev);\
CSTD_NOP(code);\
CSTD_UNUSED(subcode);\
CSTD_UNUSED(ctx);\
CSTD_UNUSED(katom);\
CSTD_UNUSED(val);\
CSTD_NOP(0);\
} while (0)
#define KBASE_TRACE_CLEAR(kbdev)\
do {\
CSTD_UNUSED(kbdev);\
CSTD_NOP(0);\
} while (0)
#define KBASE_TRACE_DUMP(kbdev)\
do {\
CSTD_UNUSED(kbdev);\
CSTD_NOP(0);\
} while (0)
#endif /* KBASE_TRACE_ENABLE */
/** PRIVATE - do not use directly. Use KBASE_TRACE_DUMP() instead */
void kbasep_trace_dump(struct kbase_device *kbdev);
#if defined(CONFIG_DEBUG_FS) && !defined(CONFIG_MALI_NO_MALI)
/* kbase_io_history_init - initialize data struct for register access history
*
* @kbdev The register history to initialize
* @n The number of register accesses that the buffer could hold
*
* @return 0 if successfully initialized, failure otherwise
*/
int kbase_io_history_init(struct kbase_io_history *h, u16 n);
/* kbase_io_history_term - uninit all resources for the register access history
*
* @h The register history to terminate
*/
void kbase_io_history_term(struct kbase_io_history *h);
/* kbase_io_history_dump - print the register history to the kernel ring buffer
*
* @kbdev Pointer to kbase_device containing the register history to dump
*/
void kbase_io_history_dump(struct kbase_device *kbdev);
/**
* kbase_io_history_resize - resize the register access history buffer.
*
* @h: Pointer to a valid register history to resize
* @new_size: Number of accesses the buffer could hold
*
* A successful resize will clear all recent register accesses.
* If resizing fails for any reason (e.g., could not allocate memory, invalid
* buffer size) then the original buffer will be kept intact.
*
* @return 0 if the buffer was resized, failure otherwise
*/
int kbase_io_history_resize(struct kbase_io_history *h, u16 new_size);
#else /* CONFIG_DEBUG_FS */
#define kbase_io_history_init(...) ((int)0)
#define kbase_io_history_term CSTD_NOP
#define kbase_io_history_dump CSTD_NOP
#define kbase_io_history_resize CSTD_NOP
#endif /* CONFIG_DEBUG_FS */
#endif
extern int meson_gpu_data_invalid_count;
extern int meson_gpu_fault;

View File

@@ -0,0 +1,210 @@
/*
*
* (C) COPYRIGHT 2013-2015,2017-2018 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
* Foundation, and any use by you of this program is subject to the terms
* of such GNU licence.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, you can access it online at
* http://www.gnu.org/licenses/gpl-2.0.html.
*
* SPDX-License-Identifier: GPL-2.0
*
*/
#include <linux/dma-mapping.h>
#include <mali_kbase.h>
#include <mali_kbase_10969_workaround.h>
/* Mask of X and Y coordinates for the coordinates words in the descriptors*/
#define X_COORDINATE_MASK 0x00000FFF
#define Y_COORDINATE_MASK 0x0FFF0000
/* Max number of words needed from the fragment shader job descriptor */
#define JOB_HEADER_SIZE_IN_WORDS 10
#define JOB_HEADER_SIZE (JOB_HEADER_SIZE_IN_WORDS*sizeof(u32))
/* Word 0: Status Word */
#define JOB_DESC_STATUS_WORD 0
/* Word 1: Restart Index */
#define JOB_DESC_RESTART_INDEX_WORD 1
/* Word 2: Fault address low word */
#define JOB_DESC_FAULT_ADDR_LOW_WORD 2
/* Word 8: Minimum Tile Coordinates */
#define FRAG_JOB_DESC_MIN_TILE_COORD_WORD 8
/* Word 9: Maximum Tile Coordinates */
#define FRAG_JOB_DESC_MAX_TILE_COORD_WORD 9
int kbasep_10969_workaround_clamp_coordinates(struct kbase_jd_atom *katom)
{
struct device *dev = katom->kctx->kbdev->dev;
u32 clamped = 0;
struct kbase_va_region *region;
struct tagged_addr *page_array;
u64 page_index;
u32 offset = katom->jc & (~PAGE_MASK);
u32 *page_1 = NULL;
u32 *page_2 = NULL;
u32 job_header[JOB_HEADER_SIZE_IN_WORDS];
void *dst = job_header;
u32 minX, minY, maxX, maxY;
u32 restartX, restartY;
struct page *p;
u32 copy_size;
dev_warn(dev, "Called TILE_RANGE_FAULT workaround clamping function.\n");
if (!(katom->core_req & BASE_JD_REQ_FS))
return 0;
kbase_gpu_vm_lock(katom->kctx);
region = kbase_region_tracker_find_region_enclosing_address(katom->kctx,
katom->jc);
if (!region || (region->flags & KBASE_REG_FREE))
goto out_unlock;
page_array = kbase_get_cpu_phy_pages(region);
if (!page_array)
goto out_unlock;
page_index = (katom->jc >> PAGE_SHIFT) - region->start_pfn;
p = phys_to_page(as_phys_addr_t(page_array[page_index]));
/* we need the first 10 words of the fragment shader job descriptor.
* We need to check that the offset + 10 words is less that the page
* size otherwise we need to load the next page.
* page_size_overflow will be equal to 0 in case the whole descriptor
* is within the page > 0 otherwise.
*/
copy_size = MIN(PAGE_SIZE - offset, JOB_HEADER_SIZE);
page_1 = kmap_atomic(p);
/* page_1 is a u32 pointer, offset is expressed in bytes */
page_1 += offset>>2;
kbase_sync_single_for_cpu(katom->kctx->kbdev,
kbase_dma_addr(p) + offset,
copy_size, DMA_BIDIRECTIONAL);
memcpy(dst, page_1, copy_size);
/* The data needed overflows page the dimension,
* need to map the subsequent page */
if (copy_size < JOB_HEADER_SIZE) {
p = phys_to_page(as_phys_addr_t(page_array[page_index + 1]));
page_2 = kmap_atomic(p);
kbase_sync_single_for_cpu(katom->kctx->kbdev,
kbase_dma_addr(p),
JOB_HEADER_SIZE - copy_size, DMA_BIDIRECTIONAL);
memcpy(dst + copy_size, page_2, JOB_HEADER_SIZE - copy_size);
}
/* We managed to correctly map one or two pages (in case of overflow) */
/* Get Bounding Box data and restart index from fault address low word */
minX = job_header[FRAG_JOB_DESC_MIN_TILE_COORD_WORD] & X_COORDINATE_MASK;
minY = job_header[FRAG_JOB_DESC_MIN_TILE_COORD_WORD] & Y_COORDINATE_MASK;
maxX = job_header[FRAG_JOB_DESC_MAX_TILE_COORD_WORD] & X_COORDINATE_MASK;
maxY = job_header[FRAG_JOB_DESC_MAX_TILE_COORD_WORD] & Y_COORDINATE_MASK;
restartX = job_header[JOB_DESC_FAULT_ADDR_LOW_WORD] & X_COORDINATE_MASK;
restartY = job_header[JOB_DESC_FAULT_ADDR_LOW_WORD] & Y_COORDINATE_MASK;
dev_warn(dev, "Before Clamping:\n"
"Jobstatus: %08x\n"
"restartIdx: %08x\n"
"Fault_addr_low: %08x\n"
"minCoordsX: %08x minCoordsY: %08x\n"
"maxCoordsX: %08x maxCoordsY: %08x\n",
job_header[JOB_DESC_STATUS_WORD],
job_header[JOB_DESC_RESTART_INDEX_WORD],
job_header[JOB_DESC_FAULT_ADDR_LOW_WORD],
minX, minY,
maxX, maxY);
/* Set the restart index to the one which generated the fault*/
job_header[JOB_DESC_RESTART_INDEX_WORD] =
job_header[JOB_DESC_FAULT_ADDR_LOW_WORD];
if (restartX < minX) {
job_header[JOB_DESC_RESTART_INDEX_WORD] = (minX) | restartY;
dev_warn(dev,
"Clamping restart X index to minimum. %08x clamped to %08x\n",
restartX, minX);
clamped = 1;
}
if (restartY < minY) {
job_header[JOB_DESC_RESTART_INDEX_WORD] = (minY) | restartX;
dev_warn(dev,
"Clamping restart Y index to minimum. %08x clamped to %08x\n",
restartY, minY);
clamped = 1;
}
if (restartX > maxX) {
job_header[JOB_DESC_RESTART_INDEX_WORD] = (maxX) | restartY;
dev_warn(dev,
"Clamping restart X index to maximum. %08x clamped to %08x\n",
restartX, maxX);
clamped = 1;
}
if (restartY > maxY) {
job_header[JOB_DESC_RESTART_INDEX_WORD] = (maxY) | restartX;
dev_warn(dev,
"Clamping restart Y index to maximum. %08x clamped to %08x\n",
restartY, maxY);
clamped = 1;
}
if (clamped) {
/* Reset the fault address low word
* and set the job status to STOPPED */
job_header[JOB_DESC_FAULT_ADDR_LOW_WORD] = 0x0;
job_header[JOB_DESC_STATUS_WORD] = BASE_JD_EVENT_STOPPED;
dev_warn(dev, "After Clamping:\n"
"Jobstatus: %08x\n"
"restartIdx: %08x\n"
"Fault_addr_low: %08x\n"
"minCoordsX: %08x minCoordsY: %08x\n"
"maxCoordsX: %08x maxCoordsY: %08x\n",
job_header[JOB_DESC_STATUS_WORD],
job_header[JOB_DESC_RESTART_INDEX_WORD],
job_header[JOB_DESC_FAULT_ADDR_LOW_WORD],
minX, minY,
maxX, maxY);
/* Flush CPU cache to update memory for future GPU reads*/
memcpy(page_1, dst, copy_size);
p = phys_to_page(as_phys_addr_t(page_array[page_index]));
kbase_sync_single_for_device(katom->kctx->kbdev,
kbase_dma_addr(p) + offset,
copy_size, DMA_TO_DEVICE);
if (copy_size < JOB_HEADER_SIZE) {
memcpy(page_2, dst + copy_size,
JOB_HEADER_SIZE - copy_size);
p = phys_to_page(as_phys_addr_t(page_array[page_index +
1]));
kbase_sync_single_for_device(katom->kctx->kbdev,
kbase_dma_addr(p),
JOB_HEADER_SIZE - copy_size,
DMA_TO_DEVICE);
}
}
if (copy_size < JOB_HEADER_SIZE)
kunmap_atomic(page_2);
kunmap_atomic(page_1);
out_unlock:
kbase_gpu_vm_unlock(katom->kctx);
return clamped;
}

View File

@@ -0,0 +1,37 @@
/*
*
* (C) COPYRIGHT 2013-2014, 2018 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
* Foundation, and any use by you of this program is subject to the terms
* of such GNU licence.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, you can access it online at
* http://www.gnu.org/licenses/gpl-2.0.html.
*
* SPDX-License-Identifier: GPL-2.0
*
*/
#ifndef _KBASE_10969_WORKAROUND_
#define _KBASE_10969_WORKAROUND_
/**
* kbasep_10969_workaround_clamp_coordinates - Apply the WA to clamp the restart indices
* @katom: atom representing the fragment job for which the WA has to be applied
*
* This workaround is used to solve an HW issue with single iterator GPUs.
* If a fragment job is soft-stopped on the edge of its bounding box, it can happen
* that the restart index is out of bounds and the rerun causes a tile range
* fault. If this happens we try to clamp the restart index to a correct value.
*/
int kbasep_10969_workaround_clamp_coordinates(struct kbase_jd_atom *katom);
#endif /* _KBASE_10969_WORKAROUND_ */

View File

@@ -0,0 +1,111 @@
/*
*
* (C) COPYRIGHT 2016-2017 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
* Foundation, and any use by you of this program is subject to the terms
* of such GNU licence.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, you can access it online at
* http://www.gnu.org/licenses/gpl-2.0.html.
*
* SPDX-License-Identifier: GPL-2.0
*
*/
#include <linux/debugfs.h>
#include <mali_kbase.h>
#include <mali_kbase_as_fault_debugfs.h>
#ifdef CONFIG_DEBUG_FS
#ifdef CONFIG_MALI_DEBUG
static int kbase_as_fault_read(struct seq_file *sfile, void *data)
{
uintptr_t as_no = (uintptr_t) sfile->private;
struct list_head *entry;
const struct list_head *kbdev_list;
struct kbase_device *kbdev = NULL;
kbdev_list = kbase_dev_list_get();
list_for_each(entry, kbdev_list) {
kbdev = list_entry(entry, struct kbase_device, entry);
if (kbdev->debugfs_as_read_bitmap & (1ULL << as_no)) {
/* don't show this one again until another fault occors */
kbdev->debugfs_as_read_bitmap &= ~(1ULL << as_no);
/* output the last page fault addr */
seq_printf(sfile, "%llu\n",
(u64) kbdev->as[as_no].fault_addr);
}
}
kbase_dev_list_put(kbdev_list);
return 0;
}
static int kbase_as_fault_debugfs_open(struct inode *in, struct file *file)
{
return single_open(file, kbase_as_fault_read, in->i_private);
}
static const struct file_operations as_fault_fops = {
.open = kbase_as_fault_debugfs_open,
.read = seq_read,
.llseek = seq_lseek,
.release = single_release,
};
#endif /* CONFIG_MALI_DEBUG */
#endif /* CONFIG_DEBUG_FS */
/*
* Initialize debugfs entry for each address space
*/
void kbase_as_fault_debugfs_init(struct kbase_device *kbdev)
{
#ifdef CONFIG_DEBUG_FS
#ifdef CONFIG_MALI_DEBUG
uint i;
char as_name[64];
struct dentry *debugfs_directory;
kbdev->debugfs_as_read_bitmap = 0ULL;
KBASE_DEBUG_ASSERT(kbdev->nr_hw_address_spaces);
KBASE_DEBUG_ASSERT(sizeof(kbdev->as[0].fault_addr) == sizeof(u64));
debugfs_directory = debugfs_create_dir("address_spaces",
kbdev->mali_debugfs_directory);
if (debugfs_directory) {
for (i = 0; i < kbdev->nr_hw_address_spaces; i++) {
snprintf(as_name, ARRAY_SIZE(as_name), "as%u", i);
debugfs_create_file(as_name, S_IRUGO,
debugfs_directory,
(void *)(uintptr_t)i,
&as_fault_fops);
}
} else {
dev_warn(kbdev->dev,
"unable to create address_spaces debugfs directory");
}
#endif /* CONFIG_MALI_DEBUG */
#endif /* CONFIG_DEBUG_FS */
return;
}

View File

@@ -0,0 +1,50 @@
/*
*
* (C) COPYRIGHT 2016 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
* Foundation, and any use by you of this program is subject to the terms
* of such GNU licence.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, you can access it online at
* http://www.gnu.org/licenses/gpl-2.0.html.
*
* SPDX-License-Identifier: GPL-2.0
*
*/
#ifndef _KBASE_AS_FAULT_DEBUG_FS_H
#define _KBASE_AS_FAULT_DEBUG_FS_H
/**
* kbase_as_fault_debugfs_init() - Add debugfs files for reporting page faults
*
* @kbdev: Pointer to kbase_device
*/
void kbase_as_fault_debugfs_init(struct kbase_device *kbdev);
/**
* kbase_as_fault_debugfs_new() - make the last fault available on debugfs
*
* @kbdev: Pointer to kbase_device
* @as_no: The address space the fault occurred on
*/
static inline void
kbase_as_fault_debugfs_new(struct kbase_device *kbdev, int as_no)
{
#ifdef CONFIG_DEBUG_FS
#ifdef CONFIG_MALI_DEBUG
kbdev->debugfs_as_read_bitmap |= (1ULL << as_no);
#endif /* CONFIG_DEBUG_FS */
#endif /* CONFIG_MALI_DEBUG */
return;
}
#endif /*_KBASE_AS_FAULT_DEBUG_FS_H*/

View File

@@ -0,0 +1,59 @@
/*
*
* (C) COPYRIGHT 2012-2017 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
* Foundation, and any use by you of this program is subject to the terms
* of such GNU licence.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, you can access it online at
* http://www.gnu.org/licenses/gpl-2.0.html.
*
* SPDX-License-Identifier: GPL-2.0
*
*/
/*
* Cache Policy API.
*/
#include "mali_kbase_cache_policy.h"
/*
* The output flags should be a combination of the following values:
* KBASE_REG_CPU_CACHED: CPU cache should be enabled.
*/
u32 kbase_cache_enabled(u32 flags, u32 nr_pages)
{
u32 cache_flags = 0;
CSTD_UNUSED(nr_pages);
if (flags & BASE_MEM_CACHED_CPU)
cache_flags |= KBASE_REG_CPU_CACHED;
return cache_flags;
}
void kbase_sync_single_for_device(struct kbase_device *kbdev, dma_addr_t handle,
size_t size, enum dma_data_direction dir)
{
dma_sync_single_for_device(kbdev->dev, handle, size, dir);
}
void kbase_sync_single_for_cpu(struct kbase_device *kbdev, dma_addr_t handle,
size_t size, enum dma_data_direction dir)
{
dma_sync_single_for_cpu(kbdev->dev, handle, size, dir);
}

View File

@@ -0,0 +1,50 @@
/*
*
* (C) COPYRIGHT 2012-2013, 2015 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
* Foundation, and any use by you of this program is subject to the terms
* of such GNU licence.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, you can access it online at
* http://www.gnu.org/licenses/gpl-2.0.html.
*
* SPDX-License-Identifier: GPL-2.0
*
*/
/*
* Cache Policy API.
*/
#ifndef _KBASE_CACHE_POLICY_H_
#define _KBASE_CACHE_POLICY_H_
#include "mali_kbase.h"
#include "mali_base_kernel.h"
/**
* kbase_cache_enabled - Choose the cache policy for a specific region
* @flags: flags describing attributes of the region
* @nr_pages: total number of pages (backed or not) for the region
*
* Tells whether the CPU and GPU caches should be enabled or not for a specific
* region.
* This function can be modified to customize the cache policy depending on the
* flags and size of the region.
*
* Return: a combination of %KBASE_REG_CPU_CACHED and %KBASE_REG_GPU_CACHED
* depending on the cache policy
*/
u32 kbase_cache_enabled(u32 flags, u32 nr_pages);
#endif /* _KBASE_CACHE_POLICY_H_ */

View File

@@ -0,0 +1,48 @@
/*
*
* (C) COPYRIGHT 2011-2015,2017 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
* Foundation, and any use by you of this program is subject to the terms
* of such GNU licence.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, you can access it online at
* http://www.gnu.org/licenses/gpl-2.0.html.
*
* SPDX-License-Identifier: GPL-2.0
*
*/
#include <mali_kbase.h>
#include <mali_kbase_defs.h>
#include <mali_kbase_config_defaults.h>
int kbasep_platform_device_init(struct kbase_device *kbdev)
{
struct kbase_platform_funcs_conf *platform_funcs_p;
platform_funcs_p = (struct kbase_platform_funcs_conf *)PLATFORM_FUNCS;
if (platform_funcs_p && platform_funcs_p->platform_init_func)
return platform_funcs_p->platform_init_func(kbdev);
return 0;
}
void kbasep_platform_device_term(struct kbase_device *kbdev)
{
struct kbase_platform_funcs_conf *platform_funcs_p;
platform_funcs_p = (struct kbase_platform_funcs_conf *)PLATFORM_FUNCS;
if (platform_funcs_p && platform_funcs_p->platform_term_func)
platform_funcs_p->platform_term_func(kbdev);
}

View File

@@ -0,0 +1,299 @@
/*
*
* (C) COPYRIGHT 2010-2017 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
* Foundation, and any use by you of this program is subject to the terms
* of such GNU licence.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, you can access it online at
* http://www.gnu.org/licenses/gpl-2.0.html.
*
* SPDX-License-Identifier: GPL-2.0
*
*/
/**
* @file mali_kbase_config.h
* Configuration API and Attributes for KBase
*/
#ifndef _KBASE_CONFIG_H_
#define _KBASE_CONFIG_H_
#include <linux/mm.h>
#include <mali_malisw.h>
#include <mali_kbase_backend_config.h>
#include <linux/rbtree.h>
/**
* @addtogroup base_api
* @{
*/
/**
* @addtogroup base_kbase_api
* @{
*/
/**
* @addtogroup kbase_config Configuration API and Attributes
* @{
*/
/* Forward declaration of struct kbase_device */
struct kbase_device;
/**
* kbase_platform_funcs_conf - Specifies platform init/term function pointers
*
* Specifies the functions pointers for platform specific initialization and
* termination. By default no functions are required. No additional platform
* specific control is necessary.
*/
struct kbase_platform_funcs_conf {
/**
* platform_init_func - platform specific init function pointer
* @kbdev - kbase_device pointer
*
* Returns 0 on success, negative error code otherwise.
*
* Function pointer for platform specific initialization or NULL if no
* initialization function is required. At the point this the GPU is
* not active and its power and clocks are in unknown (platform specific
* state) as kbase doesn't yet have control of power and clocks.
*
* The platform specific private pointer kbase_device::platform_context
* can be accessed (and possibly initialized) in here.
*/
int (*platform_init_func)(struct kbase_device *kbdev);
/**
* platform_term_func - platform specific termination function pointer
* @kbdev - kbase_device pointer
*
* Function pointer for platform specific termination or NULL if no
* termination function is required. At the point this the GPU will be
* idle but still powered and clocked.
*
* The platform specific private pointer kbase_device::platform_context
* can be accessed (and possibly terminated) in here.
*/
void (*platform_term_func)(struct kbase_device *kbdev);
};
/*
* @brief Specifies the callbacks for power management
*
* By default no callbacks will be made and the GPU must not be powered off.
*/
struct kbase_pm_callback_conf {
/** Callback for when the GPU is idle and the power to it can be switched off.
*
* The system integrator can decide whether to either do nothing, just switch off
* the clocks to the GPU, or to completely power down the GPU.
* The platform specific private pointer kbase_device::platform_context can be accessed and modified in here. It is the
* platform \em callbacks responsibility to initialize and terminate this pointer if used (see @ref kbase_platform_funcs_conf).
*/
void (*power_off_callback)(struct kbase_device *kbdev);
/** Callback for when the GPU is about to become active and power must be supplied.
*
* This function must not return until the GPU is powered and clocked sufficiently for register access to
* succeed. The return value specifies whether the GPU was powered down since the call to power_off_callback.
* If the GPU state has been lost then this function must return 1, otherwise it should return 0.
* The platform specific private pointer kbase_device::platform_context can be accessed and modified in here. It is the
* platform \em callbacks responsibility to initialize and terminate this pointer if used (see @ref kbase_platform_funcs_conf).
*
* The return value of the first call to this function is ignored.
*
* @return 1 if the GPU state may have been lost, 0 otherwise.
*/
int (*power_on_callback)(struct kbase_device *kbdev);
/** Callback for when the system is requesting a suspend and GPU power
* must be switched off.
*
* Note that if this callback is present, then this may be called
* without a preceding call to power_off_callback. Therefore this
* callback must be able to take any action that might otherwise happen
* in power_off_callback.
*
* The platform specific private pointer kbase_device::platform_context
* can be accessed and modified in here. It is the platform \em
* callbacks responsibility to initialize and terminate this pointer if
* used (see @ref kbase_platform_funcs_conf).
*/
void (*power_suspend_callback)(struct kbase_device *kbdev);
/** Callback for when the system is resuming from a suspend and GPU
* power must be switched on.
*
* Note that if this callback is present, then this may be called
* without a following call to power_on_callback. Therefore this
* callback must be able to take any action that might otherwise happen
* in power_on_callback.
*
* The platform specific private pointer kbase_device::platform_context
* can be accessed and modified in here. It is the platform \em
* callbacks responsibility to initialize and terminate this pointer if
* used (see @ref kbase_platform_funcs_conf).
*/
void (*power_resume_callback)(struct kbase_device *kbdev);
/** Callback for handling runtime power management initialization.
*
* The runtime power management callbacks @ref power_runtime_off_callback and @ref power_runtime_on_callback
* will become active from calls made to the OS from within this function.
* The runtime calls can be triggered by calls from @ref power_off_callback and @ref power_on_callback.
* Note: for linux the kernel must have CONFIG_PM_RUNTIME enabled to use this feature.
*
* @return 0 on success, else int error code.
*/
int (*power_runtime_init_callback)(struct kbase_device *kbdev);
/** Callback for handling runtime power management termination.
*
* The runtime power management callbacks @ref power_runtime_off_callback and @ref power_runtime_on_callback
* should no longer be called by the OS on completion of this function.
* Note: for linux the kernel must have CONFIG_PM_RUNTIME enabled to use this feature.
*/
void (*power_runtime_term_callback)(struct kbase_device *kbdev);
/** Callback for runtime power-off power management callback
*
* For linux this callback will be called by the kernel runtime_suspend callback.
* Note: for linux the kernel must have CONFIG_PM_RUNTIME enabled to use this feature.
*
* @return 0 on success, else OS error code.
*/
void (*power_runtime_off_callback)(struct kbase_device *kbdev);
/** Callback for runtime power-on power management callback
*
* For linux this callback will be called by the kernel runtime_resume callback.
* Note: for linux the kernel must have CONFIG_PM_RUNTIME enabled to use this feature.
*/
int (*power_runtime_on_callback)(struct kbase_device *kbdev);
/*
* Optional callback for checking if GPU can be suspended when idle
*
* This callback will be called by the runtime power management core
* when the reference count goes to 0 to provide notification that the
* GPU now seems idle.
*
* If this callback finds that the GPU can't be powered off, or handles
* suspend by powering off directly or queueing up a power off, a
* non-zero value must be returned to prevent the runtime PM core from
* also triggering a suspend.
*
* Returning 0 will cause the runtime PM core to conduct a regular
* autosuspend.
*
* This callback is optional and if not provided regular autosuspend
* will be triggered.
*
* Note: The Linux kernel must have CONFIG_PM_RUNTIME enabled to use
* this feature.
*
* Return 0 if GPU can be suspended, positive value if it can not be
* suspeneded by runtime PM, else OS error code
*/
int (*power_runtime_idle_callback)(struct kbase_device *kbdev);
};
#ifdef CONFIG_OF
struct kbase_platform_config {
};
#else
/*
* @brief Specifies start and end of I/O memory region.
*/
struct kbase_io_memory_region {
u64 start;
u64 end;
};
/*
* @brief Specifies I/O related resources like IRQs and memory region for I/O operations.
*/
struct kbase_io_resources {
u32 job_irq_number;
u32 mmu_irq_number;
u32 gpu_irq_number;
struct kbase_io_memory_region io_memory_region;
};
struct kbase_platform_config {
const struct kbase_io_resources *io_resources;
};
#endif /* CONFIG_OF */
/**
* @brief Gets the pointer to platform config.
*
* @return Pointer to the platform config
*/
struct kbase_platform_config *kbase_get_platform_config(void);
/**
* kbasep_platform_device_init: - Platform specific call to initialize hardware
* @kbdev: kbase device pointer
*
* Function calls a platform defined routine if specified in the configuration
* attributes. The routine can initialize any hardware and context state that
* is required for the GPU block to function.
*
* Return: 0 if no errors have been found in the config.
* Negative error code otherwise.
*/
int kbasep_platform_device_init(struct kbase_device *kbdev);
/**
* kbasep_platform_device_term - Platform specific call to terminate hardware
* @kbdev: Kbase device pointer
*
* Function calls a platform defined routine if specified in the configuration
* attributes. The routine can destroy any platform specific context state and
* shut down any hardware functionality that are outside of the Power Management
* callbacks.
*
*/
void kbasep_platform_device_term(struct kbase_device *kbdev);
#ifndef CONFIG_OF
/**
* kbase_platform_register - Register a platform device for the GPU
*
* This can be used to register a platform device on systems where device tree
* is not enabled and the platform initialisation code in the kernel doesn't
* create the GPU device. Where possible device tree should be used instead.
*
* Return: 0 for success, any other fail causes module initialisation to fail
*/
int kbase_platform_register(void);
/**
* kbase_platform_unregister - Unregister a fake platform device
*
* Unregister the platform device created with kbase_platform_register()
*/
void kbase_platform_unregister(void);
#endif
/** @} *//* end group kbase_config */
/** @} *//* end group base_kbase_api */
/** @} *//* end group base_api */
#endif /* _KBASE_CONFIG_H_ */

View File

@@ -0,0 +1,278 @@
/*
*
* (C) COPYRIGHT 2013-2018 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
* Foundation, and any use by you of this program is subject to the terms
* of such GNU licence.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, you can access it online at
* http://www.gnu.org/licenses/gpl-2.0.html.
*
* SPDX-License-Identifier: GPL-2.0
*
*/
/**
* @file mali_kbase_config_defaults.h
*
* Default values for configuration settings
*
*/
#ifndef _KBASE_CONFIG_DEFAULTS_H_
#define _KBASE_CONFIG_DEFAULTS_H_
/* Include mandatory definitions per platform */
#include <mali_kbase_config_platform.h>
/**
* Boolean indicating whether the driver is configured to be secure at
* a potential loss of performance.
*
* This currently affects only r0p0-15dev0 HW and earlier.
*
* On r0p0-15dev0 HW and earlier, there are tradeoffs between security and
* performance:
*
* - When this is set to true, the driver remains fully secure,
* but potentially loses performance compared with setting this to
* false.
* - When set to false, the driver is open to certain security
* attacks.
*
* From r0p0-00rel0 and onwards, there is no security loss by setting
* this to false, and no performance loss by setting it to
* true.
*/
#define DEFAULT_SECURE_BUT_LOSS_OF_PERFORMANCE false
enum {
/**
* Use unrestricted Address ID width on the AXI bus.
*/
KBASE_AID_32 = 0x0,
/**
* Restrict GPU to a half of maximum Address ID count.
* This will reduce performance, but reduce bus load due to GPU.
*/
KBASE_AID_16 = 0x3,
/**
* Restrict GPU to a quarter of maximum Address ID count.
* This will reduce performance, but reduce bus load due to GPU.
*/
KBASE_AID_8 = 0x2,
/**
* Restrict GPU to an eighth of maximum Address ID count.
* This will reduce performance, but reduce bus load due to GPU.
*/
KBASE_AID_4 = 0x1
};
enum {
/**
* Use unrestricted Address ID width on the AXI bus.
* Restricting ID width will reduce performance & bus load due to GPU.
*/
KBASE_3BIT_AID_32 = 0x0,
/* Restrict GPU to 7/8 of maximum Address ID count. */
KBASE_3BIT_AID_28 = 0x1,
/* Restrict GPU to 3/4 of maximum Address ID count. */
KBASE_3BIT_AID_24 = 0x2,
/* Restrict GPU to 5/8 of maximum Address ID count. */
KBASE_3BIT_AID_20 = 0x3,
/* Restrict GPU to 1/2 of maximum Address ID count. */
KBASE_3BIT_AID_16 = 0x4,
/* Restrict GPU to 3/8 of maximum Address ID count. */
KBASE_3BIT_AID_12 = 0x5,
/* Restrict GPU to 1/4 of maximum Address ID count. */
KBASE_3BIT_AID_8 = 0x6,
/* Restrict GPU to 1/8 of maximum Address ID count. */
KBASE_3BIT_AID_4 = 0x7
};
/**
* Default setting for read Address ID limiting on AXI bus.
*
* Attached value: u32 register value
* KBASE_AID_32 - use the full 32 IDs (5 ID bits)
* KBASE_AID_16 - use 16 IDs (4 ID bits)
* KBASE_AID_8 - use 8 IDs (3 ID bits)
* KBASE_AID_4 - use 4 IDs (2 ID bits)
* Default value: KBASE_AID_32 (no limit). Note hardware implementation
* may limit to a lower value.
*/
#define DEFAULT_ARID_LIMIT KBASE_AID_32
/**
* Default setting for write Address ID limiting on AXI.
*
* Attached value: u32 register value
* KBASE_AID_32 - use the full 32 IDs (5 ID bits)
* KBASE_AID_16 - use 16 IDs (4 ID bits)
* KBASE_AID_8 - use 8 IDs (3 ID bits)
* KBASE_AID_4 - use 4 IDs (2 ID bits)
* Default value: KBASE_AID_32 (no limit). Note hardware implementation
* may limit to a lower value.
*/
#define DEFAULT_AWID_LIMIT KBASE_AID_32
/**
* Default setting for read Address ID limiting on AXI bus.
*
* Default value: KBASE_3BIT_AID_32 (no limit). Note hardware implementation
* may limit to a lower value.
*/
#define DEFAULT_3BIT_ARID_LIMIT KBASE_3BIT_AID_32
/**
* Default setting for write Address ID limiting on AXI.
*
* Default value: KBASE_3BIT_AID_32 (no limit). Note hardware implementation
* may limit to a lower value.
*/
#define DEFAULT_3BIT_AWID_LIMIT KBASE_3BIT_AID_32
/**
* Default period for DVFS sampling
*/
#define DEFAULT_PM_DVFS_PERIOD 100 /* 100ms */
/**
* Power Management poweroff tick granuality. This is in nanoseconds to
* allow HR timer support.
*
* On each scheduling tick, the power manager core may decide to:
* -# Power off one or more shader cores
* -# Power off the entire GPU
*/
#define DEFAULT_PM_GPU_POWEROFF_TICK_NS (400000) /* 400us */
/**
* Power Manager number of ticks before shader cores are powered off
*/
#define DEFAULT_PM_POWEROFF_TICK_SHADER (2) /* 400-800us */
/**
* Power Manager number of ticks before GPU is powered off
*/
#define DEFAULT_PM_POWEROFF_TICK_GPU (2) /* 400-800us */
/**
* Default scheduling tick granuality
*/
#define DEFAULT_JS_SCHEDULING_PERIOD_NS (100000000u) /* 100ms */
/**
* Default minimum number of scheduling ticks before jobs are soft-stopped.
*
* This defines the time-slice for a job (which may be different from that of a
* context)
*/
#define DEFAULT_JS_SOFT_STOP_TICKS (1) /* 100ms-200ms */
/**
* Default minimum number of scheduling ticks before CL jobs are soft-stopped.
*/
#define DEFAULT_JS_SOFT_STOP_TICKS_CL (1) /* 100ms-200ms */
/**
* Default minimum number of scheduling ticks before jobs are hard-stopped
*/
#define DEFAULT_JS_HARD_STOP_TICKS_SS (50) /* 5s */
#define DEFAULT_JS_HARD_STOP_TICKS_SS_8408 (300) /* 30s */
/**
* Default minimum number of scheduling ticks before CL jobs are hard-stopped.
*/
#define DEFAULT_JS_HARD_STOP_TICKS_CL (50) /* 5s */
/**
* Default minimum number of scheduling ticks before jobs are hard-stopped
* during dumping
*/
#define DEFAULT_JS_HARD_STOP_TICKS_DUMPING (15000) /* 1500s */
/**
* Default timeout for some software jobs, after which the software event wait
* jobs will be cancelled.
*/
#define DEFAULT_JS_SOFT_JOB_TIMEOUT (3000) /* 3s */
/**
* Default minimum number of scheduling ticks before the GPU is reset to clear a
* "stuck" job
*/
#define DEFAULT_JS_RESET_TICKS_SS (55) /* 5.5s */
#define DEFAULT_JS_RESET_TICKS_SS_8408 (450) /* 45s */
/**
* Default minimum number of scheduling ticks before the GPU is reset to clear a
* "stuck" CL job.
*/
#define DEFAULT_JS_RESET_TICKS_CL (55) /* 5.5s */
/**
* Default minimum number of scheduling ticks before the GPU is reset to clear a
* "stuck" job during dumping.
*/
#define DEFAULT_JS_RESET_TICKS_DUMPING (15020) /* 1502s */
/**
* Default number of milliseconds given for other jobs on the GPU to be
* soft-stopped when the GPU needs to be reset.
*/
#define DEFAULT_RESET_TIMEOUT_MS (3000) /* 3s */
/**
* Default timeslice that a context is scheduled in for, in nanoseconds.
*
* When a context has used up this amount of time across its jobs, it is
* scheduled out to let another run.
*
* @note the resolution is nanoseconds (ns) here, because that's the format
* often used by the OS.
*/
#define DEFAULT_JS_CTX_TIMESLICE_NS (50000000) /* 50ms */
/**
* Perform GPU power down using only platform specific code, skipping DDK power
* management.
*
* If this is non-zero then kbase will avoid powering down shader cores, the
* tiler, and the L2 cache, instead just powering down the entire GPU through
* platform specific code. This may be required for certain platform
* integrations.
*
* Note that as this prevents kbase from powering down shader cores, this limits
* the available power policies to coarse_demand and always_on.
*/
#define PLATFORM_POWER_DOWN_ONLY (1)
/**
* Maximum frequency (in kHz) that the GPU can be clocked. For some platforms
* this isn't available, so we simply define a dummy value here. If devfreq
* is enabled the value will be read from there, otherwise this should be
* overridden by defining GPU_FREQ_KHZ_MAX in the platform file.
*/
#define DEFAULT_GPU_FREQ_KHZ_MAX (5000)
#endif /* _KBASE_CONFIG_DEFAULTS_H_ */

View File

@@ -0,0 +1,347 @@
/*
*
* (C) COPYRIGHT 2010-2018 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
* Foundation, and any use by you of this program is subject to the terms
* of such GNU licence.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, you can access it online at
* http://www.gnu.org/licenses/gpl-2.0.html.
*
* SPDX-License-Identifier: GPL-2.0
*
*/
/*
* Base kernel context APIs
*/
#include <mali_kbase.h>
#include <mali_midg_regmap.h>
#include <mali_kbase_mem_linux.h>
#include <mali_kbase_dma_fence.h>
#include <mali_kbase_ctx_sched.h>
struct kbase_context *
kbase_create_context(struct kbase_device *kbdev, bool is_compat)
{
struct kbase_context *kctx;
int err;
struct page *p;
KBASE_DEBUG_ASSERT(kbdev != NULL);
/* zero-inited as lot of code assume it's zero'ed out on create */
kctx = vzalloc(sizeof(*kctx));
if (!kctx)
goto out;
/* creating a context is considered a disjoint event */
kbase_disjoint_event(kbdev);
kctx->kbdev = kbdev;
kctx->as_nr = KBASEP_AS_NR_INVALID;
atomic_set(&kctx->refcount, 0);
if (is_compat)
kbase_ctx_flag_set(kctx, KCTX_COMPAT);
#if defined(CONFIG_64BIT)
else
kbase_ctx_flag_set(kctx, KCTX_FORCE_SAME_VA);
#endif /* !defined(CONFIG_64BIT) */
#ifdef CONFIG_MALI_TRACE_TIMELINE
kctx->timeline.owner_tgid = task_tgid_nr(current);
#endif
atomic_set(&kctx->setup_complete, 0);
atomic_set(&kctx->setup_in_progress, 0);
spin_lock_init(&kctx->mm_update_lock);
kctx->process_mm = NULL;
atomic_set(&kctx->nonmapped_pages, 0);
kctx->slots_pullable = 0;
kctx->tgid = current->tgid;
kctx->pid = current->pid;
err = kbase_mem_pool_init(&kctx->mem_pool,
kbdev->mem_pool_max_size_default,
KBASE_MEM_POOL_4KB_PAGE_TABLE_ORDER,
kctx->kbdev,
&kbdev->mem_pool);
if (err)
goto free_kctx;
err = kbase_mem_pool_init(&kctx->lp_mem_pool,
(kbdev->mem_pool_max_size_default >> 9),
KBASE_MEM_POOL_2MB_PAGE_TABLE_ORDER,
kctx->kbdev,
&kbdev->lp_mem_pool);
if (err)
goto free_mem_pool;
err = kbase_mem_evictable_init(kctx);
if (err)
goto free_both_pools;
atomic_set(&kctx->used_pages, 0);
err = kbase_jd_init(kctx);
if (err)
goto deinit_evictable;
err = kbasep_js_kctx_init(kctx);
if (err)
goto free_jd; /* safe to call kbasep_js_kctx_term in this case */
err = kbase_event_init(kctx);
if (err)
goto free_jd;
atomic_set(&kctx->drain_pending, 0);
mutex_init(&kctx->reg_lock);
mutex_init(&kctx->mem_partials_lock);
INIT_LIST_HEAD(&kctx->mem_partials);
INIT_LIST_HEAD(&kctx->waiting_soft_jobs);
spin_lock_init(&kctx->waiting_soft_jobs_lock);
err = kbase_dma_fence_init(kctx);
if (err)
goto free_event;
err = kbase_mmu_init(kctx);
if (err)
goto term_dma_fence;
do {
err = kbase_mem_pool_grow(&kctx->mem_pool,
MIDGARD_MMU_BOTTOMLEVEL);
if (err)
goto pgd_no_mem;
mutex_lock(&kctx->mmu_lock);
kctx->pgd = kbase_mmu_alloc_pgd(kctx);
mutex_unlock(&kctx->mmu_lock);
} while (!kctx->pgd);
p = kbase_mem_alloc_page(&kctx->mem_pool);
if (!p)
goto no_sink_page;
kctx->aliasing_sink_page = as_tagged(page_to_phys(p));
init_waitqueue_head(&kctx->event_queue);
kctx->cookies = KBASE_COOKIE_MASK;
/* Make sure page 0 is not used... */
err = kbase_region_tracker_init(kctx);
if (err)
goto no_region_tracker;
err = kbase_sticky_resource_init(kctx);
if (err)
goto no_sticky;
err = kbase_jit_init(kctx);
if (err)
goto no_jit;
#ifdef CONFIG_GPU_TRACEPOINTS
atomic_set(&kctx->jctx.work_id, 0);
#endif
#ifdef CONFIG_MALI_TRACE_TIMELINE
atomic_set(&kctx->timeline.jd_atoms_in_flight, 0);
#endif
kctx->id = atomic_add_return(1, &(kbdev->ctx_num)) - 1;
mutex_init(&kctx->vinstr_cli_lock);
kbase_timer_setup(&kctx->soft_job_timeout,
kbasep_soft_job_timeout_worker);
return kctx;
no_jit:
kbase_gpu_vm_lock(kctx);
kbase_sticky_resource_term(kctx);
kbase_gpu_vm_unlock(kctx);
no_sticky:
kbase_region_tracker_term(kctx);
no_region_tracker:
kbase_mem_pool_free(&kctx->mem_pool, p, false);
no_sink_page:
/* VM lock needed for the call to kbase_mmu_free_pgd */
kbase_gpu_vm_lock(kctx);
kbase_mmu_free_pgd(kctx);
kbase_gpu_vm_unlock(kctx);
pgd_no_mem:
kbase_mmu_term(kctx);
term_dma_fence:
kbase_dma_fence_term(kctx);
free_event:
kbase_event_cleanup(kctx);
free_jd:
/* Safe to call this one even when didn't initialize (assuming kctx was sufficiently zeroed) */
kbasep_js_kctx_term(kctx);
kbase_jd_exit(kctx);
deinit_evictable:
kbase_mem_evictable_deinit(kctx);
free_both_pools:
kbase_mem_pool_term(&kctx->lp_mem_pool);
free_mem_pool:
kbase_mem_pool_term(&kctx->mem_pool);
free_kctx:
vfree(kctx);
out:
return NULL;
}
KBASE_EXPORT_SYMBOL(kbase_create_context);
static void kbase_reg_pending_dtor(struct kbase_va_region *reg)
{
dev_dbg(reg->kctx->kbdev->dev, "Freeing pending unmapped region\n");
kbase_mem_phy_alloc_put(reg->cpu_alloc);
kbase_mem_phy_alloc_put(reg->gpu_alloc);
kfree(reg);
}
void kbase_destroy_context(struct kbase_context *kctx)
{
struct kbase_device *kbdev;
int pages;
unsigned long pending_regions_to_clean;
unsigned long flags;
struct page *p;
KBASE_DEBUG_ASSERT(NULL != kctx);
kbdev = kctx->kbdev;
KBASE_DEBUG_ASSERT(NULL != kbdev);
KBASE_TRACE_ADD(kbdev, CORE_CTX_DESTROY, kctx, NULL, 0u, 0u);
/* Ensure the core is powered up for the destroy process */
/* A suspend won't happen here, because we're in a syscall from a userspace
* thread. */
kbase_pm_context_active(kbdev);
kbase_mem_pool_mark_dying(&kctx->mem_pool);
kbase_jd_zap_context(kctx);
#ifdef CONFIG_DEBUG_FS
/* Removing the rest of the debugfs entries here as we want to keep the
* atom debugfs interface alive until all atoms have completed. This
* is useful for debugging hung contexts. */
debugfs_remove_recursive(kctx->kctx_dentry);
#endif
kbase_event_cleanup(kctx);
/*
* JIT must be terminated before the code below as it must be called
* without the region lock being held.
* The code above ensures no new JIT allocations can be made by
* by the time we get to this point of context tear down.
*/
kbase_jit_term(kctx);
kbase_gpu_vm_lock(kctx);
kbase_sticky_resource_term(kctx);
/* MMU is disabled as part of scheduling out the context */
kbase_mmu_free_pgd(kctx);
/* drop the aliasing sink page now that it can't be mapped anymore */
p = phys_to_page(as_phys_addr_t(kctx->aliasing_sink_page));
kbase_mem_pool_free(&kctx->mem_pool, p, false);
/* free pending region setups */
pending_regions_to_clean = (~kctx->cookies) & KBASE_COOKIE_MASK;
while (pending_regions_to_clean) {
unsigned int cookie = __ffs(pending_regions_to_clean);
BUG_ON(!kctx->pending_regions[cookie]);
kbase_reg_pending_dtor(kctx->pending_regions[cookie]);
kctx->pending_regions[cookie] = NULL;
pending_regions_to_clean &= ~(1UL << cookie);
}
kbase_region_tracker_term(kctx);
kbase_gpu_vm_unlock(kctx);
/* Safe to call this one even when didn't initialize (assuming kctx was sufficiently zeroed) */
kbasep_js_kctx_term(kctx);
kbase_jd_exit(kctx);
kbase_dma_fence_term(kctx);
mutex_lock(&kbdev->mmu_hw_mutex);
spin_lock_irqsave(&kctx->kbdev->hwaccess_lock, flags);
kbase_ctx_sched_remove_ctx(kctx);
spin_unlock_irqrestore(&kctx->kbdev->hwaccess_lock, flags);
mutex_unlock(&kbdev->mmu_hw_mutex);
kbase_mmu_term(kctx);
pages = atomic_read(&kctx->used_pages);
if (pages != 0)
dev_warn(kbdev->dev, "%s: %d pages in use!\n", __func__, pages);
kbase_mem_evictable_deinit(kctx);
kbase_mem_pool_term(&kctx->mem_pool);
kbase_mem_pool_term(&kctx->lp_mem_pool);
WARN_ON(atomic_read(&kctx->nonmapped_pages) != 0);
vfree(kctx);
kbase_pm_context_idle(kbdev);
}
KBASE_EXPORT_SYMBOL(kbase_destroy_context);
int kbase_context_set_create_flags(struct kbase_context *kctx, u32 flags)
{
int err = 0;
struct kbasep_js_kctx_info *js_kctx_info;
unsigned long irq_flags;
KBASE_DEBUG_ASSERT(NULL != kctx);
js_kctx_info = &kctx->jctx.sched_info;
/* Validate flags */
if (flags != (flags & BASE_CONTEXT_CREATE_KERNEL_FLAGS)) {
err = -EINVAL;
goto out;
}
mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
spin_lock_irqsave(&kctx->kbdev->hwaccess_lock, irq_flags);
/* Translate the flags */
if ((flags & BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED) == 0)
kbase_ctx_flag_clear(kctx, KCTX_SUBMIT_DISABLED);
/* Latch the initial attributes into the Job Scheduler */
kbasep_js_ctx_attr_set_initial_attrs(kctx->kbdev, kctx);
spin_unlock_irqrestore(&kctx->kbdev->hwaccess_lock, irq_flags);
mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
out:
return err;
}
KBASE_EXPORT_SYMBOL(kbase_context_set_create_flags);

View File

@@ -0,0 +1,123 @@
/*
*
* (C) COPYRIGHT 2011-2016, 2018 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
* Foundation, and any use by you of this program is subject to the terms
* of such GNU licence.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, you can access it online at
* http://www.gnu.org/licenses/gpl-2.0.html.
*
* SPDX-License-Identifier: GPL-2.0
*
*/
#ifndef _KBASE_CONTEXT_H_
#define _KBASE_CONTEXT_H_
#include <linux/atomic.h>
/**
* kbase_create_context() - Create a kernel base context.
* @kbdev: Kbase device
* @is_compat: Force creation of a 32-bit context
*
* Allocate and init a kernel base context.
*
* Return: new kbase context
*/
struct kbase_context *
kbase_create_context(struct kbase_device *kbdev, bool is_compat);
/**
* kbase_destroy_context - Destroy a kernel base context.
* @kctx: Context to destroy
*
* Calls kbase_destroy_os_context() to free OS specific structures.
* Will release all outstanding regions.
*/
void kbase_destroy_context(struct kbase_context *kctx);
/**
* kbase_context_set_create_flags - Set creation flags on a context
* @kctx: Kbase context
* @flags: Flags to set, which shall be one of the flags of
* BASE_CONTEXT_CREATE_KERNEL_FLAGS.
*
* Return: 0 on success, -EINVAL otherwise when an invalid flag is specified.
*/
int kbase_context_set_create_flags(struct kbase_context *kctx, u32 flags);
/**
* kbase_ctx_flag - Check if @flag is set on @kctx
* @kctx: Pointer to kbase context to check
* @flag: Flag to check
*
* Return: true if @flag is set on @kctx, false if not.
*/
static inline bool kbase_ctx_flag(struct kbase_context *kctx,
enum kbase_context_flags flag)
{
return atomic_read(&kctx->flags) & flag;
}
/**
* kbase_ctx_flag_clear - Clear @flag on @kctx
* @kctx: Pointer to kbase context
* @flag: Flag to clear
*
* Clear the @flag on @kctx. This is done atomically, so other flags being
* cleared or set at the same time will be safe.
*
* Some flags have locking requirements, check the documentation for the
* respective flags.
*/
static inline void kbase_ctx_flag_clear(struct kbase_context *kctx,
enum kbase_context_flags flag)
{
#if KERNEL_VERSION(4, 3, 0) > LINUX_VERSION_CODE
/*
* Earlier kernel versions doesn't have atomic_andnot() or
* atomic_and(). atomic_clear_mask() was only available on some
* architectures and removed on arm in v3.13 on arm and arm64.
*
* Use a compare-exchange loop to clear the flag on pre 4.3 kernels,
* when atomic_andnot() becomes available.
*/
int old, new;
do {
old = atomic_read(&kctx->flags);
new = old & ~flag;
} while (atomic_cmpxchg(&kctx->flags, old, new) != old);
#else
atomic_andnot(flag, &kctx->flags);
#endif
}
/**
* kbase_ctx_flag_set - Set @flag on @kctx
* @kctx: Pointer to kbase context
* @flag: Flag to clear
*
* Set the @flag on @kctx. This is done atomically, so other flags being
* cleared or set at the same time will be safe.
*
* Some flags have locking requirements, check the documentation for the
* respective flags.
*/
static inline void kbase_ctx_flag_set(struct kbase_context *kctx,
enum kbase_context_flags flag)
{
atomic_or(flag, &kctx->flags);
}
#endif /* _KBASE_CONTEXT_H_ */

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,208 @@
/*
*
* (C) COPYRIGHT 2017 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
* Foundation, and any use by you of this program is subject to the terms
* of such GNU licence.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, you can access it online at
* http://www.gnu.org/licenses/gpl-2.0.html.
*
* SPDX-License-Identifier: GPL-2.0
*
*/
#include <mali_kbase.h>
#include <mali_kbase_config_defaults.h>
#include "mali_kbase_ctx_sched.h"
int kbase_ctx_sched_init(struct kbase_device *kbdev)
{
int as_present = (1U << kbdev->nr_hw_address_spaces) - 1;
/* These two must be recalculated if nr_hw_address_spaces changes
* (e.g. for HW workarounds) */
kbdev->nr_user_address_spaces = kbdev->nr_hw_address_spaces;
if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8987)) {
bool use_workaround;
use_workaround = DEFAULT_SECURE_BUT_LOSS_OF_PERFORMANCE;
if (use_workaround) {
dev_dbg(kbdev->dev, "GPU has HW ISSUE 8987, and driver configured for security workaround: 1 address space only");
kbdev->nr_user_address_spaces = 1;
}
}
kbdev->as_free = as_present; /* All ASs initially free */
memset(kbdev->as_to_kctx, 0, sizeof(kbdev->as_to_kctx));
return 0;
}
void kbase_ctx_sched_term(struct kbase_device *kbdev)
{
s8 i;
/* Sanity checks */
for (i = 0; i != kbdev->nr_hw_address_spaces; ++i) {
WARN_ON(kbdev->as_to_kctx[i] != NULL);
WARN_ON(!(kbdev->as_free & (1u << i)));
}
}
/* kbasep_ctx_sched_find_as_for_ctx - Find a free address space
*
* @kbdev: The context for which to find a free address space
*
* Return: A valid AS if successful, otherwise KBASEP_AS_NR_INVALID
*
* This function returns an address space available for use. It would prefer
* returning an AS that has been previously assigned to the context to
* avoid having to reprogram the MMU.
*/
static int kbasep_ctx_sched_find_as_for_ctx(struct kbase_context *kctx)
{
struct kbase_device *const kbdev = kctx->kbdev;
int free_as;
lockdep_assert_held(&kbdev->hwaccess_lock);
/* First check if the previously assigned AS is available */
if ((kctx->as_nr != KBASEP_AS_NR_INVALID) &&
(kbdev->as_free & (1u << kctx->as_nr)))
return kctx->as_nr;
/* The previously assigned AS was taken, we'll be returning any free
* AS at this point.
*/
free_as = ffs(kbdev->as_free) - 1;
if (free_as >= 0 && free_as < kbdev->nr_hw_address_spaces)
return free_as;
return KBASEP_AS_NR_INVALID;
}
int kbase_ctx_sched_retain_ctx(struct kbase_context *kctx)
{
struct kbase_device *const kbdev = kctx->kbdev;
lockdep_assert_held(&kbdev->mmu_hw_mutex);
lockdep_assert_held(&kbdev->hwaccess_lock);
WARN_ON(!kbdev->pm.backend.gpu_powered);
if (atomic_inc_return(&kctx->refcount) == 1) {
int const free_as = kbasep_ctx_sched_find_as_for_ctx(kctx);
if (free_as != KBASEP_AS_NR_INVALID) {
kbdev->as_free &= ~(1u << free_as);
/* Only program the MMU if the context has not been
* assigned the same address space before.
*/
if (free_as != kctx->as_nr) {
struct kbase_context *const prev_kctx =
kbdev->as_to_kctx[free_as];
if (prev_kctx) {
WARN_ON(atomic_read(&prev_kctx->refcount) != 0);
kbase_mmu_disable(prev_kctx);
prev_kctx->as_nr = KBASEP_AS_NR_INVALID;
}
kctx->as_nr = free_as;
kbdev->as_to_kctx[free_as] = kctx;
kbase_mmu_update(kctx);
}
} else {
atomic_dec(&kctx->refcount);
/* Failed to find an available address space, we must
* be returning an error at this point.
*/
WARN_ON(kctx->as_nr != KBASEP_AS_NR_INVALID);
}
}
return kctx->as_nr;
}
void kbase_ctx_sched_retain_ctx_refcount(struct kbase_context *kctx)
{
struct kbase_device *const kbdev = kctx->kbdev;
lockdep_assert_held(&kbdev->hwaccess_lock);
WARN_ON(atomic_read(&kctx->refcount) == 0);
WARN_ON(kctx->as_nr == KBASEP_AS_NR_INVALID);
WARN_ON(kbdev->as_to_kctx[kctx->as_nr] != kctx);
atomic_inc(&kctx->refcount);
}
void kbase_ctx_sched_release_ctx(struct kbase_context *kctx)
{
struct kbase_device *const kbdev = kctx->kbdev;
lockdep_assert_held(&kbdev->hwaccess_lock);
if (atomic_dec_return(&kctx->refcount) == 0)
kbdev->as_free |= (1u << kctx->as_nr);
}
void kbase_ctx_sched_remove_ctx(struct kbase_context *kctx)
{
struct kbase_device *const kbdev = kctx->kbdev;
lockdep_assert_held(&kbdev->mmu_hw_mutex);
lockdep_assert_held(&kbdev->hwaccess_lock);
WARN_ON(atomic_read(&kctx->refcount) != 0);
if (kctx->as_nr != KBASEP_AS_NR_INVALID) {
if (kbdev->pm.backend.gpu_powered)
kbase_mmu_disable(kctx);
kbdev->as_to_kctx[kctx->as_nr] = NULL;
kctx->as_nr = KBASEP_AS_NR_INVALID;
}
}
void kbase_ctx_sched_restore_all_as(struct kbase_device *kbdev)
{
s8 i;
lockdep_assert_held(&kbdev->mmu_hw_mutex);
lockdep_assert_held(&kbdev->hwaccess_lock);
WARN_ON(!kbdev->pm.backend.gpu_powered);
for (i = 0; i != kbdev->nr_hw_address_spaces; ++i) {
struct kbase_context *kctx;
kctx = kbdev->as_to_kctx[i];
if (kctx) {
if (atomic_read(&kctx->refcount)) {
WARN_ON(kctx->as_nr != i);
kbase_mmu_update(kctx);
} else {
/* This context might have been assigned an
* AS before, clear it.
*/
kbdev->as_to_kctx[kctx->as_nr] = NULL;
kctx->as_nr = KBASEP_AS_NR_INVALID;
}
} else {
kbase_mmu_disable_as(kbdev, i);
}
}
}

View File

@@ -0,0 +1,135 @@
/*
*
* (C) COPYRIGHT 2017-2018 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
* Foundation, and any use by you of this program is subject to the terms
* of such GNU licence.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, you can access it online at
* http://www.gnu.org/licenses/gpl-2.0.html.
*
* SPDX-License-Identifier: GPL-2.0
*
*/
#ifndef _KBASE_CTX_SCHED_H_
#define _KBASE_CTX_SCHED_H_
#include <mali_kbase.h>
/**
* The Context Scheduler manages address space assignment and reference
* counting to kbase_context. The interface has been designed to minimise
* interactions between the Job Scheduler and Power Management/MMU to support
* the existing Job Scheduler interface.
*
* The initial implementation of the Context Scheduler does not schedule
* contexts. Instead it relies on the Job Scheduler to make decisions of
* when to schedule/evict contexts if address spaces are starved. In the
* future, once an interface between the CS and JS has been devised to
* provide enough information about how each context is consuming GPU resources,
* those decisions can be made in the CS itself, thereby reducing duplicated
* code.
*/
/**
* kbase_ctx_sched_init - Initialise the context scheduler
* @kbdev: The device for which the context scheduler needs to be initialised
*
* This must be called during device initialisation. The number of hardware
* address spaces must already be established before calling this function.
*
* Return: 0 for success, otherwise failure
*/
int kbase_ctx_sched_init(struct kbase_device *kbdev);
/**
* kbase_ctx_sched_term - Terminate the context scheduler
* @kbdev: The device for which the context scheduler needs to be terminated
*
* This must be called during device termination after all contexts have been
* destroyed.
*/
void kbase_ctx_sched_term(struct kbase_device *kbdev);
/**
* kbase_ctx_sched_retain_ctx - Retain a reference to the @ref kbase_context
* @kctx: The context to which to retain a reference
*
* This function should be called whenever an address space should be assigned
* to a context and programmed onto the MMU. It should typically be called
* when jobs are ready to be submitted to the GPU.
*
* It can be called as many times as necessary. The address space will be
* assigned to the context for as long as there is a reference to said context.
*
* The kbase_device::mmu_hw_mutex and kbase_device::hwaccess_lock locks must be
* held whilst calling this function.
*
* Return: The address space that the context has been assigned to or
* KBASEP_AS_NR_INVALID if no address space was available.
*/
int kbase_ctx_sched_retain_ctx(struct kbase_context *kctx);
/**
* kbase_ctx_sched_retain_ctx_refcount
* @kctx: The context to which to retain a reference
*
* This function only retains a reference to the context. It must be called
* only when the context already has a reference.
*
* This is typically called inside an atomic session where we know the context
* is already scheduled in but want to take an extra reference to ensure that
* it doesn't get descheduled.
*
* The kbase_device::hwaccess_lock must be held whilst calling this function
*/
void kbase_ctx_sched_retain_ctx_refcount(struct kbase_context *kctx);
/**
* kbase_ctx_sched_release_ctx - Release a reference to the @ref kbase_context
* @kctx: The context from which to release a reference
*
* This function should be called whenever an address space could be unassigned
* from a context. When there are no more references to said context, the
* address space previously assigned to this context shall be reassigned to
* other contexts as needed.
*
* The kbase_device::hwaccess_lock must be held whilst calling this function
*/
void kbase_ctx_sched_release_ctx(struct kbase_context *kctx);
/**
* kbase_ctx_sched_remove_ctx - Unassign previously assigned address space
* @kctx: The context to be removed
*
* This function should be called when a context is being destroyed. The
* context must no longer have any reference. If it has been assigned an
* address space before then the AS will be unprogrammed.
*
* The kbase_device::mmu_hw_mutex and kbase_device::hwaccess_lock locks must be
* held whilst calling this function.
*/
void kbase_ctx_sched_remove_ctx(struct kbase_context *kctx);
/**
* kbase_ctx_sched_restore_all_as - Reprogram all address spaces
* @kbdev: The device for which address spaces to be reprogrammed
*
* This function shall reprogram all address spaces previously assigned to
* contexts. It can be used after the GPU is reset.
*
* The kbase_device::mmu_hw_mutex and kbase_device::hwaccess_lock locks must be
* held whilst calling this function.
*/
void kbase_ctx_sched_restore_all_as(struct kbase_device *kbdev);
#endif /* _KBASE_CTX_SCHED_H_ */

View File

@@ -0,0 +1,44 @@
/*
*
* (C) COPYRIGHT 2012-2014 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
* Foundation, and any use by you of this program is subject to the terms
* of such GNU licence.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, you can access it online at
* http://www.gnu.org/licenses/gpl-2.0.html.
*
* SPDX-License-Identifier: GPL-2.0
*
*/
#include <mali_kbase.h>
static struct kbasep_debug_assert_cb kbasep_debug_assert_registered_cb = {
NULL,
NULL
};
void kbase_debug_assert_register_hook(kbase_debug_assert_hook *func, void *param)
{
kbasep_debug_assert_registered_cb.func = func;
kbasep_debug_assert_registered_cb.param = param;
}
void kbasep_debug_assert_call_hook(void)
{
if (kbasep_debug_assert_registered_cb.func != NULL)
kbasep_debug_assert_registered_cb.func(kbasep_debug_assert_registered_cb.param);
}
KBASE_EXPORT_SYMBOL(kbasep_debug_assert_call_hook);

View File

@@ -0,0 +1,169 @@
/*
*
* (C) COPYRIGHT 2012-2015, 2017 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
* Foundation, and any use by you of this program is subject to the terms
* of such GNU licence.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, you can access it online at
* http://www.gnu.org/licenses/gpl-2.0.html.
*
* SPDX-License-Identifier: GPL-2.0
*
*/
#ifndef _KBASE_DEBUG_H
#define _KBASE_DEBUG_H
#include <linux/bug.h>
/** @brief If equals to 0, a trace containing the file, line, and function will be displayed before each message. */
#define KBASE_DEBUG_SKIP_TRACE 0
/** @brief If different from 0, the trace will only contain the file and line. */
#define KBASE_DEBUG_SKIP_FUNCTION_NAME 0
/** @brief Disable the asserts tests if set to 1. Default is to disable the asserts in release. */
#ifndef KBASE_DEBUG_DISABLE_ASSERTS
#ifdef CONFIG_MALI_DEBUG
#define KBASE_DEBUG_DISABLE_ASSERTS 0
#else
#define KBASE_DEBUG_DISABLE_ASSERTS 1
#endif
#endif /* KBASE_DEBUG_DISABLE_ASSERTS */
/** Function type that is called on an KBASE_DEBUG_ASSERT() or KBASE_DEBUG_ASSERT_MSG() */
typedef void (kbase_debug_assert_hook) (void *);
struct kbasep_debug_assert_cb {
kbase_debug_assert_hook *func;
void *param;
};
/**
* @def KBASEP_DEBUG_PRINT_TRACE
* @brief Private macro containing the format of the trace to display before every message
* @sa KBASE_DEBUG_SKIP_TRACE, KBASE_DEBUG_SKIP_FUNCTION_NAME
*/
#if !KBASE_DEBUG_SKIP_TRACE
#define KBASEP_DEBUG_PRINT_TRACE \
"In file: " __FILE__ " line: " CSTD_STR2(__LINE__)
#if !KBASE_DEBUG_SKIP_FUNCTION_NAME
#define KBASEP_DEBUG_PRINT_FUNCTION __func__
#else
#define KBASEP_DEBUG_PRINT_FUNCTION ""
#endif
#else
#define KBASEP_DEBUG_PRINT_TRACE ""
#endif
/**
* @def KBASEP_DEBUG_ASSERT_OUT(trace, function, ...)
* @brief (Private) system printing function associated to the @ref KBASE_DEBUG_ASSERT_MSG event.
* @param trace location in the code from where the message is printed
* @param function function from where the message is printed
* @param ... Format string followed by format arguments.
* @note function parameter cannot be concatenated with other strings
*/
/* Select the correct system output function*/
#ifdef CONFIG_MALI_DEBUG
#define KBASEP_DEBUG_ASSERT_OUT(trace, function, ...)\
do { \
pr_err("Mali<ASSERT>: %s function:%s ", trace, function);\
pr_err(__VA_ARGS__);\
pr_err("\n");\
} while (false)
#else
#define KBASEP_DEBUG_ASSERT_OUT(trace, function, ...) CSTD_NOP()
#endif
#ifdef CONFIG_MALI_DEBUG
#define KBASE_CALL_ASSERT_HOOK() kbasep_debug_assert_call_hook()
#else
#define KBASE_CALL_ASSERT_HOOK() CSTD_NOP()
#endif
/**
* @def KBASE_DEBUG_ASSERT(expr)
* @brief Calls @ref KBASE_PRINT_ASSERT and prints the expression @a expr if @a expr is false
*
* @note This macro does nothing if the flag @ref KBASE_DEBUG_DISABLE_ASSERTS is set to 1
*
* @param expr Boolean expression
*/
#define KBASE_DEBUG_ASSERT(expr) \
KBASE_DEBUG_ASSERT_MSG(expr, #expr)
#if KBASE_DEBUG_DISABLE_ASSERTS
#define KBASE_DEBUG_ASSERT_MSG(expr, ...) CSTD_NOP()
#else
/**
* @def KBASE_DEBUG_ASSERT_MSG(expr, ...)
* @brief Calls @ref KBASEP_DEBUG_ASSERT_OUT and prints the given message if @a expr is false
*
* @note This macro does nothing if the flag @ref KBASE_DEBUG_DISABLE_ASSERTS is set to 1
*
* @param expr Boolean expression
* @param ... Message to display when @a expr is false, as a format string followed by format arguments.
*/
#define KBASE_DEBUG_ASSERT_MSG(expr, ...) \
do { \
if (!(expr)) { \
KBASEP_DEBUG_ASSERT_OUT(KBASEP_DEBUG_PRINT_TRACE, KBASEP_DEBUG_PRINT_FUNCTION, __VA_ARGS__);\
KBASE_CALL_ASSERT_HOOK();\
BUG();\
} \
} while (false)
#endif /* KBASE_DEBUG_DISABLE_ASSERTS */
/**
* @def KBASE_DEBUG_CODE( X )
* @brief Executes the code inside the macro only in debug mode
*
* @param X Code to compile only in debug mode.
*/
#ifdef CONFIG_MALI_DEBUG
#define KBASE_DEBUG_CODE(X) X
#else
#define KBASE_DEBUG_CODE(X) CSTD_NOP()
#endif /* CONFIG_MALI_DEBUG */
/** @} */
/**
* @brief Register a function to call on ASSERT
*
* Such functions will \b only be called during Debug mode, and for debugging
* features \b only. Do not rely on them to be called in general use.
*
* To disable the hook, supply NULL to \a func.
*
* @note This function is not thread-safe, and should only be used to
* register/deregister once in the module's lifetime.
*
* @param[in] func the function to call when an assert is triggered.
* @param[in] param the parameter to pass to \a func when calling it
*/
void kbase_debug_assert_register_hook(kbase_debug_assert_hook *func, void *param);
/**
* @brief Call a debug assert hook previously registered with kbase_debug_assert_register_hook()
*
* @note This function is not thread-safe with respect to multiple threads
* registering functions and parameters with
* kbase_debug_assert_register_hook(). Otherwise, thread safety is the
* responsibility of the registered hook.
*/
void kbasep_debug_assert_call_hook(void);
#endif /* _KBASE_DEBUG_H */

View File

@@ -0,0 +1,504 @@
/*
*
* (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
* Foundation, and any use by you of this program is subject to the terms
* of such GNU licence.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, you can access it online at
* http://www.gnu.org/licenses/gpl-2.0.html.
*
* SPDX-License-Identifier: GPL-2.0
*
*/
#include <mali_kbase.h>
#include <linux/spinlock.h>
#include <mali_kbase_hwaccess_jm.h>
#ifdef CONFIG_DEBUG_FS
static bool kbase_is_job_fault_event_pending(struct kbase_device *kbdev)
{
struct list_head *event_list = &kbdev->job_fault_event_list;
unsigned long flags;
bool ret;
spin_lock_irqsave(&kbdev->job_fault_event_lock, flags);
ret = !list_empty(event_list);
spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags);
return ret;
}
static bool kbase_ctx_has_no_event_pending(struct kbase_context *kctx)
{
struct kbase_device *kbdev = kctx->kbdev;
struct list_head *event_list = &kctx->kbdev->job_fault_event_list;
struct base_job_fault_event *event;
unsigned long flags;
spin_lock_irqsave(&kbdev->job_fault_event_lock, flags);
if (list_empty(event_list)) {
spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags);
return true;
}
list_for_each_entry(event, event_list, head) {
if (event->katom->kctx == kctx) {
spin_unlock_irqrestore(&kbdev->job_fault_event_lock,
flags);
return false;
}
}
spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags);
return true;
}
/* wait until the fault happen and copy the event */
static int kbase_job_fault_event_wait(struct kbase_device *kbdev,
struct base_job_fault_event *event)
{
struct list_head *event_list = &kbdev->job_fault_event_list;
struct base_job_fault_event *event_in;
unsigned long flags;
spin_lock_irqsave(&kbdev->job_fault_event_lock, flags);
if (list_empty(event_list)) {
spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags);
if (wait_event_interruptible(kbdev->job_fault_wq,
kbase_is_job_fault_event_pending(kbdev)))
return -ERESTARTSYS;
spin_lock_irqsave(&kbdev->job_fault_event_lock, flags);
}
event_in = list_entry(event_list->next,
struct base_job_fault_event, head);
event->event_code = event_in->event_code;
event->katom = event_in->katom;
spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags);
return 0;
}
/* remove the event from the queue */
static struct base_job_fault_event *kbase_job_fault_event_dequeue(
struct kbase_device *kbdev, struct list_head *event_list)
{
struct base_job_fault_event *event;
event = list_entry(event_list->next,
struct base_job_fault_event, head);
list_del(event_list->next);
return event;
}
/* Remove all the following atoms after the failed atom in the same context
* Call the postponed bottom half of job done.
* Then, this context could be rescheduled.
*/
static void kbase_job_fault_resume_event_cleanup(struct kbase_context *kctx)
{
struct list_head *event_list = &kctx->job_fault_resume_event_list;
while (!list_empty(event_list)) {
struct base_job_fault_event *event;
event = kbase_job_fault_event_dequeue(kctx->kbdev,
&kctx->job_fault_resume_event_list);
kbase_jd_done_worker(&event->katom->work);
}
}
/* Remove all the failed atoms that belong to different contexts
* Resume all the contexts that were suspend due to failed job
*/
static void kbase_job_fault_event_cleanup(struct kbase_device *kbdev)
{
struct list_head *event_list = &kbdev->job_fault_event_list;
unsigned long flags;
spin_lock_irqsave(&kbdev->job_fault_event_lock, flags);
while (!list_empty(event_list)) {
kbase_job_fault_event_dequeue(kbdev, event_list);
spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags);
wake_up(&kbdev->job_fault_resume_wq);
spin_lock_irqsave(&kbdev->job_fault_event_lock, flags);
}
spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags);
}
static void kbase_job_fault_resume_worker(struct work_struct *data)
{
struct base_job_fault_event *event = container_of(data,
struct base_job_fault_event, job_fault_work);
struct kbase_context *kctx;
struct kbase_jd_atom *katom;
katom = event->katom;
kctx = katom->kctx;
dev_info(kctx->kbdev->dev, "Job dumping wait\n");
/* When it was waked up, it need to check if queue is empty or the
* failed atom belongs to different context. If yes, wake up. Both
* of them mean the failed job has been dumped. Please note, it
* should never happen that the job_fault_event_list has the two
* atoms belong to the same context.
*/
wait_event(kctx->kbdev->job_fault_resume_wq,
kbase_ctx_has_no_event_pending(kctx));
atomic_set(&kctx->job_fault_count, 0);
kbase_jd_done_worker(&katom->work);
/* In case the following atoms were scheduled during failed job dump
* the job_done_worker was held. We need to rerun it after the dump
* was finished
*/
kbase_job_fault_resume_event_cleanup(kctx);
dev_info(kctx->kbdev->dev, "Job dumping finish, resume scheduler\n");
}
static struct base_job_fault_event *kbase_job_fault_event_queue(
struct list_head *event_list,
struct kbase_jd_atom *atom,
u32 completion_code)
{
struct base_job_fault_event *event;
event = &atom->fault_event;
event->katom = atom;
event->event_code = completion_code;
list_add_tail(&event->head, event_list);
return event;
}
static void kbase_job_fault_event_post(struct kbase_device *kbdev,
struct kbase_jd_atom *katom, u32 completion_code)
{
struct base_job_fault_event *event;
unsigned long flags;
spin_lock_irqsave(&kbdev->job_fault_event_lock, flags);
event = kbase_job_fault_event_queue(&kbdev->job_fault_event_list,
katom, completion_code);
spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags);
wake_up_interruptible(&kbdev->job_fault_wq);
INIT_WORK(&event->job_fault_work, kbase_job_fault_resume_worker);
queue_work(kbdev->job_fault_resume_workq, &event->job_fault_work);
dev_info(katom->kctx->kbdev->dev, "Job fault happen, start dump: %d_%d",
katom->kctx->tgid, katom->kctx->id);
}
/*
* This function will process the job fault
* Get the register copy
* Send the failed job dump event
* Create a Wait queue to wait until the job dump finish
*/
bool kbase_debug_job_fault_process(struct kbase_jd_atom *katom,
u32 completion_code)
{
struct kbase_context *kctx = katom->kctx;
/* Check if dumping is in the process
* only one atom of each context can be dumped at the same time
* If the atom belongs to different context, it can be dumped
*/
if (atomic_read(&kctx->job_fault_count) > 0) {
kbase_job_fault_event_queue(
&kctx->job_fault_resume_event_list,
katom, completion_code);
dev_info(kctx->kbdev->dev, "queue:%d\n",
kbase_jd_atom_id(kctx, katom));
return true;
}
if (kctx->kbdev->job_fault_debug == true) {
if (completion_code != BASE_JD_EVENT_DONE) {
if (kbase_job_fault_get_reg_snapshot(kctx) == false) {
dev_warn(kctx->kbdev->dev, "get reg dump failed\n");
return false;
}
kbase_job_fault_event_post(kctx->kbdev, katom,
completion_code);
atomic_inc(&kctx->job_fault_count);
dev_info(kctx->kbdev->dev, "post:%d\n",
kbase_jd_atom_id(kctx, katom));
return true;
}
}
return false;
}
static int debug_job_fault_show(struct seq_file *m, void *v)
{
struct kbase_device *kbdev = m->private;
struct base_job_fault_event *event = (struct base_job_fault_event *)v;
struct kbase_context *kctx = event->katom->kctx;
int i;
dev_info(kbdev->dev, "debug job fault seq show:%d_%d, %d",
kctx->tgid, kctx->id, event->reg_offset);
if (kctx->reg_dump == NULL) {
dev_warn(kbdev->dev, "reg dump is NULL");
return -1;
}
if (kctx->reg_dump[event->reg_offset] ==
REGISTER_DUMP_TERMINATION_FLAG) {
/* Return the error here to stop the read. And the
* following next() will not be called. The stop can
* get the real event resource and release it
*/
return -1;
}
if (event->reg_offset == 0)
seq_printf(m, "%d_%d\n", kctx->tgid, kctx->id);
for (i = 0; i < 50; i++) {
if (kctx->reg_dump[event->reg_offset] ==
REGISTER_DUMP_TERMINATION_FLAG) {
break;
}
seq_printf(m, "%08x: %08x\n",
kctx->reg_dump[event->reg_offset],
kctx->reg_dump[1+event->reg_offset]);
event->reg_offset += 2;
}
return 0;
}
static void *debug_job_fault_next(struct seq_file *m, void *v, loff_t *pos)
{
struct kbase_device *kbdev = m->private;
struct base_job_fault_event *event = (struct base_job_fault_event *)v;
dev_info(kbdev->dev, "debug job fault seq next:%d, %d",
event->reg_offset, (int)*pos);
return event;
}
static void *debug_job_fault_start(struct seq_file *m, loff_t *pos)
{
struct kbase_device *kbdev = m->private;
struct base_job_fault_event *event;
dev_info(kbdev->dev, "fault job seq start:%d", (int)*pos);
/* The condition is trick here. It needs make sure the
* fault hasn't happened and the dumping hasn't been started,
* or the dumping has finished
*/
if (*pos == 0) {
event = kmalloc(sizeof(*event), GFP_KERNEL);
if (!event)
return NULL;
event->reg_offset = 0;
if (kbase_job_fault_event_wait(kbdev, event)) {
kfree(event);
return NULL;
}
/* The cache flush workaround is called in bottom half of
* job done but we delayed it. Now we should clean cache
* earlier. Then the GPU memory dump should be correct.
*/
kbase_backend_cacheclean(kbdev, event->katom);
} else
return NULL;
return event;
}
static void debug_job_fault_stop(struct seq_file *m, void *v)
{
struct kbase_device *kbdev = m->private;
/* here we wake up the kbase_jd_done_worker after stop, it needs
* get the memory dump before the register dump in debug daemon,
* otherwise, the memory dump may be incorrect.
*/
if (v != NULL) {
kfree(v);
dev_info(kbdev->dev, "debug job fault seq stop stage 1");
} else {
unsigned long flags;
spin_lock_irqsave(&kbdev->job_fault_event_lock, flags);
if (!list_empty(&kbdev->job_fault_event_list)) {
kbase_job_fault_event_dequeue(kbdev,
&kbdev->job_fault_event_list);
wake_up(&kbdev->job_fault_resume_wq);
}
spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags);
dev_info(kbdev->dev, "debug job fault seq stop stage 2");
}
}
static const struct seq_operations ops = {
.start = debug_job_fault_start,
.next = debug_job_fault_next,
.stop = debug_job_fault_stop,
.show = debug_job_fault_show,
};
static int debug_job_fault_open(struct inode *in, struct file *file)
{
struct kbase_device *kbdev = in->i_private;
seq_open(file, &ops);
((struct seq_file *)file->private_data)->private = kbdev;
dev_info(kbdev->dev, "debug job fault seq open");
kbdev->job_fault_debug = true;
return 0;
}
static int debug_job_fault_release(struct inode *in, struct file *file)
{
struct kbase_device *kbdev = in->i_private;
seq_release(in, file);
kbdev->job_fault_debug = false;
/* Clean the unprocessed job fault. After that, all the suspended
* contexts could be rescheduled.
*/
kbase_job_fault_event_cleanup(kbdev);
dev_info(kbdev->dev, "debug job fault seq close");
return 0;
}
static const struct file_operations kbasep_debug_job_fault_fops = {
.open = debug_job_fault_open,
.read = seq_read,
.llseek = seq_lseek,
.release = debug_job_fault_release,
};
/*
* Initialize debugfs entry for job fault dump
*/
void kbase_debug_job_fault_debugfs_init(struct kbase_device *kbdev)
{
debugfs_create_file("job_fault", S_IRUGO,
kbdev->mali_debugfs_directory, kbdev,
&kbasep_debug_job_fault_fops);
}
int kbase_debug_job_fault_dev_init(struct kbase_device *kbdev)
{
INIT_LIST_HEAD(&kbdev->job_fault_event_list);
init_waitqueue_head(&(kbdev->job_fault_wq));
init_waitqueue_head(&(kbdev->job_fault_resume_wq));
spin_lock_init(&kbdev->job_fault_event_lock);
kbdev->job_fault_resume_workq = alloc_workqueue(
"kbase_job_fault_resume_work_queue", WQ_MEM_RECLAIM, 1);
if (!kbdev->job_fault_resume_workq)
return -ENOMEM;
kbdev->job_fault_debug = false;
return 0;
}
/*
* Release the relevant resource per device
*/
void kbase_debug_job_fault_dev_term(struct kbase_device *kbdev)
{
destroy_workqueue(kbdev->job_fault_resume_workq);
}
/*
* Initialize the relevant data structure per context
*/
void kbase_debug_job_fault_context_init(struct kbase_context *kctx)
{
/* We need allocate double size register range
* Because this memory will keep the register address and value
*/
kctx->reg_dump = vmalloc(0x4000 * 2);
if (kctx->reg_dump == NULL)
return;
if (kbase_debug_job_fault_reg_snapshot_init(kctx, 0x4000) == false) {
vfree(kctx->reg_dump);
kctx->reg_dump = NULL;
}
INIT_LIST_HEAD(&kctx->job_fault_resume_event_list);
atomic_set(&kctx->job_fault_count, 0);
}
/*
* release the relevant resource per context
*/
void kbase_debug_job_fault_context_term(struct kbase_context *kctx)
{
vfree(kctx->reg_dump);
}
#else /* CONFIG_DEBUG_FS */
int kbase_debug_job_fault_dev_init(struct kbase_device *kbdev)
{
kbdev->job_fault_debug = false;
return 0;
}
void kbase_debug_job_fault_dev_term(struct kbase_device *kbdev)
{
}
#endif /* CONFIG_DEBUG_FS */

View File

@@ -0,0 +1,101 @@
/*
*
* (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
* Foundation, and any use by you of this program is subject to the terms
* of such GNU licence.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, you can access it online at
* http://www.gnu.org/licenses/gpl-2.0.html.
*
* SPDX-License-Identifier: GPL-2.0
*
*/
#ifndef _KBASE_DEBUG_JOB_FAULT_H
#define _KBASE_DEBUG_JOB_FAULT_H
#include <linux/debugfs.h>
#include <linux/seq_file.h>
#define REGISTER_DUMP_TERMINATION_FLAG 0xFFFFFFFF
/**
* kbase_debug_job_fault_dev_init - Create the fault event wait queue
* per device and initialize the required lists.
* @kbdev: Device pointer
*
* Return: Zero on success or a negative error code.
*/
int kbase_debug_job_fault_dev_init(struct kbase_device *kbdev);
/**
* kbase_debug_job_fault_debugfs_init - Initialize job fault debug sysfs
* @kbdev: Device pointer
*/
void kbase_debug_job_fault_debugfs_init(struct kbase_device *kbdev);
/**
* kbase_debug_job_fault_dev_term - Clean up resources created in
* kbase_debug_job_fault_dev_init.
* @kbdev: Device pointer
*/
void kbase_debug_job_fault_dev_term(struct kbase_device *kbdev);
/**
* kbase_debug_job_fault_context_init - Initialize the relevant
* data structure per context
* @kctx: KBase context pointer
*/
void kbase_debug_job_fault_context_init(struct kbase_context *kctx);
/**
* kbase_debug_job_fault_context_term - Release the relevant
* resource per context
* @kctx: KBase context pointer
*/
void kbase_debug_job_fault_context_term(struct kbase_context *kctx);
/**
* kbase_debug_job_fault_process - Process the failed job.
* It will send a event and wake up the job fault waiting queue
* Then create a work queue to wait for job dump finish
* This function should be called in the interrupt handler and before
* jd_done that make sure the jd_done_worker will be delayed until the
* job dump finish
* @katom: The failed atom pointer
* @completion_code: the job status
* @return true if dump is going on
*/
bool kbase_debug_job_fault_process(struct kbase_jd_atom *katom,
u32 completion_code);
/**
* kbase_debug_job_fault_reg_snapshot_init - Set the interested registers
* address during the job fault process, the relevant registers will
* be saved when a job fault happen
* @kctx: KBase context pointer
* @reg_range: Maximum register address space
* @return true if initializing successfully
*/
bool kbase_debug_job_fault_reg_snapshot_init(struct kbase_context *kctx,
int reg_range);
/**
* kbase_job_fault_get_reg_snapshot - Read the interested registers for
* failed job dump
* @kctx: KBase context pointer
* @return true if getting registers successfully
*/
bool kbase_job_fault_get_reg_snapshot(struct kbase_context *kctx);
#endif /*_KBASE_DEBUG_JOB_FAULT_H*/

View File

@@ -0,0 +1,311 @@
/*
*
* (C) COPYRIGHT 2013-2018 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
* Foundation, and any use by you of this program is subject to the terms
* of such GNU licence.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, you can access it online at
* http://www.gnu.org/licenses/gpl-2.0.html.
*
* SPDX-License-Identifier: GPL-2.0
*
*/
/*
* Debugfs interface to dump the memory visible to the GPU
*/
#include "mali_kbase_debug_mem_view.h"
#include "mali_kbase.h"
#include <linux/list.h>
#include <linux/file.h>
#ifdef CONFIG_DEBUG_FS
struct debug_mem_mapping {
struct list_head node;
struct kbase_mem_phy_alloc *alloc;
unsigned long flags;
u64 start_pfn;
size_t nr_pages;
};
struct debug_mem_data {
struct list_head mapping_list;
struct kbase_context *kctx;
};
struct debug_mem_seq_off {
struct list_head *lh;
size_t offset;
};
static void *debug_mem_start(struct seq_file *m, loff_t *_pos)
{
struct debug_mem_data *mem_data = m->private;
struct debug_mem_seq_off *data;
struct debug_mem_mapping *map;
loff_t pos = *_pos;
list_for_each_entry(map, &mem_data->mapping_list, node) {
if (pos >= map->nr_pages) {
pos -= map->nr_pages;
} else {
data = kmalloc(sizeof(*data), GFP_KERNEL);
if (!data)
return NULL;
data->lh = &map->node;
data->offset = pos;
return data;
}
}
/* Beyond the end */
return NULL;
}
static void debug_mem_stop(struct seq_file *m, void *v)
{
kfree(v);
}
static void *debug_mem_next(struct seq_file *m, void *v, loff_t *pos)
{
struct debug_mem_data *mem_data = m->private;
struct debug_mem_seq_off *data = v;
struct debug_mem_mapping *map;
map = list_entry(data->lh, struct debug_mem_mapping, node);
if (data->offset < map->nr_pages - 1) {
data->offset++;
++*pos;
return data;
}
if (list_is_last(data->lh, &mem_data->mapping_list)) {
kfree(data);
return NULL;
}
data->lh = data->lh->next;
data->offset = 0;
++*pos;
return data;
}
static int debug_mem_show(struct seq_file *m, void *v)
{
struct debug_mem_data *mem_data = m->private;
struct debug_mem_seq_off *data = v;
struct debug_mem_mapping *map;
int i, j;
struct page *page;
uint32_t *mapping;
pgprot_t prot = PAGE_KERNEL;
map = list_entry(data->lh, struct debug_mem_mapping, node);
kbase_gpu_vm_lock(mem_data->kctx);
if (data->offset >= map->alloc->nents) {
seq_printf(m, "%016llx: Unbacked page\n\n", (map->start_pfn +
data->offset) << PAGE_SHIFT);
goto out;
}
if (!(map->flags & KBASE_REG_CPU_CACHED))
prot = pgprot_writecombine(prot);
page = phys_to_page(as_phys_addr_t(map->alloc->pages[data->offset]));
mapping = vmap(&page, 1, VM_MAP, prot);
if (!mapping)
goto out;
for (i = 0; i < PAGE_SIZE; i += 4*sizeof(*mapping)) {
seq_printf(m, "%016llx:", i + ((map->start_pfn +
data->offset) << PAGE_SHIFT));
for (j = 0; j < 4*sizeof(*mapping); j += sizeof(*mapping))
seq_printf(m, " %08x", mapping[(i+j)/sizeof(*mapping)]);
seq_putc(m, '\n');
}
vunmap(mapping);
seq_putc(m, '\n');
out:
kbase_gpu_vm_unlock(mem_data->kctx);
return 0;
}
static const struct seq_operations ops = {
.start = debug_mem_start,
.next = debug_mem_next,
.stop = debug_mem_stop,
.show = debug_mem_show,
};
static int debug_mem_zone_open(struct rb_root *rbtree,
struct debug_mem_data *mem_data)
{
int ret = 0;
struct rb_node *p;
struct kbase_va_region *reg;
struct debug_mem_mapping *mapping;
for (p = rb_first(rbtree); p; p = rb_next(p)) {
reg = rb_entry(p, struct kbase_va_region, rblink);
if (reg->gpu_alloc == NULL)
/* Empty region - ignore */
continue;
mapping = kmalloc(sizeof(*mapping), GFP_KERNEL);
if (!mapping) {
ret = -ENOMEM;
goto out;
}
mapping->alloc = kbase_mem_phy_alloc_get(reg->gpu_alloc);
mapping->start_pfn = reg->start_pfn;
mapping->nr_pages = reg->nr_pages;
mapping->flags = reg->flags;
list_add_tail(&mapping->node, &mem_data->mapping_list);
}
out:
return ret;
}
static int debug_mem_open(struct inode *i, struct file *file)
{
struct file *kctx_file = i->i_private;
struct kbase_context *kctx = kctx_file->private_data;
struct debug_mem_data *mem_data;
int ret;
ret = seq_open(file, &ops);
if (ret)
return ret;
mem_data = kmalloc(sizeof(*mem_data), GFP_KERNEL);
if (!mem_data) {
ret = -ENOMEM;
goto out;
}
mem_data->kctx = kctx;
INIT_LIST_HEAD(&mem_data->mapping_list);
get_file(kctx_file);
kbase_gpu_vm_lock(kctx);
ret = debug_mem_zone_open(&kctx->reg_rbtree_same, mem_data);
if (0 != ret) {
kbase_gpu_vm_unlock(kctx);
goto out;
}
ret = debug_mem_zone_open(&kctx->reg_rbtree_exec, mem_data);
if (ret != 0) {
kbase_gpu_vm_unlock(kctx);
goto out;
}
ret = debug_mem_zone_open(&kctx->reg_rbtree_custom, mem_data);
if (0 != ret) {
kbase_gpu_vm_unlock(kctx);
goto out;
}
kbase_gpu_vm_unlock(kctx);
((struct seq_file *)file->private_data)->private = mem_data;
return 0;
out:
if (mem_data) {
while (!list_empty(&mem_data->mapping_list)) {
struct debug_mem_mapping *mapping;
mapping = list_first_entry(&mem_data->mapping_list,
struct debug_mem_mapping, node);
kbase_mem_phy_alloc_put(mapping->alloc);
list_del(&mapping->node);
kfree(mapping);
}
fput(kctx_file);
kfree(mem_data);
}
seq_release(i, file);
return ret;
}
static int debug_mem_release(struct inode *inode, struct file *file)
{
struct file *kctx_file = inode->i_private;
struct seq_file *sfile = file->private_data;
struct debug_mem_data *mem_data = sfile->private;
struct debug_mem_mapping *mapping;
seq_release(inode, file);
while (!list_empty(&mem_data->mapping_list)) {
mapping = list_first_entry(&mem_data->mapping_list,
struct debug_mem_mapping, node);
kbase_mem_phy_alloc_put(mapping->alloc);
list_del(&mapping->node);
kfree(mapping);
}
kfree(mem_data);
fput(kctx_file);
return 0;
}
static const struct file_operations kbase_debug_mem_view_fops = {
.open = debug_mem_open,
.release = debug_mem_release,
.read = seq_read,
.llseek = seq_lseek
};
/**
* kbase_debug_mem_view_init - Initialise the mem_view sysfs file
* @kctx_file: The /dev/mali0 file instance for the context
*
* This function creates a "mem_view" file which can be used to get a view of
* the context's memory as the GPU sees it (i.e. using the GPU's page tables).
*
* The file is cleaned up by a call to debugfs_remove_recursive() deleting the
* parent directory.
*/
void kbase_debug_mem_view_init(struct file *kctx_file)
{
struct kbase_context *kctx = kctx_file->private_data;
debugfs_create_file("mem_view", S_IRUSR, kctx->kctx_dentry, kctx_file,
&kbase_debug_mem_view_fops);
}
#endif

View File

@@ -0,0 +1,30 @@
/*
*
* (C) COPYRIGHT 2013-2014 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
* Foundation, and any use by you of this program is subject to the terms
* of such GNU licence.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, you can access it online at
* http://www.gnu.org/licenses/gpl-2.0.html.
*
* SPDX-License-Identifier: GPL-2.0
*
*/
#ifndef _KBASE_DEBUG_MEM_VIEW_H
#define _KBASE_DEBUG_MEM_VIEW_H
#include <mali_kbase.h>
void kbase_debug_mem_view_init(struct file *kctx_file);
#endif

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,674 @@
/*
*
* (C) COPYRIGHT 2010-2018 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
* Foundation, and any use by you of this program is subject to the terms
* of such GNU licence.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, you can access it online at
* http://www.gnu.org/licenses/gpl-2.0.html.
*
* SPDX-License-Identifier: GPL-2.0
*
*/
/*
* Base kernel device APIs
*/
#include <linux/debugfs.h>
#include <linux/dma-mapping.h>
#include <linux/seq_file.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/of_platform.h>
#include <mali_kbase.h>
#include <mali_kbase_defs.h>
#include <mali_kbase_hwaccess_instr.h>
#include <mali_kbase_hw.h>
#include <mali_kbase_config_defaults.h>
#include <mali_kbase_profiling_gator_api.h>
/* NOTE: Magic - 0x45435254 (TRCE in ASCII).
* Supports tracing feature provided in the base module.
* Please keep it in sync with the value of base module.
*/
#define TRACE_BUFFER_HEADER_SPECIAL 0x45435254
#if KBASE_TRACE_ENABLE
static const char *kbasep_trace_code_string[] = {
/* IMPORTANT: USE OF SPECIAL #INCLUDE OF NON-STANDARD HEADER FILE
* THIS MUST BE USED AT THE START OF THE ARRAY */
#define KBASE_TRACE_CODE_MAKE_CODE(X) # X
#include "mali_kbase_trace_defs.h"
#undef KBASE_TRACE_CODE_MAKE_CODE
};
#endif
#define DEBUG_MESSAGE_SIZE 256
static int kbasep_trace_init(struct kbase_device *kbdev);
static void kbasep_trace_term(struct kbase_device *kbdev);
static void kbasep_trace_hook_wrapper(void *param);
struct kbase_device *kbase_device_alloc(void)
{
return kzalloc(sizeof(struct kbase_device), GFP_KERNEL);
}
static int kbase_device_as_init(struct kbase_device *kbdev, int i)
{
const char format[] = "mali_mmu%d";
char name[sizeof(format)];
const char poke_format[] = "mali_mmu%d_poker";
char poke_name[sizeof(poke_format)];
if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8316))
snprintf(poke_name, sizeof(poke_name), poke_format, i);
snprintf(name, sizeof(name), format, i);
kbdev->as[i].number = i;
kbdev->as[i].fault_addr = 0ULL;
kbdev->as[i].pf_wq = alloc_workqueue(name, 0, 1);
if (!kbdev->as[i].pf_wq)
return -EINVAL;
INIT_WORK(&kbdev->as[i].work_pagefault, page_fault_worker);
INIT_WORK(&kbdev->as[i].work_busfault, bus_fault_worker);
if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8316)) {
struct hrtimer *poke_timer = &kbdev->as[i].poke_timer;
struct work_struct *poke_work = &kbdev->as[i].poke_work;
kbdev->as[i].poke_wq = alloc_workqueue(poke_name, 0, 1);
if (!kbdev->as[i].poke_wq) {
destroy_workqueue(kbdev->as[i].pf_wq);
return -EINVAL;
}
INIT_WORK(poke_work, kbasep_as_do_poke);
hrtimer_init(poke_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
poke_timer->function = kbasep_as_poke_timer_callback;
kbdev->as[i].poke_refcount = 0;
kbdev->as[i].poke_state = 0u;
}
return 0;
}
static void kbase_device_as_term(struct kbase_device *kbdev, int i)
{
destroy_workqueue(kbdev->as[i].pf_wq);
if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8316))
destroy_workqueue(kbdev->as[i].poke_wq);
}
static int kbase_device_all_as_init(struct kbase_device *kbdev)
{
int i, err;
for (i = 0; i < kbdev->nr_hw_address_spaces; i++) {
err = kbase_device_as_init(kbdev, i);
if (err)
goto free_workqs;
}
return 0;
free_workqs:
for (; i > 0; i--)
kbase_device_as_term(kbdev, i);
return err;
}
static void kbase_device_all_as_term(struct kbase_device *kbdev)
{
int i;
for (i = 0; i < kbdev->nr_hw_address_spaces; i++)
kbase_device_as_term(kbdev, i);
}
int kbase_device_init(struct kbase_device * const kbdev)
{
int i, err;
#ifdef CONFIG_ARM64
struct device_node *np = NULL;
#endif /* CONFIG_ARM64 */
spin_lock_init(&kbdev->mmu_mask_change);
mutex_init(&kbdev->mmu_hw_mutex);
#ifdef CONFIG_ARM64
kbdev->cci_snoop_enabled = false;
np = kbdev->dev->of_node;
if (np != NULL) {
if (of_property_read_u32(np, "snoop_enable_smc",
&kbdev->snoop_enable_smc))
kbdev->snoop_enable_smc = 0;
if (of_property_read_u32(np, "snoop_disable_smc",
&kbdev->snoop_disable_smc))
kbdev->snoop_disable_smc = 0;
/* Either both or none of the calls should be provided. */
if (!((kbdev->snoop_disable_smc == 0
&& kbdev->snoop_enable_smc == 0)
|| (kbdev->snoop_disable_smc != 0
&& kbdev->snoop_enable_smc != 0))) {
WARN_ON(1);
err = -EINVAL;
goto fail;
}
}
#endif /* CONFIG_ARM64 */
/* Get the list of workarounds for issues on the current HW
* (identified by the GPU_ID register)
*/
err = kbase_hw_set_issues_mask(kbdev);
if (err)
goto fail;
/* Set the list of features available on the current HW
* (identified by the GPU_ID register)
*/
kbase_hw_set_features_mask(kbdev);
kbase_gpuprops_set_features(kbdev);
/* On Linux 4.0+, dma coherency is determined from device tree */
#if defined(CONFIG_ARM64) && LINUX_VERSION_CODE < KERNEL_VERSION(4, 0, 0)
set_dma_ops(kbdev->dev, &noncoherent_swiotlb_dma_ops);
#endif
/* Workaround a pre-3.13 Linux issue, where dma_mask is NULL when our
* device structure was created by device-tree
*/
if (!kbdev->dev->dma_mask)
kbdev->dev->dma_mask = &kbdev->dev->coherent_dma_mask;
err = dma_set_mask(kbdev->dev,
DMA_BIT_MASK(kbdev->gpu_props.mmu.pa_bits));
if (err)
goto dma_set_mask_failed;
err = dma_set_coherent_mask(kbdev->dev,
DMA_BIT_MASK(kbdev->gpu_props.mmu.pa_bits));
if (err)
goto dma_set_mask_failed;
kbdev->nr_hw_address_spaces = kbdev->gpu_props.num_address_spaces;
err = kbase_device_all_as_init(kbdev);
if (err)
goto as_init_failed;
spin_lock_init(&kbdev->hwcnt.lock);
err = kbasep_trace_init(kbdev);
if (err)
goto term_as;
mutex_init(&kbdev->cacheclean_lock);
#ifdef CONFIG_MALI_TRACE_TIMELINE
for (i = 0; i < BASE_JM_MAX_NR_SLOTS; ++i)
kbdev->timeline.slot_atoms_submitted[i] = 0;
for (i = 0; i <= KBASEP_TIMELINE_PM_EVENT_LAST; ++i)
atomic_set(&kbdev->timeline.pm_event_uid[i], 0);
#endif /* CONFIG_MALI_TRACE_TIMELINE */
/* fbdump profiling controls set to 0 - fbdump not enabled until changed by gator */
for (i = 0; i < FBDUMP_CONTROL_MAX; i++)
kbdev->kbase_profiling_controls[i] = 0;
kbase_debug_assert_register_hook(&kbasep_trace_hook_wrapper, kbdev);
atomic_set(&kbdev->ctx_num, 0);
err = kbase_instr_backend_init(kbdev);
if (err)
goto term_trace;
kbdev->pm.dvfs_period = DEFAULT_PM_DVFS_PERIOD;
kbdev->reset_timeout_ms = DEFAULT_RESET_TIMEOUT_MS;
if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_AARCH64_MMU))
kbdev->mmu_mode = kbase_mmu_mode_get_aarch64();
else
kbdev->mmu_mode = kbase_mmu_mode_get_lpae();
return 0;
term_trace:
kbasep_trace_term(kbdev);
term_as:
kbase_device_all_as_term(kbdev);
as_init_failed:
dma_set_mask_failed:
fail:
return err;
}
void kbase_device_term(struct kbase_device *kbdev)
{
KBASE_DEBUG_ASSERT(kbdev);
#if KBASE_TRACE_ENABLE
kbase_debug_assert_register_hook(NULL, NULL);
#endif
kbase_instr_backend_term(kbdev);
kbasep_trace_term(kbdev);
kbase_device_all_as_term(kbdev);
}
void kbase_device_free(struct kbase_device *kbdev)
{
kfree(kbdev);
}
int kbase_device_trace_buffer_install(
struct kbase_context *kctx, u32 *tb, size_t size)
{
unsigned long flags;
KBASE_DEBUG_ASSERT(kctx);
KBASE_DEBUG_ASSERT(tb);
/* Interface uses 16-bit value to track last accessed entry. Each entry
* is composed of two 32-bit words.
* This limits the size that can be handled without an overflow. */
if (0xFFFF * (2 * sizeof(u32)) < size)
return -EINVAL;
/* set up the header */
/* magic number in the first 4 bytes */
tb[0] = TRACE_BUFFER_HEADER_SPECIAL;
/* Store (write offset = 0, wrap counter = 0, transaction active = no)
* write offset 0 means never written.
* Offsets 1 to (wrap_offset - 1) used to store values when trace started
*/
tb[1] = 0;
/* install trace buffer */
spin_lock_irqsave(&kctx->jctx.tb_lock, flags);
kctx->jctx.tb_wrap_offset = size / 8;
kctx->jctx.tb = tb;
spin_unlock_irqrestore(&kctx->jctx.tb_lock, flags);
return 0;
}
void kbase_device_trace_buffer_uninstall(struct kbase_context *kctx)
{
unsigned long flags;
KBASE_DEBUG_ASSERT(kctx);
spin_lock_irqsave(&kctx->jctx.tb_lock, flags);
kctx->jctx.tb = NULL;
kctx->jctx.tb_wrap_offset = 0;
spin_unlock_irqrestore(&kctx->jctx.tb_lock, flags);
}
void kbase_device_trace_register_access(struct kbase_context *kctx, enum kbase_reg_access_type type, u16 reg_offset, u32 reg_value)
{
unsigned long flags;
spin_lock_irqsave(&kctx->jctx.tb_lock, flags);
if (kctx->jctx.tb) {
u16 wrap_count;
u16 write_offset;
u32 *tb = kctx->jctx.tb;
u32 header_word;
header_word = tb[1];
KBASE_DEBUG_ASSERT(0 == (header_word & 0x1));
wrap_count = (header_word >> 1) & 0x7FFF;
write_offset = (header_word >> 16) & 0xFFFF;
/* mark as transaction in progress */
tb[1] |= 0x1;
mb();
/* calculate new offset */
write_offset++;
if (write_offset == kctx->jctx.tb_wrap_offset) {
/* wrap */
write_offset = 1;
wrap_count++;
wrap_count &= 0x7FFF; /* 15bit wrap counter */
}
/* store the trace entry at the selected offset */
tb[write_offset * 2 + 0] = (reg_offset & ~0x3) | ((type == REG_WRITE) ? 0x1 : 0x0);
tb[write_offset * 2 + 1] = reg_value;
mb();
/* new header word */
header_word = (write_offset << 16) | (wrap_count << 1) | 0x0; /* transaction complete */
tb[1] = header_word;
}
spin_unlock_irqrestore(&kctx->jctx.tb_lock, flags);
}
/*
* Device trace functions
*/
#if KBASE_TRACE_ENABLE
static int kbasep_trace_init(struct kbase_device *kbdev)
{
struct kbase_trace *rbuf;
rbuf = kmalloc_array(KBASE_TRACE_SIZE, sizeof(*rbuf), GFP_KERNEL);
if (!rbuf)
return -EINVAL;
kbdev->trace_rbuf = rbuf;
spin_lock_init(&kbdev->trace_lock);
return 0;
}
static void kbasep_trace_term(struct kbase_device *kbdev)
{
kfree(kbdev->trace_rbuf);
}
static void kbasep_trace_format_msg(struct kbase_trace *trace_msg, char *buffer, int len)
{
s32 written = 0;
/* Initial part of message */
written += MAX(snprintf(buffer + written, MAX(len - written, 0), "%d.%.6d,%d,%d,%s,%p,", (int)trace_msg->timestamp.tv_sec, (int)(trace_msg->timestamp.tv_nsec / 1000), trace_msg->thread_id, trace_msg->cpu, kbasep_trace_code_string[trace_msg->code], trace_msg->ctx), 0);
if (trace_msg->katom)
written += MAX(snprintf(buffer + written, MAX(len - written, 0), "atom %d (ud: 0x%llx 0x%llx)", trace_msg->atom_number, trace_msg->atom_udata[0], trace_msg->atom_udata[1]), 0);
written += MAX(snprintf(buffer + written, MAX(len - written, 0), ",%.8llx,", trace_msg->gpu_addr), 0);
/* NOTE: Could add function callbacks to handle different message types */
/* Jobslot present */
if (trace_msg->flags & KBASE_TRACE_FLAG_JOBSLOT)
written += MAX(snprintf(buffer + written, MAX(len - written, 0), "%d", trace_msg->jobslot), 0);
written += MAX(snprintf(buffer + written, MAX(len - written, 0), ","), 0);
/* Refcount present */
if (trace_msg->flags & KBASE_TRACE_FLAG_REFCOUNT)
written += MAX(snprintf(buffer + written, MAX(len - written, 0), "%d", trace_msg->refcount), 0);
written += MAX(snprintf(buffer + written, MAX(len - written, 0), ","), 0);
/* Rest of message */
written += MAX(snprintf(buffer + written, MAX(len - written, 0), "0x%.8lx", trace_msg->info_val), 0);
}
static void kbasep_trace_dump_msg(struct kbase_device *kbdev, struct kbase_trace *trace_msg)
{
char buffer[DEBUG_MESSAGE_SIZE];
kbasep_trace_format_msg(trace_msg, buffer, DEBUG_MESSAGE_SIZE);
dev_dbg(kbdev->dev, "%s", buffer);
}
void kbasep_trace_add(struct kbase_device *kbdev, enum kbase_trace_code code, void *ctx, struct kbase_jd_atom *katom, u64 gpu_addr, u8 flags, int refcount, int jobslot, unsigned long info_val)
{
unsigned long irqflags;
struct kbase_trace *trace_msg;
spin_lock_irqsave(&kbdev->trace_lock, irqflags);
trace_msg = &kbdev->trace_rbuf[kbdev->trace_next_in];
/* Fill the message */
trace_msg->thread_id = task_pid_nr(current);
trace_msg->cpu = task_cpu(current);
getnstimeofday(&trace_msg->timestamp);
trace_msg->code = code;
trace_msg->ctx = ctx;
if (NULL == katom) {
trace_msg->katom = false;
} else {
trace_msg->katom = true;
trace_msg->atom_number = kbase_jd_atom_id(katom->kctx, katom);
trace_msg->atom_udata[0] = katom->udata.blob[0];
trace_msg->atom_udata[1] = katom->udata.blob[1];
}
trace_msg->gpu_addr = gpu_addr;
trace_msg->jobslot = jobslot;
trace_msg->refcount = MIN((unsigned int)refcount, 0xFF);
trace_msg->info_val = info_val;
trace_msg->flags = flags;
/* Update the ringbuffer indices */
kbdev->trace_next_in = (kbdev->trace_next_in + 1) & KBASE_TRACE_MASK;
if (kbdev->trace_next_in == kbdev->trace_first_out)
kbdev->trace_first_out = (kbdev->trace_first_out + 1) & KBASE_TRACE_MASK;
/* Done */
spin_unlock_irqrestore(&kbdev->trace_lock, irqflags);
}
void kbasep_trace_clear(struct kbase_device *kbdev)
{
unsigned long flags;
spin_lock_irqsave(&kbdev->trace_lock, flags);
kbdev->trace_first_out = kbdev->trace_next_in;
spin_unlock_irqrestore(&kbdev->trace_lock, flags);
}
void kbasep_trace_dump(struct kbase_device *kbdev)
{
unsigned long flags;
u32 start;
u32 end;
dev_dbg(kbdev->dev, "Dumping trace:\nsecs,nthread,cpu,code,ctx,katom,gpu_addr,jobslot,refcount,info_val");
spin_lock_irqsave(&kbdev->trace_lock, flags);
start = kbdev->trace_first_out;
end = kbdev->trace_next_in;
while (start != end) {
struct kbase_trace *trace_msg = &kbdev->trace_rbuf[start];
kbasep_trace_dump_msg(kbdev, trace_msg);
start = (start + 1) & KBASE_TRACE_MASK;
}
dev_dbg(kbdev->dev, "TRACE_END");
spin_unlock_irqrestore(&kbdev->trace_lock, flags);
KBASE_TRACE_CLEAR(kbdev);
}
static void kbasep_trace_hook_wrapper(void *param)
{
struct kbase_device *kbdev = (struct kbase_device *)param;
kbasep_trace_dump(kbdev);
}
#ifdef CONFIG_DEBUG_FS
struct trace_seq_state {
struct kbase_trace trace_buf[KBASE_TRACE_SIZE];
u32 start;
u32 end;
};
static void *kbasep_trace_seq_start(struct seq_file *s, loff_t *pos)
{
struct trace_seq_state *state = s->private;
int i;
if (*pos > KBASE_TRACE_SIZE)
return NULL;
i = state->start + *pos;
if ((state->end >= state->start && i >= state->end) ||
i >= state->end + KBASE_TRACE_SIZE)
return NULL;
i &= KBASE_TRACE_MASK;
return &state->trace_buf[i];
}
static void kbasep_trace_seq_stop(struct seq_file *s, void *data)
{
}
static void *kbasep_trace_seq_next(struct seq_file *s, void *data, loff_t *pos)
{
struct trace_seq_state *state = s->private;
int i;
(*pos)++;
i = (state->start + *pos) & KBASE_TRACE_MASK;
if (i == state->end)
return NULL;
return &state->trace_buf[i];
}
static int kbasep_trace_seq_show(struct seq_file *s, void *data)
{
struct kbase_trace *trace_msg = data;
char buffer[DEBUG_MESSAGE_SIZE];
kbasep_trace_format_msg(trace_msg, buffer, DEBUG_MESSAGE_SIZE);
seq_printf(s, "%s\n", buffer);
return 0;
}
static const struct seq_operations kbasep_trace_seq_ops = {
.start = kbasep_trace_seq_start,
.next = kbasep_trace_seq_next,
.stop = kbasep_trace_seq_stop,
.show = kbasep_trace_seq_show,
};
static int kbasep_trace_debugfs_open(struct inode *inode, struct file *file)
{
struct kbase_device *kbdev = inode->i_private;
unsigned long flags;
struct trace_seq_state *state;
state = __seq_open_private(file, &kbasep_trace_seq_ops, sizeof(*state));
if (!state)
return -ENOMEM;
spin_lock_irqsave(&kbdev->trace_lock, flags);
state->start = kbdev->trace_first_out;
state->end = kbdev->trace_next_in;
memcpy(state->trace_buf, kbdev->trace_rbuf, sizeof(state->trace_buf));
spin_unlock_irqrestore(&kbdev->trace_lock, flags);
return 0;
}
static const struct file_operations kbasep_trace_debugfs_fops = {
.open = kbasep_trace_debugfs_open,
.read = seq_read,
.llseek = seq_lseek,
.release = seq_release_private,
};
void kbasep_trace_debugfs_init(struct kbase_device *kbdev)
{
debugfs_create_file("mali_trace", S_IRUGO,
kbdev->mali_debugfs_directory, kbdev,
&kbasep_trace_debugfs_fops);
}
#else
void kbasep_trace_debugfs_init(struct kbase_device *kbdev)
{
}
#endif /* CONFIG_DEBUG_FS */
#else /* KBASE_TRACE_ENABLE */
static int kbasep_trace_init(struct kbase_device *kbdev)
{
CSTD_UNUSED(kbdev);
return 0;
}
static void kbasep_trace_term(struct kbase_device *kbdev)
{
CSTD_UNUSED(kbdev);
}
static void kbasep_trace_hook_wrapper(void *param)
{
CSTD_UNUSED(param);
}
void kbasep_trace_dump(struct kbase_device *kbdev)
{
CSTD_UNUSED(kbdev);
}
#endif /* KBASE_TRACE_ENABLE */
void kbase_set_profiling_control(struct kbase_device *kbdev, u32 control, u32 value)
{
switch (control) {
case FBDUMP_CONTROL_ENABLE:
/* fall through */
case FBDUMP_CONTROL_RATE:
/* fall through */
case SW_COUNTER_ENABLE:
/* fall through */
case FBDUMP_CONTROL_RESIZE_FACTOR:
kbdev->kbase_profiling_controls[control] = value;
break;
default:
dev_err(kbdev->dev, "Profiling control %d not found\n", control);
break;
}
}
/*
* Called by gator to control the production of
* profiling information at runtime
* */
void _mali_profiling_control(u32 action, u32 value)
{
struct kbase_device *kbdev = NULL;
/* find the first i.e. call with -1 */
kbdev = kbase_find_device(-1);
if (NULL != kbdev)
kbase_set_profiling_control(kbdev, action, value);
}
KBASE_EXPORT_SYMBOL(_mali_profiling_control);

Some files were not shown because too many files have changed in this diff Show More