mirror of
https://github.com/hardkernel/linux.git
synced 2026-04-01 02:33:01 +09:00
MALI: rockchip: upgrade Bifrost DDK to r20p0-01rel0, from r12p0-01rel0
I replace "sg_dma_len" with "MALI_SG_DMA_LEN" and rename configs which have duplicated names in midgard/, as before. Note that drivers/gpu/arm/bifrost on DDK r20 depends on drivers/base/memory_group_manager. Change-Id: Ie51fda035a98b5151f25a0fcf7294b88d4ecc978 Signed-off-by: Zhen Chen <chenzhen@rock-chips.com>
This commit is contained in:
@@ -1,5 +1,5 @@
|
||||
#
|
||||
# (C) COPYRIGHT 2013-2018 ARM Limited. All rights reserved.
|
||||
# (C) COPYRIGHT 2013-2019 ARM Limited. All rights reserved.
|
||||
#
|
||||
# This program is free software and is provided to you under the terms of the
|
||||
# GNU General Public License version 2 as published by the Free Software
|
||||
@@ -33,22 +33,38 @@ provided in the interrupts property. Must contain: "JOB", "MMU", "GPU".
|
||||
|
||||
Optional:
|
||||
|
||||
- clocks : Phandle to clock for the Mali T-6xx device.
|
||||
- clock-names : Shall be "clk_mali".
|
||||
- mali-supply : Phandle to regulator for the Mali device. Refer to
|
||||
- clocks : One or more pairs of phandle to clock and clock specifier
|
||||
for the Mali device. The order is important: the first clock
|
||||
shall correspond to the "clk_mali" source, while the second clock
|
||||
(that is optional) shall correspond to the "shadercores" source.
|
||||
- clock-names : Shall be set to: "clk_mali", "shadercores".
|
||||
- mali-supply : Phandle to the top level regulator for the Mali device.
|
||||
Refer to
|
||||
Documentation/devicetree/bindings/regulator/regulator.txt for details.
|
||||
- operating-points-v2 : Refer to Documentation/devicetree/bindings/power/opp.txt
|
||||
- shadercores-supply : Phandle to shader cores regulator for the Mali device.
|
||||
This is optional.
|
||||
- operating-points-v2 : Refer to Documentation/devicetree/bindings/power/mali-opp.txt
|
||||
for details.
|
||||
- jm_config : For T860/T880. Sets job manager configuration. An array containing:
|
||||
- 1 to override the TIMESTAMP value, 0 otherwise.
|
||||
- 1 to override clock gate, forcing them to be always on, 0 otherwise.
|
||||
- 1 to enable job throttle, limiting the number of cores that can be started
|
||||
simultaneously, 0 otherwise.
|
||||
- Value between 0 and 63 (including). If job throttle is enabled, this is one
|
||||
less than the number of cores that can be started simultaneously.
|
||||
- power_model : Sets the power model parameters. Three power models are currently
|
||||
defined which include "mali-simple-power-model", "mali-g71-power-model" and
|
||||
"mali-g72-power-model".
|
||||
- quirks_jm : Used to write to the JM_CONFIG register or equivalent.
|
||||
Should be used with care. Options passed here are used to override
|
||||
certain default behavior. Note: This will override 'idvs-group-size'
|
||||
field in devicetree and module param 'corestack_driver_control',
|
||||
therefore if 'quirks_jm' is used then 'idvs-group-size' and
|
||||
'corestack_driver_control' value should be incorporated into 'quirks_jm'.
|
||||
- quirks_sc : Used to write to the SHADER_CONFIG register.
|
||||
Should be used with care. Options passed here are used to override
|
||||
certain default behavior.
|
||||
- quirks_tiler : Used to write to the TILER_CONFIG register.
|
||||
Should be used with care. Options passed here are used to
|
||||
disable or override certain default behavior.
|
||||
- quirks_mmu : Used to write to the L2_CONFIG register.
|
||||
Should be used with care. Options passed here are used to
|
||||
disable or override certain default behavior.
|
||||
- power_model : Sets the power model parameters. Defined power models include:
|
||||
"mali-simple-power-model", "mali-g51-power-model", "mali-g52-power-model",
|
||||
"mali-g52_r1-power-model", "mali-g71-power-model", "mali-g72-power-model",
|
||||
"mali-g76-power-model", "mali-g77-power-model", "mali-tnax-power-model"
|
||||
and "mali-tbex-power-model".
|
||||
- mali-simple-power-model: this model derives the GPU power usage based
|
||||
on the GPU voltage scaled by the system temperature. Note: it was
|
||||
designed for the Juno platform, and may not be suitable for others.
|
||||
@@ -66,14 +82,17 @@ for details.
|
||||
the GPU
|
||||
- temp-poll-interval-ms: the interval at which the system
|
||||
temperature is polled
|
||||
- mali-g71-power-model / mali-g72-power-model: these models derive
|
||||
- mali-g*-power-model(s): unless stated otherwise, these models derive
|
||||
the GPU power usage based on performance counters, so they are more
|
||||
accurate.
|
||||
- compatible: Should be "arm,mali-g71-power-model" /
|
||||
"arm,mali-g72-power-model"
|
||||
- compatible: Should be, as examples, "arm,mali-g51-power-model" /
|
||||
"arm,mali-g72-power-model".
|
||||
- scale: the dynamic power calculated by the power model is
|
||||
scaled by a factor of "scale"/1000. This value should be
|
||||
multiplied by a factor of 'scale'. This value should be
|
||||
chosen to match a particular implementation.
|
||||
- min_sample_cycles: Fall back to the simple power model if the
|
||||
number of GPU cycles for a given counter dump is less than
|
||||
'min_sample_cycles'. The default value of this should suffice.
|
||||
* Note: when IPA is used, two separate power models (simple and counter-based)
|
||||
are used at different points so care should be taken to configure
|
||||
both power models in the device tree (specifically dynamic-coefficient,
|
||||
@@ -90,8 +109,13 @@ for details.
|
||||
- mali-simple-power-model: Default model used on mali
|
||||
- protected-mode-switcher : Phandle to device implemented protected mode switching functionality.
|
||||
Refer to Documentation/devicetree/bindings/arm/smc-protected-mode-switcher.txt for one implementation.
|
||||
- idvs-group-size : Override the IDVS group size value. Tasks are sent to
|
||||
cores in groups of N + 1, so e.g. 0xF means 16 tasks.
|
||||
Valid values are between 0 and 0x3F (inclusive).
|
||||
- l2-size : Override L2 cache size on GPU that supports it
|
||||
- l2-hash : Override L2 hash function on GPU that supports it
|
||||
|
||||
Example for a Mali GPU:
|
||||
Example for a Mali GPU with 1 clock and no regulators:
|
||||
|
||||
gpu@0xfc010000 {
|
||||
compatible = "arm,malit602", "arm,malit60x", "arm,malit6xx", "arm,mali-midgard";
|
||||
@@ -114,6 +138,10 @@ gpu@0xfc010000 {
|
||||
compatible = "arm,mali-g71-power-model";
|
||||
scale = <5>;
|
||||
};
|
||||
|
||||
idvs-group-size = <0x7>;
|
||||
l2-size = /bits/ 8 <0x10>;
|
||||
l2-hash = /bits/ 8 <0x04>;
|
||||
};
|
||||
|
||||
gpu_opp_table: opp_table0 {
|
||||
@@ -148,3 +176,41 @@ gpu_opp_table: opp_table0 {
|
||||
opp-microvolt = <912500>;
|
||||
};
|
||||
};
|
||||
|
||||
Example for a Mali GPU with 2 clocks and 2 regulators:
|
||||
|
||||
gpu: gpu@6e000000 {
|
||||
compatible = "arm,mali-midgard";
|
||||
reg = <0x0 0x6e000000 0x0 0x200000>;
|
||||
interrupts = <0 168 4>, <0 168 4>, <0 168 4>;
|
||||
interrupt-names = "JOB", "MMU", "GPU";
|
||||
clocks = <&clk_mali 0>, <&clk_mali 1>;
|
||||
clock-names = "clk_mali", "shadercores";
|
||||
mali-supply = <&supply0_3v3>;
|
||||
shadercores-supply = <&supply1_3v3>;
|
||||
system-coherency = <31>;
|
||||
operating-points-v2 = <&gpu_opp_table>;
|
||||
};
|
||||
|
||||
gpu_opp_table: opp_table0 {
|
||||
compatible = "operating-points-v2", "operating-points-v2-mali";
|
||||
|
||||
opp@0 {
|
||||
opp-hz = /bits/ 64 <50000000>;
|
||||
opp-hz-real = /bits/ 64 <50000000>, /bits/ 64 <45000000>;
|
||||
opp-microvolt = <820000>, <800000>;
|
||||
opp-core-mask = /bits/ 64 <0xf>;
|
||||
};
|
||||
opp@1 {
|
||||
opp-hz = /bits/ 64 <40000000>;
|
||||
opp-hz-real = /bits/ 64 <40000000>, /bits/ 64 <35000000>;
|
||||
opp-microvolt = <720000>, <700000>;
|
||||
opp-core-mask = /bits/ 64 <0x7>;
|
||||
};
|
||||
opp@2 {
|
||||
opp-hz = /bits/ 64 <30000000>;
|
||||
opp-hz-real = /bits/ 64 <30000000>, /bits/ 64 <25000000>;
|
||||
opp-microvolt = <620000>, <700000>;
|
||||
opp-core-mask = /bits/ 64 <0x3>;
|
||||
};
|
||||
};
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
#
|
||||
# (C) COPYRIGHT 2017 ARM Limited. All rights reserved.
|
||||
# (C) COPYRIGHT 2017, 2019 ARM Limited. All rights reserved.
|
||||
#
|
||||
# This program is free software and is provided to you under the terms of the
|
||||
# GNU General Public License version 2 as published by the Free Software
|
||||
@@ -39,8 +39,9 @@ Required properties:
|
||||
frequency and core mask into account.
|
||||
|
||||
Optional properties:
|
||||
- opp-hz-real: Real frequency in Hz, expressed as a 64-bit big-endian integer.
|
||||
If this is not present then the nominal frequency will be used instead.
|
||||
- opp-hz-real: List of one or two real frequencies in Hz, expressed as 64-bit
|
||||
big-endian integers. They shall correspond to the clocks declared under
|
||||
the Mali device node, and follow the same order.
|
||||
|
||||
- opp-core-mask: Shader core mask. If neither this nor opp-core-count is present
|
||||
then all shader cores will be used for this OPP.
|
||||
@@ -54,7 +55,9 @@ Optional properties:
|
||||
|
||||
If both this and opp-core-mask are present then opp-core-mask is ignored.
|
||||
|
||||
- opp-microvolt: voltage in micro Volts.
|
||||
- opp-microvolt: List of one or two voltages in micro Volts. They shall correspond
|
||||
to the regulators declared under the Mali device node, and follow the order:
|
||||
"toplevel", "shadercores".
|
||||
|
||||
A single regulator's voltage is specified with an array of size one or three.
|
||||
Single entry is for target voltage and three entries are for <target min max>
|
||||
@@ -97,6 +100,11 @@ Optional properties:
|
||||
- opp-suspend: Marks the OPP to be used during device suspend. Only one OPP in
|
||||
the table should have this.
|
||||
|
||||
- opp-mali-errata-1485982: Marks the OPP to be selected for suspend clock.
|
||||
This will be effective only if MALI_HW_ERRATA_1485982_USE_CLOCK_ALTERNATIVE is
|
||||
enabled. It needs to be placed in any OPP that has proper suspend clock for
|
||||
the HW workaround.
|
||||
|
||||
- opp-supported-hw: This enables us to select only a subset of OPPs from the
|
||||
larger OPP table, based on what version of the hardware we are running on. We
|
||||
still can't have multiple nodes with the same opp-hz value in OPP table.
|
||||
@@ -119,7 +127,7 @@ Optional properties:
|
||||
|
||||
- status: Marks the node enabled/disabled.
|
||||
|
||||
Example for a Juno with Mali T624:
|
||||
Example for a Juno with 1 clock and 1 regulator:
|
||||
|
||||
gpu_opp_table: opp_table0 {
|
||||
compatible = "operating-points-v2", "operating-points-v2-mali";
|
||||
@@ -130,6 +138,7 @@ gpu_opp_table: opp_table0 {
|
||||
opp-microvolt = <820000>;
|
||||
opp-core-mask = /bits/ 64 <0x1>;
|
||||
opp-suspend;
|
||||
opp-mali-errata-1485982;
|
||||
};
|
||||
opp@225000000 {
|
||||
opp-hz = /bits/ 64 <225000000>;
|
||||
@@ -161,3 +170,27 @@ gpu_opp_table: opp_table0 {
|
||||
};
|
||||
};
|
||||
|
||||
Example for a Juno with 2 clocks and 2 regulators:
|
||||
|
||||
gpu_opp_table: opp_table0 {
|
||||
compatible = "operating-points-v2", "operating-points-v2-mali";
|
||||
|
||||
opp@0 {
|
||||
opp-hz = /bits/ 64 <50000000>;
|
||||
opp-hz-real = /bits/ 64 <50000000>, /bits/ 64 <45000000>;
|
||||
opp-microvolt = <820000>, <800000>;
|
||||
opp-core-mask = /bits/ 64 <0xf>;
|
||||
};
|
||||
opp@1 {
|
||||
opp-hz = /bits/ 64 <40000000>;
|
||||
opp-hz-real = /bits/ 64 <40000000>, /bits/ 64 <35000000>;
|
||||
opp-microvolt = <720000>, <700000>;
|
||||
opp-core-mask = /bits/ 64 <0x7>;
|
||||
};
|
||||
opp@2 {
|
||||
opp-hz = /bits/ 64 <30000000>;
|
||||
opp-hz-real = /bits/ 64 <30000000>, /bits/ 64 <25000000>;
|
||||
opp-microvolt = <620000>, <700000>;
|
||||
opp-core-mask = /bits/ 64 <0x3>;
|
||||
};
|
||||
};
|
||||
|
||||
@@ -20,7 +20,7 @@
|
||||
#
|
||||
|
||||
# Driver version string which is returned to userspace via an ioctl
|
||||
MALI_RELEASE_NAME ?= "r12p0-01rel0"
|
||||
MALI_RELEASE_NAME ?= "r20p0-01rel0"
|
||||
|
||||
# Paths required for build
|
||||
KBASE_PATH = $(src)
|
||||
@@ -29,32 +29,20 @@ UMP_PATH = $(src)/../../../base
|
||||
|
||||
# Set up defaults if not defined by build system
|
||||
MALI_CUSTOMER_RELEASE ?= 1
|
||||
MALI_USE_CSF ?= 0
|
||||
MALI_UNIT_TEST ?= 0
|
||||
MALI_KERNEL_TEST_API ?= 0
|
||||
MALI_MOCK_TEST ?= 0
|
||||
MALI_COVERAGE ?= 0
|
||||
CONFIG_MALI_PLATFORM_NAME ?= "devicetree"
|
||||
# This workaround is for what seems to be a compiler bug we observed in
|
||||
# GCC 4.7 on AOSP 4.3. The bug caused an intermittent failure compiling
|
||||
# the "_Pragma" syntax, where an error message is returned:
|
||||
#
|
||||
# "internal compiler error: unspellable token PRAGMA"
|
||||
#
|
||||
# This regression has thus far only been seen on the GCC 4.7 compiler bundled
|
||||
# with AOSP 4.3.0. So this makefile, intended for in-tree kernel builds
|
||||
# which are not known to be used with AOSP, is hardcoded to disable the
|
||||
# workaround, i.e. set the define to 0.
|
||||
MALI_GCC_WORKAROUND_MIDCOM_4598 ?= 0
|
||||
|
||||
# Set up our defines, which will be passed to gcc
|
||||
DEFINES = \
|
||||
-DMALI_CUSTOMER_RELEASE=$(MALI_CUSTOMER_RELEASE) \
|
||||
-DMALI_USE_CSF=$(MALI_USE_CSF) \
|
||||
-DMALI_KERNEL_TEST_API=$(MALI_KERNEL_TEST_API) \
|
||||
-DMALI_UNIT_TEST=$(MALI_UNIT_TEST) \
|
||||
-DMALI_MOCK_TEST=$(MALI_MOCK_TEST) \
|
||||
-DMALI_COVERAGE=$(MALI_COVERAGE) \
|
||||
-DMALI_RELEASE_NAME=\"$(MALI_RELEASE_NAME)\" \
|
||||
-DMALI_GCC_WORKAROUND_MIDCOM_4598=$(MALI_GCC_WORKAROUND_MIDCOM_4598)
|
||||
-DMALI_RELEASE_NAME=\"$(MALI_RELEASE_NAME)\"
|
||||
|
||||
ifeq ($(KBUILD_EXTMOD),)
|
||||
# in-tree
|
||||
@@ -66,15 +54,19 @@ endif
|
||||
|
||||
DEFINES += -I$(srctree)/drivers/staging/android
|
||||
|
||||
DEFINES += -DMALI_KBASE_BUILD
|
||||
|
||||
# Use our defines when compiling
|
||||
ccflags-y += $(DEFINES) -I$(KBASE_PATH) -I$(KBASE_PLATFORM_PATH) -I$(UMP_PATH) -I$(srctree)/include/linux
|
||||
subdir-ccflags-y += $(DEFINES) -I$(KBASE_PATH) -I$(KBASE_PLATFORM_PATH) -I$(OSK_PATH) -I$(UMP_PATH) -I$(srctree)/include/linux
|
||||
subdir-ccflags-y += $(DEFINES) -I$(KBASE_PATH) -I$(KBASE_PLATFORM_PATH) -I$(UMP_PATH) -I$(srctree)/include/linux
|
||||
|
||||
SRC := \
|
||||
mali_kbase_device.c \
|
||||
mali_kbase_cache_policy.c \
|
||||
mali_kbase_mem.c \
|
||||
mali_kbase_mem_pool_group.c \
|
||||
mali_kbase_mmu.c \
|
||||
mali_kbase_native_mgm.c \
|
||||
mali_kbase_ctx_sched.c \
|
||||
mali_kbase_jd.c \
|
||||
mali_kbase_jd_debugfs.c \
|
||||
@@ -87,39 +79,45 @@ SRC := \
|
||||
mali_kbase_pm.c \
|
||||
mali_kbase_config.c \
|
||||
mali_kbase_vinstr.c \
|
||||
mali_kbase_hwcnt.c \
|
||||
mali_kbase_hwcnt_backend_gpu.c \
|
||||
mali_kbase_hwcnt_gpu.c \
|
||||
mali_kbase_hwcnt_legacy.c \
|
||||
mali_kbase_hwcnt_types.c \
|
||||
mali_kbase_hwcnt_virtualizer.c \
|
||||
mali_kbase_softjobs.c \
|
||||
mali_kbase_10969_workaround.c \
|
||||
mali_kbase_hw.c \
|
||||
mali_kbase_utility.c \
|
||||
mali_kbase_debug.c \
|
||||
mali_kbase_trace_timeline.c \
|
||||
mali_kbase_gpu_memory_debugfs.c \
|
||||
mali_kbase_mem_linux.c \
|
||||
mali_kbase_core_linux.c \
|
||||
mali_kbase_replay.c \
|
||||
mali_kbase_mem_profile_debugfs.c \
|
||||
mali_kbase_mmu_mode_lpae.c \
|
||||
mali_kbase_mmu_mode_aarch64.c \
|
||||
mali_kbase_disjoint_events.c \
|
||||
mali_kbase_gator_api.c \
|
||||
mali_kbase_debug_mem_view.c \
|
||||
mali_kbase_debug_job_fault.c \
|
||||
mali_kbase_smc.c \
|
||||
mali_kbase_mem_pool.c \
|
||||
mali_kbase_mem_pool_debugfs.c \
|
||||
mali_kbase_debugfs_helper.c \
|
||||
mali_kbase_timeline.c \
|
||||
mali_kbase_timeline_io.c \
|
||||
mali_kbase_tlstream.c \
|
||||
mali_kbase_tracepoints.c \
|
||||
mali_kbase_strings.c \
|
||||
mali_kbase_as_fault_debugfs.c \
|
||||
mali_kbase_regs_history_debugfs.c \
|
||||
thirdparty/mali_kbase_mmap.c
|
||||
|
||||
|
||||
ifeq ($(CONFIG_MALI_JOB_DUMP),y)
|
||||
ifeq ($(CONFIG_MALI_CINSTR_GWT),y)
|
||||
SRC += mali_kbase_gwt.c
|
||||
endif
|
||||
|
||||
ifeq ($(MALI_UNIT_TEST),1)
|
||||
SRC += mali_kbase_tlstream_test.c
|
||||
SRC += mali_kbase_timeline_test.c
|
||||
endif
|
||||
|
||||
ifeq ($(MALI_CUSTOMER_RELEASE),0)
|
||||
@@ -147,6 +145,10 @@ ifeq ($(CONFIG_MALI_BIFROST_DEVFREQ),y)
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(MALI_USE_CSF),1)
|
||||
include $(src)/csf/Kbuild
|
||||
endif
|
||||
|
||||
bifrost_kbase-$(CONFIG_MALI_BIFROST_DMA_FENCE) += \
|
||||
mali_kbase_dma_fence.o \
|
||||
mali_kbase_fence.o
|
||||
@@ -158,11 +160,6 @@ bifrost_kbase-$(CONFIG_SYNC_FILE) += \
|
||||
mali_kbase_sync_common.o \
|
||||
mali_kbase_fence.o
|
||||
|
||||
ifeq ($(MALI_MOCK_TEST),1)
|
||||
# Test functionality
|
||||
bifrost_kbase-y += tests/internal/src/mock/mali_kbase_pm_driver_mock.o
|
||||
endif
|
||||
|
||||
include $(src)/backend/gpu/Kbuild
|
||||
bifrost_kbase-y += $(BACKEND:.c=.o)
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
#
|
||||
# (C) COPYRIGHT 2012-2018 ARM Limited. All rights reserved.
|
||||
# (C) COPYRIGHT 2012-2019 ARM Limited. All rights reserved.
|
||||
#
|
||||
# This program is free software and is provided to you under the terms of the
|
||||
# GNU General Public License version 2 as published by the Free Software
|
||||
@@ -23,6 +23,7 @@
|
||||
menuconfig MALI_BIFROST
|
||||
tristate "Mali Bifrost series support"
|
||||
select GPU_TRACEPOINTS if ANDROID
|
||||
select DMA_SHARED_BUFFER
|
||||
default n
|
||||
help
|
||||
Enable this option to build support for an ARM Mali Bifrost GPU.
|
||||
@@ -31,14 +32,12 @@ menuconfig MALI_BIFROST
|
||||
this will generate a single module, called mali_kbase.
|
||||
|
||||
config MALI_BIFROST_GATOR_SUPPORT
|
||||
bool "Streamline support via Gator"
|
||||
bool "Enable Streamline tracing support"
|
||||
depends on MALI_BIFROST
|
||||
default n
|
||||
help
|
||||
Adds diagnostic support for use with the ARM Streamline Performance Analyzer.
|
||||
You will need the Gator device driver already loaded before loading this driver when enabling
|
||||
Streamline debug support.
|
||||
This is a legacy interface required by older versions of Streamline.
|
||||
Enables kbase tracing used by the Arm Streamline Performance Analyzer.
|
||||
The tracepoints are used to derive GPU activity charts in Streamline.
|
||||
|
||||
config MALI_BIFROST_DVFS
|
||||
bool "Enable legacy DVFS"
|
||||
@@ -107,16 +106,17 @@ config MALI_CORESTACK
|
||||
|
||||
If unsure, say N.
|
||||
|
||||
config MALI_BIFROST_PRFCNT_SET_SECONDARY
|
||||
bool "Use secondary set of performance counters"
|
||||
config MALI_PLATFORM_POWER_DOWN_ONLY
|
||||
bool "Support disabling the power down of individual cores"
|
||||
depends on MALI_BIFROST && MALI_BIFROST_EXPERT
|
||||
default n
|
||||
help
|
||||
Select this option to use secondary set of performance counters. Kernel
|
||||
features that depend on an access to the primary set of counters may
|
||||
become unavailable. Enabling this option will prevent power management
|
||||
from working optimally and may cause instrumentation tools to return
|
||||
bogus results.
|
||||
Enabling this feature will let the driver avoid power down of the
|
||||
shader cores, the tiler, and the L2 cache.
|
||||
The entire GPU would be powered down at once through the platform
|
||||
specific code.
|
||||
This may be required for certain platform configurations only.
|
||||
This also limits the available power policies.
|
||||
|
||||
If unsure, say N.
|
||||
|
||||
@@ -156,6 +156,9 @@ config MALI_BIFROST_NO_MALI
|
||||
All calls to the simulated hardware will complete immediately as if the hardware
|
||||
completed the task.
|
||||
|
||||
config MALI_REAL_HW
|
||||
def_bool !MALI_BIFROST_NO_MALI
|
||||
|
||||
config MALI_BIFROST_ERROR_INJECT
|
||||
bool "Error injection"
|
||||
depends on MALI_BIFROST && MALI_BIFROST_EXPERT && MALI_BIFROST_NO_MALI
|
||||
@@ -163,13 +166,6 @@ config MALI_BIFROST_ERROR_INJECT
|
||||
help
|
||||
Enables insertion of errors to test module failure and recovery mechanisms.
|
||||
|
||||
config MALI_BIFROST_TRACE_TIMELINE
|
||||
bool "Timeline tracing"
|
||||
depends on MALI_BIFROST && MALI_BIFROST_EXPERT
|
||||
default n
|
||||
help
|
||||
Enables timeline tracing through the kernel tracepoint system.
|
||||
|
||||
config MALI_BIFROST_SYSTEM_TRACE
|
||||
bool "Enable system event tracing support"
|
||||
depends on MALI_BIFROST && MALI_BIFROST_EXPERT
|
||||
@@ -180,16 +176,6 @@ config MALI_BIFROST_SYSTEM_TRACE
|
||||
minimal overhead when not in use. Enable only if you know what
|
||||
you are doing.
|
||||
|
||||
config MALI_JOB_DUMP
|
||||
bool "Enable system level support needed for job dumping"
|
||||
depends on MALI_BIFROST && MALI_BIFROST_EXPERT
|
||||
default n
|
||||
help
|
||||
Choose this option to enable system level support needed for
|
||||
job dumping. This is typically used for instrumentation but has
|
||||
minimal overhead when not in use. Enable only if you know what
|
||||
you are doing.
|
||||
|
||||
config MALI_2MB_ALLOC
|
||||
bool "Attempt to allocate 2MB pages"
|
||||
depends on MALI_BIFROST && MALI_BIFROST_EXPERT
|
||||
@@ -213,5 +199,84 @@ config MALI_PWRSOFT_765
|
||||
If using kernel >= v4.10 then say N, otherwise if devfreq cooling
|
||||
changes have been backported say Y to avoid compilation errors.
|
||||
|
||||
source "drivers/gpu/arm/bifrost/platform/Kconfig"
|
||||
# source "drivers/gpu/arm/bifrost/tests/Kconfig"
|
||||
config MALI_MEMORY_FULLY_BACKED
|
||||
bool "Memory fully physically-backed"
|
||||
depends on MALI_BIFROST && MALI_BIFROST_EXPERT
|
||||
default n
|
||||
help
|
||||
This option enables full physical backing of all virtual
|
||||
memory allocations in the kernel. Notice that this build
|
||||
option only affects allocations of grow-on-GPU-page-fault
|
||||
memory.
|
||||
|
||||
config MALI_DMA_BUF_MAP_ON_DEMAND
|
||||
bool "Map imported dma-bufs on demand"
|
||||
depends on MALI_BIFROST
|
||||
default n
|
||||
help
|
||||
This option causes kbase to set up the GPU mapping of imported
|
||||
dma-buf when needed to run atoms. This is the legacy behaviour.
|
||||
|
||||
This is intended for testing and the option will get removed in the
|
||||
future.
|
||||
|
||||
config MALI_DMA_BUF_LEGACY_COMPAT
|
||||
bool "Enable legacy compatibility cache flush on dma-buf map"
|
||||
depends on MALI_BIFROST && !MALI_DMA_BUF_MAP_ON_DEMAND
|
||||
default y
|
||||
help
|
||||
This option enables compatibility with legacy dma-buf mapping
|
||||
behavior, when the dma-buf is mapped on import, by adding cache
|
||||
maintenance where MALI_DMA_BUF_MAP_ON_DEMAND would do the mapping,
|
||||
including a cache flush.
|
||||
|
||||
config MALI_HW_ERRATA_1485982_NOT_AFFECTED
|
||||
bool "Disable workaround for BASE_HW_ISSUE_GPU2017_1336"
|
||||
depends on MALI_BIFROST && MALI_BIFROST_EXPERT
|
||||
default n
|
||||
help
|
||||
This option disables the default workaround for GPU2017-1336. The
|
||||
workaround keeps the L2 cache powered up except for powerdown and reset.
|
||||
|
||||
The workaround introduces a limitation that will prevent the running of
|
||||
protected mode content on fully coherent platforms, as the switch to IO
|
||||
coherency mode requires the L2 to be turned off.
|
||||
|
||||
config MALI_HW_ERRATA_1485982_USE_CLOCK_ALTERNATIVE
|
||||
bool "Use alternative workaround for BASE_HW_ISSUE_GPU2017_1336"
|
||||
depends on MALI_BIFROST && MALI_BIFROST_EXPERT && !MALI_HW_ERRATA_1485982_NOT_AFFECTED
|
||||
default n
|
||||
help
|
||||
This option uses an alternative workaround for GPU2017-1336. Lowering
|
||||
the GPU clock to a platform-specific, known-good frequency before
|
||||
powering down the L2 cache. The clock can be specified in the device
|
||||
tree using the property, opp-mali-errata-1485982. Otherwise the
|
||||
slowest clock will be selected.
|
||||
|
||||
# Instrumentation options.
|
||||
|
||||
config MALI_JOB_DUMP
|
||||
bool "Enable system level support needed for job dumping"
|
||||
depends on MALI_BIFROST && MALI_BIFROST_EXPERT
|
||||
default n
|
||||
help
|
||||
Choose this option to enable system level support needed for
|
||||
job dumping. This is typically used for instrumentation but has
|
||||
minimal overhead when not in use. Enable only if you know what
|
||||
you are doing.
|
||||
|
||||
config MALI_BIFROST_PRFCNT_SET_SECONDARY
|
||||
bool "Use secondary set of performance counters"
|
||||
depends on MALI_BIFROST && MALI_BIFROST_EXPERT
|
||||
default n
|
||||
help
|
||||
Select this option to use secondary set of performance counters. Kernel
|
||||
features that depend on an access to the primary set of counters may
|
||||
become unavailable. Enabling this option will prevent power management
|
||||
from working optimally and may cause instrumentation tools to return
|
||||
bogus results.
|
||||
|
||||
If unsure, say N.
|
||||
|
||||
source "drivers/gpu/arm/midgard/platform/Kconfig"
|
||||
# source "drivers/gpu/arm/midgard/tests/Kconfig"
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
#
|
||||
# (C) COPYRIGHT 2010-2018 ARM Limited. All rights reserved.
|
||||
# (C) COPYRIGHT 2010-2019 ARM Limited. All rights reserved.
|
||||
#
|
||||
# This program is free software and is provided to you under the terms of the
|
||||
# GNU General Public License version 2 as published by the Free Software
|
||||
@@ -25,11 +25,7 @@ KDIR ?= /lib/modules/$(shell uname -r)/build
|
||||
BUSLOG_PATH_RELATIVE = $(CURDIR)/../../../..
|
||||
KBASE_PATH_RELATIVE = $(CURDIR)
|
||||
|
||||
ifeq ($(MALI_UNIT_TEST), 1)
|
||||
EXTRA_SYMBOLS += $(KBASE_PATH_RELATIVE)/tests/internal/src/kernel_assert_module/linux/Module.symvers
|
||||
endif
|
||||
|
||||
ifeq ($(CONFIG_MALI_FPGA_BUS_LOGGER),y)
|
||||
ifeq ($(CONFIG_MALI_BUSLOG),y)
|
||||
#Add bus logger symbols
|
||||
EXTRA_SYMBOLS += $(BUSLOG_PATH_RELATIVE)/drivers/base/bus_logger/Module.symvers
|
||||
endif
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
#
|
||||
# (C) COPYRIGHT 2010 ARM Limited. All rights reserved.
|
||||
# (C) COPYRIGHT 2010, 2013, 2018 ARM Limited. All rights reserved.
|
||||
#
|
||||
# This program is free software and is provided to you under the terms of the
|
||||
# GNU General Public License version 2 as published by the Free Software
|
||||
@@ -19,5 +19,5 @@
|
||||
#
|
||||
#
|
||||
|
||||
EXTRA_CFLAGS += -I$(ROOT) -I$(KBASE_PATH) -I$(OSK_PATH)/src/linux/include -I$(KBASE_PATH)/platform_$(PLATFORM)
|
||||
EXTRA_CFLAGS += -I$(ROOT) -I$(KBASE_PATH) -I$(KBASE_PATH)/platform_$(PLATFORM)
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
#
|
||||
# (C) COPYRIGHT 2012-2017 ARM Limited. All rights reserved.
|
||||
# (C) COPYRIGHT 2012-2019 ARM Limited. All rights reserved.
|
||||
#
|
||||
# This program is free software and is provided to you under the terms of the
|
||||
# GNU General Public License version 2 as published by the Free Software
|
||||
@@ -23,15 +23,12 @@ menuconfig MALI_BIFROST
|
||||
this will generate a single module, called mali_kbase.
|
||||
|
||||
config MALI_BIFROST_GATOR_SUPPORT
|
||||
bool "Streamline support via Gator"
|
||||
depends on MALI_BIFROST
|
||||
default y if INSTRUMENTATION_STREAMLINE_OLD
|
||||
default n
|
||||
bool "Enable Streamline tracing support"
|
||||
depends on MALI_BIFROST && !BACKEND_USER
|
||||
default y
|
||||
help
|
||||
Adds diagnostic support for use with the ARM Streamline Performance Analyzer.
|
||||
You will need the Gator device driver already loaded before loading this driver when enabling
|
||||
Streamline debug support.
|
||||
This is a legacy interface required by older versions of Streamline.
|
||||
Enables kbase tracing used by the Arm Streamline Performance Analyzer.
|
||||
The tracepoints are used to derive GPU activity charts in Streamline.
|
||||
|
||||
config MALI_BIFROST_DVFS
|
||||
bool "Enable legacy DVFS"
|
||||
@@ -73,21 +70,16 @@ config MALI_BIFROST_DMA_FENCE
|
||||
config MALI_PLATFORM_NAME
|
||||
depends on MALI_BIFROST
|
||||
string "Platform name"
|
||||
default "arndale" if PLATFORM_ARNDALE
|
||||
default "arndale_octa" if PLATFORM_ARNDALE_OCTA
|
||||
default "rk" if PLATFORM_FIREFLY
|
||||
default "hisilicon" if PLATFORM_HIKEY960
|
||||
default "vexpress" if PLATFORM_VEXPRESS
|
||||
default "hisilicon" if PLATFORM_HIKEY970
|
||||
default "devicetree"
|
||||
help
|
||||
Enter the name of the desired platform configuration directory to
|
||||
include in the build. 'platform/$(MALI_PLATFORM_NAME)/Kbuild' must
|
||||
exist.
|
||||
|
||||
config MALI_MOCK_TEST
|
||||
bool
|
||||
depends on MALI_BIFROST && !RELEASE
|
||||
default y
|
||||
When PLATFORM_CUSTOM is set, this needs to be set manually to
|
||||
pick up the desired platform files.
|
||||
|
||||
# MALI_BIFROST_EXPERT configuration options
|
||||
|
||||
@@ -112,16 +104,17 @@ config MALI_CORESTACK
|
||||
|
||||
If unsure, say N.
|
||||
|
||||
config MALI_BIFROST_PRFCNT_SET_SECONDARY
|
||||
bool "Use secondary set of performance counters"
|
||||
config MALI_PLATFORM_POWER_DOWN_ONLY
|
||||
bool "Support disabling the power down of individual cores"
|
||||
depends on MALI_BIFROST && MALI_BIFROST_EXPERT
|
||||
default n
|
||||
help
|
||||
Select this option to use secondary set of performance counters. Kernel
|
||||
features that depend on an access to the primary set of counters may
|
||||
become unavailable. Enabling this option will prevent power management
|
||||
from working optimally and may cause instrumentation tools to return
|
||||
bogus results.
|
||||
Enabling this feature will let the driver avoid power down of the
|
||||
shader cores, the tiler, and the L2 cache.
|
||||
The entire GPU would be powered down at once through the platform
|
||||
specific code.
|
||||
This may be required for certain platform configurations only.
|
||||
This also limits the available power policies.
|
||||
|
||||
If unsure, say N.
|
||||
|
||||
@@ -150,26 +143,40 @@ config MALI_BIFROST_FENCE_DEBUG
|
||||
The timeout can be changed at runtime through the js_soft_timeout
|
||||
device attribute, where the timeout is specified in milliseconds.
|
||||
|
||||
config MALI_BIFROST_ERROR_INJECT
|
||||
bool "Error injection"
|
||||
depends on MALI_BIFROST && MALI_BIFROST_EXPERT && NO_MALI
|
||||
default n
|
||||
choice
|
||||
prompt "Error injection level"
|
||||
default MALI_ERROR_INJECT_NONE
|
||||
help
|
||||
Enables insertion of errors to test module failure and recovery mechanisms.
|
||||
|
||||
config MALI_ERROR_INJECT_NONE
|
||||
bool "disabled"
|
||||
help
|
||||
Error injection is disabled.
|
||||
|
||||
config MALI_ERROR_INJECT_TRACK_LIST
|
||||
bool "error track list"
|
||||
depends on MALI_BIFROST && MALI_BIFROST_EXPERT && NO_MALI
|
||||
help
|
||||
Errors to inject are pre-configured by the user.
|
||||
|
||||
config MALI_ERROR_INJECT_RANDOM
|
||||
bool "Random error injection"
|
||||
depends on MALI_BIFROST && MALI_BIFROST_EXPERT && NO_MALI && MALI_BIFROST_ERROR_INJECT
|
||||
default n
|
||||
bool "random error injection"
|
||||
depends on MALI_BIFROST && MALI_BIFROST_EXPERT && NO_MALI
|
||||
help
|
||||
Injected errors are random, rather than user-driven.
|
||||
|
||||
config MALI_BIFROST_TRACE_TIMELINE
|
||||
bool "Timeline tracing"
|
||||
depends on MALI_BIFROST && MALI_BIFROST_EXPERT
|
||||
default n
|
||||
help
|
||||
Enables timeline tracing through the kernel tracepoint system.
|
||||
endchoice
|
||||
|
||||
config MALI_ERROR_INJECT_ON
|
||||
string
|
||||
default "0" if MALI_ERROR_INJECT_NONE
|
||||
default "1" if MALI_ERROR_INJECT_TRACK_LIST
|
||||
default "2" if MALI_ERROR_INJECT_RANDOM
|
||||
|
||||
config MALI_BIFROST_ERROR_INJECT
|
||||
bool
|
||||
default y if !MALI_ERROR_INJECT_NONE
|
||||
|
||||
config MALI_BIFROST_SYSTEM_TRACE
|
||||
bool "Enable system event tracing support"
|
||||
@@ -192,11 +199,6 @@ config MALI_2MB_ALLOC
|
||||
|
||||
If in doubt, say N
|
||||
|
||||
config MALI_FPGA_BUS_LOGGER
|
||||
bool "Enable bus log integration"
|
||||
depends on MALI_BIFROST && MALI_BIFROST_EXPERT
|
||||
default n
|
||||
|
||||
config MALI_PWRSOFT_765
|
||||
bool "PWRSOFT-765 ticket"
|
||||
depends on MALI_BIFROST && MALI_BIFROST_EXPERT
|
||||
@@ -206,4 +208,64 @@ config MALI_PWRSOFT_765
|
||||
not merged in mainline kernel yet. So this define helps to guard those
|
||||
parts of the code.
|
||||
|
||||
config MALI_MEMORY_FULLY_BACKED
|
||||
bool "Memory fully physically-backed"
|
||||
default n
|
||||
help
|
||||
This option enables full backing of all virtual memory allocations
|
||||
for the kernel. This only affects grow-on-GPU-page-fault memory.
|
||||
|
||||
config MALI_DMA_BUF_MAP_ON_DEMAND
|
||||
bool "Map imported dma-bufs on demand"
|
||||
depends on MALI_BIFROST
|
||||
default n
|
||||
default y if !DMA_BUF_SYNC_IOCTL_SUPPORTED
|
||||
help
|
||||
This option causes kbase to set up the GPU mapping of imported
|
||||
dma-buf when needed to run atoms. This is the legacy behaviour.
|
||||
|
||||
config MALI_DMA_BUF_LEGACY_COMPAT
|
||||
bool "Enable legacy compatibility cache flush on dma-buf map"
|
||||
depends on MALI_BIFROST && !MALI_DMA_BUF_MAP_ON_DEMAND
|
||||
default y
|
||||
help
|
||||
This option enables compatibility with legacy dma-buf mapping
|
||||
behavior, where the dma-buf is mapped on import, by adding cache
|
||||
maintenance where MALI_DMA_BUF_MAP_ON_DEMAND would do the mapping,
|
||||
including a cache flush.
|
||||
|
||||
config MALI_REAL_HW
|
||||
bool
|
||||
default y
|
||||
default n if NO_MALI
|
||||
|
||||
config MALI_HW_ERRATA_1485982_NOT_AFFECTED
|
||||
bool "Disable workaround for BASE_HW_ISSUE_GPU2017_1336"
|
||||
depends on MALI_BIFROST && MALI_BIFROST_EXPERT
|
||||
default n
|
||||
default y if PLATFORM_JUNO
|
||||
help
|
||||
This option disables the default workaround for GPU2017-1336. The
|
||||
workaround keeps the L2 cache powered up except for powerdown and reset.
|
||||
|
||||
The workaround introduces a limitation that will prevent the running of
|
||||
protected mode content on fully coherent platforms, as the switch to IO
|
||||
coherency mode requires the L2 to be turned off.
|
||||
|
||||
config MALI_HW_ERRATA_1485982_USE_CLOCK_ALTERNATIVE
|
||||
bool "Use alternative workaround for BASE_HW_ISSUE_GPU2017_1336"
|
||||
depends on MALI_BIFROST && MALI_BIFROST_EXPERT && !MALI_HW_ERRATA_1485982_NOT_AFFECTED
|
||||
default n
|
||||
help
|
||||
This option uses an alternative workaround for GPU2017-1336. Lowering
|
||||
the GPU clock to a platform-specific, known-good frequency before
|
||||
powering down the L2 cache. The clock can be specified in the device
|
||||
tree using the property, opp-mali-errata-1485982. Otherwise the
|
||||
slowest clock will be selected.
|
||||
|
||||
# Instrumentation options.
|
||||
|
||||
# config MALI_JOB_DUMP exists in the Kernel Kconfig but is configured using CINSTR_JOB_DUMP in Mconfig.
|
||||
# config MALI_BIFROST_PRFCNT_SET_SECONDARY exists in the Kernel Kconfig but is configured using CINSTR_SECONDARY_HWC in Mconfig.
|
||||
|
||||
source "kernel/drivers/gpu/arm/midgard/tests/Mconfig"
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
#
|
||||
# (C) COPYRIGHT 2014,2017 ARM Limited. All rights reserved.
|
||||
# (C) COPYRIGHT 2014-2018 ARM Limited. All rights reserved.
|
||||
#
|
||||
# This program is free software and is provided to you under the terms of the
|
||||
# GNU General Public License version 2 as published by the Free Software
|
||||
@@ -30,31 +30,26 @@ BACKEND += \
|
||||
backend/gpu/mali_kbase_jm_as.c \
|
||||
backend/gpu/mali_kbase_jm_hw.c \
|
||||
backend/gpu/mali_kbase_jm_rb.c \
|
||||
backend/gpu/mali_kbase_js_affinity.c \
|
||||
backend/gpu/mali_kbase_js_backend.c \
|
||||
backend/gpu/mali_kbase_mmu_hw_direct.c \
|
||||
backend/gpu/mali_kbase_pm_backend.c \
|
||||
backend/gpu/mali_kbase_pm_driver.c \
|
||||
backend/gpu/mali_kbase_pm_metrics.c \
|
||||
backend/gpu/mali_kbase_pm_ca.c \
|
||||
backend/gpu/mali_kbase_pm_ca_fixed.c \
|
||||
backend/gpu/mali_kbase_pm_always_on.c \
|
||||
backend/gpu/mali_kbase_pm_coarse_demand.c \
|
||||
backend/gpu/mali_kbase_pm_demand.c \
|
||||
backend/gpu/mali_kbase_pm_policy.c \
|
||||
backend/gpu/mali_kbase_time.c
|
||||
backend/gpu/mali_kbase_time.c \
|
||||
backend/gpu/mali_kbase_l2_mmu_config.c
|
||||
|
||||
ifeq ($(MALI_CUSTOMER_RELEASE),0)
|
||||
BACKEND += \
|
||||
backend/gpu/mali_kbase_pm_ca_random.c \
|
||||
backend/gpu/mali_kbase_pm_demand_always_powered.c \
|
||||
backend/gpu/mali_kbase_pm_fast_start.c
|
||||
backend/gpu/mali_kbase_pm_always_on_demand.c
|
||||
endif
|
||||
|
||||
ifeq ($(CONFIG_MALI_BIFROST_DEVFREQ),y)
|
||||
BACKEND += \
|
||||
backend/gpu/mali_kbase_devfreq.c \
|
||||
backend/gpu/mali_kbase_pm_ca_devfreq.c
|
||||
backend/gpu/mali_kbase_devfreq.c
|
||||
endif
|
||||
|
||||
ifeq ($(CONFIG_MALI_BIFROST_NO_MALI),y)
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
/*
|
||||
*
|
||||
* (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
|
||||
* (C) COPYRIGHT 2014-2018 ARM Limited. All rights reserved.
|
||||
*
|
||||
* This program is free software and is provided to you under the terms of the
|
||||
* GNU General Public License version 2 as published by the Free Software
|
||||
@@ -27,8 +27,5 @@
|
||||
#ifndef _KBASE_BACKEND_CONFIG_H_
|
||||
#define _KBASE_BACKEND_CONFIG_H_
|
||||
|
||||
/* Enable GPU reset API */
|
||||
#define KBASE_GPU_RESET_EN 1
|
||||
|
||||
#endif /* _KBASE_BACKEND_CONFIG_H_ */
|
||||
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
/*
|
||||
*
|
||||
* (C) COPYRIGHT 2015-2016 ARM Limited. All rights reserved.
|
||||
* (C) COPYRIGHT 2015-2016,2018 ARM Limited. All rights reserved.
|
||||
*
|
||||
* This program is free software and is provided to you under the terms of the
|
||||
* GNU General Public License version 2 as published by the Free Software
|
||||
@@ -29,6 +29,6 @@ void kbase_cache_set_coherency_mode(struct kbase_device *kbdev,
|
||||
kbdev->current_gpu_coherency_mode = mode;
|
||||
|
||||
if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_COHERENCY_REG))
|
||||
kbase_reg_write(kbdev, COHERENCY_ENABLE, mode, NULL);
|
||||
kbase_reg_write(kbdev, COHERENCY_ENABLE, mode);
|
||||
}
|
||||
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
/*
|
||||
*
|
||||
* (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved.
|
||||
* (C) COPYRIGHT 2012-2015,2018 ARM Limited. All rights reserved.
|
||||
*
|
||||
* This program is free software and is provided to you under the terms of the
|
||||
* GNU General Public License version 2 as published by the Free Software
|
||||
@@ -152,7 +152,7 @@ bool kbase_job_fault_get_reg_snapshot(struct kbase_context *kctx)
|
||||
while (kctx->reg_dump[offset] != REGISTER_DUMP_TERMINATION_FLAG) {
|
||||
kctx->reg_dump[offset+1] =
|
||||
kbase_reg_read(kctx->kbdev,
|
||||
kctx->reg_dump[offset], NULL);
|
||||
kctx->reg_dump[offset]);
|
||||
offset += 2;
|
||||
}
|
||||
return true;
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
/*
|
||||
*
|
||||
* (C) COPYRIGHT 2014-2018 ARM Limited. All rights reserved.
|
||||
* (C) COPYRIGHT 2014-2019 ARM Limited. All rights reserved.
|
||||
*
|
||||
* This program is free software and is provided to you under the terms of the
|
||||
* GNU General Public License version 2 as published by the Free Software
|
||||
@@ -21,8 +21,7 @@
|
||||
*/
|
||||
|
||||
#include <mali_kbase.h>
|
||||
#include <mali_kbase_tlstream.h>
|
||||
#include <mali_kbase_config_defaults.h>
|
||||
#include <mali_kbase_tracepoints.h>
|
||||
#include <backend/gpu/mali_kbase_pm_internal.h>
|
||||
|
||||
#include <linux/of.h>
|
||||
@@ -62,29 +61,42 @@ static struct monitor_dev_profile mali_mdevp = {
|
||||
* @kbdev: Device pointer
|
||||
* @freq: Nominal frequency
|
||||
* @core_mask: Pointer to u64 to store core mask to
|
||||
*
|
||||
* Return: Real target frequency
|
||||
* @freqs: Pointer to array of frequencies
|
||||
* @volts: Pointer to array of voltages
|
||||
*
|
||||
* This function will only perform translation if an operating-points-v2-mali
|
||||
* table is present in devicetree. If one is not present then it will return an
|
||||
* untranslated frequency and all cores enabled.
|
||||
*/
|
||||
static unsigned long opp_translate(struct kbase_device *kbdev,
|
||||
unsigned long freq, u64 *core_mask)
|
||||
static void opp_translate(struct kbase_device *kbdev, unsigned long freq,
|
||||
u64 *core_mask, unsigned long *freqs, unsigned long *volts)
|
||||
{
|
||||
int i;
|
||||
unsigned int i;
|
||||
|
||||
for (i = 0; i < kbdev->num_opps; i++) {
|
||||
if (kbdev->opp_table[i].opp_freq == freq) {
|
||||
*core_mask = kbdev->opp_table[i].core_mask;
|
||||
return kbdev->opp_table[i].real_freq;
|
||||
if (kbdev->devfreq_table[i].opp_freq == freq) {
|
||||
unsigned int j;
|
||||
|
||||
*core_mask = kbdev->devfreq_table[i].core_mask;
|
||||
for (j = 0; j < kbdev->nr_clocks; j++) {
|
||||
freqs[j] =
|
||||
kbdev->devfreq_table[i].real_freqs[j];
|
||||
volts[j] =
|
||||
kbdev->devfreq_table[i].opp_volts[j];
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/* Failed to find OPP - return all cores enabled & nominal frequency */
|
||||
*core_mask = kbdev->gpu_props.props.raw_props.shader_present;
|
||||
|
||||
return freq;
|
||||
/* If failed to find OPP, return all cores enabled
|
||||
* and nominal frequency
|
||||
*/
|
||||
if (i == kbdev->num_opps) {
|
||||
*core_mask = kbdev->gpu_props.props.raw_props.shader_present;
|
||||
for (i = 0; i < kbdev->nr_clocks; i++)
|
||||
freqs[i] = freq;
|
||||
}
|
||||
}
|
||||
|
||||
static int
|
||||
@@ -93,18 +105,17 @@ kbase_devfreq_target(struct device *dev, unsigned long *target_freq, u32 flags)
|
||||
struct kbase_device *kbdev = dev_get_drvdata(dev);
|
||||
struct dev_pm_opp *opp;
|
||||
unsigned long nominal_freq;
|
||||
unsigned long freq = 0;
|
||||
unsigned long voltage;
|
||||
int err;
|
||||
unsigned long freqs[BASE_MAX_NR_CLOCKS_REGULATORS] = {0};
|
||||
unsigned long volts[BASE_MAX_NR_CLOCKS_REGULATORS] = {0};
|
||||
unsigned int i;
|
||||
u64 core_mask;
|
||||
|
||||
freq = *target_freq;
|
||||
nominal_freq = *target_freq;
|
||||
|
||||
#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 11, 0)
|
||||
rcu_read_lock();
|
||||
#endif
|
||||
opp = devfreq_recommended_opp(dev, &freq, flags);
|
||||
voltage = dev_pm_opp_get_voltage(opp);
|
||||
opp = devfreq_recommended_opp(dev, &nominal_freq, flags);
|
||||
#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 11, 0)
|
||||
rcu_read_unlock();
|
||||
#endif
|
||||
@@ -116,70 +127,121 @@ kbase_devfreq_target(struct device *dev, unsigned long *target_freq, u32 flags)
|
||||
dev_pm_opp_put(opp);
|
||||
#endif
|
||||
|
||||
nominal_freq = freq;
|
||||
opp_translate(kbdev, nominal_freq, &core_mask, freqs, volts);
|
||||
|
||||
/*
|
||||
* Only update if there is a change of frequency
|
||||
*/
|
||||
if (kbdev->current_nominal_freq == nominal_freq) {
|
||||
unsigned int i;
|
||||
int err;
|
||||
|
||||
*target_freq = nominal_freq;
|
||||
|
||||
#ifdef CONFIG_REGULATOR
|
||||
if (kbdev->current_voltage == voltage)
|
||||
return 0;
|
||||
err = regulator_set_voltage(kbdev->regulator, voltage, INT_MAX);
|
||||
if (err) {
|
||||
dev_err(dev, "Failed to set voltage (%d)\n", err);
|
||||
return err;
|
||||
for (i = 0; i < kbdev->nr_regulators; i++) {
|
||||
if (kbdev->current_voltages[i] == volts[i])
|
||||
continue;
|
||||
|
||||
err = regulator_set_voltage(kbdev->regulators[i],
|
||||
volts[i],
|
||||
INT_MAX);
|
||||
if (err) {
|
||||
dev_err(dev, "Failed to set voltage (%d)\n", err);
|
||||
return err;
|
||||
}
|
||||
kbdev->current_voltages[i] = volts[i];
|
||||
}
|
||||
kbdev->current_voltage = voltage;
|
||||
#endif
|
||||
return 0;
|
||||
}
|
||||
|
||||
freq = opp_translate(kbdev, nominal_freq, &core_mask);
|
||||
#ifdef CONFIG_REGULATOR
|
||||
if (kbdev->regulator && kbdev->current_voltage != voltage
|
||||
&& kbdev->current_freq < freq) {
|
||||
err = regulator_set_voltage(kbdev->regulator, voltage, INT_MAX);
|
||||
if (err) {
|
||||
dev_err(dev, "Failed to increase voltage (%d)\n", err);
|
||||
return err;
|
||||
/* Regulators and clocks work in pairs: every clock has a regulator,
|
||||
* and we never expect to have more regulators than clocks.
|
||||
*
|
||||
* We always need to increase the voltage before increasing
|
||||
* the frequency of a regulator/clock pair, otherwise the clock
|
||||
* wouldn't have enough power to perform the transition.
|
||||
*
|
||||
* It's always safer to decrease the frequency before decreasing
|
||||
* voltage of a regulator/clock pair, otherwise the clock could have
|
||||
* problems operating if it is deprived of the necessary power
|
||||
* to sustain its current frequency (even if that happens for a short
|
||||
* transition interval).
|
||||
*/
|
||||
for (i = 0; i < kbdev->nr_clocks; i++) {
|
||||
if (kbdev->regulators[i] &&
|
||||
kbdev->current_voltages[i] != volts[i] &&
|
||||
kbdev->current_freqs[i] < freqs[i]) {
|
||||
int err;
|
||||
|
||||
err = regulator_set_voltage(kbdev->regulators[i],
|
||||
volts[i], INT_MAX);
|
||||
if (!err) {
|
||||
kbdev->current_voltages[i] = volts[i];
|
||||
} else {
|
||||
dev_err(dev, "Failed to increase voltage (%d) (target %lu)\n",
|
||||
err, volts[i]);
|
||||
return err;
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
err = clk_set_rate(kbdev->clock, freq);
|
||||
if (err) {
|
||||
dev_err(dev, "Failed to set clock %lu (target %lu)\n",
|
||||
freq, *target_freq);
|
||||
return err;
|
||||
for (i = 0; i < kbdev->nr_clocks; i++) {
|
||||
if (kbdev->clocks[i]) {
|
||||
int err;
|
||||
|
||||
err = clk_set_rate(kbdev->clocks[i], freqs[i]);
|
||||
if (!err) {
|
||||
kbdev->current_freqs[i] = freqs[i];
|
||||
} else {
|
||||
dev_err(dev, "Failed to set clock %lu (target %lu)\n",
|
||||
freqs[i], *target_freq);
|
||||
return err;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef CONFIG_REGULATOR
|
||||
if (kbdev->regulator && kbdev->current_voltage != voltage
|
||||
&& kbdev->current_freq > freq) {
|
||||
err = regulator_set_voltage(kbdev->regulator, voltage, INT_MAX);
|
||||
if (err) {
|
||||
dev_err(dev, "Failed to decrease voltage (%d)\n", err);
|
||||
return err;
|
||||
for (i = 0; i < kbdev->nr_clocks; i++) {
|
||||
if (kbdev->regulators[i] &&
|
||||
kbdev->current_voltages[i] != volts[i] &&
|
||||
kbdev->current_freqs[i] > freqs[i]) {
|
||||
int err;
|
||||
|
||||
err = regulator_set_voltage(kbdev->regulators[i],
|
||||
volts[i], INT_MAX);
|
||||
if (!err) {
|
||||
kbdev->current_voltages[i] = volts[i];
|
||||
} else {
|
||||
dev_err(dev, "Failed to decrease voltage (%d) (target %lu)\n",
|
||||
err, volts[i]);
|
||||
return err;
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
if (kbdev->pm.backend.ca_current_policy->id ==
|
||||
KBASE_PM_CA_POLICY_ID_DEVFREQ)
|
||||
kbase_devfreq_set_core_mask(kbdev, core_mask);
|
||||
kbase_devfreq_set_core_mask(kbdev, core_mask);
|
||||
|
||||
*target_freq = nominal_freq;
|
||||
kbdev->current_voltage = voltage;
|
||||
kbdev->current_nominal_freq = nominal_freq;
|
||||
kbdev->current_freq = freq;
|
||||
kbdev->current_core_mask = core_mask;
|
||||
if (kbdev->devfreq)
|
||||
kbdev->devfreq->last_status.current_frequency = nominal_freq;
|
||||
|
||||
KBASE_TLSTREAM_AUX_DEVFREQ_TARGET((u64)nominal_freq);
|
||||
KBASE_TLSTREAM_AUX_DEVFREQ_TARGET(kbdev, (u64)nominal_freq);
|
||||
|
||||
return err;
|
||||
return 0;
|
||||
}
|
||||
|
||||
void kbase_devfreq_force_freq(struct kbase_device *kbdev, unsigned long freq)
|
||||
{
|
||||
unsigned long target_freq = freq;
|
||||
|
||||
kbase_devfreq_target(kbdev->dev, &target_freq, 0);
|
||||
}
|
||||
|
||||
static int
|
||||
@@ -254,16 +316,35 @@ static int kbase_devfreq_init_freq_table(struct kbase_device *kbdev,
|
||||
|
||||
dp->max_state = i;
|
||||
|
||||
/* Have the lowest clock as suspend clock.
|
||||
* It may be overridden by 'opp-mali-errata-1485982'.
|
||||
*/
|
||||
if (kbdev->pm.backend.gpu_clock_slow_down_wa) {
|
||||
freq = 0;
|
||||
opp = dev_pm_opp_find_freq_ceil(kbdev->dev, &freq);
|
||||
if (IS_ERR(opp)) {
|
||||
dev_err(kbdev->dev, "failed to find slowest clock");
|
||||
return 0;
|
||||
}
|
||||
dev_info(kbdev->dev, "suspend clock %lu from slowest", freq);
|
||||
kbdev->pm.backend.gpu_clock_suspend_freq = freq;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void kbase_devfreq_term_freq_table(struct kbase_device *kbdev)
|
||||
{
|
||||
struct devfreq_dev_profile *dp = kbdev->devfreq->profile;
|
||||
struct devfreq_dev_profile *dp = &kbdev->devfreq_profile;
|
||||
|
||||
kfree(dp->freq_table);
|
||||
}
|
||||
|
||||
static void kbase_devfreq_term_core_mask_table(struct kbase_device *kbdev)
|
||||
{
|
||||
kfree(kbdev->devfreq_table);
|
||||
}
|
||||
|
||||
static void kbase_devfreq_exit(struct device *dev)
|
||||
{
|
||||
struct kbase_device *kbdev = dev_get_drvdata(dev);
|
||||
@@ -271,13 +352,58 @@ static void kbase_devfreq_exit(struct device *dev)
|
||||
kbase_devfreq_term_freq_table(kbdev);
|
||||
}
|
||||
|
||||
static void kbasep_devfreq_read_suspend_clock(struct kbase_device *kbdev,
|
||||
struct device_node *node)
|
||||
{
|
||||
u64 freq = 0;
|
||||
int err = 0;
|
||||
|
||||
/* Check if this node is the opp entry having 'opp-mali-errata-1485982'
|
||||
* to get the suspend clock, otherwise skip it.
|
||||
*/
|
||||
if (!of_property_read_bool(node, "opp-mali-errata-1485982"))
|
||||
return;
|
||||
|
||||
/* In kbase DevFreq, the clock will be read from 'opp-hz'
|
||||
* and translated into the actual clock by opp_translate.
|
||||
*
|
||||
* In customer DVFS, the clock will be read from 'opp-hz-real'
|
||||
* for clk driver. If 'opp-hz-real' does not exist,
|
||||
* read from 'opp-hz'.
|
||||
*/
|
||||
if (IS_ENABLED(CONFIG_MALI_BIFROST_DEVFREQ))
|
||||
err = of_property_read_u64(node, "opp-hz", &freq);
|
||||
else {
|
||||
if (of_property_read_u64(node, "opp-hz-real", &freq))
|
||||
err = of_property_read_u64(node, "opp-hz", &freq);
|
||||
}
|
||||
|
||||
if (WARN_ON(err || !freq))
|
||||
return;
|
||||
|
||||
kbdev->pm.backend.gpu_clock_suspend_freq = freq;
|
||||
dev_info(kbdev->dev,
|
||||
"suspend clock %llu by opp-mali-errata-1485982", freq);
|
||||
}
|
||||
|
||||
static int kbase_devfreq_init_core_mask_table(struct kbase_device *kbdev)
|
||||
{
|
||||
#if KERNEL_VERSION(3, 18, 0) > LINUX_VERSION_CODE || !defined(CONFIG_OF)
|
||||
/* OPP table initialization requires at least the capability to get
|
||||
* regulators and clocks from the device tree, as well as parsing
|
||||
* arrays of unsigned integer values.
|
||||
*
|
||||
* The whole initialization process shall simply be skipped if the
|
||||
* minimum capability is not available.
|
||||
*/
|
||||
return 0;
|
||||
#else
|
||||
struct device_node *opp_node = of_parse_phandle(kbdev->dev->of_node,
|
||||
"operating-points-v2", 0);
|
||||
struct device_node *node;
|
||||
int i = 0;
|
||||
int count;
|
||||
u64 shader_present = kbdev->gpu_props.props.raw_props.shader_present;
|
||||
|
||||
if (!opp_node)
|
||||
return 0;
|
||||
@@ -285,25 +411,66 @@ static int kbase_devfreq_init_core_mask_table(struct kbase_device *kbdev)
|
||||
return 0;
|
||||
|
||||
count = dev_pm_opp_get_opp_count(kbdev->dev);
|
||||
kbdev->opp_table = kmalloc_array(count,
|
||||
kbdev->devfreq_table = kmalloc_array(count,
|
||||
sizeof(struct kbase_devfreq_opp), GFP_KERNEL);
|
||||
if (!kbdev->opp_table)
|
||||
if (!kbdev->devfreq_table)
|
||||
return -ENOMEM;
|
||||
|
||||
for_each_available_child_of_node(opp_node, node) {
|
||||
u64 core_mask;
|
||||
u64 opp_freq, real_freq;
|
||||
const void *core_count_p;
|
||||
u64 core_mask, opp_freq,
|
||||
real_freqs[BASE_MAX_NR_CLOCKS_REGULATORS];
|
||||
int err;
|
||||
#ifdef CONFIG_REGULATOR
|
||||
u32 opp_volts[BASE_MAX_NR_CLOCKS_REGULATORS];
|
||||
#endif
|
||||
|
||||
if (of_property_read_u64(node, "opp-hz", &opp_freq)) {
|
||||
dev_warn(kbdev->dev, "OPP is missing required opp-hz property\n");
|
||||
/* Read suspend clock from opp table */
|
||||
if (kbdev->pm.backend.gpu_clock_slow_down_wa)
|
||||
kbasep_devfreq_read_suspend_clock(kbdev, node);
|
||||
|
||||
err = of_property_read_u64(node, "opp-hz", &opp_freq);
|
||||
if (err) {
|
||||
dev_warn(kbdev->dev, "Failed to read opp-hz property with error %d\n",
|
||||
err);
|
||||
continue;
|
||||
}
|
||||
if (of_property_read_u64(node, "opp-hz-real", &real_freq))
|
||||
real_freq = opp_freq;
|
||||
|
||||
|
||||
#if BASE_MAX_NR_CLOCKS_REGULATORS > 1
|
||||
err = of_property_read_u64_array(node, "opp-hz-real",
|
||||
real_freqs, kbdev->nr_clocks);
|
||||
#else
|
||||
WARN_ON(kbdev->nr_clocks != 1);
|
||||
err = of_property_read_u64(node, "opp-hz-real", real_freqs);
|
||||
#endif
|
||||
if (err < 0) {
|
||||
dev_warn(kbdev->dev, "Failed to read opp-hz-real property with error %d\n",
|
||||
err);
|
||||
continue;
|
||||
}
|
||||
#ifdef CONFIG_REGULATOR
|
||||
err = of_property_read_u32_array(node,
|
||||
"opp-microvolt", opp_volts, kbdev->nr_regulators);
|
||||
if (err < 0) {
|
||||
dev_warn(kbdev->dev, "Failed to read opp-microvolt property with error %d\n",
|
||||
err);
|
||||
continue;
|
||||
}
|
||||
#endif
|
||||
|
||||
if (of_property_read_u64(node, "opp-core-mask", &core_mask))
|
||||
core_mask =
|
||||
kbdev->gpu_props.props.raw_props.shader_present;
|
||||
core_mask = shader_present;
|
||||
if (core_mask != shader_present &&
|
||||
(kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_11056) ||
|
||||
corestack_driver_control ||
|
||||
platform_power_down_only)) {
|
||||
|
||||
dev_warn(kbdev->dev, "Ignoring OPP %llu - Dynamic Core Scaling not supported on this GPU\n",
|
||||
opp_freq);
|
||||
continue;
|
||||
}
|
||||
|
||||
core_count_p = of_get_property(node, "opp-core-count", NULL);
|
||||
if (core_count_p) {
|
||||
u64 remaining_core_mask =
|
||||
@@ -330,12 +497,27 @@ static int kbase_devfreq_init_core_mask_table(struct kbase_device *kbdev)
|
||||
return -ENODEV;
|
||||
}
|
||||
|
||||
kbdev->opp_table[i].opp_freq = opp_freq;
|
||||
kbdev->opp_table[i].real_freq = real_freq;
|
||||
kbdev->opp_table[i].core_mask = core_mask;
|
||||
kbdev->devfreq_table[i].opp_freq = opp_freq;
|
||||
kbdev->devfreq_table[i].core_mask = core_mask;
|
||||
if (kbdev->nr_clocks > 0) {
|
||||
int j;
|
||||
|
||||
dev_info(kbdev->dev, "OPP %d : opp_freq=%llu real_freq=%llu core_mask=%llx\n",
|
||||
i, opp_freq, real_freq, core_mask);
|
||||
for (j = 0; j < kbdev->nr_clocks; j++)
|
||||
kbdev->devfreq_table[i].real_freqs[j] =
|
||||
real_freqs[j];
|
||||
}
|
||||
#ifdef CONFIG_REGULATOR
|
||||
if (kbdev->nr_regulators > 0) {
|
||||
int j;
|
||||
|
||||
for (j = 0; j < kbdev->nr_regulators; j++)
|
||||
kbdev->devfreq_table[i].opp_volts[j] =
|
||||
opp_volts[j];
|
||||
}
|
||||
#endif
|
||||
|
||||
dev_info(kbdev->dev, "OPP %d : opp_freq=%llu core_mask=%llx\n",
|
||||
i, opp_freq, core_mask);
|
||||
|
||||
i++;
|
||||
}
|
||||
@@ -343,6 +525,106 @@ static int kbase_devfreq_init_core_mask_table(struct kbase_device *kbdev)
|
||||
kbdev->num_opps = i;
|
||||
|
||||
return 0;
|
||||
#endif /* KERNEL_VERSION(3, 18, 0) > LINUX_VERSION_CODE */
|
||||
}
|
||||
|
||||
#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0)
|
||||
|
||||
static const char *kbase_devfreq_req_type_name(enum kbase_devfreq_work_type type)
|
||||
{
|
||||
const char *p;
|
||||
|
||||
switch (type) {
|
||||
case DEVFREQ_WORK_NONE:
|
||||
p = "devfreq_none";
|
||||
break;
|
||||
case DEVFREQ_WORK_SUSPEND:
|
||||
p = "devfreq_suspend";
|
||||
break;
|
||||
case DEVFREQ_WORK_RESUME:
|
||||
p = "devfreq_resume";
|
||||
break;
|
||||
default:
|
||||
p = "Unknown devfreq_type";
|
||||
}
|
||||
return p;
|
||||
}
|
||||
|
||||
static void kbase_devfreq_suspend_resume_worker(struct work_struct *work)
|
||||
{
|
||||
struct kbase_devfreq_queue_info *info = container_of(work,
|
||||
struct kbase_devfreq_queue_info, work);
|
||||
struct kbase_device *kbdev = container_of(info, struct kbase_device,
|
||||
devfreq_queue);
|
||||
unsigned long flags;
|
||||
enum kbase_devfreq_work_type type, acted_type;
|
||||
|
||||
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
|
||||
type = kbdev->devfreq_queue.req_type;
|
||||
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
|
||||
|
||||
acted_type = kbdev->devfreq_queue.acted_type;
|
||||
dev_dbg(kbdev->dev, "Worker handles queued req: %s (acted: %s)\n",
|
||||
kbase_devfreq_req_type_name(type),
|
||||
kbase_devfreq_req_type_name(acted_type));
|
||||
switch (type) {
|
||||
case DEVFREQ_WORK_SUSPEND:
|
||||
case DEVFREQ_WORK_RESUME:
|
||||
if (type != acted_type) {
|
||||
if (type == DEVFREQ_WORK_RESUME)
|
||||
devfreq_resume_device(kbdev->devfreq);
|
||||
else
|
||||
devfreq_suspend_device(kbdev->devfreq);
|
||||
dev_dbg(kbdev->dev, "Devfreq transition occured: %s => %s\n",
|
||||
kbase_devfreq_req_type_name(acted_type),
|
||||
kbase_devfreq_req_type_name(type));
|
||||
kbdev->devfreq_queue.acted_type = type;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
WARN_ON(1);
|
||||
}
|
||||
}
|
||||
|
||||
#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0) */
|
||||
|
||||
void kbase_devfreq_enqueue_work(struct kbase_device *kbdev,
|
||||
enum kbase_devfreq_work_type work_type)
|
||||
{
|
||||
#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0)
|
||||
unsigned long flags;
|
||||
|
||||
WARN_ON(work_type == DEVFREQ_WORK_NONE);
|
||||
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
|
||||
kbdev->devfreq_queue.req_type = work_type;
|
||||
queue_work(kbdev->devfreq_queue.workq, &kbdev->devfreq_queue.work);
|
||||
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
|
||||
dev_dbg(kbdev->dev, "Enqueuing devfreq req: %s\n",
|
||||
kbase_devfreq_req_type_name(work_type));
|
||||
#endif
|
||||
}
|
||||
|
||||
/**
 * kbase_devfreq_work_init - Set up the devfreq suspend/resume work queue.
 * @kbdev: Device pointer
 *
 * Initialises the request state (nothing requested, last acted-on state is
 * "resume"), allocates an ordered workqueue and binds the worker function.
 * Does nothing on kernels older than 3.8.
 *
 * Return: 0 on success, -ENOMEM if the workqueue cannot be allocated.
 */
static int kbase_devfreq_work_init(struct kbase_device *kbdev)
{
#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0)
	struct kbase_devfreq_queue_info *queue = &kbdev->devfreq_queue;

	queue->req_type = DEVFREQ_WORK_NONE;
	queue->acted_type = DEVFREQ_WORK_RESUME;

	queue->workq = alloc_ordered_workqueue("devfreq_workq", 0);
	if (!queue->workq)
		return -ENOMEM;

	INIT_WORK(&queue->work, kbase_devfreq_suspend_resume_worker);
#endif
	return 0;
}
|
||||
|
||||
/**
 * kbase_devfreq_work_term - Tear down the devfreq suspend/resume work queue.
 * @kbdev: Device pointer
 *
 * Destroys the workqueue held in kbdev->devfreq_queue. Does nothing on
 * kernels older than 3.8.
 */
static void kbase_devfreq_work_term(struct kbase_device *kbdev)
{
#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0)
	struct workqueue_struct *wq = kbdev->devfreq_queue.workq;

	destroy_workqueue(wq);
#endif
}
|
||||
|
||||
int kbase_devfreq_init(struct kbase_device *kbdev)
|
||||
@@ -351,18 +633,25 @@ int kbase_devfreq_init(struct kbase_device *kbdev)
|
||||
struct dev_pm_opp *opp;
|
||||
unsigned long opp_rate;
|
||||
int err;
|
||||
unsigned int i;
|
||||
|
||||
if (!kbdev->clock) {
|
||||
if (kbdev->nr_clocks == 0) {
|
||||
dev_err(kbdev->dev, "Clock not available for devfreq\n");
|
||||
return -ENODEV;
|
||||
}
|
||||
|
||||
kbdev->current_freq = clk_get_rate(kbdev->clock);
|
||||
kbdev->current_nominal_freq = kbdev->current_freq;
|
||||
for (i = 0; i < kbdev->nr_clocks; i++) {
|
||||
if (kbdev->clocks[i])
|
||||
kbdev->current_freqs[i] =
|
||||
clk_get_rate(kbdev->clocks[i]);
|
||||
else
|
||||
kbdev->current_freqs[i] = 0;
|
||||
}
|
||||
kbdev->current_nominal_freq = kbdev->current_freqs[0];
|
||||
|
||||
dp = &kbdev->devfreq_profile;
|
||||
|
||||
dp->initial_freq = kbdev->current_freq;
|
||||
dp->initial_freq = kbdev->current_freqs[0];
|
||||
dp->polling_ms = 100;
|
||||
dp->target = kbase_devfreq_target;
|
||||
dp->get_dev_status = kbase_devfreq_status;
|
||||
@@ -379,14 +668,24 @@ int kbase_devfreq_init(struct kbase_device *kbdev)
|
||||
};
|
||||
|
||||
err = kbase_devfreq_init_core_mask_table(kbdev);
|
||||
if (err)
|
||||
if (err) {
|
||||
kbase_devfreq_term_freq_table(kbdev);
|
||||
return err;
|
||||
}
|
||||
|
||||
/* Initialise devfreq suspend/resume workqueue */
|
||||
err = kbase_devfreq_work_init(kbdev);
|
||||
if (err) {
|
||||
kbase_devfreq_term_freq_table(kbdev);
|
||||
dev_err(kbdev->dev, "Devfreq initialization failed");
|
||||
return err;
|
||||
}
|
||||
|
||||
kbdev->devfreq = devfreq_add_device(kbdev->dev, dp,
|
||||
"simple_ondemand", NULL);
|
||||
if (IS_ERR(kbdev->devfreq)) {
|
||||
kfree(dp->freq_table);
|
||||
return PTR_ERR(kbdev->devfreq);
|
||||
err = PTR_ERR(kbdev->devfreq);
|
||||
goto add_device_failed;
|
||||
}
|
||||
|
||||
/* devfreq_add_device only copies a few of kbdev->dev's fields, so
|
||||
@@ -400,7 +699,7 @@ int kbase_devfreq_init(struct kbase_device *kbdev)
|
||||
goto opp_notifier_failed;
|
||||
}
|
||||
|
||||
opp_rate = kbdev->current_freq;
|
||||
opp_rate = kbdev->current_freqs[0]; /* Bifrost GPU has only 1 clock. */
|
||||
opp = devfreq_recommended_opp(kbdev->dev, &opp_rate, 0);
|
||||
if (!IS_ERR(opp))
|
||||
dev_pm_opp_put(opp);
|
||||
@@ -444,6 +743,10 @@ opp_notifier_failed:
|
||||
dev_err(kbdev->dev, "Failed to terminate devfreq (%d)\n", err);
|
||||
else
|
||||
kbdev->devfreq = NULL;
|
||||
add_device_failed:
|
||||
kbase_devfreq_work_term(kbdev);
|
||||
|
||||
kbase_devfreq_term_freq_table(kbdev);
|
||||
|
||||
return err;
|
||||
}
|
||||
@@ -470,5 +773,7 @@ void kbase_devfreq_term(struct kbase_device *kbdev)
|
||||
else
|
||||
kbdev->devfreq = NULL;
|
||||
|
||||
kfree(kbdev->opp_table);
|
||||
kbase_devfreq_term_core_mask_table(kbdev);
|
||||
|
||||
kbase_devfreq_work_term(kbdev);
|
||||
}
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
/*
|
||||
*
|
||||
* (C) COPYRIGHT 2014 ARM Limited. All rights reserved.
|
||||
* (C) COPYRIGHT 2014, 2019 ARM Limited. All rights reserved.
|
||||
*
|
||||
* This program is free software and is provided to you under the terms of the
|
||||
* GNU General Public License version 2 as published by the Free Software
|
||||
@@ -24,7 +24,24 @@
|
||||
#define _BASE_DEVFREQ_H_
|
||||
|
||||
int kbase_devfreq_init(struct kbase_device *kbdev);
|
||||
|
||||
void kbase_devfreq_term(struct kbase_device *kbdev);
|
||||
int kbase_platform_rk_init_opp_table(struct kbase_device *kbdev);
|
||||
|
||||
/**
|
||||
* kbase_devfreq_force_freq - Set GPU frequency on L2 power on/off.
|
||||
* @kbdev: Device pointer
|
||||
* @freq: GPU frequency in Hz to be set when
|
||||
* MALI_HW_ERRATA_1485982_USE_CLOCK_ALTERNATIVE is enabled
|
||||
*/
|
||||
void kbase_devfreq_force_freq(struct kbase_device *kbdev, unsigned long freq);
|
||||
|
||||
/**
|
||||
* kbase_devfreq_enqueue_work - Enqueue a work item for suspend/resume devfreq.
|
||||
* @kbdev: Device pointer
|
||||
* @work_type: The type of the devfreq work item, i.e. suspend or resume
|
||||
*/
|
||||
void kbase_devfreq_enqueue_work(struct kbase_device *kbdev,
|
||||
enum kbase_devfreq_work_type work_type);
|
||||
|
||||
#endif /* _BASE_DEVFREQ_H_ */
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
/*
|
||||
*
|
||||
* (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
|
||||
* (C) COPYRIGHT 2014-2016, 2018-2019 ARM Limited. All rights reserved.
|
||||
*
|
||||
* This program is free software and is provided to you under the terms of the
|
||||
* GNU General Public License version 2 as published by the Free Software
|
||||
@@ -27,15 +27,15 @@
|
||||
#include <mali_kbase.h>
|
||||
#include <backend/gpu/mali_kbase_instr_internal.h>
|
||||
#include <backend/gpu/mali_kbase_pm_internal.h>
|
||||
|
||||
#include <backend/gpu/mali_kbase_device_internal.h>
|
||||
#include <backend/gpu/mali_kbase_mmu_hw_direct.h>
|
||||
#include <mali_kbase_reset_gpu.h>
|
||||
|
||||
#if !defined(CONFIG_MALI_BIFROST_NO_MALI)
|
||||
|
||||
|
||||
#ifdef CONFIG_DEBUG_FS
|
||||
|
||||
|
||||
int kbase_io_history_resize(struct kbase_io_history *h, u16 new_size)
|
||||
{
|
||||
struct kbase_io_access *old_buf;
|
||||
@@ -154,11 +154,9 @@ void kbase_io_history_dump(struct kbase_device *kbdev)
|
||||
#endif /* CONFIG_DEBUG_FS */
|
||||
|
||||
|
||||
void kbase_reg_write(struct kbase_device *kbdev, u16 offset, u32 value,
|
||||
struct kbase_context *kctx)
|
||||
void kbase_reg_write(struct kbase_device *kbdev, u32 offset, u32 value)
|
||||
{
|
||||
KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_powered);
|
||||
KBASE_DEBUG_ASSERT(kctx == NULL || kctx->as_nr != KBASEP_AS_NR_INVALID);
|
||||
KBASE_DEBUG_ASSERT(kbdev->dev != NULL);
|
||||
|
||||
writel(value, kbdev->reg + offset);
|
||||
@@ -168,21 +166,15 @@ void kbase_reg_write(struct kbase_device *kbdev, u16 offset, u32 value,
|
||||
kbase_io_history_add(&kbdev->io_history, kbdev->reg + offset,
|
||||
value, 1);
|
||||
#endif /* CONFIG_DEBUG_FS */
|
||||
dev_dbg(kbdev->dev, "w: reg %04x val %08x", offset, value);
|
||||
|
||||
if (kctx && kctx->jctx.tb)
|
||||
kbase_device_trace_register_access(kctx, REG_WRITE, offset,
|
||||
value);
|
||||
dev_dbg(kbdev->dev, "w: reg %08x val %08x", offset, value);
|
||||
}
|
||||
|
||||
KBASE_EXPORT_TEST_API(kbase_reg_write);
|
||||
|
||||
u32 kbase_reg_read(struct kbase_device *kbdev, u16 offset,
|
||||
struct kbase_context *kctx)
|
||||
u32 kbase_reg_read(struct kbase_device *kbdev, u32 offset)
|
||||
{
|
||||
u32 val;
|
||||
KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_powered);
|
||||
KBASE_DEBUG_ASSERT(kctx == NULL || kctx->as_nr != KBASEP_AS_NR_INVALID);
|
||||
KBASE_DEBUG_ASSERT(kbdev->dev != NULL);
|
||||
|
||||
val = readl(kbdev->reg + offset);
|
||||
@@ -192,10 +184,8 @@ u32 kbase_reg_read(struct kbase_device *kbdev, u16 offset,
|
||||
kbase_io_history_add(&kbdev->io_history, kbdev->reg + offset,
|
||||
val, 0);
|
||||
#endif /* CONFIG_DEBUG_FS */
|
||||
dev_dbg(kbdev->dev, "r: reg %04x val %08x", offset, val);
|
||||
dev_dbg(kbdev->dev, "r: reg %08x val %08x", offset, val);
|
||||
|
||||
if (kctx && kctx->jctx.tb)
|
||||
kbase_device_trace_register_access(kctx, REG_READ, offset, val);
|
||||
return val;
|
||||
}
|
||||
|
||||
@@ -213,28 +203,124 @@ KBASE_EXPORT_TEST_API(kbase_reg_read);
|
||||
*/
|
||||
static void kbase_report_gpu_fault(struct kbase_device *kbdev, int multiple)
|
||||
{
|
||||
u32 status;
|
||||
u64 address;
|
||||
u32 gpu_id = kbdev->gpu_props.props.raw_props.gpu_id;
|
||||
u32 status = kbase_reg_read(kbdev,
|
||||
GPU_CONTROL_REG(GPU_FAULTSTATUS));
|
||||
u64 address = (u64) kbase_reg_read(kbdev,
|
||||
GPU_CONTROL_REG(GPU_FAULTADDRESS_HI)) << 32;
|
||||
|
||||
status = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_FAULTSTATUS), NULL);
|
||||
address = (u64) kbase_reg_read(kbdev,
|
||||
GPU_CONTROL_REG(GPU_FAULTADDRESS_HI), NULL) << 32;
|
||||
address |= kbase_reg_read(kbdev,
|
||||
GPU_CONTROL_REG(GPU_FAULTADDRESS_LO), NULL);
|
||||
GPU_CONTROL_REG(GPU_FAULTADDRESS_LO));
|
||||
|
||||
dev_warn(kbdev->dev, "GPU Fault 0x%08x (%s) at 0x%016llx",
|
||||
status & 0xFF,
|
||||
kbase_exception_name(kbdev, status),
|
||||
if ((gpu_id & GPU_ID2_PRODUCT_MODEL) != GPU_ID2_PRODUCT_TULX) {
|
||||
dev_warn(kbdev->dev, "GPU Fault 0x%08x (%s) at 0x%016llx",
|
||||
status,
|
||||
kbase_exception_name(kbdev, status & 0xFF),
|
||||
address);
|
||||
if (multiple)
|
||||
dev_warn(kbdev->dev, "There were multiple GPU faults - some have not been reported\n");
|
||||
if (multiple)
|
||||
dev_warn(kbdev->dev, "There were multiple GPU faults - some have not been reported\n");
|
||||
}
|
||||
}
|
||||
|
||||
static bool kbase_gpu_fault_interrupt(struct kbase_device *kbdev, int multiple)
|
||||
{
|
||||
kbase_report_gpu_fault(kbdev, multiple);
|
||||
return false;
|
||||
}
|
||||
|
||||
void kbase_gpu_start_cache_clean_nolock(struct kbase_device *kbdev)
|
||||
{
|
||||
u32 irq_mask;
|
||||
|
||||
lockdep_assert_held(&kbdev->hwaccess_lock);
|
||||
|
||||
if (kbdev->cache_clean_in_progress) {
|
||||
/* If this is called while another clean is in progress, we
|
||||
* can't rely on the current one to flush any new changes in
|
||||
* the cache. Instead, trigger another cache clean immediately
|
||||
* after this one finishes.
|
||||
*/
|
||||
kbdev->cache_clean_queued = true;
|
||||
return;
|
||||
}
|
||||
|
||||
/* Enable interrupt */
|
||||
irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK));
|
||||
kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK),
|
||||
irq_mask | CLEAN_CACHES_COMPLETED);
|
||||
|
||||
KBASE_TRACE_ADD(kbdev, CORE_GPU_CLEAN_INV_CACHES, NULL, NULL, 0u, 0);
|
||||
kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
|
||||
GPU_COMMAND_CLEAN_INV_CACHES);
|
||||
|
||||
kbdev->cache_clean_in_progress = true;
|
||||
}
|
||||
|
||||
void kbase_gpu_start_cache_clean(struct kbase_device *kbdev)
|
||||
{
|
||||
unsigned long flags;
|
||||
|
||||
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
|
||||
kbase_gpu_start_cache_clean_nolock(kbdev);
|
||||
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
|
||||
}
|
||||
|
||||
void kbase_gpu_cache_clean_wait_complete(struct kbase_device *kbdev)
|
||||
{
|
||||
lockdep_assert_held(&kbdev->hwaccess_lock);
|
||||
|
||||
kbdev->cache_clean_queued = false;
|
||||
kbdev->cache_clean_in_progress = false;
|
||||
wake_up(&kbdev->cache_clean_wait);
|
||||
}
|
||||
|
||||
static void kbase_clean_caches_done(struct kbase_device *kbdev)
|
||||
{
|
||||
u32 irq_mask;
|
||||
unsigned long flags;
|
||||
|
||||
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
|
||||
|
||||
if (kbdev->cache_clean_queued) {
|
||||
kbdev->cache_clean_queued = false;
|
||||
|
||||
KBASE_TRACE_ADD(kbdev, CORE_GPU_CLEAN_INV_CACHES, NULL, NULL, 0u, 0);
|
||||
kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
|
||||
GPU_COMMAND_CLEAN_INV_CACHES);
|
||||
} else {
|
||||
/* Disable interrupt */
|
||||
irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK));
|
||||
kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK),
|
||||
irq_mask & ~CLEAN_CACHES_COMPLETED);
|
||||
|
||||
kbase_gpu_cache_clean_wait_complete(kbdev);
|
||||
}
|
||||
|
||||
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
|
||||
}
|
||||
|
||||
void kbase_gpu_wait_cache_clean(struct kbase_device *kbdev)
|
||||
{
|
||||
unsigned long flags;
|
||||
|
||||
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
|
||||
while (kbdev->cache_clean_in_progress) {
|
||||
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
|
||||
wait_event_interruptible(kbdev->cache_clean_wait,
|
||||
!kbdev->cache_clean_in_progress);
|
||||
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
|
||||
}
|
||||
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
|
||||
}
|
||||
|
||||
void kbase_gpu_interrupt(struct kbase_device *kbdev, u32 val)
|
||||
{
|
||||
bool clear_gpu_fault = false;
|
||||
|
||||
KBASE_TRACE_ADD(kbdev, CORE_GPU_IRQ, NULL, NULL, 0u, val);
|
||||
if (val & GPU_FAULT)
|
||||
kbase_report_gpu_fault(kbdev, val & MULTIPLE_GPU_FAULTS);
|
||||
clear_gpu_fault = kbase_gpu_fault_interrupt(kbdev,
|
||||
val & MULTIPLE_GPU_FAULTS);
|
||||
|
||||
if (val & RESET_COMPLETED)
|
||||
kbase_pm_reset_done(kbdev);
|
||||
@@ -242,19 +328,42 @@ void kbase_gpu_interrupt(struct kbase_device *kbdev, u32 val)
|
||||
if (val & PRFCNT_SAMPLE_COMPLETED)
|
||||
kbase_instr_hwcnt_sample_done(kbdev);
|
||||
|
||||
KBASE_TRACE_ADD(kbdev, CORE_GPU_IRQ_CLEAR, NULL, NULL, 0u, val);
|
||||
kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), val);
|
||||
|
||||
/* kbase_pm_check_transitions (called by kbase_pm_power_changed) must
|
||||
* be called after the IRQ has been cleared. This is because it might
|
||||
* trigger further power transitions and we don't want to miss the
|
||||
* interrupt raised to notify us that these further transitions have
|
||||
* finished. The same applies to kbase_clean_caches_done() - if another
|
||||
* clean was queued, it might trigger another clean, which might
|
||||
* generate another interrupt which shouldn't be missed.
|
||||
*/
|
||||
|
||||
if (val & CLEAN_CACHES_COMPLETED)
|
||||
kbase_clean_caches_done(kbdev);
|
||||
|
||||
KBASE_TRACE_ADD(kbdev, CORE_GPU_IRQ_CLEAR, NULL, NULL, 0u, val);
|
||||
kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), val, NULL);
|
||||
|
||||
/* kbase_pm_check_transitions must be called after the IRQ has been
|
||||
* cleared. This is because it might trigger further power transitions
|
||||
* and we don't want to miss the interrupt raised to notify us that
|
||||
* these further transitions have finished.
|
||||
*/
|
||||
if (val & POWER_CHANGED_ALL)
|
||||
if (val & POWER_CHANGED_ALL) {
|
||||
kbase_pm_power_changed(kbdev);
|
||||
} else if (val & CLEAN_CACHES_COMPLETED) {
|
||||
/* When 'platform_power_down_only' is enabled, the L2 cache is
|
||||
* not powered down, but flushed before the GPU power down
|
||||
* (which is done by the platform code). So the L2 state machine
|
||||
* requests a cache flush. And when that flush completes, the L2
|
||||
* state machine needs to be re-invoked to proceed with the GPU
|
||||
* power down.
|
||||
* If cache line evict messages can be lost when shader cores
|
||||
* power down then we need to flush the L2 cache before powering
|
||||
* down cores. When the flush completes, the shaders' state
|
||||
* machine needs to be re-invoked to proceed with powering down
|
||||
* cores.
|
||||
*/
|
||||
if (platform_power_down_only ||
|
||||
kbdev->pm.backend.l2_always_on ||
|
||||
kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TTRX_921))
|
||||
kbase_pm_power_changed(kbdev);
|
||||
}
|
||||
|
||||
|
||||
KBASE_TRACE_ADD(kbdev, CORE_GPU_IRQ_DONE, NULL, NULL, 0u, val);
|
||||
}
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
/*
|
||||
*
|
||||
* (C) COPYRIGHT 2014 ARM Limited. All rights reserved.
|
||||
* (C) COPYRIGHT 2014,2019 ARM Limited. All rights reserved.
|
||||
*
|
||||
* This program is free software and is provided to you under the terms of the
|
||||
* GNU General Public License version 2 as published by the Free Software
|
||||
@@ -34,30 +34,57 @@
|
||||
* @kbdev: Kbase device pointer
|
||||
* @offset: Offset of register
|
||||
* @value: Value to write
|
||||
* @kctx: Kbase context pointer. May be NULL
|
||||
*
|
||||
* Caller must ensure the GPU is powered (@kbdev->pm.gpu_powered != false). If
|
||||
* @kctx is not NULL then the caller must ensure it is scheduled (@kctx->as_nr
|
||||
* != KBASEP_AS_NR_INVALID).
|
||||
* Caller must ensure the GPU is powered (@kbdev->pm.gpu_powered != false).
|
||||
*/
|
||||
void kbase_reg_write(struct kbase_device *kbdev, u16 offset, u32 value,
|
||||
struct kbase_context *kctx);
|
||||
void kbase_reg_write(struct kbase_device *kbdev, u32 offset, u32 value);
|
||||
|
||||
/**
|
||||
* kbase_reg_read - read from GPU register
|
||||
* @kbdev: Kbase device pointer
|
||||
* @offset: Offset of register
|
||||
* @kctx: Kbase context pointer. May be NULL
|
||||
*
|
||||
* Caller must ensure the GPU is powered (@kbdev->pm.gpu_powered != false). If
|
||||
* @kctx is not NULL then the caller must ensure it is scheduled (@kctx->as_nr
|
||||
* != KBASEP_AS_NR_INVALID).
|
||||
* Caller must ensure the GPU is powered (@kbdev->pm.gpu_powered != false).
|
||||
*
|
||||
* Return: Value in desired register
|
||||
*/
|
||||
u32 kbase_reg_read(struct kbase_device *kbdev, u16 offset,
|
||||
struct kbase_context *kctx);
|
||||
u32 kbase_reg_read(struct kbase_device *kbdev, u32 offset);
|
||||
|
||||
/**
|
||||
* kbase_gpu_start_cache_clean - Start a cache clean
|
||||
* @kbdev: Kbase device
|
||||
*
|
||||
* Issue a cache clean and invalidate command to hardware. This function will
|
||||
* take hwaccess_lock.
|
||||
*/
|
||||
void kbase_gpu_start_cache_clean(struct kbase_device *kbdev);
|
||||
|
||||
/**
|
||||
* kbase_gpu_start_cache_clean_nolock - Start a cache clean
|
||||
* @kbdev: Kbase device
|
||||
*
|
||||
* Issue a cache clean and invalidate command to hardware. hwaccess_lock
|
||||
* must be held by the caller.
|
||||
*/
|
||||
void kbase_gpu_start_cache_clean_nolock(struct kbase_device *kbdev);
|
||||
|
||||
/**
|
||||
* kbase_gpu_wait_cache_clean - Wait for cache cleaning to finish
|
||||
* @kbdev: Kbase device
|
||||
*
|
||||
* This function will take hwaccess_lock, and may sleep.
|
||||
*/
|
||||
void kbase_gpu_wait_cache_clean(struct kbase_device *kbdev);
|
||||
|
||||
/**
|
||||
* kbase_gpu_cache_clean_wait_complete - Called after the cache cleaning is
|
||||
* finished. Would also be called after
|
||||
* the GPU reset.
|
||||
* @kbdev: Kbase device
|
||||
*
|
||||
* Caller must hold the hwaccess_lock.
|
||||
*/
|
||||
void kbase_gpu_cache_clean_wait_complete(struct kbase_device *kbdev);
|
||||
|
||||
/**
|
||||
* kbase_gpu_interrupt - GPU interrupt handler
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
/*
|
||||
*
|
||||
* (C) COPYRIGHT 2014-2017 ARM Limited. All rights reserved.
|
||||
* (C) COPYRIGHT 2014-2019 ARM Limited. All rights reserved.
|
||||
*
|
||||
* This program is free software and is provided to you under the terms of the
|
||||
* GNU General Public License version 2 as published by the Free Software
|
||||
@@ -26,6 +26,7 @@
|
||||
*/
|
||||
#include <mali_kbase.h>
|
||||
#include <mali_kbase_hwaccess_backend.h>
|
||||
#include <mali_kbase_reset_gpu.h>
|
||||
#include <backend/gpu/mali_kbase_irq_internal.h>
|
||||
#include <backend/gpu/mali_kbase_jm_internal.h>
|
||||
#include <backend/gpu/mali_kbase_js_internal.h>
|
||||
@@ -56,14 +57,8 @@ int kbase_backend_early_init(struct kbase_device *kbdev)
|
||||
if (err)
|
||||
goto fail_interrupts;
|
||||
|
||||
err = kbase_hwaccess_pm_init(kbdev);
|
||||
if (err)
|
||||
goto fail_pm;
|
||||
|
||||
return 0;
|
||||
|
||||
fail_pm:
|
||||
kbase_release_interrupts(kbdev);
|
||||
fail_interrupts:
|
||||
kbase_pm_runtime_term(kbdev);
|
||||
fail_runtime_pm:
|
||||
@@ -74,7 +69,6 @@ fail_runtime_pm:
|
||||
|
||||
void kbase_backend_early_term(struct kbase_device *kbdev)
|
||||
{
|
||||
kbase_hwaccess_pm_term(kbdev);
|
||||
kbase_release_interrupts(kbdev);
|
||||
kbase_pm_runtime_term(kbdev);
|
||||
kbasep_platform_device_term(kbdev);
|
||||
@@ -84,10 +78,18 @@ int kbase_backend_late_init(struct kbase_device *kbdev)
|
||||
{
|
||||
int err;
|
||||
|
||||
err = kbase_hwaccess_pm_powerup(kbdev, PM_HW_ISSUES_DETECT);
|
||||
err = kbase_hwaccess_pm_init(kbdev);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
err = kbase_reset_gpu_init(kbdev);
|
||||
if (err)
|
||||
goto fail_reset_gpu_init;
|
||||
|
||||
err = kbase_hwaccess_pm_powerup(kbdev, PM_HW_ISSUES_DETECT);
|
||||
if (err)
|
||||
goto fail_pm_powerup;
|
||||
|
||||
err = kbase_backend_timer_init(kbdev);
|
||||
if (err)
|
||||
goto fail_timer;
|
||||
@@ -106,10 +108,29 @@ int kbase_backend_late_init(struct kbase_device *kbdev)
|
||||
if (err)
|
||||
goto fail_job_slot;
|
||||
|
||||
/* Do the initialisation of devfreq.
|
||||
* Devfreq needs backend_timer_init() for completion of its
|
||||
* initialisation and it also needs to catch the first callback
|
||||
* occurrence of the runtime_suspend event for maintaining state
|
||||
* coherence with the backend power management, hence needs to be
|
||||
* placed before the kbase_pm_context_idle().
|
||||
*/
|
||||
err = kbase_backend_devfreq_init(kbdev);
|
||||
if (err)
|
||||
goto fail_devfreq_init;
|
||||
|
||||
/* Idle the GPU and/or cores, if the policy wants it to */
|
||||
kbase_pm_context_idle(kbdev);
|
||||
|
||||
/* Update gpuprops with L2_FEATURES if applicable */
|
||||
kbase_gpuprops_update_l2_features(kbdev);
|
||||
|
||||
init_waitqueue_head(&kbdev->hwaccess.backend.reset_wait);
|
||||
|
||||
return 0;
|
||||
|
||||
fail_devfreq_init:
|
||||
kbase_job_slot_term(kbdev);
|
||||
fail_job_slot:
|
||||
|
||||
#ifdef CONFIG_MALI_BIFROST_DEBUG
|
||||
@@ -121,15 +142,21 @@ fail_interrupt_test:
|
||||
kbase_backend_timer_term(kbdev);
|
||||
fail_timer:
|
||||
kbase_hwaccess_pm_halt(kbdev);
|
||||
fail_pm_powerup:
|
||||
kbase_reset_gpu_term(kbdev);
|
||||
fail_reset_gpu_init:
|
||||
kbase_hwaccess_pm_term(kbdev);
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
void kbase_backend_late_term(struct kbase_device *kbdev)
|
||||
{
|
||||
kbase_backend_devfreq_term(kbdev);
|
||||
kbase_job_slot_halt(kbdev);
|
||||
kbase_job_slot_term(kbdev);
|
||||
kbase_backend_timer_term(kbdev);
|
||||
kbase_hwaccess_pm_halt(kbdev);
|
||||
kbase_reset_gpu_term(kbdev);
|
||||
kbase_hwaccess_pm_term(kbdev);
|
||||
}
|
||||
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
/*
|
||||
*
|
||||
* (C) COPYRIGHT 2014-2018 ARM Limited. All rights reserved.
|
||||
* (C) COPYRIGHT 2014-2019 ARM Limited. All rights reserved.
|
||||
*
|
||||
* This program is free software and is provided to you under the terms of the
|
||||
* GNU General Public License version 2 as published by the Free Software
|
||||
@@ -37,62 +37,61 @@ void kbase_backend_gpuprops_get(struct kbase_device *kbdev,
|
||||
int i;
|
||||
|
||||
/* Fill regdump with the content of the relevant registers */
|
||||
regdump->gpu_id = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_ID), NULL);
|
||||
regdump->gpu_id = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_ID));
|
||||
|
||||
regdump->l2_features = kbase_reg_read(kbdev,
|
||||
GPU_CONTROL_REG(L2_FEATURES), NULL);
|
||||
GPU_CONTROL_REG(L2_FEATURES));
|
||||
regdump->core_features = kbase_reg_read(kbdev,
|
||||
GPU_CONTROL_REG(CORE_FEATURES), NULL);
|
||||
GPU_CONTROL_REG(CORE_FEATURES));
|
||||
regdump->tiler_features = kbase_reg_read(kbdev,
|
||||
GPU_CONTROL_REG(TILER_FEATURES), NULL);
|
||||
GPU_CONTROL_REG(TILER_FEATURES));
|
||||
regdump->mem_features = kbase_reg_read(kbdev,
|
||||
GPU_CONTROL_REG(MEM_FEATURES), NULL);
|
||||
GPU_CONTROL_REG(MEM_FEATURES));
|
||||
regdump->mmu_features = kbase_reg_read(kbdev,
|
||||
GPU_CONTROL_REG(MMU_FEATURES), NULL);
|
||||
GPU_CONTROL_REG(MMU_FEATURES));
|
||||
regdump->as_present = kbase_reg_read(kbdev,
|
||||
GPU_CONTROL_REG(AS_PRESENT), NULL);
|
||||
GPU_CONTROL_REG(AS_PRESENT));
|
||||
regdump->js_present = kbase_reg_read(kbdev,
|
||||
GPU_CONTROL_REG(JS_PRESENT), NULL);
|
||||
GPU_CONTROL_REG(JS_PRESENT));
|
||||
|
||||
for (i = 0; i < GPU_MAX_JOB_SLOTS; i++)
|
||||
regdump->js_features[i] = kbase_reg_read(kbdev,
|
||||
GPU_CONTROL_REG(JS_FEATURES_REG(i)), NULL);
|
||||
GPU_CONTROL_REG(JS_FEATURES_REG(i)));
|
||||
|
||||
for (i = 0; i < BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS; i++)
|
||||
regdump->texture_features[i] = kbase_reg_read(kbdev,
|
||||
GPU_CONTROL_REG(TEXTURE_FEATURES_REG(i)), NULL);
|
||||
GPU_CONTROL_REG(TEXTURE_FEATURES_REG(i)));
|
||||
|
||||
regdump->thread_max_threads = kbase_reg_read(kbdev,
|
||||
GPU_CONTROL_REG(THREAD_MAX_THREADS), NULL);
|
||||
GPU_CONTROL_REG(THREAD_MAX_THREADS));
|
||||
regdump->thread_max_workgroup_size = kbase_reg_read(kbdev,
|
||||
GPU_CONTROL_REG(THREAD_MAX_WORKGROUP_SIZE),
|
||||
NULL);
|
||||
GPU_CONTROL_REG(THREAD_MAX_WORKGROUP_SIZE));
|
||||
regdump->thread_max_barrier_size = kbase_reg_read(kbdev,
|
||||
GPU_CONTROL_REG(THREAD_MAX_BARRIER_SIZE), NULL);
|
||||
GPU_CONTROL_REG(THREAD_MAX_BARRIER_SIZE));
|
||||
regdump->thread_features = kbase_reg_read(kbdev,
|
||||
GPU_CONTROL_REG(THREAD_FEATURES), NULL);
|
||||
GPU_CONTROL_REG(THREAD_FEATURES));
|
||||
regdump->thread_tls_alloc = kbase_reg_read(kbdev,
|
||||
GPU_CONTROL_REG(THREAD_TLS_ALLOC), NULL);
|
||||
GPU_CONTROL_REG(THREAD_TLS_ALLOC));
|
||||
|
||||
regdump->shader_present_lo = kbase_reg_read(kbdev,
|
||||
GPU_CONTROL_REG(SHADER_PRESENT_LO), NULL);
|
||||
GPU_CONTROL_REG(SHADER_PRESENT_LO));
|
||||
regdump->shader_present_hi = kbase_reg_read(kbdev,
|
||||
GPU_CONTROL_REG(SHADER_PRESENT_HI), NULL);
|
||||
GPU_CONTROL_REG(SHADER_PRESENT_HI));
|
||||
|
||||
regdump->tiler_present_lo = kbase_reg_read(kbdev,
|
||||
GPU_CONTROL_REG(TILER_PRESENT_LO), NULL);
|
||||
GPU_CONTROL_REG(TILER_PRESENT_LO));
|
||||
regdump->tiler_present_hi = kbase_reg_read(kbdev,
|
||||
GPU_CONTROL_REG(TILER_PRESENT_HI), NULL);
|
||||
GPU_CONTROL_REG(TILER_PRESENT_HI));
|
||||
|
||||
regdump->l2_present_lo = kbase_reg_read(kbdev,
|
||||
GPU_CONTROL_REG(L2_PRESENT_LO), NULL);
|
||||
GPU_CONTROL_REG(L2_PRESENT_LO));
|
||||
regdump->l2_present_hi = kbase_reg_read(kbdev,
|
||||
GPU_CONTROL_REG(L2_PRESENT_HI), NULL);
|
||||
GPU_CONTROL_REG(L2_PRESENT_HI));
|
||||
|
||||
regdump->stack_present_lo = kbase_reg_read(kbdev,
|
||||
GPU_CONTROL_REG(STACK_PRESENT_LO), NULL);
|
||||
GPU_CONTROL_REG(STACK_PRESENT_LO));
|
||||
regdump->stack_present_hi = kbase_reg_read(kbdev,
|
||||
GPU_CONTROL_REG(STACK_PRESENT_HI), NULL);
|
||||
GPU_CONTROL_REG(STACK_PRESENT_HI));
|
||||
}
|
||||
|
||||
void kbase_backend_gpuprops_get_features(struct kbase_device *kbdev,
|
||||
@@ -103,7 +102,7 @@ void kbase_backend_gpuprops_get_features(struct kbase_device *kbdev,
|
||||
kbase_pm_register_access_enable(kbdev);
|
||||
|
||||
regdump->coherency_features = kbase_reg_read(kbdev,
|
||||
GPU_CONTROL_REG(COHERENCY_FEATURES), NULL);
|
||||
GPU_CONTROL_REG(COHERENCY_FEATURES));
|
||||
|
||||
/* We're done accessing the GPU registers for now. */
|
||||
kbase_pm_register_access_disable(kbdev);
|
||||
@@ -115,3 +114,12 @@ void kbase_backend_gpuprops_get_features(struct kbase_device *kbdev,
|
||||
}
|
||||
}
|
||||
|
||||
void kbase_backend_gpuprops_get_l2_features(struct kbase_device *kbdev,
|
||||
struct kbase_gpuprops_regdump *regdump)
|
||||
{
|
||||
if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_L2_CONFIG)) {
|
||||
regdump->l2_features = kbase_reg_read(kbdev,
|
||||
GPU_CONTROL_REG(L2_FEATURES));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
/*
|
||||
*
|
||||
* (C) COPYRIGHT 2014-2017 ARM Limited. All rights reserved.
|
||||
* (C) COPYRIGHT 2014-2019 ARM Limited. All rights reserved.
|
||||
*
|
||||
* This program is free software and is provided to you under the terms of the
|
||||
* GNU General Public License version 2 as published by the Free Software
|
||||
@@ -30,152 +30,78 @@
|
||||
#include <mali_midg_regmap.h>
|
||||
#include <mali_kbase_hwaccess_instr.h>
|
||||
#include <backend/gpu/mali_kbase_device_internal.h>
|
||||
#include <backend/gpu/mali_kbase_pm_internal.h>
|
||||
#include <backend/gpu/mali_kbase_instr_internal.h>
|
||||
|
||||
/**
|
||||
* kbasep_instr_hwcnt_cacheclean - Issue Cache Clean & Invalidate command to
|
||||
* hardware
|
||||
*
|
||||
* @kbdev: Kbase device
|
||||
*/
|
||||
static void kbasep_instr_hwcnt_cacheclean(struct kbase_device *kbdev)
|
||||
{
|
||||
unsigned long flags;
|
||||
unsigned long pm_flags;
|
||||
u32 irq_mask;
|
||||
|
||||
spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
|
||||
KBASE_DEBUG_ASSERT(kbdev->hwcnt.backend.state ==
|
||||
KBASE_INSTR_STATE_REQUEST_CLEAN);
|
||||
|
||||
/* Enable interrupt */
|
||||
spin_lock_irqsave(&kbdev->hwaccess_lock, pm_flags);
|
||||
irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), NULL);
|
||||
kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK),
|
||||
irq_mask | CLEAN_CACHES_COMPLETED, NULL);
|
||||
spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags);
|
||||
|
||||
/* clean&invalidate the caches so we're sure the mmu tables for the dump
|
||||
* buffer is valid */
|
||||
KBASE_TRACE_ADD(kbdev, CORE_GPU_CLEAN_INV_CACHES, NULL, NULL, 0u, 0);
|
||||
kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
|
||||
GPU_COMMAND_CLEAN_INV_CACHES, NULL);
|
||||
kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_CLEANING;
|
||||
|
||||
spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
|
||||
}
|
||||
|
||||
int kbase_instr_hwcnt_enable_internal(struct kbase_device *kbdev,
|
||||
struct kbase_context *kctx,
|
||||
struct kbase_ioctl_hwcnt_enable *enable)
|
||||
struct kbase_instr_hwcnt_enable *enable)
|
||||
{
|
||||
unsigned long flags, pm_flags;
|
||||
unsigned long flags;
|
||||
int err = -EINVAL;
|
||||
u32 irq_mask;
|
||||
int ret;
|
||||
u64 shader_cores_needed;
|
||||
u32 prfcnt_config;
|
||||
|
||||
shader_cores_needed = kbase_pm_get_present_cores(kbdev,
|
||||
KBASE_PM_CORE_SHADER);
|
||||
lockdep_assert_held(&kbdev->hwaccess_lock);
|
||||
|
||||
/* alignment failure */
|
||||
if ((enable->dump_buffer == 0ULL) || (enable->dump_buffer & (2048 - 1)))
|
||||
goto out_err;
|
||||
|
||||
/* Override core availability policy to ensure all cores are available
|
||||
*/
|
||||
kbase_pm_ca_instr_enable(kbdev);
|
||||
|
||||
/* Request the cores early on synchronously - we'll release them on any
|
||||
* errors (e.g. instrumentation already active) */
|
||||
kbase_pm_request_cores_sync(kbdev, true, shader_cores_needed);
|
||||
|
||||
spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
|
||||
|
||||
if (kbdev->hwcnt.backend.state != KBASE_INSTR_STATE_DISABLED) {
|
||||
/* Instrumentation is already enabled */
|
||||
spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
|
||||
goto out_unrequest_cores;
|
||||
goto out_err;
|
||||
}
|
||||
|
||||
/* Enable interrupt */
|
||||
spin_lock_irqsave(&kbdev->hwaccess_lock, pm_flags);
|
||||
irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), NULL);
|
||||
irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK));
|
||||
kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), irq_mask |
|
||||
PRFCNT_SAMPLE_COMPLETED, NULL);
|
||||
spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags);
|
||||
PRFCNT_SAMPLE_COMPLETED);
|
||||
|
||||
/* In use, this context is the owner */
|
||||
kbdev->hwcnt.kctx = kctx;
|
||||
/* Remember the dump address so we can reprogram it later */
|
||||
kbdev->hwcnt.addr = enable->dump_buffer;
|
||||
|
||||
/* Request the clean */
|
||||
kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_REQUEST_CLEAN;
|
||||
kbdev->hwcnt.backend.triggered = 0;
|
||||
/* Clean&invalidate the caches so we're sure the mmu tables for the dump
|
||||
* buffer is valid */
|
||||
ret = queue_work(kbdev->hwcnt.backend.cache_clean_wq,
|
||||
&kbdev->hwcnt.backend.cache_clean_work);
|
||||
KBASE_DEBUG_ASSERT(ret);
|
||||
kbdev->hwcnt.addr_bytes = enable->dump_buffer_bytes;
|
||||
|
||||
spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
|
||||
|
||||
/* Wait for cacheclean to complete */
|
||||
wait_event(kbdev->hwcnt.backend.wait,
|
||||
kbdev->hwcnt.backend.triggered != 0);
|
||||
|
||||
KBASE_DEBUG_ASSERT(kbdev->hwcnt.backend.state ==
|
||||
KBASE_INSTR_STATE_IDLE);
|
||||
|
||||
kbase_pm_request_l2_caches(kbdev);
|
||||
|
||||
/* Configure */
|
||||
prfcnt_config = kctx->as_nr << PRFCNT_CONFIG_AS_SHIFT;
|
||||
#ifdef CONFIG_MALI_BIFROST_PRFCNT_SET_SECONDARY
|
||||
{
|
||||
u32 gpu_id = kbdev->gpu_props.props.raw_props.gpu_id;
|
||||
u32 product_id = (gpu_id & GPU_ID_VERSION_PRODUCT_ID)
|
||||
>> GPU_ID_VERSION_PRODUCT_ID_SHIFT;
|
||||
int arch_v6 = GPU_ID_IS_NEW_FORMAT(product_id);
|
||||
|
||||
if (arch_v6)
|
||||
prfcnt_config |= 1 << PRFCNT_CONFIG_SETSELECT_SHIFT;
|
||||
}
|
||||
#endif
|
||||
if (enable->use_secondary)
|
||||
prfcnt_config |= 1 << PRFCNT_CONFIG_SETSELECT_SHIFT;
|
||||
|
||||
kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG),
|
||||
prfcnt_config | PRFCNT_CONFIG_MODE_OFF, kctx);
|
||||
prfcnt_config | PRFCNT_CONFIG_MODE_OFF);
|
||||
|
||||
kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_LO),
|
||||
enable->dump_buffer & 0xFFFFFFFF, kctx);
|
||||
enable->dump_buffer & 0xFFFFFFFF);
|
||||
kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_HI),
|
||||
enable->dump_buffer >> 32, kctx);
|
||||
enable->dump_buffer >> 32);
|
||||
kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_JM_EN),
|
||||
enable->jm_bm, kctx);
|
||||
enable->jm_bm);
|
||||
kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_SHADER_EN),
|
||||
enable->shader_bm, kctx);
|
||||
enable->shader_bm);
|
||||
kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_MMU_L2_EN),
|
||||
enable->mmu_l2_bm, kctx);
|
||||
enable->mmu_l2_bm);
|
||||
/* Due to PRLAM-8186 we need to disable the Tiler before we enable the
|
||||
* HW counter dump. */
|
||||
if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8186))
|
||||
kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_TILER_EN), 0,
|
||||
kctx);
|
||||
kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_TILER_EN), 0);
|
||||
else
|
||||
kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_TILER_EN),
|
||||
enable->tiler_bm, kctx);
|
||||
enable->tiler_bm);
|
||||
|
||||
kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG),
|
||||
prfcnt_config | PRFCNT_CONFIG_MODE_MANUAL, kctx);
|
||||
prfcnt_config | PRFCNT_CONFIG_MODE_MANUAL);
|
||||
|
||||
/* If HW has PRLAM-8186 we can now re-enable the tiler HW counters dump
|
||||
*/
|
||||
if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8186))
|
||||
kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_TILER_EN),
|
||||
enable->tiler_bm, kctx);
|
||||
enable->tiler_bm);
|
||||
|
||||
spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
|
||||
|
||||
@@ -189,10 +115,6 @@ int kbase_instr_hwcnt_enable_internal(struct kbase_device *kbdev,
|
||||
|
||||
dev_dbg(kbdev->dev, "HW counters dumping set-up for context %p", kctx);
|
||||
return err;
|
||||
out_unrequest_cores:
|
||||
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
|
||||
kbase_pm_unrequest_cores(kbdev, true, shader_cores_needed);
|
||||
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
|
||||
out_err:
|
||||
return err;
|
||||
}
|
||||
@@ -205,17 +127,20 @@ int kbase_instr_hwcnt_disable_internal(struct kbase_context *kctx)
|
||||
struct kbase_device *kbdev = kctx->kbdev;
|
||||
|
||||
while (1) {
|
||||
spin_lock_irqsave(&kbdev->hwaccess_lock, pm_flags);
|
||||
spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
|
||||
|
||||
if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_DISABLED) {
|
||||
/* Instrumentation is not enabled */
|
||||
spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
|
||||
spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags);
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (kbdev->hwcnt.kctx != kctx) {
|
||||
/* Instrumentation has been setup for another context */
|
||||
spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
|
||||
spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags);
|
||||
goto out;
|
||||
}
|
||||
|
||||
@@ -223,6 +148,7 @@ int kbase_instr_hwcnt_disable_internal(struct kbase_context *kctx)
|
||||
break;
|
||||
|
||||
spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
|
||||
spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags);
|
||||
|
||||
/* Ongoing dump/setup - wait for its completion */
|
||||
wait_event(kbdev->hwcnt.backend.wait,
|
||||
@@ -233,26 +159,19 @@ int kbase_instr_hwcnt_disable_internal(struct kbase_context *kctx)
|
||||
kbdev->hwcnt.backend.triggered = 0;
|
||||
|
||||
/* Disable interrupt */
|
||||
spin_lock_irqsave(&kbdev->hwaccess_lock, pm_flags);
|
||||
irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), NULL);
|
||||
irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK));
|
||||
kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK),
|
||||
irq_mask & ~PRFCNT_SAMPLE_COMPLETED, NULL);
|
||||
irq_mask & ~PRFCNT_SAMPLE_COMPLETED);
|
||||
|
||||
/* Disable the counters */
|
||||
kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG), 0, kctx);
|
||||
kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG), 0);
|
||||
|
||||
kbdev->hwcnt.kctx = NULL;
|
||||
kbdev->hwcnt.addr = 0ULL;
|
||||
kbdev->hwcnt.addr_bytes = 0ULL;
|
||||
|
||||
kbase_pm_ca_instr_disable(kbdev);
|
||||
|
||||
kbase_pm_unrequest_cores(kbdev, true,
|
||||
kbase_pm_get_present_cores(kbdev, KBASE_PM_CORE_SHADER));
|
||||
|
||||
kbase_pm_release_l2_caches(kbdev);
|
||||
|
||||
spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags);
|
||||
spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
|
||||
spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags);
|
||||
|
||||
dev_dbg(kbdev->dev, "HW counters dumping disabled for context %p",
|
||||
kctx);
|
||||
@@ -290,15 +209,15 @@ int kbase_instr_hwcnt_request_dump(struct kbase_context *kctx)
|
||||
|
||||
/* Reconfigure the dump address */
|
||||
kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_LO),
|
||||
kbdev->hwcnt.addr & 0xFFFFFFFF, NULL);
|
||||
kbdev->hwcnt.addr & 0xFFFFFFFF);
|
||||
kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_HI),
|
||||
kbdev->hwcnt.addr >> 32, NULL);
|
||||
kbdev->hwcnt.addr >> 32);
|
||||
|
||||
/* Start dumping */
|
||||
KBASE_TRACE_ADD(kbdev, CORE_GPU_PRFCNT_SAMPLE, NULL, NULL,
|
||||
kbdev->hwcnt.addr, 0);
|
||||
kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
|
||||
GPU_COMMAND_PRFCNT_SAMPLE, kctx);
|
||||
GPU_COMMAND_PRFCNT_SAMPLE);
|
||||
|
||||
dev_dbg(kbdev->dev, "HW counters dumping done for context %p", kctx);
|
||||
|
||||
@@ -337,33 +256,34 @@ KBASE_EXPORT_SYMBOL(kbase_instr_hwcnt_dump_complete);
|
||||
void kbasep_cache_clean_worker(struct work_struct *data)
|
||||
{
|
||||
struct kbase_device *kbdev;
|
||||
unsigned long flags;
|
||||
unsigned long flags, pm_flags;
|
||||
|
||||
kbdev = container_of(data, struct kbase_device,
|
||||
hwcnt.backend.cache_clean_work);
|
||||
|
||||
mutex_lock(&kbdev->cacheclean_lock);
|
||||
kbasep_instr_hwcnt_cacheclean(kbdev);
|
||||
spin_lock_irqsave(&kbdev->hwaccess_lock, pm_flags);
|
||||
spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
|
||||
|
||||
/* Clean and invalidate the caches so we're sure the mmu tables for the
|
||||
* dump buffer is valid.
|
||||
*/
|
||||
KBASE_DEBUG_ASSERT(kbdev->hwcnt.backend.state ==
|
||||
KBASE_INSTR_STATE_REQUEST_CLEAN);
|
||||
kbase_gpu_start_cache_clean_nolock(kbdev);
|
||||
spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
|
||||
spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags);
|
||||
|
||||
kbase_gpu_wait_cache_clean(kbdev);
|
||||
|
||||
spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
|
||||
/* Wait for our condition, and any reset to complete */
|
||||
while (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_CLEANING) {
|
||||
spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
|
||||
wait_event(kbdev->hwcnt.backend.cache_clean_wait,
|
||||
kbdev->hwcnt.backend.state !=
|
||||
KBASE_INSTR_STATE_CLEANING);
|
||||
spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
|
||||
}
|
||||
KBASE_DEBUG_ASSERT(kbdev->hwcnt.backend.state ==
|
||||
KBASE_INSTR_STATE_CLEANED);
|
||||
|
||||
KBASE_INSTR_STATE_REQUEST_CLEAN);
|
||||
/* All finished and idle */
|
||||
kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_IDLE;
|
||||
kbdev->hwcnt.backend.triggered = 1;
|
||||
wake_up(&kbdev->hwcnt.backend.wait);
|
||||
|
||||
spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
|
||||
mutex_unlock(&kbdev->cacheclean_lock);
|
||||
}
|
||||
|
||||
void kbase_instr_hwcnt_sample_done(struct kbase_device *kbdev)
|
||||
@@ -376,53 +296,32 @@ void kbase_instr_hwcnt_sample_done(struct kbase_device *kbdev)
|
||||
kbdev->hwcnt.backend.triggered = 1;
|
||||
wake_up(&kbdev->hwcnt.backend.wait);
|
||||
} else if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_DUMPING) {
|
||||
int ret;
|
||||
/* Always clean and invalidate the cache after a successful dump
|
||||
*/
|
||||
kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_REQUEST_CLEAN;
|
||||
ret = queue_work(kbdev->hwcnt.backend.cache_clean_wq,
|
||||
&kbdev->hwcnt.backend.cache_clean_work);
|
||||
KBASE_DEBUG_ASSERT(ret);
|
||||
if (kbdev->mmu_mode->flags & KBASE_MMU_MODE_HAS_NON_CACHEABLE) {
|
||||
/* All finished and idle */
|
||||
kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_IDLE;
|
||||
kbdev->hwcnt.backend.triggered = 1;
|
||||
wake_up(&kbdev->hwcnt.backend.wait);
|
||||
} else {
|
||||
int ret;
|
||||
/* Always clean and invalidate the cache after a successful dump
|
||||
*/
|
||||
kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_REQUEST_CLEAN;
|
||||
ret = queue_work(kbdev->hwcnt.backend.cache_clean_wq,
|
||||
&kbdev->hwcnt.backend.cache_clean_work);
|
||||
KBASE_DEBUG_ASSERT(ret);
|
||||
}
|
||||
}
|
||||
|
||||
spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
|
||||
}
|
||||
|
||||
void kbase_clean_caches_done(struct kbase_device *kbdev)
|
||||
{
|
||||
u32 irq_mask;
|
||||
|
||||
if (kbdev->hwcnt.backend.state != KBASE_INSTR_STATE_DISABLED) {
|
||||
unsigned long flags;
|
||||
unsigned long pm_flags;
|
||||
|
||||
spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
|
||||
/* Disable interrupt */
|
||||
spin_lock_irqsave(&kbdev->hwaccess_lock, pm_flags);
|
||||
irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK),
|
||||
NULL);
|
||||
kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK),
|
||||
irq_mask & ~CLEAN_CACHES_COMPLETED, NULL);
|
||||
spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags);
|
||||
|
||||
/* Wakeup... */
|
||||
if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_CLEANING) {
|
||||
/* Only wake if we weren't resetting */
|
||||
kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_CLEANED;
|
||||
wake_up(&kbdev->hwcnt.backend.cache_clean_wait);
|
||||
}
|
||||
|
||||
spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
|
||||
}
|
||||
}
|
||||
|
||||
int kbase_instr_hwcnt_wait_for_dump(struct kbase_context *kctx)
|
||||
{
|
||||
struct kbase_device *kbdev = kctx->kbdev;
|
||||
unsigned long flags;
|
||||
int err;
|
||||
|
||||
/* Wait for dump & cacheclean to complete */
|
||||
/* Wait for dump & cache clean to complete */
|
||||
wait_event(kbdev->hwcnt.backend.wait,
|
||||
kbdev->hwcnt.backend.triggered != 0);
|
||||
|
||||
@@ -460,7 +359,7 @@ int kbase_instr_hwcnt_clear(struct kbase_context *kctx)
|
||||
/* Clear the counters */
|
||||
KBASE_TRACE_ADD(kbdev, CORE_GPU_PRFCNT_CLEAR, NULL, NULL, 0u, 0);
|
||||
kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
|
||||
GPU_COMMAND_PRFCNT_CLEAR, kctx);
|
||||
GPU_COMMAND_PRFCNT_CLEAR);
|
||||
|
||||
err = 0;
|
||||
|
||||
@@ -477,7 +376,6 @@ int kbase_instr_backend_init(struct kbase_device *kbdev)
|
||||
kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_DISABLED;
|
||||
|
||||
init_waitqueue_head(&kbdev->hwcnt.backend.wait);
|
||||
init_waitqueue_head(&kbdev->hwcnt.backend.cache_clean_wait);
|
||||
INIT_WORK(&kbdev->hwcnt.backend.cache_clean_work,
|
||||
kbasep_cache_clean_worker);
|
||||
kbdev->hwcnt.backend.triggered = 0;
|
||||
@@ -494,4 +392,3 @@ void kbase_instr_backend_term(struct kbase_device *kbdev)
|
||||
{
|
||||
destroy_workqueue(kbdev->hwcnt.backend.cache_clean_wq);
|
||||
}
|
||||
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
/*
|
||||
*
|
||||
* (C) COPYRIGHT 2014, 2016 ARM Limited. All rights reserved.
|
||||
* (C) COPYRIGHT 2014, 2016, 2018 ARM Limited. All rights reserved.
|
||||
*
|
||||
* This program is free software and is provided to you under the terms of the
|
||||
* GNU General Public License version 2 as published by the Free Software
|
||||
@@ -39,11 +39,6 @@ enum kbase_instr_state {
|
||||
KBASE_INSTR_STATE_DUMPING,
|
||||
/* We've requested a clean to occur on a workqueue */
|
||||
KBASE_INSTR_STATE_REQUEST_CLEAN,
|
||||
/* Hardware is currently cleaning and invalidating caches. */
|
||||
KBASE_INSTR_STATE_CLEANING,
|
||||
/* Cache clean completed, and either a) a dump is complete, or
|
||||
* b) instrumentation can now be setup. */
|
||||
KBASE_INSTR_STATE_CLEANED,
|
||||
/* An error has occured during DUMPING (page fault). */
|
||||
KBASE_INSTR_STATE_FAULT
|
||||
};
|
||||
@@ -54,7 +49,6 @@ struct kbase_instr_backend {
|
||||
int triggered;
|
||||
|
||||
enum kbase_instr_state state;
|
||||
wait_queue_head_t cache_clean_wait;
|
||||
struct workqueue_struct *cache_clean_wq;
|
||||
struct work_struct cache_clean_work;
|
||||
};
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
/*
|
||||
*
|
||||
* (C) COPYRIGHT 2014 ARM Limited. All rights reserved.
|
||||
* (C) COPYRIGHT 2014, 2018 ARM Limited. All rights reserved.
|
||||
*
|
||||
* This program is free software and is provided to you under the terms of the
|
||||
* GNU General Public License version 2 as published by the Free Software
|
||||
@@ -35,12 +35,6 @@
|
||||
*/
|
||||
void kbasep_cache_clean_worker(struct work_struct *data);
|
||||
|
||||
/**
|
||||
* kbase_clean_caches_done() - Cache clean interrupt received
|
||||
* @kbdev: Kbase device
|
||||
*/
|
||||
void kbase_clean_caches_done(struct kbase_device *kbdev);
|
||||
|
||||
/**
|
||||
* kbase_instr_hwcnt_sample_done() - Dump complete interrupt received
|
||||
* @kbdev: Kbase device
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
/*
|
||||
*
|
||||
* (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
|
||||
* (C) COPYRIGHT 2014-2016,2018-2019 ARM Limited. All rights reserved.
|
||||
*
|
||||
* This program is free software and is provided to you under the terms of the
|
||||
* GNU General Public License version 2 as published by the Free Software
|
||||
@@ -49,31 +49,33 @@ static irqreturn_t kbase_job_irq_handler(int irq, void *data)
|
||||
struct kbase_device *kbdev = kbase_untag(data);
|
||||
u32 val;
|
||||
|
||||
spin_lock_irqsave(&kbdev->pm.backend.gpu_powered_lock, flags);
|
||||
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
|
||||
|
||||
if (!kbdev->pm.backend.gpu_powered) {
|
||||
/* GPU is turned off - IRQ is not for us */
|
||||
spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock,
|
||||
flags);
|
||||
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
|
||||
return IRQ_NONE;
|
||||
}
|
||||
|
||||
val = kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_STATUS), NULL);
|
||||
val = kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_STATUS));
|
||||
|
||||
#ifdef CONFIG_MALI_BIFROST_DEBUG
|
||||
if (!kbdev->pm.backend.driver_ready_for_irqs)
|
||||
dev_warn(kbdev->dev, "%s: irq %d irqstatus 0x%x before driver is ready\n",
|
||||
__func__, irq, val);
|
||||
#endif /* CONFIG_MALI_BIFROST_DEBUG */
|
||||
spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, flags);
|
||||
|
||||
if (!val)
|
||||
if (!val) {
|
||||
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
|
||||
return IRQ_NONE;
|
||||
}
|
||||
|
||||
dev_dbg(kbdev->dev, "%s: irq %d irqstatus 0x%x\n", __func__, irq, val);
|
||||
|
||||
kbase_job_done(kbdev, val);
|
||||
|
||||
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
|
||||
|
||||
return IRQ_HANDLED;
|
||||
}
|
||||
|
||||
@@ -85,25 +87,24 @@ static irqreturn_t kbase_mmu_irq_handler(int irq, void *data)
|
||||
struct kbase_device *kbdev = kbase_untag(data);
|
||||
u32 val;
|
||||
|
||||
spin_lock_irqsave(&kbdev->pm.backend.gpu_powered_lock, flags);
|
||||
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
|
||||
|
||||
if (!kbdev->pm.backend.gpu_powered) {
|
||||
/* GPU is turned off - IRQ is not for us */
|
||||
spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock,
|
||||
flags);
|
||||
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
|
||||
return IRQ_NONE;
|
||||
}
|
||||
|
||||
atomic_inc(&kbdev->faults_pending);
|
||||
|
||||
val = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_STATUS), NULL);
|
||||
val = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_STATUS));
|
||||
|
||||
#ifdef CONFIG_MALI_BIFROST_DEBUG
|
||||
if (!kbdev->pm.backend.driver_ready_for_irqs)
|
||||
dev_warn(kbdev->dev, "%s: irq %d irqstatus 0x%x before driver is ready\n",
|
||||
__func__, irq, val);
|
||||
#endif /* CONFIG_MALI_BIFROST_DEBUG */
|
||||
spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, flags);
|
||||
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
|
||||
|
||||
if (!val) {
|
||||
atomic_dec(&kbdev->faults_pending);
|
||||
@@ -125,23 +126,22 @@ static irqreturn_t kbase_gpu_irq_handler(int irq, void *data)
|
||||
struct kbase_device *kbdev = kbase_untag(data);
|
||||
u32 val;
|
||||
|
||||
spin_lock_irqsave(&kbdev->pm.backend.gpu_powered_lock, flags);
|
||||
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
|
||||
|
||||
if (!kbdev->pm.backend.gpu_powered) {
|
||||
/* GPU is turned off - IRQ is not for us */
|
||||
spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock,
|
||||
flags);
|
||||
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
|
||||
return IRQ_NONE;
|
||||
}
|
||||
|
||||
val = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_STATUS), NULL);
|
||||
val = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_STATUS));
|
||||
|
||||
#ifdef CONFIG_MALI_BIFROST_DEBUG
|
||||
if (!kbdev->pm.backend.driver_ready_for_irqs)
|
||||
dev_dbg(kbdev->dev, "%s: irq %d irqstatus 0x%x before driver is ready\n",
|
||||
__func__, irq, val);
|
||||
#endif /* CONFIG_MALI_BIFROST_DEBUG */
|
||||
spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, flags);
|
||||
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
|
||||
|
||||
if (!val)
|
||||
return IRQ_NONE;
|
||||
@@ -230,18 +230,17 @@ static irqreturn_t kbase_job_irq_test_handler(int irq, void *data)
|
||||
struct kbase_device *kbdev = kbase_untag(data);
|
||||
u32 val;
|
||||
|
||||
spin_lock_irqsave(&kbdev->pm.backend.gpu_powered_lock, flags);
|
||||
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
|
||||
|
||||
if (!kbdev->pm.backend.gpu_powered) {
|
||||
/* GPU is turned off - IRQ is not for us */
|
||||
spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock,
|
||||
flags);
|
||||
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
|
||||
return IRQ_NONE;
|
||||
}
|
||||
|
||||
val = kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_STATUS), NULL);
|
||||
val = kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_STATUS));
|
||||
|
||||
spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, flags);
|
||||
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
|
||||
|
||||
if (!val)
|
||||
return IRQ_NONE;
|
||||
@@ -251,7 +250,7 @@ static irqreturn_t kbase_job_irq_test_handler(int irq, void *data)
|
||||
kbasep_irq_test_data.triggered = 1;
|
||||
wake_up(&kbasep_irq_test_data.wait);
|
||||
|
||||
kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR), val, NULL);
|
||||
kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR), val);
|
||||
|
||||
return IRQ_HANDLED;
|
||||
}
|
||||
@@ -262,18 +261,17 @@ static irqreturn_t kbase_mmu_irq_test_handler(int irq, void *data)
|
||||
struct kbase_device *kbdev = kbase_untag(data);
|
||||
u32 val;
|
||||
|
||||
spin_lock_irqsave(&kbdev->pm.backend.gpu_powered_lock, flags);
|
||||
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
|
||||
|
||||
if (!kbdev->pm.backend.gpu_powered) {
|
||||
/* GPU is turned off - IRQ is not for us */
|
||||
spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock,
|
||||
flags);
|
||||
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
|
||||
return IRQ_NONE;
|
||||
}
|
||||
|
||||
val = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_STATUS), NULL);
|
||||
val = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_STATUS));
|
||||
|
||||
spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, flags);
|
||||
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
|
||||
|
||||
if (!val)
|
||||
return IRQ_NONE;
|
||||
@@ -283,7 +281,7 @@ static irqreturn_t kbase_mmu_irq_test_handler(int irq, void *data)
|
||||
kbasep_irq_test_data.triggered = 1;
|
||||
wake_up(&kbasep_irq_test_data.wait);
|
||||
|
||||
kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_CLEAR), val, NULL);
|
||||
kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_CLEAR), val);
|
||||
|
||||
return IRQ_HANDLED;
|
||||
}
|
||||
@@ -327,9 +325,9 @@ static int kbasep_common_test_interrupt(
|
||||
}
|
||||
|
||||
/* store old mask */
|
||||
old_mask_val = kbase_reg_read(kbdev, mask_offset, NULL);
|
||||
old_mask_val = kbase_reg_read(kbdev, mask_offset);
|
||||
/* mask interrupts */
|
||||
kbase_reg_write(kbdev, mask_offset, 0x0, NULL);
|
||||
kbase_reg_write(kbdev, mask_offset, 0x0);
|
||||
|
||||
if (kbdev->irqs[tag].irq) {
|
||||
/* release original handler and install test handler */
|
||||
@@ -343,8 +341,8 @@ static int kbasep_common_test_interrupt(
|
||||
kbasep_test_interrupt_timeout;
|
||||
|
||||
/* trigger interrupt */
|
||||
kbase_reg_write(kbdev, mask_offset, 0x1, NULL);
|
||||
kbase_reg_write(kbdev, rawstat_offset, 0x1, NULL);
|
||||
kbase_reg_write(kbdev, mask_offset, 0x1);
|
||||
kbase_reg_write(kbdev, rawstat_offset, 0x1);
|
||||
|
||||
hrtimer_start(&kbasep_irq_test_data.timer,
|
||||
HR_TIMER_DELAY_MSEC(IRQ_TEST_TIMEOUT),
|
||||
@@ -366,7 +364,7 @@ static int kbasep_common_test_interrupt(
|
||||
kbasep_irq_test_data.triggered = 0;
|
||||
|
||||
/* mask interrupts */
|
||||
kbase_reg_write(kbdev, mask_offset, 0x0, NULL);
|
||||
kbase_reg_write(kbdev, mask_offset, 0x0);
|
||||
|
||||
/* release test handler */
|
||||
free_irq(kbdev->irqs[tag].irq, kbase_tag(kbdev, tag));
|
||||
@@ -382,7 +380,7 @@ static int kbasep_common_test_interrupt(
|
||||
}
|
||||
}
|
||||
/* restore old mask */
|
||||
kbase_reg_write(kbdev, mask_offset, old_mask_val, NULL);
|
||||
kbase_reg_write(kbdev, mask_offset, old_mask_val);
|
||||
|
||||
return err;
|
||||
}
|
||||
@@ -471,4 +469,6 @@ void kbase_synchronize_irqs(struct kbase_device *kbdev)
|
||||
}
|
||||
}
|
||||
|
||||
KBASE_EXPORT_TEST_API(kbase_synchronize_irqs);
|
||||
|
||||
#endif /* !defined(CONFIG_MALI_BIFROST_NO_MALI) */
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
/*
|
||||
*
|
||||
* (C) COPYRIGHT 2010-2018 ARM Limited. All rights reserved.
|
||||
* (C) COPYRIGHT 2010-2019 ARM Limited. All rights reserved.
|
||||
*
|
||||
* This program is free software and is provided to you under the terms of the
|
||||
* GNU General Public License version 2 as published by the Free Software
|
||||
@@ -27,32 +27,72 @@
|
||||
#include <mali_kbase.h>
|
||||
#include <mali_kbase_config.h>
|
||||
#include <mali_midg_regmap.h>
|
||||
#if defined(CONFIG_MALI_BIFROST_GATOR_SUPPORT)
|
||||
#include <mali_kbase_gator.h>
|
||||
#endif
|
||||
#include <mali_kbase_tlstream.h>
|
||||
#include <mali_kbase_vinstr.h>
|
||||
#include <mali_kbase_tracepoints.h>
|
||||
#include <mali_kbase_hw.h>
|
||||
#include <mali_kbase_hwaccess_jm.h>
|
||||
#include <mali_kbase_reset_gpu.h>
|
||||
#include <mali_kbase_ctx_sched.h>
|
||||
#include <mali_kbase_hwcnt_context.h>
|
||||
#include <backend/gpu/mali_kbase_device_internal.h>
|
||||
#include <backend/gpu/mali_kbase_irq_internal.h>
|
||||
#include <backend/gpu/mali_kbase_js_affinity.h>
|
||||
#include <backend/gpu/mali_kbase_jm_internal.h>
|
||||
|
||||
#define beenthere(kctx, f, a...) \
|
||||
dev_dbg(kctx->kbdev->dev, "%s:" f, __func__, ##a)
|
||||
|
||||
#if KBASE_GPU_RESET_EN
|
||||
static void kbasep_try_reset_gpu_early(struct kbase_device *kbdev);
|
||||
static void kbasep_reset_timeout_worker(struct work_struct *data);
|
||||
static enum hrtimer_restart kbasep_reset_timer_callback(struct hrtimer *timer);
|
||||
#endif /* KBASE_GPU_RESET_EN */
|
||||
static void kbasep_try_reset_gpu_early_locked(struct kbase_device *kbdev);
|
||||
|
||||
static inline int kbasep_jm_is_js_free(struct kbase_device *kbdev, int js,
|
||||
struct kbase_context *kctx)
|
||||
{
|
||||
return !kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_COMMAND_NEXT), kctx);
|
||||
return !kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_COMMAND_NEXT));
|
||||
}
|
||||
|
||||
static u64 kbase_job_write_affinity(struct kbase_device *kbdev,
|
||||
base_jd_core_req core_req,
|
||||
int js)
|
||||
{
|
||||
u64 affinity;
|
||||
|
||||
if ((core_req & (BASE_JD_REQ_FS | BASE_JD_REQ_CS | BASE_JD_REQ_T)) ==
|
||||
BASE_JD_REQ_T) {
|
||||
/* Tiler-only atom */
|
||||
/* If the hardware supports XAFFINITY then we'll only enable
|
||||
* the tiler (which is the default so this is a no-op),
|
||||
* otherwise enable shader core 0.
|
||||
*/
|
||||
if (!kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_XAFFINITY))
|
||||
affinity = 1;
|
||||
else
|
||||
affinity = 0;
|
||||
} else if ((core_req & (BASE_JD_REQ_COHERENT_GROUP |
|
||||
BASE_JD_REQ_SPECIFIC_COHERENT_GROUP))) {
|
||||
unsigned int num_core_groups = kbdev->gpu_props.num_core_groups;
|
||||
struct mali_base_gpu_coherent_group_info *coherency_info =
|
||||
&kbdev->gpu_props.props.coherency_info;
|
||||
|
||||
affinity = kbdev->pm.backend.shaders_avail &
|
||||
kbdev->pm.debug_core_mask[js];
|
||||
|
||||
/* JS2 on a dual core group system targets core group 1. All
|
||||
* other cases target core group 0.
|
||||
*/
|
||||
if (js == 2 && num_core_groups > 1)
|
||||
affinity &= coherency_info->group[1].core_mask;
|
||||
else
|
||||
affinity &= coherency_info->group[0].core_mask;
|
||||
} else {
|
||||
/* Use all cores */
|
||||
affinity = kbdev->pm.backend.shaders_avail &
|
||||
kbdev->pm.debug_core_mask[js];
|
||||
}
|
||||
|
||||
kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_AFFINITY_NEXT_LO),
|
||||
affinity & 0xFFFFFFFF);
|
||||
kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_AFFINITY_NEXT_HI),
|
||||
affinity >> 32);
|
||||
|
||||
return affinity;
|
||||
}
|
||||
|
||||
void kbase_job_hw_submit(struct kbase_device *kbdev,
|
||||
@@ -62,6 +102,7 @@ void kbase_job_hw_submit(struct kbase_device *kbdev,
|
||||
struct kbase_context *kctx;
|
||||
u32 cfg;
|
||||
u64 jc_head = katom->jc;
|
||||
u64 affinity;
|
||||
|
||||
KBASE_DEBUG_ASSERT(kbdev);
|
||||
KBASE_DEBUG_ASSERT(katom);
|
||||
@@ -70,20 +111,13 @@ void kbase_job_hw_submit(struct kbase_device *kbdev,
|
||||
|
||||
/* Command register must be available */
|
||||
KBASE_DEBUG_ASSERT(kbasep_jm_is_js_free(kbdev, js, kctx));
|
||||
/* Affinity is not violating */
|
||||
kbase_js_debug_log_current_affinities(kbdev);
|
||||
KBASE_DEBUG_ASSERT(!kbase_js_affinity_would_violate(kbdev, js,
|
||||
katom->affinity));
|
||||
|
||||
kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_HEAD_NEXT_LO),
|
||||
jc_head & 0xFFFFFFFF, kctx);
|
||||
jc_head & 0xFFFFFFFF);
|
||||
kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_HEAD_NEXT_HI),
|
||||
jc_head >> 32, kctx);
|
||||
jc_head >> 32);
|
||||
|
||||
kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_AFFINITY_NEXT_LO),
|
||||
katom->affinity & 0xFFFFFFFF, kctx);
|
||||
kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_AFFINITY_NEXT_HI),
|
||||
katom->affinity >> 32, kctx);
|
||||
affinity = kbase_job_write_affinity(kbdev, katom->core_req, js);
|
||||
|
||||
/* start MMU, medium priority, cache clean/flush on end, clean/flush on
|
||||
* start */
|
||||
@@ -101,6 +135,8 @@ void kbase_job_hw_submit(struct kbase_device *kbdev,
|
||||
if (0 != (katom->core_req & BASE_JD_REQ_SKIP_CACHE_END) &&
|
||||
!(kbdev->serialize_jobs & KBASE_SERIALIZE_RESET))
|
||||
cfg |= JS_CONFIG_END_FLUSH_NO_ACTION;
|
||||
else if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_CLEAN_ONLY_SAFE))
|
||||
cfg |= JS_CONFIG_END_FLUSH_CLEAN;
|
||||
else
|
||||
cfg |= JS_CONFIG_END_FLUSH_CLEAN_INVALIDATE;
|
||||
|
||||
@@ -127,11 +163,11 @@ void kbase_job_hw_submit(struct kbase_device *kbdev,
|
||||
}
|
||||
}
|
||||
|
||||
kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_CONFIG_NEXT), cfg, kctx);
|
||||
kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_CONFIG_NEXT), cfg);
|
||||
|
||||
if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_FLUSH_REDUCTION))
|
||||
kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_FLUSH_ID_NEXT),
|
||||
katom->flush_id, kctx);
|
||||
katom->flush_id);
|
||||
|
||||
/* Write an approximate start timestamp.
|
||||
* It's approximate because there might be a job in the HEAD register.
|
||||
@@ -139,25 +175,25 @@ void kbase_job_hw_submit(struct kbase_device *kbdev,
|
||||
katom->start_timestamp = ktime_get();
|
||||
|
||||
/* GO ! */
|
||||
dev_dbg(kbdev->dev, "JS: Submitting atom %p from ctx %p to js[%d] with head=0x%llx, affinity=0x%llx",
|
||||
katom, kctx, js, jc_head, katom->affinity);
|
||||
dev_dbg(kbdev->dev, "JS: Submitting atom %p from ctx %p to js[%d] with head=0x%llx",
|
||||
katom, kctx, js, jc_head);
|
||||
|
||||
KBASE_TRACE_ADD_SLOT_INFO(kbdev, JM_SUBMIT, kctx, katom, jc_head, js,
|
||||
(u32) katom->affinity);
|
||||
(u32)affinity);
|
||||
|
||||
#if defined(CONFIG_MALI_BIFROST_GATOR_SUPPORT)
|
||||
kbase_trace_mali_job_slots_event(
|
||||
GATOR_MAKE_EVENT(GATOR_JOB_SLOT_START, js),
|
||||
kctx, kbase_jd_atom_id(kctx, katom));
|
||||
#endif
|
||||
KBASE_TLSTREAM_TL_ATTRIB_ATOM_CONFIG(katom, jc_head,
|
||||
katom->affinity, cfg);
|
||||
KBASE_TLSTREAM_AUX_EVENT_JOB_SLOT(kbdev, kctx,
|
||||
js, kbase_jd_atom_id(kctx, katom), TL_JS_EVENT_START);
|
||||
|
||||
KBASE_TLSTREAM_TL_ATTRIB_ATOM_CONFIG(kbdev, katom, jc_head,
|
||||
affinity, cfg);
|
||||
KBASE_TLSTREAM_TL_RET_CTX_LPU(
|
||||
kbdev,
|
||||
kctx,
|
||||
&kbdev->gpu_props.props.raw_props.js_features[
|
||||
katom->slot_nr]);
|
||||
KBASE_TLSTREAM_TL_RET_ATOM_AS(katom, &kbdev->as[kctx->as_nr]);
|
||||
KBASE_TLSTREAM_TL_RET_ATOM_AS(kbdev, katom, &kbdev->as[kctx->as_nr]);
|
||||
KBASE_TLSTREAM_TL_RET_ATOM_LPU(
|
||||
kbdev,
|
||||
katom,
|
||||
&kbdev->gpu_props.props.raw_props.js_features[js],
|
||||
"ctx_nr,atom_nr");
|
||||
@@ -174,10 +210,8 @@ void kbase_job_hw_submit(struct kbase_device *kbdev,
|
||||
kbdev->hwaccess.backend.slot_rb[js].last_context = katom->kctx;
|
||||
}
|
||||
#endif
|
||||
kbase_timeline_job_slot_submit(kbdev, kctx, katom, js);
|
||||
|
||||
kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_COMMAND_NEXT),
|
||||
JS_COMMAND_START, katom->kctx);
|
||||
JS_COMMAND_START);
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -198,14 +232,12 @@ static void kbasep_job_slot_update_head_start_timestamp(
|
||||
int js,
|
||||
ktime_t end_timestamp)
|
||||
{
|
||||
if (kbase_backend_nr_atoms_on_slot(kbdev, js) > 0) {
|
||||
struct kbase_jd_atom *katom;
|
||||
ktime_t timestamp_diff;
|
||||
/* The atom in the HEAD */
|
||||
katom = kbase_gpu_inspect(kbdev, js, 0);
|
||||
|
||||
KBASE_DEBUG_ASSERT(katom != NULL);
|
||||
ktime_t timestamp_diff;
|
||||
struct kbase_jd_atom *katom;
|
||||
|
||||
/* Checking the HEAD position for the job slot */
|
||||
katom = kbase_gpu_inspect(kbdev, js, 0);
|
||||
if (katom != NULL) {
|
||||
timestamp_diff = ktime_sub(end_timestamp,
|
||||
katom->start_timestamp);
|
||||
if (ktime_to_ns(timestamp_diff) >= 0) {
|
||||
@@ -231,21 +263,23 @@ static void kbasep_trace_tl_event_lpu_softstop(struct kbase_device *kbdev,
|
||||
int js)
|
||||
{
|
||||
KBASE_TLSTREAM_TL_EVENT_LPU_SOFTSTOP(
|
||||
kbdev,
|
||||
&kbdev->gpu_props.props.raw_props.js_features[js]);
|
||||
}
|
||||
|
||||
void kbase_job_done(struct kbase_device *kbdev, u32 done)
|
||||
{
|
||||
unsigned long flags;
|
||||
int i;
|
||||
u32 count = 0;
|
||||
ktime_t end_timestamp = ktime_get();
|
||||
ktime_t end_timestamp;
|
||||
|
||||
lockdep_assert_held(&kbdev->hwaccess_lock);
|
||||
|
||||
KBASE_DEBUG_ASSERT(kbdev);
|
||||
|
||||
KBASE_TRACE_ADD(kbdev, JM_IRQ, NULL, NULL, 0, done);
|
||||
|
||||
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
|
||||
end_timestamp = ktime_get();
|
||||
|
||||
while (done) {
|
||||
u32 failed = done >> 16;
|
||||
@@ -269,16 +303,12 @@ void kbase_job_done(struct kbase_device *kbdev, u32 done)
|
||||
/* read out the job slot status code if the job
|
||||
* slot reported failure */
|
||||
completion_code = kbase_reg_read(kbdev,
|
||||
JOB_SLOT_REG(i, JS_STATUS), NULL);
|
||||
JOB_SLOT_REG(i, JS_STATUS));
|
||||
|
||||
switch (completion_code) {
|
||||
case BASE_JD_EVENT_STOPPED:
|
||||
#if defined(CONFIG_MALI_BIFROST_GATOR_SUPPORT)
|
||||
kbase_trace_mali_job_slots_event(
|
||||
GATOR_MAKE_EVENT(
|
||||
GATOR_JOB_SLOT_SOFT_STOPPED, i),
|
||||
NULL, 0);
|
||||
#endif
|
||||
if (completion_code == BASE_JD_EVENT_STOPPED) {
|
||||
KBASE_TLSTREAM_AUX_EVENT_JOB_SLOT(
|
||||
kbdev, NULL,
|
||||
i, 0, TL_JS_EVENT_SOFT_STOP);
|
||||
|
||||
kbasep_trace_tl_event_lpu_softstop(
|
||||
kbdev, i);
|
||||
@@ -287,37 +317,37 @@ void kbase_job_done(struct kbase_device *kbdev, u32 done)
|
||||
* JS<n>_TAIL so that the job chain can
|
||||
* be resumed */
|
||||
job_tail = (u64)kbase_reg_read(kbdev,
|
||||
JOB_SLOT_REG(i, JS_TAIL_LO),
|
||||
NULL) |
|
||||
JOB_SLOT_REG(i, JS_TAIL_LO)) |
|
||||
((u64)kbase_reg_read(kbdev,
|
||||
JOB_SLOT_REG(i, JS_TAIL_HI),
|
||||
NULL) << 32);
|
||||
break;
|
||||
case BASE_JD_EVENT_NOT_STARTED:
|
||||
JOB_SLOT_REG(i, JS_TAIL_HI))
|
||||
<< 32);
|
||||
} else if (completion_code ==
|
||||
BASE_JD_EVENT_NOT_STARTED) {
|
||||
/* PRLAM-10673 can cause a TERMINATED
|
||||
* job to come back as NOT_STARTED, but
|
||||
* the error interrupt helps us detect
|
||||
* it */
|
||||
completion_code =
|
||||
BASE_JD_EVENT_TERMINATED;
|
||||
/* fall through */
|
||||
default:
|
||||
dev_warn(kbdev->dev, "error detected from slot %d, job status 0x%08x (%s)",
|
||||
i, completion_code,
|
||||
kbase_exception_name
|
||||
(kbdev,
|
||||
completion_code));
|
||||
}
|
||||
|
||||
kbase_gpu_irq_evict(kbdev, i, completion_code);
|
||||
|
||||
/* Some jobs that encounter a BUS FAULT may result in corrupted
|
||||
* state causing future jobs to hang. Reset GPU before
|
||||
* allowing any other jobs on the slot to continue. */
|
||||
if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TTRX_3076)) {
|
||||
if (completion_code == BASE_JD_EVENT_JOB_BUS_FAULT) {
|
||||
if (kbase_prepare_to_reset_gpu_locked(kbdev))
|
||||
kbase_reset_gpu_locked(kbdev);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR),
|
||||
done & ((1 << i) | (1 << (i + 16))),
|
||||
NULL);
|
||||
done & ((1 << i) | (1 << (i + 16))));
|
||||
active = kbase_reg_read(kbdev,
|
||||
JOB_CONTROL_REG(JOB_IRQ_JS_STATE),
|
||||
NULL);
|
||||
JOB_CONTROL_REG(JOB_IRQ_JS_STATE));
|
||||
|
||||
if (((active >> i) & 1) == 0 &&
|
||||
(((done >> (i + 16)) & 1) == 0)) {
|
||||
@@ -362,7 +392,7 @@ void kbase_job_done(struct kbase_device *kbdev, u32 done)
|
||||
* execution.
|
||||
*/
|
||||
u32 rawstat = kbase_reg_read(kbdev,
|
||||
JOB_CONTROL_REG(JOB_IRQ_RAWSTAT), NULL);
|
||||
JOB_CONTROL_REG(JOB_IRQ_RAWSTAT));
|
||||
|
||||
if ((rawstat >> (i + 16)) & 1) {
|
||||
/* There is a failed job that we've
|
||||
@@ -412,7 +442,7 @@ void kbase_job_done(struct kbase_device *kbdev, u32 done)
|
||||
}
|
||||
spurious:
|
||||
done = kbase_reg_read(kbdev,
|
||||
JOB_CONTROL_REG(JOB_IRQ_RAWSTAT), NULL);
|
||||
JOB_CONTROL_REG(JOB_IRQ_RAWSTAT));
|
||||
|
||||
if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_10883)) {
|
||||
/* Workaround for missing interrupt caused by
|
||||
@@ -420,7 +450,7 @@ void kbase_job_done(struct kbase_device *kbdev, u32 done)
|
||||
if (((active >> i) & 1) && (0 ==
|
||||
kbase_reg_read(kbdev,
|
||||
JOB_SLOT_REG(i,
|
||||
JS_STATUS), NULL))) {
|
||||
JS_STATUS)))) {
|
||||
/* Force job slot to be processed again
|
||||
*/
|
||||
done |= (1u << i);
|
||||
@@ -437,20 +467,16 @@ void kbase_job_done(struct kbase_device *kbdev, u32 done)
|
||||
end_timestamp);
|
||||
}
|
||||
|
||||
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
|
||||
#if KBASE_GPU_RESET_EN
|
||||
if (atomic_read(&kbdev->hwaccess.backend.reset_gpu) ==
|
||||
KBASE_RESET_GPU_COMMITTED) {
|
||||
/* If we're trying to reset the GPU then we might be able to do
|
||||
* it early (without waiting for a timeout) because some jobs
|
||||
* have completed
|
||||
*/
|
||||
kbasep_try_reset_gpu_early(kbdev);
|
||||
kbasep_try_reset_gpu_early_locked(kbdev);
|
||||
}
|
||||
#endif /* KBASE_GPU_RESET_EN */
|
||||
KBASE_TRACE_ADD(kbdev, JM_IRQ_END, NULL, NULL, 0, count);
|
||||
}
|
||||
KBASE_EXPORT_TEST_API(kbase_job_done);
|
||||
|
||||
static bool kbasep_soft_stop_allowed(struct kbase_device *kbdev,
|
||||
struct kbase_jd_atom *katom)
|
||||
@@ -484,7 +510,6 @@ void kbasep_job_slot_soft_or_hard_stop_do_action(struct kbase_device *kbdev,
|
||||
base_jd_core_req core_reqs,
|
||||
struct kbase_jd_atom *target_katom)
|
||||
{
|
||||
struct kbase_context *kctx = target_katom->kctx;
|
||||
#if KBASE_TRACE_ENABLE
|
||||
u32 status_reg_before;
|
||||
u64 job_in_head_before;
|
||||
@@ -494,12 +519,11 @@ void kbasep_job_slot_soft_or_hard_stop_do_action(struct kbase_device *kbdev,
|
||||
|
||||
/* Check the head pointer */
|
||||
job_in_head_before = ((u64) kbase_reg_read(kbdev,
|
||||
JOB_SLOT_REG(js, JS_HEAD_LO), NULL))
|
||||
JOB_SLOT_REG(js, JS_HEAD_LO)))
|
||||
| (((u64) kbase_reg_read(kbdev,
|
||||
JOB_SLOT_REG(js, JS_HEAD_HI), NULL))
|
||||
JOB_SLOT_REG(js, JS_HEAD_HI)))
|
||||
<< 32);
|
||||
status_reg_before = kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_STATUS),
|
||||
NULL);
|
||||
status_reg_before = kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_STATUS));
|
||||
#endif
|
||||
|
||||
if (action == JS_COMMAND_SOFT_STOP) {
|
||||
@@ -520,7 +544,7 @@ void kbasep_job_slot_soft_or_hard_stop_do_action(struct kbase_device *kbdev,
|
||||
target_katom->atom_flags |= KBASE_KATOM_FLAG_BEEN_SOFT_STOPPPED;
|
||||
|
||||
/* Mark the point where we issue the soft-stop command */
|
||||
KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTSTOP_ISSUE(target_katom);
|
||||
KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTSTOP_ISSUE(kbdev, target_katom);
|
||||
|
||||
if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8316)) {
|
||||
int i;
|
||||
@@ -603,11 +627,10 @@ void kbasep_job_slot_soft_or_hard_stop_do_action(struct kbase_device *kbdev,
|
||||
}
|
||||
}
|
||||
|
||||
kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_COMMAND), action, kctx);
|
||||
kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_COMMAND), action);
|
||||
|
||||
#if KBASE_TRACE_ENABLE
|
||||
status_reg_after = kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_STATUS),
|
||||
NULL);
|
||||
status_reg_after = kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_STATUS));
|
||||
if (status_reg_after == BASE_JD_EVENT_ACTIVE) {
|
||||
struct kbase_jd_atom *head;
|
||||
struct kbase_context *head_kctx;
|
||||
@@ -692,32 +715,15 @@ void kbasep_job_slot_soft_or_hard_stop_do_action(struct kbase_device *kbdev,
|
||||
#endif
|
||||
}
|
||||
|
||||
void kbase_backend_jm_kill_jobs_from_kctx(struct kbase_context *kctx)
|
||||
void kbase_backend_jm_kill_running_jobs_from_kctx(struct kbase_context *kctx)
|
||||
{
|
||||
unsigned long flags;
|
||||
struct kbase_device *kbdev;
|
||||
struct kbase_device *kbdev = kctx->kbdev;
|
||||
int i;
|
||||
|
||||
KBASE_DEBUG_ASSERT(kctx != NULL);
|
||||
kbdev = kctx->kbdev;
|
||||
KBASE_DEBUG_ASSERT(kbdev != NULL);
|
||||
|
||||
/* Cancel any remaining running jobs for this kctx */
|
||||
mutex_lock(&kctx->jctx.lock);
|
||||
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
|
||||
|
||||
/* Invalidate all jobs in context, to prevent re-submitting */
|
||||
for (i = 0; i < BASE_JD_ATOM_COUNT; i++) {
|
||||
if (!work_pending(&kctx->jctx.atoms[i].work))
|
||||
kctx->jctx.atoms[i].event_code =
|
||||
BASE_JD_EVENT_JOB_CANCELLED;
|
||||
}
|
||||
lockdep_assert_held(&kbdev->hwaccess_lock);
|
||||
|
||||
for (i = 0; i < kbdev->gpu_props.num_job_slots; i++)
|
||||
kbase_job_slot_hardstop(kctx, i, NULL);
|
||||
|
||||
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
|
||||
mutex_unlock(&kctx->jctx.lock);
|
||||
}
|
||||
|
||||
void kbase_job_slot_ctx_priority_check_locked(struct kbase_context *kctx,
|
||||
@@ -750,6 +756,7 @@ void kbase_job_slot_ctx_priority_check_locked(struct kbase_context *kctx,
|
||||
if (katom->sched_priority > priority) {
|
||||
if (!stop_sent)
|
||||
KBASE_TLSTREAM_TL_ATTRIB_ATOM_PRIORITIZED(
|
||||
kbdev,
|
||||
target_katom);
|
||||
|
||||
kbase_job_slot_softstop(kbdev, js, katom);
|
||||
@@ -776,7 +783,6 @@ void kbase_jm_wait_for_zero_jobs(struct kbase_context *kctx)
|
||||
if (timeout != 0)
|
||||
goto exit;
|
||||
|
||||
#if KBASE_GPU_RESET_EN
|
||||
if (kbase_prepare_to_reset_gpu(kbdev)) {
|
||||
dev_err(kbdev->dev,
|
||||
"Issueing GPU soft-reset because jobs failed to be killed (within %d ms) as part of context termination (e.g. process exit)\n",
|
||||
@@ -785,15 +791,7 @@ void kbase_jm_wait_for_zero_jobs(struct kbase_context *kctx)
|
||||
}
|
||||
|
||||
/* Wait for the reset to complete */
|
||||
wait_event(kbdev->hwaccess.backend.reset_wait,
|
||||
atomic_read(&kbdev->hwaccess.backend.reset_gpu)
|
||||
== KBASE_RESET_GPU_NOT_PENDING);
|
||||
#else
|
||||
dev_warn(kbdev->dev,
|
||||
"Jobs failed to be killed (within %d ms) as part of context termination (e.g. process exit)\n",
|
||||
ZAP_TIMEOUT);
|
||||
|
||||
#endif
|
||||
kbase_reset_gpu_wait(kbdev);
|
||||
exit:
|
||||
dev_dbg(kbdev->dev, "Zap: Finished Context %p", kctx);
|
||||
|
||||
@@ -812,7 +810,7 @@ u32 kbase_backend_get_current_flush_id(struct kbase_device *kbdev)
|
||||
mutex_lock(&kbdev->pm.lock);
|
||||
if (kbdev->pm.backend.gpu_powered)
|
||||
flush_id = kbase_reg_read(kbdev,
|
||||
GPU_CONTROL_REG(LATEST_FLUSH), NULL);
|
||||
GPU_CONTROL_REG(LATEST_FLUSH));
|
||||
mutex_unlock(&kbdev->pm.lock);
|
||||
}
|
||||
|
||||
@@ -821,21 +819,7 @@ u32 kbase_backend_get_current_flush_id(struct kbase_device *kbdev)
|
||||
|
||||
int kbase_job_slot_init(struct kbase_device *kbdev)
|
||||
{
|
||||
#if KBASE_GPU_RESET_EN
|
||||
kbdev->hwaccess.backend.reset_workq = alloc_workqueue(
|
||||
"Mali reset workqueue", 0, 1);
|
||||
if (NULL == kbdev->hwaccess.backend.reset_workq)
|
||||
return -EINVAL;
|
||||
|
||||
INIT_WORK(&kbdev->hwaccess.backend.reset_work,
|
||||
kbasep_reset_timeout_worker);
|
||||
|
||||
hrtimer_init(&kbdev->hwaccess.backend.reset_timer, CLOCK_MONOTONIC,
|
||||
HRTIMER_MODE_REL);
|
||||
kbdev->hwaccess.backend.reset_timer.function =
|
||||
kbasep_reset_timer_callback;
|
||||
#endif
|
||||
|
||||
CSTD_UNUSED(kbdev);
|
||||
return 0;
|
||||
}
|
||||
KBASE_EXPORT_TEST_API(kbase_job_slot_init);
|
||||
@@ -847,13 +831,10 @@ void kbase_job_slot_halt(struct kbase_device *kbdev)
|
||||
|
||||
void kbase_job_slot_term(struct kbase_device *kbdev)
|
||||
{
|
||||
#if KBASE_GPU_RESET_EN
|
||||
destroy_workqueue(kbdev->hwaccess.backend.reset_workq);
|
||||
#endif
|
||||
CSTD_UNUSED(kbdev);
|
||||
}
|
||||
KBASE_EXPORT_TEST_API(kbase_job_slot_term);
|
||||
|
||||
#if KBASE_GPU_RESET_EN
|
||||
/**
|
||||
* kbasep_check_for_afbc_on_slot() - Check whether AFBC is in use on this slot
|
||||
* @kbdev: kbase device pointer
|
||||
@@ -911,7 +892,6 @@ static bool kbasep_check_for_afbc_on_slot(struct kbase_device *kbdev,
|
||||
|
||||
return ret;
|
||||
}
|
||||
#endif /* KBASE_GPU_RESET_EN */
|
||||
|
||||
/**
|
||||
* kbase_job_slot_softstop_swflags - Soft-stop a job with flags
|
||||
@@ -968,7 +948,6 @@ void kbase_job_slot_hardstop(struct kbase_context *kctx, int js,
|
||||
{
|
||||
struct kbase_device *kbdev = kctx->kbdev;
|
||||
bool stopped;
|
||||
#if KBASE_GPU_RESET_EN
|
||||
/* We make the check for AFBC before evicting/stopping atoms. Note
|
||||
* that no other thread can modify the slots whilst we have the
|
||||
* hwaccess_lock. */
|
||||
@@ -976,12 +955,10 @@ void kbase_job_slot_hardstop(struct kbase_context *kctx, int js,
|
||||
kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_T76X_3542)
|
||||
&& kbasep_check_for_afbc_on_slot(kbdev, kctx, js,
|
||||
target_katom);
|
||||
#endif
|
||||
|
||||
stopped = kbase_backend_soft_hard_stop_slot(kbdev, kctx, js,
|
||||
target_katom,
|
||||
JS_COMMAND_HARD_STOP);
|
||||
#if KBASE_GPU_RESET_EN
|
||||
if (stopped && (kbase_hw_has_issue(kctx->kbdev, BASE_HW_ISSUE_8401) ||
|
||||
kbase_hw_has_issue(kctx->kbdev, BASE_HW_ISSUE_9510) ||
|
||||
needs_workaround_for_afbc)) {
|
||||
@@ -996,7 +973,6 @@ void kbase_job_slot_hardstop(struct kbase_context *kctx, int js,
|
||||
kbase_reset_gpu_locked(kbdev);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -1061,8 +1037,6 @@ void kbase_job_check_leave_disjoint(struct kbase_device *kbdev,
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
#if KBASE_GPU_RESET_EN
|
||||
static void kbase_debug_dump_registers(struct kbase_device *kbdev)
|
||||
{
|
||||
int i;
|
||||
@@ -1071,34 +1045,32 @@ static void kbase_debug_dump_registers(struct kbase_device *kbdev)
|
||||
|
||||
dev_err(kbdev->dev, "Register state:");
|
||||
dev_err(kbdev->dev, " GPU_IRQ_RAWSTAT=0x%08x GPU_STATUS=0x%08x",
|
||||
kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_RAWSTAT), NULL),
|
||||
kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_STATUS), NULL));
|
||||
kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_RAWSTAT)),
|
||||
kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_STATUS)));
|
||||
dev_err(kbdev->dev, " JOB_IRQ_RAWSTAT=0x%08x JOB_IRQ_JS_STATE=0x%08x",
|
||||
kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_RAWSTAT), NULL),
|
||||
kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_JS_STATE), NULL));
|
||||
kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_RAWSTAT)),
|
||||
kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_JS_STATE)));
|
||||
for (i = 0; i < 3; i++) {
|
||||
dev_err(kbdev->dev, " JS%d_STATUS=0x%08x JS%d_HEAD_LO=0x%08x",
|
||||
i, kbase_reg_read(kbdev, JOB_SLOT_REG(i, JS_STATUS),
|
||||
NULL),
|
||||
i, kbase_reg_read(kbdev, JOB_SLOT_REG(i, JS_HEAD_LO),
|
||||
NULL));
|
||||
i, kbase_reg_read(kbdev, JOB_SLOT_REG(i, JS_STATUS)),
|
||||
i, kbase_reg_read(kbdev, JOB_SLOT_REG(i, JS_HEAD_LO)));
|
||||
}
|
||||
dev_err(kbdev->dev, " MMU_IRQ_RAWSTAT=0x%08x GPU_FAULTSTATUS=0x%08x",
|
||||
kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_RAWSTAT), NULL),
|
||||
kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_FAULTSTATUS), NULL));
|
||||
kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_RAWSTAT)),
|
||||
kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_FAULTSTATUS)));
|
||||
dev_err(kbdev->dev, " GPU_IRQ_MASK=0x%08x JOB_IRQ_MASK=0x%08x MMU_IRQ_MASK=0x%08x",
|
||||
kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), NULL),
|
||||
kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_MASK), NULL),
|
||||
kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK), NULL));
|
||||
kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK)),
|
||||
kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_MASK)),
|
||||
kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK)));
|
||||
dev_err(kbdev->dev, " PWR_OVERRIDE0=0x%08x PWR_OVERRIDE1=0x%08x",
|
||||
kbase_reg_read(kbdev, GPU_CONTROL_REG(PWR_OVERRIDE0), NULL),
|
||||
kbase_reg_read(kbdev, GPU_CONTROL_REG(PWR_OVERRIDE1), NULL));
|
||||
kbase_reg_read(kbdev, GPU_CONTROL_REG(PWR_OVERRIDE0)),
|
||||
kbase_reg_read(kbdev, GPU_CONTROL_REG(PWR_OVERRIDE1)));
|
||||
dev_err(kbdev->dev, " SHADER_CONFIG=0x%08x L2_MMU_CONFIG=0x%08x",
|
||||
kbase_reg_read(kbdev, GPU_CONTROL_REG(SHADER_CONFIG), NULL),
|
||||
kbase_reg_read(kbdev, GPU_CONTROL_REG(L2_MMU_CONFIG), NULL));
|
||||
kbase_reg_read(kbdev, GPU_CONTROL_REG(SHADER_CONFIG)),
|
||||
kbase_reg_read(kbdev, GPU_CONTROL_REG(L2_MMU_CONFIG)));
|
||||
dev_err(kbdev->dev, " TILER_CONFIG=0x%08x JM_CONFIG=0x%08x",
|
||||
kbase_reg_read(kbdev, GPU_CONTROL_REG(TILER_CONFIG), NULL),
|
||||
kbase_reg_read(kbdev, GPU_CONTROL_REG(JM_CONFIG), NULL));
|
||||
kbase_reg_read(kbdev, GPU_CONTROL_REG(TILER_CONFIG)),
|
||||
kbase_reg_read(kbdev, GPU_CONTROL_REG(JM_CONFIG)));
|
||||
}
|
||||
|
||||
static void kbasep_reset_timeout_worker(struct work_struct *data)
|
||||
@@ -1107,7 +1079,6 @@ static void kbasep_reset_timeout_worker(struct work_struct *data)
|
||||
struct kbase_device *kbdev;
|
||||
ktime_t end_timestamp = ktime_get();
|
||||
struct kbasep_js_device_data *js_devdata;
|
||||
bool try_schedule = false;
|
||||
bool silent = false;
|
||||
u32 max_loops = KBASE_CLEAN_CACHE_MAX_LOOPS;
|
||||
|
||||
@@ -1125,9 +1096,10 @@ static void kbasep_reset_timeout_worker(struct work_struct *data)
|
||||
|
||||
KBASE_TRACE_ADD(kbdev, JM_BEGIN_RESET_WORKER, NULL, NULL, 0u, 0);
|
||||
|
||||
/* Suspend vinstr.
|
||||
* This call will block until vinstr is suspended. */
|
||||
kbase_vinstr_suspend(kbdev->vinstr_ctx);
|
||||
/* Disable GPU hardware counters.
|
||||
* This call will block until counters are disabled.
|
||||
*/
|
||||
kbase_hwcnt_context_disable(kbdev->hwcnt_gpu_ctx);
|
||||
|
||||
/* Make sure the timer has completed - this cannot be done from
|
||||
* interrupt context, so this cannot be done within
|
||||
@@ -1142,15 +1114,18 @@ static void kbasep_reset_timeout_worker(struct work_struct *data)
|
||||
KBASE_RESET_GPU_NOT_PENDING);
|
||||
kbase_disjoint_state_down(kbdev);
|
||||
wake_up(&kbdev->hwaccess.backend.reset_wait);
|
||||
kbase_vinstr_resume(kbdev->vinstr_ctx);
|
||||
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
|
||||
kbase_hwcnt_context_enable(kbdev->hwcnt_gpu_ctx);
|
||||
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
|
||||
return;
|
||||
}
|
||||
|
||||
KBASE_DEBUG_ASSERT(kbdev->irq_reset_flush == false);
|
||||
|
||||
spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
|
||||
spin_lock(&kbdev->hwaccess_lock);
|
||||
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
|
||||
spin_lock(&kbdev->mmu_mask_change);
|
||||
kbase_pm_reset_start_locked(kbdev);
|
||||
|
||||
/* We're about to flush out the IRQs and their bottom half's */
|
||||
kbdev->irq_reset_flush = true;
|
||||
|
||||
@@ -1159,8 +1134,7 @@ static void kbasep_reset_timeout_worker(struct work_struct *data)
|
||||
kbase_pm_disable_interrupts_nolock(kbdev);
|
||||
|
||||
spin_unlock(&kbdev->mmu_mask_change);
|
||||
spin_unlock(&kbdev->hwaccess_lock);
|
||||
spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
|
||||
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
|
||||
|
||||
/* Ensure that any IRQ handlers have finished
|
||||
* Must be done without any locks IRQ handlers will take */
|
||||
@@ -1203,7 +1177,8 @@ static void kbasep_reset_timeout_worker(struct work_struct *data)
|
||||
/* Complete any jobs that were still on the GPU */
|
||||
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
|
||||
kbdev->protected_mode = false;
|
||||
kbase_backend_reset(kbdev, &end_timestamp);
|
||||
if (!kbdev->pm.backend.protected_entry_transition_override)
|
||||
kbase_backend_reset(kbdev, &end_timestamp);
|
||||
kbase_pm_metrics_update(kbdev, NULL);
|
||||
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
|
||||
|
||||
@@ -1222,37 +1197,33 @@ static void kbasep_reset_timeout_worker(struct work_struct *data)
|
||||
|
||||
kbase_pm_enable_interrupts(kbdev);
|
||||
|
||||
atomic_set(&kbdev->hwaccess.backend.reset_gpu,
|
||||
KBASE_RESET_GPU_NOT_PENDING);
|
||||
|
||||
kbase_disjoint_state_down(kbdev);
|
||||
|
||||
wake_up(&kbdev->hwaccess.backend.reset_wait);
|
||||
if (!silent)
|
||||
dev_err(kbdev->dev, "Reset complete");
|
||||
|
||||
if (js_devdata->nr_contexts_pullable > 0 && !kbdev->poweroff_pending)
|
||||
try_schedule = true;
|
||||
|
||||
mutex_unlock(&js_devdata->runpool_mutex);
|
||||
|
||||
mutex_lock(&kbdev->pm.lock);
|
||||
|
||||
kbase_pm_reset_complete(kbdev);
|
||||
|
||||
/* Find out what cores are required now */
|
||||
kbase_pm_update_cores_state(kbdev);
|
||||
|
||||
/* Synchronously request and wait for those cores, because if
|
||||
* instrumentation is enabled it would need them immediately. */
|
||||
kbase_pm_check_transitions_sync(kbdev);
|
||||
kbase_pm_wait_for_desired_state(kbdev);
|
||||
|
||||
mutex_unlock(&kbdev->pm.lock);
|
||||
|
||||
atomic_set(&kbdev->hwaccess.backend.reset_gpu,
|
||||
KBASE_RESET_GPU_NOT_PENDING);
|
||||
|
||||
wake_up(&kbdev->hwaccess.backend.reset_wait);
|
||||
if (!silent)
|
||||
dev_err(kbdev->dev, "Reset complete");
|
||||
|
||||
/* Try submitting some jobs to restart processing */
|
||||
if (try_schedule) {
|
||||
KBASE_TRACE_ADD(kbdev, JM_SUBMIT_AFTER_RESET, NULL, NULL, 0u,
|
||||
0);
|
||||
kbase_js_sched_all(kbdev);
|
||||
}
|
||||
KBASE_TRACE_ADD(kbdev, JM_SUBMIT_AFTER_RESET, NULL, NULL, 0u, 0);
|
||||
kbase_js_sched_all(kbdev);
|
||||
|
||||
/* Process any pending slot updates */
|
||||
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
|
||||
@@ -1261,8 +1232,10 @@ static void kbasep_reset_timeout_worker(struct work_struct *data)
|
||||
|
||||
kbase_pm_context_idle(kbdev);
|
||||
|
||||
/* Release vinstr */
|
||||
kbase_vinstr_resume(kbdev->vinstr_ctx);
|
||||
/* Re-enable GPU hardware counters */
|
||||
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
|
||||
kbase_hwcnt_context_enable(kbdev->hwcnt_gpu_ctx);
|
||||
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
|
||||
|
||||
KBASE_TRACE_ADD(kbdev, JM_END_RESET_WORKER, NULL, NULL, 0u, 0);
|
||||
}
|
||||
@@ -1308,7 +1281,7 @@ static void kbasep_try_reset_gpu_early_locked(struct kbase_device *kbdev)
|
||||
/* To prevent getting incorrect registers when dumping failed job,
|
||||
* skip early reset.
|
||||
*/
|
||||
if (kbdev->job_fault_debug != false)
|
||||
if (atomic_read(&kbdev->job_fault_debug) > 0)
|
||||
return;
|
||||
|
||||
/* Check that the reset has been committed to (i.e. kbase_reset_gpu has
|
||||
@@ -1436,23 +1409,25 @@ void kbase_reset_gpu_locked(struct kbase_device *kbdev)
|
||||
kbasep_try_reset_gpu_early_locked(kbdev);
|
||||
}
|
||||
|
||||
void kbase_reset_gpu_silent(struct kbase_device *kbdev)
|
||||
int kbase_reset_gpu_silent(struct kbase_device *kbdev)
|
||||
{
|
||||
if (atomic_cmpxchg(&kbdev->hwaccess.backend.reset_gpu,
|
||||
KBASE_RESET_GPU_NOT_PENDING,
|
||||
KBASE_RESET_GPU_SILENT) !=
|
||||
KBASE_RESET_GPU_NOT_PENDING) {
|
||||
/* Some other thread is already resetting the GPU */
|
||||
return;
|
||||
return -EAGAIN;
|
||||
}
|
||||
|
||||
kbase_disjoint_state_up(kbdev);
|
||||
|
||||
queue_work(kbdev->hwaccess.backend.reset_workq,
|
||||
&kbdev->hwaccess.backend.reset_work);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
bool kbase_reset_gpu_active(struct kbase_device *kbdev)
|
||||
bool kbase_reset_gpu_is_active(struct kbase_device *kbdev)
|
||||
{
|
||||
if (atomic_read(&kbdev->hwaccess.backend.reset_gpu) ==
|
||||
KBASE_RESET_GPU_NOT_PENDING)
|
||||
@@ -1460,4 +1435,37 @@ bool kbase_reset_gpu_active(struct kbase_device *kbdev)
|
||||
|
||||
return true;
|
||||
}
|
||||
#endif /* KBASE_GPU_RESET_EN */
|
||||
|
||||
int kbase_reset_gpu_wait(struct kbase_device *kbdev)
|
||||
{
|
||||
wait_event(kbdev->hwaccess.backend.reset_wait,
|
||||
atomic_read(&kbdev->hwaccess.backend.reset_gpu)
|
||||
== KBASE_RESET_GPU_NOT_PENDING);
|
||||
|
||||
return 0;
|
||||
}
|
||||
KBASE_EXPORT_TEST_API(kbase_reset_gpu_wait);
|
||||
|
||||
int kbase_reset_gpu_init(struct kbase_device *kbdev)
|
||||
{
|
||||
kbdev->hwaccess.backend.reset_workq = alloc_workqueue(
|
||||
"Mali reset workqueue", 0, 1);
|
||||
if (kbdev->hwaccess.backend.reset_workq == NULL)
|
||||
return -ENOMEM;
|
||||
|
||||
INIT_WORK(&kbdev->hwaccess.backend.reset_work,
|
||||
kbasep_reset_timeout_worker);
|
||||
|
||||
hrtimer_init(&kbdev->hwaccess.backend.reset_timer, CLOCK_MONOTONIC,
|
||||
HRTIMER_MODE_REL);
|
||||
kbdev->hwaccess.backend.reset_timer.function =
|
||||
kbasep_reset_timer_callback;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void kbase_reset_gpu_term(struct kbase_device *kbdev)
|
||||
{
|
||||
destroy_workqueue(kbdev->hwaccess.backend.reset_workq);
|
||||
}
|
||||
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
/*
|
||||
*
|
||||
* (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved.
|
||||
* (C) COPYRIGHT 2011-2018 ARM Limited. All rights reserved.
|
||||
*
|
||||
* This program is free software and is provided to you under the terms of the
|
||||
* GNU General Public License version 2 as published by the Free Software
|
||||
@@ -159,11 +159,11 @@ void kbase_job_slot_halt(struct kbase_device *kbdev);
|
||||
void kbase_job_slot_term(struct kbase_device *kbdev);
|
||||
|
||||
/**
|
||||
* kbase_gpu_cacheclean - Cause a GPU cache clean & flush
|
||||
* kbase_gpu_cache_clean - Cause a GPU cache clean & flush
|
||||
* @kbdev: Device pointer
|
||||
*
|
||||
* Caller must not be in IRQ context
|
||||
*/
|
||||
void kbase_gpu_cacheclean(struct kbase_device *kbdev);
|
||||
void kbase_gpu_cache_clean(struct kbase_device *kbdev);
|
||||
|
||||
#endif /* _KBASE_JM_HWACCESS_H_ */
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,308 +0,0 @@
|
||||
/*
|
||||
*
|
||||
* (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
|
||||
*
|
||||
* This program is free software and is provided to you under the terms of the
|
||||
* GNU General Public License version 2 as published by the Free Software
|
||||
* Foundation, and any use by you of this program is subject to the terms
|
||||
* of such GNU licence.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, you can access it online at
|
||||
* http://www.gnu.org/licenses/gpl-2.0.html.
|
||||
*
|
||||
* SPDX-License-Identifier: GPL-2.0
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
|
||||
/*
|
||||
* Base kernel affinity manager APIs
|
||||
*/
|
||||
|
||||
#include <mali_kbase.h>
|
||||
#include "mali_kbase_js_affinity.h"
|
||||
#include "mali_kbase_hw.h"
|
||||
|
||||
#include <backend/gpu/mali_kbase_pm_internal.h>
|
||||
|
||||
|
||||
bool kbase_js_can_run_job_on_slot_no_lock(struct kbase_device *kbdev,
|
||||
int js)
|
||||
{
|
||||
/*
|
||||
* Here are the reasons for using job slot 2:
|
||||
* - BASE_HW_ISSUE_8987 (which is entirely used for that purpose)
|
||||
* - In absence of the above, then:
|
||||
* - Atoms with BASE_JD_REQ_COHERENT_GROUP
|
||||
* - But, only when there aren't contexts with
|
||||
* KBASEP_JS_CTX_ATTR_COMPUTE_ALL_CORES, because the atoms that run on
|
||||
* all cores on slot 1 could be blocked by those using a coherent group
|
||||
* on slot 2
|
||||
* - And, only when you actually have 2 or more coregroups - if you
|
||||
* only have 1 coregroup, then having jobs for slot 2 implies they'd
|
||||
* also be for slot 1, meaning you'll get interference from them. Jobs
|
||||
* able to run on slot 2 could also block jobs that can only run on
|
||||
* slot 1 (tiler jobs)
|
||||
*/
|
||||
if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8987))
|
||||
return true;
|
||||
|
||||
if (js != 2)
|
||||
return true;
|
||||
|
||||
/* Only deal with js==2 now: */
|
||||
if (kbdev->gpu_props.num_core_groups > 1) {
|
||||
/* Only use slot 2 in the 2+ coregroup case */
|
||||
if (kbasep_js_ctx_attr_is_attr_on_runpool(kbdev,
|
||||
KBASEP_JS_CTX_ATTR_COMPUTE_ALL_CORES) ==
|
||||
false) {
|
||||
/* ...But only when we *don't* have atoms that run on
|
||||
* all cores */
|
||||
|
||||
/* No specific check for BASE_JD_REQ_COHERENT_GROUP
|
||||
* atoms - the policy will sort that out */
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
/* Above checks failed mean we shouldn't use slot 2 */
|
||||
return false;
|
||||
}
|
||||
|
||||
/*
|
||||
* As long as it has been decided to have a deeper modification of
|
||||
* what job scheduler, power manager and affinity manager will
|
||||
* implement, this function is just an intermediate step that
|
||||
* assumes:
|
||||
* - all working cores will be powered on when this is called.
|
||||
* - largest current configuration is 2 core groups.
|
||||
* - It has been decided not to have hardcoded values so the low
|
||||
* and high cores in a core split will be evently distributed.
|
||||
* - Odd combinations of core requirements have been filtered out
|
||||
* and do not get to this function (e.g. CS+T+NSS is not
|
||||
* supported here).
|
||||
* - This function is frequently called and can be optimized,
|
||||
* (see notes in loops), but as the functionallity will likely
|
||||
* be modified, optimization has not been addressed.
|
||||
*/
|
||||
bool kbase_js_choose_affinity(u64 * const affinity,
|
||||
struct kbase_device *kbdev,
|
||||
struct kbase_jd_atom *katom, int js)
|
||||
{
|
||||
base_jd_core_req core_req = katom->core_req;
|
||||
unsigned int num_core_groups = kbdev->gpu_props.num_core_groups;
|
||||
u64 core_availability_mask;
|
||||
|
||||
lockdep_assert_held(&kbdev->hwaccess_lock);
|
||||
|
||||
core_availability_mask = kbase_pm_ca_get_core_mask(kbdev);
|
||||
|
||||
/*
|
||||
* If no cores are currently available (core availability policy is
|
||||
* transitioning) then fail.
|
||||
*/
|
||||
if (0 == core_availability_mask) {
|
||||
*affinity = 0;
|
||||
return false;
|
||||
}
|
||||
|
||||
KBASE_DEBUG_ASSERT(js >= 0);
|
||||
|
||||
if ((core_req & (BASE_JD_REQ_FS | BASE_JD_REQ_CS | BASE_JD_REQ_T)) ==
|
||||
BASE_JD_REQ_T) {
|
||||
/* If the hardware supports XAFFINITY then we'll only enable
|
||||
* the tiler (which is the default so this is a no-op),
|
||||
* otherwise enable shader core 0. */
|
||||
if (!kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_XAFFINITY))
|
||||
*affinity = 1;
|
||||
else
|
||||
*affinity = 0;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
if (1 == kbdev->gpu_props.num_cores) {
|
||||
/* trivial case only one core, nothing to do */
|
||||
*affinity = core_availability_mask &
|
||||
kbdev->pm.debug_core_mask[js];
|
||||
} else {
|
||||
if ((core_req & (BASE_JD_REQ_COHERENT_GROUP |
|
||||
BASE_JD_REQ_SPECIFIC_COHERENT_GROUP))) {
|
||||
if (js == 0 || num_core_groups == 1) {
|
||||
/* js[0] and single-core-group systems just get
|
||||
* the first core group */
|
||||
*affinity =
|
||||
kbdev->gpu_props.props.coherency_info.group[0].core_mask
|
||||
& core_availability_mask &
|
||||
kbdev->pm.debug_core_mask[js];
|
||||
} else {
|
||||
/* js[1], js[2] use core groups 0, 1 for
|
||||
* dual-core-group systems */
|
||||
u32 core_group_idx = ((u32) js) - 1;
|
||||
|
||||
KBASE_DEBUG_ASSERT(core_group_idx <
|
||||
num_core_groups);
|
||||
*affinity =
|
||||
kbdev->gpu_props.props.coherency_info.group[core_group_idx].core_mask
|
||||
& core_availability_mask &
|
||||
kbdev->pm.debug_core_mask[js];
|
||||
|
||||
/* If the job is specifically targeting core
|
||||
* group 1 and the core availability policy is
|
||||
* keeping that core group off, then fail */
|
||||
if (*affinity == 0 && core_group_idx == 1 &&
|
||||
kbdev->pm.backend.cg1_disabled
|
||||
== true)
|
||||
katom->event_code =
|
||||
BASE_JD_EVENT_PM_EVENT;
|
||||
}
|
||||
} else {
|
||||
/* All cores are available when no core split is
|
||||
* required */
|
||||
*affinity = core_availability_mask &
|
||||
kbdev->pm.debug_core_mask[js];
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* If no cores are currently available in the desired core group(s)
|
||||
* (core availability policy is transitioning) then fail.
|
||||
*/
|
||||
if (*affinity == 0)
|
||||
return false;
|
||||
|
||||
/* Enable core 0 if tiler required for hardware without XAFFINITY
|
||||
* support (notes above) */
|
||||
if (core_req & BASE_JD_REQ_T) {
|
||||
if (!kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_XAFFINITY))
|
||||
*affinity = *affinity | 1;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static inline bool kbase_js_affinity_is_violating(
|
||||
struct kbase_device *kbdev,
|
||||
u64 *affinities)
|
||||
{
|
||||
/* This implementation checks whether the two slots involved in Generic
|
||||
* thread creation have intersecting affinity. This is due to micro-
|
||||
* architectural issues where a job in slot A targetting cores used by
|
||||
* slot B could prevent the job in slot B from making progress until the
|
||||
* job in slot A has completed.
|
||||
*/
|
||||
u64 affinity_set_left;
|
||||
u64 affinity_set_right;
|
||||
u64 intersection;
|
||||
|
||||
KBASE_DEBUG_ASSERT(affinities != NULL);
|
||||
|
||||
affinity_set_left = affinities[1];
|
||||
|
||||
affinity_set_right = affinities[2];
|
||||
|
||||
/* A violation occurs when any bit in the left_set is also in the
|
||||
* right_set */
|
||||
intersection = affinity_set_left & affinity_set_right;
|
||||
|
||||
return (bool) (intersection != (u64) 0u);
|
||||
}
|
||||
|
||||
bool kbase_js_affinity_would_violate(struct kbase_device *kbdev, int js,
|
||||
u64 affinity)
|
||||
{
|
||||
struct kbasep_js_device_data *js_devdata;
|
||||
u64 new_affinities[BASE_JM_MAX_NR_SLOTS];
|
||||
|
||||
KBASE_DEBUG_ASSERT(kbdev != NULL);
|
||||
KBASE_DEBUG_ASSERT(js < BASE_JM_MAX_NR_SLOTS);
|
||||
js_devdata = &kbdev->js_data;
|
||||
|
||||
memcpy(new_affinities, js_devdata->runpool_irq.slot_affinities,
|
||||
sizeof(js_devdata->runpool_irq.slot_affinities));
|
||||
|
||||
new_affinities[js] |= affinity;
|
||||
|
||||
return kbase_js_affinity_is_violating(kbdev, new_affinities);
|
||||
}
|
||||
|
||||
void kbase_js_affinity_retain_slot_cores(struct kbase_device *kbdev, int js,
|
||||
u64 affinity)
|
||||
{
|
||||
struct kbasep_js_device_data *js_devdata;
|
||||
u64 cores;
|
||||
|
||||
KBASE_DEBUG_ASSERT(kbdev != NULL);
|
||||
KBASE_DEBUG_ASSERT(js < BASE_JM_MAX_NR_SLOTS);
|
||||
js_devdata = &kbdev->js_data;
|
||||
|
||||
KBASE_DEBUG_ASSERT(kbase_js_affinity_would_violate(kbdev, js, affinity)
|
||||
== false);
|
||||
|
||||
cores = affinity;
|
||||
while (cores) {
|
||||
int bitnum = fls64(cores) - 1;
|
||||
u64 bit = 1ULL << bitnum;
|
||||
s8 cnt;
|
||||
|
||||
cnt =
|
||||
++(js_devdata->runpool_irq.slot_affinity_refcount[js][bitnum]);
|
||||
|
||||
if (cnt == 1)
|
||||
js_devdata->runpool_irq.slot_affinities[js] |= bit;
|
||||
|
||||
cores &= ~bit;
|
||||
}
|
||||
}
|
||||
|
||||
void kbase_js_affinity_release_slot_cores(struct kbase_device *kbdev, int js,
|
||||
u64 affinity)
|
||||
{
|
||||
struct kbasep_js_device_data *js_devdata;
|
||||
u64 cores;
|
||||
|
||||
KBASE_DEBUG_ASSERT(kbdev != NULL);
|
||||
KBASE_DEBUG_ASSERT(js < BASE_JM_MAX_NR_SLOTS);
|
||||
js_devdata = &kbdev->js_data;
|
||||
|
||||
cores = affinity;
|
||||
while (cores) {
|
||||
int bitnum = fls64(cores) - 1;
|
||||
u64 bit = 1ULL << bitnum;
|
||||
s8 cnt;
|
||||
|
||||
KBASE_DEBUG_ASSERT(
|
||||
js_devdata->runpool_irq.slot_affinity_refcount[js][bitnum] > 0);
|
||||
|
||||
cnt =
|
||||
--(js_devdata->runpool_irq.slot_affinity_refcount[js][bitnum]);
|
||||
|
||||
if (0 == cnt)
|
||||
js_devdata->runpool_irq.slot_affinities[js] &= ~bit;
|
||||
|
||||
cores &= ~bit;
|
||||
}
|
||||
}
|
||||
|
||||
#if KBASE_TRACE_ENABLE
|
||||
void kbase_js_debug_log_current_affinities(struct kbase_device *kbdev)
|
||||
{
|
||||
struct kbasep_js_device_data *js_devdata;
|
||||
int slot_nr;
|
||||
|
||||
KBASE_DEBUG_ASSERT(kbdev != NULL);
|
||||
js_devdata = &kbdev->js_data;
|
||||
|
||||
for (slot_nr = 0; slot_nr < 3; ++slot_nr)
|
||||
KBASE_TRACE_ADD_SLOT_INFO(kbdev, JS_AFFINITY_CURRENT, NULL,
|
||||
NULL, 0u, slot_nr,
|
||||
(u32) js_devdata->runpool_irq.slot_affinities[slot_nr]);
|
||||
}
|
||||
#endif /* KBASE_TRACE_ENABLE */
|
||||
@@ -1,134 +0,0 @@
|
||||
/*
|
||||
*
|
||||
* (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved.
|
||||
*
|
||||
* This program is free software and is provided to you under the terms of the
|
||||
* GNU General Public License version 2 as published by the Free Software
|
||||
* Foundation, and any use by you of this program is subject to the terms
|
||||
* of such GNU licence.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, you can access it online at
|
||||
* http://www.gnu.org/licenses/gpl-2.0.html.
|
||||
*
|
||||
* SPDX-License-Identifier: GPL-2.0
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
|
||||
/*
|
||||
* Affinity Manager internal APIs.
|
||||
*/
|
||||
|
||||
#ifndef _KBASE_JS_AFFINITY_H_
|
||||
#define _KBASE_JS_AFFINITY_H_
|
||||
|
||||
/**
|
||||
* kbase_js_can_run_job_on_slot_no_lock - Decide whether it is possible to
|
||||
* submit a job to a particular job slot in the current status
|
||||
*
|
||||
* @kbdev: The kbase device structure of the device
|
||||
* @js: Job slot number to check for allowance
|
||||
*
|
||||
* Will check if submitting to the given job slot is allowed in the current
|
||||
* status. For example using job slot 2 while in soft-stoppable state and only
|
||||
* having 1 coregroup is not allowed by the policy. This function should be
|
||||
* called prior to submitting a job to a slot to make sure policy rules are not
|
||||
* violated.
|
||||
*
|
||||
* The following locking conditions are made on the caller
|
||||
* - it must hold hwaccess_lock
|
||||
*/
|
||||
bool kbase_js_can_run_job_on_slot_no_lock(struct kbase_device *kbdev, int js);
|
||||
|
||||
/**
|
||||
* kbase_js_choose_affinity - Compute affinity for a given job.
|
||||
*
|
||||
* @affinity: Affinity bitmap computed
|
||||
* @kbdev: The kbase device structure of the device
|
||||
* @katom: Job chain of which affinity is going to be found
|
||||
* @js: Slot the job chain is being submitted
|
||||
*
|
||||
* Currently assumes an all-on/all-off power management policy.
|
||||
* Also assumes there is at least one core with tiler available.
|
||||
*
|
||||
* Returns true if a valid affinity was chosen, false if
|
||||
* no cores were available.
|
||||
*/
|
||||
bool kbase_js_choose_affinity(u64 * const affinity,
|
||||
struct kbase_device *kbdev,
|
||||
struct kbase_jd_atom *katom,
|
||||
int js);
|
||||
|
||||
/**
|
||||
* kbase_js_affinity_would_violate - Determine whether a proposed affinity on
|
||||
* job slot @js would cause a violation of affinity restrictions.
|
||||
*
|
||||
* @kbdev: Kbase device structure
|
||||
* @js: The job slot to test
|
||||
* @affinity: The affinity mask to test
|
||||
*
|
||||
* The following locks must be held by the caller
|
||||
* - hwaccess_lock
|
||||
*
|
||||
* Return: true if the affinity would violate the restrictions
|
||||
*/
|
||||
bool kbase_js_affinity_would_violate(struct kbase_device *kbdev, int js,
|
||||
u64 affinity);
|
||||
|
||||
/**
|
||||
* kbase_js_affinity_retain_slot_cores - Affinity tracking: retain cores used by
|
||||
* a slot
|
||||
*
|
||||
* @kbdev: Kbase device structure
|
||||
* @js: The job slot retaining the cores
|
||||
* @affinity: The cores to retain
|
||||
*
|
||||
* The following locks must be held by the caller
|
||||
* - hwaccess_lock
|
||||
*/
|
||||
void kbase_js_affinity_retain_slot_cores(struct kbase_device *kbdev, int js,
|
||||
u64 affinity);
|
||||
|
||||
/**
|
||||
* kbase_js_affinity_release_slot_cores - Affinity tracking: release cores used
|
||||
* by a slot
|
||||
*
|
||||
* @kbdev: Kbase device structure
|
||||
* @js: Job slot
|
||||
* @affinity: Bit mask of core to be released
|
||||
*
|
||||
* Cores must be released as soon as a job is dequeued from a slot's 'submit
|
||||
* slots', and before another job is submitted to those slots. Otherwise, the
|
||||
* refcount could exceed the maximum number submittable to a slot,
|
||||
* %BASE_JM_SUBMIT_SLOTS.
|
||||
*
|
||||
* The following locks must be held by the caller
|
||||
* - hwaccess_lock
|
||||
*/
|
||||
void kbase_js_affinity_release_slot_cores(struct kbase_device *kbdev, int js,
|
||||
u64 affinity);
|
||||
|
||||
/**
|
||||
* kbase_js_debug_log_current_affinities - log the current affinities
|
||||
*
|
||||
* @kbdev: Kbase device structure
|
||||
*
|
||||
* Output to the Trace log the current tracked affinities on all slots
|
||||
*/
|
||||
#if KBASE_TRACE_ENABLE
|
||||
void kbase_js_debug_log_current_affinities(struct kbase_device *kbdev);
|
||||
#else /* KBASE_TRACE_ENABLE */
|
||||
static inline void kbase_js_debug_log_current_affinities(
		struct kbase_device *kbdev)
{
	/* Nothing to log: this is the !KBASE_TRACE_ENABLE variant. */
}
|
||||
#endif /* KBASE_TRACE_ENABLE */
|
||||
|
||||
#endif /* _KBASE_JS_AFFINITY_H_ */
|
||||
@@ -1,6 +1,6 @@
|
||||
/*
|
||||
*
|
||||
* (C) COPYRIGHT 2014-2017 ARM Limited. All rights reserved.
|
||||
* (C) COPYRIGHT 2014-2019 ARM Limited. All rights reserved.
|
||||
*
|
||||
* This program is free software and is provided to you under the terms of the
|
||||
* GNU General Public License version 2 as published by the Free Software
|
||||
@@ -27,6 +27,7 @@
|
||||
|
||||
#include <mali_kbase.h>
|
||||
#include <mali_kbase_hwaccess_jm.h>
|
||||
#include <mali_kbase_reset_gpu.h>
|
||||
#include <backend/gpu/mali_kbase_jm_internal.h>
|
||||
#include <backend/gpu/mali_kbase_js_internal.h>
|
||||
|
||||
@@ -116,7 +117,7 @@ static enum hrtimer_restart timer_callback(struct hrtimer *timer)
|
||||
if (!kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_5736)) {
|
||||
u32 ticks = atom->ticks++;
|
||||
|
||||
#ifndef CONFIG_MALI_JOB_DUMP
|
||||
#if !defined(CONFIG_MALI_JOB_DUMP) && !defined(CONFIG_MALI_VECTOR_DUMP)
|
||||
u32 soft_stop_ticks, hard_stop_ticks,
|
||||
gpu_reset_ticks;
|
||||
if (atom->core_req & BASE_JD_REQ_ONLY_COMPUTE) {
|
||||
@@ -250,14 +251,12 @@ static enum hrtimer_restart timer_callback(struct hrtimer *timer)
|
||||
}
|
||||
}
|
||||
}
|
||||
#if KBASE_GPU_RESET_EN
|
||||
if (reset_needed) {
|
||||
dev_err(kbdev->dev, "JS: Job has been on the GPU for too long (JS_RESET_TICKS_SS/DUMPING timeout hit). Issueing GPU soft-reset to resolve.");
|
||||
|
||||
if (kbase_prepare_to_reset_gpu_locked(kbdev))
|
||||
kbase_reset_gpu_locked(kbdev);
|
||||
}
|
||||
#endif /* KBASE_GPU_RESET_EN */
|
||||
/* the timer is re-issued if there are contexts in the run-pool */
|
||||
|
||||
if (backend->timer_running)
|
||||
|
||||
118
drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_l2_mmu_config.c
Normal file
118
drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_l2_mmu_config.c
Normal file
@@ -0,0 +1,118 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
/*
|
||||
*
|
||||
* (C) COPYRIGHT 2019 ARM Limited. All rights reserved.
|
||||
*
|
||||
* This program is free software and is provided to you under the terms of the
|
||||
* GNU General Public License version 2 as published by the Free Software
|
||||
* Foundation, and any use by you of this program is subject to the terms
|
||||
* of such GNU licence.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, you can access it online at
|
||||
* http://www.gnu.org/licenses/gpl-2.0.html.
|
||||
*
|
||||
* SPDX-License-Identifier: GPL-2.0
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
#include <mali_kbase.h>
|
||||
#include <mali_kbase_bits.h>
|
||||
#include <mali_kbase_config_defaults.h>
|
||||
#include <backend/gpu/mali_kbase_device_internal.h>
|
||||
#include "mali_kbase_l2_mmu_config.h"
|
||||
|
||||
/**
|
||||
* struct l2_mmu_config_limit_region
|
||||
*
|
||||
* @value: The default value to load into the L2_MMU_CONFIG register
|
||||
* @mask: The shifted mask of the field in the L2_MMU_CONFIG register
|
||||
* @shift: The shift of where the field starts in the L2_MMU_CONFIG register
|
||||
* This should be the same value as the lower of the two bit
|
||||
* positions given to GENMASK() for @mask
|
||||
*/
|
||||
struct l2_mmu_config_limit_region {
|
||||
u32 value, mask, shift;
|
||||
};
|
||||
|
||||
/**
|
||||
* struct l2_mmu_config_limit
|
||||
*
|
||||
* @product_model: The GPU for which this entry applies
|
||||
* @read: Values for the read limit field
|
||||
* @write: Values for the write limit field
|
||||
*/
|
||||
struct l2_mmu_config_limit {
|
||||
u32 product_model;
|
||||
struct l2_mmu_config_limit_region read;
|
||||
struct l2_mmu_config_limit_region write;
|
||||
};
|
||||
|
||||
/*
|
||||
* Zero represents no limit
|
||||
*
|
||||
* For LBEX TBEX TTRX and TNAX:
|
||||
* The value represents the number of outstanding reads (6 bits) or writes (5 bits)
|
||||
*
|
||||
* For all other GPUs it is a fraction; see mali_kbase_config_defaults.h
|
||||
*/
|
||||
static const struct l2_mmu_config_limit limits[] = {
|
||||
/* GPU read write */
|
||||
{GPU_ID2_PRODUCT_LBEX, {0, GENMASK(10, 5), 5}, {0, GENMASK(16, 12), 12} },
|
||||
{GPU_ID2_PRODUCT_TBEX, {0, GENMASK(10, 5), 5}, {0, GENMASK(16, 12), 12} },
|
||||
{GPU_ID2_PRODUCT_TTRX, {0, GENMASK(12, 7), 7}, {0, GENMASK(17, 13), 13} },
|
||||
{GPU_ID2_PRODUCT_TNAX, {0, GENMASK(12, 7), 7}, {0, GENMASK(17, 13), 13} },
|
||||
{GPU_ID2_PRODUCT_TGOX,
|
||||
{KBASE_3BIT_AID_32, GENMASK(14, 12), 12},
|
||||
{KBASE_3BIT_AID_32, GENMASK(17, 15), 15} },
|
||||
{GPU_ID2_PRODUCT_TNOX,
|
||||
{KBASE_3BIT_AID_32, GENMASK(14, 12), 12},
|
||||
{KBASE_3BIT_AID_32, GENMASK(17, 15), 15} },
|
||||
};
|
||||
|
||||
void kbase_set_mmu_quirks(struct kbase_device *kbdev)
|
||||
{
|
||||
/* All older GPUs had 2 bits for both fields, this is a default */
|
||||
struct l2_mmu_config_limit limit = {
|
||||
0, /* Any GPU not in the limits array defined above */
|
||||
{KBASE_AID_32, GENMASK(25, 24), 24},
|
||||
{KBASE_AID_32, GENMASK(27, 26), 26}
|
||||
};
|
||||
u32 product_model, gpu_id;
|
||||
u32 mmu_config;
|
||||
int i;
|
||||
|
||||
gpu_id = kbdev->gpu_props.props.raw_props.gpu_id;
|
||||
product_model = gpu_id & GPU_ID2_PRODUCT_MODEL;
|
||||
|
||||
/* Limit the GPU bus bandwidth if the platform needs this. */
|
||||
for (i = 0; i < ARRAY_SIZE(limits); i++) {
|
||||
if (product_model == limits[i].product_model) {
|
||||
limit = limits[i];
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
mmu_config = kbase_reg_read(kbdev, GPU_CONTROL_REG(L2_MMU_CONFIG));
|
||||
|
||||
mmu_config &= ~(limit.read.mask | limit.write.mask);
|
||||
/* Can't use FIELD_PREP() macro here as the mask isn't constant */
|
||||
mmu_config |= (limit.read.value << limit.read.shift) |
|
||||
(limit.write.value << limit.write.shift);
|
||||
|
||||
kbdev->hw_quirks_mmu = mmu_config;
|
||||
|
||||
if (kbdev->system_coherency == COHERENCY_ACE) {
|
||||
/* Allow memory configuration disparity to be ignored,
|
||||
* we optimize the use of shared memory and thus we
|
||||
* expect some disparity in the memory configuration.
|
||||
*/
|
||||
kbdev->hw_quirks_mmu |= L2_MMU_CONFIG_ALLOW_SNOOP_DISPARITY;
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,44 @@
|
||||
/*
|
||||
*
|
||||
* (C) COPYRIGHT ARM Limited. All rights reserved.
|
||||
*
|
||||
* This program is free software and is provided to you under the terms of the
|
||||
* GNU General Public License version 2 as published by the Free Software
|
||||
* Foundation, and any use by you of this program is subject to the terms
|
||||
* of such GNU licence.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, you can access it online at
|
||||
* http://www.gnu.org/licenses/gpl-2.0.html.
|
||||
*
|
||||
* SPDX-License-Identifier: GPL-2.0
|
||||
*
|
||||
*//* SPDX-License-Identifier: GPL-2.0 */
|
||||
/*
|
||||
* (C) COPYRIGHT 2019 ARM Limited. All rights reserved.
|
||||
*
|
||||
* This program is free software and is provided to you under the terms of the
|
||||
* GNU General Public License version 2 as published by the Free Software
|
||||
* Foundation, and any use by you of this program is subject to the terms
|
||||
* of such GNU licence.
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef _KBASE_L2_MMU_CONFIG_H_
|
||||
#define _KBASE_L2_MMU_CONFIG_H_
|
||||
/**
|
||||
* kbase_set_mmu_quirks - Set the hw_quirks_mmu field of kbdev
|
||||
*
|
||||
* @kbdev: The kbase device structure for the device (must be a valid pointer)
|
||||
*
|
||||
* Use this function to initialise the hw_quirks_mmu field, for instance to set
|
||||
* the MAX_READS and MAX_WRITES to sane defaults for each GPU.
|
||||
*/
|
||||
void kbase_set_mmu_quirks(struct kbase_device *kbdev);
|
||||
|
||||
#endif /* _KBASE_L2_MMU_CONFIG_H_ */
|
||||
@@ -1,6 +1,6 @@
|
||||
/*
|
||||
*
|
||||
* (C) COPYRIGHT 2014-2017 ARM Limited. All rights reserved.
|
||||
* (C) COPYRIGHT 2014-2019 ARM Limited. All rights reserved.
|
||||
*
|
||||
* This program is free software and is provided to you under the terms of the
|
||||
* GNU General Public License version 2 as published by the Free Software
|
||||
@@ -25,7 +25,7 @@
|
||||
#include <mali_kbase.h>
|
||||
#include <mali_kbase_mem.h>
|
||||
#include <mali_kbase_mmu_hw.h>
|
||||
#include <mali_kbase_tlstream.h>
|
||||
#include <mali_kbase_tracepoints.h>
|
||||
#include <backend/gpu/mali_kbase_device_internal.h>
|
||||
#include <mali_kbase_as_fault_debugfs.h>
|
||||
|
||||
@@ -66,44 +66,41 @@ static inline u64 lock_region(struct kbase_device *kbdev, u64 pfn,
|
||||
}
|
||||
|
||||
static int wait_ready(struct kbase_device *kbdev,
|
||||
unsigned int as_nr, struct kbase_context *kctx)
|
||||
unsigned int as_nr)
|
||||
{
|
||||
unsigned int max_loops = KBASE_AS_INACTIVE_MAX_LOOPS;
|
||||
u32 val = kbase_reg_read(kbdev, MMU_AS_REG(as_nr, AS_STATUS), kctx);
|
||||
u32 val = kbase_reg_read(kbdev, MMU_AS_REG(as_nr, AS_STATUS));
|
||||
|
||||
/* Wait for the MMU status to indicate there is no active command, in
|
||||
* case one is pending. Do not log remaining register accesses. */
|
||||
while (--max_loops && (val & AS_STATUS_AS_ACTIVE))
|
||||
val = kbase_reg_read(kbdev, MMU_AS_REG(as_nr, AS_STATUS), NULL);
|
||||
val = kbase_reg_read(kbdev, MMU_AS_REG(as_nr, AS_STATUS));
|
||||
|
||||
if (max_loops == 0) {
|
||||
dev_err(kbdev->dev, "AS_ACTIVE bit stuck\n");
|
||||
dev_err(kbdev->dev, "AS_ACTIVE bit stuck, might be caused by slow/unstable GPU clock or possible faulty FPGA connector\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* If waiting in loop was performed, log last read value. */
|
||||
if (KBASE_AS_INACTIVE_MAX_LOOPS - 1 > max_loops)
|
||||
kbase_reg_read(kbdev, MMU_AS_REG(as_nr, AS_STATUS), kctx);
|
||||
kbase_reg_read(kbdev, MMU_AS_REG(as_nr, AS_STATUS));
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int write_cmd(struct kbase_device *kbdev, int as_nr, u32 cmd,
|
||||
struct kbase_context *kctx)
|
||||
static int write_cmd(struct kbase_device *kbdev, int as_nr, u32 cmd)
|
||||
{
|
||||
int status;
|
||||
|
||||
/* write AS_COMMAND when MMU is ready to accept another command */
|
||||
status = wait_ready(kbdev, as_nr, kctx);
|
||||
status = wait_ready(kbdev, as_nr);
|
||||
if (status == 0)
|
||||
kbase_reg_write(kbdev, MMU_AS_REG(as_nr, AS_COMMAND), cmd,
|
||||
kctx);
|
||||
kbase_reg_write(kbdev, MMU_AS_REG(as_nr, AS_COMMAND), cmd);
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
static void validate_protected_page_fault(struct kbase_device *kbdev,
|
||||
struct kbase_context *kctx)
|
||||
static void validate_protected_page_fault(struct kbase_device *kbdev)
|
||||
{
|
||||
/* GPUs which support (native) protected mode shall not report page
|
||||
* fault addresses unless it has protected debug mode and protected
|
||||
@@ -115,8 +112,7 @@ static void validate_protected_page_fault(struct kbase_device *kbdev,
|
||||
|
||||
if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PROTECTED_DEBUG_MODE)) {
|
||||
protected_debug_mode = kbase_reg_read(kbdev,
|
||||
GPU_CONTROL_REG(GPU_STATUS),
|
||||
kctx) & GPU_DBGEN;
|
||||
GPU_CONTROL_REG(GPU_STATUS)) & GPU_DBGEN;
|
||||
}
|
||||
|
||||
if (!protected_debug_mode) {
|
||||
@@ -145,15 +141,16 @@ void kbase_mmu_interrupt(struct kbase_device *kbdev, u32 irq_stat)
|
||||
|
||||
/* remember current mask */
|
||||
spin_lock_irqsave(&kbdev->mmu_mask_change, flags);
|
||||
new_mask = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK), NULL);
|
||||
new_mask = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK));
|
||||
/* mask interrupts for now */
|
||||
kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), 0, NULL);
|
||||
kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), 0);
|
||||
spin_unlock_irqrestore(&kbdev->mmu_mask_change, flags);
|
||||
|
||||
while (bf_bits | pf_bits) {
|
||||
struct kbase_as *as;
|
||||
int as_no;
|
||||
struct kbase_context *kctx;
|
||||
struct kbase_fault *fault;
|
||||
|
||||
/*
|
||||
* the while logic ensures we have a bit set, no need to check
|
||||
@@ -162,6 +159,12 @@ void kbase_mmu_interrupt(struct kbase_device *kbdev, u32 irq_stat)
|
||||
as_no = ffs(bf_bits | pf_bits) - 1;
|
||||
as = &kbdev->as[as_no];
|
||||
|
||||
/* find the fault type */
|
||||
if (bf_bits & (1 << as_no))
|
||||
fault = &as->bf_data;
|
||||
else
|
||||
fault = &as->pf_data;
|
||||
|
||||
/*
|
||||
* Refcount the kctx ASAP - it shouldn't disappear anyway, since
|
||||
* Bus/Page faults _should_ only occur whilst jobs are running,
|
||||
@@ -170,51 +173,36 @@ void kbase_mmu_interrupt(struct kbase_device *kbdev, u32 irq_stat)
|
||||
*/
|
||||
kctx = kbasep_js_runpool_lookup_ctx(kbdev, as_no);
|
||||
|
||||
|
||||
/* find faulting address */
|
||||
as->fault_addr = kbase_reg_read(kbdev,
|
||||
MMU_AS_REG(as_no,
|
||||
AS_FAULTADDRESS_HI),
|
||||
kctx);
|
||||
as->fault_addr <<= 32;
|
||||
as->fault_addr |= kbase_reg_read(kbdev,
|
||||
MMU_AS_REG(as_no,
|
||||
AS_FAULTADDRESS_LO),
|
||||
kctx);
|
||||
|
||||
fault->addr = kbase_reg_read(kbdev, MMU_AS_REG(as_no,
|
||||
AS_FAULTADDRESS_HI));
|
||||
fault->addr <<= 32;
|
||||
fault->addr |= kbase_reg_read(kbdev, MMU_AS_REG(as_no,
|
||||
AS_FAULTADDRESS_LO));
|
||||
/* Mark the fault protected or not */
|
||||
as->protected_mode = kbdev->protected_mode;
|
||||
fault->protected_mode = kbdev->protected_mode;
|
||||
|
||||
if (kbdev->protected_mode && as->fault_addr) {
|
||||
if (kbdev->protected_mode && fault->addr) {
|
||||
/* check if address reporting is allowed */
|
||||
validate_protected_page_fault(kbdev, kctx);
|
||||
validate_protected_page_fault(kbdev);
|
||||
}
|
||||
|
||||
/* report the fault to debugfs */
|
||||
kbase_as_fault_debugfs_new(kbdev, as_no);
|
||||
|
||||
/* record the fault status */
|
||||
as->fault_status = kbase_reg_read(kbdev,
|
||||
MMU_AS_REG(as_no,
|
||||
AS_FAULTSTATUS),
|
||||
kctx);
|
||||
|
||||
/* find the fault type */
|
||||
as->fault_type = (bf_bits & (1 << as_no)) ?
|
||||
KBASE_MMU_FAULT_TYPE_BUS :
|
||||
KBASE_MMU_FAULT_TYPE_PAGE;
|
||||
fault->status = kbase_reg_read(kbdev, MMU_AS_REG(as_no,
|
||||
AS_FAULTSTATUS));
|
||||
|
||||
if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_AARCH64_MMU)) {
|
||||
as->fault_extra_addr = kbase_reg_read(kbdev,
|
||||
MMU_AS_REG(as_no, AS_FAULTEXTRA_HI),
|
||||
kctx);
|
||||
as->fault_extra_addr <<= 32;
|
||||
as->fault_extra_addr |= kbase_reg_read(kbdev,
|
||||
MMU_AS_REG(as_no, AS_FAULTEXTRA_LO),
|
||||
kctx);
|
||||
fault->extra_addr = kbase_reg_read(kbdev,
|
||||
MMU_AS_REG(as_no, AS_FAULTEXTRA_HI));
|
||||
fault->extra_addr <<= 32;
|
||||
fault->extra_addr |= kbase_reg_read(kbdev,
|
||||
MMU_AS_REG(as_no, AS_FAULTEXTRA_LO));
|
||||
}
|
||||
|
||||
if (kbase_as_has_bus_fault(as)) {
|
||||
if (kbase_as_has_bus_fault(as, fault)) {
|
||||
/* Mark bus fault as handled.
|
||||
* Note that a bus fault is processed first in case
|
||||
* where both a bus fault and page fault occur.
|
||||
@@ -234,32 +222,35 @@ void kbase_mmu_interrupt(struct kbase_device *kbdev, u32 irq_stat)
|
||||
|
||||
/* Process the interrupt for this address space */
|
||||
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
|
||||
kbase_mmu_interrupt_process(kbdev, kctx, as);
|
||||
kbase_mmu_interrupt_process(kbdev, kctx, as, fault);
|
||||
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
|
||||
}
|
||||
|
||||
/* reenable interrupts */
|
||||
spin_lock_irqsave(&kbdev->mmu_mask_change, flags);
|
||||
tmp = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK), NULL);
|
||||
tmp = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK));
|
||||
new_mask |= tmp;
|
||||
kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), new_mask, NULL);
|
||||
kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), new_mask);
|
||||
spin_unlock_irqrestore(&kbdev->mmu_mask_change, flags);
|
||||
}
|
||||
|
||||
void kbase_mmu_hw_configure(struct kbase_device *kbdev, struct kbase_as *as,
|
||||
struct kbase_context *kctx)
|
||||
void kbase_mmu_hw_configure(struct kbase_device *kbdev, struct kbase_as *as)
|
||||
{
|
||||
struct kbase_mmu_setup *current_setup = &as->current_setup;
|
||||
u32 transcfg = 0;
|
||||
u64 transcfg = 0;
|
||||
|
||||
if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_AARCH64_MMU)) {
|
||||
transcfg = current_setup->transcfg & 0xFFFFFFFFUL;
|
||||
transcfg = current_setup->transcfg;
|
||||
|
||||
/* Set flag AS_TRANSCFG_PTW_MEMATTR_WRITE_BACK */
|
||||
/* Clear PTW_MEMATTR bits */
|
||||
transcfg &= ~AS_TRANSCFG_PTW_MEMATTR_MASK;
|
||||
/* Enable correct PTW_MEMATTR bits */
|
||||
transcfg |= AS_TRANSCFG_PTW_MEMATTR_WRITE_BACK;
|
||||
/* Ensure page-tables reads use read-allocate cache-policy in
|
||||
* the L2
|
||||
*/
|
||||
transcfg |= AS_TRANSCFG_R_ALLOCATE;
|
||||
|
||||
if (kbdev->system_coherency == COHERENCY_ACE) {
|
||||
/* Set flag AS_TRANSCFG_PTW_SH_OS (outer shareable) */
|
||||
@@ -270,35 +261,34 @@ void kbase_mmu_hw_configure(struct kbase_device *kbdev, struct kbase_as *as,
|
||||
}
|
||||
|
||||
kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_TRANSCFG_LO),
|
||||
transcfg, kctx);
|
||||
transcfg);
|
||||
kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_TRANSCFG_HI),
|
||||
(current_setup->transcfg >> 32) & 0xFFFFFFFFUL,
|
||||
kctx);
|
||||
(transcfg >> 32) & 0xFFFFFFFFUL);
|
||||
} else {
|
||||
if (kbdev->system_coherency == COHERENCY_ACE)
|
||||
current_setup->transtab |= AS_TRANSTAB_LPAE_SHARE_OUTER;
|
||||
}
|
||||
|
||||
kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_TRANSTAB_LO),
|
||||
current_setup->transtab & 0xFFFFFFFFUL, kctx);
|
||||
current_setup->transtab & 0xFFFFFFFFUL);
|
||||
kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_TRANSTAB_HI),
|
||||
(current_setup->transtab >> 32) & 0xFFFFFFFFUL, kctx);
|
||||
(current_setup->transtab >> 32) & 0xFFFFFFFFUL);
|
||||
|
||||
kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_MEMATTR_LO),
|
||||
current_setup->memattr & 0xFFFFFFFFUL, kctx);
|
||||
current_setup->memattr & 0xFFFFFFFFUL);
|
||||
kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_MEMATTR_HI),
|
||||
(current_setup->memattr >> 32) & 0xFFFFFFFFUL, kctx);
|
||||
(current_setup->memattr >> 32) & 0xFFFFFFFFUL);
|
||||
|
||||
KBASE_TLSTREAM_TL_ATTRIB_AS_CONFIG(as,
|
||||
KBASE_TLSTREAM_TL_ATTRIB_AS_CONFIG(kbdev, as,
|
||||
current_setup->transtab,
|
||||
current_setup->memattr,
|
||||
transcfg);
|
||||
|
||||
write_cmd(kbdev, as->number, AS_COMMAND_UPDATE, kctx);
|
||||
write_cmd(kbdev, as->number, AS_COMMAND_UPDATE);
|
||||
}
|
||||
|
||||
int kbase_mmu_hw_do_operation(struct kbase_device *kbdev, struct kbase_as *as,
|
||||
struct kbase_context *kctx, u64 vpfn, u32 nr, u32 op,
|
||||
u64 vpfn, u32 nr, u32 op,
|
||||
unsigned int handling_irq)
|
||||
{
|
||||
int ret;
|
||||
@@ -307,22 +297,22 @@ int kbase_mmu_hw_do_operation(struct kbase_device *kbdev, struct kbase_as *as,
|
||||
|
||||
if (op == AS_COMMAND_UNLOCK) {
|
||||
/* Unlock doesn't require a lock first */
|
||||
ret = write_cmd(kbdev, as->number, AS_COMMAND_UNLOCK, kctx);
|
||||
ret = write_cmd(kbdev, as->number, AS_COMMAND_UNLOCK);
|
||||
} else {
|
||||
u64 lock_addr = lock_region(kbdev, vpfn, nr);
|
||||
|
||||
/* Lock the region that needs to be updated */
|
||||
kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_LOCKADDR_LO),
|
||||
lock_addr & 0xFFFFFFFFUL, kctx);
|
||||
lock_addr & 0xFFFFFFFFUL);
|
||||
kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_LOCKADDR_HI),
|
||||
(lock_addr >> 32) & 0xFFFFFFFFUL, kctx);
|
||||
write_cmd(kbdev, as->number, AS_COMMAND_LOCK, kctx);
|
||||
(lock_addr >> 32) & 0xFFFFFFFFUL);
|
||||
write_cmd(kbdev, as->number, AS_COMMAND_LOCK);
|
||||
|
||||
/* Run the MMU operation */
|
||||
write_cmd(kbdev, as->number, op, kctx);
|
||||
write_cmd(kbdev, as->number, op);
|
||||
|
||||
/* Wait for the flush to complete */
|
||||
ret = wait_ready(kbdev, as->number, kctx);
|
||||
ret = wait_ready(kbdev, as->number);
|
||||
|
||||
if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_9630)) {
|
||||
/* Issue an UNLOCK command to ensure that valid page
|
||||
@@ -339,8 +329,8 @@ int kbase_mmu_hw_do_operation(struct kbase_device *kbdev, struct kbase_as *as,
|
||||
commands in order to flush the MMU/uTLB,
|
||||
see PRLAM-8812.
|
||||
*/
|
||||
write_cmd(kbdev, as->number, AS_COMMAND_UNLOCK, kctx);
|
||||
write_cmd(kbdev, as->number, AS_COMMAND_UNLOCK, kctx);
|
||||
write_cmd(kbdev, as->number, AS_COMMAND_UNLOCK);
|
||||
write_cmd(kbdev, as->number, AS_COMMAND_UNLOCK);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -348,7 +338,7 @@ int kbase_mmu_hw_do_operation(struct kbase_device *kbdev, struct kbase_as *as,
|
||||
}
|
||||
|
||||
void kbase_mmu_hw_clear_fault(struct kbase_device *kbdev, struct kbase_as *as,
|
||||
struct kbase_context *kctx, enum kbase_mmu_fault_type type)
|
||||
enum kbase_mmu_fault_type type)
|
||||
{
|
||||
unsigned long flags;
|
||||
u32 pf_bf_mask;
|
||||
@@ -368,14 +358,14 @@ void kbase_mmu_hw_clear_fault(struct kbase_device *kbdev, struct kbase_as *as,
|
||||
type == KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED)
|
||||
pf_bf_mask |= MMU_BUS_ERROR(as->number);
|
||||
|
||||
kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_CLEAR), pf_bf_mask, kctx);
|
||||
kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_CLEAR), pf_bf_mask);
|
||||
|
||||
unlock:
|
||||
spin_unlock_irqrestore(&kbdev->mmu_mask_change, flags);
|
||||
}
|
||||
|
||||
void kbase_mmu_hw_enable_fault(struct kbase_device *kbdev, struct kbase_as *as,
|
||||
struct kbase_context *kctx, enum kbase_mmu_fault_type type)
|
||||
enum kbase_mmu_fault_type type)
|
||||
{
|
||||
unsigned long flags;
|
||||
u32 irq_mask;
|
||||
@@ -391,14 +381,14 @@ void kbase_mmu_hw_enable_fault(struct kbase_device *kbdev, struct kbase_as *as,
|
||||
if (kbdev->irq_reset_flush)
|
||||
goto unlock;
|
||||
|
||||
irq_mask = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK), kctx) |
|
||||
irq_mask = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK)) |
|
||||
MMU_PAGE_FAULT(as->number);
|
||||
|
||||
if (type == KBASE_MMU_FAULT_TYPE_BUS ||
|
||||
type == KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED)
|
||||
irq_mask |= MMU_BUS_ERROR(as->number);
|
||||
|
||||
kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), irq_mask, kctx);
|
||||
kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), irq_mask);
|
||||
|
||||
unlock:
|
||||
spin_unlock_irqrestore(&kbdev->mmu_mask_change, flags);
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
/*
|
||||
*
|
||||
* (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
|
||||
* (C) COPYRIGHT 2014-2015, 2019 ARM Limited. All rights reserved.
|
||||
*
|
||||
* This program is free software and is provided to you under the terms of the
|
||||
* GNU General Public License version 2 as published by the Free Software
|
||||
@@ -29,8 +29,8 @@
|
||||
* register access implementation of the MMU hardware interface
|
||||
*/
|
||||
|
||||
#ifndef _MALI_KBASE_MMU_HW_DIRECT_H_
|
||||
#define _MALI_KBASE_MMU_HW_DIRECT_H_
|
||||
#ifndef _KBASE_MMU_HW_DIRECT_H_
|
||||
#define _KBASE_MMU_HW_DIRECT_H_
|
||||
|
||||
#include <mali_kbase_defs.h>
|
||||
|
||||
@@ -39,9 +39,24 @@
|
||||
*
|
||||
* Process the MMU interrupt that was reported by the &kbase_device.
|
||||
*
|
||||
* @kbdev: kbase context to clear the fault from.
|
||||
* @irq_stat: Value of the MMU_IRQ_STATUS register
|
||||
* @kbdev: Pointer to the kbase device for which the interrupt happened.
|
||||
* @irq_stat: Value of the MMU_IRQ_STATUS register.
|
||||
*/
|
||||
void kbase_mmu_interrupt(struct kbase_device *kbdev, u32 irq_stat);
|
||||
|
||||
#endif /* _MALI_KBASE_MMU_HW_DIRECT_H_ */
|
||||
/**
|
||||
* kbase_mmu_bus_fault_interrupt - Process a bus fault interrupt.
|
||||
*
|
||||
* Process the bus fault interrupt that was reported for a particular GPU
|
||||
* address space.
|
||||
*
|
||||
* @kbdev: Pointer to the kbase device for which bus fault was reported.
|
||||
* @status: Value of the GPU_FAULTSTATUS register.
|
||||
* @as_nr: GPU address space for which the bus fault occurred.
|
||||
*
|
||||
* Return: zero if the operation was successful, non-zero otherwise.
|
||||
*/
|
||||
int kbase_mmu_bus_fault_interrupt(struct kbase_device *kbdev,
|
||||
u32 status, u32 as_nr);
|
||||
|
||||
#endif /* _KBASE_MMU_HW_DIRECT_H_ */
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
/*
|
||||
*
|
||||
* (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved.
|
||||
* (C) COPYRIGHT 2010-2015,2018 ARM Limited. All rights reserved.
|
||||
*
|
||||
* This program is free software and is provided to you under the terms of the
|
||||
* GNU General Public License version 2 as published by the Free Software
|
||||
@@ -29,9 +29,9 @@
|
||||
#include <mali_kbase.h>
|
||||
#include <mali_kbase_pm.h>
|
||||
|
||||
static u64 always_on_get_core_mask(struct kbase_device *kbdev)
|
||||
static bool always_on_shaders_needed(struct kbase_device *kbdev)
|
||||
{
|
||||
return kbdev->gpu_props.props.raw_props.shader_present;
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool always_on_get_core_active(struct kbase_device *kbdev)
|
||||
@@ -59,7 +59,7 @@ const struct kbase_pm_policy kbase_pm_always_on_policy_ops = {
|
||||
"always_on", /* name */
|
||||
always_on_init, /* init */
|
||||
always_on_term, /* term */
|
||||
always_on_get_core_mask, /* get_core_mask */
|
||||
always_on_shaders_needed, /* shaders_needed */
|
||||
always_on_get_core_active, /* get_core_active */
|
||||
0u, /* flags */
|
||||
KBASE_PM_POLICY_ID_ALWAYS_ON, /* id */
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
|
||||
/*
|
||||
*
|
||||
* (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
|
||||
* (C) COPYRIGHT 2011-2015,2018 ARM Limited. All rights reserved.
|
||||
*
|
||||
* This program is free software and is provided to you under the terms of the
|
||||
* GNU General Public License version 2 as published by the Free Software
|
||||
@@ -37,13 +36,13 @@
|
||||
*
|
||||
* - When KBase indicates that the GPU will be powered up, but we don't yet
|
||||
* know which Job Chains are to be run:
|
||||
* All Shader Cores are powered up, regardless of whether or not they will
|
||||
* be needed later.
|
||||
* Shader Cores are powered up, regardless of whether or not they will be
|
||||
* needed later.
|
||||
*
|
||||
* - When KBase indicates that a set of Shader Cores are needed to submit the
|
||||
* currently queued Job Chains:
|
||||
* All Shader Cores are kept powered, regardless of whether or not they will
|
||||
* be needed
|
||||
* - When KBase indicates that Shader Cores are needed to submit the currently
|
||||
* queued Job Chains:
|
||||
* Shader Cores are kept powered, regardless of whether or not they will be
|
||||
* needed
|
||||
*
|
||||
* - When KBase indicates that the GPU need not be powered:
|
||||
* The Shader Cores are kept powered, regardless of whether or not they will
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
/*
|
||||
/*
|
||||
*
|
||||
* (C) COPYRIGHT 2010-2018 ARM Limited. All rights reserved.
|
||||
* (C) COPYRIGHT 2010-2019 ARM Limited. All rights reserved.
|
||||
*
|
||||
* This program is free software and is provided to you under the terms of the
|
||||
* GNU General Public License version 2 as published by the Free Software
|
||||
@@ -31,11 +31,15 @@
|
||||
|
||||
#include <mali_kbase_pm.h>
|
||||
#include <mali_kbase_hwaccess_jm.h>
|
||||
#include <mali_kbase_hwcnt_context.h>
|
||||
#include <backend/gpu/mali_kbase_js_internal.h>
|
||||
#include <backend/gpu/mali_kbase_pm_internal.h>
|
||||
#include <backend/gpu/mali_kbase_jm_internal.h>
|
||||
#include <backend/gpu/mali_kbase_devfreq.h>
|
||||
|
||||
static void kbase_pm_gpu_poweroff_wait_wq(struct work_struct *data);
|
||||
static void kbase_pm_hwcnt_disable_worker(struct work_struct *data);
|
||||
static void kbase_pm_gpu_clock_control_worker(struct work_struct *data);
|
||||
|
||||
int kbase_pm_runtime_init(struct kbase_device *kbdev)
|
||||
{
|
||||
@@ -128,12 +132,12 @@ int kbase_hwaccess_pm_init(struct kbase_device *kbdev)
|
||||
INIT_WORK(&kbdev->pm.backend.gpu_poweroff_wait_work,
|
||||
kbase_pm_gpu_poweroff_wait_wq);
|
||||
|
||||
kbdev->pm.backend.ca_cores_enabled = ~0ull;
|
||||
kbdev->pm.backend.gpu_powered = false;
|
||||
kbdev->pm.suspending = false;
|
||||
#ifdef CONFIG_MALI_BIFROST_DEBUG
|
||||
kbdev->pm.backend.driver_ready_for_irqs = false;
|
||||
#endif /* CONFIG_MALI_BIFROST_DEBUG */
|
||||
kbdev->pm.backend.gpu_in_desired_state = true;
|
||||
init_waitqueue_head(&kbdev->pm.backend.gpu_in_desired_state_wait);
|
||||
|
||||
/* Initialise the metrics subsystem */
|
||||
@@ -141,9 +145,6 @@ int kbase_hwaccess_pm_init(struct kbase_device *kbdev)
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
init_waitqueue_head(&kbdev->pm.backend.l2_powered_wait);
|
||||
kbdev->pm.backend.l2_powered = 0;
|
||||
|
||||
init_waitqueue_head(&kbdev->pm.backend.reset_done_wait);
|
||||
kbdev->pm.backend.reset_done = false;
|
||||
|
||||
@@ -151,7 +152,6 @@ int kbase_hwaccess_pm_init(struct kbase_device *kbdev)
|
||||
kbdev->pm.active_count = 0;
|
||||
|
||||
spin_lock_init(&kbdev->pm.backend.gpu_cycle_counter_requests_lock);
|
||||
spin_lock_init(&kbdev->pm.backend.gpu_powered_lock);
|
||||
|
||||
init_waitqueue_head(&kbdev->pm.backend.poweroff_wait);
|
||||
|
||||
@@ -161,8 +161,59 @@ int kbase_hwaccess_pm_init(struct kbase_device *kbdev)
|
||||
if (kbase_pm_policy_init(kbdev) != 0)
|
||||
goto pm_policy_fail;
|
||||
|
||||
if (kbase_pm_state_machine_init(kbdev) != 0)
|
||||
goto pm_state_machine_fail;
|
||||
|
||||
kbdev->pm.backend.hwcnt_desired = false;
|
||||
kbdev->pm.backend.hwcnt_disabled = true;
|
||||
INIT_WORK(&kbdev->pm.backend.hwcnt_disable_work,
|
||||
kbase_pm_hwcnt_disable_worker);
|
||||
kbase_hwcnt_context_disable(kbdev->hwcnt_gpu_ctx);
|
||||
|
||||
/* At runtime, this feature can be enabled via module parameter
|
||||
* when insmod is executed. Then this will override all workarounds.
|
||||
*/
|
||||
if (platform_power_down_only) {
|
||||
kbdev->pm.backend.gpu_clock_slow_down_wa = false;
|
||||
kbdev->pm.backend.l2_always_on = false;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (IS_ENABLED(CONFIG_MALI_HW_ERRATA_1485982_NOT_AFFECTED)) {
|
||||
kbdev->pm.backend.l2_always_on = false;
|
||||
kbdev->pm.backend.gpu_clock_slow_down_wa = false;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* WA1: L2 always_on for GPUs being affected by GPU2017-1336 */
|
||||
if (!IS_ENABLED(CONFIG_MALI_HW_ERRATA_1485982_USE_CLOCK_ALTERNATIVE)) {
|
||||
kbdev->pm.backend.gpu_clock_slow_down_wa = false;
|
||||
if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_GPU2017_1336))
|
||||
kbdev->pm.backend.l2_always_on = true;
|
||||
else
|
||||
kbdev->pm.backend.l2_always_on = false;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* WA3: Clock slow down for GPUs being affected by GPU2017-1336 */
|
||||
kbdev->pm.backend.l2_always_on = false;
|
||||
if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_GPU2017_1336)) {
|
||||
kbdev->pm.backend.gpu_clock_slow_down_wa = true;
|
||||
kbdev->pm.backend.gpu_clock_suspend_freq = 0;
|
||||
kbdev->pm.backend.gpu_clock_slow_down_desired = true;
|
||||
kbdev->pm.backend.gpu_clock_slowed_down = false;
|
||||
INIT_WORK(&kbdev->pm.backend.gpu_clock_control_work,
|
||||
kbase_pm_gpu_clock_control_worker);
|
||||
} else
|
||||
kbdev->pm.backend.gpu_clock_slow_down_wa = false;
|
||||
|
||||
return 0;
|
||||
|
||||
pm_state_machine_fail:
|
||||
kbase_pm_policy_term(kbdev);
|
||||
pm_policy_fail:
|
||||
kbase_pm_ca_term(kbdev);
|
||||
workq_fail:
|
||||
@@ -178,12 +229,19 @@ void kbase_pm_do_poweron(struct kbase_device *kbdev, bool is_resume)
|
||||
* kbase_pm_clock_off() */
|
||||
kbase_pm_clock_on(kbdev, is_resume);
|
||||
|
||||
if (!is_resume) {
|
||||
unsigned long flags;
|
||||
|
||||
/* Force update of L2 state - if we have abandoned a power off
|
||||
* then this may be required to power the L2 back on.
|
||||
*/
|
||||
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
|
||||
kbase_pm_update_state(kbdev);
|
||||
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
|
||||
}
|
||||
|
||||
/* Update core status as required by the policy */
|
||||
KBASE_TIMELINE_PM_CHECKTRANS(kbdev,
|
||||
SW_FLOW_PM_CHECKTRANS_PM_DO_POWERON_START);
|
||||
kbase_pm_update_cores_state(kbdev);
|
||||
KBASE_TIMELINE_PM_CHECKTRANS(kbdev,
|
||||
SW_FLOW_PM_CHECKTRANS_PM_DO_POWERON_END);
|
||||
|
||||
/* NOTE: We don't wait to reach the desired state, since running atoms
|
||||
* will wait for that state to be reached anyway */
|
||||
@@ -198,44 +256,23 @@ static void kbase_pm_gpu_poweroff_wait_wq(struct work_struct *data)
|
||||
struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
|
||||
unsigned long flags;
|
||||
|
||||
#if !PLATFORM_POWER_DOWN_ONLY
|
||||
/* Wait for power transitions to complete. We do this with no locks held
|
||||
* so that we don't deadlock with any pending workqueues */
|
||||
KBASE_TIMELINE_PM_CHECKTRANS(kbdev,
|
||||
SW_FLOW_PM_CHECKTRANS_PM_DO_POWEROFF_START);
|
||||
kbase_pm_check_transitions_sync(kbdev);
|
||||
KBASE_TIMELINE_PM_CHECKTRANS(kbdev,
|
||||
SW_FLOW_PM_CHECKTRANS_PM_DO_POWEROFF_END);
|
||||
#endif /* !PLATFORM_POWER_DOWN_ONLY */
|
||||
if (!platform_power_down_only)
|
||||
/* Wait for power transitions to complete. We do this with no locks held
|
||||
* so that we don't deadlock with any pending workqueues.
|
||||
*/
|
||||
kbase_pm_wait_for_desired_state(kbdev);
|
||||
|
||||
mutex_lock(&js_devdata->runpool_mutex);
|
||||
mutex_lock(&kbdev->pm.lock);
|
||||
|
||||
#if PLATFORM_POWER_DOWN_ONLY
|
||||
if (kbdev->pm.backend.gpu_powered) {
|
||||
if (kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_L2)) {
|
||||
/* If L2 cache is powered then we must flush it before
|
||||
* we power off the GPU. Normally this would have been
|
||||
* handled when the L2 was powered off. */
|
||||
kbase_gpu_cacheclean(kbdev);
|
||||
}
|
||||
}
|
||||
#endif /* PLATFORM_POWER_DOWN_ONLY */
|
||||
|
||||
if (!backend->poweron_required) {
|
||||
#if !PLATFORM_POWER_DOWN_ONLY
|
||||
unsigned long flags;
|
||||
|
||||
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
|
||||
WARN_ON(kbdev->l2_available_bitmap ||
|
||||
kbdev->shader_available_bitmap ||
|
||||
kbdev->tiler_available_bitmap);
|
||||
WARN_ON(backend->shaders_state !=
|
||||
KBASE_SHADERS_OFF_CORESTACK_OFF ||
|
||||
backend->l2_state != KBASE_L2_OFF);
|
||||
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
|
||||
#endif /* !PLATFORM_POWER_DOWN_ONLY */
|
||||
|
||||
/* Consume any change-state events */
|
||||
kbase_timeline_pm_check_handle_event(kbdev,
|
||||
KBASE_TIMELINE_PM_EVENT_GPU_STATE_CHANGED);
|
||||
|
||||
/* Disable interrupts and turn the clock off */
|
||||
if (!kbase_pm_clock_off(kbdev, backend->poweroff_is_suspend)) {
|
||||
@@ -268,6 +305,8 @@ static void kbase_pm_gpu_poweroff_wait_wq(struct work_struct *data)
|
||||
backend->poweroff_wait_in_progress = false;
|
||||
if (backend->poweron_required) {
|
||||
backend->poweron_required = false;
|
||||
kbdev->pm.backend.l2_desired = true;
|
||||
kbase_pm_update_state(kbdev);
|
||||
kbase_pm_update_cores_state_nolock(kbdev);
|
||||
kbase_backend_slot_update(kbdev);
|
||||
}
|
||||
@@ -279,6 +318,161 @@ static void kbase_pm_gpu_poweroff_wait_wq(struct work_struct *data)
|
||||
wake_up(&kbdev->pm.backend.poweroff_wait);
|
||||
}
|
||||
|
||||
static void kbase_pm_l2_clock_slow(struct kbase_device *kbdev)
|
||||
{
|
||||
#if defined(CONFIG_MALI_BIFROST_DVFS)
|
||||
struct clk *clk = kbdev->clocks[0];
|
||||
#endif
|
||||
|
||||
if (!kbdev->pm.backend.gpu_clock_slow_down_wa)
|
||||
return;
|
||||
|
||||
/* No suspend clock is specified */
|
||||
if (WARN_ON_ONCE(!kbdev->pm.backend.gpu_clock_suspend_freq))
|
||||
return;
|
||||
|
||||
#if defined(CONFIG_MALI_BIFROST_DEVFREQ)
|
||||
|
||||
/* Suspend devfreq */
|
||||
devfreq_suspend_device(kbdev->devfreq);
|
||||
|
||||
/* Keep the current freq to restore it upon resume */
|
||||
kbdev->previous_frequency = kbdev->current_nominal_freq;
|
||||
|
||||
/* Slow down GPU clock to the suspend clock*/
|
||||
kbase_devfreq_force_freq(kbdev,
|
||||
kbdev->pm.backend.gpu_clock_suspend_freq);
|
||||
|
||||
#elif defined(CONFIG_MALI_BIFROST_DVFS) /* CONFIG_MALI_BIFROST_DEVFREQ */
|
||||
|
||||
if (WARN_ON_ONCE(!clk))
|
||||
return;
|
||||
|
||||
/* Stop the metrics gathering framework */
|
||||
if (kbase_pm_metrics_is_active(kbdev))
|
||||
kbase_pm_metrics_stop(kbdev);
|
||||
|
||||
/* Keep the current freq to restore it upon resume */
|
||||
kbdev->previous_frequency = clk_get_rate(clk);
|
||||
|
||||
/* Slow down GPU clock to the suspend clock*/
|
||||
if (WARN_ON_ONCE(clk_set_rate(clk,
|
||||
kbdev->pm.backend.gpu_clock_suspend_freq)))
|
||||
dev_err(kbdev->dev, "Failed to set suspend freq\n");
|
||||
|
||||
#endif /* CONFIG_MALI_BIFROST_DVFS */
|
||||
}
|
||||
|
||||
static void kbase_pm_l2_clock_normalize(struct kbase_device *kbdev)
|
||||
{
|
||||
#if defined(CONFIG_MALI_BIFROST_DVFS)
|
||||
struct clk *clk = kbdev->clocks[0];
|
||||
#endif
|
||||
|
||||
if (!kbdev->pm.backend.gpu_clock_slow_down_wa)
|
||||
return;
|
||||
|
||||
#if defined(CONFIG_MALI_BIFROST_DEVFREQ)
|
||||
|
||||
/* Restore GPU clock to the previous one */
|
||||
kbase_devfreq_force_freq(kbdev, kbdev->previous_frequency);
|
||||
|
||||
/* Resume devfreq */
|
||||
devfreq_resume_device(kbdev->devfreq);
|
||||
|
||||
#elif defined(CONFIG_MALI_BIFROST_DVFS) /* CONFIG_MALI_BIFROST_DEVFREQ */
|
||||
|
||||
if (WARN_ON_ONCE(!clk))
|
||||
return;
|
||||
|
||||
/* Restore GPU clock */
|
||||
if (WARN_ON_ONCE(clk_set_rate(clk, kbdev->previous_frequency)))
|
||||
dev_err(kbdev->dev, "Failed to restore freq (%lu)\n",
|
||||
kbdev->previous_frequency);
|
||||
|
||||
/* Restart the metrics gathering framework */
|
||||
kbase_pm_metrics_start(kbdev);
|
||||
|
||||
#endif /* CONFIG_MALI_BIFROST_DVFS */
|
||||
}
|
||||
|
||||
static void kbase_pm_gpu_clock_control_worker(struct work_struct *data)
|
||||
{
|
||||
struct kbase_device *kbdev = container_of(data, struct kbase_device,
|
||||
pm.backend.gpu_clock_control_work);
|
||||
struct kbase_pm_device_data *pm = &kbdev->pm;
|
||||
struct kbase_pm_backend_data *backend = &pm->backend;
|
||||
unsigned long flags;
|
||||
bool slow_down = false, normalize = false;
|
||||
|
||||
/* Determine if GPU clock control is required */
|
||||
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
|
||||
if (!backend->gpu_clock_slowed_down &&
|
||||
backend->gpu_clock_slow_down_desired) {
|
||||
slow_down = true;
|
||||
backend->gpu_clock_slowed_down = true;
|
||||
} else if (backend->gpu_clock_slowed_down &&
|
||||
!backend->gpu_clock_slow_down_desired) {
|
||||
normalize = true;
|
||||
backend->gpu_clock_slowed_down = false;
|
||||
}
|
||||
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
|
||||
|
||||
/* Control GPU clock according to the request of L2 state machine.
|
||||
* The GPU clock needs to be lowered for safe L2 power down
|
||||
* and restored to previous speed at L2 power up.
|
||||
*/
|
||||
if (slow_down)
|
||||
kbase_pm_l2_clock_slow(kbdev);
|
||||
else if (normalize)
|
||||
kbase_pm_l2_clock_normalize(kbdev);
|
||||
|
||||
/* Tell L2 state machine to transit to next state */
|
||||
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
|
||||
kbase_pm_update_state(kbdev);
|
||||
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
|
||||
}
|
||||
|
||||
static void kbase_pm_hwcnt_disable_worker(struct work_struct *data)
|
||||
{
|
||||
struct kbase_device *kbdev = container_of(data, struct kbase_device,
|
||||
pm.backend.hwcnt_disable_work);
|
||||
struct kbase_pm_device_data *pm = &kbdev->pm;
|
||||
struct kbase_pm_backend_data *backend = &pm->backend;
|
||||
unsigned long flags;
|
||||
|
||||
bool do_disable;
|
||||
|
||||
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
|
||||
do_disable = !backend->hwcnt_desired && !backend->hwcnt_disabled;
|
||||
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
|
||||
|
||||
if (!do_disable)
|
||||
return;
|
||||
|
||||
kbase_hwcnt_context_disable(kbdev->hwcnt_gpu_ctx);
|
||||
|
||||
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
|
||||
do_disable = !backend->hwcnt_desired && !backend->hwcnt_disabled;
|
||||
|
||||
if (do_disable) {
|
||||
/* PM state did not change while we were doing the disable,
|
||||
* so commit the work we just performed and continue the state
|
||||
* machine.
|
||||
*/
|
||||
backend->hwcnt_disabled = true;
|
||||
kbase_pm_update_state(kbdev);
|
||||
kbase_backend_slot_update(kbdev);
|
||||
} else {
|
||||
/* PM state was updated while we were doing the disable,
|
||||
* so we need to undo the disable we just performed.
|
||||
*/
|
||||
kbase_hwcnt_context_enable(kbdev->hwcnt_gpu_ctx);
|
||||
}
|
||||
|
||||
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
|
||||
}
|
||||
|
||||
void kbase_pm_do_poweroff(struct kbase_device *kbdev, bool is_suspend)
|
||||
{
|
||||
unsigned long flags;
|
||||
@@ -286,29 +480,31 @@ void kbase_pm_do_poweroff(struct kbase_device *kbdev, bool is_suspend)
|
||||
lockdep_assert_held(&kbdev->pm.lock);
|
||||
|
||||
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
|
||||
if (!kbdev->pm.backend.poweroff_wait_in_progress) {
|
||||
/* Force all cores off */
|
||||
kbdev->pm.backend.desired_shader_state = 0;
|
||||
kbdev->pm.backend.desired_tiler_state = 0;
|
||||
|
||||
/* Force all cores to be unavailable, in the situation where
|
||||
* transitions are in progress for some cores but not others,
|
||||
* and kbase_pm_check_transitions_nolock can not immediately
|
||||
* power off the cores */
|
||||
kbdev->shader_available_bitmap = 0;
|
||||
kbdev->tiler_available_bitmap = 0;
|
||||
kbdev->l2_available_bitmap = 0;
|
||||
if (!kbdev->pm.backend.gpu_powered)
|
||||
goto unlock_hwaccess;
|
||||
|
||||
kbdev->pm.backend.poweroff_wait_in_progress = true;
|
||||
kbdev->pm.backend.poweroff_is_suspend = is_suspend;
|
||||
if (kbdev->pm.backend.poweroff_wait_in_progress)
|
||||
goto unlock_hwaccess;
|
||||
|
||||
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
|
||||
/*Kick off wq here. Callers will have to wait*/
|
||||
queue_work(kbdev->pm.backend.gpu_poweroff_wait_wq,
|
||||
&kbdev->pm.backend.gpu_poweroff_wait_work);
|
||||
} else {
|
||||
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
|
||||
}
|
||||
/* Force all cores off */
|
||||
kbdev->pm.backend.shaders_desired = false;
|
||||
kbdev->pm.backend.l2_desired = false;
|
||||
|
||||
kbdev->pm.backend.poweroff_wait_in_progress = true;
|
||||
kbdev->pm.backend.poweroff_is_suspend = is_suspend;
|
||||
kbdev->pm.backend.invoke_poweroff_wait_wq_when_l2_off = true;
|
||||
|
||||
/* l2_desired being false should cause the state machine to
|
||||
* start powering off the L2. When it actually is powered off,
|
||||
* the interrupt handler will call kbase_pm_l2_update_state()
|
||||
* again, which will trigger the kbase_pm_gpu_poweroff_wait_wq.
|
||||
* Callers of this function will need to wait on poweroff_wait.
|
||||
*/
|
||||
kbase_pm_update_state(kbdev);
|
||||
|
||||
unlock_hwaccess:
|
||||
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
|
||||
}
|
||||
|
||||
static bool is_poweroff_in_progress(struct kbase_device *kbdev)
|
||||
@@ -328,6 +524,7 @@ void kbase_pm_wait_for_poweroff_complete(struct kbase_device *kbdev)
|
||||
wait_event_killable(kbdev->pm.backend.poweroff_wait,
|
||||
is_poweroff_in_progress(kbdev));
|
||||
}
|
||||
KBASE_EXPORT_TEST_API(kbase_pm_wait_for_poweroff_complete);
|
||||
|
||||
int kbase_hwaccess_pm_powerup(struct kbase_device *kbdev,
|
||||
unsigned int flags)
|
||||
@@ -353,8 +550,6 @@ int kbase_hwaccess_pm_powerup(struct kbase_device *kbdev,
|
||||
return ret;
|
||||
}
|
||||
|
||||
kbasep_pm_init_core_use_bitmaps(kbdev);
|
||||
|
||||
kbdev->pm.debug_core_mask_all = kbdev->pm.debug_core_mask[0] =
|
||||
kbdev->pm.debug_core_mask[1] =
|
||||
kbdev->pm.debug_core_mask[2] =
|
||||
@@ -375,9 +570,7 @@ int kbase_hwaccess_pm_powerup(struct kbase_device *kbdev,
|
||||
/* We are ready to receive IRQ's now as power policy is set up, so
|
||||
* enable them now. */
|
||||
#ifdef CONFIG_MALI_BIFROST_DEBUG
|
||||
spin_lock_irqsave(&kbdev->pm.backend.gpu_powered_lock, irq_flags);
|
||||
kbdev->pm.backend.driver_ready_for_irqs = true;
|
||||
spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, irq_flags);
|
||||
#endif
|
||||
kbase_pm_enable_interrupts(kbdev);
|
||||
|
||||
@@ -386,9 +579,6 @@ int kbase_hwaccess_pm_powerup(struct kbase_device *kbdev,
|
||||
mutex_unlock(&kbdev->pm.lock);
|
||||
mutex_unlock(&js_devdata->runpool_mutex);
|
||||
|
||||
/* Idle the GPU and/or cores, if the policy wants it to */
|
||||
kbase_pm_context_idle(kbdev);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -397,7 +587,6 @@ void kbase_hwaccess_pm_halt(struct kbase_device *kbdev)
|
||||
KBASE_DEBUG_ASSERT(kbdev != NULL);
|
||||
|
||||
mutex_lock(&kbdev->pm.lock);
|
||||
kbase_pm_cancel_deferred_poweroff(kbdev);
|
||||
kbase_pm_do_poweroff(kbdev, false);
|
||||
mutex_unlock(&kbdev->pm.lock);
|
||||
}
|
||||
@@ -410,7 +599,18 @@ void kbase_hwaccess_pm_term(struct kbase_device *kbdev)
|
||||
KBASE_DEBUG_ASSERT(kbdev->pm.active_count == 0);
|
||||
KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_cycle_counter_requests == 0);
|
||||
|
||||
cancel_work_sync(&kbdev->pm.backend.hwcnt_disable_work);
|
||||
|
||||
if (kbdev->pm.backend.hwcnt_disabled) {
|
||||
unsigned long flags;
|
||||
|
||||
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
|
||||
kbase_hwcnt_context_enable(kbdev->hwcnt_gpu_ctx);
|
||||
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
|
||||
}
|
||||
|
||||
/* Free any resources the policy allocated */
|
||||
kbase_pm_state_machine_term(kbdev);
|
||||
kbase_pm_policy_term(kbdev);
|
||||
kbase_pm_ca_term(kbdev);
|
||||
|
||||
@@ -422,24 +622,13 @@ void kbase_hwaccess_pm_term(struct kbase_device *kbdev)
|
||||
|
||||
void kbase_pm_power_changed(struct kbase_device *kbdev)
|
||||
{
|
||||
bool cores_are_available;
|
||||
unsigned long flags;
|
||||
|
||||
KBASE_TIMELINE_PM_CHECKTRANS(kbdev,
|
||||
SW_FLOW_PM_CHECKTRANS_GPU_INTERRUPT_START);
|
||||
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
|
||||
cores_are_available = kbase_pm_check_transitions_nolock(kbdev);
|
||||
KBASE_TIMELINE_PM_CHECKTRANS(kbdev,
|
||||
SW_FLOW_PM_CHECKTRANS_GPU_INTERRUPT_END);
|
||||
kbase_pm_update_state(kbdev);
|
||||
|
||||
if (cores_are_available) {
|
||||
/* Log timelining information that a change in state has
|
||||
* completed */
|
||||
kbase_timeline_pm_handle_event(kbdev,
|
||||
KBASE_TIMELINE_PM_EVENT_GPU_STATE_CHANGED);
|
||||
kbase_backend_slot_update(kbdev);
|
||||
|
||||
kbase_backend_slot_update(kbdev);
|
||||
}
|
||||
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
|
||||
}
|
||||
|
||||
@@ -476,7 +665,6 @@ void kbase_hwaccess_pm_suspend(struct kbase_device *kbdev)
|
||||
mutex_lock(&js_devdata->runpool_mutex);
|
||||
mutex_lock(&kbdev->pm.lock);
|
||||
|
||||
kbase_pm_cancel_deferred_poweroff(kbdev);
|
||||
kbase_pm_do_poweroff(kbdev, true);
|
||||
|
||||
kbase_backend_timer_suspend(kbdev);
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
/*
|
||||
*
|
||||
* (C) COPYRIGHT 2013-2017 ARM Limited. All rights reserved.
|
||||
* (C) COPYRIGHT 2013-2018 ARM Limited. All rights reserved.
|
||||
*
|
||||
* This program is free software and is provided to you under the terms of the
|
||||
* GNU General Public License version 2 as published by the Free Software
|
||||
@@ -27,161 +27,80 @@
|
||||
#include <mali_kbase.h>
|
||||
#include <mali_kbase_pm.h>
|
||||
#include <backend/gpu/mali_kbase_pm_internal.h>
|
||||
|
||||
static const struct kbase_pm_ca_policy *const policy_list[] = {
|
||||
&kbase_pm_ca_fixed_policy_ops,
|
||||
#ifdef CONFIG_MALI_BIFROST_DEVFREQ
|
||||
&kbase_pm_ca_devfreq_policy_ops,
|
||||
#ifdef MALI_BIFROST_NO_MALI
|
||||
#include <backend/gpu/mali_kbase_model_dummy.h>
|
||||
#endif
|
||||
#if !MALI_CUSTOMER_RELEASE
|
||||
&kbase_pm_ca_random_policy_ops
|
||||
#endif
|
||||
};
|
||||
|
||||
/**
|
||||
* POLICY_COUNT - The number of policies available in the system.
|
||||
*
|
||||
* This is derived from the number of functions listed in policy_list.
|
||||
*/
|
||||
#define POLICY_COUNT (sizeof(policy_list)/sizeof(*policy_list))
|
||||
|
||||
int kbase_pm_ca_init(struct kbase_device *kbdev)
|
||||
{
|
||||
KBASE_DEBUG_ASSERT(kbdev != NULL);
|
||||
#ifdef CONFIG_MALI_BIFROST_DEVFREQ
|
||||
struct kbase_pm_backend_data *pm_backend = &kbdev->pm.backend;
|
||||
|
||||
kbdev->pm.backend.ca_current_policy = policy_list[0];
|
||||
|
||||
kbdev->pm.backend.ca_current_policy->init(kbdev);
|
||||
if (kbdev->current_core_mask)
|
||||
pm_backend->ca_cores_enabled = kbdev->current_core_mask;
|
||||
else
|
||||
pm_backend->ca_cores_enabled =
|
||||
kbdev->gpu_props.props.raw_props.shader_present;
|
||||
#endif
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void kbase_pm_ca_term(struct kbase_device *kbdev)
|
||||
{
|
||||
kbdev->pm.backend.ca_current_policy->term(kbdev);
|
||||
}
|
||||
|
||||
int kbase_pm_ca_list_policies(const struct kbase_pm_ca_policy * const **list)
|
||||
#ifdef CONFIG_MALI_BIFROST_DEVFREQ
|
||||
void kbase_devfreq_set_core_mask(struct kbase_device *kbdev, u64 core_mask)
|
||||
{
|
||||
if (!list)
|
||||
return POLICY_COUNT;
|
||||
|
||||
*list = policy_list;
|
||||
|
||||
return POLICY_COUNT;
|
||||
}
|
||||
|
||||
KBASE_EXPORT_TEST_API(kbase_pm_ca_list_policies);
|
||||
|
||||
const struct kbase_pm_ca_policy
|
||||
*kbase_pm_ca_get_policy(struct kbase_device *kbdev)
|
||||
{
|
||||
KBASE_DEBUG_ASSERT(kbdev != NULL);
|
||||
|
||||
return kbdev->pm.backend.ca_current_policy;
|
||||
}
|
||||
|
||||
KBASE_EXPORT_TEST_API(kbase_pm_ca_get_policy);
|
||||
|
||||
void kbase_pm_ca_set_policy(struct kbase_device *kbdev,
|
||||
const struct kbase_pm_ca_policy *new_policy)
|
||||
{
|
||||
const struct kbase_pm_ca_policy *old_policy;
|
||||
struct kbase_pm_backend_data *pm_backend = &kbdev->pm.backend;
|
||||
unsigned long flags;
|
||||
|
||||
KBASE_DEBUG_ASSERT(kbdev != NULL);
|
||||
KBASE_DEBUG_ASSERT(new_policy != NULL);
|
||||
|
||||
KBASE_TRACE_ADD(kbdev, PM_CA_SET_POLICY, NULL, NULL, 0u,
|
||||
new_policy->id);
|
||||
|
||||
/* During a policy change we pretend the GPU is active */
|
||||
/* A suspend won't happen here, because we're in a syscall from a
|
||||
* userspace thread */
|
||||
kbase_pm_context_active(kbdev);
|
||||
|
||||
mutex_lock(&kbdev->pm.lock);
|
||||
|
||||
/* Remove the policy to prevent IRQ handlers from working on it */
|
||||
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
|
||||
old_policy = kbdev->pm.backend.ca_current_policy;
|
||||
kbdev->pm.backend.ca_current_policy = NULL;
|
||||
|
||||
if (!(core_mask & kbdev->pm.debug_core_mask_all)) {
|
||||
dev_err(kbdev->dev, "OPP core mask 0x%llX does not intersect with debug mask 0x%llX\n",
|
||||
core_mask, kbdev->pm.debug_core_mask_all);
|
||||
goto unlock;
|
||||
}
|
||||
|
||||
pm_backend->ca_cores_enabled = core_mask;
|
||||
|
||||
kbase_pm_update_state(kbdev);
|
||||
|
||||
unlock:
|
||||
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
|
||||
|
||||
if (old_policy->term)
|
||||
old_policy->term(kbdev);
|
||||
|
||||
if (new_policy->init)
|
||||
new_policy->init(kbdev);
|
||||
|
||||
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
|
||||
kbdev->pm.backend.ca_current_policy = new_policy;
|
||||
|
||||
/* If any core power state changes were previously attempted, but
|
||||
* couldn't be made because the policy was changing (current_policy was
|
||||
* NULL), then re-try them here. */
|
||||
kbase_pm_update_cores_state_nolock(kbdev);
|
||||
|
||||
kbdev->pm.backend.ca_current_policy->update_core_status(kbdev,
|
||||
kbdev->shader_ready_bitmap,
|
||||
kbdev->shader_transitioning_bitmap);
|
||||
|
||||
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
|
||||
|
||||
mutex_unlock(&kbdev->pm.lock);
|
||||
|
||||
/* Now the policy change is finished, we release our fake context active
|
||||
* reference */
|
||||
kbase_pm_context_idle(kbdev);
|
||||
dev_dbg(kbdev->dev, "Devfreq policy : new core mask=%llX\n",
|
||||
pm_backend->ca_cores_enabled);
|
||||
}
|
||||
|
||||
KBASE_EXPORT_TEST_API(kbase_pm_ca_set_policy);
|
||||
#endif
|
||||
|
||||
u64 kbase_pm_ca_get_core_mask(struct kbase_device *kbdev)
|
||||
{
|
||||
#ifdef CONFIG_MALI_BIFROST_DEVFREQ
|
||||
struct kbase_pm_backend_data *pm_backend = &kbdev->pm.backend;
|
||||
#endif
|
||||
|
||||
lockdep_assert_held(&kbdev->hwaccess_lock);
|
||||
|
||||
/* All cores must be enabled when instrumentation is in use */
|
||||
if (kbdev->pm.backend.instr_enabled)
|
||||
return kbdev->gpu_props.props.raw_props.shader_present &
|
||||
kbdev->pm.debug_core_mask_all;
|
||||
|
||||
if (kbdev->pm.backend.ca_current_policy == NULL)
|
||||
return kbdev->gpu_props.props.raw_props.shader_present &
|
||||
kbdev->pm.debug_core_mask_all;
|
||||
|
||||
return kbdev->pm.backend.ca_current_policy->get_core_mask(kbdev) &
|
||||
kbdev->pm.debug_core_mask_all;
|
||||
#ifdef CONFIG_MALI_BIFROST_DEVFREQ
|
||||
return pm_backend->ca_cores_enabled & kbdev->pm.debug_core_mask_all;
|
||||
#else
|
||||
return kbdev->gpu_props.props.raw_props.shader_present &
|
||||
kbdev->pm.debug_core_mask_all;
|
||||
#endif
|
||||
}
|
||||
|
||||
KBASE_EXPORT_TEST_API(kbase_pm_ca_get_core_mask);
|
||||
|
||||
void kbase_pm_ca_update_core_status(struct kbase_device *kbdev, u64 cores_ready,
|
||||
u64 cores_transitioning)
|
||||
u64 kbase_pm_ca_get_instr_core_mask(struct kbase_device *kbdev)
|
||||
{
|
||||
lockdep_assert_held(&kbdev->hwaccess_lock);
|
||||
|
||||
if (kbdev->pm.backend.ca_current_policy != NULL)
|
||||
kbdev->pm.backend.ca_current_policy->update_core_status(kbdev,
|
||||
cores_ready,
|
||||
cores_transitioning);
|
||||
}
|
||||
|
||||
void kbase_pm_ca_instr_enable(struct kbase_device *kbdev)
|
||||
{
|
||||
unsigned long flags;
|
||||
|
||||
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
|
||||
kbdev->pm.backend.instr_enabled = true;
|
||||
|
||||
kbase_pm_update_cores_state_nolock(kbdev);
|
||||
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
|
||||
}
|
||||
|
||||
void kbase_pm_ca_instr_disable(struct kbase_device *kbdev)
|
||||
{
|
||||
lockdep_assert_held(&kbdev->hwaccess_lock);
|
||||
kbdev->pm.backend.instr_enabled = false;
|
||||
|
||||
kbase_pm_update_cores_state_nolock(kbdev);
|
||||
#ifdef CONFIG_MALI_BIFROST_NO_MALI
|
||||
return (((1ull) << KBASE_DUMMY_MODEL_MAX_SHADER_CORES) - 1);
|
||||
#else
|
||||
return kbdev->pm.backend.pm_shaders_core_mask;
|
||||
#endif
|
||||
}
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
/*
|
||||
*
|
||||
* (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
|
||||
* (C) COPYRIGHT 2011-2018 ARM Limited. All rights reserved.
|
||||
*
|
||||
* This program is free software and is provided to you under the terms of the
|
||||
* GNU General Public License version 2 as published by the Free Software
|
||||
@@ -75,23 +75,15 @@ void kbase_pm_ca_update_core_status(struct kbase_device *kbdev, u64 cores_ready,
|
||||
u64 cores_transitioning);
|
||||
|
||||
/**
|
||||
* kbase_pm_ca_instr_enable - Enable override for instrumentation
|
||||
* kbase_pm_ca_get_instr_core_mask - Get the PM state sync-ed shaders core mask
|
||||
*
|
||||
* @kbdev: The kbase device structure for the device (must be a valid pointer)
|
||||
*
|
||||
* This overrides the output of the core availability policy, ensuring that all
|
||||
* cores are available
|
||||
*/
|
||||
void kbase_pm_ca_instr_enable(struct kbase_device *kbdev);
|
||||
|
||||
/**
|
||||
* kbase_pm_ca_instr_disable - Disable override for instrumentation
|
||||
* Returns a mask of the PM state synchronised shader cores for arranging
|
||||
* HW performance counter dumps
|
||||
*
|
||||
* @kbdev: The kbase device structure for the device (must be a valid pointer)
|
||||
*
|
||||
* This disables any previously enabled override, and resumes normal policy
|
||||
* functionality
|
||||
* Return: The bit mask of PM state synchronised cores
|
||||
*/
|
||||
void kbase_pm_ca_instr_disable(struct kbase_device *kbdev);
|
||||
u64 kbase_pm_ca_get_instr_core_mask(struct kbase_device *kbdev);
|
||||
|
||||
#endif /* _KBASE_PM_CA_H_ */
|
||||
|
||||
@@ -1,134 +0,0 @@
|
||||
/*
|
||||
*
|
||||
* (C) COPYRIGHT 2017 ARM Limited. All rights reserved.
|
||||
*
|
||||
* This program is free software and is provided to you under the terms of the
|
||||
* GNU General Public License version 2 as published by the Free Software
|
||||
* Foundation, and any use by you of this program is subject to the terms
|
||||
* of such GNU licence.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, you can access it online at
|
||||
* http://www.gnu.org/licenses/gpl-2.0.html.
|
||||
*
|
||||
* SPDX-License-Identifier: GPL-2.0
|
||||
*
|
||||
*/
|
||||
|
||||
/*
|
||||
* A core availability policy implementing core mask selection from devfreq OPPs
|
||||
*
|
||||
*/
|
||||
|
||||
#include <mali_kbase.h>
|
||||
#include <mali_kbase_pm.h>
|
||||
#include <backend/gpu/mali_kbase_pm_internal.h>
|
||||
#include <linux/version.h>
|
||||
|
||||
/**
 * kbase_devfreq_set_core_mask - Set the shader core mask wanted by devfreq
 * @kbdev:     The kbase device structure for the device
 * @core_mask: Mask of shader cores that should be enabled
 *
 * Records @core_mask as the desired mask under hwaccess_lock. If no core
 * currently in use lies outside the desired mask, the enabled mask is set to
 * the desired mask straight away. Otherwise only the unwanted cores are
 * removed from the enabled mask and ca_in_transition is set. The core state
 * is then re-evaluated.
 */
void kbase_devfreq_set_core_mask(struct kbase_device *kbdev, u64 core_mask)
{
	struct kbasep_pm_ca_policy_devfreq *policy =
		&kbdev->pm.backend.ca_policy_data.devfreq;
	unsigned long irq_flags;

	spin_lock_irqsave(&kbdev->hwaccess_lock, irq_flags);

	policy->cores_desired = core_mask;

	if (policy->cores_used & ~policy->cores_desired) {
		/* Some in-use cores are now unwanted: disable them and wait
		 * for them to power off before enabling the desired set.
		 */
		policy->cores_enabled &= policy->cores_desired;
		kbdev->pm.backend.ca_in_transition = true;
	} else {
		/* There are no cores to be powered off, so power on the
		 * desired cores.
		 */
		policy->cores_enabled = policy->cores_desired;
		kbdev->pm.backend.ca_in_transition = false;
	}

	kbase_pm_update_cores_state_nolock(kbdev);

	spin_unlock_irqrestore(&kbdev->hwaccess_lock, irq_flags);

	dev_dbg(kbdev->dev, "Devfreq policy : new core mask=%llX %llX\n",
		policy->cores_desired, policy->cores_enabled);
}
|
||||
|
||||
static void devfreq_init(struct kbase_device *kbdev)
|
||||
{
|
||||
struct kbasep_pm_ca_policy_devfreq *data =
|
||||
&kbdev->pm.backend.ca_policy_data.devfreq;
|
||||
|
||||
if (kbdev->current_core_mask) {
|
||||
data->cores_enabled = kbdev->current_core_mask;
|
||||
data->cores_desired = kbdev->current_core_mask;
|
||||
} else {
|
||||
data->cores_enabled =
|
||||
kbdev->gpu_props.props.raw_props.shader_present;
|
||||
data->cores_desired =
|
||||
kbdev->gpu_props.props.raw_props.shader_present;
|
||||
}
|
||||
data->cores_used = 0;
|
||||
kbdev->pm.backend.ca_in_transition = false;
|
||||
}
|
||||
|
||||
/**
 * devfreq_term - Terminate the devfreq core availability policy
 * @kbdev: The kbase device structure for the device
 *
 * Intentionally empty.
 */
static void devfreq_term(struct kbase_device *kbdev)
{
}
|
||||
|
||||
static u64 devfreq_get_core_mask(struct kbase_device *kbdev)
|
||||
{
|
||||
return kbdev->pm.backend.ca_policy_data.devfreq.cores_enabled;
|
||||
}
|
||||
|
||||
static void devfreq_update_core_status(struct kbase_device *kbdev,
|
||||
u64 cores_ready,
|
||||
u64 cores_transitioning)
|
||||
{
|
||||
struct kbasep_pm_ca_policy_devfreq *data =
|
||||
&kbdev->pm.backend.ca_policy_data.devfreq;
|
||||
|
||||
lockdep_assert_held(&kbdev->hwaccess_lock);
|
||||
|
||||
data->cores_used = cores_ready | cores_transitioning;
|
||||
|
||||
/* If in desired state then clear transition flag */
|
||||
if (data->cores_enabled == data->cores_desired)
|
||||
kbdev->pm.backend.ca_in_transition = false;
|
||||
|
||||
/* If all undesired cores are now off then power on desired cores.
|
||||
* The direct comparison against cores_enabled limits potential
|
||||
* recursion to one level */
|
||||
if (!(data->cores_used & ~data->cores_desired) &&
|
||||
data->cores_enabled != data->cores_desired) {
|
||||
data->cores_enabled = data->cores_desired;
|
||||
|
||||
kbase_pm_update_cores_state_nolock(kbdev);
|
||||
|
||||
kbdev->pm.backend.ca_in_transition = false;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* The struct kbase_pm_ca_policy structure for the devfreq core availability
|
||||
* policy.
|
||||
*
|
||||
* This is the static structure that defines the devfreq core availability power
|
||||
* policy's callback and name.
|
||||
*/
|
||||
const struct kbase_pm_ca_policy kbase_pm_ca_devfreq_policy_ops = {
|
||||
"devfreq", /* name */
|
||||
devfreq_init, /* init */
|
||||
devfreq_term, /* term */
|
||||
devfreq_get_core_mask, /* get_core_mask */
|
||||
devfreq_update_core_status, /* update_core_status */
|
||||
0u, /* flags */
|
||||
KBASE_PM_CA_POLICY_ID_DEVFREQ, /* id */
|
||||
};
|
||||
|
||||
@@ -1,70 +0,0 @@
|
||||
/*
|
||||
*
|
||||
* (C) COPYRIGHT 2013-2015 ARM Limited. All rights reserved.
|
||||
*
|
||||
* This program is free software and is provided to you under the terms of the
|
||||
* GNU General Public License version 2 as published by the Free Software
|
||||
* Foundation, and any use by you of this program is subject to the terms
|
||||
* of such GNU licence.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, you can access it online at
|
||||
* http://www.gnu.org/licenses/gpl-2.0.html.
|
||||
*
|
||||
* SPDX-License-Identifier: GPL-2.0
|
||||
*
|
||||
*/
|
||||
|
||||
/*
|
||||
* A power policy implementing fixed core availability
|
||||
*/
|
||||
|
||||
#include <mali_kbase.h>
|
||||
#include <mali_kbase_pm.h>
|
||||
|
||||
static void fixed_init(struct kbase_device *kbdev)
|
||||
{
|
||||
kbdev->pm.backend.ca_in_transition = false;
|
||||
}
|
||||
|
||||
/**
 * fixed_term - Terminate the fixed core availability policy
 * @kbdev: The kbase device structure for the device (unused)
 *
 * Nothing to release.
 */
static void fixed_term(struct kbase_device *kbdev)
{
	/* Parameter is intentionally unused */
	CSTD_UNUSED(kbdev);
}
|
||||
|
||||
static u64 fixed_get_core_mask(struct kbase_device *kbdev)
|
||||
{
|
||||
return kbdev->gpu_props.props.raw_props.shader_present;
|
||||
}
|
||||
|
||||
static void fixed_update_core_status(struct kbase_device *kbdev,
|
||||
u64 cores_ready,
|
||||
u64 cores_transitioning)
|
||||
{
|
||||
CSTD_UNUSED(kbdev);
|
||||
CSTD_UNUSED(cores_ready);
|
||||
CSTD_UNUSED(cores_transitioning);
|
||||
}
|
||||
|
||||
/*
|
||||
 * The struct kbase_pm_ca_policy structure for the fixed core availability policy.
|
||||
*
|
||||
* This is the static structure that defines the fixed power policy's callback
|
||||
* and name.
|
||||
*/
|
||||
const struct kbase_pm_ca_policy kbase_pm_ca_fixed_policy_ops = {
|
||||
"fixed", /* name */
|
||||
fixed_init, /* init */
|
||||
fixed_term, /* term */
|
||||
fixed_get_core_mask, /* get_core_mask */
|
||||
fixed_update_core_status, /* update_core_status */
|
||||
0u, /* flags */
|
||||
KBASE_PM_CA_POLICY_ID_FIXED, /* id */
|
||||
};
|
||||
|
||||
KBASE_EXPORT_TEST_API(kbase_pm_ca_fixed_policy_ops);
|
||||
@@ -1,6 +1,6 @@
|
||||
/*
|
||||
*
|
||||
* (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved.
|
||||
* (C) COPYRIGHT 2012-2018 ARM Limited. All rights reserved.
|
||||
*
|
||||
* This program is free software and is provided to you under the terms of the
|
||||
* GNU General Public License version 2 as published by the Free Software
|
||||
@@ -29,22 +29,14 @@
|
||||
#include <mali_kbase.h>
|
||||
#include <mali_kbase_pm.h>
|
||||
|
||||
static u64 coarse_demand_get_core_mask(struct kbase_device *kbdev)
|
||||
static bool coarse_demand_shaders_needed(struct kbase_device *kbdev)
|
||||
{
|
||||
if (kbdev->pm.active_count == 0)
|
||||
return 0;
|
||||
|
||||
return kbdev->gpu_props.props.raw_props.shader_present;
|
||||
return kbase_pm_is_active(kbdev);
|
||||
}
|
||||
|
||||
static bool coarse_demand_get_core_active(struct kbase_device *kbdev)
|
||||
{
|
||||
if (0 == kbdev->pm.active_count && !(kbdev->shader_needed_bitmap |
|
||||
kbdev->shader_inuse_bitmap) && !kbdev->tiler_needed_cnt
|
||||
&& !kbdev->tiler_inuse_cnt)
|
||||
return false;
|
||||
|
||||
return true;
|
||||
return kbase_pm_is_active(kbdev);
|
||||
}
|
||||
|
||||
static void coarse_demand_init(struct kbase_device *kbdev)
|
||||
@@ -66,7 +58,7 @@ const struct kbase_pm_policy kbase_pm_coarse_demand_policy_ops = {
|
||||
"coarse_demand", /* name */
|
||||
coarse_demand_init, /* init */
|
||||
coarse_demand_term, /* term */
|
||||
coarse_demand_get_core_mask, /* get_core_mask */
|
||||
coarse_demand_shaders_needed, /* shaders_needed */
|
||||
coarse_demand_get_core_active, /* get_core_active */
|
||||
0u, /* flags */
|
||||
KBASE_PM_POLICY_ID_COARSE_DEMAND, /* id */
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
/*
|
||||
*
|
||||
* (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved.
|
||||
* (C) COPYRIGHT 2012-2015,2018 ARM Limited. All rights reserved.
|
||||
*
|
||||
* This program is free software and is provided to you under the terms of the
|
||||
* GNU General Public License version 2 as published by the Free Software
|
||||
@@ -35,11 +35,11 @@
|
||||
* characteristics:
|
||||
* - When KBase indicates that the GPU will be powered up, but we don't yet
|
||||
* know which Job Chains are to be run:
|
||||
* - All Shader Cores are powered up, regardless of whether or not they will
|
||||
* be needed later.
|
||||
* - When KBase indicates that a set of Shader Cores are needed to submit the
|
||||
* currently queued Job Chains:
|
||||
* - All Shader Cores are kept powered, regardless of whether or not they will
|
||||
* - Shader Cores are powered up, regardless of whether or not they will be
|
||||
* needed later.
|
||||
* - When KBase indicates that Shader Cores are needed to submit the currently
|
||||
* queued Job Chains:
|
||||
* - Shader Cores are kept powered, regardless of whether or not they will
|
||||
* be needed
|
||||
* - When KBase indicates that the GPU need not be powered:
|
||||
* - The Shader Cores are powered off, and the GPU itself is powered off too.
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
/*
|
||||
*
|
||||
* (C) COPYRIGHT 2014-2018 ARM Limited. All rights reserved.
|
||||
* (C) COPYRIGHT 2014-2019 ARM Limited. All rights reserved.
|
||||
*
|
||||
* This program is free software and is provided to you under the terms of the
|
||||
* GNU General Public License version 2 as published by the Free Software
|
||||
@@ -27,24 +27,21 @@
|
||||
#ifndef _KBASE_PM_HWACCESS_DEFS_H_
|
||||
#define _KBASE_PM_HWACCESS_DEFS_H_
|
||||
|
||||
#include "mali_kbase_pm_ca_fixed.h"
|
||||
#include "mali_kbase_pm_ca_devfreq.h"
|
||||
#if !MALI_CUSTOMER_RELEASE
|
||||
#include "mali_kbase_pm_ca_random.h"
|
||||
#endif
|
||||
|
||||
#include "mali_kbase_pm_always_on.h"
|
||||
#include "mali_kbase_pm_coarse_demand.h"
|
||||
#include "mali_kbase_pm_demand.h"
|
||||
#if !MALI_CUSTOMER_RELEASE
|
||||
#include "mali_kbase_pm_demand_always_powered.h"
|
||||
#include "mali_kbase_pm_fast_start.h"
|
||||
#include "mali_kbase_pm_always_on_demand.h"
|
||||
#endif
|
||||
|
||||
/* Forward definition - see mali_kbase.h */
|
||||
struct kbase_device;
|
||||
struct kbase_jd_atom;
|
||||
|
||||
/**
|
||||
* Maximum number of PM policies that may be active on a device.
|
||||
*/
|
||||
#define KBASE_PM_MAX_NUM_POLICIES (10)
|
||||
|
||||
/**
|
||||
* enum kbase_pm_core_type - The types of core in a GPU.
|
||||
*
|
||||
@@ -70,6 +67,71 @@ enum kbase_pm_core_type {
|
||||
KBASE_PM_CORE_STACK = STACK_PRESENT_LO
|
||||
};
|
||||
|
||||
/**
|
||||
* enum kbase_l2_core_state - The states used for the L2 cache & tiler power
|
||||
* state machine.
|
||||
*
|
||||
* @KBASE_L2_OFF: The L2 cache and tiler are off
|
||||
* @KBASE_L2_PEND_ON: The L2 cache and tiler are powering on
|
||||
* @KBASE_L2_RESTORE_CLOCKS: The GPU clock is restored. Conditionally used.
|
||||
* @KBASE_L2_ON_HWCNT_ENABLE: The L2 cache and tiler are on, and hwcnt is being
|
||||
* enabled
|
||||
* @KBASE_L2_ON: The L2 cache and tiler are on, and hwcnt is enabled
|
||||
* @KBASE_L2_ON_HWCNT_DISABLE: The L2 cache and tiler are on, and hwcnt is being
|
||||
* disabled
|
||||
* @KBASE_L2_SLOW_DOWN_CLOCKS: The GPU clock is set to appropriate or lowest
|
||||
* clock. Conditionally used.
|
||||
* @KBASE_L2_POWER_DOWN: The L2 cache and tiler are about to be powered off
|
||||
* @KBASE_L2_PEND_OFF: The L2 cache and tiler are powering off
|
||||
* @KBASE_L2_RESET_WAIT: The GPU is resetting, L2 cache and tiler power state
|
||||
* are unknown
|
||||
*/
|
||||
enum kbase_l2_core_state {
|
||||
#define KBASEP_L2_STATE(n) KBASE_L2_ ## n,
|
||||
#include "mali_kbase_pm_l2_states.h"
|
||||
#undef KBASEP_L2_STATE
|
||||
};
|
||||
|
||||
/**
|
||||
* enum kbase_shader_core_state - The states used for the shaders' state machine.
|
||||
*
|
||||
* @KBASE_SHADERS_OFF_CORESTACK_OFF: The shaders and core stacks are off
|
||||
* @KBASE_SHADERS_OFF_CORESTACK_PEND_ON: The shaders are off, core stacks have
|
||||
* been requested to power on and hwcnt
|
||||
* is being disabled
|
||||
* @KBASE_SHADERS_PEND_ON_CORESTACK_ON: Core stacks are on, shaders have been
|
||||
* requested to power on.
|
||||
* @KBASE_SHADERS_ON_CORESTACK_ON: The shaders and core stacks are on, and hwcnt
|
||||
* already enabled.
|
||||
* @KBASE_SHADERS_ON_CORESTACK_ON_RECHECK: The shaders and core stacks
|
||||
* are on, hwcnt disabled, and checks
|
||||
* whether to power down or re-enable
|
||||
* hwcnt.
|
||||
* @KBASE_SHADERS_WAIT_OFF_CORESTACK_ON: The shaders have been requested to
|
||||
* power off, but they remain on for the
|
||||
* duration of the hysteresis timer
|
||||
* @KBASE_SHADERS_WAIT_FINISHED_CORESTACK_ON: The hysteresis timer has expired
|
||||
* @KBASE_SHADERS_L2_FLUSHING_CORESTACK_ON: The core stacks are on and the
|
||||
* level 2 cache is being flushed.
|
||||
* @KBASE_SHADERS_READY_OFF_CORESTACK_ON: The core stacks are on and the shaders
|
||||
* are ready to be powered off.
|
||||
* @KBASE_SHADERS_PEND_OFF_CORESTACK_ON: The core stacks are on, and the shaders
|
||||
* have been requested to power off
|
||||
* @KBASE_SHADERS_OFF_CORESTACK_PEND_OFF: The shaders are off, and the core stacks
|
||||
* have been requested to power off
|
||||
* @KBASE_SHADERS_OFF_CORESTACK_OFF_TIMER_PEND_OFF: Shaders and corestacks are
|
||||
* off, but the tick timer
|
||||
* cancellation is still
|
||||
* pending.
|
||||
* @KBASE_SHADERS_RESET_WAIT: The GPU is resetting, shader and core stack power
|
||||
* states are unknown
|
||||
*/
|
||||
enum kbase_shader_core_state {
|
||||
#define KBASEP_SHADER_STATE(n) KBASE_SHADERS_ ## n,
|
||||
#include "mali_kbase_pm_shader_states.h"
|
||||
#undef KBASEP_SHADER_STATE
|
||||
};
|
||||
|
||||
/**
|
||||
* struct kbasep_pm_metrics - Metrics data collected for use by the power
|
||||
* management framework.
|
||||
@@ -100,8 +162,7 @@ struct kbasep_pm_metrics {
|
||||
* not. Updated when the job scheduler informs us a job is submitted
|
||||
* or removed from a GPU slot.
|
||||
* @active_cl_ctx: number of CL jobs active on the GPU. Array is per-device.
|
||||
* @active_gl_ctx: number of GL jobs active on the GPU. Array is per-slot. As
|
||||
* GL jobs never run on slot 2 this slot is not recorded.
|
||||
* @active_gl_ctx: number of GL jobs active on the GPU. Array is per-slot.
|
||||
* @lock: spinlock protecting the kbasep_pm_metrics_data structure
|
||||
* @platform_data: pointer to data controlled by platform specific code
|
||||
* @kbdev: pointer to kbase device for which metrics are collected
|
||||
@@ -118,7 +179,7 @@ struct kbasep_pm_metrics_state {
|
||||
ktime_t time_period_start;
|
||||
bool gpu_active;
|
||||
u32 active_cl_ctx[2];
|
||||
u32 active_gl_ctx[2]; /* GL jobs can only run on 2 of the 3 job slots */
|
||||
u32 active_gl_ctx[3];
|
||||
spinlock_t lock;
|
||||
|
||||
void *platform_data;
|
||||
@@ -134,21 +195,39 @@ struct kbasep_pm_metrics_state {
|
||||
#endif
|
||||
};
|
||||
|
||||
/**
|
||||
* struct kbasep_pm_tick_timer_state - State for the shader hysteresis timer
|
||||
* @wq: Work queue to wait for the timer to be stopped
|
||||
* @work: Work item which cancels the timer
|
||||
* @timer: Timer for powering off the shader cores
|
||||
* @configured_interval: Period of GPU poweroff timer
|
||||
* @configured_ticks: User-configured number of ticks to wait after the shader
|
||||
* power down request is received before turning off the cores
|
||||
* @remaining_ticks: Number of remaining timer ticks until shaders are powered off
|
||||
* @cancel_queued: True if the cancellation work item has been queued. This is
|
||||
* required to ensure that it is not queued twice, e.g. after
|
||||
* a reset, which could cause the timer to be incorrectly
|
||||
* cancelled later by a delayed workitem.
|
||||
* @needed: Whether the timer should restart itself
|
||||
*/
|
||||
struct kbasep_pm_tick_timer_state {
|
||||
struct workqueue_struct *wq;
|
||||
struct work_struct work;
|
||||
struct hrtimer timer;
|
||||
|
||||
ktime_t configured_interval;
|
||||
unsigned int configured_ticks;
|
||||
unsigned int remaining_ticks;
|
||||
|
||||
bool cancel_queued;
|
||||
bool needed;
|
||||
};
|
||||
|
||||
union kbase_pm_policy_data {
|
||||
struct kbasep_pm_policy_always_on always_on;
|
||||
struct kbasep_pm_policy_coarse_demand coarse_demand;
|
||||
struct kbasep_pm_policy_demand demand;
|
||||
#if !MALI_CUSTOMER_RELEASE
|
||||
struct kbasep_pm_policy_demand_always_powered demand_always_powered;
|
||||
struct kbasep_pm_policy_fast_start fast_start;
|
||||
#endif
|
||||
};
|
||||
|
||||
union kbase_pm_ca_policy_data {
|
||||
struct kbasep_pm_ca_policy_fixed fixed;
|
||||
struct kbasep_pm_ca_policy_devfreq devfreq;
|
||||
#if !MALI_CUSTOMER_RELEASE
|
||||
struct kbasep_pm_ca_policy_random random;
|
||||
struct kbasep_pm_policy_always_on_demand always_on_demand;
|
||||
#endif
|
||||
};
|
||||
|
||||
@@ -158,77 +237,38 @@ union kbase_pm_ca_policy_data {
|
||||
* This structure contains data for the power management framework. There is one
|
||||
* instance of this structure per device in the system.
|
||||
*
|
||||
* @ca_current_policy: The policy that is currently actively controlling core
|
||||
* availability.
|
||||
* @pm_current_policy: The policy that is currently actively controlling the
|
||||
* power state.
|
||||
* @ca_policy_data: Private data for current CA policy
|
||||
* @pm_policy_data: Private data for current PM policy
|
||||
* @ca_in_transition: Flag indicating when core availability policy is
|
||||
* transitioning cores. The core availability policy must
|
||||
* set this when a change in core availability is occurring.
|
||||
* power_change_lock must be held when accessing this.
|
||||
* @reset_done: Flag when a reset is complete
|
||||
* @reset_done_wait: Wait queue to wait for changes to @reset_done
|
||||
* @l2_powered_wait: Wait queue for whether the l2 cache has been powered as
|
||||
* requested
|
||||
* @l2_powered: State indicating whether all the l2 caches are powered.
|
||||
* Non-zero indicates they're *all* powered
|
||||
* Zero indicates that some (or all) are not powered
|
||||
* @gpu_cycle_counter_requests: The reference count of active gpu cycle counter
|
||||
* users
|
||||
* @gpu_cycle_counter_requests_lock: Lock to protect @gpu_cycle_counter_requests
|
||||
* @desired_shader_state: A bit mask identifying the shader cores that the
|
||||
* power policy would like to be on. The current state
|
||||
* of the cores may be different, but there should be
|
||||
* transitions in progress that will eventually achieve
|
||||
* this state (assuming that the policy doesn't change
|
||||
* its mind in the mean time).
|
||||
* @powering_on_shader_state: A bit mask indicating which shader cores are
|
||||
* currently in a power-on transition
|
||||
* @desired_tiler_state: A bit mask identifying the tiler cores that the power
|
||||
* policy would like to be on. See @desired_shader_state
|
||||
* @powering_on_tiler_state: A bit mask indicating which tiler cores are
|
||||
* currently in a power-on transition
|
||||
* @powering_on_l2_state: A bit mask indicating which l2-caches are currently
|
||||
* in a power-on transition
|
||||
* @powering_on_stack_state: A bit mask indicating which core stacks are
|
||||
* currently in a power-on transition
|
||||
* @gpu_in_desired_state: This flag is set if the GPU is powered as requested
|
||||
* by the desired_xxx_state variables
|
||||
* @gpu_in_desired_state_wait: Wait queue set when @gpu_in_desired_state != 0
|
||||
* @gpu_in_desired_state_wait: Wait queue set when the GPU is in the desired
|
||||
* state according to the L2 and shader power state
|
||||
* machines
|
||||
* @gpu_powered: Set to true when the GPU is powered and register
|
||||
* accesses are possible, false otherwise
|
||||
* @instr_enabled: Set to true when instrumentation is enabled,
|
||||
* false otherwise
|
||||
* accesses are possible, false otherwise. Access to this
|
||||
* variable should be protected by: both the hwaccess_lock
|
||||
* spinlock and the pm.lock mutex for writes; or at least
|
||||
* one of either lock for reads.
|
||||
* @pm_shaders_core_mask: Shader PM state synchronised shaders core mask. It
|
||||
* holds the cores enabled in a hardware counters dump,
|
||||
* and may differ from @shaders_avail when under different
|
||||
* states and transitions.
|
||||
* @cg1_disabled: Set if the policy wants to keep the second core group
|
||||
* powered off
|
||||
* @driver_ready_for_irqs: Debug state indicating whether sufficient
|
||||
* initialization of the driver has occurred to handle
|
||||
* IRQs
|
||||
* @gpu_powered_lock: Spinlock that must be held when writing @gpu_powered or
|
||||
* accessing @driver_ready_for_irqs
|
||||
* @metrics: Structure to hold metrics for the GPU
|
||||
* @gpu_poweroff_pending: number of poweroff timer ticks until the GPU is
|
||||
* powered off
|
||||
* @shader_poweroff_pending_time: number of poweroff timer ticks until shaders
|
||||
* and/or tilers are powered off
|
||||
* @gpu_poweroff_timer: Timer for powering off GPU
|
||||
* @gpu_poweroff_wq: Workqueue to power off GPU on when timer fires
|
||||
* @gpu_poweroff_work: Workitem used on @gpu_poweroff_wq
|
||||
* @shader_poweroff_pending: Bit mask of shaders to be powered off on next
|
||||
* timer callback
|
||||
* @tiler_poweroff_pending: Bit mask of tilers to be powered off on next timer
|
||||
* callback
|
||||
* @poweroff_timer_needed: true if the poweroff timer is currently required,
|
||||
* false otherwise
|
||||
* @poweroff_timer_running: true if the poweroff timer is currently running,
|
||||
* false otherwise
|
||||
* power_change_lock should be held when accessing,
|
||||
* unless there is no way the timer can be running (eg
|
||||
* hrtimer_cancel() was called immediately before)
|
||||
* @shader_tick_timer: Structure to hold the shader poweroff tick timer state
|
||||
* @poweroff_wait_in_progress: true if a wait for GPU power off is in progress.
|
||||
* hwaccess_lock must be held when accessing
|
||||
* @invoke_poweroff_wait_wq_when_l2_off: flag indicating that the L2 power state
|
||||
* machine should invoke the poweroff
|
||||
* worker after the L2 has turned off.
|
||||
* @poweron_required: true if a GPU power on is required. Should only be set
|
||||
* when poweroff_wait_in_progress is true, and therefore the
|
||||
* GPU can not immediately be powered on. pm.lock must be
|
||||
@@ -252,44 +292,76 @@ union kbase_pm_ca_policy_data {
|
||||
* &struct kbase_pm_callback_conf
|
||||
* @callback_power_runtime_idle: Optional callback when the GPU may be idle. See
|
||||
* &struct kbase_pm_callback_conf
|
||||
* @ca_cores_enabled: Cores that are currently available
|
||||
* @l2_state: The current state of the L2 cache state machine. See
|
||||
* &enum kbase_l2_core_state
|
||||
* @l2_desired: True if the L2 cache should be powered on by the L2 cache state
|
||||
* machine
|
||||
* @l2_always_on: If true, disable powering down of l2 cache.
|
||||
* @shaders_state: The current state of the shader state machine.
|
||||
* @shaders_avail: This is updated by the state machine when it is in a state
|
||||
* where it can handle changes to the core availability. This
|
||||
* is internal to the shader state machine and should *not* be
|
||||
* modified elsewhere.
|
||||
* @shaders_desired: True if the PM active count or power policy requires the
|
||||
* shader cores to be on. This is used as an input to the
|
||||
* shader power state machine. The current state of the
|
||||
* cores may be different, but there should be transitions in
|
||||
* progress that will eventually achieve this state (assuming
|
||||
* that the policy doesn't change its mind in the mean time).
|
||||
* @in_reset: True if a GPU is resetting and normal power manager operation is
|
||||
* suspended
|
||||
* @protected_entry_transition_override : True if GPU reset is being used
|
||||
* before entering the protected mode and so
|
||||
* the reset handling behaviour is being
|
||||
* overridden.
|
||||
* @protected_transition_override : True if a protected mode transition is in
|
||||
* progress and is overriding power manager
|
||||
* behaviour.
|
||||
* @protected_l2_override : Non-zero if the L2 cache is required during a
|
||||
* protected mode transition. Has no effect if not
|
||||
* transitioning.
|
||||
* @hwcnt_desired: True if we want GPU hardware counters to be enabled.
|
||||
* @hwcnt_disabled: True if GPU hardware counters are not enabled.
|
||||
* @hwcnt_disable_work: Work item to disable GPU hardware counters, used if
|
||||
* atomic disable is not possible.
|
||||
* @gpu_clock_suspend_freq: 'opp-mali-errata-1485982' clock in opp table
|
||||
* for safe L2 power cycle.
|
||||
* If no opp-mali-errata-1485982 specified,
|
||||
* the slowest clock will be taken.
|
||||
* @gpu_clock_slow_down_wa: If true, slow down GPU clock during L2 power cycle.
|
||||
* @gpu_clock_slow_down_desired: True if we want lower GPU clock
|
||||
* for safe L2 power cycle. False if we want the GPU clock
|
||||
* to go back to the normal one. This is updated only
|
||||
* in L2 state machine, kbase_pm_l2_update_state.
|
||||
* @gpu_clock_slowed_down: During L2 power cycle,
|
||||
* True if gpu clock is set at lower frequency
|
||||
* for safe L2 power down, False if gpu clock gets
|
||||
* restored to previous speed. This is updated only in
|
||||
* work function, kbase_pm_gpu_clock_control_worker.
|
||||
* @gpu_clock_control_work: work item to set GPU clock during L2 power cycle
|
||||
* using gpu_clock_control
|
||||
*
|
||||
* Note:
|
||||
* During an IRQ, @ca_current_policy or @pm_current_policy can be NULL when the
|
||||
* policy is being changed with kbase_pm_ca_set_policy() or
|
||||
* kbase_pm_set_policy(). The change is protected under
|
||||
* kbase_device.pm.power_change_lock. Direct access to this
|
||||
* from IRQ context must therefore check for NULL. If NULL, then
|
||||
* kbase_pm_ca_set_policy() or kbase_pm_set_policy() will re-issue the policy
|
||||
* functions that would have been done under IRQ.
|
||||
* During an IRQ, @pm_current_policy can be NULL when the policy is being
|
||||
* changed with kbase_pm_set_policy(). The change is protected under
|
||||
* kbase_device.pm.power_change_lock. Direct access to this from IRQ context
|
||||
* must therefore check for NULL. If NULL, then kbase_pm_set_policy() will
|
||||
* re-issue the policy functions that would have been done under IRQ.
|
||||
*/
|
||||
struct kbase_pm_backend_data {
|
||||
const struct kbase_pm_ca_policy *ca_current_policy;
|
||||
const struct kbase_pm_policy *pm_current_policy;
|
||||
union kbase_pm_ca_policy_data ca_policy_data;
|
||||
union kbase_pm_policy_data pm_policy_data;
|
||||
bool ca_in_transition;
|
||||
bool reset_done;
|
||||
wait_queue_head_t reset_done_wait;
|
||||
wait_queue_head_t l2_powered_wait;
|
||||
int l2_powered;
|
||||
int gpu_cycle_counter_requests;
|
||||
spinlock_t gpu_cycle_counter_requests_lock;
|
||||
|
||||
u64 desired_shader_state;
|
||||
u64 powering_on_shader_state;
|
||||
u64 desired_tiler_state;
|
||||
u64 powering_on_tiler_state;
|
||||
u64 powering_on_l2_state;
|
||||
#ifdef CONFIG_MALI_CORESTACK
|
||||
u64 powering_on_stack_state;
|
||||
#endif /* CONFIG_MALI_CORESTACK */
|
||||
|
||||
bool gpu_in_desired_state;
|
||||
wait_queue_head_t gpu_in_desired_state_wait;
|
||||
|
||||
bool gpu_powered;
|
||||
|
||||
bool instr_enabled;
|
||||
u64 pm_shaders_core_mask;
|
||||
|
||||
bool cg1_disabled;
|
||||
|
||||
@@ -297,25 +369,12 @@ struct kbase_pm_backend_data {
|
||||
bool driver_ready_for_irqs;
|
||||
#endif /* CONFIG_MALI_BIFROST_DEBUG */
|
||||
|
||||
spinlock_t gpu_powered_lock;
|
||||
|
||||
|
||||
struct kbasep_pm_metrics_state metrics;
|
||||
|
||||
int gpu_poweroff_pending;
|
||||
int shader_poweroff_pending_time;
|
||||
|
||||
struct hrtimer gpu_poweroff_timer;
|
||||
struct workqueue_struct *gpu_poweroff_wq;
|
||||
struct work_struct gpu_poweroff_work;
|
||||
|
||||
u64 shader_poweroff_pending;
|
||||
u64 tiler_poweroff_pending;
|
||||
|
||||
bool poweroff_timer_needed;
|
||||
bool poweroff_timer_running;
|
||||
struct kbasep_pm_tick_timer_state shader_tick_timer;
|
||||
|
||||
bool poweroff_wait_in_progress;
|
||||
bool invoke_poweroff_wait_wq_when_l2_off;
|
||||
bool poweron_required;
|
||||
bool poweroff_is_suspend;
|
||||
|
||||
@@ -331,22 +390,47 @@ struct kbase_pm_backend_data {
|
||||
int (*callback_power_runtime_on)(struct kbase_device *kbdev);
|
||||
void (*callback_power_runtime_off)(struct kbase_device *kbdev);
|
||||
int (*callback_power_runtime_idle)(struct kbase_device *kbdev);
|
||||
|
||||
u64 ca_cores_enabled;
|
||||
|
||||
enum kbase_l2_core_state l2_state;
|
||||
enum kbase_shader_core_state shaders_state;
|
||||
u64 shaders_avail;
|
||||
bool l2_desired;
|
||||
bool l2_always_on;
|
||||
bool shaders_desired;
|
||||
|
||||
bool in_reset;
|
||||
|
||||
bool protected_entry_transition_override;
|
||||
bool protected_transition_override;
|
||||
int protected_l2_override;
|
||||
|
||||
bool hwcnt_desired;
|
||||
bool hwcnt_disabled;
|
||||
struct work_struct hwcnt_disable_work;
|
||||
|
||||
u64 gpu_clock_suspend_freq;
|
||||
bool gpu_clock_slow_down_wa;
|
||||
bool gpu_clock_slow_down_desired;
|
||||
bool gpu_clock_slowed_down;
|
||||
struct work_struct gpu_clock_control_work;
|
||||
};
|
||||
|
||||
|
||||
/* List of policy IDs */
|
||||
enum kbase_pm_policy_id {
|
||||
KBASE_PM_POLICY_ID_DEMAND = 1,
|
||||
KBASE_PM_POLICY_ID_ALWAYS_ON,
|
||||
KBASE_PM_POLICY_ID_COARSE_DEMAND,
|
||||
#if !MALI_CUSTOMER_RELEASE
|
||||
KBASE_PM_POLICY_ID_DEMAND_ALWAYS_POWERED,
|
||||
KBASE_PM_POLICY_ID_FAST_START
|
||||
KBASE_PM_POLICY_ID_ALWAYS_ON_DEMAND,
|
||||
#endif
|
||||
KBASE_PM_POLICY_ID_ALWAYS_ON
|
||||
};
|
||||
|
||||
typedef u32 kbase_pm_policy_flags;
|
||||
|
||||
#define KBASE_PM_POLICY_FLAG_DISABLED_WITH_POWER_DOWN_ONLY (1u)
|
||||
|
||||
/**
|
||||
* struct kbase_pm_policy - Power policy structure.
|
||||
*
|
||||
@@ -356,7 +440,7 @@ typedef u32 kbase_pm_policy_flags;
|
||||
* @name: The name of this policy
|
||||
* @init: Function called when the policy is selected
|
||||
* @term: Function called when the policy is unselected
|
||||
* @get_core_mask: Function called to get the current shader core mask
|
||||
* @shaders_needed: Function called to find out if shader cores are needed
|
||||
* @get_core_active: Function called to get the current overall GPU power
|
||||
* state
|
||||
* @flags: Field indicating flags for this policy
|
||||
@@ -391,26 +475,23 @@ struct kbase_pm_policy {
|
||||
void (*term)(struct kbase_device *kbdev);
|
||||
|
||||
/**
|
||||
* Function called to get the current shader core mask
|
||||
* Function called to find out if shader cores are needed
|
||||
*
|
||||
* The returned mask should meet or exceed (kbdev->shader_needed_bitmap
|
||||
* | kbdev->shader_inuse_bitmap).
|
||||
* This needs to at least satisfy kbdev->pm.backend.shaders_desired,
|
||||
* and so must never return false when shaders_desired is true.
|
||||
*
|
||||
* @kbdev: The kbase device structure for the device (must be a
|
||||
* valid pointer)
|
||||
*
|
||||
* Return: The mask of shader cores to be powered
|
||||
* Return: true if shader cores are needed, false otherwise
|
||||
*/
|
||||
u64 (*get_core_mask)(struct kbase_device *kbdev);
|
||||
bool (*shaders_needed)(struct kbase_device *kbdev);
|
||||
|
||||
/**
|
||||
* Function called to get the current overall GPU power state
|
||||
*
|
||||
* This function should consider the state of kbdev->pm.active_count. If
|
||||
* this count is greater than 0 then there is at least one active
|
||||
* context on the device and the GPU should be powered. If it is equal
|
||||
* to 0 then there are no active contexts and the GPU could be powered
|
||||
* off if desired.
|
||||
* This function must meet or exceed the requirements for power
|
||||
* indicated by kbase_pm_is_active().
|
||||
*
|
||||
* @kbdev: The kbase device structure for the device (must be a
|
||||
* valid pointer)
|
||||
@@ -423,111 +504,4 @@ struct kbase_pm_policy {
|
||||
enum kbase_pm_policy_id id;
|
||||
};
|
||||
|
||||
|
||||
enum kbase_pm_ca_policy_id {
|
||||
KBASE_PM_CA_POLICY_ID_FIXED = 1,
|
||||
KBASE_PM_CA_POLICY_ID_DEVFREQ,
|
||||
KBASE_PM_CA_POLICY_ID_RANDOM
|
||||
};
|
||||
|
||||
typedef u32 kbase_pm_ca_policy_flags;
|
||||
|
||||
/**
|
||||
* Maximum length of a CA policy name
|
||||
*/
|
||||
#define KBASE_PM_CA_MAX_POLICY_NAME_LEN 15
|
||||
|
||||
/**
|
||||
* struct kbase_pm_ca_policy - Core availability policy structure.
|
||||
*
|
||||
* Each core availability policy exposes a (static) instance of this structure
|
||||
* which contains function pointers to the policy's methods.
|
||||
*
|
||||
* @name: The name of this policy
|
||||
* @init: Function called when the policy is selected
|
||||
* @term: Function called when the policy is unselected
|
||||
* @get_core_mask: Function called to get the current shader core
|
||||
* availability mask
|
||||
* @update_core_status: Function called to update the current core status
|
||||
* @flags: Field indicating flags for this policy
|
||||
* @id: Field indicating an ID for this policy. This is not
|
||||
* necessarily the same as its index in the list returned
|
||||
* by kbase_pm_list_policies().
|
||||
* It is used purely for debugging.
|
||||
*/
|
||||
struct kbase_pm_ca_policy {
|
||||
char name[KBASE_PM_CA_MAX_POLICY_NAME_LEN + 1];
|
||||
|
||||
/**
|
||||
* Function called when the policy is selected
|
||||
*
|
||||
* This should initialize the kbdev->pm.ca_policy_data structure. It
|
||||
* should not attempt to make any changes to hardware state.
|
||||
*
|
||||
* It is undefined what state the cores are in when the function is
|
||||
* called.
|
||||
*
|
||||
* @kbdev The kbase device structure for the device (must be a
|
||||
* valid pointer)
|
||||
*/
|
||||
void (*init)(struct kbase_device *kbdev);
|
||||
|
||||
/**
|
||||
* Function called when the policy is unselected.
|
||||
*
|
||||
* @kbdev The kbase device structure for the device (must be a
|
||||
* valid pointer)
|
||||
*/
|
||||
void (*term)(struct kbase_device *kbdev);
|
||||
|
||||
/**
|
||||
* Function called to get the current shader core availability mask
|
||||
*
|
||||
* When a change in core availability is occurring, the policy must set
|
||||
* kbdev->pm.ca_in_transition to true. This is to indicate that
|
||||
* reporting changes in power state cannot be optimized out, even if
|
||||
* kbdev->pm.desired_shader_state remains unchanged. This must be done
|
||||
* by any functions internal to the Core Availability Policy that change
|
||||
* the return value of kbase_pm_ca_policy::get_core_mask.
|
||||
*
|
||||
* @kbdev The kbase device structure for the device (must be a
|
||||
* valid pointer)
|
||||
*
|
||||
* Return: The current core availability mask
|
||||
*/
|
||||
u64 (*get_core_mask)(struct kbase_device *kbdev);
|
||||
|
||||
/**
|
||||
* Function called to update the current core status
|
||||
*
|
||||
* If none of the cores in core group 0 are ready or transitioning, then
|
||||
* the policy must ensure that the next call to get_core_mask does not
|
||||
* return 0 for all cores in core group 0. It is an error to disable
|
||||
* core group 0 through the core availability policy.
|
||||
*
|
||||
* When a change in core availability has finished, the policy must set
|
||||
* kbdev->pm.ca_in_transition to false. This is to indicate that
|
||||
* changes in power state can once again be optimized out when
|
||||
* kbdev->pm.desired_shader_state is unchanged.
|
||||
*
|
||||
* @kbdev: The kbase device structure for the device
|
||||
* (must be a valid pointer)
|
||||
* @cores_ready: The mask of cores currently powered and
|
||||
* ready to run jobs
|
||||
* @cores_transitioning: The mask of cores currently transitioning
|
||||
* power state
|
||||
*/
|
||||
void (*update_core_status)(struct kbase_device *kbdev, u64 cores_ready,
|
||||
u64 cores_transitioning);
|
||||
|
||||
kbase_pm_ca_policy_flags flags;
|
||||
|
||||
/**
|
||||
* Field indicating an ID for this policy. This is not necessarily the
|
||||
* same as its index in the list returned by kbase_pm_list_policies().
|
||||
* It is used purely for debugging.
|
||||
*/
|
||||
enum kbase_pm_ca_policy_id id;
|
||||
};
|
||||
|
||||
#endif /* _KBASE_PM_HWACCESS_DEFS_H_ */
|
||||
|
||||
@@ -1,78 +0,0 @@
|
||||
/*
|
||||
*
|
||||
* (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
|
||||
*
|
||||
* This program is free software and is provided to you under the terms of the
|
||||
* GNU General Public License version 2 as published by the Free Software
|
||||
* Foundation, and any use by you of this program is subject to the terms
|
||||
* of such GNU licence.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, you can access it online at
|
||||
* http://www.gnu.org/licenses/gpl-2.0.html.
|
||||
*
|
||||
* SPDX-License-Identifier: GPL-2.0
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
|
||||
/*
|
||||
* A simple demand based power management policy
|
||||
*/
|
||||
|
||||
#include <mali_kbase.h>
|
||||
#include <mali_kbase_pm.h>
|
||||
|
||||
static u64 demand_get_core_mask(struct kbase_device *kbdev)
|
||||
{
|
||||
u64 desired = kbdev->shader_needed_bitmap | kbdev->shader_inuse_bitmap;
|
||||
|
||||
if (0 == kbdev->pm.active_count)
|
||||
return 0;
|
||||
|
||||
return desired;
|
||||
}
|
||||
|
||||
static bool demand_get_core_active(struct kbase_device *kbdev)
|
||||
{
|
||||
if (0 == kbdev->pm.active_count && !(kbdev->shader_needed_bitmap |
|
||||
kbdev->shader_inuse_bitmap) && !kbdev->tiler_needed_cnt
|
||||
&& !kbdev->tiler_inuse_cnt)
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static void demand_init(struct kbase_device *kbdev)
|
||||
{
|
||||
CSTD_UNUSED(kbdev);
|
||||
}
|
||||
|
||||
static void demand_term(struct kbase_device *kbdev)
|
||||
{
|
||||
CSTD_UNUSED(kbdev);
|
||||
}
|
||||
|
||||
/*
|
||||
* The struct kbase_pm_policy structure for the demand power policy.
|
||||
*
|
||||
* This is the static structure that defines the demand power policy's callback
|
||||
* and name.
|
||||
*/
|
||||
const struct kbase_pm_policy kbase_pm_demand_policy_ops = {
|
||||
"demand", /* name */
|
||||
demand_init, /* init */
|
||||
demand_term, /* term */
|
||||
demand_get_core_mask, /* get_core_mask */
|
||||
demand_get_core_active, /* get_core_active */
|
||||
0u, /* flags */
|
||||
KBASE_PM_POLICY_ID_DEMAND, /* id */
|
||||
};
|
||||
|
||||
KBASE_EXPORT_TEST_API(kbase_pm_demand_policy_ops);
|
||||
@@ -1,69 +0,0 @@
|
||||
/*
|
||||
*
|
||||
* (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
|
||||
*
|
||||
* This program is free software and is provided to you under the terms of the
|
||||
* GNU General Public License version 2 as published by the Free Software
|
||||
* Foundation, and any use by you of this program is subject to the terms
|
||||
* of such GNU licence.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, you can access it online at
|
||||
* http://www.gnu.org/licenses/gpl-2.0.html.
|
||||
*
|
||||
* SPDX-License-Identifier: GPL-2.0
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
|
||||
/*
|
||||
* A simple demand based power management policy
|
||||
*/
|
||||
|
||||
#ifndef MALI_KBASE_PM_DEMAND_H
|
||||
#define MALI_KBASE_PM_DEMAND_H
|
||||
|
||||
/**
|
||||
* DOC: Demand power management policy
|
||||
*
|
||||
* The demand power management policy has the following characteristics:
|
||||
* - When KBase indicates that the GPU will be powered up, but we don't yet
|
||||
* know which Job Chains are to be run:
|
||||
* - The Shader Cores are not powered up
|
||||
*
|
||||
* - When KBase indicates that a set of Shader Cores are needed to submit the
|
||||
* currently queued Job Chains:
|
||||
* - Only those Shader Cores are powered up
|
||||
*
|
||||
* - When KBase indicates that the GPU need not be powered:
|
||||
* - The Shader Cores are powered off, and the GPU itself is powered off too.
|
||||
*
|
||||
* Note:
|
||||
* - KBase indicates the GPU will be powered up when it has a User Process that
|
||||
* has just started to submit Job Chains.
|
||||
*
|
||||
* - KBase indicates the GPU need not be powered when all the Job Chains from
|
||||
* User Processes have finished, and it is waiting for a User Process to
|
||||
* submit some more Job Chains.
|
||||
*/
|
||||
|
||||
/**
|
||||
* struct kbasep_pm_policy_demand - Private structure for policy instance data
|
||||
*
|
||||
* @dummy: Placeholder member; no state is needed by this policy
|
||||
*
|
||||
* This contains data that is private to the demand power policy.
|
||||
*/
|
||||
struct kbasep_pm_policy_demand {
|
||||
int dummy;
|
||||
};
|
||||
|
||||
extern const struct kbase_pm_policy kbase_pm_demand_policy_ops;
|
||||
|
||||
#endif /* MALI_KBASE_PM_DEMAND_H */
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,6 +1,6 @@
|
||||
/*
|
||||
*
|
||||
* (C) COPYRIGHT 2010-2018 ARM Limited. All rights reserved.
|
||||
* (C) COPYRIGHT 2010-2019 ARM Limited. All rights reserved.
|
||||
*
|
||||
* This program is free software and is provided to you under the terms of the
|
||||
* GNU General Public License version 2 as published by the Free Software
|
||||
@@ -163,7 +163,7 @@ void kbase_pm_enable_interrupts(struct kbase_device *kbdev);
|
||||
* kbase_pm_disable_interrupts - Disable interrupts on the device.
|
||||
*
|
||||
* This prevents delivery of Power Management interrupts to the CPU so that
|
||||
* kbase_pm_check_transitions_nolock() will not be called from the IRQ handler
|
||||
* kbase_pm_update_state() will not be called from the IRQ handler
|
||||
* until kbase_pm_enable_interrupts() or kbase_pm_clock_on() is called.
|
||||
*
|
||||
* Interrupts are also disabled after a call to kbase_pm_clock_off().
|
||||
@@ -206,58 +206,43 @@ int kbase_pm_init_hw(struct kbase_device *kbdev, unsigned int flags);
|
||||
*/
|
||||
void kbase_pm_reset_done(struct kbase_device *kbdev);
|
||||
|
||||
|
||||
/**
|
||||
* kbase_pm_check_transitions_nolock - Check if there are any power transitions
|
||||
* to make, and if so start them.
|
||||
* kbase_pm_wait_for_desired_state - Wait for the desired power state to be
|
||||
* reached
|
||||
*
|
||||
* This function will check the desired_xx_state members of
|
||||
* struct kbase_pm_device_data and the actual status of the hardware to see if
|
||||
* any power transitions can be made at this time to make the hardware state
|
||||
* closer to the state desired by the power policy.
|
||||
*
|
||||
* The return value can be used to check whether all the desired cores are
|
||||
* available, and so whether it's worth submitting a job (e.g. from a Power
|
||||
* Management IRQ).
|
||||
*
|
||||
* Note that this still returns true when desired_xx_state has no
|
||||
* cores. That is: of the no cores desired, none were *un*available. In
|
||||
* this case, the caller may still need to try submitting jobs. This is because
|
||||
* the Core Availability Policy might have taken us to an intermediate state
|
||||
* where no cores are powered, before powering on more cores (e.g. for core
|
||||
* rotation)
|
||||
*
|
||||
* The caller must hold kbase_device.pm.power_change_lock
|
||||
*
|
||||
* @kbdev: The kbase device structure for the device (must be a valid pointer)
|
||||
*
|
||||
* Return: non-zero when all desired cores are available. That is,
|
||||
* it's worthwhile for the caller to submit a job.
|
||||
* false otherwise
|
||||
*/
|
||||
bool kbase_pm_check_transitions_nolock(struct kbase_device *kbdev);
|
||||
|
||||
/**
|
||||
* kbase_pm_check_transitions_sync - Synchronous and locking variant of
|
||||
* kbase_pm_check_transitions_nolock()
|
||||
*
|
||||
* On returning, the desired state at the time of the call will have been met.
|
||||
*
|
||||
* There is nothing to stop the core being switched off by calls to
|
||||
* kbase_pm_release_cores() or kbase_pm_unrequest_cores(). Therefore, the
|
||||
* caller must have already made a call to
|
||||
* kbase_pm_request_cores()/kbase_pm_request_cores_sync() previously.
|
||||
* Wait for the L2 and shader power state machines to reach the states
|
||||
* corresponding to the values of 'l2_desired' and 'shaders_desired'.
|
||||
*
|
||||
* The usual use-case for this is to ensure cores are 'READY' after performing
|
||||
* a GPU Reset.
|
||||
*
|
||||
* Unlike kbase_pm_check_transitions_nolock(), the caller must not hold
|
||||
* kbase_device.pm.power_change_lock, because this function will take that
|
||||
* lock itself.
|
||||
* Unlike kbase_pm_update_state(), the caller must not hold hwaccess_lock,
|
||||
* because this function will take that lock itself.
|
||||
*
|
||||
* NOTE: This may not wait until the correct state is reached if there is a
|
||||
* power off in progress. To correctly wait for the desired state the caller
|
||||
* must ensure that this is not the case by, for example, calling
|
||||
* kbase_pm_wait_for_poweroff_complete()
|
||||
*
|
||||
* @kbdev: The kbase device structure for the device (must be a valid pointer)
|
||||
*/
|
||||
void kbase_pm_check_transitions_sync(struct kbase_device *kbdev);
|
||||
void kbase_pm_wait_for_desired_state(struct kbase_device *kbdev);
|
||||
|
||||
/**
|
||||
* kbase_pm_wait_for_l2_powered - Wait for the L2 cache to be powered on
|
||||
*
|
||||
* Wait for the L2 to be powered on, and for the L2 and shader state machines to
|
||||
* stabilise by reaching the states corresponding to the values of 'l2_desired'
|
||||
* and 'shaders_desired'.
|
||||
*
|
||||
* kbdev->pm.active_count must be non-zero when calling this function.
|
||||
*
|
||||
* Unlike kbase_pm_update_state(), the caller must not hold hwaccess_lock,
|
||||
* because this function will take that lock itself.
|
||||
*
|
||||
* @kbdev: The kbase device structure for the device (must be a valid pointer)
|
||||
*/
|
||||
void kbase_pm_wait_for_l2_powered(struct kbase_device *kbdev);
|
||||
|
||||
/**
|
||||
* kbase_pm_update_cores_state_nolock - Variant of kbase_pm_update_cores_state()
|
||||
@@ -268,6 +253,25 @@ void kbase_pm_check_transitions_sync(struct kbase_device *kbdev);
|
||||
*/
|
||||
void kbase_pm_update_cores_state_nolock(struct kbase_device *kbdev);
|
||||
|
||||
/**
|
||||
* kbase_pm_update_state - Update the L2 and shader power state machines
|
||||
* @kbdev: Device pointer
|
||||
*/
|
||||
void kbase_pm_update_state(struct kbase_device *kbdev);
|
||||
|
||||
/**
|
||||
* kbase_pm_state_machine_init - Initialize the state machines, primarily the
|
||||
* shader poweroff timer
|
||||
* @kbdev: Device pointer
|
||||
*/
|
||||
int kbase_pm_state_machine_init(struct kbase_device *kbdev);
|
||||
|
||||
/**
|
||||
* kbase_pm_state_machine_term - Clean up the PM state machines' data
|
||||
* @kbdev: Device pointer
|
||||
*/
|
||||
void kbase_pm_state_machine_term(struct kbase_device *kbdev);
|
||||
|
||||
/**
|
||||
* kbase_pm_update_cores_state - Update the desired state of shader cores from
|
||||
* the Power Policy, and begin any power
|
||||
@@ -282,24 +286,6 @@ void kbase_pm_update_cores_state_nolock(struct kbase_device *kbdev);
|
||||
*/
|
||||
void kbase_pm_update_cores_state(struct kbase_device *kbdev);
|
||||
|
||||
/**
|
||||
* kbase_pm_cancel_deferred_poweroff - Cancel any pending requests to power off
|
||||
* the GPU and/or shader cores.
|
||||
*
|
||||
* This should be called by any functions which directly power off the GPU.
|
||||
*
|
||||
* @kbdev: The kbase device structure for the device (must be a valid pointer)
|
||||
*/
|
||||
void kbase_pm_cancel_deferred_poweroff(struct kbase_device *kbdev);
|
||||
|
||||
/**
|
||||
* kbasep_pm_init_core_use_bitmaps - Initialise data tracking the required
|
||||
* and used cores.
|
||||
*
|
||||
* @kbdev: The kbase device structure for the device (must be a valid pointer)
|
||||
*/
|
||||
void kbasep_pm_init_core_use_bitmaps(struct kbase_device *kbdev);
|
||||
|
||||
/**
|
||||
* kbasep_pm_metrics_init - Initialize the metrics gathering framework.
|
||||
*
|
||||
@@ -565,4 +551,121 @@ void kbase_pm_cache_snoop_enable(struct kbase_device *kbdev);
|
||||
*/
|
||||
void kbase_pm_cache_snoop_disable(struct kbase_device *kbdev);
|
||||
|
||||
#ifdef CONFIG_MALI_BIFROST_DEVFREQ
|
||||
/**
|
||||
* kbase_devfreq_set_core_mask - Set devfreq core mask
|
||||
* @kbdev: Device pointer
|
||||
* @core_mask: New core mask
|
||||
*
|
||||
* This function is used by devfreq to change the available core mask as
|
||||
* required by Dynamic Core Scaling.
|
||||
*/
|
||||
void kbase_devfreq_set_core_mask(struct kbase_device *kbdev, u64 core_mask);
|
||||
#endif
|
||||
|
||||
/**
|
||||
* kbase_pm_reset_start_locked - Signal that GPU reset has started
|
||||
* @kbdev: Device pointer
|
||||
*
|
||||
* Normal power management operation will be suspended until the reset has
|
||||
* completed.
|
||||
*
|
||||
* Caller must hold hwaccess_lock.
|
||||
*/
|
||||
void kbase_pm_reset_start_locked(struct kbase_device *kbdev);
|
||||
|
||||
/**
|
||||
* kbase_pm_reset_complete - Signal that GPU reset has completed
|
||||
* @kbdev: Device pointer
|
||||
*
|
||||
* Normal power management operation will be resumed. The power manager will
|
||||
* re-evaluate what cores are needed and power on or off as required.
|
||||
*/
|
||||
void kbase_pm_reset_complete(struct kbase_device *kbdev);
|
||||
|
||||
/**
|
||||
* kbase_pm_protected_override_enable - Enable the protected mode override
|
||||
* @kbdev: Device pointer
|
||||
*
|
||||
* When the protected mode override is enabled, all shader cores are requested
|
||||
* to power down, and the L2 power state can be controlled by
|
||||
* kbase_pm_protected_l2_override().
|
||||
*
|
||||
* Caller must hold hwaccess_lock.
|
||||
*/
|
||||
void kbase_pm_protected_override_enable(struct kbase_device *kbdev);
|
||||
|
||||
/**
|
||||
* kbase_pm_protected_override_disable - Disable the protected mode override
|
||||
* @kbdev: Device pointer
|
||||
*
|
||||
* Caller must hold hwaccess_lock.
|
||||
*/
|
||||
void kbase_pm_protected_override_disable(struct kbase_device *kbdev);
|
||||
|
||||
/**
|
||||
* kbase_pm_protected_l2_override - Control the protected mode L2 override
|
||||
* @kbdev: Device pointer
|
||||
* @override: true to enable the override, false to disable
|
||||
*
|
||||
* When the driver is transitioning in or out of protected mode, the L2 cache is
|
||||
* forced to power off. This can be overridden to force the L2 cache to power
|
||||
* on. This is required to change coherency settings on some GPUs.
|
||||
*/
|
||||
void kbase_pm_protected_l2_override(struct kbase_device *kbdev, bool override);
|
||||
|
||||
/**
|
||||
* kbase_pm_protected_entry_override_enable - Enable the protected mode entry
|
||||
* override
|
||||
* @kbdev: Device pointer
|
||||
*
|
||||
* Initiate a GPU reset and enable the protected mode entry override flag if
|
||||
* l2_always_on WA is enabled and platform is fully coherent. If the GPU
|
||||
* reset is already ongoing then protected mode entry override flag will not
|
||||
* be enabled and function will have to be called again.
|
||||
*
|
||||
* When protected mode entry override flag is enabled to power down L2 via GPU
|
||||
* reset, the GPU reset handling behavior gets changed. For example call to
|
||||
* kbase_backend_reset() is skipped, Hw counters are not re-enabled and L2
|
||||
* isn't powered up again post reset.
|
||||
* This is needed only as a workaround for a Hw issue where explicit power down
|
||||
* of L2 causes a glitch. For entering protected mode on fully coherent
|
||||
* platforms L2 needs to be powered down to switch to IO coherency mode, so to
|
||||
* avoid the glitch GPU reset is used to power down L2. Hence, this function
|
||||
* does nothing on systems where the glitch issue isn't present.
|
||||
*
|
||||
* Caller must hold hwaccess_lock. Should be only called during the transition
|
||||
* to enter protected mode.
|
||||
*
|
||||
* Return: -EAGAIN if a GPU reset was required for the glitch workaround but
|
||||
* was already ongoing, otherwise 0.
|
||||
*/
|
||||
int kbase_pm_protected_entry_override_enable(struct kbase_device *kbdev);
|
||||
|
||||
/**
|
||||
* kbase_pm_protected_entry_override_disable - Disable the protected mode entry
|
||||
* override
|
||||
* @kbdev: Device pointer
|
||||
*
|
||||
* This shall be called once L2 has powered down and the switch to IO coherency
|
||||
* mode has been made. As with kbase_pm_protected_entry_override_enable(),
|
||||
* this function does nothing on systems where the glitch issue isn't present.
|
||||
*
|
||||
* Caller must hold hwaccess_lock. Should be only called during the transition
|
||||
* to enter protected mode.
|
||||
*/
|
||||
void kbase_pm_protected_entry_override_disable(struct kbase_device *kbdev);
|
||||
|
||||
/* If true, the driver should explicitly control corestack power management,
|
||||
* instead of relying on the Power Domain Controller.
|
||||
*/
|
||||
extern bool corestack_driver_control;
|
||||
|
||||
/* If true, disable powering-down of individual cores, and just power-down at
|
||||
* the top-level using platform-specific code.
|
||||
* If false, use the expected behaviour of controlling the individual cores
|
||||
* from within the driver.
|
||||
*/
|
||||
extern bool platform_power_down_only;
|
||||
|
||||
#endif /* _KBASE_BACKEND_PM_INTERNAL_H_ */
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
/*
|
||||
*
|
||||
* (C) COPYRIGHT 2010, 2012-2013, 2015 ARM Limited. All rights reserved.
|
||||
* (C) COPYRIGHT 2019 ARM Limited. All rights reserved.
|
||||
*
|
||||
* This program is free software and is provided to you under the terms of the
|
||||
* GNU General Public License version 2 as published by the Free Software
|
||||
@@ -20,10 +20,19 @@
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
#ifndef _BASE_VENDOR_SPEC_FUNC_H_
|
||||
#define _BASE_VENDOR_SPEC_FUNC_H_
|
||||
|
||||
int kbase_get_vendor_specific_cpu_clock_speed(u32 * const);
|
||||
|
||||
#endif /*_BASE_VENDOR_SPEC_FUNC_H_*/
|
||||
/*
|
||||
* Backend-specific Power Manager level 2 cache state definitions.
|
||||
* The function-like macro KBASEP_L2_STATE() must be defined before including
|
||||
* this header file. This header file can be included multiple times in the
|
||||
* same compilation unit with different definitions of KBASEP_L2_STATE().
|
||||
*/
|
||||
KBASEP_L2_STATE(OFF)
|
||||
KBASEP_L2_STATE(PEND_ON)
|
||||
KBASEP_L2_STATE(RESTORE_CLOCKS)
|
||||
KBASEP_L2_STATE(ON_HWCNT_ENABLE)
|
||||
KBASEP_L2_STATE(ON)
|
||||
KBASEP_L2_STATE(ON_HWCNT_DISABLE)
|
||||
KBASEP_L2_STATE(SLOW_DOWN_CLOCKS)
|
||||
KBASEP_L2_STATE(POWER_DOWN)
|
||||
KBASEP_L2_STATE(PEND_OFF)
|
||||
KBASEP_L2_STATE(RESET_WAIT)
|
||||
@@ -1,6 +1,6 @@
|
||||
/*
|
||||
*
|
||||
* (C) COPYRIGHT 2011-2018 ARM Limited. All rights reserved.
|
||||
* (C) COPYRIGHT 2011-2019 ARM Limited. All rights reserved.
|
||||
*
|
||||
* This program is free software and is provided to you under the terms of the
|
||||
* GNU General Public License version 2 as published by the Free Software
|
||||
@@ -80,6 +80,7 @@ int kbasep_pm_metrics_init(struct kbase_device *kbdev)
|
||||
kbdev->pm.backend.metrics.active_cl_ctx[1] = 0;
|
||||
kbdev->pm.backend.metrics.active_gl_ctx[0] = 0;
|
||||
kbdev->pm.backend.metrics.active_gl_ctx[1] = 0;
|
||||
kbdev->pm.backend.metrics.active_gl_ctx[2] = 0;
|
||||
|
||||
kbdev->pm.backend.metrics.values.time_busy = 0;
|
||||
kbdev->pm.backend.metrics.values.time_idle = 0;
|
||||
@@ -90,19 +91,15 @@ int kbasep_pm_metrics_init(struct kbase_device *kbdev)
|
||||
spin_lock_init(&kbdev->pm.backend.metrics.lock);
|
||||
|
||||
#ifdef CONFIG_MALI_BIFROST_DVFS
|
||||
kbdev->pm.backend.metrics.timer_active = true;
|
||||
hrtimer_init(&kbdev->pm.backend.metrics.timer, CLOCK_MONOTONIC,
|
||||
HRTIMER_MODE_REL);
|
||||
kbdev->pm.backend.metrics.timer.function = dvfs_callback;
|
||||
|
||||
hrtimer_start(&kbdev->pm.backend.metrics.timer,
|
||||
HR_TIMER_DELAY_MSEC(kbdev->pm.dvfs_period),
|
||||
HRTIMER_MODE_REL);
|
||||
kbase_pm_metrics_start(kbdev);
|
||||
#endif /* CONFIG_MALI_BIFROST_DVFS */
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
KBASE_EXPORT_TEST_API(kbasep_pm_metrics_init);
|
||||
|
||||
void kbasep_pm_metrics_term(struct kbase_device *kbdev)
|
||||
@@ -148,6 +145,8 @@ static void kbase_pm_get_dvfs_utilisation_calc(struct kbase_device *kbdev,
|
||||
kbdev->pm.backend.metrics.values.busy_gl += ns_time;
|
||||
if (kbdev->pm.backend.metrics.active_gl_ctx[1])
|
||||
kbdev->pm.backend.metrics.values.busy_gl += ns_time;
|
||||
if (kbdev->pm.backend.metrics.active_gl_ctx[2])
|
||||
kbdev->pm.backend.metrics.values.busy_gl += ns_time;
|
||||
} else {
|
||||
kbdev->pm.backend.metrics.values.time_idle += (u32) (ktime_to_ns(diff)
|
||||
>> KBASE_PM_TIME_SHIFT);
|
||||
@@ -221,6 +220,29 @@ bool kbase_pm_metrics_is_active(struct kbase_device *kbdev)
|
||||
}
|
||||
KBASE_EXPORT_TEST_API(kbase_pm_metrics_is_active);
|
||||
|
||||
void kbase_pm_metrics_start(struct kbase_device *kbdev)
|
||||
{
|
||||
unsigned long flags;
|
||||
|
||||
spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, flags);
|
||||
kbdev->pm.backend.metrics.timer_active = true;
|
||||
spin_unlock_irqrestore(&kbdev->pm.backend.metrics.lock, flags);
|
||||
hrtimer_start(&kbdev->pm.backend.metrics.timer,
|
||||
HR_TIMER_DELAY_MSEC(kbdev->pm.dvfs_period),
|
||||
HRTIMER_MODE_REL);
|
||||
}
|
||||
|
||||
void kbase_pm_metrics_stop(struct kbase_device *kbdev)
|
||||
{
|
||||
unsigned long flags;
|
||||
|
||||
spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, flags);
|
||||
kbdev->pm.backend.metrics.timer_active = false;
|
||||
spin_unlock_irqrestore(&kbdev->pm.backend.metrics.lock, flags);
|
||||
hrtimer_cancel(&kbdev->pm.backend.metrics.timer);
|
||||
}
|
||||
|
||||
|
||||
#endif /* CONFIG_MALI_BIFROST_DVFS */
|
||||
|
||||
/**
|
||||
@@ -238,6 +260,7 @@ static void kbase_pm_metrics_active_calc(struct kbase_device *kbdev)
|
||||
|
||||
kbdev->pm.backend.metrics.active_gl_ctx[0] = 0;
|
||||
kbdev->pm.backend.metrics.active_gl_ctx[1] = 0;
|
||||
kbdev->pm.backend.metrics.active_gl_ctx[2] = 0;
|
||||
kbdev->pm.backend.metrics.active_cl_ctx[0] = 0;
|
||||
kbdev->pm.backend.metrics.active_cl_ctx[1] = 0;
|
||||
kbdev->pm.backend.metrics.gpu_active = false;
|
||||
@@ -260,11 +283,7 @@ static void kbase_pm_metrics_active_calc(struct kbase_device *kbdev)
|
||||
kbdev->pm.backend.metrics.
|
||||
active_cl_ctx[device_nr] = 1;
|
||||
} else {
|
||||
/* Slot 2 should not be running non-compute
|
||||
* atoms */
|
||||
if (!WARN_ON(js >= 2))
|
||||
kbdev->pm.backend.metrics.
|
||||
active_gl_ctx[js] = 1;
|
||||
kbdev->pm.backend.metrics.active_gl_ctx[js] = 1;
|
||||
}
|
||||
kbdev->pm.backend.metrics.gpu_active = true;
|
||||
}
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
/*
|
||||
*
|
||||
* (C) COPYRIGHT 2010-2018 ARM Limited. All rights reserved.
|
||||
* (C) COPYRIGHT 2010-2019 ARM Limited. All rights reserved.
|
||||
*
|
||||
* This program is free software and is provided to you under the terms of the
|
||||
* GNU General Public License version 2 as published by the Free Software
|
||||
@@ -27,288 +27,49 @@
|
||||
#include <mali_kbase.h>
|
||||
#include <mali_midg_regmap.h>
|
||||
#include <mali_kbase_pm.h>
|
||||
#include <mali_kbase_config_defaults.h>
|
||||
#include <backend/gpu/mali_kbase_pm_internal.h>
|
||||
|
||||
static const struct kbase_pm_policy *const policy_list[] = {
|
||||
static const struct kbase_pm_policy *const all_policy_list[] = {
|
||||
#ifdef CONFIG_MALI_BIFROST_NO_MALI
|
||||
&kbase_pm_always_on_policy_ops,
|
||||
&kbase_pm_demand_policy_ops,
|
||||
&kbase_pm_coarse_demand_policy_ops,
|
||||
#if !MALI_CUSTOMER_RELEASE
|
||||
&kbase_pm_demand_always_powered_policy_ops,
|
||||
&kbase_pm_fast_start_policy_ops,
|
||||
&kbase_pm_always_on_demand_policy_ops,
|
||||
#endif
|
||||
#else /* CONFIG_MALI_BIFROST_NO_MALI */
|
||||
#if !PLATFORM_POWER_DOWN_ONLY
|
||||
&kbase_pm_demand_policy_ops,
|
||||
#endif /* !PLATFORM_POWER_DOWN_ONLY */
|
||||
&kbase_pm_coarse_demand_policy_ops,
|
||||
&kbase_pm_always_on_policy_ops,
|
||||
#if !MALI_CUSTOMER_RELEASE
|
||||
#if !PLATFORM_POWER_DOWN_ONLY
|
||||
&kbase_pm_demand_always_powered_policy_ops,
|
||||
&kbase_pm_fast_start_policy_ops,
|
||||
#endif /* !PLATFORM_POWER_DOWN_ONLY */
|
||||
&kbase_pm_always_on_demand_policy_ops,
|
||||
#endif
|
||||
&kbase_pm_always_on_policy_ops
|
||||
#endif /* CONFIG_MALI_BIFROST_NO_MALI */
|
||||
};
|
||||
|
||||
/* The number of policies available in the system.
|
||||
* This is derived from the number of entries in policy_list.
|
||||
*/
|
||||
#define POLICY_COUNT (sizeof(policy_list)/sizeof(*policy_list))
|
||||
|
||||
|
||||
/* Function IDs for looking up Timeline Trace codes in
|
||||
* kbase_pm_change_state_trace_code. Each value is used as the first index of
* that table, and KBASE_PM_FUNC_ID_COUNT sizes that dimension. */
|
||||
enum kbase_pm_func_id {
|
||||
KBASE_PM_FUNC_ID_REQUEST_CORES_START,
|
||||
KBASE_PM_FUNC_ID_REQUEST_CORES_END,
|
||||
KBASE_PM_FUNC_ID_RELEASE_CORES_START,
|
||||
KBASE_PM_FUNC_ID_RELEASE_CORES_END,
|
||||
/* Note: kbase_pm_unrequest_cores() is on the slow path, and we neither
|
||||
* expect to hit it nor tend to hit it very much anyway. We can detect
|
||||
* whether we need more instrumentation by a difference between
|
||||
* PM_CHECKTRANS events and PM_SEND/HANDLE_EVENT. */
|
||||
|
|
||||
/* Must be the last */
|
||||
KBASE_PM_FUNC_ID_COUNT
|
||||
};
|
||||
|
||||
|
||||
/* State changes during request/unrequest/release-ing cores.
* SHADER and TILER are separate bit flags that can be OR-ed together; the
* combined value is used as the second index of
* kbase_pm_change_state_trace_code. */
|
||||
enum {
|
||||
KBASE_PM_CHANGE_STATE_SHADER = (1u << 0),
|
||||
KBASE_PM_CHANGE_STATE_TILER = (1u << 1),
|
||||
|
|
||||
/* These two must be last: MASK is the OR of both flags above, and COUNT is
* MASK + 1 so that every flag combination is a valid array index */
|
||||
KBASE_PM_CHANGE_STATE_MASK = (KBASE_PM_CHANGE_STATE_TILER |
|
||||
KBASE_PM_CHANGE_STATE_SHADER),
|
||||
KBASE_PM_CHANGE_STATE_COUNT = KBASE_PM_CHANGE_STATE_MASK + 1
|
||||
};
|
||||
typedef u32 kbase_pm_change_state;
|
||||
|
||||
|
||||
#ifdef CONFIG_MALI_BIFROST_TRACE_TIMELINE
|
||||
/* Timeline Trace code lookups for each function.
* Indexed as [enum kbase_pm_func_id][OR of KBASE_PM_CHANGE_STATE_* flags].
* For every function the entry for state 0 (no flag set) is 0. */
|
||||
static u32 kbase_pm_change_state_trace_code[KBASE_PM_FUNC_ID_COUNT]
|
||||
[KBASE_PM_CHANGE_STATE_COUNT] = {
|
||||
/* kbase_pm_request_cores */
|
||||
[KBASE_PM_FUNC_ID_REQUEST_CORES_START][0] = 0,
|
||||
[KBASE_PM_FUNC_ID_REQUEST_CORES_START][KBASE_PM_CHANGE_STATE_SHADER] =
|
||||
SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_SHADER_START,
|
||||
[KBASE_PM_FUNC_ID_REQUEST_CORES_START][KBASE_PM_CHANGE_STATE_TILER] =
|
||||
SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_TILER_START,
|
||||
[KBASE_PM_FUNC_ID_REQUEST_CORES_START][KBASE_PM_CHANGE_STATE_SHADER |
|
||||
KBASE_PM_CHANGE_STATE_TILER] =
|
||||
SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_SHADER_TILER_START,
|
||||
|
|
||||
[KBASE_PM_FUNC_ID_REQUEST_CORES_END][0] = 0,
|
||||
[KBASE_PM_FUNC_ID_REQUEST_CORES_END][KBASE_PM_CHANGE_STATE_SHADER] =
|
||||
SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_SHADER_END,
|
||||
[KBASE_PM_FUNC_ID_REQUEST_CORES_END][KBASE_PM_CHANGE_STATE_TILER] =
|
||||
SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_TILER_END,
|
||||
[KBASE_PM_FUNC_ID_REQUEST_CORES_END][KBASE_PM_CHANGE_STATE_SHADER |
|
||||
KBASE_PM_CHANGE_STATE_TILER] =
|
||||
SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_SHADER_TILER_END,
|
||||
|
|
||||
/* kbase_pm_release_cores */
|
||||
[KBASE_PM_FUNC_ID_RELEASE_CORES_START][0] = 0,
|
||||
[KBASE_PM_FUNC_ID_RELEASE_CORES_START][KBASE_PM_CHANGE_STATE_SHADER] =
|
||||
SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_SHADER_START,
|
||||
[KBASE_PM_FUNC_ID_RELEASE_CORES_START][KBASE_PM_CHANGE_STATE_TILER] =
|
||||
SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_TILER_START,
|
||||
[KBASE_PM_FUNC_ID_RELEASE_CORES_START][KBASE_PM_CHANGE_STATE_SHADER |
|
||||
KBASE_PM_CHANGE_STATE_TILER] =
|
||||
SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_SHADER_TILER_START,
|
||||
|
|
||||
[KBASE_PM_FUNC_ID_RELEASE_CORES_END][0] = 0,
|
||||
[KBASE_PM_FUNC_ID_RELEASE_CORES_END][KBASE_PM_CHANGE_STATE_SHADER] =
|
||||
SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_SHADER_END,
|
||||
[KBASE_PM_FUNC_ID_RELEASE_CORES_END][KBASE_PM_CHANGE_STATE_TILER] =
|
||||
SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_TILER_END,
|
||||
[KBASE_PM_FUNC_ID_RELEASE_CORES_END][KBASE_PM_CHANGE_STATE_SHADER |
|
||||
KBASE_PM_CHANGE_STATE_TILER] =
|
||||
SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_SHADER_TILER_END
|
||||
};
|
||||
|
||||
static inline void kbase_timeline_pm_cores_func(struct kbase_device *kbdev,
|
||||
enum kbase_pm_func_id func_id,
|
||||
kbase_pm_change_state state)
|
||||
static void generate_filtered_policy_list(struct kbase_device *kbdev)
|
||||
{
|
||||
int trace_code;
|
||||
size_t i;
|
||||
|
||||
KBASE_DEBUG_ASSERT(func_id >= 0 && func_id < KBASE_PM_FUNC_ID_COUNT);
|
||||
KBASE_DEBUG_ASSERT(state != 0 && (state & KBASE_PM_CHANGE_STATE_MASK) ==
|
||||
state);
|
||||
for (i = 0; i < ARRAY_SIZE(all_policy_list); ++i) {
|
||||
const struct kbase_pm_policy *pol = all_policy_list[i];
|
||||
|
||||
trace_code = kbase_pm_change_state_trace_code[func_id][state];
|
||||
KBASE_TIMELINE_PM_CHECKTRANS(kbdev, trace_code);
|
||||
}
|
||||
BUILD_BUG_ON(ARRAY_SIZE(all_policy_list) >
|
||||
KBASE_PM_MAX_NUM_POLICIES);
|
||||
if (platform_power_down_only &&
|
||||
(pol->flags & KBASE_PM_POLICY_FLAG_DISABLED_WITH_POWER_DOWN_ONLY))
|
||||
continue;
|
||||
|
||||
#else /* CONFIG_MALI_BIFROST_TRACE_TIMELINE */
|
||||
static inline void kbase_timeline_pm_cores_func(struct kbase_device *kbdev,
|
||||
enum kbase_pm_func_id func_id, kbase_pm_change_state state)
|
||||
{
|
||||
}
|
||||
|
||||
#endif /* CONFIG_MALI_BIFROST_TRACE_TIMELINE */
|
||||
|
||||
/**
|
||||
* kbasep_pm_do_poweroff_cores - Process a poweroff request and power down any
|
||||
* requested shader cores
|
||||
* @kbdev: Device pointer
|
||||
*/
|
||||
static void kbasep_pm_do_poweroff_cores(struct kbase_device *kbdev)
|
||||
{
|
||||
u64 prev_shader_state = kbdev->pm.backend.desired_shader_state;
|
||||
u64 prev_tiler_state = kbdev->pm.backend.desired_tiler_state;
|
||||
|
||||
lockdep_assert_held(&kbdev->hwaccess_lock);
|
||||
|
||||
kbdev->pm.backend.desired_shader_state &=
|
||||
~kbdev->pm.backend.shader_poweroff_pending;
|
||||
kbdev->pm.backend.desired_tiler_state &=
|
||||
~kbdev->pm.backend.tiler_poweroff_pending;
|
||||
|
||||
kbdev->pm.backend.shader_poweroff_pending = 0;
|
||||
kbdev->pm.backend.tiler_poweroff_pending = 0;
|
||||
|
||||
if (prev_shader_state != kbdev->pm.backend.desired_shader_state ||
|
||||
prev_tiler_state !=
|
||||
kbdev->pm.backend.desired_tiler_state ||
|
||||
kbdev->pm.backend.ca_in_transition) {
|
||||
bool cores_are_available;
|
||||
|
||||
KBASE_TIMELINE_PM_CHECKTRANS(kbdev,
|
||||
SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_DEFERRED_START);
|
||||
cores_are_available = kbase_pm_check_transitions_nolock(kbdev);
|
||||
KBASE_TIMELINE_PM_CHECKTRANS(kbdev,
|
||||
SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_DEFERRED_END);
|
||||
|
||||
/* Don't need 'cores_are_available',
|
||||
* because we don't return anything */
|
||||
CSTD_UNUSED(cores_are_available);
|
||||
kbdev->policy_list[kbdev->policy_count++] = pol;
|
||||
}
|
||||
}
|
||||
|
||||
static enum hrtimer_restart
|
||||
kbasep_pm_do_gpu_poweroff_callback(struct hrtimer *timer)
|
||||
{
|
||||
struct kbase_device *kbdev;
|
||||
unsigned long flags;
|
||||
|
||||
kbdev = container_of(timer, struct kbase_device,
|
||||
pm.backend.gpu_poweroff_timer);
|
||||
|
||||
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
|
||||
|
||||
/* It is safe for this call to do nothing if the work item is already
|
||||
* queued. The worker function will read the must up-to-date state of
|
||||
* kbdev->pm.backend.gpu_poweroff_pending under lock.
|
||||
*
|
||||
* If a state change occurs while the worker function is processing,
|
||||
* this call will succeed as a work item can be requeued once it has
|
||||
* started processing.
|
||||
*/
|
||||
if (kbdev->pm.backend.gpu_poweroff_pending)
|
||||
queue_work(kbdev->pm.backend.gpu_poweroff_wq,
|
||||
&kbdev->pm.backend.gpu_poweroff_work);
|
||||
|
||||
if (kbdev->pm.backend.shader_poweroff_pending ||
|
||||
kbdev->pm.backend.tiler_poweroff_pending) {
|
||||
kbdev->pm.backend.shader_poweroff_pending_time--;
|
||||
|
||||
KBASE_DEBUG_ASSERT(
|
||||
kbdev->pm.backend.shader_poweroff_pending_time
|
||||
>= 0);
|
||||
|
||||
if (!kbdev->pm.backend.shader_poweroff_pending_time)
|
||||
kbasep_pm_do_poweroff_cores(kbdev);
|
||||
}
|
||||
|
||||
if (kbdev->pm.backend.poweroff_timer_needed) {
|
||||
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
|
||||
|
||||
hrtimer_add_expires(timer, kbdev->pm.gpu_poweroff_time);
|
||||
|
||||
return HRTIMER_RESTART;
|
||||
}
|
||||
|
||||
kbdev->pm.backend.poweroff_timer_running = false;
|
||||
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
|
||||
|
||||
return HRTIMER_NORESTART;
|
||||
}
|
||||
|
||||
static void kbasep_pm_do_gpu_poweroff_wq(struct work_struct *data)
|
||||
{
|
||||
unsigned long flags;
|
||||
struct kbase_device *kbdev;
|
||||
bool do_poweroff = false;
|
||||
|
||||
kbdev = container_of(data, struct kbase_device,
|
||||
pm.backend.gpu_poweroff_work);
|
||||
|
||||
mutex_lock(&kbdev->pm.lock);
|
||||
|
||||
if (kbdev->pm.backend.gpu_poweroff_pending == 0) {
|
||||
mutex_unlock(&kbdev->pm.lock);
|
||||
return;
|
||||
}
|
||||
|
||||
kbdev->pm.backend.gpu_poweroff_pending--;
|
||||
|
||||
if (kbdev->pm.backend.gpu_poweroff_pending > 0) {
|
||||
mutex_unlock(&kbdev->pm.lock);
|
||||
return;
|
||||
}
|
||||
|
||||
KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_poweroff_pending == 0);
|
||||
|
||||
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
|
||||
|
||||
/* Only power off the GPU if a request is still pending */
|
||||
if (!kbdev->pm.backend.pm_current_policy->get_core_active(kbdev))
|
||||
do_poweroff = true;
|
||||
|
||||
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
|
||||
|
||||
if (do_poweroff) {
|
||||
kbdev->pm.backend.poweroff_timer_needed = false;
|
||||
hrtimer_cancel(&kbdev->pm.backend.gpu_poweroff_timer);
|
||||
kbdev->pm.backend.poweroff_timer_running = false;
|
||||
|
||||
/* Power off the GPU */
|
||||
kbase_pm_do_poweroff(kbdev, false);
|
||||
}
|
||||
|
||||
mutex_unlock(&kbdev->pm.lock);
|
||||
}
|
||||
|
||||
int kbase_pm_policy_init(struct kbase_device *kbdev)
|
||||
{
|
||||
struct workqueue_struct *wq;
|
||||
generate_filtered_policy_list(kbdev);
|
||||
if (kbdev->policy_count == 0)
|
||||
return -EINVAL;
|
||||
|
||||
wq = alloc_workqueue("kbase_pm_do_poweroff",
|
||||
WQ_HIGHPRI | WQ_UNBOUND, 1);
|
||||
if (!wq)
|
||||
return -ENOMEM;
|
||||
|
||||
kbdev->pm.backend.gpu_poweroff_wq = wq;
|
||||
INIT_WORK(&kbdev->pm.backend.gpu_poweroff_work,
|
||||
kbasep_pm_do_gpu_poweroff_wq);
|
||||
hrtimer_init(&kbdev->pm.backend.gpu_poweroff_timer,
|
||||
CLOCK_MONOTONIC, HRTIMER_MODE_REL);
|
||||
kbdev->pm.backend.gpu_poweroff_timer.function =
|
||||
kbasep_pm_do_gpu_poweroff_callback;
|
||||
kbdev->pm.backend.pm_current_policy = policy_list[0];
|
||||
kbdev->pm.backend.pm_current_policy = kbdev->policy_list[0];
|
||||
kbdev->pm.backend.pm_current_policy->init(kbdev);
|
||||
kbdev->pm.gpu_poweroff_time =
|
||||
HR_TIMER_DELAY_NSEC(DEFAULT_PM_GPU_POWEROFF_TICK_NS);
|
||||
kbdev->pm.poweroff_shader_ticks = DEFAULT_PM_POWEROFF_TICK_SHADER;
|
||||
kbdev->pm.poweroff_gpu_ticks = DEFAULT_PM_POWEROFF_TICK_GPU;
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -316,29 +77,6 @@ int kbase_pm_policy_init(struct kbase_device *kbdev)
|
||||
void kbase_pm_policy_term(struct kbase_device *kbdev)
|
||||
{
|
||||
kbdev->pm.backend.pm_current_policy->term(kbdev);
|
||||
destroy_workqueue(kbdev->pm.backend.gpu_poweroff_wq);
|
||||
}
|
||||
|
||||
void kbase_pm_cancel_deferred_poweroff(struct kbase_device *kbdev)
|
||||
{
|
||||
unsigned long flags;
|
||||
|
||||
lockdep_assert_held(&kbdev->pm.lock);
|
||||
|
||||
kbdev->pm.backend.poweroff_timer_needed = false;
|
||||
hrtimer_cancel(&kbdev->pm.backend.gpu_poweroff_timer);
|
||||
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
|
||||
kbdev->pm.backend.poweroff_timer_running = false;
|
||||
|
||||
/* If wq is already running but is held off by pm.lock, make sure it has
|
||||
* no effect */
|
||||
kbdev->pm.backend.gpu_poweroff_pending = 0;
|
||||
|
||||
kbdev->pm.backend.shader_poweroff_pending = 0;
|
||||
kbdev->pm.backend.tiler_poweroff_pending = 0;
|
||||
kbdev->pm.backend.shader_poweroff_pending_time = 0;
|
||||
|
||||
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
|
||||
}
|
||||
|
||||
void kbase_pm_update_active(struct kbase_device *kbdev)
|
||||
@@ -356,37 +94,29 @@ void kbase_pm_update_active(struct kbase_device *kbdev)
|
||||
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
|
||||
|
||||
active = backend->pm_current_policy->get_core_active(kbdev);
|
||||
WARN((kbase_pm_is_active(kbdev) && !active),
|
||||
"GPU is active but policy '%s' is indicating that it can be powered off",
|
||||
kbdev->pm.backend.pm_current_policy->name);
|
||||
|
||||
if (active) {
|
||||
if (backend->gpu_poweroff_pending) {
|
||||
/* Cancel any pending power off request */
|
||||
backend->gpu_poweroff_pending = 0;
|
||||
|
||||
/* If a request was pending then the GPU was still
|
||||
* powered, so no need to continue */
|
||||
if (!kbdev->poweroff_pending) {
|
||||
spin_unlock_irqrestore(&kbdev->hwaccess_lock,
|
||||
flags);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
if (!backend->poweroff_timer_running && !backend->gpu_powered &&
|
||||
(pm->poweroff_gpu_ticks ||
|
||||
pm->poweroff_shader_ticks)) {
|
||||
backend->poweroff_timer_needed = true;
|
||||
backend->poweroff_timer_running = true;
|
||||
hrtimer_start(&backend->gpu_poweroff_timer,
|
||||
pm->gpu_poweroff_time,
|
||||
HRTIMER_MODE_REL);
|
||||
}
|
||||
|
||||
/* Power on the GPU and any cores requested by the policy */
|
||||
if (pm->backend.poweroff_wait_in_progress) {
|
||||
if (!pm->backend.invoke_poweroff_wait_wq_when_l2_off &&
|
||||
pm->backend.poweroff_wait_in_progress) {
|
||||
KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_powered);
|
||||
pm->backend.poweron_required = true;
|
||||
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
|
||||
} else {
|
||||
/* Cancel the invocation of
|
||||
* kbase_pm_gpu_poweroff_wait_wq() from the L2 state
|
||||
* machine. This is safe - if
|
||||
* invoke_poweroff_wait_wq_when_l2_off is true, then
|
||||
* the poweroff work hasn't even been queued yet,
|
||||
* meaning we can go straight to powering on.
|
||||
*/
|
||||
pm->backend.invoke_poweroff_wait_wq_when_l2_off = false;
|
||||
pm->backend.poweroff_wait_in_progress = false;
|
||||
pm->backend.l2_desired = true;
|
||||
|
||||
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
|
||||
kbase_pm_do_poweron(kbdev, false);
|
||||
}
|
||||
@@ -395,41 +125,12 @@ void kbase_pm_update_active(struct kbase_device *kbdev)
|
||||
* when there are contexts active */
|
||||
KBASE_DEBUG_ASSERT(pm->active_count == 0);
|
||||
|
||||
if (backend->shader_poweroff_pending ||
|
||||
backend->tiler_poweroff_pending) {
|
||||
backend->shader_poweroff_pending = 0;
|
||||
backend->tiler_poweroff_pending = 0;
|
||||
backend->shader_poweroff_pending_time = 0;
|
||||
}
|
||||
|
||||
/* Request power off */
|
||||
if (pm->backend.gpu_powered) {
|
||||
if (pm->poweroff_gpu_ticks) {
|
||||
backend->gpu_poweroff_pending =
|
||||
pm->poweroff_gpu_ticks;
|
||||
backend->poweroff_timer_needed = true;
|
||||
if (!backend->poweroff_timer_running) {
|
||||
/* Start timer if not running (eg if
|
||||
* power policy has been changed from
|
||||
* always_on to something else). This
|
||||
* will ensure the GPU is actually
|
||||
* powered off */
|
||||
backend->poweroff_timer_running
|
||||
= true;
|
||||
hrtimer_start(
|
||||
&backend->gpu_poweroff_timer,
|
||||
pm->gpu_poweroff_time,
|
||||
HRTIMER_MODE_REL);
|
||||
}
|
||||
spin_unlock_irqrestore(&kbdev->hwaccess_lock,
|
||||
flags);
|
||||
} else {
|
||||
spin_unlock_irqrestore(&kbdev->hwaccess_lock,
|
||||
flags);
|
||||
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
|
||||
|
||||
/* Power off the GPU immediately */
|
||||
kbase_pm_do_poweroff(kbdev, false);
|
||||
}
|
||||
/* Power off the GPU immediately */
|
||||
kbase_pm_do_poweroff(kbdev, false);
|
||||
} else {
|
||||
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
|
||||
}
|
||||
@@ -438,10 +139,7 @@ void kbase_pm_update_active(struct kbase_device *kbdev)
|
||||
|
||||
void kbase_pm_update_cores_state_nolock(struct kbase_device *kbdev)
|
||||
{
|
||||
u64 desired_bitmap;
|
||||
u64 desired_tiler_bitmap;
|
||||
bool cores_are_available;
|
||||
bool do_poweroff = false;
|
||||
bool shaders_desired;
|
||||
|
||||
lockdep_assert_held(&kbdev->hwaccess_lock);
|
||||
|
||||
@@ -450,116 +148,20 @@ void kbase_pm_update_cores_state_nolock(struct kbase_device *kbdev)
|
||||
if (kbdev->pm.backend.poweroff_wait_in_progress)
|
||||
return;
|
||||
|
||||
if (kbdev->protected_mode_transition && !kbdev->shader_needed_bitmap &&
|
||||
!kbdev->shader_inuse_bitmap && !kbdev->tiler_needed_cnt
|
||||
&& !kbdev->tiler_inuse_cnt) {
|
||||
if (kbdev->pm.backend.protected_transition_override)
|
||||
/* We are trying to change in/out of protected mode - force all
|
||||
* cores off so that the L2 powers down */
|
||||
desired_bitmap = 0;
|
||||
desired_tiler_bitmap = 0;
|
||||
} else {
|
||||
desired_bitmap =
|
||||
kbdev->pm.backend.pm_current_policy->get_core_mask(kbdev);
|
||||
desired_bitmap &= kbase_pm_ca_get_core_mask(kbdev);
|
||||
shaders_desired = false;
|
||||
else
|
||||
shaders_desired = kbdev->pm.backend.pm_current_policy->shaders_needed(kbdev);
|
||||
|
||||
if (kbdev->tiler_needed_cnt > 0 || kbdev->tiler_inuse_cnt > 0)
|
||||
desired_tiler_bitmap = 1;
|
||||
else
|
||||
desired_tiler_bitmap = 0;
|
||||
|
||||
if (!kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_XAFFINITY)) {
|
||||
/* Unless XAFFINITY is supported, enable core 0 if tiler
|
||||
* required, regardless of core availability */
|
||||
if (kbdev->tiler_needed_cnt > 0 ||
|
||||
kbdev->tiler_inuse_cnt > 0)
|
||||
desired_bitmap |= 1;
|
||||
}
|
||||
}
|
||||
|
||||
if (kbdev->pm.backend.desired_shader_state != desired_bitmap)
|
||||
if (kbdev->pm.backend.shaders_desired != shaders_desired) {
|
||||
KBASE_TRACE_ADD(kbdev, PM_CORES_CHANGE_DESIRED, NULL, NULL, 0u,
|
||||
(u32)desired_bitmap);
|
||||
/* Are any cores being powered on? */
|
||||
if (~kbdev->pm.backend.desired_shader_state & desired_bitmap ||
|
||||
~kbdev->pm.backend.desired_tiler_state & desired_tiler_bitmap ||
|
||||
kbdev->pm.backend.ca_in_transition) {
|
||||
/* Check if we are powering off any cores before updating shader
|
||||
* state */
|
||||
if (kbdev->pm.backend.desired_shader_state & ~desired_bitmap ||
|
||||
kbdev->pm.backend.desired_tiler_state &
|
||||
~desired_tiler_bitmap) {
|
||||
/* Start timer to power off cores */
|
||||
kbdev->pm.backend.shader_poweroff_pending |=
|
||||
(kbdev->pm.backend.desired_shader_state &
|
||||
~desired_bitmap);
|
||||
kbdev->pm.backend.tiler_poweroff_pending |=
|
||||
(kbdev->pm.backend.desired_tiler_state &
|
||||
~desired_tiler_bitmap);
|
||||
(u32)kbdev->pm.backend.shaders_desired);
|
||||
|
||||
if (kbdev->pm.poweroff_shader_ticks &&
|
||||
!kbdev->protected_mode_transition)
|
||||
kbdev->pm.backend.shader_poweroff_pending_time =
|
||||
kbdev->pm.poweroff_shader_ticks;
|
||||
else
|
||||
do_poweroff = true;
|
||||
}
|
||||
|
||||
kbdev->pm.backend.desired_shader_state = desired_bitmap;
|
||||
kbdev->pm.backend.desired_tiler_state = desired_tiler_bitmap;
|
||||
|
||||
/* If any cores are being powered on, transition immediately */
|
||||
cores_are_available = kbase_pm_check_transitions_nolock(kbdev);
|
||||
} else if (kbdev->pm.backend.desired_shader_state & ~desired_bitmap ||
|
||||
kbdev->pm.backend.desired_tiler_state &
|
||||
~desired_tiler_bitmap) {
|
||||
/* Start timer to power off cores */
|
||||
kbdev->pm.backend.shader_poweroff_pending |=
|
||||
(kbdev->pm.backend.desired_shader_state &
|
||||
~desired_bitmap);
|
||||
kbdev->pm.backend.tiler_poweroff_pending |=
|
||||
(kbdev->pm.backend.desired_tiler_state &
|
||||
~desired_tiler_bitmap);
|
||||
if (kbdev->pm.poweroff_shader_ticks &&
|
||||
!kbdev->protected_mode_transition)
|
||||
kbdev->pm.backend.shader_poweroff_pending_time =
|
||||
kbdev->pm.poweroff_shader_ticks;
|
||||
else
|
||||
kbasep_pm_do_poweroff_cores(kbdev);
|
||||
} else if (kbdev->pm.active_count == 0 && desired_bitmap != 0 &&
|
||||
desired_tiler_bitmap != 0 &&
|
||||
kbdev->pm.backend.poweroff_timer_needed) {
|
||||
/* If power policy is keeping cores on despite there being no
|
||||
* active contexts then disable poweroff timer as it isn't
|
||||
* required.
|
||||
* Only reset poweroff_timer_needed if we're not in the middle
|
||||
* of the power off callback */
|
||||
kbdev->pm.backend.poweroff_timer_needed = false;
|
||||
kbdev->pm.backend.shaders_desired = shaders_desired;
|
||||
kbase_pm_update_state(kbdev);
|
||||
}
|
||||
|
||||
/* Ensure timer does not power off wanted cores and make sure to power
|
||||
* off unwanted cores */
|
||||
if (kbdev->pm.backend.shader_poweroff_pending ||
|
||||
kbdev->pm.backend.tiler_poweroff_pending) {
|
||||
kbdev->pm.backend.shader_poweroff_pending &=
|
||||
~(kbdev->pm.backend.desired_shader_state &
|
||||
desired_bitmap);
|
||||
kbdev->pm.backend.tiler_poweroff_pending &=
|
||||
~(kbdev->pm.backend.desired_tiler_state &
|
||||
desired_tiler_bitmap);
|
||||
|
||||
if (!kbdev->pm.backend.shader_poweroff_pending &&
|
||||
!kbdev->pm.backend.tiler_poweroff_pending)
|
||||
kbdev->pm.backend.shader_poweroff_pending_time = 0;
|
||||
}
|
||||
|
||||
/* Shader poweroff is deferred to the end of the function, to eliminate
|
||||
* issues caused by the core availability policy recursing into this
|
||||
* function */
|
||||
if (do_poweroff)
|
||||
kbasep_pm_do_poweroff_cores(kbdev);
|
||||
|
||||
/* Don't need 'cores_are_available', because we don't return anything */
|
||||
CSTD_UNUSED(cores_are_available);
|
||||
}
|
||||
|
||||
void kbase_pm_update_cores_state(struct kbase_device *kbdev)
|
||||
@@ -573,14 +175,14 @@ void kbase_pm_update_cores_state(struct kbase_device *kbdev)
|
||||
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
|
||||
}
|
||||
|
||||
int kbase_pm_list_policies(const struct kbase_pm_policy * const **list)
|
||||
int kbase_pm_list_policies(struct kbase_device *kbdev,
|
||||
const struct kbase_pm_policy * const **list)
|
||||
{
|
||||
if (!list)
|
||||
return POLICY_COUNT;
|
||||
WARN_ON(kbdev->policy_count == 0);
|
||||
if (list)
|
||||
*list = kbdev->policy_list;
|
||||
|
||||
*list = policy_list;
|
||||
|
||||
return POLICY_COUNT;
|
||||
return kbdev->policy_count;
|
||||
}
|
||||
|
||||
KBASE_EXPORT_TEST_API(kbase_pm_list_policies);
|
||||
@@ -649,336 +251,3 @@ void kbase_pm_set_policy(struct kbase_device *kbdev,
|
||||
}
|
||||
|
||||
KBASE_EXPORT_TEST_API(kbase_pm_set_policy);
|
||||
|
||||
/* Check whether a state change has finished, and trace it as completed */
|
||||
static void
|
||||
kbase_pm_trace_check_and_finish_state_change(struct kbase_device *kbdev)
|
||||
{
|
||||
if ((kbdev->shader_available_bitmap &
|
||||
kbdev->pm.backend.desired_shader_state)
|
||||
== kbdev->pm.backend.desired_shader_state &&
|
||||
(kbdev->tiler_available_bitmap &
|
||||
kbdev->pm.backend.desired_tiler_state)
|
||||
== kbdev->pm.backend.desired_tiler_state)
|
||||
kbase_timeline_pm_check_handle_event(kbdev,
|
||||
KBASE_TIMELINE_PM_EVENT_GPU_STATE_CHANGED);
|
||||
}
|
||||
|
||||
void kbase_pm_request_cores(struct kbase_device *kbdev,
|
||||
bool tiler_required, u64 shader_cores)
|
||||
{
|
||||
u64 cores;
|
||||
|
||||
kbase_pm_change_state change_gpu_state = 0u;
|
||||
|
||||
KBASE_DEBUG_ASSERT(kbdev != NULL);
|
||||
|
||||
lockdep_assert_held(&kbdev->hwaccess_lock);
|
||||
|
||||
cores = shader_cores;
|
||||
while (cores) {
|
||||
int bitnum = fls64(cores) - 1;
|
||||
u64 bit = 1ULL << bitnum;
|
||||
|
||||
/* It should be almost impossible for this to overflow. It would
|
||||
* require 2^32 atoms to request a particular core, which would
|
||||
* require 2^24 contexts to submit. This would require an amount
|
||||
* of memory that is impossible on a 32-bit system and extremely
|
||||
* unlikely on a 64-bit system. */
|
||||
int cnt = ++kbdev->shader_needed_cnt[bitnum];
|
||||
|
||||
if (1 == cnt) {
|
||||
kbdev->shader_needed_bitmap |= bit;
|
||||
change_gpu_state |= KBASE_PM_CHANGE_STATE_SHADER;
|
||||
}
|
||||
|
||||
cores &= ~bit;
|
||||
}
|
||||
|
||||
if (tiler_required) {
|
||||
int cnt = ++kbdev->tiler_needed_cnt;
|
||||
|
||||
if (1 == cnt)
|
||||
change_gpu_state |= KBASE_PM_CHANGE_STATE_TILER;
|
||||
|
||||
KBASE_DEBUG_ASSERT(kbdev->tiler_needed_cnt != 0);
|
||||
}
|
||||
|
||||
if (change_gpu_state) {
|
||||
KBASE_TRACE_ADD(kbdev, PM_REQUEST_CHANGE_SHADER_NEEDED, NULL,
|
||||
NULL, 0u, (u32) kbdev->shader_needed_bitmap);
|
||||
|
||||
kbase_timeline_pm_cores_func(kbdev,
|
||||
KBASE_PM_FUNC_ID_REQUEST_CORES_START,
|
||||
change_gpu_state);
|
||||
kbase_pm_update_cores_state_nolock(kbdev);
|
||||
kbase_timeline_pm_cores_func(kbdev,
|
||||
KBASE_PM_FUNC_ID_REQUEST_CORES_END,
|
||||
change_gpu_state);
|
||||
}
|
||||
}
|
||||
|
||||
KBASE_EXPORT_TEST_API(kbase_pm_request_cores);
|
||||
|
||||
void kbase_pm_unrequest_cores(struct kbase_device *kbdev,
|
||||
bool tiler_required, u64 shader_cores)
|
||||
{
|
||||
kbase_pm_change_state change_gpu_state = 0u;
|
||||
|
||||
KBASE_DEBUG_ASSERT(kbdev != NULL);
|
||||
|
||||
lockdep_assert_held(&kbdev->hwaccess_lock);
|
||||
|
||||
while (shader_cores) {
|
||||
int bitnum = fls64(shader_cores) - 1;
|
||||
u64 bit = 1ULL << bitnum;
|
||||
int cnt;
|
||||
|
||||
KBASE_DEBUG_ASSERT(kbdev->shader_needed_cnt[bitnum] > 0);
|
||||
|
||||
cnt = --kbdev->shader_needed_cnt[bitnum];
|
||||
|
||||
if (0 == cnt) {
|
||||
kbdev->shader_needed_bitmap &= ~bit;
|
||||
|
||||
change_gpu_state |= KBASE_PM_CHANGE_STATE_SHADER;
|
||||
}
|
||||
|
||||
shader_cores &= ~bit;
|
||||
}
|
||||
|
||||
if (tiler_required) {
|
||||
int cnt;
|
||||
|
||||
KBASE_DEBUG_ASSERT(kbdev->tiler_needed_cnt > 0);
|
||||
|
||||
cnt = --kbdev->tiler_needed_cnt;
|
||||
|
||||
if (0 == cnt)
|
||||
change_gpu_state |= KBASE_PM_CHANGE_STATE_TILER;
|
||||
}
|
||||
|
||||
if (change_gpu_state) {
|
||||
KBASE_TRACE_ADD(kbdev, PM_UNREQUEST_CHANGE_SHADER_NEEDED, NULL,
|
||||
NULL, 0u, (u32) kbdev->shader_needed_bitmap);
|
||||
|
||||
kbase_pm_update_cores_state_nolock(kbdev);
|
||||
|
||||
/* Trace that any state change effectively completes immediately
|
||||
* - no-one will wait on the state change */
|
||||
kbase_pm_trace_check_and_finish_state_change(kbdev);
|
||||
}
|
||||
}
|
||||
|
||||
KBASE_EXPORT_TEST_API(kbase_pm_unrequest_cores);
|
||||
|
||||
enum kbase_pm_cores_ready
|
||||
kbase_pm_register_inuse_cores(struct kbase_device *kbdev,
|
||||
bool tiler_required, u64 shader_cores)
|
||||
{
|
||||
u64 prev_shader_needed; /* Just for tracing */
|
||||
u64 prev_shader_inuse; /* Just for tracing */
|
||||
|
||||
lockdep_assert_held(&kbdev->hwaccess_lock);
|
||||
|
||||
prev_shader_needed = kbdev->shader_needed_bitmap;
|
||||
prev_shader_inuse = kbdev->shader_inuse_bitmap;
|
||||
|
||||
/* If desired_shader_state does not contain the requested cores, then
|
||||
* power management is not attempting to powering those cores (most
|
||||
* likely due to core availability policy) and a new job affinity must
|
||||
* be chosen */
|
||||
if ((kbdev->pm.backend.desired_shader_state & shader_cores) !=
|
||||
shader_cores) {
|
||||
return (kbdev->pm.backend.poweroff_wait_in_progress ||
|
||||
kbdev->pm.backend.pm_current_policy == NULL) ?
|
||||
KBASE_CORES_NOT_READY : KBASE_NEW_AFFINITY;
|
||||
}
|
||||
|
||||
if ((kbdev->shader_available_bitmap & shader_cores) != shader_cores ||
|
||||
(tiler_required && !kbdev->tiler_available_bitmap)) {
|
||||
/* Trace ongoing core transition */
|
||||
kbase_timeline_pm_l2_transition_start(kbdev);
|
||||
return KBASE_CORES_NOT_READY;
|
||||
}
|
||||
|
||||
/* If we started to trace a state change, then trace it has being
|
||||
* finished by now, at the very latest */
|
||||
kbase_pm_trace_check_and_finish_state_change(kbdev);
|
||||
/* Trace core transition done */
|
||||
kbase_timeline_pm_l2_transition_done(kbdev);
|
||||
|
||||
while (shader_cores) {
|
||||
int bitnum = fls64(shader_cores) - 1;
|
||||
u64 bit = 1ULL << bitnum;
|
||||
int cnt;
|
||||
|
||||
KBASE_DEBUG_ASSERT(kbdev->shader_needed_cnt[bitnum] > 0);
|
||||
|
||||
cnt = --kbdev->shader_needed_cnt[bitnum];
|
||||
|
||||
if (0 == cnt)
|
||||
kbdev->shader_needed_bitmap &= ~bit;
|
||||
|
||||
/* shader_inuse_cnt should not overflow because there can only
|
||||
* be a very limited number of jobs on the h/w at one time */
|
||||
|
||||
kbdev->shader_inuse_cnt[bitnum]++;
|
||||
kbdev->shader_inuse_bitmap |= bit;
|
||||
|
||||
shader_cores &= ~bit;
|
||||
}
|
||||
|
||||
if (tiler_required) {
|
||||
KBASE_DEBUG_ASSERT(kbdev->tiler_needed_cnt > 0);
|
||||
|
||||
--kbdev->tiler_needed_cnt;
|
||||
|
||||
kbdev->tiler_inuse_cnt++;
|
||||
|
||||
KBASE_DEBUG_ASSERT(kbdev->tiler_inuse_cnt != 0);
|
||||
}
|
||||
|
||||
if (prev_shader_needed != kbdev->shader_needed_bitmap)
|
||||
KBASE_TRACE_ADD(kbdev, PM_REGISTER_CHANGE_SHADER_NEEDED, NULL,
|
||||
NULL, 0u, (u32) kbdev->shader_needed_bitmap);
|
||||
|
||||
if (prev_shader_inuse != kbdev->shader_inuse_bitmap)
|
||||
KBASE_TRACE_ADD(kbdev, PM_REGISTER_CHANGE_SHADER_INUSE, NULL,
|
||||
NULL, 0u, (u32) kbdev->shader_inuse_bitmap);
|
||||
|
||||
return KBASE_CORES_READY;
|
||||
}
|
||||
|
||||
KBASE_EXPORT_TEST_API(kbase_pm_register_inuse_cores);
|
||||
|
||||
void kbase_pm_release_cores(struct kbase_device *kbdev,
|
||||
bool tiler_required, u64 shader_cores)
|
||||
{
|
||||
kbase_pm_change_state change_gpu_state = 0u;
|
||||
|
||||
KBASE_DEBUG_ASSERT(kbdev != NULL);
|
||||
|
||||
lockdep_assert_held(&kbdev->hwaccess_lock);
|
||||
|
||||
while (shader_cores) {
|
||||
int bitnum = fls64(shader_cores) - 1;
|
||||
u64 bit = 1ULL << bitnum;
|
||||
int cnt;
|
||||
|
||||
KBASE_DEBUG_ASSERT(kbdev->shader_inuse_cnt[bitnum] > 0);
|
||||
|
||||
cnt = --kbdev->shader_inuse_cnt[bitnum];
|
||||
|
||||
if (0 == cnt) {
|
||||
kbdev->shader_inuse_bitmap &= ~bit;
|
||||
change_gpu_state |= KBASE_PM_CHANGE_STATE_SHADER;
|
||||
}
|
||||
|
||||
shader_cores &= ~bit;
|
||||
}
|
||||
|
||||
if (tiler_required) {
|
||||
int cnt;
|
||||
|
||||
KBASE_DEBUG_ASSERT(kbdev->tiler_inuse_cnt > 0);
|
||||
|
||||
cnt = --kbdev->tiler_inuse_cnt;
|
||||
|
||||
if (0 == cnt)
|
||||
change_gpu_state |= KBASE_PM_CHANGE_STATE_TILER;
|
||||
}
|
||||
|
||||
if (change_gpu_state) {
|
||||
KBASE_TRACE_ADD(kbdev, PM_RELEASE_CHANGE_SHADER_INUSE, NULL,
|
||||
NULL, 0u, (u32) kbdev->shader_inuse_bitmap);
|
||||
|
||||
kbase_timeline_pm_cores_func(kbdev,
|
||||
KBASE_PM_FUNC_ID_RELEASE_CORES_START,
|
||||
change_gpu_state);
|
||||
kbase_pm_update_cores_state_nolock(kbdev);
|
||||
kbase_timeline_pm_cores_func(kbdev,
|
||||
KBASE_PM_FUNC_ID_RELEASE_CORES_END,
|
||||
change_gpu_state);
|
||||
|
||||
/* Trace that any state change completed immediately */
|
||||
kbase_pm_trace_check_and_finish_state_change(kbdev);
|
||||
}
|
||||
}
|
||||
|
||||
KBASE_EXPORT_TEST_API(kbase_pm_release_cores);
|
||||
|
||||
void kbase_pm_request_cores_sync(struct kbase_device *kbdev,
|
||||
bool tiler_required,
|
||||
u64 shader_cores)
|
||||
{
|
||||
unsigned long flags;
|
||||
|
||||
kbase_pm_wait_for_poweroff_complete(kbdev);
|
||||
|
||||
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
|
||||
kbase_pm_request_cores(kbdev, tiler_required, shader_cores);
|
||||
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
|
||||
|
||||
kbase_pm_check_transitions_sync(kbdev);
|
||||
}
|
||||
|
||||
KBASE_EXPORT_TEST_API(kbase_pm_request_cores_sync);
|
||||
|
||||
void kbase_pm_request_l2_caches(struct kbase_device *kbdev)
|
||||
{
|
||||
unsigned long flags;
|
||||
|
||||
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
|
||||
|
||||
kbdev->l2_users_count++;
|
||||
|
||||
KBASE_DEBUG_ASSERT(kbdev->l2_users_count != 0);
|
||||
|
||||
/* Check for the required L2 transitions.
|
||||
* Caller would block here for the L2 caches of all core groups to be
|
||||
* powered on, so need to inform the Hw to power up all the L2 caches.
|
||||
* Can't rely on the l2_users_count value being non-zero previously to
|
||||
* avoid checking for the transition, as the count could be non-zero
|
||||
* even if not all the instances of L2 cache are powered up since
|
||||
* currently the power status of L2 is not tracked separately for each
|
||||
* core group. Also if the GPU is reset while the L2 is on, L2 will be
|
||||
* off but the count will be non-zero.
|
||||
*/
|
||||
kbase_pm_check_transitions_nolock(kbdev);
|
||||
|
||||
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
|
||||
wait_event(kbdev->pm.backend.l2_powered_wait,
|
||||
kbdev->pm.backend.l2_powered == 1);
|
||||
|
||||
/* Trace that any state change completed immediately */
|
||||
kbase_pm_trace_check_and_finish_state_change(kbdev);
|
||||
}
|
||||
|
||||
KBASE_EXPORT_TEST_API(kbase_pm_request_l2_caches);
|
||||
|
||||
void kbase_pm_request_l2_caches_l2_is_on(struct kbase_device *kbdev)
|
||||
{
|
||||
lockdep_assert_held(&kbdev->hwaccess_lock);
|
||||
|
||||
kbdev->l2_users_count++;
|
||||
}
|
||||
|
||||
KBASE_EXPORT_TEST_API(kbase_pm_request_l2_caches_l2_is_on);
|
||||
|
||||
void kbase_pm_release_l2_caches(struct kbase_device *kbdev)
|
||||
{
|
||||
lockdep_assert_held(&kbdev->hwaccess_lock);
|
||||
|
||||
KBASE_DEBUG_ASSERT(kbdev->l2_users_count > 0);
|
||||
|
||||
--kbdev->l2_users_count;
|
||||
|
||||
if (!kbdev->l2_users_count) {
|
||||
kbase_pm_check_transitions_nolock(kbdev);
|
||||
/* Trace that any state change completed immediately */
|
||||
kbase_pm_trace_check_and_finish_state_change(kbdev);
|
||||
}
|
||||
}
|
||||
|
||||
KBASE_EXPORT_TEST_API(kbase_pm_release_l2_caches);
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
/*
|
||||
*
|
||||
* (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved.
|
||||
* (C) COPYRIGHT 2010-2015, 2018 ARM Limited. All rights reserved.
|
||||
*
|
||||
* This program is free software and is provided to you under the terms of the
|
||||
* GNU General Public License version 2 as published by the Free Software
|
||||
@@ -64,169 +64,46 @@ void kbase_pm_update_active(struct kbase_device *kbdev);
|
||||
*/
|
||||
void kbase_pm_update_cores(struct kbase_device *kbdev);
|
||||
|
||||
|
||||
/* Result of trying to register a set of cores as in use by a job */
enum kbase_pm_cores_ready {
	/* The cores are not immediately ready */
	KBASE_CORES_NOT_READY = 0,
	/* The affinity requested is not allowed */
	KBASE_NEW_AFFINITY = 1,
	/* The cores requested are already available */
	KBASE_CORES_READY = 2
};
|
||||
|
||||
|
||||
/**
|
||||
* kbase_pm_request_cores_sync - Synchronous variant of kbase_pm_request_cores()
|
||||
* kbase_pm_cores_requested - Check that a power request has been locked into
|
||||
* the HW.
|
||||
* @kbdev: Kbase device
|
||||
* @shader_required: true if shaders are required
|
||||
*
|
||||
* @kbdev: The kbase device structure for the device
|
||||
* @tiler_required: true if the tiler is required, false otherwise
|
||||
* @shader_cores: A bitmask of shader cores which are necessary for the job
|
||||
* Called by the scheduler to check if a power on request has been locked into
|
||||
* the HW.
|
||||
*
|
||||
* When this function returns, the @shader_cores will be in the READY state.
|
||||
* Note that there is no guarantee that the cores are actually ready, however
|
||||
* when the request has been locked into the HW, then it is safe to submit work
|
||||
* since the HW will wait for the transition to ready.
|
||||
*
|
||||
* This is safe variant of kbase_pm_check_transitions_sync(): it handles the
|
||||
* work of ensuring the requested cores will remain powered until a matching
|
||||
* call to kbase_pm_unrequest_cores()/kbase_pm_release_cores() (as appropriate)
|
||||
* is made.
|
||||
* A reference must first be taken prior to making this call.
|
||||
*
|
||||
* Caller must hold the hwaccess_lock.
|
||||
*
|
||||
* Return: true if the request to the HW was successfully made else false if the
|
||||
* request is still pending.
|
||||
*/
|
||||
void kbase_pm_request_cores_sync(struct kbase_device *kbdev,
|
||||
bool tiler_required, u64 shader_cores);
|
||||
static inline bool kbase_pm_cores_requested(struct kbase_device *kbdev,
|
||||
bool shader_required)
|
||||
{
|
||||
lockdep_assert_held(&kbdev->hwaccess_lock);
|
||||
|
||||
/**
|
||||
* kbase_pm_request_cores - Mark one or more cores as being required
|
||||
* for jobs to be submitted
|
||||
*
|
||||
* @kbdev: The kbase device structure for the device
|
||||
* @tiler_required: true if the tiler is required, false otherwise
|
||||
* @shader_cores: A bitmask of shader cores which are necessary for the job
|
||||
*
|
||||
* This function is called by the job scheduler to mark one or more cores as
|
||||
* being required to submit jobs that are ready to run.
|
||||
*
|
||||
* The cores requested are reference counted and a subsequent call to
|
||||
* kbase_pm_register_inuse_cores() or kbase_pm_unrequest_cores() should be
|
||||
* made to dereference the cores as being 'needed'.
|
||||
*
|
||||
* The active power policy will meet or exceed the requirements of the
|
||||
* requested cores in the system. Any core transitions needed will be begun
|
||||
* immediately, but they might not complete/the cores might not be available
|
||||
* until a Power Management IRQ.
|
||||
*
|
||||
* Return: 0 if the cores were successfully requested, or -errno otherwise.
|
||||
*/
|
||||
void kbase_pm_request_cores(struct kbase_device *kbdev,
|
||||
bool tiler_required, u64 shader_cores);
|
||||
/* If the L2 & tiler are not on or pending, then the tiler is not yet
|
||||
* available, and shaders are definitely not powered.
|
||||
*/
|
||||
if (kbdev->pm.backend.l2_state != KBASE_L2_PEND_ON &&
|
||||
kbdev->pm.backend.l2_state != KBASE_L2_ON &&
|
||||
kbdev->pm.backend.l2_state != KBASE_L2_ON_HWCNT_ENABLE)
|
||||
return false;
|
||||
|
||||
/**
|
||||
* kbase_pm_unrequest_cores - Unmark one or more cores as being required for
|
||||
* jobs to be submitted.
|
||||
*
|
||||
* @kbdev: The kbase device structure for the device
|
||||
* @tiler_required: true if the tiler is required, false otherwise
|
||||
* @shader_cores: A bitmask of shader cores (as given to
|
||||
* kbase_pm_request_cores() )
|
||||
*
|
||||
* This function undoes the effect of kbase_pm_request_cores(). It should be
|
||||
* used when a job is not going to be submitted to the hardware (e.g. the job is
|
||||
* cancelled before it is enqueued).
|
||||
*
|
||||
* The active power policy will meet or exceed the requirements of the
|
||||
* requested cores in the system. Any core transitions needed will be begun
|
||||
* immediately, but they might not complete until a Power Management IRQ.
|
||||
*
|
||||
* The policy may use this as an indication that it can power down cores.
|
||||
*/
|
||||
void kbase_pm_unrequest_cores(struct kbase_device *kbdev,
|
||||
bool tiler_required, u64 shader_cores);
|
||||
if (shader_required &&
|
||||
kbdev->pm.backend.shaders_state != KBASE_SHADERS_PEND_ON_CORESTACK_ON &&
|
||||
kbdev->pm.backend.shaders_state != KBASE_SHADERS_ON_CORESTACK_ON &&
|
||||
kbdev->pm.backend.shaders_state != KBASE_SHADERS_ON_CORESTACK_ON_RECHECK)
|
||||
return false;
|
||||
|
||||
/**
|
||||
* kbase_pm_register_inuse_cores - Register a set of cores as in use by a job
|
||||
*
|
||||
* @kbdev: The kbase device structure for the device
|
||||
* @tiler_required: true if the tiler is required, false otherwise
|
||||
* @shader_cores: A bitmask of shader cores (as given to
|
||||
* kbase_pm_request_cores() )
|
||||
*
|
||||
* This function should be called after kbase_pm_request_cores() when the job
|
||||
* is about to be submitted to the hardware. It will check that the necessary
|
||||
* cores are available and if so update the 'needed' and 'inuse' bitmasks to
|
||||
* reflect that the job is now committed to being run.
|
||||
*
|
||||
* If the necessary cores are not currently available then the function will
|
||||
* return %KBASE_CORES_NOT_READY and have no effect.
|
||||
*
|
||||
* Return: %KBASE_CORES_NOT_READY if the cores are not immediately ready,
|
||||
*
|
||||
* %KBASE_NEW_AFFINITY if the affinity requested is not allowed,
|
||||
*
|
||||
* %KBASE_CORES_READY if the cores requested are already available
|
||||
*/
|
||||
enum kbase_pm_cores_ready kbase_pm_register_inuse_cores(
|
||||
struct kbase_device *kbdev,
|
||||
bool tiler_required,
|
||||
u64 shader_cores);
|
||||
|
||||
/**
|
||||
* kbase_pm_release_cores - Release cores after a job has run
|
||||
*
|
||||
* @kbdev: The kbase device structure for the device
|
||||
* @tiler_required: true if the tiler is required, false otherwise
|
||||
* @shader_cores: A bitmask of shader cores (as given to
|
||||
* kbase_pm_register_inuse_cores() )
|
||||
*
|
||||
* This function should be called when a job has finished running on the
|
||||
* hardware. A call to kbase_pm_register_inuse_cores() must have previously
|
||||
* occurred. The reference counts of the specified cores will be decremented
|
||||
* which may cause the bitmask of 'inuse' cores to be reduced. The power policy
|
||||
* may then turn off any cores which are no longer 'inuse'.
|
||||
*/
|
||||
void kbase_pm_release_cores(struct kbase_device *kbdev,
|
||||
bool tiler_required, u64 shader_cores);
|
||||
|
||||
/**
|
||||
* kbase_pm_request_l2_caches - Request l2 caches
|
||||
*
|
||||
* @kbdev: The kbase device structure for the device (must be a valid pointer)
|
||||
*
|
||||
* Request the use of l2 caches for all core groups, power up, wait and prevent
|
||||
* the power manager from powering down the l2 caches.
|
||||
*
|
||||
* This tells the power management that the caches should be powered up, and
|
||||
* they should remain powered, irrespective of the usage of shader cores. This
|
||||
* does not return until the l2 caches are powered up.
|
||||
*
|
||||
* The caller must call kbase_pm_release_l2_caches() when they are finished
|
||||
* to allow normal power management of the l2 caches to resume.
|
||||
*
|
||||
* This should only be used when power management is active.
|
||||
*/
|
||||
void kbase_pm_request_l2_caches(struct kbase_device *kbdev);
|
||||
|
||||
/**
|
||||
* kbase_pm_request_l2_caches_l2_is_on - Request l2 caches but don't power on
|
||||
*
|
||||
* @kbdev: The kbase device structure for the device (must be a valid pointer)
|
||||
*
|
||||
* Increment the count of l2 users but do not attempt to power on the l2
|
||||
*
|
||||
* It is the caller's responsibility to ensure that the l2 is already powered up
|
||||
* and to eventually call kbase_pm_release_l2_caches()
|
||||
*/
|
||||
void kbase_pm_request_l2_caches_l2_is_on(struct kbase_device *kbdev);
|
||||
|
||||
/**
|
||||
* kbase_pm_release_l2_caches - Release l2 caches
|
||||
*
|
||||
* @kbdev: The kbase device structure for the device (must be a valid pointer)
|
||||
*
|
||||
* Release the use of l2 caches for all core groups and allow the power manager
|
||||
* to power them down when necessary.
|
||||
*
|
||||
* This tells the power management that the caches can be powered down if
|
||||
* necessary, with respect to the usage of shader cores.
|
||||
*
|
||||
* The caller must have called kbase_pm_request_l2_caches() prior to a call
|
||||
* to this.
|
||||
*
|
||||
* This should only be used when power management is active.
|
||||
*/
|
||||
void kbase_pm_release_l2_caches(struct kbase_device *kbdev);
|
||||
return true;
|
||||
}
|
||||
|
||||
#endif /* _KBASE_PM_POLICY_H_ */
|
||||
|
||||
@@ -0,0 +1,42 @@
|
||||
/*
|
||||
*
|
||||
* (C) COPYRIGHT 2019 ARM Limited. All rights reserved.
|
||||
*
|
||||
* This program is free software and is provided to you under the terms of the
|
||||
* GNU General Public License version 2 as published by the Free Software
|
||||
* Foundation, and any use by you of this program is subject to the terms
|
||||
* of such GNU licence.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, you can access it online at
|
||||
* http://www.gnu.org/licenses/gpl-2.0.html.
|
||||
*
|
||||
* SPDX-License-Identifier: GPL-2.0
|
||||
*
|
||||
*/
|
||||
|
||||
/*
|
||||
* Backend-specific Power Manager shader core state definitions.
|
||||
* The function-like macro KBASEP_SHADER_STATE() must be defined before
|
||||
* including this header file. This header file can be included multiple
|
||||
* times in the same compilation unit with different definitions of
|
||||
* KBASEP_SHADER_STATE().
|
||||
*/
|
||||
KBASEP_SHADER_STATE(OFF_CORESTACK_OFF)
|
||||
KBASEP_SHADER_STATE(OFF_CORESTACK_PEND_ON)
|
||||
KBASEP_SHADER_STATE(PEND_ON_CORESTACK_ON)
|
||||
KBASEP_SHADER_STATE(ON_CORESTACK_ON)
|
||||
KBASEP_SHADER_STATE(ON_CORESTACK_ON_RECHECK)
|
||||
KBASEP_SHADER_STATE(WAIT_OFF_CORESTACK_ON)
|
||||
KBASEP_SHADER_STATE(WAIT_FINISHED_CORESTACK_ON)
|
||||
KBASEP_SHADER_STATE(L2_FLUSHING_CORESTACK_ON)
|
||||
KBASEP_SHADER_STATE(READY_OFF_CORESTACK_ON)
|
||||
KBASEP_SHADER_STATE(PEND_OFF_CORESTACK_ON)
|
||||
KBASEP_SHADER_STATE(OFF_CORESTACK_PEND_OFF)
|
||||
KBASEP_SHADER_STATE(OFF_CORESTACK_OFF_TIMER_PEND_OFF)
|
||||
KBASEP_SHADER_STATE(RESET_WAIT)
|
||||
@@ -1,6 +1,6 @@
|
||||
/*
|
||||
*
|
||||
* (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
|
||||
* (C) COPYRIGHT 2014-2016,2018-2019 ARM Limited. All rights reserved.
|
||||
*
|
||||
* This program is free software and is provided to you under the terms of the
|
||||
* GNU General Public License version 2 as published by the Free Software
|
||||
@@ -32,39 +32,42 @@ void kbase_backend_get_gpu_time(struct kbase_device *kbdev, u64 *cycle_counter,
|
||||
|
||||
kbase_pm_request_gpu_cycle_counter(kbdev);
|
||||
|
||||
/* Read hi, lo, hi to ensure that overflow from lo to hi is handled
|
||||
* correctly */
|
||||
do {
|
||||
hi1 = kbase_reg_read(kbdev, GPU_CONTROL_REG(CYCLE_COUNT_HI),
|
||||
NULL);
|
||||
*cycle_counter = kbase_reg_read(kbdev,
|
||||
GPU_CONTROL_REG(CYCLE_COUNT_LO), NULL);
|
||||
hi2 = kbase_reg_read(kbdev, GPU_CONTROL_REG(CYCLE_COUNT_HI),
|
||||
NULL);
|
||||
if (cycle_counter) {
|
||||
/* Read hi, lo, hi to ensure a coherent u64 */
|
||||
do {
|
||||
hi1 = kbase_reg_read(kbdev,
|
||||
GPU_CONTROL_REG(CYCLE_COUNT_HI));
|
||||
*cycle_counter = kbase_reg_read(kbdev,
|
||||
GPU_CONTROL_REG(CYCLE_COUNT_LO));
|
||||
hi2 = kbase_reg_read(kbdev,
|
||||
GPU_CONTROL_REG(CYCLE_COUNT_HI));
|
||||
} while (hi1 != hi2);
|
||||
*cycle_counter |= (((u64) hi1) << 32);
|
||||
} while (hi1 != hi2);
|
||||
}
|
||||
|
||||
/* Read hi, lo, hi to ensure that overflow from lo to hi is handled
|
||||
* correctly */
|
||||
do {
|
||||
hi1 = kbase_reg_read(kbdev, GPU_CONTROL_REG(TIMESTAMP_HI),
|
||||
NULL);
|
||||
*system_time = kbase_reg_read(kbdev,
|
||||
GPU_CONTROL_REG(TIMESTAMP_LO), NULL);
|
||||
hi2 = kbase_reg_read(kbdev, GPU_CONTROL_REG(TIMESTAMP_HI),
|
||||
NULL);
|
||||
if (system_time) {
|
||||
/* Read hi, lo, hi to ensure a coherent u64 */
|
||||
do {
|
||||
hi1 = kbase_reg_read(kbdev,
|
||||
GPU_CONTROL_REG(TIMESTAMP_HI));
|
||||
*system_time = kbase_reg_read(kbdev,
|
||||
GPU_CONTROL_REG(TIMESTAMP_LO));
|
||||
hi2 = kbase_reg_read(kbdev,
|
||||
GPU_CONTROL_REG(TIMESTAMP_HI));
|
||||
} while (hi1 != hi2);
|
||||
*system_time |= (((u64) hi1) << 32);
|
||||
} while (hi1 != hi2);
|
||||
}
|
||||
|
||||
/* Record the CPU's idea of current time */
|
||||
getrawmonotonic(ts);
|
||||
if (ts != NULL)
|
||||
getrawmonotonic(ts);
|
||||
|
||||
kbase_pm_release_gpu_cycle_counter(kbdev);
|
||||
}
|
||||
|
||||
/**
|
||||
* kbase_wait_write_flush - Wait for GPU write flush
|
||||
* @kctx: Context pointer
|
||||
* @kbdev: Kbase device
|
||||
*
|
||||
* Wait 1000 GPU clock cycles. This delay is known to give the GPU time to flush
|
||||
* its write buffer.
|
||||
@@ -75,7 +78,7 @@ void kbase_backend_get_gpu_time(struct kbase_device *kbdev, u64 *cycle_counter,
|
||||
* not be as expected.
|
||||
*/
|
||||
#ifndef CONFIG_MALI_BIFROST_NO_MALI
|
||||
void kbase_wait_write_flush(struct kbase_context *kctx)
|
||||
void kbase_wait_write_flush(struct kbase_device *kbdev)
|
||||
{
|
||||
u32 base_count = 0;
|
||||
|
||||
@@ -83,14 +86,14 @@ void kbase_wait_write_flush(struct kbase_context *kctx)
|
||||
* The caller must be holding onto the kctx or the call is from
|
||||
* userspace.
|
||||
*/
|
||||
kbase_pm_context_active(kctx->kbdev);
|
||||
kbase_pm_request_gpu_cycle_counter(kctx->kbdev);
|
||||
kbase_pm_context_active(kbdev);
|
||||
kbase_pm_request_gpu_cycle_counter(kbdev);
|
||||
|
||||
while (true) {
|
||||
u32 new_count;
|
||||
|
||||
new_count = kbase_reg_read(kctx->kbdev,
|
||||
GPU_CONTROL_REG(CYCLE_COUNT_LO), NULL);
|
||||
new_count = kbase_reg_read(kbdev,
|
||||
GPU_CONTROL_REG(CYCLE_COUNT_LO));
|
||||
/* First time around, just store the count. */
|
||||
if (base_count == 0) {
|
||||
base_count = new_count;
|
||||
@@ -102,7 +105,7 @@ void kbase_wait_write_flush(struct kbase_context *kctx)
|
||||
break;
|
||||
}
|
||||
|
||||
kbase_pm_release_gpu_cycle_counter(kctx->kbdev);
|
||||
kbase_pm_context_idle(kctx->kbdev);
|
||||
kbase_pm_release_gpu_cycle_counter(kbdev);
|
||||
kbase_pm_context_idle(kbdev);
|
||||
}
|
||||
#endif /* CONFIG_MALI_BIFROST_NO_MALI */
|
||||
|
||||
@@ -1,57 +0,0 @@
|
||||
/*
|
||||
*
|
||||
* (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
|
||||
*
|
||||
* This program is free software and is provided to you under the terms of the
|
||||
* GNU General Public License version 2 as published by the Free Software
|
||||
* Foundation, and any use by you of this program is subject to the terms
|
||||
* of such GNU licence.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, you can access it online at
|
||||
* http://www.gnu.org/licenses/gpl-2.0.html.
|
||||
*
|
||||
* SPDX-License-Identifier: GPL-2.0
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef _KBASE_BACKEND_TIME_H_
|
||||
#define _KBASE_BACKEND_TIME_H_
|
||||
|
||||
/**
|
||||
* kbase_backend_get_gpu_time() - Get current GPU time
|
||||
* @kbdev: Device pointer
|
||||
* @cycle_counter: Pointer to u64 to store cycle counter in
|
||||
* @system_time: Pointer to u64 to store system time in
|
||||
* @ts: Pointer to struct timespec to store current monotonic
|
||||
* time in
|
||||
*/
|
||||
void kbase_backend_get_gpu_time(struct kbase_device *kbdev, u64 *cycle_counter,
|
||||
u64 *system_time, struct timespec *ts);
|
||||
|
||||
/**
|
||||
* kbase_wait_write_flush() - Wait for GPU write flush
|
||||
* @kctx: Context pointer
|
||||
*
|
||||
* Wait 1000 GPU clock cycles. This delay is known to give the GPU time to flush
|
||||
* its write buffer.
|
||||
*
|
||||
* If GPU resets occur then the counters are reset to zero, the delay may not be
|
||||
* as expected.
|
||||
*
|
||||
* This function is only in use for BASE_HW_ISSUE_6367
|
||||
*/
|
||||
#ifdef CONFIG_MALI_BIFROST_NO_MALI
|
||||
static inline void kbase_wait_write_flush(struct kbase_context *kctx)
|
||||
{
|
||||
}
|
||||
#else
|
||||
void kbase_wait_write_flush(struct kbase_context *kctx);
|
||||
#endif
|
||||
|
||||
#endif /* _KBASE_BACKEND_TIME_H_ */
|
||||
@@ -3,7 +3,7 @@
|
||||
* ----------------------------------------------------------------------------
|
||||
* This confidential and proprietary software may be used only as authorized
|
||||
* by a licensing agreement from ARM Limited.
|
||||
* (C) COPYRIGHT 2017-2018 ARM Limited, ALL RIGHTS RESERVED
|
||||
* (C) COPYRIGHT 2017-2019 ARM Limited, ALL RIGHTS RESERVED
|
||||
* The entire notice above must be reproduced on all authorized copies and
|
||||
* copies may only be made to the extent permitted by a licensing agreement
|
||||
* from ARM Limited.
|
||||
@@ -19,8 +19,8 @@ bob_defaults {
|
||||
no_mali: {
|
||||
kbuild_options: ["CONFIG_MALI_BIFROST_NO_MALI=y"],
|
||||
},
|
||||
mali_corestack: {
|
||||
kbuild_options: ["CONFIG_MALI_CORESTACK=y"],
|
||||
mali_real_hw: {
|
||||
kbuild_options: ["CONFIG_MALI_REAL_HW=y"],
|
||||
},
|
||||
mali_devfreq: {
|
||||
kbuild_options: ["CONFIG_MALI_BIFROST_DEVFREQ=y"],
|
||||
@@ -28,27 +28,43 @@ bob_defaults {
|
||||
mali_midgard_dvfs: {
|
||||
kbuild_options: ["CONFIG_MALI_BIFROST_DVFS=y"],
|
||||
},
|
||||
mali_trace_timeline: {
|
||||
kbuild_options: ["CONFIG_MALI_BIFROST_TRACE_TIMELINE=y"],
|
||||
},
|
||||
mali_debug: {
|
||||
kbuild_options: ["CONFIG_MALI_BIFROST_DEBUG=y"],
|
||||
},
|
||||
mali_fpga_bus_logger: {
|
||||
kbuild_options: ["CONFIG_MALI_FPGA_BUS_LOGGER=y"],
|
||||
buslog: {
|
||||
kbuild_options: ["CONFIG_MALI_BUSLOG=y"],
|
||||
},
|
||||
cinstr_job_dump: {
|
||||
kbuild_options: ["CONFIG_MALI_JOB_DUMP=y"],
|
||||
},
|
||||
cinstr_vector_dump: {
|
||||
kbuild_options: ["CONFIG_MALI_VECTOR_DUMP=y"],
|
||||
},
|
||||
cinstr_gwt: {
|
||||
kbuild_options: ["CONFIG_MALI_CINSTR_GWT=y"],
|
||||
},
|
||||
mali_gator_support: {
|
||||
kbuild_options: ["CONFIG_MALI_BIFROST_GATOR_SUPPORT=y"],
|
||||
},
|
||||
mali_system_trace: {
|
||||
kbuild_options: ["CONFIG_MALI_BIFROST_SYSTEM_TRACE=y"],
|
||||
},
|
||||
mali_pwrsoft_765: {
|
||||
kbuild_options: ["CONFIG_MALI_PWRSOFT_765=y"],
|
||||
},
|
||||
mali_memory_fully_backed: {
|
||||
kbuild_options: ["CONFIG_MALI_MEMORY_FULLY_BACKED=y"],
|
||||
},
|
||||
mali_dma_buf_map_on_demand: {
|
||||
kbuild_options: ["CONFIG_MALI_DMA_BUF_MAP_ON_DEMAND=y"],
|
||||
},
|
||||
mali_dma_buf_legacy_compat: {
|
||||
kbuild_options: ["CONFIG_MALI_DMA_BUF_LEGACY_COMPAT=y"],
|
||||
},
|
||||
kbuild_options: [
|
||||
"MALI_UNIT_TEST={{.unit_test_code}}",
|
||||
"MALI_CUSTOMER_RELEASE={{.release}}",
|
||||
"MALI_USE_CSF={{.gpu_has_csf}}",
|
||||
"MALI_KERNEL_TEST_API={{.debug}}",
|
||||
],
|
||||
defaults: ["kernel_defaults"],
|
||||
@@ -77,25 +93,42 @@ bob_kernel_module {
|
||||
"CONFIG_MALI_MIDGARD=m",
|
||||
"CONFIG_MALI_NO_MALI_DEFAULT_GPU={{.gpu}}",
|
||||
"CONFIG_MALI_PLATFORM_NAME={{.mali_platform_name}}",
|
||||
"MALI_KERNEL_TEST_API={{.unit_test_code}}",
|
||||
"MALI_MOCK_TEST={{.mali_mock_test}}",
|
||||
],
|
||||
buslog: {
|
||||
extra_symbols: [
|
||||
"bus_logger",
|
||||
],
|
||||
},
|
||||
mali_corestack: {
|
||||
kbuild_options: ["CONFIG_MALI_CORESTACK=y"],
|
||||
},
|
||||
mali_platform_power_down_only: {
|
||||
kbuild_options: ["CONFIG_MALI_PLATFORM_POWER_DOWN_ONLY=y"],
|
||||
},
|
||||
mali_error_inject: {
|
||||
kbuild_options: ["CONFIG_MALI_BIFROST_ERROR_INJECT=y"],
|
||||
},
|
||||
mali_error_inject_random: {
|
||||
kbuild_options: ["CONFIG_MALI_ERROR_INJECT_RANDOM=y"],
|
||||
},
|
||||
mali_prfcnt_set_secondary: {
|
||||
cinstr_secondary_hwc: {
|
||||
kbuild_options: ["CONFIG_MALI_BIFROST_PRFCNT_SET_SECONDARY=y"],
|
||||
},
|
||||
mali_2mb_alloc: {
|
||||
kbuild_options: ["CONFIG_MALI_2MB_ALLOC=y"],
|
||||
},
|
||||
mali_mock_test: {
|
||||
srcs: ["tests/internal/src/mock/mali_kbase_pm_driver_mock.c"],
|
||||
mali_hw_errata_1485982_not_affected: {
|
||||
kbuild_options: ["CONFIG_MALI_HW_ERRATA_1485982_NOT_AFFECTED=y"],
|
||||
},
|
||||
mali_hw_errata_1485982_use_clock_alternative: {
|
||||
kbuild_options: ["CONFIG_MALI_HW_ERRATA_1485982_USE_CLOCK_ALTERNATIVE=y"],
|
||||
},
|
||||
gpu_has_csf: {
|
||||
srcs: [
|
||||
"csf/*.c",
|
||||
"csf/*.h",
|
||||
"csf/Kbuild",
|
||||
],
|
||||
},
|
||||
defaults: ["mali_kbase_shared_config_defaults"],
|
||||
}
|
||||
|
||||
optional_subdirs = ["tests"]
|
||||
|
||||
@@ -1,132 +0,0 @@
|
||||
#
|
||||
# (C) COPYRIGHT 2011-2013, 2015, 2017 ARM Limited. All rights reserved.
|
||||
#
|
||||
# This program is free software and is provided to you under the terms of the
|
||||
# GNU General Public License version 2 as published by the Free Software
|
||||
# Foundation, and any use by you of this program is subject to the terms
|
||||
# of such GNU licence.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program; if not, you can access it online at
|
||||
# http://www.gnu.org/licenses/gpl-2.0.html.
|
||||
#
|
||||
# SPDX-License-Identifier: GPL-2.0
|
||||
#
|
||||
#
|
||||
|
||||
|
||||
##############################################################################
|
||||
|
||||
# This file contains per-module Doxygen configuration. Please do not add
|
||||
# extra settings to this file without consulting all stakeholders, as they
|
||||
# may override project-wide settings.
|
||||
#
|
||||
# Additionally, when defining aliases, macros, sections etc, use the module
|
||||
# name as a prefix e.g. gles_my_alias.
|
||||
|
||||
##############################################################################
|
||||
|
||||
@INCLUDE = ../../bldsys/Doxyfile_common
|
||||
|
||||
# The INPUT tag can be used to specify the files and/or directories that contain
|
||||
# documented source files. You may enter file names like "myfile.cpp" or
|
||||
# directories like "/usr/src/myproject". Separate the files or directories
|
||||
# with spaces.
|
||||
|
||||
INPUT += ../../kernel/drivers/gpu/arm/midgard/
|
||||
|
||||
##############################################################################
|
||||
# Everything below here is optional, and in most cases not required
|
||||
##############################################################################
|
||||
|
||||
# This tag can be used to specify a number of aliases that act
|
||||
# as commands in the documentation. An alias has the form "name=value".
|
||||
# For example adding "sideeffect=\par Side Effects:\n" will allow you to
|
||||
# put the command \sideeffect (or @sideeffect) in the documentation, which
|
||||
# will result in a user-defined paragraph with heading "Side Effects:".
|
||||
# You can put \n's in the value part of an alias to insert newlines.
|
||||
|
||||
ALIASES +=
|
||||
|
||||
# The ENABLED_SECTIONS tag can be used to enable conditional
|
||||
# documentation sections, marked by \if sectionname ... \endif.
|
||||
|
||||
ENABLED_SECTIONS +=
|
||||
|
||||
# If the value of the INPUT tag contains directories, you can use the
|
||||
# FILE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp
|
||||
# and *.h) to filter out the source-files in the directories. If left
|
||||
# blank the following patterns are tested:
|
||||
# *.c *.cc *.cxx *.cpp *.c++ *.java *.ii *.ixx *.ipp *.i++ *.inl *.h *.hh *.hxx
|
||||
# *.hpp *.h++ *.idl *.odl *.cs *.php *.php3 *.inc *.m *.mm *.py *.f90
|
||||
|
||||
FILE_PATTERNS +=
|
||||
|
||||
# The EXCLUDE tag can be used to specify files and/or directories that should
|
||||
# be excluded from the INPUT source files. This way you can easily exclude a
|
||||
# subdirectory from a directory tree whose root is specified with the INPUT tag.
|
||||
EXCLUDE += ../../kernel/drivers/gpu/arm/midgard/platform ../../kernel/drivers/gpu/arm/midgard/platform_dummy ../../kernel/drivers/gpu/arm/midgard/scripts ../../kernel/drivers/gpu/arm/midgard/tests ../../kernel/drivers/gpu/arm/midgard/Makefile ../../kernel/drivers/gpu/arm/midgard/Makefile.kbase ../../kernel/drivers/gpu/arm/midgard/Kbuild ../../kernel/drivers/gpu/arm/midgard/Kconfig ../../kernel/drivers/gpu/arm/midgard/sconscript ../../kernel/drivers/gpu/arm/midgard/docs ../../kernel/drivers/gpu/arm/midgard/mali_uk.h ../../kernel/drivers/gpu/arm/midgard/Makefile
|
||||
|
||||
|
||||
# If the value of the INPUT tag contains directories, you can use the
|
||||
# EXCLUDE_PATTERNS tag to specify one or more wildcard patterns to exclude
|
||||
# certain files from those directories. Note that the wildcards are matched
|
||||
# against the file with absolute path, so to exclude all test directories
|
||||
# for example use the pattern */test/*
|
||||
|
||||
EXCLUDE_PATTERNS +=
|
||||
|
||||
# The EXCLUDE_SYMBOLS tag can be used to specify one or more symbol names
|
||||
# (namespaces, classes, functions, etc.) that should be excluded from the
|
||||
# output. The symbol name can be a fully qualified name, a word, or if the
|
||||
# wildcard * is used, a substring. Examples: ANamespace, AClass,
|
||||
# AClass::ANamespace, ANamespace::*Test
|
||||
|
||||
EXCLUDE_SYMBOLS +=
|
||||
|
||||
# The EXAMPLE_PATH tag can be used to specify one or more files or
|
||||
# directories that contain example code fragments that are included (see
|
||||
# the \include command).
|
||||
|
||||
EXAMPLE_PATH +=
|
||||
|
||||
# The IMAGE_PATH tag can be used to specify one or more files or
|
||||
# directories that contain images that are included in the documentation (see
|
||||
# the \image command).
|
||||
|
||||
IMAGE_PATH +=
|
||||
|
||||
# The INCLUDE_PATH tag can be used to specify one or more directories that
|
||||
# contain include files that are not input files but should be processed by
|
||||
# the preprocessor.
|
||||
|
||||
INCLUDE_PATH +=
|
||||
|
||||
# The PREDEFINED tag can be used to specify one or more macro names that
|
||||
# are defined before the preprocessor is started (similar to the -D option of
|
||||
# gcc). The argument of the tag is a list of macros of the form: name
|
||||
# or name=definition (no spaces). If the definition and the = are
|
||||
# omitted =1 is assumed. To prevent a macro definition from being
|
||||
# undefined via #undef or recursively expanded use the := operator
|
||||
# instead of the = operator.
|
||||
|
||||
PREDEFINED +=
|
||||
|
||||
# If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then
|
||||
# this tag can be used to specify a list of macro names that should be expanded.
|
||||
# The macro definition that is found in the sources will be used.
|
||||
# Use the PREDEFINED tag if you want to use a different macro definition.
|
||||
|
||||
EXPAND_AS_DEFINED +=
|
||||
|
||||
# The DOTFILE_DIRS tag can be used to specify one or more directories that
|
||||
# contain dot files that are included in the documentation (see the
|
||||
# \dotfile command).
|
||||
|
||||
DOTFILE_DIRS += ../../kernel/drivers/gpu/arm/midgard/docs
|
||||
|
||||
@@ -1,117 +0,0 @@
|
||||
/*
|
||||
*
|
||||
* (C) COPYRIGHT 2010 ARM Limited. All rights reserved.
|
||||
*
|
||||
* This program is free software and is provided to you under the terms of the
|
||||
* GNU General Public License version 2 as published by the Free Software
|
||||
* Foundation, and any use by you of this program is subject to the terms
|
||||
* of such GNU licence.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, you can access it online at
|
||||
* http://www.gnu.org/licenses/gpl-2.0.html.
|
||||
*
|
||||
* SPDX-License-Identifier: GPL-2.0
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
|
||||
digraph policy_objects_diagram {
|
||||
rankdir=LR;
|
||||
size="12,8";
|
||||
compound=true;
|
||||
|
||||
node [ shape = box ];
|
||||
|
||||
subgraph cluster_policy_queues {
|
||||
low_queue [ shape=record label = "LowP | {<ql>ctx_lo | ... | <qm>ctx_i | ... | <qr>ctx_hi}" ];
|
||||
queues_middle_sep [ label="" shape=plaintext width=0 height=0 ];
|
||||
|
||||
rt_queue [ shape=record label = "RT | {<ql>ctx_lo | ... | <qm>ctx_j | ... | <qr>ctx_hi}" ];
|
||||
|
||||
label = "Policy's Queue(s)";
|
||||
}
|
||||
|
||||
call_enqueue [ shape=plaintext label="enqueue_ctx()" ];
|
||||
|
||||
{
|
||||
rank=same;
|
||||
ordering=out;
|
||||
call_dequeue [ shape=plaintext label="dequeue_head_ctx()\n+ runpool_add_ctx()" ];
|
||||
call_ctxfinish [ shape=plaintext label="runpool_remove_ctx()" ];
|
||||
|
||||
call_ctxdone [ shape=plaintext label="don't requeue;\n/* ctx has no more jobs */" ];
|
||||
}
|
||||
|
||||
subgraph cluster_runpool {
|
||||
|
||||
as0 [ width=2 height = 0.25 label="AS0: Job_1, ..., Job_n" ];
|
||||
as1 [ width=2 height = 0.25 label="AS1: Job_1, ..., Job_m" ];
|
||||
as2 [ width=2 height = 0.25 label="AS2: Job_1, ..., Job_p" ];
|
||||
as3 [ width=2 height = 0.25 label="AS3: Job_1, ..., Job_q" ];
|
||||
|
||||
label = "Policy's Run Pool";
|
||||
}
|
||||
|
||||
{
|
||||
rank=same;
|
||||
call_jdequeue [ shape=plaintext label="dequeue_job()" ];
|
||||
sstop_dotfixup [ shape=plaintext label="" width=0 height=0 ];
|
||||
}
|
||||
|
||||
{
|
||||
rank=same;
|
||||
ordering=out;
|
||||
sstop [ shape=ellipse label="SS-Timer expires" ]
|
||||
jobslots [ shape=record label="Jobslots: | <0>js[0] | <1>js[1] | <2>js[2]" ];
|
||||
|
||||
irq [ label="IRQ" shape=ellipse ];
|
||||
|
||||
job_finish [ shape=plaintext label="don't requeue;\n/* job done */" ];
|
||||
}
|
||||
|
||||
hstop [ shape=ellipse label="HS-Timer expires" ]
|
||||
|
||||
/*
|
||||
* Edges
|
||||
*/
|
||||
|
||||
call_enqueue -> queues_middle_sep [ lhead=cluster_policy_queues ];
|
||||
|
||||
low_queue:qr -> call_dequeue:w;
|
||||
rt_queue:qr -> call_dequeue:w;
|
||||
|
||||
call_dequeue -> as1 [lhead=cluster_runpool];
|
||||
|
||||
as1->call_jdequeue [ltail=cluster_runpool];
|
||||
call_jdequeue->jobslots:0;
|
||||
call_jdequeue->sstop_dotfixup [ arrowhead=none];
|
||||
sstop_dotfixup->sstop [label="Spawn SS-Timer"];
|
||||
sstop->jobslots [label="SoftStop"];
|
||||
sstop->hstop [label="Spawn HS-Timer"];
|
||||
hstop->jobslots:ne [label="HardStop"];
|
||||
|
||||
|
||||
as3->call_ctxfinish:ne [ ltail=cluster_runpool ];
|
||||
call_ctxfinish:sw->rt_queue:qm [ lhead=cluster_policy_queues label="enqueue_ctx()\n/* ctx still has jobs */" ];
|
||||
|
||||
call_ctxfinish->call_ctxdone [constraint=false];
|
||||
|
||||
call_ctxdone->call_enqueue [weight=0.1 labeldistance=20.0 labelangle=0.0 taillabel="Job submitted to the ctx" style=dotted constraint=false];
|
||||
|
||||
|
||||
{
|
||||
jobslots->irq [constraint=false];
|
||||
|
||||
irq->job_finish [constraint=false];
|
||||
}
|
||||
|
||||
irq->as2 [lhead=cluster_runpool label="requeue_job()\n/* timeslice expired */" ];
|
||||
|
||||
}
|
||||
@@ -1,68 +0,0 @@
|
||||
/*
|
||||
*
|
||||
* (C) COPYRIGHT 2010 ARM Limited. All rights reserved.
|
||||
*
|
||||
* This program is free software and is provided to you under the terms of the
|
||||
* GNU General Public License version 2 as published by the Free Software
|
||||
* Foundation, and any use by you of this program is subject to the terms
|
||||
* of such GNU licence.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, you can access it online at
|
||||
* http://www.gnu.org/licenses/gpl-2.0.html.
|
||||
*
|
||||
* SPDX-License-Identifier: GPL-2.0
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
|
||||
digraph policy_objects_diagram {
|
||||
rankdir=LR
|
||||
size="6,6"
|
||||
compound=true;
|
||||
|
||||
node [ shape = box ];
|
||||
|
||||
call_enqueue [ shape=plaintext label="enqueue ctx" ];
|
||||
|
||||
|
||||
policy_queue [ label="Policy's Queue" ];
|
||||
|
||||
{
|
||||
rank=same;
|
||||
runpool [ label="Policy's Run Pool" ];
|
||||
|
||||
ctx_finish [ label="ctx finished" ];
|
||||
}
|
||||
|
||||
{
|
||||
rank=same;
|
||||
jobslots [ shape=record label="Jobslots: | <0>js[0] | <1>js[1] | <2>js[2]" ];
|
||||
|
||||
job_finish [ label="Job finished" ];
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*
|
||||
* Edges
|
||||
*/
|
||||
|
||||
call_enqueue -> policy_queue;
|
||||
|
||||
policy_queue->runpool [label="dequeue ctx" weight=0.1];
|
||||
runpool->policy_queue [label="requeue ctx" weight=0.1];
|
||||
|
||||
runpool->ctx_finish [ style=dotted ];
|
||||
|
||||
runpool->jobslots [label="dequeue job" weight=0.1];
|
||||
jobslots->runpool [label="requeue job" weight=0.1];
|
||||
|
||||
jobslots->job_finish [ style=dotted ];
|
||||
}
|
||||
@@ -21,12 +21,8 @@
|
||||
|
||||
bifrost_kbase-y += \
|
||||
ipa/mali_kbase_ipa_simple.o \
|
||||
ipa/mali_kbase_ipa.o
|
||||
|
||||
bifrost_kbase-$(CONFIG_DEBUG_FS) += ipa/mali_kbase_ipa_debugfs.o
|
||||
|
||||
ifneq ($(wildcard $(srctree)/$(src)/ipa/mali_kbase_ipa_vinstr_common.c),)
|
||||
bifrost_kbase-y += \
|
||||
ipa/mali_kbase_ipa.o \
|
||||
ipa/mali_kbase_ipa_vinstr_g7x.o \
|
||||
ipa/mali_kbase_ipa_vinstr_common.o
|
||||
endif
|
||||
|
||||
bifrost_kbase-$(CONFIG_DEBUG_FS) += ipa/mali_kbase_ipa_debugfs.o
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
/*
|
||||
*
|
||||
* (C) COPYRIGHT 2016-2018 ARM Limited. All rights reserved.
|
||||
* (C) COPYRIGHT 2016-2019 ARM Limited. All rights reserved.
|
||||
*
|
||||
* This program is free software and is provided to you under the terms of the
|
||||
* GNU General Public License version 2 as published by the Free Software
|
||||
@@ -38,15 +38,18 @@
|
||||
#endif
|
||||
|
||||
#define KBASE_IPA_FALLBACK_MODEL_NAME "mali-simple-power-model"
|
||||
#define KBASE_IPA_G71_MODEL_NAME "mali-g71-power-model"
|
||||
#define KBASE_IPA_G72_MODEL_NAME "mali-g72-power-model"
|
||||
#define KBASE_IPA_TNOX_MODEL_NAME "mali-tnox-power-model"
|
||||
|
||||
static struct kbase_ipa_model_ops *kbase_ipa_all_model_ops[] = {
|
||||
static const struct kbase_ipa_model_ops *kbase_ipa_all_model_ops[] = {
|
||||
&kbase_simple_ipa_model_ops,
|
||||
&kbase_g71_ipa_model_ops,
|
||||
&kbase_g72_ipa_model_ops,
|
||||
&kbase_tnox_ipa_model_ops
|
||||
&kbase_g76_ipa_model_ops,
|
||||
&kbase_g52_ipa_model_ops,
|
||||
&kbase_g52_r1_ipa_model_ops,
|
||||
&kbase_g51_ipa_model_ops,
|
||||
&kbase_g77_ipa_model_ops,
|
||||
&kbase_tnax_ipa_model_ops,
|
||||
&kbase_tbex_ipa_model_ops
|
||||
};
|
||||
|
||||
int kbase_ipa_model_recalculate(struct kbase_ipa_model *model)
|
||||
@@ -67,13 +70,13 @@ int kbase_ipa_model_recalculate(struct kbase_ipa_model *model)
|
||||
return err;
|
||||
}
|
||||
|
||||
static struct kbase_ipa_model_ops *kbase_ipa_model_ops_find(struct kbase_device *kbdev,
|
||||
const struct kbase_ipa_model_ops *kbase_ipa_model_ops_find(struct kbase_device *kbdev,
|
||||
const char *name)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(kbase_ipa_all_model_ops); ++i) {
|
||||
struct kbase_ipa_model_ops *ops = kbase_ipa_all_model_ops[i];
|
||||
const struct kbase_ipa_model_ops *ops = kbase_ipa_all_model_ops[i];
|
||||
|
||||
if (!strcmp(ops->name, name))
|
||||
return ops;
|
||||
@@ -83,42 +86,40 @@ static struct kbase_ipa_model_ops *kbase_ipa_model_ops_find(struct kbase_device
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
void kbase_ipa_model_use_fallback_locked(struct kbase_device *kbdev)
|
||||
{
|
||||
atomic_set(&kbdev->ipa_use_configured_model, false);
|
||||
}
|
||||
|
||||
void kbase_ipa_model_use_configured_locked(struct kbase_device *kbdev)
|
||||
{
|
||||
atomic_set(&kbdev->ipa_use_configured_model, true);
|
||||
}
|
||||
KBASE_EXPORT_TEST_API(kbase_ipa_model_ops_find);
|
||||
|
||||
const char *kbase_ipa_model_name_from_id(u32 gpu_id)
|
||||
{
|
||||
const u32 prod_id = (gpu_id & GPU_ID_VERSION_PRODUCT_ID) >>
|
||||
GPU_ID_VERSION_PRODUCT_ID_SHIFT;
|
||||
|
||||
if (GPU_ID_IS_NEW_FORMAT(prod_id)) {
|
||||
switch (GPU_ID2_MODEL_MATCH_VALUE(prod_id)) {
|
||||
case GPU_ID2_PRODUCT_TMIX:
|
||||
return KBASE_IPA_G71_MODEL_NAME;
|
||||
case GPU_ID2_PRODUCT_THEX:
|
||||
return KBASE_IPA_G72_MODEL_NAME;
|
||||
case GPU_ID2_PRODUCT_TNOX:
|
||||
return KBASE_IPA_TNOX_MODEL_NAME;
|
||||
case GPU_ID2_PRODUCT_TGOX:
|
||||
if ((gpu_id & GPU_ID2_VERSION_MAJOR) ==
|
||||
(0 << GPU_ID2_VERSION_MAJOR_SHIFT))
|
||||
/* TGOX r0 shares a power model with TNOX */
|
||||
return KBASE_IPA_TNOX_MODEL_NAME;
|
||||
default:
|
||||
return KBASE_IPA_FALLBACK_MODEL_NAME;
|
||||
}
|
||||
switch (GPU_ID2_MODEL_MATCH_VALUE(prod_id)) {
|
||||
case GPU_ID2_PRODUCT_TMIX:
|
||||
return "mali-g71-power-model";
|
||||
case GPU_ID2_PRODUCT_THEX:
|
||||
return "mali-g72-power-model";
|
||||
case GPU_ID2_PRODUCT_TNOX:
|
||||
return "mali-g76-power-model";
|
||||
case GPU_ID2_PRODUCT_TSIX:
|
||||
return "mali-g51-power-model";
|
||||
case GPU_ID2_PRODUCT_TGOX:
|
||||
if ((gpu_id & GPU_ID2_VERSION_MAJOR) ==
|
||||
(0 << GPU_ID2_VERSION_MAJOR_SHIFT))
|
||||
/* g52 aliased to g76 power-model's ops */
|
||||
return "mali-g52-power-model";
|
||||
else
|
||||
return "mali-g52_r1-power-model";
|
||||
case GPU_ID2_PRODUCT_TNAX:
|
||||
return "mali-tnax-power-model";
|
||||
case GPU_ID2_PRODUCT_TTRX:
|
||||
return "mali-g77-power-model";
|
||||
case GPU_ID2_PRODUCT_TBEX:
|
||||
return "mali-tbex-power-model";
|
||||
default:
|
||||
return KBASE_IPA_FALLBACK_MODEL_NAME;
|
||||
}
|
||||
|
||||
return KBASE_IPA_FALLBACK_MODEL_NAME;
|
||||
}
|
||||
KBASE_EXPORT_TEST_API(kbase_ipa_model_name_from_id);
|
||||
|
||||
static struct device_node *get_model_dt_node(struct kbase_ipa_model *model)
|
||||
{
|
||||
@@ -251,7 +252,7 @@ void kbase_ipa_term_model(struct kbase_ipa_model *model)
|
||||
KBASE_EXPORT_TEST_API(kbase_ipa_term_model);
|
||||
|
||||
struct kbase_ipa_model *kbase_ipa_init_model(struct kbase_device *kbdev,
|
||||
struct kbase_ipa_model_ops *ops)
|
||||
const struct kbase_ipa_model_ops *ops)
|
||||
{
|
||||
struct kbase_ipa_model *model;
|
||||
int err;
|
||||
@@ -305,7 +306,7 @@ int kbase_ipa_init(struct kbase_device *kbdev)
|
||||
{
|
||||
|
||||
const char *model_name;
|
||||
struct kbase_ipa_model_ops *ops;
|
||||
const struct kbase_ipa_model_ops *ops;
|
||||
struct kbase_ipa_model *default_model = NULL;
|
||||
int err;
|
||||
|
||||
@@ -359,8 +360,6 @@ int kbase_ipa_init(struct kbase_device *kbdev)
|
||||
kbdev->ipa.configured_model = default_model;
|
||||
}
|
||||
|
||||
kbase_ipa_model_use_configured_locked(kbdev);
|
||||
|
||||
end:
|
||||
if (err)
|
||||
kbase_ipa_term_locked(kbdev);
|
||||
@@ -380,6 +379,8 @@ void kbase_ipa_term(struct kbase_device *kbdev)
|
||||
mutex_lock(&kbdev->ipa.lock);
|
||||
kbase_ipa_term_locked(kbdev);
|
||||
mutex_unlock(&kbdev->ipa.lock);
|
||||
|
||||
mutex_destroy(&kbdev->ipa.lock);
|
||||
}
|
||||
KBASE_EXPORT_TEST_API(kbase_ipa_term);
|
||||
|
||||
@@ -449,14 +450,41 @@ u32 kbase_scale_static_power(const u32 c, const u32 voltage)
|
||||
return div_u64(v3c_big, 1000000);
|
||||
}
|
||||
|
||||
void kbase_ipa_protection_mode_switch_event(struct kbase_device *kbdev)
|
||||
{
|
||||
lockdep_assert_held(&kbdev->hwaccess_lock);
|
||||
|
||||
/* Record the event of GPU entering protected mode. */
|
||||
kbdev->ipa_protection_mode_switched = true;
|
||||
}
|
||||
|
||||
static struct kbase_ipa_model *get_current_model(struct kbase_device *kbdev)
|
||||
{
|
||||
struct kbase_ipa_model *model;
|
||||
unsigned long flags;
|
||||
|
||||
lockdep_assert_held(&kbdev->ipa.lock);
|
||||
|
||||
if (atomic_read(&kbdev->ipa_use_configured_model))
|
||||
return kbdev->ipa.configured_model;
|
||||
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
|
||||
|
||||
if (kbdev->ipa_protection_mode_switched ||
|
||||
kbdev->ipa.force_fallback_model)
|
||||
model = kbdev->ipa.fallback_model;
|
||||
else
|
||||
return kbdev->ipa.fallback_model;
|
||||
model = kbdev->ipa.configured_model;
|
||||
|
||||
/*
|
||||
* Having taken into account whether or not the GPU entered protected
|
||||
* mode earlier, the event can now be reset (if the GPU is not
|
||||
* currently in protected mode) so that the configured model is used
|
||||
* for the next sample.
|
||||
*/
|
||||
if (!kbdev->protected_mode)
|
||||
kbdev->ipa_protection_mode_switched = false;
|
||||
|
||||
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
|
||||
|
||||
return model;
|
||||
}
|
||||
|
||||
static u32 get_static_power_locked(struct kbase_device *kbdev,
|
||||
@@ -499,6 +527,9 @@ static unsigned long kbase_get_static_power(unsigned long voltage)
|
||||
struct kbase_device *kbdev = kbase_find_device(-1);
|
||||
#endif
|
||||
|
||||
if (!kbdev)
|
||||
return 0ul;
|
||||
|
||||
mutex_lock(&kbdev->ipa.lock);
|
||||
|
||||
model = get_current_model(kbdev);
|
||||
@@ -534,6 +565,9 @@ static unsigned long kbase_get_dynamic_power(unsigned long freq,
|
||||
struct kbase_device *kbdev = kbase_find_device(-1);
|
||||
#endif
|
||||
|
||||
if (!kbdev)
|
||||
return 0ul;
|
||||
|
||||
mutex_lock(&kbdev->ipa.lock);
|
||||
|
||||
model = kbdev->ipa.fallback_model;
|
||||
@@ -609,6 +643,9 @@ int kbase_get_real_power(struct devfreq *df, u32 *power,
|
||||
int ret;
|
||||
struct kbase_device *kbdev = dev_get_drvdata(&df->dev);
|
||||
|
||||
if (!kbdev)
|
||||
return -ENODEV;
|
||||
|
||||
mutex_lock(&kbdev->ipa.lock);
|
||||
ret = kbase_get_real_power_locked(kbdev, power, freq, voltage);
|
||||
mutex_unlock(&kbdev->ipa.lock);
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
/*
|
||||
*
|
||||
* (C) COPYRIGHT 2016-2018 ARM Limited. All rights reserved.
|
||||
* (C) COPYRIGHT 2016-2019 ARM Limited. All rights reserved.
|
||||
*
|
||||
* This program is free software and is provided to you under the terms of the
|
||||
* GNU General Public License version 2 as published by the Free Software
|
||||
@@ -40,7 +40,7 @@ struct devfreq;
|
||||
struct kbase_ipa_model {
|
||||
struct kbase_device *kbdev;
|
||||
void *model_data;
|
||||
struct kbase_ipa_model_ops *ops;
|
||||
const struct kbase_ipa_model_ops *ops;
|
||||
struct list_head params;
|
||||
bool missing_dt_node_warning;
|
||||
};
|
||||
@@ -153,9 +153,28 @@ void kbase_ipa_term(struct kbase_device *kbdev);
|
||||
*/
|
||||
int kbase_ipa_model_recalculate(struct kbase_ipa_model *model);
|
||||
|
||||
/**
|
||||
* kbase_ipa_model_ops_find - Lookup an IPA model using its name
|
||||
* @kbdev: pointer to kbase device
|
||||
* @name: name of model to lookup
|
||||
*
|
||||
* Return: Pointer to model's 'ops' structure, or NULL if the lookup failed.
|
||||
*/
|
||||
const struct kbase_ipa_model_ops *kbase_ipa_model_ops_find(struct kbase_device *kbdev,
|
||||
const char *name);
|
||||
|
||||
/**
|
||||
* kbase_ipa_model_name_from_id - Find the best model for a given GPU ID
|
||||
* @gpu_id: GPU ID of GPU the model will be used for
|
||||
*
|
||||
* Return: The name of the appropriate counter-based model, or the name of the
|
||||
* fallback model if no counter model exists.
|
||||
*/
|
||||
const char *kbase_ipa_model_name_from_id(u32 gpu_id);
|
||||
|
||||
/**
|
||||
* kbase_ipa_init_model - Initialize the particular IPA model
|
||||
* @kbdev: pointer to the IPA model object, already initialized
|
||||
* @kbdev: pointer to kbase device
|
||||
* @ops: pointer to object containing model specific methods.
|
||||
*
|
||||
* Initialize the model corresponding to the @ops pointer passed.
|
||||
@@ -164,7 +183,7 @@ int kbase_ipa_model_recalculate(struct kbase_ipa_model *model);
|
||||
* Return: pointer to kbase_ipa_model on success, NULL on error
|
||||
*/
|
||||
struct kbase_ipa_model *kbase_ipa_init_model(struct kbase_device *kbdev,
|
||||
struct kbase_ipa_model_ops *ops);
|
||||
const struct kbase_ipa_model_ops *ops);
|
||||
/**
|
||||
* kbase_ipa_term_model - Terminate the particular IPA model
|
||||
* @model: pointer to the IPA model object, already initialized
|
||||
@@ -174,17 +193,25 @@ struct kbase_ipa_model *kbase_ipa_init_model(struct kbase_device *kbdev,
|
||||
*/
|
||||
void kbase_ipa_term_model(struct kbase_ipa_model *model);
|
||||
|
||||
/* Switch to the fallback model */
|
||||
void kbase_ipa_model_use_fallback_locked(struct kbase_device *kbdev);
|
||||
/**
|
||||
* kbase_ipa_protection_mode_switch_event - Inform IPA of the GPU's entry into
|
||||
* protected mode
|
||||
* @kbdev: pointer to kbase device
|
||||
*
|
||||
* Makes IPA aware of the GPU switching to protected mode.
|
||||
*/
|
||||
void kbase_ipa_protection_mode_switch_event(struct kbase_device *kbdev);
|
||||
|
||||
/* Switch to the model retrieved from device tree */
|
||||
void kbase_ipa_model_use_configured_locked(struct kbase_device *kbdev);
|
||||
extern const struct kbase_ipa_model_ops kbase_g71_ipa_model_ops;
|
||||
extern const struct kbase_ipa_model_ops kbase_g72_ipa_model_ops;
|
||||
extern const struct kbase_ipa_model_ops kbase_g76_ipa_model_ops;
|
||||
extern const struct kbase_ipa_model_ops kbase_g52_ipa_model_ops;
|
||||
extern const struct kbase_ipa_model_ops kbase_g52_r1_ipa_model_ops;
|
||||
extern const struct kbase_ipa_model_ops kbase_g51_ipa_model_ops;
|
||||
extern const struct kbase_ipa_model_ops kbase_g77_ipa_model_ops;
|
||||
extern const struct kbase_ipa_model_ops kbase_tnax_ipa_model_ops;
|
||||
extern const struct kbase_ipa_model_ops kbase_tbex_ipa_model_ops;
|
||||
|
||||
extern struct kbase_ipa_model_ops kbase_g71_ipa_model_ops;
|
||||
extern struct kbase_ipa_model_ops kbase_g72_ipa_model_ops;
|
||||
extern struct kbase_ipa_model_ops kbase_tnox_ipa_model_ops;
|
||||
|
||||
#if MALI_UNIT_TEST
|
||||
/**
|
||||
* kbase_get_real_power() - get the real power consumption of the GPU
|
||||
* @df: dynamic voltage and frequency scaling information for the GPU.
|
||||
@@ -192,8 +219,7 @@ extern struct kbase_ipa_model_ops kbase_tnox_ipa_model_ops;
|
||||
* @freq: a frequency, in HZ.
|
||||
* @voltage: a voltage, in mV.
|
||||
*
|
||||
* This function is only exposed for use by unit tests. The returned value
|
||||
* incorporates both static and dynamic power consumption.
|
||||
* The returned value incorporates both static and dynamic power consumption.
|
||||
*
|
||||
* Return: 0 on success, or an error code.
|
||||
*/
|
||||
@@ -201,8 +227,10 @@ int kbase_get_real_power(struct devfreq *df, u32 *power,
|
||||
unsigned long freq,
|
||||
unsigned long voltage);
|
||||
|
||||
#if MALI_UNIT_TEST
|
||||
/* Called by kbase_get_real_power() to invoke the power models.
|
||||
* Must be called with kbdev->ipa.lock held.
|
||||
* This function is only exposed for use by unit tests.
|
||||
*/
|
||||
int kbase_get_real_power_locked(struct kbase_device *kbdev, u32 *power,
|
||||
unsigned long freq,
|
||||
@@ -217,10 +245,7 @@ extern struct devfreq_cooling_power kbase_ipa_power_model_ops;
|
||||
|
||||
#else /* !(defined(CONFIG_MALI_BIFROST_DEVFREQ) && defined(CONFIG_DEVFREQ_THERMAL)) */
|
||||
|
||||
static inline void kbase_ipa_model_use_fallback_locked(struct kbase_device *kbdev)
|
||||
{ }
|
||||
|
||||
static inline void kbase_ipa_model_use_configured_locked(struct kbase_device *kbdev)
|
||||
static inline void kbase_ipa_protection_mode_switch_event(struct kbase_device *kbdev)
|
||||
{ }
|
||||
|
||||
#endif /* (defined(CONFIG_MALI_BIFROST_DEVFREQ) && defined(CONFIG_DEVFREQ_THERMAL)) */
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
/*
|
||||
*
|
||||
* (C) COPYRIGHT 2017 ARM Limited. All rights reserved.
|
||||
* (C) COPYRIGHT 2017-2019 ARM Limited. All rights reserved.
|
||||
*
|
||||
* This program is free software and is provided to you under the terms of the
|
||||
* GNU General Public License version 2 as published by the Free Software
|
||||
@@ -141,6 +141,7 @@ end:
|
||||
}
|
||||
|
||||
static const struct file_operations fops_string = {
|
||||
.owner = THIS_MODULE,
|
||||
.read = param_string_get,
|
||||
.write = param_string_set,
|
||||
.open = simple_open,
|
||||
@@ -188,6 +189,54 @@ void kbase_ipa_model_param_free_all(struct kbase_ipa_model *model)
|
||||
}
|
||||
}
|
||||
|
||||
static int force_fallback_model_get(void *data, u64 *val)
|
||||
{
|
||||
struct kbase_device *kbdev = data;
|
||||
|
||||
mutex_lock(&kbdev->ipa.lock);
|
||||
*val = kbdev->ipa.force_fallback_model;
|
||||
mutex_unlock(&kbdev->ipa.lock);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int force_fallback_model_set(void *data, u64 val)
|
||||
{
|
||||
struct kbase_device *kbdev = data;
|
||||
|
||||
mutex_lock(&kbdev->ipa.lock);
|
||||
kbdev->ipa.force_fallback_model = (val ? true : false);
|
||||
mutex_unlock(&kbdev->ipa.lock);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
DEFINE_DEBUGFS_ATTRIBUTE(force_fallback_model,
|
||||
force_fallback_model_get,
|
||||
force_fallback_model_set,
|
||||
"%llu\n");
|
||||
|
||||
static int current_power_get(void *data, u64 *val)
|
||||
{
|
||||
struct kbase_device *kbdev = data;
|
||||
struct devfreq *df = kbdev->devfreq;
|
||||
u32 power;
|
||||
|
||||
kbase_pm_context_active(kbdev);
|
||||
/* The current model assumes that there's no more than one voltage
|
||||
* regulator currently available in the system.
|
||||
*/
|
||||
kbase_get_real_power(df, &power,
|
||||
kbdev->current_nominal_freq,
|
||||
(kbdev->current_voltages[0] / 1000));
|
||||
kbase_pm_context_idle(kbdev);
|
||||
|
||||
*val = power;
|
||||
|
||||
return 0;
|
||||
}
|
||||
DEFINE_DEBUGFS_ATTRIBUTE(current_power, current_power_get, NULL, "%llu\n");
|
||||
|
||||
static void kbase_ipa_model_debugfs_init(struct kbase_ipa_model *model)
|
||||
{
|
||||
struct list_head *it;
|
||||
@@ -264,5 +313,10 @@ void kbase_ipa_debugfs_init(struct kbase_device *kbdev)
|
||||
kbase_ipa_model_debugfs_init(kbdev->ipa.configured_model);
|
||||
kbase_ipa_model_debugfs_init(kbdev->ipa.fallback_model);
|
||||
|
||||
debugfs_create_file("ipa_current_power", 0444,
|
||||
kbdev->mali_debugfs_directory, kbdev, &current_power);
|
||||
debugfs_create_file("ipa_force_fallback_model", 0644,
|
||||
kbdev->mali_debugfs_directory, kbdev, &force_fallback_model);
|
||||
|
||||
mutex_unlock(&kbdev->ipa.lock);
|
||||
}
|
||||
|
||||
@@ -133,7 +133,7 @@ static u32 calculate_temp_scaling_factor(s32 ts[4], s64 t)
|
||||
const s64 res_big = ts[3] * t3 /* +/- 2^62 */
|
||||
+ ts[2] * t2 /* +/- 2^55 */
|
||||
+ ts[1] * t /* +/- 2^48 */
|
||||
+ ts[0] * 1000; /* +/- 2^41 */
|
||||
+ ts[0] * (s64)1000; /* +/- 2^41 */
|
||||
|
||||
/* Range: -2^60 < res_unclamped < 2^60 */
|
||||
s64 res_unclamped = div_s64(res_big, 1000);
|
||||
@@ -273,8 +273,9 @@ static int kbase_simple_power_model_init(struct kbase_ipa_model *model)
|
||||
(void *) model_data,
|
||||
"mali-simple-power-model-temp-poll");
|
||||
if (IS_ERR(model_data->poll_temperature_thread)) {
|
||||
err = PTR_ERR(model_data->poll_temperature_thread);
|
||||
kfree(model_data);
|
||||
return PTR_ERR(model_data->poll_temperature_thread);
|
||||
return err;
|
||||
}
|
||||
|
||||
err = add_params(model);
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
/*
|
||||
*
|
||||
* (C) COPYRIGHT 2017-2018 ARM Limited. All rights reserved.
|
||||
* (C) COPYRIGHT 2017-2019 ARM Limited. All rights reserved.
|
||||
*
|
||||
* This program is free software and is provided to you under the terms of the
|
||||
* GNU General Public License version 2 as published by the Free Software
|
||||
@@ -44,16 +44,23 @@ static inline u32 kbase_ipa_read_hwcnt(
|
||||
struct kbase_ipa_model_vinstr_data *model_data,
|
||||
u32 offset)
|
||||
{
|
||||
u8 *p = model_data->vinstr_buffer;
|
||||
u8 *p = (u8 *)model_data->dump_buf.dump_buf;
|
||||
|
||||
return *(u32 *)&p[offset];
|
||||
}
|
||||
|
||||
static inline s64 kbase_ipa_add_saturate(s64 a, s64 b)
|
||||
{
|
||||
if (S64_MAX - a < b)
|
||||
return S64_MAX;
|
||||
return a + b;
|
||||
s64 rtn;
|
||||
|
||||
if (a > 0 && (S64_MAX - a) < b)
|
||||
rtn = S64_MAX;
|
||||
else if (a < 0 && (S64_MIN - a) > b)
|
||||
rtn = S64_MIN;
|
||||
else
|
||||
rtn = a + b;
|
||||
|
||||
return rtn;
|
||||
}
|
||||
|
||||
s64 kbase_ipa_sum_all_shader_cores(
|
||||
@@ -83,6 +90,30 @@ s64 kbase_ipa_sum_all_shader_cores(
|
||||
return ret * coeff;
|
||||
}
|
||||
|
||||
s64 kbase_ipa_sum_all_memsys_blocks(
|
||||
struct kbase_ipa_model_vinstr_data *model_data,
|
||||
s32 coeff, u32 counter)
|
||||
{
|
||||
struct kbase_device *kbdev = model_data->kbdev;
|
||||
const u32 num_blocks = kbdev->gpu_props.props.l2_props.num_l2_slices;
|
||||
u32 base = 0;
|
||||
s64 ret = 0;
|
||||
u32 i;
|
||||
|
||||
for (i = 0; i < num_blocks; i++) {
|
||||
/* 0 < counter_value < 2^27 */
|
||||
u32 counter_value = kbase_ipa_read_hwcnt(model_data,
|
||||
base + counter);
|
||||
|
||||
/* 0 < ret < 2^27 * max_num_memsys_blocks = 2^29 */
|
||||
ret = kbase_ipa_add_saturate(ret, counter_value);
|
||||
base += KBASE_IPA_NR_BYTES_PER_BLOCK;
|
||||
}
|
||||
|
||||
/* Range: -2^51 < ret * coeff < 2^51 */
|
||||
return ret * coeff;
|
||||
}
|
||||
|
||||
s64 kbase_ipa_single_counter(
|
||||
struct kbase_ipa_model_vinstr_data *model_data,
|
||||
s32 coeff, u32 counter)
|
||||
@@ -94,115 +125,69 @@ s64 kbase_ipa_single_counter(
|
||||
return counter_value * (s64) coeff;
|
||||
}
|
||||
|
||||
/**
|
||||
* kbase_ipa_gpu_active - Inform IPA that GPU is now active
|
||||
* @model_data: Pointer to model data
|
||||
*
|
||||
* This function may cause vinstr to become active.
|
||||
*/
|
||||
static void kbase_ipa_gpu_active(struct kbase_ipa_model_vinstr_data *model_data)
|
||||
{
|
||||
struct kbase_device *kbdev = model_data->kbdev;
|
||||
|
||||
lockdep_assert_held(&kbdev->pm.lock);
|
||||
|
||||
if (!kbdev->ipa.vinstr_active) {
|
||||
kbdev->ipa.vinstr_active = true;
|
||||
kbase_vinstr_resume_client(model_data->vinstr_cli);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* kbase_ipa_gpu_idle - Inform IPA that GPU is now idle
|
||||
* @model_data: Pointer to model data
|
||||
*
|
||||
* This function may cause vinstr to become idle.
|
||||
*/
|
||||
static void kbase_ipa_gpu_idle(struct kbase_ipa_model_vinstr_data *model_data)
|
||||
{
|
||||
struct kbase_device *kbdev = model_data->kbdev;
|
||||
|
||||
lockdep_assert_held(&kbdev->pm.lock);
|
||||
|
||||
if (kbdev->ipa.vinstr_active) {
|
||||
kbase_vinstr_suspend_client(model_data->vinstr_cli);
|
||||
kbdev->ipa.vinstr_active = false;
|
||||
}
|
||||
}
|
||||
|
||||
int kbase_ipa_attach_vinstr(struct kbase_ipa_model_vinstr_data *model_data)
|
||||
{
|
||||
int errcode;
|
||||
struct kbase_device *kbdev = model_data->kbdev;
|
||||
struct kbase_ioctl_hwcnt_reader_setup setup;
|
||||
size_t dump_size;
|
||||
struct kbase_hwcnt_virtualizer *hvirt = kbdev->hwcnt_gpu_virt;
|
||||
struct kbase_hwcnt_enable_map enable_map;
|
||||
const struct kbase_hwcnt_metadata *metadata =
|
||||
kbase_hwcnt_virtualizer_metadata(hvirt);
|
||||
|
||||
dump_size = kbase_vinstr_dump_size(kbdev);
|
||||
model_data->vinstr_buffer = kzalloc(dump_size, GFP_KERNEL);
|
||||
if (!model_data->vinstr_buffer) {
|
||||
if (!metadata)
|
||||
return -1;
|
||||
|
||||
errcode = kbase_hwcnt_enable_map_alloc(metadata, &enable_map);
|
||||
if (errcode) {
|
||||
dev_err(kbdev->dev, "Failed to allocate IPA enable map");
|
||||
return errcode;
|
||||
}
|
||||
|
||||
kbase_hwcnt_enable_map_enable_all(&enable_map);
|
||||
|
||||
errcode = kbase_hwcnt_virtualizer_client_create(
|
||||
hvirt, &enable_map, &model_data->hvirt_cli);
|
||||
kbase_hwcnt_enable_map_free(&enable_map);
|
||||
if (errcode) {
|
||||
dev_err(kbdev->dev, "Failed to register IPA with virtualizer");
|
||||
model_data->hvirt_cli = NULL;
|
||||
return errcode;
|
||||
}
|
||||
|
||||
errcode = kbase_hwcnt_dump_buffer_alloc(
|
||||
metadata, &model_data->dump_buf);
|
||||
if (errcode) {
|
||||
dev_err(kbdev->dev, "Failed to allocate IPA dump buffer");
|
||||
return -1;
|
||||
kbase_hwcnt_virtualizer_client_destroy(model_data->hvirt_cli);
|
||||
model_data->hvirt_cli = NULL;
|
||||
return errcode;
|
||||
}
|
||||
|
||||
setup.jm_bm = ~0u;
|
||||
setup.shader_bm = ~0u;
|
||||
setup.tiler_bm = ~0u;
|
||||
setup.mmu_l2_bm = ~0u;
|
||||
model_data->vinstr_cli = kbase_vinstr_hwcnt_kernel_setup(kbdev->vinstr_ctx,
|
||||
&setup, model_data->vinstr_buffer);
|
||||
if (!model_data->vinstr_cli) {
|
||||
dev_err(kbdev->dev, "Failed to register IPA with vinstr core");
|
||||
kfree(model_data->vinstr_buffer);
|
||||
model_data->vinstr_buffer = NULL;
|
||||
return -1;
|
||||
}
|
||||
|
||||
kbase_vinstr_hwc_clear(model_data->vinstr_cli);
|
||||
|
||||
kbdev->ipa.gpu_active_callback = kbase_ipa_gpu_active;
|
||||
kbdev->ipa.gpu_idle_callback = kbase_ipa_gpu_idle;
|
||||
kbdev->ipa.model_data = model_data;
|
||||
kbdev->ipa.vinstr_active = false;
|
||||
/* Suspend vinstr, to ensure that the GPU is powered off until there is
|
||||
* something to execute.
|
||||
*/
|
||||
kbase_vinstr_suspend_client(model_data->vinstr_cli);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void kbase_ipa_detach_vinstr(struct kbase_ipa_model_vinstr_data *model_data)
|
||||
{
|
||||
struct kbase_device *kbdev = model_data->kbdev;
|
||||
|
||||
kbdev->ipa.gpu_active_callback = NULL;
|
||||
kbdev->ipa.gpu_idle_callback = NULL;
|
||||
kbdev->ipa.model_data = NULL;
|
||||
kbdev->ipa.vinstr_active = false;
|
||||
|
||||
if (model_data->vinstr_cli)
|
||||
kbase_vinstr_detach_client(model_data->vinstr_cli);
|
||||
|
||||
model_data->vinstr_cli = NULL;
|
||||
kfree(model_data->vinstr_buffer);
|
||||
model_data->vinstr_buffer = NULL;
|
||||
if (model_data->hvirt_cli) {
|
||||
kbase_hwcnt_virtualizer_client_destroy(model_data->hvirt_cli);
|
||||
kbase_hwcnt_dump_buffer_free(&model_data->dump_buf);
|
||||
model_data->hvirt_cli = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
int kbase_ipa_vinstr_dynamic_coeff(struct kbase_ipa_model *model, u32 *coeffp)
|
||||
{
|
||||
struct kbase_ipa_model_vinstr_data *model_data =
|
||||
(struct kbase_ipa_model_vinstr_data *)model->model_data;
|
||||
struct kbase_device *kbdev = model_data->kbdev;
|
||||
s64 energy = 0;
|
||||
size_t i;
|
||||
u64 coeff = 0, coeff_mul = 0;
|
||||
u64 start_ts_ns, end_ts_ns;
|
||||
u32 active_cycles;
|
||||
int err = 0;
|
||||
|
||||
if (!kbdev->ipa.vinstr_active)
|
||||
goto err0; /* GPU powered off - no counters to collect */
|
||||
|
||||
err = kbase_vinstr_hwc_dump(model_data->vinstr_cli,
|
||||
BASE_HWCNT_READER_EVENT_MANUAL);
|
||||
err = kbase_hwcnt_virtualizer_client_dump(model_data->hvirt_cli,
|
||||
&start_ts_ns, &end_ts_ns, &model_data->dump_buf);
|
||||
if (err)
|
||||
goto err0;
|
||||
|
||||
@@ -256,12 +241,27 @@ int kbase_ipa_vinstr_dynamic_coeff(struct kbase_ipa_model *model, u32 *coeffp)
|
||||
*/
|
||||
coeff = div_u64(coeff, active_cycles);
|
||||
|
||||
/* Scale by user-specified factor (where unity is 1000).
|
||||
* Range: 0 <= coeff_mul < 2^61
|
||||
/* Not all models were derived at the same reference voltage. Voltage
|
||||
* scaling is done by multiplying by V^2, so we need to *divide* by
|
||||
* Vref^2 here.
|
||||
* Range: 0 <= coeff <= 2^49
|
||||
*/
|
||||
coeff = div_u64(coeff * 1000, max(model_data->reference_voltage, 1));
|
||||
/* Range: 0 <= coeff <= 2^52 */
|
||||
coeff = div_u64(coeff * 1000, max(model_data->reference_voltage, 1));
|
||||
|
||||
/* Scale by user-specified integer factor.
|
||||
* Range: 0 <= coeff_mul < 2^57
|
||||
*/
|
||||
coeff_mul = coeff * model_data->scaling_factor;
|
||||
|
||||
/* Range: 0 <= coeff_mul < 2^51 */
|
||||
/* The power models have results with units
|
||||
* mW/(MHz V^2), i.e. nW/(Hz V^2). With precision of 1/1000000, this
|
||||
* becomes fW/(Hz V^2), which are the units of coeff_mul. However,
|
||||
* kbase_scale_dynamic_power() expects units of pW/(Hz V^2), so divide
|
||||
* by 1000.
|
||||
* Range: 0 <= coeff_mul < 2^47
|
||||
*/
|
||||
coeff_mul = div_u64(coeff_mul, 1000u);
|
||||
|
||||
err0:
|
||||
@@ -273,7 +273,8 @@ err0:
|
||||
int kbase_ipa_vinstr_common_model_init(struct kbase_ipa_model *model,
|
||||
const struct kbase_ipa_group *ipa_groups_def,
|
||||
size_t ipa_group_size,
|
||||
kbase_ipa_get_active_cycles_callback get_active_cycles)
|
||||
kbase_ipa_get_active_cycles_callback get_active_cycles,
|
||||
s32 reference_voltage)
|
||||
{
|
||||
int err = 0;
|
||||
size_t i;
|
||||
@@ -318,6 +319,13 @@ int kbase_ipa_vinstr_common_model_init(struct kbase_ipa_model *model,
|
||||
if (err)
|
||||
goto exit;
|
||||
|
||||
model_data->reference_voltage = reference_voltage;
|
||||
err = kbase_ipa_model_add_param_s32(model, "reference_voltage",
|
||||
&model_data->reference_voltage,
|
||||
1, false);
|
||||
if (err)
|
||||
goto exit;
|
||||
|
||||
err = kbase_ipa_attach_vinstr(model_data);
|
||||
|
||||
exit:
|
||||
|
||||
@@ -24,6 +24,8 @@
|
||||
#define _KBASE_IPA_VINSTR_COMMON_H_
|
||||
|
||||
#include "mali_kbase.h"
|
||||
#include "mali_kbase_hwcnt_virtualizer.h"
|
||||
#include "mali_kbase_hwcnt_types.h"
|
||||
|
||||
/* Maximum number of IPA groups for an IPA model. */
|
||||
#define KBASE_IPA_MAX_GROUP_DEF_NUM 16
|
||||
@@ -49,12 +51,15 @@ typedef u32 (*kbase_ipa_get_active_cycles_callback)(struct kbase_ipa_model_vinst
|
||||
* @groups_def_num: Number of elements in the array of IPA groups.
|
||||
* @get_active_cycles: Callback to return number of active cycles during
|
||||
* counter sample period
|
||||
* @vinstr_cli: vinstr client handle
|
||||
* @vinstr_buffer: buffer to dump hardware counters onto
|
||||
* @scaling_factor: user-specified power scaling factor. This is
|
||||
* interpreted as a fraction where the denominator is
|
||||
* 1000. Range approx 0.0-32.0:
|
||||
* 0 < scaling_factor < 2^15
|
||||
* @hvirt_cli: hardware counter virtualizer client handle
|
||||
* @dump_buf: buffer to dump hardware counters onto
|
||||
* @reference_voltage: voltage, in mV, of the operating point used when
|
||||
* deriving the power model coefficients. Range approx
|
||||
* 0.1V - 5V (~= 8V): 2^7 <= reference_voltage <= 2^13
|
||||
* @scaling_factor: User-specified power scaling factor. This is an
|
||||
* integer, which is multiplied by the power coefficient
|
||||
* just before OPP scaling.
|
||||
* Range approx 0-32: 0 < scaling_factor < 2^5
|
||||
* @min_sample_cycles: If the value of the GPU_ACTIVE counter (the number of
|
||||
* cycles the GPU was working) is less than
|
||||
* min_sample_cycles, the counter model will return an
|
||||
@@ -69,8 +74,9 @@ struct kbase_ipa_model_vinstr_data {
|
||||
const struct kbase_ipa_group *groups_def;
|
||||
size_t groups_def_num;
|
||||
kbase_ipa_get_active_cycles_callback get_active_cycles;
|
||||
struct kbase_vinstr_client *vinstr_cli;
|
||||
void *vinstr_buffer;
|
||||
struct kbase_hwcnt_virtualizer_client *hvirt_cli;
|
||||
struct kbase_hwcnt_dump_buffer dump_buf;
|
||||
s32 reference_voltage;
|
||||
s32 scaling_factor;
|
||||
s32 min_sample_cycles;
|
||||
};
|
||||
@@ -92,11 +98,12 @@ struct kbase_ipa_group {
|
||||
};
|
||||
|
||||
/**
|
||||
* sum_all_shader_cores() - sum a counter over all cores
|
||||
* @model_data pointer to model data
|
||||
* @coeff model coefficient. Unity is ~2^20, so range approx
|
||||
* +/- 4.0: -2^22 < coeff < 2^22
|
||||
* @counter offset in bytes of the counter used to calculate energy for IPA group
|
||||
* kbase_ipa_sum_all_shader_cores() - sum a counter over all cores
|
||||
* @model_data: pointer to model data
|
||||
* @coeff: model coefficient. Unity is ~2^20, so range approx
|
||||
* +/- 4.0: -2^22 < coeff < 2^22
|
||||
* @counter: offset in bytes of the counter used to calculate energy
|
||||
* for IPA group
|
||||
*
|
||||
* Calculate energy estimation based on hardware counter `counter'
|
||||
* across all shader cores.
|
||||
@@ -108,11 +115,29 @@ s64 kbase_ipa_sum_all_shader_cores(
|
||||
s32 coeff, u32 counter);
|
||||
|
||||
/**
|
||||
* sum_single_counter() - sum a single counter
|
||||
* @model_data pointer to model data
|
||||
* @coeff model coefficient. Unity is ~2^20, so range approx
|
||||
* +/- 4.0: -2^22 < coeff < 2^22
|
||||
* @counter offset in bytes of the counter used to calculate energy for IPA group
|
||||
* kbase_ipa_sum_all_memsys_blocks() - sum a counter over all mem system blocks
|
||||
* @model_data: pointer to model data
|
||||
* @coeff: model coefficient. Unity is ~2^20, so range approx
|
||||
* +/- 4.0: -2^22 < coeff < 2^22
|
||||
* @counter: offset in bytes of the counter used to calculate energy
|
||||
* for IPA group
|
||||
*
|
||||
* Calculate energy estimation based on hardware counter `counter' across all
|
||||
* memory system blocks.
|
||||
*
|
||||
* Return: Sum of counter values. Range: -2^51 < ret < 2^51
|
||||
*/
|
||||
s64 kbase_ipa_sum_all_memsys_blocks(
|
||||
struct kbase_ipa_model_vinstr_data *model_data,
|
||||
s32 coeff, u32 counter);
|
||||
|
||||
/**
|
||||
* kbase_ipa_single_counter() - sum a single counter
|
||||
* @model_data: pointer to model data
|
||||
* @coeff: model coefficient. Unity is ~2^20, so range approx
|
||||
* +/- 4.0: -2^22 < coeff < 2^22
|
||||
* @counter: offset in bytes of the counter used to calculate energy
|
||||
* for IPA group
|
||||
*
|
||||
* Calculate energy estimation based on hardware counter `counter'.
|
||||
*
|
||||
@@ -164,6 +189,8 @@ int kbase_ipa_vinstr_dynamic_coeff(struct kbase_ipa_model *model, u32 *coeffp);
|
||||
* @ipa_group_size: number of elements in the array @ipa_groups_def
|
||||
* @get_active_cycles: callback to return the number of cycles the GPU was
|
||||
* active during the counter sample period.
|
||||
* @reference_voltage: voltage, in mV, of the operating point used when
|
||||
* deriving the power model coefficients.
|
||||
*
|
||||
* This initialization function performs initialization steps common
|
||||
* for ipa models based on counter values. In each call, the model
|
||||
@@ -175,7 +202,8 @@ int kbase_ipa_vinstr_dynamic_coeff(struct kbase_ipa_model *model, u32 *coeffp);
|
||||
int kbase_ipa_vinstr_common_model_init(struct kbase_ipa_model *model,
|
||||
const struct kbase_ipa_group *ipa_groups_def,
|
||||
size_t ipa_group_size,
|
||||
kbase_ipa_get_active_cycles_callback get_active_cycles);
|
||||
kbase_ipa_get_active_cycles_callback get_active_cycles,
|
||||
s32 reference_voltage);
|
||||
|
||||
/**
|
||||
* kbase_ipa_vinstr_common_model_term() - terminate ipa power model
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
/*
|
||||
*
|
||||
* (C) COPYRIGHT 2016-2018 ARM Limited. All rights reserved.
|
||||
* (C) COPYRIGHT 2016-2019 ARM Limited. All rights reserved.
|
||||
*
|
||||
* This program is free software and is provided to you under the terms of the
|
||||
* GNU General Public License version 2 as published by the Free Software
|
||||
@@ -23,15 +23,12 @@
|
||||
|
||||
#include "mali_kbase_ipa_vinstr_common.h"
|
||||
#include "mali_kbase.h"
|
||||
#include "mali_kbase_ipa_debugfs.h"
|
||||
|
||||
|
||||
/* Performance counter blocks base offsets */
|
||||
#define JM_BASE (0 * KBASE_IPA_NR_BYTES_PER_BLOCK)
|
||||
#define TILER_BASE (1 * KBASE_IPA_NR_BYTES_PER_BLOCK)
|
||||
#define MEMSYS_BASE (2 * KBASE_IPA_NR_BYTES_PER_BLOCK)
|
||||
#define SC0_BASE_ONE_MEMSYS (3 * KBASE_IPA_NR_BYTES_PER_BLOCK)
|
||||
#define SC0_BASE_TWO_MEMSYS (4 * KBASE_IPA_NR_BYTES_PER_BLOCK)
|
||||
|
||||
/* JM counter block offsets */
|
||||
#define JM_GPU_ACTIVE (KBASE_IPA_NR_BYTES_PER_CNT * 6)
|
||||
@@ -45,7 +42,10 @@
|
||||
/* SC counter block offsets */
|
||||
#define SC_FRAG_ACTIVE (KBASE_IPA_NR_BYTES_PER_CNT * 4)
|
||||
#define SC_EXEC_CORE_ACTIVE (KBASE_IPA_NR_BYTES_PER_CNT * 26)
|
||||
#define SC_EXEC_INSTR_FMA (KBASE_IPA_NR_BYTES_PER_CNT * 27)
|
||||
#define SC_EXEC_INSTR_COUNT (KBASE_IPA_NR_BYTES_PER_CNT * 28)
|
||||
#define SC_EXEC_INSTR_MSG (KBASE_IPA_NR_BYTES_PER_CNT * 30)
|
||||
#define SC_TEX_FILT_NUM_OPERATIONS (KBASE_IPA_NR_BYTES_PER_CNT * 39)
|
||||
#define SC_TEX_COORD_ISSUE (KBASE_IPA_NR_BYTES_PER_CNT * 40)
|
||||
#define SC_TEX_TFCH_NUM_OPERATIONS (KBASE_IPA_NR_BYTES_PER_CNT * 42)
|
||||
#define SC_VARY_INSTR (KBASE_IPA_NR_BYTES_PER_CNT * 49)
|
||||
@@ -55,10 +55,6 @@
|
||||
#define SC_BEATS_WR_LSC (KBASE_IPA_NR_BYTES_PER_CNT * 61)
|
||||
#define SC_BEATS_WR_TIB (KBASE_IPA_NR_BYTES_PER_CNT * 62)
|
||||
|
||||
/** Maximum number of cores for which a single Memory System block of performance counters is present. */
|
||||
#define KBASE_G7x_SINGLE_MEMSYS_MAX_NUM_CORES ((u8)4)
|
||||
|
||||
|
||||
/**
|
||||
* get_jm_counter() - get performance counter offset inside the Job Manager block
|
||||
* @model_data: pointer to GPU model data.
|
||||
@@ -98,9 +94,9 @@ static u32 kbase_g7x_power_model_get_memsys_counter(struct kbase_ipa_model_vinst
|
||||
static u32 kbase_g7x_power_model_get_sc_counter(struct kbase_ipa_model_vinstr_data *model_data,
|
||||
u32 counter_block_offset)
|
||||
{
|
||||
const u32 sc_base = model_data->kbdev->gpu_props.num_cores <= KBASE_G7x_SINGLE_MEMSYS_MAX_NUM_CORES ?
|
||||
SC0_BASE_ONE_MEMSYS :
|
||||
SC0_BASE_TWO_MEMSYS;
|
||||
const u32 sc_base = MEMSYS_BASE +
|
||||
(model_data->kbdev->gpu_props.props.l2_props.num_l2_slices *
|
||||
KBASE_IPA_NR_BYTES_PER_BLOCK);
|
||||
|
||||
return sc_base + counter_block_offset;
|
||||
}
|
||||
@@ -113,7 +109,7 @@ static u32 kbase_g7x_power_model_get_sc_counter(struct kbase_ipa_model_vinstr_da
|
||||
*
|
||||
* Return: Energy estimation for a single Memory System performance counter.
|
||||
*/
|
||||
static s64 kbase_g7x_memsys_single_counter(
|
||||
static s64 kbase_g7x_sum_all_memsys_blocks(
|
||||
struct kbase_ipa_model_vinstr_data *model_data,
|
||||
s32 coeff,
|
||||
u32 offset)
|
||||
@@ -121,7 +117,7 @@ static s64 kbase_g7x_memsys_single_counter(
|
||||
u32 counter;
|
||||
|
||||
counter = kbase_g7x_power_model_get_memsys_counter(model_data, offset);
|
||||
return kbase_ipa_single_counter(model_data, coeff, counter);
|
||||
return kbase_ipa_sum_all_memsys_blocks(model_data, coeff, counter);
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -192,7 +188,7 @@ static const struct kbase_ipa_group ipa_groups_def_g71[] = {
|
||||
{
|
||||
.name = "l2_access",
|
||||
.default_value = 526300,
|
||||
.op = kbase_g7x_memsys_single_counter,
|
||||
.op = kbase_g7x_sum_all_memsys_blocks,
|
||||
.counter_block_offset = MEMSYS_L2_ANY_LOOKUP,
|
||||
},
|
||||
{
|
||||
@@ -225,7 +221,7 @@ static const struct kbase_ipa_group ipa_groups_def_g72[] = {
|
||||
{
|
||||
.name = "l2_access",
|
||||
.default_value = 393000,
|
||||
.op = kbase_g7x_memsys_single_counter,
|
||||
.op = kbase_g7x_sum_all_memsys_blocks,
|
||||
.counter_block_offset = MEMSYS_L2_ANY_LOOKUP,
|
||||
},
|
||||
{
|
||||
@@ -254,7 +250,7 @@ static const struct kbase_ipa_group ipa_groups_def_g72[] = {
|
||||
},
|
||||
};
|
||||
|
||||
static const struct kbase_ipa_group ipa_groups_def_tnox[] = {
|
||||
static const struct kbase_ipa_group ipa_groups_def_g76[] = {
|
||||
{
|
||||
.name = "gpu_active",
|
||||
.default_value = 122000,
|
||||
@@ -282,12 +278,154 @@ static const struct kbase_ipa_group ipa_groups_def_tnox[] = {
|
||||
{
|
||||
.name = "l2_access",
|
||||
.default_value = 378100,
|
||||
.op = kbase_g7x_memsys_single_counter,
|
||||
.op = kbase_g7x_sum_all_memsys_blocks,
|
||||
.counter_block_offset = MEMSYS_L2_ANY_LOOKUP,
|
||||
},
|
||||
};
|
||||
|
||||
#define STANDARD_POWER_MODEL(gpu) \
|
||||
static const struct kbase_ipa_group ipa_groups_def_g52_r1[] = {
|
||||
{
|
||||
.name = "gpu_active",
|
||||
.default_value = 224200,
|
||||
.op = kbase_g7x_jm_single_counter,
|
||||
.counter_block_offset = JM_GPU_ACTIVE,
|
||||
},
|
||||
{
|
||||
.name = "exec_instr_count",
|
||||
.default_value = 384700,
|
||||
.op = kbase_g7x_sum_all_shader_cores,
|
||||
.counter_block_offset = SC_EXEC_INSTR_COUNT,
|
||||
},
|
||||
{
|
||||
.name = "vary_instr",
|
||||
.default_value = 271900,
|
||||
.op = kbase_g7x_sum_all_shader_cores,
|
||||
.counter_block_offset = SC_VARY_INSTR,
|
||||
},
|
||||
{
|
||||
.name = "tex_tfch_num_operations",
|
||||
.default_value = 477700,
|
||||
.op = kbase_g7x_sum_all_shader_cores,
|
||||
.counter_block_offset = SC_TEX_TFCH_NUM_OPERATIONS,
|
||||
},
|
||||
{
|
||||
.name = "l2_access",
|
||||
.default_value = 551400,
|
||||
.op = kbase_g7x_sum_all_memsys_blocks,
|
||||
.counter_block_offset = MEMSYS_L2_ANY_LOOKUP,
|
||||
},
|
||||
};
|
||||
|
||||
static const struct kbase_ipa_group ipa_groups_def_g51[] = {
|
||||
{
|
||||
.name = "gpu_active",
|
||||
.default_value = 201400,
|
||||
.op = kbase_g7x_jm_single_counter,
|
||||
.counter_block_offset = JM_GPU_ACTIVE,
|
||||
},
|
||||
{
|
||||
.name = "exec_instr_count",
|
||||
.default_value = 392700,
|
||||
.op = kbase_g7x_sum_all_shader_cores,
|
||||
.counter_block_offset = SC_EXEC_INSTR_COUNT,
|
||||
},
|
||||
{
|
||||
.name = "vary_instr",
|
||||
.default_value = 274000,
|
||||
.op = kbase_g7x_sum_all_shader_cores,
|
||||
.counter_block_offset = SC_VARY_INSTR,
|
||||
},
|
||||
{
|
||||
.name = "tex_tfch_num_operations",
|
||||
.default_value = 528000,
|
||||
.op = kbase_g7x_sum_all_shader_cores,
|
||||
.counter_block_offset = SC_TEX_TFCH_NUM_OPERATIONS,
|
||||
},
|
||||
{
|
||||
.name = "l2_access",
|
||||
.default_value = 506400,
|
||||
.op = kbase_g7x_sum_all_memsys_blocks,
|
||||
.counter_block_offset = MEMSYS_L2_ANY_LOOKUP,
|
||||
},
|
||||
};
|
||||
|
||||
static const struct kbase_ipa_group ipa_groups_def_g77[] = {
|
||||
{
|
||||
.name = "l2_access",
|
||||
.default_value = 710800,
|
||||
.op = kbase_g7x_sum_all_memsys_blocks,
|
||||
.counter_block_offset = MEMSYS_L2_ANY_LOOKUP,
|
||||
},
|
||||
{
|
||||
.name = "exec_instr_msg",
|
||||
.default_value = 2375300,
|
||||
.op = kbase_g7x_sum_all_shader_cores,
|
||||
.counter_block_offset = SC_EXEC_INSTR_MSG,
|
||||
},
|
||||
{
|
||||
.name = "exec_instr_fma",
|
||||
.default_value = 656100,
|
||||
.op = kbase_g7x_sum_all_shader_cores,
|
||||
.counter_block_offset = SC_EXEC_INSTR_FMA,
|
||||
},
|
||||
{
|
||||
.name = "tex_filt_num_operations",
|
||||
.default_value = 318800,
|
||||
.op = kbase_g7x_sum_all_shader_cores,
|
||||
.counter_block_offset = SC_TEX_FILT_NUM_OPERATIONS,
|
||||
},
|
||||
{
|
||||
.name = "gpu_active",
|
||||
.default_value = 172800,
|
||||
.op = kbase_g7x_jm_single_counter,
|
||||
.counter_block_offset = JM_GPU_ACTIVE,
|
||||
},
|
||||
};
|
||||
|
||||
static const struct kbase_ipa_group ipa_groups_def_tbex[] = {
|
||||
{
|
||||
.name = "l2_access",
|
||||
.default_value = 599800,
|
||||
.op = kbase_g7x_sum_all_memsys_blocks,
|
||||
.counter_block_offset = MEMSYS_L2_ANY_LOOKUP,
|
||||
},
|
||||
{
|
||||
.name = "exec_instr_msg",
|
||||
.default_value = 1830200,
|
||||
.op = kbase_g7x_sum_all_shader_cores,
|
||||
.counter_block_offset = SC_EXEC_INSTR_MSG,
|
||||
},
|
||||
{
|
||||
.name = "exec_instr_fma",
|
||||
.default_value = 407300,
|
||||
.op = kbase_g7x_sum_all_shader_cores,
|
||||
.counter_block_offset = SC_EXEC_INSTR_FMA,
|
||||
},
|
||||
{
|
||||
.name = "tex_filt_num_operations",
|
||||
.default_value = 224500,
|
||||
.op = kbase_g7x_sum_all_shader_cores,
|
||||
.counter_block_offset = SC_TEX_FILT_NUM_OPERATIONS,
|
||||
},
|
||||
{
|
||||
.name = "gpu_active",
|
||||
.default_value = 153800,
|
||||
.op = kbase_g7x_jm_single_counter,
|
||||
.counter_block_offset = JM_GPU_ACTIVE,
|
||||
},
|
||||
};
|
||||
|
||||
|
||||
/*
 * IPA_POWER_MODEL_OPS() - define the model ops structure for one GPU name.
 * @gpu:        token pasted into the structure name
 *              (kbase_<gpu>_ipa_model_ops) and stringized into the model
 *              name ("mali-<gpu>-power-model").
 * @init_token: token pasted into the init callback name
 *              (kbase_<init_token>_power_model_init). It may differ from
 *              @gpu so that one GPU name can reuse another's init function.
 *
 * The .term and .get_dynamic_coeff callbacks are always the common vinstr
 * implementations. The resulting structure is also handed to
 * KBASE_EXPORT_TEST_API().
 */
#define IPA_POWER_MODEL_OPS(gpu, init_token) \
|
||||
const struct kbase_ipa_model_ops kbase_ ## gpu ## _ipa_model_ops = { \
|
||||
.name = "mali-" #gpu "-power-model", \
|
||||
.init = kbase_ ## init_token ## _power_model_init, \
|
||||
.term = kbase_ipa_vinstr_common_model_term, \
|
||||
.get_dynamic_coeff = kbase_ipa_vinstr_dynamic_coeff, \
|
||||
}; \
|
||||
KBASE_EXPORT_TEST_API(kbase_ ## gpu ## _ipa_model_ops)
|
||||
|
||||
#define STANDARD_POWER_MODEL(gpu, reference_voltage) \
|
||||
static int kbase_ ## gpu ## _power_model_init(\
|
||||
struct kbase_ipa_model *model) \
|
||||
{ \
|
||||
@@ -296,16 +434,23 @@ static const struct kbase_ipa_group ipa_groups_def_tnox[] = {
|
||||
return kbase_ipa_vinstr_common_model_init(model, \
|
||||
ipa_groups_def_ ## gpu, \
|
||||
ARRAY_SIZE(ipa_groups_def_ ## gpu), \
|
||||
kbase_g7x_get_active_cycles); \
|
||||
kbase_g7x_get_active_cycles, \
|
||||
(reference_voltage)); \
|
||||
} \
|
||||
struct kbase_ipa_model_ops kbase_ ## gpu ## _ipa_model_ops = { \
|
||||
.name = "mali-" #gpu "-power-model", \
|
||||
.init = kbase_ ## gpu ## _power_model_init, \
|
||||
.term = kbase_ipa_vinstr_common_model_term, \
|
||||
.get_dynamic_coeff = kbase_ipa_vinstr_dynamic_coeff, \
|
||||
}; \
|
||||
KBASE_EXPORT_TEST_API(kbase_ ## gpu ## _ipa_model_ops)
|
||||
IPA_POWER_MODEL_OPS(gpu, gpu)
|
||||
|
||||
STANDARD_POWER_MODEL(g71);
|
||||
STANDARD_POWER_MODEL(g72);
|
||||
STANDARD_POWER_MODEL(tnox);
|
||||
/*
 * ALIAS_POWER_MODEL() - define model ops for @gpu that reuse @as_gpu's init.
 *
 * Expands to IPA_POWER_MODEL_OPS(gpu, as_gpu): the ops structure and model
 * name are derived from @gpu, while .init points at
 * kbase_<as_gpu>_power_model_init, so no separate init function is defined
 * for @gpu.
 */
#define ALIAS_POWER_MODEL(gpu, as_gpu) \
|
||||
IPA_POWER_MODEL_OPS(gpu, as_gpu)
|
||||
|
||||
STANDARD_POWER_MODEL(g71, 800);
|
||||
STANDARD_POWER_MODEL(g72, 800);
|
||||
STANDARD_POWER_MODEL(g76, 800);
|
||||
STANDARD_POWER_MODEL(g52_r1, 1000);
|
||||
STANDARD_POWER_MODEL(g51, 1000);
|
||||
STANDARD_POWER_MODEL(g77, 1000);
|
||||
STANDARD_POWER_MODEL(tbex, 1000);
|
||||
|
||||
/* g52 is an alias of g76 (TNOX) for IPA */
|
||||
ALIAS_POWER_MODEL(g52, g76);
|
||||
/* tnax is an alias of g77 (TTRX) for IPA */
|
||||
ALIAS_POWER_MODEL(tnax, g77);
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
/*
|
||||
*
|
||||
* (C) COPYRIGHT 2014-2018 ARM Limited. All rights reserved.
|
||||
* (C) COPYRIGHT 2014-2019 ARM Limited. All rights reserved.
|
||||
*
|
||||
* This program is free software and is provided to you under the terms of the
|
||||
* GNU General Public License version 2 as published by the Free Software
|
||||
@@ -58,7 +58,9 @@ enum base_hw_feature {
|
||||
BASE_HW_FEATURE_AARCH64_MMU,
|
||||
BASE_HW_FEATURE_TLS_HASHING,
|
||||
BASE_HW_FEATURE_THREAD_GROUP_SPLIT,
|
||||
BASE_HW_FEATURE_3BIT_EXT_RW_L2_MMU_CONFIG,
|
||||
BASE_HW_FEATURE_CLEAN_ONLY_SAFE,
|
||||
BASE_HW_FEATURE_IDVS_GROUP_SIZE,
|
||||
BASE_HW_FEATURE_L2_CONFIG,
|
||||
BASE_HW_FEATURE_END
|
||||
};
|
||||
|
||||
@@ -66,120 +68,6 @@ static const enum base_hw_feature base_hw_features_generic[] = {
|
||||
BASE_HW_FEATURE_END
|
||||
};
|
||||
|
||||
static const enum base_hw_feature base_hw_features_t60x[] = {
|
||||
BASE_HW_FEATURE_LD_ST_LEA_TEX,
|
||||
BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
|
||||
BASE_HW_FEATURE_THREAD_GROUP_SPLIT,
|
||||
BASE_HW_FEATURE_V4,
|
||||
BASE_HW_FEATURE_END
|
||||
};
|
||||
|
||||
static const enum base_hw_feature base_hw_features_t62x[] = {
|
||||
BASE_HW_FEATURE_LD_ST_LEA_TEX,
|
||||
BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
|
||||
BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
|
||||
BASE_HW_FEATURE_THREAD_GROUP_SPLIT,
|
||||
BASE_HW_FEATURE_V4,
|
||||
BASE_HW_FEATURE_END
|
||||
};
|
||||
|
||||
static const enum base_hw_feature base_hw_features_t72x[] = {
|
||||
BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
|
||||
BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
|
||||
BASE_HW_FEATURE_INTERPIPE_REG_ALIASING,
|
||||
BASE_HW_FEATURE_OPTIMIZED_COVERAGE_MASK,
|
||||
BASE_HW_FEATURE_T7XX_PAIRING_RULES,
|
||||
BASE_HW_FEATURE_THREAD_GROUP_SPLIT,
|
||||
BASE_HW_FEATURE_WORKGROUP_ROUND_MULTIPLE_OF_4,
|
||||
BASE_HW_FEATURE_WARPING,
|
||||
BASE_HW_FEATURE_V4,
|
||||
BASE_HW_FEATURE_END
|
||||
};
|
||||
|
||||
static const enum base_hw_feature base_hw_features_t76x[] = {
|
||||
BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
|
||||
BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
|
||||
BASE_HW_FEATURE_XAFFINITY,
|
||||
BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
|
||||
BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
|
||||
BASE_HW_FEATURE_BRNDOUT_CC,
|
||||
BASE_HW_FEATURE_LD_ST_LEA_TEX,
|
||||
BASE_HW_FEATURE_LD_ST_TILEBUFFER,
|
||||
BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
|
||||
BASE_HW_FEATURE_MRT,
|
||||
BASE_HW_FEATURE_MSAA_16X,
|
||||
BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
|
||||
BASE_HW_FEATURE_T7XX_PAIRING_RULES,
|
||||
BASE_HW_FEATURE_TEST4_DATUM_MODE,
|
||||
BASE_HW_FEATURE_THREAD_GROUP_SPLIT,
|
||||
BASE_HW_FEATURE_END
|
||||
};
|
||||
|
||||
static const enum base_hw_feature base_hw_features_tFxx[] = {
|
||||
BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
|
||||
BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
|
||||
BASE_HW_FEATURE_XAFFINITY,
|
||||
BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
|
||||
BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
|
||||
BASE_HW_FEATURE_BRNDOUT_CC,
|
||||
BASE_HW_FEATURE_BRNDOUT_KILL,
|
||||
BASE_HW_FEATURE_LD_ST_LEA_TEX,
|
||||
BASE_HW_FEATURE_LD_ST_TILEBUFFER,
|
||||
BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
|
||||
BASE_HW_FEATURE_MRT,
|
||||
BASE_HW_FEATURE_MSAA_16X,
|
||||
BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE,
|
||||
BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
|
||||
BASE_HW_FEATURE_T7XX_PAIRING_RULES,
|
||||
BASE_HW_FEATURE_TEST4_DATUM_MODE,
|
||||
BASE_HW_FEATURE_THREAD_GROUP_SPLIT,
|
||||
BASE_HW_FEATURE_END
|
||||
};
|
||||
|
||||
static const enum base_hw_feature base_hw_features_t83x[] = {
|
||||
BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
|
||||
BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
|
||||
BASE_HW_FEATURE_XAFFINITY,
|
||||
BASE_HW_FEATURE_WARPING,
|
||||
BASE_HW_FEATURE_INTERPIPE_REG_ALIASING,
|
||||
BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
|
||||
BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
|
||||
BASE_HW_FEATURE_BRNDOUT_CC,
|
||||
BASE_HW_FEATURE_BRNDOUT_KILL,
|
||||
BASE_HW_FEATURE_LD_ST_LEA_TEX,
|
||||
BASE_HW_FEATURE_LD_ST_TILEBUFFER,
|
||||
BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
|
||||
BASE_HW_FEATURE_MRT,
|
||||
BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE,
|
||||
BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
|
||||
BASE_HW_FEATURE_T7XX_PAIRING_RULES,
|
||||
BASE_HW_FEATURE_TEST4_DATUM_MODE,
|
||||
BASE_HW_FEATURE_THREAD_GROUP_SPLIT,
|
||||
BASE_HW_FEATURE_END
|
||||
};
|
||||
|
||||
static const enum base_hw_feature base_hw_features_t82x[] = {
|
||||
BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
|
||||
BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
|
||||
BASE_HW_FEATURE_XAFFINITY,
|
||||
BASE_HW_FEATURE_WARPING,
|
||||
BASE_HW_FEATURE_INTERPIPE_REG_ALIASING,
|
||||
BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
|
||||
BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
|
||||
BASE_HW_FEATURE_BRNDOUT_CC,
|
||||
BASE_HW_FEATURE_BRNDOUT_KILL,
|
||||
BASE_HW_FEATURE_LD_ST_LEA_TEX,
|
||||
BASE_HW_FEATURE_LD_ST_TILEBUFFER,
|
||||
BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
|
||||
BASE_HW_FEATURE_MRT,
|
||||
BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE,
|
||||
BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
|
||||
BASE_HW_FEATURE_T7XX_PAIRING_RULES,
|
||||
BASE_HW_FEATURE_TEST4_DATUM_MODE,
|
||||
BASE_HW_FEATURE_THREAD_GROUP_SPLIT,
|
||||
BASE_HW_FEATURE_END
|
||||
};
|
||||
|
||||
static const enum base_hw_feature base_hw_features_tMIx[] = {
|
||||
BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
|
||||
BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
|
||||
@@ -203,6 +91,7 @@ static const enum base_hw_feature base_hw_features_tMIx[] = {
|
||||
BASE_HW_FEATURE_FLUSH_REDUCTION,
|
||||
BASE_HW_FEATURE_PROTECTED_MODE,
|
||||
BASE_HW_FEATURE_COHERENCY_REG,
|
||||
BASE_HW_FEATURE_AARCH64_MMU,
|
||||
BASE_HW_FEATURE_END
|
||||
};
|
||||
|
||||
@@ -230,6 +119,7 @@ static const enum base_hw_feature base_hw_features_tHEx[] = {
|
||||
BASE_HW_FEATURE_PROTECTED_MODE,
|
||||
BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
|
||||
BASE_HW_FEATURE_COHERENCY_REG,
|
||||
BASE_HW_FEATURE_AARCH64_MMU,
|
||||
BASE_HW_FEATURE_END
|
||||
};
|
||||
|
||||
@@ -257,6 +147,7 @@ static const enum base_hw_feature base_hw_features_tSIx[] = {
|
||||
BASE_HW_FEATURE_PROTECTED_MODE,
|
||||
BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
|
||||
BASE_HW_FEATURE_COHERENCY_REG,
|
||||
BASE_HW_FEATURE_AARCH64_MMU,
|
||||
BASE_HW_FEATURE_END
|
||||
};
|
||||
|
||||
@@ -284,6 +175,7 @@ static const enum base_hw_feature base_hw_features_tDVx[] = {
|
||||
BASE_HW_FEATURE_PROTECTED_MODE,
|
||||
BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
|
||||
BASE_HW_FEATURE_COHERENCY_REG,
|
||||
BASE_HW_FEATURE_AARCH64_MMU,
|
||||
BASE_HW_FEATURE_END
|
||||
};
|
||||
|
||||
@@ -313,7 +205,7 @@ static const enum base_hw_feature base_hw_features_tNOx[] = {
|
||||
BASE_HW_FEATURE_COHERENCY_REG,
|
||||
BASE_HW_FEATURE_AARCH64_MMU,
|
||||
BASE_HW_FEATURE_TLS_HASHING,
|
||||
BASE_HW_FEATURE_3BIT_EXT_RW_L2_MMU_CONFIG,
|
||||
BASE_HW_FEATURE_IDVS_GROUP_SIZE,
|
||||
BASE_HW_FEATURE_END
|
||||
};
|
||||
|
||||
@@ -343,34 +235,7 @@ static const enum base_hw_feature base_hw_features_tGOx[] = {
|
||||
BASE_HW_FEATURE_COHERENCY_REG,
|
||||
BASE_HW_FEATURE_AARCH64_MMU,
|
||||
BASE_HW_FEATURE_TLS_HASHING,
|
||||
BASE_HW_FEATURE_3BIT_EXT_RW_L2_MMU_CONFIG,
|
||||
BASE_HW_FEATURE_END
|
||||
};
|
||||
|
||||
static const enum base_hw_feature base_hw_features_tKAx[] = {
|
||||
BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
|
||||
BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
|
||||
BASE_HW_FEATURE_XAFFINITY,
|
||||
BASE_HW_FEATURE_WARPING,
|
||||
BASE_HW_FEATURE_INTERPIPE_REG_ALIASING,
|
||||
BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
|
||||
BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
|
||||
BASE_HW_FEATURE_BRNDOUT_CC,
|
||||
BASE_HW_FEATURE_BRNDOUT_KILL,
|
||||
BASE_HW_FEATURE_LD_ST_LEA_TEX,
|
||||
BASE_HW_FEATURE_LD_ST_TILEBUFFER,
|
||||
BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
|
||||
BASE_HW_FEATURE_MRT,
|
||||
BASE_HW_FEATURE_MSAA_16X,
|
||||
BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE,
|
||||
BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
|
||||
BASE_HW_FEATURE_T7XX_PAIRING_RULES,
|
||||
BASE_HW_FEATURE_TEST4_DATUM_MODE,
|
||||
BASE_HW_FEATURE_FLUSH_REDUCTION,
|
||||
BASE_HW_FEATURE_PROTECTED_MODE,
|
||||
BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
|
||||
BASE_HW_FEATURE_COHERENCY_REG,
|
||||
BASE_HW_FEATURE_AARCH64_MMU,
|
||||
BASE_HW_FEATURE_IDVS_GROUP_SIZE,
|
||||
BASE_HW_FEATURE_END
|
||||
};
|
||||
|
||||
@@ -398,10 +263,12 @@ static const enum base_hw_feature base_hw_features_tTRx[] = {
|
||||
BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
|
||||
BASE_HW_FEATURE_COHERENCY_REG,
|
||||
BASE_HW_FEATURE_AARCH64_MMU,
|
||||
BASE_HW_FEATURE_IDVS_GROUP_SIZE,
|
||||
BASE_HW_FEATURE_CLEAN_ONLY_SAFE,
|
||||
BASE_HW_FEATURE_END
|
||||
};
|
||||
|
||||
static const enum base_hw_feature base_hw_features_tBOx[] = {
|
||||
static const enum base_hw_feature base_hw_features_tNAx[] = {
|
||||
BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
|
||||
BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
|
||||
BASE_HW_FEATURE_XAFFINITY,
|
||||
@@ -425,10 +292,12 @@ static const enum base_hw_feature base_hw_features_tBOx[] = {
|
||||
BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
|
||||
BASE_HW_FEATURE_COHERENCY_REG,
|
||||
BASE_HW_FEATURE_AARCH64_MMU,
|
||||
BASE_HW_FEATURE_IDVS_GROUP_SIZE,
|
||||
BASE_HW_FEATURE_CLEAN_ONLY_SAFE,
|
||||
BASE_HW_FEATURE_END
|
||||
};
|
||||
|
||||
static const enum base_hw_feature base_hw_features_tEGx[] = {
|
||||
static const enum base_hw_feature base_hw_features_tBEx[] = {
|
||||
BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
|
||||
BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
|
||||
BASE_HW_FEATURE_XAFFINITY,
|
||||
@@ -447,14 +316,160 @@ static const enum base_hw_feature base_hw_features_tEGx[] = {
|
||||
BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
|
||||
BASE_HW_FEATURE_T7XX_PAIRING_RULES,
|
||||
BASE_HW_FEATURE_TEST4_DATUM_MODE,
|
||||
BASE_HW_FEATURE_THREAD_GROUP_SPLIT,
|
||||
BASE_HW_FEATURE_FLUSH_REDUCTION,
|
||||
BASE_HW_FEATURE_PROTECTED_MODE,
|
||||
BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
|
||||
BASE_HW_FEATURE_COHERENCY_REG,
|
||||
BASE_HW_FEATURE_AARCH64_MMU,
|
||||
BASE_HW_FEATURE_TLS_HASHING,
|
||||
BASE_HW_FEATURE_3BIT_EXT_RW_L2_MMU_CONFIG,
|
||||
BASE_HW_FEATURE_IDVS_GROUP_SIZE,
|
||||
BASE_HW_FEATURE_L2_CONFIG,
|
||||
BASE_HW_FEATURE_CLEAN_ONLY_SAFE,
|
||||
BASE_HW_FEATURE_END
|
||||
};
|
||||
|
||||
static const enum base_hw_feature base_hw_features_tULx[] = {
|
||||
BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
|
||||
BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
|
||||
BASE_HW_FEATURE_XAFFINITY,
|
||||
BASE_HW_FEATURE_WARPING,
|
||||
BASE_HW_FEATURE_INTERPIPE_REG_ALIASING,
|
||||
BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
|
||||
BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
|
||||
BASE_HW_FEATURE_BRNDOUT_CC,
|
||||
BASE_HW_FEATURE_BRNDOUT_KILL,
|
||||
BASE_HW_FEATURE_LD_ST_LEA_TEX,
|
||||
BASE_HW_FEATURE_LD_ST_TILEBUFFER,
|
||||
BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
|
||||
BASE_HW_FEATURE_MRT,
|
||||
BASE_HW_FEATURE_MSAA_16X,
|
||||
BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE,
|
||||
BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
|
||||
BASE_HW_FEATURE_T7XX_PAIRING_RULES,
|
||||
BASE_HW_FEATURE_TEST4_DATUM_MODE,
|
||||
BASE_HW_FEATURE_FLUSH_REDUCTION,
|
||||
BASE_HW_FEATURE_PROTECTED_MODE,
|
||||
BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
|
||||
BASE_HW_FEATURE_COHERENCY_REG,
|
||||
BASE_HW_FEATURE_AARCH64_MMU,
|
||||
BASE_HW_FEATURE_L2_CONFIG,
|
||||
BASE_HW_FEATURE_CLEAN_ONLY_SAFE,
|
||||
BASE_HW_FEATURE_END
|
||||
};
|
||||
|
||||
static const enum base_hw_feature base_hw_features_tDUx[] = {
|
||||
BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
|
||||
BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
|
||||
BASE_HW_FEATURE_XAFFINITY,
|
||||
BASE_HW_FEATURE_WARPING,
|
||||
BASE_HW_FEATURE_INTERPIPE_REG_ALIASING,
|
||||
BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
|
||||
BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
|
||||
BASE_HW_FEATURE_BRNDOUT_CC,
|
||||
BASE_HW_FEATURE_BRNDOUT_KILL,
|
||||
BASE_HW_FEATURE_LD_ST_LEA_TEX,
|
||||
BASE_HW_FEATURE_LD_ST_TILEBUFFER,
|
||||
BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
|
||||
BASE_HW_FEATURE_MRT,
|
||||
BASE_HW_FEATURE_MSAA_16X,
|
||||
BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE,
|
||||
BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
|
||||
BASE_HW_FEATURE_T7XX_PAIRING_RULES,
|
||||
BASE_HW_FEATURE_TEST4_DATUM_MODE,
|
||||
BASE_HW_FEATURE_FLUSH_REDUCTION,
|
||||
BASE_HW_FEATURE_PROTECTED_MODE,
|
||||
BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
|
||||
BASE_HW_FEATURE_COHERENCY_REG,
|
||||
BASE_HW_FEATURE_AARCH64_MMU,
|
||||
BASE_HW_FEATURE_IDVS_GROUP_SIZE,
|
||||
BASE_HW_FEATURE_L2_CONFIG,
|
||||
BASE_HW_FEATURE_CLEAN_ONLY_SAFE,
|
||||
BASE_HW_FEATURE_END
|
||||
};
|
||||
|
||||
static const enum base_hw_feature base_hw_features_tODx[] = {
|
||||
BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
|
||||
BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
|
||||
BASE_HW_FEATURE_XAFFINITY,
|
||||
BASE_HW_FEATURE_WARPING,
|
||||
BASE_HW_FEATURE_INTERPIPE_REG_ALIASING,
|
||||
BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
|
||||
BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
|
||||
BASE_HW_FEATURE_BRNDOUT_CC,
|
||||
BASE_HW_FEATURE_BRNDOUT_KILL,
|
||||
BASE_HW_FEATURE_LD_ST_LEA_TEX,
|
||||
BASE_HW_FEATURE_LD_ST_TILEBUFFER,
|
||||
BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
|
||||
BASE_HW_FEATURE_MRT,
|
||||
BASE_HW_FEATURE_MSAA_16X,
|
||||
BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE,
|
||||
BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
|
||||
BASE_HW_FEATURE_T7XX_PAIRING_RULES,
|
||||
BASE_HW_FEATURE_TEST4_DATUM_MODE,
|
||||
BASE_HW_FEATURE_FLUSH_REDUCTION,
|
||||
BASE_HW_FEATURE_PROTECTED_MODE,
|
||||
BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
|
||||
BASE_HW_FEATURE_COHERENCY_REG,
|
||||
BASE_HW_FEATURE_AARCH64_MMU,
|
||||
BASE_HW_FEATURE_L2_CONFIG,
|
||||
BASE_HW_FEATURE_CLEAN_ONLY_SAFE,
|
||||
BASE_HW_FEATURE_END
|
||||
};
|
||||
|
||||
static const enum base_hw_feature base_hw_features_tIDx[] = {
|
||||
BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
|
||||
BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
|
||||
BASE_HW_FEATURE_XAFFINITY,
|
||||
BASE_HW_FEATURE_WARPING,
|
||||
BASE_HW_FEATURE_INTERPIPE_REG_ALIASING,
|
||||
BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
|
||||
BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
|
||||
BASE_HW_FEATURE_BRNDOUT_CC,
|
||||
BASE_HW_FEATURE_BRNDOUT_KILL,
|
||||
BASE_HW_FEATURE_LD_ST_LEA_TEX,
|
||||
BASE_HW_FEATURE_LD_ST_TILEBUFFER,
|
||||
BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
|
||||
BASE_HW_FEATURE_MRT,
|
||||
BASE_HW_FEATURE_MSAA_16X,
|
||||
BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE,
|
||||
BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
|
||||
BASE_HW_FEATURE_T7XX_PAIRING_RULES,
|
||||
BASE_HW_FEATURE_TEST4_DATUM_MODE,
|
||||
BASE_HW_FEATURE_FLUSH_REDUCTION,
|
||||
BASE_HW_FEATURE_PROTECTED_MODE,
|
||||
BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
|
||||
BASE_HW_FEATURE_COHERENCY_REG,
|
||||
BASE_HW_FEATURE_AARCH64_MMU,
|
||||
BASE_HW_FEATURE_L2_CONFIG,
|
||||
BASE_HW_FEATURE_CLEAN_ONLY_SAFE,
|
||||
BASE_HW_FEATURE_END
|
||||
};
|
||||
|
||||
static const enum base_hw_feature base_hw_features_tVAx[] = {
|
||||
BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
|
||||
BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
|
||||
BASE_HW_FEATURE_XAFFINITY,
|
||||
BASE_HW_FEATURE_WARPING,
|
||||
BASE_HW_FEATURE_INTERPIPE_REG_ALIASING,
|
||||
BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
|
||||
BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
|
||||
BASE_HW_FEATURE_BRNDOUT_CC,
|
||||
BASE_HW_FEATURE_BRNDOUT_KILL,
|
||||
BASE_HW_FEATURE_LD_ST_LEA_TEX,
|
||||
BASE_HW_FEATURE_LD_ST_TILEBUFFER,
|
||||
BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
|
||||
BASE_HW_FEATURE_MRT,
|
||||
BASE_HW_FEATURE_MSAA_16X,
|
||||
BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE,
|
||||
BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
|
||||
BASE_HW_FEATURE_T7XX_PAIRING_RULES,
|
||||
BASE_HW_FEATURE_TEST4_DATUM_MODE,
|
||||
BASE_HW_FEATURE_FLUSH_REDUCTION,
|
||||
BASE_HW_FEATURE_PROTECTED_MODE,
|
||||
BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
|
||||
BASE_HW_FEATURE_COHERENCY_REG,
|
||||
BASE_HW_FEATURE_AARCH64_MMU,
|
||||
BASE_HW_FEATURE_L2_CONFIG,
|
||||
BASE_HW_FEATURE_CLEAN_ONLY_SAFE,
|
||||
BASE_HW_FEATURE_END
|
||||
};
|
||||
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,6 +1,6 @@
|
||||
/*
|
||||
*
|
||||
* (C) COPYRIGHT 2010-2018 ARM Limited. All rights reserved.
|
||||
* (C) COPYRIGHT 2010-2019 ARM Limited. All rights reserved.
|
||||
*
|
||||
* This program is free software and is provided to you under the terms of the
|
||||
* GNU General Public License version 2 as published by the Free Software
|
||||
@@ -36,7 +36,6 @@ typedef struct base_mem_handle {
|
||||
} base_mem_handle;
|
||||
|
||||
#include "mali_base_mem_priv.h"
|
||||
#include "mali_kbase_profiling_gator_api.h"
|
||||
#include "mali_midg_coherency.h"
|
||||
#include "mali_kbase_gpu_id.h"
|
||||
|
||||
@@ -87,6 +86,14 @@ typedef struct base_mem_handle {
|
||||
* @{
|
||||
*/
|
||||
|
||||
/* Physical memory group ID for normal usage.
|
||||
*/
|
||||
#define BASE_MEM_GROUP_DEFAULT (0)
|
||||
|
||||
/* Number of physical memory groups.
|
||||
*/
|
||||
#define BASE_MEM_GROUP_COUNT (16)
|
||||
|
||||
/**
|
||||
* typedef base_mem_alloc_flags - Memory allocation, access/hint flags.
|
||||
*
|
||||
@@ -127,19 +134,24 @@ typedef u32 base_mem_alloc_flags;
|
||||
*/
|
||||
#define BASE_MEM_PROT_GPU_EX ((base_mem_alloc_flags)1 << 4)
|
||||
|
||||
/* BASE_MEM_HINT flags have been removed, but their values are reserved
|
||||
* for backwards compatibility with older user-space drivers. The values
|
||||
* can be re-used once support for r5p0 user-space drivers is removed,
|
||||
* presumably in r7p0.
|
||||
*
|
||||
* RESERVED: (1U << 5)
|
||||
* RESERVED: (1U << 6)
|
||||
* RESERVED: (1U << 7)
|
||||
* RESERVED: (1U << 8)
|
||||
*/
|
||||
#define BASE_MEM_RESERVED_BIT_5 ((base_mem_alloc_flags)1 << 5)
|
||||
#define BASE_MEM_RESERVED_BIT_6 ((base_mem_alloc_flags)1 << 6)
|
||||
#define BASE_MEM_RESERVED_BIT_7 ((base_mem_alloc_flags)1 << 7)
|
||||
/* Will be permanently mapped in kernel space.
|
||||
* Flag is only allowed on allocations originating from kbase.
|
||||
*/
|
||||
#define BASEP_MEM_PERMANENT_KERNEL_MAPPING ((base_mem_alloc_flags)1 << 5)
|
||||
|
||||
/* The allocation will completely reside within the same 4GB chunk in the GPU
|
||||
* virtual space.
|
||||
* Since this flag is primarily required only for the TLS memory which will
|
||||
* not be used to contain executable code and also not used for Tiler heap,
|
||||
* it can't be used along with BASE_MEM_PROT_GPU_EX and TILER_ALIGN_TOP flags.
|
||||
*/
|
||||
#define BASE_MEM_GPU_VA_SAME_4GB_PAGE ((base_mem_alloc_flags)1 << 6)
|
||||
|
||||
/* Userspace is not allowed to free this memory.
|
||||
* Flag is only allowed on allocations originating from kbase.
|
||||
*/
|
||||
#define BASEP_MEM_NO_USER_FREE ((base_mem_alloc_flags)1 << 7)
|
||||
|
||||
#define BASE_MEM_RESERVED_BIT_8 ((base_mem_alloc_flags)1 << 8)
|
||||
|
||||
/* Grow backing store on GPU Page Fault
|
||||
@@ -173,9 +185,9 @@ typedef u32 base_mem_alloc_flags;
|
||||
*/
|
||||
#define BASE_MEM_COHERENT_SYSTEM_REQUIRED ((base_mem_alloc_flags)1 << 15)
|
||||
|
||||
/* Secure memory
|
||||
/* Protected memory
|
||||
*/
|
||||
#define BASE_MEM_SECURE ((base_mem_alloc_flags)1 << 16)
|
||||
#define BASE_MEM_PROTECTED ((base_mem_alloc_flags)1 << 16)
|
||||
|
||||
/* Not needed physical memory
|
||||
*/
|
||||
@@ -192,6 +204,7 @@ typedef u32 base_mem_alloc_flags;
|
||||
* Do not remove, use the next unreserved bit for new flags
|
||||
*/
|
||||
#define BASE_MEM_RESERVED_BIT_19 ((base_mem_alloc_flags)1 << 19)
|
||||
#define BASE_MEM_MAYBE_RESERVED_BIT_19 BASE_MEM_RESERVED_BIT_19
|
||||
|
||||
/**
|
||||
* Memory starting from the end of the initial commit is aligned to 'extent'
|
||||
@@ -200,11 +213,33 @@ typedef u32 base_mem_alloc_flags;
|
||||
*/
|
||||
#define BASE_MEM_TILER_ALIGN_TOP ((base_mem_alloc_flags)1 << 20)
|
||||
|
||||
/* Number of bits used as flags for base memory management
|
||||
/* Should be uncached on the GPU, will work only for GPUs using AARCH64 mmu mode.
|
||||
* Some components within the GPU might only be able to access memory that is
|
||||
* GPU cacheable. Refer to the specific GPU implementation for more details.
|
||||
* The 3 shareability flags will be ignored for GPU uncached memory.
|
||||
* If used while importing USER_BUFFER type memory, then the import will fail
|
||||
* if the memory is not aligned to GPU and CPU cache line width.
|
||||
*/
|
||||
#define BASE_MEM_UNCACHED_GPU ((base_mem_alloc_flags)1 << 21)
|
||||
|
||||
/*
|
||||
* Bits [22:25] for group_id (0~15).
|
||||
*
|
||||
* base_mem_group_id_set() should be used to pack a memory group ID into a
|
||||
* base_mem_alloc_flags value instead of accessing the bits directly.
|
||||
* base_mem_group_id_get() should be used to extract the memory group ID from
|
||||
* a base_mem_alloc_flags value.
|
||||
*/
|
||||
#define BASEP_MEM_GROUP_ID_SHIFT 22
|
||||
#define BASE_MEM_GROUP_ID_MASK \
|
||||
((base_mem_alloc_flags)0xF << BASEP_MEM_GROUP_ID_SHIFT)
|
||||
|
||||
/**
|
||||
* Number of bits used as flags for base memory management
|
||||
*
|
||||
* Must be kept in sync with the base_mem_alloc_flags flags
|
||||
*/
|
||||
#define BASE_MEM_FLAGS_NR_BITS 21
|
||||
#define BASE_MEM_FLAGS_NR_BITS 26
|
||||
|
||||
/* A mask for all output bits, excluding IN/OUT bits.
|
||||
*/
|
||||
@@ -215,6 +250,43 @@ typedef u32 base_mem_alloc_flags;
|
||||
#define BASE_MEM_FLAGS_INPUT_MASK \
|
||||
(((1 << BASE_MEM_FLAGS_NR_BITS) - 1) & ~BASE_MEM_FLAGS_OUTPUT_MASK)
|
||||
|
||||
/**
|
||||
* base_mem_group_id_get() - Get group ID from flags
|
||||
* @flags: Flags to pass to base_mem_alloc
|
||||
*
|
||||
* This inline function extracts the encoded group ID from flags
|
||||
* and converts it into numeric value (0~15).
|
||||
*
|
||||
* Return: group ID(0~15) extracted from the parameter
|
||||
*/
|
||||
static inline int base_mem_group_id_get(base_mem_alloc_flags flags)
|
||||
{
|
||||
/* Only input flags are expected here: any bit outside
 * BASE_MEM_FLAGS_INPUT_MASK trips the assertion.
 * NOTE(review): LOCAL_ASSERT is defined elsewhere; whether it is active
 * in non-debug builds cannot be told from this file section.
 */
LOCAL_ASSERT((flags & ~BASE_MEM_FLAGS_INPUT_MASK) == 0);
|
||||
/* Isolate the group-ID field, then shift it down so the result starts at
 * bit 0.
 */
return (int)((flags & BASE_MEM_GROUP_ID_MASK) >>
|
||||
BASEP_MEM_GROUP_ID_SHIFT);
|
||||
}
|
||||
|
||||
/**
|
||||
* base_mem_group_id_set() - Set group ID into base_mem_alloc_flags
|
||||
* @id: group ID(0~15) you want to encode
|
||||
*
|
||||
* This inline function encodes specific group ID into base_mem_alloc_flags.
|
||||
* Parameter 'id' should lie between 0 and 15 (inclusive).
|
||||
*
|
||||
* Return: base_mem_alloc_flags with the group ID (id) encoded
|
||||
*
|
||||
* The return value can be combined with other flags against base_mem_alloc
|
||||
* to identify a specific memory group.
|
||||
*/
|
||||
static inline base_mem_alloc_flags base_mem_group_id_set(int id)
|
||||
{
|
||||
/* Range-check the ID: it must be non-negative and below
 * BASE_MEM_GROUP_COUNT.
 * NOTE(review): LOCAL_ASSERT is defined elsewhere; whether it is active
 * in non-debug builds cannot be told from this file section.
 */
LOCAL_ASSERT(id >= 0);
|
||||
LOCAL_ASSERT(id < BASE_MEM_GROUP_COUNT);
|
||||
|
||||
/* Shift the ID into the group-ID field. The final mask confines the result
 * to that field, so even an out-of-range id cannot set any other flag bit.
 */
return ((base_mem_alloc_flags)id << BASEP_MEM_GROUP_ID_SHIFT) &
|
||||
BASE_MEM_GROUP_ID_MASK;
|
||||
}
|
||||
|
||||
/* A mask for all the flags which are modifiable via the base_mem_set_flags
|
||||
* interface.
|
||||
*/
|
||||
@@ -226,9 +298,13 @@ typedef u32 base_mem_alloc_flags;
|
||||
/* A mask of all currently reserved flags
|
||||
*/
|
||||
#define BASE_MEM_FLAGS_RESERVED \
|
||||
(BASE_MEM_RESERVED_BIT_5 | BASE_MEM_RESERVED_BIT_6 | \
|
||||
BASE_MEM_RESERVED_BIT_7 | BASE_MEM_RESERVED_BIT_8 | \
|
||||
BASE_MEM_RESERVED_BIT_19)
|
||||
(BASE_MEM_RESERVED_BIT_8 | BASE_MEM_MAYBE_RESERVED_BIT_19)
|
||||
|
||||
/* A mask of all the flags which are only valid for allocations within kbase,
|
||||
* and may not be passed from user space.
|
||||
*/
|
||||
#define BASEP_MEM_FLAGS_KERNEL_ONLY \
|
||||
(BASEP_MEM_PERMANENT_KERNEL_MAPPING | BASEP_MEM_NO_USER_FREE)
|
||||
|
||||
/* A mask of all the flags that can be returned via the base_mem_get_flags()
|
||||
* interface.
|
||||
@@ -236,7 +312,8 @@ typedef u32 base_mem_alloc_flags;
|
||||
#define BASE_MEM_FLAGS_QUERYABLE \
|
||||
(BASE_MEM_FLAGS_INPUT_MASK & ~(BASE_MEM_SAME_VA | \
|
||||
BASE_MEM_COHERENT_SYSTEM_REQUIRED | BASE_MEM_DONT_NEED | \
|
||||
BASE_MEM_IMPORT_SHARED | BASE_MEM_FLAGS_RESERVED))
|
||||
BASE_MEM_IMPORT_SHARED | BASE_MEM_FLAGS_RESERVED | \
|
||||
BASEP_MEM_FLAGS_KERNEL_ONLY))
|
||||
|
||||
/**
|
||||
* enum base_mem_import_type - Memory types supported by @a base_mem_import
|
||||
@@ -304,13 +381,15 @@ struct base_mem_import_user_buffer {
|
||||
#define BASE_MEM_TRACE_BUFFER_HANDLE (2ull << 12)
|
||||
#define BASE_MEM_MAP_TRACKING_HANDLE (3ull << 12)
|
||||
#define BASEP_MEM_WRITE_ALLOC_PAGES_HANDLE (4ull << 12)
|
||||
/* reserved handles ..-64<<PAGE_SHIFT> for future special handles */
|
||||
/* reserved handles ..-47<<PAGE_SHIFT> for future special handles */
|
||||
#define BASE_MEM_COOKIE_BASE (64ul << 12)
|
||||
#define BASE_MEM_FIRST_FREE_ADDRESS ((BITS_PER_LONG << 12) + \
|
||||
BASE_MEM_COOKIE_BASE)
|
||||
|
||||
/* Mask to detect 4GB boundary alignment */
|
||||
#define BASE_MEM_MASK_4GB 0xfffff000UL
|
||||
/* Mask to detect 4GB boundary (in page units) alignment */
|
||||
#define BASE_MEM_PFN_MASK_4GB (BASE_MEM_MASK_4GB >> LOCAL_PAGE_SHIFT)
|
||||
|
||||
/**
|
||||
* Limit on the 'extent' parameter for an allocation with the
|
||||
@@ -326,15 +405,9 @@ struct base_mem_import_user_buffer {
|
||||
/* Bit mask of cookies used for memory allocation setup */
|
||||
#define KBASE_COOKIE_MASK ~1UL /* bit 0 is reserved */
|
||||
|
||||
/* Maximum size allowed in a single KBASE_IOCTL_MEM_ALLOC call */
|
||||
#define KBASE_MEM_ALLOC_MAX_SIZE ((8ull << 30) >> PAGE_SHIFT) /* 8 GB */
|
||||
|
||||
/**
|
||||
* @brief Result codes of changing the size of the backing store allocated to a tmem region
|
||||
*/
|
||||
typedef enum base_backing_threshold_status {
|
||||
BASE_BACKING_THRESHOLD_OK = 0, /**< Resize successful */
|
||||
BASE_BACKING_THRESHOLD_ERROR_OOM = -2, /**< Increase failed due to an out-of-memory condition */
|
||||
BASE_BACKING_THRESHOLD_ERROR_INVALID_ARGUMENTS = -4 /**< Invalid arguments (not tmem, illegal size request, etc.) */
|
||||
} base_backing_threshold_status;
|
||||
|
||||
/**
|
||||
* @addtogroup base_user_api_memory_defered User-side Base Deferred Memory Coherency APIs
|
||||
@@ -588,43 +661,8 @@ typedef u32 base_jd_core_req;
|
||||
#define BASE_JD_REQ_SOFT_FENCE_TRIGGER (BASE_JD_REQ_SOFT_JOB | 0x2)
|
||||
#define BASE_JD_REQ_SOFT_FENCE_WAIT (BASE_JD_REQ_SOFT_JOB | 0x3)
|
||||
|
||||
/**
|
||||
* SW Only requirement : Replay job.
|
||||
*
|
||||
* If the preceding job fails, the replay job will cause the jobs specified in
|
||||
* the list of base_jd_replay_payload pointed to by the jc pointer to be
|
||||
* replayed.
|
||||
*
|
||||
* A replay job will only cause jobs to be replayed up to BASEP_JD_REPLAY_LIMIT
|
||||
* times. If a job fails more than BASEP_JD_REPLAY_LIMIT times then the replay
|
||||
* job is failed, as well as any following dependencies.
|
||||
*
|
||||
* The replayed jobs will require a number of atom IDs. If there are not enough
|
||||
* free atom IDs then the replay job will fail.
|
||||
*
|
||||
* If the preceding job does not fail, then the replay job is returned as
|
||||
* completed.
|
||||
*
|
||||
* The replayed jobs will never be returned to userspace. The preceding failed
|
||||
* job will be returned to userspace as failed; the status of this job should
|
||||
* be ignored. Completion should be determined by the status of the replay soft
|
||||
* job.
|
||||
*
|
||||
* In order for the jobs to be replayed, the job headers will have to be
|
||||
* modified. The Status field will be reset to NOT_STARTED. If the Job Type
|
||||
* field indicates a Vertex Shader Job then it will be changed to Null Job.
|
||||
*
|
||||
* The replayed jobs have the following assumptions :
|
||||
*
|
||||
* - No external resources. Any required external resources will be held by the
|
||||
* replay atom.
|
||||
* - Pre-dependencies are created based on job order.
|
||||
* - Atom numbers are automatically assigned.
|
||||
* - device_nr is set to 0. This is not relevant as
|
||||
* BASE_JD_REQ_SPECIFIC_COHERENT_GROUP should not be set.
|
||||
* - Priority is inherited from the replay job.
|
||||
*/
|
||||
#define BASE_JD_REQ_SOFT_REPLAY (BASE_JD_REQ_SOFT_JOB | 0x4)
|
||||
/* 0x4 RESERVED for now */
|
||||
|
||||
/**
|
||||
* SW only requirement: event wait/trigger job.
|
||||
*
|
||||
@@ -643,9 +681,10 @@ typedef u32 base_jd_core_req;
|
||||
/**
|
||||
* SW only requirement: Just In Time allocation
|
||||
*
|
||||
* This job requests a JIT allocation based on the request in the
|
||||
* @base_jit_alloc_info structure which is passed via the jc element of
|
||||
* the atom.
|
||||
* This job requests a single or multiple JIT allocations through a list
|
||||
* of @base_jit_alloc_info structure which is passed via the jc element of
|
||||
* the atom. The number of @base_jit_alloc_info structures present in the
|
||||
* list is passed via the nr_extres element of the atom
|
||||
*
|
||||
* It should be noted that the id entry in @base_jit_alloc_info must not
|
||||
* be reused until it has been released via @BASE_JD_REQ_SOFT_JIT_FREE.
|
||||
@@ -659,9 +698,9 @@ typedef u32 base_jd_core_req;
|
||||
/**
|
||||
* SW only requirement: Just In Time free
|
||||
*
|
||||
* This job requests a JIT allocation created by @BASE_JD_REQ_SOFT_JIT_ALLOC
|
||||
* to be freed. The ID of the JIT allocation is passed via the jc element of
|
||||
* the atom.
|
||||
* This job requests a single or multiple JIT allocations created by
|
||||
* @BASE_JD_REQ_SOFT_JIT_ALLOC to be freed. The ID list of the JIT
|
||||
* allocations is passed via the jc element of the atom.
|
||||
*
|
||||
* The job will complete immediately.
|
||||
*/
|
||||
@@ -743,6 +782,14 @@ typedef u32 base_jd_core_req;
|
||||
*/
|
||||
#define BASE_JD_REQ_SKIP_CACHE_END ((base_jd_core_req)1 << 16)
|
||||
|
||||
/**
|
||||
* Request the atom be executed on a specific job slot.
|
||||
*
|
||||
* When this flag is specified, it takes precedence over any existing job slot
|
||||
* selection logic.
|
||||
*/
|
||||
#define BASE_JD_REQ_JOB_SLOT ((base_jd_core_req)1 << 17)
|
||||
|
||||
/**
|
||||
* These requirement bits are currently unused in base_jd_core_req
|
||||
*/
|
||||
@@ -752,7 +799,8 @@ typedef u32 base_jd_core_req;
|
||||
BASE_JD_REQ_EVENT_COALESCE | \
|
||||
BASE_JD_REQ_COHERENT_GROUP | BASE_JD_REQ_SPECIFIC_COHERENT_GROUP | \
|
||||
BASE_JD_REQ_FS_AFBC | BASE_JD_REQ_PERMON | \
|
||||
BASE_JD_REQ_SKIP_CACHE_START | BASE_JD_REQ_SKIP_CACHE_END))
|
||||
BASE_JD_REQ_SKIP_CACHE_START | BASE_JD_REQ_SKIP_CACHE_END | \
|
||||
BASE_JD_REQ_JOB_SLOT))
|
||||
|
||||
/**
|
||||
* Mask of all bits in base_jd_core_req that control the type of the atom.
|
||||
@@ -776,45 +824,6 @@ typedef u32 base_jd_core_req;
|
||||
((core_req & BASE_JD_REQ_SOFT_JOB) || \
|
||||
(core_req & BASE_JD_REQ_ATOM_TYPE) == BASE_JD_REQ_DEP)
|
||||
|
||||
/**
|
||||
* @brief States to model state machine processed by kbasep_js_job_check_ref_cores(), which
|
||||
* handles retaining cores for power management and affinity management.
|
||||
*
|
||||
* The state @ref KBASE_ATOM_COREREF_STATE_RECHECK_AFFINITY prevents an attack
|
||||
* where lots of atoms could be submitted before powerup, and each has an
|
||||
* affinity chosen that causes other atoms to have an affinity
|
||||
* violation. Whilst the affinity was not causing violations at the time it
|
||||
* was chosen, it could cause violations thereafter. For example, 1000 jobs
|
||||
* could have had their affinity chosen during the powerup time, so any of
|
||||
* those 1000 jobs could cause an affinity violation later on.
|
||||
*
|
||||
* The attack would otherwise occur because other atoms/contexts have to wait for:
|
||||
* -# the currently running atoms (which are causing the violation) to
|
||||
* finish
|
||||
* -# and, the atoms that had their affinity chosen during powerup to
|
||||
* finish. These are run preferentially because they don't cause a
|
||||
* violation, but instead continue to cause the violation in others.
|
||||
* -# or, the attacker is scheduled out (which might not happen for just 2
|
||||
* contexts)
|
||||
*
|
||||
* By re-choosing the affinity (which is designed to avoid violations at the
|
||||
* time it's chosen), we break condition (2) of the wait, which minimizes the
|
||||
* problem to just waiting for current jobs to finish (which can be bounded if
|
||||
* the Job Scheduling Policy has a timer).
|
||||
*/
|
||||
enum kbase_atom_coreref_state {
|
||||
/** Starting state: No affinity chosen, and cores must be requested. kbase_jd_atom::affinity==0 */
|
||||
KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED,
|
||||
/** Cores requested, but waiting for them to be powered. Requested cores given by kbase_jd_atom::affinity */
|
||||
KBASE_ATOM_COREREF_STATE_WAITING_FOR_REQUESTED_CORES,
|
||||
/** Cores given by kbase_jd_atom::affinity are powered, but affinity might be out-of-date, so must recheck */
|
||||
KBASE_ATOM_COREREF_STATE_RECHECK_AFFINITY,
|
||||
/** Cores given by kbase_jd_atom::affinity are powered, and affinity is up-to-date, but must check for violations */
|
||||
KBASE_ATOM_COREREF_STATE_CHECK_AFFINITY_VIOLATIONS,
|
||||
/** Cores are powered, kbase_jd_atom::affinity up-to-date, no affinity violations: atom can be submitted to HW */
|
||||
KBASE_ATOM_COREREF_STATE_READY
|
||||
};
|
||||
|
||||
/*
|
||||
* Base Atom priority
|
||||
*
|
||||
@@ -822,15 +831,16 @@ enum kbase_atom_coreref_state {
|
||||
* BASE_JD_PRIO_<...> definitions below. It is undefined to use a priority
|
||||
* level that is not one of those defined below.
|
||||
*
|
||||
* Priority levels only affect scheduling between atoms of the same type within
|
||||
* a base context, and only after the atoms have had dependencies resolved.
|
||||
* Fragment atoms does not affect non-frament atoms with lower priorities, and
|
||||
* the other way around. For example, a low priority atom that has had its
|
||||
* dependencies resolved might run before a higher priority atom that has not
|
||||
* had its dependencies resolved.
|
||||
* Priority levels only affect scheduling after the atoms have had dependencies
|
||||
* resolved. For example, a low priority atom that has had its dependencies
|
||||
* resolved might run before a higher priority atom that has not had its
|
||||
* dependencies resolved.
|
||||
*
|
||||
* The scheduling between base contexts/processes and between atoms from
|
||||
* different base contexts/processes is unaffected by atom priority.
|
||||
* In general, fragment atoms do not affect non-fragment atoms with
|
||||
* lower priorities, and vice versa. One exception is that there is only one
|
||||
* priority value for each context. So a high-priority (e.g.) fragment atom
|
||||
* could increase its context priority, causing its non-fragment atoms to also
|
||||
* be scheduled sooner.
|
||||
*
|
||||
* The atoms are scheduled as follows with respect to their priorities:
|
||||
* - Let atoms 'X' and 'Y' be for the same job slot who have dependencies
|
||||
@@ -842,6 +852,14 @@ enum kbase_atom_coreref_state {
|
||||
* - Any two atoms that have the same priority could run in any order with
|
||||
* respect to each other. That is, there is no ordering constraint between
|
||||
* atoms of the same priority.
|
||||
*
|
||||
* The sysfs file 'js_ctx_scheduling_mode' is used to control how atoms are
|
||||
* scheduled between contexts. The default value, 0, will cause higher-priority
|
||||
* atoms to be scheduled first, regardless of their context. The value 1 will
|
||||
* use a round-robin algorithm when deciding which context's atoms to schedule
|
||||
* next, so higher-priority atoms can only preempt lower priority atoms within
|
||||
* the same context. See KBASE_JS_SYSTEM_PRIORITY_MODE and
|
||||
* KBASE_JS_PROCESS_LOCAL_PRIORITY_MODE for more details.
|
||||
*/
|
||||
typedef u8 base_jd_prio;
|
||||
|
||||
@@ -889,14 +907,14 @@ typedef struct base_jd_atom_v2 {
|
||||
u64 jc; /**< job-chain GPU address */
|
||||
struct base_jd_udata udata; /**< user data */
|
||||
u64 extres_list; /**< list of external resources */
|
||||
u16 nr_extres; /**< nr of external resources */
|
||||
u16 nr_extres; /**< nr of external resources or JIT allocations */
|
||||
u16 compat_core_req; /**< core requirements which correspond to the legacy support for UK 10.2 */
|
||||
struct base_dependency pre_dep[2]; /**< pre-dependencies, one need to use SETTER function to assign this field,
|
||||
this is done in order to reduce possibility of improper assignment of a dependency field */
|
||||
base_atom_id atom_number; /**< unique number to identify the atom */
|
||||
base_jd_prio prio; /**< Atom priority. Refer to @ref base_jd_prio for more details */
|
||||
u8 device_nr; /**< coregroup when BASE_JD_REQ_SPECIFIC_COHERENT_GROUP specified */
|
||||
u8 padding[1];
|
||||
u8 jobslot; /**< Job slot to use when BASE_JD_REQ_JOB_SLOT is specified */
|
||||
base_jd_core_req core_req; /**< core requirements */
|
||||
} base_jd_atom_v2;
|
||||
|
||||
@@ -1180,7 +1198,6 @@ typedef enum base_jd_event_code {
|
||||
BASE_JD_EVENT_JOB_CANCELLED = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x002,
|
||||
BASE_JD_EVENT_JOB_INVALID = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x003,
|
||||
BASE_JD_EVENT_PM_EVENT = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x004,
|
||||
BASE_JD_EVENT_FORCE_REPLAY = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x005,
|
||||
|
||||
BASE_JD_EVENT_BAG_INVALID = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_BAG | 0x003,
|
||||
|
||||
@@ -1642,20 +1659,28 @@ typedef u32 base_context_create_flags;
|
||||
#define BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED \
|
||||
((base_context_create_flags)1 << 1)
|
||||
|
||||
/**
|
||||
* Bitpattern describing the ::base_context_create_flags that can be
|
||||
* passed to base_context_init()
|
||||
*/
|
||||
#define BASE_CONTEXT_CREATE_ALLOWED_FLAGS \
|
||||
(((u32)BASE_CONTEXT_CCTX_EMBEDDED) | \
|
||||
((u32)BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED))
|
||||
|
||||
/**
|
||||
* Bitpattern describing the ::base_context_create_flags that can be
|
||||
/* Bit-shift used to encode a memory group ID in base_context_create_flags
|
||||
*/
|
||||
#define BASEP_CONTEXT_MMU_GROUP_ID_SHIFT (3)
|
||||
|
||||
/* Bitmask used to encode a memory group ID in base_context_create_flags
|
||||
*/
|
||||
#define BASEP_CONTEXT_MMU_GROUP_ID_MASK \
|
||||
((base_context_create_flags)0xF << BASEP_CONTEXT_MMU_GROUP_ID_SHIFT)
|
||||
|
||||
/* Bitpattern describing the base_context_create_flags that can be
|
||||
* passed to the kernel
|
||||
*/
|
||||
#define BASE_CONTEXT_CREATE_KERNEL_FLAGS \
|
||||
((u32)BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED)
|
||||
#define BASEP_CONTEXT_CREATE_KERNEL_FLAGS \
|
||||
(BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED | \
|
||||
BASEP_CONTEXT_MMU_GROUP_ID_MASK)
|
||||
|
||||
/* Bitpattern describing the ::base_context_create_flags that can be
|
||||
* passed to base_context_init()
|
||||
*/
|
||||
#define BASEP_CONTEXT_CREATE_ALLOWED_FLAGS \
|
||||
(BASE_CONTEXT_CCTX_EMBEDDED | BASEP_CONTEXT_CREATE_KERNEL_FLAGS)
|
||||
|
||||
/*
|
||||
* Private flags used on the base context
|
||||
@@ -1666,7 +1691,46 @@ typedef u32 base_context_create_flags;
|
||||
* not collide with them.
|
||||
*/
|
||||
/** Private flag tracking whether job descriptor dumping is disabled */
|
||||
#define BASEP_CONTEXT_FLAG_JOB_DUMP_DISABLED ((u32)(1 << 31))
|
||||
/* Bit 31 of base_context_create_flags. The cast is applied before the shift
 * so that the shift is evaluated in the unsigned flags type; shifting a plain
 * int constant into bit 31 would overflow a signed int.
 */
#define BASEP_CONTEXT_FLAG_JOB_DUMP_DISABLED \
	((base_context_create_flags)1 << 31)
|
||||
|
||||
/**
|
||||
* base_context_mmu_group_id_set - Encode a memory group ID in
|
||||
* base_context_create_flags
|
||||
*
|
||||
* Memory allocated for GPU page tables will come from the specified group.
|
||||
*
|
||||
* @group_id: Physical memory group ID. Range is 0..(BASE_MEM_GROUP_COUNT-1).
|
||||
*
|
||||
* Return: Bitmask of flags to pass to base_context_init.
|
||||
*/
|
||||
static inline base_context_create_flags base_context_mmu_group_id_set(
|
||||
int const group_id)
|
||||
{
|
||||
LOCAL_ASSERT(group_id >= 0);
|
||||
LOCAL_ASSERT(group_id < BASE_MEM_GROUP_COUNT);
|
||||
return BASEP_CONTEXT_MMU_GROUP_ID_MASK &
|
||||
((base_context_create_flags)group_id <<
|
||||
BASEP_CONTEXT_MMU_GROUP_ID_SHIFT);
|
||||
}
|
||||
|
||||
/**
|
||||
* base_context_mmu_group_id_get - Decode a memory group ID from
|
||||
* base_context_create_flags
|
||||
*
|
||||
* Memory allocated for GPU page tables will come from the returned group.
|
||||
*
|
||||
* @flags: Bitmask of flags to pass to base_context_init.
|
||||
*
|
||||
* Return: Physical memory group ID. Valid range is 0..(BASE_MEM_GROUP_COUNT-1).
|
||||
*/
|
||||
static inline int base_context_mmu_group_id_get(
|
||||
base_context_create_flags const flags)
|
||||
{
|
||||
LOCAL_ASSERT(flags == (flags & BASEP_CONTEXT_CREATE_ALLOWED_FLAGS));
|
||||
return (int)((flags & BASEP_CONTEXT_MMU_GROUP_ID_MASK) >>
|
||||
BASEP_CONTEXT_MMU_GROUP_ID_SHIFT);
|
||||
}
|
||||
|
||||
/** @} end group base_user_api_core */
|
||||
|
||||
@@ -1693,82 +1757,8 @@ typedef u32 base_context_create_flags;
|
||||
* @{
|
||||
*/
|
||||
|
||||
/**
|
||||
* @brief The payload for a replay job. This must be in GPU memory.
|
||||
*/
|
||||
typedef struct base_jd_replay_payload {
|
||||
/**
|
||||
* Pointer to the first entry in the base_jd_replay_jc list. These
|
||||
* will be replayed in @b reverse order (so that extra ones can be added
|
||||
* to the head in future soft jobs without affecting this soft job)
|
||||
*/
|
||||
u64 tiler_jc_list;
|
||||
|
||||
/**
|
||||
* Pointer to the fragment job chain.
|
||||
*/
|
||||
u64 fragment_jc;
|
||||
|
||||
/**
|
||||
* Pointer to the tiler heap free FBD field to be modified.
|
||||
*/
|
||||
u64 tiler_heap_free;
|
||||
|
||||
/**
|
||||
* Hierarchy mask for the replayed fragment jobs. May be zero.
|
||||
*/
|
||||
u16 fragment_hierarchy_mask;
|
||||
|
||||
/**
|
||||
* Hierarchy mask for the replayed tiler jobs. May be zero.
|
||||
*/
|
||||
u16 tiler_hierarchy_mask;
|
||||
|
||||
/**
|
||||
* Default weight to be used for hierarchy levels not in the original
|
||||
* mask.
|
||||
*/
|
||||
u32 hierarchy_default_weight;
|
||||
|
||||
/**
|
||||
* Core requirements for the tiler job chain
|
||||
*/
|
||||
base_jd_core_req tiler_core_req;
|
||||
|
||||
/**
|
||||
* Core requirements for the fragment job chain
|
||||
*/
|
||||
base_jd_core_req fragment_core_req;
|
||||
} base_jd_replay_payload;
|
||||
|
||||
/**
|
||||
* @brief An entry in the linked list of job chains to be replayed. This must
|
||||
* be in GPU memory.
|
||||
*/
|
||||
typedef struct base_jd_replay_jc {
|
||||
/**
|
||||
* Pointer to next entry in the list. A setting of NULL indicates the
|
||||
* end of the list.
|
||||
*/
|
||||
u64 next;
|
||||
|
||||
/**
|
||||
* Pointer to the job chain.
|
||||
*/
|
||||
u64 jc;
|
||||
|
||||
} base_jd_replay_jc;
|
||||
|
||||
/* Maximum number of jobs allowed in a fragment chain in the payload of a
|
||||
* replay job */
|
||||
#define BASE_JD_REPLAY_F_CHAIN_JOB_LIMIT 256
|
||||
|
||||
/** @} end group base_api */
|
||||
|
||||
typedef struct base_profiling_controls {
|
||||
u32 profiling_controls[FBDUMP_CONTROL_MAX];
|
||||
} base_profiling_controls;
|
||||
|
||||
/* Enable additional tracepoints for latency measurements (TL_ATOM_READY,
|
||||
* TL_ATOM_DONE, TL_ATOM_PRIO_CHANGE, TL_ATOM_EVENT_POST) */
|
||||
#define BASE_TLSTREAM_ENABLE_LATENCY_TRACEPOINTS (1 << 0)
|
||||
@@ -1780,4 +1770,23 @@ typedef struct base_profiling_controls {
|
||||
#define BASE_TLSTREAM_FLAGS_MASK (BASE_TLSTREAM_ENABLE_LATENCY_TRACEPOINTS | \
|
||||
BASE_TLSTREAM_JOB_DUMPING_ENABLED)
|
||||
|
||||
/**
|
||||
* A number of bit flags are defined for requesting cpu_gpu_timeinfo. These
|
||||
* flags are also used, where applicable, for specifying which fields
|
||||
* are valid following the request operation.
|
||||
*/
|
||||
|
||||
/* For monotonic (counter) timefield */
|
||||
#define BASE_TIMEINFO_MONOTONIC_FLAG (1UL << 0)
|
||||
/* For system wide timestamp */
|
||||
#define BASE_TIMEINFO_TIMESTAMP_FLAG (1UL << 1)
|
||||
/* For GPU cycle counter */
|
||||
#define BASE_TIMEINFO_CYCLE_COUNTER_FLAG (1UL << 2)
|
||||
|
||||
#define BASE_TIMEREQUEST_ALLOWED_FLAGS (\
|
||||
BASE_TIMEINFO_MONOTONIC_FLAG | \
|
||||
BASE_TIMEINFO_TIMESTAMP_FLAG | \
|
||||
BASE_TIMEINFO_CYCLE_COUNTER_FLAG)
|
||||
|
||||
|
||||
#endif /* _BASE_KERNEL_H_ */
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
/*
|
||||
*
|
||||
* (C) COPYRIGHT 2010-2018 ARM Limited. All rights reserved.
|
||||
* (C) COPYRIGHT 2010-2019 ARM Limited. All rights reserved.
|
||||
*
|
||||
* This program is free software and is provided to you under the terms of the
|
||||
* GNU General Public License version 2 as published by the Free Software
|
||||
@@ -59,7 +59,6 @@
|
||||
#include "mali_kbase_context.h"
|
||||
#include "mali_kbase_strings.h"
|
||||
#include "mali_kbase_mem_lowlevel.h"
|
||||
#include "mali_kbase_trace_timeline.h"
|
||||
#include "mali_kbase_js.h"
|
||||
#include "mali_kbase_utility.h"
|
||||
#include "mali_kbase_mem.h"
|
||||
@@ -69,7 +68,7 @@
|
||||
#include "mali_kbase_jd_debugfs.h"
|
||||
#include "mali_kbase_gpuprops.h"
|
||||
#include "mali_kbase_jm.h"
|
||||
#include "mali_kbase_vinstr.h"
|
||||
#include "mali_kbase_ioctl.h"
|
||||
|
||||
#include "ipa/mali_kbase_ipa.h"
|
||||
|
||||
@@ -77,11 +76,17 @@
|
||||
#include <trace/events/gpu.h>
|
||||
#endif
|
||||
|
||||
|
||||
#ifndef u64_to_user_ptr
|
||||
/* Introduced in Linux v4.6 */
|
||||
/* Fallback definition: convert a u64 value to a user-space pointer. The
 * argument is parenthesized so that the cast applies to the whole expression
 * passed in, not just its first operand.
 */
#define u64_to_user_ptr(x) ((void __user *)(uintptr_t)(x))
|
||||
#endif
|
||||
|
||||
|
||||
/* Physical memory group ID for a special page which can alias several regions.
|
||||
*/
|
||||
#define KBASE_MEM_GROUP_SINK BASE_MEM_GROUP_DEFAULT
|
||||
|
||||
/*
|
||||
* Kernel-side Base (KBase) APIs
|
||||
*/
|
||||
@@ -109,13 +114,10 @@ int kbase_device_has_feature(struct kbase_device *kbdev, u32 feature);
|
||||
struct kbase_device *kbase_find_device(int minor);
|
||||
void kbase_release_device(struct kbase_device *kbdev);
|
||||
|
||||
void kbase_set_profiling_control(struct kbase_device *kbdev, u32 control, u32 value);
|
||||
|
||||
|
||||
/**
|
||||
* kbase_get_unmapped_area() - get an address range which is currently
|
||||
* unmapped.
|
||||
* @filp: File operations associated with kbase device.
|
||||
* kbase_context_get_unmapped_area() - get an address range which is currently
|
||||
* unmapped.
|
||||
* @kctx: A kernel base context (which has its own GPU address space).
|
||||
* @addr: CPU mapped address (set to 0 since MAP_FIXED mapping is not allowed
|
||||
* as Mali GPU driver decides about the mapping).
|
||||
* @len: Length of the address range.
|
||||
@@ -150,7 +152,7 @@ void kbase_set_profiling_control(struct kbase_device *kbdev, u32 control, u32 va
|
||||
* Return: if successful, address of the unmapped area aligned as required;
|
||||
* error code (negative) in case of failure;
|
||||
*/
|
||||
unsigned long kbase_get_unmapped_area(struct file *filp,
|
||||
unsigned long kbase_context_get_unmapped_area(struct kbase_context *kctx,
|
||||
const unsigned long addr, const unsigned long len,
|
||||
const unsigned long pgoff, const unsigned long flags);
|
||||
|
||||
@@ -203,6 +205,16 @@ bool jd_submit_atom(struct kbase_context *kctx,
|
||||
struct kbase_jd_atom *katom);
|
||||
void kbase_jd_dep_clear_locked(struct kbase_jd_atom *katom);
|
||||
|
||||
/**
|
||||
* kbase_job_done - Process completed jobs from job interrupt
|
||||
* @kbdev: Pointer to the kbase device.
|
||||
* @done: Bitmask of done or failed jobs, from JOB_IRQ_STAT register
|
||||
*
|
||||
* This function processes the completed, or failed, jobs from the GPU job
|
||||
* slots, for the bits set in the @done bitmask.
|
||||
*
|
||||
* The hwaccess_lock must be held when calling this function.
|
||||
*/
|
||||
void kbase_job_done(struct kbase_device *kbdev, u32 done);
|
||||
|
||||
/**
|
||||
@@ -239,6 +251,70 @@ void kbase_event_close(struct kbase_context *kctx);
|
||||
void kbase_event_cleanup(struct kbase_context *kctx);
|
||||
void kbase_event_wakeup(struct kbase_context *kctx);
|
||||
|
||||
/**
|
||||
* kbasep_jit_alloc_validate() - Validate the JIT allocation info.
|
||||
*
|
||||
* @kctx: Pointer to the kbase context within which the JIT
|
||||
* allocation is to be validated.
|
||||
* @info: Pointer to struct @base_jit_alloc_info
|
||||
* which is to be validated.
|
||||
* @return: 0 if jit allocation is valid; negative error code otherwise
|
||||
*/
|
||||
int kbasep_jit_alloc_validate(struct kbase_context *kctx,
|
||||
struct base_jit_alloc_info *info);
|
||||
/**
|
||||
* kbase_free_user_buffer() - Free memory allocated for struct
|
||||
* @kbase_debug_copy_buffer.
|
||||
*
|
||||
* @buffer: Pointer to the memory location allocated for the object
|
||||
* of the type struct @kbase_debug_copy_buffer.
|
||||
*/
|
||||
static inline void kbase_free_user_buffer(
|
||||
struct kbase_debug_copy_buffer *buffer)
|
||||
{
|
||||
struct page **pages = buffer->extres_pages;
|
||||
int nr_pages = buffer->nr_extres_pages;
|
||||
|
||||
if (pages) {
|
||||
int i;
|
||||
|
||||
for (i = 0; i < nr_pages; i++) {
|
||||
struct page *pg = pages[i];
|
||||
|
||||
if (pg)
|
||||
put_page(pg);
|
||||
}
|
||||
kfree(pages);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* kbase_mem_copy_from_extres_page() - Copy pages from external resources.
|
||||
*
|
||||
* @kctx: kbase context within which the copying is to take place.
|
||||
* @extres_pages: Pointer to the pages which correspond to the external
|
||||
* resources from which the copying will take place.
|
||||
* @pages: Pointer to the pages to which the content is to be
|
||||
* copied from the provided external resources.
|
||||
* @nr_pages: Number of pages to copy.
|
||||
* @target_page_nr: Number of target pages which will be used for copying.
|
||||
* @offset: Offset into the target pages from which the copying
|
||||
* is to be performed.
|
||||
* @to_copy: Size of the chunk to be copied, in bytes.
|
||||
*/
|
||||
void kbase_mem_copy_from_extres_page(struct kbase_context *kctx,
|
||||
void *extres_page, struct page **pages, unsigned int nr_pages,
|
||||
unsigned int *target_page_nr, size_t offset, size_t *to_copy);
|
||||
/**
|
||||
* kbase_mem_copy_from_extres() - Copy from external resources.
|
||||
*
|
||||
* @kctx: kbase context within which the copying is to take place.
|
||||
* @buf_data: Pointer to the information about external resources:
|
||||
* pages pertaining to the external resource, number of
|
||||
* pages to copy.
|
||||
*/
|
||||
int kbase_mem_copy_from_extres(struct kbase_context *kctx,
|
||||
struct kbase_debug_copy_buffer *buf_data);
|
||||
int kbase_process_soft_job(struct kbase_jd_atom *katom);
|
||||
int kbase_prepare_soft_job(struct kbase_jd_atom *katom);
|
||||
void kbase_finish_soft_job(struct kbase_jd_atom *katom);
|
||||
@@ -252,17 +328,9 @@ int kbase_soft_event_update(struct kbase_context *kctx,
|
||||
u64 event,
|
||||
unsigned char new_status);
|
||||
|
||||
bool kbase_replay_process(struct kbase_jd_atom *katom);
|
||||
|
||||
void kbasep_soft_job_timeout_worker(struct timer_list *timer);
|
||||
void kbasep_complete_triggered_soft_events(struct kbase_context *kctx, u64 evt);
|
||||
|
||||
/* api used internally for register access. Contains validation and tracing */
|
||||
void kbase_device_trace_register_access(struct kbase_context *kctx, enum kbase_reg_access_type type, u16 reg_offset, u32 reg_value);
|
||||
int kbase_device_trace_buffer_install(
|
||||
struct kbase_context *kctx, u32 *tb, size_t size);
|
||||
void kbase_device_trace_buffer_uninstall(struct kbase_context *kctx);
|
||||
|
||||
void kbasep_as_do_poke(struct work_struct *work);
|
||||
|
||||
/** Returns the name associated with a Mali exception code
|
||||
@@ -292,6 +360,38 @@ static inline bool kbase_pm_is_suspending(struct kbase_device *kbdev)
|
||||
return kbdev->pm.suspending;
|
||||
}
|
||||
|
||||
/**
|
||||
* kbase_pm_is_active - Determine whether the GPU is active
|
||||
*
|
||||
* @kbdev: The kbase device structure for the device (must be a valid pointer)
|
||||
*
|
||||
* This takes into account whether there is an active context reference.
|
||||
*
|
||||
* Return: true if the GPU is active, false otherwise
|
||||
*/
|
||||
static inline bool kbase_pm_is_active(struct kbase_device *kbdev)
|
||||
{
|
||||
return kbdev->pm.active_count > 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* kbase_pm_metrics_start - Start the utilization metrics timer
|
||||
* @kbdev: Pointer to the kbase device for which to start the utilization
|
||||
* metrics calculation thread.
|
||||
*
|
||||
* Start the timer that drives the metrics calculation, runs the custom DVFS.
|
||||
*/
|
||||
void kbase_pm_metrics_start(struct kbase_device *kbdev);
|
||||
|
||||
/**
|
||||
* kbase_pm_metrics_stop - Stop the utilization metrics timer
|
||||
* @kbdev: Pointer to the kbase device for which to stop the utilization
|
||||
* metrics calculation thread.
|
||||
*
|
||||
* Stop the timer that drives the metrics calculation, runs the custom DVFS.
|
||||
*/
|
||||
void kbase_pm_metrics_stop(struct kbase_device *kbdev);
|
||||
|
||||
/**
|
||||
* Return the atom's ID, as was originally supplied by userspace in
|
||||
* base_jd_atom_v2::atom_number
|
||||
@@ -335,9 +435,8 @@ static inline struct kbase_jd_atom *kbase_jd_atom_from_id(
|
||||
* the counter during disjoint events we also increment the counter when jobs may be affected
|
||||
* by what the GPU is currently doing. To facilitate this we have the concept of disjoint state.
|
||||
*
|
||||
* Disjoint state is entered during GPU reset and for the entire time that an atom is replaying
|
||||
* (as part of the replay workaround). Increasing the disjoint state also increases the count of
|
||||
* disjoint events.
|
||||
* Disjoint state is entered during GPU reset. Increasing the disjoint state also increases
|
||||
* the count of disjoint events.
|
||||
*
|
||||
* The disjoint state is then used to increase the count of disjoint events during job submission
|
||||
* and job completion. Any atom submitted or completed while the disjoint state is greater than
|
||||
@@ -634,6 +733,3 @@ int kbase_io_history_resize(struct kbase_io_history *h, u16 new_size);
|
||||
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -65,7 +65,7 @@ int kbasep_10969_workaround_clamp_coordinates(struct kbase_jd_atom *katom)
|
||||
kbase_gpu_vm_lock(katom->kctx);
|
||||
region = kbase_region_tracker_find_region_enclosing_address(katom->kctx,
|
||||
katom->jc);
|
||||
if (!region || (region->flags & KBASE_REG_FREE))
|
||||
if (kbase_is_region_invalid_or_free(region))
|
||||
goto out_unlock;
|
||||
|
||||
page_array = kbase_get_cpu_phy_pages(region);
|
||||
@@ -74,7 +74,7 @@ int kbasep_10969_workaround_clamp_coordinates(struct kbase_jd_atom *katom)
|
||||
|
||||
page_index = (katom->jc >> PAGE_SHIFT) - region->start_pfn;
|
||||
|
||||
p = phys_to_page(as_phys_addr_t(page_array[page_index]));
|
||||
p = as_page(page_array[page_index]);
|
||||
|
||||
/* we need the first 10 words of the fragment shader job descriptor.
|
||||
* We need to check that the offset + 10 words is less than the page
|
||||
@@ -98,7 +98,7 @@ int kbasep_10969_workaround_clamp_coordinates(struct kbase_jd_atom *katom)
|
||||
/* The data needed overflows the page dimension,
|
||||
* need to map the subsequent page */
|
||||
if (copy_size < JOB_HEADER_SIZE) {
|
||||
p = phys_to_page(as_phys_addr_t(page_array[page_index + 1]));
|
||||
p = as_page(page_array[page_index + 1]);
|
||||
page_2 = kmap_atomic(p);
|
||||
|
||||
kbase_sync_single_for_cpu(katom->kctx->kbdev,
|
||||
@@ -181,7 +181,7 @@ int kbasep_10969_workaround_clamp_coordinates(struct kbase_jd_atom *katom)
|
||||
|
||||
/* Flush CPU cache to update memory for future GPU reads*/
|
||||
memcpy(page_1, dst, copy_size);
|
||||
p = phys_to_page(as_phys_addr_t(page_array[page_index]));
|
||||
p = as_page(page_array[page_index]);
|
||||
|
||||
kbase_sync_single_for_device(katom->kctx->kbdev,
|
||||
kbase_dma_addr(p) + offset,
|
||||
@@ -190,8 +190,7 @@ int kbasep_10969_workaround_clamp_coordinates(struct kbase_jd_atom *katom)
|
||||
if (copy_size < JOB_HEADER_SIZE) {
|
||||
memcpy(page_2, dst + copy_size,
|
||||
JOB_HEADER_SIZE - copy_size);
|
||||
p = phys_to_page(as_phys_addr_t(page_array[page_index +
|
||||
1]));
|
||||
p = as_page(page_array[page_index + 1]);
|
||||
|
||||
kbase_sync_single_for_device(katom->kctx->kbdev,
|
||||
kbase_dma_addr(p),
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
/*
|
||||
*
|
||||
* (C) COPYRIGHT 2016-2017 ARM Limited. All rights reserved.
|
||||
* (C) COPYRIGHT 2016-2019 ARM Limited. All rights reserved.
|
||||
*
|
||||
* This program is free software and is provided to you under the terms of the
|
||||
* GNU General Public License version 2 as published by the Free Software
|
||||
@@ -48,7 +48,7 @@ static int kbase_as_fault_read(struct seq_file *sfile, void *data)
|
||||
|
||||
/* output the last page fault addr */
|
||||
seq_printf(sfile, "%llu\n",
|
||||
(u64) kbdev->as[as_no].fault_addr);
|
||||
(u64) kbdev->as[as_no].pf_data.addr);
|
||||
}
|
||||
|
||||
}
|
||||
@@ -64,6 +64,7 @@ static int kbase_as_fault_debugfs_open(struct inode *in, struct file *file)
|
||||
}
|
||||
|
||||
static const struct file_operations as_fault_fops = {
|
||||
.owner = THIS_MODULE,
|
||||
.open = kbase_as_fault_debugfs_open,
|
||||
.read = seq_read,
|
||||
.llseek = seq_lseek,
|
||||
@@ -87,7 +88,7 @@ void kbase_as_fault_debugfs_init(struct kbase_device *kbdev)
|
||||
kbdev->debugfs_as_read_bitmap = 0ULL;
|
||||
|
||||
KBASE_DEBUG_ASSERT(kbdev->nr_hw_address_spaces);
|
||||
KBASE_DEBUG_ASSERT(sizeof(kbdev->as[0].fault_addr) == sizeof(u64));
|
||||
KBASE_DEBUG_ASSERT(sizeof(kbdev->as[0].pf_data.addr) == sizeof(u64));
|
||||
|
||||
debugfs_directory = debugfs_create_dir("address_spaces",
|
||||
kbdev->mali_debugfs_directory);
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
/*
|
||||
*
|
||||
* (C) COPYRIGHT 2012-2013, 2015 ARM Limited. All rights reserved.
|
||||
* (C) COPYRIGHT ARM Limited. All rights reserved.
|
||||
*
|
||||
* This program is free software and is provided to you under the terms of the
|
||||
* GNU General Public License version 2 as published by the Free Software
|
||||
@@ -18,21 +18,24 @@
|
||||
*
|
||||
* SPDX-License-Identifier: GPL-2.0
|
||||
*
|
||||
*//* SPDX-License-Identifier: GPL-2.0 */
|
||||
/*
|
||||
*
|
||||
* (C) COPYRIGHT 2019 ARM Limited. All rights reserved.
|
||||
*
|
||||
* This program is free software and is provided to you under the terms of the
|
||||
* GNU General Public License version 2 as published by the Free Software
|
||||
* Foundation, and any use by you of this program is subject to the terms
|
||||
* of such GNU licence.
|
||||
*/
|
||||
|
||||
#ifndef _KBASE_BITS_H_
|
||||
#define _KBASE_BITS_H_
|
||||
|
||||
#if (KERNEL_VERSION(4, 19, 0) <= LINUX_VERSION_CODE)
|
||||
#include <linux/bits.h>
|
||||
#else
|
||||
#include <linux/bitops.h>
|
||||
#endif
|
||||
|
||||
#include <mali_kbase.h>
|
||||
|
||||
/**
 * kbasep_list_member_of - Check whether a node is linked into a list.
 * @base:  Head of the list to search.
 * @entry: Node to look for.
 *
 * Walks forward from base->next, comparing each node pointer against @entry,
 * until the walk arrives back at @base.
 *
 * Return: true if @entry was encountered, false otherwise (an empty list,
 * where base->next == base, yields false).
 */
bool kbasep_list_member_of(const struct list_head *base, struct list_head *entry)
|
||||
{
|
||||
struct list_head *pos = base->next;
|
||||
|
||||
while (pos != base) {
|
||||
if (pos == entry)
|
||||
return true;
|
||||
|
||||
pos = pos->next;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
#endif /* _KBASE_BITS_H_ */
|
||||
@@ -1,6 +1,6 @@
|
||||
/*
|
||||
*
|
||||
* (C) COPYRIGHT 2012-2017 ARM Limited. All rights reserved.
|
||||
* (C) COPYRIGHT 2012-2018 ARM Limited. All rights reserved.
|
||||
*
|
||||
* This program is free software and is provided to you under the terms of the
|
||||
* GNU General Public License version 2 as published by the Free Software
|
||||
@@ -30,7 +30,12 @@
|
||||
|
||||
/*
|
||||
* The output flags should be a combination of the following values:
|
||||
* KBASE_REG_CPU_CACHED: CPU cache should be enabled.
|
||||
* KBASE_REG_CPU_CACHED: CPU cache should be enabled
|
||||
* KBASE_REG_GPU_CACHED: GPU cache should be enabled
|
||||
*
|
||||
* NOTE: Some components within the GPU might only be able to access memory
|
||||
* that is KBASE_REG_GPU_CACHED. Refer to the specific GPU implementation for
|
||||
* more details.
|
||||
*/
|
||||
u32 kbase_cache_enabled(u32 flags, u32 nr_pages)
|
||||
{
|
||||
@@ -38,6 +43,9 @@ u32 kbase_cache_enabled(u32 flags, u32 nr_pages)
|
||||
|
||||
CSTD_UNUSED(nr_pages);
|
||||
|
||||
if (!(flags & BASE_MEM_UNCACHED_GPU))
|
||||
cache_flags |= KBASE_REG_GPU_CACHED;
|
||||
|
||||
if (flags & BASE_MEM_CACHED_CPU)
|
||||
cache_flags |= KBASE_REG_CPU_CACHED;
|
||||
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
/*
|
||||
*
|
||||
* (C) COPYRIGHT 2013-2018 ARM Limited. All rights reserved.
|
||||
* (C) COPYRIGHT 2013-2019 ARM Limited. All rights reserved.
|
||||
*
|
||||
* This program is free software and is provided to you under the terms of the
|
||||
* GNU General Public License version 2 as published by the Free Software
|
||||
@@ -108,48 +108,6 @@ enum {
|
||||
KBASE_3BIT_AID_4 = 0x7
|
||||
};
|
||||
|
||||
/**
|
||||
* Default setting for read Address ID limiting on AXI bus.
|
||||
*
|
||||
* Attached value: u32 register value
|
||||
* KBASE_AID_32 - use the full 32 IDs (5 ID bits)
|
||||
* KBASE_AID_16 - use 16 IDs (4 ID bits)
|
||||
* KBASE_AID_8 - use 8 IDs (3 ID bits)
|
||||
* KBASE_AID_4 - use 4 IDs (2 ID bits)
|
||||
* Default value: KBASE_AID_32 (no limit). Note hardware implementation
|
||||
* may limit to a lower value.
|
||||
*/
|
||||
#define DEFAULT_ARID_LIMIT KBASE_AID_32
|
||||
|
||||
/**
|
||||
* Default setting for write Address ID limiting on AXI.
|
||||
*
|
||||
* Attached value: u32 register value
|
||||
* KBASE_AID_32 - use the full 32 IDs (5 ID bits)
|
||||
* KBASE_AID_16 - use 16 IDs (4 ID bits)
|
||||
* KBASE_AID_8 - use 8 IDs (3 ID bits)
|
||||
* KBASE_AID_4 - use 4 IDs (2 ID bits)
|
||||
* Default value: KBASE_AID_32 (no limit). Note hardware implementation
|
||||
* may limit to a lower value.
|
||||
*/
|
||||
#define DEFAULT_AWID_LIMIT KBASE_AID_32
|
||||
|
||||
/**
|
||||
* Default setting for read Address ID limiting on AXI bus.
|
||||
*
|
||||
* Default value: KBASE_3BIT_AID_32 (no limit). Note hardware implementation
|
||||
* may limit to a lower value.
|
||||
*/
|
||||
#define DEFAULT_3BIT_ARID_LIMIT KBASE_3BIT_AID_32
|
||||
|
||||
/**
|
||||
* Default setting for write Address ID limiting on AXI.
|
||||
*
|
||||
* Default value: KBASE_3BIT_AID_32 (no limit). Note hardware implementation
|
||||
* may limit to a lower value.
|
||||
*/
|
||||
#define DEFAULT_3BIT_AWID_LIMIT KBASE_3BIT_AID_32
|
||||
|
||||
/**
|
||||
* Default period for DVFS sampling
|
||||
*/
|
||||
@@ -170,11 +128,6 @@ enum {
|
||||
*/
|
||||
#define DEFAULT_PM_POWEROFF_TICK_SHADER (2) /* 400-800us */
|
||||
|
||||
/**
|
||||
* Power Manager number of ticks before GPU is powered off
|
||||
*/
|
||||
#define DEFAULT_PM_POWEROFF_TICK_GPU (2) /* 400-800us */
|
||||
|
||||
/**
|
||||
* Default scheduling tick granularity
|
||||
*/
|
||||
@@ -252,20 +205,6 @@ enum {
|
||||
*/
|
||||
#define DEFAULT_JS_CTX_TIMESLICE_NS (50000000) /* 50ms */
|
||||
|
||||
/**
|
||||
* Perform GPU power down using only platform specific code, skipping DDK power
|
||||
* management.
|
||||
*
|
||||
* If this is non-zero then kbase will avoid powering down shader cores, the
|
||||
* tiler, and the L2 cache, instead just powering down the entire GPU through
|
||||
* platform specific code. This may be required for certain platform
|
||||
* integrations.
|
||||
*
|
||||
* Note that as this prevents kbase from powering down shader cores, this limits
|
||||
* the available power policies to coarse_demand and always_on.
|
||||
*/
|
||||
#define PLATFORM_POWER_DOWN_ONLY (0)
|
||||
|
||||
/**
|
||||
* Maximum frequency (in kHz) that the GPU can be clocked. For some platforms
|
||||
* this isn't available, so we simply define a dummy value here. If devfreq
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
/*
|
||||
*
|
||||
* (C) COPYRIGHT 2010-2018 ARM Limited. All rights reserved.
|
||||
* (C) COPYRIGHT 2010-2019 ARM Limited. All rights reserved.
|
||||
*
|
||||
* This program is free software and is provided to you under the terms of the
|
||||
* GNU General Public License version 2 as published by the Free Software
|
||||
@@ -31,15 +31,27 @@
|
||||
#include <mali_kbase_mem_linux.h>
|
||||
#include <mali_kbase_dma_fence.h>
|
||||
#include <mali_kbase_ctx_sched.h>
|
||||
#include <mali_kbase_mem_pool_group.h>
|
||||
#include <mali_kbase_tracepoints.h>
|
||||
|
||||
struct kbase_context *
|
||||
kbase_create_context(struct kbase_device *kbdev, bool is_compat)
|
||||
kbase_create_context(struct kbase_device *kbdev, bool is_compat,
|
||||
base_context_create_flags const flags,
|
||||
unsigned long const api_version,
|
||||
struct file *const filp)
|
||||
{
|
||||
struct kbase_context *kctx;
|
||||
int err;
|
||||
struct page *p;
|
||||
struct kbasep_js_kctx_info *js_kctx_info = NULL;
|
||||
unsigned long irq_flags = 0;
|
||||
|
||||
KBASE_DEBUG_ASSERT(kbdev != NULL);
|
||||
if (WARN_ON(!kbdev))
|
||||
goto out;
|
||||
|
||||
/* Validate flags */
|
||||
if (WARN_ON(flags != (flags & BASEP_CONTEXT_CREATE_KERNEL_FLAGS)))
|
||||
goto out;
|
||||
|
||||
/* zero-inited as a lot of code assumes it's zeroed out on create */
|
||||
kctx = vzalloc(sizeof(*kctx));
|
||||
@@ -60,34 +72,19 @@ kbase_create_context(struct kbase_device *kbdev, bool is_compat)
|
||||
kbase_ctx_flag_set(kctx, KCTX_FORCE_SAME_VA);
|
||||
#endif /* !defined(CONFIG_64BIT) */
|
||||
|
||||
#ifdef CONFIG_MALI_BIFROST_TRACE_TIMELINE
|
||||
kctx->timeline.owner_tgid = task_tgid_nr(current);
|
||||
#endif
|
||||
atomic_set(&kctx->setup_complete, 0);
|
||||
atomic_set(&kctx->setup_in_progress, 0);
|
||||
spin_lock_init(&kctx->mm_update_lock);
|
||||
kctx->process_mm = NULL;
|
||||
atomic_set(&kctx->nonmapped_pages, 0);
|
||||
atomic_set(&kctx->permanent_mapped_pages, 0);
|
||||
kctx->slots_pullable = 0;
|
||||
kctx->tgid = current->tgid;
|
||||
kctx->pid = current->pid;
|
||||
|
||||
err = kbase_mem_pool_init(&kctx->mem_pool,
|
||||
kbdev->mem_pool_max_size_default,
|
||||
KBASE_MEM_POOL_4KB_PAGE_TABLE_ORDER,
|
||||
kctx->kbdev,
|
||||
&kbdev->mem_pool);
|
||||
err = kbase_mem_pool_group_init(&kctx->mem_pools, kbdev,
|
||||
&kbdev->mem_pool_defaults, &kbdev->mem_pools);
|
||||
if (err)
|
||||
goto free_kctx;
|
||||
|
||||
err = kbase_mem_pool_init(&kctx->lp_mem_pool,
|
||||
(kbdev->mem_pool_max_size_default >> 9),
|
||||
KBASE_MEM_POOL_2MB_PAGE_TABLE_ORDER,
|
||||
kctx->kbdev,
|
||||
&kbdev->lp_mem_pool);
|
||||
if (err)
|
||||
goto free_mem_pool;
|
||||
|
||||
err = kbase_mem_evictable_init(kctx);
|
||||
if (err)
|
||||
goto free_both_pools;
|
||||
@@ -110,7 +107,7 @@ kbase_create_context(struct kbase_device *kbdev, bool is_compat)
|
||||
|
||||
mutex_init(&kctx->reg_lock);
|
||||
|
||||
mutex_init(&kctx->mem_partials_lock);
|
||||
spin_lock_init(&kctx->mem_partials_lock);
|
||||
INIT_LIST_HEAD(&kctx->mem_partials);
|
||||
|
||||
INIT_LIST_HEAD(&kctx->waiting_soft_jobs);
|
||||
@@ -119,22 +116,13 @@ kbase_create_context(struct kbase_device *kbdev, bool is_compat)
|
||||
if (err)
|
||||
goto free_event;
|
||||
|
||||
err = kbase_mmu_init(kctx);
|
||||
err = kbase_mmu_init(kbdev, &kctx->mmu, kctx,
|
||||
base_context_mmu_group_id_get(flags));
|
||||
if (err)
|
||||
goto term_dma_fence;
|
||||
|
||||
do {
|
||||
err = kbase_mem_pool_grow(&kctx->mem_pool,
|
||||
MIDGARD_MMU_BOTTOMLEVEL);
|
||||
if (err)
|
||||
goto pgd_no_mem;
|
||||
|
||||
mutex_lock(&kctx->mmu_lock);
|
||||
kctx->pgd = kbase_mmu_alloc_pgd(kctx);
|
||||
mutex_unlock(&kctx->mmu_lock);
|
||||
} while (!kctx->pgd);
|
||||
|
||||
p = kbase_mem_alloc_page(&kctx->mem_pool);
|
||||
p = kbase_mem_alloc_page(
|
||||
&kctx->mem_pools.small[KBASE_MEM_GROUP_SINK]);
|
||||
if (!p)
|
||||
goto no_sink_page;
|
||||
kctx->aliasing_sink_page = as_tagged(page_to_phys(p));
|
||||
@@ -155,20 +143,39 @@ kbase_create_context(struct kbase_device *kbdev, bool is_compat)
|
||||
err = kbase_jit_init(kctx);
|
||||
if (err)
|
||||
goto no_jit;
|
||||
|
||||
|
||||
#ifdef CONFIG_GPU_TRACEPOINTS
|
||||
atomic_set(&kctx->jctx.work_id, 0);
|
||||
#endif
|
||||
#ifdef CONFIG_MALI_BIFROST_TRACE_TIMELINE
|
||||
atomic_set(&kctx->timeline.jd_atoms_in_flight, 0);
|
||||
#endif
|
||||
|
||||
kctx->id = atomic_add_return(1, &(kbdev->ctx_num)) - 1;
|
||||
|
||||
mutex_init(&kctx->vinstr_cli_lock);
|
||||
mutex_init(&kctx->legacy_hwcnt_lock);
|
||||
|
||||
kbase_timer_setup(&kctx->soft_job_timeout,
|
||||
kbasep_soft_job_timeout_worker);
|
||||
|
||||
mutex_lock(&kbdev->kctx_list_lock);
|
||||
list_add(&kctx->kctx_list_link, &kbdev->kctx_list);
|
||||
KBASE_TLSTREAM_TL_NEW_CTX(kbdev, kctx, kctx->id, (u32)(kctx->tgid));
|
||||
mutex_unlock(&kbdev->kctx_list_lock);
|
||||
|
||||
kctx->api_version = api_version;
|
||||
kctx->filp = filp;
|
||||
|
||||
js_kctx_info = &kctx->jctx.sched_info;
|
||||
|
||||
mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
|
||||
spin_lock_irqsave(&kctx->kbdev->hwaccess_lock, irq_flags);
|
||||
|
||||
/* Translate the flags */
|
||||
if ((flags & BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED) == 0)
|
||||
kbase_ctx_flag_clear(kctx, KCTX_SUBMIT_DISABLED);
|
||||
|
||||
spin_unlock_irqrestore(&kctx->kbdev->hwaccess_lock, irq_flags);
|
||||
mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
|
||||
|
||||
return kctx;
|
||||
|
||||
no_jit:
|
||||
@@ -178,14 +185,10 @@ no_jit:
|
||||
no_sticky:
|
||||
kbase_region_tracker_term(kctx);
|
||||
no_region_tracker:
|
||||
kbase_mem_pool_free(&kctx->mem_pool, p, false);
|
||||
kbase_mem_pool_free(
|
||||
&kctx->mem_pools.small[KBASE_MEM_GROUP_SINK], p, false);
|
||||
no_sink_page:
|
||||
/* VM lock needed for the call to kbase_mmu_free_pgd */
|
||||
kbase_gpu_vm_lock(kctx);
|
||||
kbase_mmu_free_pgd(kctx);
|
||||
kbase_gpu_vm_unlock(kctx);
|
||||
pgd_no_mem:
|
||||
kbase_mmu_term(kctx);
|
||||
kbase_mmu_term(kbdev, &kctx->mmu);
|
||||
term_dma_fence:
|
||||
kbase_dma_fence_term(kctx);
|
||||
free_event:
|
||||
@@ -197,9 +200,7 @@ free_jd:
|
||||
deinit_evictable:
|
||||
kbase_mem_evictable_deinit(kctx);
|
||||
free_both_pools:
|
||||
kbase_mem_pool_term(&kctx->lp_mem_pool);
|
||||
free_mem_pool:
|
||||
kbase_mem_pool_term(&kctx->mem_pool);
|
||||
kbase_mem_pool_group_term(&kctx->mem_pools);
|
||||
free_kctx:
|
||||
vfree(kctx);
|
||||
out:
|
||||
@@ -207,9 +208,10 @@ out:
|
||||
}
|
||||
KBASE_EXPORT_SYMBOL(kbase_create_context);
|
||||
|
||||
static void kbase_reg_pending_dtor(struct kbase_va_region *reg)
|
||||
static void kbase_reg_pending_dtor(struct kbase_device *kbdev,
|
||||
struct kbase_va_region *reg)
|
||||
{
|
||||
dev_dbg(reg->kctx->kbdev->dev, "Freeing pending unmapped region\n");
|
||||
dev_dbg(kbdev->dev, "Freeing pending unmapped region\n");
|
||||
kbase_mem_phy_alloc_put(reg->cpu_alloc);
|
||||
kbase_mem_phy_alloc_put(reg->gpu_alloc);
|
||||
kfree(reg);
|
||||
@@ -223,10 +225,17 @@ void kbase_destroy_context(struct kbase_context *kctx)
|
||||
unsigned long flags;
|
||||
struct page *p;
|
||||
|
||||
KBASE_DEBUG_ASSERT(NULL != kctx);
|
||||
if (WARN_ON(!kctx))
|
||||
return;
|
||||
|
||||
kbdev = kctx->kbdev;
|
||||
KBASE_DEBUG_ASSERT(NULL != kbdev);
|
||||
if (WARN_ON(!kbdev))
|
||||
return;
|
||||
|
||||
mutex_lock(&kbdev->kctx_list_lock);
|
||||
KBASE_TLSTREAM_TL_DEL_CTX(kbdev, kctx);
|
||||
list_del(&kctx->kctx_list_link);
|
||||
mutex_unlock(&kbdev->kctx_list_lock);
|
||||
|
||||
KBASE_TRACE_ADD(kbdev, CORE_CTX_DESTROY, kctx, NULL, 0u, 0u);
|
||||
|
||||
@@ -235,19 +244,31 @@ void kbase_destroy_context(struct kbase_context *kctx)
|
||||
* thread. */
|
||||
kbase_pm_context_active(kbdev);
|
||||
|
||||
kbase_mem_pool_mark_dying(&kctx->mem_pool);
|
||||
kbase_mem_pool_group_mark_dying(&kctx->mem_pools);
|
||||
|
||||
kbase_jd_zap_context(kctx);
|
||||
|
||||
/* We have already waited for the jobs to complete (and hereafter there
|
||||
* can be no more submissions for the context). However the wait could
|
||||
* have timed out and there could still be work items in flight that
|
||||
* would do the completion processing of jobs.
|
||||
* kbase_jd_exit() will destroy the 'job_done_wq'. And destroying the wq
|
||||
* will cause it to drain and implicitly wait for those work items to
|
||||
* complete.
|
||||
*/
|
||||
kbase_jd_exit(kctx);
|
||||
|
||||
#ifdef CONFIG_DEBUG_FS
|
||||
/* Removing the rest of the debugfs entries here as we want to keep the
|
||||
* atom debugfs interface alive until all atoms have completed. This
|
||||
* is useful for debugging hung contexts. */
|
||||
debugfs_remove_recursive(kctx->kctx_dentry);
|
||||
kbase_debug_job_fault_context_term(kctx);
|
||||
#endif
|
||||
|
||||
kbase_event_cleanup(kctx);
|
||||
|
||||
|
||||
/*
|
||||
* JIT must be terminated before the code below as it must be called
|
||||
* without the region lock being held.
|
||||
@@ -260,12 +281,10 @@ void kbase_destroy_context(struct kbase_context *kctx)
|
||||
|
||||
kbase_sticky_resource_term(kctx);
|
||||
|
||||
/* MMU is disabled as part of scheduling out the context */
|
||||
kbase_mmu_free_pgd(kctx);
|
||||
|
||||
/* drop the aliasing sink page now that it can't be mapped anymore */
|
||||
p = phys_to_page(as_phys_addr_t(kctx->aliasing_sink_page));
|
||||
kbase_mem_pool_free(&kctx->mem_pool, p, false);
|
||||
p = as_page(kctx->aliasing_sink_page);
|
||||
kbase_mem_pool_free(&kctx->mem_pools.small[KBASE_MEM_GROUP_SINK],
|
||||
p, false);
|
||||
|
||||
/* free pending region setups */
|
||||
pending_regions_to_clean = (~kctx->cookies) & KBASE_COOKIE_MASK;
|
||||
@@ -274,7 +293,7 @@ void kbase_destroy_context(struct kbase_context *kctx)
|
||||
|
||||
BUG_ON(!kctx->pending_regions[cookie]);
|
||||
|
||||
kbase_reg_pending_dtor(kctx->pending_regions[cookie]);
|
||||
kbase_reg_pending_dtor(kbdev, kctx->pending_regions[cookie]);
|
||||
|
||||
kctx->pending_regions[cookie] = NULL;
|
||||
pending_regions_to_clean &= ~(1UL << cookie);
|
||||
@@ -286,8 +305,6 @@ void kbase_destroy_context(struct kbase_context *kctx)
|
||||
/* Safe to call this one even when didn't initialize (assuming kctx was sufficiently zeroed) */
|
||||
kbasep_js_kctx_term(kctx);
|
||||
|
||||
kbase_jd_exit(kctx);
|
||||
|
||||
kbase_dma_fence_term(kctx);
|
||||
|
||||
mutex_lock(&kbdev->mmu_hw_mutex);
|
||||
@@ -296,15 +313,16 @@ void kbase_destroy_context(struct kbase_context *kctx)
|
||||
spin_unlock_irqrestore(&kctx->kbdev->hwaccess_lock, flags);
|
||||
mutex_unlock(&kbdev->mmu_hw_mutex);
|
||||
|
||||
kbase_mmu_term(kctx);
|
||||
kbase_mmu_term(kbdev, &kctx->mmu);
|
||||
|
||||
pages = atomic_read(&kctx->used_pages);
|
||||
if (pages != 0)
|
||||
dev_warn(kbdev->dev, "%s: %d pages in use!\n", __func__, pages);
|
||||
|
||||
kbase_mem_evictable_deinit(kctx);
|
||||
kbase_mem_pool_term(&kctx->mem_pool);
|
||||
kbase_mem_pool_term(&kctx->lp_mem_pool);
|
||||
|
||||
kbase_mem_pool_group_term(&kctx->mem_pools);
|
||||
|
||||
WARN_ON(atomic_read(&kctx->nonmapped_pages) != 0);
|
||||
|
||||
vfree(kctx);
|
||||
@@ -312,36 +330,3 @@ void kbase_destroy_context(struct kbase_context *kctx)
|
||||
kbase_pm_context_idle(kbdev);
|
||||
}
|
||||
KBASE_EXPORT_SYMBOL(kbase_destroy_context);
|
||||
|
||||
/**
 * kbase_context_set_create_flags - Apply creation flags to a context.
 * @kctx:  Context to update; asserted non-NULL via KBASE_DEBUG_ASSERT.
 * @flags: Creation flags; any bit outside BASE_CONTEXT_CREATE_KERNEL_FLAGS
 *         is rejected.
 *
 * With the context's jsctx_mutex and the device's hwaccess_lock held, clears
 * KCTX_SUBMIT_DISABLED when BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED is
 * not set in @flags, then calls kbasep_js_ctx_attr_set_initial_attrs().
 *
 * Return: 0 on success, -EINVAL if @flags contains an unsupported bit (in
 * which case no lock is taken and nothing is modified).
 */
int kbase_context_set_create_flags(struct kbase_context *kctx, u32 flags)
|
||||
{
|
||||
int err = 0;
|
||||
struct kbasep_js_kctx_info *js_kctx_info;
|
||||
unsigned long irq_flags;
|
||||
|
||||
KBASE_DEBUG_ASSERT(NULL != kctx);
|
||||
|
||||
js_kctx_info = &kctx->jctx.sched_info;
|
||||
|
||||
/* Validate flags */
|
||||
if (flags != (flags & BASE_CONTEXT_CREATE_KERNEL_FLAGS)) {
|
||||
err = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
|
||||
spin_lock_irqsave(&kctx->kbdev->hwaccess_lock, irq_flags);
|
||||
|
||||
/* Translate the flags */
|
||||
if ((flags & BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED) == 0)
|
||||
kbase_ctx_flag_clear(kctx, KCTX_SUBMIT_DISABLED);
|
||||
|
||||
/* Latch the initial attributes into the Job Scheduler */
|
||||
kbasep_js_ctx_attr_set_initial_attrs(kctx->kbdev, kctx);
|
||||
|
||||
spin_unlock_irqrestore(&kctx->kbdev->hwaccess_lock, irq_flags);
|
||||
mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
|
||||
out:
|
||||
return err;
|
||||
}
|
||||
KBASE_EXPORT_SYMBOL(kbase_context_set_create_flags);
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
/*
|
||||
*
|
||||
* (C) COPYRIGHT 2011-2016, 2018 ARM Limited. All rights reserved.
|
||||
* (C) COPYRIGHT 2011-2017, 2019 ARM Limited. All rights reserved.
|
||||
*
|
||||
* This program is free software and is provided to you under the terms of the
|
||||
* GNU General Public License version 2 as published by the Free Software
|
||||
@@ -27,35 +27,37 @@
|
||||
|
||||
/**
|
||||
* kbase_create_context() - Create a kernel base context.
|
||||
* @kbdev: Kbase device
|
||||
* @is_compat: Force creation of a 32-bit context
|
||||
*
|
||||
* Allocate and init a kernel base context.
|
||||
* @kbdev: Object representing an instance of GPU platform device,
|
||||
* allocated from the probe method of the Mali driver.
|
||||
* @is_compat: Force creation of a 32-bit context
|
||||
* @flags: Flags to set, which shall be any combination of
|
||||
* BASEP_CONTEXT_CREATE_KERNEL_FLAGS.
|
||||
* @api_version: Application program interface version, as encoded in
|
||||
* a single integer by the KBASE_API_VERSION macro.
|
||||
* @filp: Pointer to the struct file corresponding to device file
|
||||
* /dev/malixx instance, passed to the file's open method.
|
||||
*
|
||||
* Return: new kbase context
|
||||
* Up to one context can be created for each client that opens the device file
|
||||
* /dev/malixx. Context creation is deferred until a special ioctl() system call
|
||||
* is made on the device file. Each context has its own GPU address space.
|
||||
*
|
||||
* Return: new kbase context or NULL on failure
|
||||
*/
|
||||
struct kbase_context *
|
||||
kbase_create_context(struct kbase_device *kbdev, bool is_compat);
|
||||
kbase_create_context(struct kbase_device *kbdev, bool is_compat,
|
||||
base_context_create_flags const flags,
|
||||
unsigned long api_version,
|
||||
struct file *filp);
|
||||
|
||||
/**
|
||||
* kbase_destroy_context - Destroy a kernel base context.
|
||||
* @kctx: Context to destroy
|
||||
*
|
||||
* Calls kbase_destroy_os_context() to free OS specific structures.
|
||||
* Will release all outstanding regions.
|
||||
*/
|
||||
void kbase_destroy_context(struct kbase_context *kctx);
|
||||
|
||||
/**
|
||||
* kbase_context_set_create_flags - Set creation flags on a context
|
||||
* @kctx: Kbase context
|
||||
* @flags: Flags to set, which shall be one of the flags of
|
||||
* BASE_CONTEXT_CREATE_KERNEL_FLAGS.
|
||||
*
|
||||
* Return: 0 on success, -EINVAL otherwise when an invalid flag is specified.
|
||||
*/
|
||||
int kbase_context_set_create_flags(struct kbase_context *kctx, u32 flags);
|
||||
|
||||
/**
|
||||
* kbase_ctx_flag - Check if @flag is set on @kctx
|
||||
* @kctx: Pointer to kbase context to check
|
||||
@@ -107,7 +109,7 @@ static inline void kbase_ctx_flag_clear(struct kbase_context *kctx,
|
||||
/**
|
||||
* kbase_ctx_flag_set - Set @flag on @kctx
|
||||
* @kctx: Pointer to kbase context
|
||||
* @flag: Flag to clear
|
||||
* @flag: Flag to set
|
||||
*
|
||||
* Set the @flag on @kctx. This is done atomically, so other flags being
|
||||
* cleared or set at the same time will be safe.
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,6 +1,6 @@
|
||||
/*
|
||||
*
|
||||
* (C) COPYRIGHT 2017 ARM Limited. All rights reserved.
|
||||
* (C) COPYRIGHT 2017-2018 ARM Limited. All rights reserved.
|
||||
*
|
||||
* This program is free software and is provided to you under the terms of the
|
||||
* GNU General Public License version 2 as published by the Free Software
|
||||
@@ -121,7 +121,8 @@ int kbase_ctx_sched_retain_ctx(struct kbase_context *kctx)
|
||||
|
||||
kctx->as_nr = free_as;
|
||||
kbdev->as_to_kctx[free_as] = kctx;
|
||||
kbase_mmu_update(kctx);
|
||||
kbase_mmu_update(kbdev, &kctx->mmu,
|
||||
kctx->as_nr);
|
||||
}
|
||||
} else {
|
||||
atomic_dec(&kctx->refcount);
|
||||
@@ -193,7 +194,8 @@ void kbase_ctx_sched_restore_all_as(struct kbase_device *kbdev)
|
||||
if (atomic_read(&kctx->refcount)) {
|
||||
WARN_ON(kctx->as_nr != i);
|
||||
|
||||
kbase_mmu_update(kctx);
|
||||
kbase_mmu_update(kbdev, &kctx->mmu,
|
||||
kctx->as_nr);
|
||||
} else {
|
||||
/* This context might have been assigned an
|
||||
* AS before, clear it.
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
/*
|
||||
*
|
||||
* (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved.
|
||||
* (C) COPYRIGHT 2012-2016, 2018-2019 ARM Limited. All rights reserved.
|
||||
*
|
||||
* This program is free software and is provided to you under the terms of the
|
||||
* GNU General Public License version 2 as published by the Free Software
|
||||
@@ -39,6 +39,30 @@ static bool kbase_is_job_fault_event_pending(struct kbase_device *kbdev)
|
||||
return ret;
|
||||
}
|
||||
|
||||
/**
 * kbase_ctx_remove_pending_event - Drop the queued job-fault event of a context.
 * @kctx: Context whose pending event, if any, is to be removed.
 *
 * Searches the device's job_fault_event_list, under job_fault_event_lock, for
 * an event whose atom belongs to @kctx. If one is found it is unlinked from
 * the list, the lock is released, job_fault_resume_wq is woken and the event's
 * job_fault_work is flushed before returning. If no event matches, the lock
 * is simply released.
 */
static void kbase_ctx_remove_pending_event(struct kbase_context *kctx)
|
||||
{
|
||||
struct list_head *event_list = &kctx->kbdev->job_fault_event_list;
|
||||
struct base_job_fault_event *event;
|
||||
unsigned long flags;
|
||||
|
||||
spin_lock_irqsave(&kctx->kbdev->job_fault_event_lock, flags);
|
||||
list_for_each_entry(event, event_list, head) {
|
||||
if (event->katom->kctx == kctx) {
|
||||
list_del(&event->head);
|
||||
/* Entry is unlinked: release the lock before the wake-up and flush below. */
spin_unlock_irqrestore(&kctx->kbdev->job_fault_event_lock, flags);
|
||||
|
||||
wake_up(&kctx->kbdev->job_fault_resume_wq);
|
||||
flush_work(&event->job_fault_work);
|
||||
|
||||
/* job_fault_event_list can only have a single atom for
|
||||
* each context.
|
||||
*/
|
||||
return;
|
||||
}
|
||||
}
|
||||
spin_unlock_irqrestore(&kctx->kbdev->job_fault_event_lock, flags);
|
||||
}
|
||||
|
||||
static bool kbase_ctx_has_no_event_pending(struct kbase_context *kctx)
|
||||
{
|
||||
struct kbase_device *kbdev = kctx->kbdev;
|
||||
@@ -62,6 +86,25 @@ static bool kbase_ctx_has_no_event_pending(struct kbase_context *kctx)
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
 * wait_for_job_fault - Sleep until a job fault event is pending.
 * @kbdev: Device whose job_fault_wq is waited on.
 *
 * Interruptibly waits on kbdev->job_fault_wq for
 * kbase_is_job_fault_event_pending() to become true.
 *
 * On kernels >= 4.7 and < 4.15 the wait is bounded to 2000 ms: a zero result
 * from wait_event_interruptible_timeout() is reported as -EAGAIN, a positive
 * result as 0, and a negative result is passed through unchanged. On all other
 * kernel versions the wait is unbounded and the result of
 * wait_event_interruptible() is returned directly.
 *
 * Return: 0 when the condition was met, otherwise a negative error code.
 */
static int wait_for_job_fault(struct kbase_device *kbdev)
|
||||
{
|
||||
#if KERNEL_VERSION(4, 7, 0) <= LINUX_VERSION_CODE && \
|
||||
KERNEL_VERSION(4, 15, 0) > LINUX_VERSION_CODE
|
||||
int ret = wait_event_interruptible_timeout(kbdev->job_fault_wq,
|
||||
kbase_is_job_fault_event_pending(kbdev),
|
||||
msecs_to_jiffies(2000));
|
||||
if (ret == 0)
|
||||
return -EAGAIN;
|
||||
else if (ret > 0)
|
||||
return 0;
|
||||
else
|
||||
return ret;
|
||||
#else
|
||||
return wait_event_interruptible(kbdev->job_fault_wq,
|
||||
kbase_is_job_fault_event_pending(kbdev));
|
||||
#endif
|
||||
}
|
||||
|
||||
/* wait until the fault happens and copy the event */
|
||||
static int kbase_job_fault_event_wait(struct kbase_device *kbdev,
|
||||
struct base_job_fault_event *event)
|
||||
@@ -71,11 +114,15 @@ static int kbase_job_fault_event_wait(struct kbase_device *kbdev,
|
||||
unsigned long flags;
|
||||
|
||||
spin_lock_irqsave(&kbdev->job_fault_event_lock, flags);
|
||||
if (list_empty(event_list)) {
|
||||
while (list_empty(event_list)) {
|
||||
int err;
|
||||
|
||||
spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags);
|
||||
if (wait_event_interruptible(kbdev->job_fault_wq,
|
||||
kbase_is_job_fault_event_pending(kbdev)))
|
||||
return -ERESTARTSYS;
|
||||
|
||||
err = wait_for_job_fault(kbdev);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
spin_lock_irqsave(&kbdev->job_fault_event_lock, flags);
|
||||
}
|
||||
|
||||
@@ -122,24 +169,6 @@ static void kbase_job_fault_resume_event_cleanup(struct kbase_context *kctx)
|
||||
|
||||
}
|
||||
|
||||
/* Remove all the failed atoms, which may belong to different contexts, from
|
||||
* the device's job_fault_event_list, and resume all the contexts that were
* suspended due to a failed job by waking job_fault_resume_wq after each
* removal.
|
||||
*/
|
||||
static void kbase_job_fault_event_cleanup(struct kbase_device *kbdev)
|
||||
{
|
||||
struct list_head *event_list = &kbdev->job_fault_event_list;
|
||||
unsigned long flags;
|
||||
|
||||
spin_lock_irqsave(&kbdev->job_fault_event_lock, flags);
|
||||
while (!list_empty(event_list)) {
|
||||
kbase_job_fault_event_dequeue(kbdev, event_list);
|
||||
/* The lock is dropped around wake_up() and re-taken before the list
 * is checked again.
 */
spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags);
|
||||
wake_up(&kbdev->job_fault_resume_wq);
|
||||
spin_lock_irqsave(&kbdev->job_fault_event_lock, flags);
|
||||
}
|
||||
spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags);
|
||||
}
|
||||
|
||||
static void kbase_job_fault_resume_worker(struct work_struct *data)
|
||||
{
|
||||
struct base_job_fault_event *event = container_of(data,
|
||||
@@ -237,7 +266,10 @@ bool kbase_debug_job_fault_process(struct kbase_jd_atom *katom,
|
||||
return true;
|
||||
}
|
||||
|
||||
if (kctx->kbdev->job_fault_debug == true) {
|
||||
if (kbase_ctx_flag(kctx, KCTX_DYING))
|
||||
return false;
|
||||
|
||||
if (atomic_read(&kctx->kbdev->job_fault_debug) > 0) {
|
||||
|
||||
if (completion_code != BASE_JD_EVENT_DONE) {
|
||||
|
||||
@@ -337,7 +369,7 @@ static void *debug_job_fault_start(struct seq_file *m, loff_t *pos)
|
||||
* job done but we delayed it. Now we should clean cache
|
||||
* earlier. Then the GPU memory dump should be correct.
|
||||
*/
|
||||
kbase_backend_cacheclean(kbdev, event->katom);
|
||||
kbase_backend_cache_clean(kbdev, event->katom);
|
||||
} else
|
||||
return NULL;
|
||||
|
||||
@@ -383,12 +415,16 @@ static int debug_job_fault_open(struct inode *in, struct file *file)
|
||||
{
|
||||
struct kbase_device *kbdev = in->i_private;
|
||||
|
||||
if (atomic_cmpxchg(&kbdev->job_fault_debug, 0, 1) == 1) {
|
||||
dev_warn(kbdev->dev, "debug job fault is busy, only a single client is allowed");
|
||||
return -EBUSY;
|
||||
}
|
||||
|
||||
seq_open(file, &ops);
|
||||
|
||||
((struct seq_file *)file->private_data)->private = kbdev;
|
||||
dev_info(kbdev->dev, "debug job fault seq open");
|
||||
|
||||
kbdev->job_fault_debug = true;
|
||||
|
||||
return 0;
|
||||
|
||||
@@ -397,15 +433,35 @@ static int debug_job_fault_open(struct inode *in, struct file *file)
|
||||
static int debug_job_fault_release(struct inode *in, struct file *file)
|
||||
{
|
||||
struct kbase_device *kbdev = in->i_private;
|
||||
struct list_head *event_list = &kbdev->job_fault_event_list;
|
||||
unsigned long flags;
|
||||
|
||||
seq_release(in, file);
|
||||
|
||||
kbdev->job_fault_debug = false;
|
||||
spin_lock_irqsave(&kbdev->job_fault_event_lock, flags);
|
||||
|
||||
/* Disable job fault dumping. This will let kbase run jobs as normal,
|
||||
* without blocking waiting for a job_fault client to read failed jobs.
|
||||
*
|
||||
* After this a new client may open the file, and may re-enable job
|
||||
* fault dumping, but the job_fault_event_lock we hold here will block
|
||||
* that from interfering until after we've completed the cleanup.
|
||||
*/
|
||||
atomic_dec(&kbdev->job_fault_debug);
|
||||
|
||||
/* Clean the unprocessed job fault. After that, all the suspended
|
||||
* contexts could be rescheduled.
|
||||
* contexts could be rescheduled. Remove all the failed atoms that
|
||||
* belong to different contexts. Resume all the contexts that were
|
||||
* suspended due to a failed job.
|
||||
*/
|
||||
kbase_job_fault_event_cleanup(kbdev);
|
||||
while (!list_empty(event_list)) {
|
||||
kbase_job_fault_event_dequeue(kbdev, event_list);
|
||||
spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags);
|
||||
wake_up(&kbdev->job_fault_resume_wq);
|
||||
spin_lock_irqsave(&kbdev->job_fault_event_lock, flags);
|
||||
}
|
||||
|
||||
spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags);
|
||||
|
||||
dev_info(kbdev->dev, "debug job fault seq close");
|
||||
|
||||
@@ -413,6 +469,7 @@ static int debug_job_fault_release(struct inode *in, struct file *file)
|
||||
}
|
||||
|
||||
static const struct file_operations kbasep_debug_job_fault_fops = {
|
||||
.owner = THIS_MODULE,
|
||||
.open = debug_job_fault_open,
|
||||
.read = seq_read,
|
||||
.llseek = seq_lseek,
|
||||
@@ -424,7 +481,7 @@ static const struct file_operations kbasep_debug_job_fault_fops = {
|
||||
*/
|
||||
void kbase_debug_job_fault_debugfs_init(struct kbase_device *kbdev)
|
||||
{
|
||||
debugfs_create_file("job_fault", S_IRUGO,
|
||||
debugfs_create_file("job_fault", 0400,
|
||||
kbdev->mali_debugfs_directory, kbdev,
|
||||
&kbasep_debug_job_fault_fops);
|
||||
}
|
||||
@@ -444,7 +501,7 @@ int kbase_debug_job_fault_dev_init(struct kbase_device *kbdev)
|
||||
if (!kbdev->job_fault_resume_workq)
|
||||
return -ENOMEM;
|
||||
|
||||
kbdev->job_fault_debug = false;
|
||||
atomic_set(&kbdev->job_fault_debug, 0);
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -488,12 +545,17 @@ void kbase_debug_job_fault_context_term(struct kbase_context *kctx)
|
||||
vfree(kctx->reg_dump);
|
||||
}
|
||||
|
||||
/**
 * kbase_debug_job_fault_kctx_unblock - Release a context's pending job-fault event.
 * @kctx: Context being unblocked.
 *
 * Emits a kernel warning if KCTX_DYING is not set on @kctx, then removes the
 * context's pending event via kbase_ctx_remove_pending_event(). The removal
 * is performed regardless of the outcome of the flag check.
 */
void kbase_debug_job_fault_kctx_unblock(struct kbase_context *kctx)
|
||||
{
|
||||
WARN_ON(!kbase_ctx_flag(kctx, KCTX_DYING));
|
||||
|
||||
kbase_ctx_remove_pending_event(kctx);
|
||||
}
|
||||
|
||||
#else /* CONFIG_DEBUG_FS */
|
||||
|
||||
int kbase_debug_job_fault_dev_init(struct kbase_device *kbdev)
|
||||
{
|
||||
kbdev->job_fault_debug = false;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
/*
|
||||
*
|
||||
* (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved.
|
||||
* (C) COPYRIGHT 2012-2016, 2018 ARM Limited. All rights reserved.
|
||||
*
|
||||
* This program is free software and is provided to you under the terms of the
|
||||
* GNU General Public License version 2 as published by the Free Software
|
||||
@@ -64,6 +64,21 @@ void kbase_debug_job_fault_context_init(struct kbase_context *kctx);
|
||||
*/
|
||||
void kbase_debug_job_fault_context_term(struct kbase_context *kctx);
|
||||
|
||||
/**
|
||||
* kbase_debug_job_fault_kctx_unblock - Unblock the atoms blocked on job fault
|
||||
* dumping on context termination.
|
||||
*
|
||||
* This function is called during context termination to unblock the atom for
|
||||
* which the job fault occurred and also the atoms following it. This is needed
|
||||
* otherwise the wait for zero jobs could timeout (leading to an assertion
|
||||
* failure, kernel panic in debug builds) in the pathological case where
|
||||
* although the thread/daemon capturing the job fault events is running,
|
||||
* but for some reasons has stopped consuming the events.
|
||||
*
|
||||
* @kctx: KBase context pointer
|
||||
*/
|
||||
void kbase_debug_job_fault_kctx_unblock(struct kbase_context *kctx);
|
||||
|
||||
/**
|
||||
* kbase_debug_job_fault_process - Process the failed job.
|
||||
* It will send an event and wake up the job fault waiting queue
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
/*
|
||||
*
|
||||
* (C) COPYRIGHT 2013-2018 ARM Limited. All rights reserved.
|
||||
* (C) COPYRIGHT 2013-2019 ARM Limited. All rights reserved.
|
||||
*
|
||||
* This program is free software and is provided to you under the terms of the
|
||||
* GNU General Public License version 2 as published by the Free Software
|
||||
@@ -32,6 +32,10 @@
|
||||
|
||||
#ifdef CONFIG_DEBUG_FS
|
||||
|
||||
#if (KERNEL_VERSION(4, 1, 0) > LINUX_VERSION_CODE)
|
||||
#define get_file_rcu(x) atomic_long_inc_not_zero(&(x)->f_count)
|
||||
#endif
|
||||
|
||||
struct debug_mem_mapping {
|
||||
struct list_head node;
|
||||
|
||||
@@ -130,7 +134,7 @@ static int debug_mem_show(struct seq_file *m, void *v)
|
||||
if (!(map->flags & KBASE_REG_CPU_CACHED))
|
||||
prot = pgprot_writecombine(prot);
|
||||
|
||||
page = phys_to_page(as_phys_addr_t(map->alloc->pages[data->offset]));
|
||||
page = as_page(map->alloc->pages[data->offset]);
|
||||
mapping = vmap(&page, 1, VM_MAP, prot);
|
||||
if (!mapping)
|
||||
goto out;
|
||||
@@ -194,14 +198,16 @@ out:
|
||||
|
||||
static int debug_mem_open(struct inode *i, struct file *file)
|
||||
{
|
||||
struct file *kctx_file = i->i_private;
|
||||
struct kbase_context *kctx = kctx_file->private_data;
|
||||
struct kbase_context *const kctx = i->i_private;
|
||||
struct debug_mem_data *mem_data;
|
||||
int ret;
|
||||
|
||||
if (get_file_rcu(kctx->filp) == 0)
|
||||
return -ENOENT;
|
||||
|
||||
ret = seq_open(file, &ops);
|
||||
if (ret)
|
||||
return ret;
|
||||
goto open_fail;
|
||||
|
||||
mem_data = kmalloc(sizeof(*mem_data), GFP_KERNEL);
|
||||
if (!mem_data) {
|
||||
@@ -213,8 +219,6 @@ static int debug_mem_open(struct inode *i, struct file *file)
|
||||
|
||||
INIT_LIST_HEAD(&mem_data->mapping_list);
|
||||
|
||||
get_file(kctx_file);
|
||||
|
||||
kbase_gpu_vm_lock(kctx);
|
||||
|
||||
ret = debug_mem_zone_open(&kctx->reg_rbtree_same, mem_data);
|
||||
@@ -223,12 +227,6 @@ static int debug_mem_open(struct inode *i, struct file *file)
|
||||
goto out;
|
||||
}
|
||||
|
||||
ret = debug_mem_zone_open(&kctx->reg_rbtree_exec, mem_data);
|
||||
if (ret != 0) {
|
||||
kbase_gpu_vm_unlock(kctx);
|
||||
goto out;
|
||||
}
|
||||
|
||||
ret = debug_mem_zone_open(&kctx->reg_rbtree_custom, mem_data);
|
||||
if (0 != ret) {
|
||||
kbase_gpu_vm_unlock(kctx);
|
||||
@@ -252,16 +250,18 @@ out:
|
||||
list_del(&mapping->node);
|
||||
kfree(mapping);
|
||||
}
|
||||
fput(kctx_file);
|
||||
kfree(mem_data);
|
||||
}
|
||||
seq_release(i, file);
|
||||
open_fail:
|
||||
fput(kctx->filp);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int debug_mem_release(struct inode *inode, struct file *file)
|
||||
{
|
||||
struct file *kctx_file = inode->i_private;
|
||||
struct kbase_context *const kctx = inode->i_private;
|
||||
struct seq_file *sfile = file->private_data;
|
||||
struct debug_mem_data *mem_data = sfile->private;
|
||||
struct debug_mem_mapping *mapping;
|
||||
@@ -278,33 +278,29 @@ static int debug_mem_release(struct inode *inode, struct file *file)
|
||||
|
||||
kfree(mem_data);
|
||||
|
||||
fput(kctx_file);
|
||||
fput(kctx->filp);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static const struct file_operations kbase_debug_mem_view_fops = {
|
||||
.owner = THIS_MODULE,
|
||||
.open = debug_mem_open,
|
||||
.release = debug_mem_release,
|
||||
.read = seq_read,
|
||||
.llseek = seq_lseek
|
||||
};
|
||||
|
||||
/**
|
||||
* kbase_debug_mem_view_init - Initialise the mem_view debugfs file
|
||||
* @kctx_file: The /dev/mali0 file instance for the context
|
||||
*
|
||||
* This function creates a "mem_view" file which can be used to get a view of
|
||||
* the context's memory as the GPU sees it (i.e. using the GPU's page tables).
|
||||
*
|
||||
* The file is cleaned up by a call to debugfs_remove_recursive() deleting the
|
||||
* parent directory.
|
||||
*/
|
||||
void kbase_debug_mem_view_init(struct file *kctx_file)
|
||||
void kbase_debug_mem_view_init(struct kbase_context *const kctx)
|
||||
{
|
||||
struct kbase_context *kctx = kctx_file->private_data;
|
||||
/* Caller already ensures this, but we keep the pattern for
|
||||
* maintenance safety.
|
||||
*/
|
||||
if (WARN_ON(!kctx) ||
|
||||
WARN_ON(IS_ERR_OR_NULL(kctx->kctx_dentry)))
|
||||
return;
|
||||
|
||||
debugfs_create_file("mem_view", S_IRUSR, kctx->kctx_dentry, kctx_file,
|
||||
debugfs_create_file("mem_view", 0400, kctx->kctx_dentry, kctx,
|
||||
&kbase_debug_mem_view_fops);
|
||||
}
|
||||
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
/*
|
||||
*
|
||||
* (C) COPYRIGHT 2013-2014 ARM Limited. All rights reserved.
|
||||
* (C) COPYRIGHT 2013-2015, 2019 ARM Limited. All rights reserved.
|
||||
*
|
||||
* This program is free software and is provided to you under the terms of the
|
||||
* GNU General Public License version 2 as published by the Free Software
|
||||
@@ -25,6 +25,16 @@
|
||||
|
||||
#include <mali_kbase.h>
|
||||
|
||||
void kbase_debug_mem_view_init(struct file *kctx_file);
|
||||
/**
|
||||
* kbase_debug_mem_view_init - Initialize the mem_view debugfs file
|
||||
* @kctx: Pointer to kernel base context
|
||||
*
|
||||
* This function creates a "mem_view" file which can be used to get a view of
|
||||
* the context's memory as the GPU sees it (i.e. using the GPU's page tables).
|
||||
*
|
||||
* The file is cleaned up by a call to debugfs_remove_recursive() deleting the
|
||||
* parent directory.
|
||||
*/
|
||||
void kbase_debug_mem_view_init(struct kbase_context *kctx);
|
||||
|
||||
#endif
|
||||
|
||||
183
drivers/gpu/arm/bifrost/mali_kbase_debugfs_helper.c
Normal file
183
drivers/gpu/arm/bifrost/mali_kbase_debugfs_helper.c
Normal file
@@ -0,0 +1,183 @@
|
||||
/*
|
||||
*
|
||||
* (C) COPYRIGHT 2019 ARM Limited. All rights reserved.
|
||||
*
|
||||
* This program is free software and is provided to you under the terms of the
|
||||
* GNU General Public License version 2 as published by the Free Software
|
||||
* Foundation, and any use by you of this program is subject to the terms
|
||||
* of such GNU licence.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, you can access it online at
|
||||
* http://www.gnu.org/licenses/gpl-2.0.html.
|
||||
*
|
||||
* SPDX-License-Identifier: GPL-2.0
|
||||
*
|
||||
*/
|
||||
|
||||
#include <linux/debugfs.h>
|
||||
#include <linux/seq_file.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/uaccess.h>
|
||||
|
||||
#include "mali_kbase_debugfs_helper.h"
|
||||
|
||||
/* Arbitrary maximum size to prevent user space allocating too much kernel
|
||||
* memory
|
||||
*/
|
||||
#define DEBUGFS_MEM_POOLS_MAX_WRITE_SIZE (256u)
|
||||
|
||||
/**
|
||||
* set_attr_from_string - Parse a string to set elements of an array
|
||||
*
|
||||
* This is the core of the implementation of
|
||||
* kbase_debugfs_helper_set_attr_from_string. The only difference between the
|
||||
* two functions is that this one requires the input string to be writable.
|
||||
*
|
||||
* @buf: Input string to parse. Must be nul-terminated!
|
||||
* @array: Address of an object that can be accessed like an array.
|
||||
* @nelems: Number of elements in the array.
|
||||
* @set_attr_fn: Function to be called back for each array element.
|
||||
*
|
||||
* Return: 0 if success, negative error code otherwise.
|
||||
*/
|
||||
static int set_attr_from_string(
|
||||
char *const buf,
|
||||
void *const array, size_t const nelems,
|
||||
kbase_debugfs_helper_set_attr_fn const set_attr_fn)
|
||||
{
|
||||
size_t index, err = 0;
|
||||
char *ptr = buf;
|
||||
|
||||
for (index = 0; index < nelems && *ptr; ++index) {
|
||||
unsigned long new_size;
|
||||
size_t len;
|
||||
char sep;
|
||||
|
||||
/* Drop leading spaces */
|
||||
while (*ptr == ' ')
|
||||
ptr++;
|
||||
|
||||
len = strcspn(ptr, "\n ");
|
||||
if (len == 0) {
|
||||
/* No more values (allow this) */
|
||||
break;
|
||||
}
|
||||
|
||||
/* Substitute a nul terminator for a space character
|
||||
* to make the substring valid for kstrtoul.
|
||||
*/
|
||||
sep = ptr[len];
|
||||
if (sep == ' ')
|
||||
ptr[len++] = '\0';
|
||||
|
||||
err = kstrtoul(ptr, 0, &new_size);
|
||||
if (err)
|
||||
break;
|
||||
|
||||
/* Skip the substring (including any premature nul terminator)
|
||||
*/
|
||||
ptr += len;
|
||||
|
||||
set_attr_fn(array, index, new_size);
|
||||
}
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
int kbase_debugfs_helper_set_attr_from_string(
|
||||
const char *const buf, void *const array, size_t const nelems,
|
||||
kbase_debugfs_helper_set_attr_fn const set_attr_fn)
|
||||
{
|
||||
char *const wbuf = kstrdup(buf, GFP_KERNEL);
|
||||
int err = 0;
|
||||
|
||||
if (!wbuf)
|
||||
return -ENOMEM;
|
||||
|
||||
err = set_attr_from_string(wbuf, array, nelems,
|
||||
set_attr_fn);
|
||||
|
||||
kfree(wbuf);
|
||||
return err;
|
||||
}
|
||||
|
||||
ssize_t kbase_debugfs_helper_get_attr_to_string(
|
||||
char *const buf, size_t const size,
|
||||
void *const array, size_t const nelems,
|
||||
kbase_debugfs_helper_get_attr_fn const get_attr_fn)
|
||||
{
|
||||
ssize_t total = 0;
|
||||
size_t index;
|
||||
|
||||
for (index = 0; index < nelems; ++index) {
|
||||
const char *postfix = " ";
|
||||
|
||||
if (index == (nelems-1))
|
||||
postfix = "\n";
|
||||
|
||||
total += scnprintf(buf + total, size - total, "%zu%s",
|
||||
get_attr_fn(array, index), postfix);
|
||||
}
|
||||
|
||||
return total;
|
||||
}
|
||||
|
||||
int kbase_debugfs_helper_seq_write(struct file *const file,
|
||||
const char __user *const ubuf, size_t const count,
|
||||
size_t const nelems,
|
||||
kbase_debugfs_helper_set_attr_fn const set_attr_fn)
|
||||
{
|
||||
const struct seq_file *const sfile = file->private_data;
|
||||
void *const array = sfile->private;
|
||||
int err = 0;
|
||||
char *buf;
|
||||
|
||||
if (WARN_ON(!array))
|
||||
return -EINVAL;
|
||||
|
||||
if (WARN_ON(count > DEBUGFS_MEM_POOLS_MAX_WRITE_SIZE))
|
||||
return -EINVAL;
|
||||
|
||||
buf = kmalloc(count + 1, GFP_KERNEL);
|
||||
if (buf == NULL)
|
||||
return -ENOMEM;
|
||||
|
||||
if (copy_from_user(buf, ubuf, count)) {
|
||||
kfree(buf);
|
||||
return -EFAULT;
|
||||
}
|
||||
|
||||
buf[count] = '\0';
|
||||
err = set_attr_from_string(buf,
|
||||
array, nelems, set_attr_fn);
|
||||
kfree(buf);
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
int kbase_debugfs_helper_seq_read(struct seq_file *const sfile,
|
||||
size_t const nelems,
|
||||
kbase_debugfs_helper_get_attr_fn const get_attr_fn)
|
||||
{
|
||||
void *const array = sfile->private;
|
||||
size_t index;
|
||||
|
||||
if (WARN_ON(!array))
|
||||
return -EINVAL;
|
||||
|
||||
for (index = 0; index < nelems; ++index) {
|
||||
const char *postfix = " ";
|
||||
|
||||
if (index == (nelems-1))
|
||||
postfix = "\n";
|
||||
|
||||
seq_printf(sfile, "%zu%s", get_attr_fn(array, index), postfix);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
141
drivers/gpu/arm/bifrost/mali_kbase_debugfs_helper.h
Normal file
141
drivers/gpu/arm/bifrost/mali_kbase_debugfs_helper.h
Normal file
@@ -0,0 +1,141 @@
|
||||
/*
|
||||
*
|
||||
* (C) COPYRIGHT 2019 ARM Limited. All rights reserved.
|
||||
*
|
||||
* This program is free software and is provided to you under the terms of the
|
||||
* GNU General Public License version 2 as published by the Free Software
|
||||
* Foundation, and any use by you of this program is subject to the terms
|
||||
* of such GNU licence.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, you can access it online at
|
||||
* http://www.gnu.org/licenses/gpl-2.0.html.
|
||||
*
|
||||
* SPDX-License-Identifier: GPL-2.0
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef _KBASE_DEBUGFS_HELPER_H_
|
||||
#define _KBASE_DEBUGFS_HELPER_H_
|
||||
|
||||
/**
|
||||
* typedef kbase_debugfs_helper_set_attr_fn - Type of function to set an
|
||||
* attribute value from an array
|
||||
*
|
||||
* @array: Address of an object that can be accessed like an array.
|
||||
* @index: An element index. The valid range depends on the use-case.
|
||||
* @value: Attribute value to be set.
|
||||
*/
|
||||
typedef void (*kbase_debugfs_helper_set_attr_fn)(
|
||||
void *array, size_t index, size_t value);
|
||||
|
||||
/**
|
||||
* kbase_debugfs_helper_set_attr_from_string - Parse a string to reconfigure an
|
||||
* array
|
||||
*
|
||||
* The given function is called once for each attribute value found in the
|
||||
* input string. It is not an error if the string specifies fewer attribute
|
||||
* values than the specified number of array elements.
|
||||
*
|
||||
* The number base of each attribute value is detected automatically
|
||||
* according to the standard rules (e.g. prefix "0x" for hexadecimal).
|
||||
* Attribute values are separated by one or more space characters.
|
||||
* Additional leading and trailing spaces are ignored.
|
||||
*
|
||||
* @buf: Input string to parse. Must be nul-terminated!
|
||||
* @array: Address of an object that can be accessed like an array.
|
||||
* @nelems: Number of elements in the array.
|
||||
* @set_attr_fn: Function to be called back for each array element.
|
||||
*
|
||||
* Return: 0 if success, negative error code otherwise.
|
||||
*/
|
||||
int kbase_debugfs_helper_set_attr_from_string(
|
||||
const char *buf, void *array, size_t nelems,
|
||||
kbase_debugfs_helper_set_attr_fn set_attr_fn);
|
||||
|
||||
/**
|
||||
* typedef kbase_debugfs_helper_get_attr_fn - Type of function to get an
|
||||
* attribute value from an array
|
||||
*
|
||||
* @array: Address of an object that can be accessed like an array.
|
||||
* @index: An element index. The valid range depends on the use-case.
|
||||
*
|
||||
* Return: Value of attribute.
|
||||
*/
|
||||
typedef size_t (*kbase_debugfs_helper_get_attr_fn)(
|
||||
void *array, size_t index);
|
||||
|
||||
/**
|
||||
* kbase_debugfs_helper_get_attr_to_string - Construct a formatted string
|
||||
* from elements in an array
|
||||
*
|
||||
* The given function is called once for each array element to get the
|
||||
* value of the attribute to be inspected. The attribute values are
|
||||
* written to the buffer as a formatted string of decimal numbers
|
||||
* separated by spaces and terminated by a linefeed.
|
||||
*
|
||||
* @buf: Buffer in which to store the formatted output string.
|
||||
* @size: The size of the buffer, in bytes.
|
||||
* @array: Address of an object that can be accessed like an array.
|
||||
* @nelems: Number of elements in the array.
|
||||
* @get_attr_fn: Function to be called back for each array element.
|
||||
*
|
||||
* Return: Number of characters written excluding the nul terminator.
|
||||
*/
|
||||
ssize_t kbase_debugfs_helper_get_attr_to_string(
|
||||
char *buf, size_t size, void *array, size_t nelems,
|
||||
kbase_debugfs_helper_get_attr_fn get_attr_fn);
|
||||
|
||||
/**
|
||||
* kbase_debugfs_helper_seq_read - Implements reads from a virtual file for an
|
||||
* array
|
||||
*
|
||||
* The virtual file must have been opened by calling single_open and passing
|
||||
* the address of an object that can be accessed like an array.
|
||||
*
|
||||
* The given function is called once for each array element to get the
|
||||
* value of the attribute to be inspected. The attribute values are
|
||||
* written to the buffer as a formatted string of decimal numbers
|
||||
* separated by spaces and terminated by a linefeed.
|
||||
*
|
||||
* @sfile: A virtual file previously opened by calling single_open.
|
||||
* @nelems: Number of elements in the array.
|
||||
* @get_attr_fn: Function to be called back for each array element.
|
||||
*
|
||||
* Return: 0 if success, negative error code otherwise.
|
||||
*/
|
||||
int kbase_debugfs_helper_seq_read(
|
||||
struct seq_file *const sfile, size_t const nelems,
|
||||
kbase_debugfs_helper_get_attr_fn const get_attr_fn);
|
||||
|
||||
/**
|
||||
* kbase_debugfs_helper_seq_write - Implements writes to a virtual file for an
|
||||
* array
|
||||
*
|
||||
* The virtual file must have been opened by calling single_open and passing
|
||||
* the address of an object that can be accessed like an array.
|
||||
*
|
||||
* The given function is called once for each attribute value found in the
|
||||
* data written to the virtual file. For further details, refer to the
|
||||
* description of kbase_debugfs_helper_set_attr_from_string.
|
||||
*
|
||||
* @file: A virtual file previously opened by calling single_open.
|
||||
* @ubuf: Source address in user space.
|
||||
* @count: Number of bytes written to the virtual file.
|
||||
* @nelems: Number of elements in the array.
|
||||
* @set_attr_fn: Function to be called back for each array element.
|
||||
*
|
||||
* Return: 0 if success, negative error code otherwise.
|
||||
*/
|
||||
int kbase_debugfs_helper_seq_write(struct file *const file,
|
||||
const char __user *const ubuf, size_t const count,
|
||||
size_t const nelems,
|
||||
kbase_debugfs_helper_set_attr_fn const set_attr_fn);
|
||||
|
||||
#endif /*_KBASE_DEBUGFS_HELPER_H_ */
|
||||
|
||||
831
drivers/gpu/arm/bifrost/mali_kbase_defs.h
Normal file → Executable file
831
drivers/gpu/arm/bifrost/mali_kbase_defs.h
Normal file → Executable file
File diff suppressed because it is too large
Load Diff
@@ -1,6 +1,6 @@
|
||||
/*
|
||||
*
|
||||
* (C) COPYRIGHT 2010-2018 ARM Limited. All rights reserved.
|
||||
* (C) COPYRIGHT 2010-2019 ARM Limited. All rights reserved.
|
||||
*
|
||||
* This program is free software and is provided to you under the terms of the
|
||||
* GNU General Public License version 2 as published by the Free Software
|
||||
@@ -39,8 +39,6 @@
|
||||
#include <mali_kbase_hw.h>
|
||||
#include <mali_kbase_config_defaults.h>
|
||||
|
||||
#include <mali_kbase_profiling_gator_api.h>
|
||||
|
||||
/* NOTE: Magic - 0x45435254 (TRCE in ASCII).
|
||||
* Supports tracing feature provided in the base module.
|
||||
* Please keep it in sync with the value of base module.
|
||||
@@ -70,20 +68,11 @@ struct kbase_device *kbase_device_alloc(void)
|
||||
|
||||
static int kbase_device_as_init(struct kbase_device *kbdev, int i)
|
||||
{
|
||||
const char format[] = "mali_mmu%d";
|
||||
char name[sizeof(format)];
|
||||
const char poke_format[] = "mali_mmu%d_poker";
|
||||
char poke_name[sizeof(poke_format)];
|
||||
|
||||
if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8316))
|
||||
snprintf(poke_name, sizeof(poke_name), poke_format, i);
|
||||
|
||||
snprintf(name, sizeof(name), format, i);
|
||||
|
||||
kbdev->as[i].number = i;
|
||||
kbdev->as[i].fault_addr = 0ULL;
|
||||
kbdev->as[i].bf_data.addr = 0ULL;
|
||||
kbdev->as[i].pf_data.addr = 0ULL;
|
||||
|
||||
kbdev->as[i].pf_wq = alloc_workqueue(name, 0, 1);
|
||||
kbdev->as[i].pf_wq = alloc_workqueue("mali_mmu%d", 0, 1, i);
|
||||
if (!kbdev->as[i].pf_wq)
|
||||
return -EINVAL;
|
||||
|
||||
@@ -94,7 +83,8 @@ static int kbase_device_as_init(struct kbase_device *kbdev, int i)
|
||||
struct hrtimer *poke_timer = &kbdev->as[i].poke_timer;
|
||||
struct work_struct *poke_work = &kbdev->as[i].poke_work;
|
||||
|
||||
kbdev->as[i].poke_wq = alloc_workqueue(poke_name, 0, 1);
|
||||
kbdev->as[i].poke_wq =
|
||||
alloc_workqueue("mali_mmu%d_poker", 0, 1, i);
|
||||
if (!kbdev->as[i].poke_wq) {
|
||||
destroy_workqueue(kbdev->as[i].pf_wq);
|
||||
return -EINVAL;
|
||||
@@ -148,7 +138,7 @@ static void kbase_device_all_as_term(struct kbase_device *kbdev)
|
||||
|
||||
int kbase_device_init(struct kbase_device * const kbdev)
|
||||
{
|
||||
int i, err;
|
||||
int err;
|
||||
#ifdef CONFIG_ARM64
|
||||
struct device_node *np = NULL;
|
||||
#endif /* CONFIG_ARM64 */
|
||||
@@ -223,19 +213,7 @@ int kbase_device_init(struct kbase_device * const kbdev)
|
||||
if (err)
|
||||
goto term_as;
|
||||
|
||||
mutex_init(&kbdev->cacheclean_lock);
|
||||
|
||||
#ifdef CONFIG_MALI_BIFROST_TRACE_TIMELINE
|
||||
for (i = 0; i < BASE_JM_MAX_NR_SLOTS; ++i)
|
||||
kbdev->timeline.slot_atoms_submitted[i] = 0;
|
||||
|
||||
for (i = 0; i <= KBASEP_TIMELINE_PM_EVENT_LAST; ++i)
|
||||
atomic_set(&kbdev->timeline.pm_event_uid[i], 0);
|
||||
#endif /* CONFIG_MALI_BIFROST_TRACE_TIMELINE */
|
||||
|
||||
/* fbdump profiling controls set to 0 - fbdump not enabled until changed by gator */
|
||||
for (i = 0; i < FBDUMP_CONTROL_MAX; i++)
|
||||
kbdev->kbase_profiling_controls[i] = 0;
|
||||
init_waitqueue_head(&kbdev->cache_clean_wait);
|
||||
|
||||
kbase_debug_assert_register_hook(&kbasep_trace_hook_wrapper, kbdev);
|
||||
|
||||
@@ -254,6 +232,9 @@ int kbase_device_init(struct kbase_device * const kbdev)
|
||||
else
|
||||
kbdev->mmu_mode = kbase_mmu_mode_get_lpae();
|
||||
|
||||
mutex_init(&kbdev->kctx_list_lock);
|
||||
INIT_LIST_HEAD(&kbdev->kctx_list);
|
||||
|
||||
return 0;
|
||||
term_trace:
|
||||
kbasep_trace_term(kbdev);
|
||||
@@ -269,6 +250,8 @@ void kbase_device_term(struct kbase_device *kbdev)
|
||||
{
|
||||
KBASE_DEBUG_ASSERT(kbdev);
|
||||
|
||||
WARN_ON(!list_empty(&kbdev->kctx_list));
|
||||
|
||||
#if KBASE_TRACE_ENABLE
|
||||
kbase_debug_assert_register_hook(NULL, NULL);
|
||||
#endif
|
||||
@@ -285,91 +268,6 @@ void kbase_device_free(struct kbase_device *kbdev)
|
||||
kfree(kbdev);
|
||||
}
|
||||
|
||||
/* Attach a register-access trace buffer to a context.
 *
 * The first two words of @tb are initialised as the header (magic number,
 * then a zeroed state word) before the buffer is published under tb_lock.
 *
 * @kctx: Context that will own the trace buffer.
 * @tb:   Buffer of 32-bit words to record into.
 * @size: Size of @tb in bytes.
 *
 * Return: 0 on success, -EINVAL if @size is larger than a 16-bit entry
 * index can address.
 */
int kbase_device_trace_buffer_install(
		struct kbase_context *kctx, u32 *tb, size_t size)
{
	/* Each trace entry is composed of two 32-bit words. */
	const size_t entry_size = 2 * sizeof(u32);
	unsigned long irq_flags;

	KBASE_DEBUG_ASSERT(kctx);
	KBASE_DEBUG_ASSERT(tb);

	/* The last accessed entry is tracked with a 16-bit value, which
	 * bounds the buffer size that can be handled without an overflow.
	 */
	if (size > 0xFFFF * entry_size)
		return -EINVAL;

	/* Header word 0: magic number. */
	tb[0] = TRACE_BUFFER_HEADER_SPECIAL;
	/* Header word 1: write offset = 0, wrap counter = 0, no transaction
	 * active. A write offset of 0 means the buffer was never written;
	 * offsets 1 to (wrap_offset - 1) hold values once tracing starts.
	 */
	tb[1] = 0;

	/* Publish the buffer. */
	spin_lock_irqsave(&kctx->jctx.tb_lock, irq_flags);
	kctx->jctx.tb_wrap_offset = size / entry_size;
	kctx->jctx.tb = tb;
	spin_unlock_irqrestore(&kctx->jctx.tb_lock, irq_flags);

	return 0;
}
|
||||
|
||||
void kbase_device_trace_buffer_uninstall(struct kbase_context *kctx)
|
||||
{
|
||||
unsigned long flags;
|
||||
|
||||
KBASE_DEBUG_ASSERT(kctx);
|
||||
spin_lock_irqsave(&kctx->jctx.tb_lock, flags);
|
||||
kctx->jctx.tb = NULL;
|
||||
kctx->jctx.tb_wrap_offset = 0;
|
||||
spin_unlock_irqrestore(&kctx->jctx.tb_lock, flags);
|
||||
}
|
||||
|
||||
/* Record one register access in the context's trace buffer, if one is
 * installed.
 *
 * Header word tb[1] layout, as read and written below:
 *   bit 0       - transaction in progress
 *   bits 1..15  - wrap counter
 *   bits 16..31 - write offset (in entries)
 *
 * The in-progress bit is set before the entry is written and cleared by the
 * final header store; memory barriers separate the three steps.
 *
 * @kctx:       Context whose trace buffer receives the entry.
 * @type:       Access type; REG_WRITE sets bit 0 of the first entry word.
 * @reg_offset: Register offset; its two low bits are masked off.
 * @reg_value:  Value stored in the second entry word.
 */
void kbase_device_trace_register_access(struct kbase_context *kctx, enum kbase_reg_access_type type, u16 reg_offset, u32 reg_value)
{
	unsigned long flags;

	spin_lock_irqsave(&kctx->jctx.tb_lock, flags);
	if (kctx->jctx.tb) {
		u16 wrap_count;
		u16 write_offset;
		u32 *tb = kctx->jctx.tb;
		u32 header_word;

		header_word = tb[1];
		KBASE_DEBUG_ASSERT(0 == (header_word & 0x1));

		wrap_count = (header_word >> 1) & 0x7FFF;
		write_offset = (header_word >> 16) & 0xFFFF;

		/* mark as transaction in progress */
		tb[1] |= 0x1;
		mb();

		/* calculate new offset */
		write_offset++;
		if (write_offset == kctx->jctx.tb_wrap_offset) {
			/* wrap */
			write_offset = 1;
			wrap_count++;
			wrap_count &= 0x7FFF; /* 15bit wrap counter */
		}

		/* store the trace entry at the selected offset */
		tb[write_offset * 2 + 0] = (reg_offset & ~0x3) | ((type == REG_WRITE) ? 0x1 : 0x0);
		tb[write_offset * 2 + 1] = reg_value;
		mb();

		/* new header word, with the in-progress bit clear (transaction
		 * complete). The u16 fields are widened to u32 before shifting
		 * so that an offset of 0x8000 or more is not shifted into the
		 * sign bit of a promoted int.
		 */
		header_word = ((u32)write_offset << 16) | ((u32)wrap_count << 1);
		tb[1] = header_word;
	}
	spin_unlock_irqrestore(&kctx->jctx.tb_lock, flags);
}
|
||||
|
||||
/*
|
||||
* Device trace functions
|
||||
*/
|
||||
@@ -595,6 +493,7 @@ static int kbasep_trace_debugfs_open(struct inode *inode, struct file *file)
|
||||
}
|
||||
|
||||
static const struct file_operations kbasep_trace_debugfs_fops = {
|
||||
.owner = THIS_MODULE,
|
||||
.open = kbasep_trace_debugfs_open,
|
||||
.read = seq_read,
|
||||
.llseek = seq_lseek,
|
||||
@@ -636,39 +535,3 @@ void kbasep_trace_dump(struct kbase_device *kbdev)
|
||||
CSTD_UNUSED(kbdev);
|
||||
}
|
||||
#endif /* KBASE_TRACE_ENABLE */
|
||||
|
||||
/* Store a profiling control value on the device.
 *
 * Only the recognised control identifiers are accepted; any other value is
 * reported with dev_err() and ignored.
 *
 * @kbdev:   Device whose kbase_profiling_controls[] entry is updated.
 * @control: Control identifier, used as the array index.
 * @value:   New value for the control.
 */
void kbase_set_profiling_control(struct kbase_device *kbdev, u32 control, u32 value)
{
	switch (control) {
	case FBDUMP_CONTROL_ENABLE:
	case FBDUMP_CONTROL_RATE:
	case SW_COUNTER_ENABLE:
	case FBDUMP_CONTROL_RESIZE_FACTOR:
		kbdev->kbase_profiling_controls[control] = value;
		break;
	default:
		/* control is unsigned, so print it with %u. */
		dev_err(kbdev->dev, "Profiling control %u not found\n", control);
		break;
	}
}
|
||||
|
||||
/*
|
||||
* Called by gator to control the production of
|
||||
* profiling information at runtime
|
||||
* */
|
||||
|
||||
/* Apply a profiling control to the first device, if one exists.
 *
 * The device is looked up with kbase_find_device(-1), which selects the
 * first one; when none is found the call does nothing.
 */
void _mali_profiling_control(u32 action, u32 value)
{
	struct kbase_device *const first_dev = kbase_find_device(-1);

	if (!first_dev)
		return;

	kbase_set_profiling_control(first_dev, action, value);
}
KBASE_EXPORT_SYMBOL(_mali_profiling_control);
|
||||
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
/*
|
||||
*
|
||||
* (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
|
||||
* (C) COPYRIGHT 2010-2016,2018-2019 ARM Limited. All rights reserved.
|
||||
*
|
||||
* This program is free software and is provided to you under the terms of the
|
||||
* GNU General Public License version 2 as published by the Free Software
|
||||
@@ -24,11 +24,12 @@
|
||||
|
||||
#include <mali_kbase.h>
|
||||
#include <mali_kbase_debug.h>
|
||||
#include <mali_kbase_tlstream.h>
|
||||
#include <mali_kbase_tracepoints.h>
|
||||
|
||||
static struct base_jd_udata kbase_event_process(struct kbase_context *kctx, struct kbase_jd_atom *katom)
|
||||
{
|
||||
struct base_jd_udata data;
|
||||
struct kbase_device *kbdev;
|
||||
|
||||
lockdep_assert_held(&kctx->jctx.lock);
|
||||
|
||||
@@ -36,12 +37,11 @@ static struct base_jd_udata kbase_event_process(struct kbase_context *kctx, stru
|
||||
KBASE_DEBUG_ASSERT(katom != NULL);
|
||||
KBASE_DEBUG_ASSERT(katom->status == KBASE_JD_ATOM_STATE_COMPLETED);
|
||||
|
||||
kbdev = kctx->kbdev;
|
||||
data = katom->udata;
|
||||
|
||||
KBASE_TIMELINE_ATOMS_IN_FLIGHT(kctx, atomic_sub_return(1, &kctx->timeline.jd_atoms_in_flight));
|
||||
|
||||
KBASE_TLSTREAM_TL_NRET_ATOM_CTX(katom, kctx);
|
||||
KBASE_TLSTREAM_TL_DEL_ATOM(katom);
|
||||
KBASE_TLSTREAM_TL_NRET_ATOM_CTX(kbdev, katom, kctx);
|
||||
KBASE_TLSTREAM_TL_DEL_ATOM(kbdev, katom);
|
||||
|
||||
katom->status = KBASE_JD_ATOM_STATE_UNUSED;
|
||||
|
||||
@@ -172,6 +172,8 @@ static int kbase_event_coalesce(struct kbase_context *kctx)
|
||||
|
||||
void kbase_event_post(struct kbase_context *ctx, struct kbase_jd_atom *atom)
|
||||
{
|
||||
struct kbase_device *kbdev = ctx->kbdev;
|
||||
|
||||
if (atom->core_req & BASE_JD_REQ_EVENT_ONLY_ON_FAILURE) {
|
||||
if (atom->event_code == BASE_JD_EVENT_DONE) {
|
||||
/* Don't report the event */
|
||||
@@ -185,7 +187,7 @@ void kbase_event_post(struct kbase_context *ctx, struct kbase_jd_atom *atom)
|
||||
kbase_event_process_noreport(ctx, atom);
|
||||
return;
|
||||
}
|
||||
KBASE_TLSTREAM_TL_ATTRIB_ATOM_STATE(atom, TL_ATOM_STATE_POSTED);
|
||||
KBASE_TLSTREAM_TL_ATTRIB_ATOM_STATE(kbdev, atom, TL_ATOM_STATE_POSTED);
|
||||
if (atom->core_req & BASE_JD_REQ_EVENT_COALESCE) {
|
||||
/* Don't report the event until other event(s) have completed */
|
||||
mutex_lock(&ctx->event_mutex);
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
/*
|
||||
*
|
||||
* (C) COPYRIGHT 2011-2017 ARM Limited. All rights reserved.
|
||||
* (C) COPYRIGHT 2011-2019 ARM Limited. All rights reserved.
|
||||
*
|
||||
* This program is free software and is provided to you under the terms of the
|
||||
* GNU General Public License version 2 as published by the Free Software
|
||||
@@ -67,7 +67,11 @@ kbase_fence_fence_value_str(struct fence *fence, char *str, int size)
|
||||
kbase_fence_fence_value_str(struct dma_fence *fence, char *str, int size)
|
||||
#endif
|
||||
{
|
||||
#if (KERNEL_VERSION(5, 1, 0) > LINUX_VERSION_CODE)
|
||||
snprintf(str, size, "%u", fence->seqno);
|
||||
#else
|
||||
snprintf(str, size, "%llu", fence->seqno);
|
||||
#endif
|
||||
}
|
||||
|
||||
#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
|
||||
@@ -177,6 +181,7 @@ kbase_fence_add_callback(struct kbase_jd_atom *katom,
|
||||
kbase_fence_cb->fence = fence;
|
||||
kbase_fence_cb->katom = katom;
|
||||
INIT_LIST_HEAD(&kbase_fence_cb->node);
|
||||
atomic_inc(&katom->dma_fence.dep_count);
|
||||
|
||||
err = dma_fence_add_callback(fence, &kbase_fence_cb->fence_cb,
|
||||
callback);
|
||||
@@ -189,15 +194,16 @@ kbase_fence_add_callback(struct kbase_jd_atom *katom,
|
||||
err = 0;
|
||||
|
||||
kfree(kbase_fence_cb);
|
||||
atomic_dec(&katom->dma_fence.dep_count);
|
||||
} else if (err) {
|
||||
kfree(kbase_fence_cb);
|
||||
atomic_dec(&katom->dma_fence.dep_count);
|
||||
} else {
|
||||
/*
|
||||
* Get reference to fence that will be kept until callback gets
|
||||
* cleaned up in kbase_fence_free_callbacks().
|
||||
*/
|
||||
dma_fence_get(fence);
|
||||
atomic_inc(&katom->dma_fence.dep_count);
|
||||
/* Add callback to katom's list of callbacks */
|
||||
list_add(&kbase_fence_cb->node, &katom->dma_fence.callbacks);
|
||||
}
|
||||
|
||||
@@ -139,15 +139,16 @@ static inline bool kbase_fence_out_is_ours(struct kbase_jd_atom *katom)
|
||||
static inline int kbase_fence_out_signal(struct kbase_jd_atom *katom,
|
||||
int status)
|
||||
{
|
||||
if (status) {
|
||||
#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE && \
|
||||
KERNEL_VERSION(4, 9, 68) <= LINUX_VERSION_CODE)
|
||||
fence_set_error(katom->dma_fence.fence, status);
|
||||
fence_set_error(katom->dma_fence.fence, status);
|
||||
#elif (KERNEL_VERSION(4, 11, 0) <= LINUX_VERSION_CODE)
|
||||
if (status < 0)
|
||||
dma_fence_set_error(katom->dma_fence.fence, status);
|
||||
#else
|
||||
katom->dma_fence.fence->status = status;
|
||||
katom->dma_fence.fence->status = status;
|
||||
#endif
|
||||
}
|
||||
return dma_fence_signal(katom->dma_fence.fence);
|
||||
}
|
||||
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
/*
|
||||
*
|
||||
* (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
|
||||
* (C) COPYRIGHT 2011-2019 ARM Limited. All rights reserved.
|
||||
*
|
||||
* This program is free software and is provided to you under the terms of the
|
||||
* GNU General Public License version 2 as published by the Free Software
|
||||
@@ -27,23 +27,26 @@
|
||||
* and s/w counter reporting. We cannot use the enums in mali_uk_types.h because
|
||||
* they are unknown inside gator.
|
||||
*/
|
||||
|
||||
#ifndef _KBASE_GATOR_H_
|
||||
#define _KBASE_GATOR_H_
|
||||
|
||||
#ifdef CONFIG_MALI_BIFROST_GATOR_SUPPORT
|
||||
#define GATOR_MAKE_EVENT(type, number) (((type) << 24) | ((number) << 16))
|
||||
#include <linux/types.h>
|
||||
|
||||
#define GATOR_JOB_SLOT_START 1
|
||||
#define GATOR_JOB_SLOT_STOP 2
|
||||
#define GATOR_JOB_SLOT_SOFT_STOPPED 3
|
||||
|
||||
void kbase_trace_mali_job_slots_event(u32 event, const struct kbase_context *kctx, u8 atom_id);
|
||||
void kbase_trace_mali_pm_status(u32 event, u64 value);
|
||||
void kbase_trace_mali_pm_power_off(u32 event, u64 value);
|
||||
void kbase_trace_mali_pm_power_on(u32 event, u64 value);
|
||||
void kbase_trace_mali_page_fault_insert_pages(int event, u32 value);
|
||||
void kbase_trace_mali_mmu_as_in_use(int event);
|
||||
void kbase_trace_mali_mmu_as_released(int event);
|
||||
void kbase_trace_mali_total_alloc_pages_change(long long int event);
|
||||
#ifdef CONFIG_MALI_BIFROST_GATOR_SUPPORT
|
||||
|
||||
#define GATOR_MAKE_EVENT(type, number) (((type) << 24) | ((number) << 16))
|
||||
|
||||
struct kbase_context;
|
||||
|
||||
void kbase_trace_mali_job_slots_event(u32 dev_id, u32 event, const struct kbase_context *kctx, u8 atom_id);
|
||||
void kbase_trace_mali_pm_status(u32 dev_id, u32 event, u64 value);
|
||||
void kbase_trace_mali_page_fault_insert_pages(u32 dev_id, int event, u32 value);
|
||||
void kbase_trace_mali_total_alloc_pages_change(u32 dev_id, long long int event);
|
||||
|
||||
#endif /* CONFIG_MALI_BIFROST_GATOR_SUPPORT */
|
||||
|
||||
|
||||
@@ -1,343 +0,0 @@
|
||||
/*
|
||||
*
|
||||
* (C) COPYRIGHT 2014-2017 ARM Limited. All rights reserved.
|
||||
*
|
||||
* This program is free software and is provided to you under the terms of the
|
||||
* GNU General Public License version 2 as published by the Free Software
|
||||
* Foundation, and any use by you of this program is subject to the terms
|
||||
* of such GNU licence.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, you can access it online at
|
||||
* http://www.gnu.org/licenses/gpl-2.0.html.
|
||||
*
|
||||
* SPDX-License-Identifier: GPL-2.0
|
||||
*
|
||||
*/
|
||||
|
||||
#include "mali_kbase.h"
|
||||
#include "mali_kbase_hw.h"
|
||||
#include "mali_kbase_mem_linux.h"
|
||||
#include "mali_kbase_gator_api.h"
|
||||
#include "mali_kbase_gator_hwcnt_names.h"
|
||||
|
||||
#define MALI_MAX_CORES_PER_GROUP 4
|
||||
#define MALI_MAX_NUM_BLOCKS_PER_GROUP 8
|
||||
#define MALI_COUNTERS_PER_BLOCK 64
|
||||
#define MALI_BYTES_PER_COUNTER 4
|
||||
|
||||
struct kbase_gator_hwcnt_handles {
|
||||
struct kbase_device *kbdev;
|
||||
struct kbase_vinstr_client *vinstr_cli;
|
||||
void *vinstr_buffer;
|
||||
struct work_struct dump_work;
|
||||
int dump_complete;
|
||||
spinlock_t dump_lock;
|
||||
};
|
||||
|
||||
static void dump_worker(struct work_struct *work);
|
||||
|
||||
const char * const *kbase_gator_hwcnt_init_names(uint32_t *total_counters)
|
||||
{
|
||||
const char * const *hardware_counters;
|
||||
struct kbase_device *kbdev;
|
||||
uint32_t product_id;
|
||||
uint32_t count;
|
||||
|
||||
if (!total_counters)
|
||||
return NULL;
|
||||
|
||||
/* Get the first device - it doesn't matter in this case */
|
||||
kbdev = kbase_find_device(-1);
|
||||
if (!kbdev)
|
||||
return NULL;
|
||||
|
||||
product_id = kbdev->gpu_props.props.core_props.product_id;
|
||||
|
||||
if (GPU_ID_IS_NEW_FORMAT(product_id)) {
|
||||
switch (GPU_ID2_MODEL_MATCH_VALUE(product_id)) {
|
||||
case GPU_ID2_PRODUCT_TMIX:
|
||||
hardware_counters = hardware_counters_mali_tMIx;
|
||||
count = ARRAY_SIZE(hardware_counters_mali_tMIx);
|
||||
break;
|
||||
case GPU_ID2_PRODUCT_THEX:
|
||||
hardware_counters = hardware_counters_mali_tHEx;
|
||||
count = ARRAY_SIZE(hardware_counters_mali_tHEx);
|
||||
break;
|
||||
case GPU_ID2_PRODUCT_TSIX:
|
||||
hardware_counters = hardware_counters_mali_tSIx;
|
||||
count = ARRAY_SIZE(hardware_counters_mali_tSIx);
|
||||
break;
|
||||
case GPU_ID2_PRODUCT_TNOX:
|
||||
hardware_counters = hardware_counters_mali_tNOx;
|
||||
count = ARRAY_SIZE(hardware_counters_mali_tNOx);
|
||||
break;
|
||||
default:
|
||||
hardware_counters = NULL;
|
||||
count = 0;
|
||||
dev_err(kbdev->dev, "Unrecognized product ID: %u\n",
|
||||
product_id);
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
switch (product_id) {
|
||||
/* If we are using a Mali-T60x device */
|
||||
case GPU_ID_PI_T60X:
|
||||
hardware_counters = hardware_counters_mali_t60x;
|
||||
count = ARRAY_SIZE(hardware_counters_mali_t60x);
|
||||
break;
|
||||
/* If we are using a Mali-T62x device */
|
||||
case GPU_ID_PI_T62X:
|
||||
hardware_counters = hardware_counters_mali_t62x;
|
||||
count = ARRAY_SIZE(hardware_counters_mali_t62x);
|
||||
break;
|
||||
/* If we are using a Mali-T72x device */
|
||||
case GPU_ID_PI_T72X:
|
||||
hardware_counters = hardware_counters_mali_t72x;
|
||||
count = ARRAY_SIZE(hardware_counters_mali_t72x);
|
||||
break;
|
||||
/* If we are using a Mali-T76x device */
|
||||
case GPU_ID_PI_T76X:
|
||||
hardware_counters = hardware_counters_mali_t76x;
|
||||
count = ARRAY_SIZE(hardware_counters_mali_t76x);
|
||||
break;
|
||||
/* If we are using a Mali-T82x device */
|
||||
case GPU_ID_PI_T82X:
|
||||
hardware_counters = hardware_counters_mali_t82x;
|
||||
count = ARRAY_SIZE(hardware_counters_mali_t82x);
|
||||
break;
|
||||
/* If we are using a Mali-T83x device */
|
||||
case GPU_ID_PI_T83X:
|
||||
hardware_counters = hardware_counters_mali_t83x;
|
||||
count = ARRAY_SIZE(hardware_counters_mali_t83x);
|
||||
break;
|
||||
/* If we are using a Mali-T86x device */
|
||||
case GPU_ID_PI_T86X:
|
||||
hardware_counters = hardware_counters_mali_t86x;
|
||||
count = ARRAY_SIZE(hardware_counters_mali_t86x);
|
||||
break;
|
||||
/* If we are using a Mali-T88x device */
|
||||
case GPU_ID_PI_TFRX:
|
||||
hardware_counters = hardware_counters_mali_t88x;
|
||||
count = ARRAY_SIZE(hardware_counters_mali_t88x);
|
||||
break;
|
||||
default:
|
||||
hardware_counters = NULL;
|
||||
count = 0;
|
||||
dev_err(kbdev->dev, "Unrecognized product ID: %u\n",
|
||||
product_id);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/* Release the kbdev reference. */
|
||||
kbase_release_device(kbdev);
|
||||
|
||||
*total_counters = count;
|
||||
|
||||
/* If we return a string array take a reference on the module (or fail). */
|
||||
if (hardware_counters && !try_module_get(THIS_MODULE))
|
||||
return NULL;
|
||||
|
||||
return hardware_counters;
|
||||
}
|
||||
KBASE_EXPORT_SYMBOL(kbase_gator_hwcnt_init_names);
|
||||
|
||||
void kbase_gator_hwcnt_term_names(void)
|
||||
{
|
||||
/* Release the module reference. */
|
||||
module_put(THIS_MODULE);
|
||||
}
|
||||
KBASE_EXPORT_SYMBOL(kbase_gator_hwcnt_term_names);
|
||||
|
||||
struct kbase_gator_hwcnt_handles *kbase_gator_hwcnt_init(struct kbase_gator_hwcnt_info *in_out_info)
|
||||
{
|
||||
struct kbase_gator_hwcnt_handles *hand;
|
||||
struct kbase_ioctl_hwcnt_reader_setup setup;
|
||||
uint32_t dump_size = 0, i = 0;
|
||||
|
||||
if (!in_out_info)
|
||||
return NULL;
|
||||
|
||||
hand = kzalloc(sizeof(*hand), GFP_KERNEL);
|
||||
if (!hand)
|
||||
return NULL;
|
||||
|
||||
INIT_WORK(&hand->dump_work, dump_worker);
|
||||
spin_lock_init(&hand->dump_lock);
|
||||
|
||||
/* Get the first device */
|
||||
hand->kbdev = kbase_find_device(-1);
|
||||
if (!hand->kbdev)
|
||||
goto free_hand;
|
||||
|
||||
dump_size = kbase_vinstr_dump_size(hand->kbdev);
|
||||
hand->vinstr_buffer = kzalloc(dump_size, GFP_KERNEL);
|
||||
if (!hand->vinstr_buffer)
|
||||
goto release_device;
|
||||
in_out_info->kernel_dump_buffer = hand->vinstr_buffer;
|
||||
|
||||
in_out_info->nr_cores = hand->kbdev->gpu_props.num_cores;
|
||||
in_out_info->nr_core_groups = hand->kbdev->gpu_props.num_core_groups;
|
||||
in_out_info->gpu_id = hand->kbdev->gpu_props.props.core_props.product_id;
|
||||
|
||||
/* If we are using a v4 device (Mali-T6xx or Mali-T72x) */
|
||||
if (kbase_hw_has_feature(hand->kbdev, BASE_HW_FEATURE_V4)) {
|
||||
uint32_t cg, j;
|
||||
uint64_t core_mask;
|
||||
|
||||
/* There are 8 hardware counters blocks per core group */
|
||||
in_out_info->hwc_layout = kmalloc(sizeof(enum hwc_type) *
|
||||
MALI_MAX_NUM_BLOCKS_PER_GROUP *
|
||||
in_out_info->nr_core_groups, GFP_KERNEL);
|
||||
|
||||
if (!in_out_info->hwc_layout)
|
||||
goto free_vinstr_buffer;
|
||||
|
||||
dump_size = in_out_info->nr_core_groups *
|
||||
MALI_MAX_NUM_BLOCKS_PER_GROUP *
|
||||
MALI_COUNTERS_PER_BLOCK *
|
||||
MALI_BYTES_PER_COUNTER;
|
||||
|
||||
for (cg = 0; cg < in_out_info->nr_core_groups; cg++) {
|
||||
core_mask = hand->kbdev->gpu_props.props.coherency_info.group[cg].core_mask;
|
||||
|
||||
for (j = 0; j < MALI_MAX_CORES_PER_GROUP; j++) {
|
||||
if (core_mask & (1u << j))
|
||||
in_out_info->hwc_layout[i++] = SHADER_BLOCK;
|
||||
else
|
||||
in_out_info->hwc_layout[i++] = RESERVED_BLOCK;
|
||||
}
|
||||
|
||||
in_out_info->hwc_layout[i++] = TILER_BLOCK;
|
||||
in_out_info->hwc_layout[i++] = MMU_L2_BLOCK;
|
||||
|
||||
in_out_info->hwc_layout[i++] = RESERVED_BLOCK;
|
||||
|
||||
if (0 == cg)
|
||||
in_out_info->hwc_layout[i++] = JM_BLOCK;
|
||||
else
|
||||
in_out_info->hwc_layout[i++] = RESERVED_BLOCK;
|
||||
}
|
||||
/* If we are using any other device */
|
||||
} else {
|
||||
uint32_t nr_l2, nr_sc_bits, j;
|
||||
uint64_t core_mask;
|
||||
|
||||
nr_l2 = hand->kbdev->gpu_props.props.l2_props.num_l2_slices;
|
||||
|
||||
core_mask = hand->kbdev->gpu_props.props.coherency_info.group[0].core_mask;
|
||||
|
||||
nr_sc_bits = fls64(core_mask);
|
||||
|
||||
/* The job manager and tiler sets of counters
|
||||
* are always present */
|
||||
in_out_info->hwc_layout = kmalloc(sizeof(enum hwc_type) * (2 + nr_sc_bits + nr_l2), GFP_KERNEL);
|
||||
|
||||
if (!in_out_info->hwc_layout)
|
||||
goto free_vinstr_buffer;
|
||||
|
||||
dump_size = (2 + nr_sc_bits + nr_l2) * MALI_COUNTERS_PER_BLOCK * MALI_BYTES_PER_COUNTER;
|
||||
|
||||
in_out_info->hwc_layout[i++] = JM_BLOCK;
|
||||
in_out_info->hwc_layout[i++] = TILER_BLOCK;
|
||||
|
||||
for (j = 0; j < nr_l2; j++)
|
||||
in_out_info->hwc_layout[i++] = MMU_L2_BLOCK;
|
||||
|
||||
while (core_mask != 0ull) {
|
||||
if ((core_mask & 1ull) != 0ull)
|
||||
in_out_info->hwc_layout[i++] = SHADER_BLOCK;
|
||||
else
|
||||
in_out_info->hwc_layout[i++] = RESERVED_BLOCK;
|
||||
core_mask >>= 1;
|
||||
}
|
||||
}
|
||||
|
||||
in_out_info->nr_hwc_blocks = i;
|
||||
in_out_info->size = dump_size;
|
||||
|
||||
setup.jm_bm = in_out_info->bitmask[0];
|
||||
setup.tiler_bm = in_out_info->bitmask[1];
|
||||
setup.shader_bm = in_out_info->bitmask[2];
|
||||
setup.mmu_l2_bm = in_out_info->bitmask[3];
|
||||
hand->vinstr_cli = kbase_vinstr_hwcnt_kernel_setup(hand->kbdev->vinstr_ctx,
|
||||
&setup, hand->vinstr_buffer);
|
||||
if (!hand->vinstr_cli) {
|
||||
dev_err(hand->kbdev->dev, "Failed to register gator with vinstr core");
|
||||
goto free_layout;
|
||||
}
|
||||
|
||||
return hand;
|
||||
|
||||
free_layout:
|
||||
kfree(in_out_info->hwc_layout);
|
||||
|
||||
free_vinstr_buffer:
|
||||
kfree(hand->vinstr_buffer);
|
||||
|
||||
release_device:
|
||||
kbase_release_device(hand->kbdev);
|
||||
|
||||
free_hand:
|
||||
kfree(hand);
|
||||
return NULL;
|
||||
}
|
||||
KBASE_EXPORT_SYMBOL(kbase_gator_hwcnt_init);
|
||||
|
||||
void kbase_gator_hwcnt_term(struct kbase_gator_hwcnt_info *in_out_info, struct kbase_gator_hwcnt_handles *opaque_handles)
|
||||
{
|
||||
if (in_out_info)
|
||||
kfree(in_out_info->hwc_layout);
|
||||
|
||||
if (opaque_handles) {
|
||||
cancel_work_sync(&opaque_handles->dump_work);
|
||||
kbase_vinstr_detach_client(opaque_handles->vinstr_cli);
|
||||
kfree(opaque_handles->vinstr_buffer);
|
||||
kbase_release_device(opaque_handles->kbdev);
|
||||
kfree(opaque_handles);
|
||||
}
|
||||
}
|
||||
KBASE_EXPORT_SYMBOL(kbase_gator_hwcnt_term);
|
||||
|
||||
static void dump_worker(struct work_struct *work)
|
||||
{
|
||||
struct kbase_gator_hwcnt_handles *hand;
|
||||
|
||||
hand = container_of(work, struct kbase_gator_hwcnt_handles, dump_work);
|
||||
if (!kbase_vinstr_hwc_dump(hand->vinstr_cli,
|
||||
BASE_HWCNT_READER_EVENT_MANUAL)) {
|
||||
spin_lock_bh(&hand->dump_lock);
|
||||
hand->dump_complete = 1;
|
||||
spin_unlock_bh(&hand->dump_lock);
|
||||
} else {
|
||||
schedule_work(&hand->dump_work);
|
||||
}
|
||||
}
|
||||
|
||||
uint32_t kbase_gator_instr_hwcnt_dump_complete(
|
||||
struct kbase_gator_hwcnt_handles *opaque_handles,
|
||||
uint32_t * const success)
|
||||
{
|
||||
|
||||
if (opaque_handles && success) {
|
||||
*success = opaque_handles->dump_complete;
|
||||
opaque_handles->dump_complete = 0;
|
||||
return *success;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
KBASE_EXPORT_SYMBOL(kbase_gator_instr_hwcnt_dump_complete);
|
||||
|
||||
uint32_t kbase_gator_instr_hwcnt_dump_irq(struct kbase_gator_hwcnt_handles *opaque_handles)
|
||||
{
|
||||
if (opaque_handles)
|
||||
schedule_work(&opaque_handles->dump_work);
|
||||
return 0;
|
||||
}
|
||||
KBASE_EXPORT_SYMBOL(kbase_gator_instr_hwcnt_dump_irq);
|
||||
@@ -1,224 +0,0 @@
|
||||
/*
|
||||
*
|
||||
* (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
|
||||
*
|
||||
* This program is free software and is provided to you under the terms of the
|
||||
* GNU General Public License version 2 as published by the Free Software
|
||||
* Foundation, and any use by you of this program is subject to the terms
|
||||
* of such GNU licence.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, you can access it online at
|
||||
* http://www.gnu.org/licenses/gpl-2.0.html.
|
||||
*
|
||||
* SPDX-License-Identifier: GPL-2.0
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef _KBASE_GATOR_API_H_
|
||||
#define _KBASE_GATOR_API_H_
|
||||
|
||||
/**
|
||||
* @brief This file describes the API used by Gator to fetch hardware counters.
|
||||
*/
|
||||
|
||||
/* This define is used by the gator kernel module compile to select which DDK
|
||||
* API calling convention to use. If not defined (legacy DDK) gator assumes
|
||||
* version 1. The version to DDK release mapping is:
|
||||
* Version 1 API: DDK versions r1px, r2px
|
||||
* Version 2 API: DDK versions r3px, r4px
|
||||
* Version 3 API: DDK version r5p0 and newer
|
||||
*
|
||||
* API Usage
|
||||
* =========
|
||||
*
|
||||
* 1] Call kbase_gator_hwcnt_init_names() to return the list of short counter
|
||||
* names for the GPU present in this device.
|
||||
*
|
||||
* 2] Create a kbase_gator_hwcnt_info structure and set the counter enables for
|
||||
* the counters you want enabled. The enables can all be set for simplicity in
|
||||
* most use cases, but disabling some will let you minimize bandwidth impact.
|
||||
*
|
||||
* 3] Call kbase_gator_hwcnt_init() using the above structure, to create a
|
||||
* counter context. On successful return the DDK will have populated the
|
||||
* structure with a variety of useful information.
|
||||
*
|
||||
* 4] Call kbase_gator_instr_hwcnt_dump_irq() to queue a non-blocking request for a
|
||||
* counter dump. If this returns a non-zero value the request has been queued,
|
||||
* otherwise the driver has been unable to do so (typically because of another
|
||||
* user of the instrumentation exists concurrently).
|
||||
*
|
||||
* 5] Call kbase_gator_instr_hwcnt_dump_complete() to test whether the previously
|
||||
* requested dump has been successful. If this returns non-zero the counter dump
|
||||
* has resolved, but the value of *success must also be tested as the dump
|
||||
* may have not been successful. If it returns zero the counter dump was
|
||||
* abandoned due to the device being busy (typically because of another
|
||||
* user of the instrumentation exists concurrently).
|
||||
*
|
||||
* 6] Process the counters stored in the buffer pointed to by ...
|
||||
*
|
||||
* kbase_gator_hwcnt_info->kernel_dump_buffer
|
||||
*
|
||||
* In pseudo code you can find all of the counters via this approach:
|
||||
*
|
||||
*
|
||||
* hwcnt_info # pointer to kbase_gator_hwcnt_info structure
|
||||
* hwcnt_name # pointer to name list
|
||||
*
|
||||
* u32 * hwcnt_data = (u32*)hwcnt_info->kernel_dump_buffer
|
||||
*
|
||||
* # Iterate over each 64-counter block in this GPU configuration
|
||||
* for( i = 0; i < hwcnt_info->nr_hwc_blocks; i++) {
|
||||
* hwc_type type = hwcnt_info->hwc_layout[i];
|
||||
*
|
||||
* # Skip reserved type blocks - they contain no counters at all
|
||||
* if( type == RESERVED_BLOCK ) {
|
||||
* continue;
|
||||
* }
|
||||
*
|
||||
* size_t name_offset = type * 64;
|
||||
* size_t data_offset = i * 64;
|
||||
*
|
||||
* # Iterate over the names of the counters in this block type
|
||||
* for( j = 0; j < 64; j++) {
|
||||
* const char * name = hwcnt_name[name_offset+j];
|
||||
*
|
||||
* # Skip empty name strings - there is no counter here
|
||||
* if( name[0] == '\0' ) {
|
||||
* continue;
|
||||
* }
|
||||
*
|
||||
* u32 data = hwcnt_data[data_offset+j];
|
||||
*
|
||||
* printk( "COUNTER: %s DATA: %u\n", name, data );
|
||||
* }
|
||||
* }
|
||||
*
|
||||
*
|
||||
* Note that in most implementations you typically want to either SUM or
|
||||
* AVERAGE multiple instances of the same counter if, for example, you have
|
||||
* multiple shader cores or multiple L2 caches. The most sensible view for
|
||||
* analysis is to AVERAGE shader core counters, but SUM L2 cache and MMU
|
||||
* counters.
|
||||
*
|
||||
* 7] Goto 4, repeating until you want to stop collecting counters.
|
||||
*
|
||||
* 8] Release the dump resources by calling kbase_gator_hwcnt_term().
|
||||
*
|
||||
* 9] Release the name table resources by calling
|
||||
* kbase_gator_hwcnt_term_names(). This function must only be called if
|
||||
* init_names() returned a non-NULL value.
|
||||
**/
|
||||
|
||||
#define MALI_DDK_GATOR_API_VERSION 3
|
||||
|
||||
enum hwc_type {
|
||||
JM_BLOCK = 0,
|
||||
TILER_BLOCK,
|
||||
SHADER_BLOCK,
|
||||
MMU_L2_BLOCK,
|
||||
RESERVED_BLOCK
|
||||
};
|
||||
|
||||
struct kbase_gator_hwcnt_info {
|
||||
/* Passed from Gator to kbase */
|
||||
|
||||
/* the bitmask of enabled hardware counters for each counter block */
|
||||
uint16_t bitmask[4];
|
||||
|
||||
/* Passed from kbase to Gator */
|
||||
|
||||
/* ptr to counter dump memory */
|
||||
void *kernel_dump_buffer;
|
||||
|
||||
/* size of counter dump memory */
|
||||
uint32_t size;
|
||||
|
||||
/* the ID of the Mali device */
|
||||
uint32_t gpu_id;
|
||||
|
||||
/* the number of shader cores in the GPU */
|
||||
uint32_t nr_cores;
|
||||
|
||||
/* the number of core groups */
|
||||
uint32_t nr_core_groups;
|
||||
|
||||
/* the memory layout of the performance counters */
|
||||
enum hwc_type *hwc_layout;
|
||||
|
||||
/* the total number of hardware counter blocks */
|
||||
uint32_t nr_hwc_blocks;
|
||||
};
|
||||
|
||||
/**
|
||||
* @brief Opaque block of Mali data which Gator needs to return to the API later.
|
||||
*/
|
||||
struct kbase_gator_hwcnt_handles;
|
||||
|
||||
/**
|
||||
* @brief Initialize the resources Gator needs for performance profiling.
|
||||
*
|
||||
* @param in_out_info A pointer to a structure containing the enabled counters passed from Gator and all the Mali
|
||||
* specific information that will be returned to Gator. On entry Gator must have populated the
|
||||
* 'bitmask' field with the counters it wishes to enable for each class of counter block.
|
||||
* Each entry in the array corresponds to a single counter class based on the "hwc_type"
|
||||
* enumeration, and each bit corresponds to an enable for 4 sequential counters (LSB enables
|
||||
* the first 4 counters in the block, and so on). See the GPU counter array as returned by
|
||||
* kbase_gator_hwcnt_init_names() for the index values of each counter for the current GPU.
|
||||
*
|
||||
* @return Pointer to an opaque handle block on success, NULL on error.
|
||||
*/
|
||||
extern struct kbase_gator_hwcnt_handles *kbase_gator_hwcnt_init(struct kbase_gator_hwcnt_info *in_out_info);
|
||||
|
||||
/**
|
||||
* @brief Free all resources once Gator has finished using performance counters.
|
||||
*
|
||||
* @param in_out_info A pointer to a structure containing the enabled counters passed from Gator and all the
|
||||
* Mali specific information that will be returned to Gator.
|
||||
* @param opaque_handles A wrapper structure for kbase structures.
|
||||
*/
|
||||
extern void kbase_gator_hwcnt_term(struct kbase_gator_hwcnt_info *in_out_info, struct kbase_gator_hwcnt_handles *opaque_handles);
|
||||
|
||||
/**
|
||||
* @brief Poll whether a counter dump is successful.
|
||||
*
|
||||
* @param opaque_handles A wrapper structure for kbase structures.
|
||||
* @param[out] success Non-zero on success, zero on failure.
|
||||
*
|
||||
* @return Zero if the dump is still pending, non-zero if the dump has completed. Note that a
|
||||
* completed dump may not have dumped successfully, so the caller must test for both
|
||||
* a completed and successful dump before processing counters.
|
||||
*/
|
||||
extern uint32_t kbase_gator_instr_hwcnt_dump_complete(struct kbase_gator_hwcnt_handles *opaque_handles, uint32_t * const success);
|
||||
|
||||
/**
|
||||
* @brief Request the generation of a new counter dump.
|
||||
*
|
||||
* @param opaque_handles A wrapper structure for kbase structures.
|
||||
*
|
||||
* @return Zero if the hardware device is busy and cannot handle the request, non-zero otherwise.
|
||||
*/
|
||||
extern uint32_t kbase_gator_instr_hwcnt_dump_irq(struct kbase_gator_hwcnt_handles *opaque_handles);
|
||||
|
||||
/**
|
||||
* @brief This function is used to fetch the names table based on the Mali device in use.
|
||||
*
|
||||
* @param[out] total_counters The total number of counters short names in the Mali devices' list.
|
||||
*
|
||||
* @return Pointer to an array of strings of length *total_counters.
|
||||
*/
|
||||
extern const char * const *kbase_gator_hwcnt_init_names(uint32_t *total_counters);
|
||||
|
||||
/**
|
||||
* @brief This function is used to terminate the use of the names table.
|
||||
*
|
||||
* This function must only be called if the initial call to kbase_gator_hwcnt_init_names returned a non-NULL value.
|
||||
*/
|
||||
extern void kbase_gator_hwcnt_term_names(void);
|
||||
|
||||
#endif
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,296 +0,0 @@
|
||||
/*
|
||||
*
|
||||
* (C) COPYRIGHT 2016-2018 ARM Limited. All rights reserved.
|
||||
*
|
||||
* This program is free software and is provided to you under the terms of the
|
||||
* GNU General Public License version 2 as published by the Free Software
|
||||
* Foundation, and any use by you of this program is subject to the terms
|
||||
* of such GNU licence.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, you can access it online at
|
||||
* http://www.gnu.org/licenses/gpl-2.0.html.
|
||||
*
|
||||
* SPDX-License-Identifier: GPL-2.0
|
||||
*
|
||||
*/
|
||||
|
||||
/*
|
||||
* This header was autogenerated, it should not be edited.
|
||||
*/
|
||||
|
||||
#ifndef _KBASE_GATOR_HWCNT_NAMES_TGOX_H_
|
||||
#define _KBASE_GATOR_HWCNT_NAMES_TGOX_H_
|
||||
|
||||
static const char * const hardware_counters_mali_tGOx[] = {
|
||||
/* Performance counters for the Job Manager */
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"TGOx_MESSAGES_SENT",
|
||||
"TGOx_MESSAGES_RECEIVED",
|
||||
"TGOx_GPU_ACTIVE",
|
||||
"TGOx_IRQ_ACTIVE",
|
||||
"TGOx_JS0_JOBS",
|
||||
"TGOx_JS0_TASKS",
|
||||
"TGOx_JS0_ACTIVE",
|
||||
"",
|
||||
"TGOx_JS0_WAIT_READ",
|
||||
"TGOx_JS0_WAIT_ISSUE",
|
||||
"TGOx_JS0_WAIT_DEPEND",
|
||||
"TGOx_JS0_WAIT_FINISH",
|
||||
"TGOx_JS1_JOBS",
|
||||
"TGOx_JS1_TASKS",
|
||||
"TGOx_JS1_ACTIVE",
|
||||
"",
|
||||
"TGOx_JS1_WAIT_READ",
|
||||
"TGOx_JS1_WAIT_ISSUE",
|
||||
"TGOx_JS1_WAIT_DEPEND",
|
||||
"TGOx_JS1_WAIT_FINISH",
|
||||
"TGOx_JS2_JOBS",
|
||||
"TGOx_JS2_TASKS",
|
||||
"TGOx_JS2_ACTIVE",
|
||||
"",
|
||||
"TGOx_JS2_WAIT_READ",
|
||||
"TGOx_JS2_WAIT_ISSUE",
|
||||
"TGOx_JS2_WAIT_DEPEND",
|
||||
"TGOx_JS2_WAIT_FINISH",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
|
||||
/* Performance counters for the Tiler */
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"TGOx_TILER_ACTIVE",
|
||||
"TGOx_JOBS_PROCESSED",
|
||||
"TGOx_TRIANGLES",
|
||||
"TGOx_LINES",
|
||||
"TGOx_POINTS",
|
||||
"TGOx_FRONT_FACING",
|
||||
"TGOx_BACK_FACING",
|
||||
"TGOx_PRIM_VISIBLE",
|
||||
"TGOx_PRIM_CULLED",
|
||||
"TGOx_PRIM_CLIPPED",
|
||||
"TGOx_PRIM_SAT_CULLED",
|
||||
"TGOx_BIN_ALLOC_INIT",
|
||||
"TGOx_BIN_ALLOC_OVERFLOW",
|
||||
"TGOx_BUS_READ",
|
||||
"",
|
||||
"TGOx_BUS_WRITE",
|
||||
"TGOx_LOADING_DESC",
|
||||
"TGOx_IDVS_POS_SHAD_REQ",
|
||||
"TGOx_IDVS_POS_SHAD_WAIT",
|
||||
"TGOx_IDVS_POS_SHAD_STALL",
|
||||
"TGOx_IDVS_POS_FIFO_FULL",
|
||||
"TGOx_PREFETCH_STALL",
|
||||
"TGOx_VCACHE_HIT",
|
||||
"TGOx_VCACHE_MISS",
|
||||
"TGOx_VCACHE_LINE_WAIT",
|
||||
"TGOx_VFETCH_POS_READ_WAIT",
|
||||
"TGOx_VFETCH_VERTEX_WAIT",
|
||||
"TGOx_VFETCH_STALL",
|
||||
"TGOx_PRIMASSY_STALL",
|
||||
"TGOx_BBOX_GEN_STALL",
|
||||
"TGOx_IDVS_VBU_HIT",
|
||||
"TGOx_IDVS_VBU_MISS",
|
||||
"TGOx_IDVS_VBU_LINE_DEALLOCATE",
|
||||
"TGOx_IDVS_VAR_SHAD_REQ",
|
||||
"TGOx_IDVS_VAR_SHAD_STALL",
|
||||
"TGOx_BINNER_STALL",
|
||||
"TGOx_ITER_STALL",
|
||||
"TGOx_COMPRESS_MISS",
|
||||
"TGOx_COMPRESS_STALL",
|
||||
"TGOx_PCACHE_HIT",
|
||||
"TGOx_PCACHE_MISS",
|
||||
"TGOx_PCACHE_MISS_STALL",
|
||||
"TGOx_PCACHE_EVICT_STALL",
|
||||
"TGOx_PMGR_PTR_WR_STALL",
|
||||
"TGOx_PMGR_PTR_RD_STALL",
|
||||
"TGOx_PMGR_CMD_WR_STALL",
|
||||
"TGOx_WRBUF_ACTIVE",
|
||||
"TGOx_WRBUF_HIT",
|
||||
"TGOx_WRBUF_MISS",
|
||||
"TGOx_WRBUF_NO_FREE_LINE_STALL",
|
||||
"TGOx_WRBUF_NO_AXI_ID_STALL",
|
||||
"TGOx_WRBUF_AXI_STALL",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"TGOx_UTLB_TRANS",
|
||||
"TGOx_UTLB_TRANS_HIT",
|
||||
"TGOx_UTLB_TRANS_STALL",
|
||||
"TGOx_UTLB_TRANS_MISS_DELAY",
|
||||
"TGOx_UTLB_MMU_REQ",
|
||||
|
||||
/* Performance counters for the Shader Core */
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"TGOx_FRAG_ACTIVE",
|
||||
"TGOx_FRAG_PRIMITIVES",
|
||||
"TGOx_FRAG_PRIM_RAST",
|
||||
"TGOx_FRAG_FPK_ACTIVE",
|
||||
"TGOx_FRAG_STARVING",
|
||||
"TGOx_FRAG_WARPS",
|
||||
"TGOx_FRAG_PARTIAL_WARPS",
|
||||
"TGOx_FRAG_QUADS_RAST",
|
||||
"TGOx_FRAG_QUADS_EZS_TEST",
|
||||
"TGOx_FRAG_QUADS_EZS_UPDATE",
|
||||
"TGOx_FRAG_QUADS_EZS_KILL",
|
||||
"TGOx_FRAG_LZS_TEST",
|
||||
"TGOx_FRAG_LZS_KILL",
|
||||
"TGOx_WARP_REG_SIZE_64",
|
||||
"TGOx_FRAG_PTILES",
|
||||
"TGOx_FRAG_TRANS_ELIM",
|
||||
"TGOx_QUAD_FPK_KILLER",
|
||||
"TGOx_FULL_QUAD_WARPS",
|
||||
"TGOx_COMPUTE_ACTIVE",
|
||||
"TGOx_COMPUTE_TASKS",
|
||||
"TGOx_COMPUTE_WARPS",
|
||||
"TGOx_COMPUTE_STARVING",
|
||||
"TGOx_EXEC_CORE_ACTIVE",
|
||||
"TGOx_EXEC_ACTIVE",
|
||||
"TGOx_EXEC_INSTR_COUNT",
|
||||
"TGOx_EXEC_INSTR_DIVERGED",
|
||||
"TGOx_EXEC_INSTR_STARVING",
|
||||
"TGOx_ARITH_INSTR_SINGLE_FMA",
|
||||
"TGOx_ARITH_INSTR_DOUBLE",
|
||||
"TGOx_ARITH_INSTR_MSG",
|
||||
"TGOx_ARITH_INSTR_MSG_ONLY",
|
||||
"TGOx_TEX_MSGI_NUM_QUADS",
|
||||
"TGOx_TEX_DFCH_NUM_PASSES",
|
||||
"TGOx_TEX_DFCH_NUM_PASSES_MISS",
|
||||
"TGOx_TEX_DFCH_NUM_PASSES_MIP_MAP",
|
||||
"TGOx_TEX_TIDX_NUM_SPLIT_MIP_MAP",
|
||||
"TGOx_TEX_TFCH_NUM_LINES_FETCHED",
|
||||
"TGOx_TEX_TFCH_NUM_LINES_FETCHED_BLOCK",
|
||||
"TGOx_TEX_TFCH_NUM_OPERATIONS",
|
||||
"TGOx_TEX_FILT_NUM_OPERATIONS",
|
||||
"TGOx_LS_MEM_READ_FULL",
|
||||
"TGOx_LS_MEM_READ_SHORT",
|
||||
"TGOx_LS_MEM_WRITE_FULL",
|
||||
"TGOx_LS_MEM_WRITE_SHORT",
|
||||
"TGOx_LS_MEM_ATOMIC",
|
||||
"TGOx_VARY_INSTR",
|
||||
"TGOx_VARY_SLOT_32",
|
||||
"TGOx_VARY_SLOT_16",
|
||||
"TGOx_ATTR_INSTR",
|
||||
"TGOx_ARITH_INSTR_FP_MUL",
|
||||
"TGOx_BEATS_RD_FTC",
|
||||
"TGOx_BEATS_RD_FTC_EXT",
|
||||
"TGOx_BEATS_RD_LSC",
|
||||
"TGOx_BEATS_RD_LSC_EXT",
|
||||
"TGOx_BEATS_RD_TEX",
|
||||
"TGOx_BEATS_RD_TEX_EXT",
|
||||
"TGOx_BEATS_RD_OTHER",
|
||||
"TGOx_BEATS_WR_LSC_WB",
|
||||
"TGOx_BEATS_WR_TIB",
|
||||
"TGOx_BEATS_WR_LSC_OTHER",
|
||||
|
||||
/* Performance counters for the Memory System */
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"TGOx_MMU_REQUESTS",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"TGOx_L2_RD_MSG_IN",
|
||||
"TGOx_L2_RD_MSG_IN_STALL",
|
||||
"TGOx_L2_WR_MSG_IN",
|
||||
"TGOx_L2_WR_MSG_IN_STALL",
|
||||
"TGOx_L2_SNP_MSG_IN",
|
||||
"TGOx_L2_SNP_MSG_IN_STALL",
|
||||
"TGOx_L2_RD_MSG_OUT",
|
||||
"TGOx_L2_RD_MSG_OUT_STALL",
|
||||
"TGOx_L2_WR_MSG_OUT",
|
||||
"TGOx_L2_ANY_LOOKUP",
|
||||
"TGOx_L2_READ_LOOKUP",
|
||||
"TGOx_L2_WRITE_LOOKUP",
|
||||
"TGOx_L2_EXT_SNOOP_LOOKUP",
|
||||
"TGOx_L2_EXT_READ",
|
||||
"TGOx_L2_EXT_READ_NOSNP",
|
||||
"TGOx_L2_EXT_READ_UNIQUE",
|
||||
"TGOx_L2_EXT_READ_BEATS",
|
||||
"TGOx_L2_EXT_AR_STALL",
|
||||
"TGOx_L2_EXT_AR_CNT_Q1",
|
||||
"TGOx_L2_EXT_AR_CNT_Q2",
|
||||
"TGOx_L2_EXT_AR_CNT_Q3",
|
||||
"TGOx_L2_EXT_RRESP_0_127",
|
||||
"TGOx_L2_EXT_RRESP_128_191",
|
||||
"TGOx_L2_EXT_RRESP_192_255",
|
||||
"TGOx_L2_EXT_RRESP_256_319",
|
||||
"TGOx_L2_EXT_RRESP_320_383",
|
||||
"TGOx_L2_EXT_WRITE",
|
||||
"TGOx_L2_EXT_WRITE_NOSNP_FULL",
|
||||
"TGOx_L2_EXT_WRITE_NOSNP_PTL",
|
||||
"TGOx_L2_EXT_WRITE_SNP_FULL",
|
||||
"TGOx_L2_EXT_WRITE_SNP_PTL",
|
||||
"TGOx_L2_EXT_WRITE_BEATS",
|
||||
"TGOx_L2_EXT_W_STALL",
|
||||
"TGOx_L2_EXT_AW_CNT_Q1",
|
||||
"TGOx_L2_EXT_AW_CNT_Q2",
|
||||
"TGOx_L2_EXT_AW_CNT_Q3",
|
||||
"TGOx_L2_EXT_SNOOP",
|
||||
"TGOx_L2_EXT_SNOOP_STALL",
|
||||
"TGOx_L2_EXT_SNOOP_RESP_CLEAN",
|
||||
"TGOx_L2_EXT_SNOOP_RESP_DATA",
|
||||
"TGOx_L2_EXT_SNOOP_INTERNAL",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
};
|
||||
|
||||
#endif /* _KBASE_GATOR_HWCNT_NAMES_TGOX_H_ */
|
||||
@@ -1,296 +0,0 @@
|
||||
/*
|
||||
*
|
||||
* (C) COPYRIGHT 2016-2018 ARM Limited. All rights reserved.
|
||||
*
|
||||
* This program is free software and is provided to you under the terms of the
|
||||
* GNU General Public License version 2 as published by the Free Software
|
||||
* Foundation, and any use by you of this program is subject to the terms
|
||||
* of such GNU licence.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, you can access it online at
|
||||
* http://www.gnu.org/licenses/gpl-2.0.html.
|
||||
*
|
||||
* SPDX-License-Identifier: GPL-2.0
|
||||
*
|
||||
*/
|
||||
|
||||
/*
|
||||
* This header was autogenerated, it should not be edited.
|
||||
*/
|
||||
|
||||
#ifndef _KBASE_GATOR_HWCNT_NAMES_THEX_H_
|
||||
#define _KBASE_GATOR_HWCNT_NAMES_THEX_H_
|
||||
|
||||
static const char * const hardware_counters_mali_tHEx[] = {
|
||||
/* Performance counters for the Job Manager */
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"THEx_MESSAGES_SENT",
|
||||
"THEx_MESSAGES_RECEIVED",
|
||||
"THEx_GPU_ACTIVE",
|
||||
"THEx_IRQ_ACTIVE",
|
||||
"THEx_JS0_JOBS",
|
||||
"THEx_JS0_TASKS",
|
||||
"THEx_JS0_ACTIVE",
|
||||
"",
|
||||
"THEx_JS0_WAIT_READ",
|
||||
"THEx_JS0_WAIT_ISSUE",
|
||||
"THEx_JS0_WAIT_DEPEND",
|
||||
"THEx_JS0_WAIT_FINISH",
|
||||
"THEx_JS1_JOBS",
|
||||
"THEx_JS1_TASKS",
|
||||
"THEx_JS1_ACTIVE",
|
||||
"",
|
||||
"THEx_JS1_WAIT_READ",
|
||||
"THEx_JS1_WAIT_ISSUE",
|
||||
"THEx_JS1_WAIT_DEPEND",
|
||||
"THEx_JS1_WAIT_FINISH",
|
||||
"THEx_JS2_JOBS",
|
||||
"THEx_JS2_TASKS",
|
||||
"THEx_JS2_ACTIVE",
|
||||
"",
|
||||
"THEx_JS2_WAIT_READ",
|
||||
"THEx_JS2_WAIT_ISSUE",
|
||||
"THEx_JS2_WAIT_DEPEND",
|
||||
"THEx_JS2_WAIT_FINISH",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
|
||||
/* Performance counters for the Tiler */
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"THEx_TILER_ACTIVE",
|
||||
"THEx_JOBS_PROCESSED",
|
||||
"THEx_TRIANGLES",
|
||||
"THEx_LINES",
|
||||
"THEx_POINTS",
|
||||
"THEx_FRONT_FACING",
|
||||
"THEx_BACK_FACING",
|
||||
"THEx_PRIM_VISIBLE",
|
||||
"THEx_PRIM_CULLED",
|
||||
"THEx_PRIM_CLIPPED",
|
||||
"THEx_PRIM_SAT_CULLED",
|
||||
"THEx_BIN_ALLOC_INIT",
|
||||
"THEx_BIN_ALLOC_OVERFLOW",
|
||||
"THEx_BUS_READ",
|
||||
"",
|
||||
"THEx_BUS_WRITE",
|
||||
"THEx_LOADING_DESC",
|
||||
"THEx_IDVS_POS_SHAD_REQ",
|
||||
"THEx_IDVS_POS_SHAD_WAIT",
|
||||
"THEx_IDVS_POS_SHAD_STALL",
|
||||
"THEx_IDVS_POS_FIFO_FULL",
|
||||
"THEx_PREFETCH_STALL",
|
||||
"THEx_VCACHE_HIT",
|
||||
"THEx_VCACHE_MISS",
|
||||
"THEx_VCACHE_LINE_WAIT",
|
||||
"THEx_VFETCH_POS_READ_WAIT",
|
||||
"THEx_VFETCH_VERTEX_WAIT",
|
||||
"THEx_VFETCH_STALL",
|
||||
"THEx_PRIMASSY_STALL",
|
||||
"THEx_BBOX_GEN_STALL",
|
||||
"THEx_IDVS_VBU_HIT",
|
||||
"THEx_IDVS_VBU_MISS",
|
||||
"THEx_IDVS_VBU_LINE_DEALLOCATE",
|
||||
"THEx_IDVS_VAR_SHAD_REQ",
|
||||
"THEx_IDVS_VAR_SHAD_STALL",
|
||||
"THEx_BINNER_STALL",
|
||||
"THEx_ITER_STALL",
|
||||
"THEx_COMPRESS_MISS",
|
||||
"THEx_COMPRESS_STALL",
|
||||
"THEx_PCACHE_HIT",
|
||||
"THEx_PCACHE_MISS",
|
||||
"THEx_PCACHE_MISS_STALL",
|
||||
"THEx_PCACHE_EVICT_STALL",
|
||||
"THEx_PMGR_PTR_WR_STALL",
|
||||
"THEx_PMGR_PTR_RD_STALL",
|
||||
"THEx_PMGR_CMD_WR_STALL",
|
||||
"THEx_WRBUF_ACTIVE",
|
||||
"THEx_WRBUF_HIT",
|
||||
"THEx_WRBUF_MISS",
|
||||
"THEx_WRBUF_NO_FREE_LINE_STALL",
|
||||
"THEx_WRBUF_NO_AXI_ID_STALL",
|
||||
"THEx_WRBUF_AXI_STALL",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"THEx_UTLB_TRANS",
|
||||
"THEx_UTLB_TRANS_HIT",
|
||||
"THEx_UTLB_TRANS_STALL",
|
||||
"THEx_UTLB_TRANS_MISS_DELAY",
|
||||
"THEx_UTLB_MMU_REQ",
|
||||
|
||||
/* Performance counters for the Shader Core */
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"THEx_FRAG_ACTIVE",
|
||||
"THEx_FRAG_PRIMITIVES",
|
||||
"THEx_FRAG_PRIM_RAST",
|
||||
"THEx_FRAG_FPK_ACTIVE",
|
||||
"THEx_FRAG_STARVING",
|
||||
"THEx_FRAG_WARPS",
|
||||
"THEx_FRAG_PARTIAL_WARPS",
|
||||
"THEx_FRAG_QUADS_RAST",
|
||||
"THEx_FRAG_QUADS_EZS_TEST",
|
||||
"THEx_FRAG_QUADS_EZS_UPDATE",
|
||||
"THEx_FRAG_QUADS_EZS_KILL",
|
||||
"THEx_FRAG_LZS_TEST",
|
||||
"THEx_FRAG_LZS_KILL",
|
||||
"",
|
||||
"THEx_FRAG_PTILES",
|
||||
"THEx_FRAG_TRANS_ELIM",
|
||||
"THEx_QUAD_FPK_KILLER",
|
||||
"",
|
||||
"THEx_COMPUTE_ACTIVE",
|
||||
"THEx_COMPUTE_TASKS",
|
||||
"THEx_COMPUTE_WARPS",
|
||||
"THEx_COMPUTE_STARVING",
|
||||
"THEx_EXEC_CORE_ACTIVE",
|
||||
"THEx_EXEC_ACTIVE",
|
||||
"THEx_EXEC_INSTR_COUNT",
|
||||
"THEx_EXEC_INSTR_DIVERGED",
|
||||
"THEx_EXEC_INSTR_STARVING",
|
||||
"THEx_ARITH_INSTR_SINGLE_FMA",
|
||||
"THEx_ARITH_INSTR_DOUBLE",
|
||||
"THEx_ARITH_INSTR_MSG",
|
||||
"THEx_ARITH_INSTR_MSG_ONLY",
|
||||
"THEx_TEX_INSTR",
|
||||
"THEx_TEX_INSTR_MIPMAP",
|
||||
"THEx_TEX_INSTR_COMPRESSED",
|
||||
"THEx_TEX_INSTR_3D",
|
||||
"THEx_TEX_INSTR_TRILINEAR",
|
||||
"THEx_TEX_COORD_ISSUE",
|
||||
"THEx_TEX_COORD_STALL",
|
||||
"THEx_TEX_STARVE_CACHE",
|
||||
"THEx_TEX_STARVE_FILTER",
|
||||
"THEx_LS_MEM_READ_FULL",
|
||||
"THEx_LS_MEM_READ_SHORT",
|
||||
"THEx_LS_MEM_WRITE_FULL",
|
||||
"THEx_LS_MEM_WRITE_SHORT",
|
||||
"THEx_LS_MEM_ATOMIC",
|
||||
"THEx_VARY_INSTR",
|
||||
"THEx_VARY_SLOT_32",
|
||||
"THEx_VARY_SLOT_16",
|
||||
"THEx_ATTR_INSTR",
|
||||
"THEx_ARITH_INSTR_FP_MUL",
|
||||
"THEx_BEATS_RD_FTC",
|
||||
"THEx_BEATS_RD_FTC_EXT",
|
||||
"THEx_BEATS_RD_LSC",
|
||||
"THEx_BEATS_RD_LSC_EXT",
|
||||
"THEx_BEATS_RD_TEX",
|
||||
"THEx_BEATS_RD_TEX_EXT",
|
||||
"THEx_BEATS_RD_OTHER",
|
||||
"THEx_BEATS_WR_LSC",
|
||||
"THEx_BEATS_WR_TIB",
|
||||
"",
|
||||
|
||||
/* Performance counters for the Memory System */
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"THEx_MMU_REQUESTS",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"THEx_L2_RD_MSG_IN",
|
||||
"THEx_L2_RD_MSG_IN_STALL",
|
||||
"THEx_L2_WR_MSG_IN",
|
||||
"THEx_L2_WR_MSG_IN_STALL",
|
||||
"THEx_L2_SNP_MSG_IN",
|
||||
"THEx_L2_SNP_MSG_IN_STALL",
|
||||
"THEx_L2_RD_MSG_OUT",
|
||||
"THEx_L2_RD_MSG_OUT_STALL",
|
||||
"THEx_L2_WR_MSG_OUT",
|
||||
"THEx_L2_ANY_LOOKUP",
|
||||
"THEx_L2_READ_LOOKUP",
|
||||
"THEx_L2_WRITE_LOOKUP",
|
||||
"THEx_L2_EXT_SNOOP_LOOKUP",
|
||||
"THEx_L2_EXT_READ",
|
||||
"THEx_L2_EXT_READ_NOSNP",
|
||||
"THEx_L2_EXT_READ_UNIQUE",
|
||||
"THEx_L2_EXT_READ_BEATS",
|
||||
"THEx_L2_EXT_AR_STALL",
|
||||
"THEx_L2_EXT_AR_CNT_Q1",
|
||||
"THEx_L2_EXT_AR_CNT_Q2",
|
||||
"THEx_L2_EXT_AR_CNT_Q3",
|
||||
"THEx_L2_EXT_RRESP_0_127",
|
||||
"THEx_L2_EXT_RRESP_128_191",
|
||||
"THEx_L2_EXT_RRESP_192_255",
|
||||
"THEx_L2_EXT_RRESP_256_319",
|
||||
"THEx_L2_EXT_RRESP_320_383",
|
||||
"THEx_L2_EXT_WRITE",
|
||||
"THEx_L2_EXT_WRITE_NOSNP_FULL",
|
||||
"THEx_L2_EXT_WRITE_NOSNP_PTL",
|
||||
"THEx_L2_EXT_WRITE_SNP_FULL",
|
||||
"THEx_L2_EXT_WRITE_SNP_PTL",
|
||||
"THEx_L2_EXT_WRITE_BEATS",
|
||||
"THEx_L2_EXT_W_STALL",
|
||||
"THEx_L2_EXT_AW_CNT_Q1",
|
||||
"THEx_L2_EXT_AW_CNT_Q2",
|
||||
"THEx_L2_EXT_AW_CNT_Q3",
|
||||
"THEx_L2_EXT_SNOOP",
|
||||
"THEx_L2_EXT_SNOOP_STALL",
|
||||
"THEx_L2_EXT_SNOOP_RESP_CLEAN",
|
||||
"THEx_L2_EXT_SNOOP_RESP_DATA",
|
||||
"THEx_L2_EXT_SNOOP_INTERNAL",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
};
|
||||
|
||||
#endif /* _KBASE_GATOR_HWCNT_NAMES_THEX_H_ */
|
||||
@@ -1,296 +0,0 @@
|
||||
/*
|
||||
*
|
||||
* (C) COPYRIGHT 2016-2018 ARM Limited. All rights reserved.
|
||||
*
|
||||
* This program is free software and is provided to you under the terms of the
|
||||
* GNU General Public License version 2 as published by the Free Software
|
||||
* Foundation, and any use by you of this program is subject to the terms
|
||||
* of such GNU licence.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, you can access it online at
|
||||
* http://www.gnu.org/licenses/gpl-2.0.html.
|
||||
*
|
||||
* SPDX-License-Identifier: GPL-2.0
|
||||
*
|
||||
*/
|
||||
|
||||
/*
|
||||
* This header was autogenerated, it should not be edited.
|
||||
*/
|
||||
|
||||
#ifndef _KBASE_GATOR_HWCNT_NAMES_TKAX_H_
|
||||
#define _KBASE_GATOR_HWCNT_NAMES_TKAX_H_
|
||||
|
||||
static const char * const hardware_counters_mali_tKAx[] = {
|
||||
/* Performance counters for the Job Manager */
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"TKAx_MESSAGES_SENT",
|
||||
"TKAx_MESSAGES_RECEIVED",
|
||||
"TKAx_GPU_ACTIVE",
|
||||
"TKAx_IRQ_ACTIVE",
|
||||
"TKAx_JS0_JOBS",
|
||||
"TKAx_JS0_TASKS",
|
||||
"TKAx_JS0_ACTIVE",
|
||||
"",
|
||||
"TKAx_JS0_WAIT_READ",
|
||||
"TKAx_JS0_WAIT_ISSUE",
|
||||
"TKAx_JS0_WAIT_DEPEND",
|
||||
"TKAx_JS0_WAIT_FINISH",
|
||||
"TKAx_JS1_JOBS",
|
||||
"TKAx_JS1_TASKS",
|
||||
"TKAx_JS1_ACTIVE",
|
||||
"",
|
||||
"TKAx_JS1_WAIT_READ",
|
||||
"TKAx_JS1_WAIT_ISSUE",
|
||||
"TKAx_JS1_WAIT_DEPEND",
|
||||
"TKAx_JS1_WAIT_FINISH",
|
||||
"TKAx_JS2_JOBS",
|
||||
"TKAx_JS2_TASKS",
|
||||
"TKAx_JS2_ACTIVE",
|
||||
"",
|
||||
"TKAx_JS2_WAIT_READ",
|
||||
"TKAx_JS2_WAIT_ISSUE",
|
||||
"TKAx_JS2_WAIT_DEPEND",
|
||||
"TKAx_JS2_WAIT_FINISH",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
|
||||
/* Performance counters for the Tiler */
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"TKAx_TILER_ACTIVE",
|
||||
"TKAx_JOBS_PROCESSED",
|
||||
"TKAx_TRIANGLES",
|
||||
"TKAx_LINES",
|
||||
"TKAx_POINTS",
|
||||
"TKAx_FRONT_FACING",
|
||||
"TKAx_BACK_FACING",
|
||||
"TKAx_PRIM_VISIBLE",
|
||||
"TKAx_PRIM_CULLED",
|
||||
"TKAx_PRIM_CLIPPED",
|
||||
"TKAx_PRIM_SAT_CULLED",
|
||||
"TKAx_BIN_ALLOC_INIT",
|
||||
"TKAx_BIN_ALLOC_OVERFLOW",
|
||||
"TKAx_BUS_READ",
|
||||
"",
|
||||
"TKAx_BUS_WRITE",
|
||||
"TKAx_LOADING_DESC",
|
||||
"TKAx_IDVS_POS_SHAD_REQ",
|
||||
"TKAx_IDVS_POS_SHAD_WAIT",
|
||||
"TKAx_IDVS_POS_SHAD_STALL",
|
||||
"TKAx_IDVS_POS_FIFO_FULL",
|
||||
"TKAx_PREFETCH_STALL",
|
||||
"TKAx_VCACHE_HIT",
|
||||
"TKAx_VCACHE_MISS",
|
||||
"TKAx_VCACHE_LINE_WAIT",
|
||||
"TKAx_VFETCH_POS_READ_WAIT",
|
||||
"TKAx_VFETCH_VERTEX_WAIT",
|
||||
"TKAx_VFETCH_STALL",
|
||||
"TKAx_PRIMASSY_STALL",
|
||||
"TKAx_BBOX_GEN_STALL",
|
||||
"TKAx_IDVS_VBU_HIT",
|
||||
"TKAx_IDVS_VBU_MISS",
|
||||
"TKAx_IDVS_VBU_LINE_DEALLOCATE",
|
||||
"TKAx_IDVS_VAR_SHAD_REQ",
|
||||
"TKAx_IDVS_VAR_SHAD_STALL",
|
||||
"TKAx_BINNER_STALL",
|
||||
"TKAx_ITER_STALL",
|
||||
"TKAx_COMPRESS_MISS",
|
||||
"TKAx_COMPRESS_STALL",
|
||||
"TKAx_PCACHE_HIT",
|
||||
"TKAx_PCACHE_MISS",
|
||||
"TKAx_PCACHE_MISS_STALL",
|
||||
"TKAx_PCACHE_EVICT_STALL",
|
||||
"TKAx_PMGR_PTR_WR_STALL",
|
||||
"TKAx_PMGR_PTR_RD_STALL",
|
||||
"TKAx_PMGR_CMD_WR_STALL",
|
||||
"TKAx_WRBUF_ACTIVE",
|
||||
"TKAx_WRBUF_HIT",
|
||||
"TKAx_WRBUF_MISS",
|
||||
"TKAx_WRBUF_NO_FREE_LINE_STALL",
|
||||
"TKAx_WRBUF_NO_AXI_ID_STALL",
|
||||
"TKAx_WRBUF_AXI_STALL",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"TKAx_UTLB_TRANS",
|
||||
"TKAx_UTLB_TRANS_HIT",
|
||||
"TKAx_UTLB_TRANS_STALL",
|
||||
"TKAx_UTLB_TRANS_MISS_DELAY",
|
||||
"TKAx_UTLB_MMU_REQ",
|
||||
|
||||
/* Performance counters for the Shader Core */
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"TKAx_FRAG_ACTIVE",
|
||||
"TKAx_FRAG_PRIMITIVES",
|
||||
"TKAx_FRAG_PRIM_RAST",
|
||||
"TKAx_FRAG_FPK_ACTIVE",
|
||||
"TKAx_FRAG_STARVING",
|
||||
"TKAx_FRAG_WARPS",
|
||||
"TKAx_FRAG_PARTIAL_WARPS",
|
||||
"TKAx_FRAG_QUADS_RAST",
|
||||
"TKAx_FRAG_QUADS_EZS_TEST",
|
||||
"TKAx_FRAG_QUADS_EZS_UPDATE",
|
||||
"TKAx_FRAG_QUADS_EZS_KILL",
|
||||
"TKAx_FRAG_LZS_TEST",
|
||||
"TKAx_FRAG_LZS_KILL",
|
||||
"TKAx_WARP_REG_SIZE_64",
|
||||
"TKAx_FRAG_PTILES",
|
||||
"TKAx_FRAG_TRANS_ELIM",
|
||||
"TKAx_QUAD_FPK_KILLER",
|
||||
"TKAx_FULL_QUAD_WARPS",
|
||||
"TKAx_COMPUTE_ACTIVE",
|
||||
"TKAx_COMPUTE_TASKS",
|
||||
"TKAx_COMPUTE_WARPS",
|
||||
"TKAx_COMPUTE_STARVING",
|
||||
"TKAx_EXEC_CORE_ACTIVE",
|
||||
"TKAx_EXEC_ACTIVE",
|
||||
"TKAx_EXEC_INSTR_COUNT",
|
||||
"TKAx_EXEC_INSTR_DIVERGED",
|
||||
"TKAx_EXEC_INSTR_STARVING",
|
||||
"TKAx_ARITH_INSTR_SINGLE_FMA",
|
||||
"TKAx_ARITH_INSTR_DOUBLE",
|
||||
"TKAx_ARITH_INSTR_MSG",
|
||||
"TKAx_ARITH_INSTR_MSG_ONLY",
|
||||
"TKAx_TEX_MSGI_NUM_QUADS",
|
||||
"TKAx_TEX_DFCH_NUM_PASSES",
|
||||
"TKAx_TEX_DFCH_NUM_PASSES_MISS",
|
||||
"TKAx_TEX_DFCH_NUM_PASSES_MIP_MAP",
|
||||
"TKAx_TEX_TIDX_NUM_SPLIT_MIP_MAP",
|
||||
"TKAx_TEX_TFCH_NUM_LINES_FETCHED",
|
||||
"TKAx_TEX_TFCH_NUM_LINES_FETCHED_BLOCK",
|
||||
"TKAx_TEX_TFCH_NUM_OPERATIONS",
|
||||
"TKAx_TEX_FILT_NUM_OPERATIONS",
|
||||
"TKAx_LS_MEM_READ_FULL",
|
||||
"TKAx_LS_MEM_READ_SHORT",
|
||||
"TKAx_LS_MEM_WRITE_FULL",
|
||||
"TKAx_LS_MEM_WRITE_SHORT",
|
||||
"TKAx_LS_MEM_ATOMIC",
|
||||
"TKAx_VARY_INSTR",
|
||||
"TKAx_VARY_SLOT_32",
|
||||
"TKAx_VARY_SLOT_16",
|
||||
"TKAx_ATTR_INSTR",
|
||||
"TKAx_ARITH_INSTR_FP_MUL",
|
||||
"TKAx_BEATS_RD_FTC",
|
||||
"TKAx_BEATS_RD_FTC_EXT",
|
||||
"TKAx_BEATS_RD_LSC",
|
||||
"TKAx_BEATS_RD_LSC_EXT",
|
||||
"TKAx_BEATS_RD_TEX",
|
||||
"TKAx_BEATS_RD_TEX_EXT",
|
||||
"TKAx_BEATS_RD_OTHER",
|
||||
"TKAx_BEATS_WR_LSC_OTHER",
|
||||
"TKAx_BEATS_WR_TIB",
|
||||
"TKAx_BEATS_WR_LSC_WB",
|
||||
|
||||
/* Performance counters for the Memory System */
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"TKAx_MMU_REQUESTS",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"TKAx_L2_RD_MSG_IN",
|
||||
"TKAx_L2_RD_MSG_IN_STALL",
|
||||
"TKAx_L2_WR_MSG_IN",
|
||||
"TKAx_L2_WR_MSG_IN_STALL",
|
||||
"TKAx_L2_SNP_MSG_IN",
|
||||
"TKAx_L2_SNP_MSG_IN_STALL",
|
||||
"TKAx_L2_RD_MSG_OUT",
|
||||
"TKAx_L2_RD_MSG_OUT_STALL",
|
||||
"TKAx_L2_WR_MSG_OUT",
|
||||
"TKAx_L2_ANY_LOOKUP",
|
||||
"TKAx_L2_READ_LOOKUP",
|
||||
"TKAx_L2_WRITE_LOOKUP",
|
||||
"TKAx_L2_EXT_SNOOP_LOOKUP",
|
||||
"TKAx_L2_EXT_READ",
|
||||
"TKAx_L2_EXT_READ_NOSNP",
|
||||
"TKAx_L2_EXT_READ_UNIQUE",
|
||||
"TKAx_L2_EXT_READ_BEATS",
|
||||
"TKAx_L2_EXT_AR_STALL",
|
||||
"TKAx_L2_EXT_AR_CNT_Q1",
|
||||
"TKAx_L2_EXT_AR_CNT_Q2",
|
||||
"TKAx_L2_EXT_AR_CNT_Q3",
|
||||
"TKAx_L2_EXT_RRESP_0_127",
|
||||
"TKAx_L2_EXT_RRESP_128_191",
|
||||
"TKAx_L2_EXT_RRESP_192_255",
|
||||
"TKAx_L2_EXT_RRESP_256_319",
|
||||
"TKAx_L2_EXT_RRESP_320_383",
|
||||
"TKAx_L2_EXT_WRITE",
|
||||
"TKAx_L2_EXT_WRITE_NOSNP_FULL",
|
||||
"TKAx_L2_EXT_WRITE_NOSNP_PTL",
|
||||
"TKAx_L2_EXT_WRITE_SNP_FULL",
|
||||
"TKAx_L2_EXT_WRITE_SNP_PTL",
|
||||
"TKAx_L2_EXT_WRITE_BEATS",
|
||||
"TKAx_L2_EXT_W_STALL",
|
||||
"TKAx_L2_EXT_AW_CNT_Q1",
|
||||
"TKAx_L2_EXT_AW_CNT_Q2",
|
||||
"TKAx_L2_EXT_AW_CNT_Q3",
|
||||
"TKAx_L2_EXT_SNOOP",
|
||||
"TKAx_L2_EXT_SNOOP_STALL",
|
||||
"TKAx_L2_EXT_SNOOP_RESP_CLEAN",
|
||||
"TKAx_L2_EXT_SNOOP_RESP_DATA",
|
||||
"TKAx_L2_EXT_SNOOP_INTERNAL",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
};
|
||||
|
||||
#endif /* _KBASE_GATOR_HWCNT_NAMES_TKAX_H_ */
|
||||
@@ -1,296 +0,0 @@
|
||||
/*
|
||||
*
|
||||
* (C) COPYRIGHT 2016-2018 ARM Limited. All rights reserved.
|
||||
*
|
||||
* This program is free software and is provided to you under the terms of the
|
||||
* GNU General Public License version 2 as published by the Free Software
|
||||
* Foundation, and any use by you of this program is subject to the terms
|
||||
* of such GNU licence.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, you can access it online at
|
||||
* http://www.gnu.org/licenses/gpl-2.0.html.
|
||||
*
|
||||
* SPDX-License-Identifier: GPL-2.0
|
||||
*
|
||||
*/
|
||||
|
||||
/*
|
||||
* This header was autogenerated, it should not be edited.
|
||||
*/
|
||||
|
||||
#ifndef _KBASE_GATOR_HWCNT_NAMES_TMIX_H_
|
||||
#define _KBASE_GATOR_HWCNT_NAMES_TMIX_H_
|
||||
|
||||
static const char * const hardware_counters_mali_tMIx[] = {
|
||||
/* Performance counters for the Job Manager */
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"TMIx_MESSAGES_SENT",
|
||||
"TMIx_MESSAGES_RECEIVED",
|
||||
"TMIx_GPU_ACTIVE",
|
||||
"TMIx_IRQ_ACTIVE",
|
||||
"TMIx_JS0_JOBS",
|
||||
"TMIx_JS0_TASKS",
|
||||
"TMIx_JS0_ACTIVE",
|
||||
"",
|
||||
"TMIx_JS0_WAIT_READ",
|
||||
"TMIx_JS0_WAIT_ISSUE",
|
||||
"TMIx_JS0_WAIT_DEPEND",
|
||||
"TMIx_JS0_WAIT_FINISH",
|
||||
"TMIx_JS1_JOBS",
|
||||
"TMIx_JS1_TASKS",
|
||||
"TMIx_JS1_ACTIVE",
|
||||
"",
|
||||
"TMIx_JS1_WAIT_READ",
|
||||
"TMIx_JS1_WAIT_ISSUE",
|
||||
"TMIx_JS1_WAIT_DEPEND",
|
||||
"TMIx_JS1_WAIT_FINISH",
|
||||
"TMIx_JS2_JOBS",
|
||||
"TMIx_JS2_TASKS",
|
||||
"TMIx_JS2_ACTIVE",
|
||||
"",
|
||||
"TMIx_JS2_WAIT_READ",
|
||||
"TMIx_JS2_WAIT_ISSUE",
|
||||
"TMIx_JS2_WAIT_DEPEND",
|
||||
"TMIx_JS2_WAIT_FINISH",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
|
||||
/* Performance counters for the Tiler */
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"TMIx_TILER_ACTIVE",
|
||||
"TMIx_JOBS_PROCESSED",
|
||||
"TMIx_TRIANGLES",
|
||||
"TMIx_LINES",
|
||||
"TMIx_POINTS",
|
||||
"TMIx_FRONT_FACING",
|
||||
"TMIx_BACK_FACING",
|
||||
"TMIx_PRIM_VISIBLE",
|
||||
"TMIx_PRIM_CULLED",
|
||||
"TMIx_PRIM_CLIPPED",
|
||||
"TMIx_PRIM_SAT_CULLED",
|
||||
"TMIx_BIN_ALLOC_INIT",
|
||||
"TMIx_BIN_ALLOC_OVERFLOW",
|
||||
"TMIx_BUS_READ",
|
||||
"",
|
||||
"TMIx_BUS_WRITE",
|
||||
"TMIx_LOADING_DESC",
|
||||
"TMIx_IDVS_POS_SHAD_REQ",
|
||||
"TMIx_IDVS_POS_SHAD_WAIT",
|
||||
"TMIx_IDVS_POS_SHAD_STALL",
|
||||
"TMIx_IDVS_POS_FIFO_FULL",
|
||||
"TMIx_PREFETCH_STALL",
|
||||
"TMIx_VCACHE_HIT",
|
||||
"TMIx_VCACHE_MISS",
|
||||
"TMIx_VCACHE_LINE_WAIT",
|
||||
"TMIx_VFETCH_POS_READ_WAIT",
|
||||
"TMIx_VFETCH_VERTEX_WAIT",
|
||||
"TMIx_VFETCH_STALL",
|
||||
"TMIx_PRIMASSY_STALL",
|
||||
"TMIx_BBOX_GEN_STALL",
|
||||
"TMIx_IDVS_VBU_HIT",
|
||||
"TMIx_IDVS_VBU_MISS",
|
||||
"TMIx_IDVS_VBU_LINE_DEALLOCATE",
|
||||
"TMIx_IDVS_VAR_SHAD_REQ",
|
||||
"TMIx_IDVS_VAR_SHAD_STALL",
|
||||
"TMIx_BINNER_STALL",
|
||||
"TMIx_ITER_STALL",
|
||||
"TMIx_COMPRESS_MISS",
|
||||
"TMIx_COMPRESS_STALL",
|
||||
"TMIx_PCACHE_HIT",
|
||||
"TMIx_PCACHE_MISS",
|
||||
"TMIx_PCACHE_MISS_STALL",
|
||||
"TMIx_PCACHE_EVICT_STALL",
|
||||
"TMIx_PMGR_PTR_WR_STALL",
|
||||
"TMIx_PMGR_PTR_RD_STALL",
|
||||
"TMIx_PMGR_CMD_WR_STALL",
|
||||
"TMIx_WRBUF_ACTIVE",
|
||||
"TMIx_WRBUF_HIT",
|
||||
"TMIx_WRBUF_MISS",
|
||||
"TMIx_WRBUF_NO_FREE_LINE_STALL",
|
||||
"TMIx_WRBUF_NO_AXI_ID_STALL",
|
||||
"TMIx_WRBUF_AXI_STALL",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"TMIx_UTLB_TRANS",
|
||||
"TMIx_UTLB_TRANS_HIT",
|
||||
"TMIx_UTLB_TRANS_STALL",
|
||||
"TMIx_UTLB_TRANS_MISS_DELAY",
|
||||
"TMIx_UTLB_MMU_REQ",
|
||||
|
||||
/* Performance counters for the Shader Core */
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"TMIx_FRAG_ACTIVE",
|
||||
"TMIx_FRAG_PRIMITIVES",
|
||||
"TMIx_FRAG_PRIM_RAST",
|
||||
"TMIx_FRAG_FPK_ACTIVE",
|
||||
"TMIx_FRAG_STARVING",
|
||||
"TMIx_FRAG_WARPS",
|
||||
"TMIx_FRAG_PARTIAL_WARPS",
|
||||
"TMIx_FRAG_QUADS_RAST",
|
||||
"TMIx_FRAG_QUADS_EZS_TEST",
|
||||
"TMIx_FRAG_QUADS_EZS_UPDATE",
|
||||
"TMIx_FRAG_QUADS_EZS_KILL",
|
||||
"TMIx_FRAG_LZS_TEST",
|
||||
"TMIx_FRAG_LZS_KILL",
|
||||
"",
|
||||
"TMIx_FRAG_PTILES",
|
||||
"TMIx_FRAG_TRANS_ELIM",
|
||||
"TMIx_QUAD_FPK_KILLER",
|
||||
"",
|
||||
"TMIx_COMPUTE_ACTIVE",
|
||||
"TMIx_COMPUTE_TASKS",
|
||||
"TMIx_COMPUTE_WARPS",
|
||||
"TMIx_COMPUTE_STARVING",
|
||||
"TMIx_EXEC_CORE_ACTIVE",
|
||||
"TMIx_EXEC_ACTIVE",
|
||||
"TMIx_EXEC_INSTR_COUNT",
|
||||
"TMIx_EXEC_INSTR_DIVERGED",
|
||||
"TMIx_EXEC_INSTR_STARVING",
|
||||
"TMIx_ARITH_INSTR_SINGLE_FMA",
|
||||
"TMIx_ARITH_INSTR_DOUBLE",
|
||||
"TMIx_ARITH_INSTR_MSG",
|
||||
"TMIx_ARITH_INSTR_MSG_ONLY",
|
||||
"TMIx_TEX_INSTR",
|
||||
"TMIx_TEX_INSTR_MIPMAP",
|
||||
"TMIx_TEX_INSTR_COMPRESSED",
|
||||
"TMIx_TEX_INSTR_3D",
|
||||
"TMIx_TEX_INSTR_TRILINEAR",
|
||||
"TMIx_TEX_COORD_ISSUE",
|
||||
"TMIx_TEX_COORD_STALL",
|
||||
"TMIx_TEX_STARVE_CACHE",
|
||||
"TMIx_TEX_STARVE_FILTER",
|
||||
"TMIx_LS_MEM_READ_FULL",
|
||||
"TMIx_LS_MEM_READ_SHORT",
|
||||
"TMIx_LS_MEM_WRITE_FULL",
|
||||
"TMIx_LS_MEM_WRITE_SHORT",
|
||||
"TMIx_LS_MEM_ATOMIC",
|
||||
"TMIx_VARY_INSTR",
|
||||
"TMIx_VARY_SLOT_32",
|
||||
"TMIx_VARY_SLOT_16",
|
||||
"TMIx_ATTR_INSTR",
|
||||
"TMIx_ARITH_INSTR_FP_MUL",
|
||||
"TMIx_BEATS_RD_FTC",
|
||||
"TMIx_BEATS_RD_FTC_EXT",
|
||||
"TMIx_BEATS_RD_LSC",
|
||||
"TMIx_BEATS_RD_LSC_EXT",
|
||||
"TMIx_BEATS_RD_TEX",
|
||||
"TMIx_BEATS_RD_TEX_EXT",
|
||||
"TMIx_BEATS_RD_OTHER",
|
||||
"TMIx_BEATS_WR_LSC",
|
||||
"TMIx_BEATS_WR_TIB",
|
||||
"",
|
||||
|
||||
/* Performance counters for the Memory System */
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"TMIx_MMU_REQUESTS",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"TMIx_L2_RD_MSG_IN",
|
||||
"TMIx_L2_RD_MSG_IN_STALL",
|
||||
"TMIx_L2_WR_MSG_IN",
|
||||
"TMIx_L2_WR_MSG_IN_STALL",
|
||||
"TMIx_L2_SNP_MSG_IN",
|
||||
"TMIx_L2_SNP_MSG_IN_STALL",
|
||||
"TMIx_L2_RD_MSG_OUT",
|
||||
"TMIx_L2_RD_MSG_OUT_STALL",
|
||||
"TMIx_L2_WR_MSG_OUT",
|
||||
"TMIx_L2_ANY_LOOKUP",
|
||||
"TMIx_L2_READ_LOOKUP",
|
||||
"TMIx_L2_WRITE_LOOKUP",
|
||||
"TMIx_L2_EXT_SNOOP_LOOKUP",
|
||||
"TMIx_L2_EXT_READ",
|
||||
"TMIx_L2_EXT_READ_NOSNP",
|
||||
"TMIx_L2_EXT_READ_UNIQUE",
|
||||
"TMIx_L2_EXT_READ_BEATS",
|
||||
"TMIx_L2_EXT_AR_STALL",
|
||||
"TMIx_L2_EXT_AR_CNT_Q1",
|
||||
"TMIx_L2_EXT_AR_CNT_Q2",
|
||||
"TMIx_L2_EXT_AR_CNT_Q3",
|
||||
"TMIx_L2_EXT_RRESP_0_127",
|
||||
"TMIx_L2_EXT_RRESP_128_191",
|
||||
"TMIx_L2_EXT_RRESP_192_255",
|
||||
"TMIx_L2_EXT_RRESP_256_319",
|
||||
"TMIx_L2_EXT_RRESP_320_383",
|
||||
"TMIx_L2_EXT_WRITE",
|
||||
"TMIx_L2_EXT_WRITE_NOSNP_FULL",
|
||||
"TMIx_L2_EXT_WRITE_NOSNP_PTL",
|
||||
"TMIx_L2_EXT_WRITE_SNP_FULL",
|
||||
"TMIx_L2_EXT_WRITE_SNP_PTL",
|
||||
"TMIx_L2_EXT_WRITE_BEATS",
|
||||
"TMIx_L2_EXT_W_STALL",
|
||||
"TMIx_L2_EXT_AW_CNT_Q1",
|
||||
"TMIx_L2_EXT_AW_CNT_Q2",
|
||||
"TMIx_L2_EXT_AW_CNT_Q3",
|
||||
"TMIx_L2_EXT_SNOOP",
|
||||
"TMIx_L2_EXT_SNOOP_STALL",
|
||||
"TMIx_L2_EXT_SNOOP_RESP_CLEAN",
|
||||
"TMIx_L2_EXT_SNOOP_RESP_DATA",
|
||||
"TMIx_L2_EXT_SNOOP_INTERNAL",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
};
|
||||
|
||||
#endif /* _KBASE_GATOR_HWCNT_NAMES_TMIX_H_ */
|
||||
@@ -1,296 +0,0 @@
|
||||
/*
|
||||
*
|
||||
* (C) COPYRIGHT 2016-2018 ARM Limited. All rights reserved.
|
||||
*
|
||||
* This program is free software and is provided to you under the terms of the
|
||||
* GNU General Public License version 2 as published by the Free Software
|
||||
* Foundation, and any use by you of this program is subject to the terms
|
||||
* of such GNU licence.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, you can access it online at
|
||||
* http://www.gnu.org/licenses/gpl-2.0.html.
|
||||
*
|
||||
* SPDX-License-Identifier: GPL-2.0
|
||||
*
|
||||
*/
|
||||
|
||||
/*
|
||||
* This header was autogenerated, it should not be edited.
|
||||
*/
|
||||
|
||||
#ifndef _KBASE_GATOR_HWCNT_NAMES_TNOX_H_
|
||||
#define _KBASE_GATOR_HWCNT_NAMES_TNOX_H_
|
||||
|
||||
static const char * const hardware_counters_mali_tNOx[] = {
|
||||
/* Performance counters for the Job Manager */
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"TNOx_MESSAGES_SENT",
|
||||
"TNOx_MESSAGES_RECEIVED",
|
||||
"TNOx_GPU_ACTIVE",
|
||||
"TNOx_IRQ_ACTIVE",
|
||||
"TNOx_JS0_JOBS",
|
||||
"TNOx_JS0_TASKS",
|
||||
"TNOx_JS0_ACTIVE",
|
||||
"",
|
||||
"TNOx_JS0_WAIT_READ",
|
||||
"TNOx_JS0_WAIT_ISSUE",
|
||||
"TNOx_JS0_WAIT_DEPEND",
|
||||
"TNOx_JS0_WAIT_FINISH",
|
||||
"TNOx_JS1_JOBS",
|
||||
"TNOx_JS1_TASKS",
|
||||
"TNOx_JS1_ACTIVE",
|
||||
"",
|
||||
"TNOx_JS1_WAIT_READ",
|
||||
"TNOx_JS1_WAIT_ISSUE",
|
||||
"TNOx_JS1_WAIT_DEPEND",
|
||||
"TNOx_JS1_WAIT_FINISH",
|
||||
"TNOx_JS2_JOBS",
|
||||
"TNOx_JS2_TASKS",
|
||||
"TNOx_JS2_ACTIVE",
|
||||
"",
|
||||
"TNOx_JS2_WAIT_READ",
|
||||
"TNOx_JS2_WAIT_ISSUE",
|
||||
"TNOx_JS2_WAIT_DEPEND",
|
||||
"TNOx_JS2_WAIT_FINISH",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
|
||||
/* Performance counters for the Tiler */
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"TNOx_TILER_ACTIVE",
|
||||
"TNOx_JOBS_PROCESSED",
|
||||
"TNOx_TRIANGLES",
|
||||
"TNOx_LINES",
|
||||
"TNOx_POINTS",
|
||||
"TNOx_FRONT_FACING",
|
||||
"TNOx_BACK_FACING",
|
||||
"TNOx_PRIM_VISIBLE",
|
||||
"TNOx_PRIM_CULLED",
|
||||
"TNOx_PRIM_CLIPPED",
|
||||
"TNOx_PRIM_SAT_CULLED",
|
||||
"TNOx_BIN_ALLOC_INIT",
|
||||
"TNOx_BIN_ALLOC_OVERFLOW",
|
||||
"TNOx_BUS_READ",
|
||||
"",
|
||||
"TNOx_BUS_WRITE",
|
||||
"TNOx_LOADING_DESC",
|
||||
"TNOx_IDVS_POS_SHAD_REQ",
|
||||
"TNOx_IDVS_POS_SHAD_WAIT",
|
||||
"TNOx_IDVS_POS_SHAD_STALL",
|
||||
"TNOx_IDVS_POS_FIFO_FULL",
|
||||
"TNOx_PREFETCH_STALL",
|
||||
"TNOx_VCACHE_HIT",
|
||||
"TNOx_VCACHE_MISS",
|
||||
"TNOx_VCACHE_LINE_WAIT",
|
||||
"TNOx_VFETCH_POS_READ_WAIT",
|
||||
"TNOx_VFETCH_VERTEX_WAIT",
|
||||
"TNOx_VFETCH_STALL",
|
||||
"TNOx_PRIMASSY_STALL",
|
||||
"TNOx_BBOX_GEN_STALL",
|
||||
"TNOx_IDVS_VBU_HIT",
|
||||
"TNOx_IDVS_VBU_MISS",
|
||||
"TNOx_IDVS_VBU_LINE_DEALLOCATE",
|
||||
"TNOx_IDVS_VAR_SHAD_REQ",
|
||||
"TNOx_IDVS_VAR_SHAD_STALL",
|
||||
"TNOx_BINNER_STALL",
|
||||
"TNOx_ITER_STALL",
|
||||
"TNOx_COMPRESS_MISS",
|
||||
"TNOx_COMPRESS_STALL",
|
||||
"TNOx_PCACHE_HIT",
|
||||
"TNOx_PCACHE_MISS",
|
||||
"TNOx_PCACHE_MISS_STALL",
|
||||
"TNOx_PCACHE_EVICT_STALL",
|
||||
"TNOx_PMGR_PTR_WR_STALL",
|
||||
"TNOx_PMGR_PTR_RD_STALL",
|
||||
"TNOx_PMGR_CMD_WR_STALL",
|
||||
"TNOx_WRBUF_ACTIVE",
|
||||
"TNOx_WRBUF_HIT",
|
||||
"TNOx_WRBUF_MISS",
|
||||
"TNOx_WRBUF_NO_FREE_LINE_STALL",
|
||||
"TNOx_WRBUF_NO_AXI_ID_STALL",
|
||||
"TNOx_WRBUF_AXI_STALL",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"TNOx_UTLB_TRANS",
|
||||
"TNOx_UTLB_TRANS_HIT",
|
||||
"TNOx_UTLB_TRANS_STALL",
|
||||
"TNOx_UTLB_TRANS_MISS_DELAY",
|
||||
"TNOx_UTLB_MMU_REQ",
|
||||
|
||||
/* Performance counters for the Shader Core */
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"TNOx_FRAG_ACTIVE",
|
||||
"TNOx_FRAG_PRIMITIVES",
|
||||
"TNOx_FRAG_PRIM_RAST",
|
||||
"TNOx_FRAG_FPK_ACTIVE",
|
||||
"TNOx_FRAG_STARVING",
|
||||
"TNOx_FRAG_WARPS",
|
||||
"TNOx_FRAG_PARTIAL_WARPS",
|
||||
"TNOx_FRAG_QUADS_RAST",
|
||||
"TNOx_FRAG_QUADS_EZS_TEST",
|
||||
"TNOx_FRAG_QUADS_EZS_UPDATE",
|
||||
"TNOx_FRAG_QUADS_EZS_KILL",
|
||||
"TNOx_FRAG_LZS_TEST",
|
||||
"TNOx_FRAG_LZS_KILL",
|
||||
"TNOx_WARP_REG_SIZE_64",
|
||||
"TNOx_FRAG_PTILES",
|
||||
"TNOx_FRAG_TRANS_ELIM",
|
||||
"TNOx_QUAD_FPK_KILLER",
|
||||
"TNOx_FULL_QUAD_WARPS",
|
||||
"TNOx_COMPUTE_ACTIVE",
|
||||
"TNOx_COMPUTE_TASKS",
|
||||
"TNOx_COMPUTE_WARPS",
|
||||
"TNOx_COMPUTE_STARVING",
|
||||
"TNOx_EXEC_CORE_ACTIVE",
|
||||
"TNOx_EXEC_ACTIVE",
|
||||
"TNOx_EXEC_INSTR_COUNT",
|
||||
"TNOx_EXEC_INSTR_DIVERGED",
|
||||
"TNOx_EXEC_INSTR_STARVING",
|
||||
"TNOx_ARITH_INSTR_SINGLE_FMA",
|
||||
"TNOx_ARITH_INSTR_DOUBLE",
|
||||
"TNOx_ARITH_INSTR_MSG",
|
||||
"TNOx_ARITH_INSTR_MSG_ONLY",
|
||||
"TNOx_TEX_MSGI_NUM_QUADS",
|
||||
"TNOx_TEX_DFCH_NUM_PASSES",
|
||||
"TNOx_TEX_DFCH_NUM_PASSES_MISS",
|
||||
"TNOx_TEX_DFCH_NUM_PASSES_MIP_MAP",
|
||||
"TNOx_TEX_TIDX_NUM_SPLIT_MIP_MAP",
|
||||
"TNOx_TEX_TFCH_NUM_LINES_FETCHED",
|
||||
"TNOx_TEX_TFCH_NUM_LINES_FETCHED_BLOCK",
|
||||
"TNOx_TEX_TFCH_NUM_OPERATIONS",
|
||||
"TNOx_TEX_FILT_NUM_OPERATIONS",
|
||||
"TNOx_LS_MEM_READ_FULL",
|
||||
"TNOx_LS_MEM_READ_SHORT",
|
||||
"TNOx_LS_MEM_WRITE_FULL",
|
||||
"TNOx_LS_MEM_WRITE_SHORT",
|
||||
"TNOx_LS_MEM_ATOMIC",
|
||||
"TNOx_VARY_INSTR",
|
||||
"TNOx_VARY_SLOT_32",
|
||||
"TNOx_VARY_SLOT_16",
|
||||
"TNOx_ATTR_INSTR",
|
||||
"TNOx_ARITH_INSTR_FP_MUL",
|
||||
"TNOx_BEATS_RD_FTC",
|
||||
"TNOx_BEATS_RD_FTC_EXT",
|
||||
"TNOx_BEATS_RD_LSC",
|
||||
"TNOx_BEATS_RD_LSC_EXT",
|
||||
"TNOx_BEATS_RD_TEX",
|
||||
"TNOx_BEATS_RD_TEX_EXT",
|
||||
"TNOx_BEATS_RD_OTHER",
|
||||
"TNOx_BEATS_WR_LSC_OTHER",
|
||||
"TNOx_BEATS_WR_TIB",
|
||||
"TNOx_BEATS_WR_LSC_WB",
|
||||
|
||||
/* Performance counters for the Memory System */
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"TNOx_MMU_REQUESTS",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"TNOx_L2_RD_MSG_IN",
|
||||
"TNOx_L2_RD_MSG_IN_STALL",
|
||||
"TNOx_L2_WR_MSG_IN",
|
||||
"TNOx_L2_WR_MSG_IN_STALL",
|
||||
"TNOx_L2_SNP_MSG_IN",
|
||||
"TNOx_L2_SNP_MSG_IN_STALL",
|
||||
"TNOx_L2_RD_MSG_OUT",
|
||||
"TNOx_L2_RD_MSG_OUT_STALL",
|
||||
"TNOx_L2_WR_MSG_OUT",
|
||||
"TNOx_L2_ANY_LOOKUP",
|
||||
"TNOx_L2_READ_LOOKUP",
|
||||
"TNOx_L2_WRITE_LOOKUP",
|
||||
"TNOx_L2_EXT_SNOOP_LOOKUP",
|
||||
"TNOx_L2_EXT_READ",
|
||||
"TNOx_L2_EXT_READ_NOSNP",
|
||||
"TNOx_L2_EXT_READ_UNIQUE",
|
||||
"TNOx_L2_EXT_READ_BEATS",
|
||||
"TNOx_L2_EXT_AR_STALL",
|
||||
"TNOx_L2_EXT_AR_CNT_Q1",
|
||||
"TNOx_L2_EXT_AR_CNT_Q2",
|
||||
"TNOx_L2_EXT_AR_CNT_Q3",
|
||||
"TNOx_L2_EXT_RRESP_0_127",
|
||||
"TNOx_L2_EXT_RRESP_128_191",
|
||||
"TNOx_L2_EXT_RRESP_192_255",
|
||||
"TNOx_L2_EXT_RRESP_256_319",
|
||||
"TNOx_L2_EXT_RRESP_320_383",
|
||||
"TNOx_L2_EXT_WRITE",
|
||||
"TNOx_L2_EXT_WRITE_NOSNP_FULL",
|
||||
"TNOx_L2_EXT_WRITE_NOSNP_PTL",
|
||||
"TNOx_L2_EXT_WRITE_SNP_FULL",
|
||||
"TNOx_L2_EXT_WRITE_SNP_PTL",
|
||||
"TNOx_L2_EXT_WRITE_BEATS",
|
||||
"TNOx_L2_EXT_W_STALL",
|
||||
"TNOx_L2_EXT_AW_CNT_Q1",
|
||||
"TNOx_L2_EXT_AW_CNT_Q2",
|
||||
"TNOx_L2_EXT_AW_CNT_Q3",
|
||||
"TNOx_L2_EXT_SNOOP",
|
||||
"TNOx_L2_EXT_SNOOP_STALL",
|
||||
"TNOx_L2_EXT_SNOOP_RESP_CLEAN",
|
||||
"TNOx_L2_EXT_SNOOP_RESP_DATA",
|
||||
"TNOx_L2_EXT_SNOOP_INTERNAL",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
};
|
||||
|
||||
#endif /* _KBASE_GATOR_HWCNT_NAMES_TNOX_H_ */
|
||||
@@ -1,296 +0,0 @@
|
||||
/*
|
||||
*
|
||||
* (C) COPYRIGHT 2016-2018 ARM Limited. All rights reserved.
|
||||
*
|
||||
* This program is free software and is provided to you under the terms of the
|
||||
* GNU General Public License version 2 as published by the Free Software
|
||||
* Foundation, and any use by you of this program is subject to the terms
|
||||
* of such GNU licence.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, you can access it online at
|
||||
* http://www.gnu.org/licenses/gpl-2.0.html.
|
||||
*
|
||||
* SPDX-License-Identifier: GPL-2.0
|
||||
*
|
||||
*/
|
||||
|
||||
/*
|
||||
* This header was autogenerated, it should not be edited.
|
||||
*/
|
||||
|
||||
#ifndef _KBASE_GATOR_HWCNT_NAMES_TSIX_H_
|
||||
#define _KBASE_GATOR_HWCNT_NAMES_TSIX_H_
|
||||
|
||||
static const char * const hardware_counters_mali_tSIx[] = {
|
||||
/* Performance counters for the Job Manager */
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"TSIx_MESSAGES_SENT",
|
||||
"TSIx_MESSAGES_RECEIVED",
|
||||
"TSIx_GPU_ACTIVE",
|
||||
"TSIx_IRQ_ACTIVE",
|
||||
"TSIx_JS0_JOBS",
|
||||
"TSIx_JS0_TASKS",
|
||||
"TSIx_JS0_ACTIVE",
|
||||
"",
|
||||
"TSIx_JS0_WAIT_READ",
|
||||
"TSIx_JS0_WAIT_ISSUE",
|
||||
"TSIx_JS0_WAIT_DEPEND",
|
||||
"TSIx_JS0_WAIT_FINISH",
|
||||
"TSIx_JS1_JOBS",
|
||||
"TSIx_JS1_TASKS",
|
||||
"TSIx_JS1_ACTIVE",
|
||||
"",
|
||||
"TSIx_JS1_WAIT_READ",
|
||||
"TSIx_JS1_WAIT_ISSUE",
|
||||
"TSIx_JS1_WAIT_DEPEND",
|
||||
"TSIx_JS1_WAIT_FINISH",
|
||||
"TSIx_JS2_JOBS",
|
||||
"TSIx_JS2_TASKS",
|
||||
"TSIx_JS2_ACTIVE",
|
||||
"",
|
||||
"TSIx_JS2_WAIT_READ",
|
||||
"TSIx_JS2_WAIT_ISSUE",
|
||||
"TSIx_JS2_WAIT_DEPEND",
|
||||
"TSIx_JS2_WAIT_FINISH",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
|
||||
/* Performance counters for the Tiler */
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"TSIx_TILER_ACTIVE",
|
||||
"TSIx_JOBS_PROCESSED",
|
||||
"TSIx_TRIANGLES",
|
||||
"TSIx_LINES",
|
||||
"TSIx_POINTS",
|
||||
"TSIx_FRONT_FACING",
|
||||
"TSIx_BACK_FACING",
|
||||
"TSIx_PRIM_VISIBLE",
|
||||
"TSIx_PRIM_CULLED",
|
||||
"TSIx_PRIM_CLIPPED",
|
||||
"TSIx_PRIM_SAT_CULLED",
|
||||
"TSIx_BIN_ALLOC_INIT",
|
||||
"TSIx_BIN_ALLOC_OVERFLOW",
|
||||
"TSIx_BUS_READ",
|
||||
"",
|
||||
"TSIx_BUS_WRITE",
|
||||
"TSIx_LOADING_DESC",
|
||||
"TSIx_IDVS_POS_SHAD_REQ",
|
||||
"TSIx_IDVS_POS_SHAD_WAIT",
|
||||
"TSIx_IDVS_POS_SHAD_STALL",
|
||||
"TSIx_IDVS_POS_FIFO_FULL",
|
||||
"TSIx_PREFETCH_STALL",
|
||||
"TSIx_VCACHE_HIT",
|
||||
"TSIx_VCACHE_MISS",
|
||||
"TSIx_VCACHE_LINE_WAIT",
|
||||
"TSIx_VFETCH_POS_READ_WAIT",
|
||||
"TSIx_VFETCH_VERTEX_WAIT",
|
||||
"TSIx_VFETCH_STALL",
|
||||
"TSIx_PRIMASSY_STALL",
|
||||
"TSIx_BBOX_GEN_STALL",
|
||||
"TSIx_IDVS_VBU_HIT",
|
||||
"TSIx_IDVS_VBU_MISS",
|
||||
"TSIx_IDVS_VBU_LINE_DEALLOCATE",
|
||||
"TSIx_IDVS_VAR_SHAD_REQ",
|
||||
"TSIx_IDVS_VAR_SHAD_STALL",
|
||||
"TSIx_BINNER_STALL",
|
||||
"TSIx_ITER_STALL",
|
||||
"TSIx_COMPRESS_MISS",
|
||||
"TSIx_COMPRESS_STALL",
|
||||
"TSIx_PCACHE_HIT",
|
||||
"TSIx_PCACHE_MISS",
|
||||
"TSIx_PCACHE_MISS_STALL",
|
||||
"TSIx_PCACHE_EVICT_STALL",
|
||||
"TSIx_PMGR_PTR_WR_STALL",
|
||||
"TSIx_PMGR_PTR_RD_STALL",
|
||||
"TSIx_PMGR_CMD_WR_STALL",
|
||||
"TSIx_WRBUF_ACTIVE",
|
||||
"TSIx_WRBUF_HIT",
|
||||
"TSIx_WRBUF_MISS",
|
||||
"TSIx_WRBUF_NO_FREE_LINE_STALL",
|
||||
"TSIx_WRBUF_NO_AXI_ID_STALL",
|
||||
"TSIx_WRBUF_AXI_STALL",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"TSIx_UTLB_TRANS",
|
||||
"TSIx_UTLB_TRANS_HIT",
|
||||
"TSIx_UTLB_TRANS_STALL",
|
||||
"TSIx_UTLB_TRANS_MISS_DELAY",
|
||||
"TSIx_UTLB_MMU_REQ",
|
||||
|
||||
/* Performance counters for the Shader Core */
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"TSIx_FRAG_ACTIVE",
|
||||
"TSIx_FRAG_PRIMITIVES",
|
||||
"TSIx_FRAG_PRIM_RAST",
|
||||
"TSIx_FRAG_FPK_ACTIVE",
|
||||
"TSIx_FRAG_STARVING",
|
||||
"TSIx_FRAG_WARPS",
|
||||
"TSIx_FRAG_PARTIAL_WARPS",
|
||||
"TSIx_FRAG_QUADS_RAST",
|
||||
"TSIx_FRAG_QUADS_EZS_TEST",
|
||||
"TSIx_FRAG_QUADS_EZS_UPDATE",
|
||||
"TSIx_FRAG_QUADS_EZS_KILL",
|
||||
"TSIx_FRAG_LZS_TEST",
|
||||
"TSIx_FRAG_LZS_KILL",
|
||||
"",
|
||||
"TSIx_FRAG_PTILES",
|
||||
"TSIx_FRAG_TRANS_ELIM",
|
||||
"TSIx_QUAD_FPK_KILLER",
|
||||
"",
|
||||
"TSIx_COMPUTE_ACTIVE",
|
||||
"TSIx_COMPUTE_TASKS",
|
||||
"TSIx_COMPUTE_WARPS",
|
||||
"TSIx_COMPUTE_STARVING",
|
||||
"TSIx_EXEC_CORE_ACTIVE",
|
||||
"TSIx_EXEC_ACTIVE",
|
||||
"TSIx_EXEC_INSTR_COUNT",
|
||||
"TSIx_EXEC_INSTR_DIVERGED",
|
||||
"TSIx_EXEC_INSTR_STARVING",
|
||||
"TSIx_ARITH_INSTR_SINGLE_FMA",
|
||||
"TSIx_ARITH_INSTR_DOUBLE",
|
||||
"TSIx_ARITH_INSTR_MSG",
|
||||
"TSIx_ARITH_INSTR_MSG_ONLY",
|
||||
"TSIx_TEX_MSGI_NUM_QUADS",
|
||||
"TSIx_TEX_DFCH_NUM_PASSES",
|
||||
"TSIx_TEX_DFCH_NUM_PASSES_MISS",
|
||||
"TSIx_TEX_DFCH_NUM_PASSES_MIP_MAP",
|
||||
"TSIx_TEX_TIDX_NUM_SPLIT_MIP_MAP",
|
||||
"TSIx_TEX_TFCH_NUM_LINES_FETCHED",
|
||||
"TSIx_TEX_TFCH_NUM_LINES_FETCHED_BLOCK",
|
||||
"TSIx_TEX_TFCH_NUM_OPERATIONS",
|
||||
"TSIx_TEX_FILT_NUM_OPERATIONS",
|
||||
"TSIx_LS_MEM_READ_FULL",
|
||||
"TSIx_LS_MEM_READ_SHORT",
|
||||
"TSIx_LS_MEM_WRITE_FULL",
|
||||
"TSIx_LS_MEM_WRITE_SHORT",
|
||||
"TSIx_LS_MEM_ATOMIC",
|
||||
"TSIx_VARY_INSTR",
|
||||
"TSIx_VARY_SLOT_32",
|
||||
"TSIx_VARY_SLOT_16",
|
||||
"TSIx_ATTR_INSTR",
|
||||
"TSIx_ARITH_INSTR_FP_MUL",
|
||||
"TSIx_BEATS_RD_FTC",
|
||||
"TSIx_BEATS_RD_FTC_EXT",
|
||||
"TSIx_BEATS_RD_LSC",
|
||||
"TSIx_BEATS_RD_LSC_EXT",
|
||||
"TSIx_BEATS_RD_TEX",
|
||||
"TSIx_BEATS_RD_TEX_EXT",
|
||||
"TSIx_BEATS_RD_OTHER",
|
||||
"TSIx_BEATS_WR_LSC_OTHER",
|
||||
"TSIx_BEATS_WR_TIB",
|
||||
"TSIx_BEATS_WR_LSC_WB",
|
||||
|
||||
/* Performance counters for the Memory System */
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"TSIx_MMU_REQUESTS",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"TSIx_L2_RD_MSG_IN",
|
||||
"TSIx_L2_RD_MSG_IN_STALL",
|
||||
"TSIx_L2_WR_MSG_IN",
|
||||
"TSIx_L2_WR_MSG_IN_STALL",
|
||||
"TSIx_L2_SNP_MSG_IN",
|
||||
"TSIx_L2_SNP_MSG_IN_STALL",
|
||||
"TSIx_L2_RD_MSG_OUT",
|
||||
"TSIx_L2_RD_MSG_OUT_STALL",
|
||||
"TSIx_L2_WR_MSG_OUT",
|
||||
"TSIx_L2_ANY_LOOKUP",
|
||||
"TSIx_L2_READ_LOOKUP",
|
||||
"TSIx_L2_WRITE_LOOKUP",
|
||||
"TSIx_L2_EXT_SNOOP_LOOKUP",
|
||||
"TSIx_L2_EXT_READ",
|
||||
"TSIx_L2_EXT_READ_NOSNP",
|
||||
"TSIx_L2_EXT_READ_UNIQUE",
|
||||
"TSIx_L2_EXT_READ_BEATS",
|
||||
"TSIx_L2_EXT_AR_STALL",
|
||||
"TSIx_L2_EXT_AR_CNT_Q1",
|
||||
"TSIx_L2_EXT_AR_CNT_Q2",
|
||||
"TSIx_L2_EXT_AR_CNT_Q3",
|
||||
"TSIx_L2_EXT_RRESP_0_127",
|
||||
"TSIx_L2_EXT_RRESP_128_191",
|
||||
"TSIx_L2_EXT_RRESP_192_255",
|
||||
"TSIx_L2_EXT_RRESP_256_319",
|
||||
"TSIx_L2_EXT_RRESP_320_383",
|
||||
"TSIx_L2_EXT_WRITE",
|
||||
"TSIx_L2_EXT_WRITE_NOSNP_FULL",
|
||||
"TSIx_L2_EXT_WRITE_NOSNP_PTL",
|
||||
"TSIx_L2_EXT_WRITE_SNP_FULL",
|
||||
"TSIx_L2_EXT_WRITE_SNP_PTL",
|
||||
"TSIx_L2_EXT_WRITE_BEATS",
|
||||
"TSIx_L2_EXT_W_STALL",
|
||||
"TSIx_L2_EXT_AW_CNT_Q1",
|
||||
"TSIx_L2_EXT_AW_CNT_Q2",
|
||||
"TSIx_L2_EXT_AW_CNT_Q3",
|
||||
"TSIx_L2_EXT_SNOOP",
|
||||
"TSIx_L2_EXT_SNOOP_STALL",
|
||||
"TSIx_L2_EXT_SNOOP_RESP_CLEAN",
|
||||
"TSIx_L2_EXT_SNOOP_RESP_DATA",
|
||||
"TSIx_L2_EXT_SNOOP_INTERNAL",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
};
|
||||
|
||||
#endif /* _KBASE_GATOR_HWCNT_NAMES_TSIX_H_ */
|
||||
@@ -1,296 +0,0 @@
|
||||
/*
|
||||
*
|
||||
* (C) COPYRIGHT 2016-2018 ARM Limited. All rights reserved.
|
||||
*
|
||||
* This program is free software and is provided to you under the terms of the
|
||||
* GNU General Public License version 2 as published by the Free Software
|
||||
* Foundation, and any use by you of this program is subject to the terms
|
||||
* of such GNU licence.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, you can access it online at
|
||||
* http://www.gnu.org/licenses/gpl-2.0.html.
|
||||
*
|
||||
* SPDX-License-Identifier: GPL-2.0
|
||||
*
|
||||
*/
|
||||
|
||||
/*
|
||||
* This header was autogenerated, it should not be edited.
|
||||
*/
|
||||
|
||||
#ifndef _KBASE_GATOR_HWCNT_NAMES_TTRX_H_
|
||||
#define _KBASE_GATOR_HWCNT_NAMES_TTRX_H_
|
||||
|
||||
static const char * const hardware_counters_mali_tTRx[] = {
|
||||
/* Performance counters for the Job Manager */
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"TTRx_MESSAGES_SENT",
|
||||
"TTRx_MESSAGES_RECEIVED",
|
||||
"TTRx_GPU_ACTIVE",
|
||||
"TTRx_IRQ_ACTIVE",
|
||||
"TTRx_JS0_JOBS",
|
||||
"TTRx_JS0_TASKS",
|
||||
"TTRx_JS0_ACTIVE",
|
||||
"",
|
||||
"TTRx_JS0_WAIT_READ",
|
||||
"TTRx_JS0_WAIT_ISSUE",
|
||||
"TTRx_JS0_WAIT_DEPEND",
|
||||
"TTRx_JS0_WAIT_FINISH",
|
||||
"TTRx_JS1_JOBS",
|
||||
"TTRx_JS1_TASKS",
|
||||
"TTRx_JS1_ACTIVE",
|
||||
"",
|
||||
"TTRx_JS1_WAIT_READ",
|
||||
"TTRx_JS1_WAIT_ISSUE",
|
||||
"TTRx_JS1_WAIT_DEPEND",
|
||||
"TTRx_JS1_WAIT_FINISH",
|
||||
"TTRx_JS2_JOBS",
|
||||
"TTRx_JS2_TASKS",
|
||||
"TTRx_JS2_ACTIVE",
|
||||
"",
|
||||
"TTRx_JS2_WAIT_READ",
|
||||
"TTRx_JS2_WAIT_ISSUE",
|
||||
"TTRx_JS2_WAIT_DEPEND",
|
||||
"TTRx_JS2_WAIT_FINISH",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
|
||||
/* Performance counters for the Tiler */
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"TTRx_TILER_ACTIVE",
|
||||
"TTRx_JOBS_PROCESSED",
|
||||
"TTRx_TRIANGLES",
|
||||
"TTRx_LINES",
|
||||
"TTRx_POINTS",
|
||||
"TTRx_FRONT_FACING",
|
||||
"TTRx_BACK_FACING",
|
||||
"TTRx_PRIM_VISIBLE",
|
||||
"TTRx_PRIM_CULLED",
|
||||
"TTRx_PRIM_CLIPPED",
|
||||
"TTRx_PRIM_SAT_CULLED",
|
||||
"TTRx_BIN_ALLOC_INIT",
|
||||
"TTRx_BIN_ALLOC_OVERFLOW",
|
||||
"TTRx_BUS_READ",
|
||||
"",
|
||||
"TTRx_BUS_WRITE",
|
||||
"TTRx_LOADING_DESC",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"TTRx_PREFETCH_STALL",
|
||||
"TTRx_VCACHE_HIT",
|
||||
"TTRx_VCACHE_MISS",
|
||||
"TTRx_VCACHE_LINE_WAIT",
|
||||
"TTRx_VFETCH_POS_READ_WAIT",
|
||||
"TTRx_VFETCH_VERTEX_WAIT",
|
||||
"TTRx_VFETCH_STALL",
|
||||
"TTRx_PRIMASSY_STALL",
|
||||
"TTRx_BBOX_GEN_STALL",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"TTRx_BINNER_STALL",
|
||||
"TTRx_ITER_STALL",
|
||||
"TTRx_COMPRESS_MISS",
|
||||
"TTRx_COMPRESS_STALL",
|
||||
"TTRx_PCACHE_HIT",
|
||||
"TTRx_PCACHE_MISS",
|
||||
"TTRx_PCACHE_MISS_STALL",
|
||||
"TTRx_PCACHE_EVICT_STALL",
|
||||
"TTRx_PMGR_PTR_WR_STALL",
|
||||
"TTRx_PMGR_PTR_RD_STALL",
|
||||
"TTRx_PMGR_CMD_WR_STALL",
|
||||
"TTRx_WRBUF_ACTIVE",
|
||||
"TTRx_WRBUF_HIT",
|
||||
"TTRx_WRBUF_MISS",
|
||||
"TTRx_WRBUF_NO_FREE_LINE_STALL",
|
||||
"TTRx_WRBUF_NO_AXI_ID_STALL",
|
||||
"TTRx_WRBUF_AXI_STALL",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"TTRx_UTLB_TRANS",
|
||||
"TTRx_UTLB_TRANS_HIT",
|
||||
"TTRx_UTLB_TRANS_STALL",
|
||||
"TTRx_UTLB_TRANS_MISS_DELAY",
|
||||
"TTRx_UTLB_MMU_REQ",
|
||||
|
||||
/* Performance counters for the Shader Core */
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"TTRx_FRAG_ACTIVE",
|
||||
"TTRx_FRAG_PRIMITIVES",
|
||||
"TTRx_FRAG_PRIM_RAST",
|
||||
"TTRx_FRAG_FPK_ACTIVE",
|
||||
"TTRx_FRAG_STARVING",
|
||||
"TTRx_FRAG_WARPS",
|
||||
"TTRx_FRAG_PARTIAL_WARPS",
|
||||
"TTRx_FRAG_QUADS_RAST",
|
||||
"TTRx_FRAG_QUADS_EZS_TEST",
|
||||
"TTRx_FRAG_QUADS_EZS_UPDATE",
|
||||
"TTRx_FRAG_QUADS_EZS_KILL",
|
||||
"TTRx_FRAG_LZS_TEST",
|
||||
"TTRx_FRAG_LZS_KILL",
|
||||
"TTRx_WARP_REG_SIZE_64",
|
||||
"TTRx_FRAG_PTILES",
|
||||
"TTRx_FRAG_TRANS_ELIM",
|
||||
"TTRx_QUAD_FPK_KILLER",
|
||||
"TTRx_FULL_QUAD_WARPS",
|
||||
"TTRx_COMPUTE_ACTIVE",
|
||||
"TTRx_COMPUTE_TASKS",
|
||||
"TTRx_COMPUTE_WARPS",
|
||||
"TTRx_COMPUTE_STARVING",
|
||||
"TTRx_EXEC_CORE_ACTIVE",
|
||||
"TTRx_EXEC_INSTR_FMA",
|
||||
"TTRx_EXEC_INSTR_CVT",
|
||||
"TTRx_EXEC_INSTR_SFU",
|
||||
"TTRx_EXEC_INSTR_MSG",
|
||||
"TTRx_EXEC_INSTR_DIVERGED",
|
||||
"TTRx_EXEC_ICACHE_MISS",
|
||||
"TTRx_EXEC_STARVE_ARITH",
|
||||
"TTRx_CALL_BLEND_SHADER",
|
||||
"TTRx_TEX_INSTR",
|
||||
"TTRx_TEX_INSTR_MIPMAP",
|
||||
"TTRx_TEX_INSTR_COMPRESSED",
|
||||
"TTRx_TEX_INSTR_3D",
|
||||
"TTRx_TEX_INSTR_TRILINEAR",
|
||||
"TTRx_TEX_COORD_ISSUE",
|
||||
"TTRx_TEX_COORD_STALL",
|
||||
"TTRx_TEX_STARVE_CACHE",
|
||||
"TTRx_TEX_STARVE_FILTER",
|
||||
"TTRx_LS_MEM_READ_FULL",
|
||||
"TTRx_LS_MEM_READ_SHORT",
|
||||
"TTRx_LS_MEM_WRITE_FULL",
|
||||
"TTRx_LS_MEM_WRITE_SHORT",
|
||||
"TTRx_LS_MEM_ATOMIC",
|
||||
"TTRx_VARY_INSTR",
|
||||
"TTRx_VARY_SLOT_32",
|
||||
"TTRx_VARY_SLOT_16",
|
||||
"TTRx_ATTR_INSTR",
|
||||
"TTRx_ARITH_INSTR_FP_MUL",
|
||||
"TTRx_BEATS_RD_FTC",
|
||||
"TTRx_BEATS_RD_FTC_EXT",
|
||||
"TTRx_BEATS_RD_LSC",
|
||||
"TTRx_BEATS_RD_LSC_EXT",
|
||||
"TTRx_BEATS_RD_TEX",
|
||||
"TTRx_BEATS_RD_TEX_EXT",
|
||||
"TTRx_BEATS_RD_OTHER",
|
||||
"",
|
||||
"TTRx_BEATS_WR_TIB",
|
||||
"TTRx_BEATS_WR_LSC",
|
||||
|
||||
/* Performance counters for the Memory System */
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"TTRx_MMU_REQUESTS",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"TTRx_L2_RD_MSG_IN",
|
||||
"TTRx_L2_RD_MSG_IN_STALL",
|
||||
"TTRx_L2_WR_MSG_IN",
|
||||
"TTRx_L2_WR_MSG_IN_STALL",
|
||||
"TTRx_L2_SNP_MSG_IN",
|
||||
"TTRx_L2_SNP_MSG_IN_STALL",
|
||||
"TTRx_L2_RD_MSG_OUT",
|
||||
"TTRx_L2_RD_MSG_OUT_STALL",
|
||||
"TTRx_L2_WR_MSG_OUT",
|
||||
"TTRx_L2_ANY_LOOKUP",
|
||||
"TTRx_L2_READ_LOOKUP",
|
||||
"TTRx_L2_WRITE_LOOKUP",
|
||||
"TTRx_L2_EXT_SNOOP_LOOKUP",
|
||||
"TTRx_L2_EXT_READ",
|
||||
"TTRx_L2_EXT_READ_NOSNP",
|
||||
"TTRx_L2_EXT_READ_UNIQUE",
|
||||
"TTRx_L2_EXT_READ_BEATS",
|
||||
"TTRx_L2_EXT_AR_STALL",
|
||||
"TTRx_L2_EXT_AR_CNT_Q1",
|
||||
"TTRx_L2_EXT_AR_CNT_Q2",
|
||||
"TTRx_L2_EXT_AR_CNT_Q3",
|
||||
"TTRx_L2_EXT_RRESP_0_127",
|
||||
"TTRx_L2_EXT_RRESP_128_191",
|
||||
"TTRx_L2_EXT_RRESP_192_255",
|
||||
"TTRx_L2_EXT_RRESP_256_319",
|
||||
"TTRx_L2_EXT_RRESP_320_383",
|
||||
"TTRx_L2_EXT_WRITE",
|
||||
"TTRx_L2_EXT_WRITE_NOSNP_FULL",
|
||||
"TTRx_L2_EXT_WRITE_NOSNP_PTL",
|
||||
"TTRx_L2_EXT_WRITE_SNP_FULL",
|
||||
"TTRx_L2_EXT_WRITE_SNP_PTL",
|
||||
"TTRx_L2_EXT_WRITE_BEATS",
|
||||
"TTRx_L2_EXT_W_STALL",
|
||||
"TTRx_L2_EXT_AW_CNT_Q1",
|
||||
"TTRx_L2_EXT_AW_CNT_Q2",
|
||||
"TTRx_L2_EXT_AW_CNT_Q3",
|
||||
"TTRx_L2_EXT_SNOOP",
|
||||
"TTRx_L2_EXT_SNOOP_STALL",
|
||||
"TTRx_L2_EXT_SNOOP_RESP_CLEAN",
|
||||
"TTRx_L2_EXT_SNOOP_RESP_DATA",
|
||||
"TTRx_L2_EXT_SNOOP_INTERNAL",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
};
|
||||
|
||||
#endif /* _KBASE_GATOR_HWCNT_NAMES_TTRX_H_ */
|
||||
@@ -1,6 +1,6 @@
|
||||
/*
|
||||
*
|
||||
* (C) COPYRIGHT 2015-2018 ARM Limited. All rights reserved.
|
||||
* (C) COPYRIGHT 2015-2019 ARM Limited. All rights reserved.
|
||||
*
|
||||
* This program is free software and is provided to you under the terms of the
|
||||
* GNU General Public License version 2 as published by the Free Software
|
||||
@@ -32,22 +32,6 @@
|
||||
#define GPU_ID_VERSION_MAJOR (0xFu << GPU_ID_VERSION_MAJOR_SHIFT)
|
||||
#define GPU_ID_VERSION_PRODUCT_ID (0xFFFFu << GPU_ID_VERSION_PRODUCT_ID_SHIFT)
|
||||
|
||||
/* Values for GPU_ID_VERSION_PRODUCT_ID bitfield */
|
||||
#define GPU_ID_PI_T60X 0x6956u
|
||||
#define GPU_ID_PI_T62X 0x0620u
|
||||
#define GPU_ID_PI_T76X 0x0750u
|
||||
#define GPU_ID_PI_T72X 0x0720u
|
||||
#define GPU_ID_PI_TFRX 0x0880u
|
||||
#define GPU_ID_PI_T86X 0x0860u
|
||||
#define GPU_ID_PI_T82X 0x0820u
|
||||
#define GPU_ID_PI_T83X 0x0830u
|
||||
|
||||
/* New GPU ID format when PRODUCT_ID is >= 0x1000 (and not 0x6956) */
|
||||
#define GPU_ID_PI_NEW_FORMAT_START 0x1000
|
||||
#define GPU_ID_IS_NEW_FORMAT(product_id) ((product_id) != GPU_ID_PI_T60X && \
|
||||
(product_id) >= \
|
||||
GPU_ID_PI_NEW_FORMAT_START)
|
||||
|
||||
#define GPU_ID2_VERSION_STATUS_SHIFT 0
|
||||
#define GPU_ID2_VERSION_MINOR_SHIFT 4
|
||||
#define GPU_ID2_VERSION_MAJOR_SHIFT 12
|
||||
@@ -109,13 +93,16 @@
|
||||
#define GPU_ID2_PRODUCT_TDVX GPU_ID2_MODEL_MAKE(7, 3)
|
||||
#define GPU_ID2_PRODUCT_TNOX GPU_ID2_MODEL_MAKE(7, 1)
|
||||
#define GPU_ID2_PRODUCT_TGOX GPU_ID2_MODEL_MAKE(7, 2)
|
||||
#define GPU_ID2_PRODUCT_TKAX GPU_ID2_MODEL_MAKE(8, 0)
|
||||
#define GPU_ID2_PRODUCT_TTRX GPU_ID2_MODEL_MAKE(9, 0)
|
||||
#define GPU_ID2_PRODUCT_TBOX GPU_ID2_MODEL_MAKE(8, 2)
|
||||
|
||||
/* Values for GPU_ID_VERSION_STATUS field for PRODUCT_ID GPU_ID_PI_T60X */
|
||||
#define GPU_ID_S_15DEV0 0x1
|
||||
#define GPU_ID_S_EAC 0x2
|
||||
#define GPU_ID2_PRODUCT_TNAX GPU_ID2_MODEL_MAKE(9, 1)
|
||||
#define GPU_ID2_PRODUCT_TBEX GPU_ID2_MODEL_MAKE(9, 2)
|
||||
#define GPU_ID2_PRODUCT_LBEX GPU_ID2_MODEL_MAKE(9, 4)
|
||||
#define GPU_ID2_PRODUCT_TULX GPU_ID2_MODEL_MAKE(10, 0)
|
||||
#define GPU_ID2_PRODUCT_TDUX GPU_ID2_MODEL_MAKE(10, 1)
|
||||
#define GPU_ID2_PRODUCT_TODX GPU_ID2_MODEL_MAKE(10, 2)
|
||||
#define GPU_ID2_PRODUCT_TIDX GPU_ID2_MODEL_MAKE(10, 3)
|
||||
#define GPU_ID2_PRODUCT_TVAX GPU_ID2_MODEL_MAKE(10, 4)
|
||||
#define GPU_ID2_PRODUCT_LODX GPU_ID2_MODEL_MAKE(10, 5)
|
||||
|
||||
/* Helper macro to create a GPU_ID assuming valid values for id, major,
|
||||
minor, status */
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user